---
title: "Analyzing test metabolomics data"
author: "Ariana S Huffmyer"
date: "2024"
output: 
  html_document:
    code_folding: hide
    toc: yes
    toc_depth: 6
    toc_float: yes
editor_options: 
  chunk_output_type: console
--- 

# Set Up    

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message=FALSE)
```

Load libraries. 

```{r}
library(tidyverse)
library(ggplot2)
library(readxl)
library(vegan)
library(factoextra)
library(ggfortify)
library(ComplexHeatmap)
library(viridis)
```

# 1. Metabolite relative concentration

## Load and prepare data 

```{r}
data<-read_xlsx("M-multi-species/data/metabolomics/test_Metabolites.xlsx", sheet="Relative Quant Data", skip=1)%>%
  select(!`HMDB ID`)%>%
  select(!`KEGG ID`)%>%
  mutate(across(everything(), ~ na_if(.x, "N/A")))
```

Convert to numeric format. 

```{r}
# Keep the "Compound" column unchanged, convert all other columns to numeric
data_wide <- as.data.frame(lapply(names(data), function(col) {
  if (col == "Compound") {
    return(data[[col]])  # Keep the "Compound" column as is
  } else {
    # Convert other columns to character first, then to numeric
    return(as.numeric(as.character(data[[col]])))
  }
}))

# Ensure the column names remain unchanged
colnames(data_wide) <- names(data)

str(data_wide)
```

There are 404 compounds in this list. Some are not measured in our samples, some are measured in <4 samples, some are measured in all 4 samples. 

First, remove all compounds that are not detected in all 4 samples. We cannot separate an NA from a true 0 because a metabolite may be present, but just below the detection limit. 

```{r}
data_wide<-data_wide%>%
  drop_na()
```

There are 68 metabolites that were measured in all four samples, including 4 internal standards.  

Select the internal standards. Glutamic acid had a CV of 0.2%, so we will normalize to this standard. This was the most consistent standard reading. 

```{r}
standards<-data_wide%>%
  filter(str_detect(Compound, "C13"))%>%
  filter(Compound=="2C13-Glutamic acid")

data_wide<-data_wide%>%
  filter(!str_detect(Compound, "C13"))
```

Change to long format and add in standard value for each sample. 
```{r}
data_long<-data_wide%>%
  pivot_longer(names_to="sample", values_to="concentration", cols=c(2:5))

standards<-standards%>%
  pivot_longer(names_to="sample", values_to="standard", cols=c(2:5))%>%
  select(!Compound)

data_long<-left_join(data_long, standards)
```

Calculate normalized concentration to internal standard. 
```{r}
data_long$conc_norm<-data_long$concentration/data_long$standard
```

Add in metadata manually. 

```{r}
data_long<-data_long%>%
  mutate(species=if_else(sample=="70 Host", "Porites", 
                         if_else(sample=="68 Host", "Pocillopora", 
                                 if_else(sample=="140 Host", "Acropora", 
                                         if_else(sample=="165 Host", "Acropora", NA)))))%>%
  mutate(tissue.mg=if_else(sample=="70 Host", 16.63, 
                         if_else(sample=="68 Host", 53.56, 
                                 if_else(sample=="140 Host", 5.84, 
                                         if_else(sample=="165 Host", 17.06, NA)))))
```

Normalize to tissue input. 

```{r}
data_long$conc_norm_mg<-data_long$conc_norm/data_long$tissue.mg
```

Data are now normalized. 

## Generate a list of metabolites in the data set 

```{r}
unique(data_long$Compound)
```

[1] "Alanine"                                "Cadaverine"                            
 [3] "Choline"                                "Serine"                                
 [5] "Cytosine"                               "Leucine /D-Norleucine"                 
 [7] "Ornithine"                              "Asparagine"                            
 [9] "Aspartic Acid"                          "Tyramine"                              
[11] "Trigonelline"                           "4-Guanidinobutanoate"                  
[13] "Glutamine"                              "Lysine"                                
[15] "Glutamic acid"                          "Guanine"                               
[17] "Histidine"                              "Tryptamine"                            
[19] "Carnitine"                              "Phenylalanine"                         
[21] "Methionine Sulfoxide"                   "Arginine"                              
[23] "N6-Trimethyllysine"                     "Caffeine"                              
[25] "Dimethylarginine (A/SDMA)"              "Tryptophan"                            
[27] "2'-Deoxycytidine"                       "Cytidine"                              
[29] "Uridine"                                "isoValerylcarnitine"                   
[31] "Glycerophosphocholine"                  "Hexanoyl-L-Carnitine (6:0 Carnitine)"  
[33] "Deoxyguanosine"                         "Adenosine"                             
[35] "Inosine"                                "Oleamide"                              
[37] "Retinol"                                "Octanoyl-L-Carnitine (8:0 Carnitine)"  
[39] "5'-Methylthioadenosine"                 "GMP"                                   
[41] "Myristoyl-L-Carnitine (14:0 Carnitine)" "Riboflavin"                            
[43] "Cholecalciferol"                        "Ergocalciferol"                        
[45] "S-Adenosylmethionine (SAM)"             "PAF C-16"                              
[47] "Nicotinic Acid"                         "Adenine"                               
[49] "Hypoxanthine"                           "Phenylacetic Acid"                     
[51] "Acetylphosphate"                        "Hydrocinnamic Acid"                    
[53] "Xanthine"                               "Glycerol-3-P"                          
[55] "Inositol"                               "Azelaic Acid"                          
[57] "Erythrose-4-Phosphate"                  "Pantothenate"                          
[59] "Ribose-5-P"                             "Biotin"                                
[61] "G1P/F1P/F6P"                            "Linoleic Acid"                         
[63] "IMP"                                    "Adenylosuccinate"   

Importantly, core metabolites such as glucose, pyruvate, a-ketoglutarate, lactate, acetyl CoA were not high enough to get a signal. We did get a signal for glutamine, glutamate, and SAM. We likely need to increase tissue input.  

## Plot PCA of samples 

Generate wide data. 
```{r}
data_wide<-data_long%>%
  ungroup()%>%
  select(Compound, sample, conc_norm_mg, species)%>%
  pivot_wider(names_from=Compound, values_from=conc_norm_mg)
```

```{r}
scaled.pca<-prcomp(data_wide%>%select(where(is.numeric)), scale=TRUE, center=TRUE) 
```

Prepare a PCA plot
```{r}
# scale data
vegan <- scale(data_wide%>%select(where(is.numeric)))

# PerMANOVA 
permanova<-adonis2(vegan ~ sample, data = data_wide, method='eu')
permanova
```

```{r}
pca1<-ggplot2::autoplot(scaled.pca, data=data_wide, loadings=FALSE,  colour="sample", loadings.label.colour="black", loadings.colour="black", loadings.label=FALSE, frame=FALSE, loadings.label.size=5, loadings.label.vjust=-1, size=5) + 
  geom_text(aes(x = PC1, y = PC2, label = species), vjust = -0.5)+
  theme_classic()+
   theme(legend.text = element_text(size=18), 
         legend.position="right",
        plot.background = element_blank(),
        legend.title = element_text(size=18, face="bold"), 
        axis.text = element_text(size=18), 
        axis.title = element_text(size=18,  face="bold"));pca1
```

## View metabolite concentration trends  

With a small sample set, its hard to infer differences in concentration. This is preliminary. 

View concentration of metabolites of interest that we have signals for. 

- SAM
- Glutamine
- Glutamate 

```{r}
list<-c("S-Adenosylmethionine (SAM)", "Glutamine", "Glutamic acid")
```

```{r}
data_long%>%
  filter(Compound %in% list)%>%
  
  ggplot(aes(x=sample, y=conc_norm))+
  geom_point()+
  facet_grid(~Compound)+
  theme_classic()
```