---
title: "Analyzing lipidomic data for timeseries molecular samples"
author: "Ariana S Huffmyer"
date: "2025"
output: 
  html_document:
    code_folding: hide
    toc: yes
    toc_depth: 6
    toc_float: yes
editor_options: 
  chunk_output_type: console
--- 

This post analyzes metabolomics data for the E5 timeseries project. 

Next steps: evaulate outliers, investigate changes over time, correlate with lipids

Use same approach for lipids: <10% missing values keep with imputation, add site to lipid analysis 

# Set Up    

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message=FALSE)
```

Load libraries. 

```{r}
library(mixOmics)
library("RVAideMemoire")
library(tidyverse)
library(ggplot2)
library(readxl)
library(vegan)
library(factoextra)
library(ggfortify)
library(ComplexHeatmap)
library(viridis)
library(lme4)
library(lmerTest)
library(emmeans)
library(broom.mixed)
library(broom)
```

# Read and load in data 

## Load data. 

```{r}
metab_quant<-read_xlsx("M-multi-species/data/metabolomics/2025-04-15_Roberts-98.xlsx", sheet="Relative Quant Data")%>%
  rename(compound=`Current MS Compounds`)
```

Format NAs. 

```{r}
metab_quant[metab_quant == "N/A"] <- NA

str(metab_quant)

#remove kegg columns 
metab_quant<-metab_quant%>%
  select(!"HMDB ID")%>%
  select(!"KEGG ID")
```

Values missing or not quantified are now NA. 

Convert to numeric format. 

```{r}
# Keep the "sample" column unchanged, convert all other columns to numeric
metab_quant_wide <- as.data.frame(lapply(names(metab_quant), function(col) {
  if (col == "compound") {
    return(metab_quant[[col]])  # Keep the "sample" column as is
  } else {
    # Convert other columns to character first, then to numeric
    return(as.numeric(as.character(metab_quant[[col]])))
  }
}))

# Ensure the column names remain unchanged
colnames(metab_quant_wide) <- names(metab_quant)

str(metab_quant_wide)
```

Convert to long format. 

```{r}
metab_quant_long<-metab_quant_wide%>%
  pivot_longer(names_to="sample", values_to="rel.quant", cols = -c(compound))
```

## Add in metadata 

```{r}
metab_quant_long<-metab_quant_long%>%

  mutate(
    colony = str_extract(sample, "^[^_]+"),  # Extract everything before the underscore
    timepoint = str_extract(sample, "(?<=_).*"),  # Extract everything after the underscore
    species = case_when(
      str_detect(sample, "ACR") ~ "Acropora",
      str_detect(sample, "POC") ~ "Pocillopora",
      str_detect(sample, "POR") ~ "Porites",
      TRUE ~ NA_character_  # Default to NA if no match
    ), 
    type = case_when(
      str_detect(sample, "PBQC") ~ "PBQC", 
      TRUE ~ "sample"
      
    )
  )
```

```{r}
#add in site metadata
sites<-read_csv("M-multi-species/data/phys_master_timeseries.csv")%>%select(c("colony_id_corr", "site"))%>%rename(colony="colony_id_corr")

metab_quant_long<-left_join(metab_quant_long, sites)

metab_quant_long<-unique(metab_quant_long)
```

Add in protein data from lipid analyses (protein is the same because samples were taken from the same replicate). 

```{r}
protein<-read_xlsx("M-multi-species/data/metabolomics/2025-04-15_Roberts-98.xlsx", sheet="Protein")
```

Merge in protein data. 

```{r}
metab_quant_long<-left_join(metab_quant_long, protein)
```

## Filtering and normalizing 

How many lipids do we have currently? 
```{r}
length(levels(as.factor(metab_quant_long$compound)))
```
404 total metabolites 

## Remove metabolites not detected in the PBQC samples 

Remove facility internal standard metabolites. 
```{r}
standards<-c("3C13-Glycine", "4C13-Alanine", "2C13-Choline", "4C13-Serine", "d3-Creatinine", "6C13-Proline", "6C13-Valine", "5C13-Threonine", "5C13-Aspartic Acid", "6C13-Asparagine", "7C13-Leucine", "7C13-iso-Leucine", "2C13-Glutamic acid", "7C13-Glutamine", "6C13-Glutamic acid", "8C13-Lysine", "6C13-Methionine", "9C13-Histidine", "10C13-Phenylalanine", "2C13-Tyrosine", "10C13-Arginine", "10C13-Tyrosine", "13C13-Tryptophan", "8C13-Cystine", "11C13-Uridine", "3C13-Lactate", "4C13-3HBA", "2N15-Xanthine", "2N15-Urate", "1C13-Citrulline", "6C13-Glucose", "4C13-Pentothenate")

metab_quant_long<-metab_quant_long%>%
  filter(!compound %in% standards)

length(levels(as.factor(metab_quant_long$compound)))
```
372 metabolites after removing standards.  

Only keep metabolites that were measured in at least one of the PBQCs.  

```{r}
length(levels(as.factor(metab_quant_long$compound)))

#view metabolites not measured in the PBQC
test <- metab_quant_long %>%
  group_by(compound) %>%
  filter((type == "PBQC" & is.na(rel.quant)))%>%
  pull(compound)

test<-unique(test)

length(unique(test))

metab_quant_long<-metab_quant_long%>%
  filter(!compound %in% test)

length(levels(as.factor(metab_quant_long$compound)))

metab_quant_long<-metab_quant_long%>%
  filter(!type=="PBQC")%>%
  select(!type)
```
There are 187 metabolites not detected in the PBQCs. After removing those we have 185 total compounds.  

Remove metabolites that have NA in more than 10% of samples.  

```{r}
# Identify compounds with less than 25% NA in rel_quant
compounds_lt10pct_NA <- metab_quant_long %>%
  group_by(compound) %>%
  summarize(
    total = n(),
    na_count = sum(is.na(rel.quant)),
    na_prop = na_count / total
  ) %>%
  filter(na_prop < 0.1) %>%
  pull(compound)

length(compounds_lt10pct_NA)

metab_quant_long<-metab_quant_long%>%
  filter(compound %in% compounds_lt10pct_NA)

length(levels(as.factor(metab_quant_long$compound)))
```
We have 141 compounds in the dataset that are measured in >90% of samples. We will use imputation to account for the NAs.    

Rename dataframe. 

```{r}
data<-metab_quant_long
```

## Normalize to tissue input 

Next, conduct normalization to tissue input and multiply by constants as directed by the UW facility.  

```{r}
data<-data%>%
  mutate(rel.quant.ug=rel.quant/protein.ug)%>%
  select(!protein.ug)%>%
  select(!rel.quant)
```

Metabolite concentration is now normalized to protein.  

# Imputate missing values with group median 

Do this with species and time point for now. Add site here as well later.  
```{r}
data_im <- data %>%
  group_by(compound, species, timepoint, site) %>%
  mutate(
    rel.quant.ug = if_else(
      is.na(rel.quant.ug),
      median(rel.quant.ug, na.rm = TRUE),
      rel.quant.ug
    )
  ) %>%
  ungroup()
```

# Plot PCA of species effects  

Generate wide data. 
```{r}
data_wide<-data_im%>%
  ungroup()%>%
  pivot_wider(names_from=compound, values_from=rel.quant.ug)
```

```{r}
scaled.pca<-prcomp(data_wide%>%select(where(is.numeric)), scale=TRUE, center=TRUE) 
```

Prepare a PCA plot
```{r}
# scale data
vegan <- scale(data_wide%>%select(where(is.numeric)))

# PerMANOVA 
permanova<-adonis2(vegan ~ species * timepoint * site, data = data_wide, method='eu')
permanova
```

Significant differences in metabolomic profile between species and time points and an interaction with species x time x site. 

View by species 
```{r}
pca1<-ggplot2::autoplot(scaled.pca, data=data_wide, loadings=FALSE,  colour="species", shape = "site", loadings.label.colour="black", loadings.colour="black", loadings.label=FALSE, frame=TRUE, loadings.label.size=5, loadings.label.vjust=-1, size=5) + 
  theme_classic()+
   theme(legend.text = element_text(size=18), 
         legend.position="right",
        plot.background = element_blank(),
        legend.title = element_text(size=18, face="bold"), 
        axis.text = element_text(size=18), 
        axis.title = element_text(size=18,  face="bold"));pca1

```

View by time point 
```{r}
pca2<-ggplot2::autoplot(scaled.pca, data=data_wide, loadings=FALSE,  colour="timepoint", shape="site", loadings.label.colour="black", loadings.colour="black", loadings.label=FALSE, frame=TRUE, loadings.label.size=5, loadings.label.vjust=-1, size=5) + 
  theme_classic()+
   theme(legend.text = element_text(size=18), 
         legend.position="right",
        plot.background = element_blank(),
        legend.title = element_text(size=18, face="bold"), 
        axis.text = element_text(size=18), 
        axis.title = element_text(size=18,  face="bold"));pca2
```

# Plot PCA of time and site effects within species   

## Acropora 

Generate wide data. 
```{r}
acr_wide<-data_im%>%
  filter(species=="Acropora")%>%
  ungroup()%>%
  pivot_wider(names_from=compound, values_from=rel.quant.ug)
```

```{r}
scaled.pca.acr<-prcomp(acr_wide%>%select(where(is.numeric)), scale=TRUE, center=TRUE) 
```

Prepare a PCA plot
```{r}
# scale data
vegan_acr <- scale(acr_wide%>%select(where(is.numeric)))

# PerMANOVA 
permanova<-adonis2(vegan_acr ~ timepoint * site, data = acr_wide, method='eu')
permanova
```

Significant effect of time, but not site, in Acropora.  

```{r}
pca3<-ggplot2::autoplot(scaled.pca.acr, data=acr_wide, loadings=FALSE,  colour="timepoint", shape="site", loadings.label.colour="black", loadings.colour="black", loadings.label=FALSE, frame=TRUE, loadings.label.size=5, loadings.label.vjust=-1, size=5) + 
  theme_classic()+
   theme(legend.text = element_text(size=18), 
         legend.position="right",
        plot.background = element_blank(),
        legend.title = element_text(size=18, face="bold"), 
        axis.text = element_text(size=18), 
        axis.title = element_text(size=18,  face="bold"));pca3

```

## Pocillopora

Generate wide data. 
```{r}
poc_wide<-data_im%>%
  filter(species=="Pocillopora")%>%
  ungroup()%>%
  pivot_wider(names_from=compound, values_from=rel.quant.ug)
```

```{r}
scaled.pca.poc<-prcomp(poc_wide%>%select(where(is.numeric)), scale=TRUE, center=TRUE) 
```

Prepare a PCA plot
```{r}
# scale data
vegan_poc <- scale(poc_wide%>%select(where(is.numeric)))

# PerMANOVA 
permanova<-adonis2(vegan_poc ~ timepoint * site, data = poc_wide, method='eu')
permanova
```

Significant effect of time, but not site in Pocillopora  

```{r}
pca4<-ggplot2::autoplot(scaled.pca.poc, data=poc_wide, loadings=FALSE,  colour="timepoint", shape="site", loadings.label.colour="black", loadings.colour="black", loadings.label=FALSE, frame=TRUE, loadings.label.size=5, loadings.label.vjust=-1, size=5) + 
  theme_classic()+
   theme(legend.text = element_text(size=18), 
         legend.position="right",
        plot.background = element_blank(),
        legend.title = element_text(size=18, face="bold"), 
        axis.text = element_text(size=18), 
        axis.title = element_text(size=18,  face="bold"));pca4

```

## Porites 

Generate wide data. 
```{r}
por_wide<-data_im%>%
  filter(species=="Porites")%>%
  ungroup()%>%
  pivot_wider(names_from=compound, values_from=rel.quant.ug)
```

```{r}
scaled.pca.por<-prcomp(por_wide%>%select(where(is.numeric)), scale=TRUE, center=TRUE) 
```

Prepare a PCA plot
```{r}
# scale data
vegan_por <- scale(por_wide%>%select(where(is.numeric)))

# PerMANOVA 
permanova<-adonis2(vegan_por ~ timepoint * site, data = por_wide, method='eu')
permanova
```

No effect of time or site in Porites. There may be some outliers.   

```{r}
pca5<-ggplot2::autoplot(scaled.pca.por, data=por_wide, loadings=FALSE,  colour="timepoint", shape="site", loadings.label.colour="black", loadings.colour="black", loadings.label=FALSE, frame=TRUE, loadings.label.size=5, loadings.label.vjust=-1, size=5) + 
  theme_classic()+
   theme(legend.text = element_text(size=18), 
         legend.position="right",
        plot.background = element_blank(),
        legend.title = element_text(size=18, face="bold"), 
        axis.text = element_text(size=18), 
        axis.title = element_text(size=18,  face="bold"));pca5

```

Because site effects are not significant within each species, we will analyze by time in subsequent analyses. 

# Plot a heatmap of metabolite concentration (scaled to z score) by species and by timepoint within species 

## Heatmap between species 
```{r}
# Convert concentration to Z-scores
heat_data <- data_im %>% 
  group_by(compound) %>% 
  mutate(z_score = scale(rel.quant.ug)) %>% 
  group_by(species, compound)%>%
  summarise(z_score = mean(z_score, na.rm=TRUE))%>%
  ungroup()

# Pivot data to have metabolites as rows and lifestage as columns
heatmap_data <- heat_data %>% 
  dplyr::select(compound, species, z_score) %>%
  spread(key = species, value = z_score)

# Convert to matrix and set rownames
heatmap_matrix <- as.matrix(column_to_rownames(heatmap_data, var = "compound"))

# Create the heatmap
heatmap <- Heatmap(
  heatmap_matrix,
  name = "Z-score",
  col = inferno(10),
  cluster_rows = TRUE,
  cluster_columns = FALSE,
  show_row_names = FALSE,
  show_column_names = TRUE,
  row_split=8,
  row_title = "Metabolite", 
  column_names_gp =  gpar(fontsize = 12, border=FALSE),
  column_names_rot = 45,
  row_gap = unit(1, "mm"), 
  border = TRUE,
  row_names_gp = gpar(fontsize = 12, alpha = 0.75, border = FALSE)
)

# Draw the heatmap
draw(heatmap)
```

There are metabolites that are characteristic of each species.  

## Heatmap within species 

### Acropora 
```{r}
# Convert concentration to Z-scores
heat_data <- data_im %>% 
  filter(species=="Acropora")%>%
  group_by(compound) %>% 
  mutate(z_score = scale(rel.quant.ug)) %>% 
  group_by(timepoint, compound)%>%
  summarise(z_score = mean(z_score, na.rm=TRUE))%>%
  ungroup()

# Pivot data to have metabolites as rows and lifestage as columns
heatmap_data <- heat_data %>% 
  dplyr::select(compound, timepoint, z_score) %>%
  spread(key = timepoint, value = z_score)

# Convert to matrix and set rownames
heatmap_matrix <- as.matrix(column_to_rownames(heatmap_data, var = "compound"))

# Create the heatmap
heatmap <- Heatmap(
  heatmap_matrix,
  name = "Z-score",
  col = inferno(10),
  cluster_rows = TRUE,
  cluster_columns = FALSE,
  show_row_names = FALSE,
  show_column_names = TRUE,
  row_split=8,
  row_title = "Metabolite", 
  column_names_gp =  gpar(fontsize = 12, border=FALSE),
  column_names_rot = 45,
  row_gap = unit(1, "mm"), 
  border = TRUE,
  row_names_gp = gpar(fontsize = 12, alpha = 0.75, border = FALSE)
)

# Draw the heatmap
draw(heatmap)
```

There are some metabolites that are higher in TP1, many in TP3, and some in TP4.   


### Pocillopora 
```{r}
# Convert concentration to Z-scores
heat_data <- data_im %>% 
  filter(species=="Pocillopora")%>%
  group_by(compound) %>% 
  mutate(z_score = scale(rel.quant.ug)) %>% 
  group_by(timepoint, compound)%>%
  summarise(z_score = mean(z_score, na.rm=TRUE))%>%
  ungroup()

# Pivot data to have metabolites as rows and lifestage as columns
heatmap_data <- heat_data %>% 
  dplyr::select(compound, timepoint, z_score) %>%
  spread(key = timepoint, value = z_score)

# Convert to matrix and set rownames
heatmap_matrix <- as.matrix(column_to_rownames(heatmap_data, var = "compound"))

# Create the heatmap
heatmap <- Heatmap(
  heatmap_matrix,
  name = "Z-score",
  col = inferno(10),
  cluster_rows = TRUE,
  cluster_columns = FALSE,
  show_row_names = FALSE,
  show_column_names = TRUE,
  row_split=8,
  row_title = "Metabolite", 
  column_names_gp =  gpar(fontsize = 12, border=FALSE),
  column_names_rot = 45,
  row_gap = unit(1, "mm"), 
  border = TRUE,
  row_names_gp = gpar(fontsize = 12, alpha = 0.75, border = FALSE)
)

# Draw the heatmap
draw(heatmap)
```

There are some metabolites that are higher in TP1, many in TP3, and some in TP4.   


### Porites 
```{r}
# Convert concentration to Z-scores
heat_data <- data_im %>% 
  filter(species=="Porites")%>%
  group_by(compound) %>% 
  mutate(z_score = scale(rel.quant.ug)) %>% 
  group_by(timepoint, compound)%>%
  summarise(z_score = mean(z_score, na.rm=TRUE))%>%
  ungroup()

# Pivot data to have metabolites as rows and lifestage as columns
heatmap_data <- heat_data %>% 
  dplyr::select(compound, timepoint, z_score) %>%
  spread(key = timepoint, value = z_score)

# Convert to matrix and set rownames
heatmap_matrix <- as.matrix(column_to_rownames(heatmap_data, var = "compound"))

# Create the heatmap
heatmap <- Heatmap(
  heatmap_matrix,
  name = "Z-score",
  col = inferno(10),
  cluster_rows = TRUE,
  cluster_columns = FALSE,
  show_row_names = FALSE,
  show_column_names = TRUE,
  row_split=8,
  row_title = "Metabolite", 
  column_names_gp =  gpar(fontsize = 12, border=FALSE),
  column_names_rot = 45,
  row_gap = unit(1, "mm"), 
  border = TRUE,
  row_names_gp = gpar(fontsize = 12, alpha = 0.75, border = FALSE)
)

# Draw the heatmap
draw(heatmap)
```

Metabolites are more stable across time in Porites with some subsets elevated at each time point.     

# Examine differential lipids with a PLSDA and VIP scores 

Generate a PLSDA supervised model to examine metabolites that 1) drive separation between species and 2) then those that differentiate time points within species.  

## PLSDA and VIPs between species 

Generate a PLS-DA and plot.  
```{r, results=FALSE}
#assigning datasets 
X_metabolites <- data_wide

levels(as.factor(X_metabolites$species))

Y_metabolites <- as.factor(X_metabolites$species) #select treatment names
Y_metabolites

X_metabolites<-X_metabolites[6:146] #pull only data columns

# run PLSDA 
MyResult.plsda <- plsda(X_metabolites,Y_metabolites) # 1 Run the method

species_cols<-c("darkgray", "orange", "purple") 
            
plotIndiv(MyResult.plsda, col=species_cols, ind.names = FALSE, legend=TRUE, legend.title = "Metabolite ~ Species", ellipse = FALSE, title="", style = "graphics", centroid=FALSE, point.lwd = 2, cex=2)
```

View the VIP lipids that are most highly differentiating metabolites between species 

```{r}
#extract
metabolite_VIP <- PLSDA.VIP(MyResult.plsda)
metabolite_VIP_df <- as.data.frame(metabolite_VIP[["tab"]])
metabolite_VIP_df

# Converting row names to column
VIP_table <- rownames_to_column(metabolite_VIP_df, var = "Metabolite")

#filter for VIP > 1
VIP_1 <- VIP_table %>% 
  filter(VIP >= 1)

#plot
VIP_list_plot<-VIP_1 %>%
            arrange(VIP) %>%
  
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_point() +
  ylab("Metabolite") +
  xlab("VIP Score") +
  ggtitle("Metabolite VIP - Species") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"));VIP_list_plot
```

There are a lot of TGs and some FFAs that are strong VIPs between species.  

## PLSDA and VIPs between time points within species 

Pocillopora and Acropora were the only species that showed a multivariate difference in metabolite concentration across time. 

### Pocillopora 

Generate a PLS-DA and plot.  
```{r, results=FALSE}
#assigning datasets 
X_metabolites <- data_wide%>%filter(species=="Pocillopora")

levels(as.factor(X_metabolites$timepoint))

Y_metabolites <- as.factor(X_metabolites$timepoint) #select treatment names
Y_metabolites

X_metabolites<-X_metabolites[6:146] #pull only data columns

# run PLSDA 
MyResult.plsda <- plsda(X_metabolites,Y_metabolites) # 1 Run the method

timepoint_cols<-c("darkgray", "blue", "purple") 
            
plotIndiv(MyResult.plsda, col=timepoint_cols, ind.names = FALSE, legend=TRUE, legend.title = "POC Metabolites ~ Timepoint", ellipse = FALSE, title="", style = "graphics", centroid=FALSE, point.lwd = 2, cex=2)
```

View the VIP lipids that are most highly differentiating lipids between timepoints.  

```{r}
#extract
metabolite_VIP <- PLSDA.VIP(MyResult.plsda)
metabolite_VIP_df <- as.data.frame(metabolite_VIP[["tab"]])
metabolite_VIP_df

# Converting row names to column
VIP_table <- rownames_to_column(metabolite_VIP_df, var = "Metabolite")

#filter for VIP > 1
VIP_1 <- VIP_table %>% 
  filter(VIP >= 1)

#plot
VIP_list_plot<-VIP_1 %>%
            arrange(VIP) %>%
  
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_point() +
  ylab("Metabolite") +
  xlab("VIP Score") +
  ggtitle("POC Metabolite VIP - Timepoint") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"));VIP_list_plot
```

Filter dataframe by VIP lipids and plot presence in each time point.  
```{r}
vip_list<-c(VIP_1$Metabolite)

vip_lipids<-data_im%>%
  filter(compound %in% vip_list)%>%
  filter(species=="Pocillopora")
```

Plot abundance of these metabolites in each sample. 

```{r}
vip_abund_plot<-vip_lipids%>%
  
  ggplot(aes(x=timepoint, y=rel.quant.ug, color=timepoint, group=timepoint))+
  facet_wrap(~compound, scales="free")+
  geom_boxplot(aes(group=interaction(compound, timepoint)),position=position_dodge(0.9))+
  geom_point(position=position_jitterdodge(0.2))+
  scale_color_manual(values=c("darkgray", "blue", "purple"), name="Timepoint")+
  ylab("abundance")+
  xlab("Timepoint")+
  ggtitle("VIP Abundance - POC")+
  theme_classic()+
  theme(
    axis.text=element_text(color="black", size=12), 
    axis.text.x=element_text(angle=45, hjust=1), 
    axis.title=element_text(color="black", face="bold", size=12),
    plot.margin=margin(1,1,1,1, "cm"),
    legend.text=element_text(size=12),
    legend.title=element_text(size=12, face="bold")
  );vip_abund_plot
```

### Acropora 

Generate a PLS-DA and plot.  
```{r, results=FALSE}
#assigning datasets 
X_metabolites <- data_wide%>%filter(species=="Acropora")

levels(as.factor(X_metabolites$timepoint))

Y_metabolites <- as.factor(X_metabolites$timepoint) #select treatment names
Y_metabolites

X_metabolites<-X_metabolites[6:146] #pull only data columns

# run PLSDA 
MyResult.plsda <- plsda(X_metabolites,Y_metabolites) # 1 Run the method

timepoint_cols<-c("darkgray", "blue", "purple") 
            
plotIndiv(MyResult.plsda, col=timepoint_cols, ind.names = FALSE, legend=TRUE, legend.title = "ACR Metabolites ~ Timepoint", ellipse = FALSE, title="", style = "graphics", centroid=FALSE, point.lwd = 2, cex=2)
```

View the VIP lipids that are most highly differentiating lipids between timepoints.  

```{r}
#extract
metabolite_VIP <- PLSDA.VIP(MyResult.plsda)
metabolite_VIP_df <- as.data.frame(metabolite_VIP[["tab"]])
metabolite_VIP_df

# Converting row names to column
VIP_table <- rownames_to_column(metabolite_VIP_df, var = "Metabolite")

#filter for VIP > 1
VIP_1 <- VIP_table %>% 
  filter(VIP >= 1)

#plot
VIP_list_plot<-VIP_1 %>%
            arrange(VIP) %>%
  
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_point() +
  ylab("Metabolite") +
  xlab("VIP Score") +
  ggtitle("ACR Metabolite VIP - Timepoint") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"));VIP_list_plot
```

Filter dataframe by VIP lipids and plot presence in each time point.  
```{r}
vip_list<-c(VIP_1$Metabolite)

vip_lipids<-data_im%>%
  filter(compound %in% vip_list)%>%
  filter(species=="Acropora")
```

Plot abundance of these metabolites in each sample. 

```{r}
vip_abund_plot<-vip_lipids%>%
  
  ggplot(aes(x=timepoint, y=rel.quant.ug, color=timepoint, group=timepoint))+
  facet_wrap(~compound, scales="free")+
  geom_boxplot(aes(group=interaction(compound, timepoint)),position=position_dodge(0.9))+
  geom_point(position=position_jitterdodge(0.2))+
  scale_color_manual(values=c("darkgray", "blue", "purple"), name="Timepoint")+
  ylab("abundance")+
  xlab("Timepoint")+
  ggtitle("VIP Abundance - ACR")+
  theme_classic()+
  theme(
    axis.text=element_text(color="black", size=12), 
    axis.text.x=element_text(angle=45, hjust=1), 
    axis.title=element_text(color="black", face="bold", size=12),
    plot.margin=margin(1,1,1,1, "cm"),
    legend.text=element_text(size=12),
    legend.title=element_text(size=12, face="bold")
  );vip_abund_plot
```


### Porites 

Generate a PLS-DA and plot.  
```{r, results=FALSE}
#assigning datasets 
X_metabolites <- data_wide%>%filter(species=="Porites")

levels(as.factor(X_metabolites$timepoint))

Y_metabolites <- as.factor(X_metabolites$timepoint) #select treatment names
Y_metabolites

X_metabolites<-X_metabolites[6:146] #pull only data columns

# run PLSDA 
MyResult.plsda <- plsda(X_metabolites,Y_metabolites) # 1 Run the method

timepoint_cols<-c("darkgray", "orange", "blue", "purple") 
            
plotIndiv(MyResult.plsda, col=timepoint_cols, ind.names = FALSE, legend=TRUE, legend.title = "POR Metabolites ~ Timepoint", ellipse = FALSE, title="", style = "graphics", centroid=FALSE, point.lwd = 2, cex=2)
```

View the VIP lipids that are most highly differentiating lipids between timepoints.  

```{r}
#extract
metabolite_VIP <- PLSDA.VIP(MyResult.plsda)
metabolite_VIP_df <- as.data.frame(metabolite_VIP[["tab"]])
metabolite_VIP_df

# Converting row names to column
VIP_table <- rownames_to_column(metabolite_VIP_df, var = "Metabolite")

#filter for VIP > 1
VIP_1 <- VIP_table %>% 
  filter(VIP >= 1)

#plot
VIP_list_plot<-VIP_1 %>%
            arrange(VIP) %>%
  
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_point() +
  ylab("Metabolite") +
  xlab("VIP Score") +
  ggtitle("POR Metabolite VIP - Timepoint") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"));VIP_list_plot
```

Filter dataframe by VIP lipids and plot presence in each time point.  
```{r}
vip_list<-c(VIP_1$Metabolite)

vip_lipids<-data_im%>%
  filter(compound %in% vip_list)%>%
  filter(species=="Porites")
```

Plot abundance of these metabolites in each sample. 

```{r}
vip_abund_plot<-vip_lipids%>%
  
  ggplot(aes(x=timepoint, y=rel.quant.ug, color=timepoint, group=timepoint))+
  facet_wrap(~compound, scales="free")+
  geom_boxplot(aes(group=interaction(compound, timepoint)),position=position_dodge(0.9))+
  geom_point(position=position_jitterdodge(0.2))+
  scale_color_manual(values=c("darkgray", "orange", "blue", "purple"), name="Timepoint")+
  ylab("abundance")+
  xlab("Timepoint")+
  ggtitle("VIP Abundance - POR")+
  theme_classic()+
  theme(
    axis.text=element_text(color="black", size=12), 
    axis.text.x=element_text(angle=45, hjust=1), 
    axis.title=element_text(color="black", face="bold", size=12),
    plot.margin=margin(1,1,1,1, "cm"),
    legend.text=element_text(size=12),
    legend.title=element_text(size=12, face="bold")
  );vip_abund_plot
```

# Metabolite signatures of energetic state (preliminary)

Calculate the following indicators from literature search:  

- Glucose (translocation, glycolysis)
- G1P/F1P/F6P (glycolysis)
- G6P (glycolysis)
- Glutamine (nitrogen metabolism)
- Glutamic acid (nitrogen metabolism)
- Glycerol-3-P (central metabolism)
- Inositol (signaling)
- Octanoyl-L-Carnitine (8:0 Carnitine) (fatty acid metabolism)
- S-Adenosylmethionine (SAM) (methylation methyl donor)

## Glucose 

```{r}
glucose<-data_im %>%
  filter(compound=="Glucose")
```

Plot over time 

```{r}
glucose_plot1<-glucose%>%
  group_by(colony, species, timepoint, compound)%>%
  summarise(mean=mean(rel.quant.ug, na.rm=TRUE))%>%
  
  ggplot(aes(x = timepoint, y = mean, colour = species, fill=species)) +
  geom_point(alpha=0.2) +
  geom_line(aes(group=colony), alpha=0.2)+
  geom_smooth(aes(group=species), se=FALSE)+
  labs(x = "Time Point", y = "Relative Concentration (ug protein)", title = "Glucose") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), 
        legend.position="right");glucose_plot1
```

Run a model
```{r}
model<-glucose%>%
  
  lmer(rel.quant.ug ~ species * timepoint + (1|colony), data=.)

anova(model)

emm<-emmeans(model, ~timepoint|species)
pairs(emm)
```

Significant species x time.  

Slightly significant increase in POR at TP2. Stable in Pocillopora. Higher at TP3 in Acropora. 


## G1P/F1P/F6P 

```{r}
gly<-data_im %>%
  filter(compound=="G1P/F1P/F6P")
```

Plot over time 

```{r}
gly_plot1<-gly%>%
  group_by(colony, species, timepoint, compound)%>%
  summarise(mean=mean(rel.quant.ug, na.rm=TRUE))%>%
  
  ggplot(aes(x = timepoint, y = mean, colour = species, fill=species)) +
  geom_point(alpha=0.2) +
  geom_line(aes(group=colony), alpha=0.2)+
  geom_smooth(aes(group=species), se=FALSE)+
  labs(x = "Time Point", y = "Relative Concentration (ug protein)", title = "G1P/F1P/F6P") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), 
        legend.position="right");gly_plot1
```

Run a model
```{r}
model<-gly%>%
  
  lmer(rel.quant.ug ~ species * timepoint + (1|colony), data=.)

anova(model)

emm<-emmeans(model, ~timepoint|species)
pairs(emm)
```

Significant species x time.  

Higher at TP1 and TP3 in Acropora with TP4 as the lowest. Lower at TP1 in Pocillopora. Stable in Porites. 


## G6P 

```{r}
gly2<-data_im %>%
  filter(compound=="G6P")
```

Plot over time 

```{r}
gly2_plot1<-gly2%>%
  group_by(colony, species, timepoint, compound)%>%
  summarise(mean=mean(rel.quant.ug, na.rm=TRUE))%>%
  
  ggplot(aes(x = timepoint, y = mean, colour = species, fill=species)) +
  geom_point(alpha=0.2) +
  geom_line(aes(group=colony), alpha=0.2)+
  geom_smooth(aes(group=species), se=FALSE)+
  labs(x = "Time Point", y = "Relative Concentration (ug protein)", title = "G6P") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), 
        legend.position="right");gly2_plot1
```

Run a model
```{r}
model<-gly2%>%
  
  lmer(rel.quant.ug ~ species * timepoint + (1|colony), data=.)

anova(model)

emm<-emmeans(model, ~timepoint|species)
pairs(emm)
```

No changes. 


## Glutamine 

```{r}
glutamine<-data_im %>%
  filter(compound=="Glutamine")
```

Plot over time 

```{r}
glutamine_plot1<-glutamine%>%
  group_by(colony, species, timepoint, compound)%>%
  summarise(mean=mean(rel.quant.ug, na.rm=TRUE))%>%
  
  ggplot(aes(x = timepoint, y = mean, colour = species, fill=species)) +
  geom_point(alpha=0.2) +
  geom_line(aes(group=colony), alpha=0.2)+
  geom_smooth(aes(group=species), se=FALSE)+
  labs(x = "Time Point", y = "Relative Concentration (ug protein)", title = "Glutamine") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), 
        legend.position="right");glutamine_plot1
```

Run a model
```{r}
model<-glutamine%>%
  
  lmer(rel.quant.ug ~ species * timepoint + (1|colony), data=.)

anova(model)

emm<-emmeans(model, ~timepoint|species)
pairs(emm)
```

Significant species x time interaction. Higher at TP4 in Porites. Stable in Acropora and Pocillopora.  

## Glutamic acid/Glutamate 

```{r}
glutamate<-data_im %>%
  filter(compound=="Glutamic acid")
```

Plot over time 

```{r}
glutamate_plot1<-glutamate%>%
  group_by(colony, species, timepoint, compound)%>%
  summarise(mean=mean(rel.quant.ug, na.rm=TRUE))%>%
  
  ggplot(aes(x = timepoint, y = mean, colour = species, fill=species)) +
  geom_point(alpha=0.2) +
  geom_line(aes(group=colony), alpha=0.2)+
  geom_smooth(aes(group=species), se=FALSE)+
  labs(x = "Time Point", y = "Relative Concentration (ug protein)", title = "Glutamate/Glutamic Acid") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), 
        legend.position="right");glutamate_plot1
```

Run a model
```{r}
model<-glutamate%>%
  
  lmer(rel.quant.ug ~ species * timepoint + (1|colony), data=.)

anova(model)

emm<-emmeans(model, ~timepoint|species)
pairs(emm)
```

Significant difference between species, no effect of time. Lower in Porites.  

## Glycerol-3-P 

```{r}
glycerol<-data_im %>%
  filter(compound=="Glycerol-3-P")
```

Plot over time 

```{r}
glycerol_plot1<-glycerol%>%
  group_by(colony, species, timepoint, compound)%>%
  summarise(mean=mean(rel.quant.ug, na.rm=TRUE))%>%
  
  ggplot(aes(x = timepoint, y = mean, colour = species, fill=species)) +
  geom_point(alpha=0.2) +
  geom_line(aes(group=colony), alpha=0.2)+
  geom_smooth(aes(group=species), se=FALSE)+
  labs(x = "Time Point", y = "Relative Concentration (ug protein)", title = "Glycerol-3-P") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), 
        legend.position="right");glycerol_plot1
```

Run a model
```{r}
model<-glycerol%>%
  
  lmer(rel.quant.ug ~ species * timepoint + (1|colony), data=.)

anova(model)

emm<-emmeans(model, ~timepoint|species)
pairs(emm)
```

Significant difference between species, no effect of time. Higher in Pocillopora.  


## Inositol 

```{r}
inositol<-data_im %>%
  filter(compound=="Inositol")
```

Plot over time 

```{r}
inositol_plot1<-inositol%>%
  group_by(colony, species, timepoint, compound)%>%
  summarise(mean=mean(rel.quant.ug, na.rm=TRUE))%>%
  
  ggplot(aes(x = timepoint, y = mean, colour = species, fill=species)) +
  geom_point(alpha=0.2) +
  geom_line(aes(group=colony), alpha=0.2)+
  geom_smooth(aes(group=species), se=FALSE)+
  labs(x = "Time Point", y = "Relative Concentration (ug protein)", title = "Inositol") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), 
        legend.position="right");inositol_plot1
```

Run a model
```{r}
model<-inositol%>%
  
  lmer(rel.quant.ug ~ species * timepoint + (1|colony), data=.)

anova(model)

emm<-emmeans(model, ~timepoint|species)
pairs(emm)
```

Significant difference between species, no effect of time. Higher in Porites.   

## Octanoyl-L-Carnitine (8:0 Carnitine) 

```{r}
carnitine<-data_im %>%
  filter(compound=="Octanoyl-L-Carnitine (8:0 Carnitine)")
```

Plot over time 

```{r}
carnitine_plot1<-carnitine%>%
  group_by(colony, species, timepoint, compound)%>%
  summarise(mean=mean(rel.quant.ug, na.rm=TRUE))%>%
  
  ggplot(aes(x = timepoint, y = mean, colour = species, fill=species)) +
  geom_point(alpha=0.2) +
  geom_line(aes(group=colony), alpha=0.2)+
  geom_smooth(aes(group=species), se=FALSE)+
  labs(x = "Time Point", y = "Relative Concentration (ug protein)", title = "Octanoyl-L-Carnitine (8:0 Carnitine)") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), 
        legend.position="right");carnitine_plot1
```

Run a model
```{r}
model<-carnitine%>%
  
  lmer(rel.quant.ug ~ species * timepoint + (1|colony), data=.)

anova(model)

emm<-emmeans(model, ~timepoint|species)
pairs(emm)
```

Significant difference between species, no effect of time. Lowest in Pocillopora.     


## S-Adenosylmethionine (SAM)

```{r}
sam<-data_im %>%
  filter(compound=="S-Adenosylmethionine (SAM)")
```

Plot over time 

```{r}
sam_plot1<-sam%>%
  group_by(colony, species, timepoint, compound)%>%
  summarise(mean=mean(rel.quant.ug, na.rm=TRUE))%>%
  
  ggplot(aes(x = timepoint, y = mean, colour = species, fill=species)) +
  geom_point(alpha=0.2) +
  geom_line(aes(group=colony), alpha=0.2)+
  geom_smooth(aes(group=species), se=FALSE)+
  labs(x = "Time Point", y = "Relative Concentration (ug protein)", title = "S-Adenosylmethionine (SAM)") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), 
        legend.position="right");sam_plot1
```

Run a model
```{r}
model<-sam%>%
  
  lmer(rel.quant.ug ~ species * timepoint + (1|colony), data=.)

anova(model)

emm<-emmeans(model, ~timepoint|species)
pairs(emm)
```

Significant species x time interaction. Stable in Porites and Pocillopora. Higher at TP1 in Acropora decreasing over time.