---
title: "Nickel Impacts on Morphology of *Botryllus schlosseri*"
subtitle: "Analysis of pilot study."
author: "Celeste Valdivia"
date: "2023-02-213"
---

```{r, eval =TRUE}
library(knitr)
library(tidyverse)
library(tidyr)
library(dplyr)
library(hrbrthemes)
library(ggplot2)
library(RColorBrewer)
library(ggpubr)
knitr::opts_chunk$set(echo = TRUE,
                      eval = FALSE)
```

# Objective and Background

Data below was collected from a pilot study conducted in Autumn 2023 where field-collected individual *Botryllus schlosseri* were exposed to a low and high treatment of nickel chloride and then were assessed for changes in morphology. Refer to [Pilot 1.1 Nickel Exposure](https://valeste.github.io/tough-tunicates/posts/nicl2-botryllus-pilot/) notebook post for experimental design details.

# Retrieving Data from Google Sheets


Make sure you have made your Google sheet publicly available to anyone that has the link. If you make any updates to the sheet just re-curl the data, meaning just re-run the code below.

```{r, engine='bash', eval=FALSE}
cd ..

curl -L https://docs.google.com/spreadsheets/d/10uM3N3PD9xIP4yUnadfhkcXa8TPMRMD-adOIKbmkYzY/export?exportFormat=csv | tee data/morph.csv
```
Read in the data to your local R environment.

```{r, eval=TRUE}
setwd('..')
morph <- read.csv(file = "data/morph.csv")  
```

# Cleaning up Data

```{r, eval=TRUE}
morph$date <- mdy(morph$date) #convert the date column from characters to true date
```

```{r, eval=TRUE}
morph <- morph %>%
  separate(jar_id, c("treatment_mg_per_L", "replicate"), sep = "-") #create two new columns, treatment and replicate from jar id 
```

```{r, eval=TRUE}
morph_fact <- morph %>%
  mutate(treatment_mg_per_L = as.factor(treatment_mg_per_L)) %>%
  mutate(stage = as.factor(stage)) %>%
  mutate(animal_id = as.factor(animal_id)) %>%
  mutate(date = as.factor(date)) %>%
  mutate(treatment_order = factor(paste(treatment_mg_per_L, animal_id))) # Create a new variable for ordering by treatment
```

```{r}
# Create a new column 'simple_stage' based on conditions
morph_fact <- morph_fact %>%
  mutate(simple_stage = case_when(
    stage %in% c("A1", "A2") ~ "A",
    stage %in% c("B1", "B2") ~ "B",
    stage %in% c("C1", "C2") ~ "C",
    stage == "TO" ~ "D",
    TRUE ~ NA_character_  # This will handle any other cases or return NA if none match
  ))
```

New data frame with only the rows of infor after 24 hours of exposure.
```{r}
stage_24 <- morph_fact[morph_fact$hpe == 24, ]
```


Count data on blastogenic stage per treatment
```{r}
stage_summary <- stage_24 %>%
  group_by(treatment_mg_per_L, stage_2) %>%
  summarise(count = n())  # n() calculates the count of each group

print(stage_summary)
```

write table for blastogenic stage count.

```{r}
setwd('..')
write.csv(stage_summary, file ="output/blast_count.csv")
```

# Identify Distribution of Blastogenic Stage per Treatment at 24 hpe


```{r}
# Convert treatment_mg_per_L to factor with specific order
stage_summary$treatment_mg_per_L <- factor(stage_summary$treatment_mg_per_L, levels = c(0, 5, 45))

setwd('..')
png(filename = "output/blastogenic_dist_2.png", width = 700, height = 600)

ggplot(stage_summary, aes(x = treatment_mg_per_L, y = count, fill = stage_2)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.9) +
  scale_fill_manual(name = "Stage",
                    values = c("lightblue",  "blue", "darkblue"),  # Specify the colors
                    labels = c("early", "mid", "late")) +
  labs(x = "Treatment (Nickel Chloride mg/L)",
       y = "Count") +
  theme_minimal() +
  scale_y_continuous(limits = c(0, 8)) +
  theme(panel.grid.major.x = element_blank(),  # Remove major vertical gridlines
        panel.grid.minor.x = element_blank(),
        panel.grid.major.y = element_line(color = "darkgrey", size = 0.05),
        panel.grid.minor.y = element_blank(), 
        axis.text = element_text(size = 30),
        axis.title = element_text(size = 30),
        legend.text = element_text(size = 30),
        legend.title = element_text(size = 30))

dev.off()

```


```{r}
setwd('..')
png(filename = "output/blastogenic_dist.png", width = 700, height = 600)


ggplot(stage_summary, aes(x = treatment_mg_per_L, y = count, fill = simple_stage)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.9) +
   scale_fill_manual(name = "Stage",
                     values = c("#FFD92F", "#E78AC3",  "#8DA0CB", "#A6D854"),  # Specify the colors
                     labels = c("A", "B", "C", "TO")) +
    labs(x = "Treatment (Nickel Chloride mg/L)",
       y = "Count") +
  theme_minimal() +
  scale_y_continuous(limits = c(0, 8)) +
  theme(panel.grid.major.x = element_blank(),  # Remove major vertical gridlines
        panel.grid.minor.x = element_blank(),
        panel.grid.major.y = element_line(color = "darkgrey", size = 0.05),
        panel.grid.minor.y = element_blank(), 
        axis.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 15))
dev.off()
```

# Exploring Health Score Decline After Nickel Exposure
```{r, eval=TRUE}
# Calculate the mean health score binned by hours post-exposure and by treatment
mean_health_score <- morph_fact %>%
  group_by(hpe, treatment_mg_per_L) %>%
  summarise(mean_health = round(mean(health, na.rm = TRUE)))

# Calculate SD
SD_health_score <- morph_fact %>%
  group_by(hpe, treatment_mg_per_L) %>%
  summarise(SD_health = round(sd(health, na.rm = TRUE)))

n <- 14

SE_health_score <- SD_health_score$SD_health/ sqrt(n)
# Print the calculated mean health scores
print(mean_health_score)
print(SE_health_score)
```

# Exploratory Plots

```{r}
# To visualize the mean health scores using ggplot2
ggplot(mean_health_score, aes(x = hpe, y = mean_health, color = treatment_mg_per_L)) +
  geom_line() +
  geom_point() +
  labs(title = "Mean Health Score by Days Post-Exposure and Treatment",
       x = "Hours Post-Exposure",
       y = "Mean Health Score",
       color = "Concentration of Nickel Chloride (mg/L)") +
  theme_minimal()
```

Just the plot alone with no added significance bars and not adjusted for printing.

```{r}
# Create the clustered bar graph
ggplot(mean_health_score, aes(x = factor(hpe), y = mean_health, fill = as.factor(treatment_mg_per_L))) +
  geom_bar(stat = "identity", position = "dodge", width = 0.8) +
scale_fill_brewer(palette = "Green", name = "Nickel Chloride (mg/L)", 
                    labels = c("0" = "0", "05" = "5", "45" = "45")) +
  labs(title = "Health Score by Hours Post-Exposure and Treatment",
       x = "Hours Post-Exposure",
       y = "Health Score") +
  theme_minimal() +
  scale_y_continuous(breaks = c(0, 2, 4, 6, 8, 10), limits = c(0, 10))

```

# Statistically evaluate differences in health score

```{r}
## Convert Treatment and HoursPostExposure to factors if they are not already factors
morph_fact$treatment_mg_per_L <- as.factor(morph_fact$treatment_mg_per_L)
morph_fact$hpe <- as.factor(morph_fact$hpe)


## Perform one-way ANOVA
anova_result <- aov(health ~ treatment_mg_per_L + hpe + treatment_mg_per_L:hpe , data = morph_fact)

# Summarize the ANOVA results
summary(anova_result)


## Meeting the ANOVA assumptions
leveneTest(health ~ treatment_mg_per_L*hpe, data = morph_fact)
#p-value > 0.05, therefore equal-variances are met

# Check normality of residuals using Shapiro-Wilk test
shapiro_test <- shapiro.test(residuals(anova_result))
print(shapiro_test)
#meets assumption of normality, p value > 0.05

## Perform Tukey's HSD test for significant interaction

tukey_result <- TukeyHSD(x= anova_result, ordered = TRUE, conf.level = 0.95)
print(tukey_result)
```

Printing out a png image of graph. Make sure to adjust pixel dimensions for the ideal DPI and desired printing size.

# Expository Graphs 

```{r, eval=FALSE}
#png(filename = "output/bar_plot.png", width = 800, height = 800) # good for making a 3x3 in image for printing ~150 dpi

# Add significance bars based on specific comparisons
ggplot(mean_health_score, aes(x = factor(hpe), y = mean_health, fill = as.factor(treatment_mg_per_L))) +
  geom_bar(stat = "identity", position = "dodge", width = 0.8) +
scale_fill_brewer(palette = "Dark2", name = "Treatment", 
                    labels = c("0" = "Control", "05" = "5 mg/L", "45" = "45 mg/L")) +
  labs(
       x = "Hours Post-Exposure",
       y = "Mean Health Score", ) +
  theme_minimal() +
  scale_y_continuous(breaks = c(0, 5, 10), limits = c(0, 10)) +
  geom_errorbar(aes(ymin=mean_health_score$mean_health-SE_health_score, ymax=mean_health_score$mean_health+SE_health_score), width=.2,
                 position=position_dodge(0.81)) +
  theme(panel.grid.major.x = element_blank(),  # Remove major vertical gridlines
        panel.grid.minor.x = element_blank(),
        panel.grid.major.y = element_line(color = "darkgrey", size = 1),
        panel.grid.minor.y = element_line(color = "darkgrey", size = 0.5), 
        axis.text=element_text(size=30),
        axis.title=element_text(size=40),
        legend.text = element_text(size = 40),
        legend.title = element_text(size = 40)) + # Adjust the font size of the legend text 
  geom_text(aes(x = 1.98, y = 9.25, label = "**"), size = 14, vjust = -0.5) +  # Asterisk for significant difference at 24 hours for Control vs. 45 mg/L
    geom_segment(aes(x = 1.65, xend = 2.27, y = 9.5, yend = 9.5), linetype = "dashed", size = 1 ) +  # Line below the asterisk for Control vs. 45 mg/L
  geom_segment(aes(x = 2.27, xend = 2.27, y = 6.7, yend = 9.5), linetype = "dashed", size = 1 ) +
  geom_segment(aes(x = 1.65, xend = 1.65, y = 8.4, yend = 9.5), linetype = "dashed", size = 1 ) +
  geom_text(aes(x = 1.9, y = 8.25, label = "**"), size = 14, vjust = -0.5) +   #asterisk for significant difference at 24 hours for control vs 5 mg/L 
  geom_segment(aes(x = 1.8, xend = 2, y = 8.5, yend = 8.5), linetype = "dashed", size = 1 ) +
  geom_segment(aes(x = 1.8, xend = 1.8, y = 8.4, yend = 8.5), linetype = "dashed", size = 1 ) +
  geom_segment(aes(x = 2, xend = 2, y = 6.5, yend = 8.5), linetype = "dashed", size = 1 )
```