--- title: "Nickel Impacts on Morphology of *Botryllus schlosseri*" subtitle: "Analysis of pilot study." author: "Celeste Valdivia" date: "2023-02-213" --- ```{r, eval =TRUE} library(knitr) library(tidyverse) library(tidyr) library(dplyr) library(hrbrthemes) library(ggplot2) library(RColorBrewer) library(ggpubr) knitr::opts_chunk$set(echo = TRUE, eval = FALSE) ``` # Objective and Background Data below was collected from a pilot study conducted in Autumn 2023 where field-collected individual *Botryllus schlosseri* were exposed to a low and high treatment of nickel chloride and then were assessed for changes in morphology. Refer to [Pilot 1.1 Nickel Exposure](https://valeste.github.io/tough-tunicates/posts/nicl2-botryllus-pilot/) notebook post for experimental design details. # Retrieving Data from Google Sheets Make sure you have made your Google sheet publicly available to anyone that has the link. If you make any updates to the sheet just re-curl the data, meaning just re-run the code below. ```{r, engine='bash', eval=FALSE} cd .. curl -L https://docs.google.com/spreadsheets/d/10uM3N3PD9xIP4yUnadfhkcXa8TPMRMD-adOIKbmkYzY/export?exportFormat=csv | tee data/morph.csv ``` Read in the data to your local R environment. ```{r, eval=TRUE} setwd('..') morph <- read.csv(file = "data/morph.csv") ``` # Cleaning up Data ```{r, eval=TRUE} morph$date <- mdy(morph$date) #convert the date column from characters to true date ``` ```{r, eval=TRUE} morph <- morph %>% separate(jar_id, c("treatment_mg_per_L", "replicate"), sep = "-") #create two new columns, treatment and replicate from jar id ``` ```{r, eval=TRUE} morph_fact <- morph %>% mutate(treatment_mg_per_L = as.factor(treatment_mg_per_L)) %>% mutate(stage = as.factor(stage)) %>% mutate(animal_id = as.factor(animal_id)) %>% mutate(date = as.factor(date)) %>% mutate(treatment_order = factor(paste(treatment_mg_per_L, animal_id))) # Create a new variable for ordering by treatment ``` ```{r} # Create a new column 'simple_stage' based on conditions morph_fact <- morph_fact %>% mutate(simple_stage = case_when( stage %in% c("A1", "A2") ~ "A", stage %in% c("B1", "B2") ~ "B", stage %in% c("C1", "C2") ~ "C", stage == "TO" ~ "D", TRUE ~ NA_character_ # This will handle any other cases or return NA if none match )) ``` New data frame with only the rows of infor after 24 hours of exposure. ```{r} stage_24 <- morph_fact[morph_fact$hpe == 24, ] ``` Count data on blastogenic stage per treatment ```{r} stage_summary <- stage_24 %>% group_by(treatment_mg_per_L, stage_2) %>% summarise(count = n()) # n() calculates the count of each group print(stage_summary) ``` write table for blastogenic stage count. ```{r} setwd('..') write.csv(stage_summary, file ="output/blast_count.csv") ``` # Identify Distribution of Blastogenic Stage per Treatment at 24 hpe ```{r} # Convert treatment_mg_per_L to factor with specific order stage_summary$treatment_mg_per_L <- factor(stage_summary$treatment_mg_per_L, levels = c(0, 5, 45)) setwd('..') png(filename = "output/blastogenic_dist_2.png", width = 700, height = 600) ggplot(stage_summary, aes(x = treatment_mg_per_L, y = count, fill = stage_2)) + geom_bar(stat = "identity", position = "dodge", width = 0.9) + scale_fill_manual(name = "Stage", values = c("lightblue", "blue", "darkblue"), # Specify the colors labels = c("early", "mid", "late")) + labs(x = "Treatment (Nickel Chloride mg/L)", y = "Count") + theme_minimal() + scale_y_continuous(limits = c(0, 8)) + theme(panel.grid.major.x = element_blank(), # Remove major vertical gridlines panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line(color = "darkgrey", size = 0.05), panel.grid.minor.y = element_blank(), axis.text = element_text(size = 30), axis.title = element_text(size = 30), legend.text = element_text(size = 30), legend.title = element_text(size = 30)) dev.off() ``` ```{r} setwd('..') png(filename = "output/blastogenic_dist.png", width = 700, height = 600) ggplot(stage_summary, aes(x = treatment_mg_per_L, y = count, fill = simple_stage)) + geom_bar(stat = "identity", position = "dodge", width = 0.9) + scale_fill_manual(name = "Stage", values = c("#FFD92F", "#E78AC3", "#8DA0CB", "#A6D854"), # Specify the colors labels = c("A", "B", "C", "TO")) + labs(x = "Treatment (Nickel Chloride mg/L)", y = "Count") + theme_minimal() + scale_y_continuous(limits = c(0, 8)) + theme(panel.grid.major.x = element_blank(), # Remove major vertical gridlines panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line(color = "darkgrey", size = 0.05), panel.grid.minor.y = element_blank(), axis.text = element_text(size = 15), axis.title = element_text(size = 15), legend.text = element_text(size = 15), legend.title = element_text(size = 15)) dev.off() ``` # Exploring Health Score Decline After Nickel Exposure ```{r, eval=TRUE} # Calculate the mean health score binned by hours post-exposure and by treatment mean_health_score <- morph_fact %>% group_by(hpe, treatment_mg_per_L) %>% summarise(mean_health = round(mean(health, na.rm = TRUE))) # Calculate SD SD_health_score <- morph_fact %>% group_by(hpe, treatment_mg_per_L) %>% summarise(SD_health = round(sd(health, na.rm = TRUE))) n <- 14 SE_health_score <- SD_health_score$SD_health/ sqrt(n) # Print the calculated mean health scores print(mean_health_score) print(SE_health_score) ``` # Exploratory Plots ```{r} # To visualize the mean health scores using ggplot2 ggplot(mean_health_score, aes(x = hpe, y = mean_health, color = treatment_mg_per_L)) + geom_line() + geom_point() + labs(title = "Mean Health Score by Days Post-Exposure and Treatment", x = "Hours Post-Exposure", y = "Mean Health Score", color = "Concentration of Nickel Chloride (mg/L)") + theme_minimal() ``` Just the plot alone with no added significance bars and not adjusted for printing. ```{r} # Create the clustered bar graph ggplot(mean_health_score, aes(x = factor(hpe), y = mean_health, fill = as.factor(treatment_mg_per_L))) + geom_bar(stat = "identity", position = "dodge", width = 0.8) + scale_fill_brewer(palette = "Green", name = "Nickel Chloride (mg/L)", labels = c("0" = "0", "05" = "5", "45" = "45")) + labs(title = "Health Score by Hours Post-Exposure and Treatment", x = "Hours Post-Exposure", y = "Health Score") + theme_minimal() + scale_y_continuous(breaks = c(0, 2, 4, 6, 8, 10), limits = c(0, 10)) ``` # Statistically evaluate differences in health score ```{r} ## Convert Treatment and HoursPostExposure to factors if they are not already factors morph_fact$treatment_mg_per_L <- as.factor(morph_fact$treatment_mg_per_L) morph_fact$hpe <- as.factor(morph_fact$hpe) ## Perform one-way ANOVA anova_result <- aov(health ~ treatment_mg_per_L + hpe + treatment_mg_per_L:hpe , data = morph_fact) # Summarize the ANOVA results summary(anova_result) ## Meeting the ANOVA assumptions leveneTest(health ~ treatment_mg_per_L*hpe, data = morph_fact) #p-value > 0.05, therefore equal-variances are met # Check normality of residuals using Shapiro-Wilk test shapiro_test <- shapiro.test(residuals(anova_result)) print(shapiro_test) #meets assumption of normality, p value > 0.05 ## Perform Tukey's HSD test for significant interaction tukey_result <- TukeyHSD(x= anova_result, ordered = TRUE, conf.level = 0.95) print(tukey_result) ``` Printing out a png image of graph. Make sure to adjust pixel dimensions for the ideal DPI and desired printing size. # Expository Graphs ```{r, eval=FALSE} #png(filename = "output/bar_plot.png", width = 800, height = 800) # good for making a 3x3 in image for printing ~150 dpi # Add significance bars based on specific comparisons ggplot(mean_health_score, aes(x = factor(hpe), y = mean_health, fill = as.factor(treatment_mg_per_L))) + geom_bar(stat = "identity", position = "dodge", width = 0.8) + scale_fill_brewer(palette = "Dark2", name = "Treatment", labels = c("0" = "Control", "05" = "5 mg/L", "45" = "45 mg/L")) + labs( x = "Hours Post-Exposure", y = "Mean Health Score", ) + theme_minimal() + scale_y_continuous(breaks = c(0, 5, 10), limits = c(0, 10)) + geom_errorbar(aes(ymin=mean_health_score$mean_health-SE_health_score, ymax=mean_health_score$mean_health+SE_health_score), width=.2, position=position_dodge(0.81)) + theme(panel.grid.major.x = element_blank(), # Remove major vertical gridlines panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line(color = "darkgrey", size = 1), panel.grid.minor.y = element_line(color = "darkgrey", size = 0.5), axis.text=element_text(size=30), axis.title=element_text(size=40), legend.text = element_text(size = 40), legend.title = element_text(size = 40)) + # Adjust the font size of the legend text geom_text(aes(x = 1.98, y = 9.25, label = "**"), size = 14, vjust = -0.5) + # Asterisk for significant difference at 24 hours for Control vs. 45 mg/L geom_segment(aes(x = 1.65, xend = 2.27, y = 9.5, yend = 9.5), linetype = "dashed", size = 1 ) + # Line below the asterisk for Control vs. 45 mg/L geom_segment(aes(x = 2.27, xend = 2.27, y = 6.7, yend = 9.5), linetype = "dashed", size = 1 ) + geom_segment(aes(x = 1.65, xend = 1.65, y = 8.4, yend = 9.5), linetype = "dashed", size = 1 ) + geom_text(aes(x = 1.9, y = 8.25, label = "**"), size = 14, vjust = -0.5) + #asterisk for significant difference at 24 hours for control vs 5 mg/L geom_segment(aes(x = 1.8, xend = 2, y = 8.5, yend = 8.5), linetype = "dashed", size = 1 ) + geom_segment(aes(x = 1.8, xend = 1.8, y = 8.4, yend = 8.5), linetype = "dashed", size = 1 ) + geom_segment(aes(x = 2, xend = 2, y = 6.5, yend = 8.5), linetype = "dashed", size = 1 ) ```