--- title: "LC50 Nickel x Botryllus" author: "Celeste Valdivia" date: "2024-04-10" output: html_document --- # Setup ```{r, eval =TRUE} library(knitr) library(tidyverse) library(tidyr) library(dplyr) library(hrbrthemes) library(ggplot2) library(RColorBrewer) library(ggpubr) library(ecotox) library(wesanderson) library(viridis) library(ggrepel) knitr::opts_chunk$set(echo = TRUE, eval = FALSE) ``` # Retrieving Data from Google Sheets Make sure you have made your Google sheet publicly available to anyone that has the link. If you make any updates to the sheet just re-curl the data, meaning just re-run the code below. ```{r, engine='bash', eval=FALSE} cd .. curl -L https://docs.google.com/spreadsheets/d/1JtJO3EX06BYK4pYwZZ7b7ZCoTyZcAeuTRka_9kA-yPk/export?exportFormat=csv | tee data/LC50.csv ``` Read in the data to your local R environment. ```{r, eval=TRUE} setwd('..') LC50 <- read.csv(file = "data/LC50.csv") ``` # Cleaning up Data ## Add simplified scoring columbs and convert dates Convert Datees ```{r, eval=TRUE} LC50$Date <- mdy(LC50$Date) #convert the Date column from characters to true Date ``` Simple Stage column added: ```{r} LC50 <- LC50 %>% mutate(simple_Stage = case_when( Stage %in% c("A1", "A2") ~ "A", Stage %in% c("B1", "B2") ~ "B", Stage %in% c("C1", "C2") ~ "C", Stage == "D" ~ "D", TRUE ~ NA_character_ # This will handle any other cases or return NA if none match )) ``` Simple health scoring column addd ```{r} # Create new simplified health scoring LC50 <- LC50 %>% mutate(simple_health = case_when( Health == "0" ~ "0", Health %in% c("1", "2") ~ "1", Health %in% c("3", "4") ~ "2", Health %in% c("5", "6") ~ "3", Health %in% c("7", "8") ~ "4", Health %in% c("9", "10") ~ "5", TRUE ~ NA_character_)) %>% transform(simple_health = as.numeric(simple_health)) summary(LC50) ``` ## Remove outlier individuals Here we will remove the individuals that are insuitable for our analyses acros the SOD and LC50 assays. Will need to exclude two individuals from study. DM032024_C03 (0.1 mg/L condition) and DM042024_C02 (1 mg/L condition). Results in 8 replicates for all treatments except 7 only for the 1 mg/L condition. DM032024_C03: Cut at 24 hour mark. DM042024_C02: Decline in health as it was not healthy enough to begin with. DM022024_C01: Does not have a match for the SOD assay at the 96 hr mark so will be excluded from that. Will need to be consistent. ```{r} t_LC50 <- LC50[!is.na(LC50$true_rep), ] ``` ## Adjust dependent variables into factors dont use for now..... ```{r} #t_LC50 <- t_LC50 %>% #mutate(Ni.Conc. = as.factor(Ni.Conc.)) %>% # mutate(hpe = as.factor(hpe)) ``` ## Produce some data frames for plotting mean health scores per independent variable ```{r} mean_health_score <- t_LC50 %>% group_by(hpe, Ni.Conc.) %>% summarise( mean_health = round(mean(Health, na.rm = TRUE)), se = sd(Health, na.rm = TRUE) / sqrt(n()), .groups = 'drop' # Ungroup after summarizing ) t_LC50_clean <- t_LC50 %>% filter(!is.na(Health) & is.finite(Health)) ``` ## Produce data frames to plot log x-axis health graphs Below we produce a couple of data frames that will facilitate plotting the Health Score (y-axis) over Ni Conc (x-axis). Since the concentrations increase on a logarithmic scale, you need to plot it on a logarithmic scale on the x-axis if you want to best see those values. This below produces two new columns called Ni.Conc.trans that forces the 0 to become a 0.01 since the log of 0 would not be plottable in the log graph below using the function scale_x_log10(). ```{r} t2_LC50 <- t_LC50 %>% mutate(Ni.Conc.trans = ifelse(Ni.Conc. == 0, 0.01, Ni.Conc.)) %>% filter(hpe !=0) mean_health_score_no0hpe <- mean_health_score %>% mutate(Ni.Conc.trans = ifelse(Ni.Conc. == 0, 0.01, Ni.Conc.)) %>% filter(hpe !=0) ``` # Mortality ## Mortality Percent Function ```{r} require(binom) calc_mort_percent <- function(data) { total_individuals <- aggregate(Mortality ~ hpe + Ni.Conc., data = data, FUN = length) total_mortality <- aggregate(Mortality ~ hpe + Ni.Conc., data = data, FUN = sum) merged_data <- merge(total_individuals, total_mortality, by = c("hpe", "Ni.Conc."), suffixes = c(".total", ".count")) # Calculate proportions and confidence intervals merged_data$percentage_mortality <- (merged_data$Mortality.count / merged_data$Mortality.total) * 100 # Calculate confidence intervals for proportions using Wilson score interval merged_data$ci_lower <- binom::binom.confint(merged_data$Mortality.count, merged_data$Mortality.total, method = "wilson")[,"lower"] merged_data$ci_upper <- binom::binom.confint(merged_data$Mortality.count, merged_data$Mortality.total, method = "wilson")[,"upper"] return(merged_data) } ``` ## Using the mortality function on our data ```{r} mortality_percentage <- calc_mort_percent(t_LC50) mortality_percentage$hpe <- as.character(mortality_percentage$hpe) # Did this so I can have discrete colors for plot for the time points instead of a gradient ``` Review the data frame mortality_percentage. ```{r} print(mortality_percentage) summary(mortality_percentage) hist(mortality_percentage$percentage_mortality) ``` Data follows a bimodal distribution. # Survivorship For now will not be using Survivorship in final deliverables but may be useful in future toxicology studies ## Survivorship Function ```{r} calc_survivor_percent <- function(data) { total_individuals <- aggregate(Alive ~ hpe + Ni.Conc., data = data, FUN = length) total_survivor <- aggregate(Alive ~ hpe + Ni.Conc., data = data, FUN = sum) merged_data <- merge(total_individuals, total_survivor, by = c( "hpe", "Ni.Conc."), suffixes = c(".total", ".count")) merged_data$percentage_survivor <- (merged_data$Alive.count / merged_data$Alive.total) *100 return(merged_data) } survival_percent <- calc_survivor_percent(t_LC50) survival_percent$hpe <- as.character(survival_percent$hpe) # Did this so I can have discrete colors for plot for the time points instead of a gradient print(survival_percent) summary(survival_percent) survival_percent$Ni.Conc. <- as.numeric(survival_percent$Ni.Conc.) # Did this so geom_smooth actually works ``` # Mortality Graphs ## Data frame for making graphs ```{r} mort_exp <- subset(mortality_percentage, hpe != 0) mort_exp$proportion_mortality <- mort_exp$percentage_mortality / 100 ``` ### Mortality Linear Trendline ```{r} # Plot the data linear setwd('..') png(filename = "output/mort_linear.png", width = 1200, height = 900) ggplot(mort_exp, aes(x = Ni.Conc., y = percentage_mortality)) + geom_point(size = 5, color = "black") + geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2, color = "black") + # Error bars geom_hline(yintercept = 50, linetype = "dashed", color = "red", linewidth = 2) + # Add red dotted line at y = 50 labs(x = "Nickel Concentration (mg/L)", y = "Mortality (%)") + scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, by = 20), expand = c(0.01, 0.1)) + scale_x_continuous(expand = c(0.01, 0.01)) + facet_wrap(~ hpe, labeller = as_labeller(c("24" = "24 hpe", "48" = "48 hpe", "72" = "72 hpe", "96" = "96 hpe")), scales = "free") + # Facet by hpe theme_minimal() + theme( panel.grid.major.x = element_blank(), # Remove major vertical gridlines panel.grid.minor.x = element_line(color = "lightgrey", size = 0.5), panel.grid.major.y = element_blank(), panel.grid.minor.y = element_blank(), axis.text.y = element_text(size = 20, hjust = 1, margin = margin(r = 20)), axis.text.x = element_text(size = 20, angle = 90, vjust = 0.5, hjust = 1), # Adjust text size and angle axis.title = element_text(size = 30), legend.text = element_text(size = 30), legend.title = element_text(size = 30), strip.text = element_text(size = 25), strip.background = element_rect(fill = "grey", color = NA), # Ensure background is visible strip.placement = "outside", # Place strip text outside the plot area strip.placement.x = "bottom" # Place x-axis title at the bottom ) dev.off() ``` ### Mortality Logarithmic Trendline ```{r} setwd('..') png(filename = "output/mort_percent_log.png", width = 1200, height = 600) ggplot(mort_exp, aes(x = Ni.Conc., y = percentage_mortality)) + geom_point(size = 5, color = "black") + facet_wrap(~ hpe, labeller = as_labeller(c("24" = "24 hpe", "48" = "48 hpe", "72" = "72 hpe", "96" = "96 hpe"))) + geom_smooth(method = "loess", se = TRUE, color = "darkblue", linewidth = 2) + xlim(0, 1500) + geom_hline(yintercept = 50, linetype = "dashed", color = "red", linewidth = 2) + # Add red dotted line at y = 50 labs(x = "Nickel Concentration mg/L", y = "Mortality (%)") + scale_y_continuous(limits = c(0,100), breaks = seq(0, 100, by = 20), expand = c(0.01,0.1)) + scale_x_log10(breaks = c(0.1, 1, 10, 100, 1000), labels = c("0.1", "1", "10", "100", "1000"), expand = c(0.1,0.01)) + theme_test() + theme(panel.grid.major.x = element_blank(), # Remove major vertical gridlines panel.grid.minor.x = element_line(color = "lightgrey", size = 0.5), panel.grid.major.y = element_blank(), panel.grid.minor.y = element_blank(), panel.background = element_rect(fill = "lightgrey"), axis.text.y = element_text(size = 20, hjust = 1, margin = margin(r=20)), axis.text.x = element_text(size = 20, angle = 90, vjust = 0.5, hjust= 1.2, margin = margin(t = 20)), axis.title = element_text(size = 30), legend.text = element_text(size = 30), legend.title = element_text(size = 30), strip.text = element_text(size = 25), strip.background = element_rect(fill = "grey", color = NA), # Ensure background is visible strip.placement = "outside", # Place strip text outside the plot area strip.placement.x = "bottom") # Place x-axis title at the bottom) dev.off() ``` ## Extra mortality graphs not included in deliverables (sandbox area) ```{r} setwd('..') png(filename = "output/mort_percent_nolog.png", width = 1200, height = 900) library(ggplot2) # Adjusted code with x-axis labels under each facet ggplot(mort_exp, aes(x = Ni.Conc., y = percentage_mortality)) + geom_point(size = 5, color = "black") + facet_wrap(~ hpe, labeller = as_labeller(c("24" = "24 hpe", "48" = "48 hpe", "72" = "72 hpe", "96" = "96 hpe")), scales = "free") + geom_smooth(method = "loess", se = TRUE, color = "darkblue", linewidth = 2) + geom_hline(yintercept = 50, linetype = "dashed", color = "red", linewidth = 2) + # Add red dotted line at y = 50 labs(x = "Nickel Concentration mg/L", y = "Mortality (%)") + scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, by = 20), expand = c(0.01, 0.1)) + scale_x_continuous(expand = c(0.01, 0.01)) + theme(panel.grid.major.x = element_blank(), # Remove major vertical gridlines panel.grid.minor.x = element_line(color = "lightgrey", size = 0.5), panel.grid.major.y = element_blank(), panel.grid.minor.y = element_blank(), axis.text.y = element_text(size = 20, hjust = 1, margin = margin(r = 20)), axis.text.x = element_text(size = 20, angle = 90, vjust = 0.5, hjust = 1.2, margin = margin(t =20)), # Adjust text size and angle axis.title = element_text(size = 30), legend.text = element_text(size = 30), legend.title = element_text(size = 30), strip.text = element_text(size = 25), strip.background = element_rect(fill = "grey", color = NA), # Ensure background is visible strip.placement = "outside", # Place strip text outside the plot area strip.placement.x = "bottom") # Place x-axis title at the bottom dev.off() ``` ### Regression producing sigmoidal curve mortality trend ```{r} library(ggplot2) # Plot the data with sigmoidal (logistic) curve ggplot(mort_exp, aes(x = Ni.Conc., y = proportion_mortality)) + # Use proportion for logistic regression geom_point(size = 2) + facet_wrap(~ hpe) + geom_smooth(method = "glm", method.args = list(family = "binomial"), se = TRUE, color = "black") + # Use logistic regression geom_hline(yintercept = 0.5, linetype = "dotted", color = "red") + # Add horizontal red dotted line at y = 0.5 (50%) scale_y_continuous(labels = scales::percent) + # Convert y-axis back to percentage format labs(x = "Nickel Concentration mg/L", y = "Mortality (%)") + scale_x_log10() # Add vertical red dotted line at LC50 ``` ```{r} library(ggplot2) # Convert percentage_mortality to proportion for logistic regression mort_exp$proportion_mortality <- mort_exp$percentage_mortality / 100 # Plot the data with sigmoidal (logistic) curve p <- ggplot(mort_exp, aes(x = Ni.Conc., y = proportion_mortality)) + # Use proportion for logistic regression geom_point(size = 2) + facet_wrap(~ hpe) + geom_smooth(method = "glm", method.args = list(family = "binomial"), se = TRUE, color = "black") + # Use logistic regression scale_y_continuous(labels = scales::percent) + # Convert y-axis back to percentage format labs(x = "Nickel Concentration mg/L", y = "Mortality (%)") # Calculate the x-value where the logistic curve intersects y = 50% model <- glm(proportion_mortality ~ Ni.Conc., data = mort_exp, family = "binomial") intercept_x <- (log(0.5 / (1 - 0.5)) - coef(model)[1]) / coef(model)[2] p ``` # Survivor Graphs ### subset data ```{r} surv_24_96 <-subset(survival_percent, hpe %in% c(24, 96)) ``` ```{r} # Calculate the slope and intersection points for each hpe group fit_data <- surv_24_96 %>% filter(Ni.Conc. %in% c(100, 1000)) %>% group_by(hpe) %>% summarize( slope = (percentage_survivor[Ni.Conc. == 1000] - percentage_survivor[Ni.Conc. == 100]) / (1000 - 100), intercept = percentage_survivor[Ni.Conc. == 100] - slope * 100, x_intercept50 = (50 - intercept) / slope, x_intercept90 = (90 - intercept) / slope, equation = paste("y = ", round(slope, 2), "x + ", round(intercept, 2)) ) # Merge the intersection data with the original data surv_24_96 <- surv_24_96 %>% left_join(fit_data, by = "hpe") ``` ### survival 24 and 96 hpe ```{r} setwd('..') png(filename = "output/survival_24_96.png", width = 1200, height = 700) ggplot(surv_24_96, aes(x = Ni.Conc., y = percentage_survivor, color = factor(hpe))) + facet_wrap(~hpe) + geom_point(size = 6) + geom_line(aes(group = hpe), size = 2) + scale_color_manual(values = c("thistle3", "mediumpurple4"), name = "hpe") + labs(x = "Nickel Concentration mg/L", y = "Survival (%)") + scale_y_continuous(breaks = seq(0, 100, by = 10), expand = c(0.02, 0.02)) + scale_x_continuous(expand = c(0.02, 0.02)) + theme_test() + theme( axis.ticks = element_blank(), axis.line = element_line(colour = "grey50"), panel.grid = element_line(color = "gray98"), panel.grid.minor = element_blank(), panel.grid.major.x = element_blank(), panel.grid.major.y = element_line(linetype = "dashed"), panel.background = element_rect(fill = "gray98", color = "gray98"), plot.background = element_rect(fill = "white", color = "white"), axis.text = element_text(size = 30), axis.title = element_text(size = 40), axis.text.y = element_text(size = 30, hjust = 1.2, margin = margin(r = 20)), axis.text.x = element_text(size = 30, vjust = 3, hjust = 0.5, margin = margin(t = 20)), legend.text = element_text(size = 30), legend.title = element_text(size = 30), strip.text = element_blank(), # Remove strip text strip.background = element_blank(), panel.spacing = unit(5, "lines") # Increase spacing between panels ) + geom_label_repel( data = fit_data, aes(x = x_intercept90, y = 90, label = paste(round(x_intercept90), "mg/L")), box.padding = unit(2, "lines"), point.padding = unit(0.5, "lines"), label.padding = unit(0.6, "lines"), segment.size = 1, size = 8, color = "black", nudge_x = 50, nudge_y = 5, show.legend = FALSE ) + geom_label_repel( data = fit_data, aes(x = x_intercept50, y = 50, label = paste(round(x_intercept50), "mg/L")), box.padding = unit(2, "lines"), label.padding = unit(0.6, "lines"), point.padding = unit(0.5, "lines"), segment.size = 1, size = 8, color = "black", nudge_x = 50, nudge_y = 5, show.legend = FALSE ) + # 50% survival lines geom_segment(data = fit_data, aes(x = min(surv_24_96$Ni.Conc.), xend = x_intercept50, y = 50, yend = 50), linetype = "dashed", color = "red3", linewidth = 1) + geom_segment(aes(x = x_intercept50, xend = x_intercept50, y = 0, yend = 50), linetype = "dashed", color = "red3", linewidth = 1) + # 90% survivial lines geom_segment(data = fit_data, aes(x = min(surv_24_96$Ni.Conc.), xend = x_intercept90, y = 90, yend = 90), linetype = "dashed", color = "royalblue1", linewidth = 1) + geom_segment(aes(x = x_intercept90, xend = x_intercept90, y = 0, yend = 90), linetype = "dashed", color = "royalblue1", linewidth = 1) dev.off() ``` # LC50 Estimation ##Logit ### Dose Response Models Readings We do not have a traditional sigmoidal dose response curve as an option. Botryllus remain with heartbeat up to 100 mg/L and all cease blood movement at 1000 mg/L after 48 hours. https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0146021 https://www.linkedin.com/pulse/simple-lc50-calculation-pesticide-toxicity-bioassay-using-adhiwibawa Also need to decide that for the ecotox modeling if it makes sense to use a probit, logit, or linear regression model to assess LC50 for whatever time point of interest. *Probit Model:* The probit model assumes that the dose-response relationship follows a cumulative *normal distribution*. It is often used when the response variable is binary (alive/dead) or when the response follows a sigmoidal shape. Suitable when responses are discrete and there's a natural threshold for response. *Logit Model:* Similar to the probit model, the logit model is used for binary response data. It assumes that the log odds of the response is linearly related to the dose. Like probit, it's suitable for binary responses but may be preferred for computational reasons or if assumptions align better with the data. *Linear Model:* The linear model assumes a linear relationship between the dose and the response. It may be appropriate when responses are continuous and show a linear trend across doses. However, it may not capture non-linear relationships well, especially if the response plateaus or changes direction. Given my data follows a bimodal distribution it would most likely be most appropriate to follow a logit model. HOWEVER IT IS ALSO A REPEATED MEASURES EXP. SO YOU CAN'T USE A LOGIT. So instead use: Generalized Linear Mixed-Effects Model (GLMM) with a binomial family and logit link function ### Example Ecotox Data ```{r, dummy ecotox data} #install.packages("ecotox") library(ecotox) head(lamprey_tox) hist(lamprey_tox$response) m <- LC_logit((response / total) ~ log10(dose), p = c(50, 99), weights = total, data = lamprey_tox[lamprey_tox$nominal_dose != 0, ], subset = c(month == "May")) print(m) lc_may <- subset(lamprey_tox, month %in% c("May")) p1 <- ggplot(data = lc_may[lc_may$nominal_dose != 0, ], aes(x = dose, y = (response / total))) + geom_point() + geom_smooth(method = "glm", method.args = list(family = binomial(link = "logit")), aes(weight = total), colour = "#FF0000", se = TRUE) p1 ``` ### Use of LC50 dataframe for LC50 Estimations with Logit Note you may want to use the alternative data frame t_LC50 below to best represent what was observed and remove outliers. ```{r} head(t_LC50) hist(t_LC50$Mortality) ``` ```{r} #24 h LC50_24h <- LC_logit((Mortality / Total) ~ log10(Ni.Conc.), p = c(50), weights = Total, data = t_LC50[LC50$Ni.Conc. != 0, ], subset = c(hpe == "24")) print(LC50_24h) # 48 h LC50_48h <- LC_logit((Mortality / Total) ~ log10(Ni.Conc.), p = c(50), weights = Total, data = LC50[LC50$Ni.Conc. != 0, ], subset = c(hpe == "48")) print(LC50_48h) # 72 h LC50_72h <- LC_logit((Mortality / Total) ~ log10(Ni.Conc.), p = c(50), weights = Total, data = LC50[LC50$Ni.Conc. != 0, ], subset = c(hpe == "72")) print(LC50_72h) #96 h LC50_96h <- LC_logit((Mortality / Total) ~ log10(Ni.Conc.), p = c(50), weights = Total, data = LC50[LC50$Ni.Conc. != 0, ], subset = c(hpe == "96")) print(LC50_96h) ``` ### Combine LC50 over time ```{r} library(dplyr) # Combine the LC50 results into one data frame combined_lc50 <- bind_rows( LC50_24h %>% mutate(time_point = 24), LC50_48h %>% mutate(time_point = 48), LC50_72h %>% mutate(time_point = 72), LC50_96h %>% mutate(time_point = 96) ) ``` ```{r} ggplot(data = t_LC50[t_LC50$Ni.Conc. != 0, ], aes(x = Ni.Conc., y = (Mortality / Total))) + geom_point() + geom_smooth(method = "glm", method.args = list(family = binomial(link = "logit")), aes(weight = Total), colour = "#FF0000", se = TRUE) ``` ## GLMM: Generalized Linear Mixed-Effects Model with a binomial family and logit link function ```{r} install.packages("lme4") library(lme4) ``` ```{r} glmm_model <- glmer(Mortality ~ hpe + Ni.Conc. + (1|Tunicate.ID), data = t_LC50, family = binomial(link = "logit")) # Print model summary summary(glmm_model) ``` ```{r} t_LC50_high_dose <- subset(t_LC50, Ni.Conc. == 1000) table(t_LC50_high_dose$Mortality) library(lme4) # Fit a GLMM with just the high dose data glmm_model_high_dose <- glmer(Mortality ~ hpe + (1 | Tunicate.ID), data = t_LC50_high_dose, family = binomial(link = "logit"), control = glmerControl(optimizer = "bobyqa")) # Print model summary summary(glmm_model_high_dose) ``` # LC50 Graph ```{r} library(ggrepel) setwd('..') png(filename = "output/LC50_overtime.png", width = 1000, height = 500) # You can now plot the LC50 values over time ggplot(combined_lc50, aes(x = time_point, y = dose)) + geom_point() + geom_smooth(method = "loess", color = "black") + # Set line color to black geom_label_repel(aes(label = paste(time_point,"hour", "LC50", "=", round(dose), "mg/L")), size = 6, nudge_x = 2, nudge_y = 2, arrow = arrow(length = unit(2, "npc"))) + # Add labels with arrows labs(x = "Time (hours)", y = " Nickel Concentration (mg/L)") + ylim(0, 1000) + # Set y-axis limits theme( axis.text = element_text(size = 30), axis.title = element_text(size =30), legend.text = element_text(size = 25), legend.title = element_text(size = 25) ) dev.off() ``` # Health Graphs ## R Square Calcs ```{r} r_squared_values_mean <- mean_health_score %>% group_by(Ni.Conc.) %>% do({ model <- lm(mean_health ~ hpe, data = .) r_squared <- summary(model)$r.squared data.frame(Ni.Conc = unique(.$Ni.Conc.), r_squared = r_squared) }) %>% ungroup() r_squared_values_raw <- t_LC50 %>% group_by(Ni.Conc.) %>% do({ model <- lm(Health ~ hpe, data = .) r_squared <- summary(model)$r.squared data.frame(Ni.Conc = unique(.$Ni.Conc.), r_squared = r_squared) }) %>% ungroup() r_squared_values_raw_loess <- t_LC50_clean %>% group_by(Ni.Conc.) %>% do({ loess_fit <- loess(Health ~ hpe, data = ., span = 1) predicted <- predict(loess_fit) residuals <- .$Health - predicted ss_total <- sum((.$Health - mean(.$Health))^2) ss_residuals <- sum(residuals^2) r_squared <- 1 - (ss_residuals / ss_total) data.frame(Ni.Conc = unique(.$Ni.Conc.), r_squared = r_squared) }) %>% ungroup() ``` ## Multipanel plot health by time ```{r} setwd('..') png(filename = "output/health_mean_se_raw.png", width = 2000, height = 1500) ggplot() + geom_point(data = t_LC50_clean, aes(x = hpe, y = Health), size = 5, color = "white", alpha = 0.5) + # Raw data points for SE calculation and made it white so it blends into the background geom_point(data = mean_health_score, aes(x = hpe, y = mean_health), size = 9, color = "black") + # Mean points geom_errorbar(data = mean_health_score, aes(x = hpe, ymin = mean_health - se, ymax = mean_health + se), width = 4) + # Error bars facet_wrap(~ Ni.Conc., labeller = as_labeller(c("0" = "0 mg/L", "0.1" = "0.1 mg/L", "1" = "1 mg/L", "10" = "10 mg/L", "100" = "100 mg/L", "1000" = "1000 mg/L"))) + scale_x_continuous(breaks = c(0, 24, 48, 72, 96), expand = c(0.05, 0.1)) + scale_y_continuous(breaks = seq(0, 10, by = 2), limits = c(0,10)) + geom_smooth(data = t_LC50_clean, aes(x = hpe, y = Health), method = "loess", se = TRUE, fill= "lightsteelblue2", color = "lightsteelblue3", linewidth = 4, formula = (y ~ x), span = 1) + # Add linear regression line with SE based on raw data labs(x = "Time (hours)", y = "Health Score" ) + theme_test() + theme( strip.text = element_text(size = 55), # Adjust the size of the facet wrap labels axis.title = element_text(size = 60), # adjust size of title axis.text.y = element_text(size = 55, hjust = 1, margin = margin(r = 20)), axis.text.x = element_text(size = 55, vjust = 3.5, hjust = 0.5, margin = margin(t=50)) ) #+ # geom_text(data = r_squared_values_raw_loess, aes(x = 48, y = 9, label = paste("R² =", round(r_squared, 2))), size = 16, color = "firebrick", vjust = -0.5, hjust = -0.4) dev.off() ``` ```{r} setwd('..') png(filename = "output/health_mean.png", width = 2000, height = 1120) ggplot() + geom_point(data = t_LC50_clean, aes(x = hpe, y = Health), size = 0.1, color = "white", alpha = 0.5) + # Raw data points for SE calculation and made it white so it blends into the background geom_point(data = mean_health_score, aes(x = hpe, y = mean_health), size = 9, color = "black") + # Mean points geom_errorbar(data = mean_health_score, aes(x = hpe, ymin = mean_health - se, ymax = mean_health + se), width = 5, position = position_dodge(width = 0.5)) + # Error bars facet_wrap(~ Ni.Conc., labeller = as_labeller(c("0" = "0 mg/L", "0.1" = "0.1 mg/L", "1" = "1 mg/L", "10" = "10 mg/L", "100" = "100 mg/L", "1000" = "1000 mg/L"))) + scale_x_continuous(breaks = c(0, 24, 48, 72, 96), expand = c(0.05, 0.1)) + scale_y_continuous(breaks = seq(0, 10, by = 2), limits = c(0,10)) + geom_smooth(data = mean_health_score, aes(x = hpe, y = mean_health), method = "lm", se = TRUE, color = "thistle", fill= "thistle" , linewidth = 5) + # Add linear regression line with SE based on raw data labs(x = "Time (hours)", y = "Average Health Score" ) + theme_test() + theme( strip.text = element_text(size = 40), # Adjust the size of the facet wrap labels axis.title = element_text(size = 50), axis.text.y = element_text(size = 40, hjust = 1, margin = margin(r = 20)), axis.text.x = element_text(size = 40, vjust = 3, hjust = 0.5, margin = margin(t=20)) ) #+ # geom_text(data = r_squared_values_mean, aes(x = 48, y = 9, label = paste("R² =", round(r_squared, 2))), size = 8, color = "blue") dev.off() ``` ## Plot the trendlines of declined health over time on one graph ```{r} setwd('..') png(filename = "output/health_mean_one.png", width = 1400, height = 1120) # Define a custom labeller custom_labeller <- as_labeller(c("0" = "0", "0.1" = "0.1", "1" = "1", "10" = "10", "100" = "100", "1000" = "1000")) ggplot() + geom_point(data = t_LC50_clean, aes(x = hpe, y = Health), size = 0.1, color = "white", alpha = 0.5) + # Raw data points for SE calculation and made it white so it blends into the background geom_point(data = mean_health_score, aes(x = hpe, y = mean_health, color = as.factor(Ni.Conc.)), size = 4) + # Mean points with color by Ni.Conc. geom_errorbar(data = mean_health_score, aes(x = hpe, ymin = mean_health - se, ymax = mean_health + se, color = as.factor(Ni.Conc.)), width = 3) + # Error bars with color by Ni.Conc. geom_smooth(data = t_LC50_clean, aes(x = hpe, y = Health, color = as.factor(Ni.Conc.), fill = as.factor(Ni.Conc.)), method = "lm", se = TRUE, linewidth = 2) + # Add linear regression line with SE based on raw data scale_color_brewer(palette = "PuOr", labels = custom_labeller) + # Customize colors and labels for lines scale_fill_brewer(palette = "PuOr", labels = custom_labeller) + # Customize fill for SE scale_x_continuous(breaks = c(0, 24, 48, 72, 96), expand = c(0.05, 0.1)) + scale_y_continuous(breaks = seq(0, 10, by = 2), limits = c(0,10)) + labs(x = "Time (hours)", y = "Health Score", color = "Nickel (mg/L)", # Legend title for lines fill = "Nickel (mg/L)" # Legend title for fill ) + theme_bw() + theme( legend.position = "right", legend.title = element_text(size = 40), # Adjust the legend title size legend.text = element_text(size = 35), # Adjust the legend text size axis.title = element_text(size = 50), axis.text.y = element_text(size = 40, hjust = 1, margin = margin(r = 20)), axis.text.x = element_text(size = 40, vjust = 3, hjust = 0.5, margin = margin(t=20)) ) dev.off() ``` ## Exploaratory raw health data over time ```{r} setwd('..') png(filename = "output/health_raw.png", width = 1400, height = 1120) ggplot(t_LC50, aes(x = hpe, y = Health)) + geom_point(size = 3, color = "black") + # Adjust the size of the points facet_wrap(~ Ni.Conc., labeller = as_labeller(c("0" = "0 mg/L", "0.1" = "0.1 mg/L", "1" = "1 mg/L", "10" = "10 mg/L", "100" = "100 mg/L", "1000" = "1000 mg/L"))) + scale_x_continuous(breaks = c(0, 24, 48, 72, 96), expand = c(0.05, 0.1)) + scale_y_continuous(breaks = seq(0, 10, by = 1), expand = c(0.0,0.0)) + ylim(0, 10) + geom_smooth(method = "lm", se = TRUE, color = "darkblue", linewidth = 5) + # Add linear regression line with SE labs(x = "Time (hours)", y = "Health Score" ) + theme_test() + theme( strip.text = element_text(size = 40), # Adjust the size of the facet wrap labels axis.title = element_text(size = 50), axis.text.y = element_text(size = 40, hjust = 1.5, margin = margin(r = 20)), axis.text.x = element_text(size = 40, vjust = 3, hjust = 0.5, margin = margin(t=20)) ) dev.off() ``` ## Health over increasing Ni Conc ```{r} library(scales) setwd('..') png(filename = "output/health_by_conc.png", width = 1400, height = 1100) ggplot() + geom_point(data = t2_LC50, aes(x = Ni.Conc.trans, y = Health), size = 0.01, color = "white", alpha = 0.5) + # Raw data points for SE calculation geom_point(data = mean_health_score_no0hpe, aes(x = Ni.Conc.trans, y = mean_health), size = 4, color = "black") + # Mean points geom_errorbar(data = mean_health_score_no0hpe, aes(x = Ni.Conc.trans, ymin = mean_health - se, ymax = mean_health + se), width = 0.2) + # Error bars facet_wrap(~ hpe, labeller = as_labeller(c("24" = "24 hpe", "48" = "48 hpe", "72" = "72 hpe", "96" = "96 hpe"))) + scale_x_log10(breaks = c(0.01, 0.1, 1, 10, 100, 1000), labels = c("0", "0.1", "1", "10", "100", "1000")) + # Refined breaks and labels scale_y_continuous(breaks = seq(0, 10, by = 2), expand = c(0.01, 0.01), limits = c(0, 10)) + geom_smooth(data = t2_LC50, aes(x = Ni.Conc.trans, y = Health), method = "lm", se = TRUE, color = "darkblue", linewidth = 5) + # Add linear regression line with SE based on raw data labs(x = "Nickel Concentration (mg/L)", y = "Health Score" ) + theme_test() + theme( strip.text = element_text(size = 25), # Adjust the size of the facet wrap labels axis.text = element_text(size = 30), # Adjust the size of the axis labels axis.title = element_text(size = 30), axis.text.y = element_text(size = 20, hjust = 1.5, margin = margin(r = 20)), axis.text.x = element_text(size = 20, vjust = 3, hjust = 0.5, margin = margin(t = 20)) ) dev.off() ``` # Statistical analysis of health decline differences If you look at the graphs, it appears that the decline in health over time in accordance with the independent variables may be significantly different. Need to sandbox a way to analyze differences in slopes among trend lines. ## ANCOVA of linear models for health decline ```{r} # Load necessary libraries library(dplyr) library(ggplot2) # Fit linear models for each concentration lm_models <- mean_health_score %>% group_by(Ni.Conc.) %>% do(model = lm(mean_health ~ hpe, data = .)) # Summarize the models model_summaries <- lm_models %>% summarize( Ni.Conc. = unique(Ni.Conc.), intercept = coef(model)[1], slope = coef(model)[2], p_value = summary(model)$coefficients[2, 4] ) # Print the summaries print(model_summaries) #ANCOVA # Combine all data and fit a single model with interaction to compare slopes or the rate in decline in health over time by concentration of nickel combined_model <- lm(mean_health ~ hpe * Ni.Conc., data = mean_health_score) summary(combined_model) ``` ## ANOVA? ### ANOVA assumptions for raw health data ```{r} summary(t_LC50) fact_LC50 <- t_LC50 %>% mutate(hpe = as.factor(hpe)) %>% mutate(Ni.Conc. = as.factor(Ni.Conc.)) summary(fact_LC50) ## Meeting the ANOVA assumptions leveneTest(Health ~ Ni.Conc.*hpe, data = fact_LC50) #p-value < 0.05, therefore equal-variances are NOT met, WOULD NEED TO TRANSFORM THE DATA shapiro.test(fact_LC50$Health) #DOES NOT meets assumption of normality, p value < 0.05 hist(fact_LC50$Health) qqnorm(fact_LC50$Health) ``` ### Transformation to try to make assumptions work for ANOVA raw health data ```{r} # Transform Health variable t_LC50$Health_sqrt <- sqrt(t_LC50$Health) # Check histogram of transformed data hist(t_LC50$Health_sqrt, main = "Transformed Health (sqrt)", xlab = "Health") # Check normality of transformed data (optional) shapiro.test(t_LC50$Health_sqrt) # Perform Levene's test on transformed data leveneTest(Health_sqrt ~ Ni.Conc., data = t_LC50) # Run ANOVA on transformed data #anova_model <- aov(Health_sqrt ~ Ni.Conc., data = t_LC50) #summary(anova_model) ``` Still does not meet assumptions. Can't use ANOVA for health data. ## Post Hoc Testing Simple Slopes This bit needs to be fixed and also further researched. As above there is evidently a signficant difference in the slopes by Ni.Conc.:hpe but at the moment can't say exactly which are different from which. ```{r} library(emmeans) # Simple slopes analysis simple_slopes <- emmeans(combined_model, ~ hpe | Ni.Conc.) summary(simple_slopes) pairwise_comparisons <- pairs(simple_slopes) summary(pairwise_comparisons) ```