---
title: "LC50 Nickel x Botryllus"
author: "Celeste Valdivia"
date: "2024-04-10"
output: html_document
---

# Setup
```{r, eval =TRUE}
library(knitr)
library(tidyverse)
library(tidyr)
library(dplyr)
library(hrbrthemes)
library(ggplot2)
library(RColorBrewer)
library(ggpubr)
library(ecotox)
library(wesanderson)
library(viridis)
library(ggrepel)
knitr::opts_chunk$set(echo = TRUE,
                      eval = FALSE)
```

# Retrieving Data from Google Sheets

Make sure you have made your Google sheet publicly available to anyone that has the link. If you make any updates to the sheet just re-curl the data, meaning just re-run the code below.

```{r, engine='bash', eval=FALSE}
cd ..

curl -L https://docs.google.com/spreadsheets/d/1JtJO3EX06BYK4pYwZZ7b7ZCoTyZcAeuTRka_9kA-yPk/export?exportFormat=csv | tee data/LC50.csv
```

Read in the data to your local R environment.

```{r, eval=TRUE}
setwd('..')
LC50 <- read.csv(file = "data/LC50.csv")  
```

# Cleaning up Data

## Add simplified scoring columbs and convert dates
Convert Datees
```{r, eval=TRUE}
LC50$Date <- mdy(LC50$Date) #convert the Date column from characters to true Date
```

Simple Stage column added:
```{r}
LC50 <- LC50 %>%
  mutate(simple_Stage = case_when(
    Stage %in% c("A1", "A2") ~ "A",
    Stage %in% c("B1", "B2") ~ "B",
    Stage %in% c("C1", "C2") ~ "C",
    Stage == "D" ~ "D",
    TRUE ~ NA_character_  # This will handle any other cases or return NA if none match
  ))
```

Simple health scoring column addd
```{r}
# Create new simplified health scoring
LC50 <- LC50 %>%
  mutate(simple_health = case_when(
    Health == "0" ~ "0",
    Health %in% c("1", "2") ~ "1",
    Health %in% c("3", "4") ~ "2",
    Health %in% c("5", "6") ~ "3",
    Health %in% c("7", "8") ~ "4",
    Health %in% c("9", "10") ~ "5",
    TRUE ~ NA_character_)) %>%
  transform(simple_health = as.numeric(simple_health))

summary(LC50)
```


## Remove outlier individuals
Here we will remove the individuals that are insuitable for our analyses acros the SOD and LC50 assays.

Will need to exclude two individuals from study. DM032024_C03 (0.1 mg/L condition) and DM042024_C02 (1 mg/L condition). Results in 8 replicates for all treatments except 7 only for the 1 mg/L condition.

DM032024_C03: Cut at 24 hour mark.

DM042024_C02: Decline in health as it was not healthy enough to begin with.

DM022024_C01: Does not have a match for the SOD assay at the 96 hr mark so will be excluded from that. Will need to be consistent.

```{r}
t_LC50 <- LC50[!is.na(LC50$true_rep), ]
```

## Adjust dependent variables into factors

dont use for now.....
```{r}
#t_LC50 <-  t_LC50 %>% 
  #mutate(Ni.Conc. = as.factor(Ni.Conc.)) %>%
 # mutate(hpe = as.factor(hpe))
```

## Produce some data frames for plotting mean health scores per independent variable
```{r}
mean_health_score <- t_LC50 %>%
  group_by(hpe, Ni.Conc.) %>%
  summarise(
    mean_health = round(mean(Health, na.rm = TRUE)),
    se = sd(Health, na.rm = TRUE) / sqrt(n()),
    .groups = 'drop'  # Ungroup after summarizing
  )

t_LC50_clean <- t_LC50 %>%
  filter(!is.na(Health) & is.finite(Health))
```

## Produce data frames to plot log x-axis health graphs
Below we produce a couple of data frames that will facilitate plotting the Health Score (y-axis) over Ni Conc (x-axis). Since the concentrations increase on a logarithmic scale, you need to plot it on a logarithmic scale on the x-axis if you want to best see those values. This below produces two new columns called Ni.Conc.trans that forces the 0 to become a 0.01 since the log of 0 would not be plottable in the log graph below using the function scale_x_log10().
```{r}
t2_LC50 <- t_LC50 %>%
  mutate(Ni.Conc.trans = ifelse(Ni.Conc. == 0, 0.01, Ni.Conc.)) %>%
  filter(hpe !=0)

mean_health_score_no0hpe <- mean_health_score %>%
  mutate(Ni.Conc.trans = ifelse(Ni.Conc. == 0, 0.01, Ni.Conc.)) %>%
  filter(hpe !=0)
```


# Mortality

## Mortality Percent Function

```{r}
require(binom)

calc_mort_percent <- function(data) {
  total_individuals <- aggregate(Mortality ~ hpe + Ni.Conc., data = data, FUN = length)
  total_mortality <- aggregate(Mortality ~ hpe + Ni.Conc., data = data, FUN = sum)
  
  merged_data <- merge(total_individuals, total_mortality, by = c("hpe", "Ni.Conc."), suffixes = c(".total", ".count"))
  
  # Calculate proportions and confidence intervals
  merged_data$percentage_mortality <- (merged_data$Mortality.count / merged_data$Mortality.total) * 100
  
  # Calculate confidence intervals for proportions using Wilson score interval
  merged_data$ci_lower <- binom::binom.confint(merged_data$Mortality.count, merged_data$Mortality.total, method = "wilson")[,"lower"]
  merged_data$ci_upper <- binom::binom.confint(merged_data$Mortality.count, merged_data$Mortality.total, method = "wilson")[,"upper"]
  
  return(merged_data)
}
```

## Using the mortality function on our data
```{r}

mortality_percentage <- calc_mort_percent(t_LC50)

mortality_percentage$hpe <- as.character(mortality_percentage$hpe) # Did this so I can have discrete colors for plot for the time points instead of a gradient
```

Review the data frame mortality_percentage.
```{r}
print(mortality_percentage)
summary(mortality_percentage)

hist(mortality_percentage$percentage_mortality)
```
Data follows a bimodal distribution.

# Survivorship

For now will not be using Survivorship in final deliverables but may be useful in future toxicology studies
## Survivorship Function
```{r}
calc_survivor_percent <- function(data) {
  
  total_individuals <- aggregate(Alive ~ hpe + Ni.Conc., data = data, FUN = length)
  
  total_survivor <- aggregate(Alive ~ hpe + Ni.Conc., data = data, FUN = sum)
  
  merged_data <- merge(total_individuals, total_survivor, by = c( "hpe",  "Ni.Conc."), suffixes = c(".total", ".count"))
  
  merged_data$percentage_survivor <- (merged_data$Alive.count / merged_data$Alive.total) *100
  
  return(merged_data)
}

survival_percent <- calc_survivor_percent(t_LC50)

survival_percent$hpe <- as.character(survival_percent$hpe) # Did this so I can have discrete colors for plot for the time points instead of a gradient

print(survival_percent)
summary(survival_percent)

survival_percent$Ni.Conc. <- as.numeric(survival_percent$Ni.Conc.) # Did this so geom_smooth actually works
```


# Mortality Graphs
## Data frame for making graphs
```{r}
mort_exp <- subset(mortality_percentage, hpe != 0)
mort_exp$proportion_mortality <- mort_exp$percentage_mortality / 100
```

### Mortality Linear Trendline
```{r}
# Plot the data linear
setwd('..')
png(filename = "output/mort_linear.png", width = 1200, height = 900)
ggplot(mort_exp, aes(x = Ni.Conc., y = percentage_mortality)) + 
  geom_point(size = 5, color = "black") +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2, color = "black") +  # Error bars
  geom_hline(yintercept = 50, linetype = "dashed", color = "red", linewidth = 2) +  # Add red dotted line at y = 50 
  labs(x = "Nickel Concentration (mg/L)",
       y = "Mortality (%)") +
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, by = 20), expand = c(0.01, 0.1)) +
  scale_x_continuous(expand = c(0.01, 0.01)) +
  facet_wrap(~ hpe, labeller = as_labeller(c("24" = "24 hpe", "48" = "48 hpe", "72" = "72 hpe", "96" = "96 hpe")), scales = "free") +  # Facet by hpe
  theme_minimal() +
  theme(
    panel.grid.major.x = element_blank(),  # Remove major vertical gridlines
    panel.grid.minor.x = element_line(color = "lightgrey", size = 0.5),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(), 
    axis.text.y = element_text(size = 20, hjust = 1, margin = margin(r = 20)),
    axis.text.x = element_text(size = 20, angle = 90, vjust = 0.5, hjust = 1),  # Adjust text size and angle
    axis.title = element_text(size = 30),
    legend.text = element_text(size = 30),
    legend.title = element_text(size = 30),
    strip.text = element_text(size = 25),
    strip.background = element_rect(fill = "grey", color = NA),  # Ensure background is visible
    strip.placement = "outside",  # Place strip text outside the plot area
    strip.placement.x = "bottom"  # Place x-axis title at the bottom
  )
dev.off()
```

### Mortality Logarithmic Trendline
```{r}
setwd('..')
png(filename = "output/mort_percent_log.png", width = 1200, height = 600)

ggplot(mort_exp, aes(x = Ni.Conc., y = percentage_mortality)) + 
  geom_point(size = 5, color = "black") +
  facet_wrap(~ hpe, labeller = as_labeller(c("24" = "24 hpe", "48" = "48 hpe", "72" = "72 hpe", "96" = "96 hpe"))) +
  geom_smooth(method = "loess", se = TRUE, color = "darkblue", linewidth = 2) + 
  xlim(0, 1500) +
  geom_hline(yintercept = 50, linetype = "dashed", color = "red", linewidth = 2) +  # Add red dotted line at y = 50
  labs(x = "Nickel Concentration mg/L",
       y = "Mortality (%)") +
  scale_y_continuous(limits = c(0,100), breaks = seq(0, 100, by = 20), expand = c(0.01,0.1)) +
  scale_x_log10(breaks = c(0.1, 1, 10, 100, 1000), labels = c("0.1", "1", "10", "100", "1000"), expand = c(0.1,0.01)) +
  theme_test() +
    theme(panel.grid.major.x = element_blank(),  # Remove major vertical gridlines
        panel.grid.minor.x = element_line(color = "lightgrey", size = 0.5),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank(), 
        panel.background = element_rect(fill = "lightgrey"),
        axis.text.y = element_text(size = 20, hjust = 1, margin = margin(r=20)),
        axis.text.x = element_text(size = 20, angle = 90, vjust = 0.5, hjust= 1.2, margin = margin(t = 20)),
        axis.title = element_text(size = 30),
        legend.text = element_text(size = 30),
        legend.title = element_text(size = 30),
        strip.text = element_text(size = 25),
        strip.background = element_rect(fill = "grey", color = NA),  # Ensure background is visible
        strip.placement = "outside",  # Place strip text outside the plot area
        strip.placement.x = "bottom")  # Place x-axis title at the bottom)

dev.off()
```

## Extra mortality graphs not included in deliverables (sandbox area)
```{r}
setwd('..')
png(filename = "output/mort_percent_nolog.png", width = 1200, height = 900)

library(ggplot2)

# Adjusted code with x-axis labels under each facet
ggplot(mort_exp, aes(x = Ni.Conc., y = percentage_mortality)) + 
  geom_point(size = 5, color = "black") +
  facet_wrap(~ hpe, labeller = as_labeller(c("24" = "24 hpe", "48" = "48 hpe", "72" = "72 hpe", "96" = "96 hpe")), scales = "free") +
  geom_smooth(method = "loess", se = TRUE, color = "darkblue", linewidth = 2) + 
  geom_hline(yintercept = 50, linetype = "dashed", color = "red", linewidth = 2) +  # Add red dotted line at y = 50
  labs(x = "Nickel Concentration mg/L",
       y = "Mortality (%)") +
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, by = 20), expand = c(0.01, 0.1)) +
  scale_x_continuous(expand = c(0.01, 0.01)) +
  theme(panel.grid.major.x = element_blank(),  # Remove major vertical gridlines
        panel.grid.minor.x = element_line(color = "lightgrey", size = 0.5),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank(), 
        axis.text.y = element_text(size = 20, hjust = 1, margin = margin(r = 20)),
        axis.text.x = element_text(size = 20, angle = 90, vjust = 0.5, hjust = 1.2, margin = margin(t =20)),  # Adjust text size and angle
        axis.title = element_text(size = 30),
        legend.text = element_text(size = 30),
        legend.title = element_text(size = 30),
        strip.text = element_text(size = 25),
        strip.background = element_rect(fill = "grey", color = NA),  # Ensure background is visible
        strip.placement = "outside",  # Place strip text outside the plot area
        strip.placement.x = "bottom")  # Place x-axis title at the bottom


dev.off()
```

### Regression producing sigmoidal curve mortality trend

```{r}
library(ggplot2)

# Plot the data with sigmoidal (logistic) curve
ggplot(mort_exp, aes(x = Ni.Conc., y = proportion_mortality)) +  # Use proportion for logistic regression
  geom_point(size = 2) +
  facet_wrap(~ hpe) +
  geom_smooth(method = "glm", method.args = list(family = "binomial"), se = TRUE, color = "black") +  # Use logistic regression
  geom_hline(yintercept = 0.5, linetype = "dotted", color = "red") +  # Add horizontal red dotted line at y = 0.5 (50%)
  scale_y_continuous(labels = scales::percent) +  # Convert y-axis back to percentage format
  labs(x = "Nickel Concentration mg/L",
       y = "Mortality (%)") +
  scale_x_log10()

# Add vertical red dotted line at LC50

```


```{r}
library(ggplot2)

# Convert percentage_mortality to proportion for logistic regression
mort_exp$proportion_mortality <- mort_exp$percentage_mortality / 100

# Plot the data with sigmoidal (logistic) curve
p <- ggplot(mort_exp, aes(x = Ni.Conc., y = proportion_mortality)) +  # Use proportion for logistic regression
  geom_point(size = 2) +
  facet_wrap(~ hpe) +
  geom_smooth(method = "glm", method.args = list(family = "binomial"), se = TRUE, color = "black") +  # Use logistic regression
  scale_y_continuous(labels = scales::percent) +  # Convert y-axis back to percentage format
  labs(x = "Nickel Concentration mg/L",
       y = "Mortality (%)")

# Calculate the x-value where the logistic curve intersects y = 50%
model <- glm(proportion_mortality ~ Ni.Conc., data = mort_exp, family = "binomial")
intercept_x <- (log(0.5 / (1 - 0.5)) - coef(model)[1]) / coef(model)[2]

p 

```

# Survivor Graphs
### subset data
```{r}
surv_24_96 <-subset(survival_percent, hpe %in% c(24, 96))
```

```{r}
# Calculate the slope and intersection points for each hpe group
fit_data <- surv_24_96 %>%
  filter(Ni.Conc. %in% c(100, 1000)) %>%
  group_by(hpe) %>%
  summarize(
    slope = (percentage_survivor[Ni.Conc. == 1000] - percentage_survivor[Ni.Conc. == 100]) / 
            (1000 - 100),
    intercept = percentage_survivor[Ni.Conc. == 100] - slope * 100,
    x_intercept50 = (50 - intercept) / slope,
    x_intercept90 = (90 - intercept) / slope,
    equation = paste("y = ", round(slope, 2), "x + ", round(intercept, 2))

  )

# Merge the intersection data with the original data
surv_24_96 <- surv_24_96 %>%
  left_join(fit_data, by = "hpe")
```


### survival 24 and 96 hpe
```{r}
setwd('..')
png(filename = "output/survival_24_96.png", width = 1200, height = 700)

ggplot(surv_24_96, aes(x = Ni.Conc., y = percentage_survivor, color = factor(hpe))) + 
  facet_wrap(~hpe) +
  geom_point(size = 6) +
  geom_line(aes(group = hpe), size = 2) +
  scale_color_manual(values = c("thistle3", "mediumpurple4"), name = "hpe") +
  labs(x = "Nickel Concentration mg/L",
       y = "Survival (%)") +
  scale_y_continuous(breaks = seq(0, 100, by = 10), expand = c(0.02, 0.02)) +
  scale_x_continuous(expand = c(0.02, 0.02)) +
  theme_test() +
  theme(
    axis.ticks = element_blank(),
    axis.line = element_line(colour = "grey50"),
    panel.grid = element_line(color = "gray98"),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_line(linetype = "dashed"),
    panel.background = element_rect(fill = "gray98", color = "gray98"),
    plot.background = element_rect(fill = "white", color = "white"),
    axis.text = element_text(size = 30),
    axis.title = element_text(size = 40),
    axis.text.y = element_text(size = 30, hjust = 1.2, margin = margin(r = 20)),
    axis.text.x = element_text(size = 30, vjust = 3, hjust = 0.5, margin = margin(t = 20)),
    legend.text = element_text(size = 30),
    legend.title = element_text(size = 30),
    strip.text = element_blank(),  # Remove strip text  
    strip.background = element_blank(),
    panel.spacing = unit(5, "lines")  # Increase spacing between panels

  ) +
  geom_label_repel(
    data = fit_data,
    aes(x = x_intercept90, y = 90, label = paste(round(x_intercept90), "mg/L")),
    box.padding = unit(2, "lines"),
    point.padding = unit(0.5, "lines"),
    label.padding = unit(0.6, "lines"),
    segment.size = 1,
    size = 8,
    color = "black",
    nudge_x = 50,
    nudge_y = 5,
    show.legend = FALSE
  ) +
  geom_label_repel(
    data = fit_data,
    aes(x = x_intercept50, y = 50, label = paste(round(x_intercept50), "mg/L")),
    box.padding = unit(2, "lines"),
    label.padding = unit(0.6, "lines"),
    point.padding = unit(0.5, "lines"),
    segment.size = 1,
    size = 8,
    color = "black",
    nudge_x = 50,
    nudge_y = 5,
    show.legend = FALSE
  ) +
# 50% survival lines
  geom_segment(data = fit_data, 
               aes(x = min(surv_24_96$Ni.Conc.), 
                   xend = x_intercept50, 
                   y = 50, yend = 50), 
               linetype = "dashed", color = "red3", linewidth = 1) +  
  geom_segment(aes(x = x_intercept50, xend = x_intercept50, 
                   y = 0, yend = 50), 
               linetype = "dashed", color = "red3", linewidth = 1)  +
  # 90% survivial lines
 geom_segment(data = fit_data, aes(x = min(surv_24_96$Ni.Conc.), 
                                   xend = x_intercept90, 
                                   y = 90, yend = 90), 
              linetype = "dashed", color = "royalblue1", linewidth = 1) +  
  geom_segment(aes(x = x_intercept90, xend = x_intercept90, 
                   y = 0, yend = 90), 
               linetype = "dashed", color = "royalblue1", linewidth = 1) 

dev.off()
```


# LC50 Estimation

##Logit
### Dose Response Models Readings

We do not have a traditional sigmoidal dose response curve as an option. Botryllus remain with heartbeat up to 100 mg/L and all cease blood movement at 1000 mg/L after 48 hours.

https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0146021

https://www.linkedin.com/pulse/simple-lc50-calculation-pesticide-toxicity-bioassay-using-adhiwibawa


Also need to decide that for the ecotox modeling if it makes sense to use a probit, logit, or linear regression model to assess LC50 for whatever time point of interest.

*Probit Model:*
The probit model assumes that the dose-response relationship follows a cumulative *normal distribution*.
It is often used when the response variable is binary (alive/dead) or when the response follows a sigmoidal shape.
Suitable when responses are discrete and there's a natural threshold for response.

*Logit Model:*
Similar to the probit model, the logit model is used for binary response data.
It assumes that the log odds of the response is linearly related to the dose.
Like probit, it's suitable for binary responses but may be preferred for computational reasons or if assumptions align better with the data.

*Linear Model:*
The linear model assumes a linear relationship between the dose and the response.
It may be appropriate when responses are continuous and show a linear trend across doses.
However, it may not capture non-linear relationships well, especially if the response plateaus or changes direction.

Given my data follows a bimodal distribution it would most likely be most appropriate to follow a logit model. HOWEVER IT IS ALSO A REPEATED MEASURES EXP. SO YOU CAN'T USE A LOGIT. So instead use: Generalized Linear Mixed-Effects Model (GLMM) with a binomial family and logit link function 

### Example Ecotox Data
```{r, dummy ecotox data}
#install.packages("ecotox")
library(ecotox)

head(lamprey_tox)

hist(lamprey_tox$response)

m <- LC_logit((response / total) ~ log10(dose), p = c(50, 99),
weights = total,
data = lamprey_tox[lamprey_tox$nominal_dose != 0, ],
subset = c(month == "May"))

print(m)

lc_may <- subset(lamprey_tox, month %in% c("May"))

p1 <- ggplot(data = lc_may[lc_may$nominal_dose != 0, ],
  aes(x = dose, y = (response / total))) +
  geom_point() +
  geom_smooth(method = "glm",
   method.args = list(family = binomial(link = "logit")),
 aes(weight = total), colour = "#FF0000", se = TRUE)

 p1

```

### Use of LC50 dataframe for LC50 Estimations with Logit

Note you may want to use the alternative data frame t_LC50 below to best represent what was observed and remove outliers.

```{r}
head(t_LC50)

hist(t_LC50$Mortality)
```

```{r}
#24 h
LC50_24h <- LC_logit((Mortality / Total) ~ log10(Ni.Conc.), p = c(50),
weights = Total,
data = t_LC50[LC50$Ni.Conc. != 0, ],
subset = c(hpe == "24"))

print(LC50_24h)

# 48 h
LC50_48h <- LC_logit((Mortality / Total) ~ log10(Ni.Conc.), p = c(50),
weights = Total,
data = LC50[LC50$Ni.Conc. != 0, ],
subset = c(hpe == "48"))

print(LC50_48h)

# 72 h
LC50_72h <- LC_logit((Mortality / Total) ~ log10(Ni.Conc.), p = c(50),
weights = Total,
data = LC50[LC50$Ni.Conc. != 0, ],
subset = c(hpe == "72"))

print(LC50_72h)

#96 h
LC50_96h <- LC_logit((Mortality / Total) ~ log10(Ni.Conc.), p = c(50),
weights = Total,
data = LC50[LC50$Ni.Conc. != 0, ],
subset = c(hpe == "96"))
print(LC50_96h)
```


### Combine LC50 over time
```{r}
library(dplyr)

# Combine the LC50 results into one data frame
combined_lc50 <- bind_rows(
  LC50_24h %>% mutate(time_point = 24),
  LC50_48h %>% mutate(time_point = 48),
  LC50_72h %>% mutate(time_point = 72),
  LC50_96h %>% mutate(time_point = 96)
)

```

```{r}

ggplot(data = t_LC50[t_LC50$Ni.Conc. != 0, ],
  aes(x = Ni.Conc., y = (Mortality / Total))) +
  geom_point() +
  geom_smooth(method = "glm",
   method.args = list(family = binomial(link = "logit")),
 aes(weight = Total), colour = "#FF0000", se = TRUE)

```


## GLMM: Generalized Linear Mixed-Effects Model with a binomial family and logit link function 

```{r}
install.packages("lme4")
library(lme4)
```

```{r}
glmm_model <- glmer(Mortality ~ hpe + Ni.Conc. + (1|Tunicate.ID),
                    data = t_LC50,
                    family = binomial(link = "logit"))

# Print model summary
summary(glmm_model)
```
```{r}
t_LC50_high_dose <- subset(t_LC50, Ni.Conc. == 1000)
table(t_LC50_high_dose$Mortality)

library(lme4)

# Fit a GLMM with just the high dose data
glmm_model_high_dose <- glmer(Mortality ~ hpe + (1 | Tunicate.ID),
                              data = t_LC50_high_dose,
                              family = binomial(link = "logit"),
                              control = glmerControl(optimizer = "bobyqa"))

# Print model summary
summary(glmm_model_high_dose)

```


# LC50 Graph
```{r}
library(ggrepel)

setwd('..')
png(filename = "output/LC50_overtime.png",  width = 1000, height = 500)

# You can now plot the LC50 values over time
ggplot(combined_lc50, aes(x = time_point, y = dose)) +
  geom_point() +
  geom_smooth(method = "loess", color = "black") +  # Set line color to black
  geom_label_repel(aes(label = paste(time_point,"hour", "LC50", "=", round(dose), "mg/L")), 
                   size = 6, 
                   nudge_x = 2, 
                   nudge_y = 2,
                   arrow = arrow(length = unit(2, "npc"))) +  # Add labels with arrows
  labs(x = "Time (hours)", y = " Nickel Concentration (mg/L)") +
  ylim(0, 1000) + # Set y-axis limits
  theme(
    axis.text = element_text(size = 30),
        axis.title = element_text(size =30),
        legend.text = element_text(size = 25),
        legend.title = element_text(size = 25)
  )

dev.off()
```


# Health Graphs

## R Square Calcs
```{r}
r_squared_values_mean <- mean_health_score %>%
  group_by(Ni.Conc.) %>%
  do({
    model <- lm(mean_health ~ hpe, data = .)
    r_squared <- summary(model)$r.squared
    data.frame(Ni.Conc = unique(.$Ni.Conc.), r_squared = r_squared)
  }) %>%
  ungroup()

r_squared_values_raw <- t_LC50 %>%
  group_by(Ni.Conc.) %>%
  do({
    model <- lm(Health ~ hpe, data = .)
    r_squared <- summary(model)$r.squared
    data.frame(Ni.Conc = unique(.$Ni.Conc.), r_squared = r_squared)
  }) %>%
  ungroup()

r_squared_values_raw_loess <- t_LC50_clean %>%
  group_by(Ni.Conc.) %>%
  do({
    loess_fit <- loess(Health ~ hpe, data = ., span = 1)
    predicted <- predict(loess_fit)
    residuals <- .$Health - predicted
    ss_total <- sum((.$Health - mean(.$Health))^2)
    ss_residuals <- sum(residuals^2)
    r_squared <- 1 - (ss_residuals / ss_total)
    data.frame(Ni.Conc = unique(.$Ni.Conc.), r_squared = r_squared)
  }) %>%
  ungroup()

```

## Multipanel plot health by time
```{r}
setwd('..')
png(filename = "output/health_mean_se_raw.png", width = 2000, height = 1500)
ggplot() + 
  geom_point(data = t_LC50_clean, aes(x = hpe, y = Health), size = 5, color = "white", alpha = 0.5) +  # Raw data points for SE calculation and made it white so it blends into the background
  geom_point(data = mean_health_score, aes(x = hpe, y = mean_health), size = 9, color = "black") +  # Mean points
  geom_errorbar(data = mean_health_score, aes(x = hpe, ymin = mean_health - se, ymax = mean_health + se), width = 4) +  # Error bars
  facet_wrap(~ Ni.Conc., labeller = as_labeller(c("0" = "0 mg/L", "0.1" = "0.1 mg/L", "1" = "1 mg/L", "10" = "10 mg/L", "100" = "100 mg/L", "1000" = "1000 mg/L"))) +
  scale_x_continuous(breaks = c(0, 24, 48, 72, 96), expand = c(0.05, 0.1)) +
  scale_y_continuous(breaks = seq(0, 10, by = 2), limits = c(0,10)) +
  geom_smooth(data = t_LC50_clean, aes(x = hpe, y = Health), method = "loess", se = TRUE, fill= "lightsteelblue2", color = "lightsteelblue3", linewidth = 4, formula = (y ~ x), span = 1) +  # Add linear regression line with SE based on raw data
  labs(x = "Time (hours)", 
       y = "Health Score"
       ) +
  theme_test() +
  theme(
    strip.text = element_text(size = 55),  # Adjust the size of the facet wrap labels
    axis.title = element_text(size = 60), # adjust size of title
    axis.text.y = element_text(size = 55, hjust = 1, margin = margin(r = 20)),
    axis.text.x = element_text(size = 55, vjust = 3.5, hjust = 0.5, margin = margin(t=50))
  ) #+
   # geom_text(data = r_squared_values_raw_loess, aes(x = 48, y = 9, label = paste("R² =", round(r_squared, 2))), size = 16, color = "firebrick", vjust = -0.5, hjust = -0.4)

dev.off()
```


```{r}

setwd('..')
png(filename = "output/health_mean.png", width = 2000, height = 1120)
ggplot() + 
  geom_point(data = t_LC50_clean, aes(x = hpe, y = Health), size = 0.1, color = "white", alpha = 0.5) +  # Raw data points for SE calculation and made it white so it blends into the background
  geom_point(data = mean_health_score, aes(x = hpe, y = mean_health), size = 9, color = "black") +  # Mean points
  geom_errorbar(data = mean_health_score, aes(x = hpe, ymin = mean_health - se, ymax = mean_health + se), width = 5, position = position_dodge(width = 0.5)) +  # Error bars
  facet_wrap(~ Ni.Conc., labeller = as_labeller(c("0" = "0 mg/L", "0.1" = "0.1 mg/L", "1" = "1 mg/L", "10" = "10 mg/L", "100" = "100 mg/L", "1000" = "1000 mg/L"))) +
  scale_x_continuous(breaks = c(0, 24, 48, 72, 96), expand = c(0.05, 0.1)) +
  scale_y_continuous(breaks = seq(0, 10, by = 2), limits = c(0,10)) +
  geom_smooth(data = mean_health_score, aes(x = hpe, y = mean_health), method = "lm", se = TRUE, color = "thistle", fill= "thistle" , linewidth = 5) +  # Add linear regression line with SE based on raw data
  labs(x = "Time (hours)", 
       y = "Average Health Score"
       ) +
  theme_test() +
  theme(
    strip.text = element_text(size = 40),  # Adjust the size of the facet wrap labels
    axis.title = element_text(size = 50),
    axis.text.y = element_text(size = 40, hjust = 1, margin = margin(r = 20)),
    axis.text.x = element_text(size = 40, vjust = 3, hjust = 0.5, margin = margin(t=20))
  ) #+
   # geom_text(data = r_squared_values_mean, aes(x = 48, y = 9, label = paste("R² =", round(r_squared, 2))), size = 8, color = "blue")


dev.off()

```


## Plot the trendlines of declined health over time on one graph
```{r}
setwd('..')
png(filename = "output/health_mean_one.png", width = 1400, height = 1120)

# Define a custom labeller
custom_labeller <- as_labeller(c("0" = "0", "0.1" = "0.1", "1" = "1", "10" = "10", "100" = "100", "1000" = "1000"))

ggplot() + 
  geom_point(data = t_LC50_clean, aes(x = hpe, y = Health), size = 0.1, color = "white", alpha = 0.5) +  # Raw data points for SE calculation and made it white so it blends into the background
  geom_point(data = mean_health_score, aes(x = hpe, y = mean_health, color = as.factor(Ni.Conc.)), size = 4) +  # Mean points with color by Ni.Conc.
  geom_errorbar(data = mean_health_score, aes(x = hpe, ymin = mean_health - se, ymax = mean_health + se, color = as.factor(Ni.Conc.)), width = 3) +  # Error bars with color by Ni.Conc.
  geom_smooth(data = t_LC50_clean, aes(x = hpe, y = Health, color = as.factor(Ni.Conc.), fill = as.factor(Ni.Conc.)), method = "lm", se = TRUE, linewidth = 2) +  # Add linear regression line with SE based on raw data
  scale_color_brewer(palette = "PuOr", labels = custom_labeller) +  # Customize colors and labels for lines
  scale_fill_brewer(palette = "PuOr", labels = custom_labeller) +  # Customize fill for SE
  scale_x_continuous(breaks = c(0, 24, 48, 72, 96), expand = c(0.05, 0.1)) +
  scale_y_continuous(breaks = seq(0, 10, by = 2), limits = c(0,10)) +
  labs(x = "Time (hours)", 
       y = "Health Score",
       color = "Nickel (mg/L)",  # Legend title for lines
       fill = "Nickel (mg/L)"  # Legend title for fill
       ) +
  theme_bw() +
  theme(
    legend.position = "right",
    legend.title = element_text(size = 40),  # Adjust the legend title size
    legend.text = element_text(size = 35),  # Adjust the legend text size
    axis.title = element_text(size = 50),
    axis.text.y = element_text(size = 40, hjust = 1, margin = margin(r = 20)),
    axis.text.x = element_text(size = 40, vjust = 3, hjust = 0.5, margin = margin(t=20))
  )


dev.off()

```

## Exploaratory raw health data over time
```{r}

setwd('..')
png(filename = "output/health_raw.png", width = 1400, height = 1120)
ggplot(t_LC50, aes(x = hpe, y = Health)) + 
  geom_point(size = 3, color = "black") +  # Adjust the size of the points
  facet_wrap(~ Ni.Conc., labeller = as_labeller(c("0" = "0 mg/L", "0.1" = "0.1 mg/L", "1" = "1 mg/L", "10" = "10 mg/L", "100" = "100 mg/L", "1000" = "1000 mg/L"))) +
  scale_x_continuous(breaks = c(0, 24, 48, 72, 96), expand = c(0.05, 0.1)) +
  scale_y_continuous(breaks = seq(0, 10, by = 1), expand = c(0.0,0.0)) +
  ylim(0, 10) +
  geom_smooth(method = "lm", se = TRUE, color = "darkblue", linewidth = 5) +  # Add linear regression line with SE
  labs(x = "Time (hours)", 
       y = "Health Score"
       ) +
  theme_test() +
  theme(
    strip.text = element_text(size = 40),  # Adjust the size of the facet wrap labels
    axis.title = element_text(size = 50),
    axis.text.y = element_text(size = 40, hjust = 1.5, margin = margin(r = 20)),
    axis.text.x = element_text(size = 40, vjust = 3, hjust = 0.5, margin = margin(t=20))
  )
dev.off()
```


## Health over increasing Ni Conc
```{r}
library(scales)

setwd('..')
png(filename = "output/health_by_conc.png", width = 1400, height = 1100)
ggplot() + 
  geom_point(data = t2_LC50, aes(x = Ni.Conc.trans, y = Health), size = 0.01, color = "white", alpha = 0.5) +  # Raw data points for SE calculation
  geom_point(data = mean_health_score_no0hpe, aes(x = Ni.Conc.trans, y = mean_health), size = 4, color = "black") +  # Mean points
  geom_errorbar(data = mean_health_score_no0hpe, aes(x = Ni.Conc.trans, ymin = mean_health - se, ymax = mean_health + se), width = 0.2) +  # Error bars
  facet_wrap(~ hpe, labeller = as_labeller(c("24" = "24 hpe", "48" = "48 hpe", "72" = "72 hpe", "96" = "96 hpe"))) +
  scale_x_log10(breaks = c(0.01, 0.1, 1, 10, 100, 1000),
                labels = c("0", "0.1", "1", "10", "100", "1000")) +  # Refined breaks and labels
  scale_y_continuous(breaks = seq(0, 10, by = 2), expand = c(0.01, 0.01), limits = c(0, 10)) +
  geom_smooth(data = t2_LC50, aes(x = Ni.Conc.trans, y = Health), method = "lm", se = TRUE, color = "darkblue", linewidth = 5) +  # Add linear regression line with SE based on raw data
  labs(x = "Nickel Concentration (mg/L)", 
       y = "Health Score"
       ) +
  theme_test() +
  theme(
    strip.text = element_text(size = 25),  # Adjust the size of the facet wrap labels
    axis.text = element_text(size = 30),   # Adjust the size of the axis labels
    axis.title = element_text(size = 30),
    axis.text.y = element_text(size = 20, hjust = 1.5, margin = margin(r = 20)),
    axis.text.x = element_text(size = 20, vjust = 3, hjust = 0.5, margin = margin(t = 20))
  )
dev.off()
```


# Statistical analysis of health decline differences
If you look at the graphs, it appears that the decline in health over time in accordance with the independent variables may be significantly different. Need to sandbox a way to analyze differences in slopes among trend lines.

## ANCOVA of linear models for health decline
```{r}
# Load necessary libraries
library(dplyr)
library(ggplot2)

# Fit linear models for each concentration
lm_models <- mean_health_score %>% 
  group_by(Ni.Conc.) %>% 
  do(model = lm(mean_health ~ hpe, data = .))

# Summarize the models
model_summaries <- lm_models %>% 
  summarize(
    Ni.Conc. = unique(Ni.Conc.),
    intercept = coef(model)[1],
    slope = coef(model)[2],
    p_value = summary(model)$coefficients[2, 4]
  )

# Print the summaries
print(model_summaries)

#ANCOVA
# Combine all data and fit a single model with interaction to compare slopes or the rate in decline in health over time by concentration of nickel
combined_model <- lm(mean_health ~ hpe * Ni.Conc., data = mean_health_score)
summary(combined_model)

```
## ANOVA?
### ANOVA assumptions for raw health data
```{r}
summary(t_LC50)

fact_LC50 <- t_LC50 %>%
  mutate(hpe = as.factor(hpe)) %>%
  mutate(Ni.Conc. = as.factor(Ni.Conc.))

summary(fact_LC50)

## Meeting the ANOVA assumptions
leveneTest(Health ~ Ni.Conc.*hpe, data = fact_LC50)
#p-value < 0.05, therefore equal-variances are NOT met, WOULD NEED TO TRANSFORM THE DATA

shapiro.test(fact_LC50$Health)
#DOES NOT meets assumption of normality, p value < 0.05

hist(fact_LC50$Health)

qqnorm(fact_LC50$Health)
```

### Transformation to try to make assumptions work for ANOVA raw health data
```{r}
# Transform Health variable
t_LC50$Health_sqrt <- sqrt(t_LC50$Health)

# Check histogram of transformed data
hist(t_LC50$Health_sqrt, main = "Transformed Health (sqrt)", xlab = "Health")

# Check normality of transformed data (optional)
shapiro.test(t_LC50$Health_sqrt)

# Perform Levene's test on transformed data
leveneTest(Health_sqrt ~ Ni.Conc., data = t_LC50)

# Run ANOVA on transformed data
#anova_model <- aov(Health_sqrt ~ Ni.Conc., data = t_LC50)
#summary(anova_model)

```

Still does not meet assumptions. Can't use ANOVA for health data.


## Post Hoc Testing Simple Slopes
This bit needs to be fixed and also further researched. As above there is evidently a signficant difference in the slopes by Ni.Conc.:hpe but at the moment can't say exactly which are different from which.
```{r}
library(emmeans)
# Simple slopes analysis
simple_slopes <- emmeans(combined_model, ~ hpe | Ni.Conc.)
summary(simple_slopes)

pairwise_comparisons <- pairs(simple_slopes)
summary(pairwise_comparisons)

```