---
title: "65 - Exon Coverage"
author: Steven Roberts
date: "`r format(Sys.time(), '%d %B, %Y')`" 
output: 
  github_document:
    toc: true
    toc_depth: 3
    number_sections: true
    html_preview: true
---

```{r setup, include=FALSE}
library(tidyverse)
library(kableExtra)
library(DESeq2)
library(pheatmap)
library(RColorBrewer)
library(data.table)
library(DT)
library(Biostrings)
library(methylKit)
knitr::opts_chunk$set(
  echo = TRUE,         # Display code chunks
  eval = FALSE,         # Evaluate code chunks
  warning = TRUE,     # Hide warnings
  message = TRUE,     # Hide messages
  fig.width = 6,       # Set plot width in inches
  fig.height = 4,      # Set plot height in inches
  fig.align = "center" # Align plots to the center
)
```

Following Li et al. Will take bams and get some data..

Trimmed reads were mapped to the Aiptasia genome using HISAT2 v2.1.0, and mapping coverage per position was extracted using BEDTools v2.17.0. To assess spurious transcription levels, we deter- mined the coverage per exon normalized across all six replicates (assuming every replicate had a coverage of 1 million sequences in total) and then calculated the average coverage ratios of exons 2 to 6 versus exon 1 for every gene

Individual BAMS and their corresponding index files are in each individual sample subirectory:

https://gannet.fish.washington.edu/Atumefaciens/20230821-cvir-stringtie-GCF_002022765.2-isoforms/

A merged BAM of all samples is here (79GB):

https://gannet.fish.washington.edu/Atumefaciens/20230821-cvir-stringtie-GCF_002022765.2-isoforms/20230821_cvir_stringtie_GCF_002022765-sorted-bams-merged.bam

Merged BAM index file (23MB):

https://gannet.fish.washington.edu/Atumefaciens/20230821-cvir-stringtie-GCF_002022765.2-isoforms/20230821_cvir_stringtie_GCF_002022765-sorted-bams-merged.bam.bai

# Define vectors

## Vectors for subsetting samples by different groups
```{r assign-variables}
all <- c("S13M", "S16F", "S19F", "S39F", "S44F", "S52F", "S53F", "S54F", "S64M", "S6M", "S76F", "S7M", "S12M", "S22F", "S23M", "S29F", "S31M", "S35F", "S36F", "S3F", "S41F", "S48M", "S50F", "S59M", "S77F", "S9M")
controls <- c("S13M", "S16F", "S19F", "S39F", "S44F", "S52F", "S53F", "S54F", "S64M", "S6M", "S76F", "S7M")
exposed <- c("S12M", "S22F", "S23M", "S29F", "S31M", "S35F", "S36F", "S3F", "S41F", "S48M", "S50F", "S59M", "S77F", "S9M")
females <- c("S16F", "S19F", "S39F", "S44F", "S52F", "S53F", "S54F", "S76F", "S22F", "S29F", "S35F", "S36F", "S3F", "S41F", "S50F", "S77F")
males <- c("S12M", "S23M", "S31M", "S48M", "S59M", "S9M","S13M", "S64M", "S6M", "S7M")
controls_males <- c("S13M", "S64M", "S6M", "S7M")
exposed_males <- c("S12M", "S23M", "S31M", "S48M", "S59M", "S9M")
controls_females <- c("S16F", "S19F", "S39F", "S44F", "S52F", "S53F", "S54F", "S76F")
exposed_females <- c("S22F", "S29F", "S35F", "S36F", "S3F", "S41F", "S50F", "S77F")
```

## Set exon coverage sum threshold

Used for sum of reads over Exons 1 - 6 per gene.

Value of [10 was decided in this GitHub Issue](https://github.com/sr320/ceabigr/issues/88#issuecomment-1908563777).

```{r exon-sum-threshold}
exon_sum_threshold <- 10
```

# Define functions

##  Function to calculate log fold change
```{r function-fold-change}
calculate_log_fold_change <- function(data) {
  result <- data %>%
    dplyr::select(GeneID, Exon1, Exon2, Exon3, Exon4, Exon5, Exon6) %>%
    mutate(fold2 = log(Exon2 / Exon1)) %>%
    mutate(fold3 = log(Exon3 / Exon1)) %>%
    mutate(fold4 = log(Exon4 / Exon1)) %>%
    mutate(fold5 = log(Exon5 / Exon1)) %>%
    mutate(fold6 = log(Exon6 / Exon1))
  
  return(result)
}
```

```{r fold-change}
calculate_fold_change <- function(data) {
  result <- data %>%
    dplyr::select(GeneID, Exon1, Exon2, Exon3, Exon4, Exon5, Exon6) %>%
    mutate(fold2 = (Exon2 / Exon1)) %>%
    mutate(fold3 = (Exon3 / Exon1)) %>%
    mutate(fold4 = (Exon4 / Exon1)) %>%
    mutate(fold5 = (Exon5 / Exon1)) %>%
    mutate(fold6 = (Exon6 / Exon1))
  
  return(result)
}
```


# Download BAMs

```{r, engine='bash'}
wget -r \
--no-directories --no-parent \
-P ../data/big/ \
-A "*sorted.bam" https://gannet.fish.washington.edu/Atumefaciens/20230821-cvir-stringtie-GCF_002022765.2-isoforms/
```

```{r, engine='bash'}
wget -r \
--no-directories --no-parent \
-P ../data/big/ \
-A "*sorted.bam.bai" https://gannet.fish.washington.edu/Atumefaciens/20230821-cvir-stringtie-GCF_002022765.2-isoforms/
```


# bedtools coverage

```{r, engine='bash'}
/home/shared/bedtools2/bin/bedtools coverage \
-a ../genome-features/GCF_002022765.2_C_virginica-3.0_genomic.gff \
-b ../data/big/S9M.sorted.bam \
> ../output/65-exon-coverage/S9gff_exon_coverage.txt
```

```{r, engine='bash', eval=TRUE}
head ../genome-features/GCF_002022765.2_C_virginica-3.0_genomic.gff
```

```{bash}
cd ../data

/home/shared/datasets download genome accession GCF_002022765.2 --include gff3,gtf
```


```{r, engine='bash', eval=TRUE}
head /home/shared/8TB_HDD_01/sr320/github/ceabigr/data/ncbi_dataset/data/GCF_002022765.2/genomic.gtf

```
```{bash}
cat ../data/ncbi_dataset/data/GCF_002022765.2/genomic.gtf | grep "Gnomon	exon" | head
```

```{bash}
cat ../data/ncbi_dataset/data/GCF_002022765.2/genomic.gtf | grep "Gnomon	exon" > ../genome-features/GCF_002022765.2_exon.gtf

```


```{r, engine='bash'}
cd    ../genome-features
curl -O https://eagle.fish.washington.edu/Cvirg_tracks/C_virginica-3.0_Gnomon_exon.bed
```


```{r, engine='bash', eval=TRUE}
head ../genome-features/C_virginica-3.0_Gnomon_exon.bed
```


Lets get bam that overlaps with exons

`bedtools intersect -a sorted.bam -b exons.gtf > exon_reads.bam`


`bedtools multicov -bams exon_reads.bam -bed exons.gtf > exon_expression_counts.txt`

```{r, engine='bash'}
/home/shared/bedtools2/bin/bedtools multicov \
-bams ../data/big/S77F.sorted.bam \
-bed ../genome-features/GCF_002022765.2_exon.gtf \
> ../output/65-exon-coverage/S77_exon_exp_counts.txt
```

```{r, engine='bash'}
tail -20 ../output/65-exon-coverage/S77_exon_exp_counts.txt
```


```{r, engine='bash'}
/home/shared/bedtools2/bin/bedtools coverage \
-a ../genome-features/GCF_902806645.1_cgigas_uk_roslin_v1_genomic-mito.gtf \
-b ../data/big/S9M.sorted.bam \
> ../output/65-exon-coverage/S9gtf_exon_coverage.txt
```

```{r, engine='bash', eval=TRUE}
head -1 ../output/65-exon-coverage/S9gtf_exon_coverage.txt
```

loops

bedtools intersect -a sorted.bam -b exons.gtf > exon_reads.bam

```{r, engine='bash'}
for bamfile in ../data/big/*sorted.bam; do
    # Calculate coverage using BEDTools
    echo ${bamfile%.bam}
done
```

```{r, engine='bash'}
for bamfile in ../data/big/*sorted.bam; do
    # Extract the base name of the file without the path and extension
    base_name=$(basename "$bamfile" .sorted.bam)

    # Define the output file name
    output_file="${base_name}_exon_reads.bam"

    # Calculate coverage using BEDTools
    /home/shared/bedtools2/bin/bedtools intersect -a "$bamfile" -b ../genome-features/GCF_002022765.2_exon.gtf > ../output/65-exon-coverage/$output_file
done
```

```{bash}
find ../data/big/ -name "*sorted.bam" | xargs -I {} -P 20 bash -c '
    bamfile="{}"
    base_name=$(basename "$bamfile" .sorted.bam)
    output_file="../output/65-exon-coverage/${base_name}_exon_reads.bam"
    /home/shared/bedtools2/bin/bedtools intersect -a "$bamfile" -b ../genome-features/GCF_002022765.2_exon.gtf > "$output_file"
'

```

```{bash}
for bamfile in ../output/65-exon-coverage/*.bam; do
    /home/shared/samtools-1.12/samtools index -@ 40 "$bamfile"
done
```


```{bash}
find ../output/65-exon-coverage/ -name "*exon_reads.bam" | xargs -I {} -P 40 bash -c '
    bamfile="{}"
    base_name=$(basename "$bamfile" .exon_reads.bam)
    output_file="../output/65-exon-coverage/${base_name}_multicov.txt"
    /home/shared/bedtools2/bin/bedtools multicov -bams "$bamfile" -bed ../genome-features/GCF_002022765.2_exon.gtf > "$output_file"
'

```


```{r, engine='bash'}
head ../output/65-exon-coverage/*extracted_data.txt | head -25

```


```{r}

S9data <- read.table("../output/65-exon-coverage/S9M_exon_reads.extracted_data.txt", header = TRUE)
```

```{r}

S9data <- read.table("../output/65-exon-coverage/S9M_exon_reads.extracted_data.txt", header = TRUE)

summarized_data <- S9data %>%
  group_by(GeneID, ExonNumber) %>%
  summarize(CoverageValue = mean(CoverageValue))

# Reshape the data using pivot_wider
reshaped_data <- summarized_data %>%
  pivot_wider(names_from = ExonNumber, values_from = CoverageValue, names_prefix = "Exon")

```

```{r}
library(tidyverse)

# Set the directory where your files are located
directory_path <- "../output/65-exon-coverage"

# List all files ending with 'extracted_data.txt'
files <- list.files(directory_path, pattern = "extracted_data\\.txt$", full.names = TRUE)

# Function to read, process, and write a single file
process_and_write_file <- function(file_path) {
  data <- read.table(file_path, header = TRUE)
  
  summarized_data <- data %>%
    group_by(GeneID, ExonNumber) %>%
    summarize(CoverageValue = mean(CoverageValue), .groups = 'drop')

  reshaped_data <- summarized_data %>%
    pivot_wider(names_from = ExonNumber, values_from = CoverageValue, names_prefix = "Exon")

  # Generate a new file name based on the original file name
  output_file_name <- gsub("extracted_data\\.txt$", "processed_data.csv", basename(file_path))
  output_file_path <- file.path(directory_path, output_file_name)

  # Write the reshaped data to a new file
  write_csv(reshaped_data, output_file_path)
}

# Apply the function to each file
lapply(files, process_and_write_file)

```


```{r, engine='bash'}
head ../output/65-exon-coverage/S31M*process*
```
```{bash}
head ../output/65-exon-coverage/S9M_exon_reads.processed_data.csv

```

# TESTING
```{r}
S9Mdata <- read.csv("../output/65-exon-coverage/S9M_exon_reads.processed_data.csv")
```

## Natural log fold change calculations
```{r}
test <- S9Mdata %>%
  dplyr::select(GeneID,Exon1,Exon2,Exon3,Exon4,Exon5,Exon6) %>%
  mutate(fold2 = log(Exon2 / Exon1)) %>%
  mutate(fold3 = log(Exon3 / Exon1)) %>%
  mutate(fold4 = log(Exon4 / Exon1)) %>%
  mutate(fold5 = log(Exon5 / Exon1)) %>%
  mutate(fold6 = log(Exon6 / Exon1))
```

```{r}
head(test)
```
```{r}
data_long <- test %>%
  pivot_longer(cols = starts_with("fold"), names_to = "fold", values_to = "Value") %>%
  mutate(fold = as.factor(fold))  # Convert column to a factor

head(data_long)
```

```{r}
data_long$Value[data_long$Value == "-Inf" | data_long$Value == "Inf"] <- NA

```

## Calculate mean, SD, SE
```{r}
summary_data <- data_long %>%
  group_by(fold) %>%
  summarise(
    Mean = mean(Value, na.rm = TRUE),
    SE = sd(Value, na.rm = TRUE) / sqrt(sum(!is.na(Value)))
  )
```

```{r}
class(summary_data$Mean)
```

```{r}
ggplot(summary_data, aes(x = fold, y = Mean)) +
  geom_point() +
  geom_errorbar(aes(ymin = Mean - SE, ymax = Mean + SE), width = 0.2) +
  labs(
    x = "Exon",
    y = "Mean Value",
    title = "Mean Values with Standard Error by fold"
  ) +
  theme_minimal()

```


::: {.callout-warning}
Be sure to load the [vector groups chunk](#vectors-for-subsetting-samples-by-different-groups)

and

the [functions chunk](function-to-calculate-log-fold-change) before running!
:::

## Test exon sum threshold

### Sums of Exons 1 - 6
```{r test-exon-sums}
test_sum <- S9Mdata %>%
  dplyr::select(GeneID,Exon1,Exon2,Exon3,Exon4,Exon5,Exon6)

test_sum$Sum <- rowSums(test_sum[, -1], na.rm = TRUE)

str(test_sum)
```

### Gene counts with exon coverage threshold
```{r test-check-gene-counts-exon-sums}

# Count the occurrences of values in test_sum$Sum
sum_counts <- table(ifelse(test_sum$Sum <= exon_sum_threshold,
                           paste0("<= ", exon_sum_threshold),
                           paste0("> ", exon_sum_threshold)))

# Display the counts
print(sum_counts)
```


## Plot raw expression
```{r}
# Replace NA values with a placeholder value (e.g., 0)
cleaned_data <- test_sum %>%
  mutate(across(-Sum, ~ ifelse(is.na(.), 0, .)))

# Reshape the data to long format
long_data <- tidyr::pivot_longer(cleaned_data, cols = starts_with("Exon"), names_to = "Exon", values_to = "Values")

# Plotting a dot plot (scatter plot)
ggplot(long_data, aes(x = factor(Exon), y = Values)) +
  geom_point(position = position_jitter(width = 0.3, height = 0), alpha = 0.15) +
  scale_y_log10() +
  labs(x = "Exon", y = "Read Counts (log10)") +
  theme_minimal()

```

## Save plot
```{r}
ggsave("../output/65-exon-coverage/figures/raw_exon_coverage-scatter.png", plot = last_plot(), dpi = 300, bg = "white")
```

# Filter genes with exon threshold

## Sum of Exons 1-6 for each file.
```{r}
# Initialize an empty list to store results for each file
sums_list <- list()

# Loop through each file in the list
for (file in all) {
  # Read in the data file
  data <- read.csv(paste0("../output/65-exon-coverage/", file, "_exon_reads.processed_data.csv"))

  # Calculate the sum of Exons 1-6
  sum_result <- data %>%
    dplyr::select(GeneID, Exon1, Exon2, Exon3, Exon4, Exon5, Exon6) %>%
    mutate(Sum = rowSums(.[, -1], na.rm = TRUE))

  # Count GeneIDs meeting the threshold for the current file
  geneid_count <- sum_result %>%
    dplyr::filter(Sum >= exon_sum_threshold) %>%
    nrow()
  cat("\nCount of GeneIDs meeting the threshold (", exon_sum_threshold, "summed reads ) in", file, ":", geneid_count, "\n")

  # Append the result and count to the lists
  sums_list[[file]] <- sum_result
}
```
## Filter across all samples

Join all data frames in the `sums_list` on `GeneID` and
then filter rows for any `GeneID` with sums >= set threshold.

```{r}
# Rename the Sum column in each data frame in sums_list
# Assigns the original filename to the Sum column in each data frame
renamed_sums_list <- lapply(seq_along(sums_list), function(i) {
  colname <- paste0(all[i], "_Sum")
  col_index <- grep("Sum", colnames(sums_list[[i]]))
  colnames(sums_list[[i]])[col_index] <- colname
  dplyr::select(sums_list[[i]], GeneID, colname)
})

# Join data frames in renamed_sums_list on GeneID
all_geneids_df <- Reduce(function(x, y) dplyr::full_join(x, y, by = "GeneID"), renamed_sums_list)

# Filter rows where all values are >= the set threshold
exon_threshold_geneids <- all_geneids_df %>%
  dplyr::filter(if_all(ends_with("_Sum"), ~. >= exon_sum_threshold)) %>%
  dplyr::select(GeneID)

# Print count of GeneIDs meeting the threshold across all files
cat("Final count of unique GeneIDs meeting the threshold (", exon_sum_threshold, "summed reads ) across all files:", nrow(exon_threshold_geneids), "\n")

# Print the structure of the final data frame
str(exon_threshold_geneids)
```

### Write filtered GeneIDs to file
```{r}
exon_threshold_out_file <- paste0("../output/65-exon-coverage/geneids-exon_threshold-", exon_sum_threshold, ".csv", collapse = "")
write.csv(exon_threshold_geneids, file = exon_threshold_out_file)
```

# Calculate fold change for all groups

Uses `GeneIDs` passing threshold determined above.
```{r}
# Create a list to store results for each group
results_list <- list()

# Loop through each group
for (group_name in c("all", "controls", "exposed", "controls_males", "exposed_males", "controls_females", "exposed_females", "females", "males")) {
  # Get the corresponding vector
  current_vector <- eval(parse(text = group_name))
  
  # Create an empty list to store results for each file in the group
  group_results <- list()
  
  # Loop through each file in the group
  for (file_id in current_vector) {
    # Read the data frame
    current_data <- read.csv(paste0("../output/65-exon-coverage/", file_id, "_exon_reads.processed_data.csv"))
    
    # Filter current_data based on exon_threshold_geneids
    current_data <- subset(current_data, GeneID %in% exon_threshold_geneids$GeneID)
    
    # Check if any rows are left after filtering
    if (nrow(current_data) > 0) {
      # Calculate  fold change
      current_result <- calculate_fold_change(current_data)
      
      # Replace infinite values within the data frame
      current_result <- as.data.frame(lapply(current_result, function(x) ifelse(is.infinite(x), NA, x)))
      
      # Ensure unique column names
      colnames(current_result) <- make.unique(as.character(colnames(current_result)), sep = "_")
      
        # write out
  
  fc <- paste0("../output/65-exon-coverage/", file_id, "_exonsum_", exon_sum_threshold, "_fold.csv")
  write.csv(current_result, file = fc)
  
  
      # Store the result
      group_results[[file_id]] <- current_result
    } else {
      # If no rows are left after filtering, you might want to handle this case accordingly
      print(paste("No matching GeneIDs found in file:", file_id))
    }
  }

  # Bind the data frames together
  mean_fold_change <- do.call(rbind, group_results)


  # Convert the data frame to a tibble to support summarise
  mean_fold_change <- as_tibble(mean_fold_change)
  
  # Reshape the data to long format
  data_long <- mean_fold_change %>%
    pivot_longer(cols = starts_with("fold"), names_to = "fold", values_to = "Value") %>%
    mutate(fold = as.factor(fold))  # Convert column to a factor
  
  # Calculate summary statistics
  summary_data <- data_long %>%
    group_by(fold) %>%
    summarise(
      Mean = mean(Value, na.rm = TRUE),
      SE = sd(Value, na.rm = TRUE) / sqrt(sum(!is.na(Value)))
    )
  
  # Store the summary data in the main results list
  results_list[[group_name]] <- summary_data
}

```

```{r}
mmf <- read.csv("../output/65-exon-coverage/malesmeanfold.csv")
```


```{bash}
head ../output/65-exon-coverage/*fold.csv
wc -l ../output/65-exon-coverage/*fold.csv

```


## Access results for a specific group (e.g., "all")
```{r}
all_group_result <- results_list[["all"]]
```

# Plotting

## Controls vs. Exposed
```{r}
# Recreate combined_results
combined_results <- bind_rows(
  mutate(results_list$controls, group = "controls"),
  mutate(results_list$exposed, group = "exposed")
)

# Convert 'fold' to a factor
combined_results$fold <- factor(combined_results$fold, levels = c("fold2", "fold3", "fold4", "fold5", "fold6"))

# Plotting code
library(ggplot2)

ggplot(combined_results, aes(x = fold, y = Mean, group = group, color = group)) +
  geom_point(position = position_dodge(width = 0.0), size = 1) +
  geom_line(position = position_dodge(width = 0.0), size = 0.5) +
  geom_errorbar(aes(ymin = Mean - SE, ymax = Mean + SE),
                position = position_dodge(width = 0.0),
                width = 0.25, color = "black") +
  labs(
       x = "Exon",
       y = "Mean ln(FC vs. Exon 1)") +
  theme_minimal() +
  scale_x_discrete(labels = c("fold2" = "Exon 2", "fold3" = "Exon 3", "fold4" = "Exon 4", "fold5" = "Exon 5", "fold6" = "Exon 6"))

```
### Save plot
```{r}
ggsave("../output/65-exon-coverage/figures/controls.v.exposed-line_plot.png", plot = last_plot(), dpi = 300, bg = "white")
```

## Females vs. Males
```{r}
# Recreate combined_results
combined_results <- bind_rows(
  mutate(results_list$females, group = "females"),
  mutate(results_list$males, group = "males")
)

# Convert 'fold' to a factor
combined_results$fold <- factor(combined_results$fold, levels = c("fold2", "fold3", "fold4", "fold5", "fold6"))

# Plotting code
library(ggplot2)

ggplot(combined_results, aes(x = fold, y = Mean, group = group, color = group)) +
  geom_point(position = position_dodge(width = 0.0), size = 1) +
  geom_line(position = position_dodge(width = 0.0), size = 0.5) +
  geom_errorbar(aes(ymin = Mean - SE, ymax = Mean + SE),
                position = position_dodge(width = 0.0),
                width = 0.25, color = "black") +
  labs(
       x = "Exon",
       y = "Mean ln(FC vs. Exon 1)") +
  theme_minimal() +
  scale_x_discrete(labels = c("fold2" = "Exon 2", "fold3" = "Exon 3", "fold4" = "Exon 4", "fold5" = "Exon 5", "fold6" = "Exon 6"))
```

### Save plot
```{r}
ggsave("../output/65-exon-coverage/figures/females.v.males-line_plot.png", plot = last_plot(), dpi = 300, bg = "white")
```

## Control Females vs. Control Males
```{r}
# Recreate combined_results
combined_results <- bind_rows(
  mutate(results_list$controls_females, group = "control females"),
  mutate(results_list$controls_males, group = "control males")
)

# Convert 'fold' to a factor
combined_results$fold <- factor(combined_results$fold, levels = c("fold2", "fold3", "fold4", "fold5", "fold6"))

# Plotting code
library(ggplot2)

ggplot(combined_results, aes(x = fold, y = Mean, group = group, color = group)) +
  geom_point(position = position_dodge(width = 0.0), size = 1) +
  geom_line(position = position_dodge(width = 0.0), size = 0.5) +
  geom_errorbar(aes(ymin = Mean - SE, ymax = Mean + SE),
                position = position_dodge(width = 0.0),
                width = 0.25, color = "black") +
  labs(
       x = "Exon",
       y = "Mean ln(FC vs. Exon 1)") +
  theme_minimal() +
  scale_x_discrete(labels = c("fold2" = "Exon 2", "fold3" = "Exon 3", "fold4" = "Exon 4", "fold5" = "Exon 5", "fold6" = "Exon 6"))
```

### Save plot
```{r}
ggsave("../output/65-exon-coverage/figures/control-females.v.control-males-line_plot.png", plot = last_plot(), dpi = 300, bg = "white")
```

## Exposed Females vs. Exposed Males
```{r}
# Recreate combined_results
combined_results <- bind_rows(
  mutate(results_list$exposed_females, group = "exposed females"),
  mutate(results_list$exposed_males, group = "exposed males")
)

# Convert 'fold' to a factor
combined_results$fold <- factor(combined_results$fold, levels = c("fold2", "fold3", "fold4", "fold5", "fold6"))

# Plotting code
library(ggplot2)

ggplot(combined_results, aes(x = fold, y = Mean, group = group, color = group)) +
  geom_point(position = position_dodge(width = 0.0), size = 1) +
  geom_line(position = position_dodge(width = 0.0), size = 0.5) +
  geom_errorbar(aes(ymin = Mean - SE, ymax = Mean + SE),
                position = position_dodge(width = 0.0),
                width = 0.25, color = "black") +
  labs(
       x = "Exon",
       y = "Mean ln(FC vs. Exon 1)") +
  theme_minimal() +
  scale_x_discrete(labels = c("fold2" = "Exon 2", "fold3" = "Exon 3", "fold4" = "Exon 4", "fold5" = "Exon 5", "fold6" = "Exon 6"))
```

### Save plot
```{r}
ggsave("../output/65-exon-coverage/figures/exposed-females.v.exposed-males-line_plot.png", plot = last_plot(), dpi = 300, bg = "white")
```

## Control Females vs. Exposed Females
```{r}
# Recreate combined_results
combined_results <- bind_rows(
  mutate(results_list$controls_females, group = "control females"),
  mutate(results_list$exposed_females, group = "exposed females")
)

# Convert 'fold' to a factor
combined_results$fold <- factor(combined_results$fold, levels = c("fold2", "fold3", "fold4", "fold5", "fold6"))

# Plotting code
library(ggplot2)

ggplot(combined_results, aes(x = fold, y = Mean, group = group, color = group)) +
  geom_point(position = position_dodge(width = 0.0), size = 1) +
  geom_line(position = position_dodge(width = 0.0), size = 0.5) +
  geom_errorbar(aes(ymin = Mean - SE, ymax = Mean + SE),
                position = position_dodge(width = 0.0),
                width = 0.25, color = "black") +
  labs(
       x = "Exon",
       y = "Mean ln(FC vs. Exon 1)") +
  theme_minimal() +
  scale_x_discrete(labels = c("fold2" = "Exon 2", "fold3" = "Exon 3", "fold4" = "Exon 4", "fold5" = "Exon 5", "fold6" = "Exon 6"))
```

### Save plot
```{r}
ggsave("../output/65-exon-coverage/figures/control-females.v.exposed-females-line_plot.png", plot = last_plot(), dpi = 300, bg = "white")
```

## Control Males vs. Exposed Males
```{r}
# Recreate combined_results
combined_results <- bind_rows(
  mutate(results_list$controls_males, group = "control males"),
  mutate(results_list$exposed_males, group = "exposed males")
)

# Convert 'fold' to a factor
combined_results$fold <- factor(combined_results$fold, levels = c("fold2", "fold3", "fold4", "fold5", "fold6"))

# Plotting code
library(ggplot2)

ggplot(combined_results, aes(x = fold, y = Mean, group = group, color = group)) +
  geom_point(position = position_dodge(width = 0.0), size = 1) +
  geom_line(position = position_dodge(width = 0.0), size = 0.5) +
  geom_errorbar(aes(ymin = Mean - SE, ymax = Mean + SE),
                position = position_dodge(width = 0.0),
                width = 0.25, color = "black") +
  labs(
       x = "Exon",
       y = "Mean ln(FC vs. Exon 1)") +
  theme_minimal() +
  scale_x_discrete(labels = c("fold2" = "Exon 2", "fold3" = "Exon 3", "fold4" = "Exon 4", "fold5" = "Exon 5", "fold6" = "Exon 6"))
```

### Save plot
```{r}
ggsave("../output/65-exon-coverage/figures/control-males.v.exposed-males-line_plot.png", plot = last_plot(), dpi = 300, bg = "white")
```