--- title: "65 - Exon Coverage" author: Steven Roberts date: "`r format(Sys.time(), '%d %B, %Y')`" output: github_document: toc: true toc_depth: 3 number_sections: true html_preview: true --- ```{r setup, include=FALSE} library(tidyverse) library(kableExtra) library(DESeq2) library(pheatmap) library(RColorBrewer) library(data.table) library(DT) library(Biostrings) library(methylKit) knitr::opts_chunk$set( echo = TRUE, # Display code chunks eval = FALSE, # Evaluate code chunks warning = TRUE, # Hide warnings message = TRUE, # Hide messages fig.width = 6, # Set plot width in inches fig.height = 4, # Set plot height in inches fig.align = "center" # Align plots to the center ) ``` Following Li et al. Will take bams and get some data.. Trimmed reads were mapped to the Aiptasia genome using HISAT2 v2.1.0, and mapping coverage per position was extracted using BEDTools v2.17.0. To assess spurious transcription levels, we deter- mined the coverage per exon normalized across all six replicates (assuming every replicate had a coverage of 1 million sequences in total) and then calculated the average coverage ratios of exons 2 to 6 versus exon 1 for every gene Individual BAMS and their corresponding index files are in each individual sample subirectory: https://gannet.fish.washington.edu/Atumefaciens/20230821-cvir-stringtie-GCF_002022765.2-isoforms/ A merged BAM of all samples is here (79GB): https://gannet.fish.washington.edu/Atumefaciens/20230821-cvir-stringtie-GCF_002022765.2-isoforms/20230821_cvir_stringtie_GCF_002022765-sorted-bams-merged.bam Merged BAM index file (23MB): https://gannet.fish.washington.edu/Atumefaciens/20230821-cvir-stringtie-GCF_002022765.2-isoforms/20230821_cvir_stringtie_GCF_002022765-sorted-bams-merged.bam.bai # Define vectors ## Vectors for subsetting samples by different groups ```{r assign-variables} all <- c("S13M", "S16F", "S19F", "S39F", "S44F", "S52F", "S53F", "S54F", "S64M", "S6M", "S76F", "S7M", "S12M", "S22F", "S23M", "S29F", "S31M", "S35F", "S36F", "S3F", "S41F", "S48M", "S50F", "S59M", "S77F", "S9M") controls <- c("S13M", "S16F", "S19F", "S39F", "S44F", "S52F", "S53F", "S54F", "S64M", "S6M", "S76F", "S7M") exposed <- c("S12M", "S22F", "S23M", "S29F", "S31M", "S35F", "S36F", "S3F", "S41F", "S48M", "S50F", "S59M", "S77F", "S9M") females <- c("S16F", "S19F", "S39F", "S44F", "S52F", "S53F", "S54F", "S76F", "S22F", "S29F", "S35F", "S36F", "S3F", "S41F", "S50F", "S77F") males <- c("S12M", "S23M", "S31M", "S48M", "S59M", "S9M","S13M", "S64M", "S6M", "S7M") controls_males <- c("S13M", "S64M", "S6M", "S7M") exposed_males <- c("S12M", "S23M", "S31M", "S48M", "S59M", "S9M") controls_females <- c("S16F", "S19F", "S39F", "S44F", "S52F", "S53F", "S54F", "S76F") exposed_females <- c("S22F", "S29F", "S35F", "S36F", "S3F", "S41F", "S50F", "S77F") ``` ## Set exon coverage sum threshold Used for sum of reads over Exons 1 - 6 per gene. Value of [10 was decided in this GitHub Issue](https://github.com/sr320/ceabigr/issues/88#issuecomment-1908563777). ```{r exon-sum-threshold} exon_sum_threshold <- 10 ``` # Define functions ## Function to calculate log fold change ```{r function-fold-change} calculate_log_fold_change <- function(data) { result <- data %>% dplyr::select(GeneID, Exon1, Exon2, Exon3, Exon4, Exon5, Exon6) %>% mutate(fold2 = log(Exon2 / Exon1)) %>% mutate(fold3 = log(Exon3 / Exon1)) %>% mutate(fold4 = log(Exon4 / Exon1)) %>% mutate(fold5 = log(Exon5 / Exon1)) %>% mutate(fold6 = log(Exon6 / Exon1)) return(result) } ``` ```{r fold-change} calculate_fold_change <- function(data) { result <- data %>% dplyr::select(GeneID, Exon1, Exon2, Exon3, Exon4, Exon5, Exon6) %>% mutate(fold2 = (Exon2 / Exon1)) %>% mutate(fold3 = (Exon3 / Exon1)) %>% mutate(fold4 = (Exon4 / Exon1)) %>% mutate(fold5 = (Exon5 / Exon1)) %>% mutate(fold6 = (Exon6 / Exon1)) return(result) } ``` # Download BAMs ```{r, engine='bash'} wget -r \ --no-directories --no-parent \ -P ../data/big/ \ -A "*sorted.bam" https://gannet.fish.washington.edu/Atumefaciens/20230821-cvir-stringtie-GCF_002022765.2-isoforms/ ``` ```{r, engine='bash'} wget -r \ --no-directories --no-parent \ -P ../data/big/ \ -A "*sorted.bam.bai" https://gannet.fish.washington.edu/Atumefaciens/20230821-cvir-stringtie-GCF_002022765.2-isoforms/ ``` # bedtools coverage ```{r, engine='bash'} /home/shared/bedtools2/bin/bedtools coverage \ -a ../genome-features/GCF_002022765.2_C_virginica-3.0_genomic.gff \ -b ../data/big/S9M.sorted.bam \ > ../output/65-exon-coverage/S9gff_exon_coverage.txt ``` ```{r, engine='bash', eval=TRUE} head ../genome-features/GCF_002022765.2_C_virginica-3.0_genomic.gff ``` ```{bash} cd ../data /home/shared/datasets download genome accession GCF_002022765.2 --include gff3,gtf ``` ```{r, engine='bash', eval=TRUE} head /home/shared/8TB_HDD_01/sr320/github/ceabigr/data/ncbi_dataset/data/GCF_002022765.2/genomic.gtf ``` ```{bash} cat ../data/ncbi_dataset/data/GCF_002022765.2/genomic.gtf | grep "Gnomon exon" | head ``` ```{bash} cat ../data/ncbi_dataset/data/GCF_002022765.2/genomic.gtf | grep "Gnomon exon" > ../genome-features/GCF_002022765.2_exon.gtf ``` ```{r, engine='bash'} cd ../genome-features curl -O https://eagle.fish.washington.edu/Cvirg_tracks/C_virginica-3.0_Gnomon_exon.bed ``` ```{r, engine='bash', eval=TRUE} head ../genome-features/C_virginica-3.0_Gnomon_exon.bed ``` Lets get bam that overlaps with exons `bedtools intersect -a sorted.bam -b exons.gtf > exon_reads.bam` `bedtools multicov -bams exon_reads.bam -bed exons.gtf > exon_expression_counts.txt` ```{r, engine='bash'} /home/shared/bedtools2/bin/bedtools multicov \ -bams ../data/big/S77F.sorted.bam \ -bed ../genome-features/GCF_002022765.2_exon.gtf \ > ../output/65-exon-coverage/S77_exon_exp_counts.txt ``` ```{r, engine='bash'} tail -20 ../output/65-exon-coverage/S77_exon_exp_counts.txt ``` ```{r, engine='bash'} /home/shared/bedtools2/bin/bedtools coverage \ -a ../genome-features/GCF_902806645.1_cgigas_uk_roslin_v1_genomic-mito.gtf \ -b ../data/big/S9M.sorted.bam \ > ../output/65-exon-coverage/S9gtf_exon_coverage.txt ``` ```{r, engine='bash', eval=TRUE} head -1 ../output/65-exon-coverage/S9gtf_exon_coverage.txt ``` loops bedtools intersect -a sorted.bam -b exons.gtf > exon_reads.bam ```{r, engine='bash'} for bamfile in ../data/big/*sorted.bam; do # Calculate coverage using BEDTools echo ${bamfile%.bam} done ``` ```{r, engine='bash'} for bamfile in ../data/big/*sorted.bam; do # Extract the base name of the file without the path and extension base_name=$(basename "$bamfile" .sorted.bam) # Define the output file name output_file="${base_name}_exon_reads.bam" # Calculate coverage using BEDTools /home/shared/bedtools2/bin/bedtools intersect -a "$bamfile" -b ../genome-features/GCF_002022765.2_exon.gtf > ../output/65-exon-coverage/$output_file done ``` ```{bash} find ../data/big/ -name "*sorted.bam" | xargs -I {} -P 20 bash -c ' bamfile="{}" base_name=$(basename "$bamfile" .sorted.bam) output_file="../output/65-exon-coverage/${base_name}_exon_reads.bam" /home/shared/bedtools2/bin/bedtools intersect -a "$bamfile" -b ../genome-features/GCF_002022765.2_exon.gtf > "$output_file" ' ``` ```{bash} for bamfile in ../output/65-exon-coverage/*.bam; do /home/shared/samtools-1.12/samtools index -@ 40 "$bamfile" done ``` ```{bash} find ../output/65-exon-coverage/ -name "*exon_reads.bam" | xargs -I {} -P 40 bash -c ' bamfile="{}" base_name=$(basename "$bamfile" .exon_reads.bam) output_file="../output/65-exon-coverage/${base_name}_multicov.txt" /home/shared/bedtools2/bin/bedtools multicov -bams "$bamfile" -bed ../genome-features/GCF_002022765.2_exon.gtf > "$output_file" ' ``` ```{r, engine='bash'} head ../output/65-exon-coverage/*extracted_data.txt | head -25 ``` ```{r} S9data <- read.table("../output/65-exon-coverage/S9M_exon_reads.extracted_data.txt", header = TRUE) ``` ```{r} S9data <- read.table("../output/65-exon-coverage/S9M_exon_reads.extracted_data.txt", header = TRUE) summarized_data <- S9data %>% group_by(GeneID, ExonNumber) %>% summarize(CoverageValue = mean(CoverageValue)) # Reshape the data using pivot_wider reshaped_data <- summarized_data %>% pivot_wider(names_from = ExonNumber, values_from = CoverageValue, names_prefix = "Exon") ``` ```{r} library(tidyverse) # Set the directory where your files are located directory_path <- "../output/65-exon-coverage" # List all files ending with 'extracted_data.txt' files <- list.files(directory_path, pattern = "extracted_data\\.txt$", full.names = TRUE) # Function to read, process, and write a single file process_and_write_file <- function(file_path) { data <- read.table(file_path, header = TRUE) summarized_data <- data %>% group_by(GeneID, ExonNumber) %>% summarize(CoverageValue = mean(CoverageValue), .groups = 'drop') reshaped_data <- summarized_data %>% pivot_wider(names_from = ExonNumber, values_from = CoverageValue, names_prefix = "Exon") # Generate a new file name based on the original file name output_file_name <- gsub("extracted_data\\.txt$", "processed_data.csv", basename(file_path)) output_file_path <- file.path(directory_path, output_file_name) # Write the reshaped data to a new file write_csv(reshaped_data, output_file_path) } # Apply the function to each file lapply(files, process_and_write_file) ``` ```{r, engine='bash'} head ../output/65-exon-coverage/S31M*process* ``` ```{bash} head ../output/65-exon-coverage/S9M_exon_reads.processed_data.csv ``` # TESTING ```{r} S9Mdata <- read.csv("../output/65-exon-coverage/S9M_exon_reads.processed_data.csv") ``` ## Natural log fold change calculations ```{r} test <- S9Mdata %>% dplyr::select(GeneID,Exon1,Exon2,Exon3,Exon4,Exon5,Exon6) %>% mutate(fold2 = log(Exon2 / Exon1)) %>% mutate(fold3 = log(Exon3 / Exon1)) %>% mutate(fold4 = log(Exon4 / Exon1)) %>% mutate(fold5 = log(Exon5 / Exon1)) %>% mutate(fold6 = log(Exon6 / Exon1)) ``` ```{r} head(test) ``` ```{r} data_long <- test %>% pivot_longer(cols = starts_with("fold"), names_to = "fold", values_to = "Value") %>% mutate(fold = as.factor(fold)) # Convert column to a factor head(data_long) ``` ```{r} data_long$Value[data_long$Value == "-Inf" | data_long$Value == "Inf"] <- NA ``` ## Calculate mean, SD, SE ```{r} summary_data <- data_long %>% group_by(fold) %>% summarise( Mean = mean(Value, na.rm = TRUE), SE = sd(Value, na.rm = TRUE) / sqrt(sum(!is.na(Value))) ) ``` ```{r} class(summary_data$Mean) ``` ```{r} ggplot(summary_data, aes(x = fold, y = Mean)) + geom_point() + geom_errorbar(aes(ymin = Mean - SE, ymax = Mean + SE), width = 0.2) + labs( x = "Exon", y = "Mean Value", title = "Mean Values with Standard Error by fold" ) + theme_minimal() ``` ::: {.callout-warning} Be sure to load the [vector groups chunk](#vectors-for-subsetting-samples-by-different-groups) and the [functions chunk](function-to-calculate-log-fold-change) before running! ::: ## Test exon sum threshold ### Sums of Exons 1 - 6 ```{r test-exon-sums} test_sum <- S9Mdata %>% dplyr::select(GeneID,Exon1,Exon2,Exon3,Exon4,Exon5,Exon6) test_sum$Sum <- rowSums(test_sum[, -1], na.rm = TRUE) str(test_sum) ``` ### Gene counts with exon coverage threshold ```{r test-check-gene-counts-exon-sums} # Count the occurrences of values in test_sum$Sum sum_counts <- table(ifelse(test_sum$Sum <= exon_sum_threshold, paste0("<= ", exon_sum_threshold), paste0("> ", exon_sum_threshold))) # Display the counts print(sum_counts) ``` ## Plot raw expression ```{r} # Replace NA values with a placeholder value (e.g., 0) cleaned_data <- test_sum %>% mutate(across(-Sum, ~ ifelse(is.na(.), 0, .))) # Reshape the data to long format long_data <- tidyr::pivot_longer(cleaned_data, cols = starts_with("Exon"), names_to = "Exon", values_to = "Values") # Plotting a dot plot (scatter plot) ggplot(long_data, aes(x = factor(Exon), y = Values)) + geom_point(position = position_jitter(width = 0.3, height = 0), alpha = 0.15) + scale_y_log10() + labs(x = "Exon", y = "Read Counts (log10)") + theme_minimal() ``` ## Save plot ```{r} ggsave("../output/65-exon-coverage/figures/raw_exon_coverage-scatter.png", plot = last_plot(), dpi = 300, bg = "white") ``` # Filter genes with exon threshold ## Sum of Exons 1-6 for each file. ```{r} # Initialize an empty list to store results for each file sums_list <- list() # Loop through each file in the list for (file in all) { # Read in the data file data <- read.csv(paste0("../output/65-exon-coverage/", file, "_exon_reads.processed_data.csv")) # Calculate the sum of Exons 1-6 sum_result <- data %>% dplyr::select(GeneID, Exon1, Exon2, Exon3, Exon4, Exon5, Exon6) %>% mutate(Sum = rowSums(.[, -1], na.rm = TRUE)) # Count GeneIDs meeting the threshold for the current file geneid_count <- sum_result %>% dplyr::filter(Sum >= exon_sum_threshold) %>% nrow() cat("\nCount of GeneIDs meeting the threshold (", exon_sum_threshold, "summed reads ) in", file, ":", geneid_count, "\n") # Append the result and count to the lists sums_list[[file]] <- sum_result } ``` ## Filter across all samples Join all data frames in the `sums_list` on `GeneID` and then filter rows for any `GeneID` with sums >= set threshold. ```{r} # Rename the Sum column in each data frame in sums_list # Assigns the original filename to the Sum column in each data frame renamed_sums_list <- lapply(seq_along(sums_list), function(i) { colname <- paste0(all[i], "_Sum") col_index <- grep("Sum", colnames(sums_list[[i]])) colnames(sums_list[[i]])[col_index] <- colname dplyr::select(sums_list[[i]], GeneID, colname) }) # Join data frames in renamed_sums_list on GeneID all_geneids_df <- Reduce(function(x, y) dplyr::full_join(x, y, by = "GeneID"), renamed_sums_list) # Filter rows where all values are >= the set threshold exon_threshold_geneids <- all_geneids_df %>% dplyr::filter(if_all(ends_with("_Sum"), ~. >= exon_sum_threshold)) %>% dplyr::select(GeneID) # Print count of GeneIDs meeting the threshold across all files cat("Final count of unique GeneIDs meeting the threshold (", exon_sum_threshold, "summed reads ) across all files:", nrow(exon_threshold_geneids), "\n") # Print the structure of the final data frame str(exon_threshold_geneids) ``` ### Write filtered GeneIDs to file ```{r} exon_threshold_out_file <- paste0("../output/65-exon-coverage/geneids-exon_threshold-", exon_sum_threshold, ".csv", collapse = "") write.csv(exon_threshold_geneids, file = exon_threshold_out_file) ``` # Calculate fold change for all groups Uses `GeneIDs` passing threshold determined above. ```{r} # Create a list to store results for each group results_list <- list() # Loop through each group for (group_name in c("all", "controls", "exposed", "controls_males", "exposed_males", "controls_females", "exposed_females", "females", "males")) { # Get the corresponding vector current_vector <- eval(parse(text = group_name)) # Create an empty list to store results for each file in the group group_results <- list() # Loop through each file in the group for (file_id in current_vector) { # Read the data frame current_data <- read.csv(paste0("../output/65-exon-coverage/", file_id, "_exon_reads.processed_data.csv")) # Filter current_data based on exon_threshold_geneids current_data <- subset(current_data, GeneID %in% exon_threshold_geneids$GeneID) # Check if any rows are left after filtering if (nrow(current_data) > 0) { # Calculate fold change current_result <- calculate_fold_change(current_data) # Replace infinite values within the data frame current_result <- as.data.frame(lapply(current_result, function(x) ifelse(is.infinite(x), NA, x))) # Ensure unique column names colnames(current_result) <- make.unique(as.character(colnames(current_result)), sep = "_") # write out fc <- paste0("../output/65-exon-coverage/", file_id, "_exonsum_", exon_sum_threshold, "_fold.csv") write.csv(current_result, file = fc) # Store the result group_results[[file_id]] <- current_result } else { # If no rows are left after filtering, you might want to handle this case accordingly print(paste("No matching GeneIDs found in file:", file_id)) } } # Bind the data frames together mean_fold_change <- do.call(rbind, group_results) # Convert the data frame to a tibble to support summarise mean_fold_change <- as_tibble(mean_fold_change) # Reshape the data to long format data_long <- mean_fold_change %>% pivot_longer(cols = starts_with("fold"), names_to = "fold", values_to = "Value") %>% mutate(fold = as.factor(fold)) # Convert column to a factor # Calculate summary statistics summary_data <- data_long %>% group_by(fold) %>% summarise( Mean = mean(Value, na.rm = TRUE), SE = sd(Value, na.rm = TRUE) / sqrt(sum(!is.na(Value))) ) # Store the summary data in the main results list results_list[[group_name]] <- summary_data } ``` ```{r} mmf <- read.csv("../output/65-exon-coverage/malesmeanfold.csv") ``` ```{bash} head ../output/65-exon-coverage/*fold.csv wc -l ../output/65-exon-coverage/*fold.csv ``` ## Access results for a specific group (e.g., "all") ```{r} all_group_result <- results_list[["all"]] ``` # Plotting ## Controls vs. Exposed ```{r} # Recreate combined_results combined_results <- bind_rows( mutate(results_list$controls, group = "controls"), mutate(results_list$exposed, group = "exposed") ) # Convert 'fold' to a factor combined_results$fold <- factor(combined_results$fold, levels = c("fold2", "fold3", "fold4", "fold5", "fold6")) # Plotting code library(ggplot2) ggplot(combined_results, aes(x = fold, y = Mean, group = group, color = group)) + geom_point(position = position_dodge(width = 0.0), size = 1) + geom_line(position = position_dodge(width = 0.0), size = 0.5) + geom_errorbar(aes(ymin = Mean - SE, ymax = Mean + SE), position = position_dodge(width = 0.0), width = 0.25, color = "black") + labs( x = "Exon", y = "Mean ln(FC vs. Exon 1)") + theme_minimal() + scale_x_discrete(labels = c("fold2" = "Exon 2", "fold3" = "Exon 3", "fold4" = "Exon 4", "fold5" = "Exon 5", "fold6" = "Exon 6")) ``` ### Save plot ```{r} ggsave("../output/65-exon-coverage/figures/controls.v.exposed-line_plot.png", plot = last_plot(), dpi = 300, bg = "white") ``` ## Females vs. Males ```{r} # Recreate combined_results combined_results <- bind_rows( mutate(results_list$females, group = "females"), mutate(results_list$males, group = "males") ) # Convert 'fold' to a factor combined_results$fold <- factor(combined_results$fold, levels = c("fold2", "fold3", "fold4", "fold5", "fold6")) # Plotting code library(ggplot2) ggplot(combined_results, aes(x = fold, y = Mean, group = group, color = group)) + geom_point(position = position_dodge(width = 0.0), size = 1) + geom_line(position = position_dodge(width = 0.0), size = 0.5) + geom_errorbar(aes(ymin = Mean - SE, ymax = Mean + SE), position = position_dodge(width = 0.0), width = 0.25, color = "black") + labs( x = "Exon", y = "Mean ln(FC vs. Exon 1)") + theme_minimal() + scale_x_discrete(labels = c("fold2" = "Exon 2", "fold3" = "Exon 3", "fold4" = "Exon 4", "fold5" = "Exon 5", "fold6" = "Exon 6")) ``` ### Save plot ```{r} ggsave("../output/65-exon-coverage/figures/females.v.males-line_plot.png", plot = last_plot(), dpi = 300, bg = "white") ``` ## Control Females vs. Control Males ```{r} # Recreate combined_results combined_results <- bind_rows( mutate(results_list$controls_females, group = "control females"), mutate(results_list$controls_males, group = "control males") ) # Convert 'fold' to a factor combined_results$fold <- factor(combined_results$fold, levels = c("fold2", "fold3", "fold4", "fold5", "fold6")) # Plotting code library(ggplot2) ggplot(combined_results, aes(x = fold, y = Mean, group = group, color = group)) + geom_point(position = position_dodge(width = 0.0), size = 1) + geom_line(position = position_dodge(width = 0.0), size = 0.5) + geom_errorbar(aes(ymin = Mean - SE, ymax = Mean + SE), position = position_dodge(width = 0.0), width = 0.25, color = "black") + labs( x = "Exon", y = "Mean ln(FC vs. Exon 1)") + theme_minimal() + scale_x_discrete(labels = c("fold2" = "Exon 2", "fold3" = "Exon 3", "fold4" = "Exon 4", "fold5" = "Exon 5", "fold6" = "Exon 6")) ``` ### Save plot ```{r} ggsave("../output/65-exon-coverage/figures/control-females.v.control-males-line_plot.png", plot = last_plot(), dpi = 300, bg = "white") ``` ## Exposed Females vs. Exposed Males ```{r} # Recreate combined_results combined_results <- bind_rows( mutate(results_list$exposed_females, group = "exposed females"), mutate(results_list$exposed_males, group = "exposed males") ) # Convert 'fold' to a factor combined_results$fold <- factor(combined_results$fold, levels = c("fold2", "fold3", "fold4", "fold5", "fold6")) # Plotting code library(ggplot2) ggplot(combined_results, aes(x = fold, y = Mean, group = group, color = group)) + geom_point(position = position_dodge(width = 0.0), size = 1) + geom_line(position = position_dodge(width = 0.0), size = 0.5) + geom_errorbar(aes(ymin = Mean - SE, ymax = Mean + SE), position = position_dodge(width = 0.0), width = 0.25, color = "black") + labs( x = "Exon", y = "Mean ln(FC vs. Exon 1)") + theme_minimal() + scale_x_discrete(labels = c("fold2" = "Exon 2", "fold3" = "Exon 3", "fold4" = "Exon 4", "fold5" = "Exon 5", "fold6" = "Exon 6")) ``` ### Save plot ```{r} ggsave("../output/65-exon-coverage/figures/exposed-females.v.exposed-males-line_plot.png", plot = last_plot(), dpi = 300, bg = "white") ``` ## Control Females vs. Exposed Females ```{r} # Recreate combined_results combined_results <- bind_rows( mutate(results_list$controls_females, group = "control females"), mutate(results_list$exposed_females, group = "exposed females") ) # Convert 'fold' to a factor combined_results$fold <- factor(combined_results$fold, levels = c("fold2", "fold3", "fold4", "fold5", "fold6")) # Plotting code library(ggplot2) ggplot(combined_results, aes(x = fold, y = Mean, group = group, color = group)) + geom_point(position = position_dodge(width = 0.0), size = 1) + geom_line(position = position_dodge(width = 0.0), size = 0.5) + geom_errorbar(aes(ymin = Mean - SE, ymax = Mean + SE), position = position_dodge(width = 0.0), width = 0.25, color = "black") + labs( x = "Exon", y = "Mean ln(FC vs. Exon 1)") + theme_minimal() + scale_x_discrete(labels = c("fold2" = "Exon 2", "fold3" = "Exon 3", "fold4" = "Exon 4", "fold5" = "Exon 5", "fold6" = "Exon 6")) ``` ### Save plot ```{r} ggsave("../output/65-exon-coverage/figures/control-females.v.exposed-females-line_plot.png", plot = last_plot(), dpi = 300, bg = "white") ``` ## Control Males vs. Exposed Males ```{r} # Recreate combined_results combined_results <- bind_rows( mutate(results_list$controls_males, group = "control males"), mutate(results_list$exposed_males, group = "exposed males") ) # Convert 'fold' to a factor combined_results$fold <- factor(combined_results$fold, levels = c("fold2", "fold3", "fold4", "fold5", "fold6")) # Plotting code library(ggplot2) ggplot(combined_results, aes(x = fold, y = Mean, group = group, color = group)) + geom_point(position = position_dodge(width = 0.0), size = 1) + geom_line(position = position_dodge(width = 0.0), size = 0.5) + geom_errorbar(aes(ymin = Mean - SE, ymax = Mean + SE), position = position_dodge(width = 0.0), width = 0.25, color = "black") + labs( x = "Exon", y = "Mean ln(FC vs. Exon 1)") + theme_minimal() + scale_x_discrete(labels = c("fold2" = "Exon 2", "fold3" = "Exon 3", "fold4" = "Exon 4", "fold5" = "Exon 5", "fold6" = "Exon 6")) ``` ### Save plot ```{r} ggsave("../output/65-exon-coverage/figures/control-males.v.exposed-males-line_plot.png", plot = last_plot(), dpi = 300, bg = "white") ```