--- title: "78 asca meth-diff" author: Steven Roberts date: "`r format(Sys.time(), '%d %B, %Y')`" output: html_document: theme: readable highlight: zenburn toc: true toc_float: true number_sections: true code_folding: show code_download: true # github_document: # toc: true # toc_depth: 3 # number_sections: true # html_preview: true --- Ariana created lists of genes were exon expression patterns were similar and different under OA conditions for both sexes. This script will investigate if there a clear difference in methylation patterns in these genes. This includes determining the presence of DML in these genes and if gene body methylation differed. # Set up R Markdown document ```{r} knitr::opts_chunk$set( echo = TRUE, # Display code chunks eval = TRUE, # Evaluate code chunks warning = FALSE, # Hide warnings message = FALSE, # Hide messages fig.width = 6, # Set plot width in inches fig.height = 4, # Set plot height in inches fig.align = "center" # Align plots to the center ) knitr::opts_knit$set(root.dir = "../output/78-asca-methdiff/") #Set root directory ``` ```{r} getwd() ``` # Install packages ```{r, include = FALSE} # if ("tidyverse" %in% rownames(installed.packages()) == 'FALSE') install.packages('tidyverse') # if ("data.table" %in% rownames(installed.packages()) == 'FALSE') install.packages('data.table') # if ("RColorBrewer" %in% rownames(installed.packages()) == 'FALSE') install.packages('RColorBrewer') # if ("patchwork" %in% rownames(installed.packages()) == 'FALSE') install.packages('patchwork') ``` ```{r} # if (!require("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("topGO") ``` ```{r setup, include = FALSE} require(tidyverse) require(data.table) require(RColorBrewer) require(patchwork) require(topGO) ``` ```{r} sessionInfo() ``` # Import overarching data ## Gene lengths ```{r} cov_data <- fread("https://raw.githubusercontent.com/epigeneticstoocean/2018_L18-adult-methylation/main/analyses/genes_treatment_fpkm_CoV_df.csv", col.names = c("gene_ids", "M_control","F_control", "M_exposed", "F_exposed")) #Import cov data cov_data_l <- cov_data %>% pivot_longer(names_to = "sample", values_to = "CoV_FPKM", cols = 2:5) #Pivot longer head(cov_data_l) #Confirm formatting ``` ```{r} Cvir_genom_feats <- fread("https://gannet.fish.washington.edu/Atumefaciens/20211209_cvir_gff-to-bed/20211209_cvir_GCF_002022765.2_genes.bed", col.names = c("chr", "start", "stop", "gene_ids", "V5", "strand")) %>% mutate(., length = stop - start + 1) #Import genome feature BEDfile and create a new column for gene length head(Cvir_genom_feats) #Confirm changes ``` ```{r} geneLengths <- cov_data_l[, c("gene_ids")] %>% unique(.) %>% merge(., Cvir_genom_feats[,c("gene_ids", "length")], by = "gene_ids") %>% dplyr::rename(gene_name = "gene_ids") %>% dplyr::select(., gene_name, length) %>% mutate(., gene_name = gsub(x = gene_name, pattern = "gene-", replacement = "")) #Add gene length to CoV data and retain only gene ID and length column. Rename columns to match naming conventions in this script and remove "gene-" prefix from names head(geneLengths) #Confirm formatting #Confirmm changes ``` ## Annotation data ```{r} mRNATrack <- read.delim("../genome-features/C_virginica-3.0-mRNA.gff", header = FALSE) %>% separate(., V9, into = c("V10", "V11"), sep = "ID=rna-") %>% separate(., V11, into = c("V12", "V13"), sep = ";Parent=") %>% separate(., V13, into = c("V14", "V15"), sep = ";Dbxref=") %>% separate(., V15, into = c("V16", "V17"), sep = ";product=") %>% separate(., V17, into = c("V18", "V19"), sep = ";transcript_id=") %>% dplyr::select(., c("V12", "V14", "V18")) %>% dplyr::rename(., transcript = V12, gene = V14, product = V18) #Winnow down mRNA track mRNATrack$gene_name <- gsub("gene-", "", mRNATrack$gene) #Make gene naming consistent head(mRNATrack) #Confirm changes ``` ```{r} GOterms <- read.delim("../genome-features/_blast-GO-unfolded.sorted", header = FALSE, col.names = c("GO", "transcript")) head(GOterms) ``` # Females ## Import data ```{r} female_splice <- read.table("../output/77-asca-exon/females_splicing_treatments_gene_list.csv", col.names = c("gene_name")) %>% mutate(desc = "f_diff") #Import the list of genes with changes in alternative splicing. Change column names and add descriptive column for what these genes are head(female_splice) #Confirm import and formatting ``` ```{r} female_noSplice <- read.table("../output/77-asca-exon/females_splicing_nodifference_gene_list.csv", col.names = c("gene_name")) %>% mutate(desc = "f_same") #Import the list of genes with no change in alternative splicing. Change column names and add descriptive column for what these genes are head(female_noSplice) #Confirm import and formatting ``` ```{r} female_dml <- read.table("../output/16-DML-annotation/female_dml_geneID.txt", col.names = c("gene_name")) %>% mutate(desc = "f_dml") head(female_dml) ``` ```{r} female_40_meth <- read_csv("../output/73-gene-methylation/female_40_meth.csv") %>% dplyr::rename(., gene_name = "name") %>% mutate(gene_name = str_replace(gene_name, "gene-", "")) #Import data with average gene methylation for control and exposed samples head(female_40_meth) #Confirm import ``` ## Genes with changes in the maximum transcript expressed ```{r} read.csv("../supplemental-files/01.01-fmcoe-max-predom-isos-gene_fpkm.csv") %>% dplyr::select(., c(1:2,4)) %>% mutate(., difference = females_exposed_max_transcript_counts - females_controls_max_transcript_counts) %>% filter(., difference != 0) %>% left_join(female_splice, ., by = "gene_name") %>% filter(., is.na(difference) == FALSE) %>% nrow(.) #5 genes with a change in maximum transcripts expressed were also alternatively spliced ``` ```{r} read.csv("../supplemental-files/01.01-fmcoe-max-predom-isos-gene_fpkm.csv") %>% dplyr::select(., c(1:2,4)) %>% mutate(., difference = females_exposed_max_transcript_counts - females_controls_max_transcript_counts) %>% filter(., difference != 0) %>% left_join(female_noSplice, ., by = "gene_name") %>% filter(., is.na(difference) == FALSE) %>% nrow(.) #6 genes with a change in maximum transcripts expressed were not alternatively spliced ``` ## DML presence in alternatively spliced genes ```{r} female_splice %>% left_join(., female_dml, by = "gene_name") %>% filter(., is.na(desc.y) == FALSE) %>% dplyr::select(., gene_name) %>% unique(.) %>% nrow(.) #0 alternatively spliced genes contain DML ``` ```{r} female_noSplice %>% left_join(., female_dml, by = "gene_name") %>% filter(., is.na(desc.y) == FALSE) %>% dplyr::select(., gene_name) %>% unique(.) %>% nrow(.) #1 not spliced gene contain DML ``` ## Gene body methylation To examine differences in gene body methylation, we'll create XY plots with average gene body methylation for control and OA-exposed samples. If genes that are differentially spliced significantly deviate from the 1-to-1 regression line, then it is possible methylation modulated alternative splicing. ### Format data ```{r} f_splice_meth <- female_40_meth %>% left_join(female_splice, by = "gene_name") %>% filter(desc == "f_diff") #Join average gene body methylation data with spliced genes. filter for genes that are spliced f_splice_meth #Confirm formatting ``` ```{r} f_noSplice_meth <- female_40_meth %>% left_join(female_noSplice, by = "gene_name") %>% filter(desc == "f_same") #Join average gene body methylation data with spliced genes. filter for genes that are not spliced head(f_noSplice_meth) #Confirm changes ``` ### Plot data ```{r} f_allSplice_meth <- rbind(f_noSplice_meth, f_splice_meth) #Combine data to make plotting easier head(f_allSplice_meth) #Confirm changes ``` ```{r} plotColors <- RColorBrewer::brewer.pal(9, "Greens") #Create color palette ``` ```{r} f_allSplice_meth %>% ggplot(aes(x = average_control, y = average_exposed, color = desc)) + geom_point() + geom_smooth(method = lm, data = f_noSplice_meth, aes(x = average_control, y = average_exposed), color = plotColors[5]) + geom_smooth(method = lm, data = f_splice_meth, aes(x = average_control, y = average_exposed), color = plotColors[8]) + labs(x = "Control Methylation (%)", y = "Exposed Methylation (%)") + scale_color_manual(name = "Splicing Pattern", values = c(plotColors[8], plotColors[5]), breaks = c("f_diff", "f_same"), labels = c("Difference", "No Difference")) + theme_classic(base_size = 15) #Take combined data and plot average control methylation vs. average exposed methylation. Assign color based on gene category. Add regressions with 95% confidence intervals for each data category separately ggsave("../output/78-asca-methdiff/fem-methylation-patterns-splice-type.pdf", height = 8.5, width = 11) ``` ### Model data I will use a binomial model to understand if average control methylation or average exposed methylation can predict success (gene being alternatively spliced by treatment). I will also include gene length in this model. #### Format data ```{r} f_allSplice_methLengths <- f_allSplice_meth %>% left_join(., geneLengths, by = "gene_name") #Join with gene length information head(f_allSplice_methLengths) #Confirm changes ``` #### Run model ```{r} femSpliceMethModel <- glm(I(desc == "f_diff") ~ average_control + average_exposed + log10(length), family = binomial(), data = f_allSplice_methLengths) #Create a binomial model, where 1 = success = different splicing by treatment. Use average control methylation, average exposed methylation, and gene length as explanatory variables summary(femSpliceMethModel) #No significant terms ``` ```{r} write.csv(broom::tidy(femSpliceMethModel), "../output/78-asca-methdiff/fem-splicing-binomial-model-output.csv", quote = FALSE, row.names = FALSE) #Save model output ``` ## Enrichment test I will use `topGO` to understand if genes that were alternatively spliced have any overrepresented biological functions. ### Make input files The background is all genes used in the ASCA. ```{r} read.csv("../output/72-exon-data-rfmt/female_exon_tf.csv") %>% select_if(~ !any(is.na(.))) %>% t(.) %>% as.data.frame(.) %>% rownames_to_column(., var = "gene_name") %>% dplyr::slice(., -1) %>% dplyr::select(., gene_name) %>% mutate(., desc = "f_background") %>% left_join(., y = mRNATrack, by = "gene_name") %>% left_join(., y = GOterms, by = "transcript") %>% dplyr::select(., gene_name, GO) %>% unique(.) %>% group_by(., gene_name) %>% summarise(., GO = paste(GO, collapse = ",")) %>% write.table(., "../output/78-asca-methdiff/geneid2go-fem_altSplice.tab", quote = FALSE, sep = "\t", col.names = FALSE, row.names = FALSE) #Take list of all genes used for analysis and join with mRNA and GO term information. Select only gene names and GO terms, then retain unique rows. Group by gene name and collapse all GO terms for each gene into one row separated by commas. Save as a tab file ``` ```{r} geneID2GO <- readMappings(file = "../output/78-asca-methdiff/geneid2go-fem_altSplice.tab") #Loading the GO annotations and GeneIDs. Each line has one transcript ID and all associated GOterms str(head(geneID2GO)) #Confirm file structure length(geneID2GO) #11270 genes with annotations ``` ```{r} geneNames <- names(geneID2GO) #Extract names to use as a gene universe head(geneNames) ``` ```{r} femAltSpliceGenes <- female_splice %>% dplyr::select(., gene_name) #Take data select the gene ID column. Save as a new object femAltSpliceGenes <- femAltSpliceGenes$gene_name #Save column as a vector (will not work otherwise!) ``` ```{r} femAltSpliceGeneList <- factor(as.integer(geneNames %in% femAltSpliceGenes)) #Create a factor vector to indicate genes that increased transcripts as significant (1) and didn't as not significant (0) names(femAltSpliceGeneList) <- geneNames str(femAltSpliceGeneList) ``` ### Run enrichment test ```{r} femAltSpliceDataBP <- new("topGOdata", ontology = "BP", allGenes = femAltSpliceGeneList, annot = annFUN.gene2GO, gene2GO = geneID2GO) #Create biological process topGO object femAltSpliceDataBP #Get summary of object ``` ```{r} test.stat <- new("classicCount", testStatistic = GOFisherTest, name = "Fisher test") resultFisher.femAltSpliceDataBP <- getSigGroups(femAltSpliceDataBP, test.stat) resultFisher.femAltSpliceDataBP ``` ```{r} pvalFis.femAltSpliceDataBP <- score(resultFisher.femAltSpliceDataBP) #Extract p-values head(pvalFis.femAltSpliceDataBP) hist(pvalFis.femAltSpliceDataBP, 50, xlab = "p-values") #Plot histogram of p-values ``` ```{r} allRes.femAltSpliceDataBP <- GenTable(femAltSpliceDataBP, classic = resultFisher.femAltSpliceDataBP, ranksOf = "classic", orderBy = "classic", topNodes = length(pvalFis.femAltSpliceDataBP)) #Create a statistical results table with statistical test results. Order by p-value (classic), and include all results (topNodes) head(allRes.femAltSpliceDataBP) ``` ```{r} write.csv(allRes.femAltSpliceDataBP, "../output/78-asca-methdiff/fem-altSplice-BP-FisherTestResults.csv", quote = FALSE, row.names = FALSE) #Save dataframe ``` ### Match enriched GOterms with general annotation information ```{r} sigRes.femAltSpliceDataBP <- allRes.femAltSpliceDataBP[1:38,c(1, 6)] #Filter significantly enriched GOterms, only keep GOID and p-value colnames(sigRes.femAltSpliceDataBP) <- c("GO", "p.value") #Change column names head(sigRes.femAltSpliceDataBP) ``` ```{r} female_splice %>% left_join(., y = mRNATrack, by = "gene_name") %>% left_join(., y = GOterms, by = "transcript") %>% separate(., col = product, into = c("product", "transcriptVar"), sep = "%2C transcript variant") %>% dplyr::select(., gene_name, desc, product, GO) %>% unique(.) %>% left_join(x = sigRes.femAltSpliceDataBP, y = ., by = "GO") %>% filter(., is.na(desc) == FALSE) %>% dplyr::select(., gene_name, desc, GO, p.value, product) %>% write.csv(., "../output/78-asca-methdiff/fem-altSplice-BP-EnrichedGO-geneAnnot.csv", quote = FALSE, row.names = FALSE) #Take genes that were alternatively splice and annotate. Remove extra information from annotations and select only columns of interest with unique rows. Join with enriched BPGO information and filter rows without gene matches. Reorder columns and save as a csv ``` # Males ## Import data ```{r} male_splice <- read.table("../output/77-asca-exon/males_splicing_treatments_gene_list.csv", col.names = c("gene_name")) %>% mutate(desc = "m_diff") #Import the list of genes with changes in alternative splicing. Change column names and add descriptive column for what these genes are head(male_splice) #Confirm import and formatting ``` ```{r} male_noSplice <- read.table("../output/77-asca-exon/males_splicing_nodifference_gene_list.csv", col.names = c("gene_name")) %>% mutate(desc = "m_same") #Import the list of genes with no change in alternative splicing. Change column names and add descriptive column for what these genes are head(male_noSplice) #Confirm import and formatting ``` ```{r} male_dml <- read.table("../output/16-DML-annotation/male_dml_geneID.txt", col.names = c("gene_name")) %>% mutate(desc = "m_dml") head(male_dml) ``` ```{r} male_40_meth <- read_csv("../output/73-gene-methylation/male_40_meth.csv") %>% dplyr::rename(., gene_name = "name") %>% mutate(gene_name = str_replace(gene_name, "gene-", "")) #Import data with average gene methylation for control and exposed samples head(male_40_meth) #Confirm import ``` ## Genes with changes in the maximum transcript expressed ```{r} read.csv("../supplemental-files/01.01-fmcoe-max-predom-isos-gene_fpkm.csv") %>% dplyr::select(., c(1,3,5)) %>% mutate(., difference = males_exposed_max_transcript_counts - males_controls_max_transcript_counts) %>% filter(., difference != 0) %>% left_join(male_splice, ., by = "gene_name") %>% filter(., is.na(difference) == FALSE) %>% nrow(.) #4 genes with a change in maximum transcripts expressed were also alternatively spliced ``` ```{r} read.csv("../supplemental-files/01.01-fmcoe-max-predom-isos-gene_fpkm.csv") %>% dplyr::select(., c(1,3,5)) %>% mutate(., difference = males_exposed_max_transcript_counts - males_controls_max_transcript_counts) %>% filter(., difference != 0) %>% left_join(male_noSplice, ., by = "gene_name") %>% filter(., is.na(difference) == FALSE) %>% nrow(.) #14 genes with a change in maximum transcripts expressed were not alternatively spliced ``` ## DML presence in alternatively spliced genes ```{r} male_splice %>% left_join(., male_dml, by = "gene_name") %>% filter(., is.na(desc.y) == FALSE) %>% dplyr::select(., gene_name) %>% unique(.) %>% nrow(.) #6 alternatively spliced genes contain DML ``` ```{r} male_noSplice %>% left_join(., male_dml, by = "gene_name") %>% filter(., is.na(desc.y) == FALSE) %>% dplyr::select(., gene_name) %>% unique(.) %>% nrow(.) #13 not spliced gene contain DML ``` ## Gene body methylation To examine differences in gene body methylation, we'll create XY plots with average gene body methylation for control and OA-exposed samples. If genes that are differentially spliced significantly deviate from the 1-to-1 regression line, then it is possible methylation modulated alternative splicing. ### Format data ```{r} m_splice_meth <- male_40_meth %>% left_join(male_splice, by = "gene_name") %>% filter(desc == "m_diff") #Join average gene body methylation data with spliced genes. filter for genes that are spliced m_splice_meth #Confirm formatting ``` ```{r} m_noSplice_meth <- male_40_meth %>% left_join(male_noSplice, by = "gene_name") %>% filter(desc == "m_same") #Join average gene body methylation data with spliced genes. filter for genes that are not spliced head(m_noSplice_meth) #Confirm changes ``` ### Plot data ```{r} m_allSplice_meth <- rbind(m_noSplice_meth, m_splice_meth) #Combine data to make plotting easier head(m_allSplice_meth) #Confirm changes ``` ```{r} m_allSplice_meth %>% ggplot(aes(x = average_control, y = average_exposed, color = desc)) + geom_point() + geom_smooth(method = lm, data = m_noSplice_meth, aes(x = average_control, y = average_exposed), color = plotColors[5]) + geom_smooth(method = lm, data = m_splice_meth, aes(x = average_control, y = average_exposed), color = plotColors[8]) + labs(x = "Control Methylation (%)", y = "Exposed Methylation (%)") + scale_color_manual(name = "Splicing Pattern", values = c(plotColors[8], plotColors[5]), breaks = c("m_diff", "m_same"), labels = c("Difference", "No Difference")) + theme_classic(base_size = 15) #Take combined data and plot average control methylation vs. average exposed methylation. Assign color based on gene category. Add regressions with 95% confidence intervals for each data category separately ggsave("../output/78-asca-methdiff/male-methylation-patterns-splice-type.pdf", height = 8.5, width = 11) ``` ### Model data I will use a binomial model to understand if average control methylation or average exposed methylation can predict success (gene being alternatively spliced by treatment). I will also include gene length in this model. #### Format data ```{r} m_allSplice_methLengths <- m_allSplice_meth %>% left_join(., geneLengths, by = "gene_name") #Join with gene length information head(m_allSplice_methLengths) #Confirm changes ``` #### Run model ```{r} maleSpliceMethModel <- glm(I(desc == "m_diff") ~ average_control + average_exposed + log10(length), family = binomial(), data = m_allSplice_methLengths) #Create a binomial model, where 1 = success = different splicing by treatment. Use average control methylation, average exposed methylation, and gene length as explanatory variables summary(maleSpliceMethModel) #No significant terms ``` ```{r} write.csv(broom::tidy(maleSpliceMethModel), "../output/78-asca-methdiff/male-splicing-binomial-model-output.csv", quote = FALSE, row.names = FALSE) #Save model output ``` ## Enrichment test ### Make input files ```{r} read.csv("../output/72-exon-data-rfmt/male_exon_tf.csv") %>% select_if(~ !any(is.na(.))) %>% t(.) %>% as.data.frame(.) %>% rownames_to_column(., var = "gene_name") %>% dplyr::slice(., -1) %>% dplyr::select(., gene_name) %>% mutate(., desc = "m_background") %>% left_join(., y = mRNATrack, by = "gene_name") %>% left_join(., y = GOterms, by = "transcript") %>% dplyr::select(., gene_name, GO) %>% unique(.) %>% group_by(., gene_name) %>% summarise(., GO = paste(GO, collapse = ",")) %>% write.table(., "../output/78-asca-methdiff/geneid2go-male_altSplice.tab", quote = FALSE, sep = "\t", col.names = FALSE, row.names = FALSE) #Take list of important genes from all PCs and join with mRNA and GO term information. Select only gene names and GO terms, then retain unique rows. Group by gene name and collapse all GO terms for each gene into one row separated by commas. Save as a tab file ``` ```{r} geneID2GO <- readMappings(file = "../output/78-asca-methdiff/geneid2go-male_altSplice.tab") #Loading the GO annotations and GeneIDs. Each line has one transcript ID and all associated GOterms str(head(geneID2GO)) #Confirm file structure length(geneID2GO) #12641 genes with annotations ``` ```{r} geneNames <- names(geneID2GO) #Extract names to use as a gene universe head(geneNames) ``` ```{r} maleAltSpliceGenes <- male_splice %>% dplyr::select(., gene_name) #Take data select the gene ID column. Save as a new object maleAltSpliceGenes <- maleAltSpliceGenes$gene_name #Save column as a vector (will not work otherwise!) ``` ```{r} maleAltSpliceGeneList <- factor(as.integer(geneNames %in% maleAltSpliceGenes)) #Create a factor vector to indicate genes that increased transcripts as significant (1) and didn't as not significant (0) names(maleAltSpliceGeneList) <- geneNames str(maleAltSpliceGeneList) ``` ### Run enrichment test ```{r} maleAltSpliceDataBP <- new("topGOdata", ontology = "BP", allGenes = maleAltSpliceGeneList, annot = annFUN.gene2GO, gene2GO = geneID2GO) #Create biological process topGO object maleAltSpliceDataBP #Get summary of object ``` ```{r} test.stat <- new("classicCount", testStatistic = GOFisherTest, name = "Fisher test") resultFisher.maleAltSpliceDataBP <- getSigGroups(maleAltSpliceDataBP, test.stat) resultFisher.maleAltSpliceDataBP ``` ```{r} pvalFis.maleAltSpliceDataBP <- score(resultFisher.maleAltSpliceDataBP) #Extract p-values head(pvalFis.maleAltSpliceDataBP) hist(pvalFis.maleAltSpliceDataBP, 50, xlab = "p-values") #Plot histogram of p-values ``` ```{r} allRes.maleAltSpliceDataBP <- GenTable(maleAltSpliceDataBP, classic = resultFisher.maleAltSpliceDataBP, ranksOf = "classic", orderBy = "classic", topNodes = length(pvalFis.maleAltSpliceDataBP)) #Create a statistical results table with statistical test results. Order by p-value (classic), and include all results (topNodes) head(allRes.maleAltSpliceDataBP) ``` ```{r} write.csv(allRes.maleAltSpliceDataBP, "../output/78-asca-methdiff/male-altSplice-BP-FisherTestResults.csv", quote = FALSE, row.names = FALSE) #Save dataframe ``` ### Match enriched GOterms with general annotation information ```{r} sigRes.maleAltSpliceDataBP <- allRes.maleAltSpliceDataBP[1:26,c(1, 6)] #Filter significantly enriched GOterms, only keep GOID and p-value colnames(sigRes.maleAltSpliceDataBP) <- c("GO", "p.value") #Change column names head(sigRes.maleAltSpliceDataBP) ``` ```{r} male_splice %>% left_join(., y = mRNATrack, by = "gene_name") %>% left_join(., y = GOterms, by = "transcript") %>% separate(., col = product, into = c("product", "transcriptVar"), sep = "%2C transcript variant") %>% dplyr::select(., gene_name, desc, product, GO) %>% unique(.) %>% left_join(x = sigRes.maleAltSpliceDataBP, y = ., by = "GO") %>% filter(., is.na(desc) == FALSE) %>% dplyr::select(., gene_name, desc, GO, p.value, product) %>% write.csv(., "../output/78-asca-methdiff/male-altSplice-BP-EnrichedGO-geneAnnot.csv", quote = FALSE, row.names = FALSE) #Take genes that were alternatively splice and annotate. Remove extra information from annotations and select only columns of interest with unique rows. Join with enriched BPGO information and filter rows without gene matches. Reorder columns and save as a csv ``` # Create multipanel plot ```{r} femAllSpliceMethPlot <- f_allSplice_meth %>% ggplot(aes(x = average_control, y = average_exposed, color = desc)) + geom_point() + geom_smooth(method = lm, data = f_noSplice_meth, aes(x = average_control, y = average_exposed), color = plotColors[5], lty = 2, alpha = 0.3) + geom_smooth(method = lm, data = f_splice_meth, aes(x = average_control, y = average_exposed), color = plotColors[8], lty = 2, alpha = 0.3) + xlim(c(0,100)) + scale_y_continuous(breaks = seq(0, 100, 25), limits = c(0,110)) + labs(x = "", y = "Exposed Methylation (%)", title = "A. Females") + scale_color_manual(values = c(plotColors[8], plotColors[5]), breaks = c("f_diff", "f_same")) + theme_classic(base_size = 15) + theme(legend.position = "none") maleAllSpliceMethPlot <- m_allSplice_meth %>% ggplot(aes(x = average_control, y = average_exposed, color = desc)) + geom_point() + geom_smooth(method = lm, data = m_noSplice_meth, aes(x = average_control, y = average_exposed), color = plotColors[5], lty = 2, alpha = 0.3) + geom_smooth(method = lm, data = m_splice_meth, aes(x = average_control, y = average_exposed), color = plotColors[8], lty = 2, alpha = 0.3) + xlim(c(0,100)) + scale_y_continuous(breaks = seq(0, 100, 25), limits = c(0,110)) + labs(x = "Control Methylation (%)", y = "Exposed Methylation (%)", title = "B. Males") + scale_color_manual(name = "Splicing Pattern", values = c(plotColors[8], plotColors[5]), breaks = c("m_diff", "m_same"), labels = c("Difference", "No Difference")) + theme_classic(base_size = 15) + theme(legend.position = "bottom") ``` ```{r} femAllSpliceMethPlot / maleAllSpliceMethPlot #Create patchwork plot ggsave("../output/78-asca-methdiff/methylation-patterns-splice-type.pdf", height = 8.5, width = 11) ```