---
title: "78 asca meth-diff"
author: Steven Roberts
date: "`r format(Sys.time(), '%d %B, %Y')`" 
output: 
  html_document:
    theme: readable
    highlight: zenburn
    toc: true
    toc_float: true
    number_sections: true
    code_folding: show
    code_download: true
  # github_document:
  #   toc: true
  #   toc_depth: 3
  #   number_sections: true
  #   html_preview: true
---

Ariana created lists of genes were exon expression patterns were similar and different under OA conditions for both sexes. This script will investigate if there a clear difference in methylation patterns in these genes. This includes determining the presence of DML in these genes and if gene body methylation differed.

# Set up R Markdown document

```{r}
knitr::opts_chunk$set(
  echo = TRUE,         # Display code chunks
  eval = TRUE,         # Evaluate code chunks
  warning = FALSE,     # Hide warnings
  message = FALSE,     # Hide messages
  fig.width = 6,       # Set plot width in inches
  fig.height = 4,      # Set plot height in inches
  fig.align = "center" # Align plots to the center
)

knitr::opts_knit$set(root.dir = "../output/78-asca-methdiff/") #Set root directory
```

```{r}
getwd()
```

# Install packages

```{r, include = FALSE}
# if ("tidyverse" %in% rownames(installed.packages()) == 'FALSE') install.packages('tidyverse')
# if ("data.table" %in% rownames(installed.packages()) == 'FALSE') install.packages('data.table')
# if ("RColorBrewer" %in% rownames(installed.packages()) == 'FALSE') install.packages('RColorBrewer')
# if ("patchwork" %in% rownames(installed.packages()) == 'FALSE') install.packages('patchwork')
```

```{r}
# if (!require("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# 
# BiocManager::install("topGO")
```

```{r setup, include = FALSE}
require(tidyverse)
require(data.table)
require(RColorBrewer)
require(patchwork)
require(topGO)
```

```{r}
sessionInfo()
```

# Import overarching data

## Gene lengths

```{r}
cov_data <- fread("https://raw.githubusercontent.com/epigeneticstoocean/2018_L18-adult-methylation/main/analyses/genes_treatment_fpkm_CoV_df.csv", col.names = c("gene_ids", "M_control","F_control", "M_exposed", "F_exposed")) #Import cov data
cov_data_l <- cov_data %>% 
  pivot_longer(names_to = "sample", values_to = "CoV_FPKM", cols = 2:5) #Pivot longer
head(cov_data_l) #Confirm formatting
```

```{r}
Cvir_genom_feats <- fread("https://gannet.fish.washington.edu/Atumefaciens/20211209_cvir_gff-to-bed/20211209_cvir_GCF_002022765.2_genes.bed", col.names = c("chr", "start", "stop", "gene_ids", "V5", "strand")) %>%
  mutate(., length = stop - start + 1) #Import genome feature BEDfile and create a new column for gene length
head(Cvir_genom_feats) #Confirm changes
```

```{r}
geneLengths <- cov_data_l[, c("gene_ids")] %>%
  unique(.) %>%
  merge(., Cvir_genom_feats[,c("gene_ids", "length")], by = "gene_ids") %>% 
  dplyr::rename(gene_name = "gene_ids") %>%
  dplyr::select(., gene_name, length) %>%
  mutate(., gene_name = gsub(x = gene_name, pattern = "gene-", replacement = "")) #Add gene length to CoV data and retain only gene ID and length column. Rename columns to match naming conventions in this script and remove "gene-" prefix from names
head(geneLengths) #Confirm formatting #Confirmm changes
```

## Annotation data

```{r}
mRNATrack <- read.delim("../genome-features/C_virginica-3.0-mRNA.gff", header = FALSE) %>%
  separate(., V9, into = c("V10", "V11"), sep = "ID=rna-") %>%
  separate(., V11, into = c("V12", "V13"), sep = ";Parent=") %>%
  separate(., V13, into = c("V14", "V15"), sep = ";Dbxref=") %>%
  separate(., V15, into = c("V16", "V17"), sep = ";product=") %>%
  separate(., V17, into = c("V18", "V19"), sep = ";transcript_id=") %>%
  dplyr::select(., c("V12", "V14", "V18")) %>%
  dplyr::rename(., transcript = V12, gene = V14, product = V18) #Winnow down mRNA track
mRNATrack$gene_name <- gsub("gene-", "", mRNATrack$gene) #Make gene naming consistent
head(mRNATrack) #Confirm changes
```

```{r}
GOterms <- read.delim("../genome-features/_blast-GO-unfolded.sorted", header = FALSE, col.names = c("GO", "transcript"))
head(GOterms)
```

# Females

## Import data

```{r}
female_splice <- read.table("../output/77-asca-exon/females_splicing_treatments_gene_list.csv", col.names = c("gene_name")) %>%
  mutate(desc = "f_diff") #Import the list of genes with changes in alternative splicing. Change column names and add descriptive column for what these genes are
head(female_splice) #Confirm import and formatting
```

```{r}
female_noSplice <- read.table("../output/77-asca-exon/females_splicing_nodifference_gene_list.csv", col.names = c("gene_name")) %>%
  mutate(desc = "f_same") #Import the list of genes with no change in alternative splicing. Change column names and add descriptive column for what these genes are
head(female_noSplice) #Confirm import and formatting
```

```{r}
female_dml <- read.table("../output/16-DML-annotation/female_dml_geneID.txt", col.names = c("gene_name")) %>%
  mutate(desc = "f_dml")
head(female_dml)
```

```{r}
female_40_meth <- read_csv("../output/73-gene-methylation/female_40_meth.csv") %>%
  dplyr::rename(., gene_name = "name") %>%
  mutate(gene_name = str_replace(gene_name, "gene-", "")) #Import data with average gene methylation for control and exposed samples
head(female_40_meth) #Confirm import
```

## Genes with changes in the maximum transcript expressed

```{r}
read.csv("../supplemental-files/01.01-fmcoe-max-predom-isos-gene_fpkm.csv") %>%
  dplyr::select(., c(1:2,4)) %>%
  mutate(., difference = females_exposed_max_transcript_counts - females_controls_max_transcript_counts) %>%
  filter(., difference != 0) %>%
  left_join(female_splice, ., by = "gene_name") %>%
  filter(., is.na(difference) == FALSE) %>%
  nrow(.) #5 genes with a change in maximum transcripts expressed were also alternatively spliced
```

```{r}
read.csv("../supplemental-files/01.01-fmcoe-max-predom-isos-gene_fpkm.csv") %>%
  dplyr::select(., c(1:2,4)) %>%
  mutate(., difference = females_exposed_max_transcript_counts - females_controls_max_transcript_counts) %>%
  filter(., difference != 0) %>%
  left_join(female_noSplice, ., by = "gene_name") %>%
  filter(., is.na(difference) == FALSE) %>%
  nrow(.) #6 genes with a change in maximum transcripts expressed were not alternatively spliced
```

## DML presence in alternatively spliced genes

```{r}
female_splice %>%
  left_join(., female_dml, by = "gene_name") %>%
  filter(., is.na(desc.y) == FALSE) %>%
  dplyr::select(., gene_name) %>%
  unique(.) %>%
  nrow(.) #0 alternatively spliced genes contain DML
```

```{r}
female_noSplice %>%
  left_join(., female_dml, by = "gene_name") %>%
  filter(., is.na(desc.y) == FALSE) %>%
  dplyr::select(., gene_name) %>%
  unique(.) %>%
  nrow(.) #1 not spliced gene contain DML
```

## Gene body methylation

To examine differences in gene body methylation, we'll create XY plots with average gene body methylation for control and OA-exposed samples. If genes that are differentially spliced significantly deviate from the 1-to-1 regression line, then it is possible methylation modulated alternative splicing.

### Format data

```{r}
f_splice_meth <- female_40_meth %>%
  left_join(female_splice, by = "gene_name") %>%
  filter(desc == "f_diff") #Join average gene body methylation data with spliced genes. filter for genes that are spliced
f_splice_meth #Confirm formatting
```

```{r}
f_noSplice_meth <- female_40_meth %>%
  left_join(female_noSplice, by = "gene_name") %>%
  filter(desc == "f_same") #Join average gene body methylation data with spliced genes. filter for genes that are not spliced
head(f_noSplice_meth) #Confirm changes
```

### Plot data

```{r}
f_allSplice_meth <- rbind(f_noSplice_meth, f_splice_meth) #Combine data to make plotting easier
head(f_allSplice_meth) #Confirm changes
```

```{r}
plotColors <- RColorBrewer::brewer.pal(9, "Greens") #Create color palette
```

```{r}
f_allSplice_meth %>%
  ggplot(aes(x = average_control, y = average_exposed, color = desc)) +
  geom_point() +
  geom_smooth(method = lm, data = f_noSplice_meth, aes(x = average_control, y = average_exposed), color = plotColors[5]) +
  geom_smooth(method = lm, data = f_splice_meth, aes(x = average_control, y = average_exposed), color = plotColors[8]) +
  labs(x = "Control Methylation (%)", y = "Exposed Methylation (%)") +
  scale_color_manual(name = "Splicing Pattern",
                     values = c(plotColors[8], plotColors[5]), 
                     breaks = c("f_diff", "f_same"),
                     labels = c("Difference", "No Difference")) +
  theme_classic(base_size = 15) #Take combined data and plot average control methylation vs. average exposed methylation. Assign color based on gene category. Add regressions with 95% confidence intervals for each data category separately
ggsave("../output/78-asca-methdiff/fem-methylation-patterns-splice-type.pdf", height = 8.5, width = 11)
```

### Model data

I will use a binomial model to understand if average control methylation or average exposed methylation can predict success (gene being alternatively spliced by treatment). I will also include gene length in this model.

#### Format data

```{r}
f_allSplice_methLengths <- f_allSplice_meth %>%
  left_join(., geneLengths, by = "gene_name") #Join with gene length information
head(f_allSplice_methLengths) #Confirm changes
```

#### Run model

```{r}
femSpliceMethModel <- glm(I(desc == "f_diff") ~ average_control + average_exposed + log10(length),
                          family = binomial(),
                          data = f_allSplice_methLengths) #Create a binomial model, where 1 = success = different splicing by treatment. Use average control methylation, average exposed methylation, and gene length as explanatory variables
summary(femSpliceMethModel) #No significant terms
```

```{r}
write.csv(broom::tidy(femSpliceMethModel), "../output/78-asca-methdiff/fem-splicing-binomial-model-output.csv", quote = FALSE, row.names = FALSE) #Save model output
```

## Enrichment test

I will use `topGO` to understand if genes that were alternatively spliced have any overrepresented biological functions.

### Make input files

The background is all genes used in the ASCA.

```{r}
read.csv("../output/72-exon-data-rfmt/female_exon_tf.csv") %>%
  select_if(~ !any(is.na(.))) %>%
  t(.) %>% as.data.frame(.) %>%
  rownames_to_column(., var = "gene_name") %>%
  dplyr::slice(., -1) %>%
  dplyr::select(., gene_name) %>%
  mutate(., desc = "f_background") %>%
  left_join(., y = mRNATrack, by = "gene_name") %>%
  left_join(., y = GOterms, by = "transcript") %>%
  dplyr::select(., gene_name, GO) %>%
  unique(.) %>%
  group_by(., gene_name) %>%
  summarise(., GO = paste(GO, collapse = ",")) %>%
  write.table(., "../output/78-asca-methdiff/geneid2go-fem_altSplice.tab", quote = FALSE, sep = "\t", col.names = FALSE, row.names = FALSE) #Take list of all genes used for analysis and join with mRNA and GO term information. Select only gene names and GO terms, then retain unique rows. Group by gene name and collapse all GO terms for each gene into one row separated by commas. Save as a tab file
```

```{r}
geneID2GO <- readMappings(file = "../output/78-asca-methdiff/geneid2go-fem_altSplice.tab") #Loading the GO annotations and GeneIDs. Each line has one transcript ID and all associated GOterms
str(head(geneID2GO)) #Confirm file structure
length(geneID2GO) #11270 genes with annotations
```

```{r}
geneNames <- names(geneID2GO) #Extract names to use as a gene universe
head(geneNames)
```

```{r}
femAltSpliceGenes <- female_splice %>%
  dplyr::select(., gene_name) #Take data select the gene ID column. Save as a new object
femAltSpliceGenes <- femAltSpliceGenes$gene_name #Save column as a vector (will not work otherwise!)
```

```{r}
femAltSpliceGeneList <- factor(as.integer(geneNames %in% femAltSpliceGenes))  #Create a factor vector to indicate genes that increased transcripts as significant (1) and didn't as not significant (0)
names(femAltSpliceGeneList) <- geneNames
str(femAltSpliceGeneList)
```

### Run enrichment test

```{r}
femAltSpliceDataBP <- new("topGOdata", ontology = "BP", allGenes = femAltSpliceGeneList,
                          annot = annFUN.gene2GO, gene2GO = geneID2GO) #Create biological process topGO object
femAltSpliceDataBP #Get summary of object
```

```{r}
test.stat <- new("classicCount", testStatistic = GOFisherTest, name = "Fisher test")
resultFisher.femAltSpliceDataBP <- getSigGroups(femAltSpliceDataBP, test.stat)
resultFisher.femAltSpliceDataBP
```

```{r}
pvalFis.femAltSpliceDataBP <- score(resultFisher.femAltSpliceDataBP) #Extract p-values
head(pvalFis.femAltSpliceDataBP)
hist(pvalFis.femAltSpliceDataBP, 50, xlab = "p-values") #Plot histogram of p-values
```

```{r}
allRes.femAltSpliceDataBP <- GenTable(femAltSpliceDataBP, classic = resultFisher.femAltSpliceDataBP, ranksOf = "classic", orderBy = "classic", topNodes = length(pvalFis.femAltSpliceDataBP))
#Create a statistical results table with statistical test results. Order by p-value (classic), and include all results (topNodes)
head(allRes.femAltSpliceDataBP)
```

```{r}
write.csv(allRes.femAltSpliceDataBP, "../output/78-asca-methdiff/fem-altSplice-BP-FisherTestResults.csv", quote = FALSE, row.names = FALSE) #Save dataframe
```

### Match enriched GOterms with general annotation information

```{r}
sigRes.femAltSpliceDataBP <- allRes.femAltSpliceDataBP[1:38,c(1, 6)] #Filter significantly enriched GOterms, only keep GOID and p-value
colnames(sigRes.femAltSpliceDataBP) <- c("GO", "p.value") #Change column names
head(sigRes.femAltSpliceDataBP)
```

```{r}
female_splice %>%
  left_join(., y = mRNATrack, by = "gene_name") %>%
  left_join(., y = GOterms, by = "transcript") %>%
  separate(., col = product, into = c("product", "transcriptVar"), sep = "%2C transcript variant") %>%
  dplyr::select(., gene_name, desc, product, GO) %>%
  unique(.) %>%
  left_join(x = sigRes.femAltSpliceDataBP, y = ., by = "GO") %>%
  filter(., is.na(desc) == FALSE) %>%
  dplyr::select(., gene_name, desc, GO, p.value, product) %>%
  write.csv(., "../output/78-asca-methdiff/fem-altSplice-BP-EnrichedGO-geneAnnot.csv", quote = FALSE, row.names = FALSE) #Take genes that were alternatively splice and annotate. Remove extra information from annotations and select only columns of interest with unique rows. Join with enriched BPGO information and filter rows without gene matches. Reorder columns and save as a csv
```

# Males

## Import data

```{r}
male_splice <- read.table("../output/77-asca-exon/males_splicing_treatments_gene_list.csv", col.names = c("gene_name")) %>%
  mutate(desc = "m_diff") #Import the list of genes with changes in alternative splicing. Change column names and add descriptive column for what these genes are
head(male_splice) #Confirm import and formatting
```

```{r}
male_noSplice <- read.table("../output/77-asca-exon/males_splicing_nodifference_gene_list.csv", col.names = c("gene_name")) %>%
  mutate(desc = "m_same") #Import the list of genes with no change in alternative splicing. Change column names and add descriptive column for what these genes are
head(male_noSplice) #Confirm import and formatting
```

```{r}
male_dml <- read.table("../output/16-DML-annotation/male_dml_geneID.txt", col.names = c("gene_name")) %>%
  mutate(desc = "m_dml")
head(male_dml)
```

```{r}
male_40_meth <- read_csv("../output/73-gene-methylation/male_40_meth.csv") %>%
  dplyr::rename(., gene_name = "name") %>%
  mutate(gene_name = str_replace(gene_name, "gene-", "")) #Import data with average gene methylation for control and exposed samples
head(male_40_meth) #Confirm import
```

## Genes with changes in the maximum transcript expressed

```{r}
read.csv("../supplemental-files/01.01-fmcoe-max-predom-isos-gene_fpkm.csv") %>%
  dplyr::select(., c(1,3,5)) %>%
  mutate(., difference = males_exposed_max_transcript_counts - males_controls_max_transcript_counts) %>%
  filter(., difference != 0) %>%
  left_join(male_splice, ., by = "gene_name") %>%
  filter(., is.na(difference) == FALSE) %>%
  nrow(.) #4 genes with a change in maximum transcripts expressed were also alternatively spliced
```

```{r}
read.csv("../supplemental-files/01.01-fmcoe-max-predom-isos-gene_fpkm.csv") %>%
  dplyr::select(., c(1,3,5)) %>%
  mutate(., difference = males_exposed_max_transcript_counts - males_controls_max_transcript_counts) %>%
  filter(., difference != 0) %>%
  left_join(male_noSplice, ., by = "gene_name") %>%
  filter(., is.na(difference) == FALSE) %>%
  nrow(.) #14 genes with a change in maximum transcripts expressed were not alternatively spliced
```

## DML presence in alternatively spliced genes

```{r}
male_splice %>%
  left_join(., male_dml, by = "gene_name") %>%
  filter(., is.na(desc.y) == FALSE) %>%
  dplyr::select(., gene_name) %>%
  unique(.) %>%
  nrow(.) #6 alternatively spliced genes contain DML
```

```{r}
male_noSplice %>%
  left_join(., male_dml, by = "gene_name") %>%
  filter(., is.na(desc.y) == FALSE) %>%
  dplyr::select(., gene_name) %>%
  unique(.) %>%
  nrow(.) #13 not spliced gene contain DML
```

## Gene body methylation

To examine differences in gene body methylation, we'll create XY plots with average gene body methylation for control and OA-exposed samples. If genes that are differentially spliced significantly deviate from the 1-to-1 regression line, then it is possible methylation modulated alternative splicing.

### Format data

```{r}
m_splice_meth <- male_40_meth %>%
  left_join(male_splice, by = "gene_name") %>%
  filter(desc == "m_diff") #Join average gene body methylation data with spliced genes. filter for genes that are spliced
m_splice_meth #Confirm formatting
```

```{r}
m_noSplice_meth <- male_40_meth %>%
  left_join(male_noSplice, by = "gene_name") %>%
  filter(desc == "m_same") #Join average gene body methylation data with spliced genes. filter for genes that are not spliced
head(m_noSplice_meth) #Confirm changes
```

### Plot data

```{r}
m_allSplice_meth <- rbind(m_noSplice_meth, m_splice_meth) #Combine data to make plotting easier
head(m_allSplice_meth) #Confirm changes
```

```{r}
m_allSplice_meth %>%
  ggplot(aes(x = average_control, y = average_exposed, color = desc)) +
  geom_point() +
  geom_smooth(method = lm, data = m_noSplice_meth, aes(x = average_control, y = average_exposed), color = plotColors[5]) +
  geom_smooth(method = lm, data = m_splice_meth, aes(x = average_control, y = average_exposed), color = plotColors[8]) +
  labs(x = "Control Methylation (%)", y = "Exposed Methylation (%)") +
  scale_color_manual(name = "Splicing Pattern",
                     values = c(plotColors[8], plotColors[5]), 
                     breaks = c("m_diff", "m_same"),
                     labels = c("Difference", "No Difference")) +
  theme_classic(base_size = 15) #Take combined data and plot average control methylation vs. average exposed methylation. Assign color based on gene category. Add regressions with 95% confidence intervals for each data category separately
ggsave("../output/78-asca-methdiff/male-methylation-patterns-splice-type.pdf", height = 8.5, width = 11)
```

### Model data

I will use a binomial model to understand if average control methylation or average exposed methylation can predict success (gene being alternatively spliced by treatment). I will also include gene length in this model.

#### Format data

```{r}
m_allSplice_methLengths <- m_allSplice_meth %>%
  left_join(., geneLengths, by = "gene_name") #Join with gene length information
head(m_allSplice_methLengths) #Confirm changes
```

#### Run model

```{r}
maleSpliceMethModel <- glm(I(desc == "m_diff") ~ average_control + average_exposed + log10(length),
                           family = binomial(),
                           data = m_allSplice_methLengths) #Create a binomial model, where 1 = success = different splicing by treatment. Use average control methylation, average exposed methylation, and gene length as explanatory variables
summary(maleSpliceMethModel) #No significant terms
```

```{r}
write.csv(broom::tidy(maleSpliceMethModel), "../output/78-asca-methdiff/male-splicing-binomial-model-output.csv", quote = FALSE, row.names = FALSE) #Save model output
```

## Enrichment test

### Make input files

```{r}
read.csv("../output/72-exon-data-rfmt/male_exon_tf.csv") %>%
  select_if(~ !any(is.na(.))) %>%
  t(.) %>% as.data.frame(.) %>%
  rownames_to_column(., var = "gene_name") %>%
  dplyr::slice(., -1) %>%
  dplyr::select(., gene_name) %>%
  mutate(., desc = "m_background") %>%
  left_join(., y = mRNATrack, by = "gene_name") %>%
  left_join(., y = GOterms, by = "transcript") %>%
  dplyr::select(., gene_name, GO) %>%
  unique(.) %>%
  group_by(., gene_name) %>%
  summarise(., GO = paste(GO, collapse = ",")) %>%
  write.table(., "../output/78-asca-methdiff/geneid2go-male_altSplice.tab", quote = FALSE, sep = "\t", col.names = FALSE, row.names = FALSE) #Take list of important genes from all PCs and join with mRNA and GO term information. Select only gene names and GO terms, then retain unique rows. Group by gene name and collapse all GO terms for each gene into one row separated by commas. Save as a tab file
```

```{r}
geneID2GO <- readMappings(file = "../output/78-asca-methdiff/geneid2go-male_altSplice.tab") #Loading the GO annotations and GeneIDs. Each line has one transcript ID and all associated GOterms
str(head(geneID2GO)) #Confirm file structure
length(geneID2GO) #12641 genes with annotations
```

```{r}
geneNames <- names(geneID2GO) #Extract names to use as a gene universe
head(geneNames)
```

```{r}
maleAltSpliceGenes <- male_splice %>%
  dplyr::select(., gene_name) #Take data select the gene ID column. Save as a new object
maleAltSpliceGenes <- maleAltSpliceGenes$gene_name #Save column as a vector (will not work otherwise!)
```

```{r}
maleAltSpliceGeneList <- factor(as.integer(geneNames %in% maleAltSpliceGenes))  #Create a factor vector to indicate genes that increased transcripts as significant (1) and didn't as not significant (0)
names(maleAltSpliceGeneList) <- geneNames
str(maleAltSpliceGeneList)
```

### Run enrichment test

```{r}
maleAltSpliceDataBP <- new("topGOdata", ontology = "BP", allGenes = maleAltSpliceGeneList,
                           annot = annFUN.gene2GO, gene2GO = geneID2GO) #Create biological process topGO object
maleAltSpliceDataBP #Get summary of object
```

```{r}
test.stat <- new("classicCount", testStatistic = GOFisherTest, name = "Fisher test")
resultFisher.maleAltSpliceDataBP <- getSigGroups(maleAltSpliceDataBP, test.stat)
resultFisher.maleAltSpliceDataBP
```

```{r}
pvalFis.maleAltSpliceDataBP <- score(resultFisher.maleAltSpliceDataBP) #Extract p-values
head(pvalFis.maleAltSpliceDataBP)
hist(pvalFis.maleAltSpliceDataBP, 50, xlab = "p-values") #Plot histogram of p-values
```

```{r}
allRes.maleAltSpliceDataBP <- GenTable(maleAltSpliceDataBP, classic = resultFisher.maleAltSpliceDataBP, ranksOf = "classic", orderBy = "classic", topNodes = length(pvalFis.maleAltSpliceDataBP))
#Create a statistical results table with statistical test results. Order by p-value (classic), and include all results (topNodes)
head(allRes.maleAltSpliceDataBP)
```

```{r}
write.csv(allRes.maleAltSpliceDataBP, "../output/78-asca-methdiff/male-altSplice-BP-FisherTestResults.csv", quote = FALSE, row.names = FALSE) #Save dataframe
```

### Match enriched GOterms with general annotation information

```{r}
sigRes.maleAltSpliceDataBP <- allRes.maleAltSpliceDataBP[1:26,c(1, 6)] #Filter significantly enriched GOterms, only keep GOID and p-value
colnames(sigRes.maleAltSpliceDataBP) <- c("GO", "p.value") #Change column names
head(sigRes.maleAltSpliceDataBP)
```

```{r}
male_splice %>%
  left_join(., y = mRNATrack, by = "gene_name") %>%
  left_join(., y = GOterms, by = "transcript") %>%
  separate(., col = product, into = c("product", "transcriptVar"), sep = "%2C transcript variant") %>%
  dplyr::select(., gene_name, desc, product, GO) %>%
  unique(.) %>%
  left_join(x = sigRes.maleAltSpliceDataBP, y = ., by = "GO") %>%
  filter(., is.na(desc) == FALSE) %>%
  dplyr::select(., gene_name, desc, GO, p.value, product) %>%
  write.csv(., "../output/78-asca-methdiff/male-altSplice-BP-EnrichedGO-geneAnnot.csv", quote = FALSE, row.names = FALSE) #Take genes that were alternatively splice and annotate. Remove extra information from annotations and select only columns of interest with unique rows. Join with enriched BPGO information and filter rows without gene matches. Reorder columns and save as a csv
```


# Create multipanel plot

```{r}
femAllSpliceMethPlot <- f_allSplice_meth %>%
  ggplot(aes(x = average_control, y = average_exposed, color = desc)) +
  geom_point() +
  geom_smooth(method = lm, data = f_noSplice_meth, aes(x = average_control, y = average_exposed), color = plotColors[5], lty = 2, alpha = 0.3) +
  geom_smooth(method = lm, data = f_splice_meth, aes(x = average_control, y = average_exposed), color = plotColors[8], lty = 2, alpha = 0.3) +
  xlim(c(0,100)) +
  scale_y_continuous(breaks = seq(0, 100, 25),
                     limits = c(0,110)) +
  labs(x = "", y = "Exposed Methylation (%)", title = "A. Females") +
  scale_color_manual(values = c(plotColors[8], plotColors[5]), 
                     breaks = c("f_diff", "f_same")) +
  theme_classic(base_size = 15) + theme(legend.position = "none")

maleAllSpliceMethPlot <- m_allSplice_meth %>%
  ggplot(aes(x = average_control, y = average_exposed, color = desc)) +
  geom_point() +
  geom_smooth(method = lm, data = m_noSplice_meth, aes(x = average_control, y = average_exposed), color = plotColors[5], lty = 2, alpha = 0.3) +
  geom_smooth(method = lm, data = m_splice_meth, aes(x = average_control, y = average_exposed), color = plotColors[8], lty = 2, alpha = 0.3) +
  xlim(c(0,100)) + 
  scale_y_continuous(breaks = seq(0, 100, 25),
                     limits = c(0,110)) +
  labs(x = "Control Methylation (%)", y = "Exposed Methylation (%)", title = "B. Males") +
  scale_color_manual(name = "Splicing Pattern",
                     values = c(plotColors[8], plotColors[5]), 
                     breaks = c("m_diff", "m_same"),
                     labels = c("Difference", "No Difference")) +
  theme_classic(base_size = 15) + theme(legend.position = "bottom")
```

```{r}
femAllSpliceMethPlot / maleAllSpliceMethPlot #Create patchwork plot
ggsave("../output/78-asca-methdiff/methylation-patterns-splice-type.pdf", height = 8.5, width = 11)
```