--- title: "Functional Enrichment" author: "Yaamini Venkataraman" output: html_document --- In this script, I'll create summary tables for genomic location information, run statistcal tests, and visualize the distribution of DML in the genome. # Prepare R Markdown file ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) knitr::opts_knit$set(root.dir = "/Users/yaaminivenkataraman/Documents/project-oyster-oa/analyses/Haws_09-functional-enrichment/") #Set root directory ``` # Install packages ```{r} require(BiocManager) BiocManager::install("topGO") BiocManager::install("simplifyEnrichment") #Needs newest R Version (4.1.0) ``` ```{r} require(BiocManager) require(topGO) require(simplifyEnrichment) ``` ```{r} require(tidyverse) ``` # Obtain session information ```{r} sessionInfo() ``` # Describe GO terms associated with DML Before launching into enrichment, I want to create lists of GOterms associated with each DML list for descriptive purposes. ## ploidy-DML ### Remove overlaps with C/T SNPs ```{r} ploidyDMLGOterms <- read.table("../Haws_08-GOterm-annotation/DML-ploidy-DSS.GeneIDs.geneOverlap.transcriptIDs.GOAnnot", header = FALSE, sep = "\t", col.names = c("transcript", "geneID", "chr", "start", "end", "GOID", "GOterm", "GOSlim", "GOcat")) #Import list of GO term annotations for ploidy DML ploidyDMLGOterms$chrStartEnd <- paste(ploidyDMLGOterms$chr, "-", ploidyDMLGOterms$start, "-", ploidyDMLGOterms$end, sep = "") #Create a column with chr, start, and end information separated by "-" head(ploidyDMLGOterms) ``` ```{r} uniqueSNPPloidy <- read.table("../Haws_07-DML-characterization/DML-ploidy-DSS-unique-CT-SNPs.bed", header = FALSE, sep = "\t", col.names = c("chr", "start", "end")) #Import overlaps between unique C/T SNPs and all DML produced with bedtools uniqueSNPPloidy$chrStartEnd <- paste(uniqueSNPPloidy$chr, "-", uniqueSNPPloidy$start, "-", uniqueSNPPloidy$end, sep = "") #Create a column with chr, start, and end information separated by "-" head(uniqueSNPPloidy) #Confirm import ``` ```{r} ploidyDMLGOterms <- left_join(x = ploidyDMLGOterms, y = uniqueSNPPloidy, by = "chrStartEnd") #Add all columns from uniqueSNPPloidy to ploidyDMLGOterms ploidyDMLGOtermsFiltered <- filter(ploidyDMLGOterms, is.na(chr.y) == "TRUE") %>% select(!c(chr.y, start.y, end.y)) #Keep all rows where chr.y has an "NA", since this indicates there was no matching entry in the DML-SNP overlap dataframe. Then drop chr.y, start.y, and end.y length(ploidyDMLGOtermsFiltered[,1]) #7246 rows without SNP overlaps colnames(ploidyDMLGOtermsFiltered) <- c("transcript", "geneID", "chr", "start", "end", "GOID", "GOterm", "GOSlim", "GOcat", "chrStartEnd") #Rename columns for consistency head(ploidyDMLGOtermsFiltered) #Confirm formatting ``` ```{r} length(unique(ploidyDMLGOtermsFiltered$geneID)) #84 annotated genes length(unique(ploidyDMLGOtermsFiltered$chrStartEnd)) #103 DML in annotated genes ``` ### Summarize GOterms ```{r} ploidyDMLGOtermsFilteredCounts <- as.data.frame(table(ploidyDMLGOtermsFiltered$GOID)) #Create table with GOID and counts colnames(ploidyDMLGOtermsFilteredCounts) <- c("GOID", "Freq") #Add column names range(ploidyDMLGOtermsFilteredCounts$Freq) #Get range of GOID frequency nrow(ploidyDMLGOtermsFilteredCounts) #Number of unique GOIDs associated with ploidy-DML ``` ```{r} ploidyDMLGOtermsBP <- ploidyDMLGOtermsFiltered %>% filter(., ploidyDMLGOtermsFiltered$GOcat == "P") #Filter biological process terms ploidyDMLGOtermCountsBP <- as.data.frame(table(ploidyDMLGOtermsBP$GOID)) #Count GOIDs colnames(ploidyDMLGOtermCountsBP) <- c("GOID", "Freq") #Add column names ploidyDMLGOtermCountsBP <- ploidyDMLGOtermCountsBP %>% filter(ploidyDMLGOtermCountsBP$Freq != 0) range(ploidyDMLGOtermCountsBP$Freq) #Get range of GOID frequency nrow(ploidyDMLGOtermCountsBP) #Number of unique GOIDs associated with ploidy-DML ``` ```{r} write.csv(ploidyDMLGOtermCountsBP, "ploidy-BP-GOterms.csv", quote = FALSE, row.names = FALSE) ``` ## pH-DML ### Remove overlaps with C/T SNPs ```{r} pHDMLGOterms <- read.table("../Haws_08-GOterm-annotation/DML-pH-DSS.GeneIDs.geneOverlap.transcriptIDs.GOAnnot", header = FALSE, sep = "\t", col.names = c("transcript", "geneID", "chr", "start", "end", "GOID", "GOterm", "GOSlim", "GOcat")) #Import list of GO term annotations for pH DML pHDMLGOterms$chrStartEnd <- paste(pHDMLGOterms$chr, "-", pHDMLGOterms$start, "-", pHDMLGOterms$end, sep = "") #Create a column with chr, start, and end information separated by "-" head(pHDMLGOterms) ``` ```{r} uniqueSNPpH <- read.table("../Haws_07-DML-characterization/DML-pH-DSS-unique-CT-SNPs.bed", header = FALSE, sep = "\t", col.names = c("chr", "start", "end")) #Import overlaps between unique C/T SNPs and all DML produced with bedtools uniqueSNPpH$chrStartEnd <- paste(uniqueSNPpH$chr, "-", uniqueSNPpH$start, "-", uniqueSNPpH$end, sep = "") #Create a column with chr, start, and end information separated by "-" head(uniqueSNPpH) #Confirm import ``` ```{r} pHDMLGOterms <- left_join(x = pHDMLGOterms, y = uniqueSNPpH, by = "chrStartEnd") #Add all columns from uniqueSNPpH to pHDMLGOterms pHDMLGOtermsFiltered <- filter(pHDMLGOterms, is.na(chr.y) == "TRUE") %>% select(!c(chr.y, start.y, end.y)) #Keep all rows where chr.y has an "NA", since this indicates there was no matching entry in the DML-SNP overlap dataframe. Then drop chr.y, start.y, and end.y length(pHDMLGOtermsFiltered[,1]) #4426 rows without SNP overlaps colnames(pHDMLGOtermsFiltered) <- c("transcript", "geneID", "chr", "start", "end", "GOID", "GOterm", "GOSlim", "GOcat", "chrStartEnd") #Rename columns for consistency head(pHDMLGOtermsFiltered) #Confirm formatting ``` ```{r} length(unique(pHDMLGOtermsFiltered$geneID)) #69 annotated genes length(unique(pHDMLGOtermsFiltered$chrStartEnd)) #87 DML in annotated genes ``` ### Summarize GOterms ```{r} pHDMLGOtermsFilteredCounts <- as.data.frame(table(pHDMLGOtermsFiltered$GOID)) #Create table with GOID and counts colnames(pHDMLGOtermsFilteredCounts) <- c("GOID", "Freq") #Add column names range(pHDMLGOtermsFilteredCounts$Freq) #Get range of GOID frequency nrow(pHDMLGOtermsFilteredCounts) #Number of unique GOIDs associated with pH-DML ``` ```{r} pHDMLGOtermsBP <- pHDMLGOtermsFiltered %>% filter(., pHDMLGOtermsFiltered$GOcat == "P") #Filter biological process terms pHDMLGOtermCountsBP <- as.data.frame(table(pHDMLGOtermsBP$GOID)) #Count GOIDs colnames(pHDMLGOtermCountsBP) <- c("GOID", "Freq") #Add column names pHDMLGOtermCountsBP <- pHDMLGOtermCountsBP %>% filter(pHDMLGOtermCountsBP$Freq != 0) range(pHDMLGOtermCountsBP$Freq) #Get range of GOID frequency nrow(pHDMLGOtermCountsBP) #Number of unique GOIDs associated with pH-DML ``` ```{r} write.csv(pHDMLGOtermCountsBP, "pH-BP-GOterms.csv", quote = FALSE, row.names = FALSE) ``` ```{r} pHDMLGOtermsMF <- pHDMLGOtermsFiltered %>% filter(., pHDMLGOtermsFiltered$GOcat == "F") #Filter biological process terms pHDMLGOtermCountsMF <- as.data.frame(table(pHDMLGOtermsMF$GOID)) #Count GOIDs colnames(pHDMLGOtermCountsMF) <- c("GOID", "Freq") #Add column names pHDMLGOtermCountsMF <- pHDMLGOtermCountsMF %>% filter(pHDMLGOtermCountsMF$Freq != 0) range(pHDMLGOtermCountsMF$Freq) #Get range of GOID frequency nrow(pHDMLGOtermCountsMF) #Number of unique GOIDs associated with pH-DML ``` ```{r} write.csv(pHDMLGOtermCountsMF, "pH-MF-GOterms.csv", quote = FALSE, row.names = FALSE) ``` ## ploidypH-DML ### Remove overlaps with C/T SNPs ```{r} ploidypHDMLGOterms <- read.table("../Haws_08-GOterm-annotation/DML-ploidypH-DSS.GeneIDs.geneOverlap.transcriptIDs.GOAnnot", header = FALSE, sep = "\t", col.names = c("transcript", "geneID", "chr", "start", "end", "GOID", "GOterm", "GOSlim", "GOcat")) #Import list of GO term annotations for ploidypH DML ploidypHDMLGOterms$chrStartEnd <- paste(ploidypHDMLGOterms$chr, "-", ploidypHDMLGOterms$start, "-", ploidypHDMLGOterms$end, sep = "") #Create a column with chr, start, and end information separated by "-" head(ploidypHDMLGOterms) ``` ```{r} uniqueSNPploidypH <- read.table("../Haws_07-DML-characterization/DML-ploidypH-DSS-unique-CT-SNPs.bed", header = FALSE, sep = "\t", col.names = c("chr", "start", "end")) #Import overlaps between unique C/T SNPs and all DML produced with bedtools uniqueSNPploidypH$chrStartEnd <- paste(uniqueSNPploidypH$chr, "-", uniqueSNPploidypH$start, "-", uniqueSNPploidypH$end, sep = "") #Create a column with chr, start, and end information separated by "-" head(uniqueSNPploidypH) #Confirm import ``` ```{r} ploidypHDMLGOterms <- left_join(x = ploidypHDMLGOterms, y = uniqueSNPploidypH, by = "chrStartEnd") #Add all columns from uniqueSNPploidypH to ploidypHDMLGOterms ploidypHDMLGOtermsFiltered <- filter(ploidypHDMLGOterms, is.na(chr.y) == "TRUE") %>% select(!c(chr.y, start.y, end.y)) #Keep all rows where chr.y has an "NA", since this indicates there was no matching entry in the DML-SNP overlap dataframe. Then drop chr.y, start.y, and end.y length(ploidypHDMLGOtermsFiltered[,1]) #1126 rows without SNP overlaps colnames(ploidypHDMLGOtermsFiltered) <- c("transcript", "geneID", "chr", "start", "end", "GOID", "GOterm", "GOSlim", "GOcat", "chrStartEnd") #Rename columns for consistency head(ploidypHDMLGOtermsFiltered) #Confirm formatting ``` ```{r} length(unique(ploidypHDMLGOtermsFiltered$geneID)) #19 annotated genes length(unique(ploidypHDMLGOtermsFiltered$chrStartEnd)) #17 DML in annotated genes ``` ### Summarize GOterms ```{r} ploidypHDMLGOtermsFilteredCounts <- as.data.frame(table(ploidypHDMLGOtermsFiltered$GOID)) #Create table with GOID and counts colnames(ploidypHDMLGOtermsFilteredCounts) <- c("GOID", "Freq") #Add column names range(ploidypHDMLGOtermsFilteredCounts$Freq) #Get range of GOID frequency nrow(ploidypHDMLGOtermsFilteredCounts) #Number of unique GOIDs associated with ploidypH-DML ``` ```{r} ploidypHDMLGOtermsBP <- ploidypHDMLGOtermsFiltered %>% filter(., ploidypHDMLGOtermsFiltered$GOcat == "P") #Filter biological process terms ploidypHDMLGOtermCountsBP <- as.data.frame(table(ploidypHDMLGOtermsBP$GOID)) #Count GOIDs colnames(ploidypHDMLGOtermCountsBP) <- c("GOID", "Freq") #Add column names ploidypHDMLGOtermCountsBP <- ploidypHDMLGOtermCountsBP %>% filter(ploidypHDMLGOtermCountsBP$Freq != 0) range(ploidypHDMLGOtermCountsBP$Freq) #Get range of GOID frequency nrow(ploidypHDMLGOtermCountsBP) #Number of unique GOIDs associated with ploidypH-DML ``` ```{r} write.csv(ploidypHDMLGOtermCountsBP, "ploidypH-BP-GOterms.csv", quote = FALSE, row.names = FALSE) ``` # Gene enrichment with `topGO` Following protocol from [Chandra Rajan et al. 2021](https://onlinelibrary.wiley.com/doi/full/10.1111/gcb.15675#support-information-section). Script found [here](https://onlinelibrary-wiley-com.offcampus.lib.washington.edu/action/downloadSupplement?doi=10.1111%2Fgcb.15675&file=gcb15675-sup-0002-FileS2.txt). ## Load data for gene enrichment ```{r} geneID2GO <- readMappings(file = "../Haws_08-GOterm-annotation/geneid2go-union1x.tab") #Loading the GO annotations and GeneIDs. Each line has one transcript ID and all associated GOterms str(head(geneID2GO)) #Confirm file structure ``` ```{r} geneNames <- names(geneID2GO) #Extract names to use as gene universe head(geneNames) ``` ## ploidy-DML I'm using the list of genes with DML as my list of interest. ```{r} ploidyGenes <- ploidyDMLGOtermsFiltered$transcript #Extract transcript ID ploidyGeneList <- factor(as.integer(geneNames %in% ploidyGenes)) names(ploidyGeneList) <- ploidyGenes str(ploidyGeneList) ``` ### Biological processes ```{r} ploidyGOdataBP <- new("topGOdata", ontology = "BP", allGenes = ploidyGeneList, annot = annFUN.gene2GO, gene2GO = geneID2GO) #Create biological process topGO object ploidyGOdataBP #Get summary of object ``` ```{r} test.stat <- new("classicCount", testStatistic = GOFisherTest, name = "Fisher test") resultFisher.ploidyBP <- getSigGroups(ploidyGOdataBP, test.stat) resultFisher.ploidyBP ``` ```{r} pvalFis.ploidyBP <- score(resultFisher.ploidyBP) #Extract p-values head(pvalFis.ploidyBP) hist(pvalFis.ploidyBP, 50, xlab = "p-values") #Plot histogram of p-values ``` ```{r} allRes.ploidyBP <- GenTable(ploidyGOdataBP, classic = resultFisher.ploidyBP, ranksOf = "classic", orderBy = "classic", topNodes = length(pvalFis.ploidyBP)) #Create a statistical results table with statistical test results. Order by p-value (classic), and include all results (topNodes) head(allRes.ploidyBP) ``` ```{r} write.csv(allRes.ploidyBP, "ploidy-BP-FisherTestResults.csv", quote = FALSE, row.names = FALSE) #Save dataframe ``` #### Match enriched GOterms with general annotation information ```{r} sigRes.ploidyBP <- allRes.ploidyBP[1:3,c(1, 6)] #Filter significantly enriched GOterms, only keep GOID and p-value colnames(sigRes.ploidyBP) <- c("GOID", "p.value") #Change column names head(sigRes.ploidyBP) ``` ```{r} sigRes.ploidyBPAnnot <- merge(sigRes.ploidyBP, ploidyDMLGOtermsBP, by = "GOID") #Additional annotations sigRes.ploidyBPAnnot <- unique(sigRes.ploidyBPAnnot[,-11]) #Drop GOcat column and retain only unique lines length(unique(sigRes.ploidyBPAnnot$geneID)) #1 unique genes length(unique(sigRes.ploidyBPAnnot$transcript)) #3 unique transcripts head(sigRes.ploidyBPAnnot) #Confirm formatting ``` ```{r} write.csv(sigRes.ploidyBPAnnot, "ploidy-BP-EnrichedGO-DML-withTranscript.csv", quote = FALSE, row.names = FALSE) #Save dataframe ``` ```{r} sigRes.ploidyBPAnnotnoT <- unique(sigRes.ploidyBPAnnot[,-3]) #Drop transcript column and keep only unique rows head(sigRes.ploidyBPAnnotnoT) #Confirm formatting ``` ```{r} write.csv(sigRes.ploidyBPAnnotnoT, "ploidy-BP-EnrichedGO-DML.csv", quote = FALSE, row.names = FALSE) #Save dataframe ``` ### Cellular components ```{r} ploidyGOdataCC <- new("topGOdata", ontology = "CC", allGenes = ploidyGeneList, annot = annFUN.gene2GO, gene2GO = geneID2GO) #Create biological process topGO object ploidyGOdataCC #Get summary of object ``` ```{r} resultFisher.ploidyCC <- getSigGroups(ploidyGOdataCC, test.stat) resultFisher.ploidyCC ``` ### Molecular function ```{r} ploidyGOdataMF <- new("topGOdata", ontology = "MF", allGenes = ploidyGeneList, annot = annFUN.gene2GO, gene2GO = geneID2GO) #Create biological process topGO object ploidyGOdataMF #Get summary of object ``` ```{r} resultFisher.ploidyMF <- getSigGroups(ploidyGOdataMF, test.stat) #Extract significant GOterms using Fisher exact test resultFisher.ploidyMF #Get results summary ``` ```{r} write.csv(allRes.ploidyMF, "ploidy-MF-FisherTestResults.csv", quote = FALSE, row.names = FALSE) #Save dataframe ``` ## pH-DML ```{r} pHGenes <- pHDMLGOtermsFiltered$transcript #Extract transcript ID pHGeneList <- factor(as.integer(geneNames %in% pHGenes)) names(pHGeneList) <- pHGenes str(pHGeneList) ``` ### Biological processes ```{r} pHGOdataBP <- new("topGOdata", ontology = "BP", allGenes = pHGeneList, annot = annFUN.gene2GO, gene2GO = geneID2GO) #Create biological process topGO object pHGOdataBP #Get summary of object ``` ```{r} test.stat <- new("classicCount", testStatistic = GOFisherTest, name = "Fisher test") resultFisher.pHBP <- getSigGroups(pHGOdataBP, test.stat) resultFisher.pHBP ``` ```{r} pvalFis.pHBP <- score(resultFisher.pHBP) #Extract p-values head(pvalFis.pHBP) hist(pvalFis.pHBP, 50, xlab = "p-values") #Plot histogram of p-values ``` ```{r} allRes.pHBP <- GenTable(pHGOdataBP, classic = resultFisher.pHBP, ranksOf = "classic", orderBy = "classic", topNodes = length(pvalFis.pHBP)) #Create a statistical results table with statistical test results. Order by p-value (classic), and include all results (topNodes) head(allRes.pHBP) ``` ```{r} write.csv(allRes.pHBP, "pH-BP-FisherTestResults.csv", quote = FALSE, row.names = FALSE) #Save dataframe ``` #### Match enriched GOterms with general annotation information ```{r} sigRes.pHBP <- allRes.pHBP[1:10,c(1, 6)] #Filter significantly enriched GOterms, only keep GOID and p-value colnames(sigRes.pHBP) <- c("GOID", "p.value") #Change column names head(sigRes.pHBP) ``` ```{r} sigRes.pHBPAnnot <- merge(sigRes.pHBP, pHDMLGOtermsBP, by = "GOID") #Additional annotations sigRes.pHBPAnnot <- unique(sigRes.pHBPAnnot[,-11]) #Drop GOcat column and retain only unique lines length(unique(sigRes.pHBPAnnot$geneID)) #8 unique genes length(unique(sigRes.pHBPAnnot$transcript)) #29 unique transcripts head(sigRes.pHBPAnnot) #Confirm formatting ``` ```{r} write.csv(sigRes.pHBPAnnot, "pH-BP-EnrichedGO-DML-withTranscript.csv", quote = FALSE, row.names = FALSE) #Save dataframe ``` ```{r} sigRes.pHBPAnnotnoT <- unique(sigRes.pHBPAnnot[,-3]) #Drop transcript column and keep only unique rows head(sigRes.pHBPAnnotnoT) #Confirm formatting ``` ```{r} write.csv(sigRes.pHBPAnnotnoT, "pH-BP-EnrichedGO-DML.csv", quote = FALSE, row.names = FALSE) #Save dataframe ``` ### Cellular components ```{r} pHGOdataCC <- new("topGOdata", ontology = "CC", allGenes = pHGeneList, annot = annFUN.gene2GO, gene2GO = geneID2GO) #Create biological process topGO object pHGOdataCC #Get summary of object ``` ```{r} resultFisher.pHCC <- getSigGroups(pHGOdataCC, test.stat) resultFisher.pHCC ``` ### Molecular function ```{r} pHGOdataMF <- new("topGOdata", ontology = "MF", allGenes = pHGeneList, annot = annFUN.gene2GO, gene2GO = geneID2GO) #Create biological process topGO object pHGOdataMF #Get summary of object ``` ```{r} resultFisher.pHMF <- getSigGroups(pHGOdataMF, test.stat) #Extract significant GOterms using Fisher exact test resultFisher.pHMF #Get results summary ``` ```{r} pvalFis.pHMF <- score(resultFisher.pHMF) #Extract p-values head(pvalFis.pHMF) hist(pvalFis.pHMF, 50, xlab = "p-values") #Plot histogram of p-values ``` ```{r} allRes.pHMF <- GenTable(pHGOdataMF, classic = resultFisher.pHMF, ranksOf = "classic", orderBy = "classic", topNodes = length(pvalFis.pHMF)) #Create a statistical results table with statistical test results. Order by p-value (classic), and include all results (topNodes) head(allRes.pHMF) ``` ```{r} write.csv(allRes.pHMF, "pH-MF-FisherTestResults.csv", quote = FALSE, row.names = FALSE) #Save dataframe ``` #### Match enriched GOterms with general annotation information ```{r} sigRes.pHMF <- allRes.pHMF[1:8,c(1, 6)] #Filter significantly enriched GOterms, only keep GOID and p-value colnames(sigRes.pHMF) <- c("GOID", "p.value") #Change column names head(sigRes.pHMF) ``` ```{r} sigRes.pHMFAnnot <- merge(sigRes.pHMF, pHDMLGOtermsMF, by = "GOID") #Additional annotations sigRes.pHMFAnnot <- unique(sigRes.pHMFAnnot[,-11]) #Drop GOcat column and retain only unique lines length(unique(sigRes.pHMFAnnot$geneID)) #13 unique genes length(unique(sigRes.pHMFAnnot$transcript)) #59 unique transcripts head(sigRes.pHMFAnnot) #Confirm formatting ``` ```{r} write.csv(sigRes.pHMFAnnot, "pH-MF-EnrichedGO-DML-withTranscript.csv", quote = FALSE, row.names = FALSE) #Save dataframe ``` ```{r} sigRes.pHMFAnnotnoT <- unique(sigRes.pHMFAnnot[,-3]) #Drop transcript column and keep only unique rows head(sigRes.pHMFAnnotnoT) #Confirm formatting ``` ```{r} write.csv(sigRes.pHMFAnnotnoT, "pH-MF-EnrichedGO-DML.csv", quote = FALSE, row.names = FALSE) #Save dataframe ``` ## ploidypH-DML ```{r} ploidypHGenes <- ploidypHDMLGOtermsFiltered$transcript #Extract transcript ID ploidypHGeneList <- factor(as.integer(geneNames %in% ploidypHGenes)) names(ploidypHGeneList) <- ploidypHGenes str(ploidypHGeneList) ``` ### Biological processes ```{r} ploidypHGOdataBP <- new("topGOdata", ontology = "BP", allGenes = ploidypHGeneList, annot = annFUN.gene2GO, gene2GO = geneID2GO) #Create biological process topGO object ploidypHGOdataBP #Get summary of object ``` ```{r} test.stat <- new("classicCount", testStatistic = GOFisherTest, name = "Fisher test") resultFisher.ploidypHBP <- getSigGroups(ploidypHGOdataBP, test.stat) resultFisher.ploidypHBP ``` ### Cellular components ```{r} ploidypHGOdataCC <- new("topGOdata", ontology = "CC", allGenes = ploidypHGeneList, annot = annFUN.gene2GO, gene2GO = geneID2GO) #Create biological process topGO object ploidypHGOdataCC #Get summary of object ``` ```{r} resultFisher.ploidypHCC <- getSigGroups(ploidypHGOdataCC, test.stat) resultFisher.ploidypHCC ``` ### Molecular function ```{r} ploidypHGOdataMF <- new("topGOdata", ontology = "MF", allGenes = ploidypHGeneList, annot = annFUN.gene2GO, gene2GO = geneID2GO) #Create biological process topGO object ploidypHGOdataMF #Get summary of object ``` ```{r} resultFisher.ploidypHMF <- getSigGroups(ploidypHGOdataMF, test.stat) #Extract significant GOterms using Fisher exact test resultFisher.ploidypHMF #Get results summary ``` ```{r} save.image("../../project-oyster-oa.RData") #Save R Data ``` # Figures I will use `simplifyEnrichment` to create heatmaps that cluster GO terms by semantic similarity. ```{bash} mkdir figures ``` ## ploidy-DML ### Biological processes ```{r} matPloidyBP <- GO_similarity(go_id = sigRes.ploidyBP$GOID, ont = "BP") #Calculate the semantic similarity matrix using the Rel method (default) ``` ```{r} #pdf("figures/simplifyEnrichment-ploidy-BP.pdf", width = 11, height = 8.5) #Save figure simplifyGO(matPloidyBP, column_title = "", col = rev(plotColors), fontsize_range = c(10,40), word_cloud_grob_param = list(col = "black", max_width = 25)) #Plot GOterms based on semantic similarity. Do not include a column title. Set colors to be plot colors, and set fontsize to range from 10 to 40. Pass arguments to word_cloud_grob_param to dictate the colors of the words and maximum width #dev.off() ``` ```{r} dfPloidyBP <- simplifyGO(matPloidyBP, plot = FALSE) #Cluster GOterms by semantic similarity head(dfPloidyBP) #Dataframe with GOIDs, GOterms, and cluster sort(table(dfPloidyBP$cluster)) #Obtain size of clusters ``` ## pH-DML ### Biological processes ```{r} matpHBP <- GO_similarity(go_id = sigRes.pHBP$GOID, ont = "BP") #Calculate the semantic similarity matrix using the Rel method (default) ``` ```{r} #pdf("figures/simplifyEnrichment-pH-BP.pdf", width = 11, height = 8.5) #Save figure simplifyGO(matpHBP, column_title = "", col = rev(plotColors), fontsize_range = c(10,40), word_cloud_grob_param = list(col = "black", max_width = 25)) #Plot GOterms based on semantic similarity. Do not include a column title. Set colors to be plot colors, and set fontsize to range from 10 to 40. Pass arguments to word_cloud_grob_param to dictate the colors of the words and maximum width #dev.off() ``` ```{r} dfpHBP <- simplifyGO(matpHBP, plot = FALSE) #Cluster GOterms by semantic similarity head(dfpHBP) #Dataframe with GOIDs, GOterms, and cluster sort(table(dfpHBP$cluster)) #Obtain size of clusters ``` ### Molecular function ```{r} matpHMF <- GO_similarity(go_id = sigRes.pHMF$GOID, ont = "MF") #Calculate the semantic similarity matrix using the Rel method (default) ``` ```{r} #pdf("figures/simplifyEnrichment-pH-MF.pdf", width = 11, height = 8.5) #Save figure simplifyGO(matpHMF, column_title = "", col = rev(plotColors), fontsize_range = c(10,40), word_cloud_grob_param = list(col = "black", max_width = 25)) #Plot GOterms based on semantic similarity. Do not include a column title. Set colors to be plot colors, and set fontsize to range from 10 to 40. Pass arguments to word_cloud_grob_param to dictate the colors of the words and maximum width #dev.off() ``` ```{r} dfpHMF <- simplifyGO(matpHMF, plot = FALSE) #Cluster GOterms by semantic similarity head(dfpHMF) #Dataframe with GOIDs, GOterms, and cluster sort(table(dfpHMF$cluster)) #Obtain size of clusters ``` ```{r} save.image("../../project-oyster-oa.RData") #Save R Data ```