--- title: "Proportion Test" author: "Yaamini Venkataraman" date: "1/15/2019" output: html_document --- I will use a proportion test to compare the proportion of genome feature overlaps between differentially methylated loci (DML), differentially methylated regions (DMR), and the gene background. ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` # Obtain session information ```{r} sessionInfo() ``` # Import data ```{r} overlapData <- read.csv("2019-01-15-Overlap-Proportions.csv", header = TRUE) rownames(overlapData) <- overlapData$genomicFeature #Set genomic feature indication and rownames overlapData <- overlapData[,-1] #Remove genomic feature indication column head(overlapData) #Confirm import ``` # Conduct chi-squared tests of homogeneity The null hypothesis is that loci distributions in the genome are the same between different categories. ## Total CpGs vs. all CpGs with 5x coverage enriched by MBD treatment I want to see if MBD enriched CpGs in certain areas of the genome. ```{r} totalVersusEnriched <- overlapData[, 1:2] #Subset data for statistical testing totalVersusEnriched <- totalVersusEnriched[-6, ] #Remove totalLines row head(totalVersusEnriched) #Confirm changes ``` ```{r} totalVersusEnrichedTest <- chisq.test(t(totalVersusEnriched)) #Conduct a chi-squared test totalVersusEnrichedTest #The distribution of enriched CpGs from MBD is different from the distribution of all CG motifs in the C. virginica genome ``` ```{r} totalVersusEnrichedTest$observed #Look at observed valules totalVersusEnrichedTest$expected #Look at expected values totalVersusEnrichedTest$residuals #Look at residuals ``` ## Enriched CpGs vs. methylated CpGs Are methylated CpGs more populous in certain genomic regions? ```{r} enrichedVersusMethylated <- overlapData[, 2:3] #Subset data for statistical testing enrichedVersusMethylated <- enrichedVersusMethylated[-6, ] #Remove totalLines row head(enrichedVersusMethylated) #Confirm changes ``` ```{r} enrichedVersusMethylatedTest <- chisq.test(t(enrichedVersusMethylated)) #Conduct a chi-squared test enrichedVersusMethylatedTest #The distribution of methylated CpGs is significantly different from CpGs enriched by MBD ``` ## Enriched CpGs vs. methylated CpGs vs. sparsely methylated CpGs vs. unmethylated CpGs ```{r} allMethylationCategories <- overlapData[, 2:5] #Subset data for statistical testing allMethylationCategories <- allMethylationCategories[-6, ] #Remove totalLines row head(allMethylationCategories) #Confirm changes ``` ```{r} allMethylationCategoriesTest <- chisq.test(t(allMethylationCategories)) #Conduct a chi-squared test allMethylationCategoriesTest #The distributions differ from eachother. ``` ## Methylated CpGs vs. DML ```{r} methylatedVersusDML <- overlapData[,c(3,6)] #Subset data for statistical testing methylatedVersusDML <- methylatedVersusDML[-6,] #Remove totalLines row head(methylatedVersusDML) #Confirm changes ``` ```{r} methylatedVersusDMLTest <- chisq.test(t(methylatedVersusDML)) #Conduct a chi-squared test methylatedVersusDMLTest #The distribution of DML is significantly different from methylated CpGs. ``` # Create figures ## Reformat data ```{r} head(overlapData) ``` ```{r} proportionData <- overlapData #Copy overlap data as a new dataframe nLength <- length(proportionData$totalCpG) #Count number of rows for(i in 1:nLength) { proportionData[i,] <- (proportionData[i,]/proportionData[6,])*100 } #Divide each column of proportionData by respective totalLines entry. Multiply by 100 and and save the percentage head(proportionData) #Confirm changes ``` ```{r} proportionData <- proportionData[-6,] #Remove totalLines row tail(proportionData) #Confirm changes ``` ## Total CpGs vs. enriched CpGs ```{r} totalVersusEnrichedProportions <- proportionData[,1:2] #Subset data head(totalVersusEnrichedProportions) #Confirm subset ``` ```{r} #pdf("2019-04-10-Total-Versus-Enriched-CpGs.pdf", height = 8.5, width = 11) par(mar = c(3,5,1,1)) #Change figure boundaries barplot(t(totalVersusEnrichedProportions), beside = TRUE, axes = FALSE, names.arg = c("Exons", "Introns", "TE", "Promoters", "Other")) #Create a grouped barplot (beside = TRUE) using a transposed version of the proportion data. Use axes = FALSE to remove the y-axis and names.arg to set labels on the x-axis. axis(side = 2, at = seq(0, 40, by = 5), las = 2, col = "grey80") mtext(side = 2, "Proportion CpGs", line = 3) #Add y-axis label legend("topright", legend = c("All", "MBD-Enriched"), pch = 16, col = c("grey20", "grey80"), bty = "n") #Place a legend in the top right of the figure with no box #dev.off() ``` ## Enriched CpGs vs. methylated CpGs ```{r} enrichedVersusMethylatedProportions <- proportionData[,2:3] #Subset data head(enrichedVersusMethylatedProportions) #Confirm subset ``` ```{r} #pdf("2019-04-10-Total-Versus-Methylated-CpGs.pdf", height = 8.5, width = 11) par(mar = c(3,5,1,1)) #Change figure boundaries barplot(t(enrichedVersusMethylatedProportions), beside = TRUE, axes = FALSE, names.arg = c("Exons", "Introns", "TE", "Promoters", "Other")) #Create a grouped barplot (beside = TRUE) using a transposed version of the proportion data. Use axes = FALSE to remove the y-axis and names.arg to set labels on the x-axis. axis(side = 2, at = seq(0, 45, by = 5), las = 2, col = "grey80") mtext(side = 2, "Proportion CpGs", line = 3) #Add y-axis label legend("topright", legend = c("MBD-Enriched", "Methylated"), pch = 16, col = c("grey20", "grey80"), bty = "n") #Place a legend in the top right of the figure with no box #dev.off() ``` ## Enriched CpGs vs. methylated vs. sparsely methylated vs. unmethylated ```{r} allMethylationCategoriesProportions <- proportionData[,2:5] #Subset data head(allMethylationCategoriesProportions) #Confirm subset ``` ```{r} #pdf("2019-04-10-All-Methylation-Categories.pdf", height = 8.5, width = 11) par(mar = c(3,5,1,1)) #Change figure boundaries barplot(t(allMethylationCategoriesProportions), beside = TRUE, axes = FALSE, names.arg = c("Exons", "Introns", "TE", "Promoters", "Other")) #Create a grouped barplot (beside = TRUE) using a transposed version of the proportion data. Use axes = FALSE to remove the y-axis and names.arg to set labels on the x-axis. axis(side = 2, at = seq(0, 45, by = 5), las = 2, col = "grey80") mtext(side = 2, "Proportion CpGs", line = 3) #Add y-axis label legend("topright", legend = c("MBD-Enriched", "Methylated", "Sparsely Methylated", "Unmethylated"), pch = 16, col = c("grey20", "grey40", "grey60", "grey80"), bty = "n") #Place a legend in the top right of the figure with no box #dev.off() ``` ## Methylated CpGs vs. DML ```{r} methylatedVersusDMLProportions <- proportionData[,c(3,6)] #Subset data head(methylatedVersusDMLProportions) #Confirm subset ``` ```{r} #pdf("2019-04-10-Methylated-CpGs-Versus-DML.pdf", height = 8.5, width = 11) par(mar = c(3,5,1,1)) #Change figure boundaries barplot(t(methylatedVersusDMLProportions), beside = TRUE, axes = FALSE, names.arg = c("Exons", "Introns", "TE", "Promoters", "Other")) #Create a grouped barplot (beside = TRUE) using a transposed version of the proportion data. Use axes = FALSE to remove the y-axis and names.arg to set labels on the x-axis. axis(side = 2, at = seq(0, 60, by = 5), las = 2, col = "grey80") mtext(side = 2, "Proportion CpGs", line = 3) #Add y-axis label legend("topright", legend = c("Methylated CpGs", "DML"), pch = 16, col = c("grey20", "grey80"), bty = "n") #Place a legend in the top right of the figure with no box #dev.off() ```