--- title: "MethylKit" author: "Yaamini Venkataraman" date: "10/25/2018" output: html_document --- In this file, I'll identify differentially methylated loci (DMLs) using `methylKit`. # Prepare R Markdown file ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` # Install packages ```{r} install.packages("vegan") #Install the vegan package require(vegan) #Load vegan ``` ```{r} install.packages("pheatmap") #Install the pheatmap package require(pheatmap) #Load pheatmap ``` ```{r} install.packages("gplots") #Install the gplots package require(gplots) #Load gplots ``` ```{r} install.packages("devtools") #Install the devtools package require(devtools) #Load devtools ``` ```{r} source("http://bioconductor.org/biocLite.R") #Source package from bioconductor BiocManager::install("methylKit") #Install methylkit ``` ```{r} install_github("al2na/methylKit", build_vignettes = FALSE, repos = BiocInstaller::biocinstallRepos(), dependencies = TRUE) #Install more methylKit options ``` ```{r} require(methylKit) #Load methylkit ``` # Obtain session information ```{r} sessionInfo() ``` # Download files from `gannet` ```{bash} wget -r -l1 --no-parent -A_dedup.sorted.bam http://gannet.fish.washington.edu/spartina/2018-10-10-project-virginica-oa-Large-Files/2018-11-07-Bismark-Mox/ #Download files from gannet. They will be stored in the same directory structure as they are online. ``` ```{bash} mv gannet.fish.washington.edu/spartina/2018-10-10-project-virginica-oa-Large-Files/2018-11-07-Bismark-Mox/* analyses/2018-10-25-MethylKit #Move all files from gannet folder to analyses folder ``` ```{bash} rm -r gannet.fish.washington.edu #Remove the empty gannet directory ``` # Process methylation data ```{r} analysisFiles <- list("zr2096_1_dedup.sorted.bam", "zr2096_2_dedup.sorted.bam", "zr2096_3_dedup.sorted.bam", "zr2096_4_dedup.sorted.bam", "zr2096_5_dedup.sorted.bam", "zr2096_6_dedup.sorted.bam", "zr2096_7_dedup.sorted.bam", "zr2096_8_dedup.sorted.bam", "zr2096_9_dedup.sorted.bam", "zr2096_10_dedup.sorted.bam") #Put all .bam files into a list for analysis. ``` ```{r} sample.IDs <- list("1", "2", "3", "4", "5", "6", "7", "8", "9", "10") #Create list of sample IDs treatmentSpecification <- c(rep(0, times = 5), rep(1, times = 5)) #Specify which treatment the samples were from. 1 is the treatment (high pCO2) and 0 is the control (ambient pCO2) ``` I'll use `mincov` in `processBismarkAln` to set different coverage metrics. I'll try 3x, 5x, and 10x coverage. ```{r} processedFiles <- processBismarkAln(location = analysisFiles, sample.id = sample.IDs, assembly = "v3", read.context = "CpG", mincov = 2, treatment = treatmentSpecification) #Process files for CpG methylation. Use 2x coverage for faster processing. Coverage will be adjusted later. First 5 files were from ambient conditions, and the second from high pCO2 conditions. ``` ```{r} processedFilteredFilesCov5 <- filterByCoverage(processedFiles, lo.count = 5, lo.perc = NULL, hi.count = 100, hi.perc = NULL) #filter processed files using lo.count and hi.count coverage thresholds. Coverage should be no less than 5 and should not exceed 100. ``` # Differentially methylated loci ## Obtain methylation and coverage plots ```{r} nFiles <- length(sample.IDs) #Count number of samples fileName <- data.frame("nameBase" = rep("../analyses/2018-10-25-Loci-Analysis/2018-11-07-Percent-CpG-Methylation", times = nFiles), "nameBase2" = rep("../analyses/2018-10-25-Loci-Analysis/2018-11-07-Percent-CpG-Coverage", times = nFiles), "sample.ID" = 1:10) #Create new dataframe for filenames head(fileName) #Confirm dataframe creation ``` ```{r} fileName$actualFileName7 <- paste(fileName$nameBase, "-Filtered", "-5xCoverage", "-Sample", fileName$sample.ID, ".jpeg", sep = "") #Create a new column for the full filename for filtered + 5x coverage + specific sample's percent CpG methylation plot fileName$actualFileName8 <- paste(fileName$nameBase2, "-Filtered", "-5xCoverage", "-Sample", fileName$sample.ID, ".jpeg", sep = "") #Create a new column for the full filename for filtered + 5x coverage + specific sample's percent CpG coverage plot head(fileName) #Confirm column creation ``` ```{r} for(i in 1:nFiles) { #For each data file jpeg(filename = fileName$actualFileName7[i], height = 1000, width = 1000) #Save file with designated name getMethylationStats(processedFilteredFilesCov5[[i]], plot = TRUE, both.strands = FALSE) #Get %CpG methylation information dev.off() #Turn off plotting device } #Plot and save %CpG methylation information ``` ```{r} for(i in 1:nFiles) { #For each data file jpeg(filename = fileName$actualFileName8[i], height = 1000, width = 1000) #Save file with designated name getCoverageStats(processedFilteredFilesCov5[[i]], plot = TRUE, both.strands = FALSE) #Get CpG coverage information dev.off() #Turn off plotting device } #Plot and save CpG coverage information ``` ## Obtain clustering information ```{r} methylationInformationFilteredCov5Destrand <- unite(processedFilteredFilesCov5, destrand = TRUE) #Combine all processed files into a single table head(methylationInformationFilteredCov5Destrand) #Confirm unite ``` ```{r} clusteringInformationFilteredCov5Destrand <- clusterSamples(methylationInformationFilteredCov5Destrand, dist = "correlation", method = "ward.D", plot = TRUE) #Save cluster information as a new object ``` ```{r} jpeg(filename = "../analyses/2018-10-25-Loci-Analysis/2019-04-05-Full-Sample-Pearson-Correlation-Plot-FilteredCov5Destrand.jpeg", height = 1000, width = 1000) #Save file with designated name getCorrelation(methylationInformationFilteredCov5Destrand, plot = TRUE) #Understand correlation between methylation patterns in different samples dev.off() ``` ```{r} jpeg(filename = "../analyses/2018-10-25-Loci-Analysis/2019-04-05-Full-Sample-CpG-Methylation-Clustering-FilteredCov5Destrand.jpeg", height = 1000, width = 1000) #Save file with designated name clusterSamples(methylationInformationFilteredCov5Destrand, dist = "correlation", method = "ward", plot = TRUE) #Cluster samples based on correlation coefficients dev.off() ``` ```{r} jpeg(filename = "../analyses/2018-10-25-Loci-Analysis/2019-04-05-Full-Sample-Methylation-PCA-FilteredCov5Destrand.jpeg", height = 1000, width = 1000) #Save file with designated name PCASamples(methylationInformationFilteredCov5Destrand) #Run a PCA analysis on percent methylation for all samples dev.off() #Turn off plotting device ``` ```{r} jpeg(filename = "../analyses/2018-10-25-Loci-Analysis/2019-04-05-Full-Sample-Methylation-Screeplot-FilteredCov5Destrand.jpeg", height = 1000, width = 1000) #Save file with designated name PCASamples(methylationInformationFilteredCov5Destrand, screeplot = TRUE) #Run the PCA analysis and plot variances against PC number in a screeplot dev.off() ``` ## Obtain differentially methylated loci ```{r} differentialMethylationStatsFilteredCov5Destrand <- calculateDiffMeth(methylationInformationFilteredCov5Destrand) #Calculate differential methylation statistics based on treatment indication from processBismarkAln head(differentialMethylationStatsFilteredCov5Destrand) #Look at differential methylation statistics ``` ```{r} write.csv(differentialMethylationStatsFilteredCov5Destrand, "../analyses/2018-10-25-Loci-Analysis/2019-06-30-All-Loci-Methylation-Statistic-Filtered-Destrand-50-Cov5.csv") #Save table as a .csv ``` ```{r} diffMethStats50FilteredCov5Destrand <- getMethylDiff(differentialMethylationStatsFilteredCov5Destrand, difference = 50, qvalue = 0.01) #Identify loci that are at least 50% different head(diffMethStats50FilteredCov5Destrand) #Confirm creation ``` ```{r} write.csv(diffMethStats50FilteredCov5Destrand, "../analyses/2018-10-25-Loci-Analysis/2019-04-05-Differentially-Methylated-Loci-Filtered-Destrand-50-Cov5.csv") #Save table as .csv ``` ##### Hypermethylated DML ```{r} diffMethStats50FilteredCov5DestrandHyper <- getMethylDiff(differentialMethylationStatsFilteredCov5Destrand, difference = 50, qvalue = 0.01, type = "hyper") #Identify hypermethylated loci that are at least 50% different head(diffMethStats50FilteredCov5DestrandHyper) #Confirm creation ``` ```{r} write.csv(diffMethStats50FilteredCov5DestrandHyper, "../analyses/2018-10-25-Loci-Analysis/2019-04-05-Differentially-Methylated-Loci-Filtered-Destrand-50-Cov5-Hypermethylated.csv") #Save table as .csv ``` ##### Hypomethylated DML ```{r} diffMethStats50FilteredCov5DestrandHypo <- getMethylDiff(differentialMethylationStatsFilteredCov5Destrand, difference = 50, qvalue = 0.01, type = "hypo") #Identify hypomethylated loci that are at least 50% different head(diffMethStats50FilteredCov5DestrandHypo) #Confirm creation ``` ```{r} write.csv(diffMethStats50FilteredCov5DestrandHypo, "../analyses/2018-10-25-Loci-Analysis/2019-04-05-Differentially-Methylated-Loci-Filtered-Destrand-50-Cov5-Hypomethylated.csv") #Save table as .csv ``` #### DML distribution figure I want to plot the distribution of DML across the *C. virginica* genome and compare it to the number of genes in each chromosome. ###### Calculate distribution of DML across chromosomes ```{r} DMLchrCounts <-as.data.frame(table(diffMethStats50FilteredCov5Destrand$chr)) #Count the number of DML/chromosome DMLchrCounts$chr <- c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10") #Create column with chr name DMLchrCounts <- DMLchrCounts[,-1] #Remove column with chromosome RefSeq ID DMLchrCounts <- DMLchrCounts[,c(2,1)] #Reorganize columns colnames(DMLchrCounts) <- c("chr", "DMLCount") #Rename columns head(DMLchrCounts) #Confirm formatting ``` ```{r} write.csv(DMLchrCounts, "../analyses/2018-10-25-MethylKit/2019-09-30-DML-per-Chromosome.csv", row.names = FALSE, quote = FALSE) #Save file ``` ###### [*C. virginica* genome information from NCBI](https://www.ncbi.nlm.nih.gov/genome/398) ```{r} DMLchrCounts$geneCount <- c(3932, 4137, 4712, 3896, 6239, 2630, 3015, 4445, 4960, 1527) #Create column with number of gene sequences in each chromosome DMLchrCounts$DMLbyGenes <- (DMLchrCounts$DMLCount / DMLchrCounts$geneCount)*100 head(DMLchrCounts) #Confirm dataframe creation ``` ```{bash} #Count the number of CGs in each chromosme (excluding MT chromosome) grep "NC_035780.1" ../genome-feature-tracks/C_virginica-3.0_CG-motif.bed | wc -l grep "NC_035781.1" ../genome-feature-tracks/C_virginica-3.0_CG-motif.bed | wc -l grep "NC_035782.1" ../genome-feature-tracks/C_virginica-3.0_CG-motif.bed | wc -l grep "NC_035783.1" ../genome-feature-tracks/C_virginica-3.0_CG-motif.bed | wc -l grep "NC_035784.1" ../genome-feature-tracks/C_virginica-3.0_CG-motif.bed | wc -l grep "NC_035785.1" ../genome-feature-tracks/C_virginica-3.0_CG-motif.bed | wc -l grep "NC_035786.1" ../genome-feature-tracks/C_virginica-3.0_CG-motif.bed | wc -l grep "NC_035787.1" ../genome-feature-tracks/C_virginica-3.0_CG-motif.bed | wc -l grep "NC_035788.1" ../genome-feature-tracks/C_virginica-3.0_CG-motif.bed | wc -l grep "NC_035789.1" ../genome-feature-tracks/C_virginica-3.0_CG-motif.bed | wc -l ``` ```{r} DMLchrCounts$chrCpGCounts <- c(1394962, 1237469, 1568195, 1200917, 2008680, 1121671, 1251184, 1633946, 2300335, 740913) #Create column with number of CpGs in each chromosome DMLchrCounts$DMLbyChrCpG <- (DMLchrCounts$DMLCount / DMLchrCounts$chrCpGCounts) #Normalize DML counts by number of CpGs in each chromosome range(DMLchrCounts$DMLbyChrCpG) #Look at range of values head(DMLchrCounts) #Confirm column creation ``` ##### Create figure ```{r} #pdf("../analyses/2018-10-25-MethylKit/2019-09-30-DML-and-Gene-Distribution.pdf", height = 8.5, width = 11) #Save figure as a pdf par(mar = c(5,7,2,10)) #Change figure boundaries DMLbarplot <- barplot(as.matrix(t(DMLchrCounts$DMLbyChrCpG)), axes = FALSE, names.arg = DMLchrCounts$chr, cex.names = 1.5, xlim = c(0.7,11.5), ylim = c(0,7e-05), col = "grey80") #Create a barplot and save as a new object. Use axes = FALSE to remove the y-axis and names.arg to set labels on the x-axis. The object contains x coordinates for bars, so xlim is set at 12 to compensate for maximum value of 11.5 mtext(side = 1, "Chromosome", line = 3, cex = 1.5) #Add x-axis label axis(side = 2, line = 1.5, at = seq(0, 7e-05, by = 1e-05), labels = seq(0, 7, by = 1), las = 2, col = "grey80", cex.axis = 1.2) #Add y-axis for DML counts mtext(side = 2, "Number DML per 10,000 CpGs", line = 5, cex = 1.5) #Add y-axis label for DML counts par(new = TRUE) #Create a new plot plot(x = DMLbarplot, y = DMLchrCounts$geneCount, type = "b", axes = FALSE, xlab = "", ylab = "", xaxs = "i", yaxs = "i", pch = 16, col = "grey20", xlim = c(0,12), ylim = c(0,6500)) #Plot points and lines (type = "b") for gene count by chromosome. Use the coordinates from DMLbarplot (x = DMLbarplot) and set xlim = (0,12) so plots are lined up. Use axes = FALSE to remove both axes. Remove x and y lables (xlab = ""; ylab = ""). Set ylim = (0,6500) to account for max y-values. Use xaxs and yxaxs to remove space between axes. axis(side = 4, line = 1.5, at = seq(0, 6500, by = 500), las = 2, col = "grey80", cex.axis = 1.2) #Add y-axis for gene sequence counts mtext(side = 4, "Number of Genes", line = 6, cex = 1.5) #Add y-axis label for gene sequence counts #dev.off() #Turn off plotting device ``` #### Preliminary Analyses ###### Plot customization ```{r} plotCustomization <- data.frame(sample = 1:10, treatmentSpecification) #Create dataframe with sample treatment information head(plotCustomization) #Confirm dataframe creation ``` ###### Principal Component Analyses `methylKit` creates PCA biplots, but I want to dig further into the analysis and create custom biplots. ####### All methylation data ```{r} allDataPCA <- PCASamples(methylationInformationFilteredCov5Destrand, obj.return = TRUE) #Run a PCA analysis on percent methylation for all samples. methylKit uses prcomp to create the PCA matrix summary(allDataPCA) #Look at summary statistics. The first PC explains 18.1% of variation, the second PC explains 11.7% of variation ``` ```{r} #pdf("../analyses/2018-10-25-MethylKit/2019-11-19-All-Data-PCA.pdf", width = 11, height = 8.5) par(mar = c(5, 5, 1, 1)) #Specify inner and outer margins fig.allDataPCA <- ordiplot(allDataPCA, choices = c(1, 2), type = "none", display = "sites", cex = 0.5, xlab = "", ylab = "", xaxt = "n", yaxt = "n") #Use ordiplot to create base biplot. Do not add any points points(fig.allDataPCA, "sites", col = c(rep(plotColors[2], times = 5), rep(plotColors[4], times = 5)), pch = c(rep(16, times = 5), rep(17, times = 5)), cex = 3) #Add each sample. Darker samples are ambient, lighter samples are elevated pCO2 #Add multiple white boxes on top of the default black box to manually change the color box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") ordiellipse(allDataPCA, plotCustomization$treatment, show.groups = "1", col = plotColors[4]) #Add confidence ellipse around the samples in elevated pCO2 ordiellipse(allDataPCA, plotCustomization$treatment, show.groups = "0", col = plotColors[2]) #Add confidence ellipse around the samples in ambient pCO2 axis(side = 1, labels = TRUE, col = "grey80", cex.axis = 1.7) #Add x-axis mtext(side = 1, text = "PC 1 (18.1%)", line = 3, cex = 1.5) #Add x-axis label axis(side = 2, labels = TRUE, col = "grey80", cex.axis = 1.7) #Add y-axis mtext(side = 2, text = "PC 2 (11.7%)", line = 3, cex = 1.5) #Add y-axis label legend("topleft", pch = c(16, 17), legend = c("Control", "Elevated"), col = c(plotColors[2], plotColors[4]), cex = 1.7, bty = "n") #Add a legend with information about ambient and elevated samples #dev.off() ``` ####### DML The first thing I need to do is subset the data so it only includes DML. Then, I can use similar code to what I used above to create PCA plots. ```{r} DMLPositions <- rep(0, times = length(diffMethStats50FilteredCov5Destrand$chr)) #Create an empty vector with 598 places to store row numbers for (i in 1:length(DMLPositions)) { DMLPositions[i] <- which(getData(diffMethStats50FilteredCov5Destrand)$start[i] == getData(methylationInformationFilteredCov5Destrand)$start) } #For each DML, save the row number where that DML is found in methylationInformationFilteredCov5Destrand tail(DMLPositions) #Confirm vector was created ``` ```{r} DMLMatrix <- methylationInformationFilteredCov5Destrand[DMLPositions,] #Subset methylationInformationFilteredCov5Destrand to only include DML and save as a new methylBase object sum((DMLMatrix$start) == (diffMethStats50FilteredCov5Destrand$start)) == length(diffMethStats50FilteredCov5Destrand$start) #Confirm that start columns are identical. If they are identical, the sum of all TRUE statements should equal the length of the original methylBase object tail(DMLMatrix) #Confirm methylBase object creation ``` ```{r} DMLDataPCA <- PCASamples(DMLMatrix, obj.return = TRUE) #Run a PCA analysis on percent methylation for all samples. methylKit uses prcomp to create the PCA matrix summary(DMLDataPCA) #Look at summary statistics. The first PC explains 47.6% of variation, the second PC explains 9.5% of variation ``` ```{r} #pdf("../analyses/2018-10-25-MethylKit/2019-11-19-DML-Only-PCA.pdf", width = 11, height = 8.5) par(mar = c(5, 5, 1, 1)) #Specify inner and outer margins fig.DMLDataPCA <- ordiplot(DMLDataPCA, choices = c(1, 2), type = "none", display = "sites", cex = 0.5, xlab = "", ylab = "", xaxt = "n", yaxt = "n") #Use ordiplot to create base biplot. Do not add any points points(fig.DMLDataPCA, "sites", col = c(rep(plotColors[2], times = 5), rep(plotColors[4], times = 5)), pch = c(rep(16, times = 5), rep(17, times = 5)), cex = 3) #Add each sample. Darker samples are ambient, lighter samples are elevated pCO2 #Add multiple white boxes on top of the default black box to manually change the color box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") ordiellipse(DMLDataPCA, plotCustomization$treatment, show.groups = "1", col = plotColors[4]) #Add confidence ellipse around the samples in elevated pCO2 ordiellipse(DMLDataPCA, plotCustomization$treatment, show.groups = "0", col = plotColors[2]) #Add confidence ellipse around the samples in ambient pCO2 axis(side = 1, labels = TRUE, col = "grey80", cex.axis = 1.7) #Add x-axis mtext(side = 1, text = "PC 1 (47.6%)", line = 3, cex = 1.5) #Add x-axis label axis(side = 2, labels = TRUE, col = "grey80", cex.axis = 1.7) #Add y-axis mtext(side = 2, text = "PC 2 (9.5%)", line = 3, cex = 1.5) #Add y-axis label legend("topright", pch = c(16, 17), legend = c("Control", "Elevated"), col = c(plotColors[2], plotColors[4]), cex = 1.7, bty = "n") #Add a legend with information about ambient and elevated samples #dev.off() ``` ####### Multipanel plot ```{r} #pdf("../analyses/2018-10-25-MethylKit/2019-11-19-PCA-Multpanel.pdf", height = 8.5, width = 11) #Save plot par(mfrow = c(1, 2), oma = c(5, 2, 2, 0), mar = c(0, 3, 0, 5)) #Set up parameters for multipanel plot #All CpG Loci fig.allDataPCA <- ordiplot(allDataPCA, choices = c(1, 2), type = "none", display = "sites", cex = 0.5, xlim = c(-400, 200), xlab = "", ylab = "", xaxt = "n", yaxt = "n") #Use ordiplot to create base biplot. Do not add any points points(fig.allDataPCA, "sites", col = c(rep(plotColors[2], times = 5), rep(plotColors[4], times = 5)), pch = c(rep(16, times = 5), rep(17, times = 5)), cex = 3) #Add each sample. Darker samples are ambient, lighter samples are elevated pCO2 #Add multiple white boxes on top of the default black box to manually change the color box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") ordiellipse(allDataPCA, plotCustomization$treatment, show.groups = "1", col = plotColors[4]) #Add confidence ellipse around the samples in elevated pCO2 ordiellipse(allDataPCA, plotCustomization$treatment, show.groups = "0", col = plotColors[2]) #Add confidence ellipse around the samples in ambient pCO2 axis(side = 1, at = seq(-400, 200, 200), col = "grey80", cex.axis = 1.7) #Add x-axis mtext(side = 1, text = "PC 1 (18.1%)", line = 3, cex = 1.5) #Add x-axis label axis(side = 2, labels = TRUE, col = "grey80", cex.axis = 1.7) #Add y-axis mtext(side = 2, text = "PC 2 (11.7%)", line = 3, cex = 1.5) #Add y-axis label mtext(side = 3, line = -5, adj = c(-100,0), text = " a. All CpG Loci", cex = 1.5) #DML fig.DMLDataPCA <- ordiplot(DMLDataPCA, choices = c(1, 2), type = "none", display = "sites", cex = 0.5, xlim = c(-20,20), ylim = c(-10,20), xlab = "", ylab = "", xaxt = "n", yaxt = "n") #Use ordiplot to create base biplot. Do not add any points points(fig.DMLDataPCA, "sites", col = c(rep(plotColors[2], times = 5), rep(plotColors[4], times = 5)), pch = c(rep(16, times = 5), rep(17, times = 5)), cex = 3) #Add each sample. Darker samples are ambient, lighter samples are elevated pCO2 #Add multiple white boxes on top of the default black box to manually change the color box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") ordiellipse(DMLDataPCA, plotCustomization$treatment, show.groups = "1", col = plotColors[4]) #Add confidence ellipse around the samples in elevated pCO2 ordiellipse(DMLDataPCA, plotCustomization$treatment, show.groups = "0", col = plotColors[2]) #Add confidence ellipse around the samples in ambient pCO2 axis(side = 1, labels = TRUE, col = "grey80", cex.axis = 1.7) #Add x-axis mtext(side = 1, text = "PC 1 (47.6%)", line = 3, cex = 1.5) #Add x-axis label axis(side = 2, labels = TRUE, col = "grey80", cex.axis = 1.7) #Add y-axis mtext(side = 2, text = "PC 2 (9.5%)", line = 3, cex = 1.5) #Add y-axis label mtext(side = 3, line = -5, adj = c(-100, 0), text = " b. DML", cex = 1.5) #Add test category legend(x = 0, y = -22, pch = c(16, 17), legend = c("Control", "Elevated"), col = c(plotColors[2], plotColors[4]), y.intersp = 1, x.intersp = 1, cex = 1.7, bty = "n") #Add a legend with information about ambient and elevated samples #dev.off() ``` ##### Heatmap ```{r} percMethDML <- percMethylation(DMLMatrix, rowids = TRUE) #Get percent methylation for all samples at DML. Include row IDs (chr, start, end) information head(percMethDML) #Confirm percent methylation matrix was created ``` I'll first use `pheatmap`. ```{r} pheatmap(percMethDML, color = rev(plotColors), cluster_rows = TRUE, clustering_distance_rows = "euclidean", treeheight_row = 70, show_rownames = FALSE, cluster_cols = TRUE, clustering_distance_cols = "euclidean", treeheight_col = 40, show_colnames = FALSE, annotation_col = data.frame(pCO2 = factor(rep(c("Ambient","Treatment"), each = 5))), annotation_colors = list(pCO2 = c(Ambient = "grey90", Treatment = "grey10")), annotation_legend = FALSE, annotation_names_col = FALSE, legend = TRUE) #Create heatmap using pheatmap using percMethDML and plotColors color scheme. Cluster rows and columns using euclidean distances. Adjust the dendogram tree heights and do not show any row or column names. Create a dataframe with treatment information using annotation_col. Use annotation_colors to indicate colors for treatment ("grey90") and ambient ("grey10") samples. Do not include an annotation_legend or name for annotations (annotatino_names_col). Include a legend. ``` I also want to try using a different heatmap package, `heatmap.2` from `gplots`. `heatmap.2` allows you to add a density plot with the legend, which could be interesting information to include. ```{r} #pdf("../analyses/2018-10-25-MethylKit/2019-11-19-DML-Only-Heatmap.pdf", height = 8.5, width = 11) par(oma = c(0, 1, 0, 0)) #Adjust outer margins heatmap.2(percMethDML, col = rev(plotColors), scale = "none", margins = c(1,1), trace = "none", tracecol = "black", labRow = FALSE, labCol = FALSE, ColSideColors = c(rep("grey90", times = 5), rep("grey10", times = 5)), key = TRUE, keysize = 1.8, density.info = "density", key.title = "", key.xlab = "% Methylation", key.ylab = "", key.par = list(cex.lab = 2.0, cex.axis = 1.5)) #Create heatmap using heatmap.2 from gplots package using percMethDML data. Use plotColors but do not scale data, label rows, or label columns. Use ColSideColors to indicate colors for treatment and ambient samples. Add a legend using key, and adjust keysize. Have key display density data with density.info. Do not add a key title or y-axis label, and label x axis with key.xlab. mtext("Density", cex = 1.6, las = 3, adj = 0.8, padj = -29) #Manually add y-axis label for key since heatmap.2 doesn't let you change font size #dev.off() ``` # Save DML as BEDfiles ## Install packages **Do not** install these packages until analysis in `methylKit` is complete, as some of the installed packages will mask others important for analysis. ```{r} library(readr) #Load package install.packages("tidyverse") #Install tidyverse library(tidyverse) #Load package ``` ## DML Background ```{r} head(methylationInformationFilteredCov5Destrand) #I only want the columns with chromosome, start position, stop position, and strand. ``` ```{r} methylationInformationFilteredCov5DestrandReduced <- data.frame("chr" = methylationInformationFilteredCov5Destrand$chr, "start" = methylationInformationFilteredCov5Destrand$start, "stop" = methylationInformationFilteredCov5Destrand$end, "strand" = methylationInformationFilteredCov5Destrand$strand) #Subset file head(methylationInformationFilteredCov5DestrandReduced) #Confirm subsetting ``` ```{r} methylationInformationFilteredCov5DestrandReduced$start <- (methylationInformationFilteredCov5DestrandReduced$start - 1) #Subtract 1 from the start position for visualization head(methylationInformationFilteredCov5DestrandReduced) #Confirm subtracting worked ``` ```{r} write_delim(methylationInformationFilteredCov5DestrandReduced, "../analyses/2018-10-25-MethylKit/2019-05-14-Methylation-Information-Filtered-Destrand-Cov5.bed", delim = "\t", col_names = FALSE) #Write out all methylation information as a background to be used for gene enrichment analyses. ``` ```{bash} head 2019-05-14-Methylation-Information-Filtered-Destrand-Cov5.bed #Confirm formatting was retained in export ``` ## DML ```{r} DML5xFilteredDestrand04052019 <- mutate(diffMethStats50FilteredCov5Destrand, start = start -1, end = end + 1) %>% select(chr, start, end, meth.diff) %>% mutate_if(is.numeric, as.integer) #Save as a BED file, and avoid writing information in scientific notation ``` ```{r} head(DML5xFilteredDestrand04052019) #Confirm changes ``` ```{r} write_delim(DML5xFilteredDestrand04052019, "../analyses/2018-10-25-MethylKit/2019-04-05-DML-Destrand-5x-Locations.bed", delim = '\t', col_names = FALSE) #Save data as a BED file ``` ##### Hypermethylated DML ```{r} DML5xFilteredDestrandHyper04302019 <- mutate(diffMethStats50FilteredCov5DestrandHyper, start = start -1, end = end + 1) %>% select(chr, start, end, meth.diff) %>% mutate_if(is.numeric, as.integer) #Save as a BED file, and avoid writing information in scientific notation ``` ```{r} head(DML5xFilteredDestrandHyper04302019) #Confirm changes ``` ```{r} write_delim(DML5xFilteredDestrandHyper04302019, "../analyses/2018-10-25-MethylKit/2019-04-05-DML-Destrand-5x-Locations-Hypermethylated.bed", delim = '\t', col_names = FALSE) #Save data as a BED file ``` ##### Hypomethyalted DML ```{r} DML5xFilteredDestrandHypo04302019 <- mutate(diffMethStats50FilteredCov5DestrandHypo, start = start -1, end = end + 1) %>% select(chr, start, end, meth.diff) %>% mutate_if(is.numeric, as.integer) #Save as a BED file, and avoid writing information in scientific notation ``` ```{r} head(DML5xFilteredDestrandHypo04302019) #Confirm changes ``` ```{r} write_delim(DML5xFilteredDestrandHypo04302019, "../analyses/2018-10-25-MethylKit/2019-04-05-DML-Destrand-5x-Locations-Hypomethylated.bed", delim = '\t', col_names = FALSE) #Save data as a BED file ```