--- title: "MethylKit" author: "Yaamini Venkataraman" date: "10/25/2018" output: html_document --- In this file, I'll identify differentially methylated loci (DMLs) and regions (DMRs) using `methylKit`. # Prepare R Markdown file ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` # Install packages ```{r} install.packages("vegan") #Install the vegan package require(vegan) #Load vegan ``` ```{r} install.packages("pheatmap") #Install the pheatmap package require(pheatmap) #Load pheatmap ``` ```{r} install.packages("gplots") #Install the gplots package require(gplots) #Load gplots ``` ```{r} install.packages("devtools") #Install the devtools package require(devtools) #Load devtools ``` ```{r} source("http://bioconductor.org/biocLite.R") #Source package from bioconductor BiocManager::install("methylKit") #Install methylkit ``` ```{r} install_github("al2na/methylKit", build_vignettes = FALSE, repos = BiocInstaller::biocinstallRepos(), dependencies = TRUE) #Install more methylKit options ``` ```{r} require(methylKit) #Load methylkit ``` # Obtain session information ```{r} sessionInfo() ``` # Download files from `gannet` ```{bash} wget -r -l1 --no-parent -A_dedup.sorted.bam http://gannet.fish.washington.edu/spartina/2018-10-10-project-virginica-oa-Large-Files/2018-11-07-Bismark-Mox/ #Download files from gannet. They will be stored in the same directory structure as they are online. ``` ```{bash} mv gannet.fish.washington.edu/spartina/2018-10-10-project-virginica-oa-Large-Files/2018-11-07-Bismark-Mox/* analyses/2018-10-25-MethylKit #Move all files from gannet folder to analyses folder ``` ```{bash} rm -r gannet.fish.washington.edu #Remove the empty gannet directory ``` # Process methylation data ```{r} analysisFiles <- list("zr2096_1_dedup.sorted.bam", "zr2096_2_dedup.sorted.bam", "zr2096_3_dedup.sorted.bam", "zr2096_4_dedup.sorted.bam", "zr2096_5_dedup.sorted.bam", "zr2096_6_dedup.sorted.bam", "zr2096_7_dedup.sorted.bam", "zr2096_8_dedup.sorted.bam", "zr2096_9_dedup.sorted.bam", "zr2096_10_dedup.sorted.bam") #Put all .bam files into a list for analysis. ``` ```{r} sample.IDs <- list("1", "2", "3", "4", "5", "6", "7", "8", "9", "10") #Create list of sample IDs treatmentSpecification <- c(rep(0, times = 5), rep(1, times = 5)) #Specify which treatment the samples were from. 1 is the treatment (high pCO2) and 0 is the control (ambient pCO2) ``` I'll use `mincov` in `processBismarkAln` to set different coverage metrics. I'll try 3x, 5x, and 10x coverage. ## 3x coverage ```{r} processedFilesCov3 <- processBismarkAln(location = analysisFiles, sample.id = sample.IDs, assembly = "v3", read.context = "CpG", mincov = 3, treatment = treatmentSpecification) #Process files for CpG methylation using 5x coverage. First 5 files were from ambient conditions, and the second from high pCO2 conditions. ``` ## 5x coverage ```{r} processedFilesCov5 <- processBismarkAln(location = analysisFiles, sample.id = sample.IDs, assembly = "v3", read.context = "CpG", mincov = 5, treatment = treatmentSpecification) #Process files for CpG methylation using 5x coverage. First 5 files were from ambient conditions, and the second from high pCO2 conditions. ``` ## 10x coverage ```{r} processedFilesCov10 <- processBismarkAln(location = analysisFiles, sample.id = sample.IDs, assembly = "v3", read.context = "CpG", mincov = 10, treatment = treatmentSpecification) #Process files for CpG methylation using 10x coverage. First 5 files were from ambient conditions, and the second from high pCO2 conditions. ``` I also want to try a two-step process for setting coverage. I'll use `processBismarkAln` with `mincov = 2` to quickly process my files. I'll then use `filterByCoverage` to set minimum and maximum coverage thresholds. ## Filtered coverage ```{r} processedFiles <- processBismarkAln(location = analysisFiles, sample.id = sample.IDs, assembly = "v3", read.context = "CpG", mincov = 2, treatment = treatmentSpecification) #Process files for CpG methylation. Use 2x coverage for faster processing. Coverage will be adjusted later. First 5 files were from ambient conditions, and the second from high pCO2 conditions. ``` ```{r} processedFilteredFilesCov5 <- filterByCoverage(processedFiles, lo.count = 5, lo.perc = NULL, hi.count = 100, hi.perc = NULL) #filter processed files using lo.count and hi.count coverage thresholds. Coverage should be no less than 5 and should not exceed 100. ``` # Differentially methylated loci ## Obtain methylation and coverage plots ```{r} nFiles <- length(sample.IDs) #Count number of samples fileName <- data.frame("nameBase" = rep("2018-10-25-Loci-Analysis/2018-11-07-Percent-CpG-Methylation", times = nFiles), "nameBase2" = rep("2018-10-25-Loci-Analysis/2018-11-07-Percent-CpG-Coverage", times = nFiles), "sample.ID" = 1:10) #Create new dataframe for filenames head(fileName) #Confirm dataframe creation ``` ### 3x coverage ```{r} fileName$actualFileName <- paste(fileName$nameBase, "-3xCoverage", "-Sample", fileName$sample.ID, ".jpeg", sep = "") #Create a new column for the full filename for 3x coverage + specific sample's percent CpG methylation plot fileName$actualFileName2 <- paste(fileName$nameBase2, "-3xCoverage", "-Sample", fileName$sample.ID, ".jpeg", sep = "") #Create a new column for the full filename for 3x coverage + specific sample's percent CpG coverage plot head(fileName) #Confirm column creation ``` ```{r} for(i in 1:nFiles) { #For each data file jpeg(filename = fileName$actualFileName[i], height = 1000, width = 1000) #Save file with designated name getMethylationStats(processedFilesCov3[[i]], plot = TRUE, both.strands = FALSE) #Get %CpG methylation information dev.off() #Turn off plotting device } #Plot and save %CpG methylation information ``` ```{r} for(i in 1:nFiles) { #For each data file jpeg(filename = fileName$actualFileName2[i], height = 1000, width = 1000) #Save file with designated name getCoverageStats(processedFilesCov3[[i]], plot = TRUE, both.strands = FALSE) #Get CpG coverage information dev.off() #Turn off plotting device } #Plot and save CpG coverage information ``` ### 5x coverage ```{r} fileName$actualFileName3 <- paste(fileName$nameBase, "-5xCoverage", "-Sample", fileName$sample.ID, ".jpeg", sep = "") #Create a new column for the full filename for 5x coverage + specific sample's percent CpG methylation plot fileName$actualFileName4 <- paste(fileName$nameBase2, "-5xCoverage", "-Sample", fileName$sample.ID, ".jpeg", sep = "") #Create a new column for the full filename for 5x coverage + specific sample's percent CpG coverage plot head(fileName) #Confirm column creation ``` ```{r} for(i in 1:nFiles) { #For each data file jpeg(filename = fileName$actualFileName3[i], height = 1000, width = 1000) #Save file with designated name getMethylationStats(processedFilesCov5[[i]], plot = TRUE, both.strands = FALSE) #Get %CpG methylation information dev.off() #Turn off plotting device } #Plot and save %CpG methylation information ``` ```{r} for(i in 1:nFiles) { #For each data file jpeg(filename = fileName$actualFileName4[i], height = 1000, width = 1000) #Save file with designated name getCoverageStats(processedFilesCov5[[i]], plot = TRUE, both.strands = FALSE) #Get CpG coverage information dev.off() #Turn off plotting device } #Plot and save CpG coverage information ``` ### 10x coverage ### Filtered coverage ```{r} fileName$actualFileName7 <- paste(fileName$nameBase, "-Filtered", "-5xCoverage", "-Sample", fileName$sample.ID, ".jpeg", sep = "") #Create a new column for the full filename for filtered + 5x coverage + specific sample's percent CpG methylation plot fileName$actualFileName8 <- paste(fileName$nameBase2, "-Filtered", "-5xCoverage", "-Sample", fileName$sample.ID, ".jpeg", sep = "") #Create a new column for the full filename for filtered + 5x coverage + specific sample's percent CpG coverage plot head(fileName) #Confirm column creation ``` ```{r} for(i in 1:nFiles) { #For each data file jpeg(filename = fileName$actualFileName7[i], height = 1000, width = 1000) #Save file with designated name getMethylationStats(processedFilteredFilesCov5[[i]], plot = TRUE, both.strands = FALSE) #Get %CpG methylation information dev.off() #Turn off plotting device } #Plot and save %CpG methylation information ``` ```{r} for(i in 1:nFiles) { #For each data file jpeg(filename = fileName$actualFileName8[i], height = 1000, width = 1000) #Save file with designated name getCoverageStats(processedFilteredFilesCov5[[i]], plot = TRUE, both.strands = FALSE) #Get CpG coverage information dev.off() #Turn off plotting device } #Plot and save CpG coverage information ``` ## Obtain clustering information ### `destrand = FALSE` #### 3x coverage ```{r} methylationInformationCov3 <- unite(processedFilesCov3) #Combine all processed files into a single table head(methylationInformationCov3) #Confirm unite ``` ```{r} clusteringInformationCov3 <- clusterSamples(methylationInformationCov3, dist = "correlation", method = "ward", plot = FALSE) #Save cluster information as a new object ``` ```{r} jpeg(filename = "2018-10-25-Loci-Analysis/2018-11-07-Full-Sample-Pearson-Correlation-Plot-Cov3.jpeg", height = 1000, width = 1000) #Save file with designated name getCorrelation(methylationInformationCov3, plot = TRUE) #Understand correlation between methylation patterns in different samples dev.off() ``` ```{r} jpeg(filename = "2018-10-25-Loci-Analysis/2018-11-07-Full-Sample-CpG-Methylation-Clustering-Cov3.jpeg", height = 1000, width = 1000) #Save file with designated name clusterSamples(methylationInformationCov3, dist = "correlation", method = "ward", plot = TRUE) #Cluster samples based on correlation coefficients dev.off() ``` ```{r} jpeg(filename = "2018-10-25-Loci-Analysis/2018-11-07-Full-Sample-Methylation-PCA-Cov3.jpeg", height = 1000, width = 1000) #Save file with designated name PCASamples(methylationInformationCov3) #Run a PCA analysis on percent methylation for all samples dev.off() #Turn off plotting device ``` ```{r} jpeg(filename = "2018-10-25-Loci-Analysis/2018-11-07-Full-Sample-Methylation-Screeplot-Cov3.jpeg", height = 1000, width = 1000) #Save file with designated name PCASamples(methylationInformationCov3, screeplot = TRUE) #Run the PCA analysis and plot variances against PC number in a screeplot dev.off() ``` #### 5x coverage ```{r} methylationInformationCov5 <- unite(processedFilesCov5) #Combine all processed files into a single table head(methylationInformationCov5) #Confirm unite ``` #### 10x coverage ```{r} methylationInformationCov10 <- unite(processedFilesCov10) #Combine all processed files into a single table head(methylationInformationCov10) #Confirm unite ``` ### `destrand = TRUE` #### 5x coverage ```{r} methylationInformationCov5Destrand <- unite(processedFilesCov5, destrand = TRUE) #Combine all processed files into a single table head(methylationInformationCov5Destrand) #Confirm unite ``` ```{r} clusteringInformationCov5Destrand <- clusterSamples(methylationInformationCov5Destrand, dist = "correlation", method = "ward", plot = FALSE) #Save cluster information as a new object ``` ```{r} jpeg(filename = "2018-10-25-Loci-Analysis/2019-03-14-Full-Sample-Pearson-Correlation-Plot-Cov5Destrand.jpeg", height = 1000, width = 1000) #Save file with designated name getCorrelation(methylationInformationCov5Destrand, plot = TRUE) #Understand correlation between methylation patterns in different samples dev.off() ``` ```{r} jpeg(filename = "2018-10-25-Loci-Analysis/2019-03-14-Full-Sample-CpG-Methylation-Clustering-Cov5Destrand.jpeg", height = 1000, width = 1000) #Save file with designated name clusterSamples(methylationInformationCov5Destrand, dist = "correlation", method = "ward", plot = TRUE) #Cluster samples based on correlation coefficients dev.off() ``` ```{r} jpeg(filename = "2018-10-25-Loci-Analysis/2019-03-14-Full-Sample-Methylation-PCA-Cov5Destrand.jpeg", height = 1000, width = 1000) #Save file with designated name PCASamples(methylationInformationCov5Destrand) #Run a PCA analysis on percent methylation for all samples dev.off() #Turn off plotting device ``` ```{r} jpeg(filename = "2018-10-25-Loci-Analysis/2019-03-14-Full-Sample-Methylation-Screeplot-Cov5Destrand.jpeg", height = 1000, width = 1000) #Save file with designated name PCASamples(methylationInformationCov5Destrand, screeplot = TRUE) #Run the PCA analysis and plot variances against PC number in a screeplot dev.off() ``` #### 10x coverage ```{r} methylationInformationCov10Destrand <- unite(processedFilesCov10, destrand = TRUE) #Combine all processed files into a single table head(methylationInformationCov10Destrand) #Confirm unite ``` #### Filtered coverage ```{r} methylationInformationFilteredCov5Destrand <- unite(processedFilteredFilesCov5, destrand = TRUE) #Combine all processed files into a single table head(methylationInformationFilteredCov5Destrand) #Confirm unite ``` ```{r} clusteringInformationFilteredCov5Destrand <- clusterSamples(methylationInformationFilteredCov5Destrand, dist = "correlation", method = "ward.D", plot = TRUE) #Save cluster information as a new object ``` ```{r} jpeg(filename = "2018-10-25-Loci-Analysis/2019-04-05-Full-Sample-Pearson-Correlation-Plot-FilteredCov5Destrand.jpeg", height = 1000, width = 1000) #Save file with designated name getCorrelation(methylationInformationFilteredCov5Destrand, plot = TRUE) #Understand correlation between methylation patterns in different samples dev.off() ``` ```{r} jpeg(filename = "2018-10-25-Loci-Analysis/2019-04-05-Full-Sample-CpG-Methylation-Clustering-FilteredCov5Destrand.jpeg", height = 1000, width = 1000) #Save file with designated name clusterSamples(methylationInformationFilteredCov5Destrand, dist = "correlation", method = "ward", plot = TRUE) #Cluster samples based on correlation coefficients dev.off() ``` ```{r} jpeg(filename = "2018-10-25-Loci-Analysis/2019-04-05-Full-Sample-Methylation-PCA-FilteredCov5Destrand.jpeg", height = 1000, width = 1000) #Save file with designated name PCASamples(methylationInformationFilteredCov5Destrand) #Run a PCA analysis on percent methylation for all samples dev.off() #Turn off plotting device ``` ```{r} jpeg(filename = "2018-10-25-Loci-Analysis/2019-04-05-Full-Sample-Methylation-Screeplot-FilteredCov5Destrand.jpeg", height = 1000, width = 1000) #Save file with designated name PCASamples(methylationInformationFilteredCov5Destrand, screeplot = TRUE) #Run the PCA analysis and plot variances against PC number in a screeplot dev.off() ``` ## Obtain differentially methylated loci ### `destrand = FALSE` #### 3x coverage ```{r} differentialMethylationStatsCov3 <- calculateDiffMeth(methylationInformationCov3) #Calculate differential methylation statistics based on treatment indication from processBismarkAln diffMethStats50Cov3 <- getMethylDiff(differentialMethylationStatsCov3, difference = 50, qvalue = 0.01) #Identify loci that are at least 50% different head(diffMethStats50Cov3) #Confirm creation ``` ```{r} write.csv(diffMethStats50Cov3, "2018-10-25-Loci-Analysis/2018-11-07-Differentially-Methylated-Loci-50-Cov3.csv") #Save table as .csv ``` #### 5x coverage ```{r} differentialMethylationStatsCov5 <- calculateDiffMeth(methylationInformationCov5) #Calculate differential methylation statistics based on treatment indication from processBismarkAln diffMethStats50Cov5 <- getMethylDiff(differentialMethylationStatsCov5, difference = 50, qvalue = 0.01) #Identify loci that are at least 50% different head(diffMethStats50Cov5) #Confirm creation ``` ```{r} write.csv(diffMethStats50Cov5, "2018-10-25-Loci-Analysis/2019-03-13-Differentially-Methylated-Loci-50-Cov5.csv") #Save table as .csv ``` #### 10x coverage ```{r} differentialMethylationStatsCov10 <- calculateDiffMeth(methylationInformationCov10) #Calculate differential methylation statistics based on treatment indication from processBismarkAln diffMethStats50Cov10 <- getMethylDiff(differentialMethylationStatsCov10, difference = 50, qvalue = 0.01) #Identify loci that are at least 50% different head(diffMethStats50Cov10) #Confirm creation ``` ```{r} write.csv(diffMethStats50Cov10, "2018-10-25-Loci-Analysis/2019-03-13-Differentially-Methylated-Loci-50-Cov10.csv") #Save table as .csv ``` ### `destrand = TRUE` #### 5x coverage ```{r} differentialMethylationStatsCov5Destrand <- calculateDiffMeth(methylationInformationCov5Destrand) #Calculate differential methylation statistics based on treatment indication from processBismarkAln diffMethStats50Cov5Destrand <- getMethylDiff(differentialMethylationStatsCov5Destrand, difference = 50, qvalue = 0.01) #Identify loci that are at least 50% different head(diffMethStats50Cov5Destrand) #Confirm creation ``` ```{r} write.csv(diffMethStats50Cov5Destrand, "2018-10-25-Loci-Analysis/2019-03-13-Differentially-Methylated-Loci-Destrand-50-Cov5.csv") #Save table as .csv ``` #### 10x coverage ```{r} differentialMethylationStatsCov10Destrand <- calculateDiffMeth(methylationInformationCov10Destrand) #Calculate differential methylation statistics based on treatment indication from processBismarkAln diffMethStats50Cov10Destrand <- getMethylDiff(differentialMethylationStatsCov10Destrand, difference = 50, qvalue = 0.01) #Identify loci that are at least 50% different head(diffMethStats50Cov10Destrand) #Confirm creation ``` ```{r} write.csv(diffMethStats50Cov10Destrand, "2018-10-25-Loci-Analysis/2019-03-13-Differentially-Methylated-Loci-Destrand-50-Cov10.csv") #Save table as .csv ``` #### Filtered coverage ```{r} differentialMethylationStatsFilteredCov5Destrand <- calculateDiffMeth(methylationInformationFilteredCov5Destrand) #Calculate differential methylation statistics based on treatment indication from processBismarkAln head(differentialMethylationStatsFilteredCov5Destrand) #Look at differential methylation statistics ``` ```{r} write.csv(differentialMethylationStatsFilteredCov5Destrand, "2018-10-25-Loci-Analysis/2019-06-30-All-Loci-Methylation-Statistic-Filtered-Destrand-50-Cov5.csv") #Save table as a .csv ``` ```{r} diffMethStats50FilteredCov5Destrand <- getMethylDiff(differentialMethylationStatsFilteredCov5Destrand, difference = 50, qvalue = 0.01) #Identify loci that are at least 50% different head(diffMethStats50FilteredCov5Destrand) #Confirm creation ``` ```{r} write.csv(diffMethStats50FilteredCov5Destrand, "2018-10-25-Loci-Analysis/2019-04-05-Differentially-Methylated-Loci-Filtered-Destrand-50-Cov5.csv") #Save table as .csv ``` ##### Hypermethylated DML ```{r} diffMethStats50FilteredCov5DestrandHyper <- getMethylDiff(differentialMethylationStatsFilteredCov5Destrand, difference = 50, qvalue = 0.01, type = "hyper") #Identify hypermethylated loci that are at least 50% different head(diffMethStats50FilteredCov5DestrandHyper) #Confirm creation ``` ```{r} write.csv(diffMethStats50FilteredCov5DestrandHyper, "2018-10-25-Loci-Analysis/2019-04-05-Differentially-Methylated-Loci-Filtered-Destrand-50-Cov5-Hypermethylated.csv") #Save table as .csv ``` ##### Hypomethylated DML ```{r} diffMethStats50FilteredCov5DestrandHypo <- getMethylDiff(differentialMethylationStatsFilteredCov5Destrand, difference = 50, qvalue = 0.01, type = "hypo") #Identify hypomethylated loci that are at least 50% different head(diffMethStats50FilteredCov5DestrandHypo) #Confirm creation ``` ```{r} write.csv(diffMethStats50FilteredCov5DestrandHypo, "2018-10-25-Loci-Analysis/2019-04-05-Differentially-Methylated-Loci-Filtered-Destrand-50-Cov5-Hypomethylated.csv") #Save table as .csv ``` #### DML distribution figure I want to plot the distribution of DML across the *C. virginica* genome and compare it to the number of genes in each chromosome. ##### Calculate distribution of DML across chromosomes ```{r} DMLchrCounts <-as.data.frame(table(diffMethStats50FilteredCov5Destrand$chr)) #Count the number of DML/chromosome DMLchrCounts$chr <- c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10") #Create column with chr name DMLchrCounts <- DMLchrCounts[,-1] #Remove column with chromosome RefSeq ID DMLchrCounts <- DMLchrCounts[,c(2,1)] #Reorganize columns colnames(DMLchrCounts) <- c("chr", "DMLCount") #Rename columns head(DMLchrCounts) #Confirm formatting ``` ```{r} write.csv(DMLchrCounts, "2019-09-30-DML-per-Chromosome.csv", row.names = FALSE, quote = FALSE) #Save file ``` ##### [*C. virginica* genome information from NCBI](https://www.ncbi.nlm.nih.gov/genome/398) ```{r} DMLchrCounts$geneCount <- c(3932, 4137, 4712, 3896, 6239, 2630, 3015, 4445, 4960, 1527) #Create column with number of gene sequences in each chromosome DMLchrCounts$DMLbyGenes <- (DMLchrCounts$DMLCount / DMLchrCounts$geneCount)*100 head(DMLchrCounts) #Confirm dataframe creation ``` ##### Create figure ```{r} #pdf("2019-09-30-DML-and-Gene-Distribution.pdf", height = 8.5, width = 11) #Save figure as a pdf par(mar = c(5,7,2,10)) #Change figure boundaries DMLbarplot <- barplot(as.matrix(t(DMLchrCounts$DMLCount)), axes = FALSE, names.arg = DMLchrCounts$chr, cex.names = 1.5, xlim = c(0.7,11.5), ylim = c(0,150), col = "grey80") #Create a barplot and save as a new object. Use axes = FALSE to remove the y-axis and names.arg to set labels on the x-axis. The object contains x coordinates for bars, so xlim is set at 12 to compensate for maximum value of 11.5 mtext(side = 1, "Chromosome", line = 3, cex = 1.5) #Add x-axis label axis(side = 2, line = 1.5, at = seq(0, 150, by = 50), las = 2, col = "grey80", cex.axis = 1.2) #Add y-axis for DML counts mtext(side = 2, "Number of DML", line = 5, cex = 1.5) #Add y-axis label for DML counts par(new = TRUE) #Create a new plot plot(x = DMLbarplot, y = DMLchrCounts$geneCount, type = "b", axes = FALSE, xlab = "", ylab = "", xaxs = "i", yaxs = "i", pch = 16, col = "grey20", xlim = c(0,12), ylim = c(0,6500)) #Plot points and lines (type = "b") for gene count by chromosome. Use the coordinates from DMLbarplot (x = DMLbarplot) and set xlim = (0,12) so plots are lined up. Use axes = FALSE to remove both axes. Remove x and y lables (xlab = ""; ylab = ""). Set ylim = (0,6500) to account for max y-values. Use xaxs and yxaxs to remove space between axes. axis(side = 4, line = 1.5, at = seq(0, 6500, by = 500), las = 2, col = "grey80", cex.axis = 1.2) #Add y-axis for gene sequence counts mtext(side = 4, "Number of Genes", line = 6, cex = 1.5) #Add y-axis label for gene sequence counts #dev.off() #Turn off plotting device ``` #### Preliminary Analyses ###### Plot customization ```{r} plotCustomization <- data.frame(sample = 1:10, treatmentSpecification) #Create dataframe with sample treatment information head(plotCustomization) #Confirm dataframe creation ``` ###### Principal Component Analyses `methylKit` creates PCA biplots, but I want to dig further into the analysis and create custom biplots. ####### All methylation data ```{r} allDataPCA <- PCASamples(methylationInformationFilteredCov5Destrand, obj.return = TRUE) #Run a PCA analysis on percent methylation for all samples. methylKit uses prcomp to create the PCA matrix summary(allDataPCA) #Look at summary statistics. The first PC explains 18.1% of variation, the second PC explains 11.7% of variation ``` ```{r} #pdf("2019-11-19-All-Data-PCA.pdf", width = 11, height = 8.5) par(mar = c(5, 5, 1, 1)) #Specify inner and outer margins fig.allDataPCA <- ordiplot(allDataPCA, choices = c(1, 2), type = "none", display = "sites", cex = 0.5, xlab = "", ylab = "", xaxt = "n", yaxt = "n") #Use ordiplot to create base biplot. Do not add any points points(fig.allDataPCA, "sites", col = c(rep(plotColors[2], times = 5), rep(plotColors[4], times = 5)), pch = c(rep(16, times = 5), rep(17, times = 5)), cex = 3) #Add each sample. Darker samples are ambient, lighter samples are elevated pCO2 #Add multiple white boxes on top of the default black box to manually change the color box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") ordiellipse(allDataPCA, plotCustomization$treatment, show.groups = "1", col = plotColors[4]) #Add confidence ellipse around the samples in elevated pCO2 ordiellipse(allDataPCA, plotCustomization$treatment, show.groups = "0", col = plotColors[2]) #Add confidence ellipse around the samples in ambient pCO2 axis(side = 1, labels = TRUE, col = "grey80", cex.axis = 1.7) #Add x-axis mtext(side = 1, text = "PC 1 (18.1%)", line = 3, cex = 1.5) #Add x-axis label axis(side = 2, labels = TRUE, col = "grey80", cex.axis = 1.7) #Add y-axis mtext(side = 2, text = "PC 2 (11.7%)", line = 3, cex = 1.5) #Add y-axis label legend("topleft", pch = c(16, 17), legend = c("Control", "Elevated"), col = c(plotColors[2], plotColors[4]), cex = 1.7, bty = "n") #Add a legend with information about ambient and elevated samples #dev.off() ``` ####### DML The first thing I need to do is subset the data so it only includes DML. Then, I can use similar code to what I used above to create PCA plots. ```{r} DMLPositions <- rep(0, times = length(diffMethStats50FilteredCov5Destrand$chr)) #Create an empty vector with 598 places to store row numbers for (i in 1:length(DMLPositions)) { DMLPositions[i] <- which(getData(diffMethStats50FilteredCov5Destrand)$start[i] == getData(methylationInformationFilteredCov5Destrand)$start) } #For each DML, save the row number where that DML is found in methylationInformationFilteredCov5Destrand tail(DMLPositions) #Confirm vector was created ``` ```{r} DMLMatrix <- methylationInformationFilteredCov5Destrand[DMLPositions,] #Subset methylationInformationFilteredCov5Destrand to only include DML and save as a new methylBase object sum((DMLMatrix$start) == (diffMethStats50FilteredCov5Destrand$start)) == length(diffMethStats50FilteredCov5Destrand$start) #Confirm that start columns are identical. If they are identical, the sum of all TRUE statements should equal the length of the original methylBase object tail(DMLMatrix) #Confirm methylBase object creation ``` ```{r} DMLDataPCA <- PCASamples(DMLMatrix, obj.return = TRUE) #Run a PCA analysis on percent methylation for all samples. methylKit uses prcomp to create the PCA matrix summary(DMLDataPCA) #Look at summary statistics. The first PC explains 47.6% of variation, the second PC explains 9.5% of variation ``` ```{r} #pdf("2019-11-19-DML-Only-PCA.pdf", width = 11, height = 8.5) par(mar = c(5, 5, 1, 1)) #Specify inner and outer margins fig.DMLDataPCA <- ordiplot(DMLDataPCA, choices = c(1, 2), type = "none", display = "sites", cex = 0.5, xlab = "", ylab = "", xaxt = "n", yaxt = "n") #Use ordiplot to create base biplot. Do not add any points points(fig.DMLDataPCA, "sites", col = c(rep(plotColors[2], times = 5), rep(plotColors[4], times = 5)), pch = c(rep(16, times = 5), rep(17, times = 5)), cex = 3) #Add each sample. Darker samples are ambient, lighter samples are elevated pCO2 #Add multiple white boxes on top of the default black box to manually change the color box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") ordiellipse(DMLDataPCA, plotCustomization$treatment, show.groups = "1", col = plotColors[4]) #Add confidence ellipse around the samples in elevated pCO2 ordiellipse(DMLDataPCA, plotCustomization$treatment, show.groups = "0", col = plotColors[2]) #Add confidence ellipse around the samples in ambient pCO2 axis(side = 1, labels = TRUE, col = "grey80", cex.axis = 1.7) #Add x-axis mtext(side = 1, text = "PC 1 (47.6%)", line = 3, cex = 1.5) #Add x-axis label axis(side = 2, labels = TRUE, col = "grey80", cex.axis = 1.7) #Add y-axis mtext(side = 2, text = "PC 2 (9.5%)", line = 3, cex = 1.5) #Add y-axis label legend("topright", pch = c(16, 17), legend = c("Control", "Elevated"), col = c(plotColors[2], plotColors[4]), cex = 1.7, bty = "n") #Add a legend with information about ambient and elevated samples #dev.off() ``` ####### Multipanel plot ```{r} #pdf("2019-11-19-PCA-Multpanel.pdf", height = 8.5, width = 11) #Save plot par(mfrow = c(1, 2), oma = c(5, 2, 2, 0), mar = c(0, 3, 0, 5)) #Set up parameters for multipanel plot #All CpG Loci fig.allDataPCA <- ordiplot(allDataPCA, choices = c(1, 2), type = "none", display = "sites", cex = 0.5, xlim = c(-400, 200), xlab = "", ylab = "", xaxt = "n", yaxt = "n") #Use ordiplot to create base biplot. Do not add any points points(fig.allDataPCA, "sites", col = c(rep(plotColors[2], times = 5), rep(plotColors[4], times = 5)), pch = c(rep(16, times = 5), rep(17, times = 5)), cex = 3) #Add each sample. Darker samples are ambient, lighter samples are elevated pCO2 #Add multiple white boxes on top of the default black box to manually change the color box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") ordiellipse(allDataPCA, plotCustomization$treatment, show.groups = "1", col = plotColors[4]) #Add confidence ellipse around the samples in elevated pCO2 ordiellipse(allDataPCA, plotCustomization$treatment, show.groups = "0", col = plotColors[2]) #Add confidence ellipse around the samples in ambient pCO2 axis(side = 1, at = seq(-400, 200, 200), col = "grey80", cex.axis = 1.7) #Add x-axis mtext(side = 1, text = "PC 1 (18.1%)", line = 3, cex = 1.5) #Add x-axis label axis(side = 2, labels = TRUE, col = "grey80", cex.axis = 1.7) #Add y-axis mtext(side = 2, text = "PC 2 (11.7%)", line = 3, cex = 1.5) #Add y-axis label mtext(side = 3, line = -5, adj = c(-100,0), text = " a. All CpG Loci", cex = 1.5) #DML fig.DMLDataPCA <- ordiplot(DMLDataPCA, choices = c(1, 2), type = "none", display = "sites", cex = 0.5, xlim = c(-20,20), ylim = c(-10,20), xlab = "", ylab = "", xaxt = "n", yaxt = "n") #Use ordiplot to create base biplot. Do not add any points points(fig.DMLDataPCA, "sites", col = c(rep(plotColors[2], times = 5), rep(plotColors[4], times = 5)), pch = c(rep(16, times = 5), rep(17, times = 5)), cex = 3) #Add each sample. Darker samples are ambient, lighter samples are elevated pCO2 #Add multiple white boxes on top of the default black box to manually change the color box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") box(col = "white") ordiellipse(DMLDataPCA, plotCustomization$treatment, show.groups = "1", col = plotColors[4]) #Add confidence ellipse around the samples in elevated pCO2 ordiellipse(DMLDataPCA, plotCustomization$treatment, show.groups = "0", col = plotColors[2]) #Add confidence ellipse around the samples in ambient pCO2 axis(side = 1, labels = TRUE, col = "grey80", cex.axis = 1.7) #Add x-axis mtext(side = 1, text = "PC 1 (47.6%)", line = 3, cex = 1.5) #Add x-axis label axis(side = 2, labels = TRUE, col = "grey80", cex.axis = 1.7) #Add y-axis mtext(side = 2, text = "PC 2 (9.5%)", line = 3, cex = 1.5) #Add y-axis label mtext(side = 3, line = -5, adj = c(-100, 0), text = " b. DML", cex = 1.5) #Add test category legend(x = 0, y = -22, pch = c(16, 17), legend = c("Control", "Elevated"), col = c(plotColors[2], plotColors[4]), y.intersp = 1, x.intersp = 1, cex = 1.7, bty = "n") #Add a legend with information about ambient and elevated samples #dev.off() ``` ##### Heatmap ```{r} percMethDML <- percMethylation(DMLMatrix, rowids = TRUE) #Get percent methylation for all samples at DML. Include row IDs (chr, start, end) information head(percMethDML) #Confirm percent methylation matrix was created ``` I'll first use `pheatmap`. ```{r} pheatmap(percMethDML, color = rev(plotColors), cluster_rows = TRUE, clustering_distance_rows = "euclidean", treeheight_row = 70, show_rownames = FALSE, cluster_cols = TRUE, clustering_distance_cols = "euclidean", treeheight_col = 40, show_colnames = FALSE, annotation_col = data.frame(pCO2 = factor(rep(c("Ambient","Treatment"), each = 5))), annotation_colors = list(pCO2 = c(Ambient = "grey90", Treatment = "grey10")), annotation_legend = FALSE, annotation_names_col = FALSE, legend = TRUE) #Create heatmap using pheatmap using percMethDML and plotColors color scheme. Cluster rows and columns using euclidean distances. Adjust the dendogram tree heights and do not show any row or column names. Create a dataframe with treatment information using annotation_col. Use annotation_colors to indicate colors for treatment ("grey90") and ambient ("grey10") samples. Do not include an annotation_legend or name for annotations (annotatino_names_col). Include a legend. ``` I also want to try using a different heatmap package, `heatmap.2` from `gplots`. `heatmap.2` allows you to add a density plot with the legend, which could be interesting information to include. ```{r} #pdf("2019-11-19-DML-Only-Heatmap.pdf", height = 8.5, width = 11) par(oma = c(0, 1, 0, 0)) #Adjust outer margins heatmap.2(percMethDML, col = plotColors, scale = "none", margins = c(1,1), trace = "none", tracecol = "black", labRow = FALSE, labCol = FALSE, ColSideColors = c(rep("grey90", times = 5), rep("grey10", times = 5)), key = TRUE, keysize = 1.8, density.info = "density", key.title = "", key.xlab = "% Methylation", key.ylab = "", key.par = list(cex.lab = 2.0, cex.axis = 1.5)) #Create heatmap using heatmap.2 from gplots package using percMethDML data. Use plotColors but do not scale data, label rows, or label columns. Use ColSideColors to indicate colors for treatment and ambient samples. Add a legend using key, and adjust keysize. Have key display density data with density.info. Do not add a key title or y-axis label, and label x axis with key.xlab. mtext("Density", cex = 1.6, las = 3, adj = 0.8, padj = -29) #Manually add y-axis label for key since heatmap.2 doesn't let you change font size #dev.off() ``` # Differentially methylated regions The code above allowed me to identify differentially methylated *loci* (DMLs). For an exploratory analysis, it is also useful to identify differentially methylated *regions* (DMRs). I will complete the tiling analysis using my `mincov = 3` object. I will use both 100 and 1000 bp sized windows and steps. ## Tiling window analysis ### 100 bp windows ```{r} tiles100 <- tileMethylCounts(processedFilteredFilesCov5, win.size = 100, step.size = 100, cov.bases = 1) #Add up C and T counts from each covered cytosine and return total C and T count for each tile. Perform this analysis with 100 bp sized windows, and slide 100 bp for each new window head(tiles100[[10]]) #Confirm object creation. [[10]] only shows tile100 head for the 10th sample processed. ``` #### Obtain clustering information ```{r} methylationInformationTiles100 <- unite(tiles100) #Combine all processed files into a single table clusteringInformationTiles100 <- clusterSamples(methylationInformationTiles100, dist = "correlation", method = "ward.D2", plot = FALSE) #Save clustering information as a new object ``` ```{r} jpeg(filename = "2018-10-25-Tiling-Analysis/2019-06-05-Full-Sample-Pearson-Correlation-Plot-FilteredCov5Destrand-Tiles100.jpeg", height = 1000, width = 1000) #Save file with designated name getCorrelation(methylationInformationTiles100, plot = TRUE) #Understand correlation between methylation patterns in different samples dev.off() ``` ```{r} jpeg(filename = "2018-10-25-Tiling-Analysis/2019-06-05-Full-Sample-CpG-Methylation-Clustering-FilteredCov5Destrand-Tiles100.jpeg", height = 1000, width = 1000) #Save file with designated name clusterSamples(methylationInformationTiles100, dist = "correlation", method = "ward.D2", plot = TRUE) #Cluster samples based on correlation coefficients dev.off() ``` ```{r} jpeg(filename = "2018-10-25-Tiling-Analysis/2019-06-05-Full-Sample-Methylation-PCA-FilteredCov5Destrand-Tiles100.jpeg", height = 1000, width = 1000) #Save file with designated name PCASamples(methylationInformationTiles100) #Run a PCA analysis on percent methylation for all samples dev.off() #Turn off plotting device ``` ```{r} jpeg(filename = "2018-10-25-Tiling-Analysis/2019-06-05-Full-Sample-Methylation-Screeplot-FilteredCov5Destrand-Tiles100.jpeg", height = 1000, width = 1000) #Save file with designated name PCASamples(methylationInformationTiles100, screeplot = TRUE) #Run the PCA analysis and plot variances against PC number in a screeplot dev.off() ``` #### Obtain differentially methylated regions ```{r} differentialMethylationStatsTiles100 <- calculateDiffMeth(methylationInformationTiles100) #Calculate differential methylation statistics based on treatment indication from processBismarkAln diffMethStats50Tiles100 <- getMethylDiff(differentialMethylationStatsTiles100, difference = 50, qvalue = 0.01) #Identify loci that are at least 50% different head(diffMethStats50Tiles100) #Confirm creation ``` ```{r} write.csv(diffMethStats50Tiles100, "2018-10-25-Tiling-Analysis/2019-06-05-Differentially-Methylated-Regions-50-FilteredCov5Destrand-Tiles100.csv") #Save table as .csv ``` ##### Hypermethylated DMR ```{r} diffMethStats50FilteredCov5DestrandTiles100Hyper <- getMethylDiff(differentialMethylationStatsTiles100, difference = 50, qvalue = 0.01, type = "hyper") #Identify hypermethylated regions that are at least 50% different head(diffMethStats50FilteredCov5DestrandTiles100Hyper) #Confirm creation ``` ```{r} write.csv(diffMethStats50FilteredCov5DestrandTiles100Hyper, "2018-10-25-Loci-Analysis/2019-06-05-Differentially-Methylated-Regions-50-FilteredCov5Destrand-50-Tiles100-Hypermethylated.csv") #Save table as .csv ``` ##### Hypomethylated DMR ```{r} diffMethStats50FilteredCov5DestrandTiles100Hypo <- getMethylDiff(differentialMethylationStatsTiles100, difference = 50, qvalue = 0.01, type = "hypo") #Identify hypomethylated regions that are at least 50% different head(diffMethStats50FilteredCov5DestrandTiles100Hypo) #Confirm creation ``` ```{r} write.csv(diffMethStats50FilteredCov5DestrandTiles100Hypo, "2018-10-25-Loci-Analysis/2019-06-05-Differentially-Methylated-Regions-50-FilteredCov5Destrand-50-Tiles100-Hypomethylated.csv") #Save table as .csv ``` ### 1000 bp windows ```{r} tiles1000 <- tileMethylCounts(processedFilteredFilesCov5, win.size = 1000, step.size = 1000, cov.bases = 1) #Add up C and T counts from each covered cytosine and return total C and T count for each tile. Perform this analysis with 1000 bp sized windows, and slide 1000 bp for each new window. This is the methylKit default. head(tiles1000[[10]]) #Confirm object creation. [[10]] only shows tile100 head for the 10th sample processed. ``` #### Obtain clustering information ```{r} methylationInformationTiles1000 <- unite(tiles1000) #Combine all processed files into a single table clusteringInformationTiles1000 <- clusterSamples(methylationInformationTiles1000, dist = "correlation", method = "ward.D2", plot = FALSE) #Save clustering information as a new object ``` ```{r} jpeg(filename = "2018-10-25-Tiling-Analysis/2019-06-05-Full-Sample-Pearson-Correlation-Plot-FilteredCov5Destrand-Tiles1000.jpeg", height = 1000, width = 1000) #Save file with designated name getCorrelation(methylationInformationTiles1000, plot = TRUE) #Understand correlation between methylation patterns in different samples dev.off() ``` ```{r} jpeg(filename = "2018-10-25-Tiling-Analysis/2019-06-05-Full-Sample-CpG-Methylation-Clustering-FilteredCov5Destrand-Tiles1000.jpeg", height = 1000, width = 1000) #Save file with designated name clusterSamples(methylationInformationTiles1000, dist = "correlation", method = "ward.D2", plot = TRUE) #Cluster samples based on correlation coefficients dev.off() ``` ```{r} jpeg(filename = "2018-10-25-Tiling-Analysis/2019-06-05-Full-Sample-Methylation-PCA-FilteredCov5Destrand-Tiles1000.jpeg", height = 1000, width = 1000) #Save file with designated name PCASamples(methylationInformationTiles1000) #Run a PCA analysis on percent methylation for all samples dev.off() #Turn off plotting device ``` ```{r} jpeg(filename = "2018-10-25-Tiling-Analysis/2019-06-05-Full-Sample-Methylation-Screeplot-FilteredCov5Destrand-Tiles1000.jpeg", height = 1000, width = 1000) #Save file with designated name PCASamples(methylationInformationTiles1000, screeplot = TRUE) #Run the PCA analysis and plot variances against PC number in a screeplot dev.off() ``` #### Obtain differentially methylated regions ```{r} differentialMethylationStatsTiles1000 <- calculateDiffMeth(methylationInformationTiles1000) #Calculate differential methylation statistics based on treatment indication from processBismarkAln diffMethStats50Tiles1000 <- getMethylDiff(differentialMethylationStatsTiles1000, difference = 50, qvalue = 0.01) #Identify loci that are at least 50% different head(diffMethStats50Tiles1000) #Confirm creation ``` ```{r} write.csv(diffMethStats50Tiles1000, "2018-10-25-Tiling-Analysis/2019-06-05-Differentially-Methylated-Regions-50-FilteredCov5Destrand-Tiles1000.csv") #Save table as .csv ``` ### 500 bp windows ```{r} tiles500 <- tileMethylCounts(processedFilteredFilesCov5, win.size = 500, step.size = 500, cov.bases = 1) #Add up C and T counts from each covered cytosine and return total C and T count for each tile. Perform this analysis with 500 bp sized windows, and slide 500 bp for each new window. head(tiles500[[10]]) #Confirm object creation. [[10]] only shows tile500 head for the 10th sample processed. ``` #### Obtain clustering information ```{r} methylationInformationTiles500 <- unite(tiles500) #Combine all processed files into a single table clusteringInformationTiles500 <- clusterSamples(methylationInformationTiles500, dist = "correlation", method = "ward.D2", plot = FALSE) #Save clustering information as a new object ``` ```{r} jpeg(filename = "2018-10-25-Tiling-Analysis/2019-06-05-Full-Sample-Pearson-Correlation-Plot-FilteredCov5Destrand-Tiles500.jpeg", height = 1000, width = 1000) #Save file with designated name getCorrelation(clusteringInformationTiles500, plot = TRUE) #Understand correlation between methylation patterns in different samples dev.off() ``` ```{r} jpeg(filename = "2018-10-25-Tiling-Analysis/2019-06-05-Full-Sample-CpG-Methylation-Clustering-FilteredCov5Destrand-Tiles500.jpeg", height = 1000, width = 1000) #Save file with designated name clusterSamples(clusteringInformationTiles500, dist = "correlation", method = "ward.D2", plot = TRUE) #Cluster samples based on correlation coefficients dev.off() ``` ```{r} jpeg(filename = "2018-10-25-Tiling-Analysis/2019-06-05-Full-Sample-Methylation-PCA-FilteredCov5Destrand-Tiles500.jpeg", height = 1000, width = 1000) #Save file with designated name PCASamples(clusteringInformationTiles500) #Run a PCA analysis on percent methylation for all samples dev.off() #Turn off plotting device ``` ```{r} jpeg(filename = "2018-10-25-Tiling-Analysis/2019-06-05-Full-Sample-Methylation-Screeplot-FilteredCov5Destrand-Tiles500.jpeg", height = 1000, width = 1000) #Save file with designated name PCASamples(clusteringInformationTiles500, screeplot = TRUE) #Run the PCA analysis and plot variances against PC number in a screeplot dev.off() ``` #### Obtain differentially methylated regions ```{r} differentialMethylationStatsTiles500 <- calculateDiffMeth(methylationInformationTiles500) #Calculate differential methylation statistics based on treatment indication from processBismarkAln diffMethStats50Tiles500 <- getMethylDiff(differentialMethylationStatsTiles500, difference = 50, qvalue = 0.01) #Identify loci that are at least 50% different head(diffMethStats50Tiles500) #Confirm creation ``` ```{r} write.csv(diffMethStats50Tiles500, "2018-10-25-Tiling-Analysis/2019-06-05-Differentially-Methylated-Regions-50-FilteredCov5Destrand-Tiles500.csv") #Save table as .csv ``` # Save DMLs and DMRs as BEDfiles ## Install packages **Do not** install these packages until analysis in `methylKit` is complete, as some of the installed packages will mask others important for analysis. ```{r} library(readr) #Load package install.packages("tidyverse") #Install tidyverse library(tidyverse) #Load package ``` ## DML Background ```{r} head(methylationInformationFilteredCov5Destrand) #I only want the columns with chromosome, start position, stop position, and strand. ``` ```{r} methylationInformationFilteredCov5DestrandReduced <- data.frame("chr" = methylationInformationFilteredCov5Destrand$chr, "start" = methylationInformationFilteredCov5Destrand$start, "stop" = methylationInformationFilteredCov5Destrand$end, "strand" = methylationInformationFilteredCov5Destrand$strand) #Subset file head(methylationInformationFilteredCov5DestrandReduced) #Confirm subsetting ``` ```{r} methylationInformationFilteredCov5DestrandReduced$start <- (methylationInformationFilteredCov5DestrandReduced$start - 1) #Subtract 1 from the start position for visualization head(methylationInformationFilteredCov5DestrandReduced) #Confirm subtracting worked ``` ```{r} write_delim(methylationInformationFilteredCov5DestrandReduced, "2019-05-14-Methylation-Information-Filtered-Destrand-Cov5.bed", delim = "\t", col_names = FALSE) #Write out all methylation information as a background to be used for gene enrichment analyses. ``` ```{bash} head 2019-05-14-Methylation-Information-Filtered-Destrand-Cov5.bed #Confirm formatting was retained in export ``` ## DMLs ### `destrand = FALSE` #### 3x coverage ```{r} DMLPlus10222018 <- filter(diffMethStats50Cov3, strand == "+") %>% mutate(start = start -1, end = end + 1) %>% select(chr, start, end, strand, meth.diff) #Save + strand of DMLs as a new object DMLMinus10222018 <- filter(diffMethStats50Cov3, strand == "-") %>% mutate(start = start -2) %>% select(chr, start, end, strand, meth.diff) #Save - strand of DMLs as a new object ``` ```{r} DML10222018 <- bind_rows(DMLPlus10222018, DMLMinus10222018) %>% arrange(chr, start) %>% mutate_if(is.numeric, as.integer) #Join + and - strand information to be saved as a BED file, and avoid writing information in scientific notation ``` ```{r} write_delim(DML10222018, "2018-11-07-DML-Locations.bed", delim = '\t', col_names = FALSE) #Save data as a BED file ``` #### 5x coverage ```{r} DML5xPlus03132019 <- filter(diffMethStats50Cov5, strand == "+") %>% mutate(start = start -1, end = end + 1) %>% select(chr, start, end, strand, meth.diff) #Save + strand of DMLs as a new object DML5xMinus03132019 <- filter(diffMethStats50Cov5, strand == "-") %>% mutate(start = start -2) %>% select(chr, start, end, strand, meth.diff) #Save - strand of DMLs as a new object ``` ```{r} DML5x03132019 <- bind_rows(DML5xPlus03132019, DML5xMinus03132019) %>% arrange(chr, start) %>% mutate_if(is.numeric, as.integer) #Join + and - strand information to be saved as a BED file, and avoid writing information in scientific notation ``` ```{r} write_delim(DML5x03132019, "2019-03-13-DML-5x-Locations.bed", delim = '\t', col_names = FALSE) #Save data as a BED file ``` #### 10x coverage ```{r} DML10xPlus03132019 <- filter(diffMethStats50Cov10, strand == "+") %>% mutate(start = start -1, end = end + 1) %>% select(chr, start, end, strand, meth.diff) #Save + strand of DMLs as a new object DML10xMinus03132019 <- filter(diffMethStats50Cov10, strand == "-") %>% mutate(start = start -2) %>% select(chr, start, end, strand, meth.diff) #Save - strand of DMLs as a new object ``` ```{r} DML10x03132019 <- bind_rows(DML10xPlus03132019, DML10xMinus03132019) %>% arrange(chr, start) %>% mutate_if(is.numeric, as.integer) #Join + and - strand information to be saved as a BED file, and avoid writing information in scientific notation ``` ```{r} write_delim(DML10x03132019, "2019-03-13-DML-10x-Locations.bed", delim = '\t', col_names = FALSE) #Save data as a BED file ``` ### `destrand = TRUE` #### 5x coverage ```{r} DML5xDestrand03132019 <- mutate(diffMethStats50Cov5Destrand, start = start -1, end = end + 1) %>% select(chr, start, end, meth.diff) %>% mutate_if(is.numeric, as.integer) #Save as a BED file, and avoid writing information in scientific notation ``` ```{r} head(DML5xDestrand03132019) #Confirm changes ``` ```{r} write_delim(DML5xDestrand03132019, "2019-03-13-DML-Destrand-5x-Locations.bed", delim = '\t', col_names = FALSE) #Save data as a BED file ``` #### 10x coverage ```{r} DML10xDestrand03132019 <- mutate(diffMethStats50Cov10Destrand, start = start -1, end = end + 1) %>% select(chr, start, end, meth.diff) %>% mutate_if(is.numeric, as.integer) #Save as a BED file, and avoid writing information in scientific notation ``` ```{r} head(DML10xDestrand03132019) #Confirm changes ``` ```{r} write_delim(DML10xDestrand03132019, "2019-03-13-DML-Destrand-10x-Locations.bed", delim = '\t', col_names = FALSE) #Save data as a BED file ``` #### Filtered coverage ```{r} DML5xFilteredDestrand04052019 <- mutate(diffMethStats50FilteredCov5Destrand, start = start -1, end = end + 1) %>% select(chr, start, end, meth.diff) %>% mutate_if(is.numeric, as.integer) #Save as a BED file, and avoid writing information in scientific notation ``` ```{r} head(DML5xFilteredDestrand04052019) #Confirm changes ``` ```{r} write_delim(DML5xFilteredDestrand04052019, "2019-04-05-DML-Destrand-5x-Locations.bed", delim = '\t', col_names = FALSE) #Save data as a BED file ``` ##### Hypermethylated DML ```{r} DML5xFilteredDestrandHyper04302019 <- mutate(diffMethStats50FilteredCov5DestrandHyper, start = start -1, end = end + 1) %>% select(chr, start, end, meth.diff) %>% mutate_if(is.numeric, as.integer) #Save as a BED file, and avoid writing information in scientific notation ``` ```{r} head(DML5xFilteredDestrandHyper04302019) #Confirm changes ``` ```{r} write_delim(DML5xFilteredDestrandHyper04302019, "2019-04-05-DML-Destrand-5x-Locations-Hypermethylated.bed", delim = '\t', col_names = FALSE) #Save data as a BED file ``` ##### Hypomethyalted DML ```{r} DML5xFilteredDestrandHypo04302019 <- mutate(diffMethStats50FilteredCov5DestrandHypo, start = start -1, end = end + 1) %>% select(chr, start, end, meth.diff) %>% mutate_if(is.numeric, as.integer) #Save as a BED file, and avoid writing information in scientific notation ``` ```{r} head(DML5xFilteredDestrandHypo04302019) #Confirm changes ``` ```{r} write_delim(DML5xFilteredDestrandHypo04302019, "2019-04-05-DML-Destrand-5x-Locations-Hypomethylated.bed", delim = '\t', col_names = FALSE) #Save data as a BED file ``` ## DMR Background ```{r} head(methylationInformationTiles100) #I only want the columns with chromosome, start position, stop position, and strand. ``` ```{r} methylationInformationTiles100Reduced <- data.frame("chr" = methylationInformationTiles100$chr, "start" = methylationInformationTiles100$start, "stop" = methylationInformationTiles100$end, "strand" = methylationInformationTiles100$strand) #Subset file head(methylationInformationTiles100Reduced) #Confirm subsetting ``` ```{r} write_delim(methylationInformationTiles100Reduced, "2019-06-05-Methylation-Information-Filtered-Destrand-Cov5-Tiles100.bed", delim = "\t", col_names = FALSE) #Write out all methylation information as a background to be used for gene enrichment analyses. ``` ```{bash} head 2019-06-05-Methylation-Information-Filtered-Destrand-Cov5-Tiles100.bed #Confirm formatting was retained in export ``` ## DMRs ### 100 bp windows ```{r} head(diffMethStats50Tiles100) #I only want the chr, start, end, strand, and meth.diff columns ``` ```{r} DMR06052019 <- select(diffMethStats50Tiles100, chr, start, end, strand, meth.diff) %>% mutate(start = start -1) %>% mutate_if(is.numeric, as.integer) %>% mutate(TYPE ="DMR") %>% select(chr, start, end, TYPE, meth.diff) #Reformat object so it can be saved as a BEDfile. head(DMR06052019) ``` ```{r} write_delim(DMR06052019, "2019-06-05-DMR-Locations.bed", delim = '\t', col_names = FALSE) #Save data as a BED file ``` ##### Hypermethylated DML ```{r} DMR5xFilteredDestrandHyper06052019 <- mutate(diffMethStats50FilteredCov5DestrandTiles100Hyper, start = start -1, end = end + 1) %>% select(chr, start, end, meth.diff) %>% mutate_if(is.numeric, as.integer) #Save as a BED file, and avoid writing information in scientific notation ``` ```{r} head(DMR5xFilteredDestrandHyper06052019) #Confirm changes ``` ```{r} write_delim(DMR5xFilteredDestrandHyper06052019, "2019-06-05-DMR-Destrand-5x-Locations-Tiles100-Hypermethylated.bed", delim = '\t', col_names = FALSE) #Save data as a BED file ``` ##### Hypomethyalted DML ```{r} DMR5xFilteredDestrandHypo06052019 <- mutate(diffMethStats50FilteredCov5DestrandTiles100Hypo, start = start -1, end = end + 1) %>% select(chr, start, end, meth.diff) %>% mutate_if(is.numeric, as.integer) #Save as a BED file, and avoid writing information in scientific notation ``` ```{r} head(DMR5xFilteredDestrandHypo06052019) #Confirm changes ``` ```{r} write_delim(DMR5xFilteredDestrandHypo06052019, "2019-06-05-DMR-Destrand-5x-Locations-Tiles100-Hypomethylated.bed", delim = '\t', col_names = FALSE) #Save data as a BED file ``` ### 1000 bp windows ```{r} DMR100006052019 <- select(diffMethStats50Tiles1000, chr, start, end, strand, meth.diff) %>% mutate(start = start -1) %>% mutate_if(is.numeric, as.integer) %>% mutate(TYPE ="DMR") %>% select(chr, start, end, TYPE, meth.diff) #Reformat object so it can be saved as a BEDfile. head(DMR100006052019) ``` ```{r} write_delim(DMR100006052019, "2019-06-05-1000bp-DMR-Locations.bed", delim = '\t', col_names = FALSE) #Save data as a BED file ``` ### 500 bp windows ```{r} DMR50006052019 <- select(diffMethStats50Tiles500, chr, start, end, strand, meth.diff) %>% mutate(start = start -1) %>% mutate_if(is.numeric, as.integer) %>% mutate(TYPE ="DMR") %>% select(chr, start, end, TYPE, meth.diff) #Reformat object so it can be saved as a BEDfile. head(DMR50006052019) ``` ```{r} write_delim(DMR50006052019, "2019-06-05-500bp-DMR-Locations.bed", delim = '\t', col_names = FALSE) #Save data as a BED file ```