---
title: "MethylKit"
author: "Yaamini Venkataraman"
date: "10/25/2018"
output: html_document
---

In this file, I'll identify differentially methylated loci (DMLs) and regions (DMRs) using `methylKit`.

# Prepare R Markdown file

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Install packages

```{r}
install.packages("vegan") #Install the vegan package
require(vegan) #Load vegan
```

```{r}
install.packages("pheatmap") #Install the pheatmap package
require(pheatmap) #Load pheatmap
```

```{r}
install.packages("gplots") #Install the gplots package
require(gplots) #Load gplots
```

```{r}
install.packages("devtools") #Install the devtools package
require(devtools) #Load devtools
```

```{r}
source("http://bioconductor.org/biocLite.R") #Source package from bioconductor
BiocManager::install("methylKit") #Install methylkit
```

```{r}
install_github("al2na/methylKit", build_vignettes = FALSE, repos = BiocInstaller::biocinstallRepos(), dependencies = TRUE) #Install more methylKit options
```

```{r}
require(methylKit) #Load methylkit
```

# Obtain session information

```{r}
sessionInfo()
```

# Download files from `gannet`

```{bash}
wget -r -l1 --no-parent -A_dedup.sorted.bam http://gannet.fish.washington.edu/spartina/2018-10-10-project-virginica-oa-Large-Files/2018-11-07-Bismark-Mox/ #Download files from gannet. They will be stored in the same directory structure as they are online.
```

```{bash}
mv gannet.fish.washington.edu/spartina/2018-10-10-project-virginica-oa-Large-Files/2018-11-07-Bismark-Mox/* analyses/2018-10-25-MethylKit #Move all files from gannet folder to analyses folder
```

```{bash}
rm -r gannet.fish.washington.edu #Remove the empty gannet directory
```

# Process methylation data

```{r}
analysisFiles <- list("zr2096_1_dedup.sorted.bam",
                      "zr2096_2_dedup.sorted.bam",
                      "zr2096_3_dedup.sorted.bam",
                      "zr2096_4_dedup.sorted.bam",
                      "zr2096_5_dedup.sorted.bam",
                      "zr2096_6_dedup.sorted.bam",
                      "zr2096_7_dedup.sorted.bam",
                      "zr2096_8_dedup.sorted.bam",
                      "zr2096_9_dedup.sorted.bam",
                      "zr2096_10_dedup.sorted.bam") #Put all .bam files into a list for analysis.
```

```{r}
sample.IDs <- list("1", "2", "3", "4", "5", "6", "7", "8", "9", "10") #Create list of sample IDs
treatmentSpecification <- c(rep(0, times = 5), rep(1, times = 5)) #Specify which treatment the samples were from. 1 is the treatment (high pCO2) and 0 is the control (ambient pCO2)
```

I'll use `mincov` in `processBismarkAln` to set different coverage metrics. I'll try 3x, 5x, and 10x coverage.

## 3x coverage

```{r}
processedFilesCov3 <- processBismarkAln(location = analysisFiles, sample.id = sample.IDs, assembly = "v3", read.context = "CpG", mincov = 3, treatment = treatmentSpecification) #Process files for CpG methylation using 5x coverage. First 5 files were from ambient conditions, and the second from high pCO2 conditions.
```

## 5x coverage

```{r}
processedFilesCov5 <- processBismarkAln(location = analysisFiles, sample.id = sample.IDs, assembly = "v3", read.context = "CpG", mincov = 5, treatment = treatmentSpecification) #Process files for CpG methylation using 5x coverage. First 5 files were from ambient conditions, and the second from high pCO2 conditions.
```

## 10x coverage

```{r}
processedFilesCov10 <- processBismarkAln(location = analysisFiles, sample.id = sample.IDs, assembly = "v3", read.context = "CpG", mincov = 10, treatment = treatmentSpecification) #Process files for CpG methylation using 10x coverage. First 5 files were from ambient conditions, and the second from high pCO2 conditions.
```

I also want to try a two-step process for setting coverage. I'll use `processBismarkAln` with `mincov = 2` to quickly process my files. I'll then use `filterByCoverage` to set minimum and maximum coverage thresholds.

## Filtered coverage

```{r}
processedFiles <- processBismarkAln(location = analysisFiles, sample.id = sample.IDs, assembly = "v3", read.context = "CpG", mincov = 2, treatment = treatmentSpecification) #Process files for CpG methylation. Use 2x coverage for faster processing. Coverage will be adjusted later. First 5 files were from ambient conditions, and the second from high pCO2 conditions.
```

```{r}
processedFilteredFilesCov5 <- filterByCoverage(processedFiles, lo.count = 5, lo.perc = NULL, hi.count = 100, hi.perc = NULL) #filter processed files using lo.count and hi.count coverage thresholds. Coverage should be no less than 5 and should not exceed 100.
```

# Differentially methylated loci

## Obtain methylation and coverage plots

```{r}
nFiles <- length(sample.IDs) #Count number of samples
fileName <- data.frame("nameBase" = rep("2018-10-25-Loci-Analysis/2018-11-07-Percent-CpG-Methylation", times = nFiles),
                       "nameBase2" = rep("2018-10-25-Loci-Analysis/2018-11-07-Percent-CpG-Coverage", times = nFiles),
                       "sample.ID" = 1:10) #Create new dataframe for filenames
head(fileName) #Confirm dataframe creation
```

### 3x coverage

```{r}
fileName$actualFileName <- paste(fileName$nameBase, "-3xCoverage", "-Sample", fileName$sample.ID, ".jpeg", sep = "") #Create a new column for the full filename for 3x coverage + specific sample's percent CpG methylation plot
fileName$actualFileName2 <- paste(fileName$nameBase2, "-3xCoverage", "-Sample", fileName$sample.ID, ".jpeg", sep = "") #Create a new column for the full filename for 3x coverage + specific sample's percent CpG coverage plot
head(fileName) #Confirm column creation
```

```{r}
for(i in 1:nFiles) { #For each data file
  jpeg(filename = fileName$actualFileName[i], height = 1000, width = 1000) #Save file with designated name
  getMethylationStats(processedFilesCov3[[i]], plot = TRUE, both.strands = FALSE) #Get %CpG methylation information
  dev.off() #Turn off plotting device
} #Plot and save %CpG methylation information
```

```{r}
for(i in 1:nFiles) { #For each data file
  jpeg(filename = fileName$actualFileName2[i], height = 1000, width = 1000) #Save file with designated name
  getCoverageStats(processedFilesCov3[[i]], plot = TRUE, both.strands = FALSE) #Get CpG coverage information
  dev.off() #Turn off plotting device
} #Plot and save CpG coverage information
```

### 5x coverage

```{r}
fileName$actualFileName3 <- paste(fileName$nameBase, "-5xCoverage", "-Sample", fileName$sample.ID, ".jpeg", sep = "") #Create a new column for the full filename for 5x coverage + specific sample's percent CpG methylation plot
fileName$actualFileName4 <- paste(fileName$nameBase2, "-5xCoverage", "-Sample", fileName$sample.ID, ".jpeg", sep = "") #Create a new column for the full filename for 5x coverage + specific sample's percent CpG coverage plot
head(fileName) #Confirm column creation
```

```{r}
for(i in 1:nFiles) { #For each data file
  jpeg(filename = fileName$actualFileName3[i], height = 1000, width = 1000) #Save file with designated name
  getMethylationStats(processedFilesCov5[[i]], plot = TRUE, both.strands = FALSE) #Get %CpG methylation information
  dev.off() #Turn off plotting device
} #Plot and save %CpG methylation information
```

```{r}
for(i in 1:nFiles) { #For each data file
  jpeg(filename = fileName$actualFileName4[i], height = 1000, width = 1000) #Save file with designated name
  getCoverageStats(processedFilesCov5[[i]], plot = TRUE, both.strands = FALSE) #Get CpG coverage information
  dev.off() #Turn off plotting device
} #Plot and save CpG coverage information
```

### 10x coverage

### Filtered coverage

```{r}
fileName$actualFileName7 <- paste(fileName$nameBase, "-Filtered", "-5xCoverage", "-Sample", fileName$sample.ID, ".jpeg", sep = "") #Create a new column for the full filename for filtered + 5x coverage + specific sample's percent CpG methylation plot
fileName$actualFileName8 <- paste(fileName$nameBase2, "-Filtered", "-5xCoverage", "-Sample", fileName$sample.ID, ".jpeg", sep = "") #Create a new column for the full filename for filtered + 5x coverage + specific sample's percent CpG coverage plot
head(fileName) #Confirm column creation
```

```{r}
for(i in 1:nFiles) { #For each data file
  jpeg(filename = fileName$actualFileName7[i], height = 1000, width = 1000) #Save file with designated name
  getMethylationStats(processedFilteredFilesCov5[[i]], plot = TRUE, both.strands = FALSE) #Get %CpG methylation information
  dev.off() #Turn off plotting device
} #Plot and save %CpG methylation information
```

```{r}
for(i in 1:nFiles) { #For each data file
  jpeg(filename = fileName$actualFileName8[i], height = 1000, width = 1000) #Save file with designated name
  getCoverageStats(processedFilteredFilesCov5[[i]], plot = TRUE, both.strands = FALSE) #Get CpG coverage information
  dev.off() #Turn off plotting device
} #Plot and save CpG coverage information
```

## Obtain clustering information

### `destrand = FALSE`

#### 3x coverage

```{r}
methylationInformationCov3 <- unite(processedFilesCov3) #Combine all processed files into a single table
head(methylationInformationCov3) #Confirm unite
```

```{r}
clusteringInformationCov3 <- clusterSamples(methylationInformationCov3, dist = "correlation", method = "ward", plot = FALSE) #Save cluster information as a new object
```

```{r}
jpeg(filename = "2018-10-25-Loci-Analysis/2018-11-07-Full-Sample-Pearson-Correlation-Plot-Cov3.jpeg", height = 1000, width = 1000) #Save file with designated name
getCorrelation(methylationInformationCov3, plot = TRUE) #Understand correlation between methylation patterns in different samples
dev.off()
```

```{r}
jpeg(filename = "2018-10-25-Loci-Analysis/2018-11-07-Full-Sample-CpG-Methylation-Clustering-Cov3.jpeg", height = 1000, width = 1000) #Save file with designated name
clusterSamples(methylationInformationCov3, dist = "correlation", method = "ward", plot = TRUE) #Cluster samples based on correlation coefficients
dev.off()
```

```{r}
jpeg(filename = "2018-10-25-Loci-Analysis/2018-11-07-Full-Sample-Methylation-PCA-Cov3.jpeg", height = 1000, width = 1000) #Save file with designated name
PCASamples(methylationInformationCov3) #Run a PCA analysis on percent methylation for all samples
dev.off() #Turn off plotting device
```

```{r}
jpeg(filename = "2018-10-25-Loci-Analysis/2018-11-07-Full-Sample-Methylation-Screeplot-Cov3.jpeg", height = 1000, width = 1000) #Save file with designated name
PCASamples(methylationInformationCov3, screeplot = TRUE) #Run the PCA analysis and plot variances against PC number in a screeplot
dev.off()
```

#### 5x coverage

```{r}
methylationInformationCov5 <- unite(processedFilesCov5) #Combine all processed files into a single table
head(methylationInformationCov5) #Confirm unite
```

#### 10x coverage

```{r}
methylationInformationCov10 <- unite(processedFilesCov10) #Combine all processed files into a single table
head(methylationInformationCov10) #Confirm unite
```

### `destrand = TRUE`

#### 5x coverage

```{r}
methylationInformationCov5Destrand <- unite(processedFilesCov5, destrand = TRUE) #Combine all processed files into a single table
head(methylationInformationCov5Destrand) #Confirm unite
```

```{r}
clusteringInformationCov5Destrand <- clusterSamples(methylationInformationCov5Destrand, dist = "correlation", method = "ward", plot = FALSE) #Save cluster information as a new object
```

```{r}
jpeg(filename = "2018-10-25-Loci-Analysis/2019-03-14-Full-Sample-Pearson-Correlation-Plot-Cov5Destrand.jpeg", height = 1000, width = 1000) #Save file with designated name
getCorrelation(methylationInformationCov5Destrand, plot = TRUE) #Understand correlation between methylation patterns in different samples
dev.off()
```

```{r}
jpeg(filename = "2018-10-25-Loci-Analysis/2019-03-14-Full-Sample-CpG-Methylation-Clustering-Cov5Destrand.jpeg", height = 1000, width = 1000) #Save file with designated name
clusterSamples(methylationInformationCov5Destrand, dist = "correlation", method = "ward", plot = TRUE) #Cluster samples based on correlation coefficients
dev.off()
```

```{r}
jpeg(filename = "2018-10-25-Loci-Analysis/2019-03-14-Full-Sample-Methylation-PCA-Cov5Destrand.jpeg", height = 1000, width = 1000) #Save file with designated name
PCASamples(methylationInformationCov5Destrand) #Run a PCA analysis on percent methylation for all samples
dev.off() #Turn off plotting device
```

```{r}
jpeg(filename = "2018-10-25-Loci-Analysis/2019-03-14-Full-Sample-Methylation-Screeplot-Cov5Destrand.jpeg", height = 1000, width = 1000) #Save file with designated name
PCASamples(methylationInformationCov5Destrand, screeplot = TRUE) #Run the PCA analysis and plot variances against PC number in a screeplot
dev.off()
```

#### 10x coverage

```{r}
methylationInformationCov10Destrand <- unite(processedFilesCov10, destrand = TRUE) #Combine all processed files into a single table
head(methylationInformationCov10Destrand) #Confirm unite
```

#### Filtered coverage

```{r}
methylationInformationFilteredCov5Destrand <- unite(processedFilteredFilesCov5, destrand = TRUE) #Combine all processed files into a single table
head(methylationInformationFilteredCov5Destrand) #Confirm unite
```

```{r}
clusteringInformationFilteredCov5Destrand <- clusterSamples(methylationInformationFilteredCov5Destrand, dist = "correlation", method = "ward.D", plot = TRUE) #Save cluster information as a new object
```

```{r}
jpeg(filename = "2018-10-25-Loci-Analysis/2019-04-05-Full-Sample-Pearson-Correlation-Plot-FilteredCov5Destrand.jpeg", height = 1000, width = 1000) #Save file with designated name
getCorrelation(methylationInformationFilteredCov5Destrand, plot = TRUE) #Understand correlation between methylation patterns in different samples
dev.off()
```

```{r}
jpeg(filename = "2018-10-25-Loci-Analysis/2019-04-05-Full-Sample-CpG-Methylation-Clustering-FilteredCov5Destrand.jpeg", height = 1000, width = 1000) #Save file with designated name
clusterSamples(methylationInformationFilteredCov5Destrand, dist = "correlation", method = "ward", plot = TRUE) #Cluster samples based on correlation coefficients
dev.off()
```

```{r}
jpeg(filename = "2018-10-25-Loci-Analysis/2019-04-05-Full-Sample-Methylation-PCA-FilteredCov5Destrand.jpeg", height = 1000, width = 1000) #Save file with designated name
PCASamples(methylationInformationFilteredCov5Destrand) #Run a PCA analysis on percent methylation for all samples
dev.off() #Turn off plotting device
```

```{r}
jpeg(filename = "2018-10-25-Loci-Analysis/2019-04-05-Full-Sample-Methylation-Screeplot-FilteredCov5Destrand.jpeg", height = 1000, width = 1000) #Save file with designated name
PCASamples(methylationInformationFilteredCov5Destrand, screeplot = TRUE) #Run the PCA analysis and plot variances against PC number in a screeplot
dev.off()
```

## Obtain differentially methylated loci

### `destrand = FALSE`

#### 3x coverage

```{r}
differentialMethylationStatsCov3 <- calculateDiffMeth(methylationInformationCov3) #Calculate differential methylation statistics based on treatment indication from processBismarkAln
diffMethStats50Cov3 <- getMethylDiff(differentialMethylationStatsCov3, difference = 50, qvalue = 0.01) #Identify loci that are at least 50% different
head(diffMethStats50Cov3) #Confirm creation
```

```{r}
write.csv(diffMethStats50Cov3, "2018-10-25-Loci-Analysis/2018-11-07-Differentially-Methylated-Loci-50-Cov3.csv") #Save table as .csv
```

#### 5x coverage

```{r}
differentialMethylationStatsCov5 <- calculateDiffMeth(methylationInformationCov5) #Calculate differential methylation statistics based on treatment indication from processBismarkAln
diffMethStats50Cov5 <- getMethylDiff(differentialMethylationStatsCov5, difference = 50, qvalue = 0.01) #Identify loci that are at least 50% different
head(diffMethStats50Cov5) #Confirm creation
```

```{r}
write.csv(diffMethStats50Cov5, "2018-10-25-Loci-Analysis/2019-03-13-Differentially-Methylated-Loci-50-Cov5.csv") #Save table as .csv
```

#### 10x coverage

```{r}
differentialMethylationStatsCov10 <- calculateDiffMeth(methylationInformationCov10) #Calculate differential methylation statistics based on treatment indication from processBismarkAln
diffMethStats50Cov10 <- getMethylDiff(differentialMethylationStatsCov10, difference = 50, qvalue = 0.01) #Identify loci that are at least 50% different
head(diffMethStats50Cov10) #Confirm creation
```

```{r}
write.csv(diffMethStats50Cov10, "2018-10-25-Loci-Analysis/2019-03-13-Differentially-Methylated-Loci-50-Cov10.csv") #Save table as .csv
```

### `destrand = TRUE`

#### 5x coverage

```{r}
differentialMethylationStatsCov5Destrand <- calculateDiffMeth(methylationInformationCov5Destrand) #Calculate differential methylation statistics based on treatment indication from processBismarkAln
diffMethStats50Cov5Destrand <- getMethylDiff(differentialMethylationStatsCov5Destrand, difference = 50, qvalue = 0.01) #Identify loci that are at least 50% different
head(diffMethStats50Cov5Destrand) #Confirm creation
```

```{r}
write.csv(diffMethStats50Cov5Destrand, "2018-10-25-Loci-Analysis/2019-03-13-Differentially-Methylated-Loci-Destrand-50-Cov5.csv") #Save table as .csv
```

#### 10x coverage

```{r}
differentialMethylationStatsCov10Destrand <- calculateDiffMeth(methylationInformationCov10Destrand) #Calculate differential methylation statistics based on treatment indication from processBismarkAln
diffMethStats50Cov10Destrand <- getMethylDiff(differentialMethylationStatsCov10Destrand, difference = 50, qvalue = 0.01) #Identify loci that are at least 50% different
head(diffMethStats50Cov10Destrand) #Confirm creation
```

```{r}
write.csv(diffMethStats50Cov10Destrand, "2018-10-25-Loci-Analysis/2019-03-13-Differentially-Methylated-Loci-Destrand-50-Cov10.csv") #Save table as .csv
```

#### Filtered coverage

```{r}
differentialMethylationStatsFilteredCov5Destrand <- calculateDiffMeth(methylationInformationFilteredCov5Destrand) #Calculate differential methylation statistics based on treatment indication from processBismarkAln
head(differentialMethylationStatsFilteredCov5Destrand) #Look at differential methylation statistics
```

```{r}
write.csv(differentialMethylationStatsFilteredCov5Destrand, "2018-10-25-Loci-Analysis/2019-06-30-All-Loci-Methylation-Statistic-Filtered-Destrand-50-Cov5.csv") #Save table as a .csv
```

```{r}
diffMethStats50FilteredCov5Destrand <- getMethylDiff(differentialMethylationStatsFilteredCov5Destrand, difference = 50, qvalue = 0.01) #Identify loci that are at least 50% different
head(diffMethStats50FilteredCov5Destrand) #Confirm creation
```

```{r}
write.csv(diffMethStats50FilteredCov5Destrand, "2018-10-25-Loci-Analysis/2019-04-05-Differentially-Methylated-Loci-Filtered-Destrand-50-Cov5.csv") #Save table as .csv
```

##### Hypermethylated DML

```{r}
diffMethStats50FilteredCov5DestrandHyper <- getMethylDiff(differentialMethylationStatsFilteredCov5Destrand, difference = 50, qvalue = 0.01, type = "hyper") #Identify hypermethylated loci that are at least 50% different
head(diffMethStats50FilteredCov5DestrandHyper) #Confirm creation
```

```{r}
write.csv(diffMethStats50FilteredCov5DestrandHyper, "2018-10-25-Loci-Analysis/2019-04-05-Differentially-Methylated-Loci-Filtered-Destrand-50-Cov5-Hypermethylated.csv") #Save table as .csv
```

##### Hypomethylated DML

```{r}
diffMethStats50FilteredCov5DestrandHypo <- getMethylDiff(differentialMethylationStatsFilteredCov5Destrand, difference = 50, qvalue = 0.01, type = "hypo") #Identify hypomethylated loci that are at least 50% different
head(diffMethStats50FilteredCov5DestrandHypo) #Confirm creation
```

```{r}
write.csv(diffMethStats50FilteredCov5DestrandHypo, "2018-10-25-Loci-Analysis/2019-04-05-Differentially-Methylated-Loci-Filtered-Destrand-50-Cov5-Hypomethylated.csv") #Save table as .csv
```

#### DML distribution figure

I want to plot the distribution of DML across the *C. virginica* genome and compare it to the number of genes in each chromosome.

##### Calculate distribution of DML across chromosomes

```{r}
DMLchrCounts <-as.data.frame(table(diffMethStats50FilteredCov5Destrand$chr)) #Count the number of DML/chromosome
DMLchrCounts$chr <- c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10") #Create column with chr name
DMLchrCounts <- DMLchrCounts[,-1] #Remove column with chromosome RefSeq ID
DMLchrCounts <- DMLchrCounts[,c(2,1)] #Reorganize columns
colnames(DMLchrCounts) <- c("chr", "DMLCount") #Rename columns
head(DMLchrCounts) #Confirm formatting
```

```{r}
write.csv(DMLchrCounts, "2019-09-30-DML-per-Chromosome.csv", row.names = FALSE, quote = FALSE) #Save file
```

##### [*C. virginica* genome information from NCBI](https://www.ncbi.nlm.nih.gov/genome/398)

```{r}
DMLchrCounts$geneCount <- c(3932, 4137, 4712, 3896, 6239, 2630, 3015, 4445, 4960, 1527) #Create column with number of gene sequences in each chromosome
DMLchrCounts$DMLbyGenes <- (DMLchrCounts$DMLCount / DMLchrCounts$geneCount)*100
head(DMLchrCounts) #Confirm dataframe creation
```

##### Create figure

```{r}
#pdf("2019-09-30-DML-and-Gene-Distribution.pdf", height = 8.5, width = 11) #Save figure as a pdf

par(mar = c(5,7,2,10)) #Change figure boundaries

DMLbarplot <- barplot(as.matrix(t(DMLchrCounts$DMLCount)),
                      axes = FALSE, 
                      names.arg = DMLchrCounts$chr,
                      cex.names = 1.5,
                      xlim = c(0.7,11.5),
                      ylim = c(0,150),
                      col = "grey80") #Create a barplot and save as a new object. Use axes = FALSE to remove the y-axis and names.arg to set labels on the x-axis. The object contains x coordinates for bars, so xlim is set at 12 to compensate for maximum value of 11.5
mtext(side = 1, "Chromosome", line = 3, cex = 1.5) #Add x-axis label
axis(side = 2, line = 1.5, at = seq(0, 150, by = 50), las = 2, col = "grey80", cex.axis = 1.2) #Add y-axis for DML counts
mtext(side = 2, "Number of DML", line = 5, cex = 1.5) #Add y-axis label for DML counts

par(new = TRUE) #Create a new plot
plot(x = DMLbarplot,
     y = DMLchrCounts$geneCount,
     type = "b",
     axes = FALSE, xlab = "", ylab = "", xaxs = "i", yaxs = "i",
     pch = 16, col = "grey20",
     xlim = c(0,12), ylim = c(0,6500)) #Plot points and lines (type = "b") for gene count by chromosome. Use the coordinates from DMLbarplot (x = DMLbarplot) and set xlim = (0,12) so plots are lined up. Use axes = FALSE to remove both axes. Remove x and y lables (xlab = ""; ylab = ""). Set ylim = (0,6500) to account for max y-values. Use xaxs and yxaxs to remove space between axes.
axis(side = 4, line = 1.5, at = seq(0, 6500, by = 500), las = 2, col = "grey80", cex.axis = 1.2) #Add y-axis for gene sequence counts
mtext(side = 4, "Number of Genes", line = 6, cex = 1.5) #Add y-axis label for gene sequence counts

#dev.off() #Turn off plotting device
```

#### Preliminary Analyses

###### Plot customization

```{r}
plotCustomization <- data.frame(sample = 1:10,
                                treatmentSpecification) #Create dataframe with sample treatment information
head(plotCustomization) #Confirm dataframe creation
```

###### Principal Component Analyses

`methylKit` creates PCA biplots, but I want to dig further into the analysis and create custom biplots.

####### All methylation data

```{r}
allDataPCA <- PCASamples(methylationInformationFilteredCov5Destrand, obj.return = TRUE) #Run a PCA analysis on percent methylation for all samples. methylKit uses prcomp to create the PCA matrix
summary(allDataPCA) #Look at summary statistics. The first PC explains 18.1% of variation, the second PC explains 11.7% of variation
```

```{r}
#pdf("2019-11-19-All-Data-PCA.pdf", width = 11, height = 8.5)

par(mar = c(5, 5, 1, 1)) #Specify inner and outer margins

fig.allDataPCA <- ordiplot(allDataPCA, choices = c(1, 2), type = "none", display = "sites", cex = 0.5, xlab = "", ylab = "", xaxt = "n", yaxt = "n") #Use ordiplot to create base biplot. Do not add any points
points(fig.allDataPCA, "sites", col = c(rep(plotColors[2], times = 5), rep(plotColors[4], times = 5)), pch = c(rep(16, times = 5), rep(17, times = 5)), cex = 3) #Add each sample. Darker samples are ambient, lighter samples are elevated pCO2

#Add multiple white boxes on top of the default black box to manually change the color
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")

ordiellipse(allDataPCA, plotCustomization$treatment, show.groups = "1", col = plotColors[4]) #Add confidence ellipse around the samples in elevated pCO2
ordiellipse(allDataPCA, plotCustomization$treatment, show.groups = "0", col = plotColors[2]) #Add confidence ellipse around the samples in ambient pCO2

axis(side =  1, labels = TRUE, col = "grey80", cex.axis = 1.7) #Add x-axis
mtext(side = 1, text = "PC 1 (18.1%)", line = 3, cex = 1.5) #Add x-axis label

axis(side =  2, labels = TRUE, col = "grey80", cex.axis = 1.7) #Add y-axis
mtext(side = 2, text = "PC 2 (11.7%)", line = 3, cex = 1.5) #Add y-axis label

legend("topleft", 
       pch = c(16, 17), 
       legend = c("Control", "Elevated"), 
       col = c(plotColors[2], plotColors[4]), 
       cex = 1.7, bty = "n") #Add a legend with information about ambient and elevated samples

#dev.off()
```

####### DML

The first thing I need to do is subset the data so it only includes DML. Then, I can use similar code to what I used above to create PCA plots.

```{r}
DMLPositions <- rep(0, times = length(diffMethStats50FilteredCov5Destrand$chr)) #Create an empty vector with 598 places to store row numbers
for (i in 1:length(DMLPositions)) {
  DMLPositions[i] <- which(getData(diffMethStats50FilteredCov5Destrand)$start[i] == getData(methylationInformationFilteredCov5Destrand)$start)
} #For each DML, save the row number where that DML is found in methylationInformationFilteredCov5Destrand
tail(DMLPositions) #Confirm vector was created
```

```{r}
DMLMatrix <- methylationInformationFilteredCov5Destrand[DMLPositions,] #Subset methylationInformationFilteredCov5Destrand to only include DML and save as a new methylBase object
sum((DMLMatrix$start) == (diffMethStats50FilteredCov5Destrand$start)) == length(diffMethStats50FilteredCov5Destrand$start) #Confirm that start columns are identical. If they are identical, the sum of all TRUE statements should equal the length of the original methylBase object
tail(DMLMatrix) #Confirm methylBase object creation
```

```{r}
DMLDataPCA <- PCASamples(DMLMatrix, obj.return = TRUE) #Run a PCA analysis on percent methylation for all samples. methylKit uses prcomp to create the PCA matrix
summary(DMLDataPCA) #Look at summary statistics. The first PC explains 47.6% of variation, the second PC explains 9.5% of variation
```

```{r}
#pdf("2019-11-19-DML-Only-PCA.pdf", width = 11, height = 8.5)

par(mar = c(5, 5, 1, 1)) #Specify inner and outer margins

fig.DMLDataPCA <- ordiplot(DMLDataPCA, choices = c(1, 2), type = "none", display = "sites", cex = 0.5, xlab = "", ylab = "", xaxt = "n", yaxt = "n") #Use ordiplot to create base biplot. Do not add any points
points(fig.DMLDataPCA, "sites", col = c(rep(plotColors[2], times = 5), rep(plotColors[4], times = 5)), pch = c(rep(16, times = 5), rep(17, times = 5)), cex = 3) #Add each sample. Darker samples are ambient, lighter samples are elevated pCO2

#Add multiple white boxes on top of the default black box to manually change the color
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")

ordiellipse(DMLDataPCA, plotCustomization$treatment, show.groups = "1", col = plotColors[4]) #Add confidence ellipse around the samples in elevated pCO2
ordiellipse(DMLDataPCA, plotCustomization$treatment, show.groups = "0", col = plotColors[2]) #Add confidence ellipse around the samples in ambient pCO2

axis(side =  1, labels = TRUE, col = "grey80", cex.axis = 1.7) #Add x-axis
mtext(side = 1, text = "PC 1 (47.6%)", line = 3, cex = 1.5) #Add x-axis label

axis(side =  2, labels = TRUE, col = "grey80", cex.axis = 1.7) #Add y-axis
mtext(side = 2, text = "PC 2 (9.5%)", line = 3, cex = 1.5) #Add y-axis label

legend("topright", 
       pch = c(16, 17), 
       legend = c("Control", "Elevated"), 
       col = c(plotColors[2], plotColors[4]), 
       cex = 1.7, bty = "n") #Add a legend with information about ambient and elevated samples

#dev.off()
```

####### Multipanel plot

```{r}
#pdf("2019-11-19-PCA-Multpanel.pdf", height = 8.5, width = 11) #Save plot

par(mfrow = c(1, 2), oma = c(5, 2, 2, 0), mar = c(0, 3, 0, 5)) #Set up parameters for multipanel plot

#All CpG Loci

fig.allDataPCA <- ordiplot(allDataPCA, choices = c(1, 2), type = "none", display = "sites", cex = 0.5, xlim = c(-400, 200), xlab = "", ylab = "", xaxt = "n", yaxt = "n") #Use ordiplot to create base biplot. Do not add any points
points(fig.allDataPCA, "sites", col = c(rep(plotColors[2], times = 5), rep(plotColors[4], times = 5)), pch = c(rep(16, times = 5), rep(17, times = 5)), cex = 3) #Add each sample. Darker samples are ambient, lighter samples are elevated pCO2

#Add multiple white boxes on top of the default black box to manually change the color
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")

ordiellipse(allDataPCA, plotCustomization$treatment, show.groups = "1", col = plotColors[4]) #Add confidence ellipse around the samples in elevated pCO2
ordiellipse(allDataPCA, plotCustomization$treatment, show.groups = "0", col = plotColors[2]) #Add confidence ellipse around the samples in ambient pCO2

axis(side =  1, at = seq(-400, 200, 200), col = "grey80", cex.axis = 1.7) #Add x-axis
mtext(side = 1, text = "PC 1 (18.1%)", line = 3, cex = 1.5) #Add x-axis label

axis(side =  2, labels = TRUE, col = "grey80", cex.axis = 1.7) #Add y-axis
mtext(side = 2, text = "PC 2 (11.7%)", line = 3, cex = 1.5) #Add y-axis label

mtext(side = 3, line = -5, adj = c(-100,0), text = "    a. All CpG Loci", cex = 1.5)

#DML

fig.DMLDataPCA <- ordiplot(DMLDataPCA, choices = c(1, 2), type = "none", display = "sites", cex = 0.5, xlim = c(-20,20), ylim = c(-10,20), xlab = "", ylab = "", xaxt = "n", yaxt = "n") #Use ordiplot to create base biplot. Do not add any points
points(fig.DMLDataPCA, "sites", col = c(rep(plotColors[2], times = 5), rep(plotColors[4], times = 5)), pch = c(rep(16, times = 5), rep(17, times = 5)), cex = 3) #Add each sample. Darker samples are ambient, lighter samples are elevated pCO2

#Add multiple white boxes on top of the default black box to manually change the color
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")
box(col = "white")

ordiellipse(DMLDataPCA, plotCustomization$treatment, show.groups = "1", col = plotColors[4]) #Add confidence ellipse around the samples in elevated pCO2
ordiellipse(DMLDataPCA, plotCustomization$treatment, show.groups = "0", col = plotColors[2]) #Add confidence ellipse around the samples in ambient pCO2

axis(side =  1, labels = TRUE, col = "grey80", cex.axis = 1.7) #Add x-axis
mtext(side = 1, text = "PC 1 (47.6%)", line = 3, cex = 1.5) #Add x-axis label

axis(side =  2, labels = TRUE, col = "grey80", cex.axis = 1.7) #Add y-axis
mtext(side = 2, text = "PC 2 (9.5%)", line = 3, cex = 1.5) #Add y-axis label

mtext(side = 3, line = -5, adj = c(-100, 0), text = "    b. DML", cex = 1.5) #Add test category

legend(x = 0, y = -22, 
       pch = c(16, 17), 
       legend = c("Control", "Elevated"), 
       col = c(plotColors[2], plotColors[4]),
       y.intersp = 1, x.intersp = 1,
       cex = 1.7, bty = "n") #Add a legend with information about ambient and elevated samples

#dev.off()
```

##### Heatmap

```{r}
percMethDML <- percMethylation(DMLMatrix, rowids = TRUE) #Get percent methylation for all samples at DML. Include row IDs (chr, start, end) information
head(percMethDML) #Confirm percent methylation matrix was created
```

I'll first use `pheatmap`.

```{r}
pheatmap(percMethDML, color = rev(plotColors),
         cluster_rows = TRUE, clustering_distance_rows = "euclidean", treeheight_row = 70, show_rownames = FALSE,
         cluster_cols = TRUE, clustering_distance_cols = "euclidean", treeheight_col = 40, show_colnames = FALSE,
         annotation_col = data.frame(pCO2 = factor(rep(c("Ambient","Treatment"), each = 5))),
         annotation_colors = list(pCO2 = c(Ambient = "grey90", Treatment = "grey10")), 
         annotation_legend = FALSE, annotation_names_col = FALSE,
         legend = TRUE) #Create heatmap using pheatmap using percMethDML and plotColors color scheme. Cluster rows and columns using euclidean distances. Adjust the dendogram tree heights and do not show any row or column names. Create a dataframe with treatment information using annotation_col. Use annotation_colors to indicate colors for treatment ("grey90") and ambient ("grey10") samples. Do not include an annotation_legend or name for annotations (annotatino_names_col). Include a legend.
```

I also want to try using a different heatmap package, `heatmap.2` from `gplots`. `heatmap.2` allows you to add a density plot with the legend, which could be interesting information to include.

```{r}
#pdf("2019-11-19-DML-Only-Heatmap.pdf", height = 8.5, width = 11)

par(oma = c(0, 1, 0, 0)) #Adjust outer margins
heatmap.2(percMethDML, col = plotColors, scale = "none", margins = c(1,1),
          trace = "none", tracecol = "black",
          labRow = FALSE, labCol = FALSE, 
          ColSideColors = c(rep("grey90", times = 5), rep("grey10", times = 5)),
          key = TRUE, keysize = 1.8, density.info = "density", key.title = "", key.xlab = "% Methylation", key.ylab = "",
          key.par = list(cex.lab = 2.0, cex.axis = 1.5)) #Create heatmap using heatmap.2 from gplots package using percMethDML data. Use plotColors but do not scale data, label rows, or label columns. Use ColSideColors to indicate colors for treatment and ambient samples. Add a legend using key, and adjust keysize. Have key display density data with density.info. Do not add a key title or y-axis label, and label x axis with key.xlab.
mtext("Density", cex = 1.6, las = 3, adj = 0.8, padj = -29) #Manually add y-axis label for key since heatmap.2 doesn't let you change font size

#dev.off()
```

# Differentially methylated regions

The code above allowed me to identify differentially methylated *loci* (DMLs). For an exploratory analysis, it is also useful to identify differentially methylated *regions* (DMRs). I will complete the tiling analysis using my `mincov = 3` object. I will use both 100 and 1000 bp sized windows and steps.

## Tiling window analysis

### 100 bp windows

```{r}
tiles100 <- tileMethylCounts(processedFilteredFilesCov5, win.size = 100, step.size = 100, cov.bases = 1) #Add up C and T counts from each covered cytosine and return total C and T count for each tile. Perform this analysis with 100 bp sized windows, and slide 100 bp for each new window
head(tiles100[[10]]) #Confirm object creation. [[10]] only shows tile100 head for the 10th sample processed.
```

#### Obtain clustering information

```{r}
methylationInformationTiles100 <- unite(tiles100) #Combine all processed files into a single table
clusteringInformationTiles100 <- clusterSamples(methylationInformationTiles100, dist = "correlation", method = "ward.D2", plot = FALSE) #Save clustering information as a new object
```

```{r}
jpeg(filename = "2018-10-25-Tiling-Analysis/2019-06-05-Full-Sample-Pearson-Correlation-Plot-FilteredCov5Destrand-Tiles100.jpeg", height = 1000, width = 1000) #Save file with designated name
getCorrelation(methylationInformationTiles100, plot = TRUE) #Understand correlation between methylation patterns in different samples
dev.off()
```

```{r}
jpeg(filename = "2018-10-25-Tiling-Analysis/2019-06-05-Full-Sample-CpG-Methylation-Clustering-FilteredCov5Destrand-Tiles100.jpeg", height = 1000, width = 1000) #Save file with designated name
clusterSamples(methylationInformationTiles100, dist = "correlation", method = "ward.D2", plot = TRUE) #Cluster samples based on correlation coefficients
dev.off()
```

```{r}
jpeg(filename = "2018-10-25-Tiling-Analysis/2019-06-05-Full-Sample-Methylation-PCA-FilteredCov5Destrand-Tiles100.jpeg", height = 1000, width = 1000) #Save file with designated name
PCASamples(methylationInformationTiles100) #Run a PCA analysis on percent methylation for all samples
dev.off() #Turn off plotting device
```

```{r}
jpeg(filename = "2018-10-25-Tiling-Analysis/2019-06-05-Full-Sample-Methylation-Screeplot-FilteredCov5Destrand-Tiles100.jpeg", height = 1000, width = 1000) #Save file with designated name
PCASamples(methylationInformationTiles100, screeplot = TRUE) #Run the PCA analysis and plot variances against PC number in a screeplot
dev.off()
```

#### Obtain differentially methylated regions

```{r}
differentialMethylationStatsTiles100 <- calculateDiffMeth(methylationInformationTiles100) #Calculate differential methylation statistics based on treatment indication from processBismarkAln
diffMethStats50Tiles100 <- getMethylDiff(differentialMethylationStatsTiles100, difference = 50, qvalue = 0.01) #Identify loci that are at least 50% different
head(diffMethStats50Tiles100) #Confirm creation
```

```{r}
write.csv(diffMethStats50Tiles100, "2018-10-25-Tiling-Analysis/2019-06-05-Differentially-Methylated-Regions-50-FilteredCov5Destrand-Tiles100.csv") #Save table as .csv
```

##### Hypermethylated DMR

```{r}
diffMethStats50FilteredCov5DestrandTiles100Hyper <- getMethylDiff(differentialMethylationStatsTiles100, difference = 50, qvalue = 0.01, type = "hyper") #Identify hypermethylated regions that are at least 50% different
head(diffMethStats50FilteredCov5DestrandTiles100Hyper) #Confirm creation
```

```{r}
write.csv(diffMethStats50FilteredCov5DestrandTiles100Hyper, "2018-10-25-Loci-Analysis/2019-06-05-Differentially-Methylated-Regions-50-FilteredCov5Destrand-50-Tiles100-Hypermethylated.csv") #Save table as .csv
```

##### Hypomethylated DMR

```{r}
diffMethStats50FilteredCov5DestrandTiles100Hypo <- getMethylDiff(differentialMethylationStatsTiles100, difference = 50, qvalue = 0.01, type = "hypo") #Identify hypomethylated regions that are at least 50% different
head(diffMethStats50FilteredCov5DestrandTiles100Hypo) #Confirm creation
```

```{r}
write.csv(diffMethStats50FilteredCov5DestrandTiles100Hypo, "2018-10-25-Loci-Analysis/2019-06-05-Differentially-Methylated-Regions-50-FilteredCov5Destrand-50-Tiles100-Hypomethylated.csv") #Save table as .csv
```

### 1000 bp windows

```{r}
tiles1000 <- tileMethylCounts(processedFilteredFilesCov5, win.size = 1000, step.size = 1000, cov.bases = 1) #Add up C and T counts from each covered cytosine and return total C and T count for each tile. Perform this analysis with 1000 bp sized windows, and slide 1000 bp for each new window. This is the methylKit default.
head(tiles1000[[10]]) #Confirm object creation. [[10]] only shows tile100 head for the 10th sample processed.
```

#### Obtain clustering information

```{r}
methylationInformationTiles1000 <- unite(tiles1000) #Combine all processed files into a single table
clusteringInformationTiles1000 <- clusterSamples(methylationInformationTiles1000, dist = "correlation", method = "ward.D2", plot = FALSE) #Save clustering information as a new object
```

```{r}
jpeg(filename = "2018-10-25-Tiling-Analysis/2019-06-05-Full-Sample-Pearson-Correlation-Plot-FilteredCov5Destrand-Tiles1000.jpeg", height = 1000, width = 1000) #Save file with designated name
getCorrelation(methylationInformationTiles1000, plot = TRUE) #Understand correlation between methylation patterns in different samples
dev.off()
```

```{r}
jpeg(filename = "2018-10-25-Tiling-Analysis/2019-06-05-Full-Sample-CpG-Methylation-Clustering-FilteredCov5Destrand-Tiles1000.jpeg", height = 1000, width = 1000) #Save file with designated name
clusterSamples(methylationInformationTiles1000, dist = "correlation", method = "ward.D2", plot = TRUE) #Cluster samples based on correlation coefficients
dev.off()
```

```{r}
jpeg(filename = "2018-10-25-Tiling-Analysis/2019-06-05-Full-Sample-Methylation-PCA-FilteredCov5Destrand-Tiles1000.jpeg", height = 1000, width = 1000) #Save file with designated name
PCASamples(methylationInformationTiles1000) #Run a PCA analysis on percent methylation for all samples
dev.off() #Turn off plotting device
```

```{r}
jpeg(filename = "2018-10-25-Tiling-Analysis/2019-06-05-Full-Sample-Methylation-Screeplot-FilteredCov5Destrand-Tiles1000.jpeg", height = 1000, width = 1000) #Save file with designated name
PCASamples(methylationInformationTiles1000, screeplot = TRUE) #Run the PCA analysis and plot variances against PC number in a screeplot
dev.off()
```

#### Obtain differentially methylated regions

```{r}
differentialMethylationStatsTiles1000 <- calculateDiffMeth(methylationInformationTiles1000) #Calculate differential methylation statistics based on treatment indication from processBismarkAln
diffMethStats50Tiles1000 <- getMethylDiff(differentialMethylationStatsTiles1000, difference = 50, qvalue = 0.01) #Identify loci that are at least 50% different
head(diffMethStats50Tiles1000) #Confirm creation
```

```{r}
write.csv(diffMethStats50Tiles1000, "2018-10-25-Tiling-Analysis/2019-06-05-Differentially-Methylated-Regions-50-FilteredCov5Destrand-Tiles1000.csv") #Save table as .csv
```

### 500 bp windows

```{r}
tiles500 <- tileMethylCounts(processedFilteredFilesCov5, win.size = 500, step.size = 500, cov.bases = 1) #Add up C and T counts from each covered cytosine and return total C and T count for each tile. Perform this analysis with 500 bp sized windows, and slide 500 bp for each new window.
head(tiles500[[10]]) #Confirm object creation. [[10]] only shows tile500 head for the 10th sample processed.
```

#### Obtain clustering information

```{r}
methylationInformationTiles500 <- unite(tiles500) #Combine all processed files into a single table
clusteringInformationTiles500 <- clusterSamples(methylationInformationTiles500, dist = "correlation", method = "ward.D2", plot = FALSE) #Save clustering information as a new object
```

```{r}
jpeg(filename = "2018-10-25-Tiling-Analysis/2019-06-05-Full-Sample-Pearson-Correlation-Plot-FilteredCov5Destrand-Tiles500.jpeg", height = 1000, width = 1000) #Save file with designated name
getCorrelation(clusteringInformationTiles500, plot = TRUE) #Understand correlation between methylation patterns in different samples
dev.off()
```

```{r}
jpeg(filename = "2018-10-25-Tiling-Analysis/2019-06-05-Full-Sample-CpG-Methylation-Clustering-FilteredCov5Destrand-Tiles500.jpeg", height = 1000, width = 1000) #Save file with designated name
clusterSamples(clusteringInformationTiles500, dist = "correlation", method = "ward.D2", plot = TRUE) #Cluster samples based on correlation coefficients
dev.off()
```

```{r}
jpeg(filename = "2018-10-25-Tiling-Analysis/2019-06-05-Full-Sample-Methylation-PCA-FilteredCov5Destrand-Tiles500.jpeg", height = 1000, width = 1000) #Save file with designated name
PCASamples(clusteringInformationTiles500) #Run a PCA analysis on percent methylation for all samples
dev.off() #Turn off plotting device
```

```{r}
jpeg(filename = "2018-10-25-Tiling-Analysis/2019-06-05-Full-Sample-Methylation-Screeplot-FilteredCov5Destrand-Tiles500.jpeg", height = 1000, width = 1000) #Save file with designated name
PCASamples(clusteringInformationTiles500, screeplot = TRUE) #Run the PCA analysis and plot variances against PC number in a screeplot
dev.off()
```

#### Obtain differentially methylated regions

```{r}
differentialMethylationStatsTiles500 <- calculateDiffMeth(methylationInformationTiles500) #Calculate differential methylation statistics based on treatment indication from processBismarkAln
diffMethStats50Tiles500 <- getMethylDiff(differentialMethylationStatsTiles500, difference = 50, qvalue = 0.01) #Identify loci that are at least 50% different
head(diffMethStats50Tiles500) #Confirm creation
```

```{r}
write.csv(diffMethStats50Tiles500, "2018-10-25-Tiling-Analysis/2019-06-05-Differentially-Methylated-Regions-50-FilteredCov5Destrand-Tiles500.csv") #Save table as .csv
```

# Save DMLs and DMRs as BEDfiles

## Install packages

**Do not** install these packages until analysis in `methylKit` is complete, as some of the installed packages will mask others important for analysis.

```{r}
library(readr) #Load package

install.packages("tidyverse") #Install tidyverse
library(tidyverse) #Load package
```

## DML Background

```{r}
head(methylationInformationFilteredCov5Destrand) #I only want the columns with chromosome, start position, stop position, and strand.
```

```{r}
methylationInformationFilteredCov5DestrandReduced <- data.frame("chr" = methylationInformationFilteredCov5Destrand$chr,
                                                                "start" = methylationInformationFilteredCov5Destrand$start,
                                                                "stop" = methylationInformationFilteredCov5Destrand$end,
                                                                "strand" = methylationInformationFilteredCov5Destrand$strand) #Subset file 
head(methylationInformationFilteredCov5DestrandReduced) #Confirm subsetting
```

```{r}
methylationInformationFilteredCov5DestrandReduced$start <- (methylationInformationFilteredCov5DestrandReduced$start - 1) #Subtract 1 from the start position for visualization
head(methylationInformationFilteredCov5DestrandReduced) #Confirm subtracting worked
```

```{r}
write_delim(methylationInformationFilteredCov5DestrandReduced, "2019-05-14-Methylation-Information-Filtered-Destrand-Cov5.bed", delim = "\t", col_names = FALSE) #Write out all methylation information as a background to be used for gene enrichment analyses.
```

```{bash}
head 2019-05-14-Methylation-Information-Filtered-Destrand-Cov5.bed #Confirm formatting was retained in export
```

## DMLs

### `destrand = FALSE`

#### 3x coverage

```{r}
DMLPlus10222018 <- filter(diffMethStats50Cov3, strand == "+") %>% mutate(start = start -1, end = end + 1) %>% select(chr, start, end, strand, meth.diff) #Save + strand of DMLs as a new object
DMLMinus10222018 <- filter(diffMethStats50Cov3, strand == "-") %>% mutate(start = start -2) %>% select(chr, start, end, strand, meth.diff) #Save - strand of DMLs as a new object
```

```{r}
DML10222018 <- bind_rows(DMLPlus10222018, DMLMinus10222018) %>% arrange(chr, start) %>% mutate_if(is.numeric, as.integer) #Join + and - strand information to be saved as a BED file, and avoid writing information in scientific notation
```

```{r}
write_delim(DML10222018, "2018-11-07-DML-Locations.bed",  delim = '\t', col_names = FALSE) #Save data as a BED file
```

#### 5x coverage

```{r}
DML5xPlus03132019 <- filter(diffMethStats50Cov5, strand == "+") %>% mutate(start = start -1, end = end + 1) %>% select(chr, start, end, strand, meth.diff) #Save + strand of DMLs as a new object
DML5xMinus03132019 <- filter(diffMethStats50Cov5, strand == "-") %>% mutate(start = start -2) %>% select(chr, start, end, strand, meth.diff) #Save - strand of DMLs as a new object
```

```{r}
DML5x03132019 <- bind_rows(DML5xPlus03132019, DML5xMinus03132019) %>% arrange(chr, start) %>% mutate_if(is.numeric, as.integer) #Join + and - strand information to be saved as a BED file, and avoid writing information in scientific notation
```

```{r}
write_delim(DML5x03132019, "2019-03-13-DML-5x-Locations.bed",  delim = '\t', col_names = FALSE) #Save data as a BED file
```

#### 10x coverage

```{r}
DML10xPlus03132019 <- filter(diffMethStats50Cov10, strand == "+") %>% mutate(start = start -1, end = end + 1) %>% select(chr, start, end, strand, meth.diff) #Save + strand of DMLs as a new object
DML10xMinus03132019 <- filter(diffMethStats50Cov10, strand == "-") %>% mutate(start = start -2) %>% select(chr, start, end, strand, meth.diff) #Save - strand of DMLs as a new object
```

```{r}
DML10x03132019 <- bind_rows(DML10xPlus03132019, DML10xMinus03132019) %>% arrange(chr, start) %>% mutate_if(is.numeric, as.integer) #Join + and - strand information to be saved as a BED file, and avoid writing information in scientific notation
```

```{r}
write_delim(DML10x03132019, "2019-03-13-DML-10x-Locations.bed",  delim = '\t', col_names = FALSE) #Save data as a BED file
```

### `destrand = TRUE`

#### 5x coverage

```{r}
DML5xDestrand03132019 <- mutate(diffMethStats50Cov5Destrand, start = start -1, end = end + 1) %>% select(chr, start, end, meth.diff) %>% mutate_if(is.numeric, as.integer) #Save as a BED file, and avoid writing information in scientific notation
```

```{r}
head(DML5xDestrand03132019) #Confirm changes
```

```{r}
write_delim(DML5xDestrand03132019, "2019-03-13-DML-Destrand-5x-Locations.bed",  delim = '\t', col_names = FALSE) #Save data as a BED file
```

#### 10x coverage

```{r}
DML10xDestrand03132019 <- mutate(diffMethStats50Cov10Destrand, start = start -1, end = end + 1) %>% select(chr, start, end, meth.diff) %>% mutate_if(is.numeric, as.integer) #Save as a BED file, and avoid writing information in scientific notation
```

```{r}
head(DML10xDestrand03132019) #Confirm changes
```

```{r}
write_delim(DML10xDestrand03132019, "2019-03-13-DML-Destrand-10x-Locations.bed",  delim = '\t', col_names = FALSE) #Save data as a BED file
```

#### Filtered coverage

```{r}
DML5xFilteredDestrand04052019 <- mutate(diffMethStats50FilteredCov5Destrand, start = start -1, end = end + 1) %>% select(chr, start, end, meth.diff) %>% mutate_if(is.numeric, as.integer) #Save as a BED file, and avoid writing information in scientific notation
```

```{r}
head(DML5xFilteredDestrand04052019) #Confirm changes
```

```{r}
write_delim(DML5xFilteredDestrand04052019, "2019-04-05-DML-Destrand-5x-Locations.bed",  delim = '\t', col_names = FALSE) #Save data as a BED file
```

##### Hypermethylated DML

```{r}
DML5xFilteredDestrandHyper04302019 <- mutate(diffMethStats50FilteredCov5DestrandHyper, start = start -1, end = end + 1) %>% select(chr, start, end, meth.diff) %>% mutate_if(is.numeric, as.integer) #Save as a BED file, and avoid writing information in scientific notation
```

```{r}
head(DML5xFilteredDestrandHyper04302019) #Confirm changes
```

```{r}
write_delim(DML5xFilteredDestrandHyper04302019, "2019-04-05-DML-Destrand-5x-Locations-Hypermethylated.bed",  delim = '\t', col_names = FALSE) #Save data as a BED file
```

##### Hypomethyalted DML

```{r}
DML5xFilteredDestrandHypo04302019 <- mutate(diffMethStats50FilteredCov5DestrandHypo, start = start -1, end = end + 1) %>% select(chr, start, end, meth.diff) %>% mutate_if(is.numeric, as.integer) #Save as a BED file, and avoid writing information in scientific notation
```

```{r}
head(DML5xFilteredDestrandHypo04302019) #Confirm changes
```

```{r}
write_delim(DML5xFilteredDestrandHypo04302019, "2019-04-05-DML-Destrand-5x-Locations-Hypomethylated.bed",  delim = '\t', col_names = FALSE) #Save data as a BED file
```

## DMR Background

```{r}
head(methylationInformationTiles100) #I only want the columns with chromosome, start position, stop position, and strand.
```

```{r}
methylationInformationTiles100Reduced <- data.frame("chr" = methylationInformationTiles100$chr,
                                                    "start" = methylationInformationTiles100$start,
                                                    "stop" = methylationInformationTiles100$end,
                                                    "strand" = methylationInformationTiles100$strand) #Subset file 
head(methylationInformationTiles100Reduced) #Confirm subsetting
```

```{r}
write_delim(methylationInformationTiles100Reduced, "2019-06-05-Methylation-Information-Filtered-Destrand-Cov5-Tiles100.bed", delim = "\t", col_names = FALSE) #Write out all methylation information as a background to be used for gene enrichment analyses.
```

```{bash}
head 2019-06-05-Methylation-Information-Filtered-Destrand-Cov5-Tiles100.bed #Confirm formatting was retained in export
```

## DMRs

### 100 bp windows

```{r}
head(diffMethStats50Tiles100) #I only want the chr, start, end, strand, and meth.diff columns
```

```{r}
DMR06052019 <- select(diffMethStats50Tiles100, chr, start, end, strand, meth.diff) %>% mutate(start = start -1) %>% mutate_if(is.numeric, as.integer)  %>% mutate(TYPE ="DMR")  %>% select(chr, start, end, TYPE, meth.diff) #Reformat object so it can be saved as a BEDfile.
head(DMR06052019)
```

```{r}
write_delim(DMR06052019, "2019-06-05-DMR-Locations.bed",  delim = '\t', col_names = FALSE) #Save data as a BED file
```

##### Hypermethylated DML

```{r}
DMR5xFilteredDestrandHyper06052019 <- mutate(diffMethStats50FilteredCov5DestrandTiles100Hyper, start = start -1, end = end + 1) %>% select(chr, start, end, meth.diff) %>% mutate_if(is.numeric, as.integer) #Save as a BED file, and avoid writing information in scientific notation
```

```{r}
head(DMR5xFilteredDestrandHyper06052019) #Confirm changes
```

```{r}
write_delim(DMR5xFilteredDestrandHyper06052019, "2019-06-05-DMR-Destrand-5x-Locations-Tiles100-Hypermethylated.bed",  delim = '\t', col_names = FALSE) #Save data as a BED file
```

##### Hypomethyalted DML

```{r}
DMR5xFilteredDestrandHypo06052019 <- mutate(diffMethStats50FilteredCov5DestrandTiles100Hypo, start = start -1, end = end + 1) %>% select(chr, start, end, meth.diff) %>% mutate_if(is.numeric, as.integer) #Save as a BED file, and avoid writing information in scientific notation
```

```{r}
head(DMR5xFilteredDestrandHypo06052019) #Confirm changes
```

```{r}
write_delim(DMR5xFilteredDestrandHypo06052019, "2019-06-05-DMR-Destrand-5x-Locations-Tiles100-Hypomethylated.bed",  delim = '\t', col_names = FALSE) #Save data as a BED file
```

### 1000 bp windows

```{r}
DMR100006052019 <- select(diffMethStats50Tiles1000, chr, start, end, strand, meth.diff) %>% mutate(start = start -1) %>% mutate_if(is.numeric, as.integer)  %>% mutate(TYPE ="DMR")  %>% select(chr, start, end, TYPE, meth.diff) #Reformat object so it can be saved as a BEDfile.
head(DMR100006052019)
```

```{r}
write_delim(DMR100006052019, "2019-06-05-1000bp-DMR-Locations.bed",  delim = '\t', col_names = FALSE) #Save data as a BED file
```

### 500 bp windows

```{r}
DMR50006052019 <- select(diffMethStats50Tiles500, chr, start, end, strand, meth.diff) %>% mutate(start = start -1) %>% mutate_if(is.numeric, as.integer)  %>% mutate(TYPE ="DMR")  %>% select(chr, start, end, TYPE, meth.diff) #Reformat object so it can be saved as a BEDfile.
head(DMR50006052019)
```

```{r}
write_delim(DMR50006052019, "2019-06-05-500bp-DMR-Locations.bed",  delim = '\t', col_names = FALSE) #Save data as a BED file
```