--- title: "Untitled" author: "Shelly Trigg" date: "7/9/2020" output: html_document --- load libraries ```{r} library(data.table) library(ggplot2) library(ggpubr) ``` read in data ```{r} Mcap <- fread("https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200424/10-unionbedg/Mcap_union_5x.bedgraph") Pact <- fread("https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200424/10-unionbedg/Pact_union_5x.bedgraph") ``` convert N/A to NA ```{r} Mcap[Mcap == "N/A"] <- NA Pact[Pact == "N/A"] <- NA ``` filter for loci covered in all samples by removing lines with NA ```{r} Mcap_complete <- data.frame(Mcap[complete.cases(Mcap),]) Pact_complete <- data.frame(Pact[complete.cases(Pact),]) ``` see the structure of the df ```{r} str(Mcap_complete) str(Pact_complete) ``` The % methylation columns are character vectors because they initially contained "N/A" so convert all % methylation columns from character to numeric ```{r} Mcap_complete[,-c(1:3)] <- sapply(Mcap_complete[,-c(1:3)], as.numeric) Pact_complete[,-c(1:3)] <- sapply(Pact_complete[,-c(1:3)], as.numeric) ``` check the structure again ```{r} str(Mcap_complete) str(Pact_complete) ``` The conversion worked. create rownames that is the loci ID (chrom:start-stop) ```{r} row.names(Mcap_complete) <- paste0(Mcap_complete$chrom,":", Mcap_complete$start,"-",Mcap_complete$end) row.names(Pact_complete) <- paste0(Pact_complete$chrom,":", Pact_complete$start,"-",Pact_complete$end) ``` remove chrom, start, and end columns ```{r} Mcap_complete <- Mcap_complete[,-c(1:3)] Pact_complete <- Pact_complete[,-c(1:3)] ``` swap sample numbers for sample names in colnames ```{r} colnames(Mcap_complete) <- c("WGBS_1", "WGBS_2", "WGBS_3", "RRBS_1", "RRBS_2", "RRBS_3", "MBDBS_1", "MBDBS_2", "MBDBS_3") colnames(Pact_complete) <- c("WGBS_1", "WGBS_2", "WGBS_3", "RRBS_1", "RRBS_2", "RRBS_3", "MBDBS_1", "MBDBS_2", "MBDBS_3") ``` write out input files for QC ```{r} write.table(Mcap_complete, "../../Output/Mcap_union5xCpG_PCAinput.tsv", sep ="\t", quote = F) write.table(Mcap_complete, "../../Output/Pact_union5xCpG_PCAinput.tsv", sep ="\t", quote = F) ``` create a meta data df ```{r} Mcap_meta <- data.frame(row.names = colnames(Mcap_complete), Method = gsub("_.*","",colnames(Mcap_complete))) Pact_meta <- data.frame(row.names = colnames(Pact_complete), Method = gsub("_.*","",colnames(Pact_complete))) ``` run PCA ```{r} Mcap_pca <- prcomp(t(Mcap_complete)) Pact_pca <- prcomp(t(Pact_complete)) ``` add method for plotting ```{r} Mcap_pca_meta <- merge(Mcap_meta,Mcap_pca$x, by = "row.names") Pact_pca_meta <- merge(Pact_meta,Pact_pca$x, by = "row.names") ``` plot PC 1 and 2 scores ```{r} a <- ggplot(Mcap_pca_meta, aes(PC1, PC2)) + geom_point(aes(color = Method), size = 3) + scale_color_manual(values = c("#FD8D3C","#9E9AC8","#74C476")) + theme_bw() + ylab("PC2") + xlab("PC1") + ggtitle(expression(italic("M.capitata"))) b <- ggplot(Pact_pca_meta, aes(PC1, PC2)) + geom_point(aes(color = Method), size = 3) + scale_color_manual(values = c("#FD8D3C","#9E9AC8","#74C476")) + theme_bw() + ylab("PC2") + xlab("PC1") + ggtitle(expression(italic("P.acuta"))) jpeg("../../Output/PCA_union5xCpGloci.jpg", width = 10, height = 4, units = "in", res = 300) ggarrange(a,b, ncol = 2) dev.off() ```