R version 3.5.1 (2018-07-02) -- "Feather Spray" Copyright (C) 2018 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. Natural language support but running in an English locale R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. Microsoft R Open 3.5.1 The enhanced R distribution from Microsoft Microsoft packages Copyright (C) 2018 Microsoft Corporation Using the Intel MKL for parallel mathematical computing (using 8 cores). Default CRAN mirror snapshot taken on 2018-08-01. See: https://mran.microsoft.com/. > sessionInfo() R version 3.5.1 (2018-07-02) Platform: x86_64-pc-linux-gnu (64-bit) Running under: Ubuntu 16.04.6 LTS Matrix products: default BLAS: /opt/microsoft/ropen/3.5.1/lib64/R/lib/libRblas.so LAPACK: /opt/microsoft/ropen/3.5.1/lib64/R/lib/libRlapack.so locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 [7] LC_PAPER=en_US.UTF-8 LC_NAME=C [9] LC_ADDRESS=C LC_TELEPHONE=C [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C attached base packages: [1] stats graphics grDevices utils datasets methods base other attached packages: [1] RevoUtils_11.0.1 RevoUtilsMath_11.0.0 loaded via a namespace (and not attached): [1] compiler_3.5.1 > > # load libraries > #```{r} > library(data.table) > library(dplyr) Attaching package: ‘dplyr’ The following objects are masked from ‘package:data.table’: between, first, last The following objects are masked from ‘package:stats’: filter, lag The following objects are masked from ‘package:base’: intersect, setdiff, setequal, union > library(ggplot2) > #install.packages("ggpubr") > #library(ggpubr) > library(tidyr) > #install.packages("psych") > #library(psych) > #``` > > #read in union bedgraph for Pact > #```{r} > > #Pact_meth <- read.table("Pact_union_Meth.bedgraph", header = T, sep = "\t", stringsAsFactors = F) > > #Pact_cov <- read.table("Pact_union_CpG_cov.bedgraph", header = T, sep = "\t", stringsAsFactors = F) > > #Mcap_meth <- read.table("Mcap_union_Meth.bedgraph", header = T, sep = "\t", stringsAsFactors = F) > > #Mcap_cov <- read.table("Mcap_union_Cov.bedgraph", header = T, sep = "\t", stringsAsFactors = F) > > #``` > > #convert unionBed file to long format > #```{r} > #Pact > #Pact_meth_l <- tidyr::gather(data.frame(Pact_meth),"Sample","perc_meth",4:12) > #Pact_cov_l <- tidyr::gather(data.frame(Pact_cov),"Sample","reads",4:12) > > #Mcap > #Mcap_meth_l <- tidyr::gather(data.frame(Mcap_meth),"Sample","perc_meth",4:12) > #Mcap_cov_l <- tidyr::gather(data.frame(Mcap_cov),"Sample","reads",4:12) > > #``` > > > #```{r} > #remove lines with NA (no data) > #Pact_cov_l[Pact_cov_l == "N/A"] <- NA > #Pact_meth_l[Pact_meth_l == "N/A"] <- NA > > #Pact_cov_l <- data.frame(Pact_cov_l[complete.cases(Pact_cov_l),]) > #Pact_meth_l <- data.frame(Pact_meth_l[complete.cases(Pact_meth_l),]) > > #edit create loci column with scaffold, start, and end as one string > #Pact_cov_l$loci <- paste0(Pact_cov_l$chrom,":", Pact_cov_l$start,"-",Pact_cov_l$end) > > #Pact_meth_l$loci <- paste0(Pact_meth_l$chrom,":", Pact_meth_l$start,"-",Pact_meth_l$end) > > #remove the scaffold, start, and end columns > #Pact_cov_l <- Pact_cov_l[,-c(1:3)] > #Pact_meth_l <- Pact_meth_l[,-c(1:3)] > > #remove the X in sample names > #Pact_cov_l$Sample <- gsub("X","", Pact_cov_l$Sample) > #Pact_meth_l$Sample <- gsub("X","", Pact_meth_l$Sample) > > #convery sample column to numeric > #Pact_cov_l$Sample <- as.numeric(as.character(Pact_cov_l$Sample)) > #Pact_meth_l$Sample <- as.numeric(as.character(Pact_meth_l$Sample)) > > #convert reads and methylation to numeric > #Pact_cov_l$reads <- as.numeric(as.character(Pact_cov_l$reads)) > #Pact_meth_l$perc_meth <- as.numeric(as.character(Pact_meth_l$perc_meth)) > > #add method column > #Pact_cov_l$method <- Pact_cov_l$Sample > #Pact_cov_l$method <- gsub("1|2|3","WGBS",Pact_cov_l$method) > #Pact_cov_l$method <- gsub("4|5|6","RRBS",Pact_cov_l$method) > #Pact_cov_l$method <- gsub("7|8|9","MBD",Pact_cov_l$method) > > #Pact_meth_l$method <- Pact_meth_l$Sample > #Pact_meth_l$method <- gsub("1|2|3","WGBS",Pact_meth_l$method) > #Pact_meth_l$method <- gsub("4|5|6","RRBS",Pact_meth_l$method) > #Pact_meth_l$method <- gsub("7|8|9","MBD",Pact_meth_l$method) > > #preview objects > #head(Pact_cov_l) > #head(Pact_meth_l) > > > ## Do the same for Mcap ## > > > #remove lines with NA (no data) > #Mcap_cov_l[Mcap_cov_l == "N/A"] <- NA > #Mcap_meth_l[Mcap_meth_l == "N/A"] <- NA > > #Mcap_cov_l <- data.frame(Mcap_cov_l[complete.cases(Mcap_cov_l),]) > #Mcap_meth_l <- data.frame(Mcap_meth_l[complete.cases(Mcap_meth_l),]) > > #edit create loci column with scaffold, start, and end as one string > #Mcap_cov_l$loci <- paste0(Mcap_cov_l$chrom,":", Mcap_cov_l$start,"-",Mcap_cov_l$end) > > #Mcap_meth_l$loci <- paste0(Mcap_meth_l$chrom,":", Mcap_meth_l$start,"-",Mcap_meth_l$end) > > #remove the scaffold, start, and end columns > #Mcap_cov_l <- Mcap_cov_l[,-c(1:3)] > #Mcap_meth_l <- Mcap_meth_l[,-c(1:3)] > > #remove the X in sample names > #Mcap_cov_l$Sample <- gsub("X","", Mcap_cov_l$Sample) > #Mcap_meth_l$Sample <- gsub("X","", Mcap_meth_l$Sample) > > #convery sample column to numeric > #Mcap_cov_l$Sample <- as.numeric(as.character(Mcap_cov_l$Sample)) > #Mcap_meth_l$Sample <- as.numeric(as.character(Mcap_meth_l$Sample)) > > #convert reads and methylation to numeric > #Mcap_cov_l$reads <- as.numeric(as.character(Mcap_cov_l$reads)) > #Mcap_meth_l$perc_meth <- as.numeric(as.character(Mcap_meth_l$perc_meth)) > > #add method column > #Mcap_cov_l$method <- Mcap_cov_l$Sample > #Mcap_cov_l$method <- gsub("1|2|3","WGBS",Mcap_cov_l$method) > #Mcap_cov_l$method <- gsub("4|5|6","RRBS",Mcap_cov_l$method) > #Mcap_cov_l$method <- gsub("7|8|9","MBD",Mcap_cov_l$method) > > #Mcap_meth_l$method <- Mcap_meth_l$Sample > #Mcap_meth_l$method <- gsub("1|2|3","WGBS",Mcap_meth_l$method) > #Mcap_meth_l$method <- gsub("4|5|6","RRBS",Mcap_meth_l$method) > #Mcap_meth_l$method <- gsub("7|8|9","MBD",Mcap_meth_l$method) > > #preview objects > #head(Mcap_cov_l) > #head(Mcap_meth_l) > > #``` > > > > #Make column to specify individual > #```{r} > > #Pact_cov_perc$individual <- Pact_cov_perc$Sample > #Pact_cov_perc$individual <- gsub("1|4|7","A",Pact_cov_perc$individual) > #Pact_cov_perc$individual <- gsub("2|5|8","B",Pact_cov_perc$individual) > #Pact_cov_perc$individual <- gsub("3|6|9","C",Pact_cov_perc$individual) > #``` > > # bin read coverage > #```{r} > # # specify interval/bin labels > # meth_tags <- c("[0-10%]","[10-20%]", "[20-30%]", "[30-40%]", "[40-50%]", "[50-60%]", "[60-70%]", "[70-80%]","[80-90%]", "[90-100%]") > # > # # specify interval/bin labels > # cov_tags <- c("[1-5]","[5-10]","[10-20]", "[20-30]", "[30-40]", "[40-50]", "[50-100]", "[100-500]", "[500-1000]", "[>1000]") > # > # # bucketing values into bins > # > # Pact_meth_tag <- as_tibble(Pact_meth_l) %>% mutate(meth_tag = case_when(perc_meth < 10 ~ meth_tags[1],perc_meth >= 10 & perc_meth < 20 ~ meth_tags[2],perc_meth >= 20 & perc_meth < 30 ~ meth_tags[3],perc_meth >= 30 & perc_meth < 40 ~ meth_tags[4],perc_meth >= 40 & perc_meth < 50 ~ meth_tags[5], perc_meth >= 50 & perc_meth < 60 ~ meth_tags[6], perc_meth >= 60 & perc_meth < 70 ~ meth_tags[7], perc_meth >= 70 & perc_meth < 80 ~ meth_tags[8], perc_meth >= 80 & perc_meth < 90 ~ meth_tags[9], perc_meth >= 90 & perc_meth <= 100 ~ meth_tags[10])) > # > # Pact_cov_tag <- as_tibble(Pact_cov_l) %>% mutate(cov_tag = case_when(reads < 5 ~ cov_tags[1],reads >= 5 & reads < 10 ~ cov_tags[2],reads >= 10 & reads < 20 ~ cov_tags[3],reads >= 20 & reads < 30 ~ cov_tags[4],reads >= 30 & reads < 40 ~ cov_tags[5], reads >= 40 & reads < 50 ~ cov_tags[6], reads >= 50 & reads < 100 ~ cov_tags[7], reads >=100 & reads < 500 ~ cov_tags[8], reads >=500 & reads < 1000 ~ cov_tags[9], reads >=1000 ~ cov_tags[10])) > #``` > > > #combine coverage and methylation data > #```{r} > #Pact_cov_perc <- merge(Pact_cov_l, Pact_meth_l, by = c("Sample", "loci", "method"), all = T) > > #head(Pact_cov_perc) > > #clean up environment > #rm(Pact_cov) > #rm(Pact_meth) > #rm(Pact_cov_l) > #rm(Pact_meth_l) > #rm(Mcap_cov) > #rm(Mcap_meth) > > #Mcap_cov_perc <- merge(Mcap_cov_l, Mcap_meth_l, by = c("Sample", "loci", "method"), all = T) > > #head(Mcap_cov_perc) > > #clean up environment > #rm(Mcap_cov_l) > #rm(Mcap_meth_l) > > #write out files > #write.csv(Mcap_cov_perc, "~/strigg/analyses/20200910/Mcap_cov_perc.txt", sep = "\t", row.names = F, quote = F) > > #write.csv(Pact_cov_perc, "~/strigg/analyses/20200910/Pact_cov_perc.txt", sep = "\t", row.names = F, quote = F) > > Mcap_cov_perc <- read.csv("~/strigg/analyses/20200910/Mcap_cov_perc.txt", stringsAsFactors = F, header = T) > > Pact_cov_perc <- read.csv("~/strigg/analyses/20200910/Pact_cov_perc.txt", stringsAsFactors = F, header = T) > > #``` > > #calculate median methylation and coverage > #```{r} > Pact_meth_stats <- Pact_cov_perc %>% group_by(loci,method) %>% summarise(medn_meth = median(perc_meth),medn_cov = median(reads),indivd = n()) > > Mcap_meth_stats <- Mcap_cov_perc %>% group_by(loci,method) %>% summarise(medn_meth = median(perc_meth),medn_cov = median(reads),indivd = n()) > > nrow(Pact_meth_stats) [1] 21876687 > str(Pact_meth_stats) Classes ‘grouped_df’, ‘tbl_df’, ‘tbl’ and 'data.frame': 21876687 obs. of 5 variables: $ loci : chr "scaffold1_cov55:102-104" "scaffold1_cov55:105-107" "scaffold1_cov55:116-118" "scaffold1_cov55:116-118" ... $ method : chr "WGBS" "WGBS" "MBD" "WGBS" ... $ medn_meth: num 0 0 0 0 0 0 0 0 0 0 ... $ medn_cov : num 6 6 1 5 1 5 1 5 1 4 ... $ indivd : int 3 3 1 3 1 3 1 3 2 3 ... - attr(*, "groups")=Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 8683132 obs. of 2 variables: ..$ loci : chr "scaffold1_cov55:102-104" "scaffold1_cov55:105-107" "scaffold1_cov55:116-118" "scaffold1_cov55:119-121" ... ..$ .rows:List of 8683132 .. ..$ : int 1 .. ..$ : int 2 .. ..$ : int 3 4 .. ..$ : int 5 6 .. ..$ : int 7 8 .. ..$ : int 9 10 .. ..$ : int 11 12 .. ..$ : int 13 14 .. ..$ : int 15 16 .. ..$ : int 17 18 .. ..$ : int 19 20 .. ..$ : int 21 22 .. ..$ : int 23 24 .. ..$ : int 25 26 .. ..$ : int 27 28 .. ..$ : int 29 .. ..$ : int 30 .. ..$ : int 31 .. ..$ : int 32 33 .. ..$ : int 34 .. ..$ : int 35 .. ..$ : int 36 37 .. ..$ : int 38 39 .. ..$ : int 40 41 .. ..$ : int 42 43 44 .. ..$ : int 45 46 47 .. ..$ : int 48 49 50 .. ..$ : int 51 52 53 .. ..$ : int 54 55 56 .. ..$ : int 57 58 59 .. ..$ : int 60 61 62 .. ..$ : int 63 64 65 .. ..$ : int 66 67 68 .. ..$ : int 69 70 .. ..$ : int 71 72 .. ..$ : int 73 74 .. ..$ : int 75 76 .. ..$ : int 77 78 .. ..$ : int 79 80 .. ..$ : int 81 82 83 .. ..$ : int 84 85 86 .. ..$ : int 87 88 89 .. ..$ : int 90 91 92 .. ..$ : int 93 94 95 .. ..$ : int 96 97 98 .. ..$ : int 99 100 101 .. ..$ : int 102 103 .. ..$ : int 104 105 .. ..$ : int 106 107 108 .. ..$ : int 109 110 111 .. ..$ : int 112 113 114 .. ..$ : int 115 116 117 .. ..$ : int 118 119 120 .. ..$ : int 121 122 123 .. ..$ : int 124 125 126 .. ..$ : int 127 128 .. ..$ : int 129 130 .. ..$ : int 131 132 133 .. ..$ : int 134 135 136 .. ..$ : int 137 138 139 .. ..$ : int 140 141 142 .. ..$ : int 143 144 145 .. ..$ : int 146 147 148 .. ..$ : int 149 150 151 .. ..$ : int 152 153 154 .. ..$ : int 155 156 157 .. ..$ : int 158 159 160 .. ..$ : int 161 162 163 .. ..$ : int 164 165 166 .. ..$ : int 167 168 169 .. ..$ : int 170 171 172 .. ..$ : int 173 174 175 .. ..$ : int 176 177 178 .. ..$ : int 179 180 181 .. ..$ : int 182 183 184 .. ..$ : int 185 186 187 .. ..$ : int 188 189 190 .. ..$ : int 191 192 193 .. ..$ : int 194 195 196 .. ..$ : int 197 198 199 .. ..$ : int 200 201 202 .. ..$ : int 203 204 205 .. ..$ : int 206 207 208 .. ..$ : int 209 210 211 .. ..$ : int 212 213 214 .. ..$ : int 215 216 217 .. ..$ : int 218 219 220 .. ..$ : int 221 222 223 .. ..$ : int 224 225 226 .. ..$ : int 227 .. ..$ : int 228 229 230 .. ..$ : int 231 232 233 .. ..$ : int 234 235 236 .. ..$ : int 237 238 239 .. ..$ : int 240 241 242 .. ..$ : int 243 244 245 .. ..$ : int 246 247 248 .. ..$ : int 249 250 251 .. ..$ : int 252 253 254 .. .. [list output truncated] ..- attr(*, ".drop")= logi TRUE > head(Pact_meth_stats) # A tibble: 6 x 5 # Groups: loci [4] loci method medn_meth medn_cov indivd 1 scaffold1_cov55:102-104 WGBS 0 6 3 2 scaffold1_cov55:105-107 WGBS 0 6 3 3 scaffold1_cov55:116-118 MBD 0 1 1 4 scaffold1_cov55:116-118 WGBS 0 5 3 5 scaffold1_cov55:119-121 MBD 0 1 1 6 scaffold1_cov55:119-121 WGBS 0 5 3 > > nrow(Mcap_meth_stats) [1] 49635407 > str(Mcap_meth_stats) Classes ‘grouped_df’, ‘tbl_df’, ‘tbl’ and 'data.frame': 49635407 obs. of 5 variables: $ loci : chr "1:10000-10002" "1:10000-10002" "1:1000005-1000007" "1:1000005-1000007" ... $ method : chr "MBD" "WGBS" "MBD" "RRBS" ... $ medn_meth: num 50 0 0 0 0 0 0 0 0 0 ... $ medn_cov : num 2 3 1 3 2 1 2 1.5 1 2 ... $ indivd : int 1 1 2 1 3 2 3 2 1 3 ... - attr(*, "groups")=Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 24968119 obs. of 2 variables: ..$ loci : chr "1:10000-10002" "1:1000005-1000007" "1:1000016-1000018" "1:1000029-1000031" ... ..$ .rows:List of 24968119 .. ..$ : int 1 2 .. ..$ : int 3 4 5 .. ..$ : int 6 7 .. ..$ : int 8 9 10 .. ..$ : int 11 12 13 .. ..$ : int 14 15 16 .. ..$ : int 17 18 19 .. ..$ : int 20 21 22 .. ..$ : int 23 24 25 .. ..$ : int 26 27 28 .. ..$ : int 29 30 .. ..$ : int 31 32 .. ..$ : int 33 34 .. ..$ : int 35 36 .. ..$ : int 37 38 .. ..$ : int 39 .. ..$ : int 40 .. ..$ : int 41 42 .. ..$ : int 43 44 .. ..$ : int 45 46 .. ..$ : int 47 48 .. ..$ : int 49 50 .. ..$ : int 51 52 53 .. ..$ : int 54 55 56 .. ..$ : int 57 58 59 .. ..$ : int 60 61 62 .. ..$ : int 63 64 65 .. ..$ : int 66 67 68 .. ..$ : int 69 70 71 .. ..$ : int 72 73 74 .. ..$ : int 75 76 77 .. ..$ : int 78 79 80 .. ..$ : int 81 82 83 .. ..$ : int 84 85 86 .. ..$ : int 87 88 89 .. ..$ : int 90 91 92 .. ..$ : int 93 94 95 .. ..$ : int 96 97 98 .. ..$ : int 99 100 101 .. ..$ : int 102 103 104 .. ..$ : int 105 106 107 .. ..$ : int 108 109 110 .. ..$ : int 111 112 113 .. ..$ : int 114 115 116 .. ..$ : int 117 118 119 .. ..$ : int 120 121 122 .. ..$ : int 123 124 125 .. ..$ : int 126 127 128 .. ..$ : int 129 130 131 .. ..$ : int 132 133 134 .. ..$ : int 135 136 137 .. ..$ : int 138 139 .. ..$ : int 140 141 .. ..$ : int 142 143 .. ..$ : int 144 145 146 .. ..$ : int 147 148 149 .. ..$ : int 150 151 152 .. ..$ : int 153 154 155 .. ..$ : int 156 157 158 .. ..$ : int 159 160 161 .. ..$ : int 162 163 164 .. ..$ : int 165 166 167 .. ..$ : int 168 169 170 .. ..$ : int 171 172 173 .. ..$ : int 174 175 176 .. ..$ : int 177 178 179 .. ..$ : int 180 181 182 .. ..$ : int 183 184 185 .. ..$ : int 186 187 188 .. ..$ : int 189 190 .. ..$ : int 191 192 193 .. ..$ : int 194 195 .. ..$ : int 196 197 198 .. ..$ : int 199 200 201 .. ..$ : int 202 203 .. ..$ : int 204 205 206 .. ..$ : int 207 208 209 .. ..$ : int 210 211 212 .. ..$ : int 213 214 215 .. ..$ : int 216 217 218 .. ..$ : int 219 220 221 .. ..$ : int 222 223 224 .. ..$ : int 225 226 227 .. ..$ : int 228 229 230 .. ..$ : int 231 232 233 .. ..$ : int 234 235 236 .. ..$ : int 237 238 239 .. ..$ : int 240 241 242 .. ..$ : int 243 244 245 .. ..$ : int 246 247 248 .. ..$ : int 249 250 251 .. ..$ : int 252 253 .. ..$ : int 254 255 .. ..$ : int 256 257 258 .. ..$ : int 259 260 261 .. ..$ : int 262 263 264 .. ..$ : int 265 266 267 .. ..$ : int 268 269 .. ..$ : int 270 271 .. .. [list output truncated] ..- attr(*, ".drop")= logi TRUE > head(Mcap_meth_stats) # A tibble: 6 x 5 # Groups: loci [3] loci method medn_meth medn_cov indivd 1 1:10000-10002 MBD 50 2 1 2 1:10000-10002 WGBS 0 3 1 3 1:1000005-1000007 MBD 0 1 2 4 1:1000005-1000007 RRBS 0 3 1 5 1:1000005-1000007 WGBS 0 2 3 6 1:1000016-1000018 MBD 0 1 2 > > > #``` > > #add labels for % meth and coverage bins > #```{r} > # specify % meth interval bin labels > meth_tags <- c("[0-10%]","[10-20%]", "[20-30%]", "[30-40%]", "[40-50%]", "[50-60%]", "[60-70%]", "[70-80%]","[80-90%]", "[90-100%]") > > # specify coverage bin labels > cov_tags <- c("[1-5]","[5-10]","[10-20]", "[20-30]", "[30-40]", "[40-50]", "[50-100]", "[100-500]", "[500-1000]", "[>1000]") > > # bucketing values into bins > > Pact_meth_stats_tag <- as_tibble(Pact_meth_stats) %>% mutate(meth_tag = case_when(medn_meth < 10 ~ meth_tags[1],medn_meth >= 10 & medn_meth < 20 ~ meth_tags[2],medn_meth >= 20 & medn_meth < 30 ~ meth_tags[3],medn_meth >= 30 & medn_meth < 40 ~ meth_tags[4],medn_meth >= 40 & medn_meth < 50 ~ meth_tags[5], medn_meth >= 50 & medn_meth < 60 ~ meth_tags[6], medn_meth >= 60 & medn_meth < 70 ~ meth_tags[7], medn_meth >= 70 & medn_meth < 80 ~ meth_tags[8], medn_meth >= 80 & medn_meth < 90 ~ meth_tags[9], medn_meth >= 90 & medn_meth <= 100 ~ meth_tags[10]), cov_tag = case_when(medn_cov < 5 ~ cov_tags[1],medn_cov >= 5 & medn_cov < 10 ~ cov_tags[2],medn_cov >= 10 & medn_cov < 20 ~ cov_tags[3],medn_cov >= 20 & medn_cov < 30 ~ cov_tags[4],medn_cov >= 30 & medn_cov < 40 ~ cov_tags[5], medn_cov >= 40 & medn_cov < 50 ~ cov_tags[6], medn_cov >= 50 & medn_cov < 100 ~ cov_tags[7], medn_cov >=100 & medn_cov < 500 ~ cov_tags[8], medn_cov >=500 & medn_cov < 1000 ~ cov_tags[9], medn_cov >=1000 ~ cov_tags[10])) > > > head(Pact_meth_stats_tag) # A tibble: 6 x 7 loci method medn_meth medn_cov indivd meth_tag cov_tag 1 scaffold1_cov55:102-104 WGBS 0 6 3 [0-10%] [5-10] 2 scaffold1_cov55:105-107 WGBS 0 6 3 [0-10%] [5-10] 3 scaffold1_cov55:116-118 MBD 0 1 1 [0-10%] [1-5] 4 scaffold1_cov55:116-118 WGBS 0 5 3 [0-10%] [5-10] 5 scaffold1_cov55:119-121 MBD 0 1 1 [0-10%] [1-5] 6 scaffold1_cov55:119-121 WGBS 0 5 3 [0-10%] [5-10] > nrow(Pact_meth_stats_tag) [1] 21876687 > > #Mcap > Mcap_meth_stats_tag <- as_tibble(Mcap_meth_stats) %>% mutate(meth_tag = case_when(medn_meth < 10 ~ meth_tags[1],medn_meth >= 10 & medn_meth < 20 ~ meth_tags[2],medn_meth >= 20 & medn_meth < 30 ~ meth_tags[3],medn_meth >= 30 & medn_meth < 40 ~ meth_tags[4],medn_meth >= 40 & medn_meth < 50 ~ meth_tags[5], medn_meth >= 50 & medn_meth < 60 ~ meth_tags[6], medn_meth >= 60 & medn_meth < 70 ~ meth_tags[7], medn_meth >= 70 & medn_meth < 80 ~ meth_tags[8], medn_meth >= 80 & medn_meth < 90 ~ meth_tags[9], medn_meth >= 90 & medn_meth <= 100 ~ meth_tags[10]), cov_tag = case_when(medn_cov < 5 ~ cov_tags[1],medn_cov >= 5 & medn_cov < 10 ~ cov_tags[2],medn_cov >= 10 & medn_cov < 20 ~ cov_tags[3],medn_cov >= 20 & medn_cov < 30 ~ cov_tags[4],medn_cov >= 30 & medn_cov < 40 ~ cov_tags[5], medn_cov >= 40 & medn_cov < 50 ~ cov_tags[6], medn_cov >= 50 & medn_cov < 100 ~ cov_tags[7], medn_cov >=100 & medn_cov < 500 ~ cov_tags[8], medn_cov >=500 & medn_cov < 1000 ~ cov_tags[9], medn_cov >=1000 ~ cov_tags[10])) > > > head(Mcap_meth_stats_tag) # A tibble: 6 x 7 loci method medn_meth medn_cov indivd meth_tag cov_tag 1 1:10000-10002 MBD 50 2 1 [50-60%] [1-5] 2 1:10000-10002 WGBS 0 3 1 [0-10%] [1-5] 3 1:1000005-1000007 MBD 0 1 2 [0-10%] [1-5] 4 1:1000005-1000007 RRBS 0 3 1 [0-10%] [1-5] 5 1:1000005-1000007 WGBS 0 2 3 [0-10%] [1-5] 6 1:1000016-1000018 MBD 0 1 2 [0-10%] [1-5] > nrow(Mcap_meth_stats_tag) [1] 49635407 > #``` > > > #create df for loci where all three individuals per method show coverage > #```{r} > Pact_meth_stats_tag_3x <- Pact_meth_stats_tag[which(Pact_meth_stats_tag$indivd == 3),] > > Mcap_meth_stats_tag_3x <- Mcap_meth_stats_tag[which(Mcap_meth_stats_tag$indivd == 3),] > #``` > > > #add group column > #```{r} > #create function for converting list to string > uniqMethod.FUN <- function(x){paste(sort(unique(unlist(strsplit(x, split =", "), use.names = F))), collapse = ", ")} > > #store methods that loci were detected in as a comma separated list > #Pact_cov_perc_group <- Pact_meth_stats_tag %>% group_by(loci) %>% mutate(group = toString(method)) > > ##converting group class (list to string) > #Pact_cov_perc_group$group_simp <- sapply(Pact_cov_perc_group$group, uniqMethod.FUN, USE.NAMES = F) > > #do the same for 3x > #store methods that loci were detected in as a comma separated list > Pact_cov_perc_group_3x <- Pact_meth_stats_tag_3x %>% group_by(loci) %>% mutate(group = toString(method)) > > ##converting group class (list to string) > Pact_cov_perc_group_3x$group_simp <- sapply(Pact_cov_perc_group_3x$group, uniqMethod.FUN, USE.NAMES = F) > > #check df integrity > #str(Pact_cov_perc_group) > #table(Pact_cov_perc_group$group_simp) > #nrow(Pact_cov_perc_group) > > str(Pact_cov_perc_group_3x) Classes ‘grouped_df’, ‘tbl_df’, ‘tbl’ and 'data.frame': 13789562 obs. of 9 variables: $ loci : chr "scaffold1_cov55:102-104" "scaffold1_cov55:105-107" "scaffold1_cov55:116-118" "scaffold1_cov55:119-121" ... $ method : chr "WGBS" "WGBS" "WGBS" "WGBS" ... $ medn_meth : num 0 0 0 0 0 0 0 0 0 0 ... $ medn_cov : num 6 6 5 5 5 4 5 5 5 4 ... $ indivd : int 3 3 3 3 3 3 3 3 3 3 ... $ meth_tag : chr "[0-10%]" "[0-10%]" "[0-10%]" "[0-10%]" ... $ cov_tag : chr "[5-10]" "[5-10]" "[5-10]" "[5-10]" ... $ group : chr "WGBS" "WGBS" "WGBS" "WGBS" ... $ group_simp: chr "WGBS" "WGBS" "WGBS" "WGBS" ... - attr(*, "groups")=Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 7635352 obs. of 2 variables: ..$ loci : chr "scaffold1_cov55:102-104" "scaffold1_cov55:105-107" "scaffold1_cov55:116-118" "scaffold1_cov55:119-121" ... ..$ .rows:List of 7635352 .. ..$ : int 1 .. ..$ : int 2 .. ..$ : int 3 .. ..$ : int 4 .. ..$ : int 5 .. ..$ : int 6 .. ..$ : int 7 .. ..$ : int 8 .. ..$ : int 9 .. ..$ : int 10 .. ..$ : int 11 .. ..$ : int 12 .. ..$ : int 13 .. ..$ : int 14 .. ..$ : int 15 .. ..$ : int 16 .. ..$ : int 17 .. ..$ : int 18 .. ..$ : int 19 .. ..$ : int 20 .. ..$ : int 21 .. ..$ : int 22 .. ..$ : int 23 .. ..$ : int 24 .. ..$ : int 25 .. ..$ : int 26 .. ..$ : int 27 .. ..$ : int 28 .. ..$ : int 29 .. ..$ : int 30 .. ..$ : int 31 .. ..$ : int 32 .. ..$ : int 33 .. ..$ : int 34 .. ..$ : int 35 36 .. ..$ : int 37 38 .. ..$ : int 39 40 .. ..$ : int 41 .. ..$ : int 42 .. ..$ : int 43 .. ..$ : int 44 45 .. ..$ : int 46 .. ..$ : int 47 48 .. ..$ : int 49 50 .. ..$ : int 51 .. ..$ : int 52 .. ..$ : int 53 .. ..$ : int 54 .. ..$ : int 55 .. ..$ : int 56 .. ..$ : int 57 .. ..$ : int 58 59 .. ..$ : int 60 61 .. ..$ : int 62 63 .. ..$ : int 64 65 66 .. ..$ : int 67 68 69 .. ..$ : int 70 71 72 .. ..$ : int 73 74 75 .. ..$ : int 76 77 78 .. ..$ : int 79 80 .. ..$ : int 81 82 .. ..$ : int 83 84 85 .. ..$ : int 86 87 88 .. ..$ : int 89 90 .. ..$ : int 91 92 93 .. ..$ : int 94 95 96 .. ..$ : int 97 .. ..$ : int 98 99 100 .. ..$ : int 101 102 103 .. ..$ : int 104 105 106 .. ..$ : int 107 108 109 .. ..$ : int 110 111 .. ..$ : int 112 113 .. ..$ : int 114 115 .. ..$ : int 116 .. ..$ : int 117 .. ..$ : int 118 .. ..$ : int 119 .. ..$ : int 120 121 .. ..$ : int 122 .. ..$ : int 123 124 .. ..$ : int 125 126 .. ..$ : int 127 128 .. ..$ : int 129 130 .. ..$ : int 131 132 .. ..$ : int 133 134 .. ..$ : int 135 136 .. ..$ : int 137 138 .. ..$ : int 139 140 .. ..$ : int 141 142 .. ..$ : int 143 .. ..$ : int 144 145 .. ..$ : int 146 147 .. ..$ : int 148 149 .. ..$ : int 150 .. ..$ : int 151 152 .. ..$ : int 153 .. ..$ : int 154 .. ..$ : int 155 .. .. [list output truncated] ..- attr(*, ".drop")= logi TRUE > table(Pact_cov_perc_group_3x$group_simp) MBD MBD, RRBS MBD, RRBS, WGBS MBD, WGBS RRBS 72088 19916 3930603 5479488 65636 RRBS, WGBS WGBS 1568212 2653619 > nrow(Pact_cov_perc_group_3x) [1] 13789562 > > ## Mcap ## > > #store methods that loci were detected in as a comma separated list > > #do the same for 3x > #store methods that loci were detected in as a comma separated list > Mcap_cov_perc_group_3x <- Mcap_meth_stats_tag_3x %>% group_by(loci) %>% mutate(group = toString(method)) > > ##converting group class (list to string) > Mcap_cov_perc_group_3x$group_simp <- sapply(Mcap_cov_perc_group_3x$group, uniqMethod.FUN, USE.NAMES = F) > > > str(Mcap_cov_perc_group_3x) Classes ‘grouped_df’, ‘tbl_df’, ‘tbl’ and 'data.frame': 18675688 obs. of 9 variables: $ loci : chr "1:1000005-1000007" "1:1000016-1000018" "1:1000029-1000031" "1:1000053-1000055" ... $ method : chr "WGBS" "WGBS" "WGBS" "MBD" ... $ medn_meth : num 0 0 0 0 0 0 0 0 0 0 ... $ medn_cov : num 2 2 2 1 2 5 2 5 6 3 ... $ indivd : int 3 3 3 3 3 3 3 3 3 3 ... $ meth_tag : chr "[0-10%]" "[0-10%]" "[0-10%]" "[0-10%]" ... $ cov_tag : chr "[1-5]" "[1-5]" "[1-5]" "[1-5]" ... $ group : chr "WGBS" "WGBS" "WGBS" "MBD" ... $ group_simp: chr "WGBS" "WGBS" "WGBS" "MBD" ... - attr(*, "groups")=Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 14148738 obs. of 2 variables: ..$ loci : chr "1:1000005-1000007" "1:1000016-1000018" "1:1000029-1000031" "1:1000053-1000055" ... ..$ .rows:List of 14148738 .. ..$ : int 1 .. ..$ : int 2 .. ..$ : int 3 .. ..$ : int 4 .. ..$ : int 5 6 .. ..$ : int 7 8 .. ..$ : int 9 .. ..$ : int 10 .. ..$ : int 11 .. ..$ : int 12 13 .. ..$ : int 14 15 .. ..$ : int 16 17 .. ..$ : int 18 19 .. ..$ : int 20 .. ..$ : int 21 .. ..$ : int 22 .. ..$ : int 23 .. ..$ : int 24 .. ..$ : int 25 .. ..$ : int 26 .. ..$ : int 27 .. ..$ : int 28 .. ..$ : int 29 .. ..$ : int 30 .. ..$ : int 31 .. ..$ : int 32 .. ..$ : int 33 .. ..$ : int 34 .. ..$ : int 35 .. ..$ : int 36 37 .. ..$ : int 38 .. ..$ : int 39 40 .. ..$ : int 41 42 .. ..$ : int 43 44 .. ..$ : int 45 46 47 .. ..$ : int 48 49 50 .. ..$ : int 51 52 53 .. ..$ : int 54 55 56 .. ..$ : int 57 58 59 .. ..$ : int 60 61 62 .. ..$ : int 63 64 .. ..$ : int 65 66 .. ..$ : int 67 68 69 .. ..$ : int 70 71 72 .. ..$ : int 73 74 75 .. ..$ : int 76 77 78 .. ..$ : int 79 80 .. ..$ : int 81 82 .. ..$ : int 83 84 .. ..$ : int 85 86 87 .. ..$ : int 88 89 .. ..$ : int 90 91 .. ..$ : int 92 93 .. ..$ : int 94 .. ..$ : int 95 96 .. ..$ : int 97 98 .. ..$ : int 99 100 .. ..$ : int 101 102 .. ..$ : int 103 104 .. ..$ : int 105 106 .. ..$ : int 107 108 .. ..$ : int 109 110 .. ..$ : int 111 112 .. ..$ : int 113 114 .. ..$ : int 115 116 .. ..$ : int 117 118 .. ..$ : int 119 120 .. ..$ : int 121 122 .. ..$ : int 123 .. ..$ : int 124 125 126 .. ..$ : int 127 128 .. ..$ : int 129 130 131 .. ..$ : int 132 133 134 .. ..$ : int 135 136 137 .. ..$ : int 138 139 140 .. ..$ : int 141 142 143 .. ..$ : int 144 145 .. ..$ : int 146 147 .. ..$ : int 148 149 .. ..$ : int 150 151 152 .. ..$ : int 153 154 155 .. ..$ : int 156 157 158 .. ..$ : int 159 160 .. ..$ : int 161 162 .. ..$ : int 163 164 165 .. ..$ : int 166 167 168 .. ..$ : int 169 170 171 .. ..$ : int 172 173 174 .. ..$ : int 175 176 177 .. ..$ : int 178 179 .. ..$ : int 180 181 .. ..$ : int 182 .. ..$ : int 183 .. ..$ : int 184 .. ..$ : int 185 186 .. ..$ : int 187 .. ..$ : int 188 .. ..$ : int 189 .. ..$ : int 190 .. .. [list output truncated] ..- attr(*, ".drop")= logi TRUE > table(Mcap_cov_perc_group_3x$group_simp) MBD MBD, RRBS MBD, RRBS, WGBS MBD, WGBS RRBS 255193 69518 1368324 4168592 567846 RRBS, WGBS WGBS 2991358 9254857 > nrow(Mcap_cov_perc_group_3x) [1] 18675688 > #``` > > #clean up environment > rm(Pact_cov_perc) > rm(Mcap_cov_perc) > rm(Pact_meth_stats) > rm(Mcap_meth_stats) > rm(Pact_meth_stats_tag) > rm(Mcap_meth_stats_tag) > rm(Pact_meth_stats_tag_3x) > rm(Mcap_meth_stats_tag_3x) > > #write out *cov_perc_group_3x data > write.csv(Mcap_cov_perc_group_3x, "~/strigg/analyses/20200910/Mcap_cov_perc_group_3x.csv", row.names = F, quote = F) > write.csv(Pact_cov_perc_group_3x, "~/strigg/analyses/20200910/Pact_cov_perc_group_3x.csv", row.names = F, quote = F) > > > > #summarize data > #```{r} > #collapse loci and just count the number of loci for each methylation and coverage bin > #Pact_cov_perc_STACKED <- data.frame(Pact_cov_perc_group) %>% group_by(group_simp, method,meth_tag, cov_tag) %>% summarise(count=n()) > > > # do the same for 3x loci > #Pact_cov_perc_3x_STACKED <- data.frame(Pact_cov_perc_group_3x) %>% group_by(group_simp, method, meth_tag, cov_tag) %>% summarise(count=n()) > > > #Pact_cov_perc_STACKED %>% group_by(group_simp) %>% summarise(method_total_count = sum(count)) > > # calculate the percent CpGs for each > #Pact_cov_perc_STACKED <- Pact_cov_perc_STACKED %>% group_by(group_simp,method) %>% mutate(perc_CpG=count/sum(count)) > > #Pact_cov_perc_3x_STACKED <- Pact_cov_perc_3x_STACKED %>% group_by(group_simp,method) %>% mutate(perc_CpG=count/sum(count)) > > #``` > > #create a df with MBD vs. WGBS data > #```{r} > #remove RRBS data > Pact_cov_perc_group_3x_mbd_rel <- Pact_cov_perc_group_3x[which(Pact_cov_perc_group_3x$method != "RRBS"),] > #subset WGBS groups > Pact_cov_perc_group_3x_mbd_rel <- Pact_cov_perc_group_3x_mbd_rel[grep("WGBS", Pact_cov_perc_group_3x_mbd_rel$group_simp),] > > > #set cov_tag to zero for CpGs not covered by MBD but covered by WGBS > Pact_cov_perc_group_3x_mbd_rel$mbd_cov_tag <- ifelse(Pact_cov_perc_group_3x_mbd_rel$method == "WGBS" & !grepl("MBD",Pact_cov_perc_group_3x_mbd_rel$group_simp),"[0]", Pact_cov_perc_group_3x_mbd_rel$cov_tag) > > #set WGBS cov_tag to NA for CpGs covered by WGBS and by MBD since we're gonna use the MBD cov_tags > Pact_cov_perc_group_3x_mbd_rel$mbd_cov_tag <- ifelse(Pact_cov_perc_group_3x_mbd_rel$method == "WGBS" & grepl("MBD",Pact_cov_perc_group_3x_mbd_rel$group_simp),NA, Pact_cov_perc_group_3x_mbd_rel$mbd_cov_tag) > > #Set methylation to NA for CpGs covered by MBD > Pact_cov_perc_group_3x_mbd_rel$wgbs_meth_tag <- ifelse(Pact_cov_perc_group_3x_mbd_rel$method == "WGBS",Pact_cov_perc_group_3x_mbd_rel$meth_tag,NA) > > > #remove all columns except loci, group, mbd_cov_tag, and wgbs_meth_tag > #convert to long format and filter out rows with NAs > #convert to wide format so that only mbd_cov_tag and wgbs_meth_tag values remain > Pact_cov_perc_group_3x_mbd_rel_reshape <- Pact_cov_perc_group_3x_mbd_rel[,c("loci","group_simp", "mbd_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", mbd_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > > table(Pact_cov_perc_group_3x_mbd_rel_reshape$group_simp) MBD, RRBS, WGBS MBD, WGBS RRBS, WGBS WGBS 1310201 2739744 784106 2653619 > > #summarize the data for plotting > Pact_cov_perc_group_3x_mbd_rel_sum <- Pact_cov_perc_group_3x_mbd_rel_reshape %>% group_by(mbd_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > jpeg("~/strigg/analyses/20200910/MBDcov_x_WGBSmeth_heatmap_Pact.jpg", width = 6, height = 5, units = "in", res = 300 ) > ggplot(data = Pact_cov_perc_group_3x_mbd_rel_sum, aes(x=wgbs_meth_tag, y=factor(mbd_cov_tag, levels = c("[0]",cov_tags)), fill=log(CpGs,10))) + geom_tile() + theme(axis.text.x = element_text(size = 7, angle = 60, hjust = 1)) + labs(x = "% methylation\n(from WGBS data)", y = "number of reads\n(from MBD data)", fill = "number of\nCpGs (log10)") + scale_fill_continuous(high = "#132B43", low = "#b5dfff") > dev.off() null device 1 > > > ## Mcap ## > > > #remove RRBS data > Mcap_cov_perc_group_3x_mbd_rel <- Mcap_cov_perc_group_3x[which(Mcap_cov_perc_group_3x$method != "RRBS"),] > #subset WGBS groups > Mcap_cov_perc_group_3x_mbd_rel <- Mcap_cov_perc_group_3x_mbd_rel[grep("WGBS", Mcap_cov_perc_group_3x_mbd_rel$group_simp),] > > > #set cov_tag to zero for CpGs not covered by MBD but covered by WGBS > Mcap_cov_perc_group_3x_mbd_rel$mbd_cov_tag <- ifelse(Mcap_cov_perc_group_3x_mbd_rel$method == "WGBS" & !grepl("MBD",Mcap_cov_perc_group_3x_mbd_rel$group_simp),"[0]", Mcap_cov_perc_group_3x_mbd_rel$cov_tag) > > #set WGBS cov_tag to NA for CpGs covered by WGBS and by MBD since we're gonna use the MBD cov_tags > Mcap_cov_perc_group_3x_mbd_rel$mbd_cov_tag <- ifelse(Mcap_cov_perc_group_3x_mbd_rel$method == "WGBS" & grepl("MBD",Mcap_cov_perc_group_3x_mbd_rel$group_simp),NA, Mcap_cov_perc_group_3x_mbd_rel$mbd_cov_tag) > > #Set methylation to NA for CpGs covered by MBD > Mcap_cov_perc_group_3x_mbd_rel$wgbs_meth_tag <- ifelse(Mcap_cov_perc_group_3x_mbd_rel$method == "WGBS",Mcap_cov_perc_group_3x_mbd_rel$meth_tag,NA) > > > #remove all columns except loci, group, mbd_cov_tag, and wgbs_meth_tag > #convert to long format and filter out rows with NAs > #convert to wide format so that only mbd_cov_tag and wgbs_meth_tag values remain > Mcap_cov_perc_group_3x_mbd_rel_reshape <- Mcap_cov_perc_group_3x_mbd_rel[,c("loci","group_simp", "mbd_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", mbd_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > > table(Mcap_cov_perc_group_3x_mbd_rel_reshape$group_simp) MBD, RRBS, WGBS MBD, WGBS RRBS, WGBS WGBS 456108 2084296 1495679 9254857 > > #summarize the data for plotting > Mcap_cov_perc_group_3x_mbd_rel_sum <- Mcap_cov_perc_group_3x_mbd_rel_reshape %>% group_by(mbd_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > jpeg("~/strigg/analyses/20200910/MBDcov_x_WGBSmeth_heatmap_Mcap.jpg", width = 6, height = 5, units = "in", res = 300 ) > ggplot(data = Mcap_cov_perc_group_3x_mbd_rel_sum, aes(x=wgbs_meth_tag, y=factor(mbd_cov_tag, levels = c("[0]",cov_tags)), fill=log(CpGs,10))) + geom_tile() + theme(axis.text.x = element_text(size = 7, angle = 60, hjust = 1)) + labs(x = "% methylation\n(from WGBS data)", y = "number of reads\n(from MBD data)", fill = "number of\nCpGs (log10)") + scale_fill_continuous(high = "#132B43", low = "#b5dfff") > dev.off() null device 1 > #``` > > #filter for loci covered by WGBS at 5x, 10x, 50x, 100x > #```{r} > loci_lessthan5x <- Pact_cov_perc_group_3x_mbd_rel[which(Pact_cov_perc_group_3x_mbd_rel$method=="WGBS" & Pact_cov_perc_group_3x_mbd_rel$cov_tag == "[1-5]"),"loci"] > > loci_lessthan10x <- Pact_cov_perc_group_3x_mbd_rel[which(Pact_cov_perc_group_3x_mbd_rel$method=="WGBS" & Pact_cov_perc_group_3x_mbd_rel$cov_tag == "[1-5]" | Pact_cov_perc_group_3x_mbd_rel$method=="WGBS" & Pact_cov_perc_group_3x_mbd_rel$cov_tag == "[5-10]" ),"loci"] > > loci_lessthan20x <- Pact_cov_perc_group_3x_mbd_rel[which(Pact_cov_perc_group_3x_mbd_rel$method=="WGBS" & Pact_cov_perc_group_3x_mbd_rel$cov_tag == "[1-5]" | Pact_cov_perc_group_3x_mbd_rel$method=="WGBS" & Pact_cov_perc_group_3x_mbd_rel$cov_tag == "[5-10]" | Pact_cov_perc_group_3x_mbd_rel$method=="WGBS" & Pact_cov_perc_group_3x_mbd_rel$cov_tag == "[10-20]"),"loci"] > > loci_lessthan30x <- Pact_cov_perc_group_3x_mbd_rel[which(Pact_cov_perc_group_3x_mbd_rel$method=="WGBS" & Pact_cov_perc_group_3x_mbd_rel$cov_tag == "[1-5]" | Pact_cov_perc_group_3x_mbd_rel$method=="WGBS" & Pact_cov_perc_group_3x_mbd_rel$cov_tag == "[5-10]" | Pact_cov_perc_group_3x_mbd_rel$method=="WGBS" & Pact_cov_perc_group_3x_mbd_rel$cov_tag == "[10-20]" | Pact_cov_perc_group_3x_mbd_rel$method=="WGBS" & Pact_cov_perc_group_3x_mbd_rel$cov_tag == "[20-30]"),"loci"] > > loci_lessthan40x <- Pact_cov_perc_group_3x_mbd_rel[which(Pact_cov_perc_group_3x_mbd_rel$method=="WGBS" & Pact_cov_perc_group_3x_mbd_rel$cov_tag == "[1-5]" | Pact_cov_perc_group_3x_mbd_rel$method=="WGBS" & Pact_cov_perc_group_3x_mbd_rel$cov_tag == "[5-10]" | Pact_cov_perc_group_3x_mbd_rel$method=="WGBS" & Pact_cov_perc_group_3x_mbd_rel$cov_tag == "[10-20]" | Pact_cov_perc_group_3x_mbd_rel$method=="WGBS" & Pact_cov_perc_group_3x_mbd_rel$cov_tag == "[20-30]" | Pact_cov_perc_group_3x_mbd_rel$method=="WGBS" & Pact_cov_perc_group_3x_mbd_rel$cov_tag == "[30-40]"),"loci"] > > loci_greaterthan50x <- Pact_cov_perc_group_3x_mbd_rel[which(Pact_cov_perc_group_3x_mbd_rel$method=="WGBS" & Pact_cov_perc_group_3x_mbd_rel$cov_tag == "[50-100]" | Pact_cov_perc_group_3x_mbd_rel$method=="WGBS" & Pact_cov_perc_group_3x_mbd_rel$cov_tag == "[100-500]" | Pact_cov_perc_group_3x_mbd_rel$method=="WGBS" & Pact_cov_perc_group_3x_mbd_rel$cov_tag == "[500-1000]" | Pact_cov_perc_group_3x_mbd_rel$method=="WGBS" & Pact_cov_perc_group_3x_mbd_rel$cov_tag == "[>1000]"),"loci"] > > loci_greaterthan100x <- Pact_cov_perc_group_3x_mbd_rel[which(Pact_cov_perc_group_3x_mbd_rel$method=="WGBS" & Pact_cov_perc_group_3x_mbd_rel$cov_tag == "[100-500]" | Pact_cov_perc_group_3x_mbd_rel$method=="WGBS" & Pact_cov_perc_group_3x_mbd_rel$cov_tag == "[500-1000]" | Pact_cov_perc_group_3x_mbd_rel$method=="WGBS" & Pact_cov_perc_group_3x_mbd_rel$cov_tag == "[>1000]"),"loci"] > > > Pact_cov_perc_group_3x_mbd_rel_reshape_wgbs5x <- Pact_cov_perc_group_3x_mbd_rel[which(!(Pact_cov_perc_group_3x_mbd_rel$loci %in% loci_lessthan5x$loci)),c("loci","group_simp", "mbd_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", mbd_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > Pact_cov_perc_group_3x_mbd_rel_reshape_wgbs10x <- Pact_cov_perc_group_3x_mbd_rel[which(!(Pact_cov_perc_group_3x_mbd_rel$loci %in% loci_lessthan10x$loci)),c("loci","group_simp", "mbd_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", mbd_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > Pact_cov_perc_group_3x_mbd_rel_reshape_wgbs20x <- Pact_cov_perc_group_3x_mbd_rel[which(!(Pact_cov_perc_group_3x_mbd_rel$loci %in% loci_lessthan20x$loci)),c("loci","group_simp", "mbd_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", mbd_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > Pact_cov_perc_group_3x_mbd_rel_reshape_wgbs30x <- Pact_cov_perc_group_3x_mbd_rel[which(!(Pact_cov_perc_group_3x_mbd_rel$loci %in% loci_lessthan30x$loci)),c("loci","group_simp", "mbd_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", mbd_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > Pact_cov_perc_group_3x_mbd_rel_reshape_wgbs40x <- Pact_cov_perc_group_3x_mbd_rel[which(!(Pact_cov_perc_group_3x_mbd_rel$loci %in% loci_lessthan40x$loci)),c("loci","group_simp", "mbd_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", mbd_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > Pact_cov_perc_group_3x_mbd_rel_reshape_wgbs50x <- Pact_cov_perc_group_3x_mbd_rel[which(Pact_cov_perc_group_3x_mbd_rel$loci %in% loci_greaterthan50x$loci),c("loci","group_simp", "mbd_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", mbd_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > Pact_cov_perc_group_3x_mbd_rel_reshape_wgbs100x <- Pact_cov_perc_group_3x_mbd_rel[which(Pact_cov_perc_group_3x_mbd_rel$loci %in% loci_greaterthan100x$loci),c("loci","group_simp", "mbd_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", mbd_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > #summarize the data for plotting > Pact_cov_perc_group_3x_mbd_rel_sum_wgbs5x <- Pact_cov_perc_group_3x_mbd_rel_reshape_wgbs5x %>% group_by(mbd_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > Pact_cov_perc_group_3x_mbd_rel_sum_wgbs10x <- Pact_cov_perc_group_3x_mbd_rel_reshape_wgbs10x %>% group_by(mbd_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > Pact_cov_perc_group_3x_mbd_rel_sum_wgbs20x <- Pact_cov_perc_group_3x_mbd_rel_reshape_wgbs20x %>% group_by(mbd_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > Pact_cov_perc_group_3x_mbd_rel_sum_wgbs30x <- Pact_cov_perc_group_3x_mbd_rel_reshape_wgbs30x %>% group_by(mbd_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > Pact_cov_perc_group_3x_mbd_rel_sum_wgbs40x <- Pact_cov_perc_group_3x_mbd_rel_reshape_wgbs40x %>% group_by(mbd_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > Pact_cov_perc_group_3x_mbd_rel_sum_wgbs50x <- Pact_cov_perc_group_3x_mbd_rel_reshape_wgbs50x %>% group_by(mbd_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > Pact_cov_perc_group_3x_mbd_rel_sum_wgbs100x <- Pact_cov_perc_group_3x_mbd_rel_reshape_wgbs100x %>% group_by(mbd_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > > #create a df with all subsets > Pact_cov_perc_group_3x_mbd_rel_sum_wgbs_cutoffs <- dplyr::bind_rows(list(wgbs_1x=Pact_cov_perc_group_3x_mbd_rel_sum, wgbs_5x=Pact_cov_perc_group_3x_mbd_rel_sum_wgbs5x, wgbs_10x= Pact_cov_perc_group_3x_mbd_rel_sum_wgbs10x, wgbs_20x= Pact_cov_perc_group_3x_mbd_rel_sum_wgbs20x,wgbs_30x= Pact_cov_perc_group_3x_mbd_rel_sum_wgbs30x,wgbs_40x= Pact_cov_perc_group_3x_mbd_rel_sum_wgbs40x,wgbs_50x= Pact_cov_perc_group_3x_mbd_rel_sum_wgbs50x, wgbs_100x= Pact_cov_perc_group_3x_mbd_rel_sum_wgbs100x), .id = "id") > > Pact_cov_perc_group_3x_mbd_rel_sum_wgbs_cutoffs <- Pact_cov_perc_group_3x_mbd_rel_sum_wgbs_cutoffs %>% group_by(id, wgbs_meth_tag) %>% mutate(perc_CpG = CpGs/sum(CpGs)*100) > > #make wgbs capitalized > Pact_cov_perc_group_3x_mbd_rel_sum_wgbs_cutoffs$id <- gsub("wgbs", "WGBS", Pact_cov_perc_group_3x_mbd_rel_sum_wgbs_cutoffs$id) > > > > > > > > ## Mcap ## > > > loci_lessthan5x <- Mcap_cov_perc_group_3x_mbd_rel[which(Mcap_cov_perc_group_3x_mbd_rel$method=="WGBS" & Mcap_cov_perc_group_3x_mbd_rel$cov_tag == "[1-5]"),"loci"] > > loci_lessthan10x <- Mcap_cov_perc_group_3x_mbd_rel[which(Mcap_cov_perc_group_3x_mbd_rel$method=="WGBS" & Mcap_cov_perc_group_3x_mbd_rel$cov_tag == "[1-5]" | Mcap_cov_perc_group_3x_mbd_rel$method=="WGBS" & Mcap_cov_perc_group_3x_mbd_rel$cov_tag == "[5-10]" ),"loci"] > > loci_lessthan20x <- Mcap_cov_perc_group_3x_mbd_rel[which(Mcap_cov_perc_group_3x_mbd_rel$method=="WGBS" & Mcap_cov_perc_group_3x_mbd_rel$cov_tag == "[1-5]" | Mcap_cov_perc_group_3x_mbd_rel$method=="WGBS" & Mcap_cov_perc_group_3x_mbd_rel$cov_tag == "[5-10]" | Mcap_cov_perc_group_3x_mbd_rel$method=="WGBS" & Mcap_cov_perc_group_3x_mbd_rel$cov_tag == "[10-20]"),"loci"] > > loci_lessthan30x <- Mcap_cov_perc_group_3x_mbd_rel[which(Mcap_cov_perc_group_3x_mbd_rel$method=="WGBS" & Mcap_cov_perc_group_3x_mbd_rel$cov_tag == "[1-5]" | Mcap_cov_perc_group_3x_mbd_rel$method=="WGBS" & Mcap_cov_perc_group_3x_mbd_rel$cov_tag == "[5-10]" | Mcap_cov_perc_group_3x_mbd_rel$method=="WGBS" & Mcap_cov_perc_group_3x_mbd_rel$cov_tag == "[10-20]" | Mcap_cov_perc_group_3x_mbd_rel$method=="WGBS" & Mcap_cov_perc_group_3x_mbd_rel$cov_tag == "[20-30]"),"loci"] > > loci_lessthan40x <- Mcap_cov_perc_group_3x_mbd_rel[which(Mcap_cov_perc_group_3x_mbd_rel$method=="WGBS" & Mcap_cov_perc_group_3x_mbd_rel$cov_tag == "[1-5]" | Mcap_cov_perc_group_3x_mbd_rel$method=="WGBS" & Mcap_cov_perc_group_3x_mbd_rel$cov_tag == "[5-10]" | Mcap_cov_perc_group_3x_mbd_rel$method=="WGBS" & Mcap_cov_perc_group_3x_mbd_rel$cov_tag == "[10-20]" | Mcap_cov_perc_group_3x_mbd_rel$method=="WGBS" & Mcap_cov_perc_group_3x_mbd_rel$cov_tag == "[20-30]" | Mcap_cov_perc_group_3x_mbd_rel$method=="WGBS" & Mcap_cov_perc_group_3x_mbd_rel$cov_tag == "[30-40]"),"loci"] > > loci_greaterthan50x <- Mcap_cov_perc_group_3x_mbd_rel[which(Mcap_cov_perc_group_3x_mbd_rel$method=="WGBS" & Mcap_cov_perc_group_3x_mbd_rel$cov_tag == "[50-100]" | Mcap_cov_perc_group_3x_mbd_rel$method=="WGBS" & Mcap_cov_perc_group_3x_mbd_rel$cov_tag == "[100-500]" | Mcap_cov_perc_group_3x_mbd_rel$method=="WGBS" & Mcap_cov_perc_group_3x_mbd_rel$cov_tag == "[500-1000]" | Mcap_cov_perc_group_3x_mbd_rel$method=="WGBS" & Mcap_cov_perc_group_3x_mbd_rel$cov_tag == "[>1000]"),"loci"] > > loci_greaterthan100x <- Mcap_cov_perc_group_3x_mbd_rel[which(Mcap_cov_perc_group_3x_mbd_rel$method=="WGBS" & Mcap_cov_perc_group_3x_mbd_rel$cov_tag == "[100-500]" | Mcap_cov_perc_group_3x_mbd_rel$method=="WGBS" & Mcap_cov_perc_group_3x_mbd_rel$cov_tag == "[500-1000]" | Mcap_cov_perc_group_3x_mbd_rel$method=="WGBS" & Mcap_cov_perc_group_3x_mbd_rel$cov_tag == "[>1000]"),"loci"] > > > Mcap_cov_perc_group_3x_mbd_rel_reshape_wgbs5x <- Mcap_cov_perc_group_3x_mbd_rel[which(!(Mcap_cov_perc_group_3x_mbd_rel$loci %in% loci_lessthan5x$loci)),c("loci","group_simp", "mbd_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", mbd_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > Mcap_cov_perc_group_3x_mbd_rel_reshape_wgbs10x <- Mcap_cov_perc_group_3x_mbd_rel[which(!(Mcap_cov_perc_group_3x_mbd_rel$loci %in% loci_lessthan10x$loci)),c("loci","group_simp", "mbd_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", mbd_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > Mcap_cov_perc_group_3x_mbd_rel_reshape_wgbs20x <- Mcap_cov_perc_group_3x_mbd_rel[which(!(Mcap_cov_perc_group_3x_mbd_rel$loci %in% loci_lessthan20x$loci)),c("loci","group_simp", "mbd_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", mbd_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > Mcap_cov_perc_group_3x_mbd_rel_reshape_wgbs30x <- Mcap_cov_perc_group_3x_mbd_rel[which(!(Mcap_cov_perc_group_3x_mbd_rel$loci %in% loci_lessthan30x$loci)),c("loci","group_simp", "mbd_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", mbd_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > Mcap_cov_perc_group_3x_mbd_rel_reshape_wgbs40x <- Mcap_cov_perc_group_3x_mbd_rel[which(!(Mcap_cov_perc_group_3x_mbd_rel$loci %in% loci_lessthan40x$loci)),c("loci","group_simp", "mbd_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", mbd_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > Mcap_cov_perc_group_3x_mbd_rel_reshape_wgbs50x <- Mcap_cov_perc_group_3x_mbd_rel[which(Mcap_cov_perc_group_3x_mbd_rel$loci %in% loci_greaterthan50x$loci),c("loci","group_simp", "mbd_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", mbd_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > Mcap_cov_perc_group_3x_mbd_rel_reshape_wgbs100x <- Mcap_cov_perc_group_3x_mbd_rel[which(Mcap_cov_perc_group_3x_mbd_rel$loci %in% loci_greaterthan100x$loci),c("loci","group_simp", "mbd_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", mbd_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > #summarize the data for plotting > Mcap_cov_perc_group_3x_mbd_rel_sum_wgbs5x <- Mcap_cov_perc_group_3x_mbd_rel_reshape_wgbs5x %>% group_by(mbd_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > Mcap_cov_perc_group_3x_mbd_rel_sum_wgbs10x <- Mcap_cov_perc_group_3x_mbd_rel_reshape_wgbs10x %>% group_by(mbd_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > Mcap_cov_perc_group_3x_mbd_rel_sum_wgbs20x <- Mcap_cov_perc_group_3x_mbd_rel_reshape_wgbs20x %>% group_by(mbd_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > Mcap_cov_perc_group_3x_mbd_rel_sum_wgbs30x <- Mcap_cov_perc_group_3x_mbd_rel_reshape_wgbs30x %>% group_by(mbd_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > Mcap_cov_perc_group_3x_mbd_rel_sum_wgbs40x <- Mcap_cov_perc_group_3x_mbd_rel_reshape_wgbs40x %>% group_by(mbd_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > Mcap_cov_perc_group_3x_mbd_rel_sum_wgbs50x <- Mcap_cov_perc_group_3x_mbd_rel_reshape_wgbs50x %>% group_by(mbd_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > Mcap_cov_perc_group_3x_mbd_rel_sum_wgbs100x <- Mcap_cov_perc_group_3x_mbd_rel_reshape_wgbs100x %>% group_by(mbd_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > > #create a df with all subsets > Mcap_cov_perc_group_3x_mbd_rel_sum_wgbs_cutoffs <- dplyr::bind_rows(list(wgbs_1x=Mcap_cov_perc_group_3x_mbd_rel_sum, wgbs_5x=Mcap_cov_perc_group_3x_mbd_rel_sum_wgbs5x, wgbs_10x= Mcap_cov_perc_group_3x_mbd_rel_sum_wgbs10x, wgbs_20x= Mcap_cov_perc_group_3x_mbd_rel_sum_wgbs20x,wgbs_30x= Mcap_cov_perc_group_3x_mbd_rel_sum_wgbs30x,wgbs_40x= Mcap_cov_perc_group_3x_mbd_rel_sum_wgbs40x,wgbs_50x= Mcap_cov_perc_group_3x_mbd_rel_sum_wgbs50x, wgbs_100x= Mcap_cov_perc_group_3x_mbd_rel_sum_wgbs100x), .id = "id") > > Mcap_cov_perc_group_3x_mbd_rel_sum_wgbs_cutoffs <- Mcap_cov_perc_group_3x_mbd_rel_sum_wgbs_cutoffs %>% group_by(id, wgbs_meth_tag) %>% mutate(perc_CpG = CpGs/sum(CpGs)*100) > > #make wgbs capitalized > Mcap_cov_perc_group_3x_mbd_rel_sum_wgbs_cutoffs$id <- gsub("wgbs", "WGBS", Mcap_cov_perc_group_3x_mbd_rel_sum_wgbs_cutoffs$id) > > #``` > > > #plot data > #```{r} > jpeg("~/strigg/analyses/20200910/MBDcov_x_WGBSmeth_heatmap_Pact_thresholds.jpg", width = 10, height = 5, units = "in", res = 300 ) > ggplot(data = Pact_cov_perc_group_3x_mbd_rel_sum_wgbs_cutoffs, aes(x=wgbs_meth_tag, y=factor(mbd_cov_tag, levels = c("[0]",cov_tags)), fill=log(CpGs,10))) + geom_tile() + theme(axis.text.x = element_text(size = 7, angle = 60, hjust = 1)) + labs(x = "% methylation\n(from WGBS data)", y = "number of reads\n(from MBD data)", fill = "number of\nCpGs (log10)") + facet_wrap(~factor(id, levels = c("WGBS_1x","WGBS_5x","WGBS_10x","WGBS_20x","WGBS_30x","WGBS_40x","WGBS_50x","WGBS_100x")), nrow = 2) + scale_fill_continuous(high = "#132B43", low = "#b5dfff") > dev.off() null device 1 > > #Mcap > jpeg("~/strigg/analyses/20200910/MBDcov_x_WGBSmeth_heatmap_Mcap_thresholds.jpg", width = 10, height = 5, units = "in", res = 300 ) > ggplot(data = Mcap_cov_perc_group_3x_mbd_rel_sum_wgbs_cutoffs, aes(x=wgbs_meth_tag, y=factor(mbd_cov_tag, levels = c("[0]",cov_tags)), fill=log(CpGs,10))) + geom_tile() + theme(axis.text.x = element_text(size = 7, angle = 60, hjust = 1)) + labs(x = "% methylation\n(from WGBS data)", y = "number of reads\n(from MBD data)", fill = "number of\nCpGs (log10)") + facet_wrap(~factor(id, levels = c("WGBS_1x","WGBS_5x","WGBS_10x","WGBS_20x","WGBS_30x","WGBS_40x","WGBS_50x","WGBS_100x")), nrow = 2) + scale_fill_continuous(high = "#132B43", low = "#b5dfff") > dev.off() null device 1 > > #d <- ggplot(data = Pact_cov_perc_group_3x_mbd_rel_sum_wgbs_cutoffs, aes(x=wgbs_meth_tag, y=factor(mbd_cov_tag, levels = c("[0]",cov_tags)), fill=log(CpGs,10))) + geom_tile() + theme(axis.text.x = element_text(size = 7, angle = 60, hjust = 1)) + labs(x = "% methylation\n(from WGBS data)", y = "number of reads\n(from MBD data)", fill = "number of\nCpGs (log10)", title = "P. acuta") + facet_wrap(~factor(id, levels = c("WGBS_1x","WGBS_5x","WGBS_10x","WGBS_20x","WGBS_30x","WGBS_40x","WGBS_50x","WGBS_100x")), nrow = 2) + scale_fill_continuous(high = "#132B43", low = "#b5dfff") + theme(plot.title = element_text(face = "italic")) > > > #c <- ggplot(data = Mcap_cov_perc_group_3x_mbd_rel_sum_wgbs_cutoffs, aes(x=wgbs_meth_tag, y=factor(mbd_cov_tag, levels = c("[0]",cov_tags)), fill=log(CpGs,10))) + geom_tile() + theme(axis.text.x = element_text(size = 7, angle = 60, hjust = 1)) + labs(x = "% methylation\n(from WGBS data)", y = "number of reads\n(from MBD data)", fill = "number of\nCpGs (log10)", title = "M. capitata") + facet_wrap(~factor(id, levels = c("WGBS_1x","WGBS_5x","WGBS_10x","WGBS_20x","WGBS_30x","WGBS_40x","WGBS_50x","WGBS_100x")), nrow = 2) + scale_fill_continuous(high = "#132B43", low = "#b5dfff") + theme(plot.title = element_text(face = "italic")) > > > #jpeg("~/strigg/analyses/20200910/MBDcov_x_WGBSmeth_heatmap_thresholds.jpg", width = 10, height = 5, units = "in", res = 300 ) > #ggarrange(c,d, nrow = 2,labels = "AUTO",common.legend = T) > #dev.off() > #``` > > > > #This code generates 3 heatmaps showing %methylation x # of reads for each method to see if any method is biased against detecting highly methylated reads (e.g. WGBS) > #```{r} > #subset loci for those only covered by WGBS data and only keep > > #subset loci for those only covered by WGBS data, > #only keep the loci, cov_tag, and meth_tag columns, > # and total the CpGs for each bin > Pact_cov_perc_group_3x_sum <- Pact_cov_perc_group_3x %>% group_by(method,cov_tag, meth_tag) %>% summarise(CpGs =n()) > > #convert column to ordered factor > Pact_cov_perc_group_3x_sum$cov_tag <- factor(Pact_cov_perc_group_3x_sum$cov_tag, levels = cov_tags) > > #plot the WGBS % meth x coverage > jpeg("~/strigg/analyses/20200910/method_x_cov_x_meth_heatmap_Pact.jpg", width = 10, height = 4, units = "in", res = 300 ) > ggplot(data = Pact_cov_perc_group_3x_sum, aes(x = meth_tag, y=cov_tag, fill=log(CpGs,10))) + geom_tile() + theme(axis.text.x = element_text(size = 7, angle = 60, hjust = 1)) + facet_wrap(~method) + labs(x = "% methylation", y = "number of reads", fill = "number of\nCpGs (log10)", title = "P. acuta") + scale_fill_continuous(high = "#132B43", low = "#b5dfff") > dev.off() null device 1 > > > > #subset loci for those only covered by WGBS data, > #only keep the loci, cov_tag, and meth_tag columns, > # and total the CpGs for each bin > Mcap_cov_perc_group_3x_sum <- Mcap_cov_perc_group_3x %>% group_by(method,cov_tag, meth_tag) %>% summarise(CpGs =n()) > > #convert column to ordered factor > Mcap_cov_perc_group_3x_sum$cov_tag <- factor(Mcap_cov_perc_group_3x_sum$cov_tag, levels = cov_tags) > > #plot the WGBS % meth x coverage > jpeg("~/strigg/analyses/20200910/method_x_cov_x_meth_heatmap_Mcap.jpg", width = 10, height = 4, units = "in", res = 300 ) > ggplot(data = Mcap_cov_perc_group_3x_sum, aes(x = meth_tag, y=cov_tag, fill=log(CpGs,10))) + geom_tile() + theme(axis.text.x = element_text(size = 7, angle = 60, hjust = 1)) + facet_wrap(~method) + labs(x = "% methylation", y = "number of reads", fill = "number of\nCpGs (log10)", title = "M. capitata") + scale_fill_continuous(high = "#132B43", low = "#b5dfff") > dev.off() null device 1 > > > #e <- ggplot(data = Mcap_cov_perc_group_3x_sum, aes(x = meth_tag, y=cov_tag, fill=log(CpGs,10))) + geom_tile() + theme(axis.text.x = element_text(size = 7, angle = 60, hjust = 1)) + facet_wrap(~method) + labs(x = "% methylation", y = "number of reads", fill = "number of\nCpGs (log10)", title = "M. capitata") + scale_fill_continuous(high = "#132B43", low = "#b5dfff") + theme(plot.title = element_text(face = "italic")) > > > #f <- ggplot(data = Pact_cov_perc_group_3x_sum, aes(x = meth_tag, y=cov_tag, fill=log(CpGs,10))) + geom_tile() + theme(axis.text.x = element_text(size = 7, angle = 60, hjust = 1)) + facet_wrap(~method) + labs(x = "% methylation", y = "number of reads", fill = "number of\nCpGs (log10)", title = "P. acuta") + scale_fill_continuous(high = "#132B43", low = "#b5dfff") + theme(plot.title = element_text(face = "italic")) > > > #jpeg("~/strigg/analyses/20200910/MBDcov_x_WGBSmeth_heatmap_thresholds.jpg", width = 10, height = 5, units = "in", res = 300 ) > #ggarrange(e,f, nrow = 2,labels = "AUTO",common.legend = T) > #dev.off() > #``` > > #RRBS vs WBGS > #create a df with RRBS vs. WGBS data > #```{r} > #remove RRBS data > Pact_cov_perc_group_3x_RRBS_rel <- Pact_cov_perc_group_3x[which(Pact_cov_perc_group_3x$method != "MBD"),] > #subset WGBS groups > Pact_cov_perc_group_3x_RRBS_rel <- Pact_cov_perc_group_3x_RRBS_rel[grep("WGBS", Pact_cov_perc_group_3x_RRBS_rel$group_simp),] > > > #set cov_tag to zero for CpGs not covered by RRBS but covered by WGBS > Pact_cov_perc_group_3x_RRBS_rel$RRBS_cov_tag <- ifelse(Pact_cov_perc_group_3x_RRBS_rel$method == "WGBS" & !grepl("RRBS",Pact_cov_perc_group_3x_RRBS_rel$group_simp),"[0]", Pact_cov_perc_group_3x_RRBS_rel$cov_tag) > > #set WGBS cov_tag to NA for CpGs covered by WGBS and by RRBS since we're gonna use the RRBS cov_tags > Pact_cov_perc_group_3x_RRBS_rel$RRBS_cov_tag <- ifelse(Pact_cov_perc_group_3x_RRBS_rel$method == "WGBS" & grepl("RRBS",Pact_cov_perc_group_3x_RRBS_rel$group_simp),NA, Pact_cov_perc_group_3x_RRBS_rel$RRBS_cov_tag) > > #Set methylation to NA for CpGs covered by RRBS > Pact_cov_perc_group_3x_RRBS_rel$wgbs_meth_tag <- ifelse(Pact_cov_perc_group_3x_RRBS_rel$method == "WGBS",Pact_cov_perc_group_3x_RRBS_rel$meth_tag,NA) > > > #remove all columns except loci, group, RRBS_cov_tag, and wgbs_meth_tag > #convert to long format and filter out rows with NAs > #convert to wide format so that only RRBS_cov_tag and wgbs_meth_tag values remain > Pact_cov_perc_group_3x_RRBS_rel_reshape <- Pact_cov_perc_group_3x_RRBS_rel[,c("loci","group_simp", "RRBS_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", RRBS_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > > table(Pact_cov_perc_group_3x_RRBS_rel_reshape$group_simp) MBD, RRBS, WGBS MBD, WGBS RRBS, WGBS WGBS 1310201 2739744 784106 2653619 > > #summarize the data for plotting > Pact_cov_perc_group_3x_RRBS_rel_sum <- Pact_cov_perc_group_3x_RRBS_rel_reshape %>% group_by(RRBS_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > jpeg("~/strigg/analyses/20200910/RRBScov_x_WGBSmeth_heatmap_Pact.jpg", width = 6, height = 5, units = "in", res = 300 ) > ggplot(data = Pact_cov_perc_group_3x_RRBS_rel_sum, aes(x=wgbs_meth_tag, y=factor(RRBS_cov_tag, levels = c("[0]",cov_tags)), fill=log(CpGs,10))) + geom_tile() + theme(axis.text.x = element_text(size = 7, angle = 60, hjust = 1)) + labs(x = "% methylation\n(from WGBS data)", y = "number of reads\n(from RRBS data)", fill = "number of\nCpGs (log10)") + scale_fill_continuous(high = "#132B43", low = "#b5dfff") > dev.off() null device 1 > > > ## Mcap ## > > > #remove RRBS data > Mcap_cov_perc_group_3x_RRBS_rel <- Mcap_cov_perc_group_3x[which(Mcap_cov_perc_group_3x$method != "MBD"),] > #subset WGBS groups > Mcap_cov_perc_group_3x_RRBS_rel <- Mcap_cov_perc_group_3x_RRBS_rel[grep("WGBS", Mcap_cov_perc_group_3x_RRBS_rel$group_simp),] > > > #set cov_tag to zero for CpGs not covered by RRBS but covered by WGBS > Mcap_cov_perc_group_3x_RRBS_rel$RRBS_cov_tag <- ifelse(Mcap_cov_perc_group_3x_RRBS_rel$method == "WGBS" & !grepl("RRBS",Mcap_cov_perc_group_3x_RRBS_rel$group_simp),"[0]", Mcap_cov_perc_group_3x_RRBS_rel$cov_tag) > > #set WGBS cov_tag to NA for CpGs covered by WGBS and by RRBS since we're gonna use the RRBS cov_tags > Mcap_cov_perc_group_3x_RRBS_rel$RRBS_cov_tag <- ifelse(Mcap_cov_perc_group_3x_RRBS_rel$method == "WGBS" & grepl("RRBS",Mcap_cov_perc_group_3x_RRBS_rel$group_simp),NA, Mcap_cov_perc_group_3x_RRBS_rel$RRBS_cov_tag) > > #Set methylation to NA for CpGs covered by RRBS > Mcap_cov_perc_group_3x_RRBS_rel$wgbs_meth_tag <- ifelse(Mcap_cov_perc_group_3x_RRBS_rel$method == "WGBS",Mcap_cov_perc_group_3x_RRBS_rel$meth_tag,NA) > > > #remove all columns except loci, group, RRBS_cov_tag, and wgbs_meth_tag > #convert to long format and filter out rows with NAs > #convert to wide format so that only RRBS_cov_tag and wgbs_meth_tag values remain > Mcap_cov_perc_group_3x_RRBS_rel_reshape <- Mcap_cov_perc_group_3x_RRBS_rel[,c("loci","group_simp", "RRBS_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", RRBS_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > > table(Mcap_cov_perc_group_3x_RRBS_rel_reshape$group_simp) MBD, RRBS, WGBS MBD, WGBS RRBS, WGBS WGBS 456108 2084296 1495679 9254857 > > #summarize the data for plotting > Mcap_cov_perc_group_3x_RRBS_rel_sum <- Mcap_cov_perc_group_3x_RRBS_rel_reshape %>% group_by(RRBS_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > jpeg("~/strigg/analyses/20200910/RRBScov_x_WGBSmeth_heatmap_Mcap.jpg", width = 6, height = 5, units = "in", res = 300 ) > ggplot(data = Mcap_cov_perc_group_3x_RRBS_rel_sum, aes(x=wgbs_meth_tag, y=factor(RRBS_cov_tag, levels = c("[0]",cov_tags)), fill=log(CpGs,10))) + geom_tile() + theme(axis.text.x = element_text(size = 7, angle = 60, hjust = 1)) + labs(x = "% methylation\n(from WGBS data)", y = "number of reads\n(from RRBS data)", fill = "number of\nCpGs (log10)") + scale_fill_continuous(high = "#132B43", low = "#b5dfff") > dev.off() null device 1 > #``` > > #filter for loci covered by WGBS at 5x, 10x, 50x, 100x > #```{r} > loci_lessthan5x <- Pact_cov_perc_group_3x_RRBS_rel[which(Pact_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Pact_cov_perc_group_3x_RRBS_rel$cov_tag == "[1-5]"),"loci"] > > loci_lessthan10x <- Pact_cov_perc_group_3x_RRBS_rel[which(Pact_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Pact_cov_perc_group_3x_RRBS_rel$cov_tag == "[1-5]" | Pact_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Pact_cov_perc_group_3x_RRBS_rel$cov_tag == "[5-10]" ),"loci"] > > loci_lessthan20x <- Pact_cov_perc_group_3x_RRBS_rel[which(Pact_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Pact_cov_perc_group_3x_RRBS_rel$cov_tag == "[1-5]" | Pact_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Pact_cov_perc_group_3x_RRBS_rel$cov_tag == "[5-10]" | Pact_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Pact_cov_perc_group_3x_RRBS_rel$cov_tag == "[10-20]"),"loci"] > > loci_lessthan30x <- Pact_cov_perc_group_3x_RRBS_rel[which(Pact_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Pact_cov_perc_group_3x_RRBS_rel$cov_tag == "[1-5]" | Pact_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Pact_cov_perc_group_3x_RRBS_rel$cov_tag == "[5-10]" | Pact_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Pact_cov_perc_group_3x_RRBS_rel$cov_tag == "[10-20]" | Pact_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Pact_cov_perc_group_3x_RRBS_rel$cov_tag == "[20-30]"),"loci"] > > loci_lessthan40x <- Pact_cov_perc_group_3x_RRBS_rel[which(Pact_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Pact_cov_perc_group_3x_RRBS_rel$cov_tag == "[1-5]" | Pact_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Pact_cov_perc_group_3x_RRBS_rel$cov_tag == "[5-10]" | Pact_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Pact_cov_perc_group_3x_RRBS_rel$cov_tag == "[10-20]" | Pact_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Pact_cov_perc_group_3x_RRBS_rel$cov_tag == "[20-30]" | Pact_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Pact_cov_perc_group_3x_RRBS_rel$cov_tag == "[30-40]"),"loci"] > > loci_greaterthan50x <- Pact_cov_perc_group_3x_RRBS_rel[which(Pact_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Pact_cov_perc_group_3x_RRBS_rel$cov_tag == "[50-100]" | Pact_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Pact_cov_perc_group_3x_RRBS_rel$cov_tag == "[100-500]" | Pact_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Pact_cov_perc_group_3x_RRBS_rel$cov_tag == "[500-1000]" | Pact_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Pact_cov_perc_group_3x_RRBS_rel$cov_tag == "[>1000]"),"loci"] > > loci_greaterthan100x <- Pact_cov_perc_group_3x_RRBS_rel[which(Pact_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Pact_cov_perc_group_3x_RRBS_rel$cov_tag == "[100-500]" | Pact_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Pact_cov_perc_group_3x_RRBS_rel$cov_tag == "[500-1000]" | Pact_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Pact_cov_perc_group_3x_RRBS_rel$cov_tag == "[>1000]"),"loci"] > > > Pact_cov_perc_group_3x_RRBS_rel_reshape_wgbs5x <- Pact_cov_perc_group_3x_RRBS_rel[which(!(Pact_cov_perc_group_3x_RRBS_rel$loci %in% loci_lessthan5x$loci)),c("loci","group_simp", "RRBS_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", RRBS_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > Pact_cov_perc_group_3x_RRBS_rel_reshape_wgbs10x <- Pact_cov_perc_group_3x_RRBS_rel[which(!(Pact_cov_perc_group_3x_RRBS_rel$loci %in% loci_lessthan10x$loci)),c("loci","group_simp", "RRBS_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", RRBS_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > Pact_cov_perc_group_3x_RRBS_rel_reshape_wgbs20x <- Pact_cov_perc_group_3x_RRBS_rel[which(!(Pact_cov_perc_group_3x_RRBS_rel$loci %in% loci_lessthan20x$loci)),c("loci","group_simp", "RRBS_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", RRBS_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > Pact_cov_perc_group_3x_RRBS_rel_reshape_wgbs30x <- Pact_cov_perc_group_3x_RRBS_rel[which(!(Pact_cov_perc_group_3x_RRBS_rel$loci %in% loci_lessthan30x$loci)),c("loci","group_simp", "RRBS_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", RRBS_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > Pact_cov_perc_group_3x_RRBS_rel_reshape_wgbs40x <- Pact_cov_perc_group_3x_RRBS_rel[which(!(Pact_cov_perc_group_3x_RRBS_rel$loci %in% loci_lessthan40x$loci)),c("loci","group_simp", "RRBS_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", RRBS_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > Pact_cov_perc_group_3x_RRBS_rel_reshape_wgbs50x <- Pact_cov_perc_group_3x_RRBS_rel[which(Pact_cov_perc_group_3x_RRBS_rel$loci %in% loci_greaterthan50x$loci),c("loci","group_simp", "RRBS_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", RRBS_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > Pact_cov_perc_group_3x_RRBS_rel_reshape_wgbs100x <- Pact_cov_perc_group_3x_RRBS_rel[which(Pact_cov_perc_group_3x_RRBS_rel$loci %in% loci_greaterthan100x$loci),c("loci","group_simp", "RRBS_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", RRBS_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > #summarize the data for plotting > Pact_cov_perc_group_3x_RRBS_rel_sum_wgbs5x <- Pact_cov_perc_group_3x_RRBS_rel_reshape_wgbs5x %>% group_by(RRBS_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > Pact_cov_perc_group_3x_RRBS_rel_sum_wgbs10x <- Pact_cov_perc_group_3x_RRBS_rel_reshape_wgbs10x %>% group_by(RRBS_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > Pact_cov_perc_group_3x_RRBS_rel_sum_wgbs20x <- Pact_cov_perc_group_3x_RRBS_rel_reshape_wgbs20x %>% group_by(RRBS_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > Pact_cov_perc_group_3x_RRBS_rel_sum_wgbs30x <- Pact_cov_perc_group_3x_RRBS_rel_reshape_wgbs30x %>% group_by(RRBS_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > Pact_cov_perc_group_3x_RRBS_rel_sum_wgbs40x <- Pact_cov_perc_group_3x_RRBS_rel_reshape_wgbs40x %>% group_by(RRBS_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > Pact_cov_perc_group_3x_RRBS_rel_sum_wgbs50x <- Pact_cov_perc_group_3x_RRBS_rel_reshape_wgbs50x %>% group_by(RRBS_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > Pact_cov_perc_group_3x_RRBS_rel_sum_wgbs100x <- Pact_cov_perc_group_3x_RRBS_rel_reshape_wgbs100x %>% group_by(RRBS_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > > #create a df with all subsets > Pact_cov_perc_group_3x_RRBS_rel_sum_wgbs_cutoffs <- dplyr::bind_rows(list(wgbs_1x=Pact_cov_perc_group_3x_RRBS_rel_sum, wgbs_5x=Pact_cov_perc_group_3x_RRBS_rel_sum_wgbs5x, wgbs_10x= Pact_cov_perc_group_3x_RRBS_rel_sum_wgbs10x, wgbs_20x= Pact_cov_perc_group_3x_RRBS_rel_sum_wgbs20x,wgbs_30x= Pact_cov_perc_group_3x_RRBS_rel_sum_wgbs30x,wgbs_40x= Pact_cov_perc_group_3x_RRBS_rel_sum_wgbs40x,wgbs_50x= Pact_cov_perc_group_3x_RRBS_rel_sum_wgbs50x, wgbs_100x= Pact_cov_perc_group_3x_RRBS_rel_sum_wgbs100x), .id = "id") > > Pact_cov_perc_group_3x_RRBS_rel_sum_wgbs_cutoffs <- Pact_cov_perc_group_3x_RRBS_rel_sum_wgbs_cutoffs %>% group_by(id, wgbs_meth_tag) %>% mutate(perc_CpG = CpGs/sum(CpGs)*100) > > #make wgbs capitalized > Pact_cov_perc_group_3x_RRBS_rel_sum_wgbs_cutoffs$id <- gsub("wgbs", "WGBS", Pact_cov_perc_group_3x_RRBS_rel_sum_wgbs_cutoffs$id) > > > > > > > > ## Mcap ## > > > loci_lessthan5x <- Mcap_cov_perc_group_3x_RRBS_rel[which(Mcap_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Mcap_cov_perc_group_3x_RRBS_rel$cov_tag == "[1-5]"),"loci"] > > loci_lessthan10x <- Mcap_cov_perc_group_3x_RRBS_rel[which(Mcap_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Mcap_cov_perc_group_3x_RRBS_rel$cov_tag == "[1-5]" | Mcap_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Mcap_cov_perc_group_3x_RRBS_rel$cov_tag == "[5-10]" ),"loci"] > > loci_lessthan20x <- Mcap_cov_perc_group_3x_RRBS_rel[which(Mcap_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Mcap_cov_perc_group_3x_RRBS_rel$cov_tag == "[1-5]" | Mcap_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Mcap_cov_perc_group_3x_RRBS_rel$cov_tag == "[5-10]" | Mcap_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Mcap_cov_perc_group_3x_RRBS_rel$cov_tag == "[10-20]"),"loci"] > > loci_lessthan30x <- Mcap_cov_perc_group_3x_RRBS_rel[which(Mcap_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Mcap_cov_perc_group_3x_RRBS_rel$cov_tag == "[1-5]" | Mcap_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Mcap_cov_perc_group_3x_RRBS_rel$cov_tag == "[5-10]" | Mcap_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Mcap_cov_perc_group_3x_RRBS_rel$cov_tag == "[10-20]" | Mcap_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Mcap_cov_perc_group_3x_RRBS_rel$cov_tag == "[20-30]"),"loci"] > > loci_lessthan40x <- Mcap_cov_perc_group_3x_RRBS_rel[which(Mcap_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Mcap_cov_perc_group_3x_RRBS_rel$cov_tag == "[1-5]" | Mcap_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Mcap_cov_perc_group_3x_RRBS_rel$cov_tag == "[5-10]" | Mcap_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Mcap_cov_perc_group_3x_RRBS_rel$cov_tag == "[10-20]" | Mcap_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Mcap_cov_perc_group_3x_RRBS_rel$cov_tag == "[20-30]" | Mcap_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Mcap_cov_perc_group_3x_RRBS_rel$cov_tag == "[30-40]"),"loci"] > > loci_greaterthan50x <- Mcap_cov_perc_group_3x_RRBS_rel[which(Mcap_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Mcap_cov_perc_group_3x_RRBS_rel$cov_tag == "[50-100]" | Mcap_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Mcap_cov_perc_group_3x_RRBS_rel$cov_tag == "[100-500]" | Mcap_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Mcap_cov_perc_group_3x_RRBS_rel$cov_tag == "[500-1000]" | Mcap_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Mcap_cov_perc_group_3x_RRBS_rel$cov_tag == "[>1000]"),"loci"] > > loci_greaterthan100x <- Mcap_cov_perc_group_3x_RRBS_rel[which(Mcap_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Mcap_cov_perc_group_3x_RRBS_rel$cov_tag == "[100-500]" | Mcap_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Mcap_cov_perc_group_3x_RRBS_rel$cov_tag == "[500-1000]" | Mcap_cov_perc_group_3x_RRBS_rel$method=="WGBS" & Mcap_cov_perc_group_3x_RRBS_rel$cov_tag == "[>1000]"),"loci"] > > > Mcap_cov_perc_group_3x_RRBS_rel_reshape_wgbs5x <- Mcap_cov_perc_group_3x_RRBS_rel[which(!(Mcap_cov_perc_group_3x_RRBS_rel$loci %in% loci_lessthan5x$loci)),c("loci","group_simp", "RRBS_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", RRBS_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > Mcap_cov_perc_group_3x_RRBS_rel_reshape_wgbs10x <- Mcap_cov_perc_group_3x_RRBS_rel[which(!(Mcap_cov_perc_group_3x_RRBS_rel$loci %in% loci_lessthan10x$loci)),c("loci","group_simp", "RRBS_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", RRBS_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > Mcap_cov_perc_group_3x_RRBS_rel_reshape_wgbs20x <- Mcap_cov_perc_group_3x_RRBS_rel[which(!(Mcap_cov_perc_group_3x_RRBS_rel$loci %in% loci_lessthan20x$loci)),c("loci","group_simp", "RRBS_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", RRBS_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > Mcap_cov_perc_group_3x_RRBS_rel_reshape_wgbs30x <- Mcap_cov_perc_group_3x_RRBS_rel[which(!(Mcap_cov_perc_group_3x_RRBS_rel$loci %in% loci_lessthan30x$loci)),c("loci","group_simp", "RRBS_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", RRBS_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > Mcap_cov_perc_group_3x_RRBS_rel_reshape_wgbs40x <- Mcap_cov_perc_group_3x_RRBS_rel[which(!(Mcap_cov_perc_group_3x_RRBS_rel$loci %in% loci_lessthan40x$loci)),c("loci","group_simp", "RRBS_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", RRBS_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > Mcap_cov_perc_group_3x_RRBS_rel_reshape_wgbs50x <- Mcap_cov_perc_group_3x_RRBS_rel[which(Mcap_cov_perc_group_3x_RRBS_rel$loci %in% loci_greaterthan50x$loci),c("loci","group_simp", "RRBS_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", RRBS_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > > Mcap_cov_perc_group_3x_RRBS_rel_reshape_wgbs100x <- Mcap_cov_perc_group_3x_RRBS_rel[which(Mcap_cov_perc_group_3x_RRBS_rel$loci %in% loci_greaterthan100x$loci),c("loci","group_simp", "RRBS_cov_tag", "wgbs_meth_tag")] %>% gather("tag", "value", RRBS_cov_tag:wgbs_meth_tag) %>% na.omit() %>% spread("tag", "value") > #summarize the data for plotting > Mcap_cov_perc_group_3x_RRBS_rel_sum_wgbs5x <- Mcap_cov_perc_group_3x_RRBS_rel_reshape_wgbs5x %>% group_by(RRBS_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > Mcap_cov_perc_group_3x_RRBS_rel_sum_wgbs10x <- Mcap_cov_perc_group_3x_RRBS_rel_reshape_wgbs10x %>% group_by(RRBS_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > Mcap_cov_perc_group_3x_RRBS_rel_sum_wgbs20x <- Mcap_cov_perc_group_3x_RRBS_rel_reshape_wgbs20x %>% group_by(RRBS_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > Mcap_cov_perc_group_3x_RRBS_rel_sum_wgbs30x <- Mcap_cov_perc_group_3x_RRBS_rel_reshape_wgbs30x %>% group_by(RRBS_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > Mcap_cov_perc_group_3x_RRBS_rel_sum_wgbs40x <- Mcap_cov_perc_group_3x_RRBS_rel_reshape_wgbs40x %>% group_by(RRBS_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > Mcap_cov_perc_group_3x_RRBS_rel_sum_wgbs50x <- Mcap_cov_perc_group_3x_RRBS_rel_reshape_wgbs50x %>% group_by(RRBS_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > Mcap_cov_perc_group_3x_RRBS_rel_sum_wgbs100x <- Mcap_cov_perc_group_3x_RRBS_rel_reshape_wgbs100x %>% group_by(RRBS_cov_tag, wgbs_meth_tag) %>% summarise(CpGs =n()) > > > #create a df with all subsets > Mcap_cov_perc_group_3x_RRBS_rel_sum_wgbs_cutoffs <- dplyr::bind_rows(list(wgbs_1x=Mcap_cov_perc_group_3x_RRBS_rel_sum, wgbs_5x=Mcap_cov_perc_group_3x_RRBS_rel_sum_wgbs5x, wgbs_10x= Mcap_cov_perc_group_3x_RRBS_rel_sum_wgbs10x, wgbs_20x= Mcap_cov_perc_group_3x_RRBS_rel_sum_wgbs20x,wgbs_30x= Mcap_cov_perc_group_3x_RRBS_rel_sum_wgbs30x,wgbs_40x= Mcap_cov_perc_group_3x_RRBS_rel_sum_wgbs40x,wgbs_50x= Mcap_cov_perc_group_3x_RRBS_rel_sum_wgbs50x, wgbs_100x= Mcap_cov_perc_group_3x_RRBS_rel_sum_wgbs100x), .id = "id") > > Mcap_cov_perc_group_3x_RRBS_rel_sum_wgbs_cutoffs <- Mcap_cov_perc_group_3x_RRBS_rel_sum_wgbs_cutoffs %>% group_by(id, wgbs_meth_tag) %>% mutate(perc_CpG = CpGs/sum(CpGs)*100) > > #make wgbs capitalized > Mcap_cov_perc_group_3x_RRBS_rel_sum_wgbs_cutoffs$id <- gsub("wgbs", "WGBS", Mcap_cov_perc_group_3x_RRBS_rel_sum_wgbs_cutoffs$id) > > #``` > > > #plot data > #```{r} > jpeg("~/strigg/analyses/20200910/RRBScov_x_WGBSmeth_heatmap_Pact_thresholds.jpg", width = 10, height = 5, units = "in", res = 300 ) > ggplot(data = Pact_cov_perc_group_3x_RRBS_rel_sum_wgbs_cutoffs, aes(x=wgbs_meth_tag, y=factor(RRBS_cov_tag, levels = c("[0]",cov_tags)), fill=log(CpGs,10))) + geom_tile() + theme(axis.text.x = element_text(size = 7, angle = 60, hjust = 1)) + labs(x = "% methylation\n(from WGBS data)", y = "number of reads\n(from RRBS data)", fill = "number of\nCpGs (log10)") + facet_wrap(~factor(id, levels = c("WGBS_1x","WGBS_5x","WGBS_10x","WGBS_20x","WGBS_30x","WGBS_40x","WGBS_50x","WGBS_100x")), nrow = 2) + scale_fill_continuous(high = "#132B43", low = "#b5dfff") > dev.off() null device 1 > > #Mcap > jpeg("~/strigg/analyses/20200910/RRBScov_x_WGBSmeth_heatmap_Mcap_thresholds.jpg", width = 10, height = 5, units = "in", res = 300 ) > ggplot(data = Mcap_cov_perc_group_3x_RRBS_rel_sum_wgbs_cutoffs, aes(x=wgbs_meth_tag, y=factor(RRBS_cov_tag, levels = c("[0]",cov_tags)), fill=log(CpGs,10))) + geom_tile() + theme(axis.text.x = element_text(size = 7, angle = 60, hjust = 1)) + labs(x = "% methylation\n(from WGBS data)", y = "number of reads\n(from RRBS data)", fill = "number of\nCpGs (log10)") + facet_wrap(~factor(id, levels = c("WGBS_1x","WGBS_5x","WGBS_10x","WGBS_20x","WGBS_30x","WGBS_40x","WGBS_50x","WGBS_100x")), nrow = 2) + scale_fill_continuous(high = "#132B43", low = "#b5dfff") > dev.off() null device 1 > > #d <- ggplot(data = Pact_cov_perc_group_3x_RRBS_rel_sum_wgbs_cutoffs, aes(x=wgbs_meth_tag, y=factor(RRBS_cov_tag, levels = c("[0]",cov_tags)), fill=log(CpGs,10))) + geom_tile() + theme(axis.text.x = element_text(size = 7, angle = 60, hjust = 1)) + labs(x = "% methylation\n(from WGBS data)", y = "number of reads\n(from RRBS data)", fill = "number of\nCpGs (log10)", title = "P. acuta") + facet_wrap(~factor(id, levels = c("WGBS_1x","WGBS_5x","WGBS_10x","WGBS_20x","WGBS_30x","WGBS_40x","WGBS_50x","WGBS_100x")), nrow = 2) + scale_fill_continuous(high = "#132B43", low = "#b5dfff") + theme(plot.title = element_text(face = "italic")) > > > #c <- ggplot(data = Mcap_cov_perc_group_3x_RRBS_rel_sum_wgbs_cutoffs, aes(x=wgbs_meth_tag, y=factor(RRBS_cov_tag, levels = c("[0]",cov_tags)), fill=log(CpGs,10))) + geom_tile() + theme(axis.text.x = element_text(size = 7, angle = 60, hjust = 1)) + labs(x = "% methylation\n(from WGBS data)", y = "number of reads\n(from RRBS data)", fill = "number of\nCpGs (log10)", title = "M. capitata") + facet_wrap(~factor(id, levels = c("WGBS_1x","WGBS_5x","WGBS_10x","WGBS_20x","WGBS_30x","WGBS_40x","WGBS_50x","WGBS_100x")), nrow = 2) + scale_fill_continuous(high = "#132B43", low = "#b5dfff") + theme(plot.title = element_text(face = "italic")) > > > #jpeg("~/strigg/analyses/20200910/RRBScov_x_WGBSmeth_heatmap_thresholds.jpg", width = 10, height = 5, units = "in", res = 300 ) > #ggarrange(c,d, nrow = 2,labels = "AUTO",common.legend = T) > #dev.off() > #``` > > #write out data underneath each plot > > #MBD vs WGBS data with coverage cutoffs > write.csv(Pact_cov_perc_group_3x_mbd_rel_sum_wgbs_cutoffs, "~/strigg/analyses/Pact_cov_perc_group_3x_mbd_rel_sum_wgbs_cutoffs.csv", row.names = F, quote = F) > write.csv(Mcap_cov_perc_group_3x_mbd_rel_sum_wgbs_cutoffs, "~/strigg/analyses/20200910/Mcap_cov_perc_group_3x_mbd_rel_sum_wgbs_cutoffs.csv", row.names = F, quote = F) > > #RRBS vs WGBS data with coverage cutoffs > write.csv(Pact_cov_perc_group_3x_RRBS_rel_sum_wgbs_cutoffs, "~/strigg/analyses/Pact_cov_perc_group_3x_RRBS_rel_sum_wgbs_cutoffs.csv", row.names = F, quote = F) > write.csv(Mcap_cov_perc_group_3x_RRBS_rel_sum_wgbs_cutoffs, "~/strigg/analyses/20200910/Mcap_cov_perc_group_3x_RRBS_rel_sum_wgbs_cutoffs.csv", row.names = F, quote = F) > > #all loci from different methods binned by cov and meth > write.csv(Pact_cov_perc_group_3x_sum, "~/strigg/analyses/Pact_cov_perc_group_3x_sum.csv", row.names = F, quote = F) > write.csv(Mcap_cov_perc_group_3x_sum, "~/strigg/analyses/20200910/Mcap_cov_perc_group_3x_sum", row.names = F, quote = F) > > proc.time() user system elapsed 60449.296 62.072 60556.757 Warning message: system call failed: Cannot allocate memory