Shelly Trigg
10/24/2018
# Read the first 52 rows of the Bismark mapping/deduplication summary
mapEff <- read.table(
  "/Volumes/web/metacarcinus/Pgenerosa/20181011/EPI_mapping_dedup_summary_clean.txt",
  nrows = 52
)
# The first column holds the mapping efficiency; give it a descriptive name
names(mapEff)[1] <- "mapping_efficiency"
# Keep the first four characters of each entry and convert them to a number
efficiency_text <- as.character(mapEff$mapping_efficiency)
mapEff$mapping_efficiency <- as.numeric(substr(efficiency_text, 1, 4))
# Histogram of mapping efficiency across samples
ggplot(mapEff, aes(mapping_efficiency)) +
  geom_histogram(color = "darkblue", fill = "lightblue") +
  xlab("mapping efficiency (%)") +
  ylab("number of samples")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
| sample no. | mapping efficiency (%) | duplicate reads | duplicated reads (%) |
|------------|------------------------|-----------------|----------------------|
| 103 | 65.9 | 98 | 1.49 |
| 104 | 65.5 | 81 | 1.24 |
| 111 | 64.4 | 68 | 1.06 |
| 113 | 65.8 | 131 | 2 |
| 119 | 64.0 | 83 | 1.3 |
| 120 | 65.1 | 73 | 1.13 |
| 127 | 65.8 | 77 | 1.17 |
| 128 | 65.6 | 89 | 1.36 |
| 135 | 63.3 | 104 | 1.65 |
| 136 | 62.3 | 97 | 1.56 |
| 143 | 63.1 | 53 | 0.84 |
| 145 | 64.1 | 93 | 1.45 |
| 151 | 60.2 | 28 | 0.47 |
| 152 | 57.6 | 41 | 0.72 |
| 153 | 61.3 | 66 | 1.08 |
| 154 | 56.8 | 33 | 0.59 |
| 159 | 62.4 | 78 | 1.26 |
| 160 | 56.6 | 80 | 1.42 |
| 161 | 52.9 | 42 | 0.8 |
| 162 | 54.7 | 55 | 1.01 |
| 167 | 63.5 | 34 | 0.54 |
| 168 | 60.8 | 58 | 0.96 |
| 169 | 59.1 | 40 | 0.68 |
| 170 | 55.4 | 69 | 1.25 |
| 175 | 62.5 | 58 | 0.93 |
| 176 | 56.4 | 59 | 1.05 |
| 181 | 60.9 | 78 | 1.29 |
| 182 | 62.2 | 110 | 1.78 |
| 184 | 61.7 | 72 | 1.18 |
| 185 | 61.5 | 73 | 1.2 |
| 187 | 64.0 | 64 | 1.01 |
| 188 | 56.7 | 53 | 0.94 |
| 193 | 57.6 | 51 | 0.89 |
| 194 | 63.0 | 34 | 0.54 |
| 199 | 58.4 | 25 | 0.43 |
| 200 | 59.6 | 38 | 0.64 |
| 205 | 55.6 | 52 | 0.94 |
| 206 | 60.9 | 33 | 0.54 |
| 208 | 56.6 | 83 | 1.47 |
| 209 | 61.5 | 47 | 0.77 |
| 214 | 58.6 | 61 | 1.05 |
| 215 | 60.6 | 43 | 0.71 |
| 220 | 62.1 | 66 | 1.07 |
| 221 | 60.5 | 38 | 0.63 |
| 226 | 63.5 | 70 | 1.11 |
| 227 | 63.1 | 38 | 0.6 |
| 229 | 61.2 | 40 | 0.66 |
| 230 | 55.1 | 46 | 0.84 |
| 41 | 59.2 | 31 | 0.53 |
| 42 | 61.8 | 22 | 0.36 |
| 43 | 62.1 | 15 | 0.24 |
| 44 | 61.6 | 17 | 0.28 |
Create file list for reading in Bismark alignments (deduplicated sorted bam files)
# Directory holding the Bismark alignments (deduplicated, sorted BAM files)
bam_dir <- "/Volumes/web/metacarcinus/Pgenerosa/20181011/dedup_sorted_bams"
# `pattern` is a regular expression: escape the dots and anchor the end so that
# only names ending in "_dedup.sorted.bam" match (".bam.bai" index files and
# similar are skipped). full.names = TRUE prepends the directory, and yields an
# empty vector (not the bare directory path) when nothing matches.
file.list <- list.files(bam_dir, pattern = "_dedup\\.sorted\\.bam$", full.names = TRUE)
# methylKit takes the file locations as a list, one element per sample
file.list <- as.list(file.list)
Read in treatment info for these samples
treatment <- read.csv("~/Documents/GitHub/Shelly_Pgenerosa/data/Treatment_info.csv", header = TRUE)
# subset data frame to the first 52 rows to exclude lines with no info in them
treatment <- treatment[1:52, ]
# create a list of single character strings (the format that methylKit expects)
# for the sample ids
# NOTE: li keeps the row order of the CSV, while `treatment` is re-sorted below,
# so li[[i]] does not necessarily correspond to treatment[i, ] afterwards
li <- as.list(as.character(treatment$sampleno))
# order treatment data frame by treatment to be able to add replicate numbers to each sample
treatment <- treatment[order(treatment$treatment), ]
# number the replicates within each treatment (1, 2, ...); derived from the data
# rather than assuming exactly 13 treatments with 4 samples each
treatment$replicate <- ave(
  seq_along(treatment$treatment),
  as.character(treatment$treatment),
  FUN = seq_along
)
Read methylation calls from Bismark alignments
# `treatment` was re-sorted by treatment after the sample id list `li` was
# built, so its rows are no longer guaranteed to be in the same order as `li`
# (and `file.list`). Look up each sample's treatment by its id so that the
# treatment vector lines up element-by-element with the sample ids.
sample_treatment <- treatment$treatment[match(unlist(li), as.character(treatment$sampleno))]
# Read CpG methylation calls (minimum coverage of 3) from the Bismark alignments
myobj <- processBismarkAln(
  location = file.list, sample.id = li,
  assembly = "v3", read.context = "CpG", mincov = 3,
  treatment = sample_treatment
)
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 1954
## average conversion rate = 99.115675504804
## total otherC considered (Forward) (>95% C+T): 752
## average conversion rate (Forward) = 99.113235019658
## total otherC considered (Reverse) (>95% C+T): 1202
## average conversion rate (Reverse) = 99.117202330786
##
## Reading methylation percentage per base for sample: 103
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 1703
## average conversion rate = 99.091566662237
## total otherC considered (Forward) (>95% C+T): 763
## average conversion rate (Forward) = 98.99472328806
## total otherC considered (Reverse) (>95% C+T): 940
## average conversion rate (Reverse) = 99.170174635106
##
## Reading methylation percentage per base for sample: 104
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 1595
## average conversion rate = 99.086660032761
## total otherC considered (Forward) (>95% C+T): 687
## average conversion rate (Forward) = 99.200731735507
## total otherC considered (Reverse) (>95% C+T): 908
## average conversion rate (Reverse) = 99.00035247793
##
## Reading methylation percentage per base for sample: 111
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2267
## average conversion rate = 98.850434859364
## total otherC considered (Forward) (>95% C+T): 972
## average conversion rate (Forward) = 98.841197814332
## total otherC considered (Reverse) (>95% C+T): 1295
## average conversion rate (Reverse) = 98.857367992778
##
## Reading methylation percentage per base for sample: 113
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2914
## average conversion rate = 98.978387844372
## total otherC considered (Forward) (>95% C+T): 1285
## average conversion rate (Forward) = 98.841458105731
## total otherC considered (Reverse) (>95% C+T): 1629
## average conversion rate (Reverse) = 99.08640178799
##
## Reading methylation percentage per base for sample: 119
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 1718
## average conversion rate = 98.020708359873
## total otherC considered (Forward) (>95% C+T): 848
## average conversion rate (Forward) = 98.980440923466
## total otherC considered (Reverse) (>95% C+T): 870
## average conversion rate (Reverse) = 97.08524489559
##
## Reading methylation percentage per base for sample: 120
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 1983
## average conversion rate = 98.126261541491
## total otherC considered (Forward) (>95% C+T): 826
## average conversion rate (Forward) = 98.019043980188
## total otherC considered (Reverse) (>95% C+T): 1157
## average conversion rate (Reverse) = 98.202805798739
##
## Reading methylation percentage per base for sample: 127
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2055
## average conversion rate = 98.502804893619
## total otherC considered (Forward) (>95% C+T): 961
## average conversion rate (Forward) = 98.036645247115
## total otherC considered (Reverse) (>95% C+T): 1094
## average conversion rate (Reverse) = 98.912292480722
##
## Reading methylation percentage per base for sample: 128
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2032
## average conversion rate = 98.02613412332
## total otherC considered (Forward) (>95% C+T): 966
## average conversion rate (Forward) = 98.900538659227
## total otherC considered (Reverse) (>95% C+T): 1066
## average conversion rate (Reverse) = 97.233756279337
##
## Reading methylation percentage per base for sample: 135
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2158
## average conversion rate = 98.725726636734
## total otherC considered (Forward) (>95% C+T): 1008
## average conversion rate (Forward) = 99.164488603183
## total otherC considered (Reverse) (>95% C+T): 1150
## average conversion rate (Reverse) = 98.341142234838
##
## Reading methylation percentage per base for sample: 136
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 1506
## average conversion rate = 98.598065160132
## total otherC considered (Forward) (>95% C+T): 667
## average conversion rate (Forward) = 99.219811772782
## total otherC considered (Reverse) (>95% C+T): 839
## average conversion rate (Reverse) = 98.10378030836
##
## Reading methylation percentage per base for sample: 143
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2238
## average conversion rate = 97.567936602561
## total otherC considered (Forward) (>95% C+T): 1113
## average conversion rate (Forward) = 96.551351158002
## total otherC considered (Reverse) (>95% C+T): 1125
## average conversion rate (Reverse) = 98.573678469045
##
## Reading methylation percentage per base for sample: 145
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2406
## average conversion rate = 96.836212382661
## total otherC considered (Forward) (>95% C+T): 984
## average conversion rate (Forward) = 98.584338167471
## total otherC considered (Reverse) (>95% C+T): 1422
## average conversion rate (Reverse) = 95.626538843805
##
## Reading methylation percentage per base for sample: 151
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 1905
## average conversion rate = 98.065491486328
## total otherC considered (Forward) (>95% C+T): 837
## average conversion rate (Forward) = 97.00575823777
## total otherC considered (Reverse) (>95% C+T): 1068
## average conversion rate (Reverse) = 98.896012768204
##
## Reading methylation percentage per base for sample: 152
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2355
## average conversion rate = 98.554719980527
## total otherC considered (Forward) (>95% C+T): 835
## average conversion rate (Forward) = 98.555072329074
## total otherC considered (Reverse) (>95% C+T): 1520
## average conversion rate (Reverse) = 98.554526420634
##
## Reading methylation percentage per base for sample: 153
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 1733
## average conversion rate = 98.872217074589
## total otherC considered (Forward) (>95% C+T): 756
## average conversion rate (Forward) = 98.633383971892
## total otherC considered (Reverse) (>95% C+T): 977
## average conversion rate (Reverse) = 99.057025493871
##
## Reading methylation percentage per base for sample: 154
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2020
## average conversion rate = 98.269743664748
## total otherC considered (Forward) (>95% C+T): 824
## average conversion rate (Forward) = 99.088496247838
## total otherC considered (Reverse) (>95% C+T): 1196
## average conversion rate (Reverse) = 97.705653256332
##
## Reading methylation percentage per base for sample: 159
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2204
## average conversion rate = 98.045374731075
## total otherC considered (Forward) (>95% C+T): 950
## average conversion rate (Forward) = 98.442836003677
## total otherC considered (Reverse) (>95% C+T): 1254
## average conversion rate (Reverse) = 97.744267706377
##
## Reading methylation percentage per base for sample: 160
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 1994
## average conversion rate = 98.815982874117
## total otherC considered (Forward) (>95% C+T): 892
## average conversion rate (Forward) = 99.045457432259
## total otherC considered (Reverse) (>95% C+T): 1102
## average conversion rate (Reverse) = 98.63023758749
##
## Reading methylation percentage per base for sample: 161
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 1856
## average conversion rate = 94.625681320783
## total otherC considered (Forward) (>95% C+T): 815
## average conversion rate (Forward) = 93.347255899684
## total otherC considered (Reverse) (>95% C+T): 1041
## average conversion rate (Reverse) = 95.626561933844
##
## Reading methylation percentage per base for sample: 162
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2174
## average conversion rate = 95.866101137019
## total otherC considered (Forward) (>95% C+T): 994
## average conversion rate (Forward) = 96.915505874158
## total otherC considered (Reverse) (>95% C+T): 1180
## average conversion rate (Reverse) = 94.982111044887
##
## Reading methylation percentage per base for sample: 167
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2546
## average conversion rate = 97.301169465597
## total otherC considered (Forward) (>95% C+T): 1160
## average conversion rate (Forward) = 95.574572402796
## total otherC considered (Reverse) (>95% C+T): 1386
## average conversion rate (Reverse) = 98.746229056397
##
## Reading methylation percentage per base for sample: 168
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2170
## average conversion rate = 98.672235327817
## total otherC considered (Forward) (>95% C+T): 922
## average conversion rate (Forward) = 98.506674784049
## total otherC considered (Reverse) (>95% C+T): 1248
## average conversion rate (Reverse) = 98.794548485954
##
## Reading methylation percentage per base for sample: 169
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2448
## average conversion rate = 98.557103975994
## total otherC considered (Forward) (>95% C+T): 1042
## average conversion rate (Forward) = 98.464589796715
## total otherC considered (Reverse) (>95% C+T): 1406
## average conversion rate (Reverse) = 98.625667115972
##
## Reading methylation percentage per base for sample: 170
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2114
## average conversion rate = 98.278143901507
## total otherC considered (Forward) (>95% C+T): 869
## average conversion rate (Forward) = 97.736472459386
## total otherC considered (Reverse) (>95% C+T): 1245
## average conversion rate (Reverse) = 98.656226217332
##
## Reading methylation percentage per base for sample: 175
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2421
## average conversion rate = 98.487245916148
## total otherC considered (Forward) (>95% C+T): 1023
## average conversion rate (Forward) = 98.957631397892
## total otherC considered (Reverse) (>95% C+T): 1398
## average conversion rate (Reverse) = 98.143036797533
##
## Reading methylation percentage per base for sample: 176
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2947
## average conversion rate = 96.304897656732
## total otherC considered (Forward) (>95% C+T): 1293
## average conversion rate (Forward) = 96.255046634379
## total otherC considered (Reverse) (>95% C+T): 1654
## average conversion rate (Reverse) = 96.343868256431
##
## Reading methylation percentage per base for sample: 181
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2290
## average conversion rate = 97.485583674046
## total otherC considered (Forward) (>95% C+T): 1007
## average conversion rate (Forward) = 98.838449523138
## total otherC considered (Reverse) (>95% C+T): 1283
## average conversion rate (Reverse) = 96.423747423043
##
## Reading methylation percentage per base for sample: 182
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2183
## average conversion rate = 96.82593058379
## total otherC considered (Forward) (>95% C+T): 997
## average conversion rate (Forward) = 95.693775147159
## total otherC considered (Reverse) (>95% C+T): 1186
## average conversion rate (Reverse) = 97.777666646456
##
## Reading methylation percentage per base for sample: 184
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 3174
## average conversion rate = 93.266977189024
## total otherC considered (Forward) (>95% C+T): 1343
## average conversion rate (Forward) = 98.531398150215
## total otherC considered (Reverse) (>95% C+T): 1831
## average conversion rate (Reverse) = 89.405635107714
##
## Reading methylation percentage per base for sample: 185
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2770
## average conversion rate = 95.302060697731
## total otherC considered (Forward) (>95% C+T): 1226
## average conversion rate (Forward) = 96.716880050415
## total otherC considered (Reverse) (>95% C+T): 1544
## average conversion rate (Reverse) = 94.178635486338
##
## Reading methylation percentage per base for sample: 187
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2132
## average conversion rate = 98.559533232719
## total otherC considered (Forward) (>95% C+T): 910
## average conversion rate (Forward) = 98.415417485601
## total otherC considered (Reverse) (>95% C+T): 1222
## average conversion rate (Reverse) = 98.666853469935
##
## Reading methylation percentage per base for sample: 188
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 1746
## average conversion rate = 98.835485936265
## total otherC considered (Forward) (>95% C+T): 755
## average conversion rate (Forward) = 98.67585416726
## total otherC considered (Reverse) (>95% C+T): 991
## average conversion rate (Reverse) = 98.957102470673
##
## Reading methylation percentage per base for sample: 193
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2158
## average conversion rate = 94.605942688871
## total otherC considered (Forward) (>95% C+T): 907
## average conversion rate (Forward) = 98.287279643887
## total otherC considered (Reverse) (>95% C+T): 1251
## average conversion rate (Reverse) = 91.9368998286
##
## Reading methylation percentage per base for sample: 194
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 1951
## average conversion rate = 94.967529660038
## total otherC considered (Forward) (>95% C+T): 768
## average conversion rate (Forward) = 94.754508429213
## total otherC considered (Reverse) (>95% C+T): 1183
## average conversion rate (Reverse) = 95.105822394843
##
## Reading methylation percentage per base for sample: 199
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 1501
## average conversion rate = 97.765649072761
## total otherC considered (Forward) (>95% C+T): 715
## average conversion rate (Forward) = 97.710956535398
## total otherC considered (Reverse) (>95% C+T): 786
## average conversion rate (Reverse) = 97.815401190083
##
## Reading methylation percentage per base for sample: 200
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 1914
## average conversion rate = 96.815235366206
## total otherC considered (Forward) (>95% C+T): 715
## average conversion rate (Forward) = 95.359398142408
## total otherC considered (Reverse) (>95% C+T): 1199
## average conversion rate (Reverse) = 97.683395178563
##
## Reading methylation percentage per base for sample: 205
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2816
## average conversion rate = 97.932354438698
## total otherC considered (Forward) (>95% C+T): 1181
## average conversion rate (Forward) = 99.012537835168
## total otherC considered (Reverse) (>95% C+T): 1635
## average conversion rate (Reverse) = 97.152111875254
##
## Reading methylation percentage per base for sample: 206
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 1728
## average conversion rate = 98.657308187001
## total otherC considered (Forward) (>95% C+T): 841
## average conversion rate (Forward) = 98.053308039528
## total otherC considered (Reverse) (>95% C+T): 887
## average conversion rate (Reverse) = 99.229984764256
##
## Reading methylation percentage per base for sample: 208
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2644
## average conversion rate = 98.608436378432
## total otherC considered (Forward) (>95% C+T): 1243
## average conversion rate (Forward) = 98.825228681932
## total otherC considered (Reverse) (>95% C+T): 1401
## average conversion rate (Reverse) = 98.416093171258
##
## Reading methylation percentage per base for sample: 209
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2018
## average conversion rate = 96.73525374762
## total otherC considered (Forward) (>95% C+T): 836
## average conversion rate (Forward) = 98.818270543169
## total otherC considered (Reverse) (>95% C+T): 1182
## average conversion rate (Reverse) = 95.26198636938
##
## Reading methylation percentage per base for sample: 214
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2092
## average conversion rate = 98.69135786153
## total otherC considered (Forward) (>95% C+T): 948
## average conversion rate (Forward) = 98.787396329275
## total otherC considered (Reverse) (>95% C+T): 1144
## average conversion rate (Reverse) = 98.611773536861
##
## Reading methylation percentage per base for sample: 215
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2496
## average conversion rate = 95.565299518454
## total otherC considered (Forward) (>95% C+T): 1127
## average conversion rate (Forward) = 96.169218530431
## total otherC considered (Reverse) (>95% C+T): 1369
## average conversion rate (Reverse) = 95.068136095154
##
## Reading methylation percentage per base for sample: 220
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 1940
## average conversion rate = 95.433194504479
## total otherC considered (Forward) (>95% C+T): 804
## average conversion rate (Forward) = 93.748299254092
## total otherC considered (Reverse) (>95% C+T): 1136
## average conversion rate (Reverse) = 96.62567318521
##
## Reading methylation percentage per base for sample: 221
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2608
## average conversion rate = 93.728350745146
## total otherC considered (Forward) (>95% C+T): 1012
## average conversion rate (Forward) = 96.01891376117
## total otherC considered (Reverse) (>95% C+T): 1596
## average conversion rate (Reverse) = 92.275938607166
##
## Reading methylation percentage per base for sample: 226
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2085
## average conversion rate = 95.675277667106
## total otherC considered (Forward) (>95% C+T): 1016
## average conversion rate (Forward) = 98.600927782684
## total otherC considered (Reverse) (>95% C+T): 1069
## average conversion rate (Reverse) = 92.894678492712
##
## Reading methylation percentage per base for sample: 227
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 2704
## average conversion rate = 98.683195902874
## total otherC considered (Forward) (>95% C+T): 1149
## average conversion rate (Forward) = 98.616017473235
## total otherC considered (Reverse) (>95% C+T): 1555
## average conversion rate (Reverse) = 98.732834498151
##
## Reading methylation percentage per base for sample: 229
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 1969
## average conversion rate = 98.439037932237
## total otherC considered (Forward) (>95% C+T): 940
## average conversion rate (Forward) = 98.56021329913
## total otherC considered (Reverse) (>95% C+T): 1029
## average conversion rate (Reverse) = 98.328343233617
##
## Reading methylation percentage per base for sample: 230
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 1562
## average conversion rate = 97.586185203713
## total otherC considered (Forward) (>95% C+T): 706
## average conversion rate (Forward) = 98.463026224959
## total otherC considered (Reverse) (>95% C+T): 856
## average conversion rate (Reverse) = 96.862996230583
##
## Reading methylation percentage per base for sample: 41
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 1366
## average conversion rate = 95.234970693346
## total otherC considered (Forward) (>95% C+T): 656
## average conversion rate (Forward) = 95.542574551862
## total otherC considered (Reverse) (>95% C+T): 710
## average conversion rate (Reverse) = 94.950762057872
##
## Reading methylation percentage per base for sample: 42
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 1606
## average conversion rate = 88.82252879908
## total otherC considered (Forward) (>95% C+T): 625
## average conversion rate (Forward) = 95.260778704525
## total otherC considered (Reverse) (>95% C+T): 981
## average conversion rate (Reverse) = 84.720687625886
##
## Reading methylation percentage per base for sample: 43
##
## Conversion Statistics:
##
## total otherC considered (>95% C+T): 1169
## average conversion rate = 92.933788243882
## total otherC considered (Forward) (>95% C+T): 515
## average conversion rate (Forward) = 98.640909791395
## total otherC considered (Reverse) (>95% C+T): 654
## average conversion rate (Reverse) = 88.439648187355
##
## Reading methylation percentage per base for sample: 44
calculate region coverage
# Summarise methylation counts over 1000 bp windows; the step size equals the
# window size
tiles <- tileMethylCounts(
  myobj,
  win.size = 1000,
  step.size = 1000
)
Find regions covered by at least one sample in every treatment group (`min.per.group = 1L`). Regions do not have to be covered by all samples, which is why the coverage columns contain NAs for some samples.
# Merge the tiled counts across samples, keeping every region covered by at
# least one sample in each treatment group (min.per.group = 1L). The call is
# namespace-qualified because tidyr, attached further down, also exports
# unite() and masks the methylKit version.
mmeth <- methylKit::unite(tiles, min.per.group = 1L)
mmeth_data <- getData(mmeth)
#chromosome regions covered by at least one sample in every treatment group
nrow(mmeth_data)
## [1] 39
#chromosome regions that are covered by all samples (rows without any NA)
d_noNAs <- mmeth_data[complete.cases(mmeth_data), ]
nrow(d_noNAs)
## [1] 11
Make a data frame with unique chromosome positions, sample number, and coverage to plot coverage for each sample
# Keep the first two columns (genome location) plus every per-sample coverage
# column of the united data
mmeth_df <- getData(mmeth)
coverage_cols <- grep("coverage", colnames(mmeth_df))
d <- mmeth_df[, c(1:2, coverage_cols)]
# Shorten the region names: drop everything from '__' onwards, then drop the
# 'PGA_scaffold' prefix so that only the scaffold number remains
scaffold_no <- sub("PGA_scaffold", "", sub("__.*", "", d$chr))
# Append the start position after a decimal point and convert to numeric so
# that regions can be placed on a continuous axis
d$chr <- as.numeric(paste(scaffold_no, d$start, sep = "."))
#load tidyr library to reshape data for plotting; can't load this in the beginning because it interferes with methylKit (tidyr masks methylKit's unite())
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.4.4
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:methylKit':
##
## unite
## The following object is masked from 'package:S4Vectors':
##
## expand
# Wide -> long: one row per region and sample, with the former column name in
# `order` and its value in `coverage`
d <- gather(d, key = order, value = coverage, 3:ncol(d))
# Strip the 'coverage' prefix so that only the sample index remains in `order`
d$order <- sub("coverage", "", d$order)
# Attach the treatment info (columns 5 to 9 of `treatment`) via the shared columns
d <- merge(x = d, y = treatment[, 5:9])
# Regions without coverage in a sample are NA; plot them as zero
d <- replace(d, is.na(d), 0)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
plot % methylation per region facetted by treatment and sample
#make data frame with all meth info (source code: https://github.com/al2na/methylKit/blob/master/R/methylDBFunctions.R)
# Build one data frame per sample and bind them once at the end; calling
# rbind() inside the loop re-copies the growing frame on every iteration, and
# seq_along() is safe when `tiles` is empty (1:length(tiles) is not).
per_sample_tiles <- lapply(seq_along(tiles), function(i) {
  # columns 1, 2, 5 and 6 are used below as chr, start, coverage and numCs
  tile_df <- getData(tiles[[i]])[, c(1, 2, 5, 6)]
  # shorten region names: drop everything from '__' on and the scaffold prefix
  tile_df$chr <- sub("__.*", "", tile_df$chr)
  tile_df$chr <- sub("PGA_scaffold", "", tile_df$chr)
  # label each region as "<scaffold number>.<start position>"
  tile_df$chr <- paste(tile_df$chr, tile_df$start, sep = ".")
  # position of the sample within `tiles`; used to join the treatment info
  tile_df$order <- i
  tile_df$percentMeth <- 100 * tile_df$numCs / tile_df$coverage
  tile_df
})
all_tiles <- do.call(rbind, c(list(data.frame()), per_sample_tiles))
# add the treatment info for each sample
all_tiles <- merge(all_tiles, treatment, by = "order")
individual plots of % methylation per region
individual plots of read coverage per region