Ran MACAU in a Jupyter Notebook. See 05-MACAU.ipynb
In this notebook I assess patterns in methylated loci that MACAU identified as associated with the following, while also controlling for relatedness:
See my Jupyter notebook on these runs: code/05-MACAU.ipynb
list.of.packages <- c("vegan", "dplyr", "factoextra", "FactoMineR", "tidyverse", "tibble", "reshape2", "gplots", "qvalue", "cluster", "here", "gridExtra", "RColorBrewer") #add new libraries here
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
# Load all libraries
lapply(list.of.packages, FUN = function(X) {
do.call("require", list(X))
})
Use dataframes that have the siteID, and original contig, start #, and end # info
head(counts.filtered.meth <- read.table(file = here::here("analyses", "macau", "counts-filtered-meth.txt"), sep = "\t", header=T, stringsAsFactors = F))
## siteID numCs1 numCs2 numCs3 numCs4 numCs5 numCs6 numCs7 numCs8 numCs9
## 1 Contig0_39226 11 17 10 14 9 13 9 10 18
## 2 Contig0_39234 10 7 10 5 12 11 7 8 8
## 3 Contig0_64179 9 10 8 20 8 13 5 11 9
## 4 Contig0_71523 16 12 13 16 11 8 11 10 15
## 5 Contig0_71533 21 17 17 17 21 13 17 20 20
## 6 Contig0_71542 21 22 16 14 15 12 28 13 18
## numCs10 numCs11 numCs12 numCs13 numCs14 numCs15 numCs16 numCs17 numCs18
## 1 9 4 15 10 5 8 8 8 8
## 2 7 1 10 8 5 12 5 12 10
## 3 3 4 14 8 18 17 10 10 7
## 4 19 17 13 24 18 28 18 17 18
## 5 27 22 17 32 23 33 22 23 22
## 6 21 19 12 35 25 32 21 16 21
head(counts.filtered.tot <- read.table(file = here::here("analyses", "macau", "counts-filtered-total.txt"), sep = "\t", header=T, stringsAsFactors = F))
## siteID coverage1 coverage2 coverage3 coverage4 coverage5 coverage6
## 1 Contig0_39226 21 22 22 23 23 21
## 2 Contig0_39234 24 27 27 26 28 25
## 3 Contig0_64179 10 14 9 21 11 13
## 4 Contig0_71523 16 12 13 18 13 8
## 5 Contig0_71533 21 17 18 17 21 13
## 6 Contig0_71542 23 23 16 14 15 12
## coverage7 coverage8 coverage9 coverage10 coverage11 coverage12 coverage13
## 1 20 16 31 17 5 23 17
## 2 23 24 36 19 7 29 19
## 3 13 12 15 5 5 19 11
## 4 16 10 16 23 18 14 28
## 5 23 20 21 27 22 21 33
## 6 28 14 18 21 19 12 35
## coverage14 coverage15 coverage16 coverage17 coverage18
## 1 10 20 13 19 24
## 2 12 25 17 23 32
## 3 24 19 10 10 15
## 4 21 33 21 20 18
## 5 24 36 23 23 23
## 6 26 33 23 17 22
unite()
function in methylkit)MACAU.results.length <- read.table(file=here::here("analyses","macau","output","20200205-macau.assoc.txt"), sep = "\t", stringsAsFactors = F)
colnames(MACAU.results.length) <- MACAU.results.length[1,]
MACAU.results.length <- MACAU.results.length[-1, ]
MACAU.results.length$pvalue <- as.numeric(MACAU.results.length$pvalue)
hist(MACAU.results.length$pvalue)
length(MACAU.results.length$id)
## [1] 33284
length(counts.filtered.tot$siteID)
## [1] 33284
summary(MACAU.results.length$id == counts.filtered.tot$siteID) #confirm that macau results and original destranded dataframe are in same order
## Mode TRUE
## logical 33284
MACAU.results.length.c <- cbind(counts.filtered.tot[,1],MACAU.results.length) %>%
separate(`counts.filtered.tot[, 1]`, into = c("contig", "locus"), sep = "_")
write.table(MACAU.results.length.c, file = here::here("analyses", "macau", "MACAU.results.length.c.tab"), sep = "\t", row.names = FALSE, quote = FALSE)
From Mac’s 2019 salmon paper, doi:10.3390/genes10050356
“Multiple hypothesis testing was performed on P-values extracted from the MACAU output for each CG site using the false discovery rate (FDR) approach used in the R package qvalue [45]. We considered a CG site to be differentially methylated (i.e., differentially methylated cytosine (DMC) if it passed a 10% FDR threshold, consistent with [46].”
hist(MACAU.results.length.c$pvalue) # p-value distribution looks good (i.e. not u-shaped)
MACAU.results.length <- qvalue(p = MACAU.results.length.c$pvalue, fdr.level = 0.1) # create qvalue object, setting FDR=10%
summary(MACAU.results.length)
##
## Call:
## qvalue(p = MACAU.results.length.c$pvalue, fdr.level = 0.1)
##
## pi0: 0.9899386
##
## Cumulative number of significant calls:
##
## <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1 <1
## p-value 25 100 492 1049 1866 3523 33284
## q-value 0 0 6 9 16 20 33284
## local FDR 0 0 1 6 8 12 3187
100*(1- MACAU.results.length$pi0) #% of all loci in df as truely sign.
## [1] 1.006138
hist(MACAU.results.length)
#if one wants to estimate the false discovery rate when calling all p-values ≤ 0.01 significant:
max(MACAU.results.length$qvalues[MACAU.results.length$pvalues <= 0.01])
## [1] 0.6694966
# The main purpose of the upper-left plot is to gauge the reliability of the π0 estimate, where the estimated π0 is plotted versus the tuning parameter λ. The remaining plots show how many tests are significant, as well as how many false positives to expect for each q-value cut-off.
plot(MACAU.results.length)
# Given our FDR=10% level, how many are significant?
summary(MACAU.results.length$significant) #20 sign. (# TRUE)
## Mode FALSE TRUE
## logical 33264 20
# Add qvalue results to macau df
macau.FDR.length <- cbind(MACAU.results.length.c, do.call(cbind.data.frame, MACAU.results.length[c("qvalues", "pvalues", "significant")]))
#plot macau p-values against pvalues in the qvalue results - should all fall on 1:1 line (double check that pvalues are in same order)
plot(macau.FDR.length$pvalue, macau.FDR.length$pvalues) # looks good!
save(macau.FDR.length, file="../analyses/macau/R-objects/macau.FDR.length")
head(macau.sign.length <- subset(macau.FDR.length, significant=="TRUE"))
## contig locus id n acpt_rate beta se_beta
## 1156 Contig109515 3402 Contig109515_3402 18 2.193e-01 -4.060e-01 7.741e-02
## 1901 Contig120547 984 Contig120547_984 18 3.543e-01 -2.070e-01 5.073e-02
## 3092 Contig138707 4503 Contig138707_4503 18 3.151e-01 4.395e-01 9.365e-02
## 3226 Contig141213 6476 Contig141213_6476 18 2.867e-01 1.034e+00 2.158e-01
## 6984 Contig19176 11627 Contig19176_11627 18 2.846e-01 -4.085e-01 9.930e-02
## 7514 Contig19628 579 Contig19628_579 18 2.665e-01 -3.676e-01 8.330e-02
## pvalue h se_h sigma2 se_sigma2 alpha0 se_alpha0
## 1156 1.569e-07 7.989e-01 2.186e-01 1.507e+01 1.999e+01 6.733e+00 1.606e+00
## 1901 4.517e-05 3.465e-01 1.817e-01 7.993e-01 7.939e-01 4.128e+00 9.196e-01
## 3092 2.691e-06 6.449e-01 2.903e-01 5.659e+00 1.150e+01 -3.239e+00 1.609e+00
## 3226 1.641e-06 6.892e-01 2.840e-01 1.143e+01 1.344e+01 -1.180e+01 3.283e+00
## 6984 3.897e-05 4.977e-01 2.528e-01 6.705e+00 1.174e+01 5.743e+00 1.743e+00
## 7514 1.017e-05 7.608e-01 2.218e-01 1.241e+01 2.019e+01 7.671e+00 2.011e+00
## alpha1 se_alpha1 qvalues pvalues significant
## 1156 3.135e+00 4.608e-01 0.005169716 1.569e-07 TRUE
## 1901 3.960e-02 2.050e-01 0.074415581 4.517e-05 TRUE
## 3092 -1.114e+00 4.003e-01 0.012666582 2.691e-06 TRUE
## 3226 -2.244e+00 6.419e-01 0.009796871 1.641e-06 TRUE
## 6984 2.971e+00 5.400e-01 0.071334839 3.897e-05 TRUE
## 7514 2.339e+00 5.564e-01 0.030462957 1.017e-05 TRUE
nrow(macau.sign.length) #number of significant loci = 20
## [1] 20
Merge macau results with count dataframe, save to file. These constitute master dataframes
macau.sign.length.meth.counts <- merge(x=macau.sign.length, y=subset(counts.filtered.meth, siteID %in% macau.sign.length$id), by.x="id", by.y="siteID")
macau.sign.length.tot.counts <- merge(x=macau.sign.length, y=subset(counts.filtered.tot, siteID %in% macau.sign.length$id), by.x="id", by.y="siteID")
write.table(macau.sign.length.meth.counts, file = here::here("analyses", "macau", "combined-macau-FDR-meth-counts.length.txt"), sep = "\t", row.names = FALSE, quote = FALSE)
write.table(macau.sign.length.tot.counts, file = here::here("analyses", "macau", "combined-macau-FDR-tot-counts.length.txt"), sep = "\t", row.names = FALSE, quote = FALSE)
macau.sign.length.tot.counts.na <- macau.sign.length.tot.counts %>%
mutate_at(vars(contains('coverage')), funs(ifelse(. < 5, NA, .)))
## Warning: funs() is soft deprecated as of dplyr 0.8.0
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once per session.
macau.sign.length.perc.meth <- macau.sign.length.meth.counts
macau.sign.length.perc.meth[,20:37] <- 100*(macau.sign.length.meth.counts[,20:37]/macau.sign.length.tot.counts.na[,20:37])
size <- read.csv(file = here::here("data","mbd_size.csv"), header=T, sep = "\t")
key <- read.csv(file=here::here("data","sample-key.csv"))
size.macau <- cbind(x=c(rep(1,times=18)), y=merge(y=size, x=key, by.y="Sample", by.x="SAMPLE")) %>%
mutate(population=as.factor(c(rep("HC", times=9), rep("SS", times=9)))) %>%
mutate(sample=paste("numCs", y.MBD.FILENAME, sep="")) %>%
arrange(y.Length)
mycols <- c("firebrick3", "dodgerblue3")
tree.order <- paste("numCs",c(15,17,12,10,11,13,18,14,16,8,1,6,9,2,4,5,3,7), sep="")
size.order <- paste("numCs",c(size.macau[order(size.macau$y.Length),"y.MBD.FILENAME"]), sep="")
pop.colors.tree <- c(rep("dodgerblue3", times=9), rep("firebrick3", times=9))
pop.colors.length <- c("dodgerblue3", rep("firebrick3", times=7), rep("dodgerblue3", times=3), "firebrick3","dodgerblue3","firebrick3",rep("dodgerblue3", times=4))
macau.sign.length.perc.meth <- macau.sign.length.perc.meth %>%
mutate(mean.HC = rowMeans(macau.sign.length.perc.meth[,20:28])) %>%
mutate(mean.SS = rowMeans(macau.sign.length.perc.meth[,29:37])) %>%
mutate(HC.SS.ratio = mean.HC/mean.SS) %>%
arrange(HC.SS.ratio)
rownames(macau.sign.length.perc.meth) <- macau.sign.length.perc.meth[,1]
save(macau.sign.length.perc.meth, file="../analyses/macau/R-objects/macau.sign.length.perc.meth")
save(size.macau, file="../analyses/macau/R-objects/size.macau")
par(cex.main=.8)
heatmap.2(as.matrix(macau.sign.length.perc.meth[, grep(c("numCs"), colnames(macau.sign.length.perc.meth))]),
main = "MACAU id'd % methylated\n5x cov\nOrdered by cluster analysis",
xlab = NA, ylab = NA,
hclustfun = function(x) hclust(x, method="ward.D"), distfunc <- function(x) daisy(x,metric="correlation"), trace="none", dendrogram = "column", na.color = "white",
col = colorRampPalette(brewer.pal(4,"YlGnBu")), colCol = c(rep("firebrick3", times=9), rep("dodgerblue3", times=9)), cexRow = 0.6)
## Warning in is.na(Rowv): is.na() applied to non-(list or vector) of type
## 'closure'
par(cex.main=.8)
heatmap.2(as.matrix(macau.sign.length.perc.meth[, grep(c("numCs"), colnames(macau.sign.length.perc.meth))][tree.order]),
main = "MACAU id'd % methylated\ngray=<5x cov\nOrdered by MethylKit tree",
xlab = NA,
na.color = "white",trace="none", dendrogram = "none", Rowv=FALSE, Colv = FALSE,
col = colorRampPalette(brewer.pal(4,"YlGnBu")), colCol = pop.colors.tree, cexRow = 0.6)
# par(cex.main=.8)
# heatmap.2(as.matrix(macau.sign.length.perc.meth[, grep(c("numCs"), colnames(macau.sign.length.perc.meth))][size.order]),
# main = "MACAU id'd % methylation\nFDR=10%\nOrdered by Length",
# xlab = NA, ylab = NA,
# trace="none", dendrogram = "none", Rowv=FALSE, Colv = FALSE,
# col = colorRampPalette(brewer.pal(4,"YlGnBu")), colCol = pop.colors.length, cexRow = 0.6)
#ggplotly(
as.matrix(macau.sign.length.perc.meth[, grep(c("numCs"), colnames(macau.sign.length.perc.meth))][size.order]) %>%
melt(value.name = "perc.meth", varnames = c("locus", "sample")) %>%
ggplot(aes(sample, locus, fill= perc.meth)) + xlab("Sample") + geom_tile(na.rm = T) +
scale_x_discrete(breaks=colnames(as.matrix(macau.sign.length.perc.meth[, grep(c("numCs"), colnames(macau.sign.length.perc.meth))][size.order])), labels=c("10","1","7","4","6","8","2","9","11","13","14","3","15","5","17","18","12","16")) +
#scale_fill_viridis(discrete=FALSE)
theme(axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()) +
ggtitle("Heatmap of Methylated Loci Associated with Oyster Size") +
ylab("Loci") +
scale_fill_distiller(name = "% Methylation", palette = "YlGnBu", direction = 1, na.value="white")
#scale_fill_gradient(low="gray5", high="white") #)
#scale_fill_gradient(low="gray5", high="white")
par(oma=c(0,0,5,0))
barplot(size.macau[order(size.macau$y.Length),]$y.Length, names.arg = size.macau[order(size.macau$y.Length),]$y.MBD.FILENAME, col = mycols[size.macau[order(size.macau$y.Length),]$pop], cex.names = 0.8, main = "Shell Length (mm)", las=2, space=1) #could add this to cut yaxis off at 10mm: (ylim=c(10,40), xpd = FALSE)
abline(h = 0, col = "black", lwd = 1)
legend("topleft",
pch = c(16, 17),
legend = c("Hood Canal", "South Sound"),
col = c(c("firebrick3", "dodgerblue3")),
cex = 1.2, bty = "n") #Add a legend with information about ambient and elevated samples
mtext(side = 1, text = "Sample", line = 2.25, cex = 1) #Add x-axis label
macau.sign.length.perc.meth %>%
mutate(end = locus) %>%
rename(start = locus) %>%
dplyr::select(c("contig", "start", "end")) %>%
write_delim(here::here("analyses","macau", "macau.sign.length.perc.meth.bed"), delim = '\t', col_names = FALSE)
# All loci fed into/from macau
MACAU.results.length.c %>%
mutate(end = locus) %>%
rename(start = locus) %>%
dplyr::select(c("contig", "start", "end")) %>%
write_delim(here::here("analyses","macau", "macau-all-loci.bed"), delim = '\t', col_names = FALSE)
# All loci fed into/from macau with q-values
macau.FDR.length %>%
write_delim(here::here("analyses","macau", "macau-all-loci_qvalues.tab"), delim = '\t', col_names = FALSE)
load("../analyses/DMLs/R-objects/dml25")
macau.sign.length %>%
mutate_at(vars(locus), funs(as.integer)) %>%
dplyr::filter(contig %in% dml25$chr, locus %in% c(dml25$start+1))
## contig locus id n acpt_rate beta se_beta pvalue
## 1 Contig67368 5048 Contig67368_5048 18 1.623e-01 4.584e-01 1.070e-01 1.842e-05
## h se_h sigma2 se_sigma2 alpha0 se_alpha0 alpha1
## 1 8.804e-01 1.924e-01 6.033e+01 3.966e+01 -4.465e+00 2.405e+00 -2.667e+00
## se_alpha1 qvalues pvalues significant
## 1 4.827e-01 0.04289975 1.842e-05 TRUE
load("../analyses/DMGs/R-objects/DMLs.in.DMGs_2kbslop")
macau.sign.length %>%
mutate_at(vars(locus), funs(as.integer)) %>%
dplyr::filter(contig %in% DMLs.in.DMGs_2kbslop$chr, locus %in% c(DMLs.in.DMGs_2kbslop$start+1))
## [1] contig locus id n acpt_rate beta
## [7] se_beta pvalue h se_h sigma2 se_sigma2
## [13] alpha0 se_alpha0 alpha1 se_alpha1 qvalues pvalues
## [19] significant
## <0 rows> (or 0-length row.names)
MACAU.results.pop <- read.table(file=here::here("analyses","macau","output","20200310-macau.assoc.txt"), sep = "\t", stringsAsFactors = F)
colnames(MACAU.results.pop) <- MACAU.results.pop[1,]
MACAU.results.pop <- MACAU.results.pop[-1, ]
MACAU.results.pop$pvalue <- as.numeric(MACAU.results.pop$pvalue)
hist(MACAU.results.pop$pvalue)
# Check to see if contig IDs are in the same order for macau results and count dataframe
length(MACAU.results.pop$id) - length(counts.filtered.tot$siteID) # not the same, 63 fewer in MACAU results
## [1] -110
subset(counts.filtered.tot, !(siteID %in% c(MACAU.results.pop$id)))
## siteID coverage1 coverage2 coverage3 coverage4 coverage5
## 49 Contig100188_628 13 15 10 21 15
## 1885 Contig120320_3902 6 9 11 17 10
## 2159 Contig123099_1053 12 9 17 13 9
## 2307 Contig1245_9229 16 22 23 22 14
## 2447 Contig1270_4727 13 10 12 15 12
## 2508 Contig128000_3308 17 12 14 17 6
## 2574 Contig128973_13428 19 15 15 19 21
## 2775 Contig1322_24192 25 16 21 31 19
## 2808 Contig133198_1057 13 11 12 16 12
## 3421 Contig144141_3058 22 15 24 21 10
## 3787 Contig151605_1218 23 16 20 14 37
## 4128 Contig158671_1771 16 10 14 10 11
## 4225 Contig15981_4842 20 15 21 20 21
## 4434 Contig1625_10165 15 25 34 31 15
## 5012 Contig16934_7485 13 11 11 10 8
## 5072 Contig17038_8212 16 14 16 19 13
## 5152 Contig17068_18441 16 13 12 15 5
## 5154 Contig17083_887 28 20 22 22 13
## 5466 Contig173_13512 18 12 16 19 15
## 5505 Contig17378_25494 20 12 15 19 15
## 5801 Contig177178_9364 27 31 21 26 25
## 6224 Contig181412_5169 17 16 18 25 13
## 6227 Contig18145_1023 13 10 13 12 8
## 7147 Contig192358_515 18 9 11 12 14
## 8031 Contig202_11457 10 11 13 13 10
## 8216 Contig2051_12244 14 14 18 14 12
## 8278 Contig20582_15091 26 19 23 21 18
## 8347 Contig20655_14960 15 12 17 19 10
## 8425 Contig20843_642 13 11 18 13 13
## 8889 Contig21393_1796 18 22 12 13 13
## 9148 Contig21828_46053 18 12 22 25 19
## 10035 Contig22991_2343 17 18 17 19 15
## 10495 Contig23485_23330 19 12 18 18 7
## 10683 Contig23659_6863 13 14 13 20 7
## 11094 Contig24479_1279 16 10 14 20 10
## 11333 Contig24745_9189 18 16 19 20 14
## 11412 Contig24782_759 12 18 12 12 14
## 11445 Contig2482_5009 13 11 18 11 13
## 11619 Contig2514_11128 24 17 20 24 15
## 11727 Contig25333_2346 16 14 14 16 13
## 11916 Contig25670_5091 31 15 26 23 13
## 12041 Contig25867_11174 24 20 19 21 17
## 12440 Contig26552_20440 16 15 12 12 10
## 12509 Contig26653_6285 23 19 19 19 14
## 12582 Contig2679_976 18 15 17 20 17
## 15434 Contig31449_4876 16 12 28 20 18
## 15534 Contig31659_17609 22 17 18 16 14
## 15651 Contig31950_9831 23 23 18 21 18
## 17133 Contig34643_36955 10 13 14 10 13
## 17497 Contig35326_8771 20 15 17 18 10
## 17768 Contig35942_4747 11 14 20 18 22
## 17771 Contig35942_9245 13 11 14 10 10
## 17847 Contig36016_6368 20 16 15 21 12
## 17850 Contig36016_16474 31 18 33 32 17
## 18950 Contig38503_6074 24 28 18 28 19
## 19376 Contig39668_416 17 20 23 23 19
## 19526 Contig3986_17148 17 14 15 11 7
## 19634 Contig40083_4174 17 15 14 21 15
## 20356 Contig41436_9662 18 37 25 21 25
## 20386 Contig41593_3709 27 19 23 30 18
## 20407 Contig41593_5722 13 18 19 19 8
## 20950 Contig42897_22395 19 15 19 17 16
## 20990 Contig42978_11643 15 8 11 13 20
## 21114 Contig43285_17641 19 14 17 15 10
## 21130 Contig433458_111 22 7 14 19 9
## 21219 Contig43659_2298 16 10 14 18 14
## 21307 Contig43788_10140 22 21 23 20 10
## 21469 Contig44384_8987 14 12 13 20 8
## 21952 Contig4555_1540 28 17 16 18 16
## 22392 Contig47001_19693 8 11 16 13 17
## 22642 Contig47631_13438 26 20 24 27 17
## 22934 Contig48322_769 25 21 26 26 13
## 22945 Contig48322_1791 18 6 14 15 11
## 23102 Contig49026_1599 25 13 22 19 19
## 24257 Contig5196_3447 23 23 23 20 20
## 24368 Contig52148_10669 19 10 15 17 13
## 24380 Contig52148_11323 18 21 31 26 10
## 24734 Contig53_41140 27 23 20 25 15
## 25018 Contig53762_4854 14 12 13 12 16
## 25240 Contig54400_4532 11 15 16 15 8
## 25557 Contig55459_1612 17 14 17 15 17
## 25749 Contig56233_2461 10 16 13 12 11
## 25839 Contig5648_238 14 11 15 13 7
## 26066 Contig57553_10334 20 15 13 25 12
## 26173 Contig5801_6434 20 21 21 24 12
## 26704 Contig60274_9481 20 21 21 24 19
## 27012 Contig61494_118 19 11 14 15 17
## 27100 Contig61840_2327 22 23 23 25 17
## 27217 Contig62096_1554 20 22 16 26 15
## 27569 Contig63672_12459 21 27 17 28 12
## 27816 Contig64671_3498 14 15 16 7 11
## 27835 Contig64740_7598 18 23 22 19 19
## 28128 Contig65766_750 25 22 25 23 19
## 28417 Contig6699_8390 24 10 16 21 11
## 28480 Contig67306_3799 21 18 24 15 16
## 28554 Contig67616_4580 11 24 14 14 16
## 28663 Contig6829_891 19 12 11 15 18
## 30331 Contig77222_589 19 17 21 14 18
## 30450 Contig77846_1010 10 12 12 14 13
## 30585 Contig786_21682 15 16 16 17 13
## 30649 Contig78944_3198 14 19 20 16 14
## 30655 Contig79047_8403 12 11 5 11 10
## 31634 Contig84838_6888 10 13 15 13 12
## 31884 Contig85887_2262 19 14 23 22 15
## 32157 Contig87399_5008 12 12 21 14 8
## 32371 Contig89614_4162 16 11 16 15 16
## 32632 Contig92127_1241 28 11 21 21 10
## 32641 Contig92157_3362 13 10 12 12 9
## 32789 Contig9391_2496 12 11 13 17 11
## 32951 Contig95969_5942 19 11 21 24 17
## coverage6 coverage7 coverage8 coverage9 coverage10 coverage11 coverage12
## 49 5 17 12 14 10 14 13
## 1885 10 13 17 15 9 4 17
## 2159 10 13 17 12 22 30 9
## 2307 10 19 22 18 7 14 11
## 2447 11 17 19 19 12 12 13
## 2508 15 15 17 23 15 17 9
## 2574 13 16 16 19 28 26 17
## 2775 9 25 21 22 43 55 14
## 2808 8 13 16 16 18 17 13
## 3421 11 18 18 19 23 29 8
## 3787 20 18 11 33 10 29 20
## 4128 6 6 13 13 11 19 11
## 4225 15 25 26 30 30 36 21
## 4434 7 34 12 11 24 35 18
## 5012 7 17 15 13 17 14 18
## 5072 13 16 17 21 10 12 23
## 5152 6 11 11 15 16 13 11
## 5154 21 21 19 24 37 40 19
## 5466 10 22 19 29 25 36 14
## 5505 14 19 10 12 32 31 13
## 5801 16 28 21 28 41 60 21
## 6224 11 16 13 16 28 17 13
## 6227 8 13 12 13 10 15 11
## 7147 12 11 15 14 18 42 10
## 8031 12 20 14 19 21 8 7
## 8216 8 15 17 17 23 15 14
## 8278 20 21 22 32 32 19 18
## 8347 11 15 14 21 42 38 17
## 8425 11 13 15 12 18 24 12
## 8889 14 23 18 21 16 2 15
## 9148 6 26 17 20 10 11 16
## 10035 14 28 23 20 39 20 19
## 10495 15 19 21 23 31 32 14
## 10683 3 16 14 23 21 21 16
## 11094 9 16 25 10 27 35 14
## 11333 15 17 15 17 21 27 14
## 11412 7 24 13 15 7 4 16
## 11445 17 20 11 18 24 3 18
## 11619 18 21 18 22 41 22 18
## 11727 18 26 19 23 28 21 17
## 11916 5 11 20 18 12 41 16
## 12041 17 24 18 22 18 9 16
## 12440 10 21 15 19 9 8 14
## 12509 13 29 24 16 36 49 11
## 12582 16 14 17 19 12 6 15
## 15434 13 21 18 26 28 32 16
## 15534 10 15 18 24 9 26 13
## 15651 12 25 20 16 17 31 17
## 17133 10 12 11 16 15 9 12
## 17497 9 18 15 15 32 45 9
## 17768 12 20 14 20 20 15 17
## 17771 7 10 13 18 17 19 11
## 17847 9 24 19 22 14 27 8
## 17850 24 22 21 26 33 27 16
## 18950 19 29 22 12 47 42 25
## 19376 11 28 19 15 44 46 9
## 19526 10 16 15 17 20 15 17
## 19634 12 10 14 10 20 10 21
## 20356 23 25 31 36 46 34 35
## 20386 15 26 26 16 48 54 14
## 20407 6 21 22 19 42 27 15
## 20950 7 19 21 25 22 52 25
## 20990 7 16 14 20 25 29 10
## 21114 14 24 22 24 27 34 16
## 21130 17 18 18 13 31 20 12
## 21219 7 14 8 17 23 23 14
## 21307 12 24 16 19 39 31 16
## 21469 10 14 14 18 18 14 15
## 21952 9 17 15 16 11 5 14
## 22392 16 17 11 15 14 11 10
## 22642 11 23 20 29 28 37 22
## 22934 14 22 20 17 38 25 18
## 22945 18 14 14 22 29 36 12
## 23102 15 21 20 29 13 19 29
## 24257 18 28 29 25 13 9 18
## 24368 11 17 15 22 27 27 12
## 24380 14 21 22 24 38 56 15
## 24734 18 21 22 21 22 20 17
## 25018 11 16 17 16 15 16 13
## 25240 10 22 12 16 22 39 8
## 25557 11 15 19 19 18 17 20
## 25749 11 18 11 18 10 6 14
## 25839 8 12 11 15 20 22 7
## 26066 12 15 15 19 25 20 15
## 26173 12 24 18 18 35 48 12
## 26704 18 24 23 28 37 22 20
## 27012 15 13 18 24 12 5 16
## 27100 16 28 28 26 34 50 26
## 27217 17 17 21 20 27 17 20
## 27569 20 19 12 19 25 37 16
## 27816 14 20 16 12 25 9 10
## 27835 16 24 11 28 30 39 17
## 28128 6 28 24 27 38 42 19
## 28417 15 17 14 13 34 29 12
## 28480 15 23 25 21 28 38 21
## 28554 8 17 18 20 10 5 17
## 28663 12 23 17 28 24 10 12
## 30331 11 24 22 21 19 16 17
## 30450 11 12 13 16 21 19 11
## 30585 12 20 13 25 13 6 18
## 30649 12 19 14 26 32 24 10
## 30655 9 11 12 11 5 9 13
## 31634 9 10 13 20 10 9 8
## 31884 19 23 14 19 27 25 6
## 32157 13 18 16 22 7 17 16
## 32371 7 18 14 16 13 16 15
## 32632 8 16 21 22 19 37 16
## 32641 10 11 10 9 10 13 8
## 32789 10 18 13 21 17 11 12
## 32951 6 16 17 20 20 25 16
## coverage13 coverage14 coverage15 coverage16 coverage17 coverage18
## 49 26 19 23 19 17 14
## 1885 16 18 13 17 13 20
## 2159 17 19 31 28 18 11
## 2307 31 18 31 20 20 23
## 2447 21 25 23 24 25 19
## 2508 24 23 24 22 14 19
## 2574 34 29 27 39 24 25
## 2775 36 30 45 46 40 25
## 2808 14 20 20 10 19 13
## 3421 19 26 37 35 22 18
## 3787 18 32 16 30 12 21
## 4128 23 25 12 31 20 13
## 4225 32 40 33 41 28 28
## 4434 32 18 51 22 17 14
## 5012 22 23 19 23 9 23
## 5072 29 29 26 18 15 17
## 5152 18 25 17 19 20 14
## 5154 37 30 35 33 32 31
## 5466 29 21 34 39 35 19
## 5505 29 31 34 29 35 18
## 5801 42 46 56 55 35 32
## 6224 30 20 27 23 28 17
## 6227 13 12 15 16 11 11
## 7147 24 23 32 18 25 15
## 8031 19 16 20 27 16 18
## 8216 22 28 20 28 13 14
## 8278 33 41 39 38 23 31
## 8347 27 33 35 36 29 19
## 8425 18 16 25 20 11 10
## 8889 20 22 7 12 12 23
## 9148 29 37 30 40 23 25
## 10035 31 22 25 33 26 30
## 10495 30 30 34 34 24 23
## 10683 25 19 21 36 17 15
## 11094 28 27 34 29 24 24
## 11333 33 28 39 37 25 18
## 11412 15 17 10 12 14 17
## 11445 21 19 15 9 20 24
## 11619 26 26 30 33 34 23
## 11727 21 18 37 33 22 14
## 11916 30 13 59 52 13 11
## 12041 32 13 34 29 12 17
## 12440 22 26 19 18 16 25
## 12509 39 41 42 44 38 23
## 12582 16 14 14 17 15 18
## 15434 42 39 35 45 35 20
## 15534 30 36 34 33 19 18
## 15651 32 39 43 32 27 27
## 17133 15 12 10 6 14 14
## 17497 30 29 44 40 27 15
## 17768 23 24 20 30 23 20
## 17771 20 16 22 15 27 13
## 17847 21 30 29 23 18 18
## 17850 38 40 37 34 29 27
## 18950 39 42 24 47 26 31
## 19376 29 33 41 38 31 23
## 19526 18 26 26 24 18 23
## 19634 18 20 19 15 14 17
## 20356 41 47 35 43 34 40
## 20386 40 46 45 42 34 21
## 20407 25 17 32 30 31 23
## 20950 42 39 47 41 31 27
## 20990 15 34 25 25 32 17
## 21114 28 37 35 34 29 22
## 21130 23 33 27 30 17 18
## 21219 20 26 23 23 22 18
## 21307 34 41 36 37 30 27
## 21469 24 23 27 24 16 13
## 21952 19 23 20 12 9 20
## 22392 16 18 18 16 12 17
## 22642 35 35 39 38 37 25
## 22934 29 28 37 32 34 26
## 22945 29 10 35 27 24 16
## 23102 32 34 39 28 32 26
## 24257 31 36 34 27 29 28
## 24368 17 26 31 28 21 13
## 24380 29 42 43 45 29 25
## 24734 23 29 18 32 37 22
## 25018 35 28 16 33 22 14
## 25240 29 36 22 37 18 19
## 25557 28 25 31 31 18 18
## 25749 20 16 23 12 12 21
## 25839 21 19 22 20 19 12
## 26066 25 25 29 30 25 23
## 26173 27 37 38 40 27 16
## 26704 38 37 31 37 29 22
## 27012 22 14 17 10 16 19
## 27100 43 45 42 43 24 29
## 27217 31 34 21 21 30 33
## 27569 39 35 31 41 33 25
## 27816 17 19 17 17 15 13
## 27835 32 33 34 32 31 24
## 28128 37 34 46 37 47 25
## 28417 24 27 32 16 19 25
## 28480 37 42 43 32 32 12
## 28554 21 12 13 10 4 24
## 28663 18 18 24 25 15 14
## 30331 32 31 31 27 21 27
## 30450 22 32 31 23 12 10
## 30585 18 29 15 26 14 22
## 30649 20 20 29 22 25 13
## 30655 13 16 13 15 14 11
## 31634 16 18 20 18 17 11
## 31884 22 23 19 21 20 10
## 32157 18 26 21 33 12 14
## 32371 28 33 31 34 23 22
## 32632 42 42 46 50 25 15
## 32641 13 13 16 14 11 11
## 32789 15 30 17 20 11 18
## 32951 32 29 41 39 23 11
#identify code to see whether contig IDs are in the same order for macau results and count dataframe
summary(subset(counts.filtered.tot, (siteID %in% c(MACAU.results.pop$id)))[,"siteID"] == MACAU.results.pop$id)
## Mode TRUE
## logical 33174
# apply above code
MACAU.results.pop.c <- cbind(
subset(counts.filtered.tot, (siteID %in% c(MACAU.results.pop$id)))[,1],
MACAU.results.pop) %>%
separate(`subset(counts.filtered.tot, (siteID %in% c(MACAU.results.pop$id)))[, `,
into = c("contig", "locus"), sep = "_")
write.table(MACAU.results.pop.c, file = here::here("analyses", "macau", "MACAU.results.pop.c.tab"), sep = "\t", row.names = FALSE, quote = FALSE)
hist(MACAU.results.pop.c$pvalue) # p-value distribution looks good (i.e. not u-shaped)
macau.qvalue <- qvalue(p = MACAU.results.pop.c$pvalue, fdr.level = 0.1) # create qvalue object, setting FDR=10%
summary(macau.qvalue)
##
## Call:
## qvalue(p = MACAU.results.pop.c$pvalue, fdr.level = 0.1)
##
## pi0: 1
##
## Cumulative number of significant calls:
##
## <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1 <1
## p-value 7 19 107 244 529 1288 33174
## q-value 0 0 0 0 0 0 33174
## local FDR 0 0 0 0 0 0 8
100*(1- macau.qvalue$pi0) #% of all loci in df as truely sign.
## [1] 0
hist(macau.qvalue)
max(macau.qvalue$qvalues[macau.qvalue$pvalues <= 0.01])
## [1] 0.9998713
plot(macau.qvalue)
summary(macau.qvalue$significant) #0 sign. (# TRUE)
## Mode FALSE
## logical 33174