Visualize MACAU results

Ran MACAU in a Jupyter Notebook. See 05-MACAU.ipynb

In this notebook I assess patterns in methylated loci that MACAU identified as influence by oyster phenotype, aka oyster shell length.

Load libraries

list.of.packages <- c("vegan", "dplyr", "factoextra", "FactoMineR", "tidyverse", "tibble", "reshape2", "gplots") #add new libraries here 
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

# Load all libraries 
lapply(list.of.packages, FUN = function(X) {
  do.call("require", list(X)) 
})
source("../resources/biostats.R")

Read in dataframe with MACAU assocation results

macau.20190812 <- read.table(file="../analyses/macau/output/20190812-macau.assoc.txt", sep = "\t", stringsAsFactors = F)
head(macau.20190812)
##             V1 V2        V3         V4        V5        V6        V7
## 1           id  n acpt_rate       beta   se_beta    pvalue         h
## 2 Contig038973 18 4.069e-01 -6.949e-02 5.548e-02 2.104e-01 7.713e-01
## 3 Contig039226 18 4.844e-01 -1.022e-02 3.708e-02 7.829e-01 6.961e-01
## 4 Contig039234 18 3.831e-01  4.603e-02 3.661e-02 2.087e-01 7.543e-01
## 5 Contig039252 18 4.388e-01 -4.211e-02 5.445e-02 4.393e-01 7.156e-01
## 6 Contig041234 18 3.909e-01  7.483e-02 9.470e-02 4.294e-01 6.285e-01
##          V8        V9       V10        V11       V12        V13       V14
## 1      se_h    sigma2 se_sigma2     alpha0 se_alpha0     alpha1 se_alpha1
## 2 2.336e-01 2.793e+00 5.452e+00  1.936e+00 1.027e+00  4.199e-01 2.701e-01
## 3 2.519e-01 9.479e-01 1.905e+00  8.591e-01 6.584e-01 -2.180e-01 1.730e-01
## 4 2.308e-01 3.532e+00 1.117e+01 -1.400e+00 7.399e-01 -1.432e-01 1.573e-01
## 5 2.512e-01 3.239e+00 8.253e+00  1.283e+00 1.019e+00  1.868e-01 2.675e-01
## 6 2.689e-01 5.002e+00 8.564e+00  4.363e-01 1.708e+00 -4.537e-01 3.745e-01
colnames(macau.20190812) <- macau.20190812[1,]
macau.20190812 <- macau.20190812[-1, ] 
macau.20190812$pvalue <- as.numeric(macau.20190812$pvalue)
hist(macau.20190812$pvalue)

tail(macau.20190812)
##                     id  n acpt_rate       beta   se_beta  pvalue         h
## 256039 Contig999686327 18 2.867e-01 -7.455e-02 1.685e-01 0.65820 4.446e-01
## 256040 Contig999686331 18 2.667e-01  2.807e-03 6.758e-02 0.96690 7.291e-01
## 256041 Contig999686335 18 2.837e-01  1.930e-01 1.060e-01 0.06853 6.724e-01
## 256042 Contig999686355 18 3.135e-01 -2.182e-01 1.371e-01 0.11150 7.448e-01
## 256043 Contig999686427 18 3.417e-01 -9.610e-02 1.251e-01 0.44250 3.734e-01
## 256044 Contig999924037 18 2.297e-01  9.472e-02 6.431e-02 0.14080 7.273e-01
##             se_h    sigma2 se_sigma2     alpha0 se_alpha0     alpha1
## 256039 1.970e-01 1.469e+01 1.571e+01  5.009e+00 3.466e+00  3.400e-01
## 256040 2.308e-01 5.274e+00 1.207e+01  2.761e+00 1.356e+00  1.124e-02
## 256041 2.738e-01 8.602e+00 1.410e+01  5.117e-01 1.757e+00 -3.651e-01
## 256042 2.632e-01 9.999e+00 1.805e+01  7.327e+00 2.813e+00  8.290e-01
## 256043 2.260e-01 5.672e+00 5.909e+00  6.830e-01 2.057e+00  5.862e-01
## 256044 1.666e-01 6.692e+00 7.840e+00 -1.337e+00 1.218e+00 -3.824e-01
##        se_alpha1
## 256039 7.843e-01
## 256040 2.735e-01
## 256041 4.128e-01
## 256042 6.921e-01
## 256043 5.910e-01
## 256044 2.708e-01

Extract IDs for loci where MACAU indicates DML, where p<0.05

head(macau.20190812.05 <- subset(macau.20190812, pvalue<0.05))
##                   id  n acpt_rate       beta   se_beta   pvalue         h
## 8       Contig064179 18 2.235e-01  1.405e-01 7.007e-02 0.044950 8.094e-01
## 70     Contig1071543 18 2.449e-01 -2.962e-01 1.421e-01 0.037100 6.220e-01
## 101 Contig1000172990 18 3.174e-01 -3.988e-01 1.783e-01 0.025310 7.202e-01
## 110   Contig10002463 18 2.163e-01 -4.471e-01 2.242e-01 0.046180 8.104e-01
## 125  Contig100061160 18 3.445e-01  2.492e-01 1.186e-01 0.035660 6.265e-01
## 139  Contig100099890 18 2.845e-01  4.332e-01 1.669e-01 0.009455 7.579e-01
##          se_h    sigma2 se_sigma2     alpha0 se_alpha0     alpha1
## 8   1.432e-01 1.559e+01 1.805e+01  4.177e-01 1.547e+00 -6.862e-01
## 70  2.205e-01 3.292e+01 3.525e+01  6.610e+00 3.033e+00  8.744e-01
## 101 2.606e-01 8.921e+00 1.648e+01  9.332e+00 2.670e+00  2.022e+00
## 110 1.802e-01 3.598e+01 2.970e+01  7.587e+00 4.226e+00  2.876e+00
## 125 2.615e-01 8.187e+00 1.343e+01 -2.538e+00 2.010e+00 -6.028e-03
## 139 2.059e-01 1.509e+01 2.230e+01 -4.009e+00 2.690e+00 -3.484e-01
##     se_alpha1
## 8   2.475e-01
## 70  5.343e-01
## 101 1.232e+00
## 110 1.094e+00
## 125 6.331e-01
## 139 5.902e-01

Read in count data

counts.tot.destrand <- read.table(file = "../analyses/counts-total-destrand.txt", header = T)
counts.meth.destrand <- read.table(file = "../analyses/counts-meth-destrand.txt", header = T)

Extract raw count data for loci where p<0.05 in MACAU

# merge macau results with total counts 
head(macau.20190812.tot.counts <- merge(x=macau.20190812.05, y=subset(counts.tot.destrand, siteID %in% macau.20190812.05$id), by.x="id", by.y="siteID"))
##                 id  n acpt_rate       beta   se_beta   pvalue         h
## 1     Contig064179 18 2.235e-01  1.405e-01 7.007e-02 0.044950 8.094e-01
## 2 Contig1000172990 18 3.174e-01 -3.988e-01 1.783e-01 0.025310 7.202e-01
## 3   Contig10002463 18 2.163e-01 -4.471e-01 2.242e-01 0.046180 8.104e-01
## 4  Contig100061160 18 3.445e-01  2.492e-01 1.186e-01 0.035660 6.265e-01
## 5  Contig100099890 18 2.845e-01  4.332e-01 1.669e-01 0.009455 7.579e-01
## 6 Contig1001181581 18 3.919e-01  2.306e-01 1.018e-01 0.023580 6.412e-01
##        se_h    sigma2 se_sigma2     alpha0 se_alpha0     alpha1 se_alpha1
## 1 1.432e-01 1.559e+01 1.805e+01  4.177e-01 1.547e+00 -6.862e-01 2.475e-01
## 2 2.606e-01 8.921e+00 1.648e+01  9.332e+00 2.670e+00  2.022e+00 1.232e+00
## 3 1.802e-01 3.598e+01 2.970e+01  7.587e+00 4.226e+00  2.876e+00 1.094e+00
## 4 2.615e-01 8.187e+00 1.343e+01 -2.538e+00 2.010e+00 -6.028e-03 6.331e-01
## 5 2.059e-01 1.509e+01 2.230e+01 -4.009e+00 2.690e+00 -3.484e-01 5.902e-01
## 6 2.604e-01 3.787e+00 7.594e+00 -1.466e+00 1.601e+00 -5.774e-01 4.115e-01
##   coverage1 coverage2 coverage3 coverage4 coverage5 coverage6 coverage7
## 1        10        14         9        21        11        13        13
## 2         9        12         7         4         3         3        13
## 3         4         2         7         6         4         4         5
## 4        11         2         4         5         5         6        11
## 5         2         9         3         5         9         7        15
## 6        10         9         8         7         4         2         7
##   coverage8 coverage9 coverage10 coverage11 coverage12 coverage13
## 1        12        15          5          5         19         11
## 2         8         6         10          8         17         16
## 3         4         6         11         17          5          7
## 4         7         9          3          5          6          9
## 5        10        14         12          3         13          7
## 6         6        17          6          2         11          8
##   coverage14 coverage15 coverage16 coverage17 coverage18
## 1         24         19         10         10         15
## 2         13         16         14          6          7
## 3          8         12         15          5          5
## 4         12          9          8          9          2
## 5         20         12          5         10         17
## 6         13         14          9          4          7
# merge macau results with methylated counts 
head(macau.20190812.met.counts <- merge(x=macau.20190812.05, y=subset(counts.meth.destrand, siteID %in% macau.20190812.05$id), by.x="id", by.y="siteID"))
##                 id  n acpt_rate       beta   se_beta   pvalue         h
## 1     Contig064179 18 2.235e-01  1.405e-01 7.007e-02 0.044950 8.094e-01
## 2 Contig1000172990 18 3.174e-01 -3.988e-01 1.783e-01 0.025310 7.202e-01
## 3   Contig10002463 18 2.163e-01 -4.471e-01 2.242e-01 0.046180 8.104e-01
## 4  Contig100061160 18 3.445e-01  2.492e-01 1.186e-01 0.035660 6.265e-01
## 5  Contig100099890 18 2.845e-01  4.332e-01 1.669e-01 0.009455 7.579e-01
## 6 Contig1001181581 18 3.919e-01  2.306e-01 1.018e-01 0.023580 6.412e-01
##        se_h    sigma2 se_sigma2     alpha0 se_alpha0     alpha1 se_alpha1
## 1 1.432e-01 1.559e+01 1.805e+01  4.177e-01 1.547e+00 -6.862e-01 2.475e-01
## 2 2.606e-01 8.921e+00 1.648e+01  9.332e+00 2.670e+00  2.022e+00 1.232e+00
## 3 1.802e-01 3.598e+01 2.970e+01  7.587e+00 4.226e+00  2.876e+00 1.094e+00
## 4 2.615e-01 8.187e+00 1.343e+01 -2.538e+00 2.010e+00 -6.028e-03 6.331e-01
## 5 2.059e-01 1.509e+01 2.230e+01 -4.009e+00 2.690e+00 -3.484e-01 5.902e-01
## 6 2.604e-01 3.787e+00 7.594e+00 -1.466e+00 1.601e+00 -5.774e-01 4.115e-01
##   numCs1 numCs2 numCs3 numCs4 numCs5 numCs6 numCs7 numCs8 numCs9 numCs10
## 1      9     10      8     20      8     13      5     11      9       3
## 2      9     12      6      3      3      3     13      8      6      10
## 3      4      2      7      6      4      4      5      4      5      11
## 4      9      2      3      4      5      6     10      7      9       2
## 5      2      9      3      5      9      7     14      9     14      12
## 6      8      9      7      7      4      2      5      5     14       6
##   numCs11 numCs12 numCs13 numCs14 numCs15 numCs16 numCs17 numCs18
## 1       4      14       8      18      17      10      10       7
## 2       8      17      16      13      15      14       6       7
## 3      17       3       5       7      10      12       5       5
## 4       5       4       9      11       9       8       8       2
## 5       3      11       7      20      12       4      10      17
## 6       2       9       7      11      14       8       4       5
nrow(macau.20190812.05) - nrow(macau.20190812.met.counts) #3 row diff (due to duplicate contig ID's, weird)
## [1] -3

Filter only loci with 10x or greater coverage for all samples

macau.20190812.tot.counts.10x <- macau.20190812.tot.counts %>% 
  filter_at(vars(starts_with("coverage")), all_vars(. > 9))

# Subset methylated count data / macau results for loci with 10x or greater coverage 
macau.20190812.met.counts.10x <- subset(macau.20190812.met.counts, id %in% macau.20190812.tot.counts.10x$id)
nrow(macau.20190812.met.counts.10x) - nrow(macau.20190812.tot.counts.10x) #2 extra rows in methyl.counts.10x df 
## [1] 1
# Find duplicated contig entries in methyl.counts.10x df 
z <- macau.20190812.met.counts.10x[duplicated(macau.20190812.met.counts.10x$id),"id"]
macau.20190812.met.counts.10x[macau.20190812.met.counts.10x$id %in% z,] # Weird, duplicate entries have diff't counts
##                    id  n acpt_rate      beta   se_beta    pvalue         h
## 10462 Contig477818949 18 2.935e-01 2.640e-01 7.230e-02 0.0002601 7.298e-01
## 10463 Contig477818949 18 2.935e-01 2.640e-01 7.230e-02 0.0002601 7.298e-01
##            se_h    sigma2 se_sigma2     alpha0 se_alpha0     alpha1
## 10462 2.412e-01 9.451e+00 1.601e+01 -5.883e-01 1.381e+00 -1.408e+00
## 10463 2.412e-01 9.451e+00 1.601e+01 -5.883e-01 1.381e+00 -1.408e+00
##       se_alpha1 numCs1 numCs2 numCs3 numCs4 numCs5 numCs6 numCs7 numCs8
## 10462 2.947e-01     10     10     14     11     14     11     12     16
## 10463 2.947e-01      6      9     11     15      7      3     17      9
##       numCs9 numCs10 numCs11 numCs12 numCs13 numCs14 numCs15 numCs16
## 10462     14      22      17      14      14      27      33      16
## 10463     14      18      30       2      11      22      24      35
##       numCs17 numCs18
## 10462      17       9
## 10463      29       8
#Remove rows with the duplicate contigs 
macau.20190812.met.counts.10x <- macau.20190812.met.counts.10x[!(macau.20190812.met.counts.10x$id %in% z), ]
macau.20190812.tot.counts.10x <- macau.20190812.tot.counts.10x[!(macau.20190812.tot.counts.10x$id %in% z),]

# Confirm meth.10x and tot.10x df are same 
identical(macau.20190812.tot.counts.10x$id, macau.20190812.met.counts.10x$id) #yes 
## [1] TRUE

Create % methylated dataframe from loci 10x coverage and MACAU p-value <0.05

# duplicate macau methylated count dataframe to make % methylated df 
macau.20190812.perc.meth.10x <- macau.20190812.met.counts.10x

# divide macau methylated counts by total counts 
macau.20190812.perc.meth.10x[,15:32] <- macau.20190812.met.counts.10x[,15:32]/macau.20190812.tot.counts.10x[,15:32]
head(macau.20190812.perc.meth.10x[15:32])
##       numCs1    numCs2    numCs3    numCs4    numCs5    numCs6    numCs7
## 7  0.7333333 0.9166667 0.9565217 1.0000000 0.9166667 0.9166667 0.7391304
## 12 1.0000000 1.0000000 0.9444444 1.0000000 0.8888889 0.9090909 0.9500000
## 38 0.9444444 0.8181818 1.0000000 0.8823529 0.9166667 1.0000000 0.8333333
## 57 1.0000000 0.8333333 1.0000000 0.8750000 0.8000000 0.7272727 0.8888889
## 58 0.9411765 1.0000000 1.0000000 0.8888889 1.0000000 0.9000000 0.7857143
## 62 0.7000000 0.9000000 1.0000000 1.0000000 0.9090909 1.0000000 0.9285714
##       numCs8    numCs9   numCs10   numCs11   numCs12   numCs13   numCs14
## 7  0.9411765 0.8928571 0.8378378 0.7575758 0.7307692 0.8823529 0.9655172
## 12 1.0000000 1.0000000 1.0000000 0.9677419 1.0000000 0.9615385 0.9565217
## 38 1.0000000 0.8000000 0.9130435 0.8823529 1.0000000 0.9629630 0.9714286
## 57 1.0000000 0.9523810 1.0000000 0.9375000 0.8888889 0.9142857 0.9024390
## 58 0.9473684 0.8235294 0.8800000 0.9285714 0.9166667 0.9687500 0.9642857
## 62 0.9411765 0.9000000 0.9565217 0.9090909 1.0000000 1.0000000 1.0000000
##      numCs15   numCs16   numCs17   numCs18
## 7  0.9459459 0.9062500 0.9666667 0.8695652
## 12 1.0000000 0.9230769 1.0000000 1.0000000
## 38 0.9393939 0.9677419 0.9090909 0.8275862
## 57 0.8974359 0.8974359 0.9523810 0.9200000
## 58 1.0000000 0.9642857 0.9047619 0.9411765
## 62 1.0000000 0.9500000 1.0000000 0.8888889

Visualize

Heatmaps using raw counts

—> mbd samples #1-9 = Hood Canal population
—> mbd samples #10-18 = South Sound population

par(cex.main=.8)
heatmap(as.matrix(subset(macau.20190812.met.counts.10x, pvalue<0.01)[15:32]), main = "MACAU id'd counts, pvalue <0.01", xlab = "mbd sample", ylab = "loci")

heatmap(as.matrix(subset(macau.20190812.met.counts.10x, pvalue<0.001)[15:32]), main = "MACAU id'd counts, pvalue <0.001", xlab = "mbd sample", ylab = "loci")

heatmap(as.matrix(subset(macau.20190812.met.counts.10x, pvalue<0.0001)[15:32]), main = "MACAU id'd counts, pvalue <0.0001", xlab = "mbd sample", ylab = "loci")

PCA using raw counts

# Transpose methylated count df 
macau.20190812.met.counts.10x.t <- macau.20190812.met.counts.10x[c(1,15:32)] %>% remove_rownames %>% gather(column, value, -id) %>% spread(id, value) %>% column_to_rownames(var="column")

# Conduct PCA with variance-covariance matrix 
met.count.pca <- prcomp(macau.20190812.met.counts.10x.t, scale=F) #scale=F for variance-covariance matrix

# Assess PCA results 
summary(met.count.pca)
## Importance of components:
##                             PC1      PC2      PC3      PC4      PC5
## Standard deviation     298.2108 89.89458 76.48173 68.35893 57.14897
## Proportion of Variance   0.6654  0.06047  0.04377  0.03497  0.02444
## Cumulative Proportion    0.6654  0.72590  0.76967  0.80463  0.82907
##                             PC6     PC7      PC8      PC9     PC10
## Standard deviation     52.60465 51.4462 50.23146 47.91748 44.86119
## Proportion of Variance  0.02071  0.0198  0.01888  0.01718  0.01506
## Cumulative Proportion   0.84978  0.8696  0.88846  0.90564  0.92070
##                            PC11     PC12     PC13    PC14    PC15     PC16
## Standard deviation     44.11741 42.26982 41.28330 39.7147 36.9132 34.18894
## Proportion of Variance  0.01456  0.01337  0.01275  0.0118  0.0102  0.00875
## Cumulative Proportion   0.93526  0.94863  0.96139  0.9732  0.9834  0.99213
##                            PC17      PC18
## Standard deviation     32.43052 9.032e-14
## Proportion of Variance  0.00787 0.000e+00
## Cumulative Proportion   1.00000 1.000e+00
pca.eigenval(met.count.pca) #The Proporation of Variance = %variance 
## Importance of components:
##                                 PC1          PC2          PC3          PC4
## Variance(eigenvalue)   8.892970e+04 8081.0354537 5849.4557516 4672.9438735
## Proportion of Variance 6.654284e-01    0.0604674    0.0437693    0.0349659
## Cumulative Proportion  6.654284e-01    0.7258959    0.7696652    0.8046311
## Broken-stick value     3.495108e+00    2.4951081    1.9951081    1.6617747
##                                 PC5          PC6          PC7          PC8
## Variance(eigenvalue)   3266.0045849 2767.2488294 2646.7116589 2523.1992311
## Proportion of Variance    0.0244383    0.0207063    0.0198044    0.0188802
## Cumulative Proportion     0.8290694    0.8497757    0.8695801    0.8884603
## Broken-stick value        1.4117747    1.2117747    1.0451081    0.9022509
##                                 PC9         PC10         PC11         PC12
## Variance(eigenvalue)   2296.0848828 2012.5263384 1946.3462382 1786.7377612
## Proportion of Variance    0.0171808    0.0150590    0.0145638    0.0133695
## Cumulative Proportion     0.9056411    0.9207001    0.9352639    0.9486334
## Broken-stick value        0.7772509    0.6661398    0.5661398    0.4752307
##                                PC13         PC14         PC15         PC16
## Variance(eigenvalue)   1704.3109050 1577.2613514 1362.5847796 1168.8838911
## Proportion of Variance    0.0127527    0.0118021    0.0101957    0.0087463
## Cumulative Proportion     0.9613861    0.9731882    0.9833839    0.9921302
## Broken-stick value        0.3918974    0.3149743    0.2435458    0.1768791
##                                PC17      PC18
## Variance(eigenvalue)   1051.7389374 0.0000000
## Proportion of Variance    0.0078698 0.0000000
## Cumulative Proportion     1.0000000 1.0000000
## Broken-stick value        0.1143791 0.0555556
fviz_screeplot(met.count.pca, addlabels = TRUE) #PC1 is sign., none other are  

fviz_contrib(met.count.pca, choice = "var", axes = 1, top=50) #check out loci that are high contributors

PC plot using PC1 and PC2

pops <- c("HC", rep("SS", times=9), rep("HC", times=8)) #create pop vector for color-coding 
fviz_pca_ind(met.count.pca, label="ind", col.ind=pops)+ ggtitle(label="PCA methylated counts, loci id'd by MACAU, PC 1x2")  #(could also use fviz_pca_biplot) 

Heatmaps using % methylated

par(cex.main=.8)
heatmap(as.matrix(subset(macau.20190812.perc.meth.10x, pvalue<0.01)[15:32]), main = "MACAU id'd % methylated, pvalue <0.01", xlab = "mbd sample", ylab = "loci")

heatmap(as.matrix(subset(macau.20190812.perc.meth.10x, pvalue<0.001)[15:32]), main = "MACAU id'd % methylated, pvalue <0.001", xlab = "mbd sample", ylab = "loci")

heatmap(as.matrix(subset(macau.20190812.perc.meth.10x, pvalue<0.0001)[15:32]), main = "MACAU id'd % methylated, pvalue <0.0001", xlab = "mbd sample", ylab = "loci")

PCA with % methylated df

# Transpose methylated count df 
macau.20190812.perc.meth.10x.t <- macau.20190812.perc.meth.10x[c(1,15:32)] %>% remove_rownames %>% gather(column, value, -id) %>% spread(id, value) %>% column_to_rownames(var="column")

met.perc.pca <- prcomp(macau.20190812.perc.meth.10x.t, scale=F) #scale=F for variance-covariance matrix
summary(met.perc.pca)
## Importance of components:
##                           PC1    PC2     PC3     PC4    PC5     PC6
## Standard deviation     1.9595 1.4621 1.36613 1.13609 1.0792 1.06907
## Proportion of Variance 0.1985 0.1105 0.09646 0.06671 0.0602 0.05907
## Cumulative Proportion  0.1985 0.3090 0.40542 0.47213 0.5323 0.59140
##                            PC7     PC8     PC9   PC10    PC11    PC12
## Standard deviation     1.02397 0.99354 0.93391 0.9206 0.86844 0.86096
## Proportion of Variance 0.05419 0.05102 0.04508 0.0438 0.03898 0.03831
## Cumulative Proportion  0.64559 0.69662 0.74170 0.7855 0.82448 0.86279
##                           PC13    PC14    PC15    PC16    PC17     PC18
## Standard deviation     0.82438 0.75115 0.71754 0.68358 0.65466 2.04e-15
## Proportion of Variance 0.03513 0.02916 0.02661 0.02415 0.02215 0.00e+00
## Cumulative Proportion  0.89792 0.92708 0.95370 0.97785 1.00000 1.00e+00
pca.eigenval(met.perc.pca) #The Proporation of Variance = %variance 
## Importance of components:
##                              PC1       PC2       PC3       PC4       PC5
## Variance(eigenvalue)   3.8397200 2.1376575 1.8663095 1.2906948 1.1646448
## Proportion of Variance 0.1984634 0.1104890 0.0964638 0.0667121 0.0601969
## Cumulative Proportion  0.1984634 0.3089524 0.4054163 0.4721283 0.5323253
## Broken-stick value     3.4951081 2.4951081 1.9951081 1.6617747 1.4117747
##                              PC6       PC7       PC8       PC9      PC10
## Variance(eigenvalue)   1.1429206 1.0485099 0.9871256 0.8721801 0.8474962
## Proportion of Variance 0.0590741 0.0541943 0.0510215 0.0450803 0.0438045
## Cumulative Proportion  0.5913994 0.6455936 0.6966152 0.7416955 0.7855000
## Broken-stick value     1.2117747 1.0451081 0.9022509 0.7772509 0.6661398
##                             PC11      PC12      PC13      PC14      PC15
## Variance(eigenvalue)   0.7541815 0.7412501 0.6796046 0.5642250 0.5148675
## Proportion of Variance 0.0389813 0.0383130 0.0351267 0.0291631 0.0266119
## Cumulative Proportion  0.8244813 0.8627943 0.8979210 0.9270840 0.9536960
## Broken-stick value     0.5661398 0.4752307 0.3918974 0.3149743 0.2435458
##                             PC16      PC17      PC18
## Variance(eigenvalue)   0.4672754 0.4285796 0.0000000
## Proportion of Variance 0.0241520 0.0221520 0.0000000
## Cumulative Proportion  0.9778480 1.0000000 1.0000000
## Broken-stick value     0.1768791 0.1143791 0.0555556
fviz_screeplot(met.perc.pca, addlabels = TRUE) #PC 1, 2 & 3 look sign.

fviz_contrib(met.perc.pca, choice = "var", axes = 1, top=50)

fviz_pca_biplot(met.perc.pca, label="ind", invisible = "var", col.ind=pops, axes = c(1,2), pointsize=3) + ggtitle(label="PCA % methylated, loci id'd by MACAU, PC 1x2")

fviz_pca_biplot(met.perc.pca, label="ind",  invisible = "var",col.ind=pops, axes = c(1,3), pointsize=3) + ggtitle(label="PCA % methylated, loci id'd by MACAU, PC 1x3")

fviz_pca_biplot(met.perc.pca, label="ind", invisible = "var", col.ind=pops, axes = c(2,3), pointsize=3) + ggtitle(label="PCA % methylated, loci id'd by MACAU, PC 2x3")  #dimension 2 = pop separation