--- title: "Foot_TC_DGE_viz" output: html_document date: "2024-02-14" --- ### OA vs treatment control ```{r} # DEG in the foot after exposure to ocean acidification # Filter data FOA_TC_treatmentinfo <- treatmentinfo.2 %>% filter(tissue == "F" | treatment == "control" | treatment == "OA") %>% filter(!(tissue == "G" | treatment == "OW" | treatment == "DO")) FOA_TC_treatmentinfo <- FOA_TC_treatmentinfo %>% filter(!(treatment == "control" & day == "0")) write.table(FOA_TC_treatmentinfo, file = "output/DEG_lists/Foot/FOA_TC_treatmentinfo.csv", row.names = FALSE) FOA_TC_countmatrix <- subset(countmatrix_2, select=row.names(FOA_TC_treatmentinfo)) write.table(FOA_TC_countmatrix, file = "output/DEG_lists/Foot/FOA_TC_countmatrix.csv", row.names = FALSE) # CaTCulate DESeq object dds12 <- DESeqDataSetFromMatrix(countData = FOA_TC_countmatrix, colData = FOA_TC_treatmentinfo, design = ~ treatment) dds12 <- DESeq(dds12) resultsNames(dds12) # lists the coefficients PCAdata <- plotPCA(vst(dds12), intgroup = c("treatment"), returnData=TRUE) percentVar <- round(100*attr(PCAdata, "percentVar")) #plot PCA of samples with all data ggplot(PCAdata, aes(PC1, PC2, color=treatment)) + geom_point(size=4, alpha = 5/10) + xlab(paste0("PC1: ",percentVar[1],"% variance")) + ylab(paste0("PC2: ",percentVar[2],"% variance")) + coord_fixed()+ stat_ellipse() # Filtering: keep genes that have at least 10 counts across 1/3 of the samples - https://support.bioconductor.org/p/110307/ keep <- rowSums(DESeq2::counts(dds12) >= 10) >= ncol(FOA_TC_countmatrix)/3 dds12_filt <- dds12[keep,] ``` ### OW vs treatment control ```{r} # Filter data FOW_TC_treatmentinfo <- treatmentinfo.2 %>% filter(tissue == "F" | treatment == "control" | treatment == "OW") %>% filter(!(tissue == "G" | treatment == "OA" | treatment == "DO")) FOW_TC_treatmentinfo <- FOW_TC_treatmentinfo %>% filter(!(treatment == "control" & day == "0")) write.table(FOW_TC_treatmentinfo, file = "output/DEG_lists/Foot/FOW_TC_treatmentinfo.csv", row.names = FALSE) FOW_TC_countmatrix <- subset(countmatrix_2, select=row.names(FOW_TC_treatmentinfo)) write.table(FOW_TC_countmatrix, file = "output/DEG_lists/Foot/FOW_TC_countmatrix.csv", row.names = FALSE) # CaTCulate DESeq object dds13 <- DESeqDataSetFromMatrix(countData = FOW_TC_countmatrix, colData = FOW_TC_treatmentinfo, design = ~ treatment) dds13 <- DESeq(dds13) resultsNames(dds13) # lists the coefficients # -- note: fitType='parametric', but the dispersion trend was not well captured by the # function: y = a/x + b, and a local regression fit was automatically substituted. # specify fitType='local' or 'mean' to avoid this message next time. PCAdata <- plotPCA(vst(dds13), intgroup = c("treatment"), returnData=TRUE) percentVar <- round(100*attr(PCAdata, "percentVar")) #plot PCA of samples with all data ggplot(PCAdata, aes(PC1, PC2, color=treatment)) + geom_point(size=4, alpha = 5/10) + xlab(paste0("PC1: ",percentVar[1],"% variance")) + ylab(paste0("PC2: ",percentVar[2],"% variance")) + coord_fixed()+ stat_ellipse(level=0.95) # Filtering: keep genes that have at least 10 counts across 1/3 of the samples - https://support.bioconductor.org/p/110307/ keep <- rowSums(DESeq2::counts(dds13) >= 10) >= ncol(FOW_TC_countmatrix)/3 dds13_filt <- dds13[keep,] ``` ### DO vs treatment control ```{r} # Filter data FDO_TC_treatmentinfo <- treatmentinfo.2 %>% filter(tissue == "F" | treatment == "control" | treatment == "DO") %>% filter(!(tissue == "G" | treatment == "OA" | treatment == "OW")) FDO_TC_treatmentinfo <- FDO_TC_treatmentinfo %>% filter(!(treatment == "control" & day == "0")) write.table(FDO_TC_treatmentinfo, file = "output/DEG_lists/Foot/FDO_TC_treatmentinfo.csv", row.names = FALSE) FDO_TC_countmatrix <- subset(countmatrix_2, select=row.names(FDO_TC_treatmentinfo)) write.table(FDO_TC_countmatrix, file = "output/DEG_lists/Foot/FDO_TC_countmatrix.csv", row.names = FALSE) # CaTCulate DESeq object dds14 <- DESeqDataSetFromMatrix(countData = FDO_TC_countmatrix, colData = FDO_TC_treatmentinfo, design = ~ treatment) dds14 <- DESeq(dds14) resultsNames(dds14) # lists the coefficients PCAdata <- plotPCA(vst(dds14), intgroup = c("treatment"), returnData=TRUE) # -- note: fitType='parametric', but the dispersion trend was not well captured by the # function: y = a/x + b, and a local regression fit was automatically substituted. # specify fitType='local' or 'mean' to avoid this message next time. percentVar <- round(100*attr(PCAdata, "percentVar")) #plot PCA of samples with all data ggplot(PCAdata, aes(PC1, PC2, color=treatment)) + geom_point(size=4, alpha = 5/10) + xlab(paste0("PC1: ",percentVar[1],"% variance")) + ylab(paste0("PC2: ",percentVar[2],"% variance")) + coord_fixed()+ stat_ellipse(level=0.95) # Filtering: keep genes that have at least 10 counts across 1/3 of the samples - https://support.bioconductor.org/p/110307/ keep <- rowSums(DESeq2::counts(dds14) >= 10) >= ncol(FDO_TC_countmatrix)/3 dds14_filt <- dds14[keep,] ```