--- title: "DGE_viz_gill" output: html_document date: "2024-02-06" --- ## Overall comparison with lab control ```{r} library(BiocManager) library(DESeq2) G_treatmentinfo <- treatmentinfo.2 %>% filter(tissue == "G") %>% filter(!( tissue == "F")) G_treatmentinfo <- G_treatmentinfo %>% mutate(treatment = ifelse(day == "3" & treatment == "control", paste0(treatment, "_3"), treatment)) write.table(G_treatmentinfo, file = "output/DEG_lists/Gill/G_treatmentinfo.csv", row.names = FALSE) G_countmatrix <- subset(countmatrix_2, select=row.names(G_treatmentinfo)) write.table(G_countmatrix, file = "output/DEG_lists/Gill/G_countmatrix.csv", row.names = FALSE) ddsG <- DESeqDataSetFromMatrix(countData = G_countmatrix, colData = G_treatmentinfo, design = ~ treatment) ddsG <- DESeq(ddsG) resultsNames(ddsG) # lists the coefficients summary(ddsG) PCAdataG <- plotPCA(vst(ddsG), intgroup = c("treatment"), returnData=TRUE) percentVarG <- round(100*attr(PCAdataG, "percentVar")) #plot PCA of samples with all data Gill <- ggplot(PCAdataG, aes(PC1, PC2, color=treatment)) + geom_point(size=4, alpha = 5/10) + xlab(paste0("PC1: ",percentVarG[1],"% variance")) + ylab(paste0("PC2: ",percentVarG[2],"% variance")) + coord_fixed()+ stat_ellipse(level=0.95)+ theme_classic()+ scale_y_continuous(breaks=c(-80, -40 ,0, 40, 80), limits= c(-80, 80))+ labs(color='Treatment')+ theme(axis.line = element_line(colour = 'black', size = 0.5))+ scale_color_manual(labels = c("Laboratory Control", "Treatment Control", "Hypoxia", "Ocean Acidification", "Ocean Warming"), values = c("blue", "lightblue", "purple","green", "red")) Gill ``` ### OA vs lab control ```{r} # Filter data GOA_LC_treatmentinfo <- treatmentinfo.2 %>% filter(tissue == "G" | treatment == "control" | treatment == "OA") %>% filter(!(tissue == "F" | treatment == "OW" | treatment == "DO")) GOA_LC_treatmentinfo <- GOA_LC_treatmentinfo %>% filter(!(treatment == "control" & day == "3")) write.table(GOA_LC_treatmentinfo, file = "output/DEG_lists/Gill/GOA_LC_treatmentinfo.csv", row.names = FALSE) GOA_LC_countmatrix <- subset(countmatrix_2, select=row.names(GOA_LC_treatmentinfo)) write.table(GOA_LC_countmatrix, file = "output/DEG_lists/Gill/GOA_LC_countmatrix.csv", row.names = FALSE) # Calculate DESeq object dds8 <- DESeqDataSetFromMatrix(countData = GOA_LC_countmatrix, colData = GOA_LC_treatmentinfo, design = ~ treatment) dds8 <- DESeq(dds8) resultsNames(dds8) # lists the coefficients PCAdata <- plotPCA(vst(dds8), intgroup = c("treatment"), returnData=TRUE) percentVar <- round(100*attr(PCAdata, "percentVar")) #plot PCA of samples with all data ggplot(PCAdata, aes(PC1, PC2, color=treatment)) + geom_point(size=4, alpha = 5/10) + xlab(paste0("PC1: ",percentVar[1],"% variance")) + ylab(paste0("PC2: ",percentVar[2],"% variance")) + coord_fixed()+ stat_ellipse() # Filtering: keep genes that have at least 10 counts across 1/3 of the samples - https://support.bioconductor.org/p/110307/ keep <- rowSums(DESeq2::counts(dds8) >= 10) >= ncol(GOA_LC_countmatrix)/3 dds8_filt <- dds8[keep,] # Generate Contrasts contrast_listGOA <- c("treatment", "OA", "control") # order is important: factor, treatment group, control res_tableGOA <- results(dds8_filt, contrast=contrast_listGOA, alpha = 0.05) res_tableGOA ``` ### OW vs lab control ```{r} # Filter data GOW_LC_treatmentinfo <- treatmentinfo.2 %>% filter(tissue == "G" | treatment == "control" | treatment == "OW") %>% filter(!(tissue == "F" | treatment == "OA" | treatment == "DO")) GOW_LC_treatmentinfo <- GOW_LC_treatmentinfo %>% filter(!(treatment == "control" & day == "3")) write.table(GOW_LC_treatmentinfo, file = "output/DEG_lists/Gill/GOW_LC_treatmentinfo.csv", row.names = FALSE) GOW_LC_countmatrix <- subset(countmatrix_2, select=row.names(GOW_LC_treatmentinfo)) write.table(GOW_LC_countmatrix, file = "output/DEG_lists/Gill/GOW_LC_countmatrix.csv", row.names = FALSE) # Calculate DESeq object dds9 <- DESeqDataSetFromMatrix(countData = GOW_LC_countmatrix, colData = GOW_LC_treatmentinfo, design = ~ treatment) dds9 <- DESeq(dds9) resultsNames(dds9) # lists the coefficients PCAdata <- plotPCA(vst(dds9), intgroup = c("treatment"), returnData=TRUE) percentVar <- round(100*attr(PCAdata, "percentVar")) #plot PCA of samples with all data ggplot(PCAdata, aes(PC1, PC2, color=treatment)) + geom_point(size=4, alpha = 5/10) + xlab(paste0("PC1: ",percentVar[1],"% variance")) + ylab(paste0("PC2: ",percentVar[2],"% variance")) + coord_fixed()+ stat_ellipse() # Filtering: keep genes that have at least 10 counts across 1/3 of the samples - https://support.bioconductor.org/p/110307/ keep <- rowSums(DESeq2::counts(dds9) >= 10) >= ncol(GOW_LC_countmatrix)/3 dds9_filt <- dds9[keep,] ``` ### DO vs lab control ```{r} # Filter data GDO_LC_treatmentinfo <- treatmentinfo.2 %>% filter(tissue == "G" | treatment == "control" | treatment == "DO") %>% filter(!(tissue == "F" | treatment == "OA" | treatment == "OW")) GDO_LC_treatmentinfo <- GDO_LC_treatmentinfo %>% filter(!(treatment == "control" & day == "3")) write.table(GDO_LC_treatmentinfo, file = "output/DEG_lists/Gill/GDO_LC_treatmentinfo.csv", row.names = FALSE) GDO_LC_countmatrix <- subset(countmatrix_2, select=row.names(GDO_LC_treatmentinfo)) write.table(GDO_LC_countmatrix, file = "output/DEG_lists/Gill/GDO_LC_countmatrix.csv", row.names = FALSE) # Calculate DESeq object dds10 <- DESeqDataSetFromMatrix(countData = GDO_LC_countmatrix, colData = GDO_LC_treatmentinfo, design = ~ treatment) dds10 <- DESeq(dds10) resultsNames(dds10) # lists the coefficients PCAdata <- plotPCA(vst(dds10), intgroup = c("treatment"), returnData=TRUE) percentVar <- round(100*attr(PCAdata, "percentVar")) #plot PCA of samples with all data ggplot(PCAdata, aes(PC1, PC2, color=treatment)) + geom_point(size=4, alpha = 5/10) + xlab(paste0("PC1: ",percentVar[1],"% variance")) + ylab(paste0("PC2: ",percentVar[2],"% variance")) + coord_fixed()+ stat_ellipse(level=0.95) # Filtering: keep genes that have at least 10 counts across 1/3 of the samples - https://support.bioconductor.org/p/110307/ keep <- rowSums(DESeq2::counts(dds10) >= 10) >= ncol(GDO_LC_countmatrix)/3 dds10_filt <- dds10[keep,] ``` ### treatment control vs lab control ```{r} # Filter data GTC_LC_treatmentinfo <- treatmentinfo.2 %>% filter(tissue == "G" | treatment == "control") %>% filter(!(tissue == "F" | treatment == "OA" | treatment == "OW" | treatment == "DO")) write.table(GTC_LC_treatmentinfo, file = "output/DEG_lists/Gill/GTC_LC_treatmentinfo.csv", row.names = FALSE) GTC_LC_countmatrix <- subset(countmatrix_2, select=row.names(GTC_LC_treatmentinfo)) write.table(GTC_LC_countmatrix, file = "output/DEG_lists/Gill/GTC_LC_countmatrix.csv", row.names = FALSE) # Calculate DESeq object dds11 <- DESeqDataSetFromMatrix(countData = GTC_LC_countmatrix, colData = GTC_LC_treatmentinfo, design = ~ day) dds11 <- DESeq(dds11) resultsNames(dds11) # lists the coefficients PCAdata <- plotPCA(vst(dds11), intgroup = c("day"), returnData=TRUE) percentVar <- round(100*attr(PCAdata, "percentVar")) #plot PCA of samples with all data ggplot(PCAdata, aes(PC1, PC2, color=day)) + geom_point(size=4) + xlab(paste0("PC1: ",percentVar[1],"% variance")) + ylab(paste0("PC2: ",percentVar[2],"% variance")) + coord_fixed()+ stat_ellipse(level=0.95) # Filtering: keep genes that have at least 10 counts across 1/3 of the samples - https://support.bioconductor.org/p/110307/ keep <- rowSums(DESeq2::counts(dds11) >= 10) >= ncol(GTC_LC_countmatrix)/3 dds11_filt <- dds11[keep,] ```