---
title: "Foot_TC_DGE_viz"
output: html_document
date: "2024-02-14"
---
### OA vs treatment control
```{r}
# DEG in the foot after exposure to ocean acidification
# Filter data
FOA_TC_treatmentinfo <- treatmentinfo.2 %>% filter(tissue == "F" | treatment == "control" | treatment == "OA") %>% filter(!(tissue == "G" | treatment == "OW" | treatment == "DO"))
FOA_TC_treatmentinfo <- FOA_TC_treatmentinfo %>%
filter(!(treatment == "control" & day == "0"))
write.table(FOA_TC_treatmentinfo, file = "output/DEG_lists/Foot/FOA_TC_treatmentinfo.csv", row.names = FALSE)
FOA_TC_countmatrix <- subset(countmatrix_2, select=row.names(FOA_TC_treatmentinfo))
write.table(FOA_TC_countmatrix, file = "output/DEG_lists/Foot/FOA_TC_countmatrix.csv", row.names = FALSE)
# CaTCulate DESeq object
dds12 <- DESeqDataSetFromMatrix(countData = FOA_TC_countmatrix,
colData = FOA_TC_treatmentinfo,
design = ~ treatment)
dds12 <- DESeq(dds12)
resultsNames(dds12) # lists the coefficients
PCAdata <- plotPCA(vst(dds12), intgroup = c("treatment"), returnData=TRUE)
percentVar <- round(100*attr(PCAdata, "percentVar")) #plot PCA of samples with all data
ggplot(PCAdata, aes(PC1, PC2, color=treatment)) +
geom_point(size=4, alpha = 5/10) +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed()+
stat_ellipse()
# Filtering: keep genes that have at least 10 counts across 1/3 of the samples - https://support.bioconductor.org/p/110307/
keep <- rowSums(DESeq2::counts(dds12) >= 10) >= ncol(FOA_TC_countmatrix)/3
dds12_filt <- dds12[keep,]
```
### OW vs treatment control
```{r}
# Filter data
FOW_TC_treatmentinfo <- treatmentinfo.2 %>% filter(tissue == "F" | treatment == "control" | treatment == "OW") %>% filter(!(tissue == "G" | treatment == "OA" | treatment == "DO"))
FOW_TC_treatmentinfo <- FOW_TC_treatmentinfo %>%
filter(!(treatment == "control" & day == "0"))
write.table(FOW_TC_treatmentinfo, file = "output/DEG_lists/Foot/FOW_TC_treatmentinfo.csv", row.names = FALSE)
FOW_TC_countmatrix <- subset(countmatrix_2, select=row.names(FOW_TC_treatmentinfo))
write.table(FOW_TC_countmatrix, file = "output/DEG_lists/Foot/FOW_TC_countmatrix.csv", row.names = FALSE)
# CaTCulate DESeq object
dds13 <- DESeqDataSetFromMatrix(countData = FOW_TC_countmatrix,
colData = FOW_TC_treatmentinfo,
design = ~ treatment)
dds13 <- DESeq(dds13)
resultsNames(dds13) # lists the coefficients
# -- note: fitType='parametric', but the dispersion trend was not well captured by the
# function: y = a/x + b, and a local regression fit was automatically substituted.
# specify fitType='local' or 'mean' to avoid this message next time.
PCAdata <- plotPCA(vst(dds13), intgroup = c("treatment"), returnData=TRUE)
percentVar <- round(100*attr(PCAdata, "percentVar")) #plot PCA of samples with all data
ggplot(PCAdata, aes(PC1, PC2, color=treatment)) +
geom_point(size=4, alpha = 5/10) +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed()+
stat_ellipse(level=0.95)
# Filtering: keep genes that have at least 10 counts across 1/3 of the samples - https://support.bioconductor.org/p/110307/
keep <- rowSums(DESeq2::counts(dds13) >= 10) >= ncol(FOW_TC_countmatrix)/3
dds13_filt <- dds13[keep,]
```
### DO vs treatment control
```{r}
# Filter data
FDO_TC_treatmentinfo <- treatmentinfo.2 %>% filter(tissue == "F" | treatment == "control" | treatment == "DO") %>% filter(!(tissue == "G" | treatment == "OA" | treatment == "OW"))
FDO_TC_treatmentinfo <- FDO_TC_treatmentinfo %>%
filter(!(treatment == "control" & day == "0"))
write.table(FDO_TC_treatmentinfo, file = "output/DEG_lists/Foot/FDO_TC_treatmentinfo.csv", row.names = FALSE)
FDO_TC_countmatrix <- subset(countmatrix_2, select=row.names(FDO_TC_treatmentinfo))
write.table(FDO_TC_countmatrix, file = "output/DEG_lists/Foot/FDO_TC_countmatrix.csv", row.names = FALSE)
# CaTCulate DESeq object
dds14 <- DESeqDataSetFromMatrix(countData = FDO_TC_countmatrix,
colData = FDO_TC_treatmentinfo,
design = ~ treatment)
dds14 <- DESeq(dds14)
resultsNames(dds14) # lists the coefficients
PCAdata <- plotPCA(vst(dds14), intgroup = c("treatment"), returnData=TRUE)
# -- note: fitType='parametric', but the dispersion trend was not well captured by the
# function: y = a/x + b, and a local regression fit was automatically substituted.
# specify fitType='local' or 'mean' to avoid this message next time.
percentVar <- round(100*attr(PCAdata, "percentVar")) #plot PCA of samples with all data
ggplot(PCAdata, aes(PC1, PC2, color=treatment)) +
geom_point(size=4, alpha = 5/10) +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed()+
stat_ellipse(level=0.95)
# Filtering: keep genes that have at least 10 counts across 1/3 of the samples - https://support.bioconductor.org/p/110307/
keep <- rowSums(DESeq2::counts(dds14) >= 10) >= ncol(FDO_TC_countmatrix)/3
dds14_filt <- dds14[keep,]
```