---
title: "deseq2 visualiztion"
output: html_document
date: "2023-11-28"
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
### load packages
```{r run once}
### only run if packages not already installed
# if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
# BiocManager::install("biocLite")
library(DESeq2)
library(tidyverse)
```
## DESeq2
### load in count data
```{r}
options(scipen = 999) #disables printing results in scientific notation
data <- read_delim("../output/kallisto.isoform.counts.matrix")
write.table(data, "../output/kallisto.matrix.tab", quote = FALSE, row.names = TRUE, sep = "\t")
## here we want to specify our row names from out first column of data. it can be a little funky, but saving this order should work:
# create object for row names from the first column
# used the $ index because brackets were giving trouble
rownames <- data$...1
head(rownames)
# remove that first column now that it's values are stored
data <- data[,-1]
head(data)
#ensures all data as integer
data <- round(data, digits = 0)
head(data)
# assign the row names to the data using the object created
rownames(data) <- rownames
head(data)
```
### build objects; specify which columns are in groups
```{r}
deseq2.colData <- data.frame(condition = factor(c(rep("Control", 15), rep("Treatment", 15))), type = factor(rep("paired-read", 30)))
rownames(deseq2.colData) <- colnames(data)
deseq2.dds <- DESeqDataSetFromMatrix(countData = data,
colData = deseq2.colData,
design = ~ condition)
```
### run analysis
```{r}
deseq2.dds <- DESeq(deseq2.dds)
deseq2.res <- results(deseq2.dds)
deseq2.res <- deseq2.res[order(rownames(deseq2.res)),]
summary(deseq2.res)
```
### Count number of hits with adjusted p-value less then 0.05
```{r}
DEG <- deseq2.res[!is.na(deseq2.res$padj) & deseq2.res$padj <= 0.05, ]
dim(DEG)
tmp <- deseq2.res
write.table(DEG,"../output/1213-DEG.tab", row.names = T)
```
## Visualization
### Volcano
```{r volcano}
plot(tmp$baseMean, tmp$log2FoldChange, pch=20, cex=0.45, ylim=c(-10, 10), log="x", col="darkgray",
main="Treatmetn versus Control (pval <= 0.05)",
xlab="mean of normalized counts",
ylab="Log2 Fold Change")
# Getting the significant points and plotting them again so they're a different color
tmp.sig <- deseq2.res[!is.na(deseq2.res$padj) & deseq2.res$padj <= 0.05, ]
points(tmp.sig$baseMean, tmp.sig$log2FoldChange, pch=20, cex=0.45, col="red")
# 2 FC lines
abline(h=c(-1,1), col="blue")
```
### Heatmap
```{r}
#load libraries for heatmap
library("RColorBrewer")
# library( "genefilter" )
library("pheatmap")
```
```{r}
res <- results(deseq2.dds)
res_ordered <- res[order(res$padj), ]
top_genes <- row.names(res_ordered)[1:25]
# Extract counts and normalize
counts <- counts(deseq2.dds, normalized = TRUE)
counts_top <- counts[top_genes, ]
# Log-transform counts
log_counts_top <- log2(counts_top + 1)
write.csv(log_counts_top, "../output/log_counts_top.csv")
# Generate heatmap
pheatmap(log_counts_top,
scale = "row",
cluster_cols = FALSE,
main = "Heatmap of Log2 Fold Change for Top 25 DEGs")
```
## DESEq2 using hisat / feature counts output
```{r}
# DEG list
DEG <- deseq2.res[!is.na(deseq2.res$padj) & deseq2.res$padj <= 0.05, ]
dim(DEG)
tmp <- deseq2.res
write.table(DEG,"../output/1213-DEG.tab", row.names = T)
# heatmap
res <- results(deseq2.dds)
res_ordered <- res[order(res$padj), ]
top_genes <- row.names(res_ordered)[1:25]
# Extract counts and normalize
counts <- counts(deseq2.dds, normalized = TRUE)
counts_top <- counts[top_genes, ]
# Log-transform counts
log_counts_top <- log2(counts_top + 1)
write.csv(log_counts_top, "../output/log_counts_top.csv")
# Generate heatmap
pheatmap(log_counts_top,
scale = "row",
cluster_cols = FALSE,
main = "Heatmap of Log2 Fold Change for Top 25 DEGs")
```