--- title: "deseq2 visualiztion" output: html_document date: "2023-11-28" --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` ### load packages ```{r run once} ### only run if packages not already installed # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("biocLite") library(DESeq2) library(tidyverse) ``` ## DESeq2 ### load in count data ```{r} options(scipen = 999) #disables printing results in scientific notation data <- read_delim("../output/kallisto.isoform.counts.matrix") write.table(data, "../output/kallisto.matrix.tab", quote = FALSE, row.names = TRUE, sep = "\t") ## here we want to specify our row names from out first column of data. it can be a little funky, but saving this order should work: # create object for row names from the first column # used the $ index because brackets were giving trouble rownames <- data$...1 head(rownames) # remove that first column now that it's values are stored data <- data[,-1] head(data) #ensures all data as integer data <- round(data, digits = 0) head(data) # assign the row names to the data using the object created rownames(data) <- rownames head(data) ``` ### build objects; specify which columns are in groups ```{r} deseq2.colData <- data.frame(condition = factor(c(rep("Control", 15), rep("Treatment", 15))), type = factor(rep("paired-read", 30))) rownames(deseq2.colData) <- colnames(data) deseq2.dds <- DESeqDataSetFromMatrix(countData = data, colData = deseq2.colData, design = ~ condition) ``` ### run analysis ```{r} deseq2.dds <- DESeq(deseq2.dds) deseq2.res <- results(deseq2.dds) deseq2.res <- deseq2.res[order(rownames(deseq2.res)),] summary(deseq2.res) ``` ### Count number of hits with adjusted p-value less then 0.05 ```{r} DEG <- deseq2.res[!is.na(deseq2.res$padj) & deseq2.res$padj <= 0.05, ] dim(DEG) tmp <- deseq2.res write.table(DEG,"../output/1213-DEG.tab", row.names = T) ``` ## Visualization ### Volcano ```{r volcano} plot(tmp$baseMean, tmp$log2FoldChange, pch=20, cex=0.45, ylim=c(-10, 10), log="x", col="darkgray", main="Treatmetn versus Control (pval <= 0.05)", xlab="mean of normalized counts", ylab="Log2 Fold Change") # Getting the significant points and plotting them again so they're a different color tmp.sig <- deseq2.res[!is.na(deseq2.res$padj) & deseq2.res$padj <= 0.05, ] points(tmp.sig$baseMean, tmp.sig$log2FoldChange, pch=20, cex=0.45, col="red") # 2 FC lines abline(h=c(-1,1), col="blue") ``` ### Heatmap ```{r} #load libraries for heatmap library("RColorBrewer") # library( "genefilter" ) library("pheatmap") ``` ```{r} res <- results(deseq2.dds) res_ordered <- res[order(res$padj), ] top_genes <- row.names(res_ordered)[1:25] # Extract counts and normalize counts <- counts(deseq2.dds, normalized = TRUE) counts_top <- counts[top_genes, ] # Log-transform counts log_counts_top <- log2(counts_top + 1) write.csv(log_counts_top, "../output/log_counts_top.csv") # Generate heatmap pheatmap(log_counts_top, scale = "row", cluster_cols = FALSE, main = "Heatmap of Log2 Fold Change for Top 25 DEGs") ``` ## DESEq2 using hisat / feature counts output ```{r} # DEG list DEG <- deseq2.res[!is.na(deseq2.res$padj) & deseq2.res$padj <= 0.05, ] dim(DEG) tmp <- deseq2.res write.table(DEG,"../output/1213-DEG.tab", row.names = T) # heatmap res <- results(deseq2.dds) res_ordered <- res[order(res$padj), ] top_genes <- row.names(res_ordered)[1:25] # Extract counts and normalize counts <- counts(deseq2.dds, normalized = TRUE) counts_top <- counts[top_genes, ] # Log-transform counts log_counts_top <- log2(counts_top + 1) write.csv(log_counts_top, "../output/log_counts_top.csv") # Generate heatmap pheatmap(log_counts_top, scale = "row", cluster_cols = FALSE, main = "Heatmap of Log2 Fold Change for Top 25 DEGs") ```