--- title: "week-02" output: html_document created: "2023-04-07" last edited: "2023-04-13" --- # Introduction This is code for examining the effect of ocean acidification on gene expression of snow crabs. ### Look at all RNASeq files ```{bash} ls /home/shared/8TB_HDD_01/snow_crab/5010 ``` ### Download C. bairdi reference, save it to "data" directory ```{bash} cd ../data curl -O https://owl.fish.washington.edu/halfshell/genomic-databank/cbai_genome_v1.0.fasta ``` ### Set index ```{bash} #index "cbai_genome_v1.0.fasta" and rename it as "cbai_genome_v1.0.index" /home/shared/kallisto/kallisto \ index -i \ ../data/cbai_genome_v1.0.index \ ../data/cbai_genome_v1.0.fasta ``` ```{bash} #list file names ls /home/shared/8TB_HDD_01/snow_crab/5010/exp01 ``` ### Create abundance estimate files for desired sequences ```{bash} #make a new directory in output mkdir ../output/kallisto_01 #find specific files and create abundance estimates find /home/shared/8TB_HDD_01/snow_crab/5010/exp01/*fastq.gz \ | xargs basename -s _001.fastq.gz | xargs -I{} \ /home/shared/kallisto/kallisto \ quant -i ../data/cbai_genome_v1.0.index \ -o ../output/kallisto_01/{} \ -t 40 \ --single -l 100 -s 10 /home/shared/8TB_HDD_01/snow_crab/5010/exp01/{}.fastq.gz ``` ### next, will run abundance_estimates_to_matrix.pl script from the Trinity RNA-seq assembly software package to create a gene expression matrix from kallisto output files ```{bash} perl /home/shared/trinityrnaseq-v2.12.0/util/abundance_estimates_to_matrix.pl \ --est_method kallisto \ --gene_trans_map none \ --out_prefix ../output/kallisto_01 \ --name_sample_by_basedir \ ../output/kallisto_01/5010_1_S1_L003_R1/abundance.tsv \ ../output/kallisto_01/5010_1_S1_L003_R2/abundance.tsv \ ../output/kallisto_01/5010_2_S2_L003_R1/abundance.tsv \ ../output/kallisto_01/5010_2_S2_L003_R2/abundance.tsv \ ../output/kallisto_01/5010_3_S3_L003_R1/abundance.tsv \ ../output/kallisto_01/5010_3_S3_L003_R2/abundance.tsv \ ../output/kallisto_01/5010_4_S4_L003_R1/abundance.tsv \ ../output/kallisto_01/5010_4_S4_L003_R2/abundance.tsv \ ../output/kallisto_01/5010_39_S39_L003_R1/abundance.tsv \ ../output/kallisto_01/5010_39_S39_L003_R2/abundance.tsv \ ../output/kallisto_01/5010_40_S40_L003_R1/abundance.tsv \ ../output/kallisto_01/5010_40_S40_L003_R2/abundance.tsv \ ../output/kallisto_01/5010_41_S41_L003_R1/abundance.tsv \ ../output/kallisto_01/5010_41_S41_L003_R2/abundance.tsv ``` ```{r} library(DESeq2) library(tidyverse) library(pheatmap) library(RColorBrewer) library(data.table) ``` ```{r} countmatrix <- read.delim("../output/kallisto_01.isoform.counts.matrix", header = TRUE, sep = '\t') rownames(countmatrix) <- countmatrix$X countmatrix <- countmatrix[,-1] head(countmatrix) ``` ```{r} countmatrix <- round(countmatrix, 0) str(countmatrix) ``` ```{r} deseq2.colData <- data.frame(condition=factor(c(rep("control", 8), rep("high pH", 6))), type=factor(rep("single-read", 14))) rownames(deseq2.colData) <- colnames(data) deseq2.dds <- DESeqDataSetFromMatrix(countData = countmatrix, colData = deseq2.colData, design = ~ condition) dim(countmatrix) dim(deseq2.colData) deseq2.colData deseq2.dds ``` ```{r} deseq2.dds <- DESeq(deseq2.dds) deseq2.res <- results(deseq2.dds) deseq2.res <- deseq2.res[order(rownames(deseq2.res)), ] ``` ## still missing sample 42 (5010_42_S42_L003_R2_001.fastq.gz)