# Show the search path used by this session. Quoted so the value is printed
# verbatim instead of being word-split and glob-expanded.
printf '%s\n' "$PATH"
/home/shared/8TB_HDD_02/mattgeorgephd/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin:/usr/lib/rstudio-server/bin/postback:/usr/lib/rstudio-server/bin/postback:/usr/lib/rstudio-server/bin/postback:/usr/lib/rstudio-server/bin/postback:/usr/local/go/bin
# Download the reference annotation (.gff) and genome (.fna) into sequences/.
# Abort when the directory cannot be entered so nothing is saved elsewhere.
cd sequences/ || exit 1

genome_url=https://gannet.fish.washington.edu/panopea/berdahl-sockeye-salmon/genome/

# -r / --no-parent / -A : recurse below the URL but keep only the named file.
# --no-directories / -P . : store it flat in the current directory.
# NOTE(review): --no-check-certificate turns off TLS verification; it is kept
# from the original command - remove it once the server certificate validates.
for reference_file in \
  GCF_006149115.2_Oner_1.1_genomic.gff \
  GCF_006149115.2_Oner_1.1_genomic.fna; do
  wget -r \
    --no-directories --no-parent \
    -P . \
    -A "$reference_file" \
    --no-check-certificate \
    "$genome_url"
done
# Preview the first 10 lines of the downloaded genome FASTA.
head -n 10 sequences/GCF_006149115.2_Oner_1.1_genomic.fna
Calculate chromosome lengths
#######################################
# Print "<sequence id><TAB><length>" for every record of a FASTA file.
# The id is the first whitespace-delimited word of the header without ">",
# so accessions of any length are handled (a fixed-width cut of the header
# loses the length column for 14-character ids such as NW_021822060.1).
# Arguments: $1 - path to the FASTA file
# Outputs:   one line per sequence on stdout
#######################################
fasta_sequence_lengths() {
  awk '
    /^>/ {
      # A new header closes the previous record.
      if (seen) print id "\t" len
      id = substr($1, 2)
      len = 0
      seen = 1
      next
    }
    { len += length($0) }
    END { if (seen) print id "\t" len }
  ' "$1"
}

fasta_sequence_lengths sequences/GCF_006149115.2_Oner_1.1_genomic.fna \
  > sequences/GCF_006149115.2_Oner_1.1_genomic-sequence-lengths.txt

head sequences/GCF_006149115.2_Oner_1.1_genomic-sequence-lengths.txt
Generate mRNA feature track from genomic_sequence
head sequences/GCF_006149115.2_Oner_1.1_genomic.gff

#######################################
# Print the mRNA features of a GFF3 file whose source column ends in Gnomon,
# RefSeq, cmsearch or tRNAscan-SE. GFF3 columns are tab-separated, so the
# match is done on columns 2 (source) and 3 (type) rather than on a
# "source type" text pattern, which depends on the exact separator character.
# The source is matched as a suffix, so e.g. "BestRefSeq" is accepted too,
# as it would be by a substring search for "RefSeq<TAB>mRNA".
# Arguments: $1 - path to the GFF3 file
# Outputs:   matching feature lines on stdout
#######################################
extract_mrna_features() {
  awk -F '\t' '
    $3 == "mRNA" && $2 ~ /(Gnomon|RefSeq|cmsearch|tRNAscan-SE)$/
  ' "$1"
}

# Sort the mRNA features in the sequence order given by the lengths file.
extract_mrna_features sequences/GCF_006149115.2_Oner_1.1_genomic.gff \
  | /home/shared/bedtools2/bin/sortBed \
      -faidx sequences/GCF_006149115.2_Oner_1.1_genomic-sequence-lengths.txt \
  > sequences/GCF_006149115.2_Oner_1.1_mRNA.gff

head sequences/GCF_006149115.2_Oner_1.1_mRNA.gff
Download tag-seq data
# Download every .fastq.gz of the tag-seq run into raw-data/.
# mkdir -p keeps a re-run from failing when the directory already exists;
# abort if it cannot be entered so files are not saved somewhere else.
mkdir -p raw-data/
cd raw-data/ || exit 1

# NOTE(review): --no-check-certificate turns off TLS verification; kept from
# the original command.
wget -r \
  --no-directories --no-parent \
  -P . \
  -A .fastq.gz \
  --no-check-certificate \
  https://gannet.fish.washington.edu/panopea/berdahl-sockeye-salmon/20220714-tagseq/
Decompress the .fastq.gz files (gunzip)
# Decompress the downloaded reads in place (gunzip replaces each .fastq.gz
# with the .fastq). Stop if raw-data/ is missing so the glob is never
# evaluated in another directory.
cd raw-data/ || exit 1
gunzip -- *.fastq.gz
Run fastqc on untrimmed files
# FastQC on the raw reads. The report directory is created with -p at the
# same absolute location that is passed to --outdir, so the step can be
# re-run and does not depend on the current directory.
project_dir=/home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon
mkdir -p "${project_dir}/fastqc/untrimmed/"

/home/shared/FastQC/fastqc \
  "${project_dir}"/raw-data/*.fastq \
  --outdir "${project_dir}/fastqc/untrimmed/" \
  --quiet
Run multiqc
# Make conda available in this shell and activate the default environment.
eval "$(/opt/anaconda/anaconda3/bin/conda shell.bash hook)"
conda activate

# Summarise the FastQC reports of the untrimmed reads; multiqc is run on the
# current directory, so stop if the report directory cannot be entered.
cd fastqc/untrimmed/ || exit 1
multiqc .
trim adapter sequences
# Trim adapters and homopolymer tails from every raw FASTQ file with cutadapt.
# Each output keeps the input file name and is written to trim-fastq/.
project_dir=/home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon
cutadapt_bin=/home/shared/8TB_HDD_02/mattgeorgephd/.local/bin/cutadapt

mkdir -p "${project_dir}/trim-fastq/"
cd "${project_dir}/raw-data/" || exit 1

for F in *.fastq; do
  # An unmatched glob stays literal; skip it instead of calling cutadapt.
  [[ -e "$F" ]] || continue

  # Output name = input file name without any directory part.
  results_file=${F##*/}

  # -a 'A{8}' / -a 'G{8}' / -a AGATCGG : 3' adapter sequences, quoted so the
  #                                      shell never treats the braces specially
  # -u 15 : hard trim first 15 bp
  # -m 20 : minimum length cutoff
  "$cutadapt_bin" \
    -a 'A{8}' -a 'G{8}' -a AGATCGG \
    -u 15 -m 20 \
    -o "${project_dir}/trim-fastq/${results_file}" \
    "$F"
done
Concatenate the per-lane fastq files of each sample into one file per sample
#######################################
# Concatenate the per-lane R1 files of every sample into one file per sample.
# Input files are named <sample>_<S#>_L<lane>_R1_<n>.fastq; the first two
# "_"-separated fields identify the sample. The merged files go straight to
# the output directory, so they never sit next to the inputs (where their
# names would match the input pattern on a re-run) and need no manual move.
# Arguments: $1 - directory holding the per-lane files
#            $2 - directory receiving <sample>_<S#>_R1.fastq (created if absent)
# Returns:   non-zero if the output directory cannot be created
#######################################
merge_lanes() {
  local in_dir=$1 out_dir=$2
  local lane_file name sample_id sample_number prefix last_prefix=""

  mkdir -p -- "$out_dir" || return

  for lane_file in "$in_dir"/*_L[0-9]*_R1_*.fastq; do
    [[ -e "$lane_file" ]] || continue
    name=${lane_file##*/}
    IFS=_ read -r sample_id sample_number _ <<< "$name"
    prefix="${sample_id}_${sample_number}"

    # Glob results are sorted, so all lanes of a sample are adjacent.
    [[ "$prefix" == "$last_prefix" ]] && continue
    last_prefix=$prefix

    cat "$in_dir/$prefix"_L[0-9]*_R1_*.fastq > "$out_dir/${prefix}_R1.fastq"
    # For paired-end data from more than one run, merge R2 the same way:
    # cat "$in_dir/$prefix"_L[0-9]*_R2_*.fastq > "$out_dir/${prefix}_R2.fastq"
  done
}

merge_lanes trim-fastq merged-fastq
# Merged files are written directly to merged-fastq/.
13633444 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/06C_S90_L002_R1_001.fastq
12176996 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/07C_S91_L001_R1_001.fastq
12257640 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/07C_S91_L002_R1_001.fastq
13565896 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/08C_S92_L001_R1_001.fastq
13802904 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/08C_S92_L002_R1_001.fastq
12848700 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/09C_S93_L001_R1_001.fastq
12867400 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/09C_S93_L002_R1_001.fastq
16338984 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/10C_S94_L001_R1_001.fastq
16255028 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/10C_S94_L002_R1_001.fastq
13960488 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/11C_S95_L001_R1_001.fastq
14004052 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/11C_S95_L002_R1_001.fastq
13511560 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/12C_S96_L001_R1_001.fastq
13632664 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/12C_S96_L002_R1_001.fastq
16882648 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/13C_S97_L001_R1_001.fastq
17132804 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/13C_S97_L002_R1_001.fastq
12819864 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/14C_S98_L001_R1_001.fastq
12936660 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/14C_S98_L002_R1_001.fastq
11411964 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/15C_S99_L001_R1_001.fastq
11686036 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/15C_S99_L002_R1_001.fastq
13981132 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/16C_S100_L001_R1_001.fastq
14065312 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/16C_S100_L002_R1_001.fastq
7421516 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/17C_S101_L001_R1_001.fastq
7426192 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/17C_S101_L002_R1_001.fastq
12192192 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/18C_S102_L001_R1_001.fastq
12214596 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/18C_S102_L002_R1_001.fastq
15393748 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/19C_S103_L001_R1_001.fastq
15301248 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/19C_S103_L002_R1_001.fastq
15108256 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/20C_S104_L001_R1_001.fastq
15209208 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/20C_S104_L002_R1_001.fastq
17598696 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/21C_S105_L001_R1_001.fastq
17535320 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/21C_S105_L002_R1_001.fastq
13962128 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/22C_S106_L001_R1_001.fastq
14009740 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/22C_S106_L002_R1_001.fastq
12933164 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/23C_S107_L001_R1_001.fastq
12891852 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/23C_S107_L002_R1_001.fastq
16135348 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/24C_S108_L001_R1_001.fastq
16095096 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/24C_S108_L002_R1_001.fastq
12703556 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/25C_S109_L001_R1_001.fastq
12788544 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/25C_S109_L002_R1_001.fastq
13546676 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/26C_S110_L001_R1_001.fastq
13454732 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/26C_S110_L002_R1_001.fastq
12437512 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/27C_S111_L001_R1_001.fastq
12485552 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/27C_S111_L002_R1_001.fastq
15652616 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/28C_S112_L001_R1_001.fastq
15813848 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/28C_S112_L002_R1_001.fastq
12814088 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/29C_S113_L001_R1_001.fastq
12823820 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/29C_S113_L002_R1_001.fastq
13891748 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/30C_S114_L001_R1_001.fastq
13811900 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/trim-fastq/30C_S114_L002_R1_001.fastq
804787096 total
20772844 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/01C_S85_R1.fastq
25282884 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/02C_S86_R1.fastq
23364480 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/03C_S87_R1.fastq
27827996 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/04C_S88_R1.fastq
20571996 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/05C_S89_R1.fastq
27175272 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/06C_S90_R1.fastq
24434636 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/07C_S91_R1.fastq
27368800 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/08C_S92_R1.fastq
25716100 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/09C_S93_R1.fastq
32594012 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/10C_S94_R1.fastq
27964540 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/11C_S95_R1.fastq
27144224 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/12C_S96_R1.fastq
34015452 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/13C_S97_R1.fastq
25756524 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/14C_S98_R1.fastq
23098000 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/15C_S99_R1.fastq
28046444 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/16C_S100_R1.fastq
14847708 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/17C_S101_R1.fastq
24406788 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/18C_S102_R1.fastq
30694996 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/19C_S103_R1.fastq
30317464 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/20C_S104_R1.fastq
35134016 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/21C_S105_R1.fastq
27971868 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/22C_S106_R1.fastq
25825016 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/23C_S107_R1.fastq
32230444 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/24C_S108_R1.fastq
25492100 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/25C_S109_R1.fastq
27001408 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/26C_S110_R1.fastq
24923064 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/27C_S111_R1.fastq
31466464 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/28C_S112_R1.fastq
25637908 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/29C_S113_R1.fastq
27703648 /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/merged-fastq/30C_S114_R1.fastq
804787096 total
# Reads remaining after trimming and filtering (%)
1472899016/1582147952*100
Run fastqc on trimmed files
# FastQC on the trimmed, lane-merged reads. mkdir -p creates the report
# directory (and fastqc/ when needed) without failing if fastqc/ already
# exists, and uses the same absolute location that is passed to --outdir.
project_dir=/home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon
mkdir -p "${project_dir}/fastqc/trimmed/"

/home/shared/FastQC/fastqc \
  "${project_dir}"/merged-fastq/*.fastq \
  --outdir "${project_dir}/fastqc/trimmed/" \
  --quiet
Run multiqc on trimmed files
# Make conda available in this shell and activate the default environment.
eval "$(/opt/anaconda/anaconda3/bin/conda shell.bash hook)"
conda activate

# Summarise the FastQC reports of the trimmed reads; multiqc is run on the
# current directory, so stop if the report directory cannot be entered.
cd fastqc/trimmed/ || exit 1
multiqc .
Create bowtie2 index for the sockeye salmon reference (sequences/rna.fna; took 8 min on Raven)
# Build the bowtie2 index: first argument is the reference FASTA, second is
# the basename under which the index files are written.
seq_dir=/home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/sequences
/home/shared/bowtie2-2.4.4-linux-x86_64/bowtie2-build "${seq_dir}/rna.fna" "${seq_dir}/GENOME.fa"
Run bowtie2 on trimmed reads, preset option = --sensitive-local
# Align every trimmed FASTQ file with bowtie2; one <input name>.sam per file
# is written to bowtie_sam/, and all bowtie2 messages (including the alignment
# summaries) are appended to bowtie_sam/bowtieout.txt.
project_dir=/home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon
bowtie2_bin=/home/shared/bowtie2-2.4.4-linux-x86_64/bowtie2

mkdir -p bowtie_sam/
cd bowtie_sam/ || exit 1

for file in "${project_dir}"/trim-fastq/*.fastq; do
  # An unmatched glob stays literal; skip it instead of calling bowtie2.
  [[ -e "$file" ]] || continue
  results_file="${file##*/}.sam"

  # --local / --sensitive-local : local alignment with the sensitive preset
  # --no-unal                   : do not write unaligned reads to the SAM file
  # -k 5                        : report up to 5 alignments per read
  "$bowtie2_bin" \
    --local \
    -x "${project_dir}/sequences/GENOME.fa" \
    --sensitive-local \
    --threads 48 \
    --no-unal \
    -k 5 \
    -U "$file" \
    -S "$results_file"
done >> bowtieout.txt 2>&1
# Pull the per-sample alignment summaries out of the bowtie2 log.
bowtie_log=/home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/bowtie_sam/bowtieout.txt
grep -F "overall alignment rate" "$bowtie_log"
# average alignment rate = 65.91 +/- 4.87 sd
Convert .sam files to .bam files, create bam indices
# Convert every bowtie2 SAM file to a coordinate-sorted BAM in bowtie_bam/
# and index it. Output name: <sam file name>_sorted.bam (plus the index that
# samtools index writes next to it).
project_dir=/home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon
samtools_bin=/home/shared/samtools-1.12/samtools

mkdir -p "${project_dir}/bowtie_bam/"
cd "${project_dir}/bowtie_bam/" || exit 1

for file in "${project_dir}"/bowtie_sam/*.sam; do
  # An unmatched glob stays literal; skip it instead of calling samtools.
  [[ -e "$file" ]] || continue
  sorted_bam="${project_dir}/bowtie_bam/${file##*/}_sorted.bam"

  # Index only when conversion and sorting succeeded.
  "$samtools_bin" view -b "$file" \
    | "$samtools_bin" sort -o "$sorted_bam" - \
    && "$samtools_bin" index "$sorted_bam"
done
Create hisat2 index for the sockeye salmon genome (took 31 min on Raven)
Sorting block time: 00:02:35
Returning block of 253935055 for bucket 4
Getting block 5 of 8
Reserving size (347087862) for bucket 5
Calculating Z arrays for bucket 5
Entering block accumulator loop for bucket 5:
bucket 5: 10%
bucket 5: 20%
bucket 5: 30%
bucket 5: 40%
bucket 5: 50%
bucket 5: 60%
bucket 5: 70%
bucket 5: 80%
bucket 5: 90%
bucket 5: 100%
Sorting block of length 262480610 for bucket 5
(Using difference cover)
Sorting block time: 00:02:41
Returning block of 262480611 for bucket 5
Getting block 6 of 8
Reserving size (347087862) for bucket 6
Calculating Z arrays for bucket 6
Entering block accumulator loop for bucket 6:
bucket 6: 10%
bucket 6: 20%
bucket 6: 30%
bucket 6: 40%
bucket 6: 50%
bucket 6: 60%
bucket 6: 70%
bucket 6: 80%
bucket 6: 90%
bucket 6: 100%
Sorting block of length 261295901 for bucket 6
(Using difference cover)
Sorting block time: 00:02:33
Returning block of 261295902 for bucket 6
Getting block 7 of 8
Reserving size (347087862) for bucket 7
Calculating Z arrays for bucket 7
Entering block accumulator loop for bucket 7:
bucket 7: 10%
bucket 7: 20%
bucket 7: 30%
bucket 7: 40%
bucket 7: 50%
bucket 7: 60%
bucket 7: 70%
bucket 7: 80%
bucket 7: 90%
bucket 7: 100%
Sorting block of length 300704932 for bucket 7
(Using difference cover)
Sorting block time: 00:02:58
Returning block of 300704933 for bucket 7
Getting block 8 of 8
Reserving size (347087862) for bucket 8
Calculating Z arrays for bucket 8
Entering block accumulator loop for bucket 8:
bucket 8: 10%
bucket 8: 20%
bucket 8: 30%
bucket 8: 40%
bucket 8: 50%
bucket 8: 60%
bucket 8: 70%
bucket 8: 80%
bucket 8: 90%
bucket 8: 100%
Sorting block of length 139674187 for bucket 8
(Using difference cover)
Sorting block time: 00:01:20
Returning block of 139674188 for bucket 8
Exited GFM loop
fchr[A]: 0
fchr[C]: 524334746
fchr[G]: 925667240
fchr[T]: 1327076578
fchr[$]: 1851135262
Exiting GFM::buildToDisk()
Returning from initFromVector
Wrote 627029974 bytes to primary GFM file: /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/sequences/hisat2_genome_index.fa.1.ht2
Wrote 462783820 bytes to secondary GFM file: /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/sequences/hisat2_genome_index.fa.2.ht2
Re-opening _in1 and _in2 as input streams
Returning from GFM constructor
Returning from initFromVector
Wrote 1070141415 bytes to primary GFM file: /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/sequences/hisat2_genome_index.fa.5.ht2
Wrote 470234864 bytes to secondary GFM file: /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/sequences/hisat2_genome_index.fa.6.ht2
Re-opening _in5 and _in5 as input streams
Returning from HGFM constructor
Headers:
len: 1851135262
gbwtLen: 1851135263
nodes: 1851135263
sz: 462783816
gbwtSz: 462783816
lineRate: 6
offRate: 4
offMask: 0xfffffff0
ftabChars: 10
eftabLen: 0
eftabSz: 0
ftabLen: 1048577
ftabSz: 4194308
offsLen: 115695954
offsSz: 462783816
lineSz: 64
sideSz: 64
sideGbwtSz: 48
sideGbwtLen: 192
numSides: 9641330
numLines: 9641330
gbwtTotLen: 617045120
gbwtTotSz: 617045120
reverse: 0
linearFM: Yes
Total time for call to driver() for forward index: 00:31:06
Run hisat2 on trimmed reads
# Align every lane-merged FASTQ file with hisat2, then sort the alignment
# into a BAM file (StringTie needs sorted input; --dta tailors the alignments
# for transcript assemblers). Per sample this writes
#   hisat2_sam/<sample>.sam and hisat2_bam/<sample>.bam
# and appends all tool messages to hisat2out.txt in merged-fastq/ (the
# working directory of the loop).
project_dir=/home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon
hisat2_bin=/home/shared/hisat2-2.2.1/hisat2
samtools_bin=/home/shared/samtools-1.12/samtools

# -p: do not fail when the directories are left over from an earlier run.
mkdir -p "${project_dir}/hisat2_sam/" "${project_dir}/hisat2_bam/"
cd "${project_dir}/merged-fastq/" || exit 1

for i in *.fastq; do
  # An unmatched glob stays literal; skip it instead of calling hisat2.
  [[ -e "$i" ]] || continue

  # Sample name = file name up to the first dot.
  sample_name=${i%%.*}
  sam_file="${project_dir}/hisat2_sam/${sample_name}.sam"
  bam_file="${project_dir}/hisat2_bam/${sample_name}.bam"

  # Sort (and report success) only if the alignment itself succeeded.
  "$hisat2_bin" \
    -p 16 \
    --dta \
    -x "${project_dir}/sequences/hisat2_genome_index.fa" \
    -U "$i" \
    -S "$sam_file" \
    && "$samtools_bin" sort -@ 8 -o "$bam_file" "$sam_file" \
    && echo "${i} bam-ified!"

  # The SAM file is kept; uncomment to delete it once the BAM exists.
  # rm -- "$sam_file"
done >> hisat2out.txt 2>&1
mkdir: cannot create directory ‘hisat2_bam/’: File exists
# check % alignment from hisat2
# The hisat2 loop changes into merged-fastq/ before appending to
# hisat2out.txt, so that is where the log is read from.
grep "overall alignment rate" merged-fastq/hisat2out.txt
# average alignment rate = 50.14 +/- 5.81 sd (w/o hard trim of first 15 bp)
# average alignment rate = 56.34 +/- 5.27 sd (w/ hard trim of first 15 bp)
# NOTE(review): confirm these averages against the current log output.
90.97% overall alignment rate
89.06% overall alignment rate
90.28% overall alignment rate
89.45% overall alignment rate
81.33% overall alignment rate
91.22% overall alignment rate
90.19% overall alignment rate
89.83% overall alignment rate
85.35% overall alignment rate
89.24% overall alignment rate
90.51% overall alignment rate
87.71% overall alignment rate
87.30% overall alignment rate
88.49% overall alignment rate
89.23% overall alignment rate
89.60% overall alignment rate
85.05% overall alignment rate
90.24% overall alignment rate
88.40% overall alignment rate
89.49% overall alignment rate
89.41% overall alignment rate
83.36% overall alignment rate
89.58% overall alignment rate
89.25% overall alignment rate
88.93% overall alignment rate
87.87% overall alignment rate
89.55% overall alignment rate
89.55% overall alignment rate
89.82% overall alignment rate
89.42% overall alignment rate
Convert .sam files to .bam files, create bam indices
# Convert every hisat2 SAM file to a coordinate-sorted BAM in hisat2_bam/
# and index it. Output name: <sam file name>_sorted.bam (plus the index that
# samtools index writes next to it).
project_dir=/home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon
samtools_bin=/home/shared/samtools-1.12/samtools

# -p: the directory normally exists already at this point.
mkdir -p "${project_dir}/hisat2_bam/"
cd "${project_dir}/hisat2_bam/" || exit 1

for file in "${project_dir}"/hisat2_sam/*.sam; do
  # An unmatched glob stays literal; skip it instead of calling samtools.
  [[ -e "$file" ]] || continue
  sorted_bam="${project_dir}/hisat2_bam/${file##*/}_sorted.bam"

  # Index only when conversion and sorting succeeded.
  "$samtools_bin" view -b "$file" \
    | "$samtools_bin" sort -o "$sorted_bam" - \
    && "$samtools_bin" index "$sorted_bam"
done
Assemble hisat2 alignments w/ stringtie2 using mRNA genome feature track
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/06C_S90_R1.bam Mon Aug 8 17:16:17 PDT 2022
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/07C_S91_R1.bam Mon Aug 8 17:16:29 PDT 2022
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/08C_S92_R1.bam Mon Aug 8 17:16:43 PDT 2022
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/09C_S93_R1.bam Mon Aug 8 17:16:57 PDT 2022
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/10C_S94_R1.bam Mon Aug 8 17:17:13 PDT 2022
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/11C_S95_R1.bam Mon Aug 8 17:17:26 PDT 2022
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/12C_S96_R1.bam Mon Aug 8 17:17:40 PDT 2022
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/13C_S97_R1.bam Mon Aug 8 17:17:57 PDT 2022
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/14C_S98_R1.bam Mon Aug 8 17:18:12 PDT 2022
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/15C_S99_R1.bam Mon Aug 8 17:18:25 PDT 2022
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/16C_S100_R1.bam Mon Aug 8 17:18:40 PDT 2022
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/17C_S101_R1.bam Mon Aug 8 17:18:49 PDT 2022
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/18C_S102_R1.bam Mon Aug 8 17:19:02 PDT 2022
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/19C_S103_R1.bam Mon Aug 8 17:19:17 PDT 2022
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/20C_S104_R1.bam Mon Aug 8 17:19:33 PDT 2022
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/21C_S105_R1.bam Mon Aug 8 17:19:50 PDT 2022
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/22C_S106_R1.bam Mon Aug 8 17:20:04 PDT 2022
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/23C_S107_R1.bam Mon Aug 8 17:20:19 PDT 2022
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/24C_S108_R1.bam Mon Aug 8 17:20:35 PDT 2022
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/25C_S109_R1.bam Mon Aug 8 17:20:49 PDT 2022
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/26C_S110_R1.bam Mon Aug 8 17:21:03 PDT 2022
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/27C_S111_R1.bam Mon Aug 8 17:21:17 PDT 2022
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/28C_S112_R1.bam Mon Aug 8 17:21:33 PDT 2022
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/29C_S113_R1.bam Mon Aug 8 17:21:47 PDT 2022
StringTie assembly for seq file /home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon/hisat2_bam/30C_S114_R1.bam Mon Aug 8 17:22:02 PDT 2022
StringTie assembly COMPLETE, starting assembly analysis Mon Aug 8 17:22:02 PDT 2022
#######################################
# Write the two GTF list files for the per-sample GTFs in the current
# directory:
#   gtf_list.txt - one absolute path per line (input of stringtie --merge)
#   listGTF.txt  - "<file name> <absolute path>" per line (input of prepDE.py)
# The merged GTF is left out so that a re-run does not feed the previous
# merge result back in as if it were a sample.
# Arguments: $1 - file name of the merged GTF to exclude
#######################################
write_gtf_lists() {
  local merged_gtf=$1 gtf
  : > gtf_list.txt
  : > listGTF.txt
  for gtf in *.gtf; do
    # An unmatched glob stays literal; do not list a non-existent file.
    [[ -e "$gtf" ]] || continue
    [[ "$gtf" == "$merged_gtf" ]] && continue
    printf '%s\n' "$PWD/$gtf" >> gtf_list.txt
    printf '%s %s\n' "$gtf" "$PWD/$gtf" >> listGTF.txt
  done
}

stringtie_dir=/home/shared/stringtie-2.2.1.Linux_x86_64
project_dir=/home/shared/8TB_HDD_02/mattgeorgephd/berdahl-sockeye-salmon

if cd "${project_dir}/hisat2_bam"; then
  # make gtf list file (needed for stringtie merge function) and
  # listGTF file (needed for count matrix), two columns w/ sample ID
  write_gtf_lists onerka_merged.gtf

  # merge GTFs into a single file, guided by the mRNA feature track
  "${stringtie_dir}/stringtie" \
    --merge \
    -p 48 \
    -G "${project_dir}/sequences/GCF_006149115.2_Oner_1.1_mRNA.gff" \
    -o onerka_merged.gtf gtf_list.txt
  echo "Stringtie merge complete" "$(date)"

  # Compute accuracy of gff (disabled; the paths below come from another project)
  # gffcompare -r ../../../refs/Panopea-generosa-v1.0.a4.mRNA_SJG.gff3 -G -o merged Pgenerosa_merged.gtf
  # echo "GFFcompare complete, Starting gene count matrix assembly..." $(date)

  # Compile gene count matrix from GTFs
  "${stringtie_dir}/prepDE.py" \
    -g onerka_gene_count_matrix.csv \
    -i listGTF.txt
  echo "Gene count matrix compiled." "$(date)"
else
  printf 'cannot enter %s\n' "${project_dir}/hisat2_bam" >&2
fi
Stringtie merge complete Mon Aug 8 17:25:55 PDT 2022
Gene count matrix compiled. Mon Aug 8 17:26:09 PDT 2022
