Differentially Expressed Genes

Reads

# List every file in the pycno2021 reads directory (fastp-trimmed R1/R2 FASTQ files).
reads_dir=/home/shared/8TB_HDD_02/graceac9/data/pycno2021
ls "${reads_dir}"/*
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-19_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-19_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-23_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-23_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-24_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-24_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-34_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-34_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-35_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-35_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-36_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-36_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-37_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-37_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-38_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-38_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-39_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-39_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-40_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-40_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-42_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-42_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-43_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-43_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-48_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-48_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-49_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-49_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-52_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-52_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-54_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-54_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-56_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-56_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-57_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-57_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-58_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-58_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-59_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-59_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-61_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-61_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-63_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-63_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-64_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-64_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-67_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-67_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-69_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-69_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-71_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-71_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-73_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-73_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-75_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-75_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-76_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-76_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-78_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-78_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-81_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-81_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-83_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-83_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
# Run FastQC on every trimmed read file and collect the reports in the
# analysis directory.
# FastQC does not create its output directory (-o) itself, so make sure it
# exists before starting; mkdir -p is a no-op when it is already there.
mkdir -p ../analyses/13-hisat-deseq2/
/home/shared/FastQC-0.12.1/fastqc \
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/*fq.gz \
-t 36 \
-o ../analyses/13-hisat-deseq2/
# Load conda's bash integration into this shell, then activate an environment.
# No environment name is given, so conda's default one is used
# (presumably where MultiQC is installed — confirm).
eval "$(/opt/anaconda/anaconda3/bin/conda shell.bash hook)"
conda activate
# Show which multiqc will be run. `command -v` is a shell builtin, so unlike
# `which` it reports exactly what this shell would execute.
command -v multiqc


# Summarise the reports found in the analysis directory into a MultiQC report
# written to that same directory.
multiqc ../analyses/13-hisat-deseq2/ \
-o ../analyses/13-hisat-deseq2/

Genome

https://www.ncbi.nlm.nih.gov/datasets/genome/GCA_032158295.1/

# Download the NCBI dataset package for assembly GCA_032158295.1 into ../data.
# Stop if the directory change fails; otherwise the archive would be written
# to whatever directory the shell happens to be in.
cd ../data || exit 1

/home/shared/datasets download genome accession GCA_032158295.1 --include gff3,rna,cds,protein,genome,seq-report
# Unpack the downloaded dataset archive inside ../data and list what the
# assembly folder contains.
# Stop if the directory change fails so unzip never runs in the wrong place.
cd ../data || exit 1
unzip ncbi_dataset.zip
ls ../data/ncbi_dataset/data/GCA_032158295.1
GCA_032158295.1_ASM3215829v1_genomic.fna
sequence_report.jsonl

Annotation files

# Preview the first lines of the annotation GTF, then of the genome FASTA.
# Each file gets its own head call, so no file-name banners are printed.
for preview_file in \
  ../analyses/12-fix-gff/mod_augustus.gtf \
  ../data/ncbi_dataset/data/GCA_032158295.1/GCA_032158295.1_ASM3215829v1_genomic.fna
do
  head "${preview_file}"
done
CM063256.1  AUGUSTUS    gene    7811    10401   1   +   .   g37
CM063256.1  AUGUSTUS    transcript  7811    10401   1   +   .   g37.t1
CM063256.1  AUGUSTUS    start_codon 7811    7813    .   +   0   transcript_id "g37.t1"; gene_id "g37";
CM063256.1  AUGUSTUS    CDS 7811    9083    1   +   0   transcript_id "g37.t1"; gene_id "g37";
CM063256.1  AUGUSTUS    exon    7811    9083    .   +   .   transcript_id "g37.t1"; gene_id "g37";
CM063256.1  AUGUSTUS    intron  9084    9778    1   +   .   transcript_id "g37.t1"; gene_id "g37";
CM063256.1  AUGUSTUS    CDS 9779    10401   1   +   2   transcript_id "g37.t1"; gene_id "g37";
CM063256.1  AUGUSTUS    exon    9779    10401   .   +   .   transcript_id "g37.t1"; gene_id "g37";
CM063256.1  AUGUSTUS    stop_codon  10399   10401   .   +   0   transcript_id "g37.t1"; gene_id "g37";
CM063256.1  AUGUSTUS    gene    13619   39786   0.77    -   .   g38
>CM063243.1 Pycnopodia helianthoides isolate M0D057908R chromosome 1, whole genome shotgun sequence
gttaaaataatttgaatattgGATTAGTTTCAAACCCTCCCAGATCCTTCTAGATCCTCTTTGTTGAAATacaggattca
gaaggactgagggctgtaggcccaaatgacagttgcttatcactgggtcaggcatacagTAGGGCAGatgggtgatggga
ttgtgcctttgatgatacaagcatggtaaggggcacacagtcagagtatagcATGACAAATTAATTTGTCTAAAAGGCCA
ACACAGATTTaacctttgaaaaatataagcaGAATATTAAACaaccaatggcagccattttttttaatcttgaaaattgc
ttgttattctacctgtaTTGTTTGACAAGACAAACCTGCATTGAAAAATaacctttgaaaaatataagcaGAATATTAAA
CaaccaatggcagccattttttttaatcttgaaaattgcttgttactcTACCTGTATTGTTTGACAGGACAAATCAGCAT
ATGACCAAAAGCTAaatgtctttctcatgacatactctgtgtGCCAAGGtctatgcttgtatcatcaaaggcacaattct
cCCAAATCCCTATCTGCCCCAGCATAGGATAATGGAAGATTGAGCTCACCTGTAGGCAAAAAGGACTATGGGTAAATATA
AATATGCTCgtctttttgtaaaattgcaaaaatgcaaacacaATGTACGTCTAATTGTAAAACGTACAAGAagtaaatac

Hisat

# Derive the exon table and the splice-site table from the annotation GTF
# using the helper scripts shipped with HISAT2.
hisat2_home=/home/shared/hisat2-2.2.1
annotation_gtf=../analyses/12-fix-gff/mod_augustus.gtf
out_dir=../analyses/13-hisat-deseq2

"${hisat2_home}/hisat2_extract_exons.py" "${annotation_gtf}" \
  > "${out_dir}/m_exon.tab"

# The file name spelling ("spice") is kept as-is: the hisat2-build step reads
# this exact path.
"${hisat2_home}/hisat2_extract_splice_sites.py" "${annotation_gtf}" \
  > "${out_dir}/m_spice_sites.tab"
# Keep large HISAT2 artifacts out of git.
# The original GCF* entry is retained for any existing files with that prefix.
echo "13-hisat-deseq2/GCF*" >> ../analyses/.gitignore
# The index in this analysis is written as GCA_032158295.index*, which GCF*
# does not match, so ignore the GCA prefix as well.
echo "13-hisat-deseq2/GCA*" >> ../analyses/.gitignore
# The --un-conc output of the alignment step is named <sample>_unmapped_reads*.fastq,
# which "GCF**fastq" never matched; ignore every FASTQ in this directory.
echo "13-hisat-deseq2/*fastq" >> ../analyses/.gitignore
# Build the HISAT2 index for the GCA_032158295.1 genome, using the exon and
# splice-site tables extracted from the annotation.
# Usage is `hisat2-build [options] <reference_in> <ht2_base>`: options come
# first, followed by exactly two positional arguments. The annotation GTF that
# used to trail the command was a third positional argument with no role in
# that usage, so it has been dropped; the annotation reaches the index only
# through --exon and --ss.
# Standard error is captured in hisat2-build_stats.txt.
/home/shared/hisat2-2.2.1/hisat2-build \
--exon ../analyses/13-hisat-deseq2/m_exon.tab \
--ss ../analyses/13-hisat-deseq2/m_spice_sites.tab \
-p 20 \
../data/ncbi_dataset/data/GCA_032158295.1/GCA_032158295.1_ASM3215829v1_genomic.fna \
../analyses/13-hisat-deseq2/GCA_032158295.index \
2> ../analyses/13-hisat-deseq2/hisat2-build_stats.txt
# Ignore the SAM files written to the analysis directory.
printf '%s\n' '13-hisat-deseq2/*sam' >> ../analyses/.gitignore
# Print the sample ID of every R1 read file (file name minus directory and
# R1 suffix), as a preview of the names the alignment step will iterate over.
# basename already prints one name per line, so the former trailing
# `xargs -I{} echo {}` stage only re-echoed its input and has been removed.
find /home/shared/8TB_HDD_02/graceac9/data/pycno2021/*_R1_001.fastq.gz.fastp-trim.20220810.fq.gz \
| xargs basename -s _R1_001.fastq.gz.fastp-trim.20220810.fq.gz
PSC-19
PSC-23
PSC-24
PSC-34
PSC-35
PSC-36
PSC-37
PSC-38
PSC-39
PSC-40
PSC-42
PSC-43
PSC-48
PSC-49
PSC-52
PSC-54
PSC-56
PSC-57
PSC-58
PSC-59
PSC-61
PSC-63
PSC-64
PSC-67
PSC-69
PSC-71
PSC-73
PSC-75
PSC-76
PSC-78
PSC-81
PSC-83

keeping unmapped reads

# Align every paired-end sample against the GCA_032158295 HISAT2 index.
#   stage 1 (find):           list the R1 read files;
#   stage 2 (xargs basename): strip directory and R1 suffix, leaving the sample ID;
#   stage 3 (xargs sh -c):    run hisat2 once per sample ID; {} is replaced by
#                             the ID everywhere inside the quoted command.
# Files written per sample under ../analyses/13-hisat-deseq2/:
#   <sample>_03.sam                 alignments (-S)
#   <sample>_unmapped_reads.fastq   path handed to --un-conc
#   <sample>_hisat03.stdout/.stderr captured output streams of hisat2
# NOTE(review): {} is substituted textually into the sh -c string, so sample
# IDs must not contain quotes, spaces or other shell metacharacters.
find /home/shared/8TB_HDD_02/graceac9/data/pycno2021/*_R1_001.fastq.gz.fastp-trim.20220810.fq.gz \
| xargs -I{} basename -s _R1_001.fastq.gz.fastp-trim.20220810.fq.gz {} \
| xargs -I{} sh -c '/home/shared/hisat2-2.2.1/hisat2 \
-x ../analyses/13-hisat-deseq2/GCA_032158295.index \
--dta \
-p 32 \
-1 /home/shared/8TB_HDD_02/graceac9/data/pycno2021/{}_R1_001.fastq.gz.fastp-trim.20220810.fq.gz \
-2 /home/shared/8TB_HDD_02/graceac9/data/pycno2021/{}_R2_001.fastq.gz.fastp-trim.20220810.fq.gz \
-S ../analyses/13-hisat-deseq2/{}_03.sam \
--un-conc ../analyses/13-hisat-deseq2/{}_unmapped_reads.fastq \
> ../analyses/13-hisat-deseq2/{}_hisat03.stdout 2> ../analyses/13-hisat-deseq2/{}_hisat03.stderr'

Explanation

xargs -I{}: This option replaces every {} in the command with a line of output from the previous command. It is used twice: first with basename, to strip the directory and suffix from each filename, and second, to construct and execute the hisat2 command for each resulting sample name.

sh -c: This is used to execute a complex command within xargs. It’s necessary because the output redirection (>, 2>) is shell functionality, and without sh -c, xargs wouldn’t handle it correctly.

Output Redirection:

> ../analyses/13-hisat-deseq2/{}_hisat03.stdout: Redirects the standard output to a unique file for each sample.

2> ../analyses/13-hisat-deseq2/{}_hisat03.stderr: Redirects the standard error to a different unique file for each sample.

This setup ensures that the output from each sample’s alignment process is neatly organized into separate files, making it easier to manage and debug individual runs.

# Ignore BAM files and anything else with "bam" in its name in the analysis
# directory. Both patterns are appended in a single write.
printf '%s\n' \
  '13-hisat-deseq2/*bam' \
  '13-hisat-deseq2/*bam*' \
  >> ../analyses/.gitignore
# Convert every SAM file in the analysis directory to BAM, sort it, and index
# the sorted BAM. For <name>.sam this produces <name>.bam (unsorted
# intermediate, left in place) and <name>.sorted.bam plus its index.
for samfile in ../analyses/13-hisat-deseq2/*.sam; do
  # When no SAM file exists the pattern stays literal; skip it rather than
  # handing samtools a path that does not exist.
  [ -e "$samfile" ] || continue
  bamfile="${samfile%.sam}.bam"
  sorted_bamfile="${samfile%.sam}.sorted.bam"
  # Chain the steps so sort/index never run on the output of a failed step,
  # and report the affected sample instead of failing silently.
  if ! {
    /home/shared/samtools-1.12/samtools view -bS -@ 20 "$samfile" > "$bamfile" \
      && /home/shared/samtools-1.12/samtools sort -@ 20 "$bamfile" -o "$sorted_bamfile" \
      && /home/shared/samtools-1.12/samtools index -@ 20 "$sorted_bamfile"
  }; then
    echo "samtools conversion failed for ${samfile}" >&2
  fi
done

,

# Free disk space by deleting SAM files, but only those whose sorted BAM
# exists and is non-empty. A SAM whose conversion failed is kept so the
# alignment does not have to be recomputed.
for samfile in ../analyses/13-hisat-deseq2/*sam; do
  # Unmatched pattern stays literal; nothing to delete in that case.
  [ -e "$samfile" ] || continue
  if [ -s "${samfile%.sam}.sorted.bam" ]; then
    rm -- "$samfile"
  else
    echo "keeping ${samfile}: no sorted BAM found for it" >&2
  fi
done
# Report how many sorted BAM files are present.
ls ../analyses/13-hisat-deseq2/*sorted.bam | wc -l

Stringtie

# Ignore the GTF files written to the analysis directory.
printf '%s\n' '13-hisat-deseq2/*gtf' >> ../analyses/.gitignore
# Pass the annotation through gffread and store the result in the analysis
# directory, where the StringTie step reads it via -G.
# NOTE(review): -T is gffread's GTF-output switch (per its usage text), yet
# the output file carries a .gff extension — confirm the name is intentional.
gffread_bin=/home/shared/gffread-0.12.7.Linux_x86_64/gffread
annotation_in=../analyses/12-fix-gff/mod_augustus.gtf
annotation_out=../analyses/13-hisat-deseq2/mod_augustus.gff

"${gffread_bin}" "${annotation_in}" -T -o "${annotation_out}"
# Run StringTie on every sorted BAM in the analysis directory, guided by the
# converted annotation (-G). For <name>.sorted.bam the result is <name>.gtf in
# the same directory.
# A plain glob loop replaces the former find | xargs | sh -c pipeline, so the
# sample name is passed as a quoted argument instead of being pasted into a
# shell command string.
# NOTE(review): per the StringTie manual, -B writes its *.ctab tables into the
# directory of the -o file; all samples share one directory here, so confirm
# whether per-sample tables are needed.
stringtie_bin=/home/shared/stringtie-2.2.1.Linux_x86_64/stringtie
work_dir=../analyses/13-hisat-deseq2

for sorted_bam in "${work_dir}"/*sorted.bam; do
  # Unmatched pattern stays literal; skip it.
  [ -e "${sorted_bam}" ] || continue
  sample=${sorted_bam##*/}
  sample=${sample%.sorted.bam}
  "${stringtie_bin}" \
    -p 36 \
    -eB \
    -G "${work_dir}/mod_augustus.gff" \
    -o "${work_dir}/${sample}.gtf" \
    "${sorted_bam}"
done