# List the input read files for this workflow. Judging by the file names these
# are paired (R1/R2) gzipped FASTQ files, presumably already trimmed with fastp.
ls /home/shared/8TB_HDD_02/graceac9/data/pycno2021/*
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-19_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-19_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-23_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-23_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-24_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-24_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-34_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-34_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-35_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-35_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-36_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-36_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-37_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-37_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-38_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-38_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-39_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-39_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-40_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-40_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-42_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-42_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-43_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-43_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-48_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-48_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-49_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-49_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-52_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-52_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-54_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-54_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-56_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-56_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-57_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-57_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-58_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-58_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-59_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-59_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-61_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-61_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-63_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-63_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-64_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-64_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-67_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-67_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-69_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-69_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-71_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-71_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-73_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-73_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-75_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-75_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-76_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-76_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-78_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-78_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-81_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-81_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-83_R1_001.fastq.gz.fastp-trim.20220810.fq.gz
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/PSC-83_R2_001.fastq.gz.fastp-trim.20220810.fq.gz
# Run FastQC on every *fq.gz file in the read directory, using 36 threads (-t),
# and write the reports to the analysis directory (-o).
# FastQC does not create its output directory, so make sure it exists first;
# only start FastQC if the directory is available.
mkdir -p ../analyses/13-hisat-deseq2/ \
&& /home/shared/FastQC-0.12.1/fastqc \
/home/shared/8TB_HDD_02/graceac9/data/pycno2021/*fq.gz \
-t 36 \
-o ../analyses/13-hisat-deseq2/
# Load conda's shell integration into this shell, then activate the default
# environment (no environment name is given to `conda activate`).
eval "$(/opt/anaconda/anaconda3/bin/conda shell.bash hook)"
conda activate
# Show which multiqc will be used. `command -v` is the shell builtin equivalent
# of `which`, and its exit status lets us skip the run when multiqc is missing.
if command -v multiqc; then
  # Summarise the reports found in the analysis directory and write the
  # MultiQC output (-o) back into that same directory.
  multiqc ../analyses/13-hisat-deseq2/ \
    -o ../analyses/13-hisat-deseq2/
else
  echo "multiqc not found on PATH after 'conda activate'" >&2
fi
https://www.ncbi.nlm.nih.gov/datasets/genome/GCA_032158295.1/
# Download the NCBI genome package for assembly GCA_032158295.1, requesting the
# file types listed in --include. The download runs only if the directory
# change succeeds, so a missing ../data cannot scatter files elsewhere.
cd ../data \
  && /home/shared/datasets download genome accession GCA_032158295.1 --include gff3,rna,cds,protein,genome,seq-report
# Unpack the downloaded archive inside ../data; unzip runs only if the
# directory change succeeds.
cd ../data \
  && unzip ncbi_dataset.zip
# Show which files the package actually contained for this accession.
ls ../data/ncbi_dataset/data/GCA_032158295.1
GCA_032158295.1_ASM3215829v1_genomic.fna
sequence_report.jsonl
# Preview the first 10 lines of the annotation (GTF) and of the genome FASTA,
# e.g. to compare the sequence names used by the two files.
for preview_file in \
  ../analyses/12-fix-gff/mod_augustus.gtf \
  ../data/ncbi_dataset/data/GCA_032158295.1/GCA_032158295.1_ASM3215829v1_genomic.fna
do
  head -n 10 "$preview_file"
done
CM063256.1 AUGUSTUS gene 7811 10401 1 + . g37
CM063256.1 AUGUSTUS transcript 7811 10401 1 + . g37.t1
CM063256.1 AUGUSTUS start_codon 7811 7813 . + 0 transcript_id "g37.t1"; gene_id "g37";
CM063256.1 AUGUSTUS CDS 7811 9083 1 + 0 transcript_id "g37.t1"; gene_id "g37";
CM063256.1 AUGUSTUS exon 7811 9083 . + . transcript_id "g37.t1"; gene_id "g37";
CM063256.1 AUGUSTUS intron 9084 9778 1 + . transcript_id "g37.t1"; gene_id "g37";
CM063256.1 AUGUSTUS CDS 9779 10401 1 + 2 transcript_id "g37.t1"; gene_id "g37";
CM063256.1 AUGUSTUS exon 9779 10401 . + . transcript_id "g37.t1"; gene_id "g37";
CM063256.1 AUGUSTUS stop_codon 10399 10401 . + 0 transcript_id "g37.t1"; gene_id "g37";
CM063256.1 AUGUSTUS gene 13619 39786 0.77 - . g38
>CM063243.1 Pycnopodia helianthoides isolate M0D057908R chromosome 1, whole genome shotgun sequence
gttaaaataatttgaatattgGATTAGTTTCAAACCCTCCCAGATCCTTCTAGATCCTCTTTGTTGAAATacaggattca
gaaggactgagggctgtaggcccaaatgacagttgcttatcactgggtcaggcatacagTAGGGCAGatgggtgatggga
ttgtgcctttgatgatacaagcatggtaaggggcacacagtcagagtatagcATGACAAATTAATTTGTCTAAAAGGCCA
ACACAGATTTaacctttgaaaaatataagcaGAATATTAAACaaccaatggcagccattttttttaatcttgaaaattgc
ttgttattctacctgtaTTGTTTGACAAGACAAACCTGCATTGAAAAATaacctttgaaaaatataagcaGAATATTAAA
CaaccaatggcagccattttttttaatcttgaaaattgcttgttactcTACCTGTATTGTTTGACAGGACAAATCAGCAT
ATGACCAAAAGCTAaatgtctttctcatgacatactctgtgtGCCAAGGtctatgcttgtatcatcaaaggcacaattct
cCCAAATCCCTATCTGCCCCAGCATAGGATAATGGAAGATTGAGCTCACCTGTAGGCAAAAAGGACTATGGGTAAATATA
AATATGCTCgtctttttgtaaaattgcaaaaatgcaaacacaATGTACGTCTAATTGTAAAACGTACAAGAagtaaatac
# Derive the exon and splice-site tables from the annotation with the helper
# scripts shipped with HISAT2. Both tables are later passed to hisat2-build
# (--exon / --ss).
annotation_gtf=../analyses/12-fix-gff/mod_augustus.gtf

/home/shared/hisat2-2.2.1/hisat2_extract_exons.py "$annotation_gtf" \
  > ../analyses/13-hisat-deseq2/m_exon.tab

# NOTE: "m_spice_sites.tab" (sic) is kept as-is because the hisat2-build call
# in this file reads exactly this path.
/home/shared/hisat2-2.2.1/hisat2_extract_splice_sites.py "$annotation_gtf" \
  > ../analyses/13-hisat-deseq2/m_spice_sites.tab
# Keep the large HISAT2 index files and the unmapped-read FASTQ files out of git.
# The index built in this directory is named GCA_032158295.index, so a "GCF*"
# pattern never matches it; the unmapped reads are written as
# <sample>_unmapped_reads*.fastq, which "*fastq" covers.
echo "13-hisat-deseq2/GCA*" >> ../analyses/.gitignore
echo "13-hisat-deseq2/*fastq" >> ../analyses/.gitignore
# Build the HISAT2 index for the genome FASTA.
#   positional 1: reference FASTA
#   positional 2: base name of the index files to write
#   --exon / --ss: exon and splice-site tables derived from the annotation
#   -p 20: number of threads
# hisat2-build's documented usage takes exactly these two positional arguments;
# the annotation reaches it only through --exon/--ss, so the GTF is not passed
# as an extra trailing argument.
# stderr is captured in hisat2-build_stats.txt.
/home/shared/hisat2-2.2.1/hisat2-build \
../data/ncbi_dataset/data/GCA_032158295.1/GCA_032158295.1_ASM3215829v1_genomic.fna \
../analyses/13-hisat-deseq2/GCA_032158295.index \
--exon ../analyses/13-hisat-deseq2/m_exon.tab \
--ss ../analyses/13-hisat-deseq2/m_spice_sites.tab \
-p 20 \
2> ../analyses/13-hisat-deseq2/hisat2-build_stats.txt
# Keep SAM files in the analysis directory out of git.
printf '%s\n' "13-hisat-deseq2/*sam" >> ../analyses/.gitignore

# Print one sample ID per line: take every R1 file and strip the directory and
# the common R1 suffix from its name.
find /home/shared/8TB_HDD_02/graceac9/data/pycno2021/*_R1_001.fastq.gz.fastp-trim.20220810.fq.gz \
  | xargs basename -s _R1_001.fastq.gz.fastp-trim.20220810.fq.gz \
  | xargs -I{} echo {}
PSC-19
PSC-23
PSC-24
PSC-34
PSC-35
PSC-36
PSC-37
PSC-38
PSC-39
PSC-40
PSC-42
PSC-43
PSC-48
PSC-49
PSC-52
PSC-54
PSC-56
PSC-57
PSC-58
PSC-59
PSC-61
PSC-63
PSC-64
PSC-67
PSC-69
PSC-71
PSC-73
PSC-75
PSC-76
PSC-78
PSC-81
PSC-83
Align each sample's read pairs with HISAT2, keeping the read pairs that fail to align concordantly (--un-conc).
# Align every sample's paired-end reads against the GCA_032158295 HISAT2 index.
#   1. find lists the R1 files; basename reduces each one to its sample ID.
#   2. xargs starts one `sh -c` per sample ID. The ID is handed to the inner
#      shell as the positional parameter $1 (the trailing {}), rather than being
#      pasted into the script text, so an unusual file name cannot be
#      interpreted as shell syntax. Every use of $1 is quoted.
#   3. Per sample this writes the SAM alignment (-S), the read pairs that did
#      not align concordantly (--un-conc), and hisat2's stdout and stderr in
#      separate files.
# --dta requests alignments suited to downstream transcript assemblers;
# -p 32 sets the thread count.
find /home/shared/8TB_HDD_02/graceac9/data/pycno2021/*_R1_001.fastq.gz.fastp-trim.20220810.fq.gz \
| xargs -I{} basename -s _R1_001.fastq.gz.fastp-trim.20220810.fq.gz {} \
| xargs -I{} sh -c '/home/shared/hisat2-2.2.1/hisat2 \
-x ../analyses/13-hisat-deseq2/GCA_032158295.index \
--dta \
-p 32 \
-1 "/home/shared/8TB_HDD_02/graceac9/data/pycno2021/${1}_R1_001.fastq.gz.fastp-trim.20220810.fq.gz" \
-2 "/home/shared/8TB_HDD_02/graceac9/data/pycno2021/${1}_R2_001.fastq.gz.fastp-trim.20220810.fq.gz" \
-S "../analyses/13-hisat-deseq2/${1}_03.sam" \
--un-conc "../analyses/13-hisat-deseq2/${1}_unmapped_reads.fastq" \
> "../analyses/13-hisat-deseq2/${1}_hisat03.stdout" 2> "../analyses/13-hisat-deseq2/${1}_hisat03.stderr"' sh {}
Explanation: xargs -I{} replaces every {} in the command with one line of output from the previous stage of the pipeline. It is used twice: first to strip the suffix from each file name (with basename), and second to construct and execute the hisat2 command for each sample.
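sh -c: This runs a complex command string in its own shell for each sample. It is necessary because output redirection (>, 2>) is shell functionality: without sh -c, the outer shell would apply the redirection once to the whole xargs command (with a literal {} in the file name) instead of once per sample.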
Output Redirection:
> ../analyses/13-hisat-deseq2/{}_hisat03.stdout: Redirects the standard output to a unique file for each sample. 2> ../analyses/13-hisat-deseq2/{}_hisat03.stderr: Redirects the standard error to a different unique file for each sample. This setup ensures that the output from each sample’s alignment process is neatly organized into separate files, making it easier to manage and debug individual runs.
# Keep BAM files and their companions (anything containing "bam" in the name,
# such as index files) in the analysis directory out of git.
printf '%s\n' \
  "13-hisat-deseq2/*bam" \
  "13-hisat-deseq2/*bam*" \
  >> ../analyses/.gitignore
# Convert every SAM file in the analysis directory to BAM, sort it, and index
# the sorted BAM. For <name>.sam this produces <name>.bam, <name>.sorted.bam
# and an index for the sorted file. Each samtools call uses 20 extra threads.
samtools=/home/shared/samtools-1.12/samtools
for samfile in ../analyses/13-hisat-deseq2/*.sam; do
  # An unmatched glob is left as the literal pattern; skip it instead of
  # handing samtools a file that does not exist.
  [ -e "$samfile" ] || continue

  bamfile="${samfile%.sam}.bam"
  sorted_bamfile="${samfile%.sam}.sorted.bam"

  # Stop working on this sample as soon as one step fails, so a truncated
  # BAM is never sorted or indexed as if it were complete.
  if ! "$samtools" view -bS -@ 20 "$samfile" > "$bamfile"; then
    echo "samtools view failed for $samfile" >&2
    continue
  fi
  if ! "$samtools" sort -@ 20 "$bamfile" -o "$sorted_bamfile"; then
    echo "samtools sort failed for $bamfile" >&2
    # Do not leave a partial sorted BAM behind that later steps could pick up.
    rm -f -- "$sorted_bamfile"
    continue
  fi
  if ! "$samtools" index -@ 20 "$sorted_bamfile"; then
    echo "samtools index failed for $sorted_bamfile" >&2
  fi
done
# Delete each SAM file only once its sorted BAM (<name>.sorted.bam) exists and
# is non-empty, so a failed conversion cannot cost the alignment itself.
for samfile in ../analyses/13-hisat-deseq2/*.sam; do
  # Skip the literal pattern when no SAM files are present.
  [ -e "$samfile" ] || continue
  if [ -s "${samfile%.sam}.sorted.bam" ]; then
    rm -- "$samfile"
  else
    echo "keeping $samfile: ${samfile%.sam}.sorted.bam is missing or empty" >&2
  fi
done
# Sanity check: count the sorted BAM files that are present.
ls ../analyses/13-hisat-deseq2/*sorted.bam | wc -l
# Keep GTF files in the analysis directory out of git.
printf '%s\n' "13-hisat-deseq2/*gtf" >> ../analyses/.gitignore

# Re-write the annotation with gffread. -T selects GTF as the output format;
# the output nevertheless keeps the name mod_augustus.gff, which is the path
# the StringTie step in this file passes to -G.
/home/shared/gffread-0.12.7.Linux_x86_64/gffread \
  ../analyses/12-fix-gff/mod_augustus.gtf \
  -T \
  -o ../analyses/13-hisat-deseq2/mod_augustus.gff
# Quantify the reference transcripts in every sorted BAM with StringTie.
#   1. find lists the sorted BAMs; basename reduces each to its sample ID.
#   2. xargs starts one `sh -c` per sample ID, passed to the inner shell as the
#      quoted positional parameter $1 (the trailing {}).
# StringTie options:
#   -p 36  threads
#   -e     only estimate the transcripts given with -G
#   -G     reference annotation
#   -o     per-sample output GTF (<sample>.gtf, same path as before)
#   -b     write the Ballgown *.ctab tables into the given directory.
#          -B would place them next to the -o file; because every sample's GTF
#          goes into the same directory and the table names are fixed, each
#          sample would overwrite the previous sample's tables. A per-sample
#          <sample>_ballgown directory keeps them apart.
# NOTE: the <sample>_ballgown directories are not covered by the .gitignore
# patterns written elsewhere in this file.
find ../analyses/13-hisat-deseq2/*sorted.bam \
| xargs basename -s .sorted.bam | xargs -I{} \
sh -c 'mkdir -p "../analyses/13-hisat-deseq2/${1}_ballgown" \
&& /home/shared/stringtie-2.2.1.Linux_x86_64/stringtie \
-p 36 \
-e \
-b "../analyses/13-hisat-deseq2/${1}_ballgown" \
-G ../analyses/13-hisat-deseq2/mod_augustus.gff \
-o "../analyses/13-hisat-deseq2/${1}.gtf" \
"../analyses/13-hisat-deseq2/${1}.sorted.bam"' sh {}