This notebook will download trimmed WGBS FastQs (trimmed on 20220829 (Sam’s Notebook entry)) and align them to the C. virginica genome using Bismark (Krueger and Andrews 2011) and Bowtie2 (Langmead et al. 2018; Langmead and Salzberg 2012). Alignment results will be summarized by MultiQC (Ewels et al. 2016).
The expected outputs will be:
*_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
: A text file summarizing the alignment input and results. Despite the R1
naming, these reports are based on paired reads; the R1
naming is a quirk of Bismark.
*_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
: A BAM alignment. Despite the R1
naming, these BAMs are paired reads; the R1
naming is a quirk of Bismark.
bismark_summary.txt
: An overall summary report of the alignment process. Essentially, this is all of the individual *report.txt
files combined into a single file.
multiqc_report.html
: A summary report of the alignment results generated by MultiQC, in HTML format.
Due to the large file sizes of BAMs, these cannot be hosted in the ceasmallr GitHub repo. As such, these files are available for download here:
This allows usage of Bash variables across R Markdown chunks.
# Write every shared setting to .bashvars so that later chunks can source it.
# Single-quoted entries keep their ${...} references literal; those references
# are only expanded when .bashvars is sourced.
{
  # Header and data directories
  printf '%s\n' \
    '#### Assign Variables ####' \
    '' \
    '# Data directories' \
    'export repo_dir=/home/shared/8TB_HDD_01/sam/gitrepos/ceasmallr' \
    'export output_dir_top=${repo_dir}/output/02.00-bismark-bowtie2-alignment' \
    'export trimmed_reads_url="https://gannet.fish.washington.edu/Atumefaciens/20220826-cvir-larvae_zygote-BSseq-fastp_trimming/"' \
    'export trimmed_fastqs_dir="${output_dir_top}/trimmed-fastqs"' \
    ''

  # Bisulfite genome inputs
  printf '%s\n' \
    '# Input files' \
    'export bisulfite_genome_url="http://owl.fish.washington.edu/halfshell/genomic-databank/Cvirginica_v300_bisulfite.tar.gz"' \
    'export bisulfite_genome_gz="Cvirginica_v300_bisulfite.tar.gz"' \
    'export bisulfite_genome_dir="${repo_dir}/data/Cvirginica_v300"' \
    ''

  # Program locations
  printf '%s\n' \
    '# Paths to programs' \
    'export programs_dir="/home/shared"' \
    'export bismark_dir="${programs_dir}/Bismark-0.24.0"' \
    'export bowtie2_dir="${programs_dir}/bowtie2-2.4.4-linux-x86_64"' \
    'export multiqc="/home/sam/programs/mambaforge/bin/multiqc"' \
    'export samtools_dir="${programs_dir}/samtools-1.12"' \
    ''

  # Aligner settings
  printf '%s\n' \
    '# Program settings' \
    'export bowtie2_min_score="L,0,-0.6"' \
    ''

  # FastQ naming patterns (double-quoted here because the values themselves
  # carry single quotes)
  printf '%s\n' \
    '# Set FastQ filename patterns' \
    "export fastq_pattern='*.fq.gz'" \
    "export R1_fastq_pattern='*_R1_*.fq.gz'" \
    "export R2_fastq_pattern='*_R2_*.fq.gz'" \
    'export reads_basename="_001.fastp-trim.20220827.fq.gz"' \
    'export R1_reads_basename="_R1_001.fastp-trim.20220827.fq.gz"' \
    'export R2_reads_basename="_R2_001.fastp-trim.20220827.fq.gz"' \
    "export trimmed_fastq_pattern='*fastp-trim*.fq.gz'" \
    ''

  # CPU allocation
  printf '%s\n' \
    '# Set number of CPUs to use' \
    'export threads=4' \
    '# Bismark already spawns multiple instances and additional threads are multiplicative.' \
    'export bismark_threads=2' \
    ''

  # Empty arrays
  printf '%s\n' \
    '## Inititalize arrays' \
    'export fastq_array_R1=()' \
    'export fastq_array_R2=()' \
    'export trimmed_fastqs_array=()' \
    'export R1_names_array=()' \
    'export R2_names_array=()' \
    ''

  # Separator line used when printing
  printf '%s\n' \
    '# Print formatting' \
    'export line="--------------------------------------------------------"' \
    ''
} > .bashvars

# Display the generated file
cat .bashvars
#### Assign Variables ####
# Data directories
export repo_dir=/home/shared/8TB_HDD_01/sam/gitrepos/ceasmallr
export output_dir_top=${repo_dir}/output/02.00-bismark-bowtie2-alignment
export trimmed_reads_url="https://gannet.fish.washington.edu/Atumefaciens/20220826-cvir-larvae_zygote-BSseq-fastp_trimming/"
export trimmed_fastqs_dir="${output_dir_top}/trimmed-fastqs"
# Input files
export bisulfite_genome_url="http://owl.fish.washington.edu/halfshell/genomic-databank/Cvirginica_v300_bisulfite.tar.gz"
export bisulfite_genome_gz="Cvirginica_v300_bisulfite.tar.gz"
export bisulfite_genome_dir="${repo_dir}/data/Cvirginica_v300"
# Paths to programs
export programs_dir="/home/shared"
export bismark_dir="${programs_dir}/Bismark-0.24.0"
export bowtie2_dir="${programs_dir}/bowtie2-2.4.4-linux-x86_64"
export multiqc="/home/sam/programs/mambaforge/bin/multiqc"
export samtools_dir="${programs_dir}/samtools-1.12"
# Program settings
export bowtie2_min_score="L,0,-0.6"
# Set FastQ filename patterns
export fastq_pattern='*.fq.gz'
export R1_fastq_pattern='*_R1_*.fq.gz'
export R2_fastq_pattern='*_R2_*.fq.gz'
export reads_basename="_001.fastp-trim.20220827.fq.gz"
export R1_reads_basename="_R1_001.fastp-trim.20220827.fq.gz"
export R2_reads_basename="_R2_001.fastp-trim.20220827.fq.gz"
export trimmed_fastq_pattern='*fastp-trim*.fq.gz'
# Set number of CPUs to use
export threads=4
# Bismark already spawns multiple instances and additional threads are multiplicative.
export bismark_threads=2
## Inititalize arrays
export fastq_array_R1=()
export fastq_array_R2=()
export trimmed_fastqs_array=()
export R1_names_array=()
export R2_names_array=()
# Print formatting
export line="--------------------------------------------------------"
# Load bash variables into memory
source .bashvars
# Download the bisulfite genome archive into the repo's data directory.
# Stop if repo_dir is unset or the directory cannot be entered, so the archive
# is never written to an unintended location.
cd "${repo_dir:?}/data" || exit 1
# --continue resumes a partially downloaded archive instead of starting over.
wget --quiet \
  --continue \
  "${bisulfite_genome_url}"
# List the directory contents, oldest first.
ls -ltrh
total 2.6G
-rw-r--r-- 1 sam sam 662M Jun 7 2018 Cvirginica_v300.fa
drwxr-xr-x 3 sam sam 4.0K Mar 7 2019 Cvirginica_v300
-rw-r--r-- 1 sam sam 2.0G Mar 7 2019 Cvirginica_v300_bisulfite.tar.gz
-rw-r--r-- 1 sam sam 307 Oct 23 12:29 README.md
-rw-r--r-- 1 sam sam 2.6K Oct 23 12:29 L18_larvae_meta.csv
-rw-r--r-- 1 sam sam 398 Oct 23 13:38 Cvirginica_v300.fa.fai
# Load bash variables into memory
source .bashvars
# Work from the data directory, where the archive is referenced by bare file
# name. Stop if repo_dir is unset or the directory cannot be entered, so tar
# never extracts into the wrong place.
cd "${repo_dir:?}/data" || exit 1
# Extract the archive only when the genome directory is not already present.
if [[ ! -d "${bisulfite_genome_dir}" ]]; then
  tar -xzf "${bisulfite_genome_gz}"
fi
# Show the layout of the genome directory.
tree "${bisulfite_genome_dir}"
/home/shared/8TB_HDD_01/sam/gitrepos/ceasmallr/data/Cvirginica_v300
├── Bisulfite_Genome
│ ├── CT_conversion
│ │ ├── BS_CT.1.bt2
│ │ ├── BS_CT.2.bt2
│ │ ├── BS_CT.3.bt2
│ │ ├── BS_CT.4.bt2
│ │ ├── BS_CT.rev.1.bt2
│ │ ├── BS_CT.rev.2.bt2
│ │ └── genome_mfa.CT_conversion.fa
│ └── GA_conversion
│ ├── BS_GA.1.bt2
│ ├── BS_GA.2.bt2
│ ├── BS_GA.3.bt2
│ ├── BS_GA.4.bt2
│ ├── BS_GA.rev.1.bt2
│ ├── BS_GA.rev.2.bt2
│ └── genome_mfa.GA_conversion.fa
├── Cvirginica_v300.fa
├── Cvirginica_v300.fa.fai
└── readme.txt
3 directories, 17 files
Reads are downloaded from https://gannet.fish.washington.edu/Atumefaciens/20220826-cvir-larvae_zygote-BSseq-fastp_trimming/
The --cut-dirs 2
option strips the leading directory components from the saved paths (i.e. Atumefaciens/20220826-cvir-larvae_zygote-BSseq-fastp_trimming/
) so that we just end up with the reads.
# Load bash variables into memory
source .bashvars
# Make output directory if it doesn't exist
mkdir --parents "${trimmed_fastqs_dir}"
# Run wget to retrieve FastQs and MD5 files.
# --recursive is required: --accept, --cut-dirs, --no-host-directories and
# --no-parent are recursive-retrieval options; without it wget only fetches the
# directory index page itself.
# NOTE(review): --no-check-certificate disables TLS certificate verification;
# confirm it is still needed for this server.
wget \
  --directory-prefix "${trimmed_fastqs_dir}" \
  --recursive \
  --no-check-certificate \
  --continue \
  --cut-dirs 2 \
  --no-host-directories \
  --no-parent \
  --quiet \
  --accept="${trimmed_fastq_pattern}, trimmed-fastq-checksums.md5" \
  "${trimmed_reads_url}"
# List what was downloaded.
ls -lh "${trimmed_fastqs_dir}"
total 469G
-rw-r--r-- 1 sam sam 1.7G Aug 27 2022 CF01-CM01-Zygote_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 1.8G Aug 27 2022 CF01-CM01-Zygote_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 6.8G Aug 27 2022 CF01-CM02-Larvae_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 7.1G Aug 27 2022 CF01-CM02-Larvae_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 11G Aug 27 2022 CF02-CM02-Zygote_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 11G Aug 27 2022 CF02-CM02-Zygote_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 6.6G Aug 27 2022 CF03-CM03-Zygote_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 6.7G Aug 27 2022 CF03-CM03-Zygote_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 6.4G Aug 27 2022 CF03-CM04-Larvae_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 6.7G Aug 27 2022 CF03-CM04-Larvae_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 6.3G Aug 27 2022 CF03-CM05-Larvae_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 6.5G Aug 27 2022 CF03-CM05-Larvae_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 6.8G Aug 27 2022 CF04-CM04-Zygote_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 7.0G Aug 27 2022 CF04-CM04-Zygote_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 8.7G Aug 27 2022 CF05-CM02-Larvae_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 9.1G Aug 27 2022 CF05-CM02-Larvae_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 7.5G Aug 27 2022 CF05-CM05-Zygote_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 7.7G Aug 27 2022 CF05-CM05-Zygote_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 7.4G Aug 27 2022 CF06-CM01-Zygote_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 7.6G Aug 27 2022 CF06-CM01-Zygote_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 6.0G Aug 27 2022 CF06-CM02-Larvae_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 6.3G Aug 27 2022 CF06-CM02-Larvae_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 11G Aug 27 2022 CF07-CM02-Zygote_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 12G Aug 27 2022 CF07-CM02-Zygote_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 7.5G Aug 27 2022 CF08-CM03-Zygote_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 7.7G Aug 27 2022 CF08-CM03-Zygote_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 3.1G Aug 27 2022 CF08-CM04-Larvae_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 3.2G Aug 27 2022 CF08-CM04-Larvae_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 4.8G Aug 27 2022 CF08-CM05-Larvae_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 4.9G Aug 27 2022 CF08-CM05-Larvae_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 6.4G Aug 27 2022 EF01-EM01-Zygote_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 6.7G Aug 27 2022 EF01-EM01-Zygote_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 7.0G Aug 27 2022 EF02-EM02-Zygote_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 7.2G Aug 27 2022 EF02-EM02-Zygote_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 6.6G Aug 27 2022 EF03-EM03-Zygote_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 6.8G Aug 27 2022 EF03-EM03-Zygote_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 7.1G Aug 27 2022 EF03-EM04-Larvae_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 7.3G Aug 27 2022 EF03-EM04-Larvae_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 7.4G Aug 27 2022 EF03-EM05-Larvae_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 8.0G Aug 27 2022 EF03-EM05-Larvae_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 7.5G Aug 27 2022 EF04-EM04-Zygote_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 7.8G Aug 27 2022 EF04-EM04-Zygote_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 7.8G Aug 27 2022 EF04-EM05-Larvae_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 8.2G Aug 27 2022 EF04-EM05-Larvae_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 5.9G Aug 27 2022 EF05-EM01-Larvae_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 6.1G Aug 27 2022 EF05-EM01-Larvae_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 6.4G Aug 27 2022 EF05-EM05-Zygote_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 6.6G Aug 27 2022 EF05-EM05-Zygote_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 12G Aug 27 2022 EF05-EM06-Larvae_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 13G Aug 27 2022 EF05-EM06-Larvae_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 7.9G Aug 27 2022 EF06-EM01-Larvae_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 8.3G Aug 27 2022 EF06-EM01-Larvae_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 6.1G Aug 27 2022 EF06-EM02-Larvae_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 6.4G Aug 27 2022 EF06-EM02-Larvae_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 8.2G Aug 27 2022 EF06-EM06-Larvae_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 8.4G Aug 27 2022 EF06-EM06-Larvae_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 13G Aug 27 2022 EF07-EM01-Zygote_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 14G Aug 27 2022 EF07-EM01-Zygote_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 7.0G Aug 27 2022 EF07-EM03-Larvae_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 7.3G Aug 27 2022 EF07-EM03-Larvae_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 8.2G Aug 27 2022 EF08-EM03-Larvae_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 8.5G Aug 27 2022 EF08-EM03-Larvae_R2_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 5.8G Aug 28 2022 EF08-EM04-Larvae_R1_001.fastp-trim.20220827.fq.gz
-rw-r--r-- 1 sam sam 6.0G Aug 28 2022 EF08-EM04-Larvae_R2_001.fastp-trim.20220827.fq.gz
-rw------- 1 sam sam 36K Nov 13 11:06 index.html.tmp
-rw-r--r-- 1 sam sam 5.3K Aug 28 2022 trimmed-fastq-checksums.md5
# Load bash variables into memory
source .bashvars
# md5sum resolves the file names in a checksum list relative to the current
# directory, so stop if the FastQ directory cannot be entered.
cd "${trimmed_fastqs_dir:?}" || exit 1
# Verify the downloaded files against every checksum list in this directory.
checksum_status=0
for file in *.md5
do
  # Keep checking the remaining lists, but remember that a failure occurred.
  md5sum --check "${file}" || checksum_status=1
done
# Fail the chunk when any checksum list did not verify cleanly.
(( checksum_status == 0 )) || exit 1
CF01-CM01-Zygote_R1_001.fastp-trim.20220827.fq.gz: OK
CF01-CM01-Zygote_R2_001.fastp-trim.20220827.fq.gz: OK
CF01-CM02-Larvae_R1_001.fastp-trim.20220827.fq.gz: OK
CF01-CM02-Larvae_R2_001.fastp-trim.20220827.fq.gz: OK
CF02-CM02-Zygote_R1_001.fastp-trim.20220827.fq.gz: OK
CF02-CM02-Zygote_R2_001.fastp-trim.20220827.fq.gz: OK
CF03-CM03-Zygote_R1_001.fastp-trim.20220827.fq.gz: OK
CF03-CM03-Zygote_R2_001.fastp-trim.20220827.fq.gz: OK
CF03-CM04-Larvae_R1_001.fastp-trim.20220827.fq.gz: OK
CF03-CM04-Larvae_R2_001.fastp-trim.20220827.fq.gz: OK
CF03-CM05-Larvae_R1_001.fastp-trim.20220827.fq.gz: OK
CF03-CM05-Larvae_R2_001.fastp-trim.20220827.fq.gz: OK
CF04-CM04-Zygote_R1_001.fastp-trim.20220827.fq.gz: OK
CF04-CM04-Zygote_R2_001.fastp-trim.20220827.fq.gz: OK
CF05-CM02-Larvae_R1_001.fastp-trim.20220827.fq.gz: OK
CF05-CM02-Larvae_R2_001.fastp-trim.20220827.fq.gz: OK
CF05-CM05-Zygote_R1_001.fastp-trim.20220827.fq.gz: OK
CF05-CM05-Zygote_R2_001.fastp-trim.20220827.fq.gz: OK
CF06-CM01-Zygote_R1_001.fastp-trim.20220827.fq.gz: OK
CF06-CM01-Zygote_R2_001.fastp-trim.20220827.fq.gz: OK
CF06-CM02-Larvae_R1_001.fastp-trim.20220827.fq.gz: OK
CF06-CM02-Larvae_R2_001.fastp-trim.20220827.fq.gz: OK
CF07-CM02-Zygote_R1_001.fastp-trim.20220827.fq.gz: OK
CF07-CM02-Zygote_R2_001.fastp-trim.20220827.fq.gz: OK
CF08-CM03-Zygote_R1_001.fastp-trim.20220827.fq.gz: OK
CF08-CM03-Zygote_R2_001.fastp-trim.20220827.fq.gz: OK
CF08-CM04-Larvae_R1_001.fastp-trim.20220827.fq.gz: OK
CF08-CM04-Larvae_R2_001.fastp-trim.20220827.fq.gz: OK
CF08-CM05-Larvae_R1_001.fastp-trim.20220827.fq.gz: OK
CF08-CM05-Larvae_R2_001.fastp-trim.20220827.fq.gz: OK
EF01-EM01-Zygote_R1_001.fastp-trim.20220827.fq.gz: OK
EF01-EM01-Zygote_R2_001.fastp-trim.20220827.fq.gz: OK
EF02-EM02-Zygote_R1_001.fastp-trim.20220827.fq.gz: OK
EF02-EM02-Zygote_R2_001.fastp-trim.20220827.fq.gz: OK
EF03-EM03-Zygote_R1_001.fastp-trim.20220827.fq.gz: OK
EF03-EM03-Zygote_R2_001.fastp-trim.20220827.fq.gz: OK
EF03-EM04-Larvae_R1_001.fastp-trim.20220827.fq.gz: OK
EF03-EM04-Larvae_R2_001.fastp-trim.20220827.fq.gz: OK
EF03-EM05-Larvae_R1_001.fastp-trim.20220827.fq.gz: OK
EF03-EM05-Larvae_R2_001.fastp-trim.20220827.fq.gz: OK
EF04-EM04-Zygote_R1_001.fastp-trim.20220827.fq.gz: OK
EF04-EM04-Zygote_R2_001.fastp-trim.20220827.fq.gz: OK
EF04-EM05-Larvae_R1_001.fastp-trim.20220827.fq.gz: OK
EF04-EM05-Larvae_R2_001.fastp-trim.20220827.fq.gz: OK
EF05-EM01-Larvae_R1_001.fastp-trim.20220827.fq.gz: OK
EF05-EM01-Larvae_R2_001.fastp-trim.20220827.fq.gz: OK
EF05-EM05-Zygote_R1_001.fastp-trim.20220827.fq.gz: OK
EF05-EM05-Zygote_R2_001.fastp-trim.20220827.fq.gz: OK
EF05-EM06-Larvae_R1_001.fastp-trim.20220827.fq.gz: OK
EF05-EM06-Larvae_R2_001.fastp-trim.20220827.fq.gz: OK
EF06-EM01-Larvae_R1_001.fastp-trim.20220827.fq.gz: OK
EF06-EM01-Larvae_R2_001.fastp-trim.20220827.fq.gz: OK
EF06-EM02-Larvae_R1_001.fastp-trim.20220827.fq.gz: OK
EF06-EM02-Larvae_R2_001.fastp-trim.20220827.fq.gz: OK
EF06-EM06-Larvae_R1_001.fastp-trim.20220827.fq.gz: OK
EF06-EM06-Larvae_R2_001.fastp-trim.20220827.fq.gz: OK
EF07-EM01-Zygote_R1_001.fastp-trim.20220827.fq.gz: OK
EF07-EM01-Zygote_R2_001.fastp-trim.20220827.fq.gz: OK
EF07-EM03-Larvae_R1_001.fastp-trim.20220827.fq.gz: OK
EF07-EM03-Larvae_R2_001.fastp-trim.20220827.fq.gz: OK
EF08-EM03-Larvae_R1_001.fastp-trim.20220827.fq.gz: OK
EF08-EM03-Larvae_R2_001.fastp-trim.20220827.fq.gz: OK
EF08-EM04-Larvae_R1_001.fastp-trim.20220827.fq.gz: OK
EF08-EM04-Larvae_R2_001.fastp-trim.20220827.fq.gz: OK
# Load bash variables into memory
source .bashvars
cd "${trimmed_fastqs_dir:?}" || exit 1
# Align each read pair with Bismark, one sample per invocation.
# Only the glob pattern is left unquoted, so the shell expands it to the R1
# FastQs (in sorted order); every other expansion is quoted.
# stderr from all invocations is collected in bismark_summary.txt.
for r1_fastq in "${trimmed_fastqs_dir}"/${R1_fastq_pattern}; do
  # Skip the literal, unexpanded pattern when no R1 FastQs are present.
  [[ -e "${r1_fastq}" ]] || continue
  # Sample name is the R1 file name with the shared R1 suffix removed.
  sample_name=$(basename -s "${R1_reads_basename}" "${r1_fastq}")
  "${bismark_dir}"/bismark \
    --path_to_bowtie2 "${bowtie2_dir}" \
    --genome "${bisulfite_genome_dir}" \
    --score_min "${bowtie2_min_score}" \
    --parallel "${bismark_threads}" \
    --non_directional \
    --samtools_path "${samtools_dir}" \
    --gzip \
    -p "${threads}" \
    -1 "${trimmed_fastqs_dir}/${sample_name}${R1_reads_basename}" \
    -2 "${trimmed_fastqs_dir}/${sample_name}${R2_reads_basename}" \
    --output_dir "${output_dir_top}"
done 2> "${output_dir_top}"/bismark_summary.txt
# Load bash variables into memory
source .bashvars
# List the alignment outputs. Stop if the output directory cannot be entered,
# rather than listing whichever directory the chunk started in.
cd "${output_dir_top:?}" || exit 1
ls -lh
total 184G
-rw-r--r-- 1 sam sam 1.2M Nov 6 15:20 bismark_summary.txt
-rw-r--r-- 1 sam sam 644M Oct 29 18:59 CF01-CM01-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Oct 29 18:59 CF01-CM01-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 7.9G Oct 30 00:16 CF01-CM02-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Oct 30 00:16 CF01-CM02-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 4.1G Oct 30 06:15 CF02-CM02-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Oct 30 06:15 CF02-CM02-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 4.9G Oct 30 10:37 CF03-CM03-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Oct 30 10:37 CF03-CM03-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 10G Oct 30 16:43 CF03-CM04-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Oct 30 16:43 CF03-CM04-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 7.1G Oct 30 21:29 CF03-CM05-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Oct 30 21:29 CF03-CM05-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 5.4G Oct 31 02:01 CF04-CM04-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Oct 31 02:01 CF04-CM04-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 11G Oct 31 08:52 CF05-CM02-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Oct 31 08:52 CF05-CM02-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 6.1G Oct 31 13:54 CF05-CM05-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Oct 31 13:54 CF05-CM05-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 5.3G Oct 31 18:45 CF06-CM01-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Oct 31 18:45 CF06-CM01-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 4.2G Oct 31 22:41 CF06-CM02-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Oct 31 22:41 CF06-CM02-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 3.7G Nov 1 04:50 CF07-CM02-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Nov 1 04:50 CF07-CM02-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 4.1G Nov 1 09:30 CF08-CM03-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Nov 1 09:30 CF08-CM03-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 98M Nov 1 10:59 CF08-CM04-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Nov 1 10:59 CF08-CM04-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 360M Nov 1 13:56 CF08-CM05-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Nov 1 13:56 CF08-CM05-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-rw-r-- 1 sam sam 6.3K Nov 6 19:46 checksums.md5
-rw-r--r-- 1 sam sam 4.7G Nov 1 18:09 EF01-EM01-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Nov 1 18:09 EF01-EM01-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 5.0G Nov 1 22:50 EF02-EM02-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Nov 1 22:50 EF02-EM02-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 3.3G Nov 2 02:53 EF03-EM03-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Nov 2 02:53 EF03-EM03-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 7.0G Nov 2 08:02 EF03-EM04-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Nov 2 08:02 EF03-EM04-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 7.0G Nov 2 13:24 EF03-EM05-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Nov 2 13:24 EF03-EM05-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 1.8G Nov 2 22:18 EF04-EM04-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Nov 2 22:18 EF04-EM04-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 9.5G Nov 3 12:42 EF04-EM05-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Nov 3 12:42 EF04-EM05-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 5.6G Nov 3 18:16 EF05-EM01-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Nov 3 18:16 EF05-EM01-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 1.1G Nov 3 22:02 EF05-EM05-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Nov 3 22:02 EF05-EM05-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 15G Nov 4 10:27 EF05-EM06-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Nov 4 10:27 EF05-EM06-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 9.2G Nov 4 16:52 EF06-EM01-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Nov 4 16:52 EF06-EM01-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 7.1G Nov 4 22:24 EF06-EM02-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Nov 4 22:24 EF06-EM02-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 6.3G Nov 5 06:56 EF06-EM06-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Nov 5 06:56 EF06-EM06-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 3.5G Nov 5 17:19 EF07-EM01-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Nov 5 17:19 EF07-EM01-Zygote_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 7.7G Nov 6 03:05 EF07-EM03-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Nov 6 03:05 EF07-EM03-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 11G Nov 6 10:19 EF08-EM03-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Nov 6 10:20 EF08-EM03-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
-rw-r--r-- 1 sam sam 6.7G Nov 6 15:20 EF08-EM04-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_pe.bam
-rw-r--r-- 1 sam sam 2.1K Nov 6 15:20 EF08-EM04-Larvae_R1_001.fastp-trim.20220827_bismark_bt2_PE_report.txt
drwxr-xr-x 2 sam sam 4.0K Nov 9 12:15 multiqc_data
-rw-r--r-- 1 sam sam 1.1M Nov 9 12:15 multiqc_report.html
drwxr-xr-x 2 sam sam 12K Nov 13 10:26 trimmed-fastqs
# Load bash variables into memory
source .bashvars
# Run MultiQC on the output directory. Stop if the directory cannot be entered,
# so MultiQC never scans (or writes its report into) the wrong location.
cd "${output_dir_top:?}" || exit 1
"${multiqc}" .
# Load bash variables into memory
source .bashvars
cd "${output_dir_top:?}" || exit 1
# Generate MD5 checksums for every regular file in the output directory.
# checksums.md5 is rewritten on each run, so re-running this chunk does not
# append duplicate entries. Directories are skipped because md5sum cannot hash
# them, and checksums.md5 itself is excluded from its own listing.
for file in *; do
  if [[ -f "${file}" && "${file}" != "checksums.md5" ]]; then
    md5sum -- "${file}"
  fi
done > checksums.md5