This Rmd file will download raw WGBSseq FastQs and evaluate them using FastQC and MultiQC (Ewels et al. 2016).

1 Create a Bash variables file

This allows usage of Bash variables across R Markdown chunks.

# Write the shared settings to .bashvars in a single pass so that every later
# chunk can `source` the same definitions. The heredoc delimiter is quoted,
# which keeps references such as ${output_dir_top} literal in the file; they
# are only expanded when .bashvars is sourced.
cat > .bashvars << 'BASHVARS'
#### Assign Variables ####

# Data directories
export output_dir_top="../output/00.00-WGBSseq-reads-FastQC-MultiQC"
export raw_fastqc_dir="${output_dir_top}/raw-fastqc"
export raw_reads_dir="../data/raw-wgbs"
export raw_reads_url="https://owl.fish.washington.edu/nightingales/M_trossulus/"

# Set FastQ filename patterns
export fastq_pattern='*.fastq.gz'
export R1_fastq_pattern='*_1.fastq.gz'
export R2_fastq_pattern='*_2.fastq.gz'

# Set number of CPUs to use
export threads=50

## Inititalize arrays
export fastq_array_R1=()
export fastq_array_R2=()
export raw_fastqs_array=()
export R1_names_array=()
export R2_names_array=()

# Print formatting
export line="--------------------------------------------------------"

BASHVARS

# Display the generated file so its contents appear in the rendered report
cat .bashvars
#### Assign Variables ####

# Data directories
export output_dir_top="../output/00.00-WGBSseq-reads-FastQC-MultiQC"
export raw_fastqc_dir="${output_dir_top}/raw-fastqc"
export raw_reads_dir="../data/raw-wgbs"
export raw_reads_url="https://owl.fish.washington.edu/nightingales/M_trossulus/"

# Set FastQ filename patterns
export fastq_pattern='*.fastq.gz'
export R1_fastq_pattern='*_1.fastq.gz'
export R2_fastq_pattern='*_2.fastq.gz'

# Set number of CPUs to use
export threads=50

## Inititalize arrays
export fastq_array_R1=()
export fastq_array_R2=()
export raw_fastqs_array=()
export R1_names_array=()
export R2_names_array=()

# Print formatting
export line="--------------------------------------------------------"

2 Download raw FastQs

2.1 Download raw reads

Reads are downloaded from https://owl.fish.washington.edu/nightingales/M_trossulus/

# Load bash variables into memory
source .bashvars

# Make output directory if it doesn't exist
mkdir --parents "${raw_reads_dir}"

# Run wget to retrieve FastQs and MD5 files.
#
# Option notes:
#   --recursive / --level=1    follow links on the index page, one level deep
#   --no-parent                never ascend above the starting URL
#   --no-host-directories      do not create a directory named after the host
#   --cut-dirs 3               drop leading remote path components when saving
#   --continue                 resume partially downloaded files
#   --accept                   only keep FastQs and their .md5sum files whose
#                              names match the listed patterns
#
# NOTE(review): --no-check-certificate disables TLS certificate validation.
# Presumably required by the server's certificate setup - confirm it is still
# needed, and rely on the checksum verification step for file integrity.
wget_status=0
wget \
  --directory-prefix "${raw_reads_dir}" \
  --recursive \
  --no-check-certificate \
  --continue \
  --cut-dirs 3 \
  --no-host-directories \
  --no-parent \
  --quiet \
  --level=1 \
  --accept "[0-9]M*.fastq.gz,[0-9]M*.fastq.gz.md5sum" \
  "${raw_reads_url}" \
  || wget_status=$?

# --quiet suppresses wget's own diagnostics, so report a non-zero exit status
# explicitly instead of letting a failed transfer pass unnoticed.
if [[ "${wget_status}" -ne 0 ]]; then
  echo "WARNING: wget exited with status ${wget_status}; downloads may be incomplete." >&2
fi

2.2 Verify raw read checksums

# Load bash variables into memory
source .bashvars

# Abort if the reads directory cannot be entered; otherwise the loop below
# would silently run against whatever the current directory happens to be.
cd "${raw_reads_dir}" || {
  echo "ERROR: cannot change into '${raw_reads_dir}'." >&2
  exit 1
}

# Collect every .md5sum file in the reads directory.
# nullglob makes the array empty when nothing matches, instead of holding the
# literal string '*.md5sum'.
shopt -s nullglob
checksum_files=(*.md5sum)
shopt -u nullglob

if [[ "${#checksum_files[@]}" -eq 0 ]]; then
  echo "ERROR: no .md5sum files found in ${PWD}." >&2
  exit 1
fi

# Verify each checksum file, counting failures so that one bad file does not
# prevent the remaining files from being checked.
failed_checks=0
for file in "${checksum_files[@]}"
do
  md5sum --check "${file}" || failed_checks=$((failed_checks + 1))
done

# Make a failed verification visible in the chunk's exit status.
if [[ "${failed_checks}" -ne 0 ]]; then
  echo "ERROR: ${failed_checks} checksum file(s) failed verification." >&2
  exit 1
fi

3 FastQC/MultiQC on raw reads

# Load bash variables into memory
source .bashvars

# Make output directory if it doesn't exist
mkdir --parents "${raw_fastqc_dir}"

############ RUN FASTQC ############


# Create array of raw FastQs.
# The directory part is quoted; ${fastq_pattern} is intentionally left
# unquoted so that the glob stored in it is expanded by the shell.
# nullglob leaves the array empty when nothing matches, instead of holding
# the literal pattern.
shopt -s nullglob
raw_fastqs_array=("${raw_reads_dir}"/${fastq_pattern})
shopt -u nullglob

# Stop early rather than handing FastQC a non-existent file name.
if [[ "${#raw_fastqs_array[@]}" -eq 0 ]]; then
  echo "ERROR: no FastQs matching '${fastq_pattern}' found in '${raw_reads_dir}'." >&2
  exit 1
fi

echo "Beginning FastQC on raw reads..."
echo ""

# Run FastQC
# The array is passed directly (one argument per file), so file paths that
# contain whitespace are handled correctly.
if ! fastqc \
  --threads "${threads}" \
  --outdir "${raw_fastqc_dir}" \
  --quiet \
  "${raw_fastqs_array[@]}"
then
  echo "ERROR: FastQC failed." >&2
  exit 1
fi

echo "FastQC on raw reads complete!"
echo ""

############ END FASTQC ############

############ RUN MULTIQC ############
echo "Beginning MultiQC on raw FastQC..."
echo ""

# Abort on failure so the FastQC zip files are not deleted below without a
# MultiQC report having been produced from them.
if ! multiqc "${raw_fastqc_dir}" -o "${raw_fastqc_dir}"; then
  echo "ERROR: MultiQC failed." >&2
  exit 1
fi

echo ""
echo "MultiQC on raw FastQs complete."
echo ""

############ END MULTIQC ############

echo "Removing FastQC zip files."
echo ""
# ${var:?} aborts if raw_fastqc_dir is unset or empty, so this can never
# expand to 'rm /*.zip'.
rm -- "${raw_fastqc_dir:?}"/*.zip
echo "FastQC zip files removed."
echo ""

# View directory contents
ls -lh "${raw_fastqc_dir}"
Ewels, Philip, Måns Magnusson, Sverker Lundin, and Max Käller. 2016. “MultiQC: Summarize Analysis Results for Multiple Tools and Samples in a Single Report.” Bioinformatics 32 (19): 3047–48. https://doi.org/10.1093/bioinformatics/btw354.
LS0tCnRpdGxlOiAiMDAuMDAtV0dCU3NlcS1yZWFkcy1GYXN0UUMtTXVsdGlRQy5SbWQiCmF1dGhvcjogIlNhbSBXaGl0ZSIKZGF0ZTogIjIwMjQtMTEtMDciCm91dHB1dDogCiAgYm9va2Rvd246Omh0bWxfZG9jdW1lbnQyOgogICAgdGhlbWU6IGNvc21vCiAgICB0b2M6IHRydWUKICAgIHRvY19mbG9hdDogdHJ1ZQogICAgbnVtYmVyX3NlY3Rpb25zOiB0cnVlCiAgICBjb2RlX2ZvbGRpbmc6IHNob3cKICAgIGNvZGVfZG93bmxvYWQ6IHRydWUKICBnaXRodWJfZG9jdW1lbnQ6CiAgICB0b2M6IHRydWUKICAgIG51bWJlcl9zZWN0aW9uczogdHJ1ZQogIGh0bWxfZG9jdW1lbnQ6CiAgICB0aGVtZTogY29zbW8KICAgIHRvYzogdHJ1ZQogICAgdG9jX2Zsb2F0OiB0cnVlCiAgICBudW1iZXJfc2VjdGlvbnM6IHRydWUKICAgIGNvZGVfZm9sZGluZzogc2hvdwogICAgY29kZV9kb3dubG9hZDogdHJ1ZQpiaWJsaW9ncmFwaHk6IHJlZmVyZW5jZXMuYmliCi0tLQoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CmxpYnJhcnkoa25pdHIpCmtuaXRyOjpvcHRzX2NodW5rJHNldCgKICBlY2hvID0gVFJVRSwgICAgICAgICAjIERpc3BsYXkgY29kZSBjaHVua3MKICBldmFsID0gRkFMU0UsICAgICAgICAjIEV2YWx1YXRlIGNvZGUgY2h1bmtzCiAgd2FybmluZyA9IEZBTFNFLCAgICAgIyBIaWRlIHdhcm5pbmdzCiAgbWVzc2FnZSA9IEZBTFNFLCAgICAgIyBIaWRlIG1lc3NhZ2VzCiAgY29tbWVudCA9ICIiICAgICAgICAgIyBQcmV2ZW50cyBhcHBlbmRpbmcgJyMjJyB0byBiZWdpbm5pbmcgb2YgbGluZXMgaW4gY29kZSBvdXRwdXQKKQpgYGAKClRoaXMgUm1kIGZpbGUgd2lsbCBkb3dubG9hZCByYXcgV0dCU3NlcSBGYXN0UXMgYW5kIGV2YWx1YXRlIHRoZW0gdXNpbmcgW0Zhc3RRQ10oaHR0cHM6Ly9naXRodWIuY29tL3MtYW5kcmV3cy9GYXN0UUMpIGFuZCBbTXVsdGlRQ10oaHR0cHM6Ly9tdWx0aXFjLmluZm8vKVtAZXdlbHMyMDE2XS4KCiMgQ3JlYXRlIGEgQmFzaCB2YXJpYWJsZXMgZmlsZQoKVGhpcyBhbGxvd3MgdXNhZ2Ugb2YgQmFzaCB2YXJpYWJsZXMgYWNyb3NzIFIgTWFya2Rvd24gY2h1bmtzLgoKYGBge3Igc2F2ZS1iYXNoLXZhcmlhYmxlcy10by1ydmFycy1maWxlLCBlbmdpbmU9J2Jhc2gnLCBldmFsPVRSVUV9CnsKZWNobyAiIyMjIyBBc3NpZ24gVmFyaWFibGVzICMjIyMiCmVjaG8gIiIKCmVjaG8gIiMgRGF0YSBkaXJlY3RvcmllcyIKZWNobyAnZXhwb3J0IG91dHB1dF9kaXJfdG9wPSIuLi9vdXRwdXQvMDAuMDAtV0dCU3NlcS1yZWFkcy1GYXN0UUMtTXVsdGlRQyInCmVjaG8gJ2V4cG9ydCByYXdfZmFzdHFjX2Rpcj0iJHtvdXRwdXRfZGlyX3RvcH0vcmF3LWZhc3RxYyInCmVjaG8gJ2V4cG9ydCByYXdfcmVhZHNfZGlyPSIuLi9kYXRhL3Jhdy13Z2JzIicKZWNobyAnZXhwb3J0IHJhd19yZWFkc191cmw9Imh0dHBzOi8vb3dsLmZpc2gud2FzaGluZ3Rvbi5lZHUvbmlnaHRpbmdhbGVzL01fdHJvc3N1bHVzLyInCmVjaG8gIiIKCgplY2hv
ICIjIFNldCBGYXN0USBmaWxlbmFtZSBwYXR0ZXJucyIKZWNobyAiZXhwb3J0IGZhc3RxX3BhdHRlcm49JyouZmFzdHEuZ3onIgplY2hvICJleHBvcnQgUjFfZmFzdHFfcGF0dGVybj0nKl8xLmZhc3RxLmd6JyIKZWNobyAiZXhwb3J0IFIyX2Zhc3RxX3BhdHRlcm49JypfMi5mYXN0cS5neiciCmVjaG8gIiIKCmVjaG8gIiMgU2V0IG51bWJlciBvZiBDUFVzIHRvIHVzZSIKZWNobyAnZXhwb3J0IHRocmVhZHM9NTAnCmVjaG8gIiIKCgplY2hvICIjIyBJbml0aXRhbGl6ZSBhcnJheXMiCmVjaG8gJ2V4cG9ydCBmYXN0cV9hcnJheV9SMT0oKScKZWNobyAnZXhwb3J0IGZhc3RxX2FycmF5X1IyPSgpJwplY2hvICdleHBvcnQgcmF3X2Zhc3Rxc19hcnJheT0oKScKZWNobyAnZXhwb3J0IFIxX25hbWVzX2FycmF5PSgpJwplY2hvICdleHBvcnQgUjJfbmFtZXNfYXJyYXk9KCknCmVjaG8gIiIKCmVjaG8gIiMgUHJpbnQgZm9ybWF0dGluZyIKZWNobyAnZXhwb3J0IGxpbmU9Ii0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tIicKZWNobyAiIgp9ID4gLmJhc2h2YXJzCgpjYXQgLmJhc2h2YXJzCmBgYAoKIyBEb3dubG9hZCByYXcgRmFzdFFzCgojIyBEb3dubG9hZCByYXcgcmVhZHMKClJlYWRzIGFyZSBkb3dubG9hZGVkIGZyb20gPGh0dHBzOi8vb3dsLmZpc2gud2FzaGluZ3Rvbi5lZHUvbmlnaHRpbmdhbGVzL01fdHJvc3N1bHVzLz4KCmBgYHtiYXNoIGRvd25sb2FkLXJhdy1yZWFkcywgZW5naW5lPSdiYXNoJywgZXZhbD1GQUxTRX0KIyBMb2FkIGJhc2ggdmFyaWFibGVzIGludG8gbWVtb3J5CnNvdXJjZSAuYmFzaHZhcnMKCiMgTWFrZSBvdXRwdXQgZGlyZWN0b3J5IGlmIGl0IGRvZXNuJ3QgZXhpc3QKbWtkaXIgLS1wYXJlbnRzICR7cmF3X3JlYWRzX2Rpcn0KCiMgUnVuIHdnZXQgdG8gcmV0cmlldmUgRmFzdFFzIGFuZCBNRDUgZmlsZXMKd2dldCBcCi0tZGlyZWN0b3J5LXByZWZpeCAke3Jhd19yZWFkc19kaXJ9IFwKLS1yZWN1cnNpdmUgXAotLW5vLWNoZWNrLWNlcnRpZmljYXRlIFwKLS1jb250aW51ZSBcCi0tY3V0LWRpcnMgMyBcCi0tbm8taG9zdC1kaXJlY3RvcmllcyBcCi0tbm8tcGFyZW50IFwKLS1xdWlldCBcCi0tbGV2ZWw9MSBcCi0tYWNjZXB0ICJbMC05XU0qLmZhc3RxLmd6LFswLTldTSouZmFzdHEuZ3oubWQ1c3VtIiBcCiR7cmF3X3JlYWRzX3VybH0KCmBgYAoKIyMgVmVyaWZ5IHJhdyByZWFkIGNoZWNrc3VtcwoKYGBge2Jhc2ggdmVyaWZ5LXJhdy1yZWFkLWNoZWNrc3VtcywgZW5naW5lPSdiYXNoJywgZXZhbD1GQUxTRX0KIyBMb2FkIGJhc2ggdmFyaWFibGVzIGludG8gbWVtb3J5CnNvdXJjZSAuYmFzaHZhcnMKCmNkICIke3Jhd19yZWFkc19kaXJ9IgoKIyBDaGVja3N1bXMgZmlsZSBjb250YWlucyBvdGhlciBmaWxlcywgc28gdGhpcyBqdXN0IGxvb2tzIGZvciB0aGUgc1JOQXNlcSBmaWxlcy4KZm9yIGZpbGUgaW4gKi5tZDVzdW0KZG8KICBtZDVzdW0gLS1jaGVjayAiJHtmaWxl
fSIKZG9uZQpgYGAKCiMgRmFzdFFDL011bHRpUUMgb24gcmF3IHJlYWRzCgpgYGB7YmFzaCByYXctZmFzdHFjLW11bHRpcWMsIGVuZ2luZT0nYmFzaCcsIGV2YWw9RkFMU0V9CiMgTG9hZCBiYXNoIHZhcmlhYmxlcyBpbnRvIG1lbW9yeQpzb3VyY2UgLmJhc2h2YXJzCgojIE1ha2Ugb3V0cHV0IGRpcmVjdG9yeSBpZiBpdCBkb2Vzbid0IGV4aXN0Cm1rZGlyIC0tcGFyZW50cyAiJHtyYXdfZmFzdHFjX2Rpcn0iCgojIyMjIyMjIyMjIyMgUlVOIEZBU1RRQyAjIyMjIyMjIyMjIyMKCgojIENyZWF0ZSBhcnJheSBvZiB0cmltbWVkIEZhc3RRcwpyYXdfZmFzdHFzX2FycmF5PSgke3Jhd19yZWFkc19kaXJ9LyR7ZmFzdHFfcGF0dGVybn0pCgojIFBhc3MgYXJyYXkgY29udGVudHMgdG8gbmV3IHZhcmlhYmxlIGFzIHNwYWNlLWRlbGltaXRlZCBsaXN0CnJhd19mYXN0cWNfbGlzdD0kKGVjaG8gIiR7cmF3X2Zhc3Rxc19hcnJheVsqXX0iKQoKZWNobyAiQmVnaW5uaW5nIEZhc3RRQyBvbiByYXcgcmVhZHMuLi4iCmVjaG8gIiIKCiMgUnVuIEZhc3RRQwojIyMgTk9URTogRG8gTk9UIHF1b3RlIHJhd19mYXN0cWNfbGlzdApmYXN0cWMgXAotLXRocmVhZHMgJHt0aHJlYWRzfSBcCi0tb3V0ZGlyICR7cmF3X2Zhc3RxY19kaXJ9IFwKLS1xdWlldCBcCiR7cmF3X2Zhc3RxY19saXN0fQoKZWNobyAiRmFzdFFDIG9uIHJhdyByZWFkcyBjb21wbGV0ZSEiCmVjaG8gIiIKCiMjIyMjIyMjIyMjIyBFTkQgRkFTVFFDICMjIyMjIyMjIyMjIwoKIyMjIyMjIyMjIyMjIFJVTiBNVUxUSVFDICMjIyMjIyMjIyMjIwplY2hvICJCZWdpbm5pbmcgTXVsdGlRQyBvbiByYXcgRmFzdFFDLi4uIgplY2hvICIiCgptdWx0aXFjICR7cmF3X2Zhc3RxY19kaXJ9IC1vICR7cmF3X2Zhc3RxY19kaXJ9CgplY2hvICIiCmVjaG8gIk11bHRpUUMgb24gcmF3IEZhc3RRcyBjb21wbGV0ZS4iCmVjaG8gIiIKCiMjIyMjIyMjIyMjIyBFTkQgTVVMVElRQyAjIyMjIyMjIyMjIyMKCmVjaG8gIlJlbW92aW5nIEZhc3RRQyB6aXAgZmlsZXMuIgplY2hvICIiCnJtICR7cmF3X2Zhc3RxY19kaXJ9LyouemlwCmVjaG8gIkZhc3RRQyB6aXAgZmlsZXMgcmVtb3ZlZC4iCmVjaG8gIiIKCiMgVmlldyBkaXJlY3RvcnkgY29udGVudHMKbHMgLWxoICR7cmF3X2Zhc3RxY19kaXJ9CgpgYGAK