1 Description

This notebook will download raw sRNA-seq FastQs, concatenate them (there were multiple lanes run), and then assess with FastQC and MultiQC (Ewels et al. 2016).

1.1 Inputs

Raw FastQ files with the following pattern:

  • *.fastq.gz

1.2 Outputs

The expected outputs will be:

  • *.fastq.gz: Concatenated FastQ files.

  • *_fastqc.html: FastQC results, in HTML format.

  • multiqc_report.html: A summary report of the FastQC results generated by MultiQC, in HTML format.

Due to the large file sizes of FastQs, they cannot be added to GitHub. Full output from this notebook is available here:

2 Create a Bash variables file

This allows usage of Bash variables across R Markdown chunks.

# Write the shared variable definitions to .bashvars so that later chunks can
# `source` them. The heredoc delimiter is quoted, so nothing below is expanded
# here; every ${...} reference is stored literally and only resolved when the
# file is sourced.
cat > .bashvars << 'BASHVARS'
#### Assign Variables ####

# Data directories
export repo_dir=/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa
export output_dir_top=${repo_dir}/output/00.00-fastqc-concatenation-raw_reads
export raw_reads_url="https://owl.fish.washington.edu/nightingales/R_philippinarum/"
export raw_reads_dir="${repo_dir}/data/raw_reads"
export project_dir_1="30-1035633055"
export project_dir_2="30-1035633055-TS01"

# Paths to programs
export programs_dir="/home/shared"
export fastqc="${programs_dir}/FastQC-0.12.1/fastqc"
export multiqc="/home/sam/programs/mambaforge/bin/multiqc"

# Set FastQ filename patterns
export fastq_pattern='*.fastq.gz'
export R1_fastq_pattern='*_R1_*.fastq.gz'
export R2_fastq_pattern='*_R2_*.fastq.gz'

# Set number of CPUs to use
export threads=40

## Inititalize arrays
export fastq_array_R1=()
export fastq_array_R2=()
export trimmed_fastqs_array=()
export R1_names_array=()
export R2_names_array=()

# Print formatting
export line="--------------------------------------------------------"

BASHVARS

# Echo the generated file so the notebook records exactly what was written
cat .bashvars
#### Assign Variables ####

# Data directories
export repo_dir=/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa
export output_dir_top=${repo_dir}/output/00.00-fastqc-concatenation-raw_reads
export raw_reads_url="https://owl.fish.washington.edu/nightingales/R_philippinarum/"
export raw_reads_dir="${repo_dir}/data/raw_reads"
export project_dir_1="30-1035633055"
export project_dir_2="30-1035633055-TS01"

# Paths to programs
export programs_dir="/home/shared"
export fastqc="${programs_dir}/FastQC-0.12.1/fastqc"
export multiqc="/home/sam/programs/mambaforge/bin/multiqc"

# Set FastQ filename patterns
export fastq_pattern='*.fastq.gz'
export R1_fastq_pattern='*_R1_*.fastq.gz'
export R2_fastq_pattern='*_R2_*.fastq.gz'

# Set number of CPUs to use
export threads=40

## Inititalize arrays
export fastq_array_R1=()
export fastq_array_R2=()
export trimmed_fastqs_array=()
export R1_names_array=()
export R2_names_array=()

# Print formatting
export line="--------------------------------------------------------"

3 Download raw reads

The --cut-dirs 3 option strips the three leading directory components of the remote path (i.e. nightingales/R_philippinarum/30-1035633055/) so that we just end up with the reads.


# Load bash variables into memory
source .bashvars

# Download each sequencing run into its own local directory.
#
# --cut-dirs 3 strips nightingales/R_philippinarum/<project>/ from the saved
# paths, so the files of a run land directly in its local directory. The URL
# therefore has to include the project directory: fetching the parent URL
# would flatten both runs' identically named FastQs into the same place.
for project_dir in "${project_dir_1}" "${project_dir_2}"
do
  directory="${raw_reads_dir}/${project_dir}"

  # Create directory, if it doesn't exist
  mkdir --parents "${directory}" || exit 1

  # NOTE(review): --no-check-certificate disables TLS verification; integrity
  # of the downloads relies on the MD5 verification step that follows.
  # ${raw_reads_url%/} drops a trailing slash, if any, so the joined URL never
  # contains a double slash.
  wget \
  --directory-prefix "${directory}" \
  --recursive \
  --no-check-certificate \
  --continue \
  --cut-dirs 3 \
  --no-parent \
  --no-host-directories \
  --quiet \
  "${raw_reads_url%/}/${project_dir}/" \
  || echo "WARNING: wget reported errors while downloading ${project_dir}" >&2

  # Remove extraneous index files left behind by the recursive download.
  # Only the directory just downloaded is touched; --force keeps this quiet
  # when no index files exist.
  rm --force "${directory}"/index*
done

3.1 Overview of downloads

# Make the shared notebook variables available to this chunk
. .bashvars

# List the downloaded files as a tree, with human-readable sizes and
# cumulative sizes for directories
tree -h --du "${raw_reads_dir}"
/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
├── [ 28G]  30-1035633055
│   ├── [898M]  196_R1_001.fastq.gz
│   ├── [  56]  196_R1_001.fastq.gz.md5
│   ├── [617M]  196_R2_001.fastq.gz
│   ├── [  56]  196_R2_001.fastq.gz.md5
│   ├── [722M]  199_R1_001.fastq.gz
│   ├── [  56]  199_R1_001.fastq.gz.md5
│   ├── [481M]  199_R2_001.fastq.gz
│   ├── [  56]  199_R2_001.fastq.gz.md5
│   ├── [778M]  211_R1_001.fastq.gz
│   ├── [  56]  211_R1_001.fastq.gz.md5
│   ├── [508M]  211_R2_001.fastq.gz
│   ├── [  56]  211_R2_001.fastq.gz.md5
│   ├── [753M]  24_R1_001.fastq.gz
│   ├── [  55]  24_R1_001.fastq.gz.md5
│   ├── [535M]  24_R2_001.fastq.gz
│   ├── [  55]  24_R2_001.fastq.gz.md5
│   ├── [798M]  260_R1_001.fastq.gz
│   ├── [  56]  260_R1_001.fastq.gz.md5
│   ├── [544M]  260_R2_001.fastq.gz
│   ├── [  56]  260_R2_001.fastq.gz.md5
│   ├── [788M]  26_R1_001.fastq.gz
│   ├── [  55]  26_R1_001.fastq.gz.md5
│   ├── [507M]  26_R2_001.fastq.gz
│   ├── [  55]  26_R2_001.fastq.gz.md5
│   ├── [837M]  30_R1_001.fastq.gz
│   ├── [  55]  30_R1_001.fastq.gz.md5
│   ├── [562M]  30_R2_001.fastq.gz
│   ├── [  55]  30_R2_001.fastq.gz.md5
│   ├── [792M]  310_R1_001.fastq.gz
│   ├── [  56]  310_R1_001.fastq.gz.md5
│   ├── [554M]  310_R2_001.fastq.gz
│   ├── [  56]  310_R2_001.fastq.gz.md5
│   ├── [801M]  33_R1_001.fastq.gz
│   ├── [  55]  33_R1_001.fastq.gz.md5
│   ├── [561M]  33_R2_001.fastq.gz
│   ├── [  55]  33_R2_001.fastq.gz.md5
│   ├── [832M]  341_R1_001.fastq.gz
│   ├── [  56]  341_R1_001.fastq.gz.md5
│   ├── [555M]  341_R2_001.fastq.gz
│   ├── [  56]  341_R2_001.fastq.gz.md5
│   ├── [900M]  34_R1_001.fastq.gz
│   ├── [  55]  34_R1_001.fastq.gz.md5
│   ├── [640M]  34_R2_001.fastq.gz
│   ├── [  55]  34_R2_001.fastq.gz.md5
│   ├── [766M]  35_R1_001.fastq.gz
│   ├── [  55]  35_R1_001.fastq.gz.md5
│   ├── [497M]  35_R2_001.fastq.gz
│   ├── [  55]  35_R2_001.fastq.gz.md5
│   ├── [855M]  363_R1_001.fastq.gz
│   ├── [  56]  363_R1_001.fastq.gz.md5
│   ├── [591M]  363_R2_001.fastq.gz
│   ├── [  56]  363_R2_001.fastq.gz.md5
│   ├── [825M]  367_R1_001.fastq.gz
│   ├── [  56]  367_R1_001.fastq.gz.md5
│   ├── [541M]  367_R2_001.fastq.gz
│   ├── [  56]  367_R2_001.fastq.gz.md5
│   ├── [1.4G]  376_R1_001.fastq.gz
│   ├── [  56]  376_R1_001.fastq.gz.md5
│   ├── [1016M]  376_R2_001.fastq.gz
│   ├── [  56]  376_R2_001.fastq.gz.md5
│   ├── [798M]  460_R1_001.fastq.gz
│   ├── [  56]  460_R1_001.fastq.gz.md5
│   ├── [535M]  460_R2_001.fastq.gz
│   ├── [  56]  460_R2_001.fastq.gz.md5
│   ├── [814M]  485_R1_001.fastq.gz
│   ├── [  56]  485_R1_001.fastq.gz.md5
│   ├── [550M]  485_R2_001.fastq.gz
│   ├── [  56]  485_R2_001.fastq.gz.md5
│   ├── [886M]  501_R1_001.fastq.gz
│   ├── [  56]  501_R1_001.fastq.gz.md5
│   ├── [598M]  501_R2_001.fastq.gz
│   ├── [  56]  501_R2_001.fastq.gz.md5
│   ├── [946M]  71_R1_001.fastq.gz
│   ├── [  55]  71_R1_001.fastq.gz.md5
│   ├── [619M]  71_R2_001.fastq.gz
│   ├── [  55]  71_R2_001.fastq.gz.md5
│   ├── [939M]  88_R1_001.fastq.gz
│   ├── [  55]  88_R1_001.fastq.gz.md5
│   ├── [645M]  88_R2_001.fastq.gz
│   ├── [  55]  88_R2_001.fastq.gz.md5
│   └── [585K]  Azenta_30-1035633055_Data_Report.html
└── [5.5G]  30-1035633055-TS01
    ├── [175M]  196_R1_001.fastq.gz
    ├── [  56]  196_R1_001.fastq.gz.md5
    ├── [123M]  196_R2_001.fastq.gz
    ├── [  56]  196_R2_001.fastq.gz.md5
    ├── [137M]  199_R1_001.fastq.gz
    ├── [  56]  199_R1_001.fastq.gz.md5
    ├── [ 93M]  199_R2_001.fastq.gz
    ├── [  56]  199_R2_001.fastq.gz.md5
    ├── [148M]  211_R1_001.fastq.gz
    ├── [  56]  211_R1_001.fastq.gz.md5
    ├── [ 99M]  211_R2_001.fastq.gz
    ├── [  56]  211_R2_001.fastq.gz.md5
    ├── [146M]  24_R1_001.fastq.gz
    ├── [  55]  24_R1_001.fastq.gz.md5
    ├── [107M]  24_R2_001.fastq.gz
    ├── [  55]  24_R2_001.fastq.gz.md5
    ├── [157M]  260_R1_001.fastq.gz
    ├── [  56]  260_R1_001.fastq.gz.md5
    ├── [109M]  260_R2_001.fastq.gz
    ├── [  56]  260_R2_001.fastq.gz.md5
    ├── [155M]  26_R1_001.fastq.gz
    ├── [  55]  26_R1_001.fastq.gz.md5
    ├── [100M]  26_R2_001.fastq.gz
    ├── [  55]  26_R2_001.fastq.gz.md5
    ├── [163M]  30_R1_001.fastq.gz
    ├── [  55]  30_R1_001.fastq.gz.md5
    ├── [114M]  30_R2_001.fastq.gz
    ├── [  55]  30_R2_001.fastq.gz.md5
    ├── [151M]  310_R1_001.fastq.gz
    ├── [  56]  310_R1_001.fastq.gz.md5
    ├── [109M]  310_R2_001.fastq.gz
    ├── [  56]  310_R2_001.fastq.gz.md5
    ├── [154M]  33_R1_001.fastq.gz
    ├── [  55]  33_R1_001.fastq.gz.md5
    ├── [110M]  33_R2_001.fastq.gz
    ├── [  55]  33_R2_001.fastq.gz.md5
    ├── [161M]  341_R1_001.fastq.gz
    ├── [  56]  341_R1_001.fastq.gz.md5
    ├── [109M]  341_R2_001.fastq.gz
    ├── [  56]  341_R2_001.fastq.gz.md5
    ├── [179M]  34_R1_001.fastq.gz
    ├── [  55]  34_R1_001.fastq.gz.md5
    ├── [128M]  34_R2_001.fastq.gz
    ├── [  55]  34_R2_001.fastq.gz.md5
    ├── [148M]  35_R1_001.fastq.gz
    ├── [  55]  35_R1_001.fastq.gz.md5
    ├── [ 99M]  35_R2_001.fastq.gz
    ├── [  55]  35_R2_001.fastq.gz.md5
    ├── [165M]  363_R1_001.fastq.gz
    ├── [  56]  363_R1_001.fastq.gz.md5
    ├── [116M]  363_R2_001.fastq.gz
    ├── [  56]  363_R2_001.fastq.gz.md5
    ├── [159M]  367_R1_001.fastq.gz
    ├── [  56]  367_R1_001.fastq.gz.md5
    ├── [107M]  367_R2_001.fastq.gz
    ├── [  56]  367_R2_001.fastq.gz.md5
    ├── [290M]  376_R1_001.fastq.gz
    ├── [  56]  376_R1_001.fastq.gz.md5
    ├── [203M]  376_R2_001.fastq.gz
    ├── [  56]  376_R2_001.fastq.gz.md5
    ├── [153M]  460_R1_001.fastq.gz
    ├── [  56]  460_R1_001.fastq.gz.md5
    ├── [106M]  460_R2_001.fastq.gz
    ├── [  56]  460_R2_001.fastq.gz.md5
    ├── [160M]  485_R1_001.fastq.gz
    ├── [  56]  485_R1_001.fastq.gz.md5
    ├── [111M]  485_R2_001.fastq.gz
    ├── [  56]  485_R2_001.fastq.gz.md5
    ├── [176M]  501_R1_001.fastq.gz
    ├── [  56]  501_R1_001.fastq.gz.md5
    ├── [122M]  501_R2_001.fastq.gz
    ├── [  56]  501_R2_001.fastq.gz.md5
    ├── [181M]  71_R1_001.fastq.gz
    ├── [  55]  71_R1_001.fastq.gz.md5
    ├── [121M]  71_R2_001.fastq.gz
    ├── [  55]  71_R2_001.fastq.gz.md5
    ├── [181M]  88_R1_001.fastq.gz
    ├── [  55]  88_R1_001.fastq.gz.md5
    ├── [127M]  88_R2_001.fastq.gz
    ├── [  55]  88_R2_001.fastq.gz.md5
    └── [585K]  Azenta_30-1035633055-TS01_Data_Report.html

  34G used in 2 directories, 162 files

3.2 Verify checksums

# Load bash variables into memory
source .bashvars

# Verify the MD5 checksums of each sequencing run in turn.
# The .md5 files reference the FastQs by relative path, so md5sum has to be
# run from inside the directory that holds them.
first_directory=true

for project_dir in "${project_dir_1}" "${project_dir_2}"
do
  # Print a separator between directories (not before the first one)
  if [[ "${first_directory}" == true ]]
  then
    first_directory=false
  else
    echo ""
    echo "${line}"
    echo ""
  fi

  # Stop if the directory is missing; otherwise the loop below would check
  # whatever *.md5 files happen to be in the current directory instead.
  if ! cd "${raw_reads_dir}/${project_dir}"
  then
    echo "ERROR: cannot change to ${raw_reads_dir}/${project_dir}" >&2
    exit 1
  fi

  pwd
  echo ""

  for checksum in *.md5
  do
    md5sum --check "${checksum}"
  done
done
/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055

./196_R1_001.fastq.gz: OK
./196_R2_001.fastq.gz: OK
./199_R1_001.fastq.gz: OK
./199_R2_001.fastq.gz: OK
./211_R1_001.fastq.gz: OK
./211_R2_001.fastq.gz: OK
./24_R1_001.fastq.gz: OK
./24_R2_001.fastq.gz: OK
./260_R1_001.fastq.gz: OK
./260_R2_001.fastq.gz: OK
./26_R1_001.fastq.gz: OK
./26_R2_001.fastq.gz: OK
./30_R1_001.fastq.gz: OK
./30_R2_001.fastq.gz: OK
./310_R1_001.fastq.gz: OK
./310_R2_001.fastq.gz: OK
./33_R1_001.fastq.gz: OK
./33_R2_001.fastq.gz: OK
./341_R1_001.fastq.gz: OK
./341_R2_001.fastq.gz: OK
./34_R1_001.fastq.gz: OK
./34_R2_001.fastq.gz: OK
./35_R1_001.fastq.gz: OK
./35_R2_001.fastq.gz: OK
./363_R1_001.fastq.gz: OK
./363_R2_001.fastq.gz: OK
./367_R1_001.fastq.gz: OK
./367_R2_001.fastq.gz: OK
./376_R1_001.fastq.gz: OK
./376_R2_001.fastq.gz: OK
./460_R1_001.fastq.gz: OK
./460_R2_001.fastq.gz: OK
./485_R1_001.fastq.gz: OK
./485_R2_001.fastq.gz: OK
./501_R1_001.fastq.gz: OK
./501_R2_001.fastq.gz: OK
./71_R1_001.fastq.gz: OK
./71_R2_001.fastq.gz: OK
./88_R1_001.fastq.gz: OK
./88_R2_001.fastq.gz: OK

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01

./196_R1_001.fastq.gz: OK
./196_R2_001.fastq.gz: OK
./199_R1_001.fastq.gz: OK
./199_R2_001.fastq.gz: OK
./211_R1_001.fastq.gz: OK
./211_R2_001.fastq.gz: OK
./24_R1_001.fastq.gz: OK
./24_R2_001.fastq.gz: OK
./260_R1_001.fastq.gz: OK
./260_R2_001.fastq.gz: OK
./26_R1_001.fastq.gz: OK
./26_R2_001.fastq.gz: OK
./30_R1_001.fastq.gz: OK
./30_R2_001.fastq.gz: OK
./310_R1_001.fastq.gz: OK
./310_R2_001.fastq.gz: OK
./33_R1_001.fastq.gz: OK
./33_R2_001.fastq.gz: OK
./341_R1_001.fastq.gz: OK
./341_R2_001.fastq.gz: OK
./34_R1_001.fastq.gz: OK
./34_R2_001.fastq.gz: OK
./35_R1_001.fastq.gz: OK
./35_R2_001.fastq.gz: OK
./363_R1_001.fastq.gz: OK
./363_R2_001.fastq.gz: OK
./367_R1_001.fastq.gz: OK
./367_R2_001.fastq.gz: OK
./376_R1_001.fastq.gz: OK
./376_R2_001.fastq.gz: OK
./460_R1_001.fastq.gz: OK
./460_R2_001.fastq.gz: OK
./485_R1_001.fastq.gz: OK
./485_R2_001.fastq.gz: OK
./501_R1_001.fastq.gz: OK
./501_R2_001.fastq.gz: OK
./71_R1_001.fastq.gz: OK
./71_R2_001.fastq.gz: OK
./88_R1_001.fastq.gz: OK
./88_R2_001.fastq.gz: OK

4 Concatenate reads

Concatenation also handles samples where there might be a missing set of R2 reads in the second round of sequencing.

# Load bash variables into memory
source .bashvars

# Make output directory, if it doesn't exist
mkdir --parents "${output_dir_top}" || exit 1

# Concatenate FastQ files from 1st and 2nd runs.
#
# For every FastQ in the first run, look for a FastQ with the same filename in
# the second run. If one exists, the two are concatenated into the output
# directory; if not, the first-run file is copied over unchanged. An MD5
# checksum file is then written next to each output FastQ.
#
# Do NOT quote fastq_pattern variable
for first_run_fastq in "${raw_reads_dir}"/"${project_dir_1}"/${fastq_pattern}
do
  # An unmatched glob is left as the literal pattern; stop instead of trying
  # to process a file that does not exist.
  if [[ ! -f "${first_run_fastq}" ]]
  then
    echo "ERROR: not a regular file (unmatched pattern?): ${first_run_fastq}" >&2
    exit 1
  fi

  # Strip full path to just get filename.
  first_run_fastq_name="${first_run_fastq##*/}"

  # Same filename, second sequencing run
  second_run_fastq="${raw_reads_dir}/${project_dir_2}/${first_run_fastq_name}"

  if [[ -f "${second_run_fastq}" ]]
  then
    echo "Concatenating ${first_run_fastq} with ${second_run_fastq} to ${output_dir_top}/${first_run_fastq_name}"
    echo ""
    # Overwrite (>) instead of appending (>>) so that re-running this chunk
    # does not duplicate reads in an existing output file.
    cat "${first_run_fastq}" "${second_run_fastq}" > "${output_dir_top}/${first_run_fastq_name}" || exit 1
  else
    # No second-run file with this name: keep the first-run reads as they are
    echo "NO MATCH!"
    echo "Copying ${first_run_fastq} to ${output_dir_top}"
    echo ""
    cp "${first_run_fastq}" "${output_dir_top}" || exit 1
  fi

  # Generate MD5 checksums
  # Run from inside the output directory, in a subshell, so that the checksum
  # file records a bare filename and the working directory of the loop is left
  # untouched. The .md5 file is overwritten, not appended to, on re-runs.
  echo "Generating checksums for concatenated FastQs..."
  (
    set -o pipefail
    cd "${output_dir_top}" || exit 1
    md5sum "${first_run_fastq_name}" | tee "${first_run_fastq_name}.md5"
  ) || exit 1
  echo ""
  echo "${line}"
  echo ""
done
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/196_R1_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/196_R1_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/196_R1_001.fastq.gz

Generating checksums for concatenated FastQs...
9417deb3991ecedd89bd0b796b609deb  196_R1_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/196_R2_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/196_R2_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/196_R2_001.fastq.gz

Generating checksums for concatenated FastQs...
a24eecd018b5f0f8a60cc8df104a15f3  196_R2_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/199_R1_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/199_R1_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/199_R1_001.fastq.gz

Generating checksums for concatenated FastQs...
862564fb77d382c9d18949335dd8aa23  199_R1_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/199_R2_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/199_R2_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/199_R2_001.fastq.gz

Generating checksums for concatenated FastQs...
72369d2d653f177647331dc5c83adae9  199_R2_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/211_R1_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/211_R1_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/211_R1_001.fastq.gz

Generating checksums for concatenated FastQs...
ade53539022aa28c1a60533823c38107  211_R1_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/211_R2_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/211_R2_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/211_R2_001.fastq.gz

Generating checksums for concatenated FastQs...
ea1e84b96485e1e6d3e48e146c0c22b3  211_R2_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/24_R1_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/24_R1_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/24_R1_001.fastq.gz

Generating checksums for concatenated FastQs...
e958e60126298c3567ba61f1ac061c31  24_R1_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/24_R2_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/24_R2_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/24_R2_001.fastq.gz

Generating checksums for concatenated FastQs...
2dd7215f5695f85b5ae5eabf87d0aae2  24_R2_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/260_R1_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/260_R1_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/260_R1_001.fastq.gz

Generating checksums for concatenated FastQs...
8f3df276c61a575a779f24a5a1524972  260_R1_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/260_R2_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/260_R2_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/260_R2_001.fastq.gz

Generating checksums for concatenated FastQs...
e830f7f7e4ebee517243d0256107d036  260_R2_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/26_R1_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/26_R1_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/26_R1_001.fastq.gz

Generating checksums for concatenated FastQs...
b0bacc581eb09abc35002df6242d4395  26_R1_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/26_R2_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/26_R2_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/26_R2_001.fastq.gz

Generating checksums for concatenated FastQs...
0377d016bb2284f466388c56d0351c0a  26_R2_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/30_R1_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/30_R1_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/30_R1_001.fastq.gz

Generating checksums for concatenated FastQs...
b27369e79417c98c558d86a8b11bb092  30_R1_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/30_R2_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/30_R2_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/30_R2_001.fastq.gz

Generating checksums for concatenated FastQs...
3e246bcbe3b5f8d83ce16960b4c0ce9a  30_R2_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/310_R1_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/310_R1_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/310_R1_001.fastq.gz

Generating checksums for concatenated FastQs...
1aee070979f4e2484d42ae1d53c13825  310_R1_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/310_R2_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/310_R2_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/310_R2_001.fastq.gz

Generating checksums for concatenated FastQs...
0566e0d8d195fc9c6a6253e5c19d2032  310_R2_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/33_R1_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/33_R1_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/33_R1_001.fastq.gz

Generating checksums for concatenated FastQs...
cf6af56a0a882e8c92f9b7a63b305711  33_R1_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/33_R2_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/33_R2_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/33_R2_001.fastq.gz

Generating checksums for concatenated FastQs...
73fafa2ae9986be6d1be405f8f6fec78  33_R2_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/341_R1_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/341_R1_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/341_R1_001.fastq.gz

Generating checksums for concatenated FastQs...
d84af357aca0e0237dd67b7bcbab32c3  341_R1_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/341_R2_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/341_R2_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/341_R2_001.fastq.gz

Generating checksums for concatenated FastQs...
35d54cdf44f0deaed484b35d34b1a3e4  341_R2_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/34_R1_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/34_R1_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/34_R1_001.fastq.gz

Generating checksums for concatenated FastQs...
a1d8006b21ac5f5ade62549d7377beef  34_R1_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/34_R2_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/34_R2_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/34_R2_001.fastq.gz

Generating checksums for concatenated FastQs...
a44a721f9cb8dda05fe669b98ced2112  34_R2_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/35_R1_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/35_R1_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/35_R1_001.fastq.gz

Generating checksums for concatenated FastQs...
25fccca8264c0a3eea8ece52d0ed6c4a  35_R1_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/35_R2_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/35_R2_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/35_R2_001.fastq.gz

Generating checksums for concatenated FastQs...
f53632400b3a7b5142890df086594561  35_R2_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/363_R1_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/363_R1_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/363_R1_001.fastq.gz

Generating checksums for concatenated FastQs...
476d3bfc92ed8234d4f05b2eb51316c0  363_R1_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/363_R2_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/363_R2_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/363_R2_001.fastq.gz

Generating checksums for concatenated FastQs...
108a47c38b3ce4ec7a9c43c656d66d78  363_R2_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/367_R1_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/367_R1_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/367_R1_001.fastq.gz

Generating checksums for concatenated FastQs...
9d4dc55e2c5a025cb452db4ba58b2426  367_R1_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/367_R2_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/367_R2_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/367_R2_001.fastq.gz

Generating checksums for concatenated FastQs...
e9ca895177af7f4ecd7b4cea1efb8e94  367_R2_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/376_R1_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/376_R1_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/376_R1_001.fastq.gz

Generating checksums for concatenated FastQs...
ed7f367e99fa14a79d9d76e7f6602504  376_R1_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/376_R2_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/376_R2_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/376_R2_001.fastq.gz

Generating checksums for concatenated FastQs...
b34a6b8a21c2d753c70c6f0ada622ae3  376_R2_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/460_R1_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/460_R1_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/460_R1_001.fastq.gz

Generating checksums for concatenated FastQs...
0b76ea694146524fb3d920d0ce893700  460_R1_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/460_R2_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/460_R2_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/460_R2_001.fastq.gz

Generating checksums for concatenated FastQs...
129fa23dd27e800eefdf9857cd486e04  460_R2_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/485_R1_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/485_R1_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/485_R1_001.fastq.gz

Generating checksums for concatenated FastQs...
8fb813628e19f16a2ebc38a3b25d86c9  485_R1_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/485_R2_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/485_R2_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/485_R2_001.fastq.gz

Generating checksums for concatenated FastQs...
778fd2901d89edb88bd3b3258794483c  485_R2_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/501_R1_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/501_R1_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/501_R1_001.fastq.gz

Generating checksums for concatenated FastQs...
661ecf50a08410be5f497191ccbcab53  501_R1_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/501_R2_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/501_R2_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/501_R2_001.fastq.gz

Generating checksums for concatenated FastQs...
25fce7fb0d38c82790021a67f6d7f7bf  501_R2_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/71_R1_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/71_R1_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/71_R1_001.fastq.gz

Generating checksums for concatenated FastQs...
91a62551b545b89180b8cf8bba7a9972  71_R1_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/71_R2_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/71_R2_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/71_R2_001.fastq.gz

Generating checksums for concatenated FastQs...
061d5ddab8b33f54564f8fde19ceb479  71_R2_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/88_R1_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/88_R1_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/88_R1_001.fastq.gz

Generating checksums for concatenated FastQs...
01f8e68f4993aa9fc4d6d56b29719afa  88_R1_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads
Concatenating /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055/88_R2_001.fastq.gz with /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads/30-1035633055-TS01/88_R2_001.fastq.gz to /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads/88_R2_001.fastq.gz

Generating checksums for concatenated FastQs...
2933ae02943ead5548223833cbeb41de  88_R2_001.fastq.gz

--------------------------------------------------------

/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/data/raw_reads

5 FastQC/MultiQC on raw reads

# Load bash variables into memory
source .bashvars


############ RUN FASTQC ############


# Create array of raw (concatenated) FastQs found in the output directory.
# fastq_pattern is deliberately left unquoted so the shell glob-expands it.
raw_fastqs_array=(${output_dir_top}/${fastq_pattern})

# Pass array contents to new variable as space-delimited list
raw_fastqc_list=$(echo "${raw_fastqs_array[*]}")

echo "Beginning FastQC on raw reads..."
echo ""

# Run FastQC on every FastQ in the list, writing reports to the output directory.
### NOTE: Do NOT quote raw_fastqc_list
### (word splitting is what turns the list back into one argument per file)
${fastqc} \
--threads ${threads} \
--outdir ${output_dir_top} \
--quiet \
${raw_fastqc_list}

echo "FastQC on raw reads complete!"
echo ""

############ END FASTQC ############

############ RUN MULTIQC ############
echo "Beginning MultiQC on raw FastQC..."
echo ""

# Search the output directory for reports and write the MultiQC summary
# back into that same directory.
${multiqc} ${output_dir_top} -o ${output_dir_top}

echo ""
echo "MultiQC on raw FastQs complete."
echo ""

############ END MULTIQC ############

# Clean up: delete every *.zip file in the output directory.
echo "Removing FastQC zip files."
echo ""
rm ${output_dir_top}/*.zip
echo "FastQC zip files removed."
echo ""
Beginning FastQC on raw reads...

application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
application/gzip
FastQC on raw reads complete!

Beginning MultiQC on raw FastQC...


  /// MultiQC 🔍 | v1.14

|           multiqc | MultiQC Version v1.25.2 now available!
|           multiqc | Search path : /home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads
|         searching | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 160/160  
|            fastqc | Found 40 reports
|           multiqc | Compressing plot data
|           multiqc | Report      : ../output/00.00-fastqc-concatenation-raw_reads/multiqc_report.html
|           multiqc | Data        : ../output/00.00-fastqc-concatenation-raw_reads/multiqc_data
|           multiqc | MultiQC complete

MultiQC on raw FastQs complete.

Removing FastQC zip files.

FastQC zip files removed.

5.1 View directory contents

# Load bash variables into memory
source .bashvars
# Long-format listing of the output directory, with human-readable sizes
ls -lh ${output_dir_top}
total 34G
-rw-r--r-- 1 sam sam  694K Dec  5 14:17 196_R1_001_fastqc.html
-rw-r--r-- 1 sam sam  1.1G Dec  5 14:10 196_R1_001.fastq.gz
-rw-r--r-- 1 sam sam    54 Dec  5 14:10 196_R1_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  680K Dec  5 14:17 196_R2_001_fastqc.html
-rw-r--r-- 1 sam sam  740M Dec  5 14:10 196_R2_001.fastq.gz
-rw-r--r-- 1 sam sam    54 Dec  5 14:10 196_R2_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  700K Dec  5 14:17 199_R1_001_fastqc.html
-rw-r--r-- 1 sam sam  859M Dec  5 14:10 199_R1_001.fastq.gz
-rw-r--r-- 1 sam sam    54 Dec  5 14:10 199_R1_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  682K Dec  5 14:16 199_R2_001_fastqc.html
-rw-r--r-- 1 sam sam  575M Dec  5 14:10 199_R2_001.fastq.gz
-rw-r--r-- 1 sam sam    54 Dec  5 14:10 199_R2_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  699K Dec  5 14:17 211_R1_001_fastqc.html
-rw-r--r-- 1 sam sam  927M Dec  5 14:10 211_R1_001.fastq.gz
-rw-r--r-- 1 sam sam    54 Dec  5 14:10 211_R1_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  684K Dec  5 14:16 211_R2_001_fastqc.html
-rw-r--r-- 1 sam sam  607M Dec  5 14:10 211_R2_001.fastq.gz
-rw-r--r-- 1 sam sam    54 Dec  5 14:10 211_R2_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  700K Dec  5 14:17 24_R1_001_fastqc.html
-rw-r--r-- 1 sam sam  900M Dec  5 14:10 24_R1_001.fastq.gz
-rw-r--r-- 1 sam sam    53 Dec  5 14:10 24_R1_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  681K Dec  5 14:17 24_R2_001_fastqc.html
-rw-r--r-- 1 sam sam  643M Dec  5 14:10 24_R2_001.fastq.gz
-rw-r--r-- 1 sam sam    53 Dec  5 14:10 24_R2_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  706K Dec  5 14:17 260_R1_001_fastqc.html
-rw-r--r-- 1 sam sam  956M Dec  5 14:10 260_R1_001.fastq.gz
-rw-r--r-- 1 sam sam    54 Dec  5 14:10 260_R1_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  682K Dec  5 14:17 260_R2_001_fastqc.html
-rw-r--r-- 1 sam sam  653M Dec  5 14:10 260_R2_001.fastq.gz
-rw-r--r-- 1 sam sam    54 Dec  5 14:10 260_R2_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  701K Dec  5 14:17 26_R1_001_fastqc.html
-rw-r--r-- 1 sam sam  944M Dec  5 14:10 26_R1_001.fastq.gz
-rw-r--r-- 1 sam sam    53 Dec  5 14:10 26_R1_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  682K Dec  5 14:16 26_R2_001_fastqc.html
-rw-r--r-- 1 sam sam  607M Dec  5 14:10 26_R2_001.fastq.gz
-rw-r--r-- 1 sam sam    53 Dec  5 14:10 26_R2_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  703K Dec  5 14:17 30_R1_001_fastqc.html
-rw-r--r-- 1 sam sam 1000M Dec  5 14:10 30_R1_001.fastq.gz
-rw-r--r-- 1 sam sam    53 Dec  5 14:10 30_R1_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  682K Dec  5 14:17 30_R2_001_fastqc.html
-rw-r--r-- 1 sam sam  677M Dec  5 14:10 30_R2_001.fastq.gz
-rw-r--r-- 1 sam sam    53 Dec  5 14:10 30_R2_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  699K Dec  5 14:17 310_R1_001_fastqc.html
-rw-r--r-- 1 sam sam  943M Dec  5 14:10 310_R1_001.fastq.gz
-rw-r--r-- 1 sam sam    54 Dec  5 14:10 310_R1_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  683K Dec  5 14:16 310_R2_001_fastqc.html
-rw-r--r-- 1 sam sam  663M Dec  5 14:10 310_R2_001.fastq.gz
-rw-r--r-- 1 sam sam    54 Dec  5 14:10 310_R2_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  703K Dec  5 14:17 33_R1_001_fastqc.html
-rw-r--r-- 1 sam sam  955M Dec  5 14:10 33_R1_001.fastq.gz
-rw-r--r-- 1 sam sam    53 Dec  5 14:10 33_R1_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  680K Dec  5 14:16 33_R2_001_fastqc.html
-rw-r--r-- 1 sam sam  672M Dec  5 14:10 33_R2_001.fastq.gz
-rw-r--r-- 1 sam sam    53 Dec  5 14:10 33_R2_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  701K Dec  5 14:17 341_R1_001_fastqc.html
-rw-r--r-- 1 sam sam  993M Dec  5 14:10 341_R1_001.fastq.gz
-rw-r--r-- 1 sam sam    54 Dec  5 14:10 341_R1_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  687K Dec  5 14:17 341_R2_001_fastqc.html
-rw-r--r-- 1 sam sam  665M Dec  5 14:10 341_R2_001.fastq.gz
-rw-r--r-- 1 sam sam    54 Dec  5 14:10 341_R2_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  700K Dec  5 14:18 34_R1_001_fastqc.html
-rw-r--r-- 1 sam sam  1.1G Dec  5 14:10 34_R1_001.fastq.gz
-rw-r--r-- 1 sam sam    53 Dec  5 14:10 34_R1_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  678K Dec  5 14:17 34_R2_001_fastqc.html
-rw-r--r-- 1 sam sam  769M Dec  5 14:10 34_R2_001.fastq.gz
-rw-r--r-- 1 sam sam    53 Dec  5 14:10 34_R2_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  702K Dec  5 14:17 35_R1_001_fastqc.html
-rw-r--r-- 1 sam sam  914M Dec  5 14:11 35_R1_001.fastq.gz
-rw-r--r-- 1 sam sam    53 Dec  5 14:11 35_R1_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  681K Dec  5 14:17 35_R2_001_fastqc.html
-rw-r--r-- 1 sam sam  596M Dec  5 14:11 35_R2_001.fastq.gz
-rw-r--r-- 1 sam sam    53 Dec  5 14:11 35_R2_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  704K Dec  5 14:17 363_R1_001_fastqc.html
-rw-r--r-- 1 sam sam 1021M Dec  5 14:11 363_R1_001.fastq.gz
-rw-r--r-- 1 sam sam    54 Dec  5 14:11 363_R1_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  682K Dec  5 14:17 363_R2_001_fastqc.html
-rw-r--r-- 1 sam sam  708M Dec  5 14:11 363_R2_001.fastq.gz
-rw-r--r-- 1 sam sam    54 Dec  5 14:11 363_R2_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  699K Dec  5 14:17 367_R1_001_fastqc.html
-rw-r--r-- 1 sam sam  984M Dec  5 14:11 367_R1_001.fastq.gz
-rw-r--r-- 1 sam sam    54 Dec  5 14:11 367_R1_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  682K Dec  5 14:17 367_R2_001_fastqc.html
-rw-r--r-- 1 sam sam  648M Dec  5 14:11 367_R2_001.fastq.gz
-rw-r--r-- 1 sam sam    54 Dec  5 14:11 367_R2_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  701K Dec  5 14:19 376_R1_001_fastqc.html
-rw-r--r-- 1 sam sam  1.8G Dec  5 14:11 376_R1_001.fastq.gz
-rw-r--r-- 1 sam sam    54 Dec  5 14:11 376_R1_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  683K Dec  5 14:18 376_R2_001_fastqc.html
-rw-r--r-- 1 sam sam  1.2G Dec  5 14:11 376_R2_001.fastq.gz
-rw-r--r-- 1 sam sam    54 Dec  5 14:11 376_R2_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  697K Dec  5 14:17 460_R1_001_fastqc.html
-rw-r--r-- 1 sam sam  951M Dec  5 14:11 460_R1_001.fastq.gz
-rw-r--r-- 1 sam sam    54 Dec  5 14:11 460_R1_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  683K Dec  5 14:17 460_R2_001_fastqc.html
-rw-r--r-- 1 sam sam  641M Dec  5 14:11 460_R2_001.fastq.gz
-rw-r--r-- 1 sam sam    54 Dec  5 14:11 460_R2_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  701K Dec  5 14:17 485_R1_001_fastqc.html
-rw-r--r-- 1 sam sam  974M Dec  5 14:11 485_R1_001.fastq.gz
-rw-r--r-- 1 sam sam    54 Dec  5 14:11 485_R1_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  683K Dec  5 14:17 485_R2_001_fastqc.html
-rw-r--r-- 1 sam sam  661M Dec  5 14:11 485_R2_001.fastq.gz
-rw-r--r-- 1 sam sam    54 Dec  5 14:11 485_R2_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  702K Dec  5 14:17 501_R1_001_fastqc.html
-rw-r--r-- 1 sam sam  1.1G Dec  5 14:11 501_R1_001.fastq.gz
-rw-r--r-- 1 sam sam    54 Dec  5 14:11 501_R1_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  680K Dec  5 14:17 501_R2_001_fastqc.html
-rw-r--r-- 1 sam sam  721M Dec  5 14:11 501_R2_001.fastq.gz
-rw-r--r-- 1 sam sam    54 Dec  5 14:11 501_R2_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  693K Dec  5 14:18 71_R1_001_fastqc.html
-rw-r--r-- 1 sam sam  1.2G Dec  5 14:11 71_R1_001.fastq.gz
-rw-r--r-- 1 sam sam    53 Dec  5 14:11 71_R1_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  671K Dec  5 14:17 71_R2_001_fastqc.html
-rw-r--r-- 1 sam sam  741M Dec  5 14:11 71_R2_001.fastq.gz
-rw-r--r-- 1 sam sam    53 Dec  5 14:11 71_R2_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  700K Dec  5 14:18 88_R1_001_fastqc.html
-rw-r--r-- 1 sam sam  1.1G Dec  5 14:11 88_R1_001.fastq.gz
-rw-r--r-- 1 sam sam    53 Dec  5 14:11 88_R1_001.fastq.gz.md5
-rw-r--r-- 1 sam sam  679K Dec  5 14:17 88_R2_001_fastqc.html
-rw-r--r-- 1 sam sam  772M Dec  5 14:11 88_R2_001.fastq.gz
-rw-r--r-- 1 sam sam    53 Dec  5 14:11 88_R2_001.fastq.gz.md5
drwxr-xr-x 2 sam sam  4.0K Dec  5 14:19 multiqc_data
-rw-r--r-- 1 sam sam  1.6M Dec  5 14:19 multiqc_report.html
Ewels, Philip, Måns Magnusson, Sverker Lundin, and Max Käller. 2016. “MultiQC: Summarize Analysis Results for Multiple Tools and Samples in a Single Report.” Bioinformatics 32 (19): 3047–48. https://doi.org/10.1093/bioinformatics/btw354.
---
title: "00.00-fastqc-concatenation-raw_reads"
author: "Sam White"
date: "2024-12-05"
output: 
  bookdown::html_document2:
    theme: cosmo
    toc: true
    toc_float: true
    number_sections: true
    code_folding: show
    code_download: true
  github_document:
    toc: true
    number_sections: true
  html_document:
    theme: cosmo
    toc: true
    toc_float: true
    number_sections: true
    code_folding: show
    code_download: true
bibliography: references.bib
---


# Description

This notebook will download raw sRNA-seq FastQs, concatenate them (there
were multiple lanes run), and then assess with [FastQC](https://github.com/s-andrews/FastQC) and
[MultiQC](https://github.com/MultiQC/MultiQC) [@ewels2016].

## Inputs

Raw FastQ files with the following pattern:

- `*.fastq.gz`

## Outputs

The expected outputs will be:

- `*.fastq.gz`: Concatenated FastQ files.

- `*_fastqc.html`: FastQC results, in HTML format.

- `multiqc_report.html`: A summary report of the FastQC results
    generated by [MultiQC](https://github.com/MultiQC/MultiQC), in HTML
    format.
    
Due to the large file sizes of the FastQs, they cannot be added to GitHub. The full output from this notebook is available here:

- [https://gannet.fish.washington.edu/gitrepos/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads](https://gannet.fish.washington.edu/gitrepos/project-clam-oa/output/00.00-fastqc-concatenation-raw_reads)

```{r setup, include=FALSE}
# Global knitr defaults applied to every chunk in this notebook.
library(knitr)
knitr::opts_chunk$set(
  comment = "",        # Do not prefix lines of code output with '##'
  message = FALSE,     # Suppress messages
  warning = FALSE,     # Suppress warnings
  eval = FALSE,        # Do NOT evaluate chunks by default; chunks opt in with eval=TRUE
  echo = TRUE          # Show chunk source code in the rendered document
)
```


# Create a Bash variables file

This allows usage of Bash variables across R Markdown chunks.

```{r save-bash-variables-to-rvars-file, engine='bash', eval=TRUE}
# Write the shared variable definitions to .bashvars in a single here-document.
# The quoted 'EOF' delimiter keeps every ${...} literal, so the references are
# only expanded later, when a chunk sources the file.
cat > .bashvars <<'EOF'
#### Assign Variables ####

# Data directories
export repo_dir=/home/shared/8TB_HDD_01/sam/gitrepos/RobertsLab/project-clam-oa
export output_dir_top=${repo_dir}/output/00.00-fastqc-concatenation-raw_reads
export raw_reads_url="https://owl.fish.washington.edu/nightingales/R_philippinarum/"
export raw_reads_dir="${repo_dir}/data/raw_reads"
export project_dir_1="30-1035633055"
export project_dir_2="30-1035633055-TS01"

# Paths to programs
export programs_dir="/home/shared"
export fastqc="${programs_dir}/FastQC-0.12.1/fastqc"
export multiqc="/home/sam/programs/mambaforge/bin/multiqc"

# Set FastQ filename patterns
export fastq_pattern='*.fastq.gz'
export R1_fastq_pattern='*_R1_*.fastq.gz'
export R2_fastq_pattern='*_R2_*.fastq.gz'

# Set number of CPUs to use
export threads=40

## Inititalize arrays
export fastq_array_R1=()
export fastq_array_R2=()
export trimmed_fastqs_array=()
export R1_names_array=()
export R2_names_array=()

# Print formatting
export line="--------------------------------------------------------"

EOF

# Show the generated file in the rendered notebook
cat .bashvars
```


# Download raw reads

The `--cut-dirs 3` option strips the three leading directory components of the remote path
(i.e. `nightingales/R_philippinarum/30-1035633055/`) so that we just end up with the reads.

```{r download-raw-reads, engine='bash', eval=FALSE}

# Load bash variables into memory
source .bashvars

# Download each sequencing run into its own local directory.
#
# The run name is appended to ${raw_reads_url} so that each iteration fetches
# only that run's files. Pointing every iteration at the top-level URL would
# flatten both runs into the same directory, where FastQs sharing a filename
# across runs would collide.
for project_dir in "${project_dir_1}" "${project_dir_2}"
do
  directory="${raw_reads_dir}/${project_dir}"

  # Create directory, if it doesn't exist
  mkdir --parents "${directory}"

  # --cut-dirs 3 drops the remote path components
  # (nightingales/R_philippinarum/<run>/) so files land directly in ${directory}.
  # NOTE(review): --no-check-certificate disables TLS certificate validation;
  # kept as-is, but confirm it is still required for this server.
  wget \
  --directory-prefix "${directory}" \
  --recursive \
  --no-check-certificate \
  --continue \
  --cut-dirs 3 \
  --no-parent \
  --no-host-directories \
  --quiet \
  "${raw_reads_url}${project_dir}/"

  # Remove extraneous index files from the directory just downloaded.
  # --force keeps this quiet when no index files are present.
  rm --force -- "${directory}"/index*
done
```

## Overview of downloads

```{r check-downloads, engine='bash', eval=TRUE}
# Bring the shared notebook variables into this chunk's shell
source .bashvars

# Print the raw-reads directory tree with human-readable sizes
tree -h --du "${raw_reads_dir}"
```


## Verify checksums
```{r verify-checksums, engine='bash', eval=TRUE}
# Load bash variables into memory
source .bashvars

# Verify the downloaded files of each sequencing run against the .md5 files
# stored alongside them.
#
# Every problem (unreachable directory, no .md5 files, failed check) is
# counted, and the chunk exits non-zero if any occurred, so a corrupted
# download cannot pass unnoticed just because a later check succeeded.
failures=0
first_dir=true

for project_dir in "${project_dir_1}" "${project_dir_2}"
do
  # Print a separator between directories (not before the first one)
  if [[ "${first_dir}" == false ]]
  then
    echo ""
    echo "${line}"
    echo ""
  fi
  first_dir=false

  # Change into the run directory so that relative filenames inside the
  # .md5 files resolve against it. Never run the checks from the wrong place.
  if ! cd "${raw_reads_dir}/${project_dir}"
  then
    echo "ERROR: cannot change to ${raw_reads_dir}/${project_dir}" >&2
    failures=$((failures + 1))
    continue
  fi

  pwd
  echo ""

  for checksum in *.md5
  do
    # An unmatched glob is left as the literal pattern; report it explicitly.
    if [[ ! -e "${checksum}" ]]
    then
      echo "ERROR: no .md5 files found in ${PWD}" >&2
      failures=$((failures + 1))
      continue
    fi

    md5sum --check "${checksum}" || failures=$((failures + 1))
  done
done

if (( failures > 0 ))
then
  echo ""
  echo "ERROR: ${failures} checksum verification problem(s) detected." >&2
  exit 1
fi
```

# Concatenate reads

Concatenation also handles samples where there might be a missing set of R2 reads in the second round of sequencing.

```{r concatenate-reads, engine='bash', eval=TRUE}
# Load bash variables into memory
source .bashvars

# Make output directory, if it doesn't exist
mkdir --parents "${output_dir_top}"

# Concatenate FastQ files from 1st and 2nd runs.
#
# For every FastQ in the first run, look for a file with the same name in the
# second run:
#   - found:     write first + second run, in that order, to the output directory
#   - not found: copy the first-run file to the output directory unchanged
# An MD5 checksum file is then written next to each output FastQ.
#
# Do NOT quote fastq_pattern variable (it must glob-expand)
for first_run_fastq in "${raw_reads_dir}"/"${project_dir_1}"/${fastq_pattern}
do
  # An unmatched glob is left as the literal pattern; nothing to do then.
  [[ -e "${first_run_fastq}" ]] || continue

  # Strip full path to just get filename.
  fastq_name="${first_run_fastq##*/}"

  # Same filename, second sequencing run
  second_run_fastq="${raw_reads_dir}/${project_dir_2}/${fastq_name}"
  output_fastq="${output_dir_top}/${fastq_name}"

  if [[ -f "${second_run_fastq}" ]]
  then
    echo "Concatenating ${first_run_fastq} with ${second_run_fastq} to ${output_fastq}"
    echo ""
    # Overwrite (>) rather than append (>>): re-running the notebook must not
    # tack a second copy of the reads onto an existing output file.
    if ! cat "${first_run_fastq}" "${second_run_fastq}" > "${output_fastq}"
    then
      echo "ERROR: concatenation to ${output_fastq} failed." >&2
      exit 1
    fi
  else
    # No second-run counterpart: keep the first-run reads as they are
    echo "NO MATCH!"
    echo "Copying ${first_run_fastq} to ${output_dir_top}"
    echo ""
    if ! cp "${first_run_fastq}" "${output_dir_top}"
    then
      echo "ERROR: copying ${first_run_fastq} to ${output_dir_top} failed." >&2
      exit 1
    fi
  fi

  # Generate MD5 checksums
  # Run inside a subshell so the directory change stays local, and from within
  # the output directory so the .md5 file records a bare filename.
  # The .md5 file is overwritten so it always holds exactly one, current, entry.
  echo "Generating checksums for concatenated FastQs..."
  (
    cd "${output_dir_top}" || exit 1
    md5sum "${fastq_name}" | tee "${fastq_name}.md5"
  ) || exit 1
  echo ""
  echo "${line}"
  echo ""
done
```

# FastQC/MultiQC on raw reads

```{bash raw-fastqc-multiqc, engine='bash', eval=TRUE}
# Load bash variables into memory
source .bashvars


############ RUN FASTQC ############


# Create array of raw (concatenated) FastQs
# Do NOT quote fastq_pattern variable (it must glob-expand)
raw_fastqs_array=("${output_dir_top}"/${fastq_pattern})

# An unmatched glob is left as the literal pattern; stop early with a clear
# message instead of handing a non-existent file to FastQC.
if [[ ! -e "${raw_fastqs_array[0]}" ]]
then
  echo "ERROR: no FastQ files matching ${fastq_pattern} found in ${output_dir_top}" >&2
  exit 1
fi

echo "Beginning FastQC on raw reads..."
echo ""

# Run FastQC
# The array is expanded directly, one argument per file, so no intermediate
# space-delimited string (and no unquoted word splitting) is needed.
if ! "${fastqc}" \
--threads "${threads}" \
--outdir "${output_dir_top}" \
--quiet \
"${raw_fastqs_array[@]}"
then
  echo "ERROR: FastQC failed." >&2
  exit 1
fi

echo "FastQC on raw reads complete!"
echo ""

############ END FASTQC ############

############ RUN MULTIQC ############
echo "Beginning MultiQC on raw FastQC..."
echo ""

# Search the output directory for reports and write the summary back to it
if ! "${multiqc}" "${output_dir_top}" -o "${output_dir_top}"
then
  echo "ERROR: MultiQC failed." >&2
  exit 1
fi

echo ""
echo "MultiQC on raw FastQs complete."
echo ""

############ END MULTIQC ############

# Clean up: delete the *.zip files left in the output directory
echo "Removing FastQC zip files."
echo ""
rm -- "${output_dir_top}"/*.zip
echo "FastQC zip files removed."
echo ""
```


## View directory contents
```{bash list-output-files, engine='bash', eval=TRUE}
# Load bash variables into memory
source .bashvars

# Long-format listing of the output directory, with human-readable sizes.
# Quoted so the path is passed as a single argument.
ls -lh "${output_dir_top}"

```
