---
title: "11-multispecies-RNASeq-trimming"
output: html_document
date: "2025-02-18"
---


```{bash}
  # Run fastp
  # Specifies reports in HTML and JSON formats
 /home/shared/fastp \
  --in1 ${fastq_array_R1[index]} \
  --in2 ${fastq_array_R2[index]} \
  --detect_adapter_for_pe \
  --thread ${threads} \
  --html "${sample_name}".fastp-trim."${timestamp}".report.html \
  --json "${sample_name}".fastp-trim."${timestamp}".report.json \
  --out1 "${R1_sample_name}".fastp-trim."${timestamp}".fq.gz \
  --out2 "${R2_sample_name}".fastp-trim."${timestamp}".fq.gz
```

```{bash}
# Set the directory containing FASTQ files
FASTQ_DIR="/home/shared/8TB_HDD_02/graceac9/multispecies2023"
THREADS=16  # Adjust as needed
OUTDIR="../output/11-multi-fastp"

# Loop through all R1 files in the directory
for R1_FILE in ${FASTQ_DIR}/*_R1_001.fastq.gz; do
  # Derive corresponding R2 file name
  R2_FILE="${R1_FILE/_R1_001.fastq.gz/_R2_001.fastq.gz}"
  
  # Ensure the R2 file exists
  if [[ ! -f "$R2_FILE" ]]; then
    echo "Skipping ${R1_FILE}, no matching R2 file found."
    continue
  fi
  
  # Extract the sample name
  SAMPLE_NAME=$(basename "$R1_FILE" | sed 's/_R1_001.fastq.gz//')
  
  # Define output file names
  OUT_R1="${OUTDIR}/${SAMPLE_NAME}_R1.fastp-trim.fq.gz"
  OUT_R2="${OUTDIR}/${SAMPLE_NAME}_R2.fastp-trim.fq.gz"
  HTML_REPORT="${OUTDIR}/${SAMPLE_NAME}.fastp-trim.report.html"
  JSON_REPORT="${OUTDIR}/${SAMPLE_NAME}.fastp-trim.report.json"

  # Run fastp
  /home/shared/fastp --in1 "$R1_FILE" \
        --in2 "$R2_FILE" \
        --detect_adapter_for_pe \
        --trim_front1 10 \
        --trim_front2 10 \
        --thread "$THREADS" \
        --html "$HTML_REPORT" \
        --json "$JSON_REPORT" \
        --out1 "$OUT_R1" \
        --out2 "$OUT_R2"

  echo "Finished processing: $SAMPLE_NAME"
done

echo "All samples processed."
```