---
title: "11-multispecies-RNASeq-trimming"
output: html_document
date: "2025-02-18"
---

```{bash, eval=FALSE}
# Reference template for a single fastp invocation (not evaluated).
#
# The variables used here (fastq_array_R1, fastq_array_R2, index, threads,
# sample_name, R1_sample_name, R2_sample_name, timestamp) are not defined in
# the visible part of this document, and knitr runs each bash chunk in its own
# shell, so this chunk cannot run as written. It is kept for reference only;
# the chunk below performs the actual trimming.

# Run fastp
# Specifies reports in HTML and JSON formats
/home/shared/fastp \
  --in1 "${fastq_array_R1[index]}" \
  --in2 "${fastq_array_R2[index]}" \
  --detect_adapter_for_pe \
  --thread "${threads}" \
  --html "${sample_name}".fastp-trim."${timestamp}".report.html \
  --json "${sample_name}".fastp-trim."${timestamp}".report.json \
  --out1 "${R1_sample_name}".fastp-trim."${timestamp}".fq.gz \
  --out2 "${R2_sample_name}".fastp-trim."${timestamp}".fq.gz
```

```{bash}
# Trim every paired-end sample found in FASTQ_DIR with fastp.
#
# For each *_R1_001.fastq.gz file the matching *_R2_001.fastq.gz mate is
# located, and fastp writes trimmed reads plus HTML/JSON reports to OUTDIR.
# Samples without an R2 mate are skipped. The chunk exits non-zero if no input
# files are found or if fastp fails for any sample.

# Set the directory containing FASTQ files
FASTQ_DIR="/home/shared/8TB_HDD_02/graceac9/multispecies2023"
THREADS=16  # Adjust as needed
OUTDIR="../output/11-multi-fastp"

# fastp does not create the output directory itself, so make sure it exists.
mkdir -p -- "$OUTDIR" || { echo "Cannot create output directory: ${OUTDIR}" >&2; exit 1; }

# With nullglob an unmatched pattern expands to nothing instead of being
# passed through literally, so an empty/missing input directory is detectable.
shopt -s nullglob
r1_files=("${FASTQ_DIR}"/*_R1_001.fastq.gz)
shopt -u nullglob

if (( ${#r1_files[@]} == 0 )); then
  echo "No *_R1_001.fastq.gz files found in ${FASTQ_DIR}." >&2
  exit 1
fi

failed=0

# Loop through all R1 files in the directory
for R1_FILE in "${r1_files[@]}"; do
  # Derive corresponding R2 file name by swapping only the trailing suffix,
  # so a directory component that happens to contain the same text is untouched.
  R2_FILE="${R1_FILE%_R1_001.fastq.gz}_R2_001.fastq.gz"

  # Ensure the R2 file exists
  if [[ ! -f "$R2_FILE" ]]; then
    echo "Skipping ${R1_FILE}, no matching R2 file found."
    continue
  fi

  # Extract the sample name: drop the directory, then the R1 suffix
  SAMPLE_NAME="${R1_FILE##*/}"
  SAMPLE_NAME="${SAMPLE_NAME%_R1_001.fastq.gz}"

  # Define output file names
  OUT_R1="${OUTDIR}/${SAMPLE_NAME}_R1.fastp-trim.fq.gz"
  OUT_R2="${OUTDIR}/${SAMPLE_NAME}_R2.fastp-trim.fq.gz"
  HTML_REPORT="${OUTDIR}/${SAMPLE_NAME}.fastp-trim.report.html"
  JSON_REPORT="${OUTDIR}/${SAMPLE_NAME}.fastp-trim.report.json"

  # Run fastp: auto-detect paired-end adapters and trim the first 10 bases
  # from the front of both reads.
  if ! /home/shared/fastp \
    --in1 "$R1_FILE" \
    --in2 "$R2_FILE" \
    --detect_adapter_for_pe \
    --trim_front1 10 \
    --trim_front2 10 \
    --thread "$THREADS" \
    --html "$HTML_REPORT" \
    --json "$JSON_REPORT" \
    --out1 "$OUT_R1" \
    --out2 "$OUT_R2"; then
    # Keep going so one bad sample does not block the rest, but remember it.
    echo "fastp failed for: $SAMPLE_NAME" >&2
    failed=$((failed + 1))
    continue
  fi

  echo "Finished processing: $SAMPLE_NAME"
done

if (( failed > 0 )); then
  echo "${failed} sample(s) failed fastp trimming." >&2
  exit 1
fi

echo "All samples processed."
```