#!/bin/bash
#SBATCH --job-name=bismark_array        # Job name
#SBATCH --output=%x_%A_%a.out           # Standard output log
#SBATCH --error=%x_%A_%a.err            # Standard error log
#SBATCH --account=srlab
#SBATCH --partition=ckpt                # Update this line - use hyakalloc to find partitions you can use
#SBATCH --time=04-02:00:00
#SBATCH --ntasks=1                      # Run a single task
#SBATCH --cpus-per-task=30              # Number of CPU cores per task
#SBATCH --array=0-47                    # Array range (adjust based on the number of samples)
#SBATCH --mem=100G                      # Memory per node
#SBATCH --chdir=/gscratch/scrubbed/sr320/github/project-mytilus-methylation/output/02-bismark-klone-array/

# Purpose:
#   Run a paired-end Bismark alignment for ONE sample per SLURM array task,
#   inside the Roberts Lab bioinformatics Apptainer container.
#
#   The array index (SLURM_ARRAY_TASK_ID) selects the sample from the list of
#   "*_1.fastq.gz" files in reads_dir. Samples that finish successfully are
#   recorded in a checkpoint file so that a resubmitted array skips them.
#
# Usage:
#   sbatch <this-script>
#
# Exit status:
#   0  sample aligned, already checkpointed, or array index has no sample
#   1  configuration problem or Bismark failure

# NOTE: `set -e` is intentionally not enabled; every critical step below is
# checked explicitly so that a meaningful message reaches the job's error log.

# Print a message to stderr and abort the task with a non-zero status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# ---------------------------------------------------------------------------
# Locate the container image
# ---------------------------------------------------------------------------
container_dir="/gscratch/srlab/containers/"

# Get most recent container git hash.
# find prints "<mtime> <path>" per image; the reverse sort puts the newest
# first. awk then splits on '-' and '.', which places the hash in field 7 for
# a line shaped like:
#   YYYY-MM-DD+HH:MM:SS.NNNNNNNNNN <container_dir>srlab-bioinformatics-container-<hash>.sif
# (this relies on container_dir itself containing no '-' or '.').
git_commit_hash=$(find "${container_dir}" \
    -name "srlab-bioinformatics-container*" \
    -printf "%T+ %p\n" \
  | LC_ALL=C sort -r \
  | awk -F'[-.]' 'NR == 1 {print $7}')

[[ -n "${git_commit_hash}" ]] \
  || die "No srlab-bioinformatics-container image found in ${container_dir}"

container_image="${container_dir}srlab-bioinformatics-container-${git_commit_hash}.sif"
[[ -f "${container_image}" ]] \
  || die "Container image not found: ${container_image}"

# Load modules
module load apptainer
command -v apptainer > /dev/null \
  || die "apptainer is not available after 'module load apptainer'"

# ---------------------------------------------------------------------------
# Set directories and files
# ---------------------------------------------------------------------------
reads_dir="/gscratch/scrubbed/sr320/github/project-mytilus-methylation/data/raw-wgbs/"
#bismark_dir="/path/to/bismark/"
#bowtie2_dir="/path/to/bowtie2/"
genome_folder="/gscratch/scrubbed/sr320/github/project-mytilus-methylation/output/01-bismark-init/"
output_dir="/gscratch/scrubbed/sr320/github/project-mytilus-methylation/output/02-bismark-klone-array/"
checkpoint_file="${output_dir}completed_samples.log"

# Threads passed to Bismark's -p option; follows --cpus-per-task when run
# under SLURM and falls back to the original hard-coded value otherwise.
threads="${SLURM_CPUS_PER_TASK:-30}"

# Create the checkpoint file if it doesn't exist
touch "${checkpoint_file}" \
  || die "Cannot create checkpoint file: ${checkpoint_file}"

# ---------------------------------------------------------------------------
# Pick the sample for this array task
# ---------------------------------------------------------------------------
: "${SLURM_ARRAY_TASK_ID:?SLURM_ARRAY_TASK_ID is not set; submit this script with sbatch as an array job}"

# nullglob: an unmatched pattern must yield an empty array, not the literal
# pattern string.
shopt -s nullglob
files=("${reads_dir}"*_1.fastq.gz)
shopt -u nullglob

(( ${#files[@]} > 0 )) \
  || die "No *_1.fastq.gz files found in ${reads_dir}"

# The array range may be wider than the number of samples; surplus tasks have
# nothing to do.
if (( SLURM_ARRAY_TASK_ID >= ${#files[@]} )); then
  echo "Array task ${SLURM_ARRAY_TASK_ID} has no sample (only ${#files[@]} found). Nothing to do."
  exit 0
fi

file="${files[SLURM_ARRAY_TASK_ID]}"
sample_name=$(basename "${file}" _1.fastq.gz)
read1="${reads_dir}${sample_name}_1.fastq.gz"
read2="${reads_dir}${sample_name}_2.fastq.gz"

[[ -f "${read2}" ]] \
  || die "Sample ${sample_name}: mate file not found: ${read2}"

# Check if the sample has already been processed.
# -F -x: match the whole line literally, so characters such as '.' in a sample
# name are not treated as regex metacharacters.
if grep -Fxq -- "${sample_name}" "${checkpoint_file}"; then
  echo "Sample ${sample_name} already processed. Skipping..."
  exit 0
fi

# Define log files for stdout and stderr
stdout_log="${output_dir}${sample_name}_stdout.log"
stderr_log="${output_dir}${sample_name}_stderr.log"

# ---------------------------------------------------------------------------
# Run Bismark for this sample inside the container
# ---------------------------------------------------------------------------
# Binds home directory
# Binds /gscratch directory
# Directory bindings allow outputs to be written to the hard drive.
# NOTE(review): the binds use /mmfs1/... while the paths handed to Bismark use
# /gscratch/...; confirm that the /gscratch prefix resolves inside the
# container on this cluster.
if apptainer exec \
    --home "$PWD" \
    --bind /mmfs1/home/ \
    --bind /mmfs1/gscratch/ \
    "${container_image}" \
    bismark \
      --genome "${genome_folder}" \
      -p "${threads}" \
      --score_min L,0,-0.6 \
      --non_directional \
      -1 "${read1}" \
      -2 "${read2}" \
      -o "${output_dir}" \
    > "${stdout_log}" 2> "${stderr_log}"; then
  # Append the sample name to the checkpoint file
  printf '%s\n' "${sample_name}" >> "${checkpoint_file}"
  echo "Sample ${sample_name} processed successfully."
else
  # Exit non-zero so SLURM records this array task as FAILED.
  echo "Sample ${sample_name} failed. Check ${stderr_log} for details." >&2
  exit 1
fi