/*
 * Nextflow configuration for running nf-core/methylseq on the UW Hyak cluster
 * (Roberts lab), set up for a Bismark --score_min sweep on subset FASTQs.
 * See the usage notes at the bottom of this file.
 */

params {
    config_profile_description = 'UW Hyak Roberts labs cluster profile provided by nf-core/configs.'
    config_profile_contact     = 'Steven Roberts'
    config_profile_url         = 'https://faculty.washington.edu/sr320/'

    /*
     * Desired downsample size per FASTQ (paired => each file limited to this many READS;
     * 100,000 reads = 400,000 FASTQ lines, since each record is 4 lines).
     * NOTE: nf-core/methylseq does NOT have a native param for subsetting; create 100k-read FASTQs prior to run
     * (see usage notes at bottom of file). This value is here for documentation and potential templating.
     */
    subset_reads = 100000

    /*
     * List of Bismark --score_min settings to evaluate. The pipeline itself executes a single value per run.
     * Loop over these externally (see usage notes). Typical form: L,0,-X where X controls stringency
     * (larger X = lower minimum alignment score = more permissive).
     */
    score_min_values = [
        'L,0,-0.2',
        'L,0,-0.4',
        'L,0,-0.6',
        'L,0,-0.8'
    ]

    /*
     * Single score_min value actually applied for this run. Override at runtime:
     *   --score_min 'L,0,-0.4'
     * Defaults to the most permissive (-0.8) value in the list above; 'L,0,-0.2' is the strictest.
     */
    score_min = 'L,0,-0.8'
}

process {
    errorStrategy  = 'retry'
    maxSubmitAwait = '60 min'
    // Three retries => up to four attempts, so every tier of the queue/account
    // escalation below is reachable (with maxRetries = 2 the attempt-4 tier never ran).
    maxRetries     = 3
    executor       = 'slurm'

    // Escalation by attempt number:
    //   attempts 1-2 -> ckpt-all      (-A srlab)
    //   attempt  3   -> cpu-g2        (-A coenv)
    //   attempt  4   -> cpu-g2-mem2x  (-A srlab)
    queue          = { task.attempt < 4 ? (task.attempt < 3 ? 'ckpt-all' : 'cpu-g2' ) : 'cpu-g2-mem2x' }
    clusterOptions = { task.attempt < 4 ? (task.attempt < 3 ? "-A srlab" : "-A coenv" ) : "-A srlab" }

    // Use user-specific scratch on gscratch to avoid writing large temps to $HOME
    scratch = "/mmfs1/gscratch/scrubbed/${System.getenv('USER')}/nf-scratch"

    resourceLimits = [
        cpus: 48,
        memory: '400.GB',
        time: '72.h'
    ]

    // The original selector was the lowercase name `preseq`; the other selectors in this file
    // use uppercase module names, so the uppercase preseq module name is matched as well.
    // NOTE(review): PRESEQ_LCEXTRAP is assumed to be the process name in the pipeline version in use - confirm.
    withName: 'preseq|PRESEQ_LCEXTRAP' {
        errorStrategy = 'ignore'
    }

    withName: BISMARK_ALIGN {
        cpus   = 32
        memory = '100.GB'
        time   = '48.h'
        /*
         * We pass a single --score_min derived from params.score_min. Run the pipeline multiple times with different
         * values to compare (see bottom usage notes). Example override: --score_min 'L,0,-0.4'
         */
        ext.args = [
            "--score_min ${params.score_min}".toString(),
        ].join(' ')
        // Output prefix carries only the digits of score_min, e.g. 'L,0,-0.8' -> '<id>_scmin_008'.
        ext.prefix = { "${meta.id}_scmin_${params.score_min.replaceAll('[^0-9]+','')}" }
    }

    withName: BISMARK_METHYLATIONEXTRACTOR {
        ext.args = [
            '--merge_non_CpG',
            '--comprehensive',
            '--multicore 24',
            '--buffer_size 75%'
        ].join(' ')
        ext.prefix = { "${meta.id}" }
    }

    withName: BISMARK_COVERAGE2CYTOSINE {
        ext.args = [
            '--merge_CpG',
            '--zero_based'
        ].join(' ')
        ext.prefix = { "${meta.id}" }
    }
}

executor {
    // Option names are case-sensitive: `queueSize`, not `queuesize`.
    queueSize       = 50
    submitRateLimit = '1 sec'
}

singularity {
    enabled    = true
    autoMounts = true
    // Redirect Apptainer/Singularity image cache off $HOME
    cacheDir   = "/mmfs1/gscratch/scrubbed/${System.getenv('USER')}/singularity-cache"
}

trace {
    enabled = true
    file    = 'pipeline_trace.txt'
    fields  = 'task_id,hash,name,status,exit,submit,duration,realtime,%cpu,rss,vmem,rchar,wchar,disk,queue,hostname,cpu_model'
}

// Keep work directories after a successful run so intermediate files can be inspected.
// `cleanup` is a top-level Nextflow option; it has no effect when nested in a `debug { }` scope.
cleanup = false

/*
-------------------------------------------------------------------------
 USAGE NOTES FOR SCORE_MIN SWEEP + READ SUBSETTING
-----------------------------------------------------------------------------
1. Create 100k-read subsets (per mate) once (paired example using seqtk; the same
   seed is used for both mates so that read pairs stay in sync):

     mkdir -p data/subset
     for fq in data/*_R1.fastq.gz; do \
       base=$(basename "$fq" _R1.fastq.gz); \
       seqtk sample -s100 "$fq" 100000 | gzip > data/subset/${base}_R1_100k.fastq.gz; \
       seqtk sample -s100 data/${base}_R2.fastq.gz 100000 | gzip > data/subset/${base}_R2_100k.fastq.gz; \
     done

2. Prepare a samplesheet pointing to the subset FASTQs (e.g. 05.samplesheet.csv) and run one
   pipeline execution per score:

     for s in L,0,-0.2 L,0,-0.4 L,0,-0.6 L,0,-0.8; do \
       nextflow run nf-core/methylseq \
         -c code/05.config \
         --input code/05.samplesheet.csv \
         --outdir output/05-scoremin-${s//[,.-]/} \
         --score_min "$s"; \
     done

   If an additional profile is needed, add `-profile <name>` with an explicit name.
   A bare `-profile` with no value would consume the following `-c` as the profile name.

3. Results will be separated by outdir; prefixes include the numeric portion of score_min for clarity.

4. Compare alignment rates and methylation metrics across score_min thresholds (MultiQC, Bismark reports).

This block is purely informational; it does not alter pipeline execution.
----------------------------------------------------------------------------
*/