#!/usr/bin/env bash
set -euo pipefail

# Wrapper to sweep multiple --score_min values for nf-core/methylseq using a single invocation.
# Each score_min triggers its own pipeline run with a distinct outdir and report filenames.

###############################################################################
# CONFIGURABLE SECTION (override via env vars when calling this script)       #
###############################################################################
# Input paths. Each variable keeps a non-empty value inherited from the
# environment; otherwise it is assigned the project default given here.
: "${SAMPLESHEET:=/mmfs1/gscratch/scrubbed/sr320/github/project-caligus-methylation/code/05.samplesheet.csv}"
: "${REFERENCE_FASTA:=/mmfs1/gscratch/scrubbed/sr320/github/project-caligus-methylation/data/GCA_013387185.1_ASM1338718v1_genomic.fa}"
: "${BASE_OUTDIR:=/mmfs1/gscratch/scrubbed/sr320/github/project-caligus-methylation/output}"
: "${CONFIG_FILE:=/mmfs1/gscratch/scrubbed/sr320/github/project-caligus-methylation/code/05.config}"

# score_min values to test, separated by spaces. A single value may contain
# commas; quote the whole list when overriding it.
: "${SCORE_MIN_VALUES:=L,0,-0.2 L,0,-0.4 L,0,-0.6 L,0,-0.8}"

# Optional extra arguments handed to every pipeline run (empty by default).
: "${EXTRA_ARGS:=}"

###############################################################################
# ENV / CACHES                                                                 #
###############################################################################
# Per-user scratch root that holds every cache below. USER is not exported in
# every environment (some containers, cron jobs), and a bare ${USER} would then
# abort the script under `set -u`; fall back to `id -un` in that case.
BASE_SCRATCH="/mmfs1/gscratch/scrubbed/${USER:-$(id -un)}"
export NXF_HOME="${BASE_SCRATCH}/.nextflow"           # Nextflow runtime & plugin cache
export NXF_TEMP="${BASE_SCRATCH}/tmp"                # General temp space
export APPTAINER_CACHEDIR="${BASE_SCRATCH}/singularity-cache"  # Image cache
export SINGULARITY_CACHEDIR="$APPTAINER_CACHEDIR"    # Backward compat
export NXF_SINGULARITY_CACHEDIR="$APPTAINER_CACHEDIR"

# Create the cache directories and the base output directory up front.
mkdir -p "$NXF_HOME" "$NXF_TEMP" "$APPTAINER_CACHEDIR" "$BASE_OUTDIR"

###############################################################################
# VALIDATION                                                                  #
###############################################################################
# require_nonempty_file LABEL PATH
# Exit the script with status 1 and a message on stderr unless PATH exists and
# has a size greater than zero (the `-s` test). The message names both failure
# modes because `-s` cannot tell a missing file from an empty one.
require_nonempty_file() {
	local label=$1 path=$2
	if [[ ! -s "$path" ]]; then
		printf 'ERROR: %s not found or empty: %s\n' "$label" "$path" >&2
		exit 1
	fi
}

require_nonempty_file "Samplesheet" "$SAMPLESHEET"
require_nonempty_file "Reference FASTA" "$REFERENCE_FASTA"
require_nonempty_file "Config file" "$CONFIG_FILE"

# print_sweep_banner
# Print the settings of this sweep to stdout before any run starts.
# Globals read: SCORE_MIN_VALUES, SAMPLESHEET, REFERENCE_FASTA, CONFIG_FILE,
#               BASE_OUTDIR, EXTRA_ARGS
print_sweep_banner() {
	# Put ", " between the values only. Running the whole line through
	# sed 's/ /, /g' would also rewrite the spaces of the fixed label text.
	printf 'Starting score_min sweep: %s\n' "${SCORE_MIN_VALUES// /, }"
	printf 'Samplesheet: %s\n' "$SAMPLESHEET"
	printf 'Reference : %s\n' "$REFERENCE_FASTA"
	printf 'Config    : %s\n' "$CONFIG_FILE"
	printf 'Base out  : %s\n' "$BASE_OUTDIR"
	# An `if` (rather than `[[ ... ]] && ...`) keeps the exit status zero when
	# EXTRA_ARGS is empty, which matters for a function called under `set -e`.
	if [[ -n "$EXTRA_ARGS" ]]; then
		printf 'Extra args: %s\n' "$EXTRA_ARGS"
	fi
}

print_sweep_banner

###############################################################################
# LOOP                                                                        #
###############################################################################
# score_min_tag VALUE
# Print a directory-name-safe tag for a --score_min value: commas, periods and
# plus signs are deleted and each run of hyphens becomes a single underscore
# (e.g. "L,0,-0.2" -> "L0_02").
score_min_tag() {
	printf '%s\n' "$1" | tr -d ',.+' | sed 's/--*/_/g'
}

# run_score_min_sweep
# Run nf-core/methylseq once for every whitespace-separated entry of
# SCORE_MIN_VALUES.
# Globals read: SCORE_MIN_VALUES, EXTRA_ARGS, BASE_OUTDIR, CONFIG_FILE,
#               SAMPLESHEET, REFERENCE_FASTA
# Outputs:      per run, a directory $BASE_OUTDIR/05-scoremin-<tag> that
#               receives run.log and is passed to Nextflow as the location of
#               its report, timeline and trace files; progress goes to stdout.
run_score_min_sweep() {
	local -a score_mins=() extra_args=()
	local run_idx=0 score_min tag outdir log_file

	# Split both lists on whitespace the way an unquoted expansion would, but
	# without pathname expansion, so an argument such as '*' is passed through
	# literally. `read -d ''` reaches end of input and returns non-zero even
	# when it has filled the array, hence the `|| true`.
	IFS=$' \t\n' read -r -d '' -a score_mins <<<"${SCORE_MIN_VALUES:-}" || true
	IFS=$' \t\n' read -r -d '' -a extra_args <<<"${EXTRA_ARGS:-}" || true

	# The ${arr[@]+"${arr[@]}"} form expands to nothing for an empty array and
	# stays safe under `set -u` on bash releases older than 4.4.
	for score_min in ${score_mins[@]+"${score_mins[@]}"}; do
		run_idx=$((run_idx + 1))
		tag=$(score_min_tag "$score_min")
		outdir="$BASE_OUTDIR/05-scoremin-${tag}"
		log_file="$outdir/run.log"
		mkdir -p "$outdir"

		# `tee` without -a starts a fresh log for this run. printf is used
		# because bash's plain `echo` would print the leading \n literally
		# instead of emitting a blank separator line.
		printf '\n[Run %d] score_min=%s -> %s\n' "$run_idx" "$score_min" "$outdir" \
			| tee "$log_file"

		# Report, timeline and trace paths live inside the run's own outdir,
		# so each run gets distinct files.
		nextflow run nf-core/methylseq \
			-c "$CONFIG_FILE" \
			--input "$SAMPLESHEET" \
			--outdir "$outdir" \
			--fasta "$REFERENCE_FASTA" \
			--score_min "$score_min" \
			--nomeseq \
			-with-report "$outdir/nf_report.html" \
			-with-timeline "$outdir/nf_timeline.html" \
			-with-trace "$outdir/pipeline_trace.txt" \
			-resume \
			${extra_args[@]+"${extra_args[@]}"} | tee -a "$log_file"

		printf '[Run %d] Completed score_min=%s\n' "$run_idx" "$score_min" \
			| tee -a "$log_file"
	done
}

run_score_min_sweep

# Closing message: states where the caches live.
printf '%s\n' "All runs complete. Caches located under ${BASE_SCRATCH} (NXF_HOME, tmp, singularity-cache)."
