--- title: "19-bismark-fin" author: "Steven Roberts" date: "`r format(Sys.time(), '%d %B, %Y')`" output: github_document: toc: true toc_depth: 3 number_sections: true html_preview: true html_document: theme: readable highlight: zenburn toc: true toc_float: true number_sections: true code_folding: show code_download: true editor_options: markdown: wrap: sentence --- ```{r setup, include=FALSE} library(knitr) knitr::opts_chunk$set( echo = TRUE, eval = TRUE, warning = FALSE, message = FALSE, fig.width = 6, fig.height = 4, fig.align = "center", comment = "" ) ``` ### Fill in these URLs first - Add one sample's forward and reverse read URLs and the genome FASTA URL. - Reads and genome will be saved under `../data/19-bismark-fin/`. - All outputs will be written to `../output/19-bismark-fin/`. ```{bash} # User-editable inputs READ1_URL="https://owl.fish.washington.edu/nightingales/G_macrocephalus/30-1067895835/1D11_R1_001.fastq.gz" # e.g., https://.../sample_R1.fastq.gz READ2_URL="https://owl.fish.washington.edu/nightingales/G_macrocephalus/30-1067895835/1D11_R2_001.fastq.gz" # e.g., https://.../sample_R2.fastq.gz GENOME_URL="https://gannet.fish.washington.edu/v1_web/owlshell/bu-github/project-cod-temperature/data/GCF_031168955.1_ASM3116895v1_genomic.fa" # e.g., https://.../reference.fna.gz or .fa/.fna # Optional: name used for outputs (no spaces) SAMPLE="mysample" # Compute resources THREADS=42 # Derived directories and environment file DATA_DIR="../data/19-bismark-fin" READS_DIR="${DATA_DIR}/reads" GENOME_DIR="${DATA_DIR}/genome" OUT_DIR="../output/19-bismark-fin" ENV_FILE="${DATA_DIR}/env.sh" mkdir -p "${READS_DIR}" "${GENOME_DIR}" "${OUT_DIR}" cat > "${ENV_FILE}" < "${GENOME_DIR}/genome.fna" else cp "${GENOME_FILE_ORIG}" "${GENOME_DIR}/genome.fna" fi ls -lh "${GENOME_DIR}" ``` ### Bismark genome preparation ```{bash} source "../data/19-bismark-fin/env.sh" bismark_genome_preparation \ --verbose \ --parallel ${THREADS} \ "${GENOME_DIR}" ``` ### Align paired-end reads 
with Bismark

```{bash}
# Align the paired-end reads against the bisulfite-converted genome.
# All paths and settings come from the shared environment file.
source "../data/19-bismark-fin/env.sh"

bismark \
  --genome "${GENOME_DIR}" \
  -1 "${READS_DIR}/${R1_NAME}" \
  -2 "${READS_DIR}/${R2_NAME}" \
  -p "${THREADS}" \
  -o "${OUT_DIR}" \
  --basename "${SAMPLE}" \
  --score_min L,0,-0.8

ls -lh "${OUT_DIR}"
```

### Deduplicate paired-end BAM

```{bash}
source "../data/19-bismark-fin/env.sh"

# Bismark names paired-end BAM as ${SAMPLE}_pe.bam when --basename is used
deduplicate_bismark \
  --bam \
  --paired \
  --output_dir "${OUT_DIR}" \
  "${OUT_DIR}/${SAMPLE}_pe.bam"

ls -lh "${OUT_DIR}" | grep "${SAMPLE}"
```

### Sort and index deduplicated BAM

```{bash}
source "../data/19-bismark-fin/env.sh"

# Coordinate-sorted, indexed copy of the deduplicated alignments.
# This copy is NOT used for methylation extraction (see the next chunk);
# the unsorted deduplicated BAM is kept alongside it.
DEDUP_BAM="${OUT_DIR}/${SAMPLE}_pe.deduplicated.bam"
SORTED_BAM="${OUT_DIR}/${SAMPLE}_pe.deduplicated.sorted.bam"

samtools sort -@ "${THREADS}" -o "${SORTED_BAM}" "${DEDUP_BAM}"
samtools index "${SORTED_BAM}"
```

### Extract methylation calls and generate reports

```{bash}
source "../data/19-bismark-fin/env.sh"

# Every chunk starts a fresh shell, so variables assigned in earlier chunks
# are not available here; rebuild the input path from the env.sh values.
#
# The methylation extractor needs both mates of a pair on consecutive records,
# so it must be given the deduplicated BAM as written by deduplicate_bismark,
# not the coordinate-sorted copy.
DEDUP_BAM="${OUT_DIR}/${SAMPLE}_pe.deduplicated.bam"

bismark_methylation_extractor \
  --gzip \
  --bedGraph \
  --counts \
  --comprehensive \
  --multicore "${THREADS}" \
  --paired-end \
  -o "${OUT_DIR}" \
  "${DEDUP_BAM}"

# Create Bismark HTML reports and summary inside OUT_DIR
(
  cd "${OUT_DIR}" && \
  bismark2report && \
  bismark2summary
)

ls -lh "${OUT_DIR}"
```

### Optional: coverage2cytosine (merge CpG context)

```{bash}
source "../data/19-bismark-fin/env.sh"

# Coverage file written by the extraction step (name is derived from the
# deduplicated BAM that was passed to bismark_methylation_extractor).
COV_FILE="${OUT_DIR}/${SAMPLE}_pe.deduplicated.bismark.cov.gz"

mkdir -p "${OUT_DIR}/coverage2cytosine"

# coverage2cytosine takes exactly one coverage file and requires an output
# basename (-o); results are written under the --dir folder.
coverage2cytosine \
  --merge_CpG \
  --genome_folder "${GENOME_DIR}" \
  --dir "${OUT_DIR}/coverage2cytosine" \
  -o "${SAMPLE}" \
  "${COV_FILE}"

ls -lh "${OUT_DIR}/coverage2cytosine"
```

### Notes

- Ensure `bismark`, `bowtie2`, `samtools`, and related tools are available in your environment.
- Update `THREADS` to match your system.
- Methylation extraction runs on the unsorted deduplicated BAM; the sorted, indexed BAM is an additional output.
- The key outputs are in `../output/19-bismark-fin/`, including BAMs, coverage files, and HTML reports.