---
title: "14-BWA"
author: "Steven Roberts"
date: "`r format(Sys.time(), '%d %B, %Y')`"
output:
  github_document:
    toc: true
    toc_depth: 3
    number_sections: true
    html_preview: true
  html_document:
    theme: readable
    highlight: zenburn
    toc: true
    toc_float: true
    number_sections: true
    code_folding: show
    code_download: true
editor_options:
  markdown:
    wrap: sentence
---

```{r setup, include=FALSE}
library(knitr)
library(tidyverse)
library(kableExtra)
library(DT)
library(Biostrings)
library(tm)
library(pheatmap)
library(DESeq2)
knitr::opts_chunk$set(
  echo = TRUE,         # Display code chunks
  eval = FALSE,        # Do NOT evaluate code chunks when knitting
  warning = FALSE,     # Hide warnings
  message = FALSE,     # Hide messages
  fig.width = 6,       # Set plot width in inches
  fig.height = 4,      # Set plot height in inches
  fig.align = "center", # Align plots to the center
  comment = ""         # Prevents appending '##' to beginning of lines in code output
)
```

# Genome

```{bash}
# Download the reference genome FASTA into ../data
cd ../data
curl -O https://owl.fish.washington.edu/halfshell/genomic-databank/GCF_031168955.1_ASM3116895v1_genomic.fna
```

```{bash}
# Build the BWA index next to the FASTA (required before running bwa mem)
bwa index ../data/GCF_031168955.1_ASM3116895v1_genomic.fna
```

# Reads - getting on raven ecotype

```{bash}
# Fetch every *.gz file from the remote directory into ../data/fastq-ecotype
# (-r recursive, -np never ascend to parent, -nd flatten directories, -A accept pattern)
mkdir -p ../data/fastq-ecotype
cd ../data/fastq-ecotype
wget -r -np -nd -A "*.gz" https://owl.fish.washington.edu/nightingales/G_macrocephalus/30-1149634506/00_fastq/
```

# Align

```{bash}
# Path to reference genome (must be indexed with bwa index)
REFERENCE="../data/GCF_031168955.1_ASM3116895v1_genomic.fna"

# Number of threads per BWA job
THREADS=42

# Directory containing fastq files
FASTQ_DIR="../data/fastq-ecotype"

# Output directory (you can change this)
OUT_DIR="../output/14-BWA"

# File suffixes
R1_SUFFIX="_R1_001.fastq.gz"
R2_SUFFIX="_R2_001.fastq.gz"
OUT_SUFFIX=".sam"

# The shell redirect below cannot create missing directories, so make sure
# the output directory exists before the first alignment starts.
mkdir -p "${OUT_DIR}"

# -------------------------------
# RUN BWA ON EACH SAMPLE
# (samples are processed one after another; each job uses ${THREADS} threads)
# -------------------------------

# Iterate over the R1 files with a glob instead of parsing `ls` output, so
# file names containing whitespace or glob characters are handled correctly.
for R1_FILE in "${FASTQ_DIR}"/*"${R1_SUFFIX}"; do
  # When nothing matches, the glob pattern is passed through literally.
  if [ ! -e "${R1_FILE}" ]; then
    echo "No files matching *${R1_SUFFIX} found in ${FASTQ_DIR}" >&2
    continue
  fi

  # basename strips the suffix as a literal string (no regex interpretation
  # of the '.' characters, unlike a sed substitution).
  sample="$(basename "${R1_FILE}" "${R1_SUFFIX}")"
  R2_FILE="${FASTQ_DIR}/${sample}${R2_SUFFIX}"
  OUT_FILE="${OUT_DIR}/${sample}${OUT_SUFFIX}"

  # Paired-end alignment needs both mates; skip the sample rather than
  # letting bwa fail and leave a truncated SAM file behind.
  if [ ! -f "${R2_FILE}" ]; then
    echo "Skipping ${sample}: missing mate file ${R2_FILE}" >&2
    continue
  fi

  echo "Running BWA on ${sample}..."

  bwa mem -t "${THREADS}" "${REFERENCE}" "${R1_FILE}" "${R2_FILE}" > "${OUT_FILE}"
done
```