library(tidyverse)
library(dplyr)
library(magrittr)
library(knitr)
library(ggplot2)
library(plotly)

Code for aligning RNAseq data to reference transcriptome/genome, to be used on Pacific cod RNAseq data.

  • trimmed reads generated in 05-cod-RNAseq-trimming
  • Transcriptome downloaded from NCBI, stored here as a part of lab genomic resources

Note: Kallisto pseudoalignment doesn’t necessarily require input reads to be trimmed, provided they are of sufficient quality.

1 Create a Bash variables file

This allows usage of Bash variables (e.g. paths to common directories) across R Markdown chunks.

# Write the shared variables file in one pass. The here-document delimiter is
# quoted, so nothing below is expanded now: every ${...} reference and each
# trailing backslash lands in .bashvars literally and is only evaluated when a
# later chunk runs `source .bashvars`.
cat > .bashvars <<'BASHVARS'
#### Assign Variables ####

# Data directories
export cod_dir=/home/shared/8TB_HDD_02/shedurkin/project-cod-temperature
export output_dir_top=${cod_dir}/output/06-cod-RNAseq-alignment
export trimmed_fastqc_dir=${cod_dir}/output/05-cod-RNAseq-trimming/trimmed-fastqc
export trimmed_reads_dir=${cod_dir}/output/05-cod-RNAseq-trimming/trimmed-reads
export kallisto_output_dir=${output_dir_top}/kallisto

# Input/Output files
export transcriptome_fasta_dir=${cod_dir}/data
export transcriptome_fasta_name="GCF_031168955.1_ASM3116895v1_rna"
export transcriptome_fasta="${transcriptome_fasta_dir}/${transcriptome_fasta_name}"
export kallisto_index_name="G_macrocephalus_kallisto_index.idx"
# External data URLs and checksums
export transcriptome_fasta_url="https://owl.fish.washington.edu/halfshell/genomic-databank/GCF_031168955.1_ASM3116895v1_rna.fna"
export transcriptome_checksum="2a6c7c98982727e688f033a9b236725b"

# Paths to programs
export kallisto=/home/shared/kallisto/kallisto
export multiqc=/home/sam/programs/mambaforge/bin/multiqc
export trinity_abund_to_matrix=/home/shared/trinityrnaseq-v2.12.0/util/abundance_estimates_to_matrix.pl

# Set number of CPUs to use
export threads=20

# Programs associative array
declare -A programs_array
programs_array=(
[kallisto]="${kallisto}" \
[multiqc]="${multiqc}" \
[trinity_abund_to_matrix]="${trinity_abund_to_matrix}" \
)
BASHVARS

# Print the generated file so the rendered notebook records what was written
cat .bashvars
#### Assign Variables ####

# Data directories
export cod_dir=/home/shared/8TB_HDD_02/shedurkin/project-cod-temperature
export output_dir_top=${cod_dir}/output/06-cod-RNAseq-alignment
export trimmed_fastqc_dir=${cod_dir}/output/05-cod-RNAseq-trimming/trimmed-fastqc
export trimmed_reads_dir=${cod_dir}/output/05-cod-RNAseq-trimming/trimmed-reads
export kallisto_output_dir=${output_dir_top}/kallisto

# Input/Output files
export transcriptome_fasta_dir=${cod_dir}/data
export transcriptome_fasta_name="GCF_031168955.1_ASM3116895v1_rna"
export transcriptome_fasta="${transcriptome_fasta_dir}/${transcriptome_fasta_name}"
export kallisto_index_name="G_macrocephalus_kallisto_index.idx"
# External data URLs and checksums
export transcriptome_fasta_url="https://owl.fish.washington.edu/halfshell/genomic-databank/GCF_031168955.1_ASM3116895v1_rna.fna"
export transcriptome_checksum="2a6c7c98982727e688f033a9b236725b"

# Paths to programs
export kallisto=/home/shared/kallisto/kallisto
export multiqc=/home/sam/programs/mambaforge/bin/multiqc
export trinity_abund_to_matrix=/home/shared/trinityrnaseq-v2.12.0/util/abundance_estimates_to_matrix.pl

# Set number of CPUs to use
export threads=20

# Programs associative array
declare -A programs_array
programs_array=(
[kallisto]="${kallisto}" \
[multiqc]="${multiqc}" \
[trinity_abund_to_matrix]="${trinity_abund_to_matrix}" \
)

I will be running kallisto with trimmed/QCd reads

2 Align to reference transcriptome (Kallisto pseudoalignment)

2.1 Retrieving the reference transcriptome

# Load bash variables into memory
source .bashvars

# Download the reference transcriptome FastA into the data directory.
# The directory and URL are quoted so the shell never word-splits or globs them.
# --continue resumes a partially downloaded file instead of starting over.
# --accept restricts what is kept to the expected "<name>.fna" file.
# NOTE(review): --no-check-certificate turns off TLS certificate verification;
# the MD5 check in the following section is then the only integrity guard for
# this file. Confirm the flag is still required for this host.
wget \
--directory-prefix "${transcriptome_fasta_dir}" \
--recursive \
--no-check-certificate \
--continue \
--no-host-directories \
--no-directories \
--no-parent \
--quiet \
--execute robots=off \
--accept "${transcriptome_fasta_name}.fna" "${transcriptome_fasta_url}"
# Load bash variables into memory
source .bashvars

# List the data directory to confirm the FastA is present
ls -lh "${transcriptome_fasta_dir}"
total 5.2G
drwxr-xr-x 3 shedurkin labmembers 4.0K Mar  4 11:05 05-cod-RNAseq-trimming
-rw-r--r-- 1 shedurkin labmembers  13K Dec 27 15:45 Cod_RNAseq_NGS_Template_File.xlsx
-rw-r--r-- 1 shedurkin labmembers  253 May 22 11:36 conditions.txt
-rw-r--r-- 1 shedurkin labmembers 2.1K Mar 20 20:55 DESeq2_Sample_Information.csv
-rw-r--r-- 1 shedurkin labmembers  38M Oct 25  2023 Gadus_macrocephalus.coding.gene.V1.cds
-rw-r--r-- 1 shedurkin labmembers 537M Oct 16  2023 GCF_031168955.1_ASM3116895v1_genomic.fna
-rw-r--r-- 1 shedurkin labmembers  875 May  7 14:05 GCF_031168955.1_ASM3116895v1_genomic.fna.fai
-rw-r--r-- 1 shedurkin labmembers 351M Oct 16  2023 GCF_031168955.1_ASM3116895v1.gff
-rw-r--r-- 1 shedurkin labmembers 169M Oct 16  2023 GCF_031168955.1_ASM3116895v1_rna.fna
-rw-r--r-- 1 shedurkin labmembers 404M Apr 23 14:29 genomic.gtf
-rw-r--r-- 1 shedurkin labmembers 1.5G May  8 15:44 Gmac_genes_fasta.fasta
-rw-r--r-- 1 shedurkin labmembers 1.5G May  8 16:14 Gmac_genes_fasta.tab
-rw-r--r-- 1 shedurkin labmembers 1.3K May 22 11:36 list01.txt
-rw-r--r-- 1 shedurkin labmembers  47K Oct 25  2023 Pcod Temp Growth experiment 2022-23 DATA.xlsx
-rw-r--r-- 1 shedurkin labmembers 1.6K May 22 11:36 README.md
-rw-r--r-- 1 shedurkin labmembers 231K Mar  4 17:41 Sample.QC.report.of_30-943133806_240118025106.pdf
-rw-r--r-- 1 shedurkin labmembers  12K Mar  4 17:41 Sample.QC.report.of_30-943133806_240118025106.xlsx
-rw-r--r-- 1 shedurkin labmembers  12K Oct 25  2023 temp-experiment.csv
-rw-r--r-- 1 shedurkin labmembers 271M Oct 25  2023 uniprot_sprot_r2023_04.fasta
-rw-r--r-- 1 shedurkin labmembers  88M May  8 10:57 uniprot_sprot_r2023_04.fasta.gz
-rw-r--r-- 1 shedurkin labmembers 415M May  8 10:57 uniprot_table_r2023_01.tab

2.2 Verify transcriptome FastA MD5 checksum

# Load bash variables into memory
source .bashvars

cd "${transcriptome_fasta_dir}"

# Verify the transcriptome FastA against its expected MD5 checksum.
# `md5sum --check` reads "<checksum>  <filename>" lines; the here-string
# supplies exactly one such line, so only the transcriptome file is checked.
# The file name is relative, which is why the chunk first changes into the
# directory that holds it.
md5sum --check <<< "${transcriptome_checksum}  ${transcriptome_fasta_name}.fna"
GCF_031168955.1_ASM3116895v1_rna.fna: OK

2.3 Building Index

# Load bash variables into memory
source .bashvars

# Make sure the output directory exists before entering it. The index is
# written to a relative path, so if the `cd` failed the index would silently
# end up in whatever directory the chunk started in; abort instead.
mkdir -p "${kallisto_output_dir}"
cd "${kallisto_output_dir}" || exit 1

# Build the kallisto index from the reference transcriptome FastA
"${programs_array[kallisto]}" index \
--threads="${threads}" \
--index="${kallisto_index_name}" \
"${transcriptome_fasta}.fna"
# Load bash variables into memory
source .bashvars

# List the output directory to confirm the index file was created
ls -lh "${kallisto_output_dir}"
total 1.5G
-rw-r--r-- 1 shedurkin labmembers 1.5G Mar 18 16:08 G_macrocephalus_kallisto_index.idx
-rw-r--r-- 1 shedurkin labmembers  20M Jun 10 16:12 kallisto.isoform.counts.matrix
-rw-r--r-- 1 shedurkin labmembers    0 Jun 10 16:12 kallisto.isoform.TMM.EXPR.matrix
-rw-r--r-- 1 shedurkin labmembers  25M Jun 10 16:12 kallisto.isoform.TPM.not_cross_norm
-rw-r--r-- 1 shedurkin labmembers  532 Jun 10 16:12 kallisto.isoform.TPM.not_cross_norm.runTMM.R
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 13:50 kallisto_quant_1
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 12:43 kallisto_quant_10
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 12:34 kallisto_quant_100
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 12:35 kallisto_quant_100.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 12:36 kallisto_quant_107
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 12:36 kallisto_quant_107.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 12:38 kallisto_quant_108
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 12:39 kallisto_quant_108.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 12:41 kallisto_quant_109
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 12:41 kallisto_quant_109.log
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 12:43 kallisto_quant_10.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 12:54 kallisto_quant_11
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 12:45 kallisto_quant_110
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 12:45 kallisto_quant_110.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 12:47 kallisto_quant_117
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 12:48 kallisto_quant_117.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 12:49 kallisto_quant_118
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 12:50 kallisto_quant_118.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 12:52 kallisto_quant_119
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 12:52 kallisto_quant_119.log
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 12:55 kallisto_quant_11.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 13:06 kallisto_quant_12
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 12:56 kallisto_quant_120
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 12:57 kallisto_quant_120.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 12:59 kallisto_quant_121
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 12:59 kallisto_quant_121.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 13:01 kallisto_quant_127
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 13:02 kallisto_quant_127.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 13:03 kallisto_quant_128
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 13:03 kallisto_quant_128.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 13:04 kallisto_quant_129
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 13:04 kallisto_quant_129.log
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 13:07 kallisto_quant_12.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 13:15 kallisto_quant_13
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 13:08 kallisto_quant_131
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 13:08 kallisto_quant_131.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 13:09 kallisto_quant_137
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 13:09 kallisto_quant_137.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 13:11 kallisto_quant_138
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 13:11 kallisto_quant_138.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 13:12 kallisto_quant_139
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 13:13 kallisto_quant_139.log
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 13:15 kallisto_quant_13.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 13:16 kallisto_quant_140
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 13:17 kallisto_quant_140.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 13:18 kallisto_quant_147
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 13:18 kallisto_quant_147.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 13:20 kallisto_quant_148
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 13:21 kallisto_quant_148.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 13:29 kallisto_quant_149
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 13:31 kallisto_quant_149.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 13:34 kallisto_quant_150
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 13:34 kallisto_quant_150.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 13:36 kallisto_quant_18
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 13:37 kallisto_quant_18.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 13:39 kallisto_quant_19
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 13:42 kallisto_quant_19-G
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 13:43 kallisto_quant_19-G.log
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 13:40 kallisto_quant_19.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 13:46 kallisto_quant_19-S
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 13:47 kallisto_quant_19-S.log
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 13:51 kallisto_quant_1.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 14:11 kallisto_quant_2
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 13:53 kallisto_quant_20
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 13:56 kallisto_quant_20-G
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 13:57 kallisto_quant_20-G.log
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 13:53 kallisto_quant_20.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 13:59 kallisto_quant_20-S
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 14:00 kallisto_quant_20-S.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 14:02 kallisto_quant_21
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 14:03 kallisto_quant_21.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 14:05 kallisto_quant_28
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 14:06 kallisto_quant_28.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 14:08 kallisto_quant_29
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 14:09 kallisto_quant_29.log
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 14:12 kallisto_quant_2.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 14:28 kallisto_quant_3
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 14:14 kallisto_quant_30
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 14:15 kallisto_quant_30.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 14:17 kallisto_quant_31
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 14:18 kallisto_quant_31.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 14:20 kallisto_quant_37
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 14:20 kallisto_quant_37.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 14:22 kallisto_quant_38
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 14:23 kallisto_quant_38.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 14:25 kallisto_quant_39
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 14:26 kallisto_quant_39.log
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 14:29 kallisto_quant_3.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 14:45 kallisto_quant_4
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 14:31 kallisto_quant_40
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 14:32 kallisto_quant_40.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 14:33 kallisto_quant_41
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 14:34 kallisto_quant_41.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 14:36 kallisto_quant_47
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 14:37 kallisto_quant_47.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 14:39 kallisto_quant_48
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 14:39 kallisto_quant_48.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 14:42 kallisto_quant_49
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 14:43 kallisto_quant_49.log
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 14:46 kallisto_quant_4.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 15:13 kallisto_quant_5
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 14:48 kallisto_quant_50
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 14:49 kallisto_quant_50.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 14:51 kallisto_quant_57
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 14:55 kallisto_quant_57-G
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 14:56 kallisto_quant_57-G.log
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 14:51 kallisto_quant_57.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 14:57 kallisto_quant_57-S
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 14:58 kallisto_quant_57-S.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 15:00 kallisto_quant_58
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 15:04 kallisto_quant_58-G
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 15:06 kallisto_quant_58-G.log
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 15:01 kallisto_quant_58.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 15:07 kallisto_quant_58-S
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 15:08 kallisto_quant_58-S.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 15:10 kallisto_quant_59
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 15:11 kallisto_quant_59.log
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 15:14 kallisto_quant_5.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 15:16 kallisto_quant_60
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 15:17 kallisto_quant_60.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 15:19 kallisto_quant_67
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 15:20 kallisto_quant_67.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 15:22 kallisto_quant_68
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 15:23 kallisto_quant_68.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 15:25 kallisto_quant_69
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 15:26 kallisto_quant_69.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 15:27 kallisto_quant_70
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 15:28 kallisto_quant_70.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 15:30 kallisto_quant_78
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 15:31 kallisto_quant_78.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 15:33 kallisto_quant_79
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 15:33 kallisto_quant_79.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 15:35 kallisto_quant_80
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 15:36 kallisto_quant_80.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 15:38 kallisto_quant_83
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 15:38 kallisto_quant_83.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 15:40 kallisto_quant_88
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 15:41 kallisto_quant_88.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 15:43 kallisto_quant_90
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 15:43 kallisto_quant_90.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 15:46 kallisto_quant_91
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 15:46 kallisto_quant_91.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 15:48 kallisto_quant_92
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 15:48 kallisto_quant_92.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 15:50 kallisto_quant_97
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 15:51 kallisto_quant_97.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 15:53 kallisto_quant_98
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 15:54 kallisto_quant_98.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 15:55 kallisto_quant_99
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 15:56 kallisto_quant_99.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 15:58 kallisto_quant_RESUB-116
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 15:58 kallisto_quant_RESUB-116.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 16:00 kallisto_quant_RESUB-156
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 16:01 kallisto_quant_RESUB-156.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 16:02 kallisto_quant_RESUB-36
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 16:03 kallisto_quant_RESUB-36.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 16:04 kallisto_quant_RESUB-76
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 16:05 kallisto_quant_RESUB-76.log
drwxr-xr-x 2 shedurkin labmembers 4.0K Jun 10 16:07 kallisto_quant_RESUB-94
-rw-r--r-- 1 shedurkin labmembers 5.3K Jun 10 16:07 kallisto_quant_RESUB-94.log

2.4 Sample Quantification

Kallisto can run quantification on either single- or paired-end reads. The default option is paired-end, which requires an even number of FastQ files supplied as pairs (e.g., pairA_R1.fastq, pairA_R2.fastq). To use single-end mode, include the --single flag, as well as -l (--fragment-length=DOUBLE, estimated average fragment length) and -s (--sd=DOUBLE, estimated standard deviation of fragment length), followed by any number of FastQ files. Again, gzipped files are acceptable.

Kallisto quant is rather finicky about how you input sets of paired reads, and you can only input a single pair at a time. To circumvent this, I’ll create a quantification function and apply it iteratively to each pair of reads using a loop.

# Load bash variables into memory
source .bashvars

# Run `kallisto quant` on one pair of trimmed reads.
#   $1 - R1 FastQ file name (not a path), resolved under ${trimmed_reads_dir}
#   $2 - R2 FastQ file name (not a path), resolved under ${trimmed_reads_dir}
# Writes results to ${kallisto_output_dir}/kallisto_quant_<sample>/ and captures
# kallisto's stdout and stderr in ${kallisto_output_dir}/kallisto_quant_<sample>.log.
# Returns kallisto's exit status.
#
# The function runs in the shell that already sourced .bashvars, so it does not
# source the file again. It also does not change directory: every path it uses
# is built from the absolute directories defined in .bashvars.
run_kallisto_quant() {
    local R1_fastq="${1}"
    local R2_fastq="${2}"
    local sample_num
    sample_num=$(basename "${R1_fastq}" ".flexbar_trim.R_1.fastq.gz")
    local quant_dir="${kallisto_output_dir}/kallisto_quant_${sample_num}"

    # -p also creates ${kallisto_output_dir} on a fresh run and tolerates a
    # directory left behind by an earlier, interrupted attempt
    mkdir -p "${quant_dir}"

    "${programs_array[kallisto]}" quant \
        --threads="${threads}" \
        --index="${kallisto_output_dir}/${kallisto_index_name}" \
        --output-dir="${quant_dir}" \
        --bootstrap-samples=100 \
        "${trimmed_reads_dir}/${R1_fastq}" "${trimmed_reads_dir}/${R2_fastq}" \
        &> "${kallisto_output_dir}/kallisto_quant_${sample_num}.log"
}



# Iteratively apply run_kallisto_quant on each pair of input reads
for file_r1 in "${trimmed_reads_dir}"/*.flexbar_trim.R_1.fastq.gz; do
    # When nothing matches, the glob is left as the literal pattern; skip it
    [ -e "${file_r1}" ] || continue

    # Extract the sample name from the file name
    sample_name=$(basename "${file_r1}" ".flexbar_trim.R_1.fastq.gz")

    # Form the file names (function takes input file names, not paths)
    file_r1_name="${sample_name}.flexbar_trim.R_1.fastq.gz"
    file_r2_name="${sample_name}.flexbar_trim.R_2.fastq.gz"

    # Skip samples that already have a finished quantification. The check is on
    # abundance.tsv rather than on the directory, because the directory is
    # created before kallisto runs and therefore also exists after a failed run.
    if [ -e "${kallisto_output_dir}/kallisto_quant_${sample_name}/abundance.tsv" ]; then
        echo "Sample already processed: ${sample_name}"
        continue
    fi

    # Require the mate file; without it there is no pair to quantify
    if [ ! -e "${trimmed_reads_dir}/${file_r2_name}" ]; then
        echo "Skipping sample ${sample_name}: missing ${file_r2_name}" >&2
        continue
    fi

    # Run kallisto quant on the file pair and report the real outcome
    if run_kallisto_quant "${file_r1_name}" "${file_r2_name}"; then
        echo "Processed sample: ${sample_name}"
    else
        echo "kallisto quant failed for sample ${sample_name}; see ${kallisto_output_dir}/kallisto_quant_${sample_name}.log" >&2
    fi
done

Check that we have the appropriate number of output folders. We should have one log file for each pair of reads

# Pull the shared paths into this shell
. .bashvars

# Tally the gzipped FastQ files in the trimmed-reads directory
# (two files, R1 and R2, are expected per sample)
cd ${trimmed_reads_dir}
printf '%s\n' "Number of raw reads:"
ls *.fastq.gz | wc -l

# Tally the kallisto log files; one is written per quantified read pair
cd ${kallisto_output_dir}
printf '%s\n' "Number of output log files"
find . -name "*.log" -type f | wc -l
Number of raw reads:
160
Number of output log files
80

2.5 MultiQC on Kallisto output logs

# Load bash variables into memory
source .bashvars

############ RUN MULTIQC ############
# Summarise the per-sample kallisto logs; the report is written to the
# top-level output directory for this notebook.
echo "Beginning MultiQC on kallisto logs..."
echo ""

"${programs_array[multiqc]}" "${kallisto_output_dir}"/*.log -o "${output_dir_top}"

echo ""
echo "MultiQC on kallisto logs complete."
echo ""

############ END MULTIQC ############

# Clean up any zip archives left in the output directory.
# ${var:?} aborts if the variable is unset or empty, so the pattern can never
# collapse to "/*.zip"; -f keeps rm quiet when there is nothing to delete.
echo "Removing any leftover zip files."
echo ""
rm -f "${output_dir_top:?output_dir_top is not set}"/*.zip
echo "Zip file cleanup complete."
echo ""

# View directory contents
ls -lh "${output_dir_top}"

I also want to include the treatment/tank info when plotting alignment rates across samples

# Load the kallisto summary table written by MultiQC (tab-delimited text)
kallisto_multiqc <- read.csv(
  "../output/06-cod-RNAseq-alignment/multiqc_data/multiqc_kallisto.txt",
  sep = "\t"
)
# Adjust sample name formatting (to prep for join): drop the literal "_R1_001"
# suffix, then add the "sample_" prefix used by the join key below
kallisto_multiqc$Sample <- gsub(
  "_R1_001", "", kallisto_multiqc$Sample,
  fixed = TRUE
)
kallisto_multiqc$Sample <- paste0("sample_", kallisto_multiqc$Sample)
# Load experimental data
cod_sample_info_OG <- read.csv("../data/DESeq2_Sample_Information.csv")

# Attach the kallisto stats to the sample metadata, then sort by sample number.
# NOTE(review): na.omit() drops every row that has an NA in ANY column, not
# only samples without kallisto stats -- confirm that is intended.
kallisto_multiqc_plustreatment <- cod_sample_info_OG %>%
  left_join(kallisto_multiqc, by = c("sample_name" = "Sample")) %>%
  na.omit() %>%
  arrange(sample_number)

# Shared x-axis label styling for the per-sample bar charts below
rotated_sample_labels <- theme(
  axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1, size = 7)
)

# Percent of reads pseudoaligned, per sample, coloured by temperature treatment
ggplot(
  kallisto_multiqc_plustreatment,
  aes(
    x = reorder(sample_name, sample_number),
    y = percent_aligned,
    fill = as.factor(temp_treatment)
  )
) +
  geom_col() +
  rotated_sample_labels

# Total reads per sample
ggplot(
  kallisto_multiqc_plustreatment,
  aes(
    x = reorder(sample_name, sample_number),
    y = total_reads,
    fill = as.factor(temp_treatment)
  )
) +
  geom_col() +
  rotated_sample_labels

# sample_149 is kind of throwing off the visualization, so let's remove it and
# redo the total-reads plot
ggplot(
  filter(kallisto_multiqc_plustreatment, sample_name != "sample_149"),
  aes(
    x = reorder(sample_name, sample_number),
    y = total_reads,
    fill = as.factor(temp_treatment)
  )
) +
  geom_col() +
  rotated_sample_labels

# Fragment length reported for each sample
ggplot(
  kallisto_multiqc_plustreatment,
  aes(
    x = reorder(sample_name, sample_number),
    y = fragment_length,
    fill = as.factor(temp_treatment)
  )
) +
  geom_col() +
  rotated_sample_labels

2.6 Trinity Matrix with Kallisto Output

# Pull the shared paths and program locations into this shell
. .bashvars

# The script is given only an output prefix, so run it from the kallisto
# output directory
cd ${kallisto_output_dir}

# Gather every per-sample abundance table produced by kallisto quant
abundance_tables=( ${kallisto_output_dir}/kallisto_quant_*/abundance.tsv )

# Combine the per-sample tables into matrices with Trinity's helper script.
# NOTE(review): the directory listing captured earlier in this document shows
# kallisto.isoform.TMM.EXPR.matrix at 0 bytes -- confirm the TMM step succeeds.
${programs_array[trinity_abund_to_matrix]} \
--est_method 'kallisto' \
--gene_trans_map 'none' \
--out_prefix 'kallisto' \
--name_sample_by_basedir "${abundance_tables[@]}"

# Show what was written
ls -lh ${kallisto_output_dir}
LS0tCnRpdGxlOiAiMDYtY29kLVJOQXNlcS1hbGlnbm1lbnQiCmF1dGhvcjogIkthdGhsZWVuIER1cmtpbiIKZGF0ZTogIjIwMjQtMDMtMTkiCmFsd2F5c19hbGxvd19odG1sOiB0cnVlCm91dHB1dDogCiAgYm9va2Rvd246Omh0bWxfZG9jdW1lbnQyOgogICAgdGhlbWU6IGNvc21vCiAgICB0b2M6IHRydWUKICAgIHRvY19mbG9hdDogdHJ1ZQogICAgbnVtYmVyX3NlY3Rpb25zOiB0cnVlCiAgICBjb2RlX2ZvbGRpbmc6IHNob3cKICAgIGNvZGVfZG93bmxvYWQ6IHRydWUKICBnaXRodWJfZG9jdW1lbnQ6CiAgICB0b2M6IHRydWUKICAgIHRvY19kZXB0aDogMwogICAgbnVtYmVyX3NlY3Rpb25zOiB0cnVlCiAgICBodG1sX3ByZXZpZXc6IHRydWUgCi0tLQoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CmxpYnJhcnkoa25pdHIpCmtuaXRyOjpvcHRzX2NodW5rJHNldCgKICBlY2hvID0gVFJVRSwgICAgICAgICAjIERpc3BsYXkgY29kZSBjaHVua3MKICBldmFsID0gRkFMU0UsICAgICAgICAjIEV2YWx1YXRlIGNvZGUgY2h1bmtzCiAgd2FybmluZyA9IEZBTFNFLCAgICAgIyBIaWRlIHdhcm5pbmdzCiAgbWVzc2FnZSA9IEZBTFNFLCAgICAgIyBIaWRlIG1lc3NhZ2VzCiAgY29tbWVudCA9ICIiICAgICAgICAgIyBQcmV2ZW50cyBhcHBlbmRpbmcgJyMjJyB0byBiZWdpbm5pbmcgb2YgbGluZXMgaW4gY29kZSBvdXRwdXQKKQpgYGAKCmBgYHtyIGxvYWQtcGFja2FnZXMsIGV2YWw9VFJVRX0KbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkobWFncml0dHIpCmxpYnJhcnkoa25pdHIpCmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeShwbG90bHkpCmBgYApDb2RlIGZvciBhbGlnbmluZyBSTkFzZXEgZGF0YSB0byByZWZlcmVuY2UgdHJhbnNjcmlwdG9tZS9nZW5vbWUsIHRvIGJlIHVzZWQgb24gW1BhY2lmaWMgY29kIFJOQXNlcSBkYXRhXShodHRwczovL3NoZWR1cmtpbi5naXRodWIuaW8vUm9iZXJ0cy1MYWJOb3RlYm9vay9wb3N0cy9wcm9qZWN0cy9wYWNpZmljX2NvZC8yMDIzXzEyXzEzX3BhY2lmaWNfY29kLmh0bWwpLiAKCi0gdHJpbW1lZCByZWFkcyBnZW5lcmF0ZWQgaW4gYDA1LWNvZC1STkFzZXEtdHJpbW1pbmdgCi0gVHJhbnNjcmlwdG9tZSBkb3dubG9hZGVkIGZyb20gW05DQkldKGh0dHBzOi8vd3d3Lm5jYmkubmxtLm5paC5nb3YvZGF0YXNldHMvZ2Vub21lL0dDRl8wMzExNjg5NTUuMS8pLCBzdG9yZWQgW2hlcmVdKGh0dHBzOi8vb3dsLmZpc2gud2FzaGluZ3Rvbi5lZHUvaGFsZnNoZWxsL2dlbm9taWMtZGF0YWJhbmsvR0NGXzAzMTE2ODk1NS4xX0FTTTMxMTY4OTV2MV9ybmEuZm5hKSBhcyBhIHBhcnQgb2YgbGFiIFtnZW5vbWljIHJlc291cmNlc10oaHR0cHM6Ly9yb2JlcnRzbGFiLmdpdGh1Yi5pby9yZXNvdXJjZXMvR2Vub21pYy1SZXNvdXJjZXMvI2dhZHVzLW1hY3JvY2VwaGFsdXMtcGFjaWZpYy1jb2QpCgpOb3RlOiBLYWxsaXN0byBwc2V1ZG9hbGlnbm1lbnQgZG9lc24ndCBuZWNlc3NhcmlseSByZXF1
aXJlIGlucHV0IHJlYWRzIHRvIGJlIHRyaW1tZWQsIHByb3ZpZGVkIHRoZXkgYXJlIG9mIHN1ZmZpY2llbnQgcXVhbGl0eS4KCiMgQ3JlYXRlIGEgQmFzaCB2YXJpYWJsZXMgZmlsZQoKVGhpcyBhbGxvd3MgdXNhZ2Ugb2YgQmFzaCB2YXJpYWJsZXMgKGUuZy4gcGF0aHMgdG8gY29tbW9uIGRpcmVjdG9yaWVzKSBhY3Jvc3MgUiBNYXJrZG93biBjaHVua3MuCmBgYHtyIHNhdmUtYmFzaC12YXJpYWJsZXMtdG8tcnZhcnMtZmlsZSwgZW5naW5lPSdiYXNoJywgZXZhbD1UUlVFfQp7CmVjaG8gIiMjIyMgQXNzaWduIFZhcmlhYmxlcyAjIyMjIgplY2hvICIiCgplY2hvICIjIERhdGEgZGlyZWN0b3JpZXMiCmVjaG8gJ2V4cG9ydCBjb2RfZGlyPS9ob21lL3NoYXJlZC84VEJfSEREXzAyL3NoZWR1cmtpbi9wcm9qZWN0LWNvZC10ZW1wZXJhdHVyZScKZWNobyAnZXhwb3J0IG91dHB1dF9kaXJfdG9wPSR7Y29kX2Rpcn0vb3V0cHV0LzA2LWNvZC1STkFzZXEtYWxpZ25tZW50JwplY2hvICdleHBvcnQgdHJpbW1lZF9mYXN0cWNfZGlyPSR7Y29kX2Rpcn0vb3V0cHV0LzA1LWNvZC1STkFzZXEtdHJpbW1pbmcvdHJpbW1lZC1mYXN0cWMnCmVjaG8gJ2V4cG9ydCB0cmltbWVkX3JlYWRzX2Rpcj0ke2NvZF9kaXJ9L291dHB1dC8wNS1jb2QtUk5Bc2VxLXRyaW1taW5nL3RyaW1tZWQtcmVhZHMnCmVjaG8gJ2V4cG9ydCBrYWxsaXN0b19vdXRwdXRfZGlyPSR7b3V0cHV0X2Rpcl90b3B9L2thbGxpc3RvJwplY2hvICIiCgoKZWNobyAiIyBJbnB1dC9PdXRwdXQgZmlsZXMiCmVjaG8gJ2V4cG9ydCB0cmFuc2NyaXB0b21lX2Zhc3RhX2Rpcj0ke2NvZF9kaXJ9L2RhdGEnCmVjaG8gJ2V4cG9ydCB0cmFuc2NyaXB0b21lX2Zhc3RhX25hbWU9IkdDRl8wMzExNjg5NTUuMV9BU00zMTE2ODk1djFfcm5hIicKZWNobyAnZXhwb3J0IHRyYW5zY3JpcHRvbWVfZmFzdGE9IiR7dHJhbnNjcmlwdG9tZV9mYXN0YV9kaXJ9LyR7dHJhbnNjcmlwdG9tZV9mYXN0YV9uYW1lfSInCmVjaG8gJ2V4cG9ydCBrYWxsaXN0b19pbmRleF9uYW1lPSJHX21hY3JvY2VwaGFsdXNfa2FsbGlzdG9faW5kZXguaWR4IicKCgplY2hvICIjIEV4dGVybmFsIGRhdGEgVVJMcyBhbmQgY2hlY2tzdW1zIgplY2hvICdleHBvcnQgdHJhbnNjcmlwdG9tZV9mYXN0YV91cmw9Imh0dHBzOi8vb3dsLmZpc2gud2FzaGluZ3Rvbi5lZHUvaGFsZnNoZWxsL2dlbm9taWMtZGF0YWJhbmsvR0NGXzAzMTE2ODk1NS4xX0FTTTMxMTY4OTV2MV9ybmEuZm5hIicKZWNobyAnZXhwb3J0IHRyYW5zY3JpcHRvbWVfY2hlY2tzdW09IjJhNmM3Yzk4OTgyNzI3ZTY4OGYwMzNhOWIyMzY3MjViIicKZWNobyAiIgoKCmVjaG8gIiMgUGF0aHMgdG8gcHJvZ3JhbXMiCmVjaG8gJ2V4cG9ydCBrYWxsaXN0bz0vaG9tZS9zaGFyZWQva2FsbGlzdG8va2FsbGlzdG8nCmVjaG8gJ2V4cG9ydCBtdWx0aXFjPS9ob21lL3NhbS9wcm9ncmFtcy9tYW1iYWZvcmdlL2Jpbi9tdWx0aXFjJwplY2hvICdleHBvcnQgdHJpbml0eV9hYnVuZF90b19tYXRyaXg9
L2hvbWUvc2hhcmVkL3RyaW5pdHlybmFzZXEtdjIuMTIuMC91dGlsL2FidW5kYW5jZV9lc3RpbWF0ZXNfdG9fbWF0cml4LnBsJwplY2hvICIiCgoKZWNobyAiIyBTZXQgbnVtYmVyIG9mIENQVXMgdG8gdXNlIgplY2hvICdleHBvcnQgdGhyZWFkcz0yMCcKZWNobyAiIgoKCmVjaG8gIiMgUHJvZ3JhbXMgYXNzb2NpYXRpdmUgYXJyYXkiCmVjaG8gImRlY2xhcmUgLUEgcHJvZ3JhbXNfYXJyYXkiCmVjaG8gInByb2dyYW1zX2FycmF5PSgiCmVjaG8gJ1trYWxsaXN0b109IiR7a2FsbGlzdG99IiBcJwplY2hvICdbbXVsdGlxY109IiR7bXVsdGlxY30iIFwnCmVjaG8gJ1t0cmluaXR5X2FidW5kX3RvX21hdHJpeF09IiR7dHJpbml0eV9hYnVuZF90b19tYXRyaXh9IiBcJwplY2hvICIpIgp9ID4gLmJhc2h2YXJzCgpjYXQgLmJhc2h2YXJzCmBgYAoKSSB3aWxsIGJlIHJ1bm5pbmcga2FsbGlzdG8gd2l0aCB0cmltbWVkL1FDZCByZWFkcwoKIyBBbGlnbiB0byByZWZlcmVuY2UgdHJhbnNjcmlwdG9tZSAoS2FsbGlzdG8gcHNldWRvYWxpZ25tZW50KQoKCiMjIFJldHJpZXZpbmcgdGhlIHJlZmVyZW5jZSB0cmFuc2NyaXB0b21lCgpgYGB7ciBkb3dubG9hZC10cmFuc2NyaXB0b21lLWZhc3RhLCBlbmdpbmU9J2Jhc2gnfQojIExvYWQgYmFzaCB2YXJpYWJsZXMgaW50byBtZW1vcnkKc291cmNlIC5iYXNodmFycwoKd2dldCBcCi0tZGlyZWN0b3J5LXByZWZpeCAke3RyYW5zY3JpcHRvbWVfZmFzdGFfZGlyfSBcCi0tcmVjdXJzaXZlIFwKLS1uby1jaGVjay1jZXJ0aWZpY2F0ZSBcCi0tY29udGludWUgXAotLW5vLWhvc3QtZGlyZWN0b3JpZXMgXAotLW5vLWRpcmVjdG9yaWVzIFwKLS1uby1wYXJlbnQgXAotLXF1aWV0IFwKLS1leGVjdXRlIHJvYm90cz1vZmYgXAotLWFjY2VwdCAiJHt0cmFuc2NyaXB0b21lX2Zhc3RhX25hbWV9LmZuYSIgJHt0cmFuc2NyaXB0b21lX2Zhc3RhX3VybH0KYGBgCgpgYGB7ciBjaGVjay10cmFuc2NyaXB0b21lLWRpciwgZW5naW5lPSdiYXNoJywgZXZhbD1UUlVFfQojIExvYWQgYmFzaCB2YXJpYWJsZXMgaW50byBtZW1vcnkKc291cmNlIC5iYXNodmFycwoKbHMgLWxoICIke3RyYW5zY3JpcHRvbWVfZmFzdGFfZGlyfSIKYGBgCgoKIyMgVmVyaWZ5IHRyYW5zY3JpcHRvbWUgRmFzdEEgTUQ1IGNoZWNrc3VtCgpgYGB7ciB2ZXJpZnktdHJhbnNjcmlwdG9tZS1mYXN0YS1jaGVja3N1bSwgZW5naW5lPSdiYXNoJywgZXZhbD1UUlVFfQojIExvYWQgYmFzaCB2YXJpYWJsZXMgaW50byBtZW1vcnkKc291cmNlIC5iYXNodmFycwoKY2QgIiR7dHJhbnNjcmlwdG9tZV9mYXN0YV9kaXJ9IgoKIyBDaGVja3N1bXMgZmlsZSBjb250YWlucyBvdGhlciBmaWxlcywgc28gdGhpcyBqdXN0IGxvb2tzIGZvciB0aGUgc1JOQXNlcSBmaWxlcy4KbWQ1c3VtIC0tY2hlY2sgPDw8ICIke3RyYW5zY3JpcHRvbWVfY2hlY2tzdW19ICAke3RyYW5zY3JpcHRvbWVfZmFzdGFfbmFtZX0uZm5hIgoKYGBgCgoKIyMgQnVpbGRpbmcgSW5kZXgKCmBgYHtyIGthbGxp
c3RvLWluZGV4aW5nLCBlbmdpbmU9J2Jhc2gnfQojIExvYWQgYmFzaCB2YXJpYWJsZXMgaW50byBtZW1vcnkKc291cmNlIC5iYXNodmFycwoKY2QgIiR7a2FsbGlzdG9fb3V0cHV0X2Rpcn0iCgoke3Byb2dyYW1zX2FycmF5W2thbGxpc3RvXX0gaW5kZXggXAotLXRocmVhZHM9JHt0aHJlYWRzfSBcCi0taW5kZXg9IiR7a2FsbGlzdG9faW5kZXhfbmFtZX0iIFwKIiR7dHJhbnNjcmlwdG9tZV9mYXN0YX0uZm5hIgpgYGAKCmBgYHtyIGNoZWNrLWluZGV4LWZpbGUsIGVuZ2luZT0nYmFzaCcsIGV2YWw9VFJVRX0KIyBMb2FkIGJhc2ggdmFyaWFibGVzIGludG8gbWVtb3J5CnNvdXJjZSAuYmFzaHZhcnMKCmxzIC1saCAke2thbGxpc3RvX291dHB1dF9kaXJ9CmBgYAoKCiMjIFNhbXBsZSBRdWFudGlmaWNhdGlvbgoKS2FsbGlzdG8gY2FuIHJ1biBxdWFudGlmaWNhdGlvbiBvbiBlaXRoZXIgc2luZ2xlLSBvciBwYWlyZWQtZW5kIHJlYWRzLiBUaGUgZGVmYXVsdCBvcHRpb24gaXMgcGFpcmVkLWVuZCwgd2hpY2ggcmVxdWlyZXMgdGhlIGlucHV0IG9mIGFuIGV2ZW4gbnVtYmVyIG9mIHBhaXJlZCBmYXN0cSBmaWxlcyAoZS5nLiwgcGFpckFfUjEuZmFzdHEsIHBhaXJBX1IyLmZhc3RxKS4gClRvIHVzZSBzaW5nbGUtZW5kIG1vZGUsIGluY2x1ZGUgdGhlIC0tc2luZ2xlIGZsYWcsIGFzIHdlbGwgYXMgLWwgKC0tZnJhZ21lbnQtbGVuZ3RoPURPVUJMRSwgZXN0aW1hdGVkIGF2Zy4gZnJhZ21lbnQgbGVuZ3RoKSBhbmQgLXMgKC0tc2Q9RE9VQkxFLCBlc3RpbWF0ZXMgc3RhbmQuIGRldi4gb2YgZnJhZ21lbnQgbGVuZ3RoKSwgYW5kIGEgbnVtYmVyIG9mIGZhc3RxIGZpbGVzLgpBZ2FpbiwgZ3ppcHBlZCBmaWxlcyBhcmUgYWNjZXB0YWJsZS4KCkthbGxpc3RvIHF1YW50IGlzIHJhdGhlciBmaW5pY2t5IGFib3V0IGhvdyB5b3UgaW5wdXQgc2V0cyBvZiBwYWlyZWQgcmVhZHMsIGFuZCB5b3UgY2FuIG9ubHkgaW5wdXQgYSBzaW5nbGUgcGFpciBhdCBhIHRpbWUuIFRvIGNpcmN1bXZlbnQsIEknbGwgY3JlYXRlIGEgcXVhbnRpZmljYXRpb24gZnVuY3Rpb24gYW5kIGFwcGx5IGl0IGl0ZXJhdGl2ZWx5IHRvIGVhY2ggcGFpciBvZiByZWFkcyB1c2luZyBhIGxvb3AuCgpgYGB7ciBrYWxsaXN0by1xdWFudGlmaWNhdGlvbiwgZW5naW5lPSdiYXNoJ30KIyBMb2FkIGJhc2ggdmFyaWFibGVzIGludG8gbWVtb3J5CnNvdXJjZSAuYmFzaHZhcnMKCiMgRnVuY3Rpb24gdG8gcnVuIGthbGxpc3RvIHF1YW50LiBUYWtlcyB0d28gKHBhaXJlZCkgcmVhZHMgYXMgaW5wdXQsIG91dHB1dHMgdG8gc2FtcGxlLWFzc29jaWF0ZWQgZGlyZWN0b3J5CnJ1bl9rYWxsaXN0b19xdWFudCgpIHsKICAgIHNvdXJjZSAuYmFzaHZhcnMgICMgU291cmNlIC5iYXNodmFycyBpbnNpZGUgdGhlIGZ1bmN0aW9uIHRvIG1ha2UgaXRzIHZhcmlhYmxlcyBhY2Nlc3NpYmxlCiAgICBsb2NhbCBSMV9mYXN0cT0kezF9CiAgICBsb2NhbCBSMl9mYXN0cT0kezJ9CiAgICAKICAgIGNkICR7a2FsbGlzdG9f
b3V0cHV0X2Rpcn0KICAgIHNhbXBsZV9udW09JChiYXNlbmFtZSAiJHtSMV9mYXN0cX0iICIuZmxleGJhcl90cmltLlJfMS5mYXN0cS5neiIpCiAgICBta2RpciBrYWxsaXN0b19xdWFudF8ke3NhbXBsZV9udW19CgogICAgJHtwcm9ncmFtc19hcnJheVtrYWxsaXN0b119IHF1YW50IFwKICAgICAgICAtLXRocmVhZHM9JHt0aHJlYWRzfSBcCiAgICAgICAgLS1pbmRleD0iJHtrYWxsaXN0b19vdXRwdXRfZGlyfS8ke2thbGxpc3RvX2luZGV4X25hbWV9IiBcCiAgICAgICAgLS1vdXRwdXQtZGlyPSIke2thbGxpc3RvX291dHB1dF9kaXJ9L2thbGxpc3RvX3F1YW50XyR7c2FtcGxlX251bX0iIFwKICAgICAgICAtLWJvb3RzdHJhcC1zYW1wbGVzPTEwMCBcCiAgICAgICAgJHt0cmltbWVkX3JlYWRzX2Rpcn0vJHtSMV9mYXN0cX0gJHt0cmltbWVkX3JlYWRzX2Rpcn0vJHtSMl9mYXN0cX0gXAogICAgICAgICY+ICIke2thbGxpc3RvX291dHB1dF9kaXJ9L2thbGxpc3RvX3F1YW50XyR7c2FtcGxlX251bX0ubG9nIgp9CgoKCiMgSXRlcmF0aXZlbHkgYXBwbHkgcnVuX2thbGxpc3RvX3F1YW50IG9uIGVhY2ggcGFpciBvZiBpbnB1dCByZWFkcwpmb3IgZmlsZV9yMSBpbiAiJHt0cmltbWVkX3JlYWRzX2Rpcn0iLyouZmxleGJhcl90cmltLlJfMS5mYXN0cS5nejsgZG8KICAgICMgRXh0cmFjdCB0aGUgc2FtcGxlIG5hbWUgZnJvbSB0aGUgZmlsZSBuYW1lCiAgICBzYW1wbGVfbmFtZT0kKGJhc2VuYW1lICIke2ZpbGVfcjF9IiAiLmZsZXhiYXJfdHJpbS5SXzEuZmFzdHEuZ3oiKQoKICAgICMgRm9ybSB0aGUgZmlsZSBuYW1lcyAoZnVuY3Rpb24gdGFrZXMgaW5wdXQgZmlsZSBuYW1lcywgbm90IHBhdGhzKQogICAgZmlsZV9yMV9uYW1lPSIke3NhbXBsZV9uYW1lfS5mbGV4YmFyX3RyaW0uUl8xLmZhc3RxLmd6IgogICAgZmlsZV9yMl9uYW1lPSIke3NhbXBsZV9uYW1lfS5mbGV4YmFyX3RyaW0uUl8yLmZhc3RxLmd6IgoKICAgICMgQ2hlY2sgdGhhdCB0aGUgc2FtcGxlIGhhc24ndCBhbHJlYWR5IGJlZW4gcXVhbnRpZmllZAogICAgaWYgWyAhIC1kICIke2thbGxpc3RvX291dHB1dF9kaXJ9L2thbGxpc3RvX3F1YW50XyR7c2FtcGxlX25hbWV9IiBdOyB0aGVuCiAgICAKICAgICAgICAjIENoZWNrIGlmIHRoZSBjb3JyZXNwb25kaW5nIFIyIGZpbGUgZXhpc3RzCiAgICAgICAgaWYgWyAtZSAiJHt0cmltbWVkX3JlYWRzX2Rpcn0vJHtmaWxlX3IyfSIgXTsgdGhlbgogICAgICAgICAgICAjIFJ1biBrYWxsaXN0byBxdWFudCBvbiB0aGUgZmlsZSBwYWlyCiAgICAgICAgICAgIHJ1bl9rYWxsaXN0b19xdWFudCAiJHtmaWxlX3IxX25hbWV9IiAiJHtmaWxlX3IyX25hbWV9IiAKCiAgICAgICAgICAgIGVjaG8gIlByb2Nlc3NlZCBzYW1wbGU6ICR7c2FtcGxlX25hbWV9IgogICAgICAgIGZpCiAgICBlbHNlCiAgICAgICAgZWNobyAiU2FtcGxlIGFscmVhZHkgcHJvY2Vzc2VkOiAke3NhbXBsZV9uYW1lfSIKICAgIGZpCmRvbmUKYGBgCgpDaGVjayB0aGF0IHdlIGhh
dmUgdGhlIGFwcHJvcHJpYXRlIG51bWJlciBvZiBvdXRwdXQgZm9sZGVycy4gV2Ugc2hvdWxkIGhhdmUgb25lIGxvZyBmaWxlIGZvciBlYWNoIHBhaXIgb2YgcmVhZHMKYGBge3IgY2hlY2sta2FsbGlzdG8tb3V0cHV0LCBlbmdpbmU9J2Jhc2gnLCBldmFsPVRSVUV9CiMgTG9hZCBiYXNoIHZhcmlhYmxlcyBpbnRvIG1lbW9yeQpzb3VyY2UgLmJhc2h2YXJzCgojIENvdW50IG51bWJlciBvZiByYXcgcmVhZCBmaWxlcwpjZCAke3RyaW1tZWRfcmVhZHNfZGlyfQplY2hvICJOdW1iZXIgb2YgcmF3IHJlYWRzOiIKbHMgLTEgKi5mYXN0cS5neiB8IHdjIC1sCgojIENvdW50IG51bWJlciBvZiBrYWxsaXN0byBvdXRwdXQgCmNkICR7a2FsbGlzdG9fb3V0cHV0X2Rpcn0KZWNobyAiTnVtYmVyIG9mIG91dHB1dCBsb2cgZmlsZXMiCmZpbmQgLiAtdHlwZSBmIC1uYW1lICIqLmxvZyIgfCB3YyAtbApgYGAKCiMjIE11bHRpUUMgb24gS2FsbGlzdG8gb3V0cHV0IGxvZ3MKCmBgYHtyIHJhdy1mYXN0cWMtbXVsdGlxYywgZW5naW5lPSdiYXNoJ30KIyBMb2FkIGJhc2ggdmFyaWFibGVzIGludG8gbWVtb3J5CnNvdXJjZSAuYmFzaHZhcnMKCiMjIyMjIyMjIyMjIyBSVU4gTVVMVElRQyAjIyMjIyMjIyMjIyMKZWNobyAiQmVnaW5uaW5nIE11bHRpUUMgb24gcmF3IEZhc3RRQy4uLiIKZWNobyAiIgoKJHtwcm9ncmFtc19hcnJheVttdWx0aXFjXX0gJHtrYWxsaXN0b19vdXRwdXRfZGlyfS8qLmxvZyAtbyAke291dHB1dF9kaXJfdG9wfQoKZWNobyAiIgplY2hvICJNdWx0aVFDIG9uIHJhdyBGYXN0UXMgY29tcGxldGUuIgplY2hvICIiCgojIyMjIyMjIyMjIyMgRU5EIE1VTFRJUUMgIyMjIyMjIyMjIyMjCgplY2hvICJSZW1vdmluZyBGYXN0UUMgemlwIGZpbGVzLiIKZWNobyAiIgpybSAke291dHB1dF9kaXJfdG9wfS8qLnppcAplY2hvICJGYXN0UUMgemlwIGZpbGVzIHJlbW92ZWQuIgplY2hvICIiCgojIFZpZXcgZGlyZWN0b3J5IGNvbnRlbnRzCmxzIC1saCAke291dHB1dF9kaXJfdG9wfQoKYGBgCgoKSSBhbHNvIHdhbnQgdG8gaW5jbHVkZSB0aGUgdHJlYXRtZW50L3RhbmsgaW5mbyB3aGVuIHBsb3R0aW5nIGFsaWdubWVudCByYXRlcyBhY3Jvc3Mgc2FtcGxlcwpgYGB7ciBwbG90LWFsaWdubWVudC1yYXRlcywgZXZhbD1UUlVFfQojIExvYWQgbXVsdGlxYyBzdGF0cwprYWxsaXN0b19tdWx0aXFjIDwtIHJlYWQuY3N2KCIuLi9vdXRwdXQvMDYtY29kLVJOQXNlcS1hbGlnbm1lbnQvbXVsdGlxY19kYXRhL211bHRpcWNfa2FsbGlzdG8udHh0Iiwgc2VwID0gJ1x0JykKIyBBZGp1c3Qgc2FtcGxlIG5hbWUgZm9ybWF0dGluZyAodG8gcHJlcCBmb3Igam9pbikKa2FsbGlzdG9fbXVsdGlxYyRTYW1wbGUgPC0gZ3N1YigiX1IxXzAwMSIsICIiLCBrYWxsaXN0b19tdWx0aXFjJFNhbXBsZSkgCmthbGxpc3RvX211bHRpcWMkU2FtcGxlIDwtIHBhc3RlKCJzYW1wbGVfIiwga2FsbGlzdG9fbXVsdGlxYyRTYW1wbGUsIHNlcCA9ICIiKQojIExvYWQgZXhwZXJpbWVudGFsIGRh
dGEKY29kX3NhbXBsZV9pbmZvX09HIDwtIHJlYWQuY3N2KCIuLi9kYXRhL0RFU2VxMl9TYW1wbGVfSW5mb3JtYXRpb24uY3N2IikKCmthbGxpc3RvX211bHRpcWNfcGx1c3RyZWF0bWVudCA8LSBsZWZ0X2pvaW4oY29kX3NhbXBsZV9pbmZvX09HLCBrYWxsaXN0b19tdWx0aXFjLCBieSA9IGMoInNhbXBsZV9uYW1lIiA9ICJTYW1wbGUiKSkgJT4lIAogIG5hLm9taXQoKQprYWxsaXN0b19tdWx0aXFjX3BsdXN0cmVhdG1lbnQgPC0ga2FsbGlzdG9fbXVsdGlxY19wbHVzdHJlYXRtZW50W29yZGVyKGthbGxpc3RvX211bHRpcWNfcGx1c3RyZWF0bWVudCRzYW1wbGVfbnVtYmVyKSxdCgpnZ3Bsb3Qoa2FsbGlzdG9fbXVsdGlxY19wbHVzdHJlYXRtZW50LAogICAgICAgYWVzKHg9cmVvcmRlcihzYW1wbGVfbmFtZSwgc2FtcGxlX251bWJlciksIHk9cGVyY2VudF9hbGlnbmVkLCBmaWxsPWFzLmZhY3Rvcih0ZW1wX3RyZWF0bWVudCkpKSArCiAgZ2VvbV9iYXIoc3RhdD0iaWRlbnRpdHkiKSArCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA2MCwgdmp1c3QgPSAxLCBoanVzdD0xLCBzaXplPTcpKQoKZ2dwbG90KGthbGxpc3RvX211bHRpcWNfcGx1c3RyZWF0bWVudCwKICAgICAgIGFlcyh4PXJlb3JkZXIoc2FtcGxlX25hbWUsIHNhbXBsZV9udW1iZXIpLCB5PXRvdGFsX3JlYWRzLCBmaWxsPWFzLmZhY3Rvcih0ZW1wX3RyZWF0bWVudCkpKSArCiAgZ2VvbV9iYXIoc3RhdD0iaWRlbnRpdHkiKSArCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA2MCwgdmp1c3QgPSAxLCBoanVzdD0xLCBzaXplPTcpKQoKIyBzYW1wbGUxNDkgaXMga2luZCBvZiB0aHJvd2luZyBvZmYgdGhlIHZpc3VhbGl6YXRpb24sIHNvIGxldHMgcmVtb3ZlIGFuZCByZWRvCmdncGxvdChrYWxsaXN0b19tdWx0aXFjX3BsdXN0cmVhdG1lbnRba2FsbGlzdG9fbXVsdGlxY19wbHVzdHJlYXRtZW50JHNhbXBsZV9uYW1lICE9ICJzYW1wbGVfMTQ5IiwgXSwKICAgICAgIGFlcyh4PXJlb3JkZXIoc2FtcGxlX25hbWUsIHNhbXBsZV9udW1iZXIpLCB5PXRvdGFsX3JlYWRzLCBmaWxsPWFzLmZhY3Rvcih0ZW1wX3RyZWF0bWVudCkpKSArCiAgZ2VvbV9iYXIoc3RhdD0iaWRlbnRpdHkiKSArCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA2MCwgdmp1c3QgPSAxLCBoanVzdD0xLCBzaXplPTcpKQoKZ2dwbG90KGthbGxpc3RvX211bHRpcWNfcGx1c3RyZWF0bWVudCwKICAgICAgIGFlcyh4PXJlb3JkZXIoc2FtcGxlX25hbWUsIHNhbXBsZV9udW1iZXIpLCB5PWZyYWdtZW50X2xlbmd0aCwgZmlsbD1hcy5mYWN0b3IodGVtcF90cmVhdG1lbnQpKSkgKwogIGdlb21fYmFyKHN0YXQ9ImlkZW50aXR5IikgKwogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gNjAsIHZqdXN0ID0gMSwgaGp1c3Q9MSwgc2l6ZT03KSkKCmBgYAoKIyMgVHJpbml0eSBNYXRyaXggd2l0aCBLYWxsaXN0byBP
dXRwdXQKCmBgYHtyIGthbGxpc3RvLXRyaW5pdHktbWF0cml4LCBlbmdpbmU9J2Jhc2gnfQojIExvYWQgYmFzaCB2YXJpYWJsZXMgaW50byBtZW1vcnkKc291cmNlIC5iYXNodmFycwoKY2QgJHtrYWxsaXN0b19vdXRwdXRfZGlyfQoKJHtwcm9ncmFtc19hcnJheVt0cmluaXR5X2FidW5kX3RvX21hdHJpeF19IFwKLS1lc3RfbWV0aG9kICdrYWxsaXN0bycgXAotLWdlbmVfdHJhbnNfbWFwICdub25lJyBcCi0tb3V0X3ByZWZpeCAna2FsbGlzdG8nIFwKLS1uYW1lX3NhbXBsZV9ieV9iYXNlZGlyICR7a2FsbGlzdG9fb3V0cHV0X2Rpcn0va2FsbGlzdG9fcXVhbnRfKi9hYnVuZGFuY2UudHN2CgpscyAtbGggJHtrYWxsaXN0b19vdXRwdXRfZGlyfQpgYGAKCg==