---
title: "GO Annotations"
author: "Steven Roberts"
date: "`r format(Sys.time(), '%d %B, %Y')`"
format:
  md:
    toc: true
    toc-depth: 2
editor:
  markdown:
    wrap: 72
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(
  echo = TRUE,         # Display code chunks
  eval = TRUE,         # Evaluate code chunks
  warning = FALSE,     # Hide warnings
  message = FALSE,     # Hide messages
  fig.width = 6,       # Set plot width in inches
  fig.height = 4,      # Set plot height in inches
  fig.align = "center", # Align plots to the center
  comment = ""         # Prevents appending '##' to beginning of lines in code output
)
```

The goal is to start by grabbing proteins annotated with specific GO
terms and searching the genome's predicted proteins against them.

See also <https://www.ebi.ac.uk/QuickGO/annotations>.

# Variables

```{r setup2, include=TRUE}
# Key paths, tool locations and search parameters shared by every bash chunk
OUT_DIR <- "../output/27-Apul-pheno-annot/"
evalue <- "1E-20"
fasta <- "../data/Apulchra-genome.pep.faa"
BLAST_DIR <- "/home/shared/ncbi-blast-2.15.0+/bin"
threads <- "42"

# curl -o, makeblastdb -out and the shell redirects below do not create
# missing directories, so make sure the output directory exists first.
dir.create(OUT_DIR, recursive = TRUE, showWarnings = FALSE)

# Export these as environment variables for bash chunks.
Sys.setenv(
  OUT_DIR = OUT_DIR,
  evalue = evalue,
  fasta = fasta,
  BLAST_DIR = BLAST_DIR,
  threads = threads
)
```

# Aerobic respiration (GO:0009060)

```{r, engine='bash'}
GO="0009060"
db_prefix="${OUT_DIR}SwissProt-GO:${GO}"
hits="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# Download reviewed (Swiss-Prot) UniProtKB entries annotated with this GO term.
# --fail: on an HTTP error, exit non-zero instead of saving the error body as FASTA.
curl --fail -H "Accept: text/plain" "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" -o "${db_prefix}.fa"

head "${db_prefix}.fa"

echo "Number of Proteins"
grep -c ">" "${db_prefix}.fa"

# Build a protein BLAST database from the downloaded sequences
"${BLAST_DIR}/makeblastdb" \
-in "${db_prefix}.fa" \
-dbtype prot \
-out "${db_prefix}"

# Search the genome's predicted proteins against that database;
# stderr from blastp is written to a per-GO warnings file.
"${BLAST_DIR}/blastp" \
-query "${fasta}" \
-db "${db_prefix}" \
-out "${hits}" \
-evalue "${evalue}" \
-num_threads "${threads}" \
-max_target_seqs 1 \
-max_hsps 1 \
-outfmt 6 \
2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${hits}"

echo "Number of hits"
wc -l "${hits}"
```

# Oxidative phosphorylation (GO:0006119)

```{r, engine='bash'}
GO="0006119"
db_prefix="${OUT_DIR}SwissProt-GO:${GO}"
hits="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# Download reviewed (Swiss-Prot) UniProtKB entries annotated with this GO term.
# --fail: on an HTTP error, exit non-zero instead of saving the error body as FASTA.
curl --fail -H "Accept: text/plain" "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" -o "${db_prefix}.fa"

head "${db_prefix}.fa"

echo "Number of Proteins"
grep -c ">" "${db_prefix}.fa"

# Build a protein BLAST database from the downloaded sequences
"${BLAST_DIR}/makeblastdb" \
-in "${db_prefix}.fa" \
-dbtype prot \
-out "${db_prefix}"

# Search the genome's predicted proteins against that database;
# stderr from blastp is written to a per-GO warnings file.
"${BLAST_DIR}/blastp" \
-query "${fasta}" \
-db "${db_prefix}" \
-out "${hits}" \
-evalue "${evalue}" \
-num_threads "${threads}" \
-max_target_seqs 1 \
-max_hsps 1 \
-outfmt 6 \
2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${hits}"

echo "Number of hits"
wc -l "${hits}"
```

# Canonical glycolysis (GO:0061621)

```{r, engine='bash'}
GO="0061621"
db_prefix="${OUT_DIR}SwissProt-GO:${GO}"
hits="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# Download reviewed (Swiss-Prot) UniProtKB entries annotated with this GO term.
# --fail: on an HTTP error, exit non-zero instead of saving the error body as FASTA.
curl --fail -H "Accept: text/plain" "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" -o "${db_prefix}.fa"

head "${db_prefix}.fa"

echo "Number of Proteins"
grep -c ">" "${db_prefix}.fa"

# Build a protein BLAST database from the downloaded sequences
"${BLAST_DIR}/makeblastdb" \
-in "${db_prefix}.fa" \
-dbtype prot \
-out "${db_prefix}"

# Search the genome's predicted proteins against that database;
# stderr from blastp is written to a per-GO warnings file.
"${BLAST_DIR}/blastp" \
-query "${fasta}" \
-db "${db_prefix}" \
-out "${hits}" \
-evalue "${evalue}" \
-num_threads "${threads}" \
-max_target_seqs 1 \
-max_hsps 1 \
-outfmt 6 \
2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${hits}"

echo "Number of hits"
wc -l "${hits}"
```

# Tricarboxylic Acid Cycle (GO:0006099)

```{r, engine='bash'}
GO="0006099"
db_prefix="${OUT_DIR}SwissProt-GO:${GO}"
hits="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# Download reviewed (Swiss-Prot) UniProtKB entries annotated with this GO term.
# --fail: on an HTTP error, exit non-zero instead of saving the error body as FASTA.
curl --fail -H "Accept: text/plain" "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" -o "${db_prefix}.fa"

head "${db_prefix}.fa"

echo "Number of Proteins"
grep -c ">" "${db_prefix}.fa"

# Build a protein BLAST database from the downloaded sequences
"${BLAST_DIR}/makeblastdb" \
-in "${db_prefix}.fa" \
-dbtype prot \
-out "${db_prefix}"

# Search the genome's predicted proteins against that database;
# stderr from blastp is written to a per-GO warnings file.
"${BLAST_DIR}/blastp" \
-query "${fasta}" \
-db "${db_prefix}" \
-out "${hits}" \
-evalue "${evalue}" \
-num_threads "${threads}" \
-max_target_seqs 1 \
-max_hsps 1 \
-outfmt 6 \
2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${hits}"

echo "Number of hits"
wc -l "${hits}"
```

# Summary of blast hits

```{r, engine='bash'}
# Line counts (one line per hit in -outfmt 6) for every BLAST table in the output directory
wc -l "${OUT_DIR}"*tab
```