23 GO Annotations

Author

Steven Roberts

Published

April 9, 2025

Grabbing GOs

# Variables

# Global R options: echo each chunk's source in the rendered document.
knitr::opts_chunk$set(echo = TRUE)

# Key paths and search parameters shared by the chunks below.
# OUT_DIR must keep its trailing slash: the bash chunks append file names
# directly to it without adding a separator.
OUT_DIR <- "../output/23-Apul-energy-GO/"
# E-value cutoff handed to `blastp -evalue`.
evalue <- "1E-20"
# Peptide FASTA used as the `blastp -query`.
fasta <- "../data/Apulchra-genome.pep.faa"

# Make sure the output directory exists before any bash chunk writes into it;
# otherwise the first `curl -o` fails on a fresh checkout. This is a no-op
# when the directory is already present.
dir.create(OUT_DIR, recursive = TRUE, showWarnings = FALSE)

# Export these as environment variables for bash chunks.
Sys.setenv(
  OUT_DIR = OUT_DIR,
  evalue = evalue,
  fasta = fasta
)

Glycolysis GO:0006096

# Glycolysis (GO:0006096): download the reviewed UniProtKB proteins annotated
# with this GO term, build a protein BLAST database from them, and search the
# peptide FASTA in $fasta against that database.
# OUT_DIR, evalue and fasta are environment variables exported by the R setup.
GO="0006096"

# Derive every file name once so the steps below cannot drift apart.
go_fasta="${OUT_DIR}SwissProt-GO:${GO}.fa"
go_db="${OUT_DIR}SwissProt-GO:${GO}"
blast_out="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# --fail makes curl exit non-zero on an HTTP error instead of saving the
# error body as if it were FASTA; stop here rather than BLAST against it.
curl --fail -H "Accept: text/plain" \
  "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" \
  -o "${go_fasta}" \
  || { echo "UniProt download failed for GO:${GO}" >&2; exit 1; }

head "${go_fasta}"

# Each FASTA record starts with ">", so this counts downloaded proteins.
echo "Number of Proteins"
grep -c ">" "${go_fasta}"

/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
  -in "${go_fasta}" \
  -dbtype prot \
  -out "${go_db}"

# Tabular output (-outfmt 6), limited to one target and one HSP per query.
# stderr goes to a per-GO warnings file instead of the rendered document.
# The query path is quoted so a path containing spaces is not word-split.
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
  -query "${fasta}" \
  -db "${go_db}" \
  -out "${blast_out}" \
  -evalue "${evalue}" \
  -num_threads 42 \
  -max_target_seqs 1 \
  -max_hsps 1 \
  -outfmt 6 \
  2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${blast_out}"

echo "Number of hits"

wc -l "${blast_out}"

Gluconeogenesis GO:0006094

# Gluconeogenesis (GO:0006094): download the reviewed UniProtKB proteins
# annotated with this GO term, build a protein BLAST database from them, and
# search the peptide FASTA in $fasta against that database.
# OUT_DIR, evalue and fasta are environment variables exported by the R setup.
GO="0006094"

# Derive every file name once so the steps below cannot drift apart.
go_fasta="${OUT_DIR}SwissProt-GO:${GO}.fa"
go_db="${OUT_DIR}SwissProt-GO:${GO}"
blast_out="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# --fail makes curl exit non-zero on an HTTP error instead of saving the
# error body as if it were FASTA; stop here rather than BLAST against it.
curl --fail -H "Accept: text/plain" \
  "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" \
  -o "${go_fasta}" \
  || { echo "UniProt download failed for GO:${GO}" >&2; exit 1; }

head "${go_fasta}"

# Each FASTA record starts with ">", so this counts downloaded proteins.
echo "Number of Proteins"
grep -c ">" "${go_fasta}"

/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
  -in "${go_fasta}" \
  -dbtype prot \
  -out "${go_db}"

# Tabular output (-outfmt 6), limited to one target and one HSP per query.
# stderr goes to a per-GO warnings file instead of the rendered document.
# The query path is quoted so a path containing spaces is not word-split.
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
  -query "${fasta}" \
  -db "${go_db}" \
  -out "${blast_out}" \
  -evalue "${evalue}" \
  -num_threads 42 \
  -max_target_seqs 1 \
  -max_hsps 1 \
  -outfmt 6 \
  2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${blast_out}"

echo "Number of hits"

wc -l "${blast_out}"

Lipolysis/lipid catabolism GO:0016042

# Lipolysis / lipid catabolism (GO:0016042): download the reviewed UniProtKB
# proteins annotated with this GO term, build a protein BLAST database from
# them, and search the peptide FASTA in $fasta against that database.
# OUT_DIR, evalue and fasta are environment variables exported by the R setup.
GO="0016042"

# Derive every file name once so the steps below cannot drift apart.
go_fasta="${OUT_DIR}SwissProt-GO:${GO}.fa"
go_db="${OUT_DIR}SwissProt-GO:${GO}"
blast_out="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# --fail makes curl exit non-zero on an HTTP error instead of saving the
# error body as if it were FASTA; stop here rather than BLAST against it.
curl --fail -H "Accept: text/plain" \
  "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" \
  -o "${go_fasta}" \
  || { echo "UniProt download failed for GO:${GO}" >&2; exit 1; }

head "${go_fasta}"

# Each FASTA record starts with ">", so this counts downloaded proteins.
echo "Number of Proteins"
grep -c ">" "${go_fasta}"

/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
  -in "${go_fasta}" \
  -dbtype prot \
  -out "${go_db}"

# Tabular output (-outfmt 6), limited to one target and one HSP per query.
# stderr goes to a per-GO warnings file instead of the rendered document.
# The query path is quoted so a path containing spaces is not word-split.
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
  -query "${fasta}" \
  -db "${go_db}" \
  -out "${blast_out}" \
  -evalue "${evalue}" \
  -num_threads 42 \
  -max_target_seqs 1 \
  -max_hsps 1 \
  -outfmt 6 \
  2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${blast_out}"

echo "Number of hits"

wc -l "${blast_out}"

Fatty acid beta oxidation GO:0006635

# Fatty acid beta oxidation (GO:0006635): download the reviewed UniProtKB
# proteins annotated with this GO term, build a protein BLAST database from
# them, and search the peptide FASTA in $fasta against that database.
# OUT_DIR, evalue and fasta are environment variables exported by the R setup.
GO="0006635"

# Derive every file name once so the steps below cannot drift apart.
go_fasta="${OUT_DIR}SwissProt-GO:${GO}.fa"
go_db="${OUT_DIR}SwissProt-GO:${GO}"
blast_out="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# --fail makes curl exit non-zero on an HTTP error instead of saving the
# error body as if it were FASTA; stop here rather than BLAST against it.
curl --fail -H "Accept: text/plain" \
  "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" \
  -o "${go_fasta}" \
  || { echo "UniProt download failed for GO:${GO}" >&2; exit 1; }

head "${go_fasta}"

# Each FASTA record starts with ">", so this counts downloaded proteins.
echo "Number of Proteins"
grep -c ">" "${go_fasta}"

/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
  -in "${go_fasta}" \
  -dbtype prot \
  -out "${go_db}"

# Tabular output (-outfmt 6), limited to one target and one HSP per query.
# stderr goes to a per-GO warnings file instead of the rendered document.
# The query path is quoted so a path containing spaces is not word-split.
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
  -query "${fasta}" \
  -db "${go_db}" \
  -out "${blast_out}" \
  -evalue "${evalue}" \
  -num_threads 42 \
  -max_target_seqs 1 \
  -max_hsps 1 \
  -outfmt 6 \
  2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${blast_out}"

echo "Number of hits"

wc -l "${blast_out}"

Starvation GO:0042594

# Starvation (GO:0042594): download the reviewed UniProtKB proteins annotated
# with this GO term, build a protein BLAST database from them, and search the
# peptide FASTA in $fasta against that database.
# OUT_DIR, evalue and fasta are environment variables exported by the R setup.
GO="0042594"

# Derive every file name once so the steps below cannot drift apart.
go_fasta="${OUT_DIR}SwissProt-GO:${GO}.fa"
go_db="${OUT_DIR}SwissProt-GO:${GO}"
blast_out="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# --fail makes curl exit non-zero on an HTTP error instead of saving the
# error body as if it were FASTA; stop here rather than BLAST against it.
curl --fail -H "Accept: text/plain" \
  "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" \
  -o "${go_fasta}" \
  || { echo "UniProt download failed for GO:${GO}" >&2; exit 1; }

head "${go_fasta}"

# Each FASTA record starts with ">", so this counts downloaded proteins.
echo "Number of Proteins"
grep -c ">" "${go_fasta}"

/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
  -in "${go_fasta}" \
  -dbtype prot \
  -out "${go_db}"

# Tabular output (-outfmt 6), limited to one target and one HSP per query.
# stderr goes to a per-GO warnings file instead of the rendered document.
# The query path is quoted so a path containing spaces is not word-split.
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
  -query "${fasta}" \
  -db "${go_db}" \
  -out "${blast_out}" \
  -evalue "${evalue}" \
  -num_threads 42 \
  -max_target_seqs 1 \
  -max_hsps 1 \
  -outfmt 6 \
  2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${blast_out}"

echo "Number of hits"

wc -l "${blast_out}"

Lipid biosynthesis GO:0008610

# Lipid biosynthesis (GO:0008610): download the reviewed UniProtKB proteins
# annotated with this GO term, build a protein BLAST database from them, and
# search the peptide FASTA in $fasta against that database.
# OUT_DIR, evalue and fasta are environment variables exported by the R setup.
GO="0008610"

# Derive every file name once so the steps below cannot drift apart.
go_fasta="${OUT_DIR}SwissProt-GO:${GO}.fa"
go_db="${OUT_DIR}SwissProt-GO:${GO}"
blast_out="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# --fail makes curl exit non-zero on an HTTP error instead of saving the
# error body as if it were FASTA; stop here rather than BLAST against it.
curl --fail -H "Accept: text/plain" \
  "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" \
  -o "${go_fasta}" \
  || { echo "UniProt download failed for GO:${GO}" >&2; exit 1; }

head "${go_fasta}"

# Each FASTA record starts with ">", so this counts downloaded proteins.
echo "Number of Proteins"
grep -c ">" "${go_fasta}"

/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
  -in "${go_fasta}" \
  -dbtype prot \
  -out "${go_db}"

# Tabular output (-outfmt 6), limited to one target and one HSP per query.
# stderr goes to a per-GO warnings file instead of the rendered document.
# The query path is quoted so a path containing spaces is not word-split.
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
  -query "${fasta}" \
  -db "${go_db}" \
  -out "${blast_out}" \
  -evalue "${evalue}" \
  -num_threads 42 \
  -max_target_seqs 1 \
  -max_hsps 1 \
  -outfmt 6 \
  2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${blast_out}"

echo "Number of hits"

wc -l "${blast_out}"

Protein catabolic process GO:0030163

# Protein catabolic process (GO:0030163): download the reviewed UniProtKB
# proteins annotated with this GO term, build a protein BLAST database from
# them, and search the peptide FASTA in $fasta against that database.
# OUT_DIR, evalue and fasta are environment variables exported by the R setup.
GO="0030163"

# Derive every file name once so the steps below cannot drift apart.
go_fasta="${OUT_DIR}SwissProt-GO:${GO}.fa"
go_db="${OUT_DIR}SwissProt-GO:${GO}"
blast_out="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# --fail makes curl exit non-zero on an HTTP error instead of saving the
# error body as if it were FASTA; stop here rather than BLAST against it.
curl --fail -H "Accept: text/plain" \
  "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" \
  -o "${go_fasta}" \
  || { echo "UniProt download failed for GO:${GO}" >&2; exit 1; }

head "${go_fasta}"

# Each FASTA record starts with ">", so this counts downloaded proteins.
echo "Number of Proteins"
grep -c ">" "${go_fasta}"

/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
  -in "${go_fasta}" \
  -dbtype prot \
  -out "${go_db}"

# Tabular output (-outfmt 6), limited to one target and one HSP per query.
# stderr goes to a per-GO warnings file instead of the rendered document.
# The query path is quoted so a path containing spaces is not word-split.
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
  -query "${fasta}" \
  -db "${go_db}" \
  -out "${blast_out}" \
  -evalue "${evalue}" \
  -num_threads 42 \
  -max_target_seqs 1 \
  -max_hsps 1 \
  -outfmt 6 \
  2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${blast_out}"

echo "Number of hits"

wc -l "${blast_out}"
# Line counts for every file in OUT_DIR whose name ends in "tab", plus a total.
wc -l "$OUT_DIR"*tab
   185 ../output/23-Apul-energy-GO/Apul_blastp-GO:0006094_out.tab
   175 ../output/23-Apul-energy-GO/Apul_blastp-GO:0006096_out.tab
   192 ../output/23-Apul-energy-GO/Apul_blastp-GO:0006635_out.tab
  1843 ../output/23-Apul-energy-GO/Apul_blastp-GO:0008610_out.tab
   892 ../output/23-Apul-energy-GO/Apul_blastp-GO:0016042_out.tab
  2571 ../output/23-Apul-energy-GO/Apul_blastp-GO:0030163_out.tab
  2374 ../output/23-Apul-energy-GO/Apul_blastp-GO:0042594_out.tab
  8232 total