# Global R options: show each chunk's source code in the rendered document.
knitr::opts_chunk$set(echo = TRUE)
# Define key paths and tool directories
# OUT_DIR keeps its trailing slash: the bash chunks append file names directly
# to it (e.g. "${OUT_DIR}"SwissProt-GO:...).
OUT_DIR <- "../output/23-Apul-energy-GO/"
# E-value cutoff, kept as a string so it is exported to bash verbatim.
evalue <- "1E-20"
# Protein FASTA file handed to the BLAST searches.
fasta <- "../data/Apulchra-genome.pep.faa"

# Export these as environment variables for bash chunks.
Sys.setenv(
  OUT_DIR = OUT_DIR,
  evalue = evalue,
  fasta = fasta
)
23 GO Annotations
Grabbing GOs
- Glycolysis GO:0006096
- Gluconeogenesis GO:0006094
- Lipolysis/lipid catabolism GO:0016042
- Fatty acid beta oxidation GO:0006635
- Starvation GO:0042594
- Lipid biosynthesis GO:0008610
- Protein catabolic process GO:0030163
# Variables
Glycolysis GO:0006096
# Glycolysis (GO:0006096): download the reviewed (Swiss-Prot) UniProt proteins
# annotated with this GO term, build a protein BLAST database from them, and
# run blastp with the $fasta proteins as queries against that database.
# OUT_DIR, evalue and fasta are read from the environment.
GO="0006096"
go_fasta="${OUT_DIR}SwissProt-GO:${GO}.fa"
go_db="${OUT_DIR}SwissProt-GO:${GO}"
blast_out="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# Fetch the reviewed UniProtKB entries for the GO term as FASTA.
curl -H "Accept: text/plain" \
  "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" \
  -o "${go_fasta}"

head "${go_fasta}"

echo "Number of Proteins"
grep -c ">" "${go_fasta}"

# Build a protein BLAST database from the downloaded sequences.
/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
  -in "${go_fasta}" \
  -dbtype prot \
  -out "${go_db}"

# Tabular output (outfmt 6), at most one target sequence and one HSP per
# query. stderr (BLAST warnings) is captured in a separate file.
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
  -query "${fasta}" \
  -db "${go_db}" \
  -out "${blast_out}" \
  -evalue "${evalue}" \
  -num_threads 42 \
  -max_target_seqs 1 \
  -max_hsps 1 \
  -outfmt 6 \
  2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${blast_out}"

echo "Number of hits"
wc -l "${blast_out}"
Gluconeogenesis GO:0006094
# Gluconeogenesis (GO:0006094): download the reviewed (Swiss-Prot) UniProt
# proteins annotated with this GO term, build a protein BLAST database from
# them, and run blastp with the $fasta proteins as queries against it.
# OUT_DIR, evalue and fasta are read from the environment.
GO="0006094"
go_fasta="${OUT_DIR}SwissProt-GO:${GO}.fa"
go_db="${OUT_DIR}SwissProt-GO:${GO}"
blast_out="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# Fetch the reviewed UniProtKB entries for the GO term as FASTA.
curl -H "Accept: text/plain" \
  "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" \
  -o "${go_fasta}"

head "${go_fasta}"

echo "Number of Proteins"
grep -c ">" "${go_fasta}"

# Build a protein BLAST database from the downloaded sequences.
/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
  -in "${go_fasta}" \
  -dbtype prot \
  -out "${go_db}"

# Tabular output (outfmt 6), at most one target sequence and one HSP per
# query. stderr (BLAST warnings) is captured in a separate file.
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
  -query "${fasta}" \
  -db "${go_db}" \
  -out "${blast_out}" \
  -evalue "${evalue}" \
  -num_threads 42 \
  -max_target_seqs 1 \
  -max_hsps 1 \
  -outfmt 6 \
  2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${blast_out}"

echo "Number of hits"
wc -l "${blast_out}"
Lipolysis/lipid catabolism GO:0016042
# Lipolysis/lipid catabolism (GO:0016042): download the reviewed (Swiss-Prot)
# UniProt proteins annotated with this GO term, build a protein BLAST database
# from them, and run blastp with the $fasta proteins as queries against it.
# OUT_DIR, evalue and fasta are read from the environment.
GO="0016042"
go_fasta="${OUT_DIR}SwissProt-GO:${GO}.fa"
go_db="${OUT_DIR}SwissProt-GO:${GO}"
blast_out="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# Fetch the reviewed UniProtKB entries for the GO term as FASTA.
curl -H "Accept: text/plain" \
  "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" \
  -o "${go_fasta}"

head "${go_fasta}"

echo "Number of Proteins"
grep -c ">" "${go_fasta}"

# Build a protein BLAST database from the downloaded sequences.
/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
  -in "${go_fasta}" \
  -dbtype prot \
  -out "${go_db}"

# Tabular output (outfmt 6), at most one target sequence and one HSP per
# query. stderr (BLAST warnings) is captured in a separate file.
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
  -query "${fasta}" \
  -db "${go_db}" \
  -out "${blast_out}" \
  -evalue "${evalue}" \
  -num_threads 42 \
  -max_target_seqs 1 \
  -max_hsps 1 \
  -outfmt 6 \
  2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${blast_out}"

echo "Number of hits"
wc -l "${blast_out}"
Fatty acid beta oxidation GO:0006635
# Fatty acid beta oxidation (GO:0006635): download the reviewed (Swiss-Prot)
# UniProt proteins annotated with this GO term, build a protein BLAST database
# from them, and run blastp with the $fasta proteins as queries against it.
# OUT_DIR, evalue and fasta are read from the environment.
GO="0006635"
go_fasta="${OUT_DIR}SwissProt-GO:${GO}.fa"
go_db="${OUT_DIR}SwissProt-GO:${GO}"
blast_out="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# Fetch the reviewed UniProtKB entries for the GO term as FASTA.
curl -H "Accept: text/plain" \
  "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" \
  -o "${go_fasta}"

head "${go_fasta}"

echo "Number of Proteins"
grep -c ">" "${go_fasta}"

# Build a protein BLAST database from the downloaded sequences.
/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
  -in "${go_fasta}" \
  -dbtype prot \
  -out "${go_db}"

# Tabular output (outfmt 6), at most one target sequence and one HSP per
# query. stderr (BLAST warnings) is captured in a separate file.
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
  -query "${fasta}" \
  -db "${go_db}" \
  -out "${blast_out}" \
  -evalue "${evalue}" \
  -num_threads 42 \
  -max_target_seqs 1 \
  -max_hsps 1 \
  -outfmt 6 \
  2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${blast_out}"

echo "Number of hits"
wc -l "${blast_out}"
Starvation GO:0042594
# Starvation (GO:0042594): download the reviewed (Swiss-Prot) UniProt proteins
# annotated with this GO term, build a protein BLAST database from them, and
# run blastp with the $fasta proteins as queries against that database.
# OUT_DIR, evalue and fasta are read from the environment.
GO="0042594"
go_fasta="${OUT_DIR}SwissProt-GO:${GO}.fa"
go_db="${OUT_DIR}SwissProt-GO:${GO}"
blast_out="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# Fetch the reviewed UniProtKB entries for the GO term as FASTA.
curl -H "Accept: text/plain" \
  "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" \
  -o "${go_fasta}"

head "${go_fasta}"

echo "Number of Proteins"
grep -c ">" "${go_fasta}"

# Build a protein BLAST database from the downloaded sequences.
/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
  -in "${go_fasta}" \
  -dbtype prot \
  -out "${go_db}"

# Tabular output (outfmt 6), at most one target sequence and one HSP per
# query. stderr (BLAST warnings) is captured in a separate file.
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
  -query "${fasta}" \
  -db "${go_db}" \
  -out "${blast_out}" \
  -evalue "${evalue}" \
  -num_threads 42 \
  -max_target_seqs 1 \
  -max_hsps 1 \
  -outfmt 6 \
  2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${blast_out}"

echo "Number of hits"
wc -l "${blast_out}"
Lipid biosynthesis GO:0008610
# Lipid biosynthesis (GO:0008610): download the reviewed (Swiss-Prot) UniProt
# proteins annotated with this GO term, build a protein BLAST database from
# them, and run blastp with the $fasta proteins as queries against it.
# OUT_DIR, evalue and fasta are read from the environment.
GO="0008610"
go_fasta="${OUT_DIR}SwissProt-GO:${GO}.fa"
go_db="${OUT_DIR}SwissProt-GO:${GO}"
blast_out="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# Fetch the reviewed UniProtKB entries for the GO term as FASTA.
curl -H "Accept: text/plain" \
  "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" \
  -o "${go_fasta}"

head "${go_fasta}"

echo "Number of Proteins"
grep -c ">" "${go_fasta}"

# Build a protein BLAST database from the downloaded sequences.
/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
  -in "${go_fasta}" \
  -dbtype prot \
  -out "${go_db}"

# Tabular output (outfmt 6), at most one target sequence and one HSP per
# query. stderr (BLAST warnings) is captured in a separate file.
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
  -query "${fasta}" \
  -db "${go_db}" \
  -out "${blast_out}" \
  -evalue "${evalue}" \
  -num_threads 42 \
  -max_target_seqs 1 \
  -max_hsps 1 \
  -outfmt 6 \
  2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${blast_out}"

echo "Number of hits"
wc -l "${blast_out}"
Protein catabolic process GO:0030163
# Protein catabolic process (GO:0030163): download the reviewed (Swiss-Prot)
# UniProt proteins annotated with this GO term, build a protein BLAST database
# from them, and run blastp with the $fasta proteins as queries against it.
# OUT_DIR, evalue and fasta are read from the environment.
GO="0030163"
go_fasta="${OUT_DIR}SwissProt-GO:${GO}.fa"
go_db="${OUT_DIR}SwissProt-GO:${GO}"
blast_out="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# Fetch the reviewed UniProtKB entries for the GO term as FASTA.
curl -H "Accept: text/plain" \
  "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" \
  -o "${go_fasta}"

head "${go_fasta}"

echo "Number of Proteins"
grep -c ">" "${go_fasta}"

# Build a protein BLAST database from the downloaded sequences.
/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
  -in "${go_fasta}" \
  -dbtype prot \
  -out "${go_db}"

# Tabular output (outfmt 6), at most one target sequence and one HSP per
# query. stderr (BLAST warnings) is captured in a separate file.
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
  -query "${fasta}" \
  -db "${go_db}" \
  -out "${blast_out}" \
  -evalue "${evalue}" \
  -num_threads 42 \
  -max_target_seqs 1 \
  -max_hsps 1 \
  -outfmt 6 \
  2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${blast_out}"

echo "Number of hits"
wc -l "${blast_out}"
# Line counts for every file in OUT_DIR whose name ends in "tab" (the BLAST
# result tables), followed by the grand total.
wc -l "${OUT_DIR}"*tab
185 ../output/23-Apul-energy-GO/Apul_blastp-GO:0006094_out.tab
175 ../output/23-Apul-energy-GO/Apul_blastp-GO:0006096_out.tab
192 ../output/23-Apul-energy-GO/Apul_blastp-GO:0006635_out.tab
1843 ../output/23-Apul-energy-GO/Apul_blastp-GO:0008610_out.tab
892 ../output/23-Apul-energy-GO/Apul_blastp-GO:0016042_out.tab
2571 ../output/23-Apul-energy-GO/Apul_blastp-GO:0030163_out.tab
2374 ../output/23-Apul-energy-GO/Apul_blastp-GO:0042594_out.tab
8232 total