# Global R options
knitr::opts_chunk$set(echo = TRUE)

# Define key paths and tool directories.
# OUT_DIR keeps its trailing slash: the bash chunks build file names by
# appending directly to it (e.g. "${OUT_DIR}"SwissProt-GO:...).
OUT_DIR <- "../output/23-Apul-energy-GO/"
# E-value cutoff handed to blastp via -evalue; kept as a string because it is
# only ever passed through the environment.
evalue <- "1E-20"
# Query protein FASTA used by every blastp run.
fasta <- "../data/Apulchra-genome.pep.faa"

# Make sure the output directory exists before any chunk writes into it.
# Harmless when the directory is already there.
dir.create(OUT_DIR, recursive = TRUE, showWarnings = FALSE)

# Export these as environment variables for bash chunks.
Sys.setenv(
  OUT_DIR = OUT_DIR,
  evalue = evalue,
  fasta = fasta
)

# 23 GO Annotations
Grabbing GOs
- Glycolysis GO:0006096
- Gluconeogenesis GO:0006094
- Lipolysis/lipid catabolism GO:0016042
- Fatty acid beta oxidation GO:0006635
- Starvation GO:0042594
- Lipid biosynthesis GO:0008610
- Protein catabolic process GO:0030163
# Variables

# Glycolysis GO:0006096
# Fetch the reviewed UniProtKB proteins annotated with this GO term, build a
# BLAST protein database from them, and blastp the query FASTA against it.
# Expects OUT_DIR, evalue and fasta to be exported in the environment.
GO="0006096"

# File names derived from the GO id (OUT_DIR already ends with a slash).
go_fasta="${OUT_DIR}SwissProt-GO:${GO}.fa"
go_db="${OUT_DIR}SwissProt-GO:${GO}"
hits_tab="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# Download the matching reviewed entries as FASTA.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error body as if it were sequence data.
curl --fail -H "Accept: text/plain" \
  "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" \
  -o "${go_fasta}"

head "${go_fasta}"

# One ">" header line per downloaded protein.
echo "Number of Proteins"
grep -c ">" "${go_fasta}"

/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
  -in "${go_fasta}" \
  -dbtype prot \
  -out "${go_db}"

# Tabular (outfmt 6) search, limited to one target sequence and one HSP per
# query; stderr is captured in a per-GO warnings file.
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
  -query "${fasta}" \
  -db "${go_db}" \
  -out "${hits_tab}" \
  -evalue "${evalue}" \
  -num_threads 42 \
  -max_target_seqs 1 \
  -max_hsps 1 \
  -outfmt 6 \
  2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${hits_tab}"

echo "Number of hits"
wc -l "${hits_tab}"

# Gluconeogenesis GO:0006094
# Fetch the reviewed UniProtKB proteins annotated with this GO term, build a
# BLAST protein database from them, and blastp the query FASTA against it.
# Expects OUT_DIR, evalue and fasta to be exported in the environment.
GO="0006094"

# File names derived from the GO id (OUT_DIR already ends with a slash).
go_fasta="${OUT_DIR}SwissProt-GO:${GO}.fa"
go_db="${OUT_DIR}SwissProt-GO:${GO}"
hits_tab="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# Download the matching reviewed entries as FASTA.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error body as if it were sequence data.
curl --fail -H "Accept: text/plain" \
  "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" \
  -o "${go_fasta}"

head "${go_fasta}"

# One ">" header line per downloaded protein.
echo "Number of Proteins"
grep -c ">" "${go_fasta}"

/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
  -in "${go_fasta}" \
  -dbtype prot \
  -out "${go_db}"

# Tabular (outfmt 6) search, limited to one target sequence and one HSP per
# query; stderr is captured in a per-GO warnings file.
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
  -query "${fasta}" \
  -db "${go_db}" \
  -out "${hits_tab}" \
  -evalue "${evalue}" \
  -num_threads 42 \
  -max_target_seqs 1 \
  -max_hsps 1 \
  -outfmt 6 \
  2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${hits_tab}"

echo "Number of hits"
wc -l "${hits_tab}"

# Lipolysis/lipid catabolism GO:0016042
# Fetch the reviewed UniProtKB proteins annotated with this GO term, build a
# BLAST protein database from them, and blastp the query FASTA against it.
# Expects OUT_DIR, evalue and fasta to be exported in the environment.
GO="0016042"

# File names derived from the GO id (OUT_DIR already ends with a slash).
go_fasta="${OUT_DIR}SwissProt-GO:${GO}.fa"
go_db="${OUT_DIR}SwissProt-GO:${GO}"
hits_tab="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# Download the matching reviewed entries as FASTA.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error body as if it were sequence data.
curl --fail -H "Accept: text/plain" \
  "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" \
  -o "${go_fasta}"

head "${go_fasta}"

# One ">" header line per downloaded protein.
echo "Number of Proteins"
grep -c ">" "${go_fasta}"

/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
  -in "${go_fasta}" \
  -dbtype prot \
  -out "${go_db}"

# Tabular (outfmt 6) search, limited to one target sequence and one HSP per
# query; stderr is captured in a per-GO warnings file.
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
  -query "${fasta}" \
  -db "${go_db}" \
  -out "${hits_tab}" \
  -evalue "${evalue}" \
  -num_threads 42 \
  -max_target_seqs 1 \
  -max_hsps 1 \
  -outfmt 6 \
  2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${hits_tab}"

echo "Number of hits"
wc -l "${hits_tab}"

# Fatty acid beta oxidation GO:0006635
# Fetch the reviewed UniProtKB proteins annotated with this GO term, build a
# BLAST protein database from them, and blastp the query FASTA against it.
# Expects OUT_DIR, evalue and fasta to be exported in the environment.
GO="0006635"

# File names derived from the GO id (OUT_DIR already ends with a slash).
go_fasta="${OUT_DIR}SwissProt-GO:${GO}.fa"
go_db="${OUT_DIR}SwissProt-GO:${GO}"
hits_tab="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# Download the matching reviewed entries as FASTA.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error body as if it were sequence data.
curl --fail -H "Accept: text/plain" \
  "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" \
  -o "${go_fasta}"

head "${go_fasta}"

# One ">" header line per downloaded protein.
echo "Number of Proteins"
grep -c ">" "${go_fasta}"

/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
  -in "${go_fasta}" \
  -dbtype prot \
  -out "${go_db}"

# Tabular (outfmt 6) search, limited to one target sequence and one HSP per
# query; stderr is captured in a per-GO warnings file.
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
  -query "${fasta}" \
  -db "${go_db}" \
  -out "${hits_tab}" \
  -evalue "${evalue}" \
  -num_threads 42 \
  -max_target_seqs 1 \
  -max_hsps 1 \
  -outfmt 6 \
  2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${hits_tab}"

echo "Number of hits"
wc -l "${hits_tab}"

# Starvation GO:0042594
# Fetch the reviewed UniProtKB proteins annotated with this GO term, build a
# BLAST protein database from them, and blastp the query FASTA against it.
# Expects OUT_DIR, evalue and fasta to be exported in the environment.
GO="0042594"

# File names derived from the GO id (OUT_DIR already ends with a slash).
go_fasta="${OUT_DIR}SwissProt-GO:${GO}.fa"
go_db="${OUT_DIR}SwissProt-GO:${GO}"
hits_tab="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# Download the matching reviewed entries as FASTA.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error body as if it were sequence data.
curl --fail -H "Accept: text/plain" \
  "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" \
  -o "${go_fasta}"

head "${go_fasta}"

# One ">" header line per downloaded protein.
echo "Number of Proteins"
grep -c ">" "${go_fasta}"

/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
  -in "${go_fasta}" \
  -dbtype prot \
  -out "${go_db}"

# Tabular (outfmt 6) search, limited to one target sequence and one HSP per
# query; stderr is captured in a per-GO warnings file.
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
  -query "${fasta}" \
  -db "${go_db}" \
  -out "${hits_tab}" \
  -evalue "${evalue}" \
  -num_threads 42 \
  -max_target_seqs 1 \
  -max_hsps 1 \
  -outfmt 6 \
  2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${hits_tab}"

echo "Number of hits"
wc -l "${hits_tab}"

# Lipid biosynthesis GO:0008610
# Fetch the reviewed UniProtKB proteins annotated with this GO term, build a
# BLAST protein database from them, and blastp the query FASTA against it.
# Expects OUT_DIR, evalue and fasta to be exported in the environment.
GO="0008610"

# File names derived from the GO id (OUT_DIR already ends with a slash).
go_fasta="${OUT_DIR}SwissProt-GO:${GO}.fa"
go_db="${OUT_DIR}SwissProt-GO:${GO}"
hits_tab="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# Download the matching reviewed entries as FASTA.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error body as if it were sequence data.
curl --fail -H "Accept: text/plain" \
  "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" \
  -o "${go_fasta}"

head "${go_fasta}"

# One ">" header line per downloaded protein.
echo "Number of Proteins"
grep -c ">" "${go_fasta}"

/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
  -in "${go_fasta}" \
  -dbtype prot \
  -out "${go_db}"

# Tabular (outfmt 6) search, limited to one target sequence and one HSP per
# query; stderr is captured in a per-GO warnings file.
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
  -query "${fasta}" \
  -db "${go_db}" \
  -out "${hits_tab}" \
  -evalue "${evalue}" \
  -num_threads 42 \
  -max_target_seqs 1 \
  -max_hsps 1 \
  -outfmt 6 \
  2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${hits_tab}"

echo "Number of hits"
wc -l "${hits_tab}"

# Protein catabolic process GO:0030163
# Fetch the reviewed UniProtKB proteins annotated with this GO term, build a
# BLAST protein database from them, and blastp the query FASTA against it.
# Expects OUT_DIR, evalue and fasta to be exported in the environment.
GO="0030163"

# File names derived from the GO id (OUT_DIR already ends with a slash).
go_fasta="${OUT_DIR}SwissProt-GO:${GO}.fa"
go_db="${OUT_DIR}SwissProt-GO:${GO}"
hits_tab="${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

# Download the matching reviewed entries as FASTA.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error body as if it were sequence data.
curl --fail -H "Accept: text/plain" \
  "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" \
  -o "${go_fasta}"

head "${go_fasta}"

# One ">" header line per downloaded protein.
echo "Number of Proteins"
grep -c ">" "${go_fasta}"

/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
  -in "${go_fasta}" \
  -dbtype prot \
  -out "${go_db}"

# Tabular (outfmt 6) search, limited to one target sequence and one HSP per
# query; stderr is captured in a per-GO warnings file.
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
  -query "${fasta}" \
  -db "${go_db}" \
  -out "${hits_tab}" \
  -evalue "${evalue}" \
  -num_threads 42 \
  -max_target_seqs 1 \
  -max_hsps 1 \
  -outfmt 6 \
  2> "${OUT_DIR}blast_warnings${GO}.txt"

head "${hits_tab}"

echo "Number of hits"
wc -l "${hits_tab}"

# Hit counts for every result table in OUT_DIR; the lines after the command
# are its recorded output.
wc -l "${OUT_DIR}"*tab
185 ../output/23-Apul-energy-GO/Apul_blastp-GO:0006094_out.tab
175 ../output/23-Apul-energy-GO/Apul_blastp-GO:0006096_out.tab
192 ../output/23-Apul-energy-GO/Apul_blastp-GO:0006635_out.tab
1843 ../output/23-Apul-energy-GO/Apul_blastp-GO:0008610_out.tab
892 ../output/23-Apul-energy-GO/Apul_blastp-GO:0016042_out.tab
2571 ../output/23-Apul-energy-GO/Apul_blastp-GO:0030163_out.tab
2374 ../output/23-Apul-energy-GO/Apul_blastp-GO:0042594_out.tab
8232 total