{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Oyster (Cg) CDS vs. *C. elegans* (WBcel235): BLASTX hits with gene symbols\n",
    "\n",
    "**What this notebook does**\n",
    "\n",
    "1. Records the Mox job script that built a WBcel235 protein BLAST database and ran `blastx` with the oyster CDS as query (next cell, kept verbatim).\n",
    "2. Downloads the resulting hit table (columns: `qaccver`, `saccver`, `evalue`).\n",
    "3. Joins each hit's subject ID to a gene symbol and writes `../analyses/Cg_Ce-genesym.tab`."
   ]
  },
  {
   "cell_type": "raw",
   "metadata": {},
   "source": [
    "[sr320@mox1 020321-Cg-annot]$ cat job.sh \n",
    "#!/bin/bash\n",
    "## Job Name\n",
    "#SBATCH --job-name=blastp\n",
    "## Allocation Definition\n",
    "#SBATCH --account=coenv\n",
    "#SBATCH --partition=coenv\n",
    "## Resources\n",
    "## Nodes\n",
    "#SBATCH --nodes=1\n",
    "## Walltime (days-hours:minutes:seconds format)\n",
    "#SBATCH --time=1-00:00:00\n",
    "## Memory per node\n",
    "#SBATCH --mem=120G\n",
    "##turn on e-mail notification\n",
    "#SBATCH --mail-type=ALL\n",
    "#SBATCH --mail-user=sr320@uw.edu\n",
    "## Specify the working directory for this job\n",
    "#SBATCH --chdir=/gscratch/scrubbed/sr320/020321-Cg-annot\n",
    "\n",
    "\n",
    "# Load Python Mox module for Python module availability\n",
    "\n",
    "module load intel-python3_2017\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "source /gscratch/srlab/programs/scripts/paths.sh\n",
    "\n",
    "\n",
    "/gscratch/srlab/programs/ncbi-blast-2.8.1+/bin/makeblastdb \\\n",
    "-in /gscratch/srlab/sr320/blastdb/Caenorhabditis_elegans.WBcel235.pep.all.fa \\\n",
    "-dbtype prot \\\n",
    "-out /gscratch/srlab/sr320/blastdb/Caenorhabditis_elegans.WBcel235.prot\n",
    "\n",
    "\n",
    "\n",
    "/gscratch/srlab/programs/ncbi-blast-2.8.1+/bin/blastx \\\n",
    "-query /gscratch/srlab/sr320/data/cg/GCF_000297895.1_oyster_v9_cds_from_genomic.fna \\\n",
    "-db /gscratch/srlab/sr320/blastdb/Caenorhabditis_elegans.WBcel235.prot \\\n",
    "-out /gscratch/scrubbed/sr320/020321-Cg-annot/Cg-WBcel235_blastx.tab \\\n",
    "-evalue 1E-05 \\\n",
    "-num_threads 28 \\\n",
    "-max_target_seqs 1 \\\n",
    "-max_hsps 1 \\\n",
    "-outfmt \"6 qaccver saccver evalue\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Download the BLASTX table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " % Total % Received % Xferd Average Speed Time Time Time Current\n",
      " Dload Upload Total Spent Left Speed\n",
      "100 1613k 100 1613k 0 0 1994k 0 --:--:-- --:--:-- --:--:-- 1991k\n"
     ]
    }
   ],
   "source": [
    "!curl https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/020321-Cg-annot/Cg-WBcel235_blastx.tab \\\n",
    " > ../analyses/Cg-WBcel235_blastx.tab"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "lcl|NW_011934501.1_cds_XP_011426503.1_1\tT27A3.6.1\t1.35e-28\n",
      "lcl|NW_011934501.1_cds_XP_011434216.1_2\tT27A3.6.1\t1.35e-28\n",
      "lcl|NW_011934501.1_cds_XP_011445785.1_3\tC48E7.2.2\t1.83e-22\n",
      "lcl|NW_011934512.1_cds_XP_011426108.1_6\tY41D4A.6a.1\t3.05e-31\n",
      "lcl|NW_011934516.1_cds_XP_011428421.2_8\tR11F4.1.2\t1.55e-25\n",
      "lcl|NW_011934516.1_cds_XP_019923453.1_9\tR11F4.1.2\t1.55e-25\n",
      "lcl|NW_011934516.1_cds_XP_019923670.1_10\tR11F4.1.2\t1.55e-25\n",
      "lcl|NW_011934516.1_cds_XP_019923891.1_11\tR11F4.1.2\t6.11e-17\n",
      "lcl|NW_011934520.1_cds_XP_011429578.1_13\tF36H1.2h.1\t9.81e-27\n",
      "lcl|NW_011934527.1_cds_XP_011437695.1_20\tY49F6B.4.1\t1.24e-33\n"
     ]
    }
   ],
   "source": [
    "!head ../analyses/Cg-WBcel235_blastx.tab"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Gene-symbol lookup table\n",
    "\n",
    "`Caenorhabditis_elegans.WBcel235.gene.sym` has two tab-separated columns. Column 1 is matched against the BLAST subject ID below; column 2 is the symbol appended to each hit.\n",
    "\n",
    "**TODO:** this notebook does not show how this file was created; record its source here."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Y110A7A.10.1\taap-1\n",
      "Y110A7A.10.2\taap-1\n",
      "F27C8.1.1\taat-1\n",
      "F27C8.1.2\taat-1\n",
      "F07C3.7.1\taat-2\n",
      "F52H2.2a.1\taat-3\n",
      "F52H2.2b.1\taat-3\n",
      "T13A10.10a.1\taat-4\n",
      "C55C2.5a.1\taat-5\n",
      "T11F9.4a.1\taat-6\n"
     ]
    }
   ],
   "source": [
    "!head ../analyses/Caenorhabditis_elegans.WBcel235.gene.sym"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Sort both tables on the join key\n",
    "\n",
    "`join` needs both inputs sorted on the join field, in the same collation order that `join` itself uses.\n",
    "\n",
    "- `-t $'\\t' -k2,2` (or `-k1,1`) limits the sort key to the ID column only; a bare `-k2` would extend the key to the end of the line.\n",
    "- `LC_ALL=C` pins byte-wise ordering so `sort` and `join` agree regardless of the machine's locale."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!LC_ALL=C sort -t $'\\t' -k1,1 \\\n",
    "../analyses/Caenorhabditis_elegans.WBcel235.gene.sym \\\n",
    "> ../analyses/Caenorhabditis_elegans.WBcel235.gene.sym.sorted"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!head ../analyses/Caenorhabditis_elegans.WBcel235.gene.sym.sorted"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!LC_ALL=C sort -t $'\\t' -k2,2 \\\n",
    "../analyses/Cg-WBcel235_blastx.tab \\\n",
    "> ../analyses/Cg-WBcel235_blastx.sorted"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!head ../analyses/Cg-WBcel235_blastx.sorted"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Join hits to gene symbols\n",
    "\n",
    "Output columns: subject ID, query ID, e-value, gene symbol.\n",
    "\n",
    "By default `join` omits lines that have no partner in the other file, so the line counts of the input and the output are printed afterwards as a check."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!LC_ALL=C join -1 2 -2 1 -t $'\\t' \\\n",
    "../analyses/Cg-WBcel235_blastx.sorted \\\n",
    "../analyses/Caenorhabditis_elegans.WBcel235.gene.sym.sorted \\\n",
    "> ../analyses/Cg_Ce-genesym.tab"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!wc -l ../analyses/Cg-WBcel235_blastx.sorted ../analyses/Cg_Ce-genesym.tab"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!head ../analyses/Cg_Ce-genesym.tab"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!tail ../analyses/Cg_Ce-genesym.tab"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}