{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Calculating CpG ratio for the *Acropora hyacinthus* transcriptome" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This workflow calculates CpG ratio, or CpG O/E, for contigs in the *Acropora hyacinthus* [transcriptome](http://palumbi.stanford.edu/data/33496_Ahyacinthus_CoralContigs.fasta.zip). CpG ratio is an estimate of germline DNA methylation.\n", "\n", "This workflow is an extension of another IPython notebook workflow, `Ahya_blast_anno.ipynb`, that generates an annotation of the same transcriptome. This workflow assumes that you have created the directories and files specified in the annotation workflow." ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/jd/Documents/Projects/Coral-CpG-ratio-MS/data/Ahya\n" ] } ], "source": [ "cd .data/Ahya" ] }, { "cell_type": "code", "execution_count": 52, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ">contig27\n", "CAAAATTCCAGCACTCCGTTTTGCATGGTAAACTTGTCTTAGTAGGACACTGTGGAAGATGTACAGCGCAAGACATCACAGTTGCAAGCGCCGACGAACAGCTGTTAAACTCTCCTCTCATATTCTCGAACAAACCAAATATTTCTTCCTCTCTGTTGTTGCTAACCTTTGAATATATGAAGCTGGCATTAGCACAGGACTCAAAGTTTCCGCCGAGCAGTTT\n", "\n", "number of seqs =\n", "33496\n" ] } ], "source": [ "#fasta file\n", "!head -2 33496_Ahyacinthus_CoralContigs.fasta\n", "!echo \n", "!echo number of seqs =\n", "!fgrep -c \">\" 33496_Ahyacinthus_CoralContigs.fasta" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\r\n", "Converted 33496 FASTA records in 66992 lines to tabular format\r\n", "Total sequence length: 17056543\r\n", "\r\n" ] } ], "source": [ "#Converting FASTA to tabular format and placing output file in analyses directory\n", "!perl -e '$count=0; $len=0; while(<>) 
{s/\\r?\\n//; s/\\t/ /g; if (s/^>//) { if ($. != 1) {print \"\\n\"} s/ |$/\\t/; $count++; $_ .= \"\\t\";} else {s/ //g; $len += length($_)} print $_;} print \"\\n\"; warn \"\\nConverted $count FASTA records in $. lines to tabular format\\nTotal sequence length: $len\\n\\n\";' \\\n", "33496_Ahyacinthus_CoralContigs.fasta > ../../analyses/Ahya/fasta2tab" ] }, { "cell_type": "code", "execution_count": 55, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/jd/Documents/Projects/Coral-CpG-ratio-MS/analyses/Ahya\n" ] } ], "source": [ "cd ../../analyses/Ahya" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "contig27\t\tCAAAATTCCAGCACTCCGTTTTGCATGGTAAACTTGTCTTAGTAGGACACTGTGGAAGATGTACAGCGCAAGACATCACAGTTGCAAGCGCCGACGAACAGCTGTTAAACTCTCCTCTCATATTCTCGAACAAACCAAATATTTCTTCCTCTCTGTTGTTGCTAACCTTTGAATATATGAAGCTGGCATTAGCACAGGACTCAAAGTTTCCGCCGAGCAGTTT\r\n", "contig88\t\tTGTCCTGTGTTAGAGGCCAGCTTCAACCTCTTGCTTTCCCTGTCAGCCGAGTTTTCTTCTCCTTCAATAAGCTGGGATTTTCGATCTCTACTCAATGTTTCCATCAAACACCTGAGAGTTAAATCTGCCAGATAACGAAGAAATCCTCTTGCTAGAATACTTTTCAAAAGCCCTTCTTCATACATTGATCTTATCCCATTGCAAATTGCGTTGG\r\n" ] } ], "source": [ "#Checking header on new tabular format file\n", "!head -2 fasta2tab" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\r\n", "Added column with length of column 2 for 33496 lines.\r\n", "\r\n" ] } ], "source": [ "#Add column with length of sequence\n", "!perl -e '$col = 2;' -e 'while (<>) { s/\\r?\\n//; @F = split /\\t/, $_; $len = length($F[$col]); print \"$_\\t$len\\n\" } warn \"\\nAdded column with length of column $col for $. 
lines.\\n\\n\";' \\\n", "fasta2tab > tab_1" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 33496 100488 17731523 tab_1\r\n" ] } ], "source": [ "!wc tab_1" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#The file used to count Cs and Gs will only include the sequence\n", "!awk '{print $2}' tab_1 > tab_2" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#This counts CGs - both cases\n", "!echo \"CG\" | awk -F\\[Cc][Gg] '{print NF-1}' tab_2 > CG " ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#Counts Cs\n", "!echo \"C\" | awk -F\\[Cc] '{print NF-1}' tab_2 > C " ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#Counts Gs\n", "!echo \"G\" | awk -F\\[Gg] '{print NF-1}' tab_2 > G " ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "contig27\t\tCAAAATTCCAGCACTCCGTTTTGCATGGTAAACTTGTCTTAGTAGGACACTGTGGAAGATGTACAGCGCAAGACATCACAGTTGCAAGCGCCGACGAACAGCTGTTAAACTCTCCTCTCATATTCTCGAACAAACCAAATATTTCTTCCTCTCTGTTGTTGCTAACCTTTGAATATATGAAGCTGGCATTAGCACAGGACTCAAAGTTTCCGCCGAGCAGTTT\t223\t8\t55\t42\r\n" ] } ], "source": [ "#Combining counts\n", "!paste tab_1 \\\n", "CG \\\n", "C \\\n", "G \\\n", "> comb\n", "!head -1 comb" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Calculating CpGo/e based on [Gavery and Roberts (2010)](http://www.biomedcentral.com/1471-2164/11/483)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#Calculation of CpG o/e\n", "!awk '{print $1, \"\\t\", 
(($4)/($5*$6))*(($3^2)/($3-1))}' comb > ID_CpG #use ^ instead of ** for exponent" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "contig27 \t 0.775773\r\n", "contig88 \t 0.459903\r\n", "contig100 \t 0.254614\r\n", "contig211 \t 0.885658\r\n", "contig405 \t 0.689373\r\n", "contig443 \t 1.34126\r\n", "contig470 \t 0.323368\r\n", "contig503 \t 0.941889\r\n", "contig583 \t 0.625727\r\n", "contig590 \t 1.21135\r\n" ] } ], "source": [ "!head ID_CpG" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Now joining CpG to annotation, but first must sort files." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "contig100010\tsp\tQ08174\tPCDH1_HUMAN\t45.12\t82\t42\t1\t4\t240\t536\t617\t2e-14\t72.4\r\n", "contig100021_110093_105915\tsp\tP10978\tPOLX_TOBAC\t44.34\t106\t54\t3\t1932\t2243\t774\t876\t4e-14\t81.3\r\n", "contig100025\tsp\tQ69ZS8\tKAZRN_MOUSE\t61.76\t68\t26\t0\t2\t205\t450\t517\t5e-21\t90.5\r\n", "contig100026\tsp\tQ6ZMW3\tEMAL6_HUMAN\t68.07\t119\t38\t0\t9\t365\t1805\t1923\t2e-49\t 174\r\n", "contig100031\tsp\tB0BNG0\tEMC2_RAT\t54.35\t92\t37\t3\t334\t71\t204\t294\t3e-23\t95.5\r\n", "contig100040\tsp\tQ9P215\tPOGK_HUMAN\t30.00\t100\t70\t0\t4\t303\t490\t589\t3e-11\t63.5\r\n", "contig100055\tsp\tQ32M45\tANO4_HUMAN\t52.33\t86\t41\t0\t259\t2\t291\t376\t7e-19\t85.5\r\n", "contig100067\tsp\tQ58EN8\tVP33B_DANRE\t52.50\t40\t17\t1\t138\t257\t94\t131\t4e-07\t50.4\r\n", "contig100105\tsp\tA4Q9F1\tTTLL8_MOUSE\t54.79\t146\t61\t1\t20\t442\t525\t670\t2e-47\t 169\r\n", "contig100110_36597\tsp\tQ6PDJ1\tCAHD1_MOUSE\t39.02\t164\t93\t3\t24\t503\t675\t835\t1e-33\t 131\r\n" ] } ], "source": [ "#Sorting Ahya Uniprot/Swissprot annotation file. 
This file was the result of work done in another notebook: Ahya_blast_anno.ipynb\n", "!sort Ahya_blastx_uniprot_sql.tab | tail -n +2 > Ahya_blastx_uniprot_sql.tab.sorted\n", "!head Ahya_blastx_uniprot_sql.tab.sorted" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "contig100010\tcell adhesion\r", "\r\n", "contig100010\tcell-cell signaling\r", "\r\n", "contig100010\tdevelopmental processes\r", "\r\n", "contig100021_110093_105915\tDNA metabolism\r", "\r\n", "contig100021_110093_105915\tprotein metabolism\r", "\r\n", "contig100025\tdevelopmental processes\r", "\r\n", "contig100040\tRNA metabolism\r", "\r\n", "contig100040\tdevelopmental processes\r", "\r\n", "contig100055\ttransport\r", "\r\n", "contig100067\tdevelopmental processes\r", "\r\n" ] } ], "source": [ "#Sorting Ahya GOSlim annotation file. This file was the result of work done in another notebook: Ahya_blast_anno.ipynb\n", "!sort Ahya_GOSlim.tab | tail -n +2 > Ahya_GOSlim.sorted\n", "!head Ahya_GOSlim.sorted" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "contig100 \t 0.254614\r\n", "contig100001 \t 0.431531\r\n", "contig100008 \t 0.276093\r\n", "contig100010 \t 0.476931\r\n", "contig100021_110093_105915 \t 2.0758\r\n", "contig100025 \t 0.299187\r\n", "contig100026 \t 1.0599\r\n", "contig100030 \t 0.854552\r\n", "contig100031 \t 0.64616\r\n", "contig100038_111047 \t 1.60515\r\n" ] } ], "source": [ "#Sorting Ahya CpG file\n", "!sort ID_CpG > ID_CpG.sorted\n", "!head ID_CpG.sorted" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!join ID_CpG.sorted Ahya_blastx_uniprot_sql.tab.sorted | awk '{print $1, \"\\t\", $2}' > Ahya_cpg_anno" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { 
"name": "stdout", "output_type": "stream", "text": [ "contig100010 \t 0.476931\r\n", "contig100021_110093_105915 \t 2.0758\r\n", "contig100025 \t 0.299187\r\n", "contig100026 \t 1.0599\r\n", "contig100031 \t 0.64616\r\n", "contig100040 \t 0.558145\r\n", "contig100055 \t 0.161543\r\n", "contig100067 \t 0.139249\r\n", "contig100105 \t 0.582234\r\n", "contig100110_36597 \t 0.762749\r\n" ] } ], "source": [ "!head Ahya_cpg_anno" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [], "source": [ "!join ID_CpG.sorted Ahya_GOSlim.sorted > Ahya_cpg_GOslim" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "contig100010 0.476931 cell adhesion\r", "\r\n", "contig100010 0.476931 cell-cell signaling\r", "\r\n", "contig100010 0.476931 developmental processes\r", "\r\n", "contig100021_110093_105915 2.0758 DNA metabolism\r", "\r\n", "contig100021_110093_105915 2.0758 protein metabolism\r", "\r\n", "contig100025 0.299187 developmental processes\r", "\r\n", "contig100040 0.558145 RNA metabolism\r", "\r\n", "contig100040 0.558145 developmental processes\r", "\r\n", "contig100055 0.161543 transport\r", "\r\n", "contig100067 0.139249 developmental processes\r", "\r\n" ] } ], "source": [ "!head Ahya_cpg_GOslim" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "contig100010 \t 0.476931 \t cell adhesion\r", " \r\n", "contig100010 \t 0.476931 \t cell-cell signaling\r", " \r\n", "contig100010 \t 0.476931 \t developmental processes\r", " \r\n", "contig100021_110093_105915 \t 2.0758 \t DNA metabolism\r", " \r\n", "contig100021_110093_105915 \t 2.0758 \t protein metabolism\r", " \r\n", "contig100025 \t 0.299187 \t developmental processes\r", " \r\n", "contig100040 \t 0.558145 \t RNA metabolism\r", " \r\n", "contig100040 \t 0.558145 \t 
developmental processes\r", " \r\n", "contig100055 \t 0.161543 \t transport\r", " \r\n", "contig100067 \t 0.139249 \t developmental processes\r", " \r\n" ] } ], "source": [ "#Putting tabs in between columns\n", "!awk '{print $1, \"\\t\", $2, \"\\t\", $3, $4, $5, $6}' Ahya_cpg_GOslim > Ahya_cpg_GOslim.tab\n", "!head Ahya_cpg_GOslim.tab" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Now time to plot data using pandas and matplotlib" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " | 0 | \n", "1 | \n", "2 | \n", "
---|---|---|---|
0 | \n", "contig100010 | \n", "0.476931 | \n", "cell adhesion | \n", "
1 | \n", "\n", " | NaN | \n", "NaN | \n", "
2 | \n", "contig100010 | \n", "0.476931 | \n", "cell-cell signaling | \n", "
3 | \n", "\n", " | NaN | \n", "NaN | \n", "
4 | \n", "contig100010 | \n", "0.476931 | \n", "developmental processes | \n", "
5 | \n", "\n", " | NaN | \n", "NaN | \n", "
6 | \n", "contig100021_110093_105915 | \n", "2.075800 | \n", "DNA metabolism | \n", "
7 | \n", "\n", " | NaN | \n", "NaN | \n", "
8 | \n", "contig100021_110093_105915 | \n", "2.075800 | \n", "protein metabolism | \n", "
9 | \n", "\n", " | NaN | \n", "NaN | \n", "
10 | \n", "contig100025 | \n", "0.299187 | \n", "developmental processes | \n", "
11 | \n", "\n", " | NaN | \n", "NaN | \n", "
12 | \n", "contig100040 | \n", "0.558145 | \n", "RNA metabolism | \n", "
13 | \n", "\n", " | NaN | \n", "NaN | \n", "
14 | \n", "contig100040 | \n", "0.558145 | \n", "developmental processes | \n", "
15 | \n", "\n", " | NaN | \n", "NaN | \n", "
16 | \n", "contig100055 | \n", "0.161543 | \n", "transport | \n", "
17 | \n", "\n", " | NaN | \n", "NaN | \n", "
18 | \n", "contig100067 | \n", "0.139249 | \n", "developmental processes | \n", "
19 | \n", "\n", " | NaN | \n", "NaN | \n", "
20 | \n", "contig100067 | \n", "0.139249 | \n", "transport | \n", "
21 | \n", "\n", " | NaN | \n", "NaN | \n", "
22 | \n", "contig100105 | \n", "0.582234 | \n", "protein metabolism | \n", "
23 | \n", "\n", " | NaN | \n", "NaN | \n", "
24 | \n", "contig100110_36597 | \n", "0.762749 | \n", "other biological processes | \n", "
25 | \n", "\n", " | NaN | \n", "NaN | \n", "
26 | \n", "contig100110_36597 | \n", "0.762749 | \n", "transport | \n", "
27 | \n", "\n", " | NaN | \n", "NaN | \n", "
28 | \n", "contig100128 | \n", "0.526529 | \n", "other metabolic processes | \n", "
29 | \n", "\n", " | NaN | \n", "NaN | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
47245 | \n", "\n", " | NaN | \n", "NaN | \n", "
47246 | \n", "contig99828 | \n", "0.698013 | \n", "signal transduction | \n", "
47247 | \n", "\n", " | NaN | \n", "NaN | \n", "
47248 | \n", "contig99828 | \n", "0.698013 | \n", "stress response | \n", "
47249 | \n", "\n", " | NaN | \n", "NaN | \n", "
47250 | \n", "contig99828 | \n", "0.698013 | \n", "transport | \n", "
47251 | \n", "\n", " | NaN | \n", "NaN | \n", "
47252 | \n", "contig99856 | \n", "0.311595 | \n", "other metabolic processes | \n", "
47253 | \n", "\n", " | NaN | \n", "NaN | \n", "
47254 | \n", "contig99903 | \n", "0.579263 | \n", "other metabolic processes | \n", "
47255 | \n", "\n", " | NaN | \n", "NaN | \n", "
47256 | \n", "contig99913_9827 | \n", "0.601522 | \n", "other metabolic processes | \n", "
47257 | \n", "\n", " | NaN | \n", "NaN | \n", "
47258 | \n", "contig99913_9827 | \n", "0.601522 | \n", "transport | \n", "
47259 | \n", "\n", " | NaN | \n", "NaN | \n", "
47260 | \n", "contig99921_218449_5860_158351_79662 | \n", "0.849566 | \n", "other metabolic processes | \n", "
47261 | \n", "\n", " | NaN | \n", "NaN | \n", "
47262 | \n", "contig99925 | \n", "0.658814 | \n", "signal transduction | \n", "
47263 | \n", "\n", " | NaN | \n", "NaN | \n", "
47264 | \n", "contig99970 | \n", "1.020940 | \n", "RNA metabolism | \n", "
47265 | \n", "\n", " | NaN | \n", "NaN | \n", "
47266 | \n", "contig99970 | \n", "1.020940 | \n", "cell cycle and proliferation | \n", "
47267 | \n", "contig99970 | \n", "1.020940 | \n", "developmental processes | \n", "
47268 | \n", "\n", " | NaN | \n", "NaN | \n", "
47269 | \n", "contig99970 | \n", "1.020940 | \n", "other biological processes | \n", "
47270 | \n", "\n", " | NaN | \n", "NaN | \n", "
47271 | \n", "contig99996_15114 | \n", "0.692021 | \n", "RNA metabolism | \n", "
47272 | \n", "\n", " | NaN | \n", "NaN | \n", "
47273 | \n", "contig99996_15114 | \n", "0.692021 | \n", "other biological processes | \n", "
47274 | \n", "\n", " | NaN | \n", "NaN | \n", "
47275 rows × 3 columns
\n", "\n", " | 0 | \n", "1 | \n", "
---|---|---|
0 | \n", "contig100010 | \n", "0.476931 | \n", "
1 | \n", "contig100021_110093_105915 | \n", "2.075800 | \n", "
2 | \n", "contig100025 | \n", "0.299187 | \n", "
3 | \n", "contig100026 | \n", "1.059900 | \n", "
4 | \n", "contig100031 | \n", "0.646160 | \n", "
5 | \n", "contig100040 | \n", "0.558145 | \n", "
6 | \n", "contig100055 | \n", "0.161543 | \n", "
7 | \n", "contig100067 | \n", "0.139249 | \n", "
8 | \n", "contig100105 | \n", "0.582234 | \n", "
9 | \n", "contig100110_36597 | \n", "0.762749 | \n", "
10 | \n", "contig100118 | \n", "0.245657 | \n", "
11 | \n", "contig100128 | \n", "0.526529 | \n", "
12 | \n", "contig100132 | \n", "0.710240 | \n", "
13 | \n", "contig10014 | \n", "0.398674 | \n", "
14 | \n", "contig10017 | \n", "0.297622 | \n", "
15 | \n", "contig100183 | \n", "0.533699 | \n", "
16 | \n", "contig100194_45147 | \n", "0.689714 | \n", "
17 | \n", "contig100209 | \n", "0.686760 | \n", "
18 | \n", "contig100227 | \n", "0.205884 | \n", "
19 | \n", "contig100237 | \n", "0.125373 | \n", "
20 | \n", "contig100251 | \n", "0.980997 | \n", "
21 | \n", "contig100252 | \n", "0.268736 | \n", "
22 | \n", "contig100259 | \n", "0.785491 | \n", "
23 | \n", "contig100308_100284 | \n", "0.869684 | \n", "
24 | \n", "contig100321 | \n", "0.747259 | \n", "
25 | \n", "contig100328 | \n", "0.761890 | \n", "
26 | \n", "contig100330 | \n", "0.585245 | \n", "
27 | \n", "contig100341 | \n", "0.625205 | \n", "
28 | \n", "contig100347_112826 | \n", "0.843350 | \n", "
29 | \n", "contig100349 | \n", "0.524223 | \n", "
... | \n", "... | \n", "... | \n", "
11563 | \n", "contig99614 | \n", "0.516533 | \n", "
11564 | \n", "contig99638 | \n", "0.244230 | \n", "
11565 | \n", "contig99649 | \n", "0.291315 | \n", "
11566 | \n", "contig99670 | \n", "0.511823 | \n", "
11567 | \n", "contig99674 | \n", "0.552142 | \n", "
11568 | \n", "contig99689_83153 | \n", "1.043430 | \n", "
11569 | \n", "contig99694 | \n", "0.368911 | \n", "
11570 | \n", "contig99698 | \n", "0.584653 | \n", "
11571 | \n", "contig99714 | \n", "0.914334 | \n", "
11572 | \n", "contig99748 | \n", "0.861369 | \n", "
11573 | \n", "contig99754 | \n", "0.967096 | \n", "
11574 | \n", "contig99766 | \n", "0.238253 | \n", "
11575 | \n", "contig99771 | \n", "0.586749 | \n", "
11576 | \n", "contig99778 | \n", "0.822174 | \n", "
11577 | \n", "contig99780 | \n", "0.754110 | \n", "
11578 | \n", "contig99784 | \n", "0.368969 | \n", "
11579 | \n", "contig99804 | \n", "1.013420 | \n", "
11580 | \n", "contig99808_211682_218627_80123 | \n", "0.852763 | \n", "
11581 | \n", "contig99810 | \n", "0.599800 | \n", "
11582 | \n", "contig99816_208470_81450 | \n", "0.905370 | \n", "
11583 | \n", "contig99826_145307 | \n", "0.670821 | \n", "
11584 | \n", "contig99828 | \n", "0.698013 | \n", "
11585 | \n", "contig99851 | \n", "0.252815 | \n", "
11586 | \n", "contig99856 | \n", "0.311595 | \n", "
11587 | \n", "contig99903 | \n", "0.579263 | \n", "
11588 | \n", "contig99913_9827 | \n", "0.601522 | \n", "
11589 | \n", "contig99921_218449_5860_158351_79662 | \n", "0.849566 | \n", "
11590 | \n", "contig99925 | \n", "0.658814 | \n", "
11591 | \n", "contig99970 | \n", "1.020940 | \n", "
11592 | \n", "contig99996_15114 | \n", "0.692021 | \n", "
11593 rows × 2 columns
\n", "\n", " | Column1 | \n", "Column2 | \n", "GOslim_bin | \n", "
---|---|---|---|
0 | \n", "contig100302_114262_202031 | \n", "NaN | \n", "NaN | \n", "
1 | \n", "contig100349 | \n", "0.524223 | \n", "RNA metabolism | \n", "
2 | \n", "contig100349 | \n", "0.524223 | \n", "stress response | \n", "
3 | \n", "contig100349 | \n", "0.524223 | \n", "other biological processes | \n", "
4 | \n", "contig100349 | \n", "0.524223 | \n", "other metabolic processes | \n", "
5 | \n", "contig102770 | \n", "1.215500 | \n", "RNA metabolism | \n", "
6 | \n", "contig102770 | \n", "1.215500 | \n", "transport | \n", "
7 | \n", "contig102770 | \n", "1.215500 | \n", "protein metabolism | \n", "
8 | \n", "contig102770 | \n", "1.215500 | \n", "cell cycle and proliferation | \n", "
9 | \n", "contig102770 | \n", "1.215500 | \n", "signal transduction | \n", "
10 | \n", "contig102770 | \n", "1.215500 | \n", "developmental processes | \n", "
11 | \n", "contig102770 | \n", "1.215500 | \n", "other metabolic processes | \n", "
12 | \n", "contig102770 | \n", "1.215500 | \n", "other biological processes | \n", "
13 | \n", "contig103080_193887 | \n", "NaN | \n", "NaN | \n", "
14 | \n", "contig104395_153016 | \n", "0.885662 | \n", "RNA metabolism | \n", "
15 | \n", "contig104395_153016 | \n", "0.885662 | \n", "other biological processes | \n", "
16 | \n", "contig105632_159216 | \n", "2.265430 | \n", "other metabolic processes | \n", "
17 | \n", "contig105645 | \n", "1.458040 | \n", "other biological processes | \n", "
18 | \n", "contig105949 | \n", "0.973447 | \n", "transport | \n", "
19 | \n", "contig105949 | \n", "0.973447 | \n", "cell organization and biogenesis | \n", "
20 | \n", "contig107034 | \n", "0.727932 | \n", "protein metabolism | \n", "
21 | \n", "contig107336 | \n", "NaN | \n", "NaN | \n", "
22 | \n", "contig110017 | \n", "NaN | \n", "NaN | \n", "
23 | \n", "contig110172 | \n", "0.729807 | \n", "other metabolic processes | \n", "
24 | \n", "contig110172 | \n", "0.729807 | \n", "signal transduction | \n", "
25 | \n", "contig110751 | \n", "1.080540 | \n", "transport | \n", "
26 | \n", "contig112007_150291_111307 | \n", "1.037680 | \n", "developmental processes | \n", "
27 | \n", "contig112007_150291_111307 | \n", "1.037680 | \n", "signal transduction | \n", "
28 | \n", "contig112007_150291_111307 | \n", "1.037680 | \n", "cell cycle and proliferation | \n", "
29 | \n", "contig112007_150291_111307 | \n", "1.037680 | \n", "cell-cell signaling | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
1054 | \n", "contig92662 | \n", "0.295825 | \n", "RNA metabolism | \n", "
1055 | \n", "contig93670 | \n", "1.031360 | \n", "other biological processes | \n", "
1056 | \n", "contig93732 | \n", "0.830275 | \n", "other metabolic processes | \n", "
1057 | \n", "contig93732 | \n", "0.830275 | \n", "developmental processes | \n", "
1058 | \n", "contig93732 | \n", "0.830275 | \n", "other biological processes | \n", "
1059 | \n", "contig9516 | \n", "0.899129 | \n", "other metabolic processes | \n", "
1060 | \n", "contig96421_189251 | \n", "NaN | \n", "NaN | \n", "
1061 | \n", "contig96499 | \n", "NaN | \n", "NaN | \n", "
1062 | \n", "contig97794_175986_93895 | \n", "0.797713 | \n", "developmental processes | \n", "
1063 | \n", "contig97794_175986_93895 | \n", "0.797713 | \n", "RNA metabolism | \n", "
1064 | \n", "contig98199 | \n", "0.724031 | \n", "other biological processes | \n", "
1065 | \n", "contig98199 | \n", "0.724031 | \n", "cell organization and biogenesis | \n", "
1066 | \n", "contig98199 | \n", "0.724031 | \n", "death | \n", "
1067 | \n", "contig98199 | \n", "0.724031 | \n", "signal transduction | \n", "
1068 | \n", "contig98199 | \n", "0.724031 | \n", "other metabolic processes | \n", "
1069 | \n", "contig98199 | \n", "0.724031 | \n", "stress response | \n", "
1070 | \n", "contig98199 | \n", "0.724031 | \n", "protein metabolism | \n", "
1071 | \n", "contig98199 | \n", "0.724031 | \n", "RNA metabolism | \n", "
1072 | \n", "contig98618 | \n", "0.660431 | \n", "other metabolic processes | \n", "
1073 | \n", "contig98618 | \n", "0.660431 | \n", "other biological processes | \n", "
1074 | \n", "contig98984 | \n", "0.768580 | \n", "transport | \n", "
1075 | \n", "contig98984 | \n", "0.768580 | \n", "other metabolic processes | \n", "
1076 | \n", "contig98984 | \n", "0.768580 | \n", "stress response | \n", "
1077 | \n", "contig98984 | \n", "0.768580 | \n", "other biological processes | \n", "
1078 | \n", "contig99157 | \n", "0.511279 | \n", "transport | \n", "
1079 | \n", "contig99523 | \n", "0.785016 | \n", "cell adhesion | \n", "
1080 | \n", "contig99523 | \n", "0.785016 | \n", "cell cycle and proliferation | \n", "
1081 | \n", "contig99523 | \n", "0.785016 | \n", "signal transduction | \n", "
1082 | \n", "contig99523 | \n", "0.785016 | \n", "protein metabolism | \n", "
1083 | \n", "contig99523 | \n", "0.785016 | \n", "other biological processes | \n", "
1084 rows × 3 columns
\n", "\n", " | 0 | \n", "1 | \n", "
---|---|---|
0 | \n", "contig100349 | \n", "0.524223 | \n", "
1 | \n", "contig103080_193887 | \n", "0.661096 | \n", "
2 | \n", "contig104395_153016 | \n", "0.885662 | \n", "
3 | \n", "contig105632_159216 | \n", "2.265430 | \n", "
4 | \n", "contig105949 | \n", "0.973447 | \n", "
5 | \n", "contig107034 | \n", "0.727932 | \n", "
6 | \n", "contig110172 | \n", "0.729807 | \n", "
7 | \n", "contig110751 | \n", "1.080540 | \n", "
8 | \n", "contig112007_150291_111307 | \n", "1.037680 | \n", "
9 | \n", "contig112463 | \n", "0.627976 | \n", "
10 | \n", "contig112626_149884 | \n", "0.921502 | \n", "
11 | \n", "contig112688_170707_146883 | \n", "0.783514 | \n", "
12 | \n", "contig112809_149298_131394_104581_171125 | \n", "0.931365 | \n", "
13 | \n", "contig113128 | \n", "0.579855 | \n", "
14 | \n", "contig113501_174692_88537_82393 | \n", "1.220230 | \n", "
15 | \n", "contig113836 | \n", "0.504676 | \n", "
16 | \n", "contig115860_4992_195722_171755 | \n", "0.969490 | \n", "
17 | \n", "contig115890_205214 | \n", "0.924856 | \n", "
18 | \n", "contig116802_21164 | \n", "1.080490 | \n", "
19 | \n", "contig118916 | \n", "0.696836 | \n", "
20 | \n", "contig119016 | \n", "0.928775 | \n", "
21 | \n", "contig11991 | \n", "0.492810 | \n", "
22 | \n", "contig12336_105256 | \n", "0.992275 | \n", "
23 | \n", "contig123587_153243 | \n", "0.818188 | \n", "
24 | \n", "contig124917 | \n", "0.572254 | \n", "
25 | \n", "contig12867_142623_79520_172896_150105_152164_... | \n", "0.735131 | \n", "
26 | \n", "contig132286 | \n", "0.846371 | \n", "
27 | \n", "contig132443 | \n", "0.777932 | \n", "
28 | \n", "contig132966_210546_87461_180132 | \n", "2.841970 | \n", "
29 | \n", "contig13535_144960 | \n", "0.629115 | \n", "
... | \n", "... | \n", "... | \n", "
248 | \n", "contig75607_169164_74746 | \n", "1.357380 | \n", "
249 | \n", "contig76115 | \n", "0.673705 | \n", "
250 | \n", "contig76620_96728 | \n", "0.983838 | \n", "
251 | \n", "contig77121 | \n", "0.917984 | \n", "
252 | \n", "contig77601_207123 | \n", "0.701044 | \n", "
253 | \n", "contig77727 | \n", "0.876562 | \n", "
254 | \n", "contig77755_70614_107573 | \n", "0.823651 | \n", "
255 | \n", "contig77844 | \n", "0.668139 | \n", "
256 | \n", "contig78105_108948_169460 | \n", "1.213730 | \n", "
257 | \n", "contig78166 | \n", "0.707433 | \n", "
258 | \n", "contig78169 | \n", "0.765839 | \n", "
259 | \n", "contig78655_98713 | \n", "1.089800 | \n", "
260 | \n", "contig78998 | \n", "0.437363 | \n", "
261 | \n", "contig79495_132352 | \n", "1.093730 | \n", "
262 | \n", "contig79613_159385 | \n", "1.303220 | \n", "
263 | \n", "contig79838 | \n", "0.682852 | \n", "
264 | \n", "contig79841_173655 | \n", "1.105070 | \n", "
265 | \n", "contig79914 | \n", "0.453962 | \n", "
266 | \n", "contig80216 | \n", "0.647116 | \n", "
267 | \n", "contig80700 | \n", "1.019690 | \n", "
268 | \n", "contig8602_188720 | \n", "7.575420 | \n", "
269 | \n", "contig92386 | \n", "0.702157 | \n", "
270 | \n", "contig92662 | \n", "0.295825 | \n", "
271 | \n", "contig93732 | \n", "0.830275 | \n", "
272 | \n", "contig9516 | \n", "0.899129 | \n", "
273 | \n", "contig96421_189251 | \n", "0.356141 | \n", "
274 | \n", "contig97794_175986_93895 | \n", "0.797713 | \n", "
275 | \n", "contig98199 | \n", "0.724031 | \n", "
276 | \n", "contig98984 | \n", "0.768580 | \n", "
277 | \n", "contig99523 | \n", "0.785016 | \n", "
278 rows × 2 columns
\n", "