{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Calculating CpG ratio for the *Porites astreoides* transcriptome" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This workflow calculates CpG ratio, or CpG O/E, for contigs in the *Porites astreoides* [transcriptome](https://dl.dropboxusercontent.com/u/37523721/pastreoides_transcriptome_july2014.zip). CpG ratio is an estimate of germline DNA methylation.\n", "\n", "This workflow is an extension of another IPython notebook workflow, `Past_blast_anno.ipynb`, that generates an annotation of the same transcriptome. This workflow assumes that you have created the directories and files specified in the annotation workflow." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/jd/Documents/Projects/Coral-CpG-ratio-MS/data/Past\n" ] } ], "source": [ "cd .data/Past" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ">GCKDGN101CF7JK gene=GCKDGN101CF7JK\n", "GCAGTTCACATGACCCGAGGCTAGGAGATCCCATGAGATAGCACAGTGCAAGCGTGACTCATTGCACATGTGCTTGCAACGCGACTGGCGTCTTCTCGACGAGCTCCTGAGTATTTTCACAAGATTGGCCCCTATTTCTCAATGCCGTTGGGAAAATTATTGGGCCGTAGAAAACAGAATATCACACTAATGTAGTTGGTAGGTTAGCAAAACCAGTACCAGC\n", "\n", "number of seqs =\n", "30740\n" ] } ], "source": [ "#fasta file\n", "!head -2 Past.fasta\n", "!echo \n", "!echo number of seqs =\n", "!fgrep -c \">\" Past.fasta" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ">GCKDGN101CF7JK\n", "GCAGTTCACATGACCCGAGGCTAGGAGATCCCATGAGATAGCACAGTGCAAGCGTGACTCATTGCACATGTGCTTGCAACGCGACTGGCGTCTTCTCGACGAGCTCCTGAGTATTTTCACAAGATTGGCCCCTATTTCTCAATGCCGTTGGGAAAATTATTGGGCCGTAGAAAACAGAATATCACACTAATGTAGTTGGTAGGTTAGCAAAACCAGTACCAGC\n", ">GCKDGN101CAZ1A\n", 
"GCAGTTGACTGTCATTTCCCAGGGGACGATACAGTGCCACCACAATGACGTACTTACCGCTGGGGCTTGGCGCTGCCCCCATGCCTAGCTGATTGGTGTCTCTCCATATCATCTGTGTGAAGTGCTTTACTTTGTCATTGAGAACTGGATAGGAGAAGCTGTACGCTTTCTTCTCGCCGTACCATATAGTCGTGGCCTTGAGAGGTGCTCTCTGCAGATCGTGCCATATCTGTGCAATGTTTTCACCCGCTGGCTCGCCTTGGAAAGTGGACGGGTCACTGGCTACAGACTCTGCTATATTCTGCGCTTGTTCTGCTAGAGTGGCATTCCATTGTAAAGGGNGGACGTGATGAAGTGATCTGA\n", ">GCKDGN101ANI4S\n", "TGTCCTGTCTTTCTTCGTTGTTCTTCTTGACCTGCTATACACATTAAACCACTCTGGGTGATGATCCATCTTCTCCGACATGAGAGCCACGCGGCTCATGAAACCAAATGCCTGGTTGAAATTCTTGAACTTAAACTCTTTGTAAATGGCATCACGGCCTTCGACATCAGTCCACCCTGAGGACTTCAGAGGCTGAACCTCTGTTTCCCTTTCC\n", ">GCKDGN101BV6U3\n", "GCAGTATAGTCGTAGTTGATAGCGCATGGGTAGCATGTCTGGTGCGCAGGGCGCCAGTGCATATCCCACATAAACTTATTTGGCGATGTGTCAATGTATGACGTAAATTCGTCAAACGTTGGCCCAGAACACTCAACGCAACTGTCTGTTTTTCCCGCAGAACGACGAACCTTATTGACTATATCTCTGCCTAGCTTATTATACAATTGAGTCGTTTTTCCGACGAACTTGTCCCTATAGGCTGATAACAGTCTTTCGAAAGGTTCCCAAACAAAGA\n", ">GCKDGN101BQG8Q\n", "GCAGTATAACAGTAAATTGTTTCTACTCTCCACTTACCAAAGGAAACCATCAATATGACTGCCGTTTCTTTCTTTCAGTTCCATGTCTCGGTTACATTGAGCGCAGATCAGCGGGTTTGCCAAAAGAGATCGTTGTTGAAGCCACTGAATCAAATTGAGGCGTGGGCCCCGTTGCAAATCAATACATTCGTAAAGATCCATCTCCTTTATCATACGATGGTGCAAACGTCACTTAAATCGACTACCAAAATAGGGAGTAAAAACAGACCAGCTTATTCGAGGGTGGAGAAGTCCTATTAATCAACAATTCTTATGTATATGCGTAAGAGAAGACCGACTCCAGTGAACTACGTTTTGATTGGTTGAGGGTGATGTGGTGAAATAATTCTCATGTGAGCTCGTTTTGTTATCA\n", ">isotig29249\n", "TAGGAAGTGCTTTGAAACAAGCTATAGCAGTGCCAATACAATACAGTCGAACCTGtATTAAGCGGACACCCTCTATTAAGCGGACAGTAGCCGAAGTCCCAAAATTaCTTGAAATGAAACCTTTATtAAGCGGACACCTCTACTAAGCGGACGCGGACACCTAAAAaGTACCTGAAATGGTCATTTCTATtGTTTCCAACCTGTATTAAACGGACACTTGTAATTAAATTCCACCACCCAACgTgctAG\n", ">isotig32730\n", "AaTGGaCACGcTtCTTGTTTAAGACAGGAATACTGtCaTAAAAGATGTCCATTAaCATGGTTTTTCCACTTCCcAcTCCTCCGTGGAGATAGaGTCCCTGAGgTGCTTCCTGTGGTTGTGAat\n", ">isotig30623\n", 
"AACAAAGCCCAGCCAAGAATCACCGTGTAGTAAATGTTGATATACTGCATTACAAGCACACCGGCATAGCCAATTCCTtGGAAAagTGGACAGATTTtCCAAGCCGTGaTTCcACcTTgaCTCGTATATTGCCCCAGACCaatctccatcactaaaacgggtacaccagccacaatcaaacaaagcaagtagggaatgagaaaacaacc\n", ">isotig30838\n", "ACAGTCTGACTGTTGGAAGTGTGAAaGTTGATTTaCTTTGTAGAACTTTGCACTTGTAAGcctGTTTGTCatgtcttatgtgagaatttaaaggggctgtgtcatggatgtcaagttcattttctcaacattgcaaattatgcttccttatgcgctacagaacttaacaatagcgaaaagaaattaccaaatgataaaaca\n", ">isotig32258\n", "GCAGTTTGAAGTGTAATTTTAATATCATTTTtGGCTAAACTCTAGCTCCAGGCTCCTCTCTGTGCAATACCTCGACAACAAAGTTACCTGCATGAACTGAAAACTCTATTATAGTAGAGTTCATGGTCAAACTCAGTTGGGAGCA\n" ] } ], "source": [ "#Just printing first line w/out comments and looking at contig names\n", "!awk '{print $1}' Past.fasta > Past2.fasta\n", "!head -10 Past2.fasta\n", "!tail -10 Past2.fasta" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\r\n", "Converted 30740 FASTA records in 61480 lines to tabular format\r\n", "Total sequence length: 16907062\r\n", "\r\n" ] } ], "source": [ "#Converting FASTA to tabular format and placing output file in analyses directory\n", "!perl -e '$count=0; $len=0; while(<>) {s/\\r?\\n//; s/\\t/ /g; if (s/^>//) { if ($. != 1) {print \"\\n\"} s/ |$/\\t/; $count++; $_ .= \"\\t\";} else {s/ //g; $len += length($_)} print $_;} print \"\\n\"; warn \"\\nConverted $count FASTA records in $. 
lines to tabular format\\nTotal sequence length: $len\\n\\n\";' \\\n", "Past2.fasta > ../../analyses/Past/fasta2tab" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/jd/Documents/Projects/Coral-CpG-ratio-MS/analyses/Past\n" ] } ], "source": [ "cd ../../analyses/Past" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GCKDGN101CF7JK\t\tGCAGTTCACATGACCCGAGGCTAGGAGATCCCATGAGATAGCACAGTGCAAGCGTGACTCATTGCACATGTGCTTGCAACGCGACTGGCGTCTTCTCGACGAGCTCCTGAGTATTTTCACAAGATTGGCCCCTATTTCTCAATGCCGTTGGGAAAATTATTGGGCCGTAGAAAACAGAATATCACACTAATGTAGTTGGTAGGTTAGCAAAACCAGTACCAGC\r\n", "GCKDGN101CAZ1A\t\tGCAGTTGACTGTCATTTCCCAGGGGACGATACAGTGCCACCACAATGACGTACTTACCGCTGGGGCTTGGCGCTGCCCCCATGCCTAGCTGATTGGTGTCTCTCCATATCATCTGTGTGAAGTGCTTTACTTTGTCATTGAGAACTGGATAGGAGAAGCTGTACGCTTTCTTCTCGCCGTACCATATAGTCGTGGCCTTGAGAGGTGCTCTCTGCAGATCGTGCCATATCTGTGCAATGTTTTCACCCGCTGGCTCGCCTTGGAAAGTGGACGGGTCACTGGCTACAGACTCTGCTATATTCTGCGCTTGTTCTGCTAGAGTGGCATTCCATTGTAAAGGGNGGACGTGATGAAGTGATCTGA\r\n" ] } ], "source": [ "#Checking header on new tabular format file\n", "!head -2 fasta2tab" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\r\n", "Added column with length of column 2 for 30740 lines.\r\n", "\r\n" ] } ], "source": [ "#Add column with length of sequence\n", "!perl -e '$col = 2;' -e 'while (<>) { s/\\r?\\n//; @F = split /\\t/, $_; $len = length($F[$col]); print \"$_\\t$len\\n\" } warn \"\\nAdded column with length of column $col for $. 
lines.\\n\\n\";' \\\n", "fasta2tab > tab_1" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 30740 92220 17482850 tab_1\r\n" ] } ], "source": [ "!wc tab_1" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#The file used to count Cs and Gs will only include the sequence\n", "!awk '{print $2}' tab_1 > tab_2" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#This counts CGs - both cases. Each line is split on CG, so (number of fields - 1) = number of CGs\n", "!awk -F'[Cc][Gg]' '{print NF-1}' tab_2 > CG" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#Counts Cs\n", "!awk -F'[Cc]' '{print NF-1}' tab_2 > C" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#Counts Gs\n", "!awk -F'[Gg]' '{print NF-1}' tab_2 > G" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GCKDGN101CF7JK\t\tGCAGTTCACATGACCCGAGGCTAGGAGATCCCATGAGATAGCACAGTGCAAGCGTGACTCATTGCACATGTGCTTGCAACGCGACTGGCGTCTTCTCGACGAGCTCCTGAGTATTTTCACAAGATTGGCCCCTATTTCTCAATGCCGTTGGGAAAATTATTGGGCCGTAGAAAACAGAATATCACACTAATGTAGTTGGTAGGTTAGCAAAACCAGTACCAGC\t223\t9\t53\t54\r\n" ] } ], "source": [ "#Combining counts\n", "!paste tab_1 \\\n", "CG \\\n", "C \\\n", "G \\\n", "> comb\n", "!head -1 comb" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Calculating CpGo/e based on [Gavery and Roberts (2010)](http://www.biomedcentral.com/1471-2164/11/483)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$\\mathrm{CpG\\ O/E} = \\frac{\\text{number of CpG}}{\\text{number of C} \\times \\text{number of G}} \\times \\frac{l^2}{l - 1}$$\n", "\n", "where $l$ is the length of the contig in nucleotides." ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [], "source": [
"#CpG O/E = (CG / (C * G)) * (length^2 / (length - 1)), with comb columns $3 = length, $4 = CG, $5 = C, $6 = G.\n", "#The ratio is undefined when a contig has no C, no G, or fewer than 2 bases; such contigs are reported on stderr\n", "#and left out of ID_CpG rather than dividing by zero.\n", "!awk '{ if ($5 > 0 && $6 > 0 && $3 > 1) print $1, \"\\t\", (($4)/($5*$6))*(($3^2)/($3-1)); else print \"CpG O/E undefined, skipping\", $1 > \"/dev/stderr\" }' comb > ID_CpG #use ^ instead of ** for exponent\n" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GCKDGN101CF7JK \t 0.704417\n", "GCKDGN101CAZ1A \t 0.590914\n", "GCKDGN101ANI4S \t 0.542256\n", "GCKDGN101BV6U3 \t 1.15835\n", "GCKDGN101BQG8Q \t 0.926274\n", "GCKDGN101CG9W2 \t 0.802327\n", "GCKDGN101A7H4U \t 1.20891\n", "GCKDGN101ATRIP \t 0.256737\n", "GCKDGN101CINM3 \t 0.342108\n", "GCKDGN101BOM13 \t 0.69067\n", " 30740 61480 716743 ID_CpG\n" ] } ], "source": [ "!head ID_CpG\n", "!wc ID_CpG" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Now joining CpG to annotation, but first must sort files." ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GCKDGN101A00ZL\tsp\tQ23979\tMY61F_DROME\t70.97\t124\t35\t1\t371\t3\t322\t445\t3e-54\t 187\r\n", "GCKDGN101A02W5\tsp\tQ9PT84\tKCNH2_CHICK\t42.35\t85\t43\t2\t238\t2\t75\t159\t2e-09\t57.0\r\n", "GCKDGN101A03XE\tsp\tQ5RA96\tGUAA_PONAB\t76.15\t109\t26\t0\t338\t12\t28\t136\t1e-51\t 179\r\n", "GCKDGN101A06FB\tsp\tQ5PQ63\tIMP2L_XENLA\t65.45\t55\t19\t0\t7\t171\t105\t159\t2e-12\t63.9\r\n", "GCKDGN101A08CL\tsp\tQ09328\tMGT5A_HUMAN\t57.89\t57\t24\t0\t87\t257\t141\t197\t2e-16\t79.0\r\n", "GCKDGN101A097A\tsp\tQ3MHG6\tGTPBA_BOVIN\t43.42\t76\t42\t1\t17\t244\t81\t155\t6e-13\t66.6\r\n", "GCKDGN101A0KSC\tsp\tQ6P2K6\tP4R3A_MOUSE\t60.00\t90\t33\t1\t273\t4\t96\t182\t2e-25\t 105\r\n", "GCKDGN101A0M8H\tsp\tA2RUR9\tC144A_HUMAN\t29.31\t116\t75\t2\t386\t42\t1232\t1341\t3e-09\t58.5\r\n", "GCKDGN101A0NCL\tsp\tQ08AV6\tTBCC1_XENLA\t40.78\t103\t59\t2\t8\t310\t178\t280\t2e-17\t81.3\r\n", "GCKDGN101A0S0M\tsp\tO00763\tACACB_HUMAN\t67.20\t125\t41\t0\t381\t7\t326\t450\t8e-55\t 191\r\n" ] } ], "source": [ "#Sorting Past Uniprot/Swissprot annotation file. 
This file was the result of work done in another notebook: \n", "#Past_blast_anno.ipynb\n", "#Line 1 (assumed to be the column header) is dropped before sorting; once sorted, the header is not guaranteed to be the first line\n", "!tail -n +2 Past_blastx_uniprot.sql.tab | sort > Past_blastx_uniprot.sql.tab.sorted\n", "!head Past_blastx_uniprot.sql.tab.sorted" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GCKDGN101A00AO\tother biological processes\r", "\r\n", "GCKDGN101A00ZL\tcell organization and biogenesis\r", "\r\n", "GCKDGN101A00ZL\tdevelopmental processes\r", "\r\n", "GCKDGN101A00ZL\tstress response\r", "\r\n", "GCKDGN101A028J\tRNA metabolism\r", "\r\n", "GCKDGN101A028J\tother biological processes\r", "\r\n", "GCKDGN101A02W5\ttransport\r", "\r\n", "GCKDGN101A03QE\tother metabolic processes\r", "\r\n", "GCKDGN101A03XE\tother metabolic processes\r", "\r\n", "GCKDGN101A0607\tprotein metabolism\r", "\r\n" ] } ], "source": [ "#Sorting GOSlim annotation file. This file was the result of work done in another notebook: Past_blast_anno.ipynb\n", "#Line 1 (assumed to be the column header) is dropped before sorting; once sorted, the header is not guaranteed to be the first line\n", "!tail -n +2 Past_GOSlim.tab | sort > Past_GOSlim.sorted\n", "!head Past_GOSlim.sorted" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GCKDGN101A00AO \t 0.383276\r\n", "GCKDGN101A00ZL \t 0.335379\r\n", "GCKDGN101A028J \t 0.524246\r\n", "GCKDGN101A02W5 \t 0.559472\r\n", "GCKDGN101A03QE \t 0.177286\r\n", "GCKDGN101A03XE \t 0.618671\r\n", "GCKDGN101A0607 \t 1.03824\r\n", "GCKDGN101A06FB \t 0.207357\r\n", "GCKDGN101A0884 \t 0.876202\r\n", "GCKDGN101A08CL \t 0.342674\r\n" ] } ], "source": [ "#Sorting CpG file\n", "!sort ID_CpG > ID_CpG.sorted\n", "!head ID_CpG.sorted" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!join ID_CpG.sorted Past_blastx_uniprot.sql.tab.sorted | awk '{print $1, \"\\t\", $2}' > Past_cpg_anno" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { 
"collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GCKDGN101A00ZL \t 0.335379\r\n", "GCKDGN101A02W5 \t 0.559472\r\n", "GCKDGN101A03XE \t 0.618671\r\n", "GCKDGN101A06FB \t 0.207357\r\n", "GCKDGN101A08CL \t 0.342674\r\n", "GCKDGN101A097A \t 0.679228\r\n", "GCKDGN101A0KSC \t 1.03959\r\n", "GCKDGN101A0M8H \t 0.78113\r\n", "GCKDGN101A0NCL \t 0.404416\r\n", "GCKDGN101A0S0M \t 0.254499\r\n" ] } ], "source": [ "!head Past_cpg_anno" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!join ID_CpG.sorted Past_GOSlim.sorted > Past_cpg_GOslim" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GCKDGN101A00AO 0.383276 other biological processes\r", "\r\n", "GCKDGN101A00ZL 0.335379 cell organization and biogenesis\r", "\r\n", "GCKDGN101A00ZL 0.335379 developmental processes\r", "\r\n", "GCKDGN101A00ZL 0.335379 stress response\r", "\r\n", "GCKDGN101A028J 0.524246 RNA metabolism\r", "\r\n", "GCKDGN101A028J 0.524246 other biological processes\r", "\r\n", "GCKDGN101A02W5 0.559472 transport\r", "\r\n", "GCKDGN101A03QE 0.177286 other metabolic processes\r", "\r\n", "GCKDGN101A03XE 0.618671 other metabolic processes\r", "\r\n", "GCKDGN101A0607 1.03824 protein metabolism\r", "\r\n" ] } ], "source": [ "!head Past_cpg_GOslim" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GCKDGN101A00AO \t 0.383276 \t other biological processes\r", " \r\n", "GCKDGN101A00ZL \t 0.335379 \t cell organization and biogenesis\r", "\r\n", "GCKDGN101A00ZL \t 0.335379 \t developmental processes\r", " \r\n", "GCKDGN101A00ZL \t 0.335379 \t stress response\r", " \r\n", "GCKDGN101A028J \t 0.524246 \t RNA metabolism\r", " \r\n", "GCKDGN101A028J \t 0.524246 \t other biological processes\r", " \r\n", 
"GCKDGN101A02W5 \t 0.559472 \t transport\r", " \r\n", "GCKDGN101A03QE \t 0.177286 \t other metabolic processes\r", " \r\n", "GCKDGN101A03XE \t 0.618671 \t other metabolic processes\r", " \r\n", "GCKDGN101A0607 \t 1.03824 \t protein metabolism\r", " \r\n" ] } ], "source": [ "#Putting tabs in between columns.\n", "#The joined file still carries the carriage return from the CRLF line endings of the GOSlim file (the \\r in the\n", "#output above); it is stripped here because, followed by padding, it shows up downstream as an all-NaN row after\n", "#most records. Fields 3..NF are rejoined so a GO Slim term of any word count is kept whole, with no trailing blanks.\n", "!awk '{sub(/\\r$/, \"\"); term = $3; for (k = 4; k <= NF; k++) term = term \" \" $k; print $1, \"\\t\", $2, \"\\t\", term}' Past_cpg_GOslim > Past_cpg_GOslim.tab\n", "!head Past_cpg_GOslim.tab" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Now time to plot data using pandas and matplotlib" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " | 0 | \n", "1 | \n", "2 | \n", "
---|---|---|---|
0 | \n", "GCKDGN101A00AO | \n", "0.383276 | \n", "other biological processes | \n", "
1 | \n", "\n", " | NaN | \n", "NaN | \n", "
2 | \n", "GCKDGN101A00ZL | \n", "0.335379 | \n", "cell organization and biogenesis | \n", "
3 | \n", "GCKDGN101A00ZL | \n", "0.335379 | \n", "developmental processes | \n", "
4 | \n", "\n", " | NaN | \n", "NaN | \n", "
5 | \n", "GCKDGN101A00ZL | \n", "0.335379 | \n", "stress response | \n", "
6 | \n", "\n", " | NaN | \n", "NaN | \n", "
7 | \n", "GCKDGN101A028J | \n", "0.524246 | \n", "RNA metabolism | \n", "
8 | \n", "\n", " | NaN | \n", "NaN | \n", "
9 | \n", "GCKDGN101A028J | \n", "0.524246 | \n", "other biological processes | \n", "
10 | \n", "\n", " | NaN | \n", "NaN | \n", "
11 | \n", "GCKDGN101A02W5 | \n", "0.559472 | \n", "transport | \n", "
12 | \n", "\n", " | NaN | \n", "NaN | \n", "
13 | \n", "GCKDGN101A03QE | \n", "0.177286 | \n", "other metabolic processes | \n", "
14 | \n", "\n", " | NaN | \n", "NaN | \n", "
15 | \n", "GCKDGN101A03XE | \n", "0.618671 | \n", "other metabolic processes | \n", "
16 | \n", "\n", " | NaN | \n", "NaN | \n", "
17 | \n", "GCKDGN101A0607 | \n", "1.038240 | \n", "protein metabolism | \n", "
18 | \n", "\n", " | NaN | \n", "NaN | \n", "
19 | \n", "GCKDGN101A06FB | \n", "0.207357 | \n", "protein metabolism | \n", "
20 | \n", "\n", " | NaN | \n", "NaN | \n", "
21 | \n", "GCKDGN101A0884 | \n", "0.876202 | \n", "other biological processes | \n", "
22 | \n", "\n", " | NaN | \n", "NaN | \n", "
23 | \n", "GCKDGN101A08CL | \n", "0.342674 | \n", "other metabolic processes | \n", "
24 | \n", "\n", " | NaN | \n", "NaN | \n", "
25 | \n", "GCKDGN101A08CL | \n", "0.342674 | \n", "protein metabolism | \n", "
26 | \n", "\n", " | NaN | \n", "NaN | \n", "
27 | \n", "GCKDGN101A08WT | \n", "0.459782 | \n", "other biological processes | \n", "
28 | \n", "\n", " | NaN | \n", "NaN | \n", "
29 | \n", "GCKDGN101A08WT | \n", "0.459782 | \n", "transport | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
102278 | \n", "isotig33103 | \n", "0.952471 | \n", "signal transduction | \n", "
102279 | \n", "\n", " | NaN | \n", "NaN | \n", "
102280 | \n", "isotig33105 | \n", "0.000000 | \n", "signal transduction | \n", "
102281 | \n", "\n", " | NaN | \n", "NaN | \n", "
102282 | \n", "isotig33121 | \n", "0.891861 | \n", "other biological processes | \n", "
102283 | \n", "\n", " | NaN | \n", "NaN | \n", "
102284 | \n", "isotig33125 | \n", "0.604802 | \n", "other biological processes | \n", "
102285 | \n", "\n", " | NaN | \n", "NaN | \n", "
102286 | \n", "isotig33125 | \n", "0.604802 | \n", "other metabolic processes | \n", "
102287 | \n", "\n", " | NaN | \n", "NaN | \n", "
102288 | \n", "isotig33125 | \n", "0.604802 | \n", "protein metabolism | \n", "
102289 | \n", "\n", " | NaN | \n", "NaN | \n", "
102290 | \n", "isotig33125 | \n", "0.604802 | \n", "signal transduction | \n", "
102291 | \n", "\n", " | NaN | \n", "NaN | \n", "
102292 | \n", "isotig33125 | \n", "0.604802 | \n", "stress response | \n", "
102293 | \n", "\n", " | NaN | \n", "NaN | \n", "
102294 | \n", "isotig33136 | \n", "0.809603 | \n", "other metabolic processes | \n", "
102295 | \n", "\n", " | NaN | \n", "NaN | \n", "
102296 | \n", "isotig33136 | \n", "0.809603 | \n", "transport | \n", "
102297 | \n", "\n", " | NaN | \n", "NaN | \n", "
102298 | \n", "isotig33137 | \n", "0.834088 | \n", "cell organization and biogenesis | \n", "
102299 | \n", "isotig33141 | \n", "0.000000 | \n", "other biological processes | \n", "
102300 | \n", "\n", " | NaN | \n", "NaN | \n", "
102301 | \n", "isotig33141 | \n", "0.000000 | \n", "protein metabolism | \n", "
102302 | \n", "\n", " | NaN | \n", "NaN | \n", "
102303 | \n", "isotig33152 | \n", "0.316646 | \n", "other biological processes | \n", "
102304 | \n", "\n", " | NaN | \n", "NaN | \n", "
102305 | \n", "isotig33159 | \n", "1.262630 | \n", "RNA metabolism | \n", "
102306 | \n", "\n", " | NaN | \n", "NaN | \n", "
102307 | \n", "isotig33159 | \n", "1.262630 | \n", "cell organization and biogenesis | \n", "
102308 rows × 3 columns
\n", "\n", " | 0 | \n", "1 | \n", "
---|---|---|
0 | \n", "GCKDGN101A00ZL | \n", "0.335379 | \n", "
1 | \n", "GCKDGN101A02W5 | \n", "0.559472 | \n", "
2 | \n", "GCKDGN101A03XE | \n", "0.618671 | \n", "
3 | \n", "GCKDGN101A06FB | \n", "0.207357 | \n", "
4 | \n", "GCKDGN101A08CL | \n", "0.342674 | \n", "
5 | \n", "GCKDGN101A097A | \n", "0.679228 | \n", "
6 | \n", "GCKDGN101A0KSC | \n", "1.039590 | \n", "
7 | \n", "GCKDGN101A0M8H | \n", "0.781130 | \n", "
8 | \n", "GCKDGN101A0NCL | \n", "0.404416 | \n", "
9 | \n", "GCKDGN101A0S0M | \n", "0.254499 | \n", "
10 | \n", "GCKDGN101A12VJ | \n", "0.184560 | \n", "
11 | \n", "GCKDGN101A13V9 | \n", "0.147354 | \n", "
12 | \n", "GCKDGN101A15QU | \n", "0.712784 | \n", "
13 | \n", "GCKDGN101A17ZT | \n", "0.472153 | \n", "
14 | \n", "GCKDGN101A19EE | \n", "0.850999 | \n", "
15 | \n", "GCKDGN101A1O0W | \n", "0.802256 | \n", "
16 | \n", "GCKDGN101A1T4S | \n", "0.076902 | \n", "
17 | \n", "GCKDGN101A1TLT | \n", "0.380306 | \n", "
18 | \n", "GCKDGN101A20KS | \n", "0.922365 | \n", "
19 | \n", "GCKDGN101A23YI | \n", "0.698875 | \n", "
20 | \n", "GCKDGN101A28J5 | \n", "0.698418 | \n", "
21 | \n", "GCKDGN101A2DZK | \n", "0.751943 | \n", "
22 | \n", "GCKDGN101A2LIL | \n", "0.605440 | \n", "
23 | \n", "GCKDGN101A2NZY | \n", "0.445518 | \n", "
24 | \n", "GCKDGN101A2R04 | \n", "0.694789 | \n", "
25 | \n", "GCKDGN101A2WY5 | \n", "0.628895 | \n", "
26 | \n", "GCKDGN101A35AI | \n", "0.170293 | \n", "
27 | \n", "GCKDGN101A38VU | \n", "0.886832 | \n", "
28 | \n", "GCKDGN101A39U8 | \n", "0.491907 | \n", "
29 | \n", "GCKDGN101A3AVK | \n", "0.524441 | \n", "
... | \n", "... | \n", "... | \n", "
13758 | \n", "isotig32619 | \n", "1.032320 | \n", "
13759 | \n", "isotig32645 | \n", "0.984676 | \n", "
13760 | \n", "isotig32665 | \n", "0.230087 | \n", "
13761 | \n", "isotig32717 | \n", "0.733053 | \n", "
13762 | \n", "isotig32730 | \n", "0.340215 | \n", "
13763 | \n", "isotig32740 | \n", "0.593341 | \n", "
13764 | \n", "isotig32767 | \n", "0.970656 | \n", "
13765 | \n", "isotig32781 | \n", "0.861883 | \n", "
13766 | \n", "isotig32795 | \n", "1.010170 | \n", "
13767 | \n", "isotig32805 | \n", "0.468420 | \n", "
13768 | \n", "isotig32821 | \n", "0.207627 | \n", "
13769 | \n", "isotig32829 | \n", "0.756356 | \n", "
13770 | \n", "isotig32836 | \n", "1.457250 | \n", "
13771 | \n", "isotig32872 | \n", "0.960073 | \n", "
13772 | \n", "isotig32873 | \n", "0.753304 | \n", "
13773 | \n", "isotig32875 | \n", "0.691457 | \n", "
13774 | \n", "isotig32909 | \n", "0.000000 | \n", "
13775 | \n", "isotig32914 | \n", "0.000000 | \n", "
13776 | \n", "isotig32936 | \n", "1.585850 | \n", "
13777 | \n", "isotig32941 | \n", "0.261595 | \n", "
13778 | \n", "isotig32968 | \n", "0.707426 | \n", "
13779 | \n", "isotig33023 | \n", "0.698778 | \n", "
13780 | \n", "isotig33042 | \n", "0.963724 | \n", "
13781 | \n", "isotig33055 | \n", "0.307721 | \n", "
13782 | \n", "isotig33063 | \n", "0.875935 | \n", "
13783 | \n", "isotig33069 | \n", "0.000000 | \n", "
13784 | \n", "isotig33082 | \n", "0.218770 | \n", "
13785 | \n", "isotig33093 | \n", "0.429516 | \n", "
13786 | \n", "isotig33136 | \n", "0.809603 | \n", "
13787 | \n", "isotig33159 | \n", "1.262630 | \n", "
13788 rows × 2 columns
\n", "