{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Genomic Location of DML" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In this notebook, I will identify the genomic locations of [DML identified with `methylKit`](https://github.com/RobertsLab/project-oyster-oa/blob/master/code/Haws/04-methylKit.R). \n", "\n", "2. Create BEDfiles for DML\n", "3. Identify overlaps between pH- and ploidy-DML\n", "4. Characterize genomic locations for DML\n", "5. Identify overlaps between SNPs and DML" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 0. Set working directory" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'/Users/yaamini/Documents/project-oyster-oa/analyses/Haws_07-DML-characterization'" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pwd" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/yaamini/Documents/project-oyster-oa/analyses\n" ] } ], "source": [ "cd ../../analyses/" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#mkdir Haws_07-DML-characterization" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/yaamini/Documents/project-oyster-oa/analyses/Haws_07-DML-characterization\n" ] } ], "source": [ "cd Haws_07-DML-characterization/" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": true }, "outputs": [], "source": [ "bedtoolsDirectory = \"/Users/Shared/bioinformatics/bedtools2/bin/\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. Create BEDfiles for DML" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "My DML lists are `.csv` files. 
To identify genomic locations with `bedtools intersect`, I need BEDfiles." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2a. `methylKit`" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ",chr,start,end,strand,pvalue,qvalue,meth.diff\r\n", "49115,NC_047559.1,5294172,5294174,*,6.81863140326384e-14,1.13190244626751e-07,40.2560083594566\r\n", "162616,NC_047559.1,15801827,15801829,*,7.35840565483495e-09,0.000872504096156049,-45.6918238993711\r\n", "890333,NC_047560.1,65604843,65604845,*,3.34714016321879e-07,0.00940017301493494,49.4839101396478\r\n", "1014648,NC_047561.1,7843128,7843130,*,5.49971909095006e-08,0.00313909989423398,-26.3157894736842\r\n", "1041384,NC_047561.1,10147466,10147468,*,5.73605741393552e-08,0.00313909989423398,-30.4647676161919\r\n", "1041599,NC_047561.1,10166213,10166215,*,1.68763140575909e-09,0.000371694309881221,-29.1507066437723\r\n", "1053918,NC_047561.1,11783086,11783088,*,1.4461592764831e-09,0.000371694309881221,-44.1576698155646\r\n", "1060146,NC_047561.1,12279075,12279077,*,3.2020995626083e-09,0.000514406178679344,-26.890756302521\r\n", "1109777,NC_047561.1,16521359,16521361,*,1.50728082250528e-09,0.000371694309881221,28.8444735692442\r\n" ] } ], "source": [ "#Look at csv file to determine what modifications need to be made\n", "#Column 2: chr, Column 3: start, Column 4: end, Column 8: meth.diff\n", "!head ../Haws_04-methylKit/DML/DML-pH-25-Cov5.csv" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "../Haws_04-methylKit/DML/DML-pH-25-Cov5.csv\r\n", "../Haws_04-methylKit/DML/DML-ploidy-25-Cov5.csv\r\n" ] } ], "source": [ "#Will use 25% meth diff cutoff for DML definition\n", "!find ../Haws_04-methylKit/DML/DML*25*" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": 
[], "source": [ "%%bash\n", "\n", "#Replace , with tabs\n", "#Print chr, start, end, meth.diff\n", "#Remove header\n", "#Save as BEDfile\n", "#Only match .csv inputs so re-running does not pick up previously generated .bed files\n", "\n", "for f in ../Haws_04-methylKit/DML/DML*25*.csv\n", "do\n", " tr \",\" \"\\t\" < \"${f}\" \\\n", " | awk '{print $2\"\\t\"$3\"\\t\"$4\"\\t\"$8}' \\\n", " | tail -n+2 \\\n", " > \"${f}.bed\"\n", "done" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%%bash\n", "\n", "#Move BEDfiles to current working directory\n", "mv ../Haws_04-methylKit/DML/*bed ." ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> DML-pH-25-Cov5.csv.bed <==\r\n", "NC_047559.1\t5294172\t5294174\t40.2560083594566\r\n", "NC_047559.1\t15801827\t15801829\t-45.6918238993711\r\n", "NC_047560.1\t65604843\t65604845\t49.4839101396478\r\n", "NC_047561.1\t7843128\t7843130\t-26.3157894736842\r\n", "NC_047561.1\t10147466\t10147468\t-30.4647676161919\r\n", "NC_047561.1\t10166213\t10166215\t-29.1507066437723\r\n", "NC_047561.1\t11783086\t11783088\t-44.1576698155646\r\n", "NC_047561.1\t12279075\t12279077\t-26.890756302521\r\n", "NC_047561.1\t16521359\t16521361\t28.8444735692442\r\n", "NC_047561.1\t19286180\t19286182\t-55.4137931034483\r\n", "\r\n", "==> DML-ploidy-25-Cov5.csv.bed <==\r\n", "NC_047559.1\t12799610\t12799612\t27.7297297297297\r\n", "NC_047559.1\t22468723\t22468725\t28.4117647058823\r\n", "NC_047559.1\t44801744\t44801746\t34.0988480118915\r\n", "NC_047559.1\t53732861\t53732863\t25.8426966292135\r\n", "NC_047561.1\t9365798\t9365800\t34.0129358830146\r\n", "NC_047561.1\t28489237\t28489239\t-25.6018518518519\r\n", "NC_047561.1\t40362698\t40362700\t29.4117647058824\r\n", "NC_047563.1\t39926052\t39926054\t42.6872058194266\r\n", "NC_047564.1\t23049738\t23049740\t29.2845880961766\r\n", 
"NC_047564.1\t24426622\t24426624\t-30.0865800865801\r\n" ] } ], "source": [ "!head *bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2b. `DSS`" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ",chr,pos,stat,pvals,fdrs\r\n", "10950,NC_047559.1,520576,5.36772390879675,7.97364876094437e-08,0.00669831008311927\r\n", "280929,NC_047559.1,13702829,5.86875710812115,4.39074111576282e-09,0.000889552056257843\r\n", "817563,NC_047559.1,41205913,5.9624742480836,2.4844681633341e-09,0.000541334814760626\r\n", "880189,NC_047559.1,44191406,5.35003134621439,8.7938998655662e-08,0.00720229317625906\r\n", "934243,NC_047559.1,47000336,-5.41718434198197,6.05449093413689e-08,0.00563850981052605\r\n", "993302,NC_047559.1,50090321,-6.64621526435129,3.00725354774864e-11,1.57854060369563e-05\r\n", "1089838,NC_047559.1,54761361,-5.54042150746187,3.01744466452751e-08,0.00368342194971688\r\n", "1203367,NC_047560.1,4561420,7.79831465653462,6.27394139573504e-15,1.44903490034857e-08\r\n", "1203368,NC_047560.1,4561429,7.36822858834852,1.72910124667661e-13,1.99677355479751e-07\r\n" ] } ], "source": [ "#Check format: chr, pos, stat, pvals, fdrs\n", "!head ../Haws_04-DSS/DML/DML-pH-DSS.csv" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%%bash\n", "\n", "#Print chr, start, end\n", "#Remove header\n", "#Save as BEDfile\n", "\n", "for f in ../Haws_04-DSS/DML/DML*csv\n", "do\n", " tr \",\" \"\\t\" < ${f} \\\n", " | awk '{print $2\"\\t\"$3\"\\t\"$3+2}' \\\n", " | tail -n+2 \\\n", " > ${f}.bed\n", "done" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> ../Haws_04-DSS/DML/DML-pH-DSS.csv.bed <==\r\n", "NC_047559.1\t520576\t520578\r\n", "NC_047559.1\t13702829\t13702831\r\n", 
"NC_047559.1\t41205913\t41205915\r\n", "NC_047559.1\t44191406\t44191408\r\n", "NC_047559.1\t47000336\t47000338\r\n", "NC_047559.1\t50090321\t50090323\r\n", "NC_047559.1\t54761361\t54761363\r\n", "NC_047560.1\t4561420\t4561422\r\n", "NC_047560.1\t4561429\t4561431\r\n", "NC_047560.1\t4561492\t4561494\r\n", "\r\n", "==> ../Haws_04-DSS/DML/DML-ploidy-DSS.csv.bed <==\r\n", "NC_047559.1\t3159595\t3159597\r\n", "NC_047559.1\t3159620\t3159622\r\n", "NC_047559.1\t22732543\t22732545\r\n", "NC_047559.1\t30739063\t30739065\r\n", "NC_047559.1\t43886947\t43886949\r\n", "NC_047559.1\t44191406\t44191408\r\n", "NC_047559.1\t44850822\t44850824\r\n", "NC_047559.1\t45984057\t45984059\r\n", "NC_047559.1\t47884062\t47884064\r\n", "NC_047559.1\t48771720\t48771722\r\n", "\r\n", "==> ../Haws_04-DSS/DML/DML-ploidypH-DSS.csv.bed <==\r\n", "NC_047559.1\t3022288\t3022290\r\n", "NC_047559.1\t6445629\t6445631\r\n", "NC_047559.1\t46813912\t46813914\r\n", "NC_047559.1\t47000336\t47000338\r\n", "NC_047560.1\t4561492\t4561494\r\n", "NC_047560.1\t40407111\t40407113\r\n", "NC_047560.1\t55499797\t55499799\r\n", "NC_047560.1\t59701557\t59701559\r\n", "NC_047561.1\t25296188\t25296190\r\n", "NC_047562.1\t19799003\t19799005\r\n" ] } ], "source": [ "!head ../Haws_04-DSS/DML/*bed" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": true }, "outputs": [], "source": [ "%%bash\n", "\n", "#Move BEDfiles to current working directory\n", "mv ../Haws_04-DSS/DML/*bed ." ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "I imported the BEDfiles into [this IGV session]() to visualize them." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3. Identify overlaps between DML lists" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3a. 
`methylKit`" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 30\n", " 12\n" ] } ], "source": [ "#Count hypomethylated DML (meth.diff in column 4 starts with a minus sign)\n", "#Count hypermethylated DML (meth.diff in column 4 does not start with a minus sign)\n", "#Only column 4 is tested so a hyphen elsewhere in the line cannot be miscounted\n", "!cut -f4 DML-pH-25-Cov5.csv.bed | grep \"^-\" | wc -l\n", "!cut -f4 DML-pH-25-Cov5.csv.bed | grep -v \"^-\" | wc -l" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 10\n", " 19\n" ] } ], "source": [ "#Count hypomethylated DML (meth.diff in column 4 starts with a minus sign)\n", "#Count hypermethylated DML (meth.diff in column 4 does not start with a minus sign)\n", "#Only column 4 is tested so a hyphen elsewhere in the line cannot be miscounted\n", "!cut -f4 DML-ploidy-25-Cov5.csv.bed | grep \"^-\" | wc -l\n", "!cut -f4 DML-ploidy-25-Cov5.csv.bed | grep -v \"^-\" | wc -l" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047561.1\t40362698\t40362700\t-31.0344827586207\n", "NC_047567.1\t9520723\t9520725\t-45.7492354740061\n", " 2 DML-Cov5-Overlaps.bed\n" ] } ], "source": [ "#Find overlaps between pH- and ploidy-DML\n", "#Check head\n", "#Count number of overlapping DML\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-25-Cov5.csv.bed \\\n", "-b DML-ploidy-25-Cov5.csv.bed \\\n", "> DML-Cov5-Overlaps.bed\n", "!head DML-Cov5-Overlaps.bed\n", "!wc -l DML-Cov5-Overlaps.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3b. 
`DSS`" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t44191406\t44191408\n", "NC_047559.1\t50090321\t50090323\n", "NC_047560.1\t4561429\t4561431\n", "NC_047560.1\t4561492\t4561494\n", "NC_047560.1\t19948171\t19948173\n", "NC_047560.1\t40407111\t40407113\n", "NC_047562.1\t13501413\t13501415\n", "NC_047563.1\t33073757\t33073759\n", "NC_047565.1\t41071596\t41071598\n", "NC_047565.1\t43573693\t43573695\n", " 21 DML-DSS-pHploidy-Overlaps.bed\n" ] } ], "source": [ "#Find overlaps between pH- and ploidy-DML\n", "#Check head\n", "#Count number of overlapping DML\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-DSS.csv.bed \\\n", "-b DML-ploidy-DSS.csv.bed \\\n", "> DML-DSS-pHploidy-Overlaps.bed\n", "!head DML-DSS-pHploidy-Overlaps.bed\n", "!wc -l DML-DSS-pHploidy-Overlaps.bed" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t47000336\t47000338\n", "NC_047560.1\t4561492\t4561494\n", "NC_047560.1\t40407111\t40407113\n", "NC_047560.1\t59701557\t59701559\n", "NC_047561.1\t25296188\t25296190\n", "NC_047564.1\t48296668\t48296670\n", "NC_047565.1\t41071596\t41071598\n", "NC_047567.1\t31560080\t31560082\n", "NC_047567.1\t31560110\t31560112\n", "NC_047567.1\t31560120\t31560122\n", " 11 DML-DSS-pHint-Overlaps.bed\n" ] } ], "source": [ "#Find overlaps between pH- and interaction-DML\n", "#Check head\n", "#Count number of overlapping DML\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-DSS.csv.bed \\\n", "-b DML-ploidypH-DSS.csv.bed \\\n", "> DML-DSS-pHint-Overlaps.bed\n", "!head DML-DSS-pHint-Overlaps.bed\n", "!wc -l DML-DSS-pHint-Overlaps.bed" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ 
"NC_047560.1\t4561492\t4561494\n", "NC_047560.1\t40407111\t40407113\n", "NC_047563.1\t6395389\t6395391\n", "NC_047565.1\t41071596\t41071598\n", "NC_047566.1\t15683888\t15683890\n", "NC_047566.1\t15685674\t15685676\n", "NC_047567.1\t3077162\t3077164\n", "NC_047567.1\t31559112\t31559114\n", "NC_047567.1\t31559989\t31559991\n", "NC_047567.1\t31560004\t31560006\n", " 17 DML-DSS-ploidyint-Overlaps.bed\n" ] } ], "source": [ "#Find overlaps between ploidy- and interaction-DML\n", "#Check head\n", "#Count number of overlapping DML\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidy-DSS.csv.bed \\\n", "-b DML-ploidypH-DSS.csv.bed \\\n", "> DML-DSS-ploidyint-Overlaps.bed\n", "!head DML-DSS-ploidyint-Overlaps.bed\n", "!wc -l DML-DSS-ploidyint-Overlaps.bed" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t44191406\t44191408\n", "NC_047559.1\t47000336\t47000338\n", "NC_047559.1\t50090321\t50090323\n", "NC_047560.1\t4561429\t4561431\n", "NC_047560.1\t4561492\t4561494\n", "NC_047560.1\t19948171\t19948173\n", "NC_047560.1\t40407111\t40407113\n", "NC_047560.1\t59701557\t59701559\n", "NC_047561.1\t25296188\t25296190\n", "NC_047562.1\t13501413\t13501415\n", " 25 DML-DSS-all-Overlaps.bed\n" ] } ], "source": [ "#Find pH-DML that overlap ploidy- and/or interaction-DML\n", "#NOTE: -u with multiple -b files reports a pH-DML once if it overlaps ANY -b file, so this is the union of the two pairwise overlaps, not a three-way intersection\n", "#Check head\n", "#Count number of overlapping DML\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-DSS.csv.bed \\\n", "-b DML-ploidy-DSS.csv.bed DML-ploidypH-DSS.csv.bed \\\n", "> DML-DSS-all-Overlaps.bed\n", "!head DML-DSS-all-Overlaps.bed\n", "!wc -l DML-DSS-all-Overlaps.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3c. 
`methylKit` and `DSS`" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047567.1\t16984837\t16984839\t42.8241335044929\n", " 1 DML-pH-method-Overlaps.bed\n" ] } ], "source": [ "#Find overlaps between pH DML lists\n", "#Check head\n", "#Count number of overlapping DML\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-25-Cov5.csv.bed \\\n", "-b DML-pH-DSS.csv.bed \\\n", "> DML-pH-method-Overlaps.bed\n", "!head DML-pH-method-Overlaps.bed\n", "!wc -l DML-pH-method-Overlaps.bed" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047561.1\t40362698\t40362700\t29.4117647058824\n", "NC_047564.1\t23049738\t23049740\t29.2845880961766\n", "NC_047565.1\t14899959\t14899961\t32.5955265610438\n", " 3 DML-ploidy-method-Overlaps.bed\n" ] } ], "source": [ "#Find overlaps between ploidy DML lists\n", "#Check head\n", "#Count number of overlapping DML\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidy-25-Cov5.csv.bed \\\n", "-b DML-ploidy-DSS.csv.bed \\\n", "> DML-ploidy-method-Overlaps.bed\n", "!head DML-ploidy-method-Overlaps.bed\n", "!wc -l DML-ploidy-method-Overlaps.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3d. 
pH-DSS and gonad" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0 DML-pH-tissue-Overlaps.bed\r\n" ] } ], "source": [ "#Find overlaps between pH DML lists from different tissues\n", "#Check head\n", "#Count number of overlapping DML\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-DSS.csv.bed \\\n", "-b /Users/yaamini/Documents/project-gigas-oa-meth/output/10_DML-characterization/DML-pH-50-Cov5-All.csv.bed \\\n", "> DML-pH-tissue-Overlaps.bed\n", "!head DML-pH-tissue-Overlaps.bed\n", "!wc -l DML-pH-tissue-Overlaps.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 4. Characterize genomic locations of DML" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I will look at overlaps between genome features and either pH- or ploidy-DML." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4a. Gene" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### `methylKit`" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t5294172\t5294174\t40.2560083594566\n", "NC_047559.1\t15801827\t15801829\t-45.6918238993711\n", "NC_047560.1\t65604843\t65604845\t49.4839101396478\n", "NC_047561.1\t7843128\t7843130\t-26.3157894736842\n", "NC_047561.1\t10147466\t10147468\t-30.4647676161919\n", "NC_047561.1\t10166213\t10166215\t-29.1507066437723\n", "NC_047561.1\t11783086\t11783088\t-44.1576698155646\n", "NC_047561.1\t12279075\t12279077\t-26.890756302521\n", "NC_047561.1\t16521359\t16521361\t28.8444735692442\n", "NC_047561.1\t19545407\t19545409\t-41.4451612903226\n", " 36 DML-pH-25-Cov5-Gene.bed\n" ] } ], "source": [ "#Find overlaps between DML and feature\n", "#Look at output\n", "#Count number of overlaps\n", "\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-25-Cov5.csv.bed \\\n", 
"-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_gene.gff \\\n", "> DML-pH-25-Cov5-Gene.bed\n", "!head DML-pH-25-Cov5-Gene.bed\n", "!wc -l DML-pH-25-Cov5-Gene.bed" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t5294172\t5294174\t40.2560083594566\tNC_047559.1\tGnomon\tgene\t5232741\t5314657\t.\t+\t.\tID=gene-LOC105323223;Dbxref=GeneID:105323223;Name=LOC105323223;gbkey=Gene;gene=LOC105323223;gene_biotype=protein_coding\r\n", "NC_047559.1\t15801827\t15801829\t-45.6918238993711\tNC_047559.1\tGnomon\tgene\t15770190\t15841767\t.\t+\t.\tID=gene-LOC105337506;Dbxref=GeneID:105337506;Name=LOC105337506;gbkey=Gene;gene=LOC105337506;gene_biotype=protein_coding\r\n", "NC_047560.1\t65604843\t65604845\t49.4839101396478\tNC_047560.1\tGnomon\tgene\t65589988\t65617374\t.\t-\t.\tID=gene-LOC105347233;Dbxref=GeneID:105347233;Name=LOC105347233;gbkey=Gene;gene=LOC105347233;gene_biotype=protein_coding\r\n", "NC_047561.1\t7843128\t7843130\t-26.3157894736842\tNC_047561.1\tGnomon\tgene\t7840428\t7854938\t.\t-\t.\tID=gene-LOC105319999;Dbxref=GeneID:105319999;Name=LOC105319999;gbkey=Gene;gene=LOC105319999;gene_biotype=protein_coding\r\n", "NC_047561.1\t10147466\t10147468\t-30.4647676161919\tNC_047561.1\tGnomon\tgene\t10126075\t10148544\t.\t+\t.\tID=gene-LOC105337008;Dbxref=GeneID:105337008;Name=LOC105337008;gbkey=Gene;gene=LOC105337008;gene_biotype=protein_coding\r\n", "NC_047561.1\t10166213\t10166215\t-29.1507066437723\tNC_047561.1\tGnomon\tgene\t10158687\t10181603\t.\t+\t.\tID=gene-LOC105337004;Dbxref=GeneID:105337004;Name=LOC105337004;gbkey=Gene;gene=LOC105337004;gene_biotype=protein_coding\r\n", "NC_047561.1\t11783086\t11783088\t-44.1576698155646\tNC_047561.1\tGnomon\tgene\t11750567\t11834596\t.\t-\t.\tID=gene-LOC105346952;Dbxref=GeneID:105346952;Name=LOC105346952;gbkey=Gene;gene=LOC105346952;gene_biotype=protein_coding\r\n", 
"NC_047561.1\t12279075\t12279077\t-26.890756302521\tNC_047561.1\tGnomon\tgene\t12278501\t12283617\t.\t+\t.\tID=gene-LOC105335665;Dbxref=GeneID:105335665;Name=LOC105335665;gbkey=Gene;gene=LOC105335665;gene_biotype=protein_coding\r\n", "NC_047561.1\t16521359\t16521361\t28.8444735692442\tNC_047561.1\tGnomon\tgene\t16519780\t16543976\t.\t-\t.\tID=gene-LOC105345244;Dbxref=GeneID:105345244;Name=LOC105345244;gbkey=Gene;gene=LOC105345244;gene_biotype=protein_coding\r\n", "NC_047561.1\t19545407\t19545409\t-41.4451612903226\tNC_047561.1\tGnomon\tgene\t19544914\t19552612\t.\t-\t.\tID=gene-LOC105335660;Dbxref=GeneID:105335660;Name=LOC105335660;gbkey=Gene;gene=LOC105335660;gene_biotype=protein_coding\r\n" ] } ], "source": [ "#Find overlaps between DML and genes\n", "#Include original entry from gene GFF for each overlap, which will be used in downstream enrichment analyses (wb)\n", "#Look at output. Do not count overlaps because there are likely redundant entries\n", "\n", "!{bedtoolsDirectory}intersectBed \\\n", "-wb \\\n", "-a DML-pH-25-Cov5.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_gene.gff \\\n", "> DML-pH-25-Cov5-Gene-wb.bed\n", "!head DML-pH-25-Cov5-Gene-wb.bed" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t12799610\t12799612\t27.7297297297297\n", "NC_047559.1\t22468723\t22468725\t28.4117647058823\n", "NC_047559.1\t44801744\t44801746\t34.0988480118915\n", "NC_047561.1\t9365798\t9365800\t34.0129358830146\n", "NC_047561.1\t28489237\t28489239\t-25.6018518518519\n", "NC_047561.1\t40362698\t40362700\t29.4117647058824\n", "NC_047563.1\t39926052\t39926054\t42.6872058194266\n", "NC_047564.1\t23049738\t23049740\t29.2845880961766\n", "NC_047564.1\t24426622\t24426624\t-30.0865800865801\n", "NC_047564.1\t25380708\t25380710\t-40.1414677276746\n", " 25 DML-ploidy-25-Cov5-Gene.bed\n" ] } ], "source": [ 
"!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidy-25-Cov5.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_gene.gff \\\n", "> DML-ploidy-25-Cov5-Gene.bed\n", "!head DML-ploidy-25-Cov5-Gene.bed\n", "!wc -l DML-ploidy-25-Cov5-Gene.bed" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t12799610\t12799612\t27.7297297297297\tNC_047559.1\tGnomon\tgene\t12794201\t12802669\t.\t-\t.\tID=gene-LOC105348590;Dbxref=GeneID:105348590;Name=LOC105348590;gbkey=Gene;gene=LOC105348590;gene_biotype=protein_coding\r\n", "NC_047559.1\t22468723\t22468725\t28.4117647058823\tNC_047559.1\tGnomon\tgene\t22463416\t22483483\t.\t-\t.\tID=gene-LOC105324425;Dbxref=GeneID:105324425;Name=LOC105324425;gbkey=Gene;gene=LOC105324425;gene_biotype=protein_coding\r\n", "NC_047559.1\t44801744\t44801746\t34.0988480118915\tNC_047559.1\tGnomon\tgene\t44790976\t44818476\t.\t-\t.\tID=gene-LOC105319166;Dbxref=GeneID:105319166;Name=LOC105319166;gbkey=Gene;gene=LOC105319166;gene_biotype=protein_coding\r\n", "NC_047561.1\t9365798\t9365800\t34.0129358830146\tNC_047561.1\tGnomon\tgene\t9361078\t9371161\t.\t+\t.\tID=gene-LOC105331136;Dbxref=GeneID:105331136;Name=LOC105331136;gbkey=Gene;gene=LOC105331136;gene_biotype=protein_coding\r\n", "NC_047561.1\t28489237\t28489239\t-25.6018518518519\tNC_047561.1\tGnomon\tgene\t28464736\t28504826\t.\t+\t.\tID=gene-LOC105329306;Dbxref=GeneID:105329306;Name=LOC105329306;gbkey=Gene;gene=LOC105329306;gene_biotype=protein_coding\r\n", "NC_047561.1\t40362698\t40362700\t29.4117647058824\tNC_047561.1\tGnomon\tgene\t40358245\t40364606\t.\t+\t.\tID=gene-LOC105324542;Dbxref=GeneID:105324542;Name=LOC105324542;gbkey=Gene;gene=LOC105324542;gene_biotype=protein_coding\r\n", 
"NC_047563.1\t39926052\t39926054\t42.6872058194266\tNC_047563.1\tGnomon\tgene\t39899519\t39927142\t.\t-\t.\tID=gene-LOC105326839;Dbxref=GeneID:105326839;Name=LOC105326839;gbkey=Gene;gene=LOC105326839;gene_biotype=protein_coding\r\n", "NC_047564.1\t23049738\t23049740\t29.2845880961766\tNC_047564.1\tGnomon\tgene\t23026724\t23059519\t.\t+\t.\tID=gene-LOC105337762;Dbxref=GeneID:105337762;Name=LOC105337762;gbkey=Gene;gene=LOC105337762;gene_biotype=protein_coding\r\n", "NC_047564.1\t24426622\t24426624\t-30.0865800865801\tNC_047564.1\tGnomon\tgene\t24422805\t24429598\t.\t-\t.\tID=gene-LOC105328665;Dbxref=GeneID:105328665;Name=LOC105328665;gbkey=Gene;gene=LOC105328665;gene_biotype=protein_coding\r\n", "NC_047564.1\t25380708\t25380710\t-40.1414677276746\tNC_047564.1\tGnomon\tgene\t25378564\t25382046\t.\t+\t.\tID=gene-LOC105317478;Dbxref=GeneID:105317478;Name=LOC105317478;gbkey=Gene;gene=LOC105317478;gene_biotype=protein_coding\r\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-wb \\\n", "-a DML-ploidy-25-Cov5.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_gene.gff \\\n", "> DML-ploidy-25-Cov5-Gene-wb.bed\n", "!head DML-ploidy-25-Cov5-Gene-wb.bed" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047561.1\t40362698\t40362700\t-31.0344827586207\n", "NC_047567.1\t9520723\t9520725\t-45.7492354740061\n", " 2 DML-Cov5-Overlaps-Gene.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-Cov5-Overlaps.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_gene.gff \\\n", "> DML-Cov5-Overlaps-Gene.bed\n", "!head DML-Cov5-Overlaps-Gene.bed\n", "!wc -l DML-Cov5-Overlaps-Gene.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### `DSS`" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", 
"output_type": "stream", "text": [ "NC_047559.1\t41205913\t41205915\n", "NC_047559.1\t44191406\t44191408\n", "NC_047559.1\t47000336\t47000338\n", "NC_047559.1\t50090321\t50090323\n", "NC_047560.1\t4561420\t4561422\n", "NC_047560.1\t4561429\t4561431\n", "NC_047560.1\t4561492\t4561494\n", "NC_047560.1\t4561508\t4561510\n", "NC_047560.1\t4565018\t4565020\n", "NC_047560.1\t19948171\t19948173\n", " 123 DML-pH-DSS-Gene.bed\n" ] } ], "source": [ "#Find overlaps between DML and feature\n", "#Look at output\n", "#Count number of overlaps\n", "\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_gene.gff \\\n", "> DML-pH-DSS-Gene.bed\n", "!head DML-pH-DSS-Gene.bed\n", "!wc -l DML-pH-DSS-Gene.bed" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t41205913\t41205915\tNC_047559.1\tGnomon\tgene\t41204179\t41236908\t.\t-\t.\tID=gene-LOC105323174;Dbxref=GeneID:105323174;Name=LOC105323174;gbkey=Gene;gene=LOC105323174;gene_biotype=protein_coding\r\n", "NC_047559.1\t44191406\t44191408\tNC_047559.1\tGnomon\tgene\t44187569\t44214377\t.\t+\t.\tID=gene-LOC117687755;Dbxref=GeneID:117687755;Name=LOC117687755;gbkey=Gene;gene=LOC117687755;gene_biotype=protein_coding\r\n", "NC_047559.1\t47000336\t47000338\tNC_047559.1\tGnomon\tgene\t47000029\t47008715\t.\t-\t.\tID=gene-LOC105328838;Dbxref=GeneID:105328838;Name=LOC105328838;gbkey=Gene;gene=LOC105328838;gene_biotype=protein_coding\r\n", "NC_047559.1\t50090321\t50090323\tNC_047559.1\tGnomon\tgene\t50064798\t50106863\t.\t-\t.\tID=gene-LOC105320585;Dbxref=GeneID:105320585;Name=LOC105320585;gbkey=Gene;gene=LOC105320585;gene_biotype=protein_coding\r\n", 
"NC_047560.1\t4561420\t4561422\tNC_047560.1\tGnomon\tgene\t4523027\t4567751\t.\t-\t.\tID=gene-LOC117687305;Dbxref=GeneID:117687305;Name=LOC117687305;gbkey=Gene;gene=LOC117687305;gene_biotype=protein_coding\r\n", "NC_047560.1\t4561420\t4561422\tNC_047560.1\tGnomon\tgene\t4387457\t4700231\t.\t-\t.\tID=gene-LOC117687382;Dbxref=GeneID:117687382;Name=LOC117687382;gbkey=Gene;gene=LOC117687382;gene_biotype=protein_coding\r\n", "NC_047560.1\t4561429\t4561431\tNC_047560.1\tGnomon\tgene\t4523027\t4567751\t.\t-\t.\tID=gene-LOC117687305;Dbxref=GeneID:117687305;Name=LOC117687305;gbkey=Gene;gene=LOC117687305;gene_biotype=protein_coding\r\n", "NC_047560.1\t4561429\t4561431\tNC_047560.1\tGnomon\tgene\t4387457\t4700231\t.\t-\t.\tID=gene-LOC117687382;Dbxref=GeneID:117687382;Name=LOC117687382;gbkey=Gene;gene=LOC117687382;gene_biotype=protein_coding\r\n", "NC_047560.1\t4561492\t4561494\tNC_047560.1\tGnomon\tgene\t4523027\t4567751\t.\t-\t.\tID=gene-LOC117687305;Dbxref=GeneID:117687305;Name=LOC117687305;gbkey=Gene;gene=LOC117687305;gene_biotype=protein_coding\r\n", "NC_047560.1\t4561492\t4561494\tNC_047560.1\tGnomon\tgene\t4387457\t4700231\t.\t-\t.\tID=gene-LOC117687382;Dbxref=GeneID:117687382;Name=LOC117687382;gbkey=Gene;gene=LOC117687382;gene_biotype=protein_coding\r\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-wb \\\n", "-a DML-pH-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_gene.gff \\\n", "> DML-pH-DSS-Gene-wb.bed\n", "!head DML-pH-DSS-Gene-wb.bed" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 94\r\n" ] } ], "source": [ "#Isolate column with gene IDs\n", "#Translate ; and = to tabs\n", "#Isolate column with gene IDs\n", "#Sort and identify unique gene IDs\n", "#Count the number of unique gene IDs that contain DML\n", "\n", "!cut -f12 DML-pH-DSS-Gene-wb.bed \\\n", "| tr \";\" \"\\t\" \\\n", "| tr \"=\" \"\\t\" 
\\\n", "| cut -f6 \\\n", "| sort | uniq \\\n", "| wc -l" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "LOC105323174\r\n", "LOC117687755\r\n", "LOC105328838\r\n", "LOC105320585\r\n", "LOC117687305\r\n", "LOC117687382\r\n", "LOC117687305\r\n", "LOC117687382\r\n", "LOC117687305\r\n", "LOC117687382\r\n" ] } ], "source": [ "#Isolate gene ID information and save\n", "\n", "#Isolate column with gene IDs\n", "#Translate ; and = to tabs\n", "#Isolate column with gene IDs\n", "#Sort and identify unique gene IDs\n", "#Count the number of unique gene IDs that contain DML\n", "\n", "!cut -f12 DML-pH-DSS-Gene-wb.bed \\\n", "| tr \";\" \"\\t\" \\\n", "| tr \"=\" \"\\t\" \\\n", "| cut -f6 \\\n", "> geneID-pH-DML-overlap.tab\n", "!head geneID-pH-DML-overlap.tab" ] }, { "cell_type": "code", "execution_count": 52, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t3159595\t3159597\n", "NC_047559.1\t3159620\t3159622\n", "NC_047559.1\t30739063\t30739065\n", "NC_047559.1\t43886947\t43886949\n", "NC_047559.1\t44191406\t44191408\n", "NC_047559.1\t45984057\t45984059\n", "NC_047559.1\t47884062\t47884064\n", "NC_047559.1\t48771720\t48771722\n", "NC_047559.1\t50090321\t50090323\n", "NC_047559.1\t53771128\t53771130\n", " 145 DML-ploidy-DSS-Gene.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidy-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_gene.gff \\\n", "> DML-ploidy-DSS-Gene.bed\n", "!head DML-ploidy-DSS-Gene.bed\n", "!wc -l DML-ploidy-DSS-Gene.bed" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ 
"NC_047559.1\t3159595\t3159597\tNC_047559.1\tGnomon\tgene\t3158575\t3169070\t.\t+\t.\tID=gene-LOC105342725;Dbxref=GeneID:105342725;Name=LOC105342725;gbkey=Gene;gene=LOC105342725;gene_biotype=protein_coding\r\n", "NC_047559.1\t3159620\t3159622\tNC_047559.1\tGnomon\tgene\t3158575\t3169070\t.\t+\t.\tID=gene-LOC105342725;Dbxref=GeneID:105342725;Name=LOC105342725;gbkey=Gene;gene=LOC105342725;gene_biotype=protein_coding\r\n", "NC_047559.1\t30739063\t30739065\tNC_047559.1\tGnomon\tgene\t30728582\t30741948\t.\t-\t.\tID=gene-LOC105344651;Dbxref=GeneID:105344651;Name=LOC105344651;gbkey=Gene;gene=LOC105344651;gene_biotype=protein_coding\r\n", "NC_047559.1\t43886947\t43886949\tNC_047559.1\tGnomon\tgene\t43877299\t43899559\t.\t+\t.\tID=gene-LOC105339780;Dbxref=GeneID:105339780;Name=LOC105339780;gbkey=Gene;gene=LOC105339780;gene_biotype=protein_coding\r\n", "NC_047559.1\t44191406\t44191408\tNC_047559.1\tGnomon\tgene\t44187569\t44214377\t.\t+\t.\tID=gene-LOC117687755;Dbxref=GeneID:117687755;Name=LOC117687755;gbkey=Gene;gene=LOC117687755;gene_biotype=protein_coding\r\n", "NC_047559.1\t45984057\t45984059\tNC_047559.1\tGnomon\tgene\t45976550\t45993139\t.\t-\t.\tID=gene-LOC105333378;Dbxref=GeneID:105333378;Name=LOC105333378;gbkey=Gene;gene=LOC105333378;gene_biotype=protein_coding\r\n", "NC_047559.1\t47884062\t47884064\tNC_047559.1\tGnomon\tgene\t47880293\t47888292\t.\t+\t.\tID=gene-LOC117684625;Dbxref=GeneID:117684625;Name=LOC117684625;gbkey=Gene;gene=LOC117684625;gene_biotype=protein_coding\r\n", "NC_047559.1\t48771720\t48771722\tNC_047559.1\tGnomon\tgene\t48767452\t48775109\t.\t+\t.\tID=gene-LOC105341853;Dbxref=GeneID:105341853;Name=LOC105341853;gbkey=Gene;gene=LOC105341853;gene_biotype=protein_coding\r\n", "NC_047559.1\t50090321\t50090323\tNC_047559.1\tGnomon\tgene\t50064798\t50106863\t.\t-\t.\tID=gene-LOC105320585;Dbxref=GeneID:105320585;Name=LOC105320585;gbkey=Gene;gene=LOC105320585;gene_biotype=protein_coding\r\n", 
"NC_047559.1\t53771128\t53771130\tNC_047559.1\tGnomon\tgene\t53755998\t53781200\t.\t+\t.\tID=gene-LOC105341160;Dbxref=GeneID:105341160;Name=LOC105341160;gbkey=Gene;gene=LOC105341160;gene_biotype=protein_coding\r\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-wb \\\n", "-a DML-ploidy-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_gene.gff \\\n", "> DML-ploidy-DSS-Gene-wb.bed\n", "!head DML-ploidy-DSS-Gene-wb.bed" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 109\r\n" ] } ], "source": [ "#Isolate GFF attribute column (column 12 of the -wb output)\n", "#Translate ; and = to tabs\n", "#Isolate column with gene IDs\n", "#Sort and identify unique gene IDs\n", "#Count the number of unique gene IDs that contain DML\n", "\n", "!cut -f12 DML-ploidy-DSS-Gene-wb.bed \\\n", "| tr \";\" \"\\t\" \\\n", "| tr \"=\" \"\\t\" \\\n", "| cut -f6 \\\n", "| sort | uniq \\\n", "| wc -l" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "LOC105342725\r\n", "LOC105342725\r\n", "LOC105344651\r\n", "LOC105339780\r\n", "LOC117687755\r\n", "LOC105333378\r\n", "LOC117684625\r\n", "LOC105341853\r\n", "LOC105320585\r\n", "LOC105341160\r\n" ] } ], "source": [ "#Isolate gene ID information and save\n", "\n", "#Isolate GFF attribute column (column 12 of the -wb output)\n", "#Translate ; and = to tabs\n", "#Isolate column with gene IDs\n", "#Save gene IDs to a file (one line per DML-gene overlap, so IDs may repeat)\n", "\n", "!cut -f12 DML-ploidy-DSS-Gene-wb.bed \\\n", "| tr \";\" \"\\t\" \\\n", "| tr \"=\" \"\\t\" \\\n", "| cut -f6 \\\n", "> geneID-ploidy-DML-overlap.tab\n", "!head geneID-ploidy-DML-overlap.tab" ] }, { "cell_type": "code", "execution_count": 53, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", 
"text": [ "NC_047559.1\t3022288\t3022290\n", "NC_047559.1\t6445629\t6445631\n", "NC_047559.1\t46813912\t46813914\n", "NC_047559.1\t47000336\t47000338\n", "NC_047560.1\t4561492\t4561494\n", "NC_047560.1\t55499797\t55499799\n", "NC_047560.1\t59701557\t59701559\n", "NC_047561.1\t25296188\t25296190\n", "NC_047562.1\t19799003\t19799005\n", "NC_047563.1\t6395389\t6395391\n", " 48 DML-ploidypH-DSS-Gene.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidypH-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_gene.gff \\\n", "> DML-ploidypH-DSS-Gene.bed\n", "!head DML-ploidypH-DSS-Gene.bed\n", "!wc -l DML-ploidypH-DSS-Gene.bed" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t3022288\t3022290\tNC_047559.1\tGnomon\tgene\t3020010\t3024608\t.\t-\t.\tID=gene-LOC105337361;Dbxref=GeneID:105337361;Name=LOC105337361;gbkey=Gene;gene=LOC105337361;gene_biotype=protein_coding\r\n", "NC_047559.1\t6445629\t6445631\tNC_047559.1\tGnomon\tgene\t6434298\t6448829\t.\t+\t.\tID=gene-LOC105342013;Dbxref=GeneID:105342013;Name=LOC105342013;gbkey=Gene;gene=LOC105342013;gene_biotype=protein_coding\r\n", "NC_047559.1\t46813912\t46813914\tNC_047559.1\tGnomon\tgene\t46813603\t46821281\t.\t-\t.\tID=gene-LOC105330521;Dbxref=GeneID:105330521;Name=LOC105330521;gbkey=Gene;gene=LOC105330521;gene_biotype=protein_coding\r\n", "NC_047559.1\t46813912\t46813914\tNC_047559.1\tGnomon\tgene\t46808865\t46814128\t.\t+\t.\tID=gene-LOC105330522;Dbxref=GeneID:105330522;Name=LOC105330522;gbkey=Gene;gene=LOC105330522;gene_biotype=protein_coding\r\n", "NC_047559.1\t47000336\t47000338\tNC_047559.1\tGnomon\tgene\t47000029\t47008715\t.\t-\t.\tID=gene-LOC105328838;Dbxref=GeneID:105328838;Name=LOC105328838;gbkey=Gene;gene=LOC105328838;gene_biotype=protein_coding\r\n", 
"NC_047560.1\t4561492\t4561494\tNC_047560.1\tGnomon\tgene\t4523027\t4567751\t.\t-\t.\tID=gene-LOC117687305;Dbxref=GeneID:117687305;Name=LOC117687305;gbkey=Gene;gene=LOC117687305;gene_biotype=protein_coding\r\n", "NC_047560.1\t4561492\t4561494\tNC_047560.1\tGnomon\tgene\t4387457\t4700231\t.\t-\t.\tID=gene-LOC117687382;Dbxref=GeneID:117687382;Name=LOC117687382;gbkey=Gene;gene=LOC117687382;gene_biotype=protein_coding\r\n", "NC_047560.1\t55499797\t55499799\tNC_047560.1\tGnomon\tgene\t55225485\t55583389\t.\t+\t.\tID=gene-LOC105317430;Dbxref=GeneID:105317430;Name=LOC105317430;gbkey=Gene;gene=LOC105317430;gene_biotype=protein_coding\r\n", "NC_047560.1\t59701557\t59701559\tNC_047560.1\tGnomon\tgene\t59603941\t59706112\t.\t-\t.\tID=gene-LOC105348685;Dbxref=GeneID:105348685;Name=LOC105348685;gbkey=Gene;gene=LOC105348685;gene_biotype=protein_coding\r\n", "NC_047561.1\t25296188\t25296190\tNC_047561.1\tGnomon\tgene\t25296061\t25301591\t.\t-\t.\tID=gene-LOC105345208;Dbxref=GeneID:105345208;Name=LOC105345208;gbkey=Gene;gene=LOC105345208;gene_biotype=protein_coding\r\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-wb \\\n", "-a DML-ploidypH-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_gene.gff \\\n", "> DML-ploidypH-DSS-Gene-wb.bed\n", "!head DML-ploidypH-DSS-Gene-wb.bed" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 29\r\n" ] } ], "source": [ "#Isolate column with gene IDs\n", "#Translate ; and = to tabs\n", "#Isolate column with gene IDs\n", "#Sort and identify unique gene IDs\n", "#Count the number of unique gene IDs that contain DML\n", "\n", "!cut -f12 DML-ploidypH-DSS-Gene-wb.bed \\\n", "| tr \";\" \"\\t\" \\\n", "| tr \"=\" \"\\t\" \\\n", "| cut -f6 \\\n", "| sort | uniq \\\n", "| wc -l" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", 
"output_type": "stream", "text": [ "LOC105337361\r\n", "LOC105342013\r\n", "LOC105330521\r\n", "LOC105330522\r\n", "LOC105328838\r\n", "LOC117687305\r\n", "LOC117687382\r\n", "LOC105317430\r\n", "LOC105348685\r\n", "LOC105345208\r\n" ] } ], "source": [ "#Isolate gene ID information and save\n", "\n", "#Isolate GFF attribute column (column 12 of the -wb output)\n", "#Translate ; and = to tabs\n", "#Isolate column with gene IDs\n", "#Save gene IDs to a file (one line per DML-gene overlap, so IDs may repeat)\n", "\n", "!cut -f12 DML-ploidypH-DSS-Gene-wb.bed \\\n", "| tr \";\" \"\\t\" \\\n", "| tr \"=\" \"\\t\" \\\n", "| cut -f6 \\\n", "> geneID-ploidypH-DML-overlap.tab\n", "!head geneID-ploidypH-DML-overlap.tab" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4b. Exon UTR" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### `methylKit`" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047561.1\t10147466\t10147468\t-30.4647676161919\n", "NC_047563.1\t11760749\t11760751\t-34.033180778032\n", "NC_047564.1\t43801732\t43801734\t-26.7326732673267\n", "NC_047565.1\t4762558\t4762560\t-26.7316669176329\n", "NC_047566.1\t9548317\t9548319\t-34.3623481781376\n", " 5 DML-pH-25-Cov5-exonUTR.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-25-Cov5.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_exonUTR.gff \\\n", "> DML-pH-25-Cov5-exonUTR.bed\n", "!head DML-pH-25-Cov5-exonUTR.bed\n", "!wc -l DML-pH-25-Cov5-exonUTR.bed" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0 DML-ploidy-25-Cov5-exonUTR.bed\r\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidy-25-Cov5.csv.bed \\\n", "-b 
/Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_exonUTR.gff \\\n", "> DML-ploidy-25-Cov5-exonUTR.bed\n", "!head DML-ploidy-25-Cov5-exonUTR.bed\n", "!wc -l DML-ploidy-25-Cov5-exonUTR.bed" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Remove empty file\n", "!rm DML-ploidy-25-Cov5-exonUTR.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### `DSS`" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047560.1\t19948171\t19948173\n", "NC_047564.1\t11125924\t11125926\n", "NC_047567.1\t3262397\t3262399\n", "NC_047567.1\t4830649\t4830651\n", " 4 DML-pH-DSS-exonUTR.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_exonUTR.gff \\\n", "> DML-pH-DSS-exonUTR.bed\n", "!head DML-pH-DSS-exonUTR.bed\n", "!wc -l DML-pH-DSS-exonUTR.bed" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047560.1\t19948171\t19948173\n", "NC_047561.1\t50259518\t50259520\n", "NC_047564.1\t19499502\t19499504\n", "NC_047564.1\t32019304\t32019306\n", "NC_047566.1\t1757461\t1757463\n", "NC_047566.1\t15683888\t15683890\n", "NC_047566.1\t15685674\t15685676\n", "NC_047566.1\t15686778\t15686780\n", " 8 DML-ploidy-DSS-exonUTR.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidy-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_exonUTR.gff \\\n", "> DML-ploidy-DSS-exonUTR.bed\n", "!head DML-ploidy-DSS-exonUTR.bed\n", "!wc -l DML-ploidy-DSS-exonUTR.bed" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": 
"stream", "text": [ "NC_047566.1\t15683888\t15683890\n", "NC_047566.1\t15685674\t15685676\n", "NC_047567.1\t23555225\t23555227\n", " 3 DML-ploidypH-DSS-exonUTR.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidypH-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_exonUTR.gff \\\n", "> DML-ploidypH-DSS-exonUTR.bed\n", "!head DML-ploidypH-DSS-exonUTR.bed\n", "!wc -l DML-ploidypH-DSS-exonUTR.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4c. CDS" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### `methylKit`" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047561.1\t10166213\t10166215\t-29.1507066437723\n", "NC_047561.1\t11783086\t11783088\t-44.1576698155646\n", "NC_047561.1\t39008886\t39008888\t-35.8974358974359\n", "NC_047561.1\t40362698\t40362700\t-31.0344827586207\n", "NC_047567.1\t15896903\t15896905\t-28.3455405508507\n", "NC_047567.1\t22295946\t22295948\t-26.9118276501641\n", "NC_047568.1\t46593770\t46593772\t-26.1194029850746\n", " 7 DML-pH-25-Cov5-CDS.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-25-Cov5.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_CDS.gff \\\n", "> DML-pH-25-Cov5-CDS.bed\n", "!head DML-pH-25-Cov5-CDS.bed\n", "!wc -l DML-pH-25-Cov5-CDS.bed" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t12799610\t12799612\t27.7297297297297\n", "NC_047559.1\t22468723\t22468725\t28.4117647058823\n", "NC_047561.1\t40362698\t40362700\t29.4117647058824\n", "NC_047564.1\t23049738\t23049740\t29.2845880961766\n", "NC_047564.1\t24426622\t24426624\t-30.0865800865801\n", 
"NC_047565.1\t11970715\t11970717\t46.6938636749958\n", "NC_047566.1\t46447078\t46447080\t37.3155447746109\n", " 7 DML-ploidy-25-Cov5-CDS.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidy-25-Cov5.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_CDS.gff \\\n", "> DML-ploidy-25-Cov5-CDS.bed\n", "!head DML-ploidy-25-Cov5-CDS.bed\n", "!wc -l DML-ploidy-25-Cov5-CDS.bed" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047561.1\t40362698\t40362700\t-31.0344827586207\n", " 1 DML-Cov5-Overlaps-CDS.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-Cov5-Overlaps.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_CDS.gff \\\n", "> DML-Cov5-Overlaps-CDS.bed\n", "!head DML-Cov5-Overlaps-CDS.bed\n", "!wc -l DML-Cov5-Overlaps-CDS.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### `DSS`" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t41205913\t41205915\n", "NC_047559.1\t47000336\t47000338\n", "NC_047560.1\t4565018\t4565020\n", "NC_047561.1\t20199446\t20199448\n", "NC_047561.1\t22518848\t22518850\n", "NC_047561.1\t25296188\t25296190\n", "NC_047562.1\t38289332\t38289334\n", "NC_047563.1\t44904312\t44904314\n", "NC_047564.1\t22429851\t22429853\n", "NC_047565.1\t30437934\t30437936\n", " 15 DML-pH-DSS-CDS.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_CDS.gff \\\n", "> DML-pH-DSS-CDS.bed\n", "!head DML-pH-DSS-CDS.bed\n", "!wc -l DML-pH-DSS-CDS.bed" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": 
"stream", "text": [ "NC_047559.1\t3159595\t3159597\n", "NC_047559.1\t3159620\t3159622\n", "NC_047559.1\t47884062\t47884064\n", "NC_047559.1\t48771720\t48771722\n", "NC_047560.1\t33240715\t33240717\n", "NC_047561.1\t2478679\t2478681\n", "NC_047561.1\t10264269\t10264271\n", "NC_047561.1\t36235176\t36235178\n", "NC_047561.1\t40362698\t40362700\n", "NC_047562.1\t3686118\t3686120\n", " 26 DML-ploidy-DSS-CDS.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidy-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_CDS.gff \\\n", "> DML-ploidy-DSS-CDS.bed\n", "!head DML-ploidy-DSS-CDS.bed\n", "!wc -l DML-ploidy-DSS-CDS.bed" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t47000336\t47000338\n", "NC_047561.1\t25296188\t25296190\n", "NC_047563.1\t20372689\t20372691\n", "NC_047565.1\t38388780\t38388782\n", "NC_047567.1\t14572633\t14572635\n", "NC_047568.1\t52554330\t52554332\n", " 6 DML-ploidypH-DSS-CDS.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidypH-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_CDS.gff \\\n", "> DML-ploidypH-DSS-CDS.bed\n", "!head DML-ploidypH-DSS-CDS.bed\n", "!wc -l DML-ploidypH-DSS-CDS.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4d. 
Intron" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### `methylKit`" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t5294172\t5294174\t40.2560083594566\n", "NC_047559.1\t15801827\t15801829\t-45.6918238993711\n", "NC_047560.1\t65604843\t65604845\t49.4839101396478\n", "NC_047561.1\t7843128\t7843130\t-26.3157894736842\n", "NC_047561.1\t12279075\t12279077\t-26.890756302521\n", "NC_047561.1\t16521359\t16521361\t28.8444735692442\n", "NC_047561.1\t19545407\t19545409\t-41.4451612903226\n", "NC_047561.1\t31290734\t31290736\t-30.2791262135922\n", "NC_047561.1\t46808693\t46808695\t-27.2727272727273\n", "NC_047563.1\t66794619\t66794621\t-29.651103651714\n", " 24 DML-pH-25-Cov5-intron.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-25-Cov5.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_intron.bed \\\n", "> DML-pH-25-Cov5-intron.bed\n", "!head DML-pH-25-Cov5-intron.bed\n", "!wc -l DML-pH-25-Cov5-intron.bed" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t44801744\t44801746\t34.0988480118915\n", "NC_047561.1\t9365798\t9365800\t34.0129358830146\n", "NC_047561.1\t28489237\t28489239\t-25.6018518518519\n", "NC_047563.1\t39926052\t39926054\t42.6872058194266\n", "NC_047564.1\t25380708\t25380710\t-40.1414677276746\n", "NC_047565.1\t10523508\t10523510\t38.0689469431726\n", "NC_047565.1\t13203393\t13203395\t41.1725955204216\n", "NC_047565.1\t14899959\t14899961\t32.5955265610438\n", "NC_047566.1\t27129225\t27129227\t37.7269975786925\n", "NC_047566.1\t35988011\t35988013\t-53.0531425651507\n", " 18 DML-ploidy-25-Cov5-intron.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a 
DML-ploidy-25-Cov5.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_intron.bed \\\n", "> DML-ploidy-25-Cov5-intron.bed\n", "!head DML-ploidy-25-Cov5-intron.bed\n", "!wc -l DML-ploidy-25-Cov5-intron.bed" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047567.1\t9520723\t9520725\t-45.7492354740061\n", " 1 DML-Cov5-Overlaps-intron.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-Cov5-Overlaps.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_intron.bed \\\n", "> DML-Cov5-Overlaps-intron.bed\n", "!head DML-Cov5-Overlaps-intron.bed\n", "!wc -l DML-Cov5-Overlaps-intron.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### `DSS`" ] }, { "cell_type": "code", "execution_count": 57, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t44191406\t44191408\n", "NC_047559.1\t50090321\t50090323\n", "NC_047560.1\t4561420\t4561422\n", "NC_047560.1\t4561429\t4561431\n", "NC_047560.1\t4561492\t4561494\n", "NC_047560.1\t4561508\t4561510\n", "NC_047560.1\t33183588\t33183590\n", "NC_047560.1\t52833401\t52833403\n", "NC_047560.1\t52833440\t52833442\n", "NC_047560.1\t52833592\t52833594\n", " 104 DML-pH-DSS-intron.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_intron.bed \\\n", "> DML-pH-DSS-intron.bed\n", "!head DML-pH-DSS-intron.bed\n", "!wc -l DML-pH-DSS-intron.bed" ] }, { "cell_type": "code", "execution_count": 58, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t30739063\t30739065\n", "NC_047559.1\t43886947\t43886949\n", "NC_047559.1\t44191406\t44191408\n", 
"NC_047559.1\t45984057\t45984059\n", "NC_047559.1\t50090321\t50090323\n", "NC_047559.1\t53771128\t53771130\n", "NC_047559.1\t53948058\t53948060\n", "NC_047560.1\t599422\t599424\n", "NC_047560.1\t599436\t599438\n", "NC_047560.1\t599438\t599440\n", " 114 DML-ploidy-DSS-intron.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidy-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_intron.bed \\\n", "> DML-ploidy-DSS-intron.bed\n", "!head DML-ploidy-DSS-intron.bed\n", "!wc -l DML-ploidy-DSS-intron.bed" ] }, { "cell_type": "code", "execution_count": 59, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t3022288\t3022290\n", "NC_047559.1\t6445629\t6445631\n", "NC_047559.1\t46813912\t46813914\n", "NC_047560.1\t4561492\t4561494\n", "NC_047560.1\t55499797\t55499799\n", "NC_047560.1\t59701557\t59701559\n", "NC_047562.1\t19799003\t19799005\n", "NC_047563.1\t6395389\t6395391\n", "NC_047563.1\t9081152\t9081154\n", "NC_047563.1\t28822878\t28822880\n", " 41 DML-ploidypH-DSS-intron.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidypH-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_intron.bed \\\n", "> DML-ploidypH-DSS-intron.bed\n", "!head DML-ploidypH-DSS-intron.bed\n", "!wc -l DML-ploidypH-DSS-intron.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4e. 
Upstream flanks" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0 DML-pH-25-Cov5-upstream.bed\r\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-25-Cov5.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_upstream.gff \\\n", "> DML-pH-25-Cov5-upstream.bed\n", "!head DML-pH-25-Cov5-upstream.bed\n", "!wc -l DML-pH-25-Cov5-upstream.bed" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0 DML-ploidy-25-Cov5-upstream.bed\r\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidy-25-Cov5.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_upstream.gff \\\n", "> DML-ploidy-25-Cov5-upstream.bed\n", "!head DML-ploidy-25-Cov5-upstream.bed\n", "!wc -l DML-ploidy-25-Cov5-upstream.bed" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#Remove empty files\n", "#Only zero-length upstream files are deleted (-empty), so non-empty results such as the DSS upstream files survive if this cell is re-run\n", "!find . -maxdepth 1 -name '*upstream.bed' -empty -delete" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### `DSS`" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047563.1\t40936642\t40936644\n", "NC_047565.1\t61504990\t61504992\n", " 2 DML-pH-DSS-upstream.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_upstream.gff \\\n", "> DML-pH-DSS-upstream.bed\n", "!head DML-pH-DSS-upstream.bed\n", "!wc -l DML-pH-DSS-upstream.bed" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ 
"NC_047561.1\t26123058\t26123060\n", "NC_047567.1\t19065854\t19065856\n", "NC_047567.1\t19065864\t19065866\n", "NC_047567.1\t19065947\t19065949\n", "NC_047567.1\t19065949\t19065951\n", "NC_047567.1\t19065951\t19065953\n", "NC_047567.1\t19065978\t19065980\n", "NC_047568.1\t41270184\t41270186\n", " 8 DML-ploidy-DSS-upstream.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidy-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_upstream.gff \\\n", "> DML-ploidy-DSS-upstream.bed\n", "!head DML-ploidy-DSS-upstream.bed\n", "!wc -l DML-ploidy-DSS-upstream.bed" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0 DML-ploidypH-DSS-upstream.bed\r\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidypH-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_upstream.gff \\\n", "> DML-ploidypH-DSS-upstream.bed\n", "!head DML-ploidypH-DSS-upstream.bed\n", "!wc -l DML-ploidypH-DSS-upstream.bed" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Remove empty file\n", "!rm DML-ploidypH-DSS-upstream.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4f. 
Downstream flanks" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### `methylKit`" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047561.1\t19286180\t19286182\t-55.4137931034483\n", "NC_047561.1\t21915577\t21915579\t46.9271523178808\n", "NC_047567.1\t16984837\t16984839\t42.8241335044929\n", "NW_022994991.1\t19672\t19674\t36.769801980198\n", " 4 DML-pH-25-Cov5-downstream.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-25-Cov5.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_downstream.gff \\\n", "> DML-pH-25-Cov5-downstream.bed\n", "!head DML-pH-25-Cov5-downstream.bed\n", "!wc -l DML-pH-25-Cov5-downstream.bed" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047566.1\t24265305\t24265307\t-26.1261261261261\n", " 1 DML-ploidy-25-Cov5-downstream.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidy-25-Cov5.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_downstream.gff \\\n", "> DML-ploidy-25-Cov5-downstream.bed\n", "!head DML-ploidy-25-Cov5-downstream.bed\n", "!wc -l DML-ploidy-25-Cov5-downstream.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### `DSS`" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047563.1\t72683436\t72683438\n", "NC_047567.1\t16984837\t16984839\n", " 2 DML-pH-DSS-downstream.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_downstream.gff \\\n", "> DML-pH-DSS-downstream.bed\n", "!head 
DML-pH-DSS-downstream.bed\n", "!wc -l DML-pH-DSS-downstream.bed" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t44850822\t44850824\n", "NC_047561.1\t54056734\t54056736\n", "NC_047562.1\t20972631\t20972633\n", "NC_047564.1\t10653429\t10653431\n", "NC_047565.1\t28400115\t28400117\n", "NC_047566.1\t15686589\t15686591\n", "NC_047566.1\t15686778\t15686780\n", "NC_047568.1\t41270184\t41270186\n", " 8 DML-ploidy-DSS-downstream.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidy-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_downstream.gff \\\n", "> DML-ploidy-DSS-downstream.bed\n", "!head DML-ploidy-DSS-downstream.bed\n", "!wc -l DML-ploidy-DSS-downstream.bed" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0 DML-ploidypH-DSS-downstream.bed\r\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidypH-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_downstream.gff \\\n", "> DML-ploidypH-DSS-downstream.bed\n", "!head DML-ploidypH-DSS-downstream.bed\n", "!wc -l DML-ploidypH-DSS-downstream.bed" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Remove empty file\n", "!rm DML-ploidypH-DSS-downstream.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4g. 
Intergenic regions" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### `methylKit`" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047563.1\t61114616\t61114618\t-30.8823529411765\n", "NC_047565.1\t44521815\t44521817\t-30.3333333333333\n", " 2 DML-pH-25-Cov5-intergenic.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-25-Cov5.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_intergenic.bed \\\n", "> DML-pH-25-Cov5-intergenic.bed\n", "!head DML-pH-25-Cov5-intergenic.bed\n", "!wc -l DML-pH-25-Cov5-intergenic.bed" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t53732861\t53732863\t25.8426966292135\n", "NC_047566.1\t24266096\t24266098\t-29.4736842105263\n", "NC_047566.1\t24266109\t24266111\t-27.7777777777778\n", " 3 DML-ploidy-25-Cov5-intergenic.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidy-25-Cov5.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_intergenic.bed \\\n", "> DML-ploidy-25-Cov5-intergenic.bed\n", "!head DML-ploidy-25-Cov5-intergenic.bed\n", "!wc -l DML-ploidy-25-Cov5-intergenic.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### `DSS`" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t520576\t520578\n", "NC_047559.1\t13702829\t13702831\n", "NC_047559.1\t54761361\t54761363\n", "NC_047560.1\t40407111\t40407113\n", "NC_047560.1\t66087626\t66087628\n", "NC_047561.1\t22841405\t22841407\n", "NC_047561.1\t22841425\t22841427\n", "NC_047561.1\t22841435\t22841437\n", "NC_047561.1\t22841447\t22841449\n", 
"NC_047562.1\t21522451\t21522453\n", " 27 DML-pH-DSS-intergenic.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_intergenic.bed \\\n", "> DML-pH-DSS-intergenic.bed\n", "!head DML-pH-DSS-intergenic.bed\n", "!wc -l DML-pH-DSS-intergenic.bed" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t22732543\t22732545\n", "NC_047560.1\t40407111\t40407113\n", "NC_047560.1\t60343497\t60343499\n", "NC_047562.1\t17129038\t17129040\n", "NC_047563.1\t3904255\t3904257\n", "NC_047563.1\t3904287\t3904289\n", "NC_047563.1\t46190078\t46190080\n", "NC_047564.1\t36893052\t36893054\n", "NC_047564.1\t36893098\t36893100\n", "NC_047566.1\t15683888\t15683890\n", " 18 DML-ploidy-DSS-intergenic.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidy-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_intergenic.bed \\\n", "> DML-ploidy-DSS-intergenic.bed\n", "!head DML-ploidy-DSS-intergenic.bed\n", "!wc -l DML-ploidy-DSS-intergenic.bed" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047560.1\t40407111\t40407113\n", "NC_047563.1\t1288380\t1288382\n", "NC_047566.1\t15683888\t15683890\n", "NC_047566.1\t15685674\t15685676\n", "NC_047567.1\t3077162\t3077164\n", " 5 DML-ploidypH-DSS-intergenic.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidypH-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_intergenic.bed \\\n", "> DML-ploidypH-DSS-intergenic.bed\n", "!head DML-ploidypH-DSS-intergenic.bed\n", "!wc -l DML-ploidypH-DSS-intergenic.bed" ] }, { "cell_type": "markdown", "metadata": {}, 
"source": [ "### 4h. lncRNA" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### `methylKit`" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047564.1\t43801732\t43801734\t-26.7326732673267\n", "NC_047565.1\t44578741\t44578743\t-26.7896446913321\n", "NC_047566.1\t9548317\t9548319\t-34.3623481781376\n", " 3 DML-pH-25-Cov5-lncRNA.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-25-Cov5.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_lncRNA.gff \\\n", "> DML-pH-25-Cov5-lncRNA.bed\n", "!head DML-pH-25-Cov5-lncRNA.bed\n", "!wc -l DML-pH-25-Cov5-lncRNA.bed" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0 DML-ploidy-25-Cov5-lncRNA.bed\r\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidy-25-Cov5.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_lncRNA.gff \\\n", "> DML-ploidy-25-Cov5-lncRNA.bed\n", "!head DML-ploidy-25-Cov5-lncRNA.bed\n", "!wc -l DML-ploidy-25-Cov5-lncRNA.bed" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Remove empty file\n", "!rm DML-ploidy-25-Cov5-lncRNA.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### `DSS`" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047564.1\t48296668\t48296670\n", "NC_047566.1\t12865695\t12865697\n", "NC_047567.1\t28693204\t28693206\n", "NC_047567.1\t28701547\t28701549\n", " 4 DML-pH-DSS-lncRNA.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-DSS.csv.bed \\\n", "-b 
/Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_lncRNA.gff \\\n", "> DML-pH-DSS-lncRNA.bed\n", "!head DML-pH-DSS-lncRNA.bed\n", "!wc -l DML-pH-DSS-lncRNA.bed" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047567.1\t28693636\t28693638\n", " 1 DML-ploidy-DSS-lncRNA.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidy-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_lncRNA.gff \\\n", "> DML-ploidy-DSS-lncRNA.bed\n", "!head DML-ploidy-DSS-lncRNA.bed\n", "!wc -l DML-ploidy-DSS-lncRNA.bed" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047564.1\t48296668\t48296670\n", "NC_047567.1\t23555225\t23555227\n", " 2 DML-ploidypH-DSS-lncRNA.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidypH-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_lncRNA.gff \\\n", "> DML-ploidypH-DSS-lncRNA.bed\n", "!head DML-ploidypH-DSS-lncRNA.bed\n", "!wc -l DML-ploidypH-DSS-lncRNA.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4i. 
Transposable elements" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### `methylKit`" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t5294172\t5294174\t40.2560083594566\n", "NC_047561.1\t12279075\t12279077\t-26.890756302521\n", "NC_047561.1\t19286180\t19286182\t-55.4137931034483\n", "NC_047561.1\t21915577\t21915579\t46.9271523178808\n", "NC_047563.1\t61114616\t61114618\t-30.8823529411765\n", "NC_047564.1\t2678443\t2678445\t-45.6953642384106\n", "NC_047565.1\t10619872\t10619874\t-25.6880733944954\n", "NC_047565.1\t44521815\t44521817\t-30.3333333333333\n", "NC_047565.1\t44578741\t44578743\t-26.7896446913321\n", "NC_047566.1\t23226898\t23226900\t25.3731343283582\n", " 16 DML-pH-25-Cov5-TE.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-25-Cov5.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_rm.te.bed \\\n", "> DML-pH-25-Cov5-TE.bed\n", "!head DML-pH-25-Cov5-TE.bed\n", "!wc -l DML-pH-25-Cov5-TE.bed" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t44801744\t44801746\t34.0988480118915\n", "NC_047559.1\t53732861\t53732863\t25.8426966292135\n", "NC_047561.1\t9365798\t9365800\t34.0129358830146\n", "NC_047561.1\t28489237\t28489239\t-25.6018518518519\n", "NC_047563.1\t39926052\t39926054\t42.6872058194266\n", "NC_047566.1\t50117081\t50117083\t32.0492517222266\n", "NC_047566.1\t51204319\t51204321\t35.812086064308\n", "NC_047567.1\t21017447\t21017449\t34.8875423641779\n", " 8 DML-ploidy-25-Cov5-TE.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidy-25-Cov5.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_rm.te.bed \\\n", ">
DML-ploidy-25-Cov5-TE.bed\n", "!head DML-ploidy-25-Cov5-TE.bed\n", "!wc -l DML-ploidy-25-Cov5-TE.bed" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0 DML-Cov5-Overlaps-TE.bed\r\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-Cov5-Overlaps.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_rm.te.bed \\\n", "> DML-Cov5-Overlaps-TE.bed\n", "!head DML-Cov5-Overlaps-TE.bed\n", "!wc -l DML-Cov5-Overlaps-TE.bed" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!rm DML-Cov5-Overlaps-TE.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### `DSS`" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t13702829\t13702831\n", "NC_047559.1\t50090321\t50090323\n", "NC_047560.1\t4561420\t4561422\n", "NC_047560.1\t4561429\t4561431\n", "NC_047560.1\t4561492\t4561494\n", "NC_047560.1\t4561508\t4561510\n", "NC_047560.1\t19948171\t19948173\n", "NC_047560.1\t40407111\t40407113\n", "NC_047560.1\t52833401\t52833403\n", "NC_047560.1\t52833440\t52833442\n", " 86 DML-pH-DSS-TE.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_rm.te.bed \\\n", "> DML-pH-DSS-TE.bed\n", "!head DML-pH-DSS-TE.bed\n", "!wc -l DML-pH-DSS-TE.bed" ] }, { "cell_type": "code", "execution_count": 50, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t43886947\t43886949\n", "NC_047559.1\t50090321\t50090323\n", "NC_047559.1\t53948058\t53948060\n", "NC_047560.1\t599422\t599424\n", "NC_047560.1\t599436\t599438\n", "NC_047560.1\t599438\t599440\n", 
"NC_047560.1\t4561429\t4561431\n", "NC_047560.1\t4561492\t4561494\n", "NC_047560.1\t19948171\t19948173\n", "NC_047560.1\t40407111\t40407113\n", " 66 DML-ploidy-DSS-TE.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidy-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_rm.te.bed \\\n", "> DML-ploidy-DSS-TE.bed\n", "!head DML-ploidy-DSS-TE.bed\n", "!wc -l DML-ploidy-DSS-TE.bed" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t6445629\t6445631\n", "NC_047559.1\t46813912\t46813914\n", "NC_047560.1\t4561492\t4561494\n", "NC_047560.1\t40407111\t40407113\n", "NC_047560.1\t55499797\t55499799\n", "NC_047562.1\t19799003\t19799005\n", "NC_047563.1\t1288380\t1288382\n", "NC_047563.1\t6395389\t6395391\n", "NC_047566.1\t36571923\t36571925\n", "NC_047567.1\t3077162\t3077164\n", " 14 DML-ploidypH-DSS-TE.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidypH-DSS.csv.bed \\\n", "-b /Volumes/web-1/halfshell/genomic-databank/cgigas_uk_roslin_v1_rm.te.bed \\\n", "> DML-ploidypH-DSS-TE.bed\n", "!head DML-ploidypH-DSS-TE.bed\n", "!wc -l DML-ploidypH-DSS-TE.bed" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "## 5. SNP overlap" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I will now look at overlaps between pH-, ploidy-, and ploidypH-DML and unique C/T SNPs." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 5a.
Create BEDfiles" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_001276.1\t12440\t.\tC\tT\r\n", "NC_001276.1\t7226\t.\tC\tT\r\n", "NC_047559.1\t10001065\t.\tC\tT\r\n", "NC_047559.1\t10001128\t.\tC\tT\r\n", "NC_047559.1\t1000226\t.\tC\tT\r\n", "NC_047559.1\t10004318\t.\tC\tT\r\n", "NC_047559.1\t100045\t.\tC\tT\r\n", "NC_047559.1\t10004558\t.\tC\tT\r\n", "NC_047559.1\t10005322\t.\tC\tT\r\n", "NC_047559.1\t10005684\t.\tC\tT\r\n" ] } ], "source": [ "!head /Volumes/web/spartina/project-oyster-oa/Haws/BS-Snper/unique-CT-SNPs.tab" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_001276.1\t12440\t12440\r\n", "NC_001276.1\t7226\t7226\r\n", "NC_047559.1\t10001065\t10001065\r\n", "NC_047559.1\t10001128\t10001128\r\n", "NC_047559.1\t1000226\t1000226\r\n", "NC_047559.1\t10004318\t10004318\r\n", "NC_047559.1\t100045\t100045\r\n", "NC_047559.1\t10004558\t10004558\r\n", "NC_047559.1\t10005322\t10005322\r\n", "NC_047559.1\t10005684\t10005684\r\n" ] } ], "source": [ "!awk '{print $1\"\\t\"$2\"\\t\"$2}' /Volumes/web/spartina/project-oyster-oa/Haws/BS-Snper/unique-CT-SNPs.tab \\\n", "> /Volumes/web/spartina/project-oyster-oa/Haws/BS-Snper/unique-CT-SNPs.bed\n", "!head /Volumes/web/spartina/project-oyster-oa/Haws/BS-Snper/unique-CT-SNPs.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 5b. 
Overlaps with Unique C/T SNPs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### `methylKit`" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047560.1\t65604843\t65604845\t49.4839101396478\n", "NC_047561.1\t7843128\t7843130\t-26.3157894736842\n", "NC_047561.1\t10166213\t10166215\t-29.1507066437723\n", "NC_047561.1\t39008886\t39008888\t-35.8974358974359\n", "NC_047567.1\t15896903\t15896905\t-28.3455405508507\n", "NC_047568.1\t46593770\t46593772\t-26.1194029850746\n", " 6 DML-pH-25-Cov5-unique-CT-SNPs.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-25-Cov5.csv.bed \\\n", "-b /Volumes/web/spartina/project-oyster-oa/Haws/BS-Snper/unique-CT-SNPs.bed \\\n", "> DML-pH-25-Cov5-unique-CT-SNPs.bed\n", "!head DML-pH-25-Cov5-unique-CT-SNPs.bed\n", "!wc -l DML-pH-25-Cov5-unique-CT-SNPs.bed" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t22468723\t22468725\t28.4117647058823\n", "NC_047559.1\t44801744\t44801746\t34.0988480118915\n", "NC_047561.1\t28489237\t28489239\t-25.6018518518519\n", "NC_047565.1\t11970715\t11970717\t46.6938636749958\n", "NC_047568.1\t46583284\t46583286\t-33.1582332761578\n", " 5 DML-ploidy-25-Cov5-unique-CT-SNPs.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidy-25-Cov5.csv.bed \\\n", "-b /Volumes/web/spartina/project-oyster-oa/Haws/BS-Snper/unique-CT-SNPs.bed \\\n", "> DML-ploidy-25-Cov5-unique-CT-SNPs.bed\n", "!head DML-ploidy-25-Cov5-unique-CT-SNPs.bed\n", "!wc -l DML-ploidy-25-Cov5-unique-CT-SNPs.bed" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0 DML-Cov5-Overlaps-unique-CT-SNPs.bed\r\n" ] } ], 
"source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-Cov5-Overlaps.bed \\\n", "-b /Volumes/web/spartina/project-oyster-oa/Haws/BS-Snper/unique-CT-SNPs.bed \\\n", "> DML-Cov5-Overlaps-unique-CT-SNPs.bed\n", "!head DML-Cov5-Overlaps-unique-CT-SNPs.bed\n", "!wc -l DML-Cov5-Overlaps-unique-CT-SNPs.bed" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Remove empty file\n", "!rm DML-Cov5-Overlaps-unique-CT-SNPs.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### `DSS`" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047561.1\t11873876\t11873878\n", "NC_047565.1\t14697037\t14697039\n", "NC_047565.1\t41071596\t41071598\n", "NC_047567.1\t23420256\t23420258\n", "NC_047568.1\t44121369\t44121371\n", " 5 DML-pH-DSS-unique-CT-SNPs.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-DSS.csv.bed \\\n", "-b /Volumes/web/spartina/project-oyster-oa/Haws/BS-Snper/unique-CT-SNPs.bed \\\n", "> DML-pH-DSS-unique-CT-SNPs.bed\n", "!head DML-pH-DSS-unique-CT-SNPs.bed\n", "!wc -l DML-pH-DSS-unique-CT-SNPs.bed" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047561.1\t11873876\t11873878\tNC_047561.1\tGnomon\tgene\t11845871\t11886768\t.\t+\t.\tID=gene-LOC105323811;Dbxref=GeneID:105323811;Name=LOC105323811;gbkey=Gene;gene=LOC105323811;gene_biotype=protein_coding\n", "NC_047565.1\t14697037\t14697039\tNC_047565.1\tGnomon\tgene\t14692913\t14700823\t.\t-\t.\tID=gene-LOC105334360;Dbxref=GeneID:105334360;Name=LOC105334360;gbkey=Gene;gene=LOC105334360;gene_biotype=protein_coding\n", 
"NC_047565.1\t41071596\t41071598\tNC_047565.1\tGnomon\tgene\t41066038\t41077950\t.\t-\t.\tID=gene-LOC105336258;Dbxref=GeneID:105336258;Name=LOC105336258;gbkey=Gene;gene=LOC105336258;gene_biotype=protein_coding\n", "NC_047567.1\t23420256\t23420258\tNC_047567.1\tGnomon\tgene\t23409856\t23421800\t.\t+\t.\tID=gene-LOC105337408;Dbxref=GeneID:105337408;Name=LOC105337408;gbkey=Gene;gene=LOC105337408;gene_biotype=protein_coding\n", "NC_047568.1\t44121369\t44121371\tNC_047568.1\tGnomon\tgene\t44115414\t44133450\t.\t-\t.\tID=gene-LOC105329817;Dbxref=GeneID:105329817;Name=LOC105329817;gbkey=Gene;gene=LOC105329817;gene_biotype=protein_coding\n", " 5 DML-pH-DSS-Gene-unique-CT-SNPs.bed\n" ] } ], "source": [ "#Number of genic DML that overlap with SNPs\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-DSS-Gene-wb.bed \\\n", "-b /Volumes/web/spartina/project-oyster-oa/Haws/BS-Snper/unique-CT-SNPs.bed \\\n", "> DML-pH-DSS-Gene-unique-CT-SNPs.bed\n", "!head DML-pH-DSS-Gene-unique-CT-SNPs.bed\n", "!wc -l DML-pH-DSS-Gene-unique-CT-SNPs.bed" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 5\r\n" ] } ], "source": [ "#Number of unique genes with DML that overlap with SNPs\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-pH-DSS-Gene-wb.bed \\\n", "-b /Volumes/web/spartina/project-oyster-oa/Haws/BS-Snper/unique-CT-SNPs.bed \\\n", "| cut -f12 \\\n", "| tr \";\" \"\\t\" \\\n", "| tr \"=\" \"\\t\" \\\n", "| cut -f6 \\\n", "| sort | uniq \\\n", "| wc -l" ] }, { "cell_type": "code", "execution_count": 55, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047561.1\t26374465\t26374467\n", "NC_047565.1\t41071596\t41071598\n", "NC_047565.1\t45816109\t45816111\n", " 3 DML-ploidy-DSS-unique-CT-SNPs.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a 
DML-ploidy-DSS.csv.bed \\\n", "-b /Volumes/web/spartina/project-oyster-oa/Haws/BS-Snper/unique-CT-SNPs.bed \\\n", "> DML-ploidy-DSS-unique-CT-SNPs.bed\n", "!head DML-ploidy-DSS-unique-CT-SNPs.bed\n", "!wc -l DML-ploidy-DSS-unique-CT-SNPs.bed" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047561.1\t26374465\t26374467\tNC_047561.1\tGnomon\tgene\t26354518\t26443353\t.\t+\t.\tID=gene-LOC105348209;Dbxref=GeneID:105348209;Name=LOC105348209;gbkey=Gene;gene=LOC105348209;gene_biotype=protein_coding\n", "NC_047565.1\t41071596\t41071598\tNC_047565.1\tGnomon\tgene\t41066038\t41077950\t.\t-\t.\tID=gene-LOC105336258;Dbxref=GeneID:105336258;Name=LOC105336258;gbkey=Gene;gene=LOC105336258;gene_biotype=protein_coding\n", "NC_047565.1\t45816109\t45816111\tNC_047565.1\tGnomon\tgene\t45810285\t45822566\t.\t+\t.\tID=gene-LOC105338681;Dbxref=GeneID:105338681;Name=LOC105338681;gbkey=Gene;gene=LOC105338681;gene_biotype=protein_coding\n", " 3 DML-ploidy-DSS-Gene-unique-CT-SNPs.bed\n" ] } ], "source": [ "#Number of genic DML that overlap with SNPs\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidy-DSS-Gene-wb.bed \\\n", "-b /Volumes/web/spartina/project-oyster-oa/Haws/BS-Snper/unique-CT-SNPs.bed \\\n", "> DML-ploidy-DSS-Gene-unique-CT-SNPs.bed\n", "!head DML-ploidy-DSS-Gene-unique-CT-SNPs.bed\n", "!wc -l DML-ploidy-DSS-Gene-unique-CT-SNPs.bed" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 3\r\n" ] } ], "source": [ "#Number of unique genes with DML that overlap with SNPs\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidy-DSS-Gene-wb.bed \\\n", "-b /Volumes/web/spartina/project-oyster-oa/Haws/BS-Snper/unique-CT-SNPs.bed \\\n", "| cut -f12 \\\n", "| tr \";\" \"\\t\" \\\n", "| tr \"=\" \"\\t\" \\\n", "| cut -f6 \\\n", "| sort | uniq 
\\\n", "| wc -l" ] }, { "cell_type": "code", "execution_count": 56, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047565.1\t41071596\t41071598\n", " 1 DML-ploidypH-DSS-unique-CT-SNPs.bed\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidypH-DSS.csv.bed \\\n", "-b /Volumes/web/spartina/project-oyster-oa/Haws/BS-Snper/unique-CT-SNPs.bed \\\n", "> DML-ploidypH-DSS-unique-CT-SNPs.bed\n", "!head DML-ploidypH-DSS-unique-CT-SNPs.bed\n", "!wc -l DML-ploidypH-DSS-unique-CT-SNPs.bed" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047565.1\t41071596\t41071598\tNC_047565.1\tGnomon\tgene\t41066038\t41077950\t.\t-\t.\tID=gene-LOC105336258;Dbxref=GeneID:105336258;Name=LOC105336258;gbkey=Gene;gene=LOC105336258;gene_biotype=protein_coding\n", " 1 DML-ploidypH-DSS-Gene-unique-CT-SNPs.bed\n" ] } ], "source": [ "#Number of genic DML that overlap with SNPs\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidypH-DSS-Gene-wb.bed \\\n", "-b /Volumes/web/spartina/project-oyster-oa/Haws/BS-Snper/unique-CT-SNPs.bed \\\n", "> DML-ploidypH-DSS-Gene-unique-CT-SNPs.bed\n", "!head DML-ploidypH-DSS-Gene-unique-CT-SNPs.bed\n", "!wc -l DML-ploidypH-DSS-Gene-unique-CT-SNPs.bed" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 1\r\n" ] } ], "source": [ "#Number of unique genes with DML that overlap with SNPs\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a DML-ploidypH-DSS-Gene-wb.bed \\\n", "-b /Volumes/web/spartina/project-oyster-oa/Haws/BS-Snper/unique-CT-SNPs.bed \\\n", "| cut -f12 \\\n", "| tr \";\" \"\\t\" \\\n", "| tr \"=\" \"\\t\" \\\n", "| cut -f6 \\\n", "| sort | uniq \\\n", "| wc -l" ] }, { "cell_type": "markdown", "metadata": { 
"collapsed": true }, "source": [ "## 6. Obtain line counts for overlap files" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This will help with downstream visualization." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 6a. ploidy-DSS" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "DML-ploidy-DSS-CDS.bed\r\n", "DML-ploidy-DSS-Gene-wb.bed\r\n", "DML-ploidy-DSS-Gene.bed\r\n", "DML-ploidy-DSS-TE.bed\r\n", "DML-ploidy-DSS-downstream.bed\r\n", "DML-ploidy-DSS-exonUTR.bed\r\n", "DML-ploidy-DSS-intergenic.bed\r\n", "DML-ploidy-DSS-intron.bed\r\n", "DML-ploidy-DSS-lncRNA.bed\r\n", "DML-ploidy-DSS-unique-CT-SNPs.bed\r\n", "DML-ploidy-DSS-upstream.bed\r\n" ] } ], "source": [ "!find DML-ploidy-DSS-*" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Get line count for all DML overlap BEDfiles\n", "#Only match .bed files so the counts file written by this cell is never counted on a re-run\n", "#Drop the total row by name rather than by line number, so the result does not depend on how many files match\n", "#Print in a tab-delimited format\n", "#Save output\n", "\n", "!wc -l DML-ploidy-DSS-*.bed \\\n", "| awk '$2 != \"total\" {print $1\"\\t\"$2}' \\\n", "> DML-ploidy-DSS-Overlap-counts.txt" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "26\tDML-ploidy-DSS-CDS.bed\r\n", "161\tDML-ploidy-DSS-Gene-wb.bed\r\n", "145\tDML-ploidy-DSS-Gene.bed\r\n", "66\tDML-ploidy-DSS-TE.bed\r\n", "8\tDML-ploidy-DSS-downstream.bed\r\n", "8\tDML-ploidy-DSS-exonUTR.bed\r\n", "18\tDML-ploidy-DSS-intergenic.bed\r\n", "114\tDML-ploidy-DSS-intron.bed\r\n", "1\tDML-ploidy-DSS-lncRNA.bed\r\n", "3\tDML-ploidy-DSS-unique-CT-SNPs.bed\r\n", "8\tDML-ploidy-DSS-upstream.bed\r\n" ] } ], "source": [ "!cat DML-ploidy-DSS-Overlap-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 6b.
pH-DSS" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "DML-pH-DSS-CDS.bed\r\n", "DML-pH-DSS-Gene-wb.bed\r\n", "DML-pH-DSS-Gene.bed\r\n", "DML-pH-DSS-TE.bed\r\n", "DML-pH-DSS-downstream.bed\r\n", "DML-pH-DSS-exonUTR.bed\r\n", "DML-pH-DSS-intergenic.bed\r\n", "DML-pH-DSS-intron.bed\r\n", "DML-pH-DSS-lncRNA.bed\r\n", "DML-pH-DSS-unique-CT-SNPs.bed\r\n", "DML-pH-DSS-upstream.bed\r\n" ] } ], "source": [ "!find DML-pH-DSS-*" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Get line count for all DML overlap BEDfiles\n", "#Only match .bed files so the counts file written by this cell is never counted on a re-run\n", "#Drop the total row by name rather than by line number, so the result does not depend on how many files match\n", "#Print in a tab-delimited format\n", "#Save output\n", "\n", "!wc -l DML-pH-DSS-*.bed \\\n", "| awk '$2 != \"total\" {print $1\"\\t\"$2}' \\\n", "> DML-pH-DSS-Overlap-counts.txt" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "15\tDML-pH-DSS-CDS.bed\r\n", "141\tDML-pH-DSS-Gene-wb.bed\r\n", "123\tDML-pH-DSS-Gene.bed\r\n", "86\tDML-pH-DSS-TE.bed\r\n", "2\tDML-pH-DSS-downstream.bed\r\n", "4\tDML-pH-DSS-exonUTR.bed\r\n", "27\tDML-pH-DSS-intergenic.bed\r\n", "104\tDML-pH-DSS-intron.bed\r\n", "4\tDML-pH-DSS-lncRNA.bed\r\n", "5\tDML-pH-DSS-unique-CT-SNPs.bed\r\n", "2\tDML-pH-DSS-upstream.bed\r\n" ] } ], "source": [ "!cat DML-pH-DSS-Overlap-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 6c.
ploidypH-DSS" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "DML-ploidypH-DSS-CDS.bed\r\n", "DML-ploidypH-DSS-Gene-wb.bed\r\n", "DML-ploidypH-DSS-Gene.bed\r\n", "DML-ploidypH-DSS-TE.bed\r\n", "DML-ploidypH-DSS-exonUTR.bed\r\n", "DML-ploidypH-DSS-intergenic.bed\r\n", "DML-ploidypH-DSS-intron.bed\r\n", "DML-ploidypH-DSS-lncRNA.bed\r\n", "DML-ploidypH-DSS-unique-CT-SNPs.bed\r\n" ] } ], "source": [ "!find DML-ploidypH-DSS-*" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Get line count for all DML overlap BEDfiles\n", "#Only match .bed files so the counts file written by this cell is never counted on a re-run\n", "#Drop the total row by name rather than by line number, so the result does not depend on how many files match\n", "#Print in a tab-delimited format\n", "#Save output\n", "\n", "!wc -l DML-ploidypH-DSS-*.bed \\\n", "| awk '$2 != \"total\" {print $1\"\\t\"$2}' \\\n", "> DML-ploidypH-DSS-Overlap-counts.txt" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "6\tDML-ploidypH-DSS-CDS.bed\r\n", "51\tDML-ploidypH-DSS-Gene-wb.bed\r\n", "48\tDML-ploidypH-DSS-Gene.bed\r\n", "14\tDML-ploidypH-DSS-TE.bed\r\n", "0\tDML-ploidypH-DSS-downstream.bed\r\n", "3\tDML-ploidypH-DSS-exonUTR.bed\r\n", "5\tDML-ploidypH-DSS-intergenic.bed\r\n", "41\tDML-ploidypH-DSS-intron.bed\r\n", "2\tDML-ploidypH-DSS-lncRNA.bed\r\n", "1\tDML-ploidypH-DSS-unique-CT-SNPs.bed\r\n", "0\tDML-ploidypH-DSS-upstream.bed\r\n" ] } ], "source": [ "!cat DML-ploidypH-DSS-Overlap-counts.txt" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [default]", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python",
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 1 }