{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Identifying Genomic Locations\n", "\n", "In this notebook, the location of various CpG categories will be characterized in relation to genome features." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 0. Set working directory and obtain checksums" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/yaamini/Documents/Meth_Compare/scripts\r\n" ] } ], "source": [ "!pwd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/yaamini/Documents/Meth_Compare/analyses\n" ] } ], "source": [ "cd ../analyses/" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#!mkdir Identifying-genomic-locations" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/yaamini/Documents/Meth_Compare/analyses/Identifying-genomic-locations\n" ] } ], "source": [ "cd Identifying-genomic-locations/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## *M. capitata*" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Make a directory for Mcap output\n", "#!mkdir Mcap" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/yaamini/Documents/Meth_Compare/analyses/Identifying-genomic-locations/Mcap\n" ] } ], "source": [ "cd Mcap/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1. 
Download files" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2020-05-07 23:05:12-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Mcap/\n", "Resolving gannet.fish.washington.edu (gannet.fish.washington.edu)... 128.95.149.52\n", "Connecting to gannet.fish.washington.edu (gannet.fish.washington.edu)|128.95.149.52|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html.tmp’\n", "\n", "index.html.tmp [ <=> ] 7.88K --.-KB/s in 0s \n", "\n", "2020-05-07 23:05:13 (19.7 MB/s) - ‘./index.html.tmp’ saved [8072]\n", "\n", "Loading robots.txt; please ignore errors.\n", "--2020-05-07 23:05:13-- https://gannet.fish.washington.edu/robots.txt\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 404 Not Found\n", "2020-05-07 23:05:13 ERROR 404: Not Found.\n", "\n", "Removing ./index.html.tmp since it should be rejected.\n", "\n", "--2020-05-07 23:05:13-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Mcap/?C=N;O=D\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=N;O=D.tmp’\n", "\n", "index.html?C=N;O=D. [ <=> ] 7.88K --.-KB/s in 0s \n", "\n", "2020-05-07 23:05:13 (19.7 MB/s) - ‘./index.html?C=N;O=D.tmp’ saved [8072]\n", "\n", "Removing ./index.html?C=N;O=D.tmp since it should be rejected.\n", "\n", "--2020-05-07 23:05:13-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Mcap/?C=M;O=A\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=M;O=A.tmp’\n", "\n", "index.html?C=M;O=A. [ <=> ] 7.88K --.-KB/s in 0s \n", "\n", "2020-05-07 23:05:13 (30.8 MB/s) - ‘./index.html?C=M;O=A.tmp’ saved [8072]\n", "\n", "Removing ./index.html?C=M;O=A.tmp since it should be rejected.\n", "\n", "--2020-05-07 23:05:13-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Mcap/?C=S;O=A\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=S;O=A.tmp’\n", "\n", "index.html?C=S;O=A. [ <=> ] 7.88K --.-KB/s in 0s \n", "\n", "2020-05-07 23:05:13 (31.8 MB/s) - ‘./index.html?C=S;O=A.tmp’ saved [8072]\n", "\n", "Removing ./index.html?C=S;O=A.tmp since it should be rejected.\n", "\n", "--2020-05-07 23:05:13-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Mcap/?C=D;O=A\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=D;O=A.tmp’\n", "\n", "index.html?C=D;O=A. [ <=> ] 7.88K --.-KB/s in 0s \n", "\n", "2020-05-07 23:05:13 (33.8 MB/s) - ‘./index.html?C=D;O=A.tmp’ saved [8072]\n", "\n", "Removing ./index.html?C=D;O=A.tmp since it should be rejected.\n", "\n", "--2020-05-07 23:05:13-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Mcap/Mcap_MBD_only_CpGs.bed\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 2609425 (2.5M) [application/vnd.realvnc.bed]\n", "Saving to: ‘./Mcap_MBD_only_CpGs.bed’\n", "\n", "Mcap_MBD_only_CpGs. 
100%[===================>] 2.49M --.-KB/s in 0.06s \n", "\n", "2020-05-07 23:05:13 (41.9 MB/s) - ‘./Mcap_MBD_only_CpGs.bed’ saved [2609425/2609425]\n", "\n", "--2020-05-07 23:05:13-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Mcap/Mcap_RRBS_MBD_only_CpGs.bed\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 548161 (535K) [application/vnd.realvnc.bed]\n", "Saving to: ‘./Mcap_RRBS_MBD_only_CpGs.bed’\n", "\n", "Mcap_RRBS_MBD_only_ 100%[===================>] 535.31K --.-KB/s in 0.03s \n", "\n", "2020-05-07 23:05:13 (19.6 MB/s) - ‘./Mcap_RRBS_MBD_only_CpGs.bed’ saved [548161/548161]\n", "\n", "--2020-05-07 23:05:13-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Mcap/Mcap_RRBS_only_CpGs.bed\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 50870504 (49M) [application/vnd.realvnc.bed]\n", "Saving to: ‘./Mcap_RRBS_only_CpGs.bed’\n", "\n", "Mcap_RRBS_only_CpGs 100%[===================>] 48.51M 106MB/s in 0.5s \n", "\n", "2020-05-07 23:05:13 (106 MB/s) - ‘./Mcap_RRBS_only_CpGs.bed’ saved [50870504/50870504]\n", "\n", "--2020-05-07 23:05:13-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Mcap/Mcap_WGBS_MBD_only_CpGs.bed\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 15979974 (15M) [application/vnd.realvnc.bed]\n", "Saving to: ‘./Mcap_WGBS_MBD_only_CpGs.bed’\n", "\n", "Mcap_WGBS_MBD_only_ 100%[===================>] 15.24M 71.4MB/s in 0.2s \n", "\n", "2020-05-07 23:05:14 (71.4 MB/s) - ‘./Mcap_WGBS_MBD_only_CpGs.bed’ saved [15979974/15979974]\n", "\n", "--2020-05-07 23:05:14-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Mcap/Mcap_WGBS_RRBS_MBD_only_CpGs.bed\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 4636262 (4.4M) [application/vnd.realvnc.bed]\n", "Saving to: ‘./Mcap_WGBS_RRBS_MBD_only_CpGs.bed’\n", "\n", "Mcap_WGBS_RRBS_MBD_ 100%[===================>] 4.42M --.-KB/s in 0.06s \n", "\n", "2020-05-07 23:05:14 (78.0 MB/s) - ‘./Mcap_WGBS_RRBS_MBD_only_CpGs.bed’ saved [4636262/4636262]\n", "\n", "--2020-05-07 23:05:14-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Mcap/Mcap_WGBS_RRBS_only_CpGs.bed\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 56911301 (54M) [application/vnd.realvnc.bed]\n", "Saving to: ‘./Mcap_WGBS_RRBS_only_CpGs.bed’\n", "\n", "Mcap_WGBS_RRBS_only 100%[===================>] 54.27M 70.6MB/s in 0.8s \n", "\n", "2020-05-07 23:05:15 (70.6 MB/s) - ‘./Mcap_WGBS_RRBS_only_CpGs.bed’ saved [56911301/56911301]\n", "\n", "--2020-05-07 23:05:15-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Mcap/Mcap_WGBS_only_CpGs.bed\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 257688372 (246M) [application/vnd.realvnc.bed]\n", "Saving to: ‘./Mcap_WGBS_only_CpGs.bed’\n", "\n", "Mcap_WGBS_only_CpGs 100%[===================>] 245.75M 90.2MB/s in 2.7s \n", "\n", "2020-05-07 23:05:18 (90.2 MB/s) - ‘./Mcap_WGBS_only_CpGs.bed’ saved [257688372/257688372]\n", "\n", "--2020-05-07 23:05:18-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Mcap/Mcap_no_method_CpGs.bed\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 483146205 (461M) [application/vnd.realvnc.bed]\n", "Saving to: ‘./Mcap_no_method_CpGs.bed’\n", "\n", "Mcap_no_method_CpGs 100%[===================>] 460.76M 73.8MB/s in 6.0s \n", "\n", "2020-05-07 23:05:24 (77.3 MB/s) - ‘./Mcap_no_method_CpGs.bed’ saved [483146205/483146205]\n", "\n", "--2020-05-07 23:05:24-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Mcap/?C=N;O=A\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=N;O=A.tmp’\n", "\n", "index.html?C=N;O=A. [ <=> ] 7.88K --.-KB/s in 0s \n", "\n", "2020-05-07 23:05:24 (29.7 MB/s) - ‘./index.html?C=N;O=A.tmp’ saved [8072]\n", "\n", "Removing ./index.html?C=N;O=A.tmp since it should be rejected.\n", "\n", "--2020-05-07 23:05:24-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Mcap/?C=M;O=D\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=M;O=D.tmp’\n", "\n", "index.html?C=M;O=D. 
[ <=> ] 7.88K --.-KB/s in 0s \n", "\n", "2020-05-07 23:05:24 (34.2 MB/s) - ‘./index.html?C=M;O=D.tmp’ saved [8072]\n", "\n", "Removing ./index.html?C=M;O=D.tmp since it should be rejected.\n", "\n", "--2020-05-07 23:05:24-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Mcap/?C=S;O=D\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=S;O=D.tmp’\n", "\n", "index.html?C=S;O=D. [ <=> ] 7.88K --.-KB/s in 0s \n", "\n", "2020-05-07 23:05:24 (36.5 MB/s) - ‘./index.html?C=S;O=D.tmp’ saved [8072]\n", "\n", "Removing ./index.html?C=S;O=D.tmp since it should be rejected.\n", "\n", "--2020-05-07 23:05:24-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Mcap/?C=D;O=D\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=D;O=D.tmp’\n", "\n", "index.html?C=D;O=D. [ <=> ] 7.88K --.-KB/s in 0s \n", "\n", "2020-05-07 23:05:24 (30.7 MB/s) - ‘./index.html?C=D;O=D.tmp’ saved [8072]\n", "\n", "Removing ./index.html?C=D;O=D.tmp since it should be rejected.\n", "\n", "FINISHED --2020-05-07 23:05:24--\n", "Total wall clock time: 11s\n", "Downloaded: 17 files, 832M in 10s (81.0 MB/s)\n" ] } ], "source": [ "#Download Mcap upset plot data\n", "!wget -r \\\n", "--no-directories --no-parent --reject \"index.html*\" \\\n", "-P . 
\\\n", "-A CpGs.bed https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Mcap/" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mcap_MBD_only_CpGs.bed Mcap_WGBS_RRBS_MBD_only_CpGs.bed\r\n", "Mcap_RRBS_MBD_only_CpGs.bed Mcap_WGBS_RRBS_only_CpGs.bed\r\n", "Mcap_RRBS_only_CpGs.bed Mcap_WGBS_only_CpGs.bed\r\n", "Mcap_WGBS_MBD_only_CpGs.bed Mcap_no_method_CpGs.bed\r\n" ] } ], "source": [ "#Check downloaded files\n", "!ls *bed" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1\t37\t38\t1\tN/A\tN/A\tN/A\r\n", "1\t90\t91\t1\tN/A\tN/A\tN/A\r\n", "1\t121\t122\t1\tN/A\tN/A\tN/A\r\n", "1\t132\t133\t1\tN/A\tN/A\tN/A\r\n", "1\t153\t154\t1\tN/A\tN/A\tN/A\r\n", "1\t170\t171\t1\tN/A\tN/A\tN/A\r\n", "1\t220\t221\t1\tN/A\tN/A\tN/A\r\n", "1\t224\t225\t1\tN/A\tN/A\tN/A\r\n", "1\t233\t234\t1\tN/A\tN/A\tN/A\r\n", "1\t272\t273\t1\tN/A\tN/A\tN/A\r\n" ] } ], "source": [ "!head Mcap_no_method_CpGs.bed" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 88218 Mcap_MBD_only_CpGs.bed\n", " 19873 Mcap_RRBS_MBD_only_CpGs.bed\n", " 1722340 Mcap_RRBS_only_CpGs.bed\n", " 577695 Mcap_WGBS_MBD_only_CpGs.bed\n", " 180769 Mcap_WGBS_RRBS_MBD_only_CpGs.bed\n", " 2058468 Mcap_WGBS_RRBS_only_CpGs.bed\n", " 8692905 Mcap_WGBS_only_CpGs.bed\n", " 15335122 Mcap_no_method_CpGs.bed\n", " 28675390 total\n" ] } ], "source": [ "!wc -l *bed" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!wc -l *bed > Mcap-upset-data-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3. 
Characterize genomic locations of CpGs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 3a. Genes" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "%%bash\n", "\n", "for f in *bed\n", "do\n", " /usr/local/bin/intersectBed \\\n", " -wb \\\n", " -a ${f} \\\n", " -b ../../../genome-feature-files/Mcap.GFFannotation.gene.gff \\\n", " > ${f}-mcGenes\n", "done" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> Mcap_MBD_only_CpGs.bed-mcGenes <==\r\n", "1\t77097\t77098\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t64466\t84798\t1\t+\t.\tg21536\r\n", "1\t77146\t77147\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t64466\t84798\t1\t+\t.\tg21536\r\n", "1\t77152\t77153\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t64466\t84798\t1\t+\t.\tg21536\r\n", "1\t77180\t77181\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t64466\t84798\t1\t+\t.\tg21536\r\n", "1\t109671\t109672\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t100215\t109729\t0.99\t-\t.\tg21538\r\n", "1\t323037\t323038\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t323002\t331686\t1\t-\t.\tg21554\r\n", "1\t323052\t323053\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t323002\t331686\t1\t-\t.\tg21554\r\n", "1\t328383\t328384\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t323002\t331686\t1\t-\t.\tg21554\r\n", "1\t328387\t328388\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t323002\t331686\t1\t-\t.\tg21554\r\n", "1\t328413\t328414\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t323002\t331686\t1\t-\t.\tg21554\r\n", "\r\n", "==> Mcap_RRBS_MBD_only_CpGs.bed-mcGenes <==\r\n", "1\t323632\t323633\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t323002\t331686\t1\t-\t.\tg21554\r\n", "1\t328685\t328686\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t323002\t331686\t1\t-\t.\tg21554\r\n", "1\t533645\t533646\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t524298\t537113\t1\t-\t.\tg21573\r\n", "1\t533649\t533650\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t524298\t537113\t1\t-\t.\tg21573\r\n", 
"1\t533661\t533662\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t524298\t537113\t1\t-\t.\tg21573\r\n", "1\t533664\t533665\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t524298\t537113\t1\t-\t.\tg21573\r\n", "1\t533670\t533671\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t524298\t537113\t1\t-\t.\tg21573\r\n", "1\t638647\t638648\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t638269\t638736\t0.88\t-\t.\tg21584\r\n", "1\t1432366\t1432367\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t1431855\t1441370\t0.83\t-\t.\tg21638\r\n", "1\t1432387\t1432388\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t1431855\t1441370\t0.83\t-\t.\tg21638\r\n", "\r\n", "==> Mcap_RRBS_only_CpGs.bed-mcGenes <==\r\n", "1\t22446\t22447\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t22321\t27293\t0.23\t-\t.\tg21533\r\n", "1\t22506\t22507\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t22321\t27293\t0.23\t-\t.\tg21533\r\n", "1\t22514\t22515\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t22321\t27293\t0.23\t-\t.\tg21533\r\n", "1\t22532\t22533\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t22321\t27293\t0.23\t-\t.\tg21533\r\n", "1\t22535\t22536\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t22321\t27293\t0.23\t-\t.\tg21533\r\n", "1\t22548\t22549\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t22321\t27293\t0.23\t-\t.\tg21533\r\n", "1\t22564\t22565\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t22321\t27293\t0.23\t-\t.\tg21533\r\n", "1\t22576\t22577\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t22321\t27293\t0.23\t-\t.\tg21533\r\n", "1\t23118\t23119\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t22321\t27293\t0.23\t-\t.\tg21533\r\n", "1\t39434\t39435\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t37447\t52266\t1\t+\t.\tg21534\r\n", "\r\n", "==> Mcap_WGBS_MBD_only_CpGs.bed-mcGenes <==\r\n", "1\t59439\t59440\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t58322\t62557\t1\t-\t.\tg21535\r\n", "1\t81813\t81814\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t64466\t84798\t1\t+\t.\tg21536\r\n", "1\t106174\t106175\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t100215\t109729\t0.99\t-\t.\tg21538\r\n", "1\t106203\t106204\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t100215\t109729\t0.99\t-\t.\tg21538\r\n", 
"1\t238113\t238114\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t237955\t238956\t1\t+\t.\tg21550\r\n", "1\t238134\t238135\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t237955\t238956\t1\t+\t.\tg21550\r\n", "1\t323067\t323068\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t323002\t331686\t1\t-\t.\tg21554\r", "\r\n", "1\t323096\t323097\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t323002\t331686\t1\t-\t.\tg21554\r\n", "1\t323099\t323100\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t323002\t331686\t1\t-\t.\tg21554\r\n", "1\t323151\t323152\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t323002\t331686\t1\t-\t.\tg21554\r\n", "\r\n", "==> Mcap_WGBS_RRBS_MBD_only_CpGs.bed-mcGenes <==\r\n", "1\t81818\t81819\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t64466\t84798\t1\t+\t.\tg21536\r\n", "1\t81836\t81837\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t64466\t84798\t1\t+\t.\tg21536\r\n", "1\t81875\t81876\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t64466\t84798\t1\t+\t.\tg21536\r\n", "1\t81888\t81889\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t64466\t84798\t1\t+\t.\tg21536\r\n", "1\t323173\t323174\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t323002\t331686\t1\t-\t.\tg21554\r\n", "1\t323178\t323179\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t323002\t331686\t1\t-\t.\tg21554\r\n", "1\t323191\t323192\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t323002\t331686\t1\t-\t.\tg21554\r\n", "1\t323557\t323558\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t323002\t331686\t1\t-\t.\tg21554\r\n", "1\t323572\t323573\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t323002\t331686\t1\t-\t.\tg21554\r\n", "1\t323581\t323582\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t323002\t331686\t1\t-\t.\tg21554\r\n", "\r\n", "==> Mcap_WGBS_RRBS_only_CpGs.bed-mcGenes <==\r\n", "1\t23140\t23141\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t22321\t27293\t0.23\t-\t.\tg21533\r\n", "1\t23174\t23175\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t22321\t27293\t0.23\t-\t.\tg21533\r\n", "1\t23203\t23204\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t22321\t27293\t0.23\t-\t.\tg21533\r\n", "1\t23327\t23328\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t22321\t27293\t0.23\t-\t.\tg21533\r\n", "1\t23335\t23336\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t22321\t27293\t0.23\t-\t.\tg21533\r\n", 
"1\t23383\t23384\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t22321\t27293\t0.23\t-\t.\tg21533\r\n", "1\t40451\t40452\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t37447\t52266\t1\t+\t.\tg21534\r\n", "1\t40487\t40488\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t37447\t52266\t1\t+\t.\tg21534\r\n", "1\t40542\t40543\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t37447\t52266\t1\t+\t.\tg21534\r\n", "1\t40553\t40554\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t37447\t52266\t1\t+\t.\tg21534\r\n", "\r\n", "==> Mcap_WGBS_only_CpGs.bed-mcGenes <==\r\n", "1\t23004\t23005\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t22321\t27293\t0.23\t-\t.\tg21533\r\n", "1\t23007\t23008\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t22321\t27293\t0.23\t-\t.\tg21533\r\n", "1\t23020\t23021\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t22321\t27293\t0.23\t-\t.\tg21533\r\n", "1\t23405\t23406\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t22321\t27293\t0.23\t-\t.\tg21533\r\n", "1\t23426\t23427\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t22321\t27293\t0.23\t-\t.\tg21533\r\n", "1\t23446\t23447\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t22321\t27293\t0.23\t-\t.\tg21533\r\n", "1\t37556\t37557\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t37447\t52266\t1\t+\t.\tg21534\r\n", "1\t37568\t37569\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t37447\t52266\t1\t+\t.\tg21534\r\n", "1\t38218\t38219\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t37447\t52266\t1\t+\t.\tg21534\r\n", "1\t38238\t38239\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t37447\t52266\t1\t+\t.\tg21534\r\n", "\r\n", "==> Mcap_no_method_CpGs.bed-mcGenes <==\r\n", "1\t18418\t18419\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18387\t18755\t0.97\t-\t.\tg21532\r\n", "1\t18430\t18431\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18387\t18755\t0.97\t-\t.\tg21532\r\n", "1\t18481\t18482\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18387\t18755\t0.97\t-\t.\tg21532\r\n", "1\t18534\t18535\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18387\t18755\t0.97\t-\t.\tg21532\r\n", "1\t18539\t18540\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18387\t18755\t0.97\t-\t.\tg21532\r\n", 
"1\t18542\t18543\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18387\t18755\t0.97\t-\t.\tg21532\r\n", "1\t18544\t18545\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18387\t18755\t0.97\t-\t.\tg21532\r\n", "1\t18553\t18554\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18387\t18755\t0.97\t-\t.\tg21532\r\n", "1\t18635\t18636\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18387\t18755\t0.97\t-\t.\tg21532\r\n", "1\t18713\t18714\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18387\t18755\t0.97\t-\t.\tg21532\r\n" ] } ], "source": [ "#Check output\n", "!head *mcGenes" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 32304 Mcap_MBD_only_CpGs.bed-mcGenes\n", " 7213 Mcap_RRBS_MBD_only_CpGs.bed-mcGenes\n", " 522515 Mcap_RRBS_only_CpGs.bed-mcGenes\n", " 208880 Mcap_WGBS_MBD_only_CpGs.bed-mcGenes\n", " 61140 Mcap_WGBS_RRBS_MBD_only_CpGs.bed-mcGenes\n", " 716214 Mcap_WGBS_RRBS_only_CpGs.bed-mcGenes\n", " 3154364 Mcap_WGBS_only_CpGs.bed-mcGenes\n", " 4747934 Mcap_no_method_CpGs.bed-mcGenes\n", " 9450564 total\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l *mcGenes" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!wc -l *mcGenes > Mcap-upset-data-mcGenes-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 3b. 
Coding Sequences (CDS)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "%%bash\n", "\n", "#Stop at the first failed command so a broken intersect cannot leave a silently empty or partial output file\n", "set -euo pipefail\n", "\n", "#Intersect every CpG category BED file with the CDS track (-wb appends the overlapping GFF record to each CpG)\n", "for f in *bed\n", "do\n", " /usr/local/bin/intersectBed \\\n", " -wb \\\n", " -a \"${f}\" \\\n", " -b ../../../genome-feature-files/Mcap.GFFannotation.CDS.gff \\\n", " > \"${f}-mcCDS\"\n", "done" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> Mcap_MBD_only_CpGs.bed-mcCDS <==\r\n", "1\t109671\t109672\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tCDS\t109638\t109729\t1\t-\t0\ttranscript_id \"g21538.t1\"; gene_id \"g21538\";\r\n", "1\t323037\t323038\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tCDS\t323002\t323103\t1\t-\t0\ttranscript_id \"g21554.t1\"; gene_id \"g21554\";\r\n", "1\t323052\t323053\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tCDS\t323002\t323103\t1\t-\t0\ttranscript_id \"g21554.t1\"; gene_id \"g21554\";\r\n", "1\t362188\t362189\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tCDS\t361628\t363571\t0.97\t-\t0\ttranscript_id \"g21558.t1\"; gene_id \"g21558\";\r\n", "1\t363058\t363059\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tCDS\t361628\t363571\t0.97\t-\t0\ttranscript_id \"g21558.t1\"; gene_id \"g21558\";\r\n", "1\t363079\t363080\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tCDS\t361628\t363571\t0.97\t-\t0\ttranscript_id \"g21558.t1\"; gene_id \"g21558\";\r\n", "1\t363111\t363112\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tCDS\t361628\t363571\t0.97\t-\t0\ttranscript_id \"g21558.t1\"; gene_id \"g21558\";\r\n", "1\t402886\t402887\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tCDS\t401783\t403405\t0.76\t+\t0\ttranscript_id \"g21560.t1\"; gene_id \"g21560\";\r\n", "1\t435465\t435466\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tCDS\t435136\t435468\t1\t+\t0\ttranscript_id \"g21564.t1\"; gene_id \"g21564\";\r\n", "1\t525494\t525495\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tCDS\t524298\t526199\t1\t-\t0\ttranscript_id \"g21573.t1\"; gene_id \"g21573\";\r\n", "\r\n", "==> Mcap_RRBS_MBD_only_CpGs.bed-mcCDS <==\r\n", 
"1\t638647\t638648\t1\tN/A\t1\t1\t1\tAUGUSTUS\tCDS\t638269\t638736\t0.88\t-\t0\ttranscript_id \"g21584.t1\"; gene_id \"g21584\";\r\n", "1\t1432366\t1432367\t1\tN/A\t1\t1\t1\tAUGUSTUS\tCDS\t1431855\t1432904\t1\t-\t0\ttranscript_id \"g21638.t1\"; gene_id \"g21638\";\r\n", "1\t1432387\t1432388\t1\tN/A\t1\t1\t1\tAUGUSTUS\tCDS\t1431855\t1432904\t1\t-\t0\ttranscript_id \"g21638.t1\"; gene_id \"g21638\";\r\n", "1\t1432428\t1432429\t1\tN/A\t1\t1\t1\tAUGUSTUS\tCDS\t1431855\t1432904\t1\t-\t0\ttranscript_id \"g21638.t1\"; gene_id \"g21638\";\r\n", "10\t1136212\t1136213\t1\tN/A\t1\t1\t10\tAUGUSTUS\tCDS\t1136204\t1136419\t0.89\t+\t0\ttranscript_id \"g23405.t1\"; gene_id \"g23405\";\r\n", "10\t1136227\t1136228\t1\tN/A\t1\t1\t10\tAUGUSTUS\tCDS\t1136204\t1136419\t0.89\t+\t0\ttranscript_id \"g23405.t1\"; gene_id \"g23405\";\r\n", "10\t1246160\t1246161\t1\tN/A\t1\t1\t10\tAUGUSTUS\tCDS\t1245387\t1246860\t0.8\t+\t2\ttranscript_id \"g23447.t1\"; gene_id \"g23447\";\r\n", "10\t1246175\t1246176\t1\tN/A\t1\t1\t10\tAUGUSTUS\tCDS\t1245387\t1246860\t0.8\t+\t2\ttranscript_id \"g23447.t1\"; gene_id \"g23447\";\r\n", "10\t1246189\t1246190\t1\tN/A\t1\t1\t10\tAUGUSTUS\tCDS\t1245387\t1246860\t0.8\t+\t2\ttranscript_id \"g23447.t1\"; gene_id \"g23447\";\r\n", "10\t1246216\t1246217\t1\tN/A\t1\t1\t10\tAUGUSTUS\tCDS\t1245387\t1246860\t0.8\t+\t2\ttranscript_id \"g23447.t1\"; gene_id \"g23447\";\r\n", "\r\n", "==> Mcap_RRBS_only_CpGs.bed-mcCDS <==\r\n", "1\t22446\t22447\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tCDS\t22321\t22608\t0.55\t-\t0\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\r\n", "1\t22506\t22507\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tCDS\t22321\t22608\t0.55\t-\t0\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\r\n", "1\t22514\t22515\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tCDS\t22321\t22608\t0.55\t-\t0\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\r\n", "1\t22532\t22533\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tCDS\t22321\t22608\t0.55\t-\t0\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\r\n", 
"1\t22535\t22536\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tCDS\t22321\t22608\t0.55\t-\t0\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\r\n", "1\t22548\t22549\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tCDS\t22321\t22608\t0.55\t-\t0\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\r\n", "1\t22564\t22565\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tCDS\t22321\t22608\t0.55\t-\t0\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\r\n", "1\t22576\t22577\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tCDS\t22321\t22608\t0.55\t-\t0\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\r\n", "1\t45047\t45048\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tCDS\t45038\t45208\t1\t+\t2\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\r\n", "1\t45071\t45072\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tCDS\t45038\t45208\t1\t+\t2\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\r\n", "\r\n", "==> Mcap_WGBS_MBD_only_CpGs.bed-mcCDS <==\r\n", "1\t59439\t59440\t1\t1\tN/A\t1\t1\tAUGUSTUS\tCDS\t58322\t59506\t1\t-\t0\ttranscript_id \"g21535.t1\"; gene_id \"g21535\";\r\n", "1\t238113\t238114\t1\t1\tN/A\t1\t1\tAUGUSTUS\tCDS\t237955\t238956\t1\t+\t0\ttranscript_id \"g21550.t1\"; gene_id \"g21550\";\r\n", "1\t238134\t238135\t1\t1\tN/A\t1\t1\tAUGUSTUS\tCDS\t237955\t238956\t1\t+\t0\ttranscript_id \"g21550.t1\"; gene_id \"g21550\";\r\n", "1\t323067\t323068\t1\t1\tN/A\t1\t1\tAUGUSTUS\tCDS\t323002\t323103\t1\t-\t0\ttranscript_id \"g21554.t1\"; gene_id \"g21554\";\r\n", "1\t323096\t323097\t1\t1\tN/A\t1\t1\tAUGUSTUS\tCDS\t323002\t323103\t1\t-\t0\ttranscript_id \"g21554.t1\"; gene_id \"g21554\";\r\n", "1\t323099\t323100\t1\t1\tN/A\t1\t1\tAUGUSTUS\tCDS\t323002\t323103\t1\t-\t0\ttranscript_id \"g21554.t1\"; gene_id \"g21554\";\r\n", "1\t354587\t354588\t1\t1\tN/A\t1\t1\tAUGUSTUS\tCDS\t354547\t354629\t1\t+\t2\ttranscript_id \"g21557.t1\"; gene_id \"g21557\";\r\n", "1\t354617\t354618\t1\t1\tN/A\t1\t1\tAUGUSTUS\tCDS\t354547\t354629\t1\t+\t2\ttranscript_id \"g21557.t1\"; gene_id \"g21557\";\r\n", "1\t354623\t354624\t1\t1\tN/A\t1\t1\tAUGUSTUS\tCDS\t354547\t354629\t1\t+\t2\ttranscript_id 
\"g21557.t1\"; gene_id \"g21557\";\r\n", "1\t361976\t361977\t1\t1\tN/A\t1\t1\tAUGUSTUS\tCDS\t361628\t363571\t0.97\t-\t0\ttranscript_id \"g21558.t1\"; gene_id \"g21558\";\r\n", "\r\n", "==> Mcap_WGBS_RRBS_MBD_only_CpGs.bed-mcCDS <==\r\n", "1\t525768\t525769\t1\t1\t1\t1\t1\tAUGUSTUS\tCDS\t524298\t526199\t1\t-\t0\ttranscript_id \"g21573.t1\"; gene_id \"g21573\";\r\n", "1\t525778\t525779\t1\t1\t1\t1\t1\tAUGUSTUS\tCDS\t524298\t526199\t1\t-\t0\ttranscript_id \"g21573.t1\"; gene_id \"g21573\";\r\n", "1\t525791\t525792\t1\t1\t1\t1\t1\tAUGUSTUS\tCDS\t524298\t526199\t1\t-\t0\ttranscript_id \"g21573.t1\"; gene_id \"g21573\";\r\n", "1\t525822\t525823\t1\t1\t1\t1\t1\tAUGUSTUS\tCDS\t524298\t526199\t1\t-\t0\ttranscript_id \"g21573.t1\"; gene_id \"g21573\";\r\n", "1\t525953\t525954\t1\t1\t1\t1\t1\tAUGUSTUS\tCDS\t524298\t526199\t1\t-\t0\ttranscript_id \"g21573.t1\"; gene_id \"g21573\";\r\n", "1\t601713\t601714\t1\t1\t1\t1\t1\tAUGUSTUS\tCDS\t601281\t601955\t1\t+\t0\ttranscript_id \"g21580.t1\"; gene_id \"g21580\";\r\n", "1\t601730\t601731\t1\t1\t1\t1\t1\tAUGUSTUS\tCDS\t601281\t601955\t1\t+\t0\ttranscript_id \"g21580.t1\"; gene_id \"g21580\";\r\n", "1\t601748\t601749\t1\t1\t1\t1\t1\tAUGUSTUS\tCDS\t601281\t601955\t1\t+\t0\ttranscript_id \"g21580.t1\"; gene_id \"g21580\";\r\n", "1\t607815\t607816\t1\t1\t1\t1\t1\tAUGUSTUS\tCDS\t607804\t608337\t1\t+\t0\ttranscript_id \"g21581.t1\"; gene_id \"g21581\";\r\n", "1\t607822\t607823\t1\t1\t1\t1\t1\tAUGUSTUS\tCDS\t607804\t608337\t1\t+\t0\ttranscript_id \"g21581.t1\"; gene_id \"g21581\";\r\n", "\r\n", "==> Mcap_WGBS_RRBS_only_CpGs.bed-mcCDS <==\r\n", "1\t45111\t45112\t1\t1\t1\tN/A\t1\tAUGUSTUS\tCDS\t45038\t45208\t1\t+\t2\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\r\n", "1\t45117\t45118\t1\t1\t1\tN/A\t1\tAUGUSTUS\tCDS\t45038\t45208\t1\t+\t2\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\r\n", "1\t45129\t45130\t1\t1\t1\tN/A\t1\tAUGUSTUS\tCDS\t45038\t45208\t1\t+\t2\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\r\n", 
"1\t45200\t45201\t1\t1\t1\tN/A\t1\tAUGUSTUS\tCDS\t45038\t45208\t1\t+\t2\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\r\n", "1\t46649\t46650\t1\t1\t1\tN/A\t1\tAUGUSTUS\tCDS\t46625\t47272\t1\t+\t2\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\r\n", "1\t58619\t58620\t1\t1\t1\tN/A\t1\tAUGUSTUS\tCDS\t58322\t59506\t1\t-\t0\ttranscript_id \"g21535.t1\"; gene_id \"g21535\";\r\n", "1\t58746\t58747\t1\t1\t1\tN/A\t1\tAUGUSTUS\tCDS\t58322\t59506\t1\t-\t0\ttranscript_id \"g21535.t1\"; gene_id \"g21535\";\r\n", "1\t156959\t156960\t1\t1\t1\tN/A\t1\tAUGUSTUS\tCDS\t156444\t157268\t1\t+\t0\ttranscript_id \"g21543.t1\"; gene_id \"g21543\";\r\n", "1\t156978\t156979\t1\t1\t1\tN/A\t1\tAUGUSTUS\tCDS\t156444\t157268\t1\t+\t0\ttranscript_id \"g21543.t1\"; gene_id \"g21543\";\r\n", "1\t156984\t156985\t1\t1\t1\tN/A\t1\tAUGUSTUS\tCDS\t156444\t157268\t1\t+\t0\ttranscript_id \"g21543.t1\"; gene_id \"g21543\";\r\n", "\r\n", "==> Mcap_WGBS_only_CpGs.bed-mcCDS <==\r\n", "1\t37556\t37557\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tCDS\t37447\t37810\t1\t+\t0\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\r\n", "1\t37568\t37569\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tCDS\t37447\t37810\t1\t+\t0\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\r\n", "1\t46634\t46635\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tCDS\t46625\t47272\t1\t+\t2\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\r\n", "1\t46643\t46644\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tCDS\t46625\t47272\t1\t+\t2\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\r\n", "1\t51925\t51926\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tCDS\t51903\t52266\t1\t+\t1\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\r\n", "1\t51933\t51934\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tCDS\t51903\t52266\t1\t+\t1\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\r\n", "1\t51935\t51936\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tCDS\t51903\t52266\t1\t+\t1\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\r\n", "1\t51939\t51940\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tCDS\t51903\t52266\t1\t+\t1\ttranscript_id \"g21534.t1\"; gene_id 
\"g21534\";\r\n", "1\t51960\t51961\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tCDS\t51903\t52266\t1\t+\t1\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\r\n", "1\t51971\t51972\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tCDS\t51903\t52266\t1\t+\t1\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\r\n", "\r\n", "==> Mcap_no_method_CpGs.bed-mcCDS <==\r\n", "1\t18418\t18419\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tCDS\t18387\t18755\t0.97\t-\t0\ttranscript_id \"g21532.t1\"; gene_id \"g21532\";\r\n", "1\t18430\t18431\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tCDS\t18387\t18755\t0.97\t-\t0\ttranscript_id \"g21532.t1\"; gene_id \"g21532\";\r\n", "1\t18481\t18482\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tCDS\t18387\t18755\t0.97\t-\t0\ttranscript_id \"g21532.t1\"; gene_id \"g21532\";\r\n", "1\t18534\t18535\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tCDS\t18387\t18755\t0.97\t-\t0\ttranscript_id \"g21532.t1\"; gene_id \"g21532\";\r\n", "1\t18539\t18540\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tCDS\t18387\t18755\t0.97\t-\t0\ttranscript_id \"g21532.t1\"; gene_id \"g21532\";\r\n", "1\t18542\t18543\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tCDS\t18387\t18755\t0.97\t-\t0\ttranscript_id \"g21532.t1\"; gene_id \"g21532\";\r\n", "1\t18544\t18545\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tCDS\t18387\t18755\t0.97\t-\t0\ttranscript_id \"g21532.t1\"; gene_id \"g21532\";\r\n", "1\t18553\t18554\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tCDS\t18387\t18755\t0.97\t-\t0\ttranscript_id \"g21532.t1\"; gene_id \"g21532\";\r\n", "1\t18635\t18636\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tCDS\t18387\t18755\t0.97\t-\t0\ttranscript_id \"g21532.t1\"; gene_id \"g21532\";\r\n", "1\t18713\t18714\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tCDS\t18387\t18755\t0.97\t-\t0\ttranscript_id \"g21532.t1\"; gene_id \"g21532\";\r\n" ] } ], "source": [ "#Check output\n", "!head *mcCDS" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 6318 Mcap_MBD_only_CpGs.bed-mcCDS\n", " 1444 Mcap_RRBS_MBD_only_CpGs.bed-mcCDS\n", " 71568 
Mcap_RRBS_only_CpGs.bed-mcCDS\n", " 76504 Mcap_WGBS_MBD_only_CpGs.bed-mcCDS\n", " 21396 Mcap_WGBS_RRBS_MBD_only_CpGs.bed-mcCDS\n", " 149890 Mcap_WGBS_RRBS_only_CpGs.bed-mcCDS\n", " 746460 Mcap_WGBS_only_CpGs.bed-mcCDS\n", " 876856 Mcap_no_method_CpGs.bed-mcCDS\n", " 1950436 total\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l *mcCDS" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!wc -l *mcCDS > Mcap-upset-data-mcCDS-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 3c. Introns" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "%%bash\n", "# Intersect every CpG-category BED file with the intron annotation.\n", "# -wb appends the matching GFF record to each hit, giving one output line per overlap.\n", "set -euo pipefail # stop at the first failed intersect so partial output is never counted\n", "INTERSECT_BED=\"${INTERSECT_BED:-/usr/local/bin/intersectBed}\" # overridable; defaults to the original path\n", "\n", "for f in *bed\n", "do\n", " \"${INTERSECT_BED}\" \\\n", " -wb \\\n", " -a \"${f}\" \\\n", " -b ../../../genome-feature-files/Mcap.GFFannotation.intron.gff \\\n", " > \"${f}-mcIntrons\"\n", "done" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> Mcap_MBD_only_CpGs.bed-mcIntrons <==\n", "1\t77097\t77098\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tintron\t76753\t79038\t1\t+\t.\ttranscript_id \"g21536.t1\"; gene_id \"g21536\";\n", "1\t77146\t77147\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tintron\t76753\t79038\t1\t+\t.\ttranscript_id \"g21536.t1\"; gene_id \"g21536\";\n", "1\t77152\t77153\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tintron\t76753\t79038\t1\t+\t.\ttranscript_id \"g21536.t1\"; gene_id \"g21536\";\n", "1\t77180\t77181\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tintron\t76753\t79038\t1\t+\t.\ttranscript_id \"g21536.t1\"; gene_id \"g21536\";\n", "1\t328383\t328384\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tintron\t328309\t331581\t1\t-\t.\ttranscript_id \"g21554.t1\"; gene_id \"g21554\";\n", "1\t328387\t328388\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tintron\t328309\t331581\t1\t-\t.\ttranscript_id \"g21554.t1\"; gene_id \"g21554\";\n", 
"1\t328413\t328414\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tintron\t328309\t331581\t1\t-\t.\ttranscript_id \"g21554.t1\"; gene_id \"g21554\";\n", "1\t328478\t328479\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tintron\t328309\t331581\t1\t-\t.\ttranscript_id \"g21554.t1\"; gene_id \"g21554\";\n", "1\t344032\t344033\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tintron\t341159\t344880\t1\t-\t.\ttranscript_id \"g21555.t1\"; gene_id \"g21555\";\n", "1\t344045\t344046\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tintron\t341159\t344880\t1\t-\t.\ttranscript_id \"g21555.t1\"; gene_id \"g21555\";\n", "\n", "==> Mcap_RRBS_MBD_only_CpGs.bed-mcIntrons <==\n", "1\t323632\t323633\t1\tN/A\t1\t1\t1\tAUGUSTUS\tintron\t323104\t325589\t1\t-\t.\ttranscript_id \"g21554.t1\"; gene_id \"g21554\";\n", "1\t328685\t328686\t1\tN/A\t1\t1\t1\tAUGUSTUS\tintron\t328309\t331581\t1\t-\t.\ttranscript_id \"g21554.t1\"; gene_id \"g21554\";\n", "1\t533645\t533646\t1\tN/A\t1\t1\t1\tAUGUSTUS\tintron\t532687\t534535\t1\t-\t.\ttranscript_id \"g21573.t1\"; gene_id \"g21573\";\n", "1\t533649\t533650\t1\tN/A\t1\t1\t1\tAUGUSTUS\tintron\t532687\t534535\t1\t-\t.\ttranscript_id \"g21573.t1\"; gene_id \"g21573\";\n", "1\t533661\t533662\t1\tN/A\t1\t1\t1\tAUGUSTUS\tintron\t532687\t534535\t1\t-\t.\ttranscript_id \"g21573.t1\"; gene_id \"g21573\";\n", "1\t533664\t533665\t1\tN/A\t1\t1\t1\tAUGUSTUS\tintron\t532687\t534535\t1\t-\t.\ttranscript_id \"g21573.t1\"; gene_id \"g21573\";\n", "1\t533670\t533671\t1\tN/A\t1\t1\t1\tAUGUSTUS\tintron\t532687\t534535\t1\t-\t.\ttranscript_id \"g21573.t1\"; gene_id \"g21573\";\n", "1\t1572418\t1572419\t1\tN/A\t1\t1\t1\tAUGUSTUS\tintron\t1572205\t1573610\t1\t+\t.\ttranscript_id \"g21645.t1\"; gene_id \"g21645\";\n", "1\t1572434\t1572435\t1\tN/A\t1\t1\t1\tAUGUSTUS\tintron\t1572205\t1573610\t1\t+\t.\ttranscript_id \"g21645.t1\"; gene_id \"g21645\";\n", "1\t1987588\t1987589\t1\tN/A\t1\t1\t1\tAUGUSTUS\tintron\t1986330\t1988617\t1\t+\t.\ttranscript_id \"g21685.t1\"; gene_id \"g21685\";\n", "\n", "==> Mcap_RRBS_only_CpGs.bed-mcIntrons <==\n", 
"1\t23118\t23119\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tintron\t22609\t26300\t0.25\t-\t.\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\n", "1\t39434\t39435\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tintron\t37811\t45037\t1\t+\t.\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\n", "1\t39478\t39479\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tintron\t37811\t45037\t1\t+\t.\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\n", "1\t39510\t39511\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tintron\t37811\t45037\t1\t+\t.\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\n", "1\t39553\t39554\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tintron\t37811\t45037\t1\t+\t.\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\n", "1\t39560\t39561\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tintron\t37811\t45037\t1\t+\t.\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\n", "1\t39784\t39785\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tintron\t37811\t45037\t1\t+\t.\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\n", "1\t39819\t39820\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tintron\t37811\t45037\t1\t+\t.\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\n", "1\t39822\t39823\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tintron\t37811\t45037\t1\t+\t.\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\n", "1\t39851\t39852\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tintron\t37811\t45037\t1\t+\t.\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\n", "\n", "==> Mcap_WGBS_MBD_only_CpGs.bed-mcIntrons <==\n", "1\t81813\t81814\t1\t1\tN/A\t1\t1\tAUGUSTUS\tintron\t80701\t82492\t1\t+\t.\ttranscript_id \"g21536.t1\"; gene_id \"g21536\";\n", "1\t106174\t106175\t1\t1\tN/A\t1\t1\tAUGUSTUS\tintron\t104815\t109637\t1\t-\t.\ttranscript_id \"g21538.t1\"; gene_id \"g21538\";\n", "1\t106203\t106204\t1\t1\tN/A\t1\t1\tAUGUSTUS\tintron\t104815\t109637\t1\t-\t.\ttranscript_id \"g21538.t1\"; gene_id \"g21538\";\n", "1\t323151\t323152\t1\t1\tN/A\t1\t1\tAUGUSTUS\tintron\t323104\t325589\t1\t-\t.\ttranscript_id \"g21554.t1\"; gene_id \"g21554\";\n", "1\t323158\t323159\t1\t1\tN/A\t1\t1\tAUGUSTUS\tintron\t323104\t325589\t1\t-\t.\ttranscript_id 
\"g21554.t1\"; gene_id \"g21554\";\n", "1\t324316\t324317\t1\t1\tN/A\t1\t1\tAUGUSTUS\tintron\t323104\t325589\t1\t-\t.\ttranscript_id \"g21554.t1\"; gene_id \"g21554\";\n", "1\t324333\t324334\t1\t1\tN/A\t1\t1\tAUGUSTUS\tintron\t323104\t325589\t1\t-\t.\ttranscript_id \"g21554.t1\"; gene_id \"g21554\";\n", "1\t324349\t324350\t1\t1\tN/A\t1\t1\tAUGUSTUS\tintron\t323104\t325589\t1\t-\t.\ttranscript_id \"g21554.t1\"; gene_id \"g21554\";\n", "1\t324369\t324370\t1\t1\tN/A\t1\t1\tAUGUSTUS\tintron\t323104\t325589\t1\t-\t.\ttranscript_id \"g21554.t1\"; gene_id \"g21554\";\n", "1\t324425\t324426\t1\t1\tN/A\t1\t1\tAUGUSTUS\tintron\t323104\t325589\t1\t-\t.\ttranscript_id \"g21554.t1\"; gene_id \"g21554\";\n", "\n", "==> Mcap_WGBS_RRBS_MBD_only_CpGs.bed-mcIntrons <==\n", "1\t81818\t81819\t1\t1\t1\t1\t1\tAUGUSTUS\tintron\t80701\t82492\t1\t+\t.\ttranscript_id \"g21536.t1\"; gene_id \"g21536\";\n", "1\t81836\t81837\t1\t1\t1\t1\t1\tAUGUSTUS\tintron\t80701\t82492\t1\t+\t.\ttranscript_id \"g21536.t1\"; gene_id \"g21536\";\n", "1\t81875\t81876\t1\t1\t1\t1\t1\tAUGUSTUS\tintron\t80701\t82492\t1\t+\t.\ttranscript_id \"g21536.t1\"; gene_id \"g21536\";\n", "1\t81888\t81889\t1\t1\t1\t1\t1\tAUGUSTUS\tintron\t80701\t82492\t1\t+\t.\ttranscript_id \"g21536.t1\"; gene_id \"g21536\";\n", "1\t323173\t323174\t1\t1\t1\t1\t1\tAUGUSTUS\tintron\t323104\t325589\t1\t-\t.\ttranscript_id \"g21554.t1\"; gene_id \"g21554\";\n", "1\t323178\t323179\t1\t1\t1\t1\t1\tAUGUSTUS\tintron\t323104\t325589\t1\t-\t.\ttranscript_id \"g21554.t1\"; gene_id \"g21554\";\n", "1\t323191\t323192\t1\t1\t1\t1\t1\tAUGUSTUS\tintron\t323104\t325589\t1\t-\t.\ttranscript_id \"g21554.t1\"; gene_id \"g21554\";\n", "1\t323557\t323558\t1\t1\t1\t1\t1\tAUGUSTUS\tintron\t323104\t325589\t1\t-\t.\ttranscript_id \"g21554.t1\"; gene_id \"g21554\";\n", "1\t323572\t323573\t1\t1\t1\t1\t1\tAUGUSTUS\tintron\t323104\t325589\t1\t-\t.\ttranscript_id \"g21554.t1\"; gene_id \"g21554\";\n", 
"1\t323581\t323582\t1\t1\t1\t1\t1\tAUGUSTUS\tintron\t323104\t325589\t1\t-\t.\ttranscript_id \"g21554.t1\"; gene_id \"g21554\";\n", "\n", "==> Mcap_WGBS_RRBS_only_CpGs.bed-mcIntrons <==\n", "1\t23140\t23141\t1\t1\t1\tN/A\t1\tAUGUSTUS\tintron\t22609\t26300\t0.25\t-\t.\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\n", "1\t23174\t23175\t1\t1\t1\tN/A\t1\tAUGUSTUS\tintron\t22609\t26300\t0.25\t-\t.\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\n", "1\t23203\t23204\t1\t1\t1\tN/A\t1\tAUGUSTUS\tintron\t22609\t26300\t0.25\t-\t.\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\n", "1\t23327\t23328\t1\t1\t1\tN/A\t1\tAUGUSTUS\tintron\t22609\t26300\t0.25\t-\t.\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\n", "1\t23335\t23336\t1\t1\t1\tN/A\t1\tAUGUSTUS\tintron\t22609\t26300\t0.25\t-\t.\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\n", "1\t23383\t23384\t1\t1\t1\tN/A\t1\tAUGUSTUS\tintron\t22609\t26300\t0.25\t-\t.\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\n", "1\t40451\t40452\t1\t1\t1\tN/A\t1\tAUGUSTUS\tintron\t37811\t45037\t1\t+\t.\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\n", "1\t40487\t40488\t1\t1\t1\tN/A\t1\tAUGUSTUS\tintron\t37811\t45037\t1\t+\t.\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\n", "1\t40542\t40543\t1\t1\t1\tN/A\t1\tAUGUSTUS\tintron\t37811\t45037\t1\t+\t.\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\n", "1\t40553\t40554\t1\t1\t1\tN/A\t1\tAUGUSTUS\tintron\t37811\t45037\t1\t+\t.\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\n", "\n", "==> Mcap_WGBS_only_CpGs.bed-mcIntrons <==\n", "1\t23004\t23005\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tintron\t22609\t26300\t0.25\t-\t.\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\n", "1\t23007\t23008\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tintron\t22609\t26300\t0.25\t-\t.\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\n", "1\t23020\t23021\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tintron\t22609\t26300\t0.25\t-\t.\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\n", 
"1\t23405\t23406\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tintron\t22609\t26300\t0.25\t-\t.\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\n", "1\t23426\t23427\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tintron\t22609\t26300\t0.25\t-\t.\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\n", "1\t23446\t23447\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tintron\t22609\t26300\t0.25\t-\t.\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\n", "1\t38218\t38219\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tintron\t37811\t45037\t1\t+\t.\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\n", "1\t38238\t38239\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tintron\t37811\t45037\t1\t+\t.\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\n", "1\t40254\t40255\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tintron\t37811\t45037\t1\t+\t.\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\n", "1\t40358\t40359\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tintron\t37811\t45037\t1\t+\t.\ttranscript_id \"g21534.t1\"; gene_id \"g21534\";\n", "\n", "==> Mcap_no_method_CpGs.bed-mcIntrons <==\n", "1\t22626\t22627\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tintron\t22609\t26300\t0.25\t-\t.\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\n", "1\t22647\t22648\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tintron\t22609\t26300\t0.25\t-\t.\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\n", "1\t22651\t22652\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tintron\t22609\t26300\t0.25\t-\t.\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\n", "1\t22689\t22690\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tintron\t22609\t26300\t0.25\t-\t.\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\n", "1\t22695\t22696\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tintron\t22609\t26300\t0.25\t-\t.\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\n", "1\t22712\t22713\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tintron\t22609\t26300\t0.25\t-\t.\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\n", "1\t22727\t22728\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tintron\t22609\t26300\t0.25\t-\t.\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\n", 
"1\t22755\t22756\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tintron\t22609\t26300\t0.25\t-\t.\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\n", "1\t22821\t22822\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tintron\t22609\t26300\t0.25\t-\t.\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\n", "1\t22836\t22837\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tintron\t22609\t26300\t0.25\t-\t.\ttranscript_id \"g21533.t1\"; gene_id \"g21533\";\n" ] } ], "source": [ "#Check output\n", "!head *mcIntrons" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 25986 Mcap_MBD_only_CpGs.bed-mcIntrons\n", " 5771 Mcap_RRBS_MBD_only_CpGs.bed-mcIntrons\n", " 451002 Mcap_RRBS_only_CpGs.bed-mcIntrons\n", " 132376 Mcap_WGBS_MBD_only_CpGs.bed-mcIntrons\n", " 39745 Mcap_WGBS_RRBS_MBD_only_CpGs.bed-mcIntrons\n", " 566347 Mcap_WGBS_RRBS_only_CpGs.bed-mcIntrons\n", " 2407953 Mcap_WGBS_only_CpGs.bed-mcIntrons\n", " 3871291 Mcap_no_method_CpGs.bed-mcIntrons\n", " 7500471 total\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l *mcIntrons" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!wc -l *mcIntrons > Mcap-upset-data-mcIntrons-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 3d. 
Flanking regions" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "%%bash\n", "# Intersect every CpG-category BED file with the flanking-region annotation.\n", "# -wb appends the matching GFF record to each hit, giving one output line per overlap.\n", "set -euo pipefail # stop at the first failed intersect so partial output is never counted\n", "INTERSECT_BED=\"${INTERSECT_BED:-/usr/local/bin/intersectBed}\" # overridable; defaults to the original path\n", "\n", "for f in *bed\n", "do\n", " \"${INTERSECT_BED}\" \\\n", " -wb \\\n", " -a \"${f}\" \\\n", " -b ../../../genome-feature-files/Mcap.GFFannotation.flanks.gff \\\n", " > \"${f}-mcFlanks\"\n", "done" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> Mcap_MBD_only_CpGs.bed-mcFlanks <==\n", "1\t582171\t582172\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t581816\t582815\t0.67\t-\t.\tg21577\n", "1\t582227\t582228\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t581816\t582815\t0.67\t-\t.\tg21577\n", "1\t1063132\t1063133\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t1063077\t1064076\t1\t+\t.\tg21615\n", "1\t1063205\t1063206\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t1063077\t1064076\t1\t+\t.\tg21615\n", "1\t1063283\t1063284\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t1063077\t1064076\t1\t+\t.\tg21615\n", "1\t1063293\t1063294\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t1063077\t1064076\t1\t+\t.\tg21615\n", "1\t1143040\t1143041\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t1142462\t1143461\t1\t+\t.\tg21621\n", "1\t1143044\t1143045\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t1142462\t1143461\t1\t+\t.\tg21621\n", "1\t1326380\t1326381\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t1325918\t1326917\t1\t-\t.\tg21630\n", "1\t1326395\t1326396\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t1325918\t1326917\t1\t-\t.\tg21630\n", "\n", "==> Mcap_RRBS_MBD_only_CpGs.bed-mcFlanks <==\n", "1\t322964\t322965\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t322002\t323001\t1\t-\t.\tg21554\n", "1\t618324\t618325\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t617737\t618736\t1\t+\t.\tg21582\n", "1\t618328\t618329\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t617737\t618736\t1\t+\t.\tg21582\n", "1\t1143106\t1143107\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t1142462\t1143461\t1\t+\t.\tg21621\n", 
"1\t1143121\t1143122\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t1142462\t1143461\t1\t+\t.\tg21621\n", "1\t2304929\t2304930\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t2304316\t2305315\t1\t+\t.\tg21704\n", "1\t2370126\t2370127\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t2369843\t2370842\t1\t+\t.\tg21706\n", "1\t2370130\t2370131\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t2369843\t2370842\t1\t+\t.\tg21706\n", "1\t2747593\t2747594\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t2747256\t2748255\t1\t-\t.\tg21730\n", "1\t2900298\t2900299\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t2899356\t2900355\t1\t-\t.\tg21742\n", "\n", "==> Mcap_RRBS_only_CpGs.bed-mcFlanks <==\n", "1\t21968\t21969\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\n", "1\t21981\t21982\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\n", "1\t22009\t22010\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\n", "1\t22090\t22091\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\n", "1\t22260\t22261\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\n", "1\t52988\t52989\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t52267\t53266\t1\t+\t.\tg21534\n", "1\t53002\t53003\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t52267\t53266\t1\t+\t.\tg21534\n", "1\t53063\t53064\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t52267\t53266\t1\t+\t.\tg21534\n", "1\t53066\t53067\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t52267\t53266\t1\t+\t.\tg21534\n", "1\t53069\t53070\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t52267\t53266\t1\t+\t.\tg21534\n", "\n", "==> Mcap_WGBS_MBD_only_CpGs.bed-mcFlanks <==\n", "1\t217199\t217200\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t217011\t218010\t0.95\t-\t.\tg21549\n", "1\t217220\t217221\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t217011\t218010\t0.95\t-\t.\tg21549\n", "1\t217249\t217250\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t217011\t218010\t0.95\t-\t.\tg21549\n", "1\t217270\t217271\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t217011\t218010\t0.95\t-\t.\tg21549\n", 
"1\t237190\t237191\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t236955\t237954\t1\t+\t.\tg21550\n", "1\t375502\t375503\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t375372\t376371\t0.32\t-\t.\tg21558\n", "1\t375507\t375508\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t375372\t376371\t0.32\t-\t.\tg21558\n", "1\t376178\t376179\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t375372\t376371\t0.32\t-\t.\tg21558\n", "1\t376201\t376202\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t375372\t376371\t0.32\t-\t.\tg21558\n", "1\t376221\t376222\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t375372\t376371\t0.32\t-\t.\tg21558\n", "\n", "==> Mcap_WGBS_RRBS_MBD_only_CpGs.bed-mcFlanks <==\n", "1\t322945\t322946\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t322002\t323001\t1\t-\t.\tg21554\n", "1\t434640\t434641\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t434136\t435135\t0.92\t+\t.\tg21564\n", "1\t459557\t459558\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t459387\t460386\t0.87\t-\t.\tg21567\n", "1\t459594\t459595\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t459387\t460386\t0.87\t-\t.\tg21567\n", "1\t459597\t459598\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t459387\t460386\t0.87\t-\t.\tg21567\n", "1\t459606\t459607\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t459387\t460386\t0.87\t-\t.\tg21567\n", "1\t580666\t580667\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t580411\t581410\t0.67\t-\t.\tg21577\n", "1\t602057\t602058\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t601956\t602955\t1\t+\t.\tg21580\n", "1\t618163\t618164\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t617737\t618736\t1\t+\t.\tg21582\n", "1\t618177\t618178\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t617737\t618736\t1\t+\t.\tg21582\n", "\n", "==> Mcap_WGBS_RRBS_only_CpGs.bed-mcFlanks <==\n", "1\t21740\t21741\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\n", "1\t21758\t21759\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\n", "1\t21831\t21832\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\n", "1\t21841\t21842\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\n", "1\t21882\t21883\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\n", 
"1\t22107\t22108\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\n", "1\t22112\t22113\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\n", "1\t22118\t22119\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\n", "1\t22166\t22167\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\n", "1\t22171\t22172\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\n", "\n", "==> Mcap_WGBS_only_CpGs.bed-mcFlanks <==\n", "1\t17710\t17711\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t17387\t18386\t0.97\t-\t.\tg21532\n", "1\t17724\t17725\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t17387\t18386\t0.97\t-\t.\tg21532\n", "1\t19419\t19420\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\n", "1\t19488\t19489\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\n", "1\t19534\t19535\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\n", "1\t19542\t19543\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\n", "1\t19555\t19556\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\n", "1\t19574\t19575\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\n", "1\t19591\t19592\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\n", "1\t19615\t19616\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\n", "\n", "==> Mcap_no_method_CpGs.bed-mcFlanks <==\n", "1\t17501\t17502\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t17387\t18386\t0.97\t-\t.\tg21532\n", "1\t17507\t17508\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t17387\t18386\t0.97\t-\t.\tg21532\n", "1\t17557\t17558\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t17387\t18386\t0.97\t-\t.\tg21532\n", "1\t17624\t17625\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t17387\t18386\t0.97\t-\t.\tg21532\n", "1\t17761\t17762\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t17387\t18386\t0.97\t-\t.\tg21532\n", "1\t17763\t17764\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t17387\t18386\t0.97\t-\t.\tg21532\n", 
"1\t17869\t17870\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t17387\t18386\t0.97\t-\t.\tg21532\n", "1\t17889\t17890\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t17387\t18386\t0.97\t-\t.\tg21532\n", "1\t18015\t18016\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t17387\t18386\t0.97\t-\t.\tg21532\n", "1\t18160\t18161\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t17387\t18386\t0.97\t-\t.\tg21532\n" ] } ], "source": [ "#Check output\n", "!head *mcFlanks" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 9646 Mcap_MBD_only_CpGs.bed-mcFlanks\n", " 2230 Mcap_RRBS_MBD_only_CpGs.bed-mcFlanks\n", " 177685 Mcap_RRBS_only_CpGs.bed-mcFlanks\n", " 65175 Mcap_WGBS_MBD_only_CpGs.bed-mcFlanks\n", " 20206 Mcap_WGBS_RRBS_MBD_only_CpGs.bed-mcFlanks\n", " 226831 Mcap_WGBS_RRBS_only_CpGs.bed-mcFlanks\n", " 966286 Mcap_WGBS_only_CpGs.bed-mcFlanks\n", " 1558168 Mcap_no_method_CpGs.bed-mcFlanks\n", " 3026227 total\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l *mcFlanks" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!wc -l *mcFlanks > Mcap-upset-data-mcFlanks-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 3e. 
Upstream flanking regions" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "%%bash\n", "# Intersect every CpG-category BED file with the upstream flanking-region annotation.\n", "# -wb appends the matching GFF record to each hit, giving one output line per overlap.\n", "set -euo pipefail # stop at the first failed intersect so partial output is never counted\n", "INTERSECT_BED=\"${INTERSECT_BED:-/usr/local/bin/intersectBed}\" # overridable; defaults to the original path\n", "\n", "for f in *bed\n", "do\n", " \"${INTERSECT_BED}\" \\\n", " -wb \\\n", " -a \"${f}\" \\\n", " -b ../../../genome-feature-files/Mcap.GFFannotation.flanks.Upstream.gff \\\n", " > \"${f}-mcFlanksUpstream\"\n", "done" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> Mcap_MBD_only_CpGs.bed-mcFlanksUpstream <==\r\n", "1\t582171\t582172\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t581816\t582815\t0.67\t-\t.\tg21577\r\n", "1\t582227\t582228\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t581816\t582815\t0.67\t-\t.\tg21577\r\n", "1\t1063132\t1063133\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t1063077\t1064076\t1\t+\t.\tg21615\r\n", "1\t1063205\t1063206\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t1063077\t1064076\t1\t+\t.\tg21615\r\n", "1\t1063283\t1063284\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t1063077\t1064076\t1\t+\t.\tg21615\r\n", "1\t1063293\t1063294\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t1063077\t1064076\t1\t+\t.\tg21615\r\n", "1\t1441380\t1441381\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t1441371\t1442370\t0.83\t-\t.\tg21638\r\n", "1\t1441383\t1441384\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t1441371\t1442370\t0.83\t-\t.\tg21638\r\n", "1\t1443186\t1443187\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t1443020\t1444019\t0.41\t-\t.\tg21639\r\n", "1\t1852945\t1852946\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t1852056\t1853055\t1\t-\t.\tg21670\r\n", "\r\n", "==> Mcap_RRBS_MBD_only_CpGs.bed-mcFlanksUpstream <==\r\n", "1\t618324\t618325\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t617737\t618736\t1\t+\t.\tg21582\r\n", "1\t618328\t618329\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t617737\t618736\t1\t+\t.\tg21582\r\n", "1\t2370126\t2370127\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t2369843\t2370842\t1\t+\t.\tg21706\r\n", 
"1\t2370130\t2370131\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t2369843\t2370842\t1\t+\t.\tg21706\r\n", "1\t2900298\t2900299\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t2899356\t2900355\t1\t-\t.\tg21742\r\n", "10\t1341557\t1341558\t1\tN/A\t1\t1\t10\tAUGUSTUS\tgene\t1341281\t1341831\t0.75\t+\t.\tg23463\r\n", "10\t1643047\t1643048\t1\tN/A\t1\t1\t10\tAUGUSTUS\tgene\t1642474\t1643473\t0.63\t-\t.\tg23484\r\n", "10\t1643066\t1643067\t1\tN/A\t1\t1\t10\tAUGUSTUS\tgene\t1642474\t1643473\t0.63\t-\t.\tg23484\r\n", "1008\t228391\t228392\t1\tN/A\t1\t1\t1008\tAUGUSTUS\tgene\t228362\t229361\t0.43\t-\t.\tg2047\r\n", "101\t671896\t671897\t1\tN/A\t1\t1\t101\tAUGUSTUS\tgene\t671500\t672499\t0.74\t+\t.\tg33876\r\n", "\r\n", "==> Mcap_RRBS_only_CpGs.bed-mcFlanksUpstream <==\r\n", "1\t62756\t62757\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t62558\t63557\t1\t-\t.\tg21535\r\n", "1\t62842\t62843\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t62558\t63557\t1\t-\t.\tg21535\r\n", "1\t63351\t63352\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t62558\t63557\t1\t-\t.\tg21535\r\n", "1\t63358\t63359\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t62558\t63557\t1\t-\t.\tg21535\r\n", "1\t63370\t63371\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t62558\t63557\t1\t-\t.\tg21535\r\n", "1\t63389\t63390\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t62558\t63557\t1\t-\t.\tg21535\r\n", "1\t63391\t63392\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t62558\t63557\t1\t-\t.\tg21535\r\n", "1\t63432\t63433\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t62558\t63557\t1\t-\t.\tg21535\r\n", "1\t63444\t63445\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t62558\t63557\t1\t-\t.\tg21535\r\n", "1\t63486\t63487\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t62558\t63557\t1\t-\t.\tg21535\r\n", "\r\n", "==> Mcap_WGBS_MBD_only_CpGs.bed-mcFlanksUpstream <==\r\n", "1\t237190\t237191\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t236955\t237954\t1\t+\t.\tg21550\r\n", "1\t375502\t375503\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t375372\t376371\t0.32\t-\t.\tg21558\r\n", "1\t375507\t375508\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t375372\t376371\t0.32\t-\t.\tg21558\r\n", 
"1\t376178\t376179\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t375372\t376371\t0.32\t-\t.\tg21558\r\n", "1\t376201\t376202\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t375372\t376371\t0.32\t-\t.\tg21558\r\n", "1\t376221\t376222\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t375372\t376371\t0.32\t-\t.\tg21558\r\n", "1\t376236\t376237\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t375372\t376371\t0.32\t-\t.\tg21558\r\n", "1\t376262\t376263\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t375372\t376371\t0.32\t-\t.\tg21558\r\n", "1\t376284\t376285\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t375372\t376371\t0.32\t-\t.\tg21558\r\n", "1\t376289\t376290\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t375372\t376371\t0.32\t-\t.\tg21558\r\n", "\r\n", "==> Mcap_WGBS_RRBS_MBD_only_CpGs.bed-mcFlanksUpstream <==\r\n", "1\t434640\t434641\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t434136\t435135\t0.92\t+\t.\tg21564\r\n", "1\t459557\t459558\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t459387\t460386\t0.87\t-\t.\tg21567\r\n", "1\t459594\t459595\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t459387\t460386\t0.87\t-\t.\tg21567\r\n", "1\t459597\t459598\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t459387\t460386\t0.87\t-\t.\tg21567\r\n", "1\t459606\t459607\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t459387\t460386\t0.87\t-\t.\tg21567\r\n", "1\t618163\t618164\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t617737\t618736\t1\t+\t.\tg21582\r\n", "1\t618177\t618178\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t617737\t618736\t1\t+\t.\tg21582\r\n", "1\t618191\t618192\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t617737\t618736\t1\t+\t.\tg21582\r\n", "1\t618193\t618194\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t617737\t618736\t1\t+\t.\tg21582\r\n", "1\t618206\t618207\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t617737\t618736\t1\t+\t.\tg21582\r\n", "\r\n", "==> Mcap_WGBS_RRBS_only_CpGs.bed-mcFlanksUpstream <==\r\n", "1\t63548\t63549\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t62558\t63557\t1\t-\t.\tg21535\r\n", "1\t63548\t63549\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t63466\t64465\t1\t+\t.\tg21536\r\n", "1\t63580\t63581\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t63466\t64465\t1\t+\t.\tg21536\r\n", 
"1\t63582\t63583\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t63466\t64465\t1\t+\t.\tg21536\r\n", "1\t148170\t148171\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t147344\t148343\t0.44\t+\t.\tg21541\r\n", "1\t148180\t148181\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t147344\t148343\t0.44\t+\t.\tg21541\r\n", "1\t185845\t185846\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t185773\t186181\t0.68\t+\t.\tg21547\r\n", "1\t185869\t185870\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t185773\t186181\t0.68\t+\t.\tg21547\r\n", "1\t185880\t185881\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t185773\t186181\t0.68\t+\t.\tg21547\r\n", "1\t185883\t185884\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t185773\t186181\t0.68\t+\t.\tg21547\r\n", "\r\n", "==> Mcap_WGBS_only_CpGs.bed-mcFlanksUpstream <==\r\n", "1\t19419\t19420\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\r\n", "1\t19488\t19489\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\r\n", "1\t19534\t19535\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\r\n", "1\t19542\t19543\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\r\n", "1\t19555\t19556\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\r\n", "1\t19574\t19575\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\r\n", "1\t19591\t19592\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\r\n", "1\t19615\t19616\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\r\n", "1\t19618\t19619\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\r\n", "1\t19620\t19621\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\r\n", "\r\n", "==> Mcap_no_method_CpGs.bed-mcFlanksUpstream <==\r\n", "1\t18812\t18813\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\r\n", "1\t18858\t18859\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\r\n", "1\t18989\t18990\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\r\n", 
"1\t19010\t19011\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\r\n", "1\t19019\t19020\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\r\n", "1\t19037\t19038\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\r\n", "1\t19140\t19141\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\r\n", "1\t19161\t19162\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\r\n", "1\t19170\t19171\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\r\n", "1\t19173\t19174\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t18756\t19755\t0.97\t-\t.\tg21532\r\n" ] } ], "source": [ "#Check output\n", "!head *mcFlanksUpstream" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 5143 Mcap_MBD_only_CpGs.bed-mcFlanksUpstream\n", " 1188 Mcap_RRBS_MBD_only_CpGs.bed-mcFlanksUpstream\n", " 97613 Mcap_RRBS_only_CpGs.bed-mcFlanksUpstream\n", " 35459 Mcap_WGBS_MBD_only_CpGs.bed-mcFlanksUpstream\n", " 11175 Mcap_WGBS_RRBS_MBD_only_CpGs.bed-mcFlanksUpstream\n", " 125659 Mcap_WGBS_RRBS_only_CpGs.bed-mcFlanksUpstream\n", " 512876 Mcap_WGBS_only_CpGs.bed-mcFlanksUpstream\n", " 809361 Mcap_no_method_CpGs.bed-mcFlanksUpstream\n", " 1598474 total\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l *mcFlanksUpstream" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!wc -l *mcFlanksUpstream > Mcap-upset-data-mcFlanksUpstream-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 3f. 
Downstream flanking regions" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "%%bash\n", "\n", "for f in *bed\n", "do\n", " /usr/local/bin/intersectBed \\\n", " -wb \\\n", " -a ${f} \\\n", " -b ../../../genome-feature-files/Mcap.GFFannotation.flanks.Downstream.gff \\\n", " > ${f}-mcFlanksDownstream\n", "done" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> Mcap_MBD_only_CpGs.bed-mcFlanksDownstream <==\r\n", "1\t1143040\t1143041\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t1142462\t1143461\t1\t+\t.\tg21621\r\n", "1\t1143044\t1143045\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t1142462\t1143461\t1\t+\t.\tg21621\r\n", "1\t1326380\t1326381\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t1325918\t1326917\t1\t-\t.\tg21630\r\n", "1\t1326395\t1326396\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t1325918\t1326917\t1\t-\t.\tg21630\r\n", "1\t1709207\t1709208\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t1708460\t1709459\t0.66\t-\t.\tg21655\r\n", "1\t2070733\t2070734\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t2070531\t2071530\t0.88\t-\t.\tg21691\r\n", "1\t2505457\t2505458\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t2505259\t2506258\t1\t-\t.\tg21716\r\n", "1\t2505460\t2505461\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t2505259\t2506258\t1\t-\t.\tg21716\r\n", "1\t2505473\t2505474\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t2505259\t2506258\t1\t-\t.\tg21716\r\n", "1\t2505475\t2505476\t1\tN/A\tN/A\t1\t1\tAUGUSTUS\tgene\t2505259\t2506258\t1\t-\t.\tg21716\r\n", "\r\n", "==> Mcap_RRBS_MBD_only_CpGs.bed-mcFlanksDownstream <==\r\n", "1\t322964\t322965\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t322002\t323001\t1\t-\t.\tg21554\r\n", "1\t1143106\t1143107\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t1142462\t1143461\t1\t+\t.\tg21621\r\n", "1\t1143121\t1143122\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t1142462\t1143461\t1\t+\t.\tg21621\r\n", 
"1\t2304929\t2304930\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t2304316\t2305315\t1\t+\t.\tg21704\r\n", "1\t2747593\t2747594\t1\tN/A\t1\t1\t1\tAUGUSTUS\tgene\t2747256\t2748255\t1\t-\t.\tg21730\r\n", "10\t244008\t244009\t1\tN/A\t1\t1\t10\tAUGUSTUS\tgene\t243185\t244184\t0.92\t-\t.\tg23294\r\n", "10\t244045\t244046\t1\tN/A\t1\t1\t10\tAUGUSTUS\tgene\t243185\t244184\t0.92\t-\t.\tg23294\r\n", "10\t244049\t244050\t1\tN/A\t1\t1\t10\tAUGUSTUS\tgene\t243185\t244184\t0.92\t-\t.\tg23294\r\n", "10\t244077\t244078\t1\tN/A\t1\t1\t10\tAUGUSTUS\tgene\t243185\t244184\t0.92\t-\t.\tg23294\r\n", "10\t244080\t244081\t1\tN/A\t1\t1\t10\tAUGUSTUS\tgene\t243185\t244184\t0.92\t-\t.\tg23294\r\n", "\r\n", "==> Mcap_RRBS_only_CpGs.bed-mcFlanksDownstream <==\r\n", "1\t21968\t21969\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\r\n", "1\t21981\t21982\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\r\n", "1\t22009\t22010\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\r\n", "1\t22090\t22091\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\r\n", "1\t22260\t22261\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\r\n", "1\t52988\t52989\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t52267\t53266\t1\t+\t.\tg21534\r\n", "1\t53002\t53003\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t52267\t53266\t1\t+\t.\tg21534\r\n", "1\t53063\t53064\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t52267\t53266\t1\t+\t.\tg21534\r\n", "1\t53066\t53067\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t52267\t53266\t1\t+\t.\tg21534\r\n", "1\t53069\t53070\t1\tN/A\t1\tN/A\t1\tAUGUSTUS\tgene\t52267\t53266\t1\t+\t.\tg21534\r\n", "\r\n", "==> Mcap_WGBS_MBD_only_CpGs.bed-mcFlanksDownstream <==\r\n", "1\t217199\t217200\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t217011\t218010\t0.95\t-\t.\tg21549\r\n", "1\t217220\t217221\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t217011\t218010\t0.95\t-\t.\tg21549\r\n", "1\t217249\t217250\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t217011\t218010\t0.95\t-\t.\tg21549\r\n", 
"1\t217270\t217271\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t217011\t218010\t0.95\t-\t.\tg21549\r\n", "1\t458553\t458554\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t458012\t459011\t0.87\t-\t.\tg21567\r\n", "1\t458649\t458650\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t458012\t459011\t0.87\t-\t.\tg21567\r\n", "1\t458667\t458668\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t458012\t459011\t0.87\t-\t.\tg21567\r\n", "1\t458704\t458705\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t458012\t459011\t0.87\t-\t.\tg21567\r\n", "1\t458919\t458920\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t458012\t459011\t0.87\t-\t.\tg21567\r\n", "1\t458934\t458935\t1\t1\tN/A\t1\t1\tAUGUSTUS\tgene\t458012\t459011\t0.87\t-\t.\tg21567\r\n", "\r\n", "==> Mcap_WGBS_RRBS_MBD_only_CpGs.bed-mcFlanksDownstream <==\r\n", "1\t322945\t322946\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t322002\t323001\t1\t-\t.\tg21554\r\n", "1\t580666\t580667\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t580411\t581410\t0.67\t-\t.\tg21577\r\n", "1\t602057\t602058\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t601956\t602955\t1\t+\t.\tg21580\r\n", "1\t743456\t743457\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t742708\t743707\t0.98\t-\t.\tg21595\r\n", "1\t743460\t743461\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t742708\t743707\t0.98\t-\t.\tg21595\r\n", "1\t743463\t743464\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t742708\t743707\t0.98\t-\t.\tg21595\r\n", "1\t743505\t743506\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t742708\t743707\t0.98\t-\t.\tg21595\r\n", "1\t743521\t743522\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t742708\t743707\t0.98\t-\t.\tg21595\r\n", "1\t743528\t743529\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t742708\t743707\t0.98\t-\t.\tg21595\r\n", "1\t778697\t778698\t1\t1\t1\t1\t1\tAUGUSTUS\tgene\t777826\t778825\t1\t-\t.\tg21596\r\n", "\r\n", "==> Mcap_WGBS_RRBS_only_CpGs.bed-mcFlanksDownstream <==\r\n", "1\t21740\t21741\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\r\n", "1\t21758\t21759\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\r\n", "1\t21831\t21832\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\r\n", 
"1\t21841\t21842\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\r\n", "1\t21882\t21883\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\r\n", "1\t22107\t22108\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\r\n", "1\t22112\t22113\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\r\n", "1\t22118\t22119\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\r\n", "1\t22166\t22167\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\r\n", "1\t22171\t22172\t1\t1\t1\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\r\n", "\r\n", "==> Mcap_WGBS_only_CpGs.bed-mcFlanksDownstream <==\r\n", "1\t17710\t17711\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t17387\t18386\t0.97\t-\t.\tg21532\r\n", "1\t17724\t17725\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t17387\t18386\t0.97\t-\t.\tg21532\r\n", "1\t21735\t21736\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t21321\t22320\t0.23\t-\t.\tg21533\r\n", "1\t52503\t52504\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t52267\t53266\t1\t+\t.\tg21534\r\n", "1\t52509\t52510\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t52267\t53266\t1\t+\t.\tg21534\r\n", "1\t52527\t52528\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t52267\t53266\t1\t+\t.\tg21534\r\n", "1\t52529\t52530\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t52267\t53266\t1\t+\t.\tg21534\r\n", "1\t52566\t52567\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t52267\t53266\t1\t+\t.\tg21534\r\n", "1\t52590\t52591\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t52267\t53266\t1\t+\t.\tg21534\r\n", "1\t129422\t129423\t1\t1\tN/A\tN/A\t1\tAUGUSTUS\tgene\t128511\t129510\t0.89\t+\t.\tg21539\r\n", "\r\n", "==> Mcap_no_method_CpGs.bed-mcFlanksDownstream <==\r\n", "1\t17501\t17502\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t17387\t18386\t0.97\t-\t.\tg21532\r\n", "1\t17507\t17508\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t17387\t18386\t0.97\t-\t.\tg21532\r\n", "1\t17557\t17558\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t17387\t18386\t0.97\t-\t.\tg21532\r\n", 
"1\t17624\t17625\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t17387\t18386\t0.97\t-\t.\tg21532\r\n", "1\t17761\t17762\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t17387\t18386\t0.97\t-\t.\tg21532\r\n", "1\t17763\t17764\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t17387\t18386\t0.97\t-\t.\tg21532\r\n", "1\t17869\t17870\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t17387\t18386\t0.97\t-\t.\tg21532\r\n", "1\t17889\t17890\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t17387\t18386\t0.97\t-\t.\tg21532\r\n", "1\t18015\t18016\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t17387\t18386\t0.97\t-\t.\tg21532\r\n", "1\t18160\t18161\t1\tN/A\tN/A\tN/A\t1\tAUGUSTUS\tgene\t17387\t18386\t0.97\t-\t.\tg21532\r\n" ] } ], "source": [ "#Check output\n", "!head *mcFlanksDownstream" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 4503 Mcap_MBD_only_CpGs.bed-mcFlanksDownstream\n", " 1042 Mcap_RRBS_MBD_only_CpGs.bed-mcFlanksDownstream\n", " 80072 Mcap_RRBS_only_CpGs.bed-mcFlanksDownstream\n", " 29716 Mcap_WGBS_MBD_only_CpGs.bed-mcFlanksDownstream\n", " 9031 Mcap_WGBS_RRBS_MBD_only_CpGs.bed-mcFlanksDownstream\n", " 101172 Mcap_WGBS_RRBS_only_CpGs.bed-mcFlanksDownstream\n", " 453410 Mcap_WGBS_only_CpGs.bed-mcFlanksDownstream\n", " 748807 Mcap_no_method_CpGs.bed-mcFlanksDownstream\n", " 1427753 total\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l *mcFlanksDownstream" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!wc -l *mcFlanksDownstream > Mcap-upset-data-mcFlanksDownstream-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 3h. Intergenic" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Intergenic is defined as anything not in a gene or flanking region." 
] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%%bash \n", "\n", "for f in *bed\n", "do\n", " /usr/local/bin/intersectBed \\\n", " -wb \\\n", " -a ${f} \\\n", " -b ../../../genome-feature-files/Mcap.GFFannotation.intergenic.bed \\\n", " > ${f}-mcIntergenic\n", "done" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> Mcap_MBD_only_CpGs.bed-mcIntergenic <==\r\n", "1\t8114\t8115\t1\tN/A\tN/A\t1\t1\t0\t17386\r\n", "1\t255403\t255404\t1\tN/A\tN/A\t1\t1\t239956\t306771\r\n", "1\t294176\t294177\t1\tN/A\tN/A\t1\t1\t239956\t306771\r\n", "1\t300420\t300421\t1\tN/A\tN/A\t1\t1\t239956\t306771\r\n", "1\t300423\t300424\t1\tN/A\tN/A\t1\t1\t239956\t306771\r\n", "1\t300428\t300429\t1\tN/A\tN/A\t1\t1\t239956\t306771\r\n", "1\t378641\t378642\t1\tN/A\tN/A\t1\t1\t376371\t395420\r\n", "1\t481649\t481650\t1\tN/A\tN/A\t1\t1\t466452\t523297\r\n", "1\t481652\t481653\t1\tN/A\tN/A\t1\t1\t466452\t523297\r\n", "1\t481655\t481656\t1\tN/A\tN/A\t1\t1\t466452\t523297\r\n", "\r\n", "==> Mcap_RRBS_MBD_only_CpGs.bed-mcIntergenic <==\r\n", "1\t456501\t456502\t1\tN/A\t1\t1\t1\t444635\t458011\r\n", "1\t456509\t456510\t1\tN/A\t1\t1\t1\t444635\t458011\r\n", "1\t456549\t456550\t1\tN/A\t1\t1\t1\t444635\t458011\r\n", "1\t456568\t456569\t1\tN/A\t1\t1\t1\t444635\t458011\r\n", "1\t456586\t456587\t1\tN/A\t1\t1\t1\t444635\t458011\r\n", "1\t456590\t456591\t1\tN/A\t1\t1\t1\t444635\t458011\r\n", "1\t947953\t947954\t1\tN/A\t1\t1\t1\t947113\t951902\r\n", "1\t947994\t947995\t1\tN/A\t1\t1\t1\t947113\t951902\r\n", "1\t1138699\t1138700\t1\tN/A\t1\t1\t1\t1135134\t1140501\r\n", "1\t1138703\t1138704\t1\tN/A\t1\t1\t1\t1135134\t1140501\r\n", "\r\n", "==> Mcap_RRBS_only_CpGs.bed-mcIntergenic <==\r\n", "1\t3494\t3495\t1\tN/A\t1\tN/A\t1\t0\t17386\r\n", "1\t3519\t3520\t1\tN/A\t1\tN/A\t1\t0\t17386\r\n", 
"1\t3728\t3729\t1\tN/A\t1\tN/A\t1\t0\t17386\r\n", "1\t3753\t3754\t1\tN/A\t1\tN/A\t1\t0\t17386\r\n", "1\t3758\t3759\t1\tN/A\t1\tN/A\t1\t0\t17386\r\n", "1\t3771\t3772\t1\tN/A\t1\tN/A\t1\t0\t17386\r\n", "1\t11877\t11878\t1\tN/A\t1\tN/A\t1\t0\t17386\r\n", "1\t11888\t11889\t1\tN/A\t1\tN/A\t1\t0\t17386\r\n", "1\t11895\t11896\t1\tN/A\t1\tN/A\t1\t0\t17386\r\n", "1\t11942\t11943\t1\tN/A\t1\tN/A\t1\t0\t17386\r\n", "\r\n", "==> Mcap_WGBS_MBD_only_CpGs.bed-mcIntergenic <==\r\n", "1\t5229\t5230\t1\t1\tN/A\t1\t1\t0\t17386\r\n", "1\t5244\t5245\t1\t1\tN/A\t1\t1\t0\t17386\r\n", "1\t5248\t5249\t1\t1\tN/A\t1\t1\t0\t17386\r\n", "1\t5297\t5298\t1\t1\tN/A\t1\t1\t0\t17386\r\n", "1\t192754\t192755\t1\t1\tN/A\t1\t1\t187520\t217010\r\n", "1\t210922\t210923\t1\t1\tN/A\t1\t1\t187520\t217010\r\n", "1\t210931\t210932\t1\t1\tN/A\t1\t1\t187520\t217010\r\n", "1\t211906\t211907\t1\t1\tN/A\t1\t1\t187520\t217010\r\n", "1\t211908\t211909\t1\t1\tN/A\t1\t1\t187520\t217010\r\n", "1\t211918\t211919\t1\t1\tN/A\t1\t1\t187520\t217010\r\n", "\r\n", "==> Mcap_WGBS_RRBS_MBD_only_CpGs.bed-mcIntergenic <==\r\n", "1\t224610\t224611\t1\t1\t1\t1\t1\t219415\t236954\r\n", "1\t230792\t230793\t1\t1\t1\t1\t1\t219415\t236954\r\n", "1\t230798\t230799\t1\t1\t1\t1\t1\t219415\t236954\r\n", "1\t233877\t233878\t1\t1\t1\t1\t1\t219415\t236954\r\n", "1\t233895\t233896\t1\t1\t1\t1\t1\t219415\t236954\r\n", "1\t233981\t233982\t1\t1\t1\t1\t1\t219415\t236954\r\n", "1\t234159\t234160\t1\t1\t1\t1\t1\t219415\t236954\r\n", "1\t244564\t244565\t1\t1\t1\t1\t1\t239956\t306771\r\n", "1\t244571\t244572\t1\t1\t1\t1\t1\t239956\t306771\r\n", "1\t244589\t244590\t1\t1\t1\t1\t1\t239956\t306771\r\n", "\r\n", "==> Mcap_WGBS_RRBS_only_CpGs.bed-mcIntergenic <==\r\n", "1\t12044\t12045\t1\t1\t1\tN/A\t1\t0\t17386\r\n", "1\t15105\t15106\t1\t1\t1\tN/A\t1\t0\t17386\r\n", "1\t15130\t15131\t1\t1\t1\tN/A\t1\t0\t17386\r\n", "1\t15195\t15196\t1\t1\t1\tN/A\t1\t0\t17386\r\n", "1\t15222\t15223\t1\t1\t1\tN/A\t1\t0\t17386\r\n", 
"1\t15773\t15774\t1\t1\t1\tN/A\t1\t0\t17386\r\n", "1\t15782\t15783\t1\t1\t1\tN/A\t1\t0\t17386\r\n", "1\t15784\t15785\t1\t1\t1\tN/A\t1\t0\t17386\r\n", "1\t15798\t15799\t1\t1\t1\tN/A\t1\t0\t17386\r\n", "1\t15802\t15803\t1\t1\t1\tN/A\t1\t0\t17386\r\n", "\r\n", "==> Mcap_WGBS_only_CpGs.bed-mcIntergenic <==\r\n", "1\t4063\t4064\t1\t1\tN/A\tN/A\t1\t0\t17386\r\n", "1\t4070\t4071\t1\t1\tN/A\tN/A\t1\t0\t17386\r\n", "1\t4078\t4079\t1\t1\tN/A\tN/A\t1\t0\t17386\r\n", "1\t4087\t4088\t1\t1\tN/A\tN/A\t1\t0\t17386\r\n", "1\t4147\t4148\t1\t1\tN/A\tN/A\t1\t0\t17386\r\n", "1\t4151\t4152\t1\t1\tN/A\tN/A\t1\t0\t17386\r\n", "1\t4156\t4157\t1\t1\tN/A\tN/A\t1\t0\t17386\r\n", "1\t4173\t4174\t1\t1\tN/A\tN/A\t1\t0\t17386\r\n", "1\t4185\t4186\t1\t1\tN/A\tN/A\t1\t0\t17386\r\n", "1\t4191\t4192\t1\t1\tN/A\tN/A\t1\t0\t17386\r\n", "\r\n", "==> Mcap_no_method_CpGs.bed-mcIntergenic <==\r\n", "1\t37\t38\t1\tN/A\tN/A\tN/A\t1\t0\t17386\r\n", "1\t90\t91\t1\tN/A\tN/A\tN/A\t1\t0\t17386\r\n", "1\t121\t122\t1\tN/A\tN/A\tN/A\t1\t0\t17386\r\n", "1\t132\t133\t1\tN/A\tN/A\tN/A\t1\t0\t17386\r\n", "1\t153\t154\t1\tN/A\tN/A\tN/A\t1\t0\t17386\r\n", "1\t170\t171\t1\tN/A\tN/A\tN/A\t1\t0\t17386\r\n", "1\t220\t221\t1\tN/A\tN/A\tN/A\t1\t0\t17386\r\n", "1\t224\t225\t1\tN/A\tN/A\tN/A\t1\t0\t17386\r\n", "1\t233\t234\t1\tN/A\tN/A\tN/A\t1\t0\t17386\r\n", "1\t272\t273\t1\tN/A\tN/A\tN/A\t1\t0\t17386\r\n" ] } ], "source": [ "#Check output\n", "!head *mcIntergenic" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 47247 Mcap_MBD_only_CpGs.bed-mcIntergenic\n", " 10679 Mcap_RRBS_MBD_only_CpGs.bed-mcIntergenic\n", " 1035941 Mcap_RRBS_only_CpGs.bed-mcIntergenic\n", " 311751 Mcap_WGBS_MBD_only_CpGs.bed-mcIntergenic\n", " 101790 Mcap_WGBS_RRBS_MBD_only_CpGs.bed-mcIntergenic\n", " 1134624 Mcap_WGBS_RRBS_only_CpGs.bed-mcIntergenic\n", " 4660188 Mcap_WGBS_only_CpGs.bed-mcIntergenic\n", " 9164856 
Mcap_no_method_CpGs.bed-mcIntergenic\n", " 16467076 total\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l *mcIntergenic" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!wc -l *mcIntergenic > Mcap-upset-data-mcIntergenic-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## *P. acuta*" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/yaamini/Documents/Meth_Compare/analyses/Identifying-genomic-locations\n" ] } ], "source": [ "cd .." ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Make a directory for Pact output\n", "#!mkdir Pact" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/yaamini/Documents/Meth_Compare/analyses/Identifying-genomic-locations/Pact\n" ] } ], "source": [ "cd Pact/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1. Download files" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 1a. Upset plot data" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2020-05-07 23:26:25-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Pact/\n", "Resolving gannet.fish.washington.edu (gannet.fish.washington.edu)... 128.95.149.52\n", "Connecting to gannet.fish.washington.edu (gannet.fish.washington.edu)|128.95.149.52|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html.tmp’\n", "\n", "index.html.tmp [ <=> ] 7.87K --.-KB/s in 0.001s \n", "\n", "2020-05-07 23:26:26 (14.7 MB/s) - ‘./index.html.tmp’ saved [8063]\n", "\n", "Loading robots.txt; please ignore errors.\n", "--2020-05-07 23:26:26-- https://gannet.fish.washington.edu/robots.txt\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 404 Not Found\n", "2020-05-07 23:26:26 ERROR 404: Not Found.\n", "\n", "Removing ./index.html.tmp since it should be rejected.\n", "\n", "--2020-05-07 23:26:26-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Pact/?C=N;O=D\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=N;O=D.tmp’\n", "\n", "index.html?C=N;O=D. [ <=> ] 7.87K --.-KB/s in 0s \n", "\n", "2020-05-07 23:26:26 (36.6 MB/s) - ‘./index.html?C=N;O=D.tmp’ saved [8063]\n", "\n", "Removing ./index.html?C=N;O=D.tmp since it should be rejected.\n", "\n", "--2020-05-07 23:26:26-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Pact/?C=M;O=A\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=M;O=A.tmp’\n", "\n", "index.html?C=M;O=A. [ <=> ] 7.87K --.-KB/s in 0s \n", "\n", "2020-05-07 23:26:26 (31.5 MB/s) - ‘./index.html?C=M;O=A.tmp’ saved [8063]\n", "\n", "Removing ./index.html?C=M;O=A.tmp since it should be rejected.\n", "\n", "--2020-05-07 23:26:26-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Pact/?C=S;O=A\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=S;O=A.tmp’\n", "\n", "index.html?C=S;O=A. [ <=> ] 7.87K --.-KB/s in 0s \n", "\n", "2020-05-07 23:26:26 (17.9 MB/s) - ‘./index.html?C=S;O=A.tmp’ saved [8063]\n", "\n", "Removing ./index.html?C=S;O=A.tmp since it should be rejected.\n", "\n", "--2020-05-07 23:26:26-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Pact/?C=D;O=A\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=D;O=A.tmp’\n", "\n", "index.html?C=D;O=A. [ <=> ] 7.87K --.-KB/s in 0s \n", "\n", "2020-05-07 23:26:26 (33.1 MB/s) - ‘./index.html?C=D;O=A.tmp’ saved [8063]\n", "\n", "Removing ./index.html?C=D;O=A.tmp since it should be rejected.\n", "\n", "--2020-05-07 23:26:26-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Pact/Pact_MBD_only_CpGs.bed\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 2992095 (2.9M) [application/vnd.realvnc.bed]\n", "Saving to: ‘./Pact_MBD_only_CpGs.bed’\n", "\n", "Pact_MBD_only_CpGs. 100%[===================>] 2.85M --.-KB/s in 0.05s \n", "\n", "2020-05-07 23:26:26 (61.7 MB/s) - ‘./Pact_MBD_only_CpGs.bed’ saved [2992095/2992095]\n", "\n", "--2020-05-07 23:26:26-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Pact/Pact_RRBS_MBD_only_CpGs.bed\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 424949 (415K) [application/vnd.realvnc.bed]\n", "Saving to: ‘./Pact_RRBS_MBD_only_CpGs.bed’\n", "\n", "Pact_RRBS_MBD_only_ 100%[===================>] 414.99K --.-KB/s in 0.005s \n", "\n", "2020-05-07 23:26:26 (81.8 MB/s) - ‘./Pact_RRBS_MBD_only_CpGs.bed’ saved [424949/424949]\n", "\n", "--2020-05-07 23:26:26-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Pact/Pact_RRBS_only_CpGs.bed\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 6259188 (6.0M) [application/vnd.realvnc.bed]\n", "Saving to: ‘./Pact_RRBS_only_CpGs.bed’\n", "\n", "Pact_RRBS_only_CpGs 100%[===================>] 5.97M --.-KB/s in 0.06s \n", "\n", "2020-05-07 23:26:26 (103 MB/s) - ‘./Pact_RRBS_only_CpGs.bed’ saved [6259188/6259188]\n", "\n", "--2020-05-07 23:26:26-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Pact/Pact_WGBS_MBD_only_CpGs.bed\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 98649079 (94M) [application/vnd.realvnc.bed]\n", "Saving to: ‘./Pact_WGBS_MBD_only_CpGs.bed’\n", "\n", "Pact_WGBS_MBD_only_ 100%[===================>] 94.08M 86.0MB/s in 1.1s \n", "\n", "2020-05-07 23:26:27 (86.0 MB/s) - ‘./Pact_WGBS_MBD_only_CpGs.bed’ saved [98649079/98649079]\n", "\n", "--2020-05-07 23:26:27-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Pact/Pact_WGBS_RRBS_MBD_only_CpGs.bed\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 50029624 (48M) [application/vnd.realvnc.bed]\n", "Saving to: ‘./Pact_WGBS_RRBS_MBD_only_CpGs.bed’\n", "\n", "Pact_WGBS_RRBS_MBD_ 100%[===================>] 47.71M 84.0MB/s in 0.6s \n", "\n", "2020-05-07 23:26:28 (84.0 MB/s) - ‘./Pact_WGBS_RRBS_MBD_only_CpGs.bed’ saved [50029624/50029624]\n", "\n", "--2020-05-07 23:26:28-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Pact/Pact_WGBS_RRBS_only_CpGs.bed\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 35306811 (34M) [application/vnd.realvnc.bed]\n", "Saving to: ‘./Pact_WGBS_RRBS_only_CpGs.bed’\n", "\n", "Pact_WGBS_RRBS_only 100%[===================>] 33.67M 101MB/s in 0.3s \n", "\n", "2020-05-07 23:26:29 (101 MB/s) - ‘./Pact_WGBS_RRBS_only_CpGs.bed’ saved [35306811/35306811]\n", "\n", "--2020-05-07 23:26:29-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Pact/Pact_WGBS_only_CpGs.bed\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 114668697 (109M) [application/vnd.realvnc.bed]\n", "Saving to: ‘./Pact_WGBS_only_CpGs.bed’\n", "\n", "Pact_WGBS_only_CpGs 100%[===================>] 109.36M 93.8MB/s in 1.2s \n", "\n", "2020-05-07 23:26:30 (93.8 MB/s) - ‘./Pact_WGBS_only_CpGs.bed’ saved [114668697/114668697]\n", "\n", "--2020-05-07 23:26:30-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Pact/Pact_no_method_CpGs.bed\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 79741636 (76M) [application/vnd.realvnc.bed]\n", "Saving to: ‘./Pact_no_method_CpGs.bed’\n", "\n", "Pact_no_method_CpGs 100%[===================>] 76.05M 80.7MB/s in 0.9s \n", "\n", "2020-05-07 23:26:31 (80.7 MB/s) - ‘./Pact_no_method_CpGs.bed’ saved [79741636/79741636]\n", "\n", "--2020-05-07 23:26:31-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Pact/?C=N;O=A\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=N;O=A.tmp’\n", "\n", "index.html?C=N;O=A. [ <=> ] 7.87K --.-KB/s in 0s \n", "\n", "2020-05-07 23:26:31 (35.3 MB/s) - ‘./index.html?C=N;O=A.tmp’ saved [8063]\n", "\n", "Removing ./index.html?C=N;O=A.tmp since it should be rejected.\n", "\n", "--2020-05-07 23:26:31-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Pact/?C=M;O=D\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=M;O=D.tmp’\n", "\n", "index.html?C=M;O=D. [ <=> ] 7.87K --.-KB/s in 0s \n", "\n", "2020-05-07 23:26:31 (34.8 MB/s) - ‘./index.html?C=M;O=D.tmp’ saved [8063]\n", "\n", "Removing ./index.html?C=M;O=D.tmp since it should be rejected.\n", "\n", "--2020-05-07 23:26:31-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Pact/?C=S;O=D\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=S;O=D.tmp’\n", "\n", "index.html?C=S;O=D. 
[ <=> ] 7.87K --.-KB/s in 0s \n", "\n", "2020-05-07 23:26:31 (33.1 MB/s) - ‘./index.html?C=S;O=D.tmp’ saved [8063]\n", "\n", "Removing ./index.html?C=S;O=D.tmp since it should be rejected.\n", "\n", "--2020-05-07 23:26:31-- https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Pact/?C=D;O=D\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=D;O=D.tmp’\n", "\n", "index.html?C=D;O=D. [ <=> ] 7.87K --.-KB/s in 0s \n", "\n", "2020-05-07 23:26:31 (18.1 MB/s) - ‘./index.html?C=D;O=D.tmp’ saved [8063]\n", "\n", "Removing ./index.html?C=D;O=D.tmp since it should be rejected.\n", "\n", "FINISHED --2020-05-07 23:26:31--\n", "Total wall clock time: 5.8s\n", "Downloaded: 17 files, 370M in 4.2s (87.8 MB/s)\n" ] } ], "source": [ "#Download Pact upset plot data\n", "!wget -r \\\n", "--no-directories --no-parent --reject \"index.html*\" \\\n", "-P . 
\\\n", "-A CpGs.bed https://gannet.fish.washington.edu/metacarcinus/FROGER_meth_compare/20200504/data/Pact/" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Pact_MBD_only_CpGs.bed Pact_WGBS_RRBS_MBD_only_CpGs.bed\r\n", "Pact_RRBS_MBD_only_CpGs.bed Pact_WGBS_RRBS_only_CpGs.bed\r\n", "Pact_RRBS_only_CpGs.bed Pact_WGBS_only_CpGs.bed\r\n", "Pact_WGBS_MBD_only_CpGs.bed Pact_no_method_CpGs.bed\r\n" ] } ], "source": [ "#Check downloaded files\n", "!ls *CpGs.bed" ] }, { "cell_type": "code", "execution_count": 52, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "scaffold100004_cov43\t32\t33\t1\tN/A\tN/A\t1\r\n", "scaffold100009_cov142\t107\t108\t1\tN/A\tN/A\t1\r\n", "scaffold100026_cov65\t763\t764\t1\tN/A\tN/A\t1\r\n", "scaffold100028_cov103\t368\t369\t1\tN/A\tN/A\t1\r\n", "scaffold10002_cov101\t1074\t1075\t1\tN/A\tN/A\t1\r\n", "scaffold100032_cov90\t312\t313\t1\tN/A\tN/A\t1\r\n", "scaffold100032_cov90\t348\t349\t1\tN/A\tN/A\t1\r\n", "scaffold10006_cov97\t537\t538\t1\tN/A\tN/A\t1\r\n", "scaffold100076_cov52\t73\t74\t1\tN/A\tN/A\t1\r\n", "scaffold100076_cov52\t98\t99\t1\tN/A\tN/A\t1\r\n" ] } ], "source": [ "#Check format\n", "!head Pact_MBD_only_CpGs.bed" ] }, { "cell_type": "code", "execution_count": 55, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 70784 Pact_MBD_only_CpGs.bed\n", " 10526 Pact_RRBS_MBD_only_CpGs.bed\n", " 148043 Pact_RRBS_only_CpGs.bed\n", " 2360477 Pact_WGBS_MBD_only_CpGs.bed\n", " 1262249 Pact_WGBS_RRBS_MBD_only_CpGs.bed\n", " 844961 Pact_WGBS_RRBS_only_CpGs.bed\n", " 2629257 Pact_WGBS_only_CpGs.bed\n", " 1829323 Pact_no_method_CpGs.bed\n", " 9155620 total\n" ] } ], "source": [ "!wc -l *CpGs.bed" ] }, { "cell_type": "code", "execution_count": 56, "metadata": { "collapsed": true }, "outputs": [], 
"source": [ "!wc -l *CpGs.bed > Pact-upset-data-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 1b. Method-associated DMC" ] }, { "cell_type": "code", "execution_count": 57, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "../../Pact_methDiff/Pact_RvM_50pMethDiff.bed\r\n", "../../Pact_methDiff/Pact_WvM_50pMethDiff.bed\r\n" ] } ], "source": [ "#Find DMC files\n", "!ls ../../Pact_methDiff/*bed" ] }, { "cell_type": "code", "execution_count": 58, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "scaffold10006_cov97\t121\t123\t4.50663240548246e-17\r\n", "scaffold10006_cov97\t173\t175\t1.27003267241539e-17\r\n", "scaffold100173_cov47\t451\t453\t7.6591683257852e-74\r\n", "scaffold100173_cov47\t472\t474\t1.13567297947311e-76\r\n", "scaffold100202_cov48\t549\t551\t1.94537586282645e-87\r\n", "scaffold100202_cov48\t553\t555\t3.33672699925078e-88\r\n", "scaffold100202_cov48\t577\t579\t1.09299192954077e-110\r\n", "scaffold100202_cov48\t583\t585\t6.02517568178073e-55\r\n", "scaffold100202_cov48\t591\t593\t2.94227399245393e-58\r\n", "scaffold100202_cov48\t705\t707\t3.89392107786878e-166\r\n" ] } ], "source": [ "#Check format\n", "!head ../../Pact_methDiff/Pact_RvM_50pMethDiff.bed" ] }, { "cell_type": "code", "execution_count": 59, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 10889 ../../Pact_methDiff/Pact_RvM_50pMethDiff.bed\r\n", " 27251 ../../Pact_methDiff/Pact_WvM_50pMethDiff.bed\r\n", " 38140 total\r\n" ] } ], "source": [ "!wc -l ../../Pact_methDiff/*bed" ] }, { "cell_type": "code", "execution_count": 60, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!wc -l ../../Pact_methDiff/*bed > Pact-DMC-data-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3. 
Characterize genomic locations of CpGs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 3a. Genes" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Upset data" ] }, { "cell_type": "code", "execution_count": 61, "metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "%%bash\n", "\n", "#Abort on the first failed intersectBed call rather than silently continuing to the next file\n", "set -e\n", "\n", "#Intersect each upset-category CpG file with the gene track (-wb appends the overlapping GFF record,\n", "#so a CpG is written once per overlapping gene)\n", "for f in *CpGs.bed\n", "do\n", " /usr/local/bin/intersectBed \\\n", " -wb \\\n", " -a \"${f}\" \\\n", " -b ../../../genome-feature-files/Pact.GFFannotation.Genes.gff \\\n", " > \"${f}-paGenes\"\n", "done" ] }, { "cell_type": "code", "execution_count": 62, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> Pact_MBD_only_CpGs.bed-paGenes <==\n", "scaffold10024_cov87\t5947\t5948\t1\tN/A\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t5032\t12161\t0.26\t+\t.\tg48296\n", "scaffold10024_cov87\t11621\t11622\t1\tN/A\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t5032\t12161\t0.26\t+\t.\tg48296\n", "scaffold10024_cov87\t11630\t11631\t1\tN/A\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t5032\t12161\t0.26\t+\t.\tg48296\n", "scaffold10024_cov87\t11651\t11652\t1\tN/A\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t5032\t12161\t0.26\t+\t.\tg48296\n", "scaffold10024_cov87\t11676\t11677\t1\tN/A\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t5032\t12161\t0.26\t+\t.\tg48296\n", "scaffold10024_cov87\t29844\t29845\t1\tN/A\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t16926\t29985\t0.81\t-\t.\tg48298\n", "scaffold10029_cov108\t1953\t1954\t1\tN/A\tN/A\t1\tscaffold10029_cov108\tAUGUSTUS\tgene\t1875\t5714\t0.49\t-\t.\tg48307\n", "scaffold10053_cov85\t10206\t10207\t1\tN/A\tN/A\t1\tscaffold10053_cov85\tAUGUSTUS\tgene\t9983\t11110\t0.58\t+\t.\tg48314\n", "scaffold10065_cov90\t19126\t19127\t1\tN/A\tN/A\t1\tscaffold10065_cov90\tAUGUSTUS\tgene\t19014\t19197\t0.36\t-\t.\tg48336\n", "scaffold10065_cov90\t19126\t19127\t1\tN/A\tN/A\t1\tscaffold10065_cov90\tAUGUSTUS\tgene\t19058\t19197\t0.64\t-\t.\tg48337\n", "\n",
"==> Pact_RRBS_MBD_only_CpGs.bed-paGenes <==\n", "scaffold10024_cov87\t10007\t10008\t1\tN/A\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t5032\t12161\t0.26\t+\t.\tg48296\n", "scaffold10065_cov90\t15205\t15206\t1\tN/A\t1\t1\tscaffold10065_cov90\tAUGUSTUS\tgene\t15189\t15549\t0.88\t+\t.\tg48332\n", "scaffold10092_cov94\t5228\t5229\t1\tN/A\t1\t1\tscaffold10092_cov94\tAUGUSTUS\tgene\t3573\t5337\t0.94\t-\t.\tg48344\n", "scaffold10092_cov94\t5239\t5240\t1\tN/A\t1\t1\tscaffold10092_cov94\tAUGUSTUS\tgene\t3573\t5337\t0.94\t-\t.\tg48344\n", "scaffold10092_cov94\t5291\t5292\t1\tN/A\t1\t1\tscaffold10092_cov94\tAUGUSTUS\tgene\t3573\t5337\t0.94\t-\t.\tg48344\n", "scaffold10101_cov102\t78403\t78404\t1\tN/A\t1\t1\tscaffold10101_cov102\tAUGUSTUS\tgene\t78240\t84352\t0.98\t+\t.\tg48365\n", "scaffold10101_cov102\t110395\t110396\t1\tN/A\t1\t1\tscaffold10101_cov102\tAUGUSTUS\tgene\t110177\t115298\t1\t+\t.\tg48370\n", "scaffold10156_cov58\t1626\t1627\t1\tN/A\t1\t1\tscaffold10156_cov58\tAUGUSTUS\tgene\t1\t3029\t0.64\t+\t.\tg48398\n", "scaffold10156_cov58\t1630\t1631\t1\tN/A\t1\t1\tscaffold10156_cov58\tAUGUSTUS\tgene\t1\t3029\t0.64\t+\t.\tg48398\n", "scaffold1015_cov73\t31998\t31999\t1\tN/A\t1\t1\tscaffold1015_cov73\tAUGUSTUS\tgene\t28932\t36264\t0.07\t+\t.\tg12891\n", "\n", "==> Pact_RRBS_only_CpGs.bed-paGenes <==\n", "scaffold10024_cov87\t5292\t5293\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t5032\t12161\t0.26\t+\t.\tg48296\n", "scaffold10024_cov87\t10033\t10034\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t5032\t12161\t0.26\t+\t.\tg48296\n", "scaffold10024_cov87\t16321\t16322\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t14671\t16593\t0.64\t-\t.\tg48297\n", "scaffold10024_cov87\t16376\t16377\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t14671\t16593\t0.64\t-\t.\tg48297\n", "scaffold10024_cov87\t27754\t27755\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t16926\t29985\t0.81\t-\t.\tg48298\n", 
"scaffold10024_cov87\t27773\t27774\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t16926\t29985\t0.81\t-\t.\tg48298\n", "scaffold10024_cov87\t27898\t27899\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t16926\t29985\t0.81\t-\t.\tg48298\n", "scaffold10024_cov87\t27905\t27906\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t16926\t29985\t0.81\t-\t.\tg48298\n", "scaffold10024_cov87\t27930\t27931\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t16926\t29985\t0.81\t-\t.\tg48298\n", "scaffold10024_cov87\t27946\t27947\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t16926\t29985\t0.81\t-\t.\tg48298\n", "\n", "==> Pact_WGBS_MBD_only_CpGs.bed-paGenes <==\n", "scaffold10024_cov87\t2800\t2801\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t2612\t3029\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2805\t2806\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t2612\t3029\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2813\t2814\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t2612\t3029\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2825\t2826\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t2612\t3029\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2837\t2838\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t2612\t3029\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2845\t2846\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t2612\t3029\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2860\t2861\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t2612\t3029\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2873\t2874\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t2612\t3029\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2897\t2898\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t2612\t3029\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2920\t2921\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t2612\t3029\t0.75\t+\t.\tg48293\n", "\n", "==> Pact_WGBS_RRBS_MBD_only_CpGs.bed-paGenes <==\n", 
"scaffold10024_cov87\t4118\t4119\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t4020\t4675\t0.89\t-\t.\tg48294\n", "scaffold10024_cov87\t4126\t4127\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t4020\t4675\t0.89\t-\t.\tg48294\n", "scaffold10024_cov87\t4137\t4138\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t4020\t4675\t0.89\t-\t.\tg48294\n", "scaffold10024_cov87\t4141\t4142\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t4020\t4675\t0.89\t-\t.\tg48294\n", "scaffold10024_cov87\t4167\t4168\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t4020\t4675\t0.89\t-\t.\tg48294\n", "scaffold10024_cov87\t4181\t4182\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t4020\t4675\t0.89\t-\t.\tg48294\n", "scaffold10024_cov87\t6272\t6273\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t5032\t12161\t0.26\t+\t.\tg48296\n", "scaffold10024_cov87\t6287\t6288\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t5032\t12161\t0.26\t+\t.\tg48296\n", "scaffold10024_cov87\t6325\t6326\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t5032\t12161\t0.26\t+\t.\tg48296\n", "scaffold10024_cov87\t6344\t6345\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t5032\t12161\t0.26\t+\t.\tg48296\n", "\n", "==> Pact_WGBS_RRBS_only_CpGs.bed-paGenes <==\n", "scaffold10024_cov87\t4945\t4946\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t4733\t4954\t0.54\t+\t.\tg48295\n", "scaffold10024_cov87\t5084\t5085\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t5032\t12161\t0.26\t+\t.\tg48296\n", "scaffold10024_cov87\t5129\t5130\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t5032\t12161\t0.26\t+\t.\tg48296\n", "scaffold10024_cov87\t5136\t5137\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t5032\t12161\t0.26\t+\t.\tg48296\n", "scaffold10024_cov87\t5150\t5151\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t5032\t12161\t0.26\t+\t.\tg48296\n", "scaffold10024_cov87\t5174\t5175\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t5032\t12161\t0.26\t+\t.\tg48296\n", 
"scaffold10024_cov87\t5177\t5178\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t5032\t12161\t0.26\t+\t.\tg48296\n", "scaffold10024_cov87\t5247\t5248\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t5032\t12161\t0.26\t+\t.\tg48296\n", "scaffold10024_cov87\t5496\t5497\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t5032\t12161\t0.26\t+\t.\tg48296\n", "scaffold10024_cov87\t5515\t5516\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t5032\t12161\t0.26\t+\t.\tg48296\n", "\n", "==> Pact_WGBS_only_CpGs.bed-paGenes <==\n", "scaffold10024_cov87\t2620\t2621\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t2612\t3029\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2645\t2646\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t2612\t3029\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2657\t2658\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t2612\t3029\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2665\t2666\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t2612\t3029\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2680\t2681\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t2612\t3029\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2685\t2686\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t2612\t3029\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2687\t2688\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t2612\t3029\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2693\t2694\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t2612\t3029\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2705\t2706\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t2612\t3029\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2713\t2714\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t2612\t3029\t0.75\t+\t.\tg48293\n", "\n", "==> Pact_no_method_CpGs.bed-paGenes <==\n", "scaffold10024_cov87\t4253\t4254\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t4020\t4675\t0.89\t-\t.\tg48294\n", 
"scaffold10024_cov87\t4292\t4293\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t4020\t4675\t0.89\t-\t.\tg48294\n", "scaffold10024_cov87\t4300\t4301\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t4020\t4675\t0.89\t-\t.\tg48294\n", "scaffold10024_cov87\t4349\t4350\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t4020\t4675\t0.89\t-\t.\tg48294\n", "scaffold10024_cov87\t4378\t4379\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t4020\t4675\t0.89\t-\t.\tg48294\n", "scaffold10024_cov87\t4407\t4408\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t4020\t4675\t0.89\t-\t.\tg48294\n", "scaffold10024_cov87\t4497\t4498\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t4020\t4675\t0.89\t-\t.\tg48294\n", "scaffold10024_cov87\t4503\t4504\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t4020\t4675\t0.89\t-\t.\tg48294\n", "scaffold10024_cov87\t4512\t4513\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t4020\t4675\t0.89\t-\t.\tg48294\n", "scaffold10024_cov87\t4737\t4738\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t4733\t4954\t0.54\t+\t.\tg48295\n" ] } ], "source": [ "#Check output\n", "!head *CpGs.bed*paGenes" ] }, { "cell_type": "code", "execution_count": 64, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 20887 Pact_MBD_only_CpGs.bed-paGenes\n", " 3125 Pact_RRBS_MBD_only_CpGs.bed-paGenes\n", " 37213 Pact_RRBS_only_CpGs.bed-paGenes\n", " 1125285 Pact_WGBS_MBD_only_CpGs.bed-paGenes\n", " 584366 Pact_WGBS_RRBS_MBD_only_CpGs.bed-paGenes\n", " 322665 Pact_WGBS_RRBS_only_CpGs.bed-paGenes\n", " 978232 Pact_WGBS_only_CpGs.bed-paGenes\n", " 366125 Pact_no_method_CpGs.bed-paGenes\n", " 3437898 total\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l *CpGs.bed*paGenes" ] }, { "cell_type": "code", "execution_count": 65, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!wc -l *CpGs.bed*paGenes > 
Pact-upset-data-paGenes-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Method-associated DMC" ] }, { "cell_type": "code", "execution_count": 82, "metadata": { "collapsed": true }, "outputs": [], "source": [ "%%bash\n", "\n", "#Abort on the first failed intersectBed call rather than silently continuing to the next file\n", "set -e\n", "\n", "#Intersect each DMC file with the gene track (-wb appends the overlapping GFF record)\n", "#f keeps its ../../Pact_methDiff/ prefix, so the -paGenes output is written next to the input file\n", "for f in ../../Pact_methDiff/*bed\n", "do\n", " /usr/local/bin/intersectBed \\\n", " -wb \\\n", " -a \"${f}\" \\\n", " -b ../../../genome-feature-files/Pact.GFFannotation.Genes.gff \\\n", " > \"${f}-paGenes\"\n", "done" ] }, { "cell_type": "code", "execution_count": 84, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Pact_MBD_only_CpGs.bed-paGenes\r\n", "Pact_RRBS_MBD_only_CpGs.bed-paGenes\r\n", "Pact_RRBS_only_CpGs.bed-paGenes\r\n", "Pact_WGBS_MBD_only_CpGs.bed-paGenes\r\n", "Pact_WGBS_RRBS_MBD_only_CpGs.bed-paGenes\r\n", "Pact_WGBS_RRBS_only_CpGs.bed-paGenes\r\n", "Pact_WGBS_only_CpGs.bed-paGenes\r\n", "Pact_no_method_CpGs.bed-paGenes\r\n" ] } ], "source": [ "!ls *paGenes" ] }, { "cell_type": "code", "execution_count": 85, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> ../../Pact_methDiff/Pact_RvM_50pMethDiff.bed-paGenes <==\r\n", "scaffold10029_cov108\t1135\t1137\t5.48827350655292e-14\tscaffold10029_cov108\tAUGUSTUS\tgene\t1\t1240\t0.97\t+\t.\tg48305\r\n", "scaffold10029_cov108\t1163\t1165\t3.72644245668257e-34\tscaffold10029_cov108\tAUGUSTUS\tgene\t1\t1240\t0.97\t+\t.\tg48305\r\n", "scaffold10029_cov108\t1206\t1208\t2.69756927245379e-73\tscaffold10029_cov108\tAUGUSTUS\tgene\t1\t1240\t0.97\t+\t.\tg48305\r\n", "scaffold10029_cov108\t1221\t1223\t7.25920609606858e-58\tscaffold10029_cov108\tAUGUSTUS\tgene\t1\t1240\t0.97\t+\t.\tg48305\r\n", "scaffold10029_cov108\t1235\t1237\t1.2272600294399e-58\tscaffold10029_cov108\tAUGUSTUS\tgene\t1\t1240\t0.97\t+\t.\tg48305\r\n",
"scaffold10029_cov108\t1354\t1356\t3.49851955628437e-91\tscaffold10029_cov108\tAUGUSTUS\tgene\t1345\t1788\t0.88\t-\t.\tg48306\r\n", "scaffold10029_cov108\t1362\t1364\t9.27286668964075e-87\tscaffold10029_cov108\tAUGUSTUS\tgene\t1345\t1788\t0.88\t-\t.\tg48306\r\n", "scaffold10029_cov108\t1378\t1380\t4.00289842305736e-204\tscaffold10029_cov108\tAUGUSTUS\tgene\t1345\t1788\t0.88\t-\t.\tg48306\r\n", "scaffold10029_cov108\t1391\t1393\t9.75842830141021e-252\tscaffold10029_cov108\tAUGUSTUS\tgene\t1345\t1788\t0.88\t-\t.\tg48306\r\n", "scaffold10029_cov108\t1397\t1399\t6.14415256835285e-270\tscaffold10029_cov108\tAUGUSTUS\tgene\t1345\t1788\t0.88\t-\t.\tg48306\r\n", "\r\n", "==> ../../Pact_methDiff/Pact_WvM_50pMethDiff.bed-paGenes <==\r\n", "scaffold10029_cov108\t1142\t1144\t6.24217208821468e-42\tscaffold10029_cov108\tAUGUSTUS\tgene\t1\t1240\t0.97\t+\t.\tg48305\r\n", "scaffold10029_cov108\t1163\t1165\t4.0301837234847e-104\tscaffold10029_cov108\tAUGUSTUS\tgene\t1\t1240\t0.97\t+\t.\tg48305\r\n", "scaffold10029_cov108\t1206\t1208\t9.64177909968006e-188\tscaffold10029_cov108\tAUGUSTUS\tgene\t1\t1240\t0.97\t+\t.\tg48305\r\n", "scaffold10029_cov108\t1221\t1223\t2.01570629834058e-164\tscaffold10029_cov108\tAUGUSTUS\tgene\t1\t1240\t0.97\t+\t.\tg48305\r\n", "scaffold10029_cov108\t1235\t1237\t1.01322542811731e-157\tscaffold10029_cov108\tAUGUSTUS\tgene\t1\t1240\t0.97\t+\t.\tg48305\r\n", "scaffold10029_cov108\t1354\t1356\t1.98968530126048e-185\tscaffold10029_cov108\tAUGUSTUS\tgene\t1345\t1788\t0.88\t-\t.\tg48306\r\n", "scaffold10029_cov108\t1362\t1364\t8.85780565471889e-169\tscaffold10029_cov108\tAUGUSTUS\tgene\t1345\t1788\t0.88\t-\t.\tg48306\r\n", "scaffold10029_cov108\t1378\t1380\t3.23056795993557e-131\tscaffold10029_cov108\tAUGUSTUS\tgene\t1345\t1788\t0.88\t-\t.\tg48306\r\n", "scaffold10029_cov108\t1391\t1393\t2.27323504677434e-126\tscaffold10029_cov108\tAUGUSTUS\tgene\t1345\t1788\t0.88\t-\t.\tg48306\r\n", 
"scaffold10029_cov108\t1397\t1399\t1.26748707592785e-128\tscaffold10029_cov108\tAUGUSTUS\tgene\t1345\t1788\t0.88\t-\t.\tg48306\r\n" ] } ], "source": [ "#Check output\n", "!head ../../Pact_methDiff/*paGenes" ] }, { "cell_type": "code", "execution_count": 86, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 2692 ../../Pact_methDiff/Pact_RvM_50pMethDiff.bed-paGenes\r\n", " 5382 ../../Pact_methDiff/Pact_WvM_50pMethDiff.bed-paGenes\r\n", " 8074 total\r\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l ../../Pact_methDiff/*paGenes" ] }, { "cell_type": "code", "execution_count": 87, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!wc -l ../../Pact_methDiff/*paGenes > Pact-DMC-data-paGenes-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 3b. Coding Sequences (CDS)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Upset data" ] }, { "cell_type": "code", "execution_count": 66, "metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "%%bash\n", "\n", "for f in *CpGs.bed\n", "do\n", " /usr/local/bin/intersectBed \\\n", " -wb \\\n", " -a ${f} \\\n", " -b ../../../genome-feature-files/Pact.GFFannotation.CDS.gff \\\n", " > ${f}-paCDS\n", "done" ] }, { "cell_type": "code", "execution_count": 67, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> Pact_MBD_only_CpGs.bed-paCDS <==\n", "scaffold10024_cov87\t11621\t11622\t1\tN/A\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t11593\t11789\t0.59\t+\t2\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t11630\t11631\t1\tN/A\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t11593\t11789\t0.59\t+\t2\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t11651\t11652\t1\tN/A\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t11593\t11789\t0.59\t+\t2\ttranscript_id \"g48296.t1\"; 
gene_id \"g48296\";\n", "scaffold10024_cov87\t11676\t11677\t1\tN/A\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t11593\t11789\t0.59\t+\t2\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10029_cov108\t1953\t1954\t1\tN/A\tN/A\t1\tscaffold10029_cov108\tAUGUSTUS\tCDS\t1875\t1967\t0.88\t-\t0\ttranscript_id \"g48307.t1\"; gene_id \"g48307\";\n", "scaffold10053_cov85\t10206\t10207\t1\tN/A\tN/A\t1\tscaffold10053_cov85\tAUGUSTUS\tCDS\t9983\t11110\t0.58\t+\t0\ttranscript_id \"g48314.t1\"; gene_id \"g48314\";\n", "scaffold10092_cov94\t3866\t3867\t1\tN/A\tN/A\t1\tscaffold10092_cov94\tAUGUSTUS\tCDS\t3818\t3895\t1\t-\t1\ttranscript_id \"g48344.t1\"; gene_id \"g48344\";\n", "scaffold10092_cov94\t3866\t3867\t1\tN/A\tN/A\t1\tscaffold10092_cov94\tAUGUSTUS\tCDS\t3818\t3895\t1\t-\t1\ttranscript_id \"g48344.t2\"; gene_id \"g48344\";\n", "scaffold10092_cov94\t3870\t3871\t1\tN/A\tN/A\t1\tscaffold10092_cov94\tAUGUSTUS\tCDS\t3818\t3895\t1\t-\t1\ttranscript_id \"g48344.t1\"; gene_id \"g48344\";\n", "scaffold10092_cov94\t3870\t3871\t1\tN/A\tN/A\t1\tscaffold10092_cov94\tAUGUSTUS\tCDS\t3818\t3895\t1\t-\t1\ttranscript_id \"g48344.t2\"; gene_id \"g48344\";\n", "\n", "==> Pact_RRBS_MBD_only_CpGs.bed-paCDS <==\n", "scaffold10024_cov87\t10007\t10008\t1\tN/A\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t9087\t10497\t1\t+\t0\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10065_cov90\t15205\t15206\t1\tN/A\t1\t1\tscaffold10065_cov90\tAUGUSTUS\tCDS\t15189\t15342\t0.88\t+\t0\ttranscript_id \"g48332.t1\"; gene_id \"g48332\";\n", "scaffold10101_cov102\t110395\t110396\t1\tN/A\t1\t1\tscaffold10101_cov102\tAUGUSTUS\tCDS\t110177\t110503\t0.78\t+\t0\ttranscript_id \"g48370.t1\"; gene_id \"g48370\";\n", "scaffold10101_cov102\t110395\t110396\t1\tN/A\t1\t1\tscaffold10101_cov102\tAUGUSTUS\tCDS\t110180\t110503\t0.22\t+\t0\ttranscript_id \"g48370.t2\"; gene_id \"g48370\";\n", 
"scaffold1015_cov73\t31998\t31999\t1\tN/A\t1\t1\tscaffold1015_cov73\tAUGUSTUS\tCDS\t31779\t32039\t0.85\t+\t2\ttranscript_id \"g12891.t1\"; gene_id \"g12891\";\n", "scaffold1015_cov73\t31998\t31999\t1\tN/A\t1\t1\tscaffold1015_cov73\tAUGUSTUS\tCDS\t31779\t32039\t0.85\t+\t2\ttranscript_id \"g12891.t2\"; gene_id \"g12891\";\n", "scaffold1015_cov73\t32017\t32018\t1\tN/A\t1\t1\tscaffold1015_cov73\tAUGUSTUS\tCDS\t31779\t32039\t0.85\t+\t2\ttranscript_id \"g12891.t1\"; gene_id \"g12891\";\n", "scaffold1015_cov73\t32017\t32018\t1\tN/A\t1\t1\tscaffold1015_cov73\tAUGUSTUS\tCDS\t31779\t32039\t0.85\t+\t2\ttranscript_id \"g12891.t2\"; gene_id \"g12891\";\n", "scaffold101_cov104\t8596\t8597\t1\tN/A\t1\t1\tscaffold101_cov104\tAUGUSTUS\tCDS\t8392\t8639\t1\t-\t2\ttranscript_id \"g1286.t1\"; gene_id \"g1286\";\n", "scaffold1020_cov97\t26110\t26111\t1\tN/A\t1\t1\tscaffold1020_cov97\tAUGUSTUS\tCDS\t25918\t26209\t0.6\t-\t1\ttranscript_id \"g12920.t1\"; gene_id \"g12920\";\n", "\n", "==> Pact_RRBS_only_CpGs.bed-paCDS <==\n", "scaffold10024_cov87\t10033\t10034\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t9087\t10497\t1\t+\t0\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t37507\t37508\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t37460\t37633\t1\t-\t0\ttranscript_id \"g48302.t1\"; gene_id \"g48302\";\n", "scaffold10024_cov87\t37507\t37508\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t37460\t37633\t1\t-\t0\ttranscript_id \"g48302.t2\"; gene_id \"g48302\";\n", "scaffold10024_cov87\t37517\t37518\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t37460\t37633\t1\t-\t0\ttranscript_id \"g48302.t1\"; gene_id \"g48302\";\n", "scaffold10024_cov87\t37517\t37518\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t37460\t37633\t1\t-\t0\ttranscript_id \"g48302.t2\"; gene_id \"g48302\";\n", "scaffold10024_cov87\t37552\t37553\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t37460\t37633\t1\t-\t0\ttranscript_id \"g48302.t1\"; gene_id \"g48302\";\n", 
"scaffold10024_cov87\t37552\t37553\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t37460\t37633\t1\t-\t0\ttranscript_id \"g48302.t2\"; gene_id \"g48302\";\n", "scaffold10024_cov87\t37564\t37565\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t37460\t37633\t1\t-\t0\ttranscript_id \"g48302.t1\"; gene_id \"g48302\";\n", "scaffold10024_cov87\t37564\t37565\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t37460\t37633\t1\t-\t0\ttranscript_id \"g48302.t2\"; gene_id \"g48302\";\n", "scaffold10024_cov87\t37579\t37580\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t37460\t37633\t1\t-\t0\ttranscript_id \"g48302.t1\"; gene_id \"g48302\";\n", "\n", "==> Pact_WGBS_MBD_only_CpGs.bed-paCDS <==\n", "scaffold10024_cov87\t2860\t2861\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t2852\t3029\t0.75\t+\t1\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t2873\t2874\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t2852\t3029\t0.75\t+\t1\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t2897\t2898\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t2852\t3029\t0.75\t+\t1\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t2920\t2921\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t2852\t3029\t0.75\t+\t1\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t2925\t2926\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t2852\t3029\t0.75\t+\t1\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t2933\t2934\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t2852\t3029\t0.75\t+\t1\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t2971\t2972\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t2852\t3029\t0.75\t+\t1\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t4039\t4040\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t4020\t4161\t1\t-\t1\ttranscript_id \"g48294.t1\"; gene_id \"g48294\";\n", 
"scaffold10024_cov87\t4039\t4040\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t4020\t4161\t1\t-\t1\ttranscript_id \"g48294.t2\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4053\t4054\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t4020\t4161\t1\t-\t1\ttranscript_id \"g48294.t1\"; gene_id \"g48294\";\n", "\n", "==> Pact_WGBS_RRBS_MBD_only_CpGs.bed-paCDS <==\n", "scaffold10024_cov87\t4118\t4119\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t4020\t4161\t1\t-\t1\ttranscript_id \"g48294.t1\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4118\t4119\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t4020\t4161\t1\t-\t1\ttranscript_id \"g48294.t2\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4126\t4127\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t4020\t4161\t1\t-\t1\ttranscript_id \"g48294.t1\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4126\t4127\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t4020\t4161\t1\t-\t1\ttranscript_id \"g48294.t2\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4137\t4138\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t4020\t4161\t1\t-\t1\ttranscript_id \"g48294.t1\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4137\t4138\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t4020\t4161\t1\t-\t1\ttranscript_id \"g48294.t2\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4141\t4142\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t4020\t4161\t1\t-\t1\ttranscript_id \"g48294.t1\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4141\t4142\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t4020\t4161\t1\t-\t1\ttranscript_id \"g48294.t2\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t6272\t6273\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t6055\t7793\t0.98\t+\t2\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t6287\t6288\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tCDS\t6055\t7793\t0.98\t+\t2\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "\n", "==> Pact_WGBS_RRBS_only_CpGs.bed-paCDS <==\n", 
"scaffold10024_cov87\t4945\t4946\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t4733\t4954\t0.54\t+\t0\ttranscript_id \"g48295.t1\"; gene_id \"g48295\";\n", "scaffold10024_cov87\t5496\t5497\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t5490\t5720\t0.98\t+\t2\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t5515\t5516\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t5490\t5720\t0.98\t+\t2\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t5551\t5552\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t5490\t5720\t0.98\t+\t2\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t5562\t5563\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t5490\t5720\t0.98\t+\t2\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t6216\t6217\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t6055\t7793\t0.98\t+\t2\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t6320\t6321\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t6055\t7793\t0.98\t+\t2\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t8064\t8065\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t8032\t8205\t1\t+\t0\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t8085\t8086\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t8032\t8205\t1\t+\t0\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t9902\t9903\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t9087\t10497\t1\t+\t0\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "\n", "==> Pact_WGBS_only_CpGs.bed-paCDS <==\n", "scaffold10024_cov87\t2620\t2621\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t2612\t2697\t0.88\t+\t0\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t2645\t2646\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t2612\t2697\t0.88\t+\t0\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", 
"scaffold10024_cov87\t2657\t2658\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t2612\t2697\t0.88\t+\t0\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t2665\t2666\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t2612\t2697\t0.88\t+\t0\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t2680\t2681\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t2612\t2697\t0.88\t+\t0\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t2685\t2686\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t2612\t2697\t0.88\t+\t0\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t2687\t2688\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t2612\t2697\t0.88\t+\t0\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t2693\t2694\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t2612\t2697\t0.88\t+\t0\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t3012\t3013\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t2852\t3029\t0.75\t+\t1\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t4230\t4231\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t4213\t4264\t1\t-\t2\ttranscript_id \"g48294.t1\"; gene_id \"g48294\";\n", "\n", "==> Pact_no_method_CpGs.bed-paCDS <==\n", "scaffold10024_cov87\t4253\t4254\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t4213\t4264\t1\t-\t2\ttranscript_id \"g48294.t1\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4253\t4254\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t4213\t4264\t1\t-\t2\ttranscript_id \"g48294.t2\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4497\t4498\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t4480\t4675\t0.51\t-\t0\ttranscript_id \"g48294.t1\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4503\t4504\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t4480\t4675\t0.51\t-\t0\ttranscript_id \"g48294.t1\"; gene_id 
\"g48294\";\n", "scaffold10024_cov87\t4512\t4513\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t4480\t4675\t0.51\t-\t0\ttranscript_id \"g48294.t1\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4737\t4738\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t4733\t4954\t0.54\t+\t0\ttranscript_id \"g48295.t1\"; gene_id \"g48295\";\n", "scaffold10024_cov87\t4740\t4741\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t4733\t4954\t0.54\t+\t0\ttranscript_id \"g48295.t1\"; gene_id \"g48295\";\n", "scaffold10024_cov87\t4746\t4747\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t4733\t4954\t0.54\t+\t0\ttranscript_id \"g48295.t1\"; gene_id \"g48295\";\n", "scaffold10024_cov87\t4867\t4868\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t4733\t4954\t0.54\t+\t0\ttranscript_id \"g48295.t1\"; gene_id \"g48295\";\n", "scaffold10024_cov87\t6815\t6816\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tCDS\t6055\t7793\t0.98\t+\t2\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n" ] } ], "source": [ "#Check output\n", "!head *CpGs.bed*paCDS" ] }, { "cell_type": "code", "execution_count": 68, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 10124 Pact_MBD_only_CpGs.bed-paCDS\n", " 1756 Pact_RRBS_MBD_only_CpGs.bed-paCDS\n", " 15147 Pact_RRBS_only_CpGs.bed-paCDS\n", " 752101 Pact_WGBS_MBD_only_CpGs.bed-paCDS\n", " 440549 Pact_WGBS_RRBS_MBD_only_CpGs.bed-paCDS\n", " 140544 Pact_WGBS_RRBS_only_CpGs.bed-paCDS\n", " 378631 Pact_WGBS_only_CpGs.bed-paCDS\n", " 138732 Pact_no_method_CpGs.bed-paCDS\n", " 1877584 total\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l *CpGs.bed*paCDS" ] }, { "cell_type": "code", "execution_count": 69, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!wc -l *CpGs.bed*paCDS > Pact-upset-data-paCDS-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Method-associated DMC" ] }, { "cell_type": "code", "execution_count": 88, 
"metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "%%bash\n", "\n", "for f in ../../Pact_methDiff/*bed\n", "do\n", " /usr/local/bin/intersectBed \\\n", " -wb \\\n", " -a ${f} \\\n", " -b ../../../genome-feature-files/Pact.GFFannotation.CDS.gff \\\n", " > ${f}-paCDS\n", "done" ] }, { "cell_type": "code", "execution_count": 89, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> ../../Pact_methDiff/Pact_RvM_50pMethDiff.bed-paCDS <==\r\n", "scaffold10029_cov108\t1135\t1137\t5.48827350655292e-14\tscaffold10029_cov108\tAUGUSTUS\tCDS\t1059\t1240\t0.98\t+\t2\ttranscript_id \"g48305.t1\"; gene_id \"g48305\";\r\n", "scaffold10029_cov108\t1163\t1165\t3.72644245668257e-34\tscaffold10029_cov108\tAUGUSTUS\tCDS\t1059\t1240\t0.98\t+\t2\ttranscript_id \"g48305.t1\"; gene_id \"g48305\";\r\n", "scaffold10029_cov108\t1206\t1208\t2.69756927245379e-73\tscaffold10029_cov108\tAUGUSTUS\tCDS\t1059\t1240\t0.98\t+\t2\ttranscript_id \"g48305.t1\"; gene_id \"g48305\";\r\n", "scaffold10029_cov108\t1221\t1223\t7.25920609606858e-58\tscaffold10029_cov108\tAUGUSTUS\tCDS\t1059\t1240\t0.98\t+\t2\ttranscript_id \"g48305.t1\"; gene_id \"g48305\";\r\n", "scaffold10029_cov108\t1235\t1237\t1.2272600294399e-58\tscaffold10029_cov108\tAUGUSTUS\tCDS\t1059\t1240\t0.98\t+\t2\ttranscript_id \"g48305.t1\"; gene_id \"g48305\";\r\n", "scaffold10029_cov108\t1354\t1356\t3.49851955628437e-91\tscaffold10029_cov108\tAUGUSTUS\tCDS\t1345\t1788\t0.88\t-\t0\ttranscript_id \"g48306.t1\"; gene_id \"g48306\";\r\n", "scaffold10029_cov108\t1362\t1364\t9.27286668964075e-87\tscaffold10029_cov108\tAUGUSTUS\tCDS\t1345\t1788\t0.88\t-\t0\ttranscript_id \"g48306.t1\"; gene_id \"g48306\";\r\n", "scaffold10029_cov108\t1378\t1380\t4.00289842305736e-204\tscaffold10029_cov108\tAUGUSTUS\tCDS\t1345\t1788\t0.88\t-\t0\ttranscript_id \"g48306.t1\"; gene_id \"g48306\";\r\n", 
"scaffold10029_cov108\t1391\t1393\t9.75842830141021e-252\tscaffold10029_cov108\tAUGUSTUS\tCDS\t1345\t1788\t0.88\t-\t0\ttranscript_id \"g48306.t1\"; gene_id \"g48306\";\r\n", "scaffold10029_cov108\t1397\t1399\t6.14415256835285e-270\tscaffold10029_cov108\tAUGUSTUS\tCDS\t1345\t1788\t0.88\t-\t0\ttranscript_id \"g48306.t1\"; gene_id \"g48306\";\r\n", "\r\n", "==> ../../Pact_methDiff/Pact_WvM_50pMethDiff.bed-paCDS <==\r\n", "scaffold10029_cov108\t1142\t1144\t6.24217208821468e-42\tscaffold10029_cov108\tAUGUSTUS\tCDS\t1059\t1240\t0.98\t+\t2\ttranscript_id \"g48305.t1\"; gene_id \"g48305\";\r\n", "scaffold10029_cov108\t1163\t1165\t4.0301837234847e-104\tscaffold10029_cov108\tAUGUSTUS\tCDS\t1059\t1240\t0.98\t+\t2\ttranscript_id \"g48305.t1\"; gene_id \"g48305\";\r\n", "scaffold10029_cov108\t1206\t1208\t9.64177909968006e-188\tscaffold10029_cov108\tAUGUSTUS\tCDS\t1059\t1240\t0.98\t+\t2\ttranscript_id \"g48305.t1\"; gene_id \"g48305\";\r\n", "scaffold10029_cov108\t1221\t1223\t2.01570629834058e-164\tscaffold10029_cov108\tAUGUSTUS\tCDS\t1059\t1240\t0.98\t+\t2\ttranscript_id \"g48305.t1\"; gene_id \"g48305\";\r\n", "scaffold10029_cov108\t1235\t1237\t1.01322542811731e-157\tscaffold10029_cov108\tAUGUSTUS\tCDS\t1059\t1240\t0.98\t+\t2\ttranscript_id \"g48305.t1\"; gene_id \"g48305\";\r\n", "scaffold10029_cov108\t1354\t1356\t1.98968530126048e-185\tscaffold10029_cov108\tAUGUSTUS\tCDS\t1345\t1788\t0.88\t-\t0\ttranscript_id \"g48306.t1\"; gene_id \"g48306\";\r\n", "scaffold10029_cov108\t1362\t1364\t8.85780565471889e-169\tscaffold10029_cov108\tAUGUSTUS\tCDS\t1345\t1788\t0.88\t-\t0\ttranscript_id \"g48306.t1\"; gene_id \"g48306\";\r\n", "scaffold10029_cov108\t1378\t1380\t3.23056795993557e-131\tscaffold10029_cov108\tAUGUSTUS\tCDS\t1345\t1788\t0.88\t-\t0\ttranscript_id \"g48306.t1\"; gene_id \"g48306\";\r\n", "scaffold10029_cov108\t1391\t1393\t2.27323504677434e-126\tscaffold10029_cov108\tAUGUSTUS\tCDS\t1345\t1788\t0.88\t-\t0\ttranscript_id \"g48306.t1\"; gene_id \"g48306\";\r\n", 
"scaffold10029_cov108\t1397\t1399\t1.26748707592785e-128\tscaffold10029_cov108\tAUGUSTUS\tCDS\t1345\t1788\t0.88\t-\t0\ttranscript_id \"g48306.t1\"; gene_id \"g48306\";\r\n" ] } ], "source": [ "#Check output\n", "!head ../../Pact_methDiff/*paCDS" ] }, { "cell_type": "code", "execution_count": 90, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 2328 ../../Pact_methDiff/Pact_RvM_50pMethDiff.bed-paCDS\r\n", " 4676 ../../Pact_methDiff/Pact_WvM_50pMethDiff.bed-paCDS\r\n", " 7004 total\r\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l ../../Pact_methDiff/*paCDS" ] }, { "cell_type": "code", "execution_count": 91, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!wc -l ../../Pact_methDiff/*paCDS > Pact-DMC-data-paCDS-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 3c. Introns" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Upset data" ] }, { "cell_type": "code", "execution_count": 70, "metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "%%bash\n", "\n", "for f in *CpGs.bed\n", "do\n", " /usr/local/bin/intersectBed \\\n", " -wb \\\n", " -a ${f} \\\n", " -b ../../../genome-feature-files/Pact.GFFannotation.Intron.gff \\\n", " > ${f}-paIntron\n", "done" ] }, { "cell_type": "code", "execution_count": 71, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> Pact_MBD_only_CpGs.bed-paIntron <==\n", "scaffold10024_cov87\t5947\t5948\t1\tN/A\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tintron\t5721\t6054\t0.98\t+\t.\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t29844\t29845\t1\tN/A\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tintron\t29073\t29968\t0.81\t-\t.\ttranscript_id \"g48298.t1\"; gene_id \"g48298\";\n", "scaffold10065_cov90\t19126\t19127\t1\tN/A\tN/A\t1\tscaffold10065_cov90\tAUGUSTUS\tintron\t19092\t19197\t1\t-\t.\ttranscript_id 
\"g48336.t1\"; gene_id \"g48336\";\n", "scaffold10065_cov90\t19126\t19127\t1\tN/A\tN/A\t1\tscaffold10065_cov90\tAUGUSTUS\tintron\t19092\t19197\t1\t-\t.\ttranscript_id \"g48337.t1\"; gene_id \"g48337\";\n", "scaffold10065_cov90\t19133\t19134\t1\tN/A\tN/A\t1\tscaffold10065_cov90\tAUGUSTUS\tintron\t19092\t19197\t1\t-\t.\ttranscript_id \"g48336.t1\"; gene_id \"g48336\";\n", "scaffold10065_cov90\t19133\t19134\t1\tN/A\tN/A\t1\tscaffold10065_cov90\tAUGUSTUS\tintron\t19092\t19197\t1\t-\t.\ttranscript_id \"g48337.t1\"; gene_id \"g48337\";\n", "scaffold10101_cov102\t962\t963\t1\tN/A\tN/A\t1\tscaffold10101_cov102\tAUGUSTUS\tintron\t1\t2817\t0.28\t+\t.\ttranscript_id \"g48345.t1\"; gene_id \"g48345\";\n", "scaffold10101_cov102\t989\t990\t1\tN/A\tN/A\t1\tscaffold10101_cov102\tAUGUSTUS\tintron\t1\t2817\t0.28\t+\t.\ttranscript_id \"g48345.t1\"; gene_id \"g48345\";\n", "scaffold10101_cov102\t55323\t55324\t1\tN/A\tN/A\t1\tscaffold10101_cov102\tAUGUSTUS\tintron\t53949\t55457\t0.92\t-\t.\ttranscript_id \"g48356.t1\"; gene_id \"g48356\";\n", "scaffold101358_cov64\t3919\t3920\t1\tN/A\tN/A\t1\tscaffold101358_cov64\tAUGUSTUS\tintron\t3428\t5125\t1\t-\t.\ttranscript_id \"g59567.t1\"; gene_id \"g59567\";\n", "\n", "==> Pact_RRBS_MBD_only_CpGs.bed-paIntron <==\n", "scaffold10092_cov94\t5228\t5229\t1\tN/A\t1\t1\tscaffold10092_cov94\tAUGUSTUS\tintron\t3896\t5337\t0.59\t-\t.\ttranscript_id \"g48344.t1\"; gene_id \"g48344\";\n", "scaffold10092_cov94\t5239\t5240\t1\tN/A\t1\t1\tscaffold10092_cov94\tAUGUSTUS\tintron\t3896\t5337\t0.59\t-\t.\ttranscript_id \"g48344.t1\"; gene_id \"g48344\";\n", "scaffold10092_cov94\t5291\t5292\t1\tN/A\t1\t1\tscaffold10092_cov94\tAUGUSTUS\tintron\t3896\t5337\t0.59\t-\t.\ttranscript_id \"g48344.t1\"; gene_id \"g48344\";\n", "scaffold10101_cov102\t78403\t78404\t1\tN/A\t1\t1\tscaffold10101_cov102\tAUGUSTUS\tintron\t78333\t79652\t1\t+\t.\ttranscript_id \"g48365.t1\"; gene_id \"g48365\";\n", 
"scaffold10101_cov102\t78403\t78404\t1\tN/A\t1\t1\tscaffold10101_cov102\tAUGUSTUS\tintron\t78333\t79652\t1\t+\t.\ttranscript_id \"g48365.t2\"; gene_id \"g48365\";\n", "scaffold10156_cov58\t1626\t1627\t1\tN/A\t1\t1\tscaffold10156_cov58\tAUGUSTUS\tintron\t1058\t2979\t0.65\t+\t.\ttranscript_id \"g48398.t1\"; gene_id \"g48398\";\n", "scaffold10156_cov58\t1626\t1627\t1\tN/A\t1\t1\tscaffold10156_cov58\tAUGUSTUS\tintron\t1058\t2979\t0.65\t+\t.\ttranscript_id \"g48398.t2\"; gene_id \"g48398\";\n", "scaffold10156_cov58\t1630\t1631\t1\tN/A\t1\t1\tscaffold10156_cov58\tAUGUSTUS\tintron\t1058\t2979\t0.65\t+\t.\ttranscript_id \"g48398.t1\"; gene_id \"g48398\";\n", "scaffold10156_cov58\t1630\t1631\t1\tN/A\t1\t1\tscaffold10156_cov58\tAUGUSTUS\tintron\t1058\t2979\t0.65\t+\t.\ttranscript_id \"g48398.t2\"; gene_id \"g48398\";\n", "scaffold1015_cov73\t32051\t32052\t1\tN/A\t1\t1\tscaffold1015_cov73\tAUGUSTUS\tintron\t32040\t32532\t0.73\t+\t.\ttranscript_id \"g12891.t1\"; gene_id \"g12891\";\n", "\n", "==> Pact_RRBS_only_CpGs.bed-paIntron <==\n", "scaffold10024_cov87\t5292\t5293\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t5081\t5489\t0.68\t+\t.\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t16321\t16322\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t16078\t16457\t1\t-\t.\ttranscript_id \"g48297.t1\"; gene_id \"g48297\";\n", "scaffold10024_cov87\t16321\t16322\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t16078\t16457\t1\t-\t.\ttranscript_id \"g48297.t2\"; gene_id \"g48297\";\n", "scaffold10024_cov87\t16376\t16377\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t16078\t16457\t1\t-\t.\ttranscript_id \"g48297.t1\"; gene_id \"g48297\";\n", "scaffold10024_cov87\t16376\t16377\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t16078\t16457\t1\t-\t.\ttranscript_id \"g48297.t2\"; gene_id \"g48297\";\n", 
"scaffold10024_cov87\t27754\t27755\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t26644\t28574\t1\t-\t.\ttranscript_id \"g48298.t1\"; gene_id \"g48298\";\n", "scaffold10024_cov87\t27773\t27774\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t26644\t28574\t1\t-\t.\ttranscript_id \"g48298.t1\"; gene_id \"g48298\";\n", "scaffold10024_cov87\t27898\t27899\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t26644\t28574\t1\t-\t.\ttranscript_id \"g48298.t1\"; gene_id \"g48298\";\n", "scaffold10024_cov87\t27905\t27906\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t26644\t28574\t1\t-\t.\ttranscript_id \"g48298.t1\"; gene_id \"g48298\";\n", "scaffold10024_cov87\t27930\t27931\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t26644\t28574\t1\t-\t.\ttranscript_id \"g48298.t1\"; gene_id \"g48298\";\n", "\n", "==> Pact_WGBS_MBD_only_CpGs.bed-paIntron <==\n", "scaffold10024_cov87\t2800\t2801\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tintron\t2698\t2851\t0.88\t+\t.\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t2805\t2806\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tintron\t2698\t2851\t0.88\t+\t.\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t2813\t2814\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tintron\t2698\t2851\t0.88\t+\t.\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t2825\t2826\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tintron\t2698\t2851\t0.88\t+\t.\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t2837\t2838\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tintron\t2698\t2851\t0.88\t+\t.\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t2845\t2846\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tintron\t2698\t2851\t0.88\t+\t.\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t5935\t5936\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tintron\t5721\t6054\t0.98\t+\t.\ttranscript_id 
\"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t6001\t6002\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tintron\t5721\t6054\t0.98\t+\t.\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t9041\t9042\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tintron\t8206\t9086\t1\t+\t.\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t9045\t9046\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tintron\t8206\t9086\t1\t+\t.\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "\n", "==> Pact_WGBS_RRBS_MBD_only_CpGs.bed-paIntron <==\n", "scaffold10024_cov87\t4167\t4168\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tintron\t4162\t4212\t1\t-\t.\ttranscript_id \"g48294.t1\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4167\t4168\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tintron\t4162\t4212\t1\t-\t.\ttranscript_id \"g48294.t2\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4181\t4182\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tintron\t4162\t4212\t1\t-\t.\ttranscript_id \"g48294.t1\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4181\t4182\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tintron\t4162\t4212\t1\t-\t.\ttranscript_id \"g48294.t2\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t8226\t8227\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tintron\t8206\t9086\t1\t+\t.\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t15143\t15144\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tintron\t14785\t15880\t0.64\t-\t.\ttranscript_id \"g48297.t1\"; gene_id \"g48297\";\n", "scaffold10024_cov87\t15143\t15144\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tintron\t14785\t15880\t0.64\t-\t.\ttranscript_id \"g48297.t2\"; gene_id \"g48297\";\n", "scaffold10024_cov87\t15149\t15150\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tintron\t14785\t15880\t0.64\t-\t.\ttranscript_id \"g48297.t1\"; gene_id \"g48297\";\n", 
"scaffold10024_cov87\t15149\t15150\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tintron\t14785\t15880\t0.64\t-\t.\ttranscript_id \"g48297.t2\"; gene_id \"g48297\";\n", "scaffold10024_cov87\t15183\t15184\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tintron\t14785\t15880\t0.64\t-\t.\ttranscript_id \"g48297.t1\"; gene_id \"g48297\";\n", "\n", "==> Pact_WGBS_RRBS_only_CpGs.bed-paIntron <==\n", "scaffold10024_cov87\t5084\t5085\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t5081\t5489\t0.68\t+\t.\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t5129\t5130\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t5081\t5489\t0.68\t+\t.\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t5136\t5137\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t5081\t5489\t0.68\t+\t.\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t5150\t5151\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t5081\t5489\t0.68\t+\t.\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t5174\t5175\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t5081\t5489\t0.68\t+\t.\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t5177\t5178\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t5081\t5489\t0.68\t+\t.\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t5247\t5248\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t5081\t5489\t0.68\t+\t.\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t8693\t8694\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t8206\t9086\t1\t+\t.\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t8755\t8756\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t8206\t9086\t1\t+\t.\ttranscript_id \"g48296.t1\"; gene_id \"g48296\";\n", "scaffold10024_cov87\t15365\t15366\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t14785\t15880\t0.64\t-\t.\ttranscript_id 
\"g48297.t1\"; gene_id \"g48297\";\n", "\n", "==> Pact_WGBS_only_CpGs.bed-paIntron <==\n", "scaffold10024_cov87\t2705\t2706\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t2698\t2851\t0.88\t+\t.\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t2713\t2714\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t2698\t2851\t0.88\t+\t.\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t2740\t2741\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t2698\t2851\t0.88\t+\t.\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t2745\t2746\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t2698\t2851\t0.88\t+\t.\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t2765\t2766\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t2698\t2851\t0.88\t+\t.\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t2777\t2778\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t2698\t2851\t0.88\t+\t.\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t2786\t2787\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t2698\t2851\t0.88\t+\t.\ttranscript_id \"g48293.t1\"; gene_id \"g48293\";\n", "scaffold10024_cov87\t4458\t4459\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t4265\t4479\t0.51\t-\t.\ttranscript_id \"g48294.t1\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4458\t4459\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t4265\t4521\t0.38\t-\t.\ttranscript_id \"g48294.t2\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4472\t4473\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t4265\t4479\t0.51\t-\t.\ttranscript_id \"g48294.t1\"; gene_id \"g48294\";\n", "\n", "==> Pact_no_method_CpGs.bed-paIntron <==\n", "scaffold10024_cov87\t4292\t4293\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t4265\t4479\t0.51\t-\t.\ttranscript_id \"g48294.t1\"; gene_id \"g48294\";\n", 
"scaffold10024_cov87\t4292\t4293\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t4265\t4521\t0.38\t-\t.\ttranscript_id \"g48294.t2\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4300\t4301\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t4265\t4479\t0.51\t-\t.\ttranscript_id \"g48294.t1\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4300\t4301\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t4265\t4521\t0.38\t-\t.\ttranscript_id \"g48294.t2\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4349\t4350\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t4265\t4479\t0.51\t-\t.\ttranscript_id \"g48294.t1\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4349\t4350\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t4265\t4521\t0.38\t-\t.\ttranscript_id \"g48294.t2\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4378\t4379\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t4265\t4479\t0.51\t-\t.\ttranscript_id \"g48294.t1\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4378\t4379\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t4265\t4521\t0.38\t-\t.\ttranscript_id \"g48294.t2\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4407\t4408\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t4265\t4479\t0.51\t-\t.\ttranscript_id \"g48294.t1\"; gene_id \"g48294\";\n", "scaffold10024_cov87\t4407\t4408\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tintron\t4265\t4521\t0.38\t-\t.\ttranscript_id \"g48294.t2\"; gene_id \"g48294\";\n" ] } ], "source": [ "#Check output\n", "!head *CpGs.bed*paIntron" ] }, { "cell_type": "code", "execution_count": 72, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 17048 Pact_MBD_only_CpGs.bed-paIntron\n", " 2222 Pact_RRBS_MBD_only_CpGs.bed-paIntron\n", " 33512 Pact_RRBS_only_CpGs.bed-paIntron\n", " 764481 Pact_WGBS_MBD_only_CpGs.bed-paIntron\n", " 338445 Pact_WGBS_RRBS_MBD_only_CpGs.bed-paIntron\n", " 295081 
Pact_WGBS_RRBS_only_CpGs.bed-paIntron\n", " 949785 Pact_WGBS_only_CpGs.bed-paIntron\n", " 340695 Pact_no_method_CpGs.bed-paIntron\n", " 2741269 total\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l *CpGs.bed*paIntron" ] }, { "cell_type": "code", "execution_count": 73, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!wc -l *CpGs.bed*paIntron > Pact-upset-data-paIntron-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Method-associated DMC" ] }, { "cell_type": "code", "execution_count": 92, "metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "%%bash\n", "set -euo pipefail\n", "\n", "#Overlap each DMC BED file with annotated introns; -wb appends the matching GFF record to every hit\n", "for f in ../../Pact_methDiff/*bed\n", "do\n", "    #Stop with an error if the glob matched nothing, rather than writing a junk output file\n", "    [ -e \"${f}\" ] || { echo \"No BED files found in ../../Pact_methDiff/\" >&2; exit 1; }\n", "    /usr/local/bin/intersectBed \\\n", "    -wb \\\n", "    -a \"${f}\" \\\n", "    -b ../../../genome-feature-files/Pact.GFFannotation.Intron.gff \\\n", "    > \"${f}-paIntron\"\n", "done" ] }, { "cell_type": "code", "execution_count": 93, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> ../../Pact_methDiff/Pact_RvM_50pMethDiff.bed-paIntron <==\r\n", "scaffold10101_cov102\t153755\t153756\t7.54261136700762e-59\tscaffold10101_cov102\tAUGUSTUS\tintron\t153756\t153804\t1\t+\t.\ttranscript_id \"g48380.t1\"; gene_id \"g48380\";\r\n", "scaffold10101_cov102\t153755\t153756\t7.54261136700762e-59\tscaffold10101_cov102\tAUGUSTUS\tintron\t153756\t153804\t1\t+\t.\ttranscript_id \"g48380.t2\"; gene_id \"g48380\";\r\n", "scaffold101_cov104\t84407\t84409\t1.91595928017216e-18\tscaffold101_cov104\tAUGUSTUS\tintron\t84316\t84874\t1\t+\t.\ttranscript_id \"g1305.t1\"; gene_id \"g1305\";\r\n", "scaffold101_cov104\t84407\t84409\t1.91595928017216e-18\tscaffold101_cov104\tAUGUSTUS\tintron\t84316\t84874\t1\t+\t.\ttranscript_id \"g1305.t2\"; gene_id \"g1305\";\r\n", "scaffold10300_cov112\t346764\t346766\t1.01888040359818e-116\tscaffold10300_cov112\tAUGUSTUS\tintron\t346740\t346865\t0.41\t-\t.\ttranscript_id \"g48581.t2\"; gene_id \"g48581\";\r\n", 
"scaffold1035_cov113\t72632\t72634\t4.13747823225651e-16\tscaffold1035_cov113\tAUGUSTUS\tintron\t72622\t72665\t1\t-\t.\ttranscript_id \"g13050.t2\"; gene_id \"g13050\";\r\n", "scaffold1035_cov113\t72632\t72634\t4.13747823225651e-16\tscaffold1035_cov113\tAUGUSTUS\tintron\t72622\t72665\t1\t-\t.\ttranscript_id \"g13050.t3\"; gene_id \"g13050\";\r\n", "scaffold1089_cov101\t44439\t44441\t9.15850378654935e-82\tscaffold1089_cov101\tAUGUSTUS\tintron\t44402\t44564\t0.99\t-\t.\ttranscript_id \"g13864.t1\"; gene_id \"g13864\";\r\n", "scaffold1089_cov101\t44445\t44447\t3.29578562868739e-98\tscaffold1089_cov101\tAUGUSTUS\tintron\t44402\t44564\t0.99\t-\t.\ttranscript_id \"g13864.t1\"; gene_id \"g13864\";\r\n", "scaffold1089_cov101\t44450\t44452\t3.91445234363053e-93\tscaffold1089_cov101\tAUGUSTUS\tintron\t44402\t44564\t0.99\t-\t.\ttranscript_id \"g13864.t1\"; gene_id \"g13864\";\r\n", "\r\n", "==> ../../Pact_methDiff/Pact_WvM_50pMethDiff.bed-paIntron <==\r\n", "scaffold101_cov104\t94807\t94809\t8.82108976864582e-10\tscaffold101_cov104\tAUGUSTUS\tintron\t94714\t94946\t0.6\t-\t.\ttranscript_id \"g1306.t1\"; gene_id \"g1306\";\r\n", "scaffold10300_cov112\t346742\t346744\t5.92771344066576e-18\tscaffold10300_cov112\tAUGUSTUS\tintron\t346740\t346865\t0.41\t-\t.\ttranscript_id \"g48581.t2\"; gene_id \"g48581\";\r\n", "scaffold10300_cov112\t350267\t350269\t8.09732423433028e-07\tscaffold10300_cov112\tAUGUSTUS\tintron\t350068\t350669\t0.49\t+\t.\ttranscript_id \"g48583.t1\"; gene_id \"g48583\";\r\n", "scaffold1034_cov100\t44642\t44644\t4.1398459924651e-13\tscaffold1034_cov100\tAUGUSTUS\tintron\t44009\t47866\t0.42\t-\t.\ttranscript_id \"g13035.t1\"; gene_id \"g13035\";\r\n", "scaffold1034_cov100\t44642\t44644\t4.1398459924651e-13\tscaffold1034_cov100\tAUGUSTUS\tintron\t44009\t45423\t0.45\t-\t.\ttranscript_id \"g13035.t2\"; gene_id \"g13035\";\r\n", "scaffold1034_cov100\t44717\t44719\t2.38894183642631e-53\tscaffold1034_cov100\tAUGUSTUS\tintron\t44009\t47866\t0.42\t-\t.\ttranscript_id 
\"g13035.t1\"; gene_id \"g13035\";\r\n", "scaffold1034_cov100\t44717\t44719\t2.38894183642631e-53\tscaffold1034_cov100\tAUGUSTUS\tintron\t44009\t45423\t0.45\t-\t.\ttranscript_id \"g13035.t2\"; gene_id \"g13035\";\r\n", "scaffold1034_cov100\t44739\t44741\t5.69401887781703e-67\tscaffold1034_cov100\tAUGUSTUS\tintron\t44009\t47866\t0.42\t-\t.\ttranscript_id \"g13035.t1\"; gene_id \"g13035\";\r\n", "scaffold1034_cov100\t44739\t44741\t5.69401887781703e-67\tscaffold1034_cov100\tAUGUSTUS\tintron\t44009\t45423\t0.45\t-\t.\ttranscript_id \"g13035.t2\"; gene_id \"g13035\";\r\n", "scaffold1034_cov100\t44778\t44780\t1.44763513912288e-65\tscaffold1034_cov100\tAUGUSTUS\tintron\t44009\t47866\t0.42\t-\t.\ttranscript_id \"g13035.t1\"; gene_id \"g13035\";\r\n" ] } ], "source": [ "#Check output\n", "!head ../../Pact_methDiff/*paIntron" ] }, { "cell_type": "code", "execution_count": 94, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 882 ../../Pact_methDiff/Pact_RvM_50pMethDiff.bed-paIntron\r\n", " 1840 ../../Pact_methDiff/Pact_WvM_50pMethDiff.bed-paIntron\r\n", " 2722 total\r\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l ../../Pact_methDiff/*paIntron" ] }, { "cell_type": "code", "execution_count": 95, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!wc -l ../../Pact_methDiff/*paIntron > Pact-DMC-data-paIntron-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 3d. 
Flanking regions" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Upset data" ] }, { "cell_type": "code", "execution_count": 74, "metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "%%bash\n", "set -euo pipefail\n", "\n", "#Overlap each CpG category BED file with annotated flanking regions; -wb appends the matching GFF record to every hit\n", "for f in *CpGs.bed\n", "do\n", "    #Stop with an error if the glob matched nothing, rather than writing a junk output file\n", "    [ -e \"${f}\" ] || { echo \"No *CpGs.bed files found in the current directory\" >&2; exit 1; }\n", "    /usr/local/bin/intersectBed \\\n", "    -wb \\\n", "    -a \"${f}\" \\\n", "    -b ../../../genome-feature-files/Pact.GFFannotation.flanks.gff \\\n", "    > \"${f}-paFlanks\"\n", "done" ] }, { "cell_type": "code", "execution_count": 75, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> Pact_MBD_only_CpGs.bed-paFlanks <==\n", "scaffold10053_cov85\t952\t953\t1\tN/A\tN/A\t1\tscaffold10053_cov85\tAUGUSTUS\tgene\t308\t1307\t0.81\t+\t.\tg48311\n", "scaffold10053_cov85\t1136\t1137\t1\tN/A\tN/A\t1\tscaffold10053_cov85\tAUGUSTUS\tgene\t308\t1307\t0.81\t+\t.\tg48311\n", "scaffold10053_cov85\t1169\t1170\t1\tN/A\tN/A\t1\tscaffold10053_cov85\tAUGUSTUS\tgene\t308\t1307\t0.81\t+\t.\tg48311\n", "scaffold10053_cov85\t1182\t1183\t1\tN/A\tN/A\t1\tscaffold10053_cov85\tAUGUSTUS\tgene\t308\t1307\t0.81\t+\t.\tg48311\n", "scaffold10053_cov85\t1219\t1220\t1\tN/A\tN/A\t1\tscaffold10053_cov85\tAUGUSTUS\tgene\t308\t1307\t0.81\t+\t.\tg48311\n", "scaffold10053_cov85\t9776\t9777\t1\tN/A\tN/A\t1\tscaffold10053_cov85\tAUGUSTUS\tgene\t9592\t9982\t0.94\t+\t.\tg48313\n", "scaffold10053_cov85\t9776\t9777\t1\tN/A\tN/A\t1\tscaffold10053_cov85\tAUGUSTUS\tgene\t9592\t9982\t0.58\t+\t.\tg48314\n", "scaffold10064_cov60\t14578\t14579\t1\tN/A\tN/A\t1\tscaffold10064_cov60\tAUGUSTUS\tgene\t13739\t14692\t0.68\t+\t.\tg48322\n", "scaffold10064_cov60\t14578\t14579\t1\tN/A\tN/A\t1\tscaffold10064_cov60\tAUGUSTUS\tgene\t13739\t14692\t0.81\t-\t.\tg48323\n", "scaffold10064_cov60\t14578\t14579\t1\tN/A\tN/A\t1\tscaffold10064_cov60\tAUGUSTUS\tgene\t14546\t14692\t1\t+\t.\tg48324\n", "\n", "==> Pact_RRBS_MBD_only_CpGs.bed-paFlanks <==\n", 
"scaffold10024_cov87\t30933\t30934\t1\tN/A\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t29986\t30985\t0.81\t-\t.\tg48298\n", "scaffold10024_cov87\t30936\t30937\t1\tN/A\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t29986\t30985\t0.81\t-\t.\tg48298\n", "scaffold10024_cov87\t30949\t30950\t1\tN/A\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t29986\t30985\t0.81\t-\t.\tg48298\n", "scaffold10024_cov87\t30952\t30953\t1\tN/A\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t29986\t30985\t0.81\t-\t.\tg48298\n", "scaffold10169_cov58\t8483\t8484\t1\tN/A\t1\t1\tscaffold10169_cov58\tAUGUSTUS\tgene\t8361\t8791\t0.82\t-\t.\tg48420\n", "scaffold10169_cov58\t8483\t8484\t1\tN/A\t1\t1\tscaffold10169_cov58\tAUGUSTUS\tgene\t8361\t8791\t0.86\t+\t.\tg48421\n", "scaffold10169_cov58\t8575\t8576\t1\tN/A\t1\t1\tscaffold10169_cov58\tAUGUSTUS\tgene\t8361\t8791\t0.82\t-\t.\tg48420\n", "scaffold10169_cov58\t8575\t8576\t1\tN/A\t1\t1\tscaffold10169_cov58\tAUGUSTUS\tgene\t8361\t8791\t0.86\t+\t.\tg48421\n", "scaffold10185_cov55\t6181\t6182\t1\tN/A\t1\t1\tscaffold10185_cov55\tAUGUSTUS\tgene\t6042\t7041\t1\t-\t.\tg48435\n", "scaffold10185_cov55\t6278\t6279\t1\tN/A\t1\t1\tscaffold10185_cov55\tAUGUSTUS\tgene\t6042\t7041\t1\t-\t.\tg48435\n", "\n", "==> Pact_RRBS_only_CpGs.bed-paFlanks <==\n", "scaffold10024_cov87\t3362\t3363\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t3362\t3363\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\n", "scaffold10024_cov87\t3369\t3370\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t3369\t3370\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\n", "scaffold10024_cov87\t3389\t3390\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t3389\t3390\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\n", 
"scaffold10024_cov87\t13028\t13029\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t12162\t13161\t0.26\t+\t.\tg48296\n", "scaffold10024_cov87\t13032\t13033\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t12162\t13161\t0.26\t+\t.\tg48296\n", "scaffold10024_cov87\t46048\t46049\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t45560\t46559\t0.93\t+\t.\tg48303\n", "scaffold10053_cov85\t2233\t2234\t1\tN/A\t1\tN/A\tscaffold10053_cov85\tAUGUSTUS\tgene\t1812\t2811\t0.17\t+\t.\tg48312\n", "\n", "==> Pact_WGBS_MBD_only_CpGs.bed-paFlanks <==\n", "scaffold10024_cov87\t1859\t1860\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t1992\t1993\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2011\t2012\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2018\t2019\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2062\t2063\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2088\t2089\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2130\t2131\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2175\t2176\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2183\t2184\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2200\t2201\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "\n", "==> Pact_WGBS_RRBS_MBD_only_CpGs.bed-paFlanks <==\n", "scaffold10024_cov87\t1850\t1851\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", 
"scaffold10024_cov87\t1874\t1875\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t1880\t1881\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t1905\t1906\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t1931\t1932\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t1951\t1952\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t1973\t1974\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t3417\t3418\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t3417\t3418\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\n", "scaffold10024_cov87\t3427\t3428\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\n", "\n", "==> Pact_WGBS_RRBS_only_CpGs.bed-paFlanks <==\n", "scaffold10024_cov87\t1793\t1794\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t1795\t1796\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t1811\t1812\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2250\t2251\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2280\t2281\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2286\t2287\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t3359\t3360\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\n", 
"scaffold10024_cov87\t3359\t3360\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\n", "scaffold10024_cov87\t3507\t3508\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t3507\t3508\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\n", "\n", "==> Pact_WGBS_only_CpGs.bed-paFlanks <==\n", "scaffold10024_cov87\t1623\t1624\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t1628\t1629\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t1634\t1635\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t1759\t1760\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2232\t2233\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2330\t2331\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2339\t2340\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2357\t2358\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2365\t2366\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2389\t2390\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "\n", "==> Pact_no_method_CpGs.bed-paFlanks <==\n", "scaffold10024_cov87\t1654\t1655\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t1690\t1691\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", 
"scaffold10024_cov87\t1695\t1696\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t1698\t1699\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t2543\t2544\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t3782\t3783\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t3782\t3783\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\n", "scaffold10024_cov87\t3782\t3783\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3733\t4019\t0.54\t+\t.\tg48295\n", "scaffold10024_cov87\t3877\t3878\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\n", "scaffold10024_cov87\t3877\t3878\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\n" ] } ], "source": [ "#Check output\n", "!head *CpGs.bed*paFlanks" ] }, { "cell_type": "code", "execution_count": 76, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 16811 Pact_MBD_only_CpGs.bed-paFlanks\n", " 2751 Pact_RRBS_MBD_only_CpGs.bed-paFlanks\n", " 35794 Pact_RRBS_only_CpGs.bed-paFlanks\n", " 584556 Pact_WGBS_MBD_only_CpGs.bed-paFlanks\n", " 308431 Pact_WGBS_RRBS_MBD_only_CpGs.bed-paFlanks\n", " 244170 Pact_WGBS_RRBS_only_CpGs.bed-paFlanks\n", " 724216 Pact_WGBS_only_CpGs.bed-paFlanks\n", " 335155 Pact_no_method_CpGs.bed-paFlanks\n", " 2251884 total\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l *CpGs.bed*paFlanks" ] }, { "cell_type": "code", "execution_count": 77, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!wc -l *CpGs.bed*paFlanks > Pact-upset-data-paFlanks-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Method-associated DMC" ] }, { "cell_type": 
"code", "execution_count": 96, "metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "%%bash\n", "\n", "for f in ../../Pact_methDiff/*bed\n", "do\n", " /usr/local/bin/intersectBed \\\n", " -wb \\\n", " -a ${f} \\\n", " -b ../../../genome-feature-files/Pact.GFFannotation.flanks.gff \\\n", " > ${f}-paFlanks\n", "done" ] }, { "cell_type": "code", "execution_count": 97, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> ../../Pact_methDiff/Pact_RvM_50pMethDiff.bed-paFlanks <==\r\n", "scaffold10029_cov108\t1242\t1244\t4.06057732917881e-51\tscaffold10029_cov108\tAUGUSTUS\tgene\t1241\t1344\t0.97\t+\t.\tg48305\r\n", "scaffold10029_cov108\t1242\t1244\t4.06057732917881e-51\tscaffold10029_cov108\tAUGUSTUS\tgene\t1241\t1344\t0.88\t-\t.\tg48306\r\n", "scaffold10029_cov108\t1242\t1244\t4.06057732917881e-51\tscaffold10029_cov108\tAUGUSTUS\tgene\t1241\t1344\t0.49\t-\t.\tg48307\r\n", "scaffold10029_cov108\t1320\t1322\t1.56116529744743e-58\tscaffold10029_cov108\tAUGUSTUS\tgene\t1241\t1344\t0.97\t+\t.\tg48305\r\n", "scaffold10029_cov108\t1320\t1322\t1.56116529744743e-58\tscaffold10029_cov108\tAUGUSTUS\tgene\t1241\t1344\t0.88\t-\t.\tg48306\r\n", "scaffold10029_cov108\t1320\t1322\t1.56116529744743e-58\tscaffold10029_cov108\tAUGUSTUS\tgene\t1241\t1344\t0.49\t-\t.\tg48307\r\n", "scaffold10029_cov108\t11099\t11101\t6.59643058301839e-248\tscaffold10029_cov108\tAUGUSTUS\tgene\t10449\t11448\t0.88\t-\t.\tg48308\r\n", "scaffold10456_cov86\t6372\t6374\t1.40935800393671e-31\tscaffold10456_cov86\tAUGUSTUS\tgene\t6160\t6587\t0.82\t-\t.\tg48715\r\n", "scaffold10456_cov86\t6372\t6374\t1.40935800393671e-31\tscaffold10456_cov86\tAUGUSTUS\tgene\t6160\t7159\t1\t-\t.\tg48716\r\n", "scaffold10456_cov86\t6401\t6403\t1.61929212944984e-38\tscaffold10456_cov86\tAUGUSTUS\tgene\t6160\t6587\t0.82\t-\t.\tg48715\r\n", "\r\n", "==> ../../Pact_methDiff/Pact_WvM_50pMethDiff.bed-paFlanks <==\r\n", 
"scaffold10029_cov108\t1242\t1244\t1.55592447787434e-140\tscaffold10029_cov108\tAUGUSTUS\tgene\t1241\t1344\t0.97\t+\t.\tg48305\r\n", "scaffold10029_cov108\t1242\t1244\t1.55592447787434e-140\tscaffold10029_cov108\tAUGUSTUS\tgene\t1241\t1344\t0.88\t-\t.\tg48306\r\n", "scaffold10029_cov108\t1242\t1244\t1.55592447787434e-140\tscaffold10029_cov108\tAUGUSTUS\tgene\t1241\t1344\t0.49\t-\t.\tg48307\r\n", "scaffold10029_cov108\t1320\t1322\t1.38897986358164e-116\tscaffold10029_cov108\tAUGUSTUS\tgene\t1241\t1344\t0.97\t+\t.\tg48305\r\n", "scaffold10029_cov108\t1320\t1322\t1.38897986358164e-116\tscaffold10029_cov108\tAUGUSTUS\tgene\t1241\t1344\t0.88\t-\t.\tg48306\r\n", "scaffold10029_cov108\t1320\t1322\t1.38897986358164e-116\tscaffold10029_cov108\tAUGUSTUS\tgene\t1241\t1344\t0.49\t-\t.\tg48307\r\n", "scaffold10029_cov108\t1806\t1808\t3.01906841921698e-22\tscaffold10029_cov108\tAUGUSTUS\tgene\t1789\t1874\t0.97\t+\t.\tg48305\r\n", "scaffold10029_cov108\t1806\t1808\t3.01906841921698e-22\tscaffold10029_cov108\tAUGUSTUS\tgene\t1789\t1874\t0.88\t-\t.\tg48306\r\n", "scaffold10029_cov108\t1806\t1808\t3.01906841921698e-22\tscaffold10029_cov108\tAUGUSTUS\tgene\t1789\t1874\t0.49\t-\t.\tg48307\r\n", "scaffold10029_cov108\t1824\t1826\t9.54549106886193e-36\tscaffold10029_cov108\tAUGUSTUS\tgene\t1789\t1874\t0.97\t+\t.\tg48305\r\n" ] } ], "source": [ "#Check output\n", "!head ../../Pact_methDiff/*paFlanks" ] }, { "cell_type": "code", "execution_count": 98, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 2650 ../../Pact_methDiff/Pact_RvM_50pMethDiff.bed-paFlanks\r\n", " 4948 ../../Pact_methDiff/Pact_WvM_50pMethDiff.bed-paFlanks\r\n", " 7598 total\r\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l ../../Pact_methDiff/*paFlanks" ] }, { "cell_type": "code", "execution_count": 99, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!wc -l ../../Pact_methDiff/*paFlanks > 
Pact-DMC-data-paFlanks-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 3e. Upstream flanking regions" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Upset data" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "%%bash\n", "\n", "for f in *CpGs.bed\n", "do\n", " /usr/local/bin/intersectBed \\\n", " -wb \\\n", " -a ${f} \\\n", " -b ../../../genome-feature-files/Pact.GFFannotation.flanks.Upstream.gff \\\n", " > ${f}-paFlanksUpstream\n", "done" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> Pact_MBD_only_CpGs.bed-paFlanksUpstream <==\r\n", "scaffold10053_cov85\t9776\t9777\t1\tN/A\tN/A\t1\tscaffold10053_cov85\tAUGUSTUS\tgene\t9592\t9982\t0.58\t+\t.\tg48314\r\n", "scaffold10064_cov60\t14578\t14579\t1\tN/A\tN/A\t1\tscaffold10064_cov60\tAUGUSTUS\tgene\t14546\t14692\t1\t+\t.\tg48324\r\n", "scaffold10064_cov60\t14587\t14588\t1\tN/A\tN/A\t1\tscaffold10064_cov60\tAUGUSTUS\tgene\t14546\t14692\t1\t+\t.\tg48324\r\n", "scaffold10064_cov60\t14599\t14600\t1\tN/A\tN/A\t1\tscaffold10064_cov60\tAUGUSTUS\tgene\t14546\t14692\t1\t+\t.\tg48324\r\n", "scaffold10101_cov102\t19463\t19464\t1\tN/A\tN/A\t1\tscaffold10101_cov102\tAUGUSTUS\tgene\t19422\t20421\t0.43\t+\t.\tg48349\r\n", "scaffold10101_cov102\t19474\t19475\t1\tN/A\tN/A\t1\tscaffold10101_cov102\tAUGUSTUS\tgene\t19422\t20421\t0.43\t+\t.\tg48349\r\n", "scaffold10101_cov102\t19477\t19478\t1\tN/A\tN/A\t1\tscaffold10101_cov102\tAUGUSTUS\tgene\t19422\t20421\t0.43\t+\t.\tg48349\r\n", "scaffold10101_cov102\t19482\t19483\t1\tN/A\tN/A\t1\tscaffold10101_cov102\tAUGUSTUS\tgene\t19422\t20421\t0.43\t+\t.\tg48349\r\n", "scaffold10101_cov102\t19485\t19486\t1\tN/A\tN/A\t1\tscaffold10101_cov102\tAUGUSTUS\tgene\t19422\t20421\t0.43\t+\t.\tg48349\r\n", 
"scaffold10101_cov102\t19510\t19511\t1\tN/A\tN/A\t1\tscaffold10101_cov102\tAUGUSTUS\tgene\t19422\t20421\t0.43\t+\t.\tg48349\r\n", "\r\n", "==> Pact_RRBS_MBD_only_CpGs.bed-paFlanksUpstream <==\r\n", "scaffold10024_cov87\t30933\t30934\t1\tN/A\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t29986\t30985\t0.81\t-\t.\tg48298\r\n", "scaffold10024_cov87\t30936\t30937\t1\tN/A\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t29986\t30985\t0.81\t-\t.\tg48298\r\n", "scaffold10024_cov87\t30949\t30950\t1\tN/A\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t29986\t30985\t0.81\t-\t.\tg48298\r\n", "scaffold10024_cov87\t30952\t30953\t1\tN/A\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t29986\t30985\t0.81\t-\t.\tg48298\r\n", "scaffold10169_cov58\t8483\t8484\t1\tN/A\t1\t1\tscaffold10169_cov58\tAUGUSTUS\tgene\t8361\t8791\t0.82\t-\t.\tg48420\r\n", "scaffold10169_cov58\t8483\t8484\t1\tN/A\t1\t1\tscaffold10169_cov58\tAUGUSTUS\tgene\t8361\t8791\t0.86\t+\t.\tg48421\r\n", "scaffold10169_cov58\t8575\t8576\t1\tN/A\t1\t1\tscaffold10169_cov58\tAUGUSTUS\tgene\t8361\t8791\t0.82\t-\t.\tg48420\r\n", "scaffold10169_cov58\t8575\t8576\t1\tN/A\t1\t1\tscaffold10169_cov58\tAUGUSTUS\tgene\t8361\t8791\t0.86\t+\t.\tg48421\r\n", "scaffold10301_cov119\t4651\t4652\t1\tN/A\t1\t1\tscaffold10301_cov119\tAUGUSTUS\tgene\t4347\t5346\t0.83\t-\t.\tg48600\r\n", "scaffold1045_cov103\t8727\t8728\t1\tN/A\t1\t1\tscaffold1045_cov103\tAUGUSTUS\tgene\t8698\t9016\t1\t+\t.\tg13108\r\n", "\r\n", "==> Pact_RRBS_only_CpGs.bed-paFlanksUpstream <==\r\n", "scaffold10024_cov87\t46048\t46049\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t45560\t46559\t0.93\t+\t.\tg48303\r\n", "scaffold10053_cov85\t2233\t2234\t1\tN/A\t1\tN/A\tscaffold10053_cov85\tAUGUSTUS\tgene\t1812\t2811\t0.17\t+\t.\tg48312\r\n", "scaffold10053_cov85\t11992\t11993\t1\tN/A\t1\tN/A\tscaffold10053_cov85\tAUGUSTUS\tgene\t11935\t12934\t0.58\t+\t.\tg48315\r\n", 
"scaffold10101_cov102\t147499\t147500\t1\tN/A\t1\tN/A\tscaffold10101_cov102\tAUGUSTUS\tgene\t147476\t148475\t0.37\t+\t.\tg48380\r\n", "scaffold10101_cov102\t147742\t147743\t1\tN/A\t1\tN/A\tscaffold10101_cov102\tAUGUSTUS\tgene\t147476\t148475\t0.37\t+\t.\tg48380\r\n", "scaffold10101_cov102\t147749\t147750\t1\tN/A\t1\tN/A\tscaffold10101_cov102\tAUGUSTUS\tgene\t147476\t148475\t0.37\t+\t.\tg48380\r\n", "scaffold10117_cov52\t9058\t9059\t1\tN/A\t1\tN/A\tscaffold10117_cov52\tAUGUSTUS\tgene\t8448\t9447\t0.35\t-\t.\tg48381\r\n", "scaffold10117_cov52\t9078\t9079\t1\tN/A\t1\tN/A\tscaffold10117_cov52\tAUGUSTUS\tgene\t8448\t9447\t0.35\t-\t.\tg48381\r\n", "scaffold10117_cov52\t9093\t9094\t1\tN/A\t1\tN/A\tscaffold10117_cov52\tAUGUSTUS\tgene\t8448\t9447\t0.35\t-\t.\tg48381\r\n", "scaffold10117_cov52\t9160\t9161\t1\tN/A\t1\tN/A\tscaffold10117_cov52\tAUGUSTUS\tgene\t8448\t9447\t0.35\t-\t.\tg48381\r\n", "\r\n", "==> Pact_WGBS_MBD_only_CpGs.bed-paFlanksUpstream <==\r\n", "scaffold10024_cov87\t1859\t1860\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t1992\t1993\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t2011\t2012\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t2018\t2019\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t2062\t2063\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t2088\t2089\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t2130\t2131\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t2175\t2176\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", 
"scaffold10024_cov87\t2183\t2184\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t2200\t2201\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "\r\n", "==> Pact_WGBS_RRBS_MBD_only_CpGs.bed-paFlanksUpstream <==\r\n", "scaffold10024_cov87\t1850\t1851\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t1874\t1875\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t1880\t1881\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t1905\t1906\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t1931\t1932\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t1951\t1952\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t1973\t1974\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t16597\t16598\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t16594\t16925\t0.64\t-\t.\tg48297\r\n", "scaffold10024_cov87\t16612\t16613\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t16594\t16925\t0.64\t-\t.\tg48297\r\n", "scaffold10024_cov87\t16629\t16630\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t16594\t16925\t0.64\t-\t.\tg48297\r\n", "\r\n", "==> Pact_WGBS_RRBS_only_CpGs.bed-paFlanksUpstream <==\r\n", "scaffold10024_cov87\t1793\t1794\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t1795\t1796\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t1811\t1812\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", 
"scaffold10024_cov87\t2250\t2251\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t2280\t2281\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t2286\t2287\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t4970\t4971\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t4955\t5031\t0.89\t-\t.\tg48294\r\n", "scaffold10024_cov87\t4970\t4971\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t4955\t5031\t0.26\t+\t.\tg48296\r\n", "scaffold10024_cov87\t4974\t4975\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t4955\t5031\t0.89\t-\t.\tg48294\r\n", "scaffold10024_cov87\t4974\t4975\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t4955\t5031\t0.26\t+\t.\tg48296\r\n", "\r\n", "==> Pact_WGBS_only_CpGs.bed-paFlanksUpstream <==\r\n", "scaffold10024_cov87\t1623\t1624\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t1628\t1629\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t1634\t1635\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t1759\t1760\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t2232\t2233\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t2330\t2331\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t2339\t2340\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t2357\t2358\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", 
"scaffold10024_cov87\t2365\t2366\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t2389\t2390\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "\r\n", "==> Pact_no_method_CpGs.bed-paFlanksUpstream <==\r\n", "scaffold10024_cov87\t1654\t1655\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t1690\t1691\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t1695\t1696\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t1698\t1699\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t2543\t2544\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t1612\t2611\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t3782\t3783\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3733\t4019\t0.54\t+\t.\tg48295\r\n", "scaffold10024_cov87\t3877\t3878\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3733\t4019\t0.54\t+\t.\tg48295\r\n", "scaffold10024_cov87\t3932\t3933\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3733\t4019\t0.54\t+\t.\tg48295\r\n", "scaffold10024_cov87\t3957\t3958\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3733\t4019\t0.54\t+\t.\tg48295\r\n", "scaffold10024_cov87\t4694\t4695\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t4676\t4732\t0.89\t-\t.\tg48294\r\n" ] } ], "source": [ "#Check output\n", "!head *CpGs.bed*paFlanksUpstream" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 8004 Pact_MBD_only_CpGs.bed-paFlanksUpstream\n", " 1317 Pact_RRBS_MBD_only_CpGs.bed-paFlanksUpstream\n", " 18040 Pact_RRBS_only_CpGs.bed-paFlanksUpstream\n", " 312968 
Pact_WGBS_MBD_only_CpGs.bed-paFlanksUpstream\n", " 175371 Pact_WGBS_RRBS_MBD_only_CpGs.bed-paFlanksUpstream\n", " 134118 Pact_WGBS_RRBS_only_CpGs.bed-paFlanksUpstream\n", " 370186 Pact_WGBS_only_CpGs.bed-paFlanksUpstream\n", " 165826 Pact_no_method_CpGs.bed-paFlanksUpstream\n", " 1185830 total\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l *CpGs.bed*paFlanksUpstream" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!wc -l *CpGs.bed*paFlanksUpstream > Pact-upset-data-paFlanksUpstream-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Method-associated DMC" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "%%bash\n", "\n", "for f in ../../Pact_methDiff/*bed\n", "do\n", " /usr/local/bin/intersectBed \\\n", " -wb \\\n", " -a ${f} \\\n", " -b ../../../genome-feature-files/Pact.GFFannotation.flanks.Upstream.gff \\\n", " > ${f}-paFlanksUpstream\n", "done" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> ../../Pact_methDiff/Pact_RvM_50pMethDiff.bed-paFlanksUpstream <==\r\n", "scaffold10029_cov108\t11099\t11101\t6.59643058301839e-248\tscaffold10029_cov108\tAUGUSTUS\tgene\t10449\t11448\t0.88\t-\t.\tg48308\r\n", "scaffold10456_cov86\t6372\t6374\t1.40935800393671e-31\tscaffold10456_cov86\tAUGUSTUS\tgene\t6160\t6587\t0.82\t-\t.\tg48715\r\n", "scaffold10456_cov86\t6372\t6374\t1.40935800393671e-31\tscaffold10456_cov86\tAUGUSTUS\tgene\t6160\t7159\t1\t-\t.\tg48716\r\n", "scaffold10456_cov86\t6401\t6403\t1.61929212944984e-38\tscaffold10456_cov86\tAUGUSTUS\tgene\t6160\t6587\t0.82\t-\t.\tg48715\r\n", "scaffold10456_cov86\t6401\t6403\t1.61929212944984e-38\tscaffold10456_cov86\tAUGUSTUS\tgene\t6160\t7159\t1\t-\t.\tg48716\r\n", 
"scaffold10629_cov64\t504\t506\t1.09989375168844e-24\tscaffold10629_cov64\tAUGUSTUS\tgene\t502\t523\t0.95\t-\t.\tg48961\r\n", "scaffold1068_cov98\t825600\t825602\t1.60921967575966e-105\tscaffold1068_cov98\tAUGUSTUS\tgene\t825346\t826345\t0.81\t+\t.\tg13468\r\n", "scaffold1068_cov98\t825642\t825644\t0\tscaffold1068_cov98\tAUGUSTUS\tgene\t825346\t826345\t0.81\t+\t.\tg13468\r\n", "scaffold1068_cov98\t825651\t825653\t0\tscaffold1068_cov98\tAUGUSTUS\tgene\t825346\t826345\t0.81\t+\t.\tg13468\r\n", "scaffold1068_cov98\t825736\t825738\t6.48098422809392e-262\tscaffold1068_cov98\tAUGUSTUS\tgene\t825346\t826345\t0.81\t+\t.\tg13468\r\n", "\r\n", "==> ../../Pact_methDiff/Pact_WvM_50pMethDiff.bed-paFlanksUpstream <==\r\n", "scaffold10029_cov108\t1806\t1808\t3.01906841921698e-22\tscaffold10029_cov108\tAUGUSTUS\tgene\t1789\t1874\t0.88\t-\t.\tg48306\r\n", "scaffold10029_cov108\t1824\t1826\t9.54549106886193e-36\tscaffold10029_cov108\tAUGUSTUS\tgene\t1789\t1874\t0.88\t-\t.\tg48306\r\n", "scaffold10029_cov108\t1853\t1855\t3.7159856050391e-36\tscaffold10029_cov108\tAUGUSTUS\tgene\t1789\t1874\t0.88\t-\t.\tg48306\r\n", "scaffold10029_cov108\t10665\t10667\t1.21911545093071e-57\tscaffold10029_cov108\tAUGUSTUS\tgene\t10449\t11448\t0.88\t-\t.\tg48308\r\n", "scaffold10029_cov108\t10667\t10669\t1.72515728491565e-56\tscaffold10029_cov108\tAUGUSTUS\tgene\t10449\t11448\t0.88\t-\t.\tg48308\r\n", "scaffold1020_cov97\t83675\t83677\t4.01098081174225e-10\tscaffold1020_cov97\tAUGUSTUS\tgene\t82809\t83808\t0.58\t+\t.\tg12931\r\n", "scaffold1020_cov97\t83677\t83679\t7.36229340437059e-11\tscaffold1020_cov97\tAUGUSTUS\tgene\t82809\t83808\t0.58\t+\t.\tg12931\r\n", "scaffold102916_cov118\t76\t78\t1.84660882871855e-16\tscaffold102916_cov118\tAUGUSTUS\tgene\t1\t694\t0.81\t+\t.\tg59572\r\n", "scaffold10301_cov119\t8889\t8891\t5.0930183168622e-29\tscaffold10301_cov119\tAUGUSTUS\tgene\t8686\t9501\t0.99\t-\t.\tg48602\r\n", 
"scaffold10301_cov119\t8928\t8930\t2.83417989907966e-40\tscaffold10301_cov119\tAUGUSTUS\tgene\t8686\t9501\t0.99\t-\t.\tg48602\r\n" ] } ], "source": [ "#Check output\n", "!head ../../Pact_methDiff/*paFlanksUpstream" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 1301 ../../Pact_methDiff/Pact_RvM_50pMethDiff.bed-paFlanksUpstream\r\n", " 2372 ../../Pact_methDiff/Pact_WvM_50pMethDiff.bed-paFlanksUpstream\r\n", " 3673 total\r\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l ../../Pact_methDiff/*paFlanksUpstream" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!wc -l ../../Pact_methDiff/*paFlanksUpstream > Pact-DMC-data-paFlanksUpstream-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 3f. Downstream flanking regions" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Upset data" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "%%bash\n", "\n", "for f in *CpGs.bed\n", "do\n", " /usr/local/bin/intersectBed \\\n", " -wb \\\n", " -a ${f} \\\n", " -b ../../../genome-feature-files/Pact.GFFannotation.flanks.Downstream.gff \\\n", " > ${f}-paFlanksDownstream\n", "done" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> Pact_MBD_only_CpGs.bed-paFlanksDownstream <==\r\n", "scaffold10053_cov85\t952\t953\t1\tN/A\tN/A\t1\tscaffold10053_cov85\tAUGUSTUS\tgene\t308\t1307\t0.81\t+\t.\tg48311\r\n", "scaffold10053_cov85\t1136\t1137\t1\tN/A\tN/A\t1\tscaffold10053_cov85\tAUGUSTUS\tgene\t308\t1307\t0.81\t+\t.\tg48311\r\n", "scaffold10053_cov85\t1169\t1170\t1\tN/A\tN/A\t1\tscaffold10053_cov85\tAUGUSTUS\tgene\t308\t1307\t0.81\t+\t.\tg48311\r\n", 
"scaffold10053_cov85\t1182\t1183\t1\tN/A\tN/A\t1\tscaffold10053_cov85\tAUGUSTUS\tgene\t308\t1307\t0.81\t+\t.\tg48311\r\n", "scaffold10053_cov85\t1219\t1220\t1\tN/A\tN/A\t1\tscaffold10053_cov85\tAUGUSTUS\tgene\t308\t1307\t0.81\t+\t.\tg48311\r\n", "scaffold10053_cov85\t9776\t9777\t1\tN/A\tN/A\t1\tscaffold10053_cov85\tAUGUSTUS\tgene\t9592\t9982\t0.94\t+\t.\tg48313\r\n", "scaffold10064_cov60\t14578\t14579\t1\tN/A\tN/A\t1\tscaffold10064_cov60\tAUGUSTUS\tgene\t13739\t14692\t0.68\t+\t.\tg48322\r\n", "scaffold10064_cov60\t14578\t14579\t1\tN/A\tN/A\t1\tscaffold10064_cov60\tAUGUSTUS\tgene\t13739\t14692\t0.81\t-\t.\tg48323\r\n", "scaffold10064_cov60\t14587\t14588\t1\tN/A\tN/A\t1\tscaffold10064_cov60\tAUGUSTUS\tgene\t13739\t14692\t0.68\t+\t.\tg48322\r\n", "scaffold10064_cov60\t14587\t14588\t1\tN/A\tN/A\t1\tscaffold10064_cov60\tAUGUSTUS\tgene\t13739\t14692\t0.81\t-\t.\tg48323\r\n", "\r\n", "==> Pact_RRBS_MBD_only_CpGs.bed-paFlanksDownstream <==\r\n", "scaffold10185_cov55\t6181\t6182\t1\tN/A\t1\t1\tscaffold10185_cov55\tAUGUSTUS\tgene\t6042\t7041\t1\t-\t.\tg48435\r\n", "scaffold10185_cov55\t6278\t6279\t1\tN/A\t1\t1\tscaffold10185_cov55\tAUGUSTUS\tgene\t6042\t7041\t1\t-\t.\tg48435\r\n", "scaffold1035_cov113\t204021\t204022\t1\tN/A\t1\t1\tscaffold1035_cov113\tAUGUSTUS\tgene\t203984\t204764\t0.85\t+\t.\tg13073\r\n", "scaffold1035_cov113\t204021\t204022\t1\tN/A\t1\t1\tscaffold1035_cov113\tAUGUSTUS\tgene\t203984\t204764\t0.94\t-\t.\tg13074\r\n", "scaffold1045_cov103\t8727\t8728\t1\tN/A\t1\t1\tscaffold1045_cov103\tAUGUSTUS\tgene\t8698\t9016\t0.89\t+\t.\tg13107\r\n", "scaffold10546_cov104\t90762\t90763\t1\tN/A\t1\t1\tscaffold10546_cov104\tAUGUSTUS\tgene\t89860\t90859\t0.8\t+\t.\tg48807\r\n", "scaffold1058_cov94\t3987\t3988\t1\tN/A\t1\t1\tscaffold1058_cov94\tAUGUSTUS\tgene\t3739\t4738\t0.92\t-\t.\tg13277\r\n", "scaffold1058_cov94\t3992\t3993\t1\tN/A\t1\t1\tscaffold1058_cov94\tAUGUSTUS\tgene\t3739\t4738\t0.92\t-\t.\tg13277\r\n", 
"scaffold10640_cov57\t10891\t10892\t1\tN/A\t1\t1\tscaffold10640_cov57\tAUGUSTUS\tgene\t9924\t10923\t0.63\t-\t.\tg48975\r\n", "scaffold10640_cov57\t10894\t10895\t1\tN/A\t1\t1\tscaffold10640_cov57\tAUGUSTUS\tgene\t9924\t10923\t0.63\t-\t.\tg48975\r\n", "\r\n", "==> Pact_RRBS_only_CpGs.bed-paFlanksDownstream <==\r\n", "scaffold10024_cov87\t3362\t3363\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t3362\t3363\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\r\n", "scaffold10024_cov87\t3369\t3370\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t3369\t3370\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\r\n", "scaffold10024_cov87\t3389\t3390\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t3389\t3390\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\r\n", "scaffold10024_cov87\t13028\t13029\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t12162\t13161\t0.26\t+\t.\tg48296\r\n", "scaffold10024_cov87\t13032\t13033\t1\tN/A\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t12162\t13161\t0.26\t+\t.\tg48296\r\n", "scaffold10053_cov85\t11992\t11993\t1\tN/A\t1\tN/A\tscaffold10053_cov85\tAUGUSTUS\tgene\t11111\t12110\t0.58\t+\t.\tg48314\r\n", "scaffold10064_cov60\t597\t598\t1\tN/A\t1\tN/A\tscaffold10064_cov60\tAUGUSTUS\tgene\t586\t1585\t1\t-\t.\tg48316\r\n", "\r\n", "==> Pact_WGBS_MBD_only_CpGs.bed-paFlanksDownstream <==\r\n", "scaffold10024_cov87\t3212\t3213\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t3212\t3213\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\r\n", "scaffold10024_cov87\t3270\t3271\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\r\n", 
"scaffold10024_cov87\t3270\t3271\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\r\n", "scaffold10024_cov87\t3296\t3297\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t3296\t3297\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\r\n", "scaffold10024_cov87\t13846\t13847\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t13671\t14670\t0.64\t-\t.\tg48297\r\n", "scaffold10024_cov87\t14085\t14086\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t13671\t14670\t0.64\t-\t.\tg48297\r\n", "scaffold10024_cov87\t14373\t14374\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t13671\t14670\t0.64\t-\t.\tg48297\r\n", "scaffold10024_cov87\t14403\t14404\t1\t1\tN/A\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t13671\t14670\t0.64\t-\t.\tg48297\r\n", "\r\n", "==> Pact_WGBS_RRBS_MBD_only_CpGs.bed-paFlanksDownstream <==\r\n", "scaffold10024_cov87\t3417\t3418\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t3417\t3418\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\r\n", "scaffold10024_cov87\t3427\t3428\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t3427\t3428\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\r\n", "scaffold10024_cov87\t3443\t3444\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t3443\t3444\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\r\n", "scaffold10024_cov87\t13054\t13055\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t12162\t13161\t0.26\t+\t.\tg48296\r\n", "scaffold10024_cov87\t13098\t13099\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t12162\t13161\t0.26\t+\t.\tg48296\r\n", 
"scaffold10024_cov87\t13116\t13117\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t12162\t13161\t0.26\t+\t.\tg48296\r\n", "scaffold10024_cov87\t13123\t13124\t1\t1\t1\t1\tscaffold10024_cov87\tAUGUSTUS\tgene\t12162\t13161\t0.26\t+\t.\tg48296\r\n", "\r\n", "==> Pact_WGBS_RRBS_only_CpGs.bed-paFlanksDownstream <==\r\n", "scaffold10024_cov87\t3359\t3360\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t3359\t3360\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\r\n", "scaffold10024_cov87\t3507\t3508\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t3507\t3508\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\r\n", "scaffold10024_cov87\t3584\t3585\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t3584\t3585\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\r\n", "scaffold10024_cov87\t3631\t3632\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t3631\t3632\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\r\n", "scaffold10024_cov87\t3658\t3659\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t3658\t3659\t1\t1\t1\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\r\n", "\r\n", "==> Pact_WGBS_only_CpGs.bed-paFlanksDownstream <==\r\n", "scaffold10024_cov87\t3032\t3033\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t3032\t3033\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\r\n", "scaffold10024_cov87\t3039\t3040\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\r\n", 
"scaffold10024_cov87\t3039\t3040\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\r\n", "scaffold10024_cov87\t3095\t3096\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t3095\t3096\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\r\n", "scaffold10024_cov87\t3202\t3203\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t3202\t3203\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\r\n", "scaffold10024_cov87\t3846\t3847\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t3846\t3847\t1\t1\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\r\n", "\r\n", "==> Pact_no_method_CpGs.bed-paFlanksDownstream <==\r\n", "scaffold10024_cov87\t3782\t3783\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t3782\t3783\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\r\n", "scaffold10024_cov87\t3877\t3878\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t3877\t3878\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\r\n", "scaffold10024_cov87\t3932\t3933\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t3932\t3933\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\r\n", "scaffold10024_cov87\t3957\t3958\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.75\t+\t.\tg48293\r\n", "scaffold10024_cov87\t3957\t3958\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t3030\t4019\t0.89\t-\t.\tg48294\r\n", 
"scaffold10024_cov87\t12243\t12244\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t12162\t13161\t0.26\t+\t.\tg48296\r\n", "scaffold10024_cov87\t12265\t12266\t1\tN/A\tN/A\tN/A\tscaffold10024_cov87\tAUGUSTUS\tgene\t12162\t13161\t0.26\t+\t.\tg48296\r\n" ] } ], "source": [ "#Check output\n", "!head *CpGs.bed*paFlanksDownstream" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 8807 Pact_MBD_only_CpGs.bed-paFlanksDownstream\n", " 1434 Pact_RRBS_MBD_only_CpGs.bed-paFlanksDownstream\n", " 17754 Pact_RRBS_only_CpGs.bed-paFlanksDownstream\n", " 271588 Pact_WGBS_MBD_only_CpGs.bed-paFlanksDownstream\n", " 133060 Pact_WGBS_RRBS_MBD_only_CpGs.bed-paFlanksDownstream\n", " 110052 Pact_WGBS_RRBS_only_CpGs.bed-paFlanksDownstream\n", " 354030 Pact_WGBS_only_CpGs.bed-paFlanksDownstream\n", " 169329 Pact_no_method_CpGs.bed-paFlanksDownstream\n", " 1066054 total\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l *CpGs.bed*paFlanksDownstream" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!wc -l *CpGs.bed*paFlanksDownstream > Pact-upset-data-paFlanksDownstream-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Method-associated DMC" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "%%bash\n", "\n", "for f in ../../Pact_methDiff/*bed\n", "do\n", " /usr/local/bin/intersectBed \\\n", " -wb \\\n", " -a ${f} \\\n", " -b ../../../genome-feature-files/Pact.GFFannotation.flanks.Downstream.gff \\\n", " > ${f}-paFlanksDownstream\n", "done" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> ../../Pact_methDiff/Pact_RvM_50pMethDiff.bed-paFlanksDownstream 
<==\r\n", "scaffold10029_cov108\t1242\t1244\t4.06057732917881e-51\tscaffold10029_cov108\tAUGUSTUS\tgene\t1241\t1344\t0.97\t+\t.\tg48305\r\n", "scaffold10029_cov108\t1242\t1244\t4.06057732917881e-51\tscaffold10029_cov108\tAUGUSTUS\tgene\t1241\t1344\t0.88\t-\t.\tg48306\r\n", "scaffold10029_cov108\t1242\t1244\t4.06057732917881e-51\tscaffold10029_cov108\tAUGUSTUS\tgene\t1241\t1344\t0.49\t-\t.\tg48307\r\n", "scaffold10029_cov108\t1320\t1322\t1.56116529744743e-58\tscaffold10029_cov108\tAUGUSTUS\tgene\t1241\t1344\t0.97\t+\t.\tg48305\r\n", "scaffold10029_cov108\t1320\t1322\t1.56116529744743e-58\tscaffold10029_cov108\tAUGUSTUS\tgene\t1241\t1344\t0.88\t-\t.\tg48306\r\n", "scaffold10029_cov108\t1320\t1322\t1.56116529744743e-58\tscaffold10029_cov108\tAUGUSTUS\tgene\t1241\t1344\t0.49\t-\t.\tg48307\r\n", "scaffold10629_cov64\t504\t506\t1.09989375168844e-24\tscaffold10629_cov64\tAUGUSTUS\tgene\t502\t523\t0.53\t-\t.\tg48962\r\n", "scaffold1068_cov98\t825600\t825602\t1.60921967575966e-105\tscaffold1068_cov98\tAUGUSTUS\tgene\t824955\t825954\t1\t+\t.\tg13467\r\n", "scaffold1068_cov98\t825642\t825644\t0\tscaffold1068_cov98\tAUGUSTUS\tgene\t824955\t825954\t1\t+\t.\tg13467\r\n", "scaffold1068_cov98\t825651\t825653\t0\tscaffold1068_cov98\tAUGUSTUS\tgene\t824955\t825954\t1\t+\t.\tg13467\r\n", "\r\n", "==> ../../Pact_methDiff/Pact_WvM_50pMethDiff.bed-paFlanksDownstream <==\r\n", "scaffold10029_cov108\t1242\t1244\t1.55592447787434e-140\tscaffold10029_cov108\tAUGUSTUS\tgene\t1241\t1344\t0.97\t+\t.\tg48305\r\n", "scaffold10029_cov108\t1242\t1244\t1.55592447787434e-140\tscaffold10029_cov108\tAUGUSTUS\tgene\t1241\t1344\t0.88\t-\t.\tg48306\r\n", "scaffold10029_cov108\t1242\t1244\t1.55592447787434e-140\tscaffold10029_cov108\tAUGUSTUS\tgene\t1241\t1344\t0.49\t-\t.\tg48307\r\n", "scaffold10029_cov108\t1320\t1322\t1.38897986358164e-116\tscaffold10029_cov108\tAUGUSTUS\tgene\t1241\t1344\t0.97\t+\t.\tg48305\r\n", 
"scaffold10029_cov108\t1320\t1322\t1.38897986358164e-116\tscaffold10029_cov108\tAUGUSTUS\tgene\t1241\t1344\t0.88\t-\t.\tg48306\r\n", "scaffold10029_cov108\t1320\t1322\t1.38897986358164e-116\tscaffold10029_cov108\tAUGUSTUS\tgene\t1241\t1344\t0.49\t-\t.\tg48307\r\n", "scaffold10029_cov108\t1806\t1808\t3.01906841921698e-22\tscaffold10029_cov108\tAUGUSTUS\tgene\t1789\t1874\t0.97\t+\t.\tg48305\r\n", "scaffold10029_cov108\t1806\t1808\t3.01906841921698e-22\tscaffold10029_cov108\tAUGUSTUS\tgene\t1789\t1874\t0.49\t-\t.\tg48307\r\n", "scaffold10029_cov108\t1824\t1826\t9.54549106886193e-36\tscaffold10029_cov108\tAUGUSTUS\tgene\t1789\t1874\t0.97\t+\t.\tg48305\r\n", "scaffold10029_cov108\t1824\t1826\t9.54549106886193e-36\tscaffold10029_cov108\tAUGUSTUS\tgene\t1789\t1874\t0.49\t-\t.\tg48307\r\n" ] } ], "source": [ "#Check output\n", "!head ../../Pact_methDiff/*paFlanksDownstream" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 1349 ../../Pact_methDiff/Pact_RvM_50pMethDiff.bed-paFlanksDownstream\r\n", " 2576 ../../Pact_methDiff/Pact_WvM_50pMethDiff.bed-paFlanksDownstream\r\n", " 3925 total\r\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l ../../Pact_methDiff/*paFlanksDownstream" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!wc -l ../../Pact_methDiff/*paFlanksDownstream > Pact-DMC-data-paFlanksDownstream-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 3g. 
Intergenic" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Upset data" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": true }, "outputs": [], "source": [ "%%bash \n", "\n", "for f in *CpGs.bed\n", "do\n", " /usr/local/bin/intersectBed \\\n", " -wb \\\n", " -a ${f} \\\n", " -b ../../../genome-feature-files/Pact.GFFannotation.intergenic.bed \\\n", " > ${f}-paIntergenic\n", "done" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> Pact_MBD_only_CpGs.bed-paIntergenic <==\r\n", "scaffold100004_cov43\t32\t33\t1\tN/A\tN/A\t1\tscaffold100004_cov43\t0\t474\r\n", "scaffold100009_cov142\t107\t108\t1\tN/A\tN/A\t1\tscaffold100009_cov142\t0\t429\r\n", "scaffold100026_cov65\t763\t764\t1\tN/A\tN/A\t1\tscaffold100026_cov65\t0\t995\r\n", "scaffold100028_cov103\t368\t369\t1\tN/A\tN/A\t1\tscaffold100028_cov103\t0\t642\r\n", "scaffold10002_cov101\t1074\t1075\t1\tN/A\tN/A\t1\tscaffold10002_cov101\t0\t1598\r\n", "scaffold100032_cov90\t312\t313\t1\tN/A\tN/A\t1\tscaffold100032_cov90\t0\t690\r\n", "scaffold100032_cov90\t348\t349\t1\tN/A\tN/A\t1\tscaffold100032_cov90\t0\t690\r\n", "scaffold10006_cov97\t537\t538\t1\tN/A\tN/A\t1\tscaffold10006_cov97\t0\t1017\r\n", "scaffold100076_cov52\t73\t74\t1\tN/A\tN/A\t1\tscaffold100076_cov52\t0\t621\r\n", "scaffold100076_cov52\t98\t99\t1\tN/A\tN/A\t1\tscaffold100076_cov52\t0\t621\r\n", "\r\n", "==> Pact_RRBS_MBD_only_CpGs.bed-paIntergenic <==\r\n", "scaffold100149_cov57\t108\t109\t1\tN/A\t1\t1\tscaffold100149_cov57\t0\t236\r\n", "scaffold100149_cov57\t145\t146\t1\tN/A\t1\t1\tscaffold100149_cov57\t0\t236\r\n", "scaffold100173_cov47\t411\t412\t1\tN/A\t1\t1\tscaffold100173_cov47\t0\t560\r\n", "scaffold100173_cov47\t452\t453\t1\tN/A\t1\t1\tscaffold100173_cov47\t0\t560\r\n", "scaffold100173_cov47\t471\t472\t1\tN/A\t1\t1\tscaffold100173_cov47\t0\t560\r\n", 
"scaffold100173_cov47\t473\t474\t1\tN/A\t1\t1\tscaffold100173_cov47\t0\t560\r\n", "scaffold100173_cov47\t477\t478\t1\tN/A\t1\t1\tscaffold100173_cov47\t0\t560\r\n", "scaffold100173_cov47\t487\t488\t1\tN/A\t1\t1\tscaffold100173_cov47\t0\t560\r\n", "scaffold100173_cov47\t493\t494\t1\tN/A\t1\t1\tscaffold100173_cov47\t0\t560\r\n", "scaffold100173_cov47\t517\t518\t1\tN/A\t1\t1\tscaffold100173_cov47\t0\t560\r\n", "\r\n", "==> Pact_RRBS_only_CpGs.bed-paIntergenic <==\r\n", "scaffold10001_cov45\t184\t185\t1\tN/A\t1\tN/A\tscaffold10001_cov45\t0\t778\r\n", "scaffold100027_cov81\t540\t541\t1\tN/A\t1\tN/A\tscaffold100027_cov81\t0\t727\r\n", "scaffold100027_cov81\t606\t607\t1\tN/A\t1\tN/A\tscaffold100027_cov81\t0\t727\r\n", "scaffold100036_cov48\t179\t180\t1\tN/A\t1\tN/A\tscaffold100036_cov48\t0\t322\r\n", "scaffold100036_cov48\t201\t202\t1\tN/A\t1\tN/A\tscaffold100036_cov48\t0\t322\r\n", "scaffold100036_cov48\t224\t225\t1\tN/A\t1\tN/A\tscaffold100036_cov48\t0\t322\r\n", "scaffold100036_cov48\t279\t280\t1\tN/A\t1\tN/A\tscaffold100036_cov48\t0\t322\r\n", "scaffold100036_cov48\t283\t284\t1\tN/A\t1\tN/A\tscaffold100036_cov48\t0\t322\r\n", "scaffold100036_cov48\t290\t291\t1\tN/A\t1\tN/A\tscaffold100036_cov48\t0\t322\r\n", "scaffold100057_cov57\t902\t903\t1\tN/A\t1\tN/A\tscaffold100057_cov57\t0\t1351\r\n", "\r\n", "==> Pact_WGBS_MBD_only_CpGs.bed-paIntergenic <==\r\n", "scaffold100003_cov99\t77\t78\t1\t1\tN/A\t1\tscaffold100003_cov99\t0\t1744\r\n", "scaffold100003_cov99\t98\t99\t1\t1\tN/A\t1\tscaffold100003_cov99\t0\t1744\r\n", "scaffold100003_cov99\t112\t113\t1\t1\tN/A\t1\tscaffold100003_cov99\t0\t1744\r\n", "scaffold100003_cov99\t146\t147\t1\t1\tN/A\t1\tscaffold100003_cov99\t0\t1744\r\n", "scaffold100003_cov99\t177\t178\t1\t1\tN/A\t1\tscaffold100003_cov99\t0\t1744\r\n", "scaffold100003_cov99\t231\t232\t1\t1\tN/A\t1\tscaffold100003_cov99\t0\t1744\r\n", "scaffold100003_cov99\t257\t258\t1\t1\tN/A\t1\tscaffold100003_cov99\t0\t1744\r\n", 
"scaffold100003_cov99\t286\t287\t1\t1\tN/A\t1\tscaffold100003_cov99\t0\t1744\r\n", "scaffold100003_cov99\t902\t903\t1\t1\tN/A\t1\tscaffold100003_cov99\t0\t1744\r\n", "scaffold100003_cov99\t914\t915\t1\t1\tN/A\t1\tscaffold100003_cov99\t0\t1744\r\n", "\r\n", "==> Pact_WGBS_RRBS_MBD_only_CpGs.bed-paIntergenic <==\r\n", "scaffold100004_cov43\t101\t102\t1\t1\t1\t1\tscaffold100004_cov43\t0\t474\r\n", "scaffold100004_cov43\t108\t109\t1\t1\t1\t1\tscaffold100004_cov43\t0\t474\r\n", "scaffold100009_cov142\t181\t182\t1\t1\t1\t1\tscaffold100009_cov142\t0\t429\r\n", "scaffold100009_cov142\t213\t214\t1\t1\t1\t1\tscaffold100009_cov142\t0\t429\r\n", "scaffold100017_cov107\t1006\t1007\t1\t1\t1\t1\tscaffold100017_cov107\t0\t1766\r\n", "scaffold100017_cov107\t1014\t1015\t1\t1\t1\t1\tscaffold100017_cov107\t0\t1766\r\n", "scaffold100017_cov107\t1053\t1054\t1\t1\t1\t1\tscaffold100017_cov107\t0\t1766\r\n", "scaffold100017_cov107\t1083\t1084\t1\t1\t1\t1\tscaffold100017_cov107\t0\t1766\r\n", "scaffold100017_cov107\t1094\t1095\t1\t1\t1\t1\tscaffold100017_cov107\t0\t1766\r\n", "scaffold100017_cov107\t1151\t1152\t1\t1\t1\t1\tscaffold100017_cov107\t0\t1766\r\n", "\r\n", "==> Pact_WGBS_RRBS_only_CpGs.bed-paIntergenic <==\r\n", "scaffold100000_cov51\t110\t111\t1\t1\t1\tN/A\tscaffold100000_cov51\t0\t964\r\n", "scaffold100000_cov51\t113\t114\t1\t1\t1\tN/A\tscaffold100000_cov51\t0\t964\r\n", "scaffold100000_cov51\t206\t207\t1\t1\t1\tN/A\tscaffold100000_cov51\t0\t964\r\n", "scaffold100000_cov51\t214\t215\t1\t1\t1\tN/A\tscaffold100000_cov51\t0\t964\r\n", "scaffold100000_cov51\t237\t238\t1\t1\t1\tN/A\tscaffold100000_cov51\t0\t964\r\n", "scaffold100017_cov107\t1210\t1211\t1\t1\t1\tN/A\tscaffold100017_cov107\t0\t1766\r\n", "scaffold100017_cov107\t1212\t1213\t1\t1\t1\tN/A\tscaffold100017_cov107\t0\t1766\r\n", "scaffold100017_cov107\t1216\t1217\t1\t1\t1\tN/A\tscaffold100017_cov107\t0\t1766\r\n", "scaffold100017_cov107\t1255\t1256\t1\t1\t1\tN/A\tscaffold100017_cov107\t0\t1766\r\n", 
"scaffold100017_cov107\t1258\t1259\t1\t1\t1\tN/A\tscaffold100017_cov107\t0\t1766\r\n", "\r\n", "==> Pact_WGBS_only_CpGs.bed-paIntergenic <==\r\n", "scaffold100000_cov51\t23\t24\t1\t1\tN/A\tN/A\tscaffold100000_cov51\t0\t964\r\n", "scaffold100000_cov51\t61\t62\t1\t1\tN/A\tN/A\tscaffold100000_cov51\t0\t964\r\n", "scaffold100000_cov51\t69\t70\t1\t1\tN/A\tN/A\tscaffold100000_cov51\t0\t964\r\n", "scaffold100000_cov51\t79\t80\t1\t1\tN/A\tN/A\tscaffold100000_cov51\t0\t964\r\n", "scaffold100000_cov51\t246\t247\t1\t1\tN/A\tN/A\tscaffold100000_cov51\t0\t964\r\n", "scaffold100000_cov51\t252\t253\t1\t1\tN/A\tN/A\tscaffold100000_cov51\t0\t964\r\n", "scaffold100003_cov99\t333\t334\t1\t1\tN/A\tN/A\tscaffold100003_cov99\t0\t1744\r\n", "scaffold100003_cov99\t396\t397\t1\t1\tN/A\tN/A\tscaffold100003_cov99\t0\t1744\r\n", "scaffold100003_cov99\t401\t402\t1\t1\tN/A\tN/A\tscaffold100003_cov99\t0\t1744\r\n", "scaffold100003_cov99\t430\t431\t1\t1\tN/A\tN/A\tscaffold100003_cov99\t0\t1744\r\n", "\r\n", "==> Pact_no_method_CpGs.bed-paIntergenic <==\r\n", "scaffold100000_cov51\t5\t6\t1\tN/A\tN/A\tN/A\tscaffold100000_cov51\t0\t964\r\n", "scaffold100000_cov51\t352\t353\t1\tN/A\tN/A\tN/A\tscaffold100000_cov51\t0\t964\r\n", "scaffold100000_cov51\t371\t372\t1\tN/A\tN/A\tN/A\tscaffold100000_cov51\t0\t964\r\n", "scaffold100000_cov51\t394\t395\t1\tN/A\tN/A\tN/A\tscaffold100000_cov51\t0\t964\r\n", "scaffold100000_cov51\t426\t427\t1\tN/A\tN/A\tN/A\tscaffold100000_cov51\t0\t964\r\n", "scaffold100000_cov51\t445\t446\t1\tN/A\tN/A\tN/A\tscaffold100000_cov51\t0\t964\r\n", "scaffold100000_cov51\t453\t454\t1\tN/A\tN/A\tN/A\tscaffold100000_cov51\t0\t964\r\n", "scaffold100000_cov51\t470\t471\t1\tN/A\tN/A\tN/A\tscaffold100000_cov51\t0\t964\r\n", "scaffold100000_cov51\t472\t473\t1\tN/A\tN/A\tN/A\tscaffold100000_cov51\t0\t964\r\n", "scaffold100000_cov51\t518\t519\t1\tN/A\tN/A\tN/A\tscaffold100000_cov51\t0\t964\r\n" ] } ], "source": [ "#Check output\n", "!head *CpGs.bed*paIntergenic" ] }, { "cell_type": "code", 
"execution_count": 17, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 38172 Pact_MBD_only_CpGs.bed-paIntergenic\n", " 5419 Pact_RRBS_MBD_only_CpGs.bed-paIntergenic\n", " 83994 Pact_RRBS_only_CpGs.bed-paIntergenic\n", " 791839 Pact_WGBS_MBD_only_CpGs.bed-paIntergenic\n", " 446394 Pact_WGBS_RRBS_MBD_only_CpGs.bed-paIntergenic\n", " 331576 Pact_WGBS_RRBS_only_CpGs.bed-paIntergenic\n", " 1082691 Pact_WGBS_only_CpGs.bed-paIntergenic\n", " 1208659 Pact_no_method_CpGs.bed-paIntergenic\n", " 3988744 total\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l *CpGs.bed*paIntergenic" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!wc -l *CpGs.bed*paIntergenic > Pact-upset-data-paIntergenic-counts.txt" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "##### Method-associated DMC" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": true }, "outputs": [], "source": [ "%%bash \n", "\n", "for f in ../../Pact_methDiff/*bed\n", "do\n", " /usr/local/bin/intersectBed \\\n", " -wb \\\n", " -a ${f} \\\n", " -b ../../../genome-feature-files/Pact.GFFannotation.intergenic.bed \\\n", " > ${f}-paIntergenic\n", "done" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> ../../Pact_methDiff/Pact_RvM_50pMethDiff.bed-paIntergenic <==\r\n", "scaffold10006_cov97\t121\t123\t4.50663240548246e-17\tscaffold10006_cov97\t0\t1017\r\n", "scaffold10006_cov97\t173\t175\t1.27003267241539e-17\tscaffold10006_cov97\t0\t1017\r\n", "scaffold100173_cov47\t451\t453\t7.6591683257852e-74\tscaffold100173_cov47\t0\t560\r\n", "scaffold100173_cov47\t472\t474\t1.13567297947311e-76\tscaffold100173_cov47\t0\t560\r\n", 
"scaffold100202_cov48\t549\t551\t1.94537586282645e-87\tscaffold100202_cov48\t0\t1003\r\n", "scaffold100202_cov48\t553\t555\t3.33672699925078e-88\tscaffold100202_cov48\t0\t1003\r\n", "scaffold100202_cov48\t577\t579\t1.09299192954077e-110\tscaffold100202_cov48\t0\t1003\r\n", "scaffold100202_cov48\t583\t585\t6.02517568178073e-55\tscaffold100202_cov48\t0\t1003\r\n", "scaffold100202_cov48\t591\t593\t2.94227399245393e-58\tscaffold100202_cov48\t0\t1003\r\n", "scaffold100202_cov48\t705\t707\t3.89392107786878e-166\tscaffold100202_cov48\t0\t1003\r\n", "\r\n", "==> ../../Pact_methDiff/Pact_WvM_50pMethDiff.bed-paIntergenic <==\r\n", "scaffold100028_cov103\t625\t627\t6.04502352544367e-11\tscaffold100028_cov103\t0\t642\r\n", "scaffold100045_cov111\t104\t106\t2.80012254928996e-32\tscaffold100045_cov111\t0\t354\r\n", "scaffold100045_cov111\t109\t111\t7.99900370694721e-27\tscaffold100045_cov111\t0\t354\r\n", "scaffold100045_cov111\t144\t146\t2.0545338513345e-16\tscaffold100045_cov111\t0\t354\r\n", "scaffold100045_cov111\t186\t188\t4.30438645265298e-18\tscaffold100045_cov111\t0\t354\r\n", "scaffold100045_cov111\t211\t213\t2.0349055657616e-15\tscaffold100045_cov111\t0\t354\r\n", "scaffold100045_cov111\t214\t216\t1.22149235578891e-14\tscaffold100045_cov111\t0\t354\r\n", "scaffold100065_cov102\t65\t67\t3.65621410314477e-12\tscaffold100065_cov102\t0\t1381\r\n", "scaffold100065_cov102\t113\t115\t3.62713464995723e-16\tscaffold100065_cov102\t0\t1381\r\n", "scaffold10006_cov97\t173\t175\t9.81200364986217e-17\tscaffold10006_cov97\t0\t1017\r\n" ] } ], "source": [ "#Check output\n", "!head ../../Pact_methDiff/*paIntergenic" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 6553 ../../Pact_methDiff/Pact_RvM_50pMethDiff.bed-paIntergenic\r\n", " 18802 ../../Pact_methDiff/Pact_WvM_50pMethDiff.bed-paIntergenic\r\n", " 25355 total\r\n" ] } ], "source": [ "#Count number of 
overlaps\n", "!wc -l ../../Pact_methDiff/*paIntergenic" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!wc -l ../../Pact_methDiff/*paIntergenic > Pact-DMC-data-paIntergenic-counts.txt" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [default]", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 }