{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# How many of all the CpGs are captured?\n",
    "\n",
    "Compares the CpG loci present in the union bedgraph with all CpG records listed in the `fuzznuc` CpG GFF."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-03T04:58:54.070909Z",
     "start_time": "2020-04-03T04:58:54.063291Z"
    }
   },
   "source": [
    "## In combined Mcap\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-03T04:46:17.072356Z",
     "start_time": "2020-04-03T04:46:16.927056Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-rw-r--r--@ 1 sr320 staff 3.6G Mar 29 11:49 ../genome-feature-files/C1_CpG.gff\r\n",
      "-rw-r--r--@ 1 sr320 staff 2.5G Mar 29 11:12 ../genome-feature-files/Mcap_CpG.gff\r\n",
      "-rw-r--r--@ 1 sr320 staff 1.0G Mar 29 09:30 ../genome-feature-files/Pact_CpG.gff\r\n"
     ]
    }
   ],
   "source": [
    "!ls -lh ../genome-feature-files/*CpG.gff"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-03T04:47:46.424195Z",
     "start_time": "2020-04-03T04:47:46.254440Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "chrom\tstart\tend\t10\t11\t12\t13\t14\t15\t16\t17\t18\r\n",
      "1\t3493\t3495\tN/A\tN/A\tN/A\t0.000000\tN/A\t0.000000\tN/A\tN/A\tN/A\r\n",
      "1\t3518\t3520\tN/A\tN/A\tN/A\t0.000000\tN/A\t0.000000\tN/A\tN/A\tN/A\r\n",
      "1\t3727\t3729\tN/A\tN/A\tN/A\t0.000000\t0.000000\t8.695652\tN/A\tN/A\tN/A\r\n",
      "1\t3752\t3754\tN/A\tN/A\tN/A\t0.000000\t0.000000\t0.000000\tN/A\tN/A\tN/A\r\n",
      "1\t3757\t3759\tN/A\tN/A\tN/A\t0.000000\t0.000000\t0.000000\tN/A\tN/A\tN/A\r\n",
      "1\t3770\t3772\tN/A\tN/A\tN/A\t0.000000\t0.000000\t0.000000\tN/A\tN/A\tN/A\r\n",
      "1\t4062\t4064\tN/A\tN/A\t0.000000\tN/A\tN/A\tN/A\tN/A\tN/A\tN/A\r\n",
      "1\t4069\t4071\tN/A\tN/A\t0.000000\tN/A\tN/A\tN/A\tN/A\tN/A\tN/A\r\n",
      "1\t4077\t4079\tN/A\tN/A\t0.000000\tN/A\tN/A\tN/A\tN/A\tN/A\tN/A\r\n"
     ]
    }
   ],
   "source": [
    "!head ../analyses/10-unionbedg/Mcap_union_5x.bedgraph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-03T05:12:49.646792Z",
     "start_time": "2020-04-03T05:12:48.685752Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 13340269 ../analyses/10-unionbedg/Mcap_union_5x.bedgraph\r\n"
     ]
    }
   ],
   "source": [
    "!wc -l ../analyses/10-unionbedg/Mcap_union_5x.bedgraph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-03T04:48:02.683099Z",
     "start_time": "2020-04-03T04:48:02.559900Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "##gff-version 2.0\r\n",
      "##date 2020-03-29\r\n",
      "##Type DNA 1\r\n",
      "1\tfuzznuc\tmisc_feature\t37\t38\t2.000\t+\t.\tSequence \"1.1\" ; note \"*pat pattern1\"\r\n",
      "1\tfuzznuc\tmisc_feature\t90\t91\t2.000\t+\t.\tSequence \"1.2\" ; note \"*pat pattern1\"\r\n",
      "1\tfuzznuc\tmisc_feature\t121\t122\t2.000\t+\t.\tSequence \"1.3\" ; note \"*pat pattern1\"\r\n",
      "1\tfuzznuc\tmisc_feature\t132\t133\t2.000\t+\t.\tSequence \"1.4\" ; note \"*pat pattern1\"\r\n",
      "1\tfuzznuc\tmisc_feature\t153\t154\t2.000\t+\t.\tSequence \"1.5\" ; note \"*pat pattern1\"\r\n",
      "1\tfuzznuc\tmisc_feature\t170\t171\t2.000\t+\t.\tSequence \"1.6\" ; note \"*pat pattern1\"\r\n",
      "1\tfuzznuc\tmisc_feature\t220\t221\t2.000\t+\t.\tSequence \"1.7\" ; note \"*pat pattern1\"\r\n"
     ]
    }
   ],
   "source": [
    "!head ../genome-feature-files/Mcap_CpG.gff"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-03T05:15:23.567107Z",
     "start_time": "2020-04-03T05:14:56.772205Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "28675390\r\n"
     ]
    }
   ],
   "source": [
    "!fgrep -c \"fuzznuc\tmisc_feature\" ../genome-feature-files/Mcap_CpG.gff"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-03T05:02:33.325801Z",
     "start_time": "2020-04-03T05:02:33.164864Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3043\tfuzznuc\tmisc_feature\t19626\t19627\t2.000\t+\t.\tSequence \"3043.648\" ; note \"*pat pattern1\"\r\n",
      "3043\tfuzznuc\tmisc_feature\t19637\t19638\t2.000\t+\t.\tSequence \"3043.649\" ; note \"*pat pattern1\"\r\n",
      "3043\tfuzznuc\tmisc_feature\t19747\t19748\t2.000\t+\t.\tSequence \"3043.650\" ; note \"*pat pattern1\"\r\n",
      "3043\tfuzznuc\tmisc_feature\t19781\t19782\t2.000\t+\t.\tSequence \"3043.651\" ; note \"*pat pattern1\"\r\n",
      "3043\tfuzznuc\tmisc_feature\t19818\t19819\t2.000\t+\t.\tSequence \"3043.652\" ; note \"*pat pattern1\"\r\n",
      "3043\tfuzznuc\tmisc_feature\t19839\t19840\t2.000\t+\t.\tSequence \"3043.653\" ; note \"*pat pattern1\"\r\n",
      "3043\tfuzznuc\tmisc_feature\t19858\t19859\t2.000\t+\t.\tSequence \"3043.654\" ; note \"*pat pattern1\"\r\n",
      "3043\tfuzznuc\tmisc_feature\t19879\t19880\t2.000\t+\t.\tSequence \"3043.655\" ; note \"*pat pattern1\"\r\n",
      "3043\tfuzznuc\tmisc_feature\t20109\t20110\t2.000\t+\t.\tSequence \"3043.656\" ; note \"*pat pattern1\"\r\n",
      "3043\tfuzznuc\tmisc_feature\t20146\t20147\t2.000\t+\t.\tSequence \"3043.657\" ; note \"*pat pattern1\"\r\n"
     ]
    }
   ],
   "source": [
    "!tail ../genome-feature-files/Mcap_CpG.gff"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## A little intersectbed (overkill)\n",
    "\n",
    "The intersect below reports 13340268 lines, one fewer than `wc -l` on the bedgraph (13340269). The difference is the bedgraph header line, which is consistent with every data row overlapping an annotated CpG."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-03T04:58:20.370013Z",
     "start_time": "2020-04-03T04:53:03.375933Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 13340268\r\n"
     ]
    }
   ],
   "source": [
    "!/Applications/bioinfo/bedtools2/bin/bedtools \\\n",
    "intersect \\\n",
    "-wa \\\n",
    "-a ../analyses/10-unionbedg/Mcap_union_5x.bedgraph \\\n",
    "-b ../genome-feature-files/Mcap_CpG.gff | wc -l\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The next two cells were repaired after review and have not been re-run yet, so they carry no saved output:\n",
    "\n",
    "- total line count of the CpG GFF (header lines included)\n",
    "- number of data rows in the union bedgraph (header line excluded)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!wc -l < ../genome-feature-files/Mcap_CpG.gff"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!tail -n +2 ../analyses/10-unionbedg/Mcap_union_5x.bedgraph | wc -l"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-03T05:04:34.392116Z",
     "start_time": "2020-04-03T05:04:06.906711Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "28675390\r\n"
     ]
    }
   ],
   "source": [
    "!fgrep -c \"2.000\" ../genome-feature-files/Mcap_CpG.gff"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Fraction of CpGs captured\n",
    "\n",
    "Bedgraph data rows divided by CpG records in the GFF: about 0.465. Re-run the cell below for the exact value."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data rows in Mcap_union_5x.bedgraph: `wc -l` (13340269) minus the header line;\n",
    "# this matches the bedtools intersect count.\n",
    "covered_cpgs = 13340268\n",
    "# fuzznuc CpG records counted in Mcap_CpG.gff\n",
    "total_cpgs = 28675390\n",
    "\n",
    "covered_cpgs / total_cpgs"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}