{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Characterizing the general methylation landscape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In this notebook, I will characterize the general methylation landscape. To characterize CpG methylation, I will use individual samples, as well as a union BEDgraph that concatenates all sample information.\n", "\n", "1. Concatenate coverage information\n", "2. Characterize methylation for each CpG dinucleotide in individual samples and union BEDgraph\n", "3. Determine genomic location of highly methylated, moderately methylated, and lowly methylated CpGs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 0. Set working directory" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/yaaminivenkataraman/Documents/ceabigr/code\r\n" ] } ], "source": [ "!pwd" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/yaaminivenkataraman/Documents/ceabigr/output\n" ] } ], "source": [ "cd ../output/" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "!mkdir methylation-landscape" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/yaaminivenkataraman/Documents/ceabigr/output/methylation-landscape\n" ] } ], "source": [ "cd methylation-landscape/" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Matplotlib is building the font cache using fc-list. 
This may take a moment.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "0.25.1\n" ] } ], "source": [ "#Import pandas for this notebook and print its version\n", "import pandas as pd\n", "print(pd.__version__)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. Obtain sample BEDgraphs" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2022-02-21 12:57:32-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/\n", "Resolving gannet.fish.washington.edu (gannet.fish.washington.edu)... 128.95.149.52\n", "Connecting to gannet.fish.washington.edu (gannet.fish.washington.edu)|128.95.149.52|:443... connected.\n", "WARNING: cannot verify gannet.fish.washington.edu's certificate, issued by ‘CN=InCommon RSA Server CA,OU=InCommon,O=Internet2,L=Ann Arbor,ST=MI,C=US’:\n", " Unable to locally verify the issuer's authority.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html.tmp’\n", "\n", "index.html.tmp [ <=> ] 62.99K --.-KB/s in 0.03s \n", "\n", "2022-02-21 12:57:35 (1.87 MB/s) - ‘./index.html.tmp’ saved [64500]\n", "\n", "Loading robots.txt; please ignore errors.\n", "--2022-02-21 12:57:35-- https://gannet.fish.washington.edu/robots.txt\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 404 Not Found\n", "2022-02-21 12:57:35 ERROR 404: Not Found.\n", "\n", "Removing ./index.html.tmp since it should be rejected.\n", "\n", "--2022-02-21 12:57:35-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/?C=N;O=D\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=N;O=D.tmp’\n", "\n", "index.html?C=N;O=D. 
[ <=> ] 62.99K --.-KB/s in 0.03s \n", "\n", "2022-02-21 12:57:38 (2.04 MB/s) - ‘./index.html?C=N;O=D.tmp’ saved [64500]\n", "\n", "Removing ./index.html?C=N;O=D.tmp since it should be rejected.\n", "\n", "--2022-02-21 12:57:38-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/?C=M;O=A\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=M;O=A.tmp’\n", "\n", "index.html?C=M;O=A. [ <=> ] 62.99K --.-KB/s in 0.03s \n", "\n", "2022-02-21 12:57:41 (1.86 MB/s) - ‘./index.html?C=M;O=A.tmp’ saved [64500]\n", "\n", "Removing ./index.html?C=M;O=A.tmp since it should be rejected.\n", "\n", "--2022-02-21 12:57:41-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/?C=S;O=A\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=S;O=A.tmp’\n", "\n", "index.html?C=S;O=A. [ <=> ] 62.99K --.-KB/s in 0.1s \n", "\n", "2022-02-21 12:57:43 (654 KB/s) - ‘./index.html?C=S;O=A.tmp’ saved [64500]\n", "\n", "Removing ./index.html?C=S;O=A.tmp since it should be rejected.\n", "\n", "--2022-02-21 12:57:43-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/?C=D;O=A\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=D;O=A.tmp’\n", "\n", "index.html?C=D;O=A. 
[ <=> ] 62.99K --.-KB/s in 0.03s \n", "\n", "2022-02-21 12:57:46 (1.89 MB/s) - ‘./index.html?C=D;O=A.tmp’ saved [64500]\n", "\n", "Removing ./index.html?C=D;O=A.tmp since it should be rejected.\n", "\n", "--2022-02-21 12:57:46-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/3F_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 373563270 (356M)\n", "Saving to: ‘./3F_R1_val_1_5x.bedgraph’\n", "\n", "3F_R1_val_1_5x.bedg 100%[===================>] 356.26M 34.1MB/s in 11s \n", "\n", "2022-02-21 12:57:57 (32.5 MB/s) - ‘./3F_R1_val_1_5x.bedgraph’ saved [373563270/373563270]\n", "\n", "--2022-02-21 12:57:58-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/6M_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 351059500 (335M)\n", "Saving to: ‘./6M_R1_val_1_5x.bedgraph’\n", "\n", "6M_R1_val_1_5x.bedg 100%[===================>] 334.80M 30.3MB/s in 16s \n", "\n", "2022-02-21 12:58:14 (20.8 MB/s) - ‘./6M_R1_val_1_5x.bedgraph’ saved [351059500/351059500]\n", "\n", "--2022-02-21 12:58:15-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/7M_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 362653673 (346M)\n", "Saving to: ‘./7M_R1_val_1_5x.bedgraph’\n", "\n", "7M_R1_val_1_5x.bedg 100%[===================>] 345.85M 20.0MB/s in 16s \n", "\n", "2022-02-21 12:58:31 (21.0 MB/s) - ‘./7M_R1_val_1_5x.bedgraph’ saved [362653673/362653673]\n", "\n", "--2022-02-21 12:58:32-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/9M_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 357076772 (341M)\n", "Saving to: ‘./9M_R1_val_1_5x.bedgraph’\n", "\n", "9M_R1_val_1_5x.bedg 100%[===================>] 340.53M 32.6MB/s in 11s \n", "\n", "2022-02-21 12:58:44 (30.2 MB/s) - ‘./9M_R1_val_1_5x.bedgraph’ saved [357076772/357076772]\n", "\n", "--2022-02-21 12:58:45-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/12M_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 376215091 (359M)\n", "Saving to: ‘./12M_R1_val_1_5x.bedgraph’\n", "\n", "12M_R1_val_1_5x.bed 100%[===================>] 358.79M 28.2MB/s in 17s \n", "\n", "2022-02-21 12:59:02 (21.6 MB/s) - ‘./12M_R1_val_1_5x.bedgraph’ saved [376215091/376215091]\n", "\n", "--2022-02-21 12:59:03-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/13M_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 373670083 (356M)\n", "Saving to: ‘./13M_R1_val_1_5x.bedgraph’\n", "\n", "13M_R1_val_1_5x.bed 100%[===================>] 356.36M 31.0MB/s in 12s \n", "\n", "2022-02-21 12:59:15 (29.2 MB/s) - ‘./13M_R1_val_1_5x.bedgraph’ saved [373670083/373670083]\n", "\n", "--2022-02-21 12:59:16-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/16F_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 372826428 (356M)\n", "Saving to: ‘./16F_R1_val_1_5x.bedgraph’\n", "\n", "16F_R1_val_1_5x.bed 100%[===================>] 355.55M 30.1MB/s in 13s \n", "\n", "2022-02-21 12:59:29 (26.9 MB/s) - ‘./16F_R1_val_1_5x.bedgraph’ saved [372826428/372826428]\n", "\n", "--2022-02-21 12:59:30-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/19F_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 372720385 (355M)\n", "Saving to: ‘./19F_R1_val_1_5x.bedgraph’\n", "\n", "19F_R1_val_1_5x.bed 100%[===================>] 355.45M 33.6MB/s in 11s \n", "\n", "2022-02-21 12:59:41 (31.8 MB/s) - ‘./19F_R1_val_1_5x.bedgraph’ saved [372720385/372720385]\n", "\n", "--2022-02-21 12:59:42-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/22F_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 355172781 (339M)\n", "Saving to: ‘./22F_R1_val_1_5x.bedgraph’\n", "\n", "22F_R1_val_1_5x.bed 100%[===================>] 338.72M 31.9MB/s in 11s \n", "\n", "2022-02-21 12:59:53 (32.0 MB/s) - ‘./22F_R1_val_1_5x.bedgraph’ saved [355172781/355172781]\n", "\n", "--2022-02-21 12:59:53-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/23M_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 379907684 (362M)\n", "Saving to: ‘./23M_R1_val_1_5x.bedgraph’\n", "\n", "23M_R1_val_1_5x.bed 100%[===================>] 362.31M 32.8MB/s in 12s \n", "\n", "2022-02-21 13:00:04 (31.4 MB/s) - ‘./23M_R1_val_1_5x.bedgraph’ saved [379907684/379907684]\n", "\n", "--2022-02-21 13:00:05-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/29F_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 364984329 (348M)\n", "Saving to: ‘./29F_R1_val_1_5x.bedgraph’\n", "\n", "29F_R1_val_1_5x.bed 100%[===================>] 348.08M 33.3MB/s in 11s \n", "\n", "2022-02-21 13:00:16 (31.2 MB/s) - ‘./29F_R1_val_1_5x.bedgraph’ saved [364984329/364984329]\n", "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "--2022-02-21 13:00:17-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/31M_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 359342062 (343M)\n", "Saving to: ‘./31M_R1_val_1_5x.bedgraph’\n", "\n", "31M_R1_val_1_5x.bed 100%[===================>] 342.69M 14.7MB/s in 16s \n", "\n", "2022-02-21 13:00:33 (21.9 MB/s) - ‘./31M_R1_val_1_5x.bedgraph’ saved [359342062/359342062]\n", "\n", "--2022-02-21 13:00:34-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/35F_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 367994058 (351M)\n", "Saving to: ‘./35F_R1_val_1_5x.bedgraph’\n", "\n", "35F_R1_val_1_5x.bed 100%[===================>] 350.95M 32.5MB/s in 14s \n", "\n", "2022-02-21 13:00:48 (24.7 MB/s) - ‘./35F_R1_val_1_5x.bedgraph’ saved [367994058/367994058]\n", "\n", "--2022-02-21 13:00:49-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/36F_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 369911637 (353M)\n", "Saving to: ‘./36F_R1_val_1_5x.bedgraph’\n", "\n", "36F_R1_val_1_5x.bed 100%[===================>] 352.77M 32.4MB/s in 11s \n", "\n", "2022-02-21 13:01:01 (30.8 MB/s) - ‘./36F_R1_val_1_5x.bedgraph’ saved [369911637/369911637]\n", "\n", "--2022-02-21 13:01:02-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/39F_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 362556125 (346M)\n", "Saving to: ‘./39F_R1_val_1_5x.bedgraph’\n", "\n", "39F_R1_val_1_5x.bed 100%[===================>] 345.76M 33.5MB/s in 11s \n", "\n", "2022-02-21 13:01:13 (31.2 MB/s) - ‘./39F_R1_val_1_5x.bedgraph’ saved [362556125/362556125]\n", "\n", "--2022-02-21 13:01:14-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/41F_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 336401370 (321M)\n", "Saving to: ‘./41F_R1_val_1_5x.bedgraph’\n", "\n", "41F_R1_val_1_5x.bed 100%[===================>] 320.82M 33.6MB/s in 10s \n", "\n", "2022-02-21 13:01:24 (31.9 MB/s) - ‘./41F_R1_val_1_5x.bedgraph’ saved [336401370/336401370]\n", "\n", "--2022-02-21 13:01:24-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/44F_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 404043120 (385M)\n", "Saving to: ‘./44F_R1_val_1_5x.bedgraph’\n", "\n", "44F_R1_val_1_5x.bed 100%[===================>] 385.33M 33.6MB/s in 12s \n", "\n", "2022-02-21 13:01:37 (31.0 MB/s) - ‘./44F_R1_val_1_5x.bedgraph’ saved [404043120/404043120]\n", "\n", "--2022-02-21 13:01:38-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/48M_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 362824035 (346M)\n", "Saving to: ‘./48M_R1_val_1_5x.bedgraph’\n", "\n", "48M_R1_val_1_5x.bed 100%[===================>] 346.02M 33.1MB/s in 11s \n", "\n", "2022-02-21 13:01:49 (31.8 MB/s) - ‘./48M_R1_val_1_5x.bedgraph’ saved [362824035/362824035]\n", "\n", "--2022-02-21 13:01:50-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/50F_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 368816914 (352M)\n", "Saving to: ‘./50F_R1_val_1_5x.bedgraph’\n", "\n", "50F_R1_val_1_5x.bed 100%[===================>] 351.73M 32.2MB/s in 11s \n", "\n", "2022-02-21 13:02:01 (30.9 MB/s) - ‘./50F_R1_val_1_5x.bedgraph’ saved [368816914/368816914]\n", "\n", "--2022-02-21 13:02:02-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/52F_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 375031273 (358M)\n", "Saving to: ‘./52F_R1_val_1_5x.bedgraph’\n", "\n", "52F_R1_val_1_5x.bed 100%[===================>] 357.66M 29.9MB/s in 12s \n", "\n", "2022-02-21 13:02:14 (28.9 MB/s) - ‘./52F_R1_val_1_5x.bedgraph’ saved [375031273/375031273]\n", "\n", "--2022-02-21 13:02:15-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/53F_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 373291367 (356M)\n", "Saving to: ‘./53F_R1_val_1_5x.bedgraph’\n", "\n", "53F_R1_val_1_5x.bed 100%[===================>] 356.00M 31.4MB/s in 12s \n", "\n", "2022-02-21 13:02:27 (29.5 MB/s) - ‘./53F_R1_val_1_5x.bedgraph’ saved [373291367/373291367]\n", "\n", "--2022-02-21 13:02:28-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/54F_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 375245139 (358M)\n", "Saving to: ‘./54F_R1_val_1_5x.bedgraph’\n", "\n", "54F_R1_val_1_5x.bed 100%[===================>] 357.86M 27.6MB/s in 19s \n", "\n", "2022-02-21 13:02:47 (18.6 MB/s) - ‘./54F_R1_val_1_5x.bedgraph’ saved [375245139/375245139]\n", "\n", "--2022-02-21 13:02:48-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/59M_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 339839609 (324M)\n", "Saving to: ‘./59M_R1_val_1_5x.bedgraph’\n", "\n", "59M_R1_val_1_5x.bed 100%[===================>] 324.10M 22.9MB/s in 15s \n", "\n", "2022-02-21 13:03:04 (21.2 MB/s) - ‘./59M_R1_val_1_5x.bedgraph’ saved [339839609/339839609]\n", "\n", "--2022-02-21 13:03:04-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/64M_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 367775339 (351M)\n", "Saving to: ‘./64M_R1_val_1_5x.bedgraph’\n", "\n", "64M_R1_val_1_5x.bed 100%[===================>] 350.74M 28.7MB/s in 17s \n", "\n", "2022-02-21 13:03:21 (21.1 MB/s) - ‘./64M_R1_val_1_5x.bedgraph’ saved [367775339/367775339]\n", "\n", "--2022-02-21 13:03:22-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/76F_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 363645350 (347M)\n", "Saving to: ‘./76F_R1_val_1_5x.bedgraph’\n", "\n", "76F_R1_val_1_5x.bed 100%[===================>] 346.80M 28.5MB/s in 13s \n", "\n", "2022-02-21 13:03:35 (27.2 MB/s) - ‘./76F_R1_val_1_5x.bedgraph’ saved [363645350/363645350]\n", "\n", "--2022-02-21 13:03:36-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/77F_R1_val_1_5x.bedgraph\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 369385673 (352M)\n", "Saving to: ‘./77F_R1_val_1_5x.bedgraph’\n", "\n", "77F_R1_val_1_5x.bed 100%[===================>] 352.27M 29.7MB/s in 12s \n", "\n", "2022-02-21 13:03:48 (28.8 MB/s) - ‘./77F_R1_val_1_5x.bedgraph’ saved [369385673/369385673]\n", "\n", "--2022-02-21 13:03:49-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/?C=N;O=A\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=N;O=A.tmp’\n", "\n", "index.html?C=N;O=A. [ <=> ] 62.99K --.-KB/s in 0.01s \n", "\n", "2022-02-21 13:03:52 (6.44 MB/s) - ‘./index.html?C=N;O=A.tmp’ saved [64500]\n", "\n", "Removing ./index.html?C=N;O=A.tmp since it should be rejected.\n", "\n", "--2022-02-21 13:03:52-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/?C=M;O=D\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=M;O=D.tmp’\n", "\n", "index.html?C=M;O=D. 
[ <=> ] 62.99K --.-KB/s in 0.04s \n", "\n", "2022-02-21 13:03:55 (1.41 MB/s) - ‘./index.html?C=M;O=D.tmp’ saved [64500]\n", "\n", "Removing ./index.html?C=M;O=D.tmp since it should be rejected.\n", "\n", "--2022-02-21 13:03:55-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/?C=S;O=D\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=S;O=D.tmp’\n", "\n", "index.html?C=S;O=D. [ <=> ] 62.99K --.-KB/s in 0.03s \n", "\n", "2022-02-21 13:03:57 (2.03 MB/s) - ‘./index.html?C=S;O=D.tmp’ saved [64500]\n", "\n", "Removing ./index.html?C=S;O=D.tmp since it should be rejected.\n", "\n", "--2022-02-21 13:03:57-- https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/?C=D;O=D\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=D;O=D.tmp’\n", "\n", "index.html?C=D;O=D. [ <=> ] 62.99K --.-KB/s in 0.05s \n", "\n", "2022-02-21 13:04:00 (1.22 MB/s) - ‘./index.html?C=D;O=D.tmp’ saved [64500]\n", "\n", "Removing ./index.html?C=D;O=D.tmp since it should be rejected.\n", "\n", "FINISHED --2022-02-21 13:04:00--\n", "Total wall clock time: 6m 28s\n", "Downloaded: 35 files, 8.9G in 5m 39s (26.8 MB/s)\n" ] } ], "source": [ "#Download 5x bedgraphs\n", "!wget -r \\\n", "--no-check-certificate --no-directories --no-parent --reject \"index.html*\" \\\n", "-P . 
\\\n", "-A \"*5x.bedgraph\" https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "12M_R1_val_1_5x.bedgraph 36F_R1_val_1_5x.bedgraph 54F_R1_val_1_5x.bedgraph\r\n", "13M_R1_val_1_5x.bedgraph 39F_R1_val_1_5x.bedgraph 59M_R1_val_1_5x.bedgraph\r\n", "16F_R1_val_1_5x.bedgraph 3F_R1_val_1_5x.bedgraph 64M_R1_val_1_5x.bedgraph\r\n", "19F_R1_val_1_5x.bedgraph 41F_R1_val_1_5x.bedgraph 6M_R1_val_1_5x.bedgraph\r\n", "22F_R1_val_1_5x.bedgraph 44F_R1_val_1_5x.bedgraph 76F_R1_val_1_5x.bedgraph\r\n", "23M_R1_val_1_5x.bedgraph 48M_R1_val_1_5x.bedgraph 77F_R1_val_1_5x.bedgraph\r\n", "29F_R1_val_1_5x.bedgraph 50F_R1_val_1_5x.bedgraph 7M_R1_val_1_5x.bedgraph\r\n", "31M_R1_val_1_5x.bedgraph 52F_R1_val_1_5x.bedgraph 9M_R1_val_1_5x.bedgraph\r\n", "35F_R1_val_1_5x.bedgraph 53F_R1_val_1_5x.bedgraph\r\n" ] } ], "source": [ "#Check directory for all files\n", "!ls" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MD5 (12M_R1_val_1_5x.bedgraph) = 2be3d2693ee2b983a98546fdb03ea7c4\n", "MD5 (13M_R1_val_1_5x.bedgraph) = e11bce6266565305b96392d99dfc97e8\n", "MD5 (16F_R1_val_1_5x.bedgraph) = 1b6587ea799136de40b08c6737c84c40\n", "MD5 (19F_R1_val_1_5x.bedgraph) = f37e3564d37446ecdad0bc353e7c389f\n", "MD5 (22F_R1_val_1_5x.bedgraph) = f3555e77e4148e61d749b2c2484d2991\n", "MD5 (23M_R1_val_1_5x.bedgraph) = 510db373f75569ab6cbbfc95d01963bc\n", "MD5 (29F_R1_val_1_5x.bedgraph) = 16a4229fc65000b14c3be02875243279\n", "MD5 (31M_R1_val_1_5x.bedgraph) = dffadff27e431d84856d5eafa3ba330e\n", "MD5 (35F_R1_val_1_5x.bedgraph) = f76ff82c606b28d97033e8582649c29f\n", "MD5 (36F_R1_val_1_5x.bedgraph) = d05f9d0fbd7ddde77772f56cfe6745af\n", "MD5 (39F_R1_val_1_5x.bedgraph) = c2ad5ea91c70896feb644e004b7acdc7\n", "MD5 (3F_R1_val_1_5x.bedgraph) = fa42bb14f17ecf12aa94356fa83d4427\n", "MD5 
(41F_R1_val_1_5x.bedgraph) = cccecd107353fcc4e522a5840a54df0a\n", "MD5 (44F_R1_val_1_5x.bedgraph) = bb61884d72c7a55a166976f1352143a9\n", "MD5 (48M_R1_val_1_5x.bedgraph) = bea5502d6a7528fd6852ea1008b2ab32\n", "MD5 (50F_R1_val_1_5x.bedgraph) = 3bc280d9dcfb32db3c1aeab04d2a7283\n", "MD5 (52F_R1_val_1_5x.bedgraph) = f60abd2c7ea067653dd2d5ef6c5b72a7\n", "MD5 (53F_R1_val_1_5x.bedgraph) = 6f67c4d7afbe0674bfa19e1a96285d1b\n", "MD5 (54F_R1_val_1_5x.bedgraph) = 4bb1c7a85eb9f7a3c231aa9543189eef\n", "MD5 (59M_R1_val_1_5x.bedgraph) = f6c1296ac349fb47729ffd9f051e9c3f\n", "MD5 (64M_R1_val_1_5x.bedgraph) = b1d9ae1b9cf95d07f0757bdf68d718ad\n", "MD5 (6M_R1_val_1_5x.bedgraph) = b0e6f06d93e481969e1972b2d1dc7dd1\n", "MD5 (76F_R1_val_1_5x.bedgraph) = 7ad5b9656b0aadb5d252b42222fe0b1c\n", "MD5 (77F_R1_val_1_5x.bedgraph) = e86a040c9face157544f6deda18e1ee0\n", "MD5 (7M_R1_val_1_5x.bedgraph) = 609290ef3255a6ba8e6b51653fdf693e\n", "MD5 (9M_R1_val_1_5x.bedgraph) = 8cfa9c785cbebc5ceef6de40595345cf\n" ] } ], "source": [ "#Obtain md5\n", "!md5 *" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "for f in *5x.bedgraph\n", "do\n", "/opt/homebrew/bin/sortBed \\\n", "-i ${f} \\\n", "> $(basename ${f%_5x.bedgraph})_5x.sort.bedgraph\n", "done" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "12M_R1_val_1_5x.sort.bedgraph 44F_R1_val_1_5x.sort.bedgraph\r\n", "13M_R1_val_1_5x.sort.bedgraph 48M_R1_val_1_5x.sort.bedgraph\r\n", "16F_R1_val_1_5x.sort.bedgraph 50F_R1_val_1_5x.sort.bedgraph\r\n", "19F_R1_val_1_5x.sort.bedgraph 52F_R1_val_1_5x.sort.bedgraph\r\n", "22F_R1_val_1_5x.sort.bedgraph 53F_R1_val_1_5x.sort.bedgraph\r\n", "23M_R1_val_1_5x.sort.bedgraph 54F_R1_val_1_5x.sort.bedgraph\r\n", "29F_R1_val_1_5x.sort.bedgraph 59M_R1_val_1_5x.sort.bedgraph\r\n", "31M_R1_val_1_5x.sort.bedgraph 64M_R1_val_1_5x.sort.bedgraph\r\n", 
"35F_R1_val_1_5x.sort.bedgraph 6M_R1_val_1_5x.sort.bedgraph\r\n", "36F_R1_val_1_5x.sort.bedgraph 76F_R1_val_1_5x.sort.bedgraph\r\n", "39F_R1_val_1_5x.sort.bedgraph 77F_R1_val_1_5x.sort.bedgraph\r\n", "3F_R1_val_1_5x.sort.bedgraph 7M_R1_val_1_5x.sort.bedgraph\r\n", "41F_R1_val_1_5x.sort.bedgraph 9M_R1_val_1_5x.sort.bedgraph\r\n" ] } ], "source": [ "!ls *sort*" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. Concatenate percent methylation information" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2a. Remove C->T SNPs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For each sample, I will use BS-Snper output to change the percent methylation for a C->T SNP to 0." ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2022-02-21 13:39:43-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/\n", "Resolving gannet.fish.washington.edu (gannet.fish.washington.edu)... 128.95.149.52\n", "Connecting to gannet.fish.washington.edu (gannet.fish.washington.edu)|128.95.149.52|:443... connected.\n", "WARNING: cannot verify gannet.fish.washington.edu's certificate, issued by ‘CN=InCommon RSA Server CA,OU=InCommon,O=Internet2,L=Ann Arbor,ST=MI,C=US’:\n", " Unable to locally verify the issuer's authority.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html.tmp’\n", "\n", "index.html.tmp [ <=> ] 32.59K --.-KB/s in 0.02s \n", "\n", "2022-02-21 13:39:45 (1.42 MB/s) - ‘./index.html.tmp’ saved [33377]\n", "\n", "Loading robots.txt; please ignore errors.\n", "--2022-02-21 13:39:45-- https://gannet.fish.washington.edu/robots.txt\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
404 Not Found\n", "2022-02-21 13:39:45 ERROR 404: Not Found.\n", "\n", "Removing ./index.html.tmp since it should be rejected.\n", "\n", "--2022-02-21 13:39:45-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/?C=N;O=D\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=N;O=D.tmp’\n", "\n", "index.html?C=N;O=D. [ <=> ] 32.59K --.-KB/s in 0.02s \n", "\n", "2022-02-21 13:39:45 (1.36 MB/s) - ‘./index.html?C=N;O=D.tmp’ saved [33377]\n", "\n", "Removing ./index.html?C=N;O=D.tmp since it should be rejected.\n", "\n", "--2022-02-21 13:39:45-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/?C=M;O=A\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=M;O=A.tmp’\n", "\n", "index.html?C=M;O=A. [ <=> ] 32.59K --.-KB/s in 0.01s \n", "\n", "2022-02-21 13:39:45 (2.67 MB/s) - ‘./index.html?C=M;O=A.tmp’ saved [33377]\n", "\n", "Removing ./index.html?C=M;O=A.tmp since it should be rejected.\n", "\n", "--2022-02-21 13:39:45-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/?C=S;O=A\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=S;O=A.tmp’\n", "\n", "index.html?C=S;O=A. 
[ <=> ] 32.59K --.-KB/s in 0.01s \n", "\n", "2022-02-21 13:39:45 (2.61 MB/s) - ‘./index.html?C=S;O=A.tmp’ saved [33377]\n", "\n", "Removing ./index.html?C=S;O=A.tmp since it should be rejected.\n", "\n", "--2022-02-21 13:39:45-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/?C=D;O=A\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=D;O=A.tmp’\n", "\n", "index.html?C=D;O=A. [ <=> ] 32.59K --.-KB/s in 0.01s \n", "\n", "2022-02-21 13:39:45 (2.66 MB/s) - ‘./index.html?C=D;O=A.tmp’ saved [33377]\n", "\n", "Removing ./index.html?C=D;O=A.tmp since it should be rejected.\n", "\n", "--2022-02-21 13:39:45-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/3F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 465774006 (444M) [text/x-vcard]\n", "Saving to: ‘./3F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "3F_R1_val_1_bismark 100%[===================>] 444.20M 27.5MB/s in 21s \n", "\n", "2022-02-21 13:40:05 (21.7 MB/s) - ‘./3F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [465774006/465774006]\n", "\n", "--2022-02-21 13:40:05-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/6M_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 468265377 (447M) [text/x-vcard]\n", "Saving to: ‘./6M_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "6M_R1_val_1_bismark 100%[===================>] 446.57M 31.5MB/s in 15s \n", "\n", "2022-02-21 13:40:21 (29.4 MB/s) - ‘./6M_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [468265377/468265377]\n", "\n", "--2022-02-21 13:40:21-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/7M_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 488134865 (466M) [text/x-vcard]\n", "Saving to: ‘./7M_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "7M_R1_val_1_bismark 100%[===================>] 465.52M 32.5MB/s in 15s \n", "\n", "2022-02-21 13:40:36 (30.1 MB/s) - ‘./7M_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [488134865/488134865]\n", "\n", "--2022-02-21 13:40:36-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/9M_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 467316285 (446M) [text/x-vcard]\n", "Saving to: ‘./9M_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "9M_R1_val_1_bismark 100%[===================>] 445.67M 30.3MB/s in 14s \n", "\n", "2022-02-21 13:40:50 (31.3 MB/s) - ‘./9M_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [467316285/467316285]\n", "\n", "--2022-02-21 13:40:50-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/12M_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 527657541 (503M) [text/x-vcard]\n", "Saving to: ‘./12M_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "12M_R1_val_1_bismar 100%[===================>] 503.21M 29.9MB/s in 18s \n", "\n", "2022-02-21 13:41:08 (28.1 MB/s) - ‘./12M_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [527657541/527657541]\n", "\n", "--2022-02-21 13:41:08-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/13M_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 526112503 (502M) [text/x-vcard]\n", "Saving to: ‘./13M_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "13M_R1_val_1_bismar 100%[===================>] 501.74M 33.0MB/s in 15s \n", "\n", "2022-02-21 13:41:23 (33.4 MB/s) - ‘./13M_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [526112503/526112503]\n", "\n", "--2022-02-21 13:41:23-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/16F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 525290939 (501M) [text/x-vcard]\n", "Saving to: ‘./16F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "16F_R1_val_1_bismar 100%[===================>] 500.96M 33.3MB/s in 15s \n", "\n", "2022-02-21 13:41:39 (32.9 MB/s) - ‘./16F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [525290939/525290939]\n", "\n", "--2022-02-21 13:41:39-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/19F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 528774660 (504M) [text/x-vcard]\n", "Saving to: ‘./19F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "19F_R1_val_1_bismar 100%[===================>] 504.28M 32.4MB/s in 16s \n", "\n", "2022-02-21 13:41:55 (31.5 MB/s) - ‘./19F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [528774660/528774660]\n", "\n", "--2022-02-21 13:41:55-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/22F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 478216190 (456M) [text/x-vcard]\n", "Saving to: ‘./22F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "22F_R1_val_1_bismar 100%[===================>] 456.06M 33.6MB/s in 14s \n", "\n", "2022-02-21 13:42:08 (33.3 MB/s) - ‘./22F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [478216190/478216190]\n", "\n", "--2022-02-21 13:42:08-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/23M_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "HTTP request sent, awaiting response... 200 OK\n", "Length: 537520139 (513M) [text/x-vcard]\n", "Saving to: ‘./23M_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "23M_R1_val_1_bismar 100%[===================>] 512.62M 33.3MB/s in 15s \n", "\n", "2022-02-21 13:42:24 (33.2 MB/s) - ‘./23M_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [537520139/537520139]\n", "\n", "--2022-02-21 13:42:24-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/29F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 493305649 (470M) [text/x-vcard]\n", "Saving to: ‘./29F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "29F_R1_val_1_bismar 100%[===================>] 470.45M 33.5MB/s in 14s \n", "\n", "2022-02-21 13:42:38 (33.2 MB/s) - ‘./29F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [493305649/493305649]\n", "\n", "--2022-02-21 13:42:38-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/31M_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 444512354 (424M) [text/x-vcard]\n", "Saving to: ‘./31M_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "31M_R1_val_1_bismar 100%[===================>] 423.92M 34.1MB/s in 13s \n", "\n", "2022-02-21 13:42:51 (33.7 MB/s) - ‘./31M_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [444512354/444512354]\n", "\n", "--2022-02-21 13:42:51-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/35F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 510692764 (487M) [text/x-vcard]\n", "Saving to: ‘./35F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "35F_R1_val_1_bismar 100%[===================>] 487.03M 32.4MB/s in 14s \n", "\n", "2022-02-21 13:43:05 (33.6 MB/s) - ‘./35F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [510692764/510692764]\n", "\n", "--2022-02-21 13:43:05-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/36F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 512837899 (489M) [text/x-vcard]\n", "Saving to: ‘./36F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "36F_R1_val_1_bismar 100%[===================>] 489.08M 33.2MB/s in 15s \n", "\n", "2022-02-21 13:43:20 (33.5 MB/s) - ‘./36F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [512837899/512837899]\n", "\n", "--2022-02-21 13:43:20-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/39F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 500262786 (477M) [text/x-vcard]\n", "Saving to: ‘./39F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "39F_R1_val_1_bismar 100%[===================>] 477.09M 29.6MB/s in 15s \n", "\n", "2022-02-21 13:43:35 (32.2 MB/s) - ‘./39F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [500262786/500262786]\n", "\n", "--2022-02-21 13:43:35-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/41F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 444419102 (424M) [text/x-vcard]\n", "Saving to: ‘./41F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "41F_R1_val_1_bismar 100%[===================>] 423.83M 33.5MB/s in 13s \n", "\n", "2022-02-21 13:43:48 (32.4 MB/s) - ‘./41F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [444419102/444419102]\n", "\n", "--2022-02-21 13:43:48-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/44F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 625998635 (597M) [text/x-vcard]\n", "Saving to: ‘./44F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "44F_R1_val_1_bismar 100%[===================>] 597.00M 33.4MB/s in 18s \n", "\n", "2022-02-21 13:44:06 (33.1 MB/s) - ‘./44F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [625998635/625998635]\n", "\n", "--2022-02-21 13:44:06-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/48M_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 505297722 (482M) [text/x-vcard]\n", "Saving to: ‘./48M_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "48M_R1_val_1_bismar 100%[===================>] 481.89M 32.7MB/s in 15s \n", "\n", "2022-02-21 13:44:21 (32.8 MB/s) - ‘./48M_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [505297722/505297722]\n", "\n", "--2022-02-21 13:44:21-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/50F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 498023659 (475M) [text/x-vcard]\n", "Saving to: ‘./50F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "50F_R1_val_1_bismar 100%[===================>] 474.95M 31.8MB/s in 17s \n", "\n", "2022-02-21 13:44:37 (28.7 MB/s) - ‘./50F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [498023659/498023659]\n", "\n", "--2022-02-21 13:44:37-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/52F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 524420425 (500M) [text/x-vcard]\n", "Saving to: ‘./52F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "52F_R1_val_1_bismar 100%[===================>] 500.13M 31.5MB/s in 16s \n", "\n", "2022-02-21 13:44:54 (30.5 MB/s) - ‘./52F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [524420425/524420425]\n", "\n", "--2022-02-21 13:44:54-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/53F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 524395166 (500M) [text/x-vcard]\n", "Saving to: ‘./53F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "53F_R1_val_1_bismar 100%[===================>] 500.10M 31.6MB/s in 16s \n", "\n", "2022-02-21 13:45:10 (31.8 MB/s) - ‘./53F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [524395166/524395166]\n", "\n", "--2022-02-21 13:45:10-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/54F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 528859571 (504M) [text/x-vcard]\n", "Saving to: ‘./54F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "54F_R1_val_1_bismar 100%[===================>] 504.36M 33.1MB/s in 16s \n", "\n", "2022-02-21 13:45:26 (31.4 MB/s) - ‘./54F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [528859571/528859571]\n", "\n", "--2022-02-21 13:45:26-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/59M_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 458813083 (438M) [text/x-vcard]\n", "Saving to: ‘./59M_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "59M_R1_val_1_bismar 100%[===================>] 437.56M 33.5MB/s in 13s \n", "\n", "2022-02-21 13:45:39 (33.4 MB/s) - ‘./59M_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [458813083/458813083]\n", "\n", "--2022-02-21 13:45:39-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/64M_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 503807460 (480M) [text/x-vcard]\n", "Saving to: ‘./64M_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "64M_R1_val_1_bismar 100%[===================>] 480.47M 34.4MB/s in 14s \n", "\n", "2022-02-21 13:45:53 (33.2 MB/s) - ‘./64M_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [503807460/503807460]\n", "\n", "--2022-02-21 13:45:53-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/76F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "HTTP request sent, awaiting response... 200 OK\n", "Length: 495489563 (473M) [text/x-vcard]\n", "Saving to: ‘./76F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "76F_R1_val_1_bismar 100%[===================>] 472.54M 33.5MB/s in 14s \n", "\n", "2022-02-21 13:46:08 (33.2 MB/s) - ‘./76F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [495489563/495489563]\n", "\n", "--2022-02-21 13:46:08-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/77F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 518615965 (495M) [text/x-vcard]\n", "Saving to: ‘./77F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’\n", "\n", "77F_R1_val_1_bismar 100%[===================>] 494.59M 32.6MB/s in 15s \n", "\n", "2022-02-21 13:46:23 (32.1 MB/s) - ‘./77F_R1_val_1_bismark_bt2_pe.SNP-results.vcf’ saved [518615965/518615965]\n", "\n", "--2022-02-21 13:46:23-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/?C=N;O=A\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=N;O=A.tmp’\n", "\n", "index.html?C=N;O=A. [ <=> ] 32.59K --.-KB/s in 0s \n", "\n", "2022-02-21 13:46:23 (75.4 MB/s) - ‘./index.html?C=N;O=A.tmp’ saved [33377]\n", "\n", "Removing ./index.html?C=N;O=A.tmp since it should be rejected.\n", "\n", "--2022-02-21 13:46:23-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/?C=M;O=D\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=M;O=D.tmp’\n", "\n", "index.html?C=M;O=D. [ <=> ] 32.59K --.-KB/s in 0s \n", "\n", "2022-02-21 13:46:23 (95.6 MB/s) - ‘./index.html?C=M;O=D.tmp’ saved [33377]\n", "\n", "Removing ./index.html?C=M;O=D.tmp since it should be rejected.\n", "\n", "--2022-02-21 13:46:23-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/?C=S;O=D\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=S;O=D.tmp’\n", "\n", "index.html?C=S;O=D. 
[ <=> ] 32.59K --.-KB/s in 0s \n", "\n", "2022-02-21 13:46:23 (100 MB/s) - ‘./index.html?C=S;O=D.tmp’ saved [33377]\n", "\n", "Removing ./index.html?C=S;O=D.tmp since it should be rejected.\n", "\n", "--2022-02-21 13:46:23-- https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/?C=D;O=D\n", "Reusing existing connection to gannet.fish.washington.edu:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘./index.html?C=D;O=D.tmp’\n", "\n", "index.html?C=D;O=D. [ <=> ] 32.59K --.-KB/s in 0s \n", "\n", "2022-02-21 13:46:23 (103 MB/s) - ‘./index.html?C=D;O=D.tmp’ saved [33377]\n", "\n", "Removing ./index.html?C=D;O=D.tmp since it should be rejected.\n", "\n", "FINISHED --2022-02-21 13:46:23--\n", "Total wall clock time: 6m 40s\n", "Downloaded: 35 files, 12G in 6m 37s (31.5 MB/s)\n" ] } ], "source": [ "#Download 5x SNP\n", "!wget -r \\\n", "--no-check-certificate --no-directories --no-parent --reject \"index.html*\" \\\n", "-P . 
\\\n", "-A \"*vcf\" https://gannet.fish.washington.edu/seashell/bu-github/nb-2022/C_virginica/analyses/bsnp01/" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=23;ADF=0,0;ADR=0,23;AD=0,23;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:23:0,0:0,23:0,23:0,0,21,0,0,23,0,0:0,0,37,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t80703\t.\tC\tT\t1000\tPASS\tDP=23;ADF=0,0;ADR=0,23;AD=0,23;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:23:0,0:0,23:0,23:0,0,10,0,0,23,0,0:0,0,36,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t89426\t.\tC\tT\t4\tLow\tDP=1;ADF=0,0;ADR=0,1;AD=0,1;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t90833\t.\tC\tT\t1000\tPASS\tDP=20;ADF=0,0;ADR=0,20;AD=0,20;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:20:0,0:0,20:0,20:0,0,36,0,0,20,0,0:0,0,37,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t109517\t.\tC\tT\t1000\tPASS\tDP=17;ADF=0,0;ADR=0,17;AD=0,17;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:17:0,0:0,17:0,17:0,0,29,0,0,17,0,0:0,0,37,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t110152\t.\tC\tT\t1000\tPASS\tDP=30;ADF=0,0;ADR=2,28;AD=2,28;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:30:0,0:2,28:2,28:0,0,12,0,0,28,2,0:0,0,35,0,0,37,37,0:0.067,0.933\n", "NC_035780.1\t124935\t.\tC\tT\t82\tPASS\tDP=11;ADF=0,0;ADR=0,11;AD=0,11;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:11:0,0:0,11:0,11:0,0,3,0,0,11,0,0:0,0,37,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t126029\t.\tC\tT\t154\tPASS\tDP=6;ADF=0,0;ADR=0,6;AD=0,6;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:6:0,0:0,6:0,6:0,0,13,0,0,6,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t126553\t.\tC\tT\t1000\tPASS\tDP=48;ADF=0,0;ADR=25,23;AD=25,23;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:48:0,0:25,23:25,23:0,0,54,0,0,23,25,0:0,0,36,0,0,35,37,0:0.521,0.479\n", 
"NC_035780.1\t172994\t.\tC\tT\t1000\tPASS\tDP=10;ADF=0,0;ADR=0,10;AD=0,10;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:10:0,0:0,10:0,10:0,0,7,0,0,10,0,0:0,0,37,0,0,36,0,0:0.000,1.000\n", "Error: line number 2850659 of file 3F_R1_val_1_bismark_bt2_pe.SNP-results.vcf has 1 fields, but 10 were expected.\n" ] } ], "source": [ "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a 3F_R1_val_1_bismark_bt2_pe.SNP-results.vcf \\\n", "-b 3F_R1_val_1_5x.sort.bedgraph \\\n", "| grep \"C\tT\" | head" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2b. Create a union BEDgraph" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I will use `unionBedGraphs` to concatenate information for all loci across samples." ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "bedtoolsDirectory = \"/opt/homebrew/bin/\"" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\r\n", "Tool: bedtools unionbedg (aka unionBedGraphs)\r\n", "Version: v2.30.0\r\n", "Summary: Combines multiple BedGraph files into a single file,\r\n", "\t allowing coverage comparisons between them.\r\n", "\r\n", "Usage: bedtools unionbedg [OPTIONS] -i FILE1 FILE2 .. 
FILEn\r\n", "\t Assumes that each BedGraph file is sorted by chrom/start \r\n", "\t and that the intervals in each are non-overlapping.\r\n", "\r\n", "Options: \r\n", "\t-header\t\tPrint a header line.\r\n", "\t\t\t(chrom/start/end + names of each file).\r\n", "\r\n", "\t-names\t\tA list of names (one/file) to describe each file in -i.\r\n", "\t\t\tThese names will be printed in the header line.\r\n", "\r\n", "\t-g\t\tUse genome file to calculate empty regions.\r\n", "\t\t\t- STRING.\r\n", "\r\n", "\t-empty\t\tReport empty regions (i.e., start/end intervals w/o\r\n", "\t\t\tvalues in all files).\r\n", "\t\t\t- Requires the '-g FILE' parameter.\r\n", "\r\n", "\t-filler TEXT\tUse TEXT when representing intervals having no value.\r\n", "\t\t\t- Default is '0', but you can use 'N/A' or any text.\r\n", "\r\n", "\t-examples\tShow detailed usage examples.\r\n", "\r\n" ] } ], "source": [ "!{bedtoolsDirectory}unionBedGraphs -h" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "#Create union BEDgraph from sorted files\n", "#Include a header\n", "#Use N/A when there is no data for a CpG in a sample\n", "#Define sample IDs\n", "#Use sorted bedgraphs\n", "#Save output\n", "!{bedtoolsDirectory}unionBedGraphs \\\n", "-header \\\n", "-filler N/A \\\n", "-names 12 13 16 19 22 23 29 31 35 36 39 3 41 44 48 50 52 53 54 59 64 6 76 77 7 9 \\\n", "-i \\\n", "*5x.sort.bedgraph \\\n", "> union_5x.bedgraph" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "chrom\tstart\tend\t12\t13\t16\t19\t22\t23\t29\t31\t35\t36\t39\t3\t41\t44\t48\t50\t52\t53\t54\t59\t64\t6\t76\t77\t7\t9\n", "NC_007175.2\t48\t50\t0.000000\t0.000000\t1.923077\t0.731452\t1.015228\t0.000000\t1.444623\t3.125000\t0.844511\t1.699182\t0.541339\t1.694915\t1.928375\t2.136076\t1.086957\t1.672640\t1.806240\t1.100917\t1.780694\t0.000000\t3.973510\t0.653595\t0.682057\t1.661475\t3.750000\t1.754386\n", 
"NC_007175.2\t50\t52\t0.000000\t0.000000\t1.626016\t0.733855\t1.507937\t0.000000\t1.349325\t1.470588\t0.600462\t1.700880\t0.599908\t1.500000\t2.570694\t1.327434\t1.036269\t1.874311\t1.371951\t1.069218\t1.842105\t0.000000\t3.797468\t0.632911\t0.442478\t1.439539\t3.488372\t1.666667\n", "NC_007175.2\t87\t89\t1.169591\t1.293103\t1.045857\t0.286907\t0.789771\t0.990099\t0.859599\t0.671141\t0.666349\t0.813008\t0.444115\t1.284875\t0.800915\t1.361796\t0.245700\t0.959596\t0.852273\t0.758853\t0.866927\t0.000000\t0.319489\t0.666667\t0.208008\t1.021477\t0.478469\t0.000000\n", "NC_007175.2\t146\t148\t1.261830\t0.461894\t1.502146\t0.684369\t1.081187\t0.819672\t1.183206\t0.332226\t0.721028\t1.522344\t0.562023\t0.995025\t1.548541\t1.321760\t0.383142\t1.619645\t1.306458\t1.210914\t0.924855\t0.000000\t2.027027\t0.702988\t0.475325\t1.308017\t1.566580\t0.456621\n", "NC_007175.2\t192\t194\t1.129944\t1.271186\t1.726908\t0.691776\t1.094563\t1.955990\t1.097734\t0.303951\t0.996997\t1.613626\t0.538915\t1.198820\t1.716501\t1.497326\t0.907029\t1.575555\t1.486346\t1.194200\t0.896287\t0.476190\t1.369863\t0.161031\t0.558659\t1.286383\t2.195122\t0.840336\n", "NC_007175.2\t245\t247\t0.829876\t0.550964\t1.228250\t0.414110\t0.794148\t0.626959\t1.226456\t1.321586\t0.824253\t1.138753\t0.516834\t1.106095\t1.192146\t1.287830\t0.891530\t1.029454\t1.357658\t0.887178\t0.891720\t1.986755\t1.195219\t0.451467\t0.596787\t1.074455\t0.993377\t0.609756\n", "NC_007175.2\t256\t258\t0.738007\t0.496278\t1.417467\t0.503960\t0.938518\t0.877193\t0.672202\t0.404858\t0.983946\t1.295966\t0.583658\t1.337793\t1.944793\t1.562191\t0.405954\t1.254246\t1.390498\t1.110242\t0.780142\t1.111111\t2.083333\t0.202429\t0.662633\t1.096523\t0.282486\t1.117318\n", 
"NC_007175.2\t263\t265\t1.365188\t0.917431\t1.681034\t0.512963\t1.130356\t1.133144\t0.644183\t0.000000\t0.755699\t1.164596\t0.631720\t1.174831\t1.623572\t1.444653\t0.527704\t1.434933\t1.391432\t0.840336\t0.830341\t1.058201\t1.827243\t0.193424\t0.516615\t1.148459\t0.555556\t0.537634\n", "NC_007175.2\t265\t267\t0.337838\t0.453515\t1.534527\t0.494302\t1.063255\t1.111111\t0.748783\t0.763359\t1.026895\t1.370074\t0.717227\t1.159363\t1.255230\t1.520208\t0.651890\t1.445724\t1.271340\t1.008005\t0.890869\t1.595745\t1.495017\t0.382409\t0.470719\t1.058348\t0.274725\t0.000000\n", " 13563080 union_5x.bedgraph\n" ] } ], "source": [ "#Check output\n", "!head union_5x.bedgraph\n", "!wc -l union_5x.bedgraph" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2b. Manipulate with `pandas`" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
chromstartend12131619222329...52535459646767779
0NC_007175.248500.0000000.0000001.9230770.7314521.0152280.0000001.444623...1.8062401.1009171.7806940.000003.9735100.6535950.6820571.6614753.7500001.754386
1NC_007175.250520.0000000.0000001.6260160.7338551.5079370.0000001.349325...1.3719511.0692181.8421050.000003.7974680.6329110.4424781.4395393.4883721.666667
2NC_007175.287891.1695911.2931031.0458570.2869070.7897710.9900990.859599...0.8522730.7588530.8669270.000000.3194890.6666670.2080081.0214770.4784690.000000
3NC_007175.21461481.2618300.4618941.5021460.6843691.0811870.8196721.183206...1.3064581.2109140.9248550.000002.0270270.7029880.4753251.3080171.5665800.456621
4NC_007175.21921941.1299441.2711861.7269080.6917761.0945631.9559901.097734...1.4863461.1942000.8962870.476191.3698630.1610310.5586591.2863832.1951220.840336
\n", "

5 rows × 29 columns

\n", "
" ], "text/plain": [ " chrom start end 12 13 16 19 22 \\\n", "0 NC_007175.2 48 50 0.000000 0.000000 1.923077 0.731452 1.015228 \n", "1 NC_007175.2 50 52 0.000000 0.000000 1.626016 0.733855 1.507937 \n", "2 NC_007175.2 87 89 1.169591 1.293103 1.045857 0.286907 0.789771 \n", "3 NC_007175.2 146 148 1.261830 0.461894 1.502146 0.684369 1.081187 \n", "4 NC_007175.2 192 194 1.129944 1.271186 1.726908 0.691776 1.094563 \n", "\n", " 23 29 ... 52 53 54 59 64 \\\n", "0 0.000000 1.444623 ... 1.806240 1.100917 1.780694 0.00000 3.973510 \n", "1 0.000000 1.349325 ... 1.371951 1.069218 1.842105 0.00000 3.797468 \n", "2 0.990099 0.859599 ... 0.852273 0.758853 0.866927 0.00000 0.319489 \n", "3 0.819672 1.183206 ... 1.306458 1.210914 0.924855 0.00000 2.027027 \n", "4 1.955990 1.097734 ... 1.486346 1.194200 0.896287 0.47619 1.369863 \n", "\n", " 6 76 77 7 9 \n", "0 0.653595 0.682057 1.661475 3.750000 1.754386 \n", "1 0.632911 0.442478 1.439539 3.488372 1.666667 \n", "2 0.666667 0.208008 1.021477 0.478469 0.000000 \n", "3 0.702988 0.475325 1.308017 1.566580 0.456621 \n", "4 0.161031 0.558659 1.286383 2.195122 0.840336 \n", "\n", "[5 rows x 29 columns]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Import union data into pandas\n", "#Check head\n", "df = pd.read_table(\"union_5x.bedgraph\")\n", "df.head(5)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
chromstartend12131619222329...535459646767779total
13563069NC_035789.132649732326497340.00.00.0NaNNaNNaN0.0...0.0NaN0.0NaN0.00.00.00.0NaN0.000000
13563070NC_035789.132649736326497380.00.00.0NaNNaNNaN0.0...0.0NaN0.0NaN0.00.00.00.0NaN0.000000
13563071NC_035789.13264979932649801NaN0.0NaNNaNNaNNaNNaN...NaN0.0NaNNaNNaNNaNNaNNaNNaN0.000000
13563072NC_035789.132649876326498780.00.00.0NaN0.0NaN0.0...NaN0.00.0NaN0.00.00.00.0NaN0.000000
13563073NC_035789.132649885326498870.00.00.0NaN0.0NaN0.0...NaN0.00.00.00.00.00.00.0NaN0.000000
13563074NC_035789.132649895326498970.00.00.0NaN0.0NaN0.0...NaN0.00.00.00.00.00.00.0NaN0.000000
13563075NC_035789.132649930326499320.00.00.0NaN0.0NaN0.0...NaN0.00.00.00.00.00.00.0NaN0.000000
13563076NC_035789.132649933326499350.00.00.0NaN0.0NaN0.0...NaN0.00.00.00.00.00.00.0NaN0.000000
13563077NC_035789.132649966326499680.00.00.0NaN0.0NaN0.0...NaN0.00.00.00.00.00.0NaNNaN1.020408
13563078NC_035789.13265003532650037NaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaN0.000000
\n", "

10 rows × 30 columns

\n", "
" ], "text/plain": [ " chrom start end 12 13 16 19 22 23 29 \\\n", "13563069 NC_035789.1 32649732 32649734 0.0 0.0 0.0 NaN NaN NaN 0.0 \n", "13563070 NC_035789.1 32649736 32649738 0.0 0.0 0.0 NaN NaN NaN 0.0 \n", "13563071 NC_035789.1 32649799 32649801 NaN 0.0 NaN NaN NaN NaN NaN \n", "13563072 NC_035789.1 32649876 32649878 0.0 0.0 0.0 NaN 0.0 NaN 0.0 \n", "13563073 NC_035789.1 32649885 32649887 0.0 0.0 0.0 NaN 0.0 NaN 0.0 \n", "13563074 NC_035789.1 32649895 32649897 0.0 0.0 0.0 NaN 0.0 NaN 0.0 \n", "13563075 NC_035789.1 32649930 32649932 0.0 0.0 0.0 NaN 0.0 NaN 0.0 \n", "13563076 NC_035789.1 32649933 32649935 0.0 0.0 0.0 NaN 0.0 NaN 0.0 \n", "13563077 NC_035789.1 32649966 32649968 0.0 0.0 0.0 NaN 0.0 NaN 0.0 \n", "13563078 NC_035789.1 32650035 32650037 NaN NaN NaN NaN NaN NaN NaN \n", "\n", " ... 53 54 59 64 6 76 77 7 9 total \n", "13563069 ... 0.0 NaN 0.0 NaN 0.0 0.0 0.0 0.0 NaN 0.000000 \n", "13563070 ... 0.0 NaN 0.0 NaN 0.0 0.0 0.0 0.0 NaN 0.000000 \n", "13563071 ... NaN 0.0 NaN NaN NaN NaN NaN NaN NaN 0.000000 \n", "13563072 ... NaN 0.0 0.0 NaN 0.0 0.0 0.0 0.0 NaN 0.000000 \n", "13563073 ... NaN 0.0 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.000000 \n", "13563074 ... NaN 0.0 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.000000 \n", "13563075 ... NaN 0.0 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.000000 \n", "13563076 ... NaN 0.0 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.000000 \n", "13563077 ... NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN NaN 1.020408 \n", "13563078 ... 
NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.000000 \n", "\n", "[10 rows x 30 columns]" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Average all samples for total genome methylation information and save as a new column\n", "#NA are not included in averages\n", "#Check output\n", "df['total'] = df[['12', '13', '16', '19', '22', '23', '29', '31', '35', '36', '39', '3', '41', '44', '48', '50', '52', '53', '54', '59', '64', '6', '76', '77', '7', '9']].mean(axis=1)\n", "df.tail(10)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "#Save dataframe in a tabular format and include N/As. Do not include quotes.\n", "df.to_csv(\"union-averages.bedgraph\", sep = \"\\t\", na_rep = \"N/A\", quoting = 3)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "13563069\tNC_035789.1\t32649732\t32649734\t0.0\t0.0\t0.0\tN/A\tN/A\tN/A\t0.0\tN/A\t0.0\t0.0\tN/A\t0.0\t0.0\tN/A\t0.0\t0.0\tN/A\t0.0\tN/A\t0.0\tN/A\t0.0\t0.0\t0.0\t0.0\tN/A\t0.0\r\n", "13563070\tNC_035789.1\t32649736\t32649738\t0.0\t0.0\t0.0\tN/A\tN/A\tN/A\t0.0\tN/A\t0.0\t0.0\tN/A\t0.0\t0.0\tN/A\t0.0\t0.0\tN/A\t0.0\tN/A\t0.0\tN/A\t0.0\t0.0\t0.0\t0.0\tN/A\t0.0\r\n", "13563071\tNC_035789.1\t32649799\t32649801\tN/A\t0.0\tN/A\tN/A\tN/A\tN/A\tN/A\tN/A\t0.0\tN/A\tN/A\t0.0\tN/A\tN/A\tN/A\tN/A\tN/A\tN/A\t0.0\tN/A\tN/A\tN/A\tN/A\tN/A\tN/A\tN/A\t0.0\r\n", "13563072\tNC_035789.1\t32649876\t32649878\t0.0\t0.0\t0.0\tN/A\t0.0\tN/A\t0.0\tN/A\t0.0\t0.0\tN/A\t0.0\tN/A\tN/A\t0.0\t0.0\t0.0\tN/A\t0.0\t0.0\tN/A\t0.0\t0.0\t0.0\t0.0\tN/A\t0.0\r\n", "13563073\tNC_035789.1\t32649885\t32649887\t0.0\t0.0\t0.0\tN/A\t0.0\tN/A\t0.0\tN/A\t0.0\t0.0\tN/A\t0.0\tN/A\tN/A\tN/A\t0.0\t0.0\tN/A\t0.0\t0.0\t0.0\t0.0\t0.0\t0.0\t0.0\tN/A\t0.0\r\n", 
"13563074\tNC_035789.1\t32649895\t32649897\t0.0\t0.0\t0.0\tN/A\t0.0\tN/A\t0.0\tN/A\t0.0\t0.0\tN/A\t0.0\tN/A\tN/A\tN/A\t0.0\t0.0\tN/A\t0.0\t0.0\t0.0\t0.0\t0.0\t0.0\t0.0\tN/A\t0.0\r\n", "13563075\tNC_035789.1\t32649930\t32649932\t0.0\t0.0\t0.0\tN/A\t0.0\tN/A\t0.0\tN/A\t0.0\t0.0\tN/A\t0.0\tN/A\tN/A\tN/A\t0.0\t0.0\tN/A\t0.0\t0.0\t0.0\t0.0\t0.0\t0.0\t0.0\tN/A\t0.0\r\n", "13563076\tNC_035789.1\t32649933\t32649935\t0.0\t0.0\t0.0\tN/A\t0.0\tN/A\t0.0\tN/A\t0.0\t0.0\tN/A\t0.0\tN/A\tN/A\tN/A\t0.0\t0.0\tN/A\t0.0\t0.0\t0.0\t0.0\t0.0\t0.0\t0.0\tN/A\t0.0\r\n", "13563077\tNC_035789.1\t32649966\t32649968\t0.0\t0.0\t0.0\tN/A\t0.0\tN/A\t0.0\tN/A\t0.0\tN/A\tN/A\t0.0\tN/A\tN/A\tN/A\tN/A\t14.285714000000002\tN/A\t0.0\t0.0\t0.0\t0.0\t0.0\t0.0\tN/A\tN/A\t1.020408142857143\r\n", "13563078\tNC_035789.1\t32650035\t32650037\tN/A\tN/A\tN/A\tN/A\tN/A\tN/A\tN/A\tN/A\tN/A\tN/A\tN/A\t0.0\tN/A\tN/A\tN/A\tN/A\tN/A\tN/A\tN/A\tN/A\tN/A\tN/A\tN/A\tN/A\tN/A\tN/A\t0.0\r\n" ] } ], "source": [ "#Check pandas manipulations\n", "!tail union-averages.bedgraph" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3. 
Characterize methylation for each CpG dinucleotide" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- methylated: ≥ 50%\n", "- sparsely methylated: 10-50%\n", "- unmethylated: ≤ 10%" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph\n", "zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph\n", "zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph\n", "zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph\n", "zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph\n", "zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph\n", "zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph\n", "zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph\n", "zr3616_union-averages_5x.bedgraph\n", " 8422611 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph\n", " 8468611 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph\n", " 8384664 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph\n", " 8689568 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph\n", " 8062541 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph\n", " 8471405 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph\n", " 8225132 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph\n", " 8316308 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph\n", " 11238222 zr3616_union-averages_5x.bedgraph\n", " 78279062 total\n" ] } ], "source": [ "#8 individual sample files + 1 union bedgraph\n", "#Count lines\n", "!find zr3616*5x.bedgraph\n", "!wc -l zr3616*5x.bedgraph" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "for f in zr3616*5x.bedgraph\n", "do\n", "/opt/homebrew/bin/subtractBed \\\n", "-a ${f} \\\n", "-b /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/unique-CT-SNPs.bed \\\n", "> ${f}.NO-SNPs\n", 
"done" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs\n", "zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs\n", "zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs\n", "zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs\n", "zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs\n", "zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs\n", "zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs\n", "zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs\n", "zr3616_union-averages_5x.bedgraph.NO-SNPs\n", " 8148908 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs\n", " 8193942 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs\n", " 8110578 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs\n", " 8412899 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs\n", " 7793250 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs\n", " 8196826 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs\n", " 7952869 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs\n", " 8043983 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs\n", " 10939918 zr3616_union-averages_5x.bedgraph.NO-SNPs\n", " 75793173 total\n" ] } ], "source": [ "#Count lines of files with no SNP information\n", "!find *NO-SNPs\n", "!wc -l *NO-SNPs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3b. 
Methylated loci" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "for f in zr3616*NO-SNPs\n", "do\n", " awk '{if ($4 >= 50) { print $1, $2, $3, $4 }}' ${f} \\\n", " > ${f}-Meth\n", "done" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth <==\n", "NC_047559.1 1547 1549 100.000000\n", "NC_047559.1 1571 1573 87.500000\n", "NC_047559.1 2267 2269 100.000000\n", "NC_047559.1 2291 2293 100.000000\n", "NC_047559.1 4073 4075 60.000000\n", "NC_047559.1 4791 4793 64.705882\n", "NC_047559.1 4835 4837 88.235294\n", "NC_047559.1 4843 4845 81.250000\n", "NC_047559.1 5605 5607 60.000000\n", "NC_047559.1 5613 5615 66.666667\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth <==\n", "NC_047559.1 1571 1573 83.333333\n", "NC_047559.1 2267 2269 100.000000\n", "NC_047559.1 2291 2293 100.000000\n", "NC_047559.1 2448 2450 100.000000\n", "NC_047559.1 2988 2990 81.818182\n", "NC_047559.1 3902 3904 57.142857\n", "NC_047559.1 3916 3918 100.000000\n", "NC_047559.1 4073 4075 90.909091\n", "NC_047559.1 4270 4272 57.142857\n", "NC_047559.1 4791 4793 71.428571\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth <==\n", "NC_047559.1 1571 1573 100.000000\n", "NC_047559.1 2291 2293 100.000000\n", "NC_047559.1 2448 2450 100.000000\n", "NC_047559.1 2988 2990 100.000000\n", "NC_047559.1 4791 4793 100.000000\n", "NC_047559.1 4835 4837 90.000000\n", "NC_047559.1 4843 4845 90.476190\n", "NC_047559.1 4887 4889 90.909091\n", "NC_047559.1 4909 4911 85.714286\n", "NC_047559.1 5999 6001 57.142857\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth <==\n", "NC_047559.1 2267 2269 80.000000\n", "NC_047559.1 2291 2293 100.000000\n", "NC_047559.1 4835 4837 82.258065\n", "NC_047559.1 
4843 4845 76.271186\n", "NC_047559.1 4887 4889 79.411765\n", "NC_047559.1 4909 4911 59.090909\n", "NC_047559.1 7490 7492 86.666667\n", "NC_047559.1 7671 7673 90.909091\n", "NC_047559.1 7716 7718 50.000000\n", "NC_047559.1 7850 7852 100.000000\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth <==\n", "NC_047559.1 1547 1549 100.000000\n", "NC_047559.1 1571 1573 92.857143\n", "NC_047559.1 1907 1909 80.000000\n", "NC_047559.1 3168 3170 71.428571\n", "NC_047559.1 4073 4075 66.666667\n", "NC_047559.1 4097 4099 60.000000\n", "NC_047559.1 7867 7869 60.000000\n", "NC_047559.1 7885 7887 85.714286\n", "NC_047559.1 7923 7925 80.000000\n", "NC_047559.1 7939 7941 50.000000\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth <==\n", "NC_047559.1 1571 1573 87.500000\n", "NC_047559.1 2988 2990 85.714286\n", "NC_047559.1 4791 4793 71.428571\n", "NC_047559.1 4835 4837 66.666667\n", "NC_047559.1 4843 4845 54.545455\n", "NC_047559.1 6419 6421 80.000000\n", "NC_047559.1 6439 6441 83.333333\n", "NC_047559.1 6486 6488 80.000000\n", "NC_047559.1 7850 7852 80.000000\n", "NC_047559.1 7923 7925 62.162162\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth <==\n", "NC_047559.1 1547 1549 83.333333\n", "NC_047559.1 1571 1573 62.500000\n", "NC_047559.1 4791 4793 50.980392\n", "NC_047559.1 4835 4837 59.375000\n", "NC_047559.1 4843 4845 80.000000\n", "NC_047559.1 4887 4889 82.352941\n", "NC_047559.1 4909 4911 54.545455\n", "NC_047559.1 5500 5502 66.666667\n", "NC_047559.1 5605 5607 60.000000\n", "NC_047559.1 5613 5615 55.555556\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth <==\n", "NC_047559.1 2087 2089 100.000000\n", "NC_047559.1 2267 2269 100.000000\n", "NC_047559.1 2291 2293 75.000000\n", "NC_047559.1 2448 2450 66.666667\n", "NC_047559.1 2988 2990 100.000000\n", "NC_047559.1 4073 4075 100.000000\n", "NC_047559.1 4791 4793 92.857143\n", "NC_047559.1 4835 
4837 90.909091\n", "NC_047559.1 4843 4845 77.777778\n", "NC_047559.1 7129 7131 90.909091\n", "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-Meth <==\n", "NC_047559.1 1547 1549 94.44444433333332\n", "NC_047559.1 1571 1573 85.61507933333333\n", "NC_047559.1 1907 1909 80.0\n", "NC_047559.1 2267 2269 95.0\n", "NC_047559.1 2291 2293 85.83333333333333\n", "NC_047559.1 2448 2450 88.888889\n", "NC_047559.1 2988 2990 82.839827\n", "NC_047559.1 3168 3170 71.42857099999999\n", "NC_047559.1 3902 3904 57.14285699999999\n", "NC_047559.1 3916 3918 100.0\n" ] } ], "source": [ "!head *-Meth" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 852626 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth\n", " 840818 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth\n", " 861662 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth\n", " 891492 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth\n", " 808582 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth\n", " 844274 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth\n", " 891718 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth\n", " 837432 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth\n", " 966655 zr3616_union-averages_5x.bedgraph.NO-SNPs-Meth\n", " 7795259 total\n" ] } ], "source": [ "!wc -l *-Meth" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "#Get line counts for each file\n", "# Remove 10th line (total entries)\n", "#Ensure output is tab-delimited\n", "#Save output\n", "!wc -l *-Meth \\\n", "| sed '10,$ d' \\\n", "| awk '{print $1\"\\t\"$2}' \\\n", "> zr3616_5x-Meth-counts.txt" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ 
"852626\tzr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth\r\n", "840818\tzr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth\r\n", "861662\tzr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth\r\n", "891492\tzr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth\r\n", "808582\tzr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth\r\n", "844274\tzr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth\r\n", "891718\tzr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth\r\n", "837432\tzr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth\r\n", "966655\tzr3616_union-averages_5x.bedgraph.NO-SNPs-Meth\r\n" ] } ], "source": [ "!head zr3616_5x-Meth-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3c. Sparsely methylated loci" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "for f in zr3616*NO-SNPs\n", "do\n", " awk '{if ($4 < 50) { print $1, $2, $3, $4}}' ${f} \\\n", " | awk '{if ($4 > 10) { print $1, $2, $3, $4 }}' \\\n", " > ${f}-sparseMeth\n", "done" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth <==\n", "NC_047559.1 4887 4889 20.000000\n", "NC_047559.1 4909 4911 33.333333\n", "NC_047559.1 5500 5502 25.000000\n", "NC_047559.1 7716 7718 40.000000\n", "NC_047559.1 7814 7816 47.058824\n", "NC_047559.1 9237 9239 29.166667\n", "NC_047559.1 9658 9660 37.500000\n", "NC_047559.1 9661 9663 22.222222\n", "NC_047559.1 10899 10901 12.195122\n", "NC_047559.1 18234 18236 11.111111\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth <==\n", "NC_001276.1 4444 4446 14.285714\n", "NC_047559.1 4397 4399 47.058824\n", "NC_047559.1 4909 4911 
42.857143\n", "NC_047559.1 5605 5607 42.857143\n", "NC_047559.1 5613 5615 37.500000\n", "NC_047559.1 7716 7718 20.000000\n", "NC_047559.1 7850 7852 40.000000\n", "NC_047559.1 8970 8972 11.111111\n", "NC_047559.1 8979 8981 14.285714\n", "NC_047559.1 9658 9660 33.333333\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth <==\n", "NC_047559.1 4073 4075 42.857143\n", "NC_047559.1 8970 8972 25.000000\n", "NC_047559.1 8979 8981 11.111111\n", "NC_047559.1 9122 9124 14.634146\n", "NC_047559.1 9140 9142 16.666667\n", "NC_047559.1 10501 10503 16.666667\n", "NC_047559.1 19097 19099 13.043478\n", "NC_047559.1 19105 19107 16.666667\n", "NC_047559.1 19113 19115 12.500000\n", "NC_047559.1 23360 23362 44.444444\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth <==\n", "NC_047559.1 2087 2089 14.285714\n", "NC_047559.1 4397 4399 20.000000\n", "NC_047559.1 4791 4793 48.148148\n", "NC_047559.1 10292 10294 28.571429\n", "NC_047559.1 10314 10316 11.111111\n", "NC_047559.1 23610 23612 40.298507\n", "NC_047559.1 26448 26450 13.333333\n", "NC_047559.1 26463 26465 12.500000\n", "NC_047559.1 29235 29237 40.000000\n", "NC_047559.1 37587 37589 17.647059\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth <==\n", "NC_047559.1 5613 5615 16.666667\n", "NC_047559.1 5883 5885 16.666667\n", "NC_047559.1 5999 6001 22.222222\n", "NC_047559.1 7129 7131 26.666667\n", "NC_047559.1 7490 7492 40.909091\n", "NC_047559.1 8251 8253 20.000000\n", "NC_047559.1 8254 8256 20.000000\n", "NC_047559.1 8266 8268 20.000000\n", "NC_047559.1 9664 9666 16.666667\n", "NC_047559.1 10479 10481 20.000000\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth <==\n", "NC_047559.1 2291 2293 40.000000\n", "NC_047559.1 4073 4075 46.153846\n", "NC_047559.1 4397 4399 40.000000\n", "NC_047559.1 5500 5502 40.000000\n", "NC_047559.1 5883 5885 20.000000\n", "NC_047559.1 7867 
7869 30.769231\n", "NC_047559.1 7885 7887 44.827586\n", "NC_047559.1 7979 7981 36.363636\n", "NC_047559.1 10358 10360 12.500000\n", "NC_047559.1 23360 23362 28.571429\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth <==\n", "NC_047559.1 2809 2811 25.000000\n", "NC_047559.1 2988 2990 46.666667\n", "NC_047559.1 4073 4075 20.000000\n", "NC_047559.1 4097 4099 20.000000\n", "NC_047559.1 5653 5655 16.666667\n", "NC_047559.1 7129 7131 36.065574\n", "NC_047559.1 7490 7492 11.627907\n", "NC_047559.1 7671 7673 12.500000\n", "NC_047559.1 7867 7869 18.666667\n", "NC_047559.1 7990 7992 13.636364\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth <==\n", "NC_047559.1 5500 5502 18.181818\n", "NC_047559.1 5999 6001 40.000000\n", "NC_047559.1 7716 7718 20.000000\n", "NC_047559.1 8970 8972 14.285714\n", "NC_047559.1 8979 8981 16.666667\n", "NC_047559.1 9240 9242 11.111111\n", "NC_047559.1 9661 9663 16.666667\n", "NC_047559.1 9664 9666 16.666667\n", "NC_047559.1 20473 20475 14.285714\n", "NC_047559.1 22979 22981 12.500000\n", "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-sparseMeth <==\n", "NC_047559.1 2087 2089 41.125541\n", "NC_047559.1 2809 2811 25.0\n", "NC_047559.1 4097 4099 40.0\n", "NC_047559.1 4397 4399 26.764706\n", "NC_047559.1 4909 4911 45.923521\n", "NC_047559.1 5500 5502 45.969697000000004\n", "NC_047559.1 5605 5607 27.14285716666667\n", "NC_047559.1 5613 5615 30.913299833333337\n", "NC_047559.1 5653 5655 16.666667\n", "NC_047559.1 5999 6001 31.924602999999998\n" ] } ], "source": [ "!head *-sparseMeth" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 651862 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth\n", " 654311 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth\n", " 622053 
zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth\n", " 668946 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth\n", " 594948 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth\n", " 645242 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth\n", " 594427 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth\n", " 634990 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth\n", " 954751 zr3616_union-averages_5x.bedgraph.NO-SNPs-sparseMeth\n", " 6021530 total\n" ] } ], "source": [ "!wc -l *-sparseMeth" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "#Get line counts for each file\n", "# Remove 10th line (total entries)\n", "#Ensure output is tab-delimited\n", "#Save output\n", "!wc -l *-sparseMeth \\\n", "| sed '10,$ d' \\\n", "| awk '{print $1\"\\t\"$2}' \\\n", "> zr3616_5x-sparseMeth-counts.txt" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "651862\tzr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth\r\n", "654311\tzr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth\r\n", "622053\tzr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth\r\n", "668946\tzr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth\r\n", "594948\tzr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth\r\n", "645242\tzr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth\r\n", "594427\tzr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth\r\n", "634990\tzr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth\r\n", "954751\tzr3616_union-averages_5x.bedgraph.NO-SNPs-sparseMeth\r\n" ] } ], "source": [ "!head 
zr3616_5x-sparseMeth-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3d. Unmethylated loci" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "for f in zr3616*NO-SNPs\n", "do\n", " awk '{if ($4 <= 10) { print $1, $2, $3, $4 }}' ${f} \\\n", " > ${f}-unMeth\n", "done" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth <==\n", "NC_001276.1 34 36 0.000000\n", "NC_001276.1 123 125 0.531915\n", "NC_001276.1 305 307 0.313480\n", "NC_001276.1 433 435 0.243112\n", "NC_001276.1 457 459 0.148368\n", "NC_001276.1 482 484 0.409500\n", "NC_001276.1 609 611 0.072993\n", "NC_001276.1 781 783 0.330852\n", "NC_001276.1 826 828 0.435540\n", "NC_001276.1 951 953 0.305499\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth <==\n", "NC_001276.1 34 36 1.672241\n", "NC_001276.1 123 125 0.582751\n", "NC_001276.1 305 307 0.303490\n", "NC_001276.1 433 435 0.106496\n", "NC_001276.1 457 459 0.476644\n", "NC_001276.1 482 484 0.418848\n", "NC_001276.1 609 611 0.484966\n", "NC_001276.1 781 783 0.209644\n", "NC_001276.1 826 828 0.226757\n", "NC_001276.1 951 953 0.276243\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth <==\n", "NC_001276.1 34 36 0.000000\n", "NC_001276.1 123 125 0.218579\n", "NC_001276.1 305 307 0.344828\n", "NC_001276.1 433 435 0.207900\n", "NC_001276.1 457 459 0.191022\n", "NC_001276.1 482 484 0.110619\n", "NC_001276.1 609 611 0.285442\n", "NC_001276.1 781 783 0.461361\n", "NC_001276.1 826 828 0.241838\n", "NC_001276.1 951 953 0.302572\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth <==\n", "NC_001276.1 34 36 0.886918\n", "NC_001276.1 123 125 0.502513\n", "NC_001276.1 305 307 0.418410\n", "NC_001276.1 433 
435 0.285103\n", "NC_001276.1 457 459 0.194426\n", "NC_001276.1 482 484 0.292612\n", "NC_001276.1 609 611 0.333556\n", "NC_001276.1 781 783 0.289226\n", "NC_001276.1 826 828 0.231660\n", "NC_001276.1 951 953 0.195312\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth <==\n", "NC_001276.1 34 36 0.647249\n", "NC_001276.1 123 125 0.446927\n", "NC_001276.1 305 307 0.480769\n", "NC_001276.1 433 435 0.437158\n", "NC_001276.1 457 459 0.096993\n", "NC_001276.1 482 484 0.425532\n", "NC_001276.1 609 611 0.632911\n", "NC_001276.1 781 783 0.417101\n", "NC_001276.1 826 828 0.228833\n", "NC_001276.1 951 953 0.307220\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth <==\n", "NC_001276.1 34 36 0.294985\n", "NC_001276.1 123 125 0.544070\n", "NC_001276.1 305 307 0.869565\n", "NC_001276.1 433 435 0.361011\n", "NC_001276.1 457 459 0.336134\n", "NC_001276.1 482 484 0.486855\n", "NC_001276.1 609 611 0.000000\n", "NC_001276.1 781 783 0.393314\n", "NC_001276.1 826 828 0.623701\n", "NC_001276.1 951 953 0.260756\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth <==\n", "NC_001276.1 34 36 0.735294\n", "NC_001276.1 123 125 0.146951\n", "NC_001276.1 305 307 0.195312\n", "NC_001276.1 433 435 0.446999\n", "NC_001276.1 457 459 0.348028\n", "NC_001276.1 482 484 0.197759\n", "NC_001276.1 609 611 0.174419\n", "NC_001276.1 781 783 0.592885\n", "NC_001276.1 826 828 0.373413\n", "NC_001276.1 951 953 0.490597\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth <==\n", "NC_001276.1 34 36 1.428571\n", "NC_001276.1 123 125 0.400534\n", "NC_001276.1 305 307 0.000000\n", "NC_001276.1 433 435 0.479616\n", "NC_001276.1 457 459 0.778643\n", "NC_001276.1 482 484 0.633714\n", "NC_001276.1 609 611 0.227790\n", "NC_001276.1 781 783 0.368098\n", "NC_001276.1 826 828 0.392670\n", "NC_001276.1 951 953 0.660066\n", "\n", "==> 
zr3616_union-averages_5x.bedgraph.NO-SNPs-unMeth <==\n", "NC_001276.1 34 36 0.70815725\n", "NC_001276.1 123 125 0.42178\n", "NC_001276.1 305 307 0.36573174999999997\n", "NC_001276.1 433 435 0.320924375\n", "NC_001276.1 457 459 0.32128225\n", "NC_001276.1 482 484 0.37192987499999997\n", "NC_001276.1 609 611 0.27650962500000004\n", "NC_001276.1 781 783 0.38281012499999995\n", "NC_001276.1 826 828 0.3443015\n", "NC_001276.1 951 953 0.34978312500000003\n" ] } ], "source": [ "!head *-unMeth" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 6644420 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth\n", " 6698813 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth\n", " 6626863 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth\n", " 6852461 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth\n", " 6389720 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth\n", " 6707310 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth\n", " 6466724 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth\n", " 6571561 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth\n", " 9018512 zr3616_union-averages_5x.bedgraph.NO-SNPs-unMeth\n", " 61976384 total\n" ] } ], "source": [ "!wc -l *-unMeth" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "#Get line counts for each file\n", "# Remove 10th line (total entries)\n", "#Ensure output is tab-delimited\n", "#Save output\n", "!wc -l *-unMeth \\\n", "| sed '10,$ d' \\\n", "| awk '{print $1\"\\t\"$2}' \\\n", "> zr3616_5x-unMeth-counts.txt" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ 
"6644420\tzr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth\r\n", "6698813\tzr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth\r\n", "6626863\tzr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth\r\n", "6852461\tzr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth\r\n", "6389720\tzr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth\r\n", "6707310\tzr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth\r\n", "6466724\tzr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth\r\n", "6571561\tzr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth\r\n", "9018512\tzr3616_union-averages_5x.bedgraph.NO-SNPs-unMeth\r\n" ] } ], "source": [ "!head zr3616_5x-unMeth-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 4. Characterize genomic location of CpGs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I will identify overlaps between CpG loci (methylated, sparsely methylated, unmethylated) and various genome feature tracks:\n", "\n", "- gene\n", "- exon UTR\n", "- CDS\n", "- intron\n", "- upstream flanks\n", "- downstream flanks\n", "- intergenic regions\n", "- lncRNA\n", "- transposable elements\n", "\n", "Since the exon track = exon UTR + CDS, and mRNA = exon + intron, I will not need to use those tracks separately." 
] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth\n", "zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth\n", "zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth\n", "zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth\n", "zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth\n", "zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth\n", "zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth\n", "zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth\n", "zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth\n", "zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth\n", "zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth\n", "zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth\n", "zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth\n", "zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth\n", "zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth\n", "zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth\n", "zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth\n", "zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth\n", "zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth\n", "zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth\n", "zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth\n", "zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth\n", "zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth\n", 
"zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth\n", "zr3616_union-averages_5x.bedgraph.NO-SNPs-Meth\n", "zr3616_union-averages_5x.bedgraph.NO-SNPs-sparseMeth\n", "zr3616_union-averages_5x.bedgraph.NO-SNPs-unMeth\n", " 27\n" ] } ], "source": [ "#9 file types (8 samples + 1 union), 3 files per type (Meth, sparseMeth, unMeth) = 27 total\n", "!find zr3616*5x.bedgraph*SNPs-*\n", "!find zr3616*5x.bedgraph*SNPs-* | wc -l" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 852626 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed\n", " 651862 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed\n", " 6644420 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed\n", " 840818 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed\n", " 654311 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed\n", " 6698813 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed\n", " 861662 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed\n", " 622053 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed\n", " 6626863 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed\n", " 891492 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed\n", " 668946 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed\n", " 6852461 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed\n", " 808582 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed\n", " 594948 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed\n", " 6389720 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed\n", " 844274 
zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed\n", " 645242 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed\n", " 6707310 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed\n", " 891718 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed\n", " 594427 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed\n", " 6466724 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed\n", " 837432 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed\n", " 634990 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed\n", " 6571561 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed\n", " 966655 zr3616_union-averages_5x.bedgraph.NO-SNPs-Meth.bed\n", " 954751 zr3616_union-averages_5x.bedgraph.NO-SNPs-sparseMeth.bed\n", " 9018512 zr3616_union-averages_5x.bedgraph.NO-SNPs-unMeth.bed\n" ] } ], "source": [ "%%bash\n", "\n", "for f in zr3616*5x.bedgraph*SNPs-*\n", "do\n", " awk '{print $1\"\\t\"$2\"\\t\"$3}' ${f} > ${f}.bed\n", " wc -l ${f}.bed\n", "done" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed\r\n", "zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed\r\n", "zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed\r\n", "zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed\r\n", "zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed\r\n", "zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed\r\n", "zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed\r\n", "zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed\r\n", 
"zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed\r\n", "zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed\r\n", "zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed\r\n", "zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed\r\n", "zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed\r\n", "zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed\r\n", "zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed\r\n", "zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed\r\n", "zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed\r\n", "zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed\r\n", "zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed\r\n", "zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed\r\n", "zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed\r\n", "zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed\r\n", "zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed\r\n", "zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed\r\n", "zr3616_union-averages_5x.bedgraph.NO-SNPs-Meth.bed\r\n", "zr3616_union-averages_5x.bedgraph.NO-SNPs-sparseMeth.bed\r\n", "zr3616_union-averages_5x.bedgraph.NO-SNPs-unMeth.bed\r\n" ] } ], "source": [ "!find *bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4b. 
Gene" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_047559.1\t10155\t10157\tNC_047559.1\tGnomon\tgene\t9839\t11386\t.\t+\t.\tID=gene-LOC117693020;Dbxref=GeneID:117693020;Name=LOC117693020;gbkey=Gene;gene=LOC117693020;gene_biotype=lncRNA\n", "NC_047559.1\t10215\t10217\tNC_047559.1\tGnomon\tgene\t9839\t11386\t.\t+\t.\tID=gene-LOC117693020;Dbxref=GeneID:117693020;Name=LOC117693020;gbkey=Gene;gene=LOC117693020;gene_biotype=lncRNA\n", "NC_047559.1\t10270\t10272\tNC_047559.1\tGnomon\tgene\t9839\t11386\t.\t+\t.\tID=gene-LOC117693020;Dbxref=GeneID:117693020;Name=LOC117693020;gbkey=Gene;gene=LOC117693020;gene_biotype=lncRNA\n", "NC_047559.1\t10292\t10294\tNC_047559.1\tGnomon\tgene\t9839\t11386\t.\t+\t.\tID=gene-LOC117693020;Dbxref=GeneID:117693020;Name=LOC117693020;gbkey=Gene;gene=LOC117693020;gene_biotype=lncRNA\n", "NC_047559.1\t10314\t10316\tNC_047559.1\tGnomon\tgene\t9839\t11386\t.\t+\t.\tID=gene-LOC117693020;Dbxref=GeneID:117693020;Name=LOC117693020;gbkey=Gene;gene=LOC117693020;gene_biotype=lncRNA\n", "NC_047559.1\t10358\t10360\tNC_047559.1\tGnomon\tgene\t9839\t11386\t.\t+\t.\tID=gene-LOC117693020;Dbxref=GeneID:117693020;Name=LOC117693020;gbkey=Gene;gene=LOC117693020;gene_biotype=lncRNA\n", "NC_047559.1\t10380\t10382\tNC_047559.1\tGnomon\tgene\t9839\t11386\t.\t+\t.\tID=gene-LOC117693020;Dbxref=GeneID:117693020;Name=LOC117693020;gbkey=Gene;gene=LOC117693020;gene_biotype=lncRNA\n", "NC_047559.1\t10391\t10393\tNC_047559.1\tGnomon\tgene\t9839\t11386\t.\t+\t.\tID=gene-LOC117693020;Dbxref=GeneID:117693020;Name=LOC117693020;gbkey=Gene;gene=LOC117693020;gene_biotype=lncRNA\n", "NC_047559.1\t10402\t10404\tNC_047559.1\tGnomon\tgene\t9839\t11386\t.\t+\t.\tID=gene-LOC117693020;Dbxref=GeneID:117693020;Name=LOC117693020;gbkey=Gene;gene=LOC117693020;gene_biotype=lncRNA\n", 
"NC_047559.1\t10413\t10415\tNC_047559.1\tGnomon\tgene\t9839\t11386\t.\t+\t.\tID=gene-LOC117693020;Dbxref=GeneID:117693020;Name=LOC117693020;gbkey=Gene;gene=LOC117693020;gene_biotype=lncRNA\n", " 6997898 zr3616_union-averages_5x-Gene-wb.bed\n" ] } ], "source": [ "#Get overlaps between CpG background and genes for downstream annotation\n", "\n", "!{bedtoolsDirectory}intersectBed \\\n", "-wb \\\n", "-a zr3616_union-averages_5x.bedgraph.bed \\\n", "-b ../../genome-feature-files/cgigas_uk_roslin_v1_gene.gff \\\n", "> zr3616_union-averages_5x-Gene-wb.bed\n", "!head zr3616_union-averages_5x-Gene-wb.bed\n", "!wc -l zr3616_union-averages_5x-Gene-wb.bed" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "#Find CpGs from each methylation-category BED file that overlap genes (-u: report each CpG once)\n", "#Only loop over the NO-SNPs Meth/sparseMeth/unMeth files: a bare *bed glob would also match\n", "#zr3616_union-averages_5x-Gene-wb.bed and zr3616_union-averages_5x.bedgraph.bed when they are present\n", "for f in *NO-SNPs-*Meth.bed\n", "do\n", " /opt/homebrew/bin/intersectBed \\\n", " -u \\\n", " -a \"${f}\" \\\n", " -b ../../genome-feature-files/cgigas_uk_roslin_v1_gene.gff \\\n", " > \"${f}-Gene\"\n", "done" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-Gene <==\n", "NC_047559.1\t360322\t360324\n", "NC_047559.1\t360361\t360363\n", "NC_047559.1\t361347\t361349\n", "NC_047559.1\t364329\t364331\n", "NC_047559.1\t375341\t375343\n", "NC_047559.1\t375360\t375362\n", "NC_047559.1\t376111\t376113\n", "NC_047559.1\t376170\t376172\n", "NC_047559.1\t376962\t376964\n", "NC_047559.1\t377335\t377337\n", "\n", "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-Gene <==\n", "NC_047559.1\t10899\t10901\n", "NC_047559.1\t18234\t18236\n", "NC_047559.1\t61081\t61083\n", "NC_047559.1\t68070\t68072\n", "NC_047559.1\t100249\t100251\n", "NC_047559.1\t100276\t100278\n", "NC_047559.1\t100305\t100307\n", "NC_047559.1\t100319\t100321\n", "NC_047559.1\t100440\t100442\n", "NC_047559.1\t100454\t100456\n", "\n", "==>
zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-Gene <==\n", "NC_047559.1\t10270\t10272\n", "NC_047559.1\t10292\t10294\n", "NC_047559.1\t10314\t10316\n", "NC_047559.1\t10358\t10360\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10402\t10404\n", "NC_047559.1\t10413\t10415\n", "NC_047559.1\t10457\t10459\n", "NC_047559.1\t10479\t10481\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-Gene <==\n", "NC_047559.1\t140334\t140336\n", "NC_047559.1\t356303\t356305\n", "NC_047559.1\t356335\t356337\n", "NC_047559.1\t356378\t356380\n", "NC_047559.1\t356395\t356397\n", "NC_047559.1\t356430\t356432\n", "NC_047559.1\t356436\t356438\n", "NC_047559.1\t356532\t356534\n", "NC_047559.1\t356685\t356687\n", "NC_047559.1\t356699\t356701\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-Gene <==\n", "NC_047559.1\t14319\t14321\n", "NC_047559.1\t18078\t18080\n", "NC_047559.1\t60313\t60315\n", "NC_047559.1\t60371\t60373\n", "NC_047559.1\t61081\t61083\n", "NC_047559.1\t61203\t61205\n", "NC_047559.1\t72245\t72247\n", "NC_047559.1\t100276\t100278\n", "NC_047559.1\t100319\t100321\n", "NC_047559.1\t110811\t110813\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-Gene <==\n", "NC_047559.1\t10292\t10294\n", "NC_047559.1\t10314\t10316\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10402\t10404\n", "NC_047559.1\t10413\t10415\n", "NC_047559.1\t10457\t10459\n", "NC_047559.1\t10479\t10481\n", "NC_047559.1\t10501\t10503\n", "NC_047559.1\t10772\t10774\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-Gene <==\n", "NC_047559.1\t110811\t110813\n", "NC_047559.1\t160063\t160065\n", "NC_047559.1\t160111\t160113\n", "NC_047559.1\t160124\t160126\n", "NC_047559.1\t160131\t160133\n", "NC_047559.1\t160136\t160138\n", "NC_047559.1\t160142\t160144\n", 
"NC_047559.1\t160179\t160181\n", "NC_047559.1\t362609\t362611\n", "NC_047559.1\t364329\t364331\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-Gene <==\n", "NC_047559.1\t10501\t10503\n", "NC_047559.1\t19097\t19099\n", "NC_047559.1\t19105\t19107\n", "NC_047559.1\t19113\t19115\n", "NC_047559.1\t60492\t60494\n", "NC_047559.1\t61203\t61205\n", "NC_047559.1\t68070\t68072\n", "NC_047559.1\t68140\t68142\n", "NC_047559.1\t98346\t98348\n", "NC_047559.1\t99649\t99651\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-Gene <==\n", "NC_047559.1\t10358\t10360\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10402\t10404\n", "NC_047559.1\t10413\t10415\n", "NC_047559.1\t10457\t10459\n", "NC_047559.1\t10479\t10481\n", "NC_047559.1\t10592\t10594\n", "NC_047559.1\t10612\t10614\n", "NC_047559.1\t10645\t10647\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-Gene <==\n", "NC_047559.1\t60492\t60494\n", "NC_047559.1\t64815\t64817\n", "NC_047559.1\t110795\t110797\n", "NC_047559.1\t110838\t110840\n", "NC_047559.1\t113151\t113153\n", "NC_047559.1\t356378\t356380\n", "NC_047559.1\t356395\t356397\n", "NC_047559.1\t356430\t356432\n", "NC_047559.1\t356436\t356438\n", "NC_047559.1\t356532\t356534\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-Gene <==\n", "NC_047559.1\t10292\t10294\n", "NC_047559.1\t10314\t10316\n", "NC_047559.1\t62770\t62772\n", "NC_047559.1\t66982\t66984\n", "NC_047559.1\t67015\t67017\n", "NC_047559.1\t67056\t67058\n", "NC_047559.1\t67070\t67072\n", "NC_047559.1\t71969\t71971\n", "NC_047559.1\t71973\t71975\n", "NC_047559.1\t71994\t71996\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-Gene <==\n", "NC_047559.1\t10270\t10272\n", "NC_047559.1\t10358\t10360\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", 
"NC_047559.1\t10402\t10404\n", "NC_047559.1\t10413\t10415\n", "NC_047559.1\t10457\t10459\n", "NC_047559.1\t10479\t10481\n", "NC_047559.1\t10501\t10503\n", "NC_047559.1\t10545\t10547\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-Gene <==\n", "NC_047559.1\t60371\t60373\n", "NC_047559.1\t68070\t68072\n", "NC_047559.1\t110838\t110840\n", "NC_047559.1\t140520\t140522\n", "NC_047559.1\t140539\t140541\n", "NC_047559.1\t140557\t140559\n", "NC_047559.1\t144127\t144129\n", "NC_047559.1\t144205\t144207\n", "NC_047559.1\t144210\t144212\n", "NC_047559.1\t154324\t154326\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-Gene <==\n", "NC_047559.1\t10479\t10481\n", "NC_047559.1\t19105\t19107\n", "NC_047559.1\t19119\t19121\n", "NC_047559.1\t60313\t60315\n", "NC_047559.1\t61081\t61083\n", "NC_047559.1\t61203\t61205\n", "NC_047559.1\t62770\t62772\n", "NC_047559.1\t80557\t80559\n", "NC_047559.1\t100276\t100278\n", "NC_047559.1\t100305\t100307\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-Gene <==\n", "NC_047559.1\t10270\t10272\n", "NC_047559.1\t10292\t10294\n", "NC_047559.1\t10314\t10316\n", "NC_047559.1\t10358\t10360\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10402\t10404\n", "NC_047559.1\t10413\t10415\n", "NC_047559.1\t10457\t10459\n", "NC_047559.1\t10545\t10547\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-Gene <==\n", "NC_047559.1\t140241\t140243\n", "NC_047559.1\t140257\t140259\n", "NC_047559.1\t140284\t140286\n", "NC_047559.1\t140286\t140288\n", "NC_047559.1\t140292\t140294\n", "NC_047559.1\t140295\t140297\n", "NC_047559.1\t140539\t140541\n", "NC_047559.1\t140557\t140559\n", "NC_047559.1\t140589\t140591\n", "NC_047559.1\t140640\t140642\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-Gene <==\n", 
"NC_047559.1\t10358\t10360\n", "NC_047559.1\t60371\t60373\n", "NC_047559.1\t61660\t61662\n", "NC_047559.1\t61781\t61783\n", "NC_047559.1\t61788\t61790\n", "NC_047559.1\t61794\t61796\n", "NC_047559.1\t62234\t62236\n", "NC_047559.1\t63068\t63070\n", "NC_047559.1\t64815\t64817\n", "NC_047559.1\t64859\t64861\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-Gene <==\n", "NC_047559.1\t10292\t10294\n", "NC_047559.1\t10314\t10316\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10457\t10459\n", "NC_047559.1\t10479\t10481\n", "NC_047559.1\t10501\t10503\n", "NC_047559.1\t10545\t10547\n", "NC_047559.1\t10547\t10549\n", "NC_047559.1\t10567\t10569\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-Gene <==\n", "NC_047559.1\t140241\t140243\n", "NC_047559.1\t140257\t140259\n", "NC_047559.1\t140284\t140286\n", "NC_047559.1\t140286\t140288\n", "NC_047559.1\t140292\t140294\n", "NC_047559.1\t140295\t140297\n", "NC_047559.1\t356021\t356023\n", "NC_047559.1\t356115\t356117\n", "NC_047559.1\t356173\t356175\n", "NC_047559.1\t356232\t356234\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-Gene <==\n", "NC_047559.1\t18747\t18749\n", "NC_047559.1\t19097\t19099\n", "NC_047559.1\t19105\t19107\n", "NC_047559.1\t19113\t19115\n", "NC_047559.1\t19119\t19121\n", "NC_047559.1\t63068\t63070\n", "NC_047559.1\t63072\t63074\n", "NC_047559.1\t63078\t63080\n", "NC_047559.1\t68070\t68072\n", "NC_047559.1\t68140\t68142\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-Gene <==\n", "NC_047559.1\t10155\t10157\n", "NC_047559.1\t10215\t10217\n", "NC_047559.1\t10270\t10272\n", "NC_047559.1\t10292\t10294\n", "NC_047559.1\t10314\t10316\n", "NC_047559.1\t10358\t10360\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10402\t10404\n", "NC_047559.1\t10413\t10415\n", "\n", "==> 
zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-Gene <==\n", "NC_047559.1\t60492\t60494\n", "NC_047559.1\t61996\t61998\n", "NC_047559.1\t68070\t68072\n", "NC_047559.1\t160063\t160065\n", "NC_047559.1\t160142\t160144\n", "NC_047559.1\t356378\t356380\n", "NC_047559.1\t356395\t356397\n", "NC_047559.1\t356430\t356432\n", "NC_047559.1\t356436\t356438\n", "NC_047559.1\t356532\t356534\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-Gene <==\n", "NC_047559.1\t61081\t61083\n", "NC_047559.1\t62078\t62080\n", "NC_047559.1\t62234\t62236\n", "NC_047559.1\t68140\t68142\n", "NC_047559.1\t71969\t71971\n", "NC_047559.1\t71973\t71975\n", "NC_047559.1\t80845\t80847\n", "NC_047559.1\t83228\t83230\n", "NC_047559.1\t84125\t84127\n", "NC_047559.1\t100249\t100251\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-Gene <==\n", "NC_047559.1\t10270\t10272\n", "NC_047559.1\t10292\t10294\n", "NC_047559.1\t10314\t10316\n", "NC_047559.1\t10358\t10360\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10402\t10404\n", "NC_047559.1\t10413\t10415\n", "NC_047559.1\t10457\t10459\n", "NC_047559.1\t10479\t10481\n", "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-Meth.bed-Gene <==\n", "NC_047559.1\t61996\t61998\n", "NC_047559.1\t110795\t110797\n", "NC_047559.1\t140284\t140286\n", "NC_047559.1\t140286\t140288\n", "NC_047559.1\t356335\t356337\n", "NC_047559.1\t356378\t356380\n", "NC_047559.1\t356395\t356397\n", "NC_047559.1\t356430\t356432\n", "NC_047559.1\t356436\t356438\n", "NC_047559.1\t356532\t356534\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-sparseMeth.bed-Gene <==\n", "NC_047559.1\t60313\t60315\n", "NC_047559.1\t60371\t60373\n", "NC_047559.1\t60492\t60494\n", "NC_047559.1\t61081\t61083\n", "NC_047559.1\t62078\t62080\n", "NC_047559.1\t62234\t62236\n", "NC_047559.1\t63068\t63070\n", 
"NC_047559.1\t63072\t63074\n", "NC_047559.1\t63078\t63080\n", "NC_047559.1\t64815\t64817\n", "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-unMeth.bed-Gene <==\n", "NC_047559.1\t10155\t10157\n", "NC_047559.1\t10215\t10217\n", "NC_047559.1\t10270\t10272\n", "NC_047559.1\t10292\t10294\n", "NC_047559.1\t10314\t10316\n", "NC_047559.1\t10358\t10360\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10402\t10404\n", "NC_047559.1\t10413\t10415\n" ] } ], "source": [ "#Check output\n", "!head *Gene" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 793356 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-Gene\n", " 456348 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-Gene\n", " 3615169 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-Gene\n", " 780836 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-Gene\n", " 454638 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-Gene\n", " 3648886 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-Gene\n", " 800933 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-Gene\n", " 432941 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-Gene\n", " 3613729 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-Gene\n", " 828225 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-Gene\n", " 463701 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-Gene\n", " 3698290 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-Gene\n", " 754255 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-Gene\n", " 416950 
zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-Gene\n", " 3492403 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-Gene\n", " 785590 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-Gene\n", " 449812 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-Gene\n", " 3657813 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-Gene\n", " 830407 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-Gene\n", " 408129 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-Gene\n", " 3547022 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-Gene\n", " 778171 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-Gene\n", " 440007 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-Gene\n", " 3579106 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-Gene\n", " 903933 zr3616_union-averages_5x.bedgraph.NO-SNPs-Meth.bed-Gene\n", " 670652 zr3616_union-averages_5x.bedgraph.NO-SNPs-sparseMeth.bed-Gene\n", " 4682696 zr3616_union-averages_5x.bedgraph.NO-SNPs-unMeth.bed-Gene\n", " 44983998 total\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l *Gene" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "#Get line counts for each file\n", "#Remove the summary \"total\" row by name instead of by position (previously line 28),\n", "#so the table stays correct if the number of -Gene files changes\n", "#Ensure output is tab-delimited\n", "#Save output\n", "!wc -l *-Gene \\\n", "| grep -v ' total$' \\\n", "| awk '{print $1\"\\t\"$2}' \\\n", "> zr3616_5x-Gene-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4c.
CDS" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "#Find CpGs from each methylation-category BED file that overlap coding sequences (-u: report each CpG once)\n", "#Only loop over the NO-SNPs Meth/sparseMeth/unMeth files: a bare *bed glob would also match\n", "#any other file ending in bed in this directory (e.g. zr3616_union-averages_5x-Gene-wb.bed)\n", "for f in *NO-SNPs-*Meth.bed\n", "do\n", " /opt/homebrew/bin/intersectBed \\\n", " -u \\\n", " -a \"${f}\" \\\n", " -b ../../genome-feature-files/cgigas_uk_roslin_v1_CDS.gff \\\n", " > \"${f}-CDS\"\n", "done" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-CDS <==\n", "NC_047559.1\t432531\t432533\n", "NC_047559.1\t432606\t432608\n", "NC_047559.1\t433203\t433205\n", "NC_047559.1\t433271\t433273\n", "NC_047559.1\t545968\t545970\n", "NC_047559.1\t548186\t548188\n", "NC_047559.1\t548188\t548190\n", "NC_047559.1\t548918\t548920\n", "NC_047559.1\t549827\t549829\n", "NC_047559.1\t549832\t549834\n", "\n", "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-CDS <==\n", "NC_047559.1\t177876\t177878\n", "NC_047559.1\t337616\t337618\n", "NC_047559.1\t337639\t337641\n", "NC_047559.1\t338662\t338664\n", "NC_047559.1\t338708\t338710\n", "NC_047559.1\t338753\t338755\n", "NC_047559.1\t338777\t338779\n", "NC_047559.1\t338819\t338821\n", "NC_047559.1\t338862\t338864\n", "NC_047559.1\t432696\t432698\n", "\n", "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-CDS <==\n", "NC_047559.1\t14862\t14864\n", "NC_047559.1\t15162\t15164\n", "NC_047559.1\t15194\t15196\n", "NC_047559.1\t15204\t15206\n", "NC_047559.1\t15291\t15293\n", "NC_047559.1\t15293\t15295\n", "NC_047559.1\t15336\t15338\n", "NC_047559.1\t15338\t15340\n", "NC_047559.1\t15357\t15359\n", "NC_047559.1\t15370\t15372\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-CDS <==\n", "NC_047559.1\t140334\t140336\n", "NC_047559.1\t432606\t432608\n", "NC_047559.1\t432670\t432672\n", "NC_047559.1\t433203\t433205\n", "NC_047559.1\t545968\t545970\n", "NC_047559.1\t548918\t548920\n", 
"NC_047559.1\t548941\t548943\n", "NC_047559.1\t548958\t548960\n", "NC_047559.1\t549827\t549829\n", "NC_047559.1\t549832\t549834\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-CDS <==\n", "NC_047559.1\t139297\t139299\n", "NC_047559.1\t139383\t139385\n", "NC_047559.1\t139389\t139391\n", "NC_047559.1\t139443\t139445\n", "NC_047559.1\t140131\t140133\n", "NC_047559.1\t140139\t140141\n", "NC_047559.1\t140172\t140174\n", "NC_047559.1\t140209\t140211\n", "NC_047559.1\t140241\t140243\n", "NC_047559.1\t140257\t140259\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-CDS <==\n", "NC_047559.1\t14862\t14864\n", "NC_047559.1\t15162\t15164\n", "NC_047559.1\t15194\t15196\n", "NC_047559.1\t15204\t15206\n", "NC_047559.1\t15291\t15293\n", "NC_047559.1\t15293\t15295\n", "NC_047559.1\t15336\t15338\n", "NC_047559.1\t15338\t15340\n", "NC_047559.1\t15357\t15359\n", "NC_047559.1\t15370\t15372\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-CDS <==\n", "NC_047559.1\t432606\t432608\n", "NC_047559.1\t432670\t432672\n", "NC_047559.1\t432696\t432698\n", "NC_047559.1\t433203\t433205\n", "NC_047559.1\t433271\t433273\n", "NC_047559.1\t545968\t545970\n", "NC_047559.1\t548186\t548188\n", "NC_047559.1\t548188\t548190\n", "NC_047559.1\t548918\t548920\n", "NC_047559.1\t548941\t548943\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-CDS <==\n", "NC_047559.1\t143007\t143009\n", "NC_047559.1\t185612\t185614\n", "NC_047559.1\t337565\t337567\n", "NC_047559.1\t337616\t337618\n", "NC_047559.1\t337624\t337626\n", "NC_047559.1\t337639\t337641\n", "NC_047559.1\t338745\t338747\n", "NC_047559.1\t338753\t338755\n", "NC_047559.1\t338765\t338767\n", "NC_047559.1\t338777\t338779\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-CDS <==\n", "NC_047559.1\t14862\t14864\n", "NC_047559.1\t15194\t15196\n", 
"NC_047559.1\t15204\t15206\n", "NC_047559.1\t15291\t15293\n", "NC_047559.1\t15293\t15295\n", "NC_047559.1\t15336\t15338\n", "NC_047559.1\t15338\t15340\n", "NC_047559.1\t15357\t15359\n", "NC_047559.1\t15370\t15372\n", "NC_047559.1\t15407\t15409\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-CDS <==\n", "NC_047559.1\t432531\t432533\n", "NC_047559.1\t432606\t432608\n", "NC_047559.1\t432670\t432672\n", "NC_047559.1\t432696\t432698\n", "NC_047559.1\t432920\t432922\n", "NC_047559.1\t433203\t433205\n", "NC_047559.1\t433271\t433273\n", "NC_047559.1\t433316\t433318\n", "NC_047559.1\t476146\t476148\n", "NC_047559.1\t545968\t545970\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-CDS <==\n", "NC_047559.1\t473307\t473309\n", "NC_047559.1\t473341\t473343\n", "NC_047559.1\t473426\t473428\n", "NC_047559.1\t473546\t473548\n", "NC_047559.1\t473579\t473581\n", "NC_047559.1\t473594\t473596\n", "NC_047559.1\t476101\t476103\n", "NC_047559.1\t476211\t476213\n", "NC_047559.1\t477115\t477117\n", "NC_047559.1\t561628\t561630\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-CDS <==\n", "NC_047559.1\t14862\t14864\n", "NC_047559.1\t15162\t15164\n", "NC_047559.1\t15194\t15196\n", "NC_047559.1\t15204\t15206\n", "NC_047559.1\t15291\t15293\n", "NC_047559.1\t15293\t15295\n", "NC_047559.1\t15336\t15338\n", "NC_047559.1\t15338\t15340\n", "NC_047559.1\t15357\t15359\n", "NC_047559.1\t15370\t15372\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-CDS <==\n", "NC_047559.1\t140520\t140522\n", "NC_047559.1\t140539\t140541\n", "NC_047559.1\t140557\t140559\n", "NC_047559.1\t144127\t144129\n", "NC_047559.1\t144205\t144207\n", "NC_047559.1\t144210\t144212\n", "NC_047559.1\t340959\t340961\n", "NC_047559.1\t342194\t342196\n", "NC_047559.1\t342209\t342211\n", "NC_047559.1\t342243\t342245\n", "\n", "==> 
zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-CDS <==\n", "NC_047559.1\t122520\t122522\n", "NC_047559.1\t125338\t125340\n", "NC_047559.1\t139297\t139299\n", "NC_047559.1\t139334\t139336\n", "NC_047559.1\t139383\t139385\n", "NC_047559.1\t139389\t139391\n", "NC_047559.1\t139396\t139398\n", "NC_047559.1\t140381\t140383\n", "NC_047559.1\t140424\t140426\n", "NC_047559.1\t140589\t140591\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-CDS <==\n", "NC_047559.1\t14862\t14864\n", "NC_047559.1\t15162\t15164\n", "NC_047559.1\t15194\t15196\n", "NC_047559.1\t15204\t15206\n", "NC_047559.1\t15291\t15293\n", "NC_047559.1\t15293\t15295\n", "NC_047559.1\t15336\t15338\n", "NC_047559.1\t15338\t15340\n", "NC_047559.1\t15357\t15359\n", "NC_047559.1\t15370\t15372\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-CDS <==\n", "NC_047559.1\t140241\t140243\n", "NC_047559.1\t140257\t140259\n", "NC_047559.1\t140284\t140286\n", "NC_047559.1\t140286\t140288\n", "NC_047559.1\t140292\t140294\n", "NC_047559.1\t140295\t140297\n", "NC_047559.1\t140539\t140541\n", "NC_047559.1\t140557\t140559\n", "NC_047559.1\t140589\t140591\n", "NC_047559.1\t140640\t140642\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-CDS <==\n", "NC_047559.1\t122566\t122568\n", "NC_047559.1\t139297\t139299\n", "NC_047559.1\t139334\t139336\n", "NC_047559.1\t139383\t139385\n", "NC_047559.1\t139389\t139391\n", "NC_047559.1\t139396\t139398\n", "NC_047559.1\t139430\t139432\n", "NC_047559.1\t139438\t139440\n", "NC_047559.1\t139443\t139445\n", "NC_047559.1\t140083\t140085\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-CDS <==\n", "NC_047559.1\t14862\t14864\n", "NC_047559.1\t15162\t15164\n", "NC_047559.1\t15194\t15196\n", "NC_047559.1\t15204\t15206\n", "NC_047559.1\t15291\t15293\n", "NC_047559.1\t15293\t15295\n", 
"NC_047559.1\t15336\t15338\n", "NC_047559.1\t15338\t15340\n", "NC_047559.1\t15357\t15359\n", "NC_047559.1\t15370\t15372\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-CDS <==\n", "NC_047559.1\t140241\t140243\n", "NC_047559.1\t140257\t140259\n", "NC_047559.1\t140284\t140286\n", "NC_047559.1\t140286\t140288\n", "NC_047559.1\t140292\t140294\n", "NC_047559.1\t140295\t140297\n", "NC_047559.1\t432670\t432672\n", "NC_047559.1\t433203\t433205\n", "NC_047559.1\t433271\t433273\n", "NC_047559.1\t433316\t433318\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-CDS <==\n", "NC_047559.1\t99190\t99192\n", "NC_047559.1\t99310\t99312\n", "NC_047559.1\t139297\t139299\n", "NC_047559.1\t139334\t139336\n", "NC_047559.1\t139383\t139385\n", "NC_047559.1\t139389\t139391\n", "NC_047559.1\t139396\t139398\n", "NC_047559.1\t139430\t139432\n", "NC_047559.1\t139438\t139440\n", "NC_047559.1\t139443\t139445\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-CDS <==\n", "NC_047559.1\t14862\t14864\n", "NC_047559.1\t15099\t15101\n", "NC_047559.1\t15162\t15164\n", "NC_047559.1\t15194\t15196\n", "NC_047559.1\t15204\t15206\n", "NC_047559.1\t15291\t15293\n", "NC_047559.1\t15293\t15295\n", "NC_047559.1\t15336\t15338\n", "NC_047559.1\t15338\t15340\n", "NC_047559.1\t15357\t15359\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-CDS <==\n", "NC_047559.1\t432531\t432533\n", "NC_047559.1\t432606\t432608\n", "NC_047559.1\t432670\t432672\n", "NC_047559.1\t432920\t432922\n", "NC_047559.1\t433203\t433205\n", "NC_047559.1\t433271\t433273\n", "NC_047559.1\t545968\t545970\n", "NC_047559.1\t548148\t548150\n", "NC_047559.1\t548186\t548188\n", "NC_047559.1\t548188\t548190\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-CDS <==\n", "NC_047559.1\t80845\t80847\n", "NC_047559.1\t341414\t341416\n", 
"NC_047559.1\t432696\t432698\n", "NC_047559.1\t473226\t473228\n", "NC_047559.1\t473649\t473651\n", "NC_047559.1\t474279\t474281\n", "NC_047559.1\t548918\t548920\n", "NC_047559.1\t575050\t575052\n", "NC_047559.1\t578436\t578438\n", "NC_047559.1\t582494\t582496\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-CDS <==\n", "NC_047559.1\t14862\t14864\n", "NC_047559.1\t15162\t15164\n", "NC_047559.1\t15194\t15196\n", "NC_047559.1\t15204\t15206\n", "NC_047559.1\t15291\t15293\n", "NC_047559.1\t15293\t15295\n", "NC_047559.1\t15336\t15338\n", "NC_047559.1\t15338\t15340\n", "NC_047559.1\t15357\t15359\n", "NC_047559.1\t15370\t15372\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-Meth.bed-CDS <==\n", "NC_047559.1\t140284\t140286\n", "NC_047559.1\t140286\t140288\n", "NC_047559.1\t432531\t432533\n", "NC_047559.1\t432606\t432608\n", "NC_047559.1\t432670\t432672\n", "NC_047559.1\t432696\t432698\n", "NC_047559.1\t433203\t433205\n", "NC_047559.1\t433271\t433273\n", "NC_047559.1\t545968\t545970\n", "NC_047559.1\t548148\t548150\n", "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-sparseMeth.bed-CDS <==\n", "NC_047559.1\t140131\t140133\n", "NC_047559.1\t140139\t140141\n", "NC_047559.1\t140172\t140174\n", "NC_047559.1\t140209\t140211\n", "NC_047559.1\t140241\t140243\n", "NC_047559.1\t140257\t140259\n", "NC_047559.1\t140292\t140294\n", "NC_047559.1\t140295\t140297\n", "NC_047559.1\t140334\t140336\n", "NC_047559.1\t140381\t140383\n", "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-unMeth.bed-CDS <==\n", "NC_047559.1\t14862\t14864\n", "NC_047559.1\t15099\t15101\n", "NC_047559.1\t15162\t15164\n", "NC_047559.1\t15194\t15196\n", "NC_047559.1\t15204\t15206\n", "NC_047559.1\t15291\t15293\n", "NC_047559.1\t15293\t15295\n", "NC_047559.1\t15336\t15338\n", "NC_047559.1\t15338\t15340\n", "NC_047559.1\t15357\t15359\n" ] } ], "source": [ "#Check output\n", "!head *CDS" ] }, { 
"cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 305690 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-CDS\n", " 62303 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-CDS\n", " 829997 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-CDS\n", " 304850 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-CDS\n", " 63217 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-CDS\n", " 827906 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-CDS\n", " 310883 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-CDS\n", " 55085 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-CDS\n", " 826079 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-CDS\n", " 313947 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-CDS\n", " 58402 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-CDS\n", " 835655 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-CDS\n", " 300056 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-CDS\n", " 60685 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-CDS\n", " 816885 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-CDS\n", " 303754 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-CDS\n", " 60989 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-CDS\n", " 830389 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-CDS\n", " 314024 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-CDS\n", " 51399 
zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-CDS\n", " 819030 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-CDS\n", " 305245 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-CDS\n", " 61628 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-CDS\n", " 823656 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-CDS\n", " 329117 zr3616_union-averages_5x.bedgraph.NO-SNPs-Meth.bed-CDS\n", " 72085 zr3616_union-averages_5x.bedgraph.NO-SNPs-sparseMeth.bed-CDS\n", " 928052 zr3616_union-averages_5x.bedgraph.NO-SNPs-unMeth.bed-CDS\n", " 10871008 total\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l *CDS" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "#Get line counts for each file\n", "#Remove the summary \"total\" row by name instead of by position (previously line 28),\n", "#so the table stays correct if the number of -CDS files changes\n", "#Ensure output is tab-delimited\n", "#Save output\n", "!wc -l *-CDS \\\n", "| grep -v ' total$' \\\n", "| awk '{print $1\"\\t\"$2}' \\\n", "> zr3616_5x-CDS-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4d.
Exon UTR" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "for f in *bed\n", "do\n", " /opt/homebrew/bin/intersectBed \\\n", " -u \\\n", " -a ${f} \\\n", " -b ../../genome-feature-files/cgigas_uk_roslin_v1_exonUTR.gff \\\n", " > ${f}-exonUTR\n", "done" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-exonUTR <==\n", "NC_047559.1\t545229\t545231\n", "NC_047559.1\t545256\t545258\n", "NC_047559.1\t571906\t571908\n", "NC_047559.1\t571929\t571931\n", "NC_047559.1\t572049\t572051\n", "NC_047559.1\t572233\t572235\n", "NC_047559.1\t572245\t572247\n", "NC_047559.1\t572263\t572265\n", "NC_047559.1\t572453\t572455\n", "NC_047559.1\t572557\t572559\n", "\n", "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-exonUTR <==\n", "NC_047559.1\t10899\t10901\n", "NC_047559.1\t431124\t431126\n", "NC_047559.1\t431205\t431207\n", "NC_047559.1\t545687\t545689\n", "NC_047559.1\t571583\t571585\n", "NC_047559.1\t571838\t571840\n", "NC_047559.1\t572075\t572077\n", "NC_047559.1\t572153\t572155\n", "NC_047559.1\t572367\t572369\n", "NC_047559.1\t572615\t572617\n", "\n", "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-exonUTR <==\n", "NC_047559.1\t10920\t10922\n", "NC_047559.1\t10950\t10952\n", "NC_047559.1\t11000\t11002\n", "NC_047559.1\t11026\t11028\n", "NC_047559.1\t14214\t14216\n", "NC_047559.1\t14232\t14234\n", "NC_047559.1\t14243\t14245\n", "NC_047559.1\t14259\t14261\n", "NC_047559.1\t14319\t14321\n", "NC_047559.1\t14463\t14465\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-exonUTR <==\n", "NC_047559.1\t415945\t415947\n", "NC_047559.1\t418038\t418040\n", "NC_047559.1\t418118\t418120\n", "NC_047559.1\t418153\t418155\n", "NC_047559.1\t418166\t418168\n", 
"NC_047559.1\t418179\t418181\n", "NC_047559.1\t544307\t544309\n", "NC_047559.1\t544387\t544389\n", "NC_047559.1\t571838\t571840\n", "NC_047559.1\t571906\t571908\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-exonUTR <==\n", "NC_047559.1\t14319\t14321\n", "NC_047559.1\t229096\t229098\n", "NC_047559.1\t343538\t343540\n", "NC_047559.1\t343574\t343576\n", "NC_047559.1\t343603\t343605\n", "NC_047559.1\t343655\t343657\n", "NC_047559.1\t344058\t344060\n", "NC_047559.1\t372095\t372097\n", "NC_047559.1\t415975\t415977\n", "NC_047559.1\t416078\t416080\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-exonUTR <==\n", "NC_047559.1\t10899\t10901\n", "NC_047559.1\t10920\t10922\n", "NC_047559.1\t14214\t14216\n", "NC_047559.1\t14232\t14234\n", "NC_047559.1\t14243\t14245\n", "NC_047559.1\t14259\t14261\n", "NC_047559.1\t14525\t14527\n", "NC_047559.1\t19083\t19085\n", "NC_047559.1\t19097\t19099\n", "NC_047559.1\t19105\t19107\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-exonUTR <==\n", "NC_047559.1\t431124\t431126\n", "NC_047559.1\t431205\t431207\n", "NC_047559.1\t545256\t545258\n", "NC_047559.1\t571583\t571585\n", "NC_047559.1\t571793\t571795\n", "NC_047559.1\t571838\t571840\n", "NC_047559.1\t571906\t571908\n", "NC_047559.1\t571929\t571931\n", "NC_047559.1\t572075\t572077\n", "NC_047559.1\t572153\t572155\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-exonUTR <==\n", "NC_047559.1\t19097\t19099\n", "NC_047559.1\t19105\t19107\n", "NC_047559.1\t19113\t19115\n", "NC_047559.1\t242324\t242326\n", "NC_047559.1\t418539\t418541\n", "NC_047559.1\t544366\t544368\n", "NC_047559.1\t544387\t544389\n", "NC_047559.1\t545687\t545689\n", "NC_047559.1\t597401\t597403\n", "NC_047559.1\t601710\t601712\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-exonUTR <==\n", 
"NC_047559.1\t10899\t10901\n", "NC_047559.1\t10920\t10922\n", "NC_047559.1\t10950\t10952\n", "NC_047559.1\t11000\t11002\n", "NC_047559.1\t11026\t11028\n", "NC_047559.1\t14214\t14216\n", "NC_047559.1\t14232\t14234\n", "NC_047559.1\t14243\t14245\n", "NC_047559.1\t14259\t14261\n", "NC_047559.1\t14319\t14321\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-exonUTR <==\n", "NC_047559.1\t431124\t431126\n", "NC_047559.1\t431205\t431207\n", "NC_047559.1\t545256\t545258\n", "NC_047559.1\t545687\t545689\n", "NC_047559.1\t571583\t571585\n", "NC_047559.1\t571793\t571795\n", "NC_047559.1\t571838\t571840\n", "NC_047559.1\t571906\t571908\n", "NC_047559.1\t571929\t571931\n", "NC_047559.1\t572049\t572051\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-exonUTR <==\n", "NC_047559.1\t453365\t453367\n", "NC_047559.1\t472663\t472665\n", "NC_047559.1\t544254\t544256\n", "NC_047559.1\t544307\t544309\n", "NC_047559.1\t544387\t544389\n", "NC_047559.1\t544552\t544554\n", "NC_047559.1\t544592\t544594\n", "NC_047559.1\t544784\t544786\n", "NC_047559.1\t545229\t545231\n", "NC_047559.1\t572367\t572369\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-exonUTR <==\n", "NC_047559.1\t10899\t10901\n", "NC_047559.1\t10920\t10922\n", "NC_047559.1\t10950\t10952\n", "NC_047559.1\t11000\t11002\n", "NC_047559.1\t11026\t11028\n", "NC_047559.1\t11080\t11082\n", "NC_047559.1\t14214\t14216\n", "NC_047559.1\t14232\t14234\n", "NC_047559.1\t14243\t14245\n", "NC_047559.1\t14259\t14261\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-exonUTR <==\n", "NC_047559.1\t431124\t431126\n", "NC_047559.1\t571583\t571585\n", "NC_047559.1\t571838\t571840\n", "NC_047559.1\t571906\t571908\n", "NC_047559.1\t571929\t571931\n", "NC_047559.1\t572075\t572077\n", "NC_047559.1\t572153\t572155\n", "NC_047559.1\t572233\t572235\n", "NC_047559.1\t572245\t572247\n", 
"NC_047559.1\t572557\t572559\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-exonUTR <==\n", "NC_047559.1\t19105\t19107\n", "NC_047559.1\t19119\t19121\n", "NC_047559.1\t142827\t142829\n", "NC_047559.1\t142843\t142845\n", "NC_047559.1\t142853\t142855\n", "NC_047559.1\t337166\t337168\n", "NC_047559.1\t431205\t431207\n", "NC_047559.1\t469575\t469577\n", "NC_047559.1\t472663\t472665\n", "NC_047559.1\t544552\t544554\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-exonUTR <==\n", "NC_047559.1\t10899\t10901\n", "NC_047559.1\t10920\t10922\n", "NC_047559.1\t10950\t10952\n", "NC_047559.1\t11000\t11002\n", "NC_047559.1\t11026\t11028\n", "NC_047559.1\t14214\t14216\n", "NC_047559.1\t14232\t14234\n", "NC_047559.1\t14243\t14245\n", "NC_047559.1\t14259\t14261\n", "NC_047559.1\t14319\t14321\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-exonUTR <==\n", "NC_047559.1\t431124\t431126\n", "NC_047559.1\t544552\t544554\n", "NC_047559.1\t571583\t571585\n", "NC_047559.1\t571838\t571840\n", "NC_047559.1\t571906\t571908\n", "NC_047559.1\t571929\t571931\n", "NC_047559.1\t572049\t572051\n", "NC_047559.1\t572075\t572077\n", "NC_047559.1\t572233\t572235\n", "NC_047559.1\t572245\t572247\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-exonUTR <==\n", "NC_047559.1\t62234\t62236\n", "NC_047559.1\t343538\t343540\n", "NC_047559.1\t355857\t355859\n", "NC_047559.1\t415945\t415947\n", "NC_047559.1\t415957\t415959\n", "NC_047559.1\t415975\t415977\n", "NC_047559.1\t473159\t473161\n", "NC_047559.1\t477530\t477532\n", "NC_047559.1\t544404\t544406\n", "NC_047559.1\t544533\t544535\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-exonUTR <==\n", "NC_047559.1\t10899\t10901\n", "NC_047559.1\t10920\t10922\n", "NC_047559.1\t10950\t10952\n", "NC_047559.1\t11000\t11002\n", 
"NC_047559.1\t11026\t11028\n", "NC_047559.1\t14214\t14216\n", "NC_047559.1\t14232\t14234\n", "NC_047559.1\t14243\t14245\n", "NC_047559.1\t14259\t14261\n", "NC_047559.1\t14319\t14321\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-exonUTR <==\n", "NC_047559.1\t545256\t545258\n", "NC_047559.1\t545687\t545689\n", "NC_047559.1\t571583\t571585\n", "NC_047559.1\t571793\t571795\n", "NC_047559.1\t571838\t571840\n", "NC_047559.1\t571906\t571908\n", "NC_047559.1\t571929\t571931\n", "NC_047559.1\t572049\t572051\n", "NC_047559.1\t572075\t572077\n", "NC_047559.1\t572153\t572155\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-exonUTR <==\n", "NC_047559.1\t19097\t19099\n", "NC_047559.1\t19105\t19107\n", "NC_047559.1\t19113\t19115\n", "NC_047559.1\t19119\t19121\n", "NC_047559.1\t431124\t431126\n", "NC_047559.1\t431205\t431207\n", "NC_047559.1\t453551\t453553\n", "NC_047559.1\t477548\t477550\n", "NC_047559.1\t544552\t544554\n", "NC_047559.1\t573040\t573042\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-exonUTR <==\n", "NC_047559.1\t10899\t10901\n", "NC_047559.1\t10920\t10922\n", "NC_047559.1\t10950\t10952\n", "NC_047559.1\t14214\t14216\n", "NC_047559.1\t14232\t14234\n", "NC_047559.1\t14243\t14245\n", "NC_047559.1\t14259\t14261\n", "NC_047559.1\t14319\t14321\n", "NC_047559.1\t14397\t14399\n", "NC_047559.1\t14401\t14403\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-exonUTR <==\n", "NC_047559.1\t416078\t416080\n", "NC_047559.1\t431124\t431126\n", "NC_047559.1\t571838\t571840\n", "NC_047559.1\t571906\t571908\n", "NC_047559.1\t571929\t571931\n", "NC_047559.1\t572049\t572051\n", "NC_047559.1\t572153\t572155\n", "NC_047559.1\t572233\t572235\n", "NC_047559.1\t572245\t572247\n", "NC_047559.1\t572263\t572265\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-exonUTR 
<==\n", "NC_047559.1\t62234\t62236\n", "NC_047559.1\t371895\t371897\n", "NC_047559.1\t415739\t415741\n", "NC_047559.1\t415975\t415977\n", "NC_047559.1\t415988\t415990\n", "NC_047559.1\t415998\t416000\n", "NC_047559.1\t416052\t416054\n", "NC_047559.1\t416071\t416073\n", "NC_047559.1\t416088\t416090\n", "NC_047559.1\t420757\t420759\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-exonUTR <==\n", "NC_047559.1\t10899\t10901\n", "NC_047559.1\t10920\t10922\n", "NC_047559.1\t10950\t10952\n", "NC_047559.1\t11000\t11002\n", "NC_047559.1\t11026\t11028\n", "NC_047559.1\t14214\t14216\n", "NC_047559.1\t14232\t14234\n", "NC_047559.1\t14243\t14245\n", "NC_047559.1\t14259\t14261\n", "NC_047559.1\t14319\t14321\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-Meth.bed-exonUTR <==\n", "NC_047559.1\t418038\t418040\n", "NC_047559.1\t418118\t418120\n", "NC_047559.1\t418153\t418155\n", "NC_047559.1\t418166\t418168\n", "NC_047559.1\t418179\t418181\n", "NC_047559.1\t431124\t431126\n", "NC_047559.1\t545256\t545258\n", "NC_047559.1\t571583\t571585\n", "NC_047559.1\t571838\t571840\n", "NC_047559.1\t571906\t571908\n", "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-sparseMeth.bed-exonUTR <==\n", "NC_047559.1\t62234\t62236\n", "NC_047559.1\t343574\t343576\n", "NC_047559.1\t343603\t343605\n", "NC_047559.1\t372095\t372097\n", "NC_047559.1\t415945\t415947\n", "NC_047559.1\t415975\t415977\n", "NC_047559.1\t415998\t416000\n", "NC_047559.1\t416071\t416073\n", "NC_047559.1\t416078\t416080\n", "NC_047559.1\t416088\t416090\n", "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-unMeth.bed-exonUTR <==\n", "NC_047559.1\t10899\t10901\n", "NC_047559.1\t10920\t10922\n", "NC_047559.1\t10950\t10952\n", "NC_047559.1\t11000\t11002\n", "NC_047559.1\t11026\t11028\n", "NC_047559.1\t11080\t11082\n", "NC_047559.1\t14214\t14216\n", "NC_047559.1\t14232\t14234\n", "NC_047559.1\t14243\t14245\n", 
"NC_047559.1\t14259\t14261\n" ] } ], "source": [ "#Check output\n", "!head *exonUTR" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 48945 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-exonUTR\n", " 32728 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-exonUTR\n", " 422149 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-exonUTR\n", " 46369 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-exonUTR\n", " 32998 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-exonUTR\n", " 427078 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-exonUTR\n", " 49947 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-exonUTR\n", " 31460 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-exonUTR\n", " 422437 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-exonUTR\n", " 51903 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-exonUTR\n", " 33259 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-exonUTR\n", " 427037 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-exonUTR\n", " 45650 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-exonUTR\n", " 30199 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-exonUTR\n", " 411163 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-exonUTR\n", " 47790 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-exonUTR\n", " 32086 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-exonUTR\n", " 423727 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-exonUTR\n", " 51800 
zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-exonUTR\n", " 29616 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-exonUTR\n", " 416641 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-exonUTR\n", " 47830 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-exonUTR\n", " 31217 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-exonUTR\n", " 419745 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-exonUTR\n", " 53951 zr3616_union-averages_5x.bedgraph.NO-SNPs-Meth.bed-exonUTR\n", " 48054 zr3616_union-averages_5x.bedgraph.NO-SNPs-sparseMeth.bed-exonUTR\n", " 505281 zr3616_union-averages_5x.bedgraph.NO-SNPs-unMeth.bed-exonUTR\n", " 4621060 total\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l *exonUTR" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "#Get line counts for each file\n", "# Remove 28th line (total entries)\n", "#Ensure output is tab-delimited\n", "#Save output\n", "!wc -l *-exonUTR \\\n", "| sed '28,$ d' \\\n", "| awk '{print $1\"\\t\"$2}' \\\n", "> zr3616_5x-exonUTR-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4e. 
Intron" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "for f in *bed\n", "do\n", " /opt/homebrew/bin/intersectBed \\\n", " -u \\\n", " -a ${f} \\\n", " -b ../../genome-feature-files/cgigas_uk_roslin_v1_intron.bed \\\n", " > ${f}-intron\n", "done" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intron <==\n", "NC_047559.1\t360322\t360324\n", "NC_047559.1\t360361\t360363\n", "NC_047559.1\t361347\t361349\n", "NC_047559.1\t364329\t364331\n", "NC_047559.1\t375341\t375343\n", "NC_047559.1\t375360\t375362\n", "NC_047559.1\t376111\t376113\n", "NC_047559.1\t376170\t376172\n", "NC_047559.1\t376962\t376964\n", "NC_047559.1\t377335\t377337\n", "\n", "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intron <==\n", "NC_047559.1\t18234\t18236\n", "NC_047559.1\t61081\t61083\n", "NC_047559.1\t68070\t68072\n", "NC_047559.1\t100249\t100251\n", "NC_047559.1\t100276\t100278\n", "NC_047559.1\t100305\t100307\n", "NC_047559.1\t100319\t100321\n", "NC_047559.1\t100440\t100442\n", "NC_047559.1\t100454\t100456\n", "NC_047559.1\t101107\t101109\n", "\n", "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intron <==\n", "NC_047559.1\t10270\t10272\n", "NC_047559.1\t10292\t10294\n", "NC_047559.1\t10314\t10316\n", "NC_047559.1\t10358\t10360\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10402\t10404\n", "NC_047559.1\t10413\t10415\n", "NC_047559.1\t10457\t10459\n", "NC_047559.1\t10479\t10481\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intron <==\n", "NC_047559.1\t356303\t356305\n", "NC_047559.1\t356335\t356337\n", "NC_047559.1\t356378\t356380\n", "NC_047559.1\t356395\t356397\n", "NC_047559.1\t356430\t356432\n", 
"NC_047559.1\t356436\t356438\n", "NC_047559.1\t356532\t356534\n", "NC_047559.1\t356685\t356687\n", "NC_047559.1\t356699\t356701\n", "NC_047559.1\t356910\t356912\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intron <==\n", "NC_047559.1\t18078\t18080\n", "NC_047559.1\t60313\t60315\n", "NC_047559.1\t60371\t60373\n", "NC_047559.1\t61081\t61083\n", "NC_047559.1\t61203\t61205\n", "NC_047559.1\t72245\t72247\n", "NC_047559.1\t100276\t100278\n", "NC_047559.1\t100319\t100321\n", "NC_047559.1\t110811\t110813\n", "NC_047559.1\t110862\t110864\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intron <==\n", "NC_047559.1\t10292\t10294\n", "NC_047559.1\t10314\t10316\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10402\t10404\n", "NC_047559.1\t10413\t10415\n", "NC_047559.1\t10457\t10459\n", "NC_047559.1\t10479\t10481\n", "NC_047559.1\t10501\t10503\n", "NC_047559.1\t10772\t10774\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intron <==\n", "NC_047559.1\t110811\t110813\n", "NC_047559.1\t160063\t160065\n", "NC_047559.1\t160111\t160113\n", "NC_047559.1\t160124\t160126\n", "NC_047559.1\t160131\t160133\n", "NC_047559.1\t160136\t160138\n", "NC_047559.1\t160142\t160144\n", "NC_047559.1\t160179\t160181\n", "NC_047559.1\t362609\t362611\n", "NC_047559.1\t364329\t364331\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intron <==\n", "NC_047559.1\t10501\t10503\n", "NC_047559.1\t60492\t60494\n", "NC_047559.1\t61203\t61205\n", "NC_047559.1\t68070\t68072\n", "NC_047559.1\t68140\t68142\n", "NC_047559.1\t98346\t98348\n", "NC_047559.1\t99649\t99651\n", "NC_047559.1\t99802\t99804\n", "NC_047559.1\t100276\t100278\n", "NC_047559.1\t100319\t100321\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intron <==\n", "NC_047559.1\t10358\t10360\n", 
"NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10402\t10404\n", "NC_047559.1\t10413\t10415\n", "NC_047559.1\t10457\t10459\n", "NC_047559.1\t10479\t10481\n", "NC_047559.1\t10592\t10594\n", "NC_047559.1\t10612\t10614\n", "NC_047559.1\t10645\t10647\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intron <==\n", "NC_047559.1\t60492\t60494\n", "NC_047559.1\t64815\t64817\n", "NC_047559.1\t110795\t110797\n", "NC_047559.1\t110838\t110840\n", "NC_047559.1\t113151\t113153\n", "NC_047559.1\t356378\t356380\n", "NC_047559.1\t356395\t356397\n", "NC_047559.1\t356430\t356432\n", "NC_047559.1\t356436\t356438\n", "NC_047559.1\t356532\t356534\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intron <==\n", "NC_047559.1\t10292\t10294\n", "NC_047559.1\t10314\t10316\n", "NC_047559.1\t62770\t62772\n", "NC_047559.1\t66982\t66984\n", "NC_047559.1\t67015\t67017\n", "NC_047559.1\t67056\t67058\n", "NC_047559.1\t67070\t67072\n", "NC_047559.1\t71969\t71971\n", "NC_047559.1\t71973\t71975\n", "NC_047559.1\t71994\t71996\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intron <==\n", "NC_047559.1\t10270\t10272\n", "NC_047559.1\t10358\t10360\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10402\t10404\n", "NC_047559.1\t10413\t10415\n", "NC_047559.1\t10457\t10459\n", "NC_047559.1\t10479\t10481\n", "NC_047559.1\t10501\t10503\n", "NC_047559.1\t10545\t10547\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intron <==\n", "NC_047559.1\t60371\t60373\n", "NC_047559.1\t68070\t68072\n", "NC_047559.1\t110838\t110840\n", "NC_047559.1\t154324\t154326\n", "NC_047559.1\t356335\t356337\n", "NC_047559.1\t356378\t356380\n", "NC_047559.1\t356395\t356397\n", "NC_047559.1\t356430\t356432\n", "NC_047559.1\t356436\t356438\n", "NC_047559.1\t356532\t356534\n", "\n", "==> 
zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intron <==\n", "NC_047559.1\t10479\t10481\n", "NC_047559.1\t60313\t60315\n", "NC_047559.1\t61081\t61083\n", "NC_047559.1\t61203\t61205\n", "NC_047559.1\t62770\t62772\n", "NC_047559.1\t80557\t80559\n", "NC_047559.1\t100276\t100278\n", "NC_047559.1\t100305\t100307\n", "NC_047559.1\t100319\t100321\n", "NC_047559.1\t110854\t110856\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intron <==\n", "NC_047559.1\t10270\t10272\n", "NC_047559.1\t10292\t10294\n", "NC_047559.1\t10314\t10316\n", "NC_047559.1\t10358\t10360\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10402\t10404\n", "NC_047559.1\t10413\t10415\n", "NC_047559.1\t10457\t10459\n", "NC_047559.1\t10545\t10547\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intron <==\n", "NC_047559.1\t153777\t153779\n", "NC_047559.1\t356173\t356175\n", "NC_047559.1\t356378\t356380\n", "NC_047559.1\t356395\t356397\n", "NC_047559.1\t356430\t356432\n", "NC_047559.1\t356436\t356438\n", "NC_047559.1\t356532\t356534\n", "NC_047559.1\t356910\t356912\n", "NC_047559.1\t356933\t356935\n", "NC_047559.1\t357362\t357364\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intron <==\n", "NC_047559.1\t10358\t10360\n", "NC_047559.1\t60371\t60373\n", "NC_047559.1\t61660\t61662\n", "NC_047559.1\t61781\t61783\n", "NC_047559.1\t61788\t61790\n", "NC_047559.1\t61794\t61796\n", "NC_047559.1\t63068\t63070\n", "NC_047559.1\t64815\t64817\n", "NC_047559.1\t64859\t64861\n", "NC_047559.1\t68070\t68072\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intron <==\n", "NC_047559.1\t10292\t10294\n", "NC_047559.1\t10314\t10316\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10457\t10459\n", "NC_047559.1\t10479\t10481\n", "NC_047559.1\t10501\t10503\n", 
"NC_047559.1\t10545\t10547\n", "NC_047559.1\t10547\t10549\n", "NC_047559.1\t10567\t10569\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intron <==\n", "NC_047559.1\t356021\t356023\n", "NC_047559.1\t356115\t356117\n", "NC_047559.1\t356173\t356175\n", "NC_047559.1\t356232\t356234\n", "NC_047559.1\t356303\t356305\n", "NC_047559.1\t356378\t356380\n", "NC_047559.1\t356395\t356397\n", "NC_047559.1\t356430\t356432\n", "NC_047559.1\t356436\t356438\n", "NC_047559.1\t356532\t356534\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intron <==\n", "NC_047559.1\t18747\t18749\n", "NC_047559.1\t63068\t63070\n", "NC_047559.1\t63072\t63074\n", "NC_047559.1\t63078\t63080\n", "NC_047559.1\t68070\t68072\n", "NC_047559.1\t68140\t68142\n", "NC_047559.1\t100276\t100278\n", "NC_047559.1\t100305\t100307\n", "NC_047559.1\t115632\t115634\n", "NC_047559.1\t161188\t161190\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intron <==\n", "NC_047559.1\t10155\t10157\n", "NC_047559.1\t10215\t10217\n", "NC_047559.1\t10270\t10272\n", "NC_047559.1\t10292\t10294\n", "NC_047559.1\t10314\t10316\n", "NC_047559.1\t10358\t10360\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10402\t10404\n", "NC_047559.1\t10413\t10415\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intron <==\n", "NC_047559.1\t60492\t60494\n", "NC_047559.1\t61996\t61998\n", "NC_047559.1\t68070\t68072\n", "NC_047559.1\t160063\t160065\n", "NC_047559.1\t160142\t160144\n", "NC_047559.1\t356378\t356380\n", "NC_047559.1\t356395\t356397\n", "NC_047559.1\t356430\t356432\n", "NC_047559.1\t356436\t356438\n", "NC_047559.1\t356532\t356534\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intron <==\n", "NC_047559.1\t61081\t61083\n", "NC_047559.1\t62078\t62080\n", "NC_047559.1\t68140\t68142\n", 
"NC_047559.1\t71969\t71971\n", "NC_047559.1\t71973\t71975\n", "NC_047559.1\t83228\t83230\n", "NC_047559.1\t84125\t84127\n", "NC_047559.1\t100249\t100251\n", "NC_047559.1\t100276\t100278\n", "NC_047559.1\t115246\t115248\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intron <==\n", "NC_047559.1\t10270\t10272\n", "NC_047559.1\t10292\t10294\n", "NC_047559.1\t10314\t10316\n", "NC_047559.1\t10358\t10360\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10402\t10404\n", "NC_047559.1\t10413\t10415\n", "NC_047559.1\t10457\t10459\n", "NC_047559.1\t10479\t10481\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-Meth.bed-intron <==\n", "NC_047559.1\t61996\t61998\n", "NC_047559.1\t110795\t110797\n", "NC_047559.1\t356335\t356337\n", "NC_047559.1\t356378\t356380\n", "NC_047559.1\t356395\t356397\n", "NC_047559.1\t356430\t356432\n", "NC_047559.1\t356436\t356438\n", "NC_047559.1\t356532\t356534\n", "NC_047559.1\t356685\t356687\n", "NC_047559.1\t356699\t356701\n", "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-sparseMeth.bed-intron <==\n", "NC_047559.1\t60313\t60315\n", "NC_047559.1\t60371\t60373\n", "NC_047559.1\t60492\t60494\n", "NC_047559.1\t61081\t61083\n", "NC_047559.1\t62078\t62080\n", "NC_047559.1\t63068\t63070\n", "NC_047559.1\t63072\t63074\n", "NC_047559.1\t63078\t63080\n", "NC_047559.1\t64815\t64817\n", "NC_047559.1\t66982\t66984\n", "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-unMeth.bed-intron <==\n", "NC_047559.1\t10155\t10157\n", "NC_047559.1\t10215\t10217\n", "NC_047559.1\t10270\t10272\n", "NC_047559.1\t10292\t10294\n", "NC_047559.1\t10314\t10316\n", "NC_047559.1\t10358\t10360\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10402\t10404\n", "NC_047559.1\t10413\t10415\n" ] } ], "source": [ "#Check output\n", "!head *intron" ] }, { "cell_type": "code", "execution_count": 50, "metadata": 
{}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 441010 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intron\n", " 361922 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intron\n", " 2370103 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intron\n", " 431889 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intron\n", " 359018 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intron\n", " 2400993 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intron\n", " 442471 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intron\n", " 346894 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intron\n", " 2372125 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intron\n", " 464828 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intron\n", " 372558 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intron\n", " 2442605 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intron\n", " 410764 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intron\n", " 326561 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intron\n", " 2271193 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intron\n", " 436348 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intron\n", " 357327 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intron\n", " 2410856 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intron\n", " 466969 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intron\n", " 327619 
zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intron\n", " 2318175 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intron\n", " 427347 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intron\n", " 347717 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intron\n", " 2342728 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intron\n", " 523386 zr3616_union-averages_5x.bedgraph.NO-SNPs-Meth.bed-intron\n", " 551291 zr3616_union-averages_5x.bedgraph.NO-SNPs-sparseMeth.bed-intron\n", " 3257979 zr3616_union-averages_5x.bedgraph.NO-SNPs-unMeth.bed-intron\n", " 29582676 total\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l *intron" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "#Get line counts for each file\n", "# Remove 28th line (total entries)\n", "#Ensure output is tab-delimited\n", "#Save output\n", "!wc -l *-intron \\\n", "| sed '28,$ d' \\\n", "| awk '{print $1\"\\t\"$2}' \\\n", "> zr3616_5x-intron-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4f. 
Upstream flanks" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "for f in *bed\n", "do\n", " /opt/homebrew/bin/intersectBed \\\n", " -u \\\n", " -a ${f} \\\n", " -b ../../genome-feature-files/cgigas_uk_roslin_v1_upstream.gff \\\n", " > ${f}-upstreamFlanks\n", "done" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-upstreamFlanks <==\n", "NC_047559.1\t576634\t576636\n", "NC_047559.1\t576752\t576754\n", "NC_047559.1\t1468258\t1468260\n", "NC_047559.1\t1800917\t1800919\n", "NC_047559.1\t1800924\t1800926\n", "NC_047559.1\t2253122\t2253124\n", "NC_047559.1\t3763635\t3763637\n", "NC_047559.1\t3763649\t3763651\n", "NC_047559.1\t3763653\t3763655\n", "NC_047559.1\t3763678\t3763680\n", "\n", "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-upstreamFlanks <==\n", "NC_047559.1\t9237\t9239\n", "NC_047559.1\t9658\t9660\n", "NC_047559.1\t9661\t9663\n", "NC_047559.1\t335850\t335852\n", "NC_047559.1\t335858\t335860\n", "NC_047559.1\t335878\t335880\n", "NC_047559.1\t335886\t335888\n", "NC_047559.1\t335892\t335894\n", "NC_047559.1\t576308\t576310\n", "NC_047559.1\t576693\t576695\n", "\n", "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-upstreamFlanks <==\n", "NC_047559.1\t9122\t9124\n", "NC_047559.1\t9140\t9142\n", "NC_047559.1\t9159\t9161\n", "NC_047559.1\t9240\t9242\n", "NC_047559.1\t9664\t9666\n", "NC_047559.1\t9774\t9776\n", "NC_047559.1\t9781\t9783\n", "NC_047559.1\t9787\t9789\n", "NC_047559.1\t9795\t9797\n", "NC_047559.1\t9803\t9805\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-upstreamFlanks <==\n", "NC_047559.1\t141800\t141802\n", "NC_047559.1\t141858\t141860\n", "NC_047559.1\t141894\t141896\n", "NC_047559.1\t576634\t576636\n", 
"NC_047559.1\t576752\t576754\n", "NC_047559.1\t702986\t702988\n", "NC_047559.1\t1419893\t1419895\n", "NC_047559.1\t3819925\t3819927\n", "NC_047559.1\t3819934\t3819936\n", "NC_047559.1\t3819939\t3819941\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-upstreamFlanks <==\n", "NC_047559.1\t8970\t8972\n", "NC_047559.1\t8979\t8981\n", "NC_047559.1\t9658\t9660\n", "NC_047559.1\t9664\t9666\n", "NC_047559.1\t80315\t80317\n", "NC_047559.1\t141708\t141710\n", "NC_047559.1\t141717\t141719\n", "NC_047559.1\t141751\t141753\n", "NC_047559.1\t141777\t141779\n", "NC_047559.1\t141853\t141855\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-upstreamFlanks <==\n", "NC_047559.1\t9122\t9124\n", "NC_047559.1\t9159\t9161\n", "NC_047559.1\t9237\t9239\n", "NC_047559.1\t9240\t9242\n", "NC_047559.1\t9661\t9663\n", "NC_047559.1\t9774\t9776\n", "NC_047559.1\t9781\t9783\n", "NC_047559.1\t9787\t9789\n", "NC_047559.1\t9795\t9797\n", "NC_047559.1\t9803\t9805\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-upstreamFlanks <==\n", "NC_047559.1\t576308\t576310\n", "NC_047559.1\t576634\t576636\n", "NC_047559.1\t576693\t576695\n", "NC_047559.1\t576752\t576754\n", "NC_047559.1\t1468032\t1468034\n", "NC_047559.1\t2184591\t2184593\n", "NC_047559.1\t2184604\t2184606\n", "NC_047559.1\t2184708\t2184710\n", "NC_047559.1\t2184734\t2184736\n", "NC_047559.1\t2184744\t2184746\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-upstreamFlanks <==\n", "NC_047559.1\t8970\t8972\n", "NC_047559.1\t8979\t8981\n", "NC_047559.1\t9122\t9124\n", "NC_047559.1\t9140\t9142\n", "NC_047559.1\t335960\t335962\n", "NC_047559.1\t641492\t641494\n", "NC_047559.1\t642044\t642046\n", "NC_047559.1\t702986\t702988\n", "NC_047559.1\t703439\t703441\n", "NC_047559.1\t703495\t703497\n", "\n", "==> 
zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-upstreamFlanks <==\n", "NC_047559.1\t9159\t9161\n", "NC_047559.1\t9237\t9239\n", "NC_047559.1\t9240\t9242\n", "NC_047559.1\t9658\t9660\n", "NC_047559.1\t9661\t9663\n", "NC_047559.1\t9664\t9666\n", "NC_047559.1\t9774\t9776\n", "NC_047559.1\t9781\t9783\n", "NC_047559.1\t9787\t9789\n", "NC_047559.1\t9795\t9797\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-upstreamFlanks <==\n", "NC_047559.1\t576308\t576310\n", "NC_047559.1\t576634\t576636\n", "NC_047559.1\t576693\t576695\n", "NC_047559.1\t576752\t576754\n", "NC_047559.1\t3158461\t3158463\n", "NC_047559.1\t3158489\t3158491\n", "NC_047559.1\t3695107\t3695109\n", "NC_047559.1\t3764019\t3764021\n", "NC_047559.1\t3819638\t3819640\n", "NC_047559.1\t3837827\t3837829\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-upstreamFlanks <==\n", "NC_047559.1\t142465\t142467\n", "NC_047559.1\t268125\t268127\n", "NC_047559.1\t335715\t335717\n", "NC_047559.1\t335756\t335758\n", "NC_047559.1\t335850\t335852\n", "NC_047559.1\t335858\t335860\n", "NC_047559.1\t335878\t335880\n", "NC_047559.1\t335886\t335888\n", "NC_047559.1\t335892\t335894\n", "NC_047559.1\t702986\t702988\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-upstreamFlanks <==\n", "NC_047559.1\t8970\t8972\n", "NC_047559.1\t9122\t9124\n", "NC_047559.1\t9140\t9142\n", "NC_047559.1\t9159\t9161\n", "NC_047559.1\t9237\t9239\n", "NC_047559.1\t9240\t9242\n", "NC_047559.1\t9658\t9660\n", "NC_047559.1\t9661\t9663\n", "NC_047559.1\t9664\t9666\n", "NC_047559.1\t9774\t9776\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-upstreamFlanks <==\n", "NC_047559.1\t576308\t576310\n", "NC_047559.1\t576634\t576636\n", "NC_047559.1\t576752\t576754\n", "NC_047559.1\t702986\t702988\n", "NC_047559.1\t703177\t703179\n", "NC_047559.1\t1468195\t1468197\n", 
"NC_047559.1\t1468244\t1468246\n", "NC_047559.1\t1468258\t1468260\n", "NC_047559.1\t1873020\t1873022\n", "NC_047559.1\t2912522\t2912524\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-upstreamFlanks <==\n", "NC_047559.1\t9664\t9666\n", "NC_047559.1\t142465\t142467\n", "NC_047559.1\t142484\t142486\n", "NC_047559.1\t142490\t142492\n", "NC_047559.1\t142504\t142506\n", "NC_047559.1\t335715\t335717\n", "NC_047559.1\t335850\t335852\n", "NC_047559.1\t335858\t335860\n", "NC_047559.1\t576693\t576695\n", "NC_047559.1\t641815\t641817\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-upstreamFlanks <==\n", "NC_047559.1\t8970\t8972\n", "NC_047559.1\t9237\t9239\n", "NC_047559.1\t9240\t9242\n", "NC_047559.1\t9603\t9605\n", "NC_047559.1\t9658\t9660\n", "NC_047559.1\t9661\t9663\n", "NC_047559.1\t9774\t9776\n", "NC_047559.1\t9781\t9783\n", "NC_047559.1\t9787\t9789\n", "NC_047559.1\t9795\t9797\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-upstreamFlanks <==\n", "NC_047559.1\t576308\t576310\n", "NC_047559.1\t576634\t576636\n", "NC_047559.1\t576693\t576695\n", "NC_047559.1\t576752\t576754\n", "NC_047559.1\t1419873\t1419875\n", "NC_047559.1\t1419877\t1419879\n", "NC_047559.1\t1419893\t1419895\n", "NC_047559.1\t1698140\t1698142\n", "NC_047559.1\t2190616\t2190618\n", "NC_047559.1\t2190708\t2190710\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-upstreamFlanks <==\n", "NC_047559.1\t141708\t141710\n", "NC_047559.1\t141717\t141719\n", "NC_047559.1\t141777\t141779\n", "NC_047559.1\t141800\t141802\n", "NC_047559.1\t141853\t141855\n", "NC_047559.1\t141885\t141887\n", "NC_047559.1\t141888\t141890\n", "NC_047559.1\t141912\t141914\n", "NC_047559.1\t142451\t142453\n", "NC_047559.1\t142465\t142467\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-upstreamFlanks <==\n", 
"NC_047559.1\t9237\t9239\n", "NC_047559.1\t9240\t9242\n", "NC_047559.1\t9661\t9663\n", "NC_047559.1\t9774\t9776\n", "NC_047559.1\t9781\t9783\n", "NC_047559.1\t9787\t9789\n", "NC_047559.1\t9795\t9797\n", "NC_047559.1\t9803\t9805\n", "NC_047559.1\t9817\t9819\n", "NC_047559.1\t13113\t13115\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-upstreamFlanks <==\n", "NC_047559.1\t576634\t576636\n", "NC_047559.1\t576693\t576695\n", "NC_047559.1\t576752\t576754\n", "NC_047559.1\t1467826\t1467828\n", "NC_047559.1\t1468015\t1468017\n", "NC_047559.1\t1468032\t1468034\n", "NC_047559.1\t1468041\t1468043\n", "NC_047559.1\t1468045\t1468047\n", "NC_047559.1\t1468258\t1468260\n", "NC_047559.1\t2184643\t2184645\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-upstreamFlanks <==\n", "NC_047559.1\t9658\t9660\n", "NC_047559.1\t141777\t141779\n", "NC_047559.1\t141800\t141802\n", "NC_047559.1\t141853\t141855\n", "NC_047559.1\t141858\t141860\n", "NC_047559.1\t141868\t141870\n", "NC_047559.1\t141888\t141890\n", "NC_047559.1\t141894\t141896\n", "NC_047559.1\t141905\t141907\n", "NC_047559.1\t142465\t142467\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-upstreamFlanks <==\n", "NC_047559.1\t8970\t8972\n", "NC_047559.1\t8979\t8981\n", "NC_047559.1\t9122\t9124\n", "NC_047559.1\t9140\t9142\n", "NC_047559.1\t9159\t9161\n", "NC_047559.1\t9237\t9239\n", "NC_047559.1\t9240\t9242\n", "NC_047559.1\t9661\t9663\n", "NC_047559.1\t9664\t9666\n", "NC_047559.1\t9774\t9776\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-upstreamFlanks <==\n", "NC_047559.1\t576308\t576310\n", "NC_047559.1\t576634\t576636\n", "NC_047559.1\t576693\t576695\n", "NC_047559.1\t576752\t576754\n", "NC_047559.1\t1203443\t1203445\n", "NC_047559.1\t1203486\t1203488\n", "NC_047559.1\t1203489\t1203491\n", "NC_047559.1\t1203510\t1203512\n", 
"NC_047559.1\t1203513\t1203515\n", "NC_047559.1\t1203579\t1203581\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-upstreamFlanks <==\n", "NC_047559.1\t8970\t8972\n", "NC_047559.1\t8979\t8981\n", "NC_047559.1\t9240\t9242\n", "NC_047559.1\t9661\t9663\n", "NC_047559.1\t9664\t9666\n", "NC_047559.1\t268298\t268300\n", "NC_047559.1\t328514\t328516\n", "NC_047559.1\t328531\t328533\n", "NC_047559.1\t328575\t328577\n", "NC_047559.1\t335756\t335758\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-upstreamFlanks <==\n", "NC_047559.1\t9122\t9124\n", "NC_047559.1\t9140\t9142\n", "NC_047559.1\t9159\t9161\n", "NC_047559.1\t9237\t9239\n", "NC_047559.1\t9658\t9660\n", "NC_047559.1\t9774\t9776\n", "NC_047559.1\t9781\t9783\n", "NC_047559.1\t9787\t9789\n", "NC_047559.1\t9795\t9797\n", "NC_047559.1\t9803\t9805\n", "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-Meth.bed-upstreamFlanks <==\n", "NC_047559.1\t576308\t576310\n", "NC_047559.1\t576634\t576636\n", "NC_047559.1\t576693\t576695\n", "NC_047559.1\t576752\t576754\n", "NC_047559.1\t1419893\t1419895\n", "NC_047559.1\t1467826\t1467828\n", "NC_047559.1\t1468015\t1468017\n", "NC_047559.1\t1468032\t1468034\n", "NC_047559.1\t1468041\t1468043\n", "NC_047559.1\t1468045\t1468047\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-sparseMeth.bed-upstreamFlanks <==\n", "NC_047559.1\t8979\t8981\n", "NC_047559.1\t9658\t9660\n", "NC_047559.1\t141708\t141710\n", "NC_047559.1\t141717\t141719\n", "NC_047559.1\t141777\t141779\n", "NC_047559.1\t141800\t141802\n", "NC_047559.1\t141853\t141855\n", "NC_047559.1\t141858\t141860\n", "NC_047559.1\t141868\t141870\n", "NC_047559.1\t141885\t141887\n", "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-unMeth.bed-upstreamFlanks <==\n", "NC_047559.1\t8970\t8972\n", "NC_047559.1\t9122\t9124\n", "NC_047559.1\t9140\t9142\n", "NC_047559.1\t9159\t9161\n", 
"NC_047559.1\t9237\t9239\n", "NC_047559.1\t9240\t9242\n", "NC_047559.1\t9603\t9605\n", "NC_047559.1\t9661\t9663\n", "NC_047559.1\t9664\t9666\n", "NC_047559.1\t9774\t9776\n" ] } ], "source": [ "#Check output\n", "!head *upstreamFlanks" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 4883 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-upstreamFlanks\n", " 15102 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-upstreamFlanks\n", " 365079 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-upstreamFlanks\n", " 4865 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-upstreamFlanks\n", " 15819 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-upstreamFlanks\n", " 366613 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-upstreamFlanks\n", " 4934 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-upstreamFlanks\n", " 15049 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-upstreamFlanks\n", " 365126 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-upstreamFlanks\n", " 5239 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-upstreamFlanks\n", " 16084 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-upstreamFlanks\n", " 375276 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-upstreamFlanks\n", " 4381 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-upstreamFlanks\n", " 14524 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-upstreamFlanks\n", " 349918 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-upstreamFlanks\n", " 4912 
zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-upstreamFlanks\n", " 15684 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-upstreamFlanks\n", " 365431 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-upstreamFlanks\n", " 4944 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-upstreamFlanks\n", " 14786 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-upstreamFlanks\n", " 356314 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-upstreamFlanks\n", " 4668 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-upstreamFlanks\n", " 15321 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-upstreamFlanks\n", " 361065 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-upstreamFlanks\n", " 5064 zr3616_union-averages_5x.bedgraph.NO-SNPs-Meth.bed-upstreamFlanks\n", " 21529 zr3616_union-averages_5x.bedgraph.NO-SNPs-sparseMeth.bed-upstreamFlanks\n", " 480963 zr3616_union-averages_5x.bedgraph.NO-SNPs-unMeth.bed-upstreamFlanks\n", " 3573573 total\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l *upstreamFlanks" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [], "source": [ "#Get line counts for each file\n", "# Remove 28th line (total entries)\n", "#Ensure output is tab-delimited\n", "#Save output\n", "!wc -l *-upstreamFlanks \\\n", "| sed '28,$ d' \\\n", "| awk '{print $1\"\\t\"$2}' \\\n", "> zr3616_5x-upstreamFlanks-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4g. 
Downstream flanks" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "for f in *bed\n", "do\n", " /opt/homebrew/bin/intersectBed \\\n", " -u \\\n", " -a ${f} \\\n", " -b ../../genome-feature-files/cgigas_uk_roslin_v1_downstream.gff \\\n", " > ${f}-downstreamFlanks\n", "done" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-downstreamFlanks <==\n", "NC_047559.1\t264885\t264887\n", "NC_047559.1\t264911\t264913\n", "NC_047559.1\t264924\t264926\n", "NC_047559.1\t344440\t344442\n", "NC_047559.1\t344447\t344449\n", "NC_047559.1\t344477\t344479\n", "NC_047559.1\t344549\t344551\n", "NC_047559.1\t344794\t344796\n", "NC_047559.1\t344812\t344814\n", "NC_047559.1\t344829\t344831\n", "\n", "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-downstreamFlanks <==\n", "NC_047559.1\t258442\t258444\n", "NC_047559.1\t264959\t264961\n", "NC_047559.1\t265013\t265015\n", "NC_047559.1\t265028\t265030\n", "NC_047559.1\t265111\t265113\n", "NC_047559.1\t326295\t326297\n", "NC_047559.1\t326317\t326319\n", "NC_047559.1\t344861\t344863\n", "NC_047559.1\t345009\t345011\n", "NC_047559.1\t434413\t434415\n", "\n", "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-downstreamFlanks <==\n", "NC_047559.1\t16061\t16063\n", "NC_047559.1\t16105\t16107\n", "NC_047559.1\t16112\t16114\n", "NC_047559.1\t16220\t16222\n", "NC_047559.1\t16260\t16262\n", "NC_047559.1\t16289\t16291\n", "NC_047559.1\t16310\t16312\n", "NC_047559.1\t16369\t16371\n", "NC_047559.1\t16409\t16411\n", "NC_047559.1\t16438\t16440\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-downstreamFlanks <==\n", "NC_047559.1\t141858\t141860\n", "NC_047559.1\t141894\t141896\n", "NC_047559.1\t434126\t434128\n", 
"NC_047559.1\t576634\t576636\n", "NC_047559.1\t576752\t576754\n", "NC_047559.1\t576866\t576868\n", "NC_047559.1\t576880\t576882\n", "NC_047559.1\t577126\t577128\n", "NC_047559.1\t577456\t577458\n", "NC_047559.1\t1010121\t1010123\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-downstreamFlanks <==\n", "NC_047559.1\t86449\t86451\n", "NC_047559.1\t141853\t141855\n", "NC_047559.1\t141885\t141887\n", "NC_047559.1\t141888\t141890\n", "NC_047559.1\t142465\t142467\n", "NC_047559.1\t142484\t142486\n", "NC_047559.1\t142490\t142492\n", "NC_047559.1\t142504\t142506\n", "NC_047559.1\t142531\t142533\n", "NC_047559.1\t142545\t142547\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-downstreamFlanks <==\n", "NC_047559.1\t11706\t11708\n", "NC_047559.1\t16061\t16063\n", "NC_047559.1\t16105\t16107\n", "NC_047559.1\t16112\t16114\n", "NC_047559.1\t16220\t16222\n", "NC_047559.1\t16260\t16262\n", "NC_047559.1\t16289\t16291\n", "NC_047559.1\t16310\t16312\n", "NC_047559.1\t16369\t16371\n", "NC_047559.1\t16409\t16411\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-downstreamFlanks <==\n", "NC_047559.1\t259011\t259013\n", "NC_047559.1\t576634\t576636\n", "NC_047559.1\t576693\t576695\n", "NC_047559.1\t576752\t576754\n", "NC_047559.1\t576866\t576868\n", "NC_047559.1\t576880\t576882\n", "NC_047559.1\t577126\t577128\n", "NC_047559.1\t577456\t577458\n", "NC_047559.1\t1010121\t1010123\n", "NC_047559.1\t1010136\t1010138\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-downstreamFlanks <==\n", "NC_047559.1\t264885\t264887\n", "NC_047559.1\t264924\t264926\n", "NC_047559.1\t264959\t264961\n", "NC_047559.1\t265013\t265015\n", "NC_047559.1\t265028\t265030\n", "NC_047559.1\t265111\t265113\n", "NC_047559.1\t326295\t326297\n", "NC_047559.1\t326317\t326319\n", "NC_047559.1\t576914\t576916\n", "NC_047559.1\t889085\t889087\n", "\n", 
"==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-downstreamFlanks <==\n", "NC_047559.1\t16061\t16063\n", "NC_047559.1\t16105\t16107\n", "NC_047559.1\t16112\t16114\n", "NC_047559.1\t16220\t16222\n", "NC_047559.1\t16260\t16262\n", "NC_047559.1\t16289\t16291\n", "NC_047559.1\t16310\t16312\n", "NC_047559.1\t16369\t16371\n", "NC_047559.1\t16409\t16411\n", "NC_047559.1\t16438\t16440\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-downstreamFlanks <==\n", "NC_047559.1\t325845\t325847\n", "NC_047559.1\t344549\t344551\n", "NC_047559.1\t344623\t344625\n", "NC_047559.1\t433584\t433586\n", "NC_047559.1\t433588\t433590\n", "NC_047559.1\t433598\t433600\n", "NC_047559.1\t576634\t576636\n", "NC_047559.1\t576693\t576695\n", "NC_047559.1\t576752\t576754\n", "NC_047559.1\t576866\t576868\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-downstreamFlanks <==\n", "NC_047559.1\t142465\t142467\n", "NC_047559.1\t142752\t142754\n", "NC_047559.1\t210738\t210740\n", "NC_047559.1\t258749\t258751\n", "NC_047559.1\t258755\t258757\n", "NC_047559.1\t258791\t258793\n", "NC_047559.1\t258813\t258815\n", "NC_047559.1\t258854\t258856\n", "NC_047559.1\t258890\t258892\n", "NC_047559.1\t265013\t265015\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-downstreamFlanks <==\n", "NC_047559.1\t16061\t16063\n", "NC_047559.1\t16105\t16107\n", "NC_047559.1\t16112\t16114\n", "NC_047559.1\t16220\t16222\n", "NC_047559.1\t16260\t16262\n", "NC_047559.1\t16289\t16291\n", "NC_047559.1\t16310\t16312\n", "NC_047559.1\t16369\t16371\n", "NC_047559.1\t16409\t16411\n", "NC_047559.1\t16438\t16440\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-downstreamFlanks <==\n", "NC_047559.1\t433588\t433590\n", "NC_047559.1\t433598\t433600\n", "NC_047559.1\t434308\t434310\n", "NC_047559.1\t576634\t576636\n", 
"NC_047559.1\t576752\t576754\n", "NC_047559.1\t576866\t576868\n", "NC_047559.1\t576880\t576882\n", "NC_047559.1\t577126\t577128\n", "NC_047559.1\t577456\t577458\n", "NC_047559.1\t1010121\t1010123\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-downstreamFlanks <==\n", "NC_047559.1\t139183\t139185\n", "NC_047559.1\t142465\t142467\n", "NC_047559.1\t142484\t142486\n", "NC_047559.1\t142490\t142492\n", "NC_047559.1\t142504\t142506\n", "NC_047559.1\t142531\t142533\n", "NC_047559.1\t142545\t142547\n", "NC_047559.1\t142593\t142595\n", "NC_047559.1\t142689\t142691\n", "NC_047559.1\t142827\t142829\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-downstreamFlanks <==\n", "NC_047559.1\t11706\t11708\n", "NC_047559.1\t11725\t11727\n", "NC_047559.1\t16061\t16063\n", "NC_047559.1\t16105\t16107\n", "NC_047559.1\t16112\t16114\n", "NC_047559.1\t16220\t16222\n", "NC_047559.1\t16260\t16262\n", "NC_047559.1\t16289\t16291\n", "NC_047559.1\t16310\t16312\n", "NC_047559.1\t16369\t16371\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-downstreamFlanks <==\n", "NC_047559.1\t433584\t433586\n", "NC_047559.1\t433588\t433590\n", "NC_047559.1\t433598\t433600\n", "NC_047559.1\t576634\t576636\n", "NC_047559.1\t576693\t576695\n", "NC_047559.1\t576752\t576754\n", "NC_047559.1\t576866\t576868\n", "NC_047559.1\t576880\t576882\n", "NC_047559.1\t577126\t577128\n", "NC_047559.1\t577456\t577458\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-downstreamFlanks <==\n", "NC_047559.1\t139183\t139185\n", "NC_047559.1\t141853\t141855\n", "NC_047559.1\t141885\t141887\n", "NC_047559.1\t141888\t141890\n", "NC_047559.1\t141912\t141914\n", "NC_047559.1\t142451\t142453\n", "NC_047559.1\t142465\t142467\n", "NC_047559.1\t142484\t142486\n", "NC_047559.1\t142490\t142492\n", "NC_047559.1\t142504\t142506\n", "\n", "==> 
zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-downstreamFlanks <==\n", "NC_047559.1\t11706\t11708\n", "NC_047559.1\t11725\t11727\n", "NC_047559.1\t16061\t16063\n", "NC_047559.1\t16105\t16107\n", "NC_047559.1\t16112\t16114\n", "NC_047559.1\t16220\t16222\n", "NC_047559.1\t16260\t16262\n", "NC_047559.1\t16289\t16291\n", "NC_047559.1\t16310\t16312\n", "NC_047559.1\t16369\t16371\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-downstreamFlanks <==\n", "NC_047559.1\t344477\t344479\n", "NC_047559.1\t344785\t344787\n", "NC_047559.1\t344794\t344796\n", "NC_047559.1\t344803\t344805\n", "NC_047559.1\t344812\t344814\n", "NC_047559.1\t344829\t344831\n", "NC_047559.1\t344838\t344840\n", "NC_047559.1\t344861\t344863\n", "NC_047559.1\t433584\t433586\n", "NC_047559.1\t433588\t433590\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-downstreamFlanks <==\n", "NC_047559.1\t141853\t141855\n", "NC_047559.1\t141858\t141860\n", "NC_047559.1\t141868\t141870\n", "NC_047559.1\t141888\t141890\n", "NC_047559.1\t141894\t141896\n", "NC_047559.1\t141905\t141907\n", "NC_047559.1\t142465\t142467\n", "NC_047559.1\t142484\t142486\n", "NC_047559.1\t142490\t142492\n", "NC_047559.1\t142504\t142506\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-downstreamFlanks <==\n", "NC_047559.1\t16061\t16063\n", "NC_047559.1\t16105\t16107\n", "NC_047559.1\t16112\t16114\n", "NC_047559.1\t16220\t16222\n", "NC_047559.1\t16260\t16262\n", "NC_047559.1\t16289\t16291\n", "NC_047559.1\t16310\t16312\n", "NC_047559.1\t16369\t16371\n", "NC_047559.1\t16409\t16411\n", "NC_047559.1\t16438\t16440\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-downstreamFlanks <==\n", "NC_047559.1\t576634\t576636\n", "NC_047559.1\t576693\t576695\n", "NC_047559.1\t576752\t576754\n", "NC_047559.1\t576866\t576868\n", "NC_047559.1\t576880\t576882\n", 
"NC_047559.1\t577126\t577128\n", "NC_047559.1\t577456\t577458\n", "NC_047559.1\t1010121\t1010123\n", "NC_047559.1\t1010136\t1010138\n", "NC_047559.1\t1010757\t1010759\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-downstreamFlanks <==\n", "NC_047559.1\t241252\t241254\n", "NC_047559.1\t264815\t264817\n", "NC_047559.1\t264911\t264913\n", "NC_047559.1\t264924\t264926\n", "NC_047559.1\t264959\t264961\n", "NC_047559.1\t265111\t265113\n", "NC_047559.1\t326317\t326319\n", "NC_047559.1\t344549\t344551\n", "NC_047559.1\t344785\t344787\n", "NC_047559.1\t344794\t344796\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-downstreamFlanks <==\n", "NC_047559.1\t16061\t16063\n", "NC_047559.1\t16105\t16107\n", "NC_047559.1\t16112\t16114\n", "NC_047559.1\t16220\t16222\n", "NC_047559.1\t16260\t16262\n", "NC_047559.1\t16289\t16291\n", "NC_047559.1\t16310\t16312\n", "NC_047559.1\t16369\t16371\n", "NC_047559.1\t16409\t16411\n", "NC_047559.1\t16438\t16440\n", "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-Meth.bed-downstreamFlanks <==\n", "NC_047559.1\t433588\t433590\n", "NC_047559.1\t433598\t433600\n", "NC_047559.1\t576634\t576636\n", "NC_047559.1\t576693\t576695\n", "NC_047559.1\t576752\t576754\n", "NC_047559.1\t576866\t576868\n", "NC_047559.1\t576880\t576882\n", "NC_047559.1\t577126\t577128\n", "NC_047559.1\t577456\t577458\n", "NC_047559.1\t1010121\t1010123\n", "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-sparseMeth.bed-downstreamFlanks <==\n", "NC_047559.1\t139183\t139185\n", "NC_047559.1\t141853\t141855\n", "NC_047559.1\t141858\t141860\n", "NC_047559.1\t141868\t141870\n", "NC_047559.1\t141885\t141887\n", "NC_047559.1\t141888\t141890\n", "NC_047559.1\t141894\t141896\n", "NC_047559.1\t141905\t141907\n", "NC_047559.1\t141912\t141914\n", "NC_047559.1\t142451\t142453\n", "\n", "==> 
zr3616_union-averages_5x.bedgraph.NO-SNPs-unMeth.bed-downstreamFlanks <==\n", "NC_047559.1\t11706\t11708\n", "NC_047559.1\t11725\t11727\n", "NC_047559.1\t16061\t16063\n", "NC_047559.1\t16105\t16107\n", "NC_047559.1\t16112\t16114\n", "NC_047559.1\t16220\t16222\n", "NC_047559.1\t16260\t16262\n", "NC_047559.1\t16289\t16291\n", "NC_047559.1\t16310\t16312\n", "NC_047559.1\t16369\t16371\n" ] } ], "source": [ "#Check output\n", "!head *downstreamFlanks" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 20108 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-downstreamFlanks\n", " 26394 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-downstreamFlanks\n", " 304826 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-downstreamFlanks\n", " 18944 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-downstreamFlanks\n", " 26915 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-downstreamFlanks\n", " 307855 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-downstreamFlanks\n", " 19824 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-downstreamFlanks\n", " 25556 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-downstreamFlanks\n", " 306478 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-downstreamFlanks\n", " 20462 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-downstreamFlanks\n", " 28020 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-downstreamFlanks\n", " 315071 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-downstreamFlanks\n", " 18230 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-downstreamFlanks\n", " 25015 
zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-downstreamFlanks\n", " 291359 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-downstreamFlanks\n", " 18927 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-downstreamFlanks\n", " 26389 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-downstreamFlanks\n", " 306873 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-downstreamFlanks\n", " 19733 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-downstreamFlanks\n", " 24606 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-downstreamFlanks\n", " 297770 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-downstreamFlanks\n", " 19326 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-downstreamFlanks\n", " 26787 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-downstreamFlanks\n", " 301358 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-downstreamFlanks\n", " 21599 zr3616_union-averages_5x.bedgraph.NO-SNPs-Meth.bed-downstreamFlanks\n", " 43592 zr3616_union-averages_5x.bedgraph.NO-SNPs-sparseMeth.bed-downstreamFlanks\n", " 403020 zr3616_union-averages_5x.bedgraph.NO-SNPs-unMeth.bed-downstreamFlanks\n", " 3265037 total\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l *downstreamFlanks" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [], "source": [ "#Get line counts for each file\n", "# Remove 28th line (total entries)\n", "#Ensure output is tab-delimited\n", "#Save output\n", "!wc -l *-downstreamFlanks \\\n", "| sed '28,$ d' \\\n", "| awk '{print $1\"\\t\"$2}' \\\n", "> zr3616_5x-downstreamFlanks-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4h. 
Intergenic regions" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "for f in *bed\n", "do\n", " /opt/homebrew/bin/intersectBed \\\n", " -u \\\n", " -a ${f} \\\n", " -b ../../genome-feature-files/cgigas_uk_roslin_v1_intergenic.bed \\\n", " > ${f}-intergenic\n", "done" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intergenic <==\n", "NC_047559.1\t1547\t1549\n", "NC_047559.1\t1571\t1573\n", "NC_047559.1\t2267\t2269\n", "NC_047559.1\t2291\t2293\n", "NC_047559.1\t4073\t4075\n", "NC_047559.1\t4791\t4793\n", "NC_047559.1\t4835\t4837\n", "NC_047559.1\t4843\t4845\n", "NC_047559.1\t5605\t5607\n", "NC_047559.1\t5613\t5615\n", "\n", "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intergenic <==\n", "NC_047559.1\t4887\t4889\n", "NC_047559.1\t4909\t4911\n", "NC_047559.1\t5500\t5502\n", "NC_047559.1\t7716\t7718\n", "NC_047559.1\t7814\t7816\n", "NC_047559.1\t23610\t23612\n", "NC_047559.1\t24932\t24934\n", "NC_047559.1\t24934\t24936\n", "NC_047559.1\t26463\t26465\n", "NC_047559.1\t26485\t26487\n", "\n", "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intergenic <==\n", "NC_047559.1\t5883\t5885\n", "NC_047559.1\t20252\t20254\n", "NC_047559.1\t20297\t20299\n", "NC_047559.1\t20319\t20321\n", "NC_047559.1\t20341\t20343\n", "NC_047559.1\t20363\t20365\n", "NC_047559.1\t20385\t20387\n", "NC_047559.1\t20407\t20409\n", "NC_047559.1\t20429\t20431\n", "NC_047559.1\t20451\t20453\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intergenic <==\n", "NC_047559.1\t1571\t1573\n", "NC_047559.1\t2267\t2269\n", "NC_047559.1\t2291\t2293\n", "NC_047559.1\t2448\t2450\n", "NC_047559.1\t2988\t2990\n", "NC_047559.1\t3902\t3904\n", "NC_047559.1\t3916\t3918\n", 
"NC_047559.1\t4073\t4075\n", "NC_047559.1\t4270\t4272\n", "NC_047559.1\t4791\t4793\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intergenic <==\n", "NC_047559.1\t4397\t4399\n", "NC_047559.1\t4909\t4911\n", "NC_047559.1\t5605\t5607\n", "NC_047559.1\t5613\t5615\n", "NC_047559.1\t7716\t7718\n", "NC_047559.1\t7850\t7852\n", "NC_047559.1\t22972\t22974\n", "NC_047559.1\t23610\t23612\n", "NC_047559.1\t25334\t25336\n", "NC_047559.1\t26966\t26968\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intergenic <==\n", "NC_047559.1\t20252\t20254\n", "NC_047559.1\t20297\t20299\n", "NC_047559.1\t20319\t20321\n", "NC_047559.1\t20341\t20343\n", "NC_047559.1\t20363\t20365\n", "NC_047559.1\t20385\t20387\n", "NC_047559.1\t20407\t20409\n", "NC_047559.1\t20429\t20431\n", "NC_047559.1\t20451\t20453\n", "NC_047559.1\t20924\t20926\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intergenic <==\n", "NC_047559.1\t1571\t1573\n", "NC_047559.1\t2291\t2293\n", "NC_047559.1\t2448\t2450\n", "NC_047559.1\t2988\t2990\n", "NC_047559.1\t4791\t4793\n", "NC_047559.1\t4835\t4837\n", "NC_047559.1\t4843\t4845\n", "NC_047559.1\t4887\t4889\n", "NC_047559.1\t4909\t4911\n", "NC_047559.1\t5999\t6001\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intergenic <==\n", "NC_047559.1\t4073\t4075\n", "NC_047559.1\t23360\t23362\n", "NC_047559.1\t23610\t23612\n", "NC_047559.1\t25334\t25336\n", "NC_047559.1\t27802\t27804\n", "NC_047559.1\t38052\t38054\n", "NC_047559.1\t46678\t46680\n", "NC_047559.1\t50869\t50871\n", "NC_047559.1\t50878\t50880\n", "NC_047559.1\t50906\t50908\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intergenic <==\n", "NC_047559.1\t7129\t7131\n", "NC_047559.1\t20363\t20365\n", "NC_047559.1\t20385\t20387\n", "NC_047559.1\t20407\t20409\n", "NC_047559.1\t20924\t20926\n", 
"NC_047559.1\t20938\t20940\n", "NC_047559.1\t20946\t20948\n", "NC_047559.1\t20954\t20956\n", "NC_047559.1\t20960\t20962\n", "NC_047559.1\t20967\t20969\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intergenic <==\n", "NC_047559.1\t2267\t2269\n", "NC_047559.1\t2291\t2293\n", "NC_047559.1\t4835\t4837\n", "NC_047559.1\t4843\t4845\n", "NC_047559.1\t4887\t4889\n", "NC_047559.1\t4909\t4911\n", "NC_047559.1\t7490\t7492\n", "NC_047559.1\t7671\t7673\n", "NC_047559.1\t7716\t7718\n", "NC_047559.1\t7850\t7852\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intergenic <==\n", "NC_047559.1\t2087\t2089\n", "NC_047559.1\t4397\t4399\n", "NC_047559.1\t4791\t4793\n", "NC_047559.1\t23610\t23612\n", "NC_047559.1\t26448\t26450\n", "NC_047559.1\t26463\t26465\n", "NC_047559.1\t29235\t29237\n", "NC_047559.1\t37587\t37589\n", "NC_047559.1\t42499\t42501\n", "NC_047559.1\t42696\t42698\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intergenic <==\n", "NC_047559.1\t5883\t5885\n", "NC_047559.1\t5999\t6001\n", "NC_047559.1\t7129\t7131\n", "NC_047559.1\t12503\t12505\n", "NC_047559.1\t12520\t12522\n", "NC_047559.1\t12906\t12908\n", "NC_047559.1\t12923\t12925\n", "NC_047559.1\t20252\t20254\n", "NC_047559.1\t20297\t20299\n", "NC_047559.1\t20319\t20321\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intergenic <==\n", "NC_047559.1\t1547\t1549\n", "NC_047559.1\t1571\t1573\n", "NC_047559.1\t1907\t1909\n", "NC_047559.1\t3168\t3170\n", "NC_047559.1\t4073\t4075\n", "NC_047559.1\t4097\t4099\n", "NC_047559.1\t7867\t7869\n", "NC_047559.1\t7885\t7887\n", "NC_047559.1\t7923\t7925\n", "NC_047559.1\t7939\t7941\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intergenic <==\n", "NC_047559.1\t5613\t5615\n", "NC_047559.1\t5883\t5885\n", "NC_047559.1\t5999\t6001\n", "NC_047559.1\t7129\t7131\n", 
"NC_047559.1\t7490\t7492\n", "NC_047559.1\t8251\t8253\n", "NC_047559.1\t8254\t8256\n", "NC_047559.1\t8266\t8268\n", "NC_047559.1\t22966\t22968\n", "NC_047559.1\t23610\t23612\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intergenic <==\n", "NC_047559.1\t4791\t4793\n", "NC_047559.1\t4835\t4837\n", "NC_047559.1\t4843\t4845\n", "NC_047559.1\t4887\t4889\n", "NC_047559.1\t4909\t4911\n", "NC_047559.1\t5605\t5607\n", "NC_047559.1\t8295\t8297\n", "NC_047559.1\t8308\t8310\n", "NC_047559.1\t8346\t8348\n", "NC_047559.1\t20297\t20299\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intergenic <==\n", "NC_047559.1\t1571\t1573\n", "NC_047559.1\t2988\t2990\n", "NC_047559.1\t4791\t4793\n", "NC_047559.1\t4835\t4837\n", "NC_047559.1\t4843\t4845\n", "NC_047559.1\t6419\t6421\n", "NC_047559.1\t6439\t6441\n", "NC_047559.1\t6486\t6488\n", "NC_047559.1\t7850\t7852\n", "NC_047559.1\t7923\t7925\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intergenic <==\n", "NC_047559.1\t2291\t2293\n", "NC_047559.1\t4073\t4075\n", "NC_047559.1\t4397\t4399\n", "NC_047559.1\t5500\t5502\n", "NC_047559.1\t5883\t5885\n", "NC_047559.1\t7867\t7869\n", "NC_047559.1\t7885\t7887\n", "NC_047559.1\t7979\t7981\n", "NC_047559.1\t23360\t23362\n", "NC_047559.1\t23610\t23612\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intergenic <==\n", "NC_047559.1\t5605\t5607\n", "NC_047559.1\t5613\t5615\n", "NC_047559.1\t20252\t20254\n", "NC_047559.1\t20297\t20299\n", "NC_047559.1\t20319\t20321\n", "NC_047559.1\t20341\t20343\n", "NC_047559.1\t20363\t20365\n", "NC_047559.1\t20385\t20387\n", "NC_047559.1\t20407\t20409\n", "NC_047559.1\t20588\t20590\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intergenic <==\n", "NC_047559.1\t1547\t1549\n", "NC_047559.1\t1571\t1573\n", "NC_047559.1\t4791\t4793\n", 
"NC_047559.1\t4835\t4837\n", "NC_047559.1\t4843\t4845\n", "NC_047559.1\t4887\t4889\n", "NC_047559.1\t4909\t4911\n", "NC_047559.1\t5500\t5502\n", "NC_047559.1\t5605\t5607\n", "NC_047559.1\t5613\t5615\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intergenic <==\n", "NC_047559.1\t2809\t2811\n", "NC_047559.1\t2988\t2990\n", "NC_047559.1\t4073\t4075\n", "NC_047559.1\t4097\t4099\n", "NC_047559.1\t5653\t5655\n", "NC_047559.1\t7129\t7131\n", "NC_047559.1\t7490\t7492\n", "NC_047559.1\t7671\t7673\n", "NC_047559.1\t7867\t7869\n", "NC_047559.1\t7990\t7992\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intergenic <==\n", "NC_047559.1\t2087\t2089\n", "NC_047559.1\t4397\t4399\n", "NC_047559.1\t7814\t7816\n", "NC_047559.1\t7850\t7852\n", "NC_047559.1\t7885\t7887\n", "NC_047559.1\t7923\t7925\n", "NC_047559.1\t7939\t7941\n", "NC_047559.1\t7959\t7961\n", "NC_047559.1\t7979\t7981\n", "NC_047559.1\t20252\t20254\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intergenic <==\n", "NC_047559.1\t2087\t2089\n", "NC_047559.1\t2267\t2269\n", "NC_047559.1\t2291\t2293\n", "NC_047559.1\t2448\t2450\n", "NC_047559.1\t2988\t2990\n", "NC_047559.1\t4073\t4075\n", "NC_047559.1\t4791\t4793\n", "NC_047559.1\t4835\t4837\n", "NC_047559.1\t4843\t4845\n", "NC_047559.1\t7129\t7131\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intergenic <==\n", "NC_047559.1\t5500\t5502\n", "NC_047559.1\t5999\t6001\n", "NC_047559.1\t7716\t7718\n", "NC_047559.1\t20473\t20475\n", "NC_047559.1\t22979\t22981\n", "NC_047559.1\t23610\t23612\n", "NC_047559.1\t24926\t24928\n", "NC_047559.1\t26419\t26421\n", "NC_047559.1\t26448\t26450\n", "NC_047559.1\t26463\t26465\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intergenic <==\n", "NC_047559.1\t5605\t5607\n", "NC_047559.1\t5613\t5615\n", 
"NC_047559.1\t12439\t12441\n", "NC_047559.1\t12503\t12505\n", "NC_047559.1\t12520\t12522\n", "NC_047559.1\t20252\t20254\n", "NC_047559.1\t20297\t20299\n", "NC_047559.1\t20319\t20321\n", "NC_047559.1\t20341\t20343\n", "NC_047559.1\t20363\t20365\n", "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-Meth.bed-intergenic <==\n", "NC_047559.1\t1547\t1549\n", "NC_047559.1\t1571\t1573\n", "NC_047559.1\t1907\t1909\n", "NC_047559.1\t2267\t2269\n", "NC_047559.1\t2291\t2293\n", "NC_047559.1\t2448\t2450\n", "NC_047559.1\t2988\t2990\n", "NC_047559.1\t3168\t3170\n", "NC_047559.1\t3902\t3904\n", "NC_047559.1\t3916\t3918\n", "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-sparseMeth.bed-intergenic <==\n", "NC_047559.1\t2087\t2089\n", "NC_047559.1\t2809\t2811\n", "NC_047559.1\t4097\t4099\n", "NC_047559.1\t4397\t4399\n", "NC_047559.1\t4909\t4911\n", "NC_047559.1\t5500\t5502\n", "NC_047559.1\t5605\t5607\n", "NC_047559.1\t5613\t5615\n", "NC_047559.1\t5653\t5655\n", "NC_047559.1\t5999\t6001\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\r\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-unMeth.bed-intergenic <==\r\n", "NC_047559.1\t5883\t5885\r\n", "NC_047559.1\t8295\t8297\r\n", "NC_047559.1\t12439\t12441\r\n", "NC_047559.1\t12503\t12505\r\n", "NC_047559.1\t12520\t12522\r\n", "NC_047559.1\t12906\t12908\r\n", "NC_047559.1\t12923\t12925\r\n", "NC_047559.1\t20252\t20254\r\n", "NC_047559.1\t20297\t20299\r\n", "NC_047559.1\t20319\t20321\r\n" ] } ], "source": [ "#Check output\n", "!head *intergenic" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 35770 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intergenic\n", " 155853 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intergenic\n", " 2389807 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intergenic\n", " 37568 
zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intergenic\n", " 159001 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intergenic\n", " 2406131 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intergenic\n", " 37512 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intergenic\n", " 150346 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intergenic\n", " 2372012 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intergenic\n", " 39199 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intergenic\n", " 163198 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intergenic\n", " 2495040 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intergenic\n", " 33044 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intergenic\n", " 140301 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intergenic\n", " 2284986 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intergenic\n", " 36330 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intergenic\n", " 155217 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intergenic\n", " 2407934 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intergenic\n", " 38163 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intergenic\n", " 148709 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intergenic\n", " 2295259 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intergenic\n", " 36748 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-intergenic\n", " 154757 
zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-intergenic\n", " 2360227 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-intergenic\n", " 37734 zr3616_union-averages_5x.bedgraph.NO-SNPs-Meth.bed-intergenic\n", " 222130 zr3616_union-averages_5x.bedgraph.NO-SNPs-sparseMeth.bed-intergenic\n", " 3490894 zr3616_union-averages_5x.bedgraph.NO-SNPs-unMeth.bed-intergenic\n", " 24283870 total\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l *intergenic" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [], "source": [ "#Get line counts for each file\n", "#Remove 28th line (the total row appended by wc)\n", "#Ensure output is tab-delimited\n", "#Save output\n", "!wc -l *-intergenic \\\n", "| sed '28,$ d' \\\n", "| awk '{print $1\"\\t\"$2}' \\\n", "> zr3616_5x-intergenic-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4i. lncRNA" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "for f in *bed\n", "do\n", " /opt/homebrew/bin/intersectBed \\\n", " -u \\\n", " -a ${f} \\\n", " -b ../../genome-feature-files/cgigas_uk_roslin_v1_lncRNA.gff \\\n", " > ${f}-lncRNA\n", "done" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-lncRNA <==\n", "NC_047559.1\t1664457\t1664459\n", "NC_047559.1\t1664745\t1664747\n", "NC_047559.1\t1664773\t1664775\n", "NC_047559.1\t1664902\t1664904\n", "NC_047559.1\t1665259\t1665261\n", "NC_047559.1\t1688362\t1688364\n", "NC_047559.1\t1688423\t1688425\n", "NC_047559.1\t1688433\t1688435\n", "NC_047559.1\t1688466\t1688468\n", "NC_047559.1\t2139066\t2139068\n", "\n", "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-lncRNA <==\n", "NC_047559.1\t10899\t10901\n", "NC_047559.1\t255751\t255753\n", 
"NC_047559.1\t255789\t255791\n", "NC_047559.1\t416920\t416922\n", "NC_047559.1\t417337\t417339\n", "NC_047559.1\t418447\t418449\n", "NC_047559.1\t419514\t419516\n", "NC_047559.1\t786896\t786898\n", "NC_047559.1\t789201\t789203\n", "NC_047559.1\t789687\t789689\n", "\n", "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-lncRNA <==\n", "NC_047559.1\t10270\t10272\n", "NC_047559.1\t10292\t10294\n", "NC_047559.1\t10314\t10316\n", "NC_047559.1\t10358\t10360\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10402\t10404\n", "NC_047559.1\t10413\t10415\n", "NC_047559.1\t10457\t10459\n", "NC_047559.1\t10479\t10481\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-lncRNA <==\n", "NC_047559.1\t415945\t415947\n", "NC_047559.1\t417012\t417014\n", "NC_047559.1\t417944\t417946\n", "NC_047559.1\t418038\t418040\n", "NC_047559.1\t418118\t418120\n", "NC_047559.1\t418153\t418155\n", "NC_047559.1\t418166\t418168\n", "NC_047559.1\t418179\t418181\n", "NC_047559.1\t418551\t418553\n", "NC_047559.1\t418569\t418571\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-lncRNA <==\n", "NC_047559.1\t229096\t229098\n", "NC_047559.1\t415975\t415977\n", "NC_047559.1\t416078\t416080\n", "NC_047559.1\t416920\t416922\n", "NC_047559.1\t416993\t416995\n", "NC_047559.1\t417337\t417339\n", "NC_047559.1\t418447\t418449\n", "NC_047559.1\t418539\t418541\n", "NC_047559.1\t419514\t419516\n", "NC_047559.1\t788844\t788846\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-lncRNA <==\n", "NC_047559.1\t10292\t10294\n", "NC_047559.1\t10314\t10316\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10402\t10404\n", "NC_047559.1\t10413\t10415\n", "NC_047559.1\t10457\t10459\n", "NC_047559.1\t10479\t10481\n", "NC_047559.1\t10501\t10503\n", "NC_047559.1\t10772\t10774\n", "\n", "==> 
zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-lncRNA <==\n", "NC_047559.1\t1332113\t1332115\n", "NC_047559.1\t1664459\t1664461\n", "NC_047559.1\t1664658\t1664660\n", "NC_047559.1\t1664745\t1664747\n", "NC_047559.1\t1664773\t1664775\n", "NC_047559.1\t1665111\t1665113\n", "NC_047559.1\t1665148\t1665150\n", "NC_047559.1\t1665259\t1665261\n", "NC_047559.1\t3828724\t3828726\n", "NC_047559.1\t3828793\t3828795\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-lncRNA <==\n", "NC_047559.1\t10501\t10503\n", "NC_047559.1\t242324\t242326\n", "NC_047559.1\t418447\t418449\n", "NC_047559.1\t418539\t418541\n", "NC_047559.1\t419514\t419516\n", "NC_047559.1\t788844\t788846\n", "NC_047559.1\t789733\t789735\n", "NC_047559.1\t789774\t789776\n", "NC_047559.1\t789880\t789882\n", "NC_047559.1\t789898\t789900\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-lncRNA <==\n", "NC_047559.1\t10358\t10360\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10402\t10404\n", "NC_047559.1\t10413\t10415\n", "NC_047559.1\t10457\t10459\n", "NC_047559.1\t10479\t10481\n", "NC_047559.1\t10592\t10594\n", "NC_047559.1\t10612\t10614\n", "NC_047559.1\t10645\t10647\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-lncRNA <==\n", "NC_047559.1\t1664745\t1664747\n", "NC_047559.1\t1664773\t1664775\n", "NC_047559.1\t1665259\t1665261\n", "NC_047559.1\t3828724\t3828726\n", "NC_047559.1\t3828793\t3828795\n", "NC_047559.1\t3829511\t3829513\n", "NC_047559.1\t3829807\t3829809\n", "NC_047559.1\t3829827\t3829829\n", "NC_047559.1\t3829837\t3829839\n", "NC_047559.1\t3829843\t3829845\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-lncRNA <==\n", "NC_047559.1\t10292\t10294\n", "NC_047559.1\t10314\t10316\n", "NC_047559.1\t418447\t418449\n", "NC_047559.1\t788862\t788864\n", 
"NC_047559.1\t788889\t788891\n", "NC_047559.1\t1246663\t1246665\n", "NC_047559.1\t1247881\t1247883\n", "NC_047559.1\t1664902\t1664904\n", "NC_047559.1\t1665111\t1665113\n", "NC_047559.1\t1665377\t1665379\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-lncRNA <==\n", "NC_047559.1\t10270\t10272\n", "NC_047559.1\t10358\t10360\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10402\t10404\n", "NC_047559.1\t10413\t10415\n", "NC_047559.1\t10457\t10459\n", "NC_047559.1\t10479\t10481\n", "NC_047559.1\t10501\t10503\n", "NC_047559.1\t10545\t10547\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-lncRNA <==\n", "NC_047559.1\t419514\t419516\n", "NC_047559.1\t1664773\t1664775\n", "NC_047559.1\t1664902\t1664904\n", "NC_047559.1\t1665111\t1665113\n", "NC_047559.1\t2911325\t2911327\n", "NC_047559.1\t3828724\t3828726\n", "NC_047559.1\t3828793\t3828795\n", "NC_047559.1\t3828838\t3828840\n", "NC_047559.1\t3829511\t3829513\n", "NC_047559.1\t3829807\t3829809\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-lncRNA <==\n", "NC_047559.1\t10479\t10481\n", "NC_047559.1\t416920\t416922\n", "NC_047559.1\t789880\t789882\n", "NC_047559.1\t789898\t789900\n", "NC_047559.1\t1332942\t1332944\n", "NC_047559.1\t1664032\t1664034\n", "NC_047559.1\t1664042\t1664044\n", "NC_047559.1\t1688423\t1688425\n", "NC_047559.1\t1688433\t1688435\n", "NC_047559.1\t1688466\t1688468\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-lncRNA <==\n", "NC_047559.1\t10270\t10272\n", "NC_047559.1\t10292\t10294\n", "NC_047559.1\t10314\t10316\n", "NC_047559.1\t10358\t10360\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10402\t10404\n", "NC_047559.1\t10413\t10415\n", "NC_047559.1\t10457\t10459\n", "NC_047559.1\t10545\t10547\n", "\n", "==> 
zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-lncRNA <==\n", "NC_047559.1\t1664658\t1664660\n", "NC_047559.1\t1664745\t1664747\n", "NC_047559.1\t1664773\t1664775\n", "NC_047559.1\t1664902\t1664904\n", "NC_047559.1\t1665259\t1665261\n", "NC_047559.1\t2198255\t2198257\n", "NC_047559.1\t3828724\t3828726\n", "NC_047559.1\t3828793\t3828795\n", "NC_047559.1\t3829511\t3829513\n", "NC_047559.1\t3829807\t3829809\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-lncRNA <==\n", "NC_047559.1\t10358\t10360\n", "NC_047559.1\t415945\t415947\n", "NC_047559.1\t415957\t415959\n", "NC_047559.1\t415975\t415977\n", "NC_047559.1\t416920\t416922\n", "NC_047559.1\t417337\t417339\n", "NC_047559.1\t417944\t417946\n", "NC_047559.1\t419514\t419516\n", "NC_047559.1\t789774\t789776\n", "NC_047559.1\t789880\t789882\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-lncRNA <==\n", "NC_047559.1\t10292\t10294\n", "NC_047559.1\t10314\t10316\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10457\t10459\n", "NC_047559.1\t10479\t10481\n", "NC_047559.1\t10501\t10503\n", "NC_047559.1\t10545\t10547\n", "NC_047559.1\t10547\t10549\n", "NC_047559.1\t10567\t10569\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-lncRNA <==\n", "NC_047559.1\t417337\t417339\n", "NC_047559.1\t419933\t419935\n", "NC_047559.1\t1664658\t1664660\n", "NC_047559.1\t1664745\t1664747\n", "NC_047559.1\t1664773\t1664775\n", "NC_047559.1\t1664902\t1664904\n", "NC_047559.1\t1665111\t1665113\n", "NC_047559.1\t1665259\t1665261\n", "NC_047559.1\t3828724\t3828726\n", "NC_047559.1\t3828793\t3828795\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-lncRNA <==\n", "NC_047559.1\t255462\t255464\n", "NC_047559.1\t418569\t418571\n", "NC_047559.1\t418582\t418584\n", "NC_047559.1\t419514\t419516\n", 
"NC_047559.1\t788844\t788846\n", "NC_047559.1\t788862\t788864\n", "NC_047559.1\t789687\t789689\n", "NC_047559.1\t789704\t789706\n", "NC_047559.1\t1248307\t1248309\n", "NC_047559.1\t1335958\t1335960\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-lncRNA <==\n", "NC_047559.1\t10155\t10157\n", "NC_047559.1\t10215\t10217\n", "NC_047559.1\t10270\t10272\n", "NC_047559.1\t10292\t10294\n", "NC_047559.1\t10314\t10316\n", "NC_047559.1\t10358\t10360\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10402\t10404\n", "NC_047559.1\t10413\t10415\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-lncRNA <==\n", "NC_047559.1\t416078\t416080\n", "NC_047559.1\t419784\t419786\n", "NC_047559.1\t420805\t420807\n", "NC_047559.1\t1214427\t1214429\n", "NC_047559.1\t1214440\t1214442\n", "NC_047559.1\t1214479\t1214481\n", "NC_047559.1\t1664457\t1664459\n", "NC_047559.1\t1664745\t1664747\n", "NC_047559.1\t1664773\t1664775\n", "NC_047559.1\t1665111\t1665113\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-lncRNA <==\n", "NC_047559.1\t415739\t415741\n", "NC_047559.1\t415975\t415977\n", "NC_047559.1\t415988\t415990\n", "NC_047559.1\t415998\t416000\n", "NC_047559.1\t416052\t416054\n", "NC_047559.1\t416071\t416073\n", "NC_047559.1\t416088\t416090\n", "NC_047559.1\t419514\t419516\n", "NC_047559.1\t420583\t420585\n", "NC_047559.1\t420677\t420679\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-lncRNA <==\n", "NC_047559.1\t10270\t10272\n", "NC_047559.1\t10292\t10294\n", "NC_047559.1\t10314\t10316\n", "NC_047559.1\t10358\t10360\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10402\t10404\n", "NC_047559.1\t10413\t10415\n", "NC_047559.1\t10457\t10459\n", "NC_047559.1\t10479\t10481\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "==> 
zr3616_union-averages_5x.bedgraph.NO-SNPs-Meth.bed-lncRNA <==\n", "NC_047559.1\t417944\t417946\n", "NC_047559.1\t418038\t418040\n", "NC_047559.1\t418118\t418120\n", "NC_047559.1\t418153\t418155\n", "NC_047559.1\t418166\t418168\n", "NC_047559.1\t418179\t418181\n", "NC_047559.1\t418646\t418648\n", "NC_047559.1\t418665\t418667\n", "NC_047559.1\t419784\t419786\n", "NC_047559.1\t419933\t419935\n", "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-sparseMeth.bed-lncRNA <==\n", "NC_047559.1\t255751\t255753\n", "NC_047559.1\t255789\t255791\n", "NC_047559.1\t415945\t415947\n", "NC_047559.1\t415975\t415977\n", "NC_047559.1\t415998\t416000\n", "NC_047559.1\t416071\t416073\n", "NC_047559.1\t416078\t416080\n", "NC_047559.1\t416088\t416090\n", "NC_047559.1\t416920\t416922\n", "NC_047559.1\t416993\t416995\n", "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-unMeth.bed-lncRNA <==\n", "NC_047559.1\t10155\t10157\n", "NC_047559.1\t10215\t10217\n", "NC_047559.1\t10270\t10272\n", "NC_047559.1\t10292\t10294\n", "NC_047559.1\t10314\t10316\n", "NC_047559.1\t10358\t10360\n", "NC_047559.1\t10380\t10382\n", "NC_047559.1\t10391\t10393\n", "NC_047559.1\t10402\t10404\n", "NC_047559.1\t10413\t10415\n" ] } ], "source": [ "#Check output\n", "!head *lncRNA" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 16324 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-lncRNA\n", " 24360 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-lncRNA\n", " 215175 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-lncRNA\n", " 15369 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-lncRNA\n", " 24724 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-lncRNA\n", " 218468 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-lncRNA\n", " 15533 
zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-lncRNA\n", " 23462 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-lncRNA\n", " 217209 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-lncRNA\n", " 16604 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-lncRNA\n", " 25814 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-lncRNA\n", " 221545 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-lncRNA\n", " 14323 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-lncRNA\n", " 21530 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-lncRNA\n", " 206911 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-lncRNA\n", " 15792 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-lncRNA\n", " 23333 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-lncRNA\n", " 217571 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-lncRNA\n", " 16970 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-lncRNA\n", " 22150 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-lncRNA\n", " 211134 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-lncRNA\n", " 15503 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-lncRNA\n", " 22957 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-lncRNA\n", " 210596 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-lncRNA\n", " 18393 zr3616_union-averages_5x.bedgraph.NO-SNPs-Meth.bed-lncRNA\n", " 40024 zr3616_union-averages_5x.bedgraph.NO-SNPs-sparseMeth.bed-lncRNA\n", " 300543 zr3616_union-averages_5x.bedgraph.NO-SNPs-unMeth.bed-lncRNA\n", " 2392317 total\n" ] } ], "source": [ 
"#Count number of overlaps\n", "!wc -l *lncRNA" ] }, { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [], "source": [ "#Get line counts for each file\n", "#Remove 28th line (the total row appended by wc)\n", "#Ensure output is tab-delimited\n", "#Save output\n", "!wc -l *-lncRNA \\\n", "| sed '28,$ d' \\\n", "| awk '{print $1\"\\t\"$2}' \\\n", "> zr3616_5x-lncRNA-counts.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4j. Transposable elements" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "for f in *bed\n", "do\n", " /opt/homebrew/bin/intersectBed \\\n", " -u \\\n", " -a ${f} \\\n", " -b ../../genome-feature-files/cgigas_uk_roslin_v1_rm.te.bed \\\n", " > ${f}-TE\n", "done" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-TE <==\n", "NC_047559.1\t91258\t91260\n", "NC_047559.1\t91312\t91314\n", "NC_047559.1\t232127\t232129\n", "NC_047559.1\t234978\t234980\n", "NC_047559.1\t264885\t264887\n", "NC_047559.1\t264911\t264913\n", "NC_047559.1\t264924\t264926\n", "NC_047559.1\t293248\t293250\n", "NC_047559.1\t294921\t294923\n", "NC_047559.1\t294970\t294972\n", "\n", "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-TE <==\n", "NC_047559.1\t26463\t26465\n", "NC_047559.1\t26485\t26487\n", "NC_047559.1\t26966\t26968\n", "NC_047559.1\t27211\t27213\n", "NC_047559.1\t44183\t44185\n", "NC_047559.1\t46646\t46648\n", "NC_047559.1\t47794\t47796\n", "NC_047559.1\t50864\t50866\n", "NC_047559.1\t50869\t50871\n", "NC_047559.1\t50878\t50880\n", "\n", "==> zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-TE <==\n", "NC_047559.1\t15746\t15748\n", "NC_047559.1\t24588\t24590\n", "NC_047559.1\t26419\t26421\n", "NC_047559.1\t26448\t26450\n", "NC_047559.1\t26509\t26511\n", 
"NC_047559.1\t26532\t26534\n", "NC_047559.1\t26547\t26549\n", "NC_047559.1\t26555\t26557\n", "NC_047559.1\t26561\t26563\n", "NC_047559.1\t26573\t26575\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-TE <==\n", "NC_047559.1\t52536\t52538\n", "NC_047559.1\t52564\t52566\n", "NC_047559.1\t140334\t140336\n", "NC_047559.1\t141800\t141802\n", "NC_047559.1\t141858\t141860\n", "NC_047559.1\t141894\t141896\n", "NC_047559.1\t291290\t291292\n", "NC_047559.1\t291319\t291321\n", "NC_047559.1\t292774\t292776\n", "NC_047559.1\t292780\t292782\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-TE <==\n", "NC_047559.1\t26966\t26968\n", "NC_047559.1\t38511\t38513\n", "NC_047559.1\t39496\t39498\n", "NC_047559.1\t42362\t42364\n", "NC_047559.1\t45139\t45141\n", "NC_047559.1\t47276\t47278\n", "NC_047559.1\t47282\t47284\n", "NC_047559.1\t49447\t49449\n", "NC_047559.1\t50869\t50871\n", "NC_047559.1\t50878\t50880\n", "\n", "==> zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-TE <==\n", "NC_047559.1\t15746\t15748\n", "NC_047559.1\t26419\t26421\n", "NC_047559.1\t26448\t26450\n", "NC_047559.1\t26463\t26465\n", "NC_047559.1\t26485\t26487\n", "NC_047559.1\t26509\t26511\n", "NC_047559.1\t26532\t26534\n", "NC_047559.1\t26547\t26549\n", "NC_047559.1\t26555\t26557\n", "NC_047559.1\t26561\t26563\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-TE <==\n", "NC_047559.1\t110811\t110813\n", "NC_047559.1\t160063\t160065\n", "NC_047559.1\t160111\t160113\n", "NC_047559.1\t160124\t160126\n", "NC_047559.1\t160131\t160133\n", "NC_047559.1\t160136\t160138\n", "NC_047559.1\t160142\t160144\n", "NC_047559.1\t160179\t160181\n", "NC_047559.1\t217636\t217638\n", "NC_047559.1\t232482\t232484\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-TE <==\n", "NC_047559.1\t38052\t38054\n", "NC_047559.1\t46678\t46680\n", 
"NC_047559.1\t50869\t50871\n", "NC_047559.1\t50878\t50880\n", "NC_047559.1\t50906\t50908\n", "NC_047559.1\t50911\t50913\n", "NC_047559.1\t50920\t50922\n", "NC_047559.1\t50927\t50929\n", "NC_047559.1\t50932\t50934\n", "NC_047559.1\t50941\t50943\n", "\n", "==> zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-TE <==\n", "NC_047559.1\t15746\t15748\n", "NC_047559.1\t24588\t24590\n", "NC_047559.1\t25485\t25487\n", "NC_047559.1\t25667\t25669\n", "NC_047559.1\t26135\t26137\n", "NC_047559.1\t26419\t26421\n", "NC_047559.1\t26448\t26450\n", "NC_047559.1\t26463\t26465\n", "NC_047559.1\t26485\t26487\n", "NC_047559.1\t26509\t26511\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-TE <==\n", "NC_047559.1\t60492\t60494\n", "NC_047559.1\t110795\t110797\n", "NC_047559.1\t110838\t110840\n", "NC_047559.1\t113151\t113153\n", "NC_047559.1\t188784\t188786\n", "NC_047559.1\t188796\t188798\n", "NC_047559.1\t188800\t188802\n", "NC_047559.1\t188813\t188815\n", "NC_047559.1\t262979\t262981\n", "NC_047559.1\t262982\t262984\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-TE <==\n", "NC_047559.1\t26448\t26450\n", "NC_047559.1\t26463\t26465\n", "NC_047559.1\t37587\t37589\n", "NC_047559.1\t42499\t42501\n", "NC_047559.1\t42696\t42698\n", "NC_047559.1\t42709\t42711\n", "NC_047559.1\t42715\t42717\n", "NC_047559.1\t42738\t42740\n", "NC_047559.1\t42762\t42764\n", "NC_047559.1\t49031\t49033\n", "\n", "==> zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-TE <==\n", "NC_047559.1\t15746\t15748\n", "NC_047559.1\t26419\t26421\n", "NC_047559.1\t26485\t26487\n", "NC_047559.1\t26509\t26511\n", "NC_047559.1\t26532\t26534\n", "NC_047559.1\t26547\t26549\n", "NC_047559.1\t26555\t26557\n", "NC_047559.1\t26561\t26563\n", "NC_047559.1\t26573\t26575\n", "NC_047559.1\t26591\t26593\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-TE 
<==\n", "NC_047559.1\t60371\t60373\n", "NC_047559.1\t68070\t68072\n", "NC_047559.1\t110838\t110840\n", "NC_047559.1\t140520\t140522\n", "NC_047559.1\t140539\t140541\n", "NC_047559.1\t140557\t140559\n", "NC_047559.1\t144127\t144129\n", "NC_047559.1\t144205\t144207\n", "NC_047559.1\t144210\t144212\n", "NC_047559.1\t154324\t154326\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-TE <==\n", "NC_047559.1\t38694\t38696\n", "NC_047559.1\t41242\t41244\n", "NC_047559.1\t41292\t41294\n", "NC_047559.1\t42362\t42364\n", "NC_047559.1\t42392\t42394\n", "NC_047559.1\t42410\t42412\n", "NC_047559.1\t42415\t42417\n", "NC_047559.1\t42468\t42470\n", "NC_047559.1\t42652\t42654\n", "NC_047559.1\t42715\t42717\n", "\n", "==> zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-TE <==\n", "NC_047559.1\t15746\t15748\n", "NC_047559.1\t24588\t24590\n", "NC_047559.1\t25485\t25487\n", "NC_047559.1\t26419\t26421\n", "NC_047559.1\t26448\t26450\n", "NC_047559.1\t26463\t26465\n", "NC_047559.1\t26485\t26487\n", "NC_047559.1\t26509\t26511\n", "NC_047559.1\t26532\t26534\n", "NC_047559.1\t26547\t26549\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-TE <==\n", "NC_047559.1\t90379\t90381\n", "NC_047559.1\t90383\t90385\n", "NC_047559.1\t140241\t140243\n", "NC_047559.1\t140257\t140259\n", "NC_047559.1\t140284\t140286\n", "NC_047559.1\t140286\t140288\n", "NC_047559.1\t140292\t140294\n", "NC_047559.1\t140295\t140297\n", "NC_047559.1\t140539\t140541\n", "NC_047559.1\t140557\t140559\n", "\n", "==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-TE <==\n", "NC_047559.1\t26463\t26465\n", "NC_047559.1\t26485\t26487\n", "NC_047559.1\t26509\t26511\n", "NC_047559.1\t44386\t44388\n", "NC_047559.1\t46646\t46648\n", "NC_047559.1\t48651\t48653\n", "NC_047559.1\t51099\t51101\n", "NC_047559.1\t51178\t51180\n", "NC_047559.1\t51242\t51244\n", "NC_047559.1\t51484\t51486\n", "\n", 
"==> zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-TE <==\n", "NC_047559.1\t15746\t15748\n", "NC_047559.1\t26419\t26421\n", "NC_047559.1\t26448\t26450\n", "NC_047559.1\t26532\t26534\n", "NC_047559.1\t26547\t26549\n", "NC_047559.1\t26555\t26557\n", "NC_047559.1\t26561\t26563\n", "NC_047559.1\t26573\t26575\n", "NC_047559.1\t26591\t26593\n", "NC_047559.1\t26593\t26595\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-TE <==\n", "NC_047559.1\t140241\t140243\n", "NC_047559.1\t140257\t140259\n", "NC_047559.1\t140284\t140286\n", "NC_047559.1\t140286\t140288\n", "NC_047559.1\t140292\t140294\n", "NC_047559.1\t140295\t140297\n", "NC_047559.1\t231630\t231632\n", "NC_047559.1\t231691\t231693\n", "NC_047559.1\t278433\t278435\n", "NC_047559.1\t278437\t278439\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-TE <==\n", "NC_047559.1\t26135\t26137\n", "NC_047559.1\t44918\t44920\n", "NC_047559.1\t45010\t45012\n", "NC_047559.1\t45103\t45105\n", "NC_047559.1\t45122\t45124\n", "NC_047559.1\t50848\t50850\n", "NC_047559.1\t50857\t50859\n", "NC_047559.1\t50864\t50866\n", "NC_047559.1\t50869\t50871\n", "NC_047559.1\t50878\t50880\n", "\n", "==> zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-TE <==\n", "NC_047559.1\t15746\t15748\n", "NC_047559.1\t24588\t24590\n", "NC_047559.1\t25485\t25487\n", "NC_047559.1\t25667\t25669\n", "NC_047559.1\t26419\t26421\n", "NC_047559.1\t26448\t26450\n", "NC_047559.1\t26463\t26465\n", "NC_047559.1\t26485\t26487\n", "NC_047559.1\t26509\t26511\n", "NC_047559.1\t26532\t26534\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-TE <==\n", "NC_047559.1\t56090\t56092\n", "NC_047559.1\t60492\t60494\n", "NC_047559.1\t68070\t68072\n", "NC_047559.1\t160063\t160065\n", "NC_047559.1\t160142\t160144\n", "NC_047559.1\t276024\t276026\n", "NC_047559.1\t295796\t295798\n", 
"NC_047559.1\t356378\t356380\n", "NC_047559.1\t356395\t356397\n", "NC_047559.1\t356430\t356432\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-TE <==\n", "NC_047559.1\t26419\t26421\n", "NC_047559.1\t26448\t26450\n", "NC_047559.1\t26463\t26465\n", "NC_047559.1\t27110\t27112\n", "NC_047559.1\t45010\t45012\n", "NC_047559.1\t49957\t49959\n", "NC_047559.1\t50821\t50823\n", "NC_047559.1\t50832\t50834\n", "NC_047559.1\t50843\t50845\n", "NC_047559.1\t50878\t50880\n", "\n", "==> zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-TE <==\n", "NC_047559.1\t15746\t15748\n", "NC_047559.1\t25485\t25487\n", "NC_047559.1\t26485\t26487\n", "NC_047559.1\t26509\t26511\n", "NC_047559.1\t26532\t26534\n", "NC_047559.1\t26547\t26549\n", "NC_047559.1\t26555\t26557\n", "NC_047559.1\t26561\t26563\n", "NC_047559.1\t26573\t26575\n", "NC_047559.1\t26591\t26593\n", "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-Meth.bed-TE <==\n", "NC_047559.1\t110795\t110797\n", "NC_047559.1\t140284\t140286\n", "NC_047559.1\t140286\t140288\n", "NC_047559.1\t231630\t231632\n", "NC_047559.1\t231691\t231693\n", "NC_047559.1\t296198\t296200\n", "NC_047559.1\t310465\t310467\n", "NC_047559.1\t310528\t310530\n", "NC_047559.1\t311449\t311451\n", "NC_047559.1\t311476\t311478\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-sparseMeth.bed-TE <==\n", "NC_047559.1\t45010\t45012\n", "NC_047559.1\t46646\t46648\n", "NC_047559.1\t47276\t47278\n", "NC_047559.1\t47794\t47796\n", "NC_047559.1\t50821\t50823\n", "NC_047559.1\t50848\t50850\n", "NC_047559.1\t50857\t50859\n", "NC_047559.1\t50864\t50866\n", "NC_047559.1\t50869\t50871\n", "NC_047559.1\t50878\t50880\n", "\n", "==> zr3616_union-averages_5x.bedgraph.NO-SNPs-unMeth.bed-TE <==\n", "NC_047559.1\t15746\t15748\n", "NC_047559.1\t24588\t24590\n", "NC_047559.1\t25485\t25487\n", "NC_047559.1\t25667\t25669\n", 
"NC_047559.1\t26135\t26137\n", "NC_047559.1\t26419\t26421\n", "NC_047559.1\t26448\t26450\n", "NC_047559.1\t26463\t26465\n", "NC_047559.1\t26485\t26487\n", "NC_047559.1\t26509\t26511\n" ] } ], "source": [ "#Check output\n", "!head *TE" ] }, { "cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 246906 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-TE\n", " 345635 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-TE\n", " 2341295 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-TE\n", " 248977 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-TE\n", " 352367 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-TE\n", " 2353758 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-TE\n", " 244808 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-TE\n", " 334790 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-TE\n", " 2329821 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-TE\n", " 252579 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-TE\n", " 365887 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-TE\n", " 2452141 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-TE\n", " 225902 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-TE\n", " 314653 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-TE\n", " 2208164 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-TE\n", " 244767 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-TE\n", " 347233 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-TE\n", " 2382267 
zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-TE\n", " 250436 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-TE\n", " 325021 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-TE\n", " 2235338 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-TE\n", " 242278 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-Meth.bed-TE\n", " 342100 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-sparseMeth.bed-TE\n", " 2296255 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe._5x.bedgraph.NO-SNPs-unMeth.bed-TE\n", " 290272 zr3616_union-averages_5x.bedgraph.NO-SNPs-Meth.bed-TE\n", " 537496 zr3616_union-averages_5x.bedgraph.NO-SNPs-sparseMeth.bed-TE\n", " 3508620 zr3616_union-averages_5x.bedgraph.NO-SNPs-unMeth.bed-TE\n", " 27619766 total\n" ] } ], "source": [ "#Count number of overlaps\n", "!wc -l *TE" ] }, { "cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [], "source": [ "#Get line counts for each file\n", "# Remove 28th line (total entries)\n", "#Ensure output is tab-delimited\n", "#Save output\n", "!wc -l *-TE \\\n", "| sed '28,$ d' \\\n", "| awk '{print $1\"\\t\"$2}' \\\n", "> zr3616_5x-TE-counts.txt" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 1 }