{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Genomic Location of DML" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In this notebook, I will identify the genomic locations of [sex-specific DML identified with `methylKit`](https://github.com/epigeneticstoocean/2018_L18-adult-methylation/blob/main/code/03.4-methylkit.Rmd). " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 0. Set working directory" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'/Users/yaaminivenkataraman/Documents/ceabigr/code'" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pwd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/yaaminivenkataraman/Documents/ceabigr/output\n" ] } ], "source": [ "cd ../output/" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "mkdir: DML-characterization: File exists\r\n" ] } ], "source": [ "mkdir DML-characterization" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/yaaminivenkataraman/Documents/ceabigr/output/DML-characterization\n" ] } ], "source": [ "cd DML-characterization/" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "bedtoolsDirectory = \"/opt/homebrew/bin/\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. Remove C->T SNPs from DML" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I will count how many DML overlap with SNPs, then remove those overlapping DML before proceeding with analyses." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1a. 
Create BEDfile" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "#Build a three-column BED file from the SNP table (column 1, then column 2 as both start and end)\n", "#NOTE(review): start == end gives zero-length intervals; confirm this matches the coordinates used in the DML BED files\n", "!awk '{print $1\"\\t\"$2\"\\t\"$2}' ../../genome-features/unique-CT-SNPs.tab \\\n", "> ../../genome-features/unique-CT-SNPs.bed" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_007175.2\t12774\t12774\n", "NC_007175.2\t15486\t15486\n", "NC_007175.2\t16441\t16441\n", "NC_007175.2\t2608\t2608\n", "NC_007175.2\t6075\t6075\n", "NC_007175.2\t6169\t6169\n", "NC_007175.2\t6742\t6742\n", "NC_007175.2\t7069\t7069\n", "NC_007175.2\t7089\t7089\n", "NC_007175.2\t7898\t7898\n", " 517245 ../../genome-features/unique-CT-SNPs.bed\n" ] } ], "source": [ "#Preview the SNP BED file, then count its lines\n", "!head ../../genome-features/unique-CT-SNPs.bed\n", "!wc -l ../../genome-features/unique-CT-SNPs.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1b. Identify overlaps" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t402824\t402826\tf_DML\t50\n", "NC_035780.1\t21881895\t21881897\tf_DML\t-64\n", "NC_035780.1\t30125095\t30125097\tf_DML\t52\n", "NC_035780.1\t30763931\t30763933\tf_DML\t57\n", "NC_035780.1\t54165957\t54165959\tf_DML\t50\n", "NC_035780.1\t57958577\t57958579\tf_DML\t-56\n", "NC_035780.1\t58188856\t58188858\tf_DML\t-50\n", "NC_035780.1\t60369255\t60369257\tf_DML\t51\n", "NC_035780.1\t60372545\t60372547\tf_DML\t54\n", "NC_035781.1\t636330\t636332\tf_DML\t-51\n", " 89 ../../data/female_dml-NO-SNPs.bed\n" ] } ], "source": [ "#Subtract the C->T SNP positions from the female DML and write the remainder to a new BED file\n", "#NOTE(review): without -A, subtractBed removes only the overlapping bases of a DML; confirm partially overlapped DML are dropped as intended\n", "!{bedtoolsDirectory}subtractBed -a ../../data/female_dml.bed \\\n", "-b ../../genome-features/unique-CT-SNPs.bed \\\n", "> ../../data/female_dml-NO-SNPs.bed\n", "\n", "#Preview the SNP-filtered female DML\n", "!head ../../data/female_dml-NO-SNPs.bed" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { 
"name": "stdout", "output_type": "stream", "text": [ " 128 ../../data/female_dml.bed\n", " 89 ../../data/female_dml-NO-SNPs.bed\n" ] } ], "source": [ "#Compare # DML with and without SNPs\n", "!wc -l ../../data/female_dml.bed\n", "!wc -l ../../data/female_dml-NO-SNPs.bed" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t250321\t250323\tm_DML\t-58\r\n", "NC_035780.1\t250344\t250346\tm_DML\t-59\r\n", "NC_035780.1\t250387\t250389\tm_DML\t-58\r\n", "NC_035780.1\t250394\t250396\tm_DML\t-58\r\n", "NC_035780.1\t250416\t250418\tm_DML\t-55\r\n", "NC_035780.1\t250425\t250427\tm_DML\t-52\r\n", "NC_035780.1\t250453\t250455\tm_DML\t-51\r\n", "NC_035780.1\t370644\t370646\tm_DML\t53\r\n", "NC_035780.1\t575312\t575314\tm_DML\t-51\r\n", "NC_035780.1\t575340\t575342\tm_DML\t57\r\n" ] } ], "source": [ "#Remove SNPs from male DML list and save as a new file\n", "!{bedtoolsDirectory}subtractBed \\\n", "-a ../../data/male_dml.bed \\\n", "-b ../../genome-features/unique-CT-SNPs.bed \\\n", "> ../../data/male_dml-NO-SNPs.bed\n", "!head ../../data/male_dml-NO-SNPs.bed" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 4175 ../../data/male_dml.bed\n", " 2916 ../../data/male_dml-NO-SNPs.bed\n" ] } ], "source": [ "#Compare # DML with and without SNPs\n", "!wc -l ../../data/male_dml.bed\n", "!wc -l ../../data/male_dml-NO-SNPs.bed" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "femaleDML = \"../../data/female_dml-NO-SNPs.bed\"" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "maleDML = \"../../data/male_dml-NO-SNPs.bed\"" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 43\n", " 46\n" ] } ], 
"source": [ "#Count the number of female hypomethylated DML\n", "#Count the number of female hypermethylated DML\n", "!grep \"-\" {femaleDML} | wc -l\n", "!grep -v \"-\" {femaleDML} | wc -l" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 1343\n", " 1573\n" ] } ], "source": [ "#Count the number of male hypomethylated DML\n", "#Count the number of male hypermethylated DML\n", "!grep \"-\" {maleDML} | wc -l\n", "!grep -v \"-\" {maleDML} | wc -l" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035781.1\t15563566\t15563568\tf_DML\t54\r\n" ] } ], "source": [ "#Look at common DML between female and male lists\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a {femaleDML} \\\n", "-b {maleDML} \\" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. Characterize genomic locations of DML" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I will look at overlaps between genome features and either female- or male-DML." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3a. 
Gene" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t402824\t402826\tf_DML\t50\n", "NC_035780.1\t21881895\t21881897\tf_DML\t-64\n", "NC_035780.1\t30125095\t30125097\tf_DML\t52\n", "NC_035780.1\t54165957\t54165959\tf_DML\t50\n", "NC_035780.1\t58188856\t58188858\tf_DML\t-50\n", "NC_035780.1\t60369255\t60369257\tf_DML\t51\n", "NC_035780.1\t60372545\t60372547\tf_DML\t54\n", "NC_035781.1\t636330\t636332\tf_DML\t-51\n", "NC_035781.1\t5269943\t5269945\tf_DML\t-50\n", "NC_035781.1\t14949530\t14949532\tf_DML\t-53\n", " 74 female_dml-Gene.bed\n" ] } ], "source": [ "#Find overlaps between DML and feature\n", "#Look at output\n", "#Count number of overlaps\n", "\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a {femaleDML} \\\n", "-b ../../genome-features/C_virginica-3.0-gene.gff \\\n", "> female_dml-Gene.bed\n", "!head female_dml-Gene.bed\n", "!wc -l female_dml-Gene.bed" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t250321\t250323\tm_DML\t-58\n", "NC_035780.1\t250344\t250346\tm_DML\t-59\n", "NC_035780.1\t250387\t250389\tm_DML\t-58\n", "NC_035780.1\t250394\t250396\tm_DML\t-58\n", "NC_035780.1\t250416\t250418\tm_DML\t-55\n", "NC_035780.1\t250425\t250427\tm_DML\t-52\n", "NC_035780.1\t250453\t250455\tm_DML\t-51\n", "NC_035780.1\t370644\t370646\tm_DML\t53\n", "NC_035780.1\t575312\t575314\tm_DML\t-51\n", "NC_035780.1\t575340\t575342\tm_DML\t57\n", " 2322 male_dml-Gene.bed\n" ] } ], "source": [ "#Find overlaps between DML and feature\n", "#Look at output\n", "#Count number of overlaps\n", "\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a {maleDML} \\\n", "-b ../../genome-features/C_virginica-3.0-gene.gff \\\n", "> male_dml-Gene.bed\n", "!head male_dml-Gene.bed\n", "!wc -l male_dml-Gene.bed" ] }, { "cell_type": "markdown", "metadata": {}, 
"source": [ "### 3b. Exon UTR" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035781.1\t42648337\t42648339\tf_DML\t50\n", "NC_035781.1\t52714896\t52714898\tf_DML\t54\n", "NC_035782.1\t45343464\t45343466\tf_DML\t50\n", "NC_035784.1\t29726386\t29726388\tf_DML\t-53\n", "NC_035784.1\t48340609\t48340611\tf_DML\t50\n", "NC_035784.1\t53502815\t53502817\tf_DML\t-52\n", "NC_035784.1\t87571843\t87571845\tf_DML\t52\n", "NC_035786.1\t49333359\t49333361\tf_DML\t-54\n", " 8 female_dml-exonUTR.bed\n" ] } ], "source": [ "#Find overlaps between DML and feature\n", "#Look at output\n", "#Count number of overlaps\n", "\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a {femaleDML} \\\n", "-b ../../genome-features/C_virginica-3.0-exonUTR.gff \\\n", "> female_dml-exonUTR.bed\n", "!head female_dml-exonUTR.bed\n", "!wc -l female_dml-exonUTR.bed" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t370644\t370646\tm_DML\t53\n", "NC_035780.1\t7386872\t7386874\tm_DML\t53\n", "NC_035780.1\t11625096\t11625098\tm_DML\t51\n", "NC_035780.1\t19444899\t19444901\tm_DML\t-53\n", "NC_035780.1\t22111883\t22111885\tm_DML\t66\n", "NC_035780.1\t22242778\t22242780\tm_DML\t57\n", "NC_035780.1\t22513225\t22513227\tm_DML\t-65\n", "NC_035780.1\t23232584\t23232586\tm_DML\t-50\n", "NC_035780.1\t27560408\t27560410\tm_DML\t50\n", "NC_035780.1\t32417164\t32417166\tm_DML\t62\n", " 190 male_dml-exonUTR.bed\n" ] } ], "source": [ "#Find overlaps between DML and feature\n", "#Look at output\n", "#Count number of overlaps\n", "\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a {maleDML} \\\n", "-b ../../genome-features/C_virginica-3.0-exonUTR.gff \\\n", "> male_dml-exonUTR.bed\n", "!head male_dml-exonUTR.bed\n", "!wc -l male_dml-exonUTR.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": 
[ "### 3c. CDS" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t402824\t402826\tf_DML\t50\n", "NC_035780.1\t60372545\t60372547\tf_DML\t54\n", "NC_035781.1\t5269943\t5269945\tf_DML\t-50\n", "NC_035781.1\t14949676\t14949678\tf_DML\t-57\n", "NC_035781.1\t56223766\t56223768\tf_DML\t52\n", "NC_035782.1\t6826634\t6826636\tf_DML\t59\n", "NC_035782.1\t6881790\t6881792\tf_DML\t71\n", "NC_035782.1\t10168887\t10168889\tf_DML\t-51\n", "NC_035782.1\t51760228\t51760230\tf_DML\t58\n", "NC_035782.1\t53520805\t53520807\tf_DML\t-53\n", " 29 female_dml-CDS.bed\n" ] } ], "source": [ "#Find overlaps between DML and feature\n", "#Look at output\n", "#Count number of overlaps\n", "\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a {femaleDML} \\\n", "-b ../../genome-features/C_virginica-3.0-CDS.gff \\\n", "> female_dml-CDS.bed\n", "!head female_dml-CDS.bed\n", "!wc -l female_dml-CDS.bed" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t250321\t250323\tm_DML\t-58\n", "NC_035780.1\t250344\t250346\tm_DML\t-59\n", "NC_035780.1\t250387\t250389\tm_DML\t-58\n", "NC_035780.1\t250394\t250396\tm_DML\t-58\n", "NC_035780.1\t250416\t250418\tm_DML\t-55\n", "NC_035780.1\t250425\t250427\tm_DML\t-52\n", "NC_035780.1\t250453\t250455\tm_DML\t-51\n", "NC_035780.1\t575312\t575314\tm_DML\t-51\n", "NC_035780.1\t575340\t575342\tm_DML\t57\n", "NC_035780.1\t2764956\t2764958\tm_DML\t60\n", " 815 male_dml-CDS.bed\n" ] } ], "source": [ "#Find overlaps between DML and feature\n", "#Look at output\n", "#Count number of overlaps\n", "\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a {maleDML} \\\n", "-b ../../genome-features/C_virginica-3.0-CDS.gff \\\n", "> male_dml-CDS.bed\n", "!head male_dml-CDS.bed\n", "!wc -l male_dml-CDS.bed" ] }, { "cell_type": "markdown", "metadata": {}, 
"source": [ "### 3d. Intron" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t21881895\t21881897\tf_DML\t-64\n", "NC_035780.1\t30125095\t30125097\tf_DML\t52\n", "NC_035780.1\t54165957\t54165959\tf_DML\t50\n", "NC_035780.1\t58188856\t58188858\tf_DML\t-50\n", "NC_035780.1\t60369255\t60369257\tf_DML\t51\n", "NC_035781.1\t636330\t636332\tf_DML\t-51\n", "NC_035781.1\t14949530\t14949532\tf_DML\t-53\n", "NC_035781.1\t15554538\t15554540\tf_DML\t53\n", "NC_035781.1\t15563566\t15563568\tf_DML\t54\n", "NC_035781.1\t30802003\t30802005\tf_DML\t-50\n", " 37 female_dml-intron.bed\n" ] } ], "source": [ "#Find overlaps between DML and feature\n", "#Look at output\n", "#Count number of overlaps\n", "\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a {femaleDML} \\\n", "-b ../../genome-features/C_virginica-3.0-intron.bed \\\n", "> female_dml-intron.bed\n", "!head female_dml-intron.bed\n", "!wc -l female_dml-intron.bed" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t650896\t650898\tm_DML\t-55\n", "NC_035780.1\t778287\t778289\tm_DML\t59\n", "NC_035780.1\t986330\t986332\tm_DML\t-58\n", "NC_035780.1\t1937854\t1937856\tm_DML\t52\n", "NC_035780.1\t1974719\t1974721\tm_DML\t-52\n", "NC_035780.1\t1980627\t1980629\tm_DML\t-54\n", "NC_035780.1\t5130167\t5130169\tm_DML\t-50\n", "NC_035780.1\t6331828\t6331830\tm_DML\t-77\n", "NC_035780.1\t6395902\t6395904\tm_DML\t-57\n", "NC_035780.1\t6443038\t6443040\tm_DML\t64\n", " 1332 male_dml-intron.bed\n" ] } ], "source": [ "#Find overlaps between DML and feature\n", "#Look at output\n", "#Count number of overlaps\n", "\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a {maleDML} \\\n", "-b ../../genome-features/C_virginica-3.0-intron.bed \\\n", "> male_dml-intron.bed\n", "!head male_dml-intron.bed\n", "!wc -l 
male_dml-intron.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3e. Upstream flanks" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t57958577\t57958579\tf_DML\t-56\n", "NC_035784.1\t7603975\t7603977\tf_DML\t51\n", "NC_035784.1\t7604023\t7604025\tf_DML\t56\n", "NC_035787.1\t8623589\t8623591\tf_DML\t52\n", " 4 female_dml-upstream.bed\n" ] } ], "source": [ "#Keep every female DML that overlaps an upstream flank (-u: report each DML once)\n", "!{bedtoolsDirectory}intersectBed -u \\\n", "-a {femaleDML} \\\n", "-b ../../genome-features/C_virginica-3.0-upstream.gff \\\n", "> female_dml-upstream.bed\n", "\n", "#Preview the overlapping DML, then count them\n", "!head female_dml-upstream.bed\n", "!wc -l female_dml-upstream.bed" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t28589885\t28589887\tm_DML\t-58\n", "NC_035780.1\t54937656\t54937658\tm_DML\t53\n", "NC_035780.1\t57003735\t57003737\tm_DML\t52\n", "NC_035780.1\t59709480\t59709482\tm_DML\t-52\n", "NC_035780.1\t59709495\t59709497\tm_DML\t-51\n", "NC_035780.1\t63649627\t63649629\tm_DML\t-54\n", "NC_035781.1\t4734755\t4734757\tm_DML\t-55\n", "NC_035781.1\t4936982\t4936984\tm_DML\t50\n", "NC_035781.1\t5524116\t5524118\tm_DML\t-58\n", "NC_035781.1\t21181616\t21181618\tm_DML\t55\n", " 60 male_dml-upstream.bed\n" ] } ], "source": [ "#Keep every male DML that overlaps an upstream flank (-u: report each DML once)\n", "!{bedtoolsDirectory}intersectBed -u \\\n", "-a {maleDML} \\\n", "-b ../../genome-features/C_virginica-3.0-upstream.gff \\\n", "> male_dml-upstream.bed\n", "\n", "#Preview the overlapping DML, then count them\n", "!head male_dml-upstream.bed\n", "!wc -l male_dml-upstream.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3f. 
Downstream flanks" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t30763931\t30763933\tf_DML\t57\n", "NC_035783.1\t44976930\t44976932\tf_DML\t50\n", "NC_035783.1\t44976937\t44976939\tf_DML\t55\n", "NC_035787.1\t60378685\t60378687\tf_DML\t-52\n", " 4 female_dml-downstream.bed\n" ] } ], "source": [ "#Keep every female DML that overlaps a downstream flank (-u: report each DML once)\n", "!{bedtoolsDirectory}intersectBed -u \\\n", "-a {femaleDML} \\\n", "-b ../../genome-features/C_virginica-3.0-downstream.gff \\\n", "> female_dml-downstream.bed\n", "\n", "#Preview the overlapping DML, then count them\n", "!head female_dml-downstream.bed\n", "!wc -l female_dml-downstream.bed" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t776947\t776949\tm_DML\t63\n", "NC_035780.1\t2664616\t2664618\tm_DML\t-53\n", "NC_035780.1\t4091045\t4091047\tm_DML\t59\n", "NC_035780.1\t22028662\t22028664\tm_DML\t56\n", "NC_035780.1\t22638474\t22638476\tm_DML\t-51\n", "NC_035780.1\t25190574\t25190576\tm_DML\t52\n", "NC_035780.1\t27853669\t27853671\tm_DML\t-51\n", "NC_035780.1\t29476407\t29476409\tm_DML\t-55\n", "NC_035780.1\t54184794\t54184796\tm_DML\t-50\n", "NC_035780.1\t54937656\t54937658\tm_DML\t53\n", " 163 male_dml-downstream.bed\n" ] } ], "source": [ "#Keep every male DML that overlaps a downstream flank (-u: report each DML once)\n", "!{bedtoolsDirectory}intersectBed -u \\\n", "-a {maleDML} \\\n", "-b ../../genome-features/C_virginica-3.0-downstream.gff \\\n", "> male_dml-downstream.bed\n", "\n", "#Preview the overlapping DML, then count them\n", "!head male_dml-downstream.bed\n", "!wc -l male_dml-downstream.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3g. 
Intergenic regions" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035781.1\t2421607\t2421609\tf_DML\t-52\n", "NC_035781.1\t27429566\t27429568\tf_DML\t50\n", "NC_035782.1\t50870537\t50870539\tf_DML\t52\n", "NC_035783.1\t30132228\t30132230\tf_DML\t-56\n", "NC_035784.1\t36924757\t36924759\tf_DML\t50\n", "NC_035784.1\t44806170\t44806172\tf_DML\t52\n", "NC_035786.1\t56643307\t56643309\tf_DML\t-52\n", " 7 female_dml-intergenic.bed\n" ] } ], "source": [ "#Keep every female DML that overlaps an intergenic region (-u: report each DML once)\n", "!{bedtoolsDirectory}intersectBed -u \\\n", "-a {femaleDML} \\\n", "-b ../../genome-features/C_virginica-3.0-intergenic.bed \\\n", "> female_dml-intergenic.bed\n", "\n", "#Preview the overlapping DML, then count them\n", "!head female_dml-intergenic.bed\n", "!wc -l female_dml-intergenic.bed" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t2711286\t2711288\tm_DML\t61\n", "NC_035780.1\t10949623\t10949625\tm_DML\t50\n", "NC_035780.1\t15504176\t15504178\tm_DML\t52\n", "NC_035780.1\t15504590\t15504592\tm_DML\t53\n", "NC_035780.1\t22425661\t22425663\tm_DML\t-56\n", "NC_035780.1\t28249318\t28249320\tm_DML\t-52\n", "NC_035780.1\t29657745\t29657747\tm_DML\t52\n", "NC_035780.1\t33170289\t33170291\tm_DML\t-54\n", "NC_035780.1\t37999471\t37999473\tm_DML\t-54\n", "NC_035780.1\t39062735\t39062737\tm_DML\t50\n", " 379 male_dml-intergenic.bed\n" ] } ], "source": [ "#Keep every male DML that overlaps an intergenic region (-u: report each DML once)\n", "!{bedtoolsDirectory}intersectBed -u \\\n", "-a {maleDML} \\\n", "-b ../../genome-features/C_virginica-3.0-intergenic.bed \\\n", "> male_dml-intergenic.bed\n", "\n", "#Preview the overlapping DML, then count them\n", "!head male_dml-intergenic.bed\n", "!wc -l male_dml-intergenic.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3h. 
lncRNA" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035784.1\t29726386\t29726388\tf_DML\t-53\n", " 1 female_dml-lncRNA.bed\n" ] } ], "source": [ "#Find overlaps between DML and feature\n", "#Look at output\n", "#Count number of overlaps\n", "\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a {femaleDML} \\\n", "-b ../../genome-features/C_virginica-3.0-lncRNA.gff \\\n", "> female_dml-lncRNA.bed\n", "!head female_dml-lncRNA.bed\n", "!wc -l female_dml-lncRNA.bed" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t22242778\t22242780\tm_DML\t57\n", "NC_035780.1\t22513225\t22513227\tm_DML\t-65\n", "NC_035780.1\t52991473\t52991475\tm_DML\t-53\n", "NC_035781.1\t5827818\t5827820\tm_DML\t-52\n", "NC_035781.1\t19392093\t19392095\tm_DML\t56\n", "NC_035781.1\t19392651\t19392653\tm_DML\t-53\n", "NC_035781.1\t26864443\t26864445\tm_DML\t52\n", "NC_035781.1\t26865400\t26865402\tm_DML\t63\n", "NC_035781.1\t27512492\t27512494\tm_DML\t-60\n", "NC_035781.1\t27512507\t27512509\tm_DML\t-64\n", " 52 male_dml-lncRNA.bed\n" ] } ], "source": [ "#Find overlaps between DML and feature\n", "#Look at output\n", "#Count number of overlaps\n", "\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a {maleDML} \\\n", "-b ../../genome-features/C_virginica-3.0-lncRNA.gff \\\n", "> male_dml-lncRNA.bed\n", "!head male_dml-lncRNA.bed\n", "!wc -l male_dml-lncRNA.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3i. 
Transposable elements" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t30763931\t30763933\tf_DML\t57\n", " 1 female_dml-TE.bed\n" ] } ], "source": [ "#Find overlaps between DML and feature\n", "#Look at output\n", "#Count number of overlaps\n", "\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a {femaleDML} \\\n", "-b ../../genome-features/C_virginica-3.0-rm.te.bed \\\n", "> female_dml-TE.bed\n", "!head female_dml-TE.bed\n", "!wc -l female_dml-TE.bed" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t20541717\t20541719\tm_DML\t-50\n", "NC_035780.1\t33170289\t33170291\tm_DML\t-54\n", "NC_035780.1\t33176472\t33176474\tm_DML\t-61\n", "NC_035780.1\t57003735\t57003737\tm_DML\t52\n", "NC_035780.1\t58223495\t58223497\tm_DML\t50\n", "NC_035781.1\t2464874\t2464876\tm_DML\t-92\n", "NC_035781.1\t3546040\t3546042\tm_DML\t54\n", "NC_035781.1\t7422274\t7422276\tm_DML\t53\n", "NC_035781.1\t22970645\t22970647\tm_DML\t67\n", "NC_035781.1\t24392776\t24392778\tm_DML\t58\n", " 144 male_dml-TE.bed\n" ] } ], "source": [ "#Find overlaps between DML and feature\n", "#Look at output\n", "#Count number of overlaps\n", "\n", "!{bedtoolsDirectory}intersectBed \\\n", "-u \\\n", "-a {maleDML} \\\n", "-b ../../genome-features/C_virginica-3.0-rm.te.bed \\\n", "> male_dml-TE.bed\n", "!head male_dml-TE.bed\n", "!wc -l male_dml-TE.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 4. Combine line counts" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This will make it easier for downstream analysis." 
] }, { "cell_type": "code", "execution_count": 35, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "female_dml-CDS.bed\r\n", "female_dml-Gene.bed\r\n", "female_dml-TE.bed\r\n", "female_dml-downstream.bed\r\n", "female_dml-exonUTR.bed\r\n", "female_dml-intergenic.bed\r\n", "female_dml-intron.bed\r\n", "female_dml-lncRNA.bed\r\n", "female_dml-upstream.bed\r\n" ] } ], "source": [ "!find female_dml*bed" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "29\tfemale_dml-CDS.bed\r\n", "74\tfemale_dml-Gene.bed\r\n", "1\tfemale_dml-TE.bed\r\n", "4\tfemale_dml-downstream.bed\r\n", "8\tfemale_dml-exonUTR.bed\r\n", "7\tfemale_dml-intergenic.bed\r\n", "37\tfemale_dml-intron.bed\r\n", "1\tfemale_dml-lncRNA.bed\r\n", "4\tfemale_dml-upstream.bed\r\n" ] } ], "source": [ "#Get line count for all DML overlap files\n", "#Remove the 10th line (total entries)\n", "#Print in a tab-delimited format\n", "#Save output\n", "\n", "!wc -l female_dml*bed \\\n", "| sed '10,$ d' \\\n", "| awk '{print $1\"\\t\"$2}' \\\n", "> female_dml-Overlap-counts.txt\n", "!head female_dml-Overlap-counts.txt" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "male_dml-CDS.bed\r\n", "male_dml-Gene.bed\r\n", "male_dml-TE.bed\r\n", "male_dml-downstream.bed\r\n", "male_dml-exonUTR.bed\r\n", "male_dml-intergenic.bed\r\n", "male_dml-intron.bed\r\n", "male_dml-lncRNA.bed\r\n", "male_dml-upstream.bed\r\n" ] } ], "source": [ "!find male_dml*bed" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "815\tmale_dml-CDS.bed\r\n", "2322\tmale_dml-Gene.bed\r\n", "144\tmale_dml-TE.bed\r\n", "163\tmale_dml-downstream.bed\r\n", "190\tmale_dml-exonUTR.bed\r\n", "379\tmale_dml-intergenic.bed\r\n", 
"1332\tmale_dml-intron.bed\r\n", "52\tmale_dml-lncRNA.bed\r\n", "60\tmale_dml-upstream.bed\r\n" ] } ], "source": [ "#Get line count for all DML overlap files\n", "#Remove the 10th line (total entries)\n", "#Print in a tab-delimited format\n", "#Save output\n", "\n", "!wc -l male_dml*bed \\\n", "| sed '10,$ d' \\\n", "| awk '{print $1\"\\t\"$2}' \\\n", "> male_dml-Overlap-counts.txt\n", "!head male_dml-Overlap-counts.txt" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 1 }