{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Comparing Bismark alignments from two WGBS samples with and without crude trimming and how they relate to _P.generosa_ genome-wide CpG coverage"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Variables to be set by user\n",
"\n",
"Re: the two \"work_dir\" variables -\n",
"Both are needed: the `%env` variable is read by Bash (shell) commands, while the Python variable is read by Python code."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"env: work_dir=/Users/strigg/Documents/GitHub/Shelly_Pgenerosa/analyses/WGBS_conditionedBroodstockHemolymph/20190503_coverage_analysis\n"
]
}
],
"source": [
"# Working directory for this analysis; defined once so the Python and Bash copies cannot diverge\n",
"work_dir = \"/Users/strigg/Documents/GitHub/Shelly_Pgenerosa/analyses/WGBS_conditionedBroodstockHemolymph/20190503_coverage_analysis\"\n",
"# Export the same value as an environment variable for shell commands ($work_dir is expanded from the Python variable)\n",
"%env work_dir=$work_dir\n",
"# File name for the coverage comparison plot\n",
"output_plot = \"20190503_Pgenr_cov_comparison.png\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Import necessary modules"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import pandas\n",
"import os\n",
"import numpy\n",
"from IPython.display import display\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Make new working directory, download files and rename using wget ```--output-document``` argument"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/Users/strigg/Documents/GitHub/Shelly_Pgenerosa/analyses/WGBS_conditionedBroodstockHemolymph/20190503_coverage_analysis\n"
]
}
],
"source": [
"%cd $work_dir"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Loop through coverage files to calculate percent sequencing coverage for each Bismark subset\n",
"\n",
"**Note:** the coverage files had to be unzipped first (`gunzip *CpG_report.txt.gz`)."
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['/Volumes/web/metacarcinus/Pgenerosa/20190415_10K/Tank2-025-026_cytosine_CpG_cov_report.CpG_report.txt', '/Volumes/web/metacarcinus/Pgenerosa/20190415_10K/Tank2-025-026_trim_cytosine_CpG_cov_report.CpG_report.txt', '/Volumes/web/metacarcinus/Pgenerosa/20190415_10K/Tank3-15-16_cytosine_CpG_cov_report.CpG_report.txt', '/Volumes/web/metacarcinus/Pgenerosa/20190415_10K/Tank3-15-16_trim_cytosine_CpG_cov_report.CpG_report.txt']\n",
"----------------------------------------------\n",
"----------------------------------------------\n",
"/Volumes/web/metacarcinus/Pgenerosa/20190415_10K/Tank2-025-026_cytosine_CpG_cov_report.CpG_report.txt\n"
]
},
{
"data": {
"text/plain": [
"'Total coverage: 37135668'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'3x coverage: 24908042'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'5x coverage: 13436327'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'10x coverage: 2375356'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'Mean coverage: 3.6'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'No coverage: 11040954'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'Percent coverage: 77.1'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'Percent 3x coverage: 51.7'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'Percent 5x coverage: 27.9'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'Percent 10x coverage: 4.9'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" chrom | \n",
" pos | \n",
" strand | \n",
" meth | \n",
" unmeth | \n",
" C-context | \n",
" trinucleotide | \n",
" coverage | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" PGA_scaffold1__77_contigs__length_89643857 | \n",
" 4 | \n",
" + | \n",
" 0 | \n",
" 1 | \n",
" CG | \n",
" CGC | \n",
" 1 | \n",
"
\n",
" \n",
" 1 | \n",
" PGA_scaffold1__77_contigs__length_89643857 | \n",
" 5 | \n",
" - | \n",
" 0 | \n",
" 0 | \n",
" CG | \n",
" CGC | \n",
" 0 | \n",
"
\n",
" \n",
" 2 | \n",
" PGA_scaffold1__77_contigs__length_89643857 | \n",
" 18 | \n",
" + | \n",
" 0 | \n",
" 1 | \n",
" CG | \n",
" CGG | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" PGA_scaffold1__77_contigs__length_89643857 | \n",
" 19 | \n",
" - | \n",
" 0 | \n",
" 0 | \n",
" CG | \n",
" CGG | \n",
" 0 | \n",
"
\n",
" \n",
" 4 | \n",
" PGA_scaffold1__77_contigs__length_89643857 | \n",
" 54 | \n",
" + | \n",
" 0 | \n",
" 1 | \n",
" CG | \n",
" CGT | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" chrom pos strand meth unmeth \\\n",
"0 PGA_scaffold1__77_contigs__length_89643857 4 + 0 1 \n",
"1 PGA_scaffold1__77_contigs__length_89643857 5 - 0 0 \n",
"2 PGA_scaffold1__77_contigs__length_89643857 18 + 0 1 \n",
"3 PGA_scaffold1__77_contigs__length_89643857 19 - 0 0 \n",
"4 PGA_scaffold1__77_contigs__length_89643857 54 + 0 1 \n",
"\n",
" C-context trinucleotide coverage \n",
"0 CG CGC 1 \n",
"1 CG CGC 0 \n",
"2 CG CGG 1 \n",
"3 CG CGG 0 \n",
"4 CG CGT 1 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------------\n",
"----------------------------------------------\n",
"/Volumes/web/metacarcinus/Pgenerosa/20190415_10K/Tank2-025-026_trim_cytosine_CpG_cov_report.CpG_report.txt\n"
]
},
{
"data": {
"text/plain": [
"'Total coverage: 34605633'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'3x coverage: 18676350'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'5x coverage: 7733102'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'10x coverage: 1082497'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'Mean coverage: 2.6'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'No coverage: 13570989'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'Percent coverage: 71.8'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'Percent 3x coverage: 38.8'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'Percent 5x coverage: 16.1'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'Percent 10x coverage: 2.2'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" chrom | \n",
" pos | \n",
" strand | \n",
" meth | \n",
" unmeth | \n",
" C-context | \n",
" trinucleotide | \n",
" coverage | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" PGA_scaffold1__77_contigs__length_89643857 | \n",
" 4 | \n",
" + | \n",
" 0 | \n",
" 0 | \n",
" CG | \n",
" CGC | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" PGA_scaffold1__77_contigs__length_89643857 | \n",
" 5 | \n",
" - | \n",
" 1 | \n",
" 0 | \n",
" CG | \n",
" CGC | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" PGA_scaffold1__77_contigs__length_89643857 | \n",
" 18 | \n",
" + | \n",
" 0 | \n",
" 1 | \n",
" CG | \n",
" CGG | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" PGA_scaffold1__77_contigs__length_89643857 | \n",
" 19 | \n",
" - | \n",
" 1 | \n",
" 0 | \n",
" CG | \n",
" CGG | \n",
" 1 | \n",
"
\n",
" \n",
" 4 | \n",
" PGA_scaffold1__77_contigs__length_89643857 | \n",
" 54 | \n",
" + | \n",
" 0 | \n",
" 1 | \n",
" CG | \n",
" CGT | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" chrom pos strand meth unmeth \\\n",
"0 PGA_scaffold1__77_contigs__length_89643857 4 + 0 0 \n",
"1 PGA_scaffold1__77_contigs__length_89643857 5 - 1 0 \n",
"2 PGA_scaffold1__77_contigs__length_89643857 18 + 0 1 \n",
"3 PGA_scaffold1__77_contigs__length_89643857 19 - 1 0 \n",
"4 PGA_scaffold1__77_contigs__length_89643857 54 + 0 1 \n",
"\n",
" C-context trinucleotide coverage \n",
"0 CG CGC 0 \n",
"1 CG CGC 1 \n",
"2 CG CGG 1 \n",
"3 CG CGG 1 \n",
"4 CG CGT 1 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------------\n",
"----------------------------------------------\n",
"/Volumes/web/metacarcinus/Pgenerosa/20190415_10K/Tank3-15-16_cytosine_CpG_cov_report.CpG_report.txt\n"
]
},
{
"data": {
"text/plain": [
"'Total coverage: 35240668'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'3x coverage: 20301942'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'5x coverage: 9022441'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'10x coverage: 1266479'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'Mean coverage: 2.8'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'No coverage: 12935954'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'Percent coverage: 73.1'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'Percent 3x coverage: 42.1'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'Percent 5x coverage: 18.7'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'Percent 10x coverage: 2.6'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" chrom | \n",
" pos | \n",
" strand | \n",
" meth | \n",
" unmeth | \n",
" C-context | \n",
" trinucleotide | \n",
" coverage | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" PGA_scaffold1__77_contigs__length_89643857 | \n",
" 4 | \n",
" + | \n",
" 0 | \n",
" 0 | \n",
" CG | \n",
" CGC | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" PGA_scaffold1__77_contigs__length_89643857 | \n",
" 5 | \n",
" - | \n",
" 0 | \n",
" 0 | \n",
" CG | \n",
" CGC | \n",
" 0 | \n",
"
\n",
" \n",
" 2 | \n",
" PGA_scaffold1__77_contigs__length_89643857 | \n",
" 18 | \n",
" + | \n",
" 0 | \n",
" 1 | \n",
" CG | \n",
" CGG | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" PGA_scaffold1__77_contigs__length_89643857 | \n",
" 19 | \n",
" - | \n",
" 0 | \n",
" 0 | \n",
" CG | \n",
" CGG | \n",
" 0 | \n",
"
\n",
" \n",
" 4 | \n",
" PGA_scaffold1__77_contigs__length_89643857 | \n",
" 54 | \n",
" + | \n",
" 0 | \n",
" 1 | \n",
" CG | \n",
" CGT | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" chrom pos strand meth unmeth \\\n",
"0 PGA_scaffold1__77_contigs__length_89643857 4 + 0 0 \n",
"1 PGA_scaffold1__77_contigs__length_89643857 5 - 0 0 \n",
"2 PGA_scaffold1__77_contigs__length_89643857 18 + 0 1 \n",
"3 PGA_scaffold1__77_contigs__length_89643857 19 - 0 0 \n",
"4 PGA_scaffold1__77_contigs__length_89643857 54 + 0 1 \n",
"\n",
" C-context trinucleotide coverage \n",
"0 CG CGC 0 \n",
"1 CG CGC 0 \n",
"2 CG CGG 1 \n",
"3 CG CGG 0 \n",
"4 CG CGT 1 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------------------------------------------\n",
"----------------------------------------------\n",
"/Volumes/web/metacarcinus/Pgenerosa/20190415_10K/Tank3-15-16_trim_cytosine_CpG_cov_report.CpG_report.txt\n"
]
},
{
"data": {
"text/plain": [
"'Total coverage: 32124743'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'3x coverage: 14065848'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'5x coverage: 4647417'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'10x coverage: 559888'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'Mean coverage: 2.0'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'No coverage: 16051879'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'Percent coverage: 66.7'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'Percent 3x coverage: 29.2'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'Percent 5x coverage: 9.6'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'Percent 10x coverage: 1.2'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" chrom | \n",
" pos | \n",
" strand | \n",
" meth | \n",
" unmeth | \n",
" C-context | \n",
" trinucleotide | \n",
" coverage | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" PGA_scaffold1__77_contigs__length_89643857 | \n",
" 4 | \n",
" + | \n",
" 0 | \n",
" 0 | \n",
" CG | \n",
" CGC | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" PGA_scaffold1__77_contigs__length_89643857 | \n",
" 5 | \n",
" - | \n",
" 0 | \n",
" 0 | \n",
" CG | \n",
" CGC | \n",
" 0 | \n",
"
\n",
" \n",
" 2 | \n",
" PGA_scaffold1__77_contigs__length_89643857 | \n",
" 18 | \n",
" + | \n",
" 0 | \n",
" 1 | \n",
" CG | \n",
" CGG | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" PGA_scaffold1__77_contigs__length_89643857 | \n",
" 19 | \n",
" - | \n",
" 0 | \n",
" 0 | \n",
" CG | \n",
" CGG | \n",
" 0 | \n",
"
\n",
" \n",
" 4 | \n",
" PGA_scaffold1__77_contigs__length_89643857 | \n",
" 54 | \n",
" + | \n",
" 0 | \n",
" 1 | \n",
" CG | \n",
" CGT | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" chrom pos strand meth unmeth \\\n",
"0 PGA_scaffold1__77_contigs__length_89643857 4 + 0 0 \n",
"1 PGA_scaffold1__77_contigs__length_89643857 5 - 0 0 \n",
"2 PGA_scaffold1__77_contigs__length_89643857 18 + 0 1 \n",
"3 PGA_scaffold1__77_contigs__length_89643857 19 - 0 0 \n",
"4 PGA_scaffold1__77_contigs__length_89643857 54 + 0 1 \n",
"\n",
" C-context trinucleotide coverage \n",
"0 CG CGC 0 \n",
"1 CG CGC 0 \n",
"2 CG CGG 1 \n",
"3 CG CGG 0 \n",
"4 CG CGT 1 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Accumulators: one entry is appended per coverage file, in file order\n",
"bismark_subset_list = []\n",
"mean_seq_coverage = []\n",
"percent_seq_coverage = []\n",
"percent_3x_seq_coverage = []\n",
"percent_5x_seq_coverage = []\n",
"percent_10x_seq_coverage = []\n",
"\n",
"# List the Bismark CpG report files on the mounted volume (not the current directory)\n",
"cov_files = !find /Volumes/web/metacarcinus/Pgenerosa/20190415_10K/*.CpG_report.txt\n",
"print(cov_files)\n",
"\n",
"# Loop through coverage files\n",
"for file in cov_files:\n",
"    print(\"----------------------------------------------\")\n",
"    print(\"----------------------------------------------\")\n",
"    print (file)\n",
"    subset_name = file[:-4] # Remove file suffix (.txt)\n",
"    bismark_subset_list.append(subset_name)\n",
"\n",
"    # Create dataframe and add column names (taken from Bismark documentation)\n",
"    dataframe = pandas.read_csv(\n",
"        file,\n",
"        sep='\\t',\n",
"        header=None,\n",
"        names=[\"chrom\", \"pos\", \"strand\", \"meth\", \"unmeth\", \"C-context\", \"trinucleotide\"])\n",
"\n",
"    # Sum of methylated and unmethylated coverage for each position.\n",
"    dataframe['coverage'] = dataframe['meth'] + dataframe['unmeth']\n",
"\n",
"    # Number of rows in the report; used as the denominator for all percentages.\n",
"    total_CpG = len(dataframe)\n",
"\n",
"    # Count positions at each coverage threshold.\n",
"    # Series.sum() on the boolean mask is vectorized; the builtin sum() walks every\n",
"    # row in Python, which is very slow on reports with tens of millions of rows.\n",
"    # int() keeps the counts as plain Python ints so the printed values are unchanged.\n",
"    coverage = int((dataframe['coverage'] > 0).sum()) # Positions with any sequence coverage\n",
"    coverage_3x = int((dataframe['coverage'] >= 3).sum())\n",
"    coverage_5x = int((dataframe['coverage'] >= 5).sum())\n",
"    coverage_10x = int((dataframe['coverage'] >= 10).sum())\n",
"    mean_coverage = round(dataframe[\"coverage\"].mean(), 1)\n",
"\n",
"    display(\"Total coverage: \" + str(coverage))\n",
"    display(\"3x coverage: \" + str(coverage_3x))\n",
"    display(\"5x coverage: \" + str(coverage_5x))\n",
"    display(\"10x coverage: \" + str(coverage_10x))\n",
"    display(\"Mean coverage: \" + str(mean_coverage))\n",
"\n",
"    no_coverage = int((dataframe['coverage'] == 0).sum()) # Positions with no sequence coverage\n",
"\n",
"    # Percent of all report rows at each threshold, rounded to 1 decimal\n",
"    percent_coverage = round((coverage / total_CpG * 100.0), 1)\n",
"    percent_3x_coverage = round((coverage_3x / total_CpG * 100.0), 1)\n",
"    percent_5x_coverage = round((coverage_5x / total_CpG * 100.0), 1)\n",
"    percent_10x_coverage = round((coverage_10x / total_CpG * 100.0), 1)\n",
"\n",
"    # Store the per-file summary values for the summary dataframe built in a later cell\n",
"    mean_seq_coverage.append(mean_coverage)\n",
"    percent_seq_coverage.append(percent_coverage)\n",
"    percent_3x_seq_coverage.append(percent_3x_coverage)\n",
"    percent_5x_seq_coverage.append(percent_5x_coverage)\n",
"    percent_10x_seq_coverage.append(percent_10x_coverage)\n",
"\n",
"    display(\"No coverage: \" + str(no_coverage))\n",
"    display(\"Percent coverage: \" + str(percent_coverage))\n",
"    display(\"Percent 3x coverage: \" + str(percent_3x_coverage))\n",
"    display(\"Percent 5x coverage: \" + str(percent_5x_coverage))\n",
"    display(\"Percent 10x coverage: \" + str(percent_10x_coverage))\n",
"    display(dataframe.head())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Create new dataframe"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Bismark Sample | \n",
" Mean Coverage | \n",
" Percent Coverage | \n",
" Percent 3x Coverage | \n",
" Percent 5x Coverage | \n",
" Percent 10x Coverage | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Tank2-025-026 | \n",
" 3.6 | \n",
" 77.1 | \n",
" 51.7 | \n",
" 27.9 | \n",
" 4.9 | \n",
"
\n",
" \n",
" 1 | \n",
" Tank2-025-026_trim | \n",
" 2.6 | \n",
" 71.8 | \n",
" 38.8 | \n",
" 16.1 | \n",
" 2.2 | \n",
"
\n",
" \n",
" 2 | \n",
" Tank3-15-16 | \n",
" 2.8 | \n",
" 73.1 | \n",
" 42.1 | \n",
" 18.7 | \n",
" 2.6 | \n",
"
\n",
" \n",
" 3 | \n",
" Tank3-15-16_trim | \n",
" 2.0 | \n",
" 66.7 | \n",
" 29.2 | \n",
" 9.6 | \n",
" 1.2 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Bismark Sample Mean Coverage Percent Coverage Percent 3x Coverage \\\n",
"0 Tank2-025-026 3.6 77.1 51.7 \n",
"1 Tank2-025-026_trim 2.6 71.8 38.8 \n",
"2 Tank3-15-16 2.8 73.1 42.1 \n",
"3 Tank3-15-16_trim 2.0 66.7 29.2 \n",
"\n",
" Percent 5x Coverage Percent 10x Coverage \n",
"0 27.9 4.9 \n",
"1 16.1 2.2 \n",
"2 18.7 2.6 \n",
"3 9.6 1.2 "
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Short sample labels derived from the coverage file names, so they always match the\n",
"# order and number of files processed in the loop above, e.g.\n",
"# '.../Tank2-025-026_trim_cytosine_CpG_cov_report.CpG_report' -> 'Tank2-025-026_trim'\n",
"sample_names = [\n",
"    os.path.basename(subset).split('_cytosine_CpG_cov_report')[0]\n",
"    for subset in bismark_subset_list\n",
"]\n",
"\n",
"# One row per coverage file; the columns are the per-file summaries collected in the loop\n",
"coverage_dataframe = pandas.DataFrame(\n",
"    {\n",
"        'Bismark Sample': sample_names,\n",
"        'Mean Coverage': mean_seq_coverage,\n",
"        'Percent Coverage': percent_seq_coverage,\n",
"        'Percent 3x Coverage': percent_3x_seq_coverage,\n",
"        'Percent 5x Coverage': percent_5x_seq_coverage,\n",
"        'Percent 10x Coverage': percent_10x_seq_coverage,\n",
"    })\n",
"\n",
"coverage_dataframe"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Create line plot overlaid on bar chart, showing percent sequencing coverage for each Bismark subset option"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"