{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Generating Genome Feature Tracks" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In this notebook, I'll use the [NCBI assembly](https://www.ncbi.nlm.nih.gov/assembly/GCF_002022765.2/) and [NCBI Annotation Release 100](https://www.ncbi.nlm.nih.gov/genome/annotation_euk/Crassostrea_virginica/100/) to generate genome feature tracks for the *C. virginica* genome (C_virginica-3.0)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 0. Set working directory and variables" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/yaaminivenkataraman/Documents/ceabigr/code\r\n" ] } ], "source": [ "!pwd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "#!mkdir ../genome-features/" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/yaaminivenkataraman/Documents/ceabigr/genome-features\n" ] } ], "source": [ "cd ../genome-features/" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/opt/homebrew/bin/bedtools\r\n" ] } ], "source": [ "!which bedtools" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "bedtoolsDirectory = \"/opt/homebrew/bin\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. Download NCBI assembly" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I downloaded the GFF from [this link](https://www.ncbi.nlm.nih.gov/genome/?term=txid6565[orgn]). 
Can also curl from FTP links [here](https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/022/765/GCF_002022765.2_C_virginica-3.0/)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "##gff-version 3\r\n", "#!gff-spec-version 1.21\r\n", "#!processor NCBI annotwriter\r\n", "#!genome-build C_virginica-3.0\r\n", "#!genome-build-accession NCBI_Assembly:GCF_002022765.2\r\n", "#!annotation-source NCBI Crassostrea virginica Annotation Release 100\r\n", "##sequence-region NC_035780.1 1 65668440\r\n", "##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=6565\r\n", "NC_035780.1\tRefSeq\tregion\t1\t65668440\t.\t+\t.\tID=NC_035780.1:1..65668440;Dbxref=taxon:6565;Name=1;chromosome=1;collection-date=22-Mar-2015;country=USA;gbkey=Src;genome=chromosome;isolate=RU13XGHG1-28;isolation-source=Rutgers Haskin Shellfish Research Laboratory inbred lines (NJ);mol_type=genomic DNA;tissue-type=whole sample\r\n", "NC_035780.1\tGnomon\tgene\t13578\t14594\t.\t+\t.\tID=gene-LOC111116054;Dbxref=GeneID:111116054;Name=LOC111116054;gbkey=Gene;gene=LOC111116054;gene_biotype=lncRNA\r\n", "NC_035780.1\tGnomon\tlnc_RNA\t13578\t14594\t.\t+\t.\tID=rna-XR_002636969.1;Parent=gene-LOC111116054;Dbxref=GeneID:111116054,Genbank:XR_002636969.1;Name=XR_002636969.1;gbkey=ncRNA;gene=LOC111116054;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 1 sample with support for all annotated introns;product=uncharacterized LOC111116054;transcript_id=XR_002636969.1\r\n", "NC_035780.1\tGnomon\texon\t13578\t13603\t.\t+\t.\tID=exon-XR_002636969.1-1;Parent=rna-XR_002636969.1;Dbxref=GeneID:111116054,Genbank:XR_002636969.1;gbkey=ncRNA;gene=LOC111116054;product=uncharacterized LOC111116054;transcript_id=XR_002636969.1\r\n", 
"NC_035780.1\tGnomon\texon\t14237\t14290\t.\t+\t.\tID=exon-XR_002636969.1-2;Parent=rna-XR_002636969.1;Dbxref=GeneID:111116054,Genbank:XR_002636969.1;gbkey=ncRNA;gene=LOC111116054;product=uncharacterized LOC111116054;transcript_id=XR_002636969.1\r\n", "NC_035780.1\tGnomon\texon\t14557\t14594\t.\t+\t.\tID=exon-XR_002636969.1-3;Parent=rna-XR_002636969.1;Dbxref=GeneID:111116054,Genbank:XR_002636969.1;gbkey=ncRNA;gene=LOC111116054;product=uncharacterized LOC111116054;transcript_id=XR_002636969.1\r\n", "NC_035780.1\tGnomon\tgene\t28961\t33324\t.\t+\t.\tID=gene-LOC111126949;Dbxref=GeneID:111126949;Name=LOC111126949;gbkey=Gene;gene=LOC111126949;gene_biotype=protein_coding\r\n", "NC_035780.1\tGnomon\tmRNA\t28961\t33324\t.\t+\t.\tID=rna-XM_022471938.1;Parent=gene-LOC111126949;Dbxref=GeneID:111126949,Genbank:XM_022471938.1;Name=XM_022471938.1;gbkey=mRNA;gene=LOC111126949;model_evidence=Supporting evidence includes similarity to: 3 Proteins%2C and 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 21 samples with support for all annotated introns;product=UNC5C-like protein;transcript_id=XM_022471938.1\r\n", "NC_035780.1\tGnomon\texon\t28961\t29073\t.\t+\t.\tID=exon-XM_022471938.1-1;Parent=rna-XM_022471938.1;Dbxref=GeneID:111126949,Genbank:XM_022471938.1;gbkey=mRNA;gene=LOC111126949;product=UNC5C-like protein;transcript_id=XM_022471938.1\r\n", "NC_035780.1\tGnomon\texon\t30524\t31557\t.\t+\t.\tID=exon-XM_022471938.1-2;Parent=rna-XM_022471938.1;Dbxref=GeneID:111126949,Genbank:XM_022471938.1;gbkey=mRNA;gene=LOC111126949;product=UNC5C-like protein;transcript_id=XM_022471938.1\r\n", "NC_035780.1\tGnomon\texon\t31736\t31887\t.\t+\t.\tID=exon-XM_022471938.1-3;Parent=rna-XM_022471938.1;Dbxref=GeneID:111126949,Genbank:XM_022471938.1;gbkey=mRNA;gene=LOC111126949;product=UNC5C-like protein;transcript_id=XM_022471938.1\r\n", 
"NC_035780.1\tGnomon\texon\t31977\t32565\t.\t+\t.\tID=exon-XM_022471938.1-4;Parent=rna-XM_022471938.1;Dbxref=GeneID:111126949,Genbank:XM_022471938.1;gbkey=mRNA;gene=LOC111126949;product=UNC5C-like protein;transcript_id=XM_022471938.1\r\n", "NC_035780.1\tGnomon\texon\t32959\t33324\t.\t+\t.\tID=exon-XM_022471938.1-5;Parent=rna-XM_022471938.1;Dbxref=GeneID:111126949,Genbank:XM_022471938.1;gbkey=mRNA;gene=LOC111126949;product=UNC5C-like protein;transcript_id=XM_022471938.1\r\n", "NC_035780.1\tGnomon\tCDS\t30535\t31557\t.\t+\t0\tID=cds-XP_022327646.1;Parent=rna-XM_022471938.1;Dbxref=GeneID:111126949,Genbank:XP_022327646.1;Name=XP_022327646.1;gbkey=CDS;gene=LOC111126949;product=UNC5C-like protein;protein_id=XP_022327646.1\r\n", "NC_035780.1\tGnomon\tCDS\t31736\t31887\t.\t+\t0\tID=cds-XP_022327646.1;Parent=rna-XM_022471938.1;Dbxref=GeneID:111126949,Genbank:XP_022327646.1;Name=XP_022327646.1;gbkey=CDS;gene=LOC111126949;product=UNC5C-like protein;protein_id=XP_022327646.1\r\n", "NC_035780.1\tGnomon\tCDS\t31977\t32565\t.\t+\t1\tID=cds-XP_022327646.1;Parent=rna-XM_022471938.1;Dbxref=GeneID:111126949,Genbank:XP_022327646.1;Name=XP_022327646.1;gbkey=CDS;gene=LOC111126949;product=UNC5C-like protein;protein_id=XP_022327646.1\r\n", "NC_035780.1\tGnomon\tCDS\t32959\t33204\t.\t+\t0\tID=cds-XP_022327646.1;Parent=rna-XM_022471938.1;Dbxref=GeneID:111126949,Genbank:XP_022327646.1;Name=XP_022327646.1;gbkey=CDS;gene=LOC111126949;product=UNC5C-like protein;protein_id=XP_022327646.1\r\n", "NC_035780.1\tGnomon\tgene\t43111\t66897\t.\t-\t.\tID=gene-LOC111110729;Dbxref=GeneID:111110729;Name=LOC111110729;gbkey=Gene;gene=LOC111110729;gene_biotype=protein_coding\r\n", "NC_035780.1\tGnomon\tmRNA\t43111\t66897\t.\t-\t.\tID=rna-XM_022447324.1;Parent=gene-LOC111110729;Dbxref=GeneID:111110729,Genbank:XM_022447324.1;Name=XM_022447324.1;gbkey=mRNA;gene=LOC111110729;model_evidence=Supporting evidence includes similarity to: 1 Protein%2C and 100%25 coverage of the annotated genomic feature by RNAseq 
alignments;product=FMRFamide receptor-like%2C transcript variant X1;transcript_id=XM_022447324.1\r\n", "NC_035780.1\tGnomon\texon\t66869\t66897\t.\t-\t.\tID=exon-XM_022447324.1-1;Parent=rna-XM_022447324.1;Dbxref=GeneID:111110729,Genbank:XM_022447324.1;gbkey=mRNA;gene=LOC111110729;product=FMRFamide receptor-like%2C transcript variant X1;transcript_id=XM_022447324.1\r\n", "NC_035780.1\tGnomon\texon\t64123\t64334\t.\t-\t.\tID=exon-XM_022447324.1-2;Parent=rna-XM_022447324.1;Dbxref=GeneID:111110729,Genbank:XM_022447324.1;gbkey=mRNA;gene=LOC111110729;product=FMRFamide receptor-like%2C transcript variant X1;transcript_id=XM_022447324.1\r\n", "NC_035780.1\tGnomon\texon\t43111\t44358\t.\t-\t.\tID=exon-XM_022447324.1-3;Parent=rna-XM_022447324.1;Dbxref=GeneID:111110729,Genbank:XM_022447324.1;gbkey=mRNA;gene=LOC111110729;product=FMRFamide receptor-like%2C transcript variant X1;transcript_id=XM_022447324.1\r\n", "NC_035780.1\tGnomon\tCDS\t64123\t64219\t.\t-\t0\tID=cds-XP_022303032.1;Parent=rna-XM_022447324.1;Dbxref=GeneID:111110729,Genbank:XP_022303032.1;Name=XP_022303032.1;gbkey=CDS;gene=LOC111110729;product=FMRFamide receptor-like isoform X1;protein_id=XP_022303032.1\r\n", "NC_035780.1\tGnomon\tCDS\t43262\t44358\t.\t-\t2\tID=cds-XP_022303032.1;Parent=rna-XM_022447324.1;Dbxref=GeneID:111110729,Genbank:XP_022303032.1;Name=XP_022303032.1;gbkey=CDS;gene=LOC111110729;product=FMRFamide receptor-like isoform X1;protein_id=XP_022303032.1\r\n", "NC_035780.1\tGnomon\tmRNA\t43111\t46506\t.\t-\t.\tID=rna-XM_022447333.1;Parent=gene-LOC111110729;Dbxref=GeneID:111110729,Genbank:XM_022447333.1;Name=XM_022447333.1;gbkey=mRNA;gene=LOC111110729;model_evidence=Supporting evidence includes similarity to: 1 Protein%2C and 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 14 samples with support for all annotated introns;product=FMRFamide receptor-like%2C transcript variant X2;transcript_id=XM_022447333.1\r\n", 
"NC_035780.1\tGnomon\texon\t45913\t46506\t.\t-\t.\tID=exon-XM_022447333.1-1;Parent=rna-XM_022447333.1;Dbxref=GeneID:111110729,Genbank:XM_022447333.1;gbkey=mRNA;gene=LOC111110729;product=FMRFamide receptor-like%2C transcript variant X2;transcript_id=XM_022447333.1\r\n", "NC_035780.1\tGnomon\texon\t43111\t44358\t.\t-\t.\tID=exon-XM_022447333.1-2;Parent=rna-XM_022447333.1;Dbxref=GeneID:111110729,Genbank:XM_022447333.1;gbkey=mRNA;gene=LOC111110729;product=FMRFamide receptor-like%2C transcript variant X2;transcript_id=XM_022447333.1\r\n", "NC_035780.1\tGnomon\tCDS\t45913\t45997\t.\t-\t0\tID=cds-XP_022303041.1;Parent=rna-XM_022447333.1;Dbxref=GeneID:111110729,Genbank:XP_022303041.1;Name=XP_022303041.1;gbkey=CDS;gene=LOC111110729;product=FMRFamide receptor-like isoform X2;protein_id=XP_022303041.1\r\n", "NC_035780.1\tGnomon\tCDS\t43262\t44358\t.\t-\t2\tID=cds-XP_022303041.1;Parent=rna-XM_022447333.1;Dbxref=GeneID:111110729,Genbank:XP_022303041.1;Name=XP_022303041.1;gbkey=CDS;gene=LOC111110729;product=FMRFamide receptor-like isoform X2;protein_id=XP_022303041.1\r\n", "NC_035780.1\tGnomon\tgene\t85606\t95254\t.\t-\t.\tID=gene-LOC111112434;Dbxref=GeneID:111112434;Name=LOC111112434;gbkey=Gene;gene=LOC111112434;gene_biotype=protein_coding\r\n", "NC_035780.1\tGnomon\tmRNA\t85606\t95254\t.\t-\t.\tID=rna-XM_022449924.1;Parent=gene-LOC111112434;Dbxref=GeneID:111112434,Genbank:XM_022449924.1;Name=XM_022449924.1;gbkey=mRNA;gene=LOC111112434;model_evidence=Supporting evidence includes similarity to: 7 Proteins%2C and 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 13 samples with support for all annotated introns;product=homeobox protein Hox-B7-like;transcript_id=XM_022449924.1\r\n", "NC_035780.1\tGnomon\texon\t94571\t95254\t.\t-\t.\tID=exon-XM_022449924.1-1;Parent=rna-XM_022449924.1;Dbxref=GeneID:111112434,Genbank:XM_022449924.1;gbkey=mRNA;gene=LOC111112434;product=homeobox protein Hox-B7-like;transcript_id=XM_022449924.1\r\n", 
"NC_035780.1\tGnomon\texon\t88423\t88589\t.\t-\t.\tID=exon-XM_022449924.1-2;Parent=rna-XM_022449924.1;Dbxref=GeneID:111112434,Genbank:XM_022449924.1;gbkey=mRNA;gene=LOC111112434;product=homeobox protein Hox-B7-like;transcript_id=XM_022449924.1\r\n", "NC_035780.1\tGnomon\texon\t85606\t85777\t.\t-\t.\tID=exon-XM_022449924.1-3;Parent=rna-XM_022449924.1;Dbxref=GeneID:111112434,Genbank:XM_022449924.1;gbkey=mRNA;gene=LOC111112434;product=homeobox protein Hox-B7-like;transcript_id=XM_022449924.1\r\n", "NC_035780.1\tGnomon\tCDS\t94571\t95042\t.\t-\t0\tID=cds-XP_022305632.1;Parent=rna-XM_022449924.1;Dbxref=GeneID:111112434,Genbank:XP_022305632.1;Name=XP_022305632.1;gbkey=CDS;gene=LOC111112434;product=homeobox protein Hox-B7-like;protein_id=XP_022305632.1\r\n", "NC_035780.1\tGnomon\tCDS\t88423\t88589\t.\t-\t2\tID=cds-XP_022305632.1;Parent=rna-XM_022449924.1;Dbxref=GeneID:111112434,Genbank:XP_022305632.1;Name=XP_022305632.1;gbkey=CDS;gene=LOC111112434;product=homeobox protein Hox-B7-like;protein_id=XP_022305632.1\r\n", "NC_035780.1\tGnomon\tCDS\t85616\t85777\t.\t-\t0\tID=cds-XP_022305632.1;Parent=rna-XM_022449924.1;Dbxref=GeneID:111112434,Genbank:XP_022305632.1;Name=XP_022305632.1;gbkey=CDS;gene=LOC111112434;product=homeobox protein Hox-B7-like;protein_id=XP_022305632.1\r\n", "NC_035780.1\tGnomon\tgene\t99840\t106460\t.\t+\t.\tID=gene-LOC111120752;Dbxref=GeneID:111120752;Name=LOC111120752;gbkey=Gene;gene=LOC111120752;gene_biotype=protein_coding\r\n", "NC_035780.1\tGnomon\tmRNA\t99840\t106460\t.\t+\t.\tID=rna-XM_022461698.1;Parent=gene-LOC111120752;Dbxref=GeneID:111120752,Genbank:XM_022461698.1;Name=XM_022461698.1;gbkey=mRNA;gene=LOC111120752;model_evidence=Supporting evidence includes similarity to: 10 Proteins%2C and 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 27 samples with support for all annotated introns;product=ribulose-phosphate 3-epimerase-like;transcript_id=XM_022461698.1\r\n", 
"NC_035780.1\tGnomon\texon\t99840\t100122\t.\t+\t.\tID=exon-XM_022461698.1-1;Parent=rna-XM_022461698.1;Dbxref=GeneID:111120752,Genbank:XM_022461698.1;gbkey=mRNA;gene=LOC111120752;product=ribulose-phosphate 3-epimerase-like;transcript_id=XM_022461698.1\r\n", "NC_035780.1\tGnomon\texon\t100554\t100661\t.\t+\t.\tID=exon-XM_022461698.1-2;Parent=rna-XM_022461698.1;Dbxref=GeneID:111120752,Genbank:XM_022461698.1;gbkey=mRNA;gene=LOC111120752;product=ribulose-phosphate 3-epimerase-like;transcript_id=XM_022461698.1\r\n", "NC_035780.1\tGnomon\texon\t104929\t105063\t.\t+\t.\tID=exon-XM_022461698.1-3;Parent=rna-XM_022461698.1;Dbxref=GeneID:111120752,Genbank:XM_022461698.1;gbkey=mRNA;gene=LOC111120752;product=ribulose-phosphate 3-epimerase-like;transcript_id=XM_022461698.1\r\n", "NC_035780.1\tGnomon\texon\t105528\t105614\t.\t+\t.\tID=exon-XM_022461698.1-4;Parent=rna-XM_022461698.1;Dbxref=GeneID:111120752,Genbank:XM_022461698.1;gbkey=mRNA;gene=LOC111120752;product=ribulose-phosphate 3-epimerase-like;transcript_id=XM_022461698.1\r\n", "NC_035780.1\tGnomon\texon\t106004\t106460\t.\t+\t.\tID=exon-XM_022461698.1-5;Parent=rna-XM_022461698.1;Dbxref=GeneID:111120752,Genbank:XM_022461698.1;gbkey=mRNA;gene=LOC111120752;product=ribulose-phosphate 3-epimerase-like;transcript_id=XM_022461698.1\r\n", "NC_035780.1\tGnomon\tCDS\t99877\t100122\t.\t+\t0\tID=cds-XP_022317406.1;Parent=rna-XM_022461698.1;Dbxref=GeneID:111120752,Genbank:XP_022317406.1;Name=XP_022317406.1;gbkey=CDS;gene=LOC111120752;product=ribulose-phosphate 3-epimerase-like;protein_id=XP_022317406.1\r\n", "NC_035780.1\tGnomon\tCDS\t100554\t100661\t.\t+\t0\tID=cds-XP_022317406.1;Parent=rna-XM_022461698.1;Dbxref=GeneID:111120752,Genbank:XP_022317406.1;Name=XP_022317406.1;gbkey=CDS;gene=LOC111120752;product=ribulose-phosphate 3-epimerase-like;protein_id=XP_022317406.1\r\n", 
"NC_035780.1\tGnomon\tCDS\t104929\t105063\t.\t+\t0\tID=cds-XP_022317406.1;Parent=rna-XM_022461698.1;Dbxref=GeneID:111120752,Genbank:XP_022317406.1;Name=XP_022317406.1;gbkey=CDS;gene=LOC111120752;product=ribulose-phosphate 3-epimerase-like;protein_id=XP_022317406.1\r\n", "NC_035780.1\tGnomon\tCDS\t105528\t105614\t.\t+\t0\tID=cds-XP_022317406.1;Parent=rna-XM_022461698.1;Dbxref=GeneID:111120752,Genbank:XP_022317406.1;Name=XP_022317406.1;gbkey=CDS;gene=LOC111120752;product=ribulose-phosphate 3-epimerase-like;protein_id=XP_022317406.1\r\n", "NC_035780.1\tGnomon\tCDS\t106004\t106120\t.\t+\t0\tID=cds-XP_022317406.1;Parent=rna-XM_022461698.1;Dbxref=GeneID:111120752,Genbank:XP_022317406.1;Name=XP_022317406.1;gbkey=CDS;gene=LOC111120752;product=ribulose-phosphate 3-epimerase-like;protein_id=XP_022317406.1\r\n", "NC_035780.1\tGnomon\tgene\t108305\t110077\t.\t-\t.\tID=gene-LOC111128944;Dbxref=GeneID:111128944;Name=LOC111128944;gbkey=Gene;gene=LOC111128944;gene_biotype=protein_coding;partial=true;start_range=.,108305\r\n", "NC_035780.1\tGnomon\tmRNA\t108305\t110077\t.\t-\t.\tID=rna-XM_022474921.1;Parent=gene-LOC111128944;Dbxref=GeneID:111128944,Genbank:XM_022474921.1;Name=XM_022474921.1;gbkey=mRNA;gene=LOC111128944;model_evidence=Supporting evidence includes similarity to: 2 Proteins%2C and 93%25 coverage of the annotated genomic feature by RNAseq alignments;partial=true;product=mucin-19-like;start_range=.,108305;transcript_id=XM_022474921.1\r\n", "NC_035780.1\tGnomon\texon\t108305\t110077\t.\t-\t.\tID=exon-XM_022474921.1-1;Parent=rna-XM_022474921.1;Dbxref=GeneID:111128944,Genbank:XM_022474921.1;gbkey=mRNA;gene=LOC111128944;partial=true;product=mucin-19-like;start_range=.,108305;transcript_id=XM_022474921.1\r\n", 
"NC_035780.1\tGnomon\tCDS\t108305\t110077\t.\t-\t0\tID=cds-XP_022330629.1;Parent=rna-XM_022474921.1;Dbxref=GeneID:111128944,Genbank:XP_022330629.1;Name=XP_022330629.1;gbkey=CDS;gene=LOC111128944;partial=true;product=mucin-19-like;protein_id=XP_022330629.1;start_range=.,108305\r\n", "NC_035780.1\tGnomon\tgene\t151859\t157536\t.\t+\t.\tID=gene-LOC111128953;Dbxref=GeneID:111128953;Name=LOC111128953;gbkey=Gene;gene=LOC111128953;gene_biotype=protein_coding\r\n", "NC_035780.1\tGnomon\tmRNA\t151859\t157536\t.\t+\t.\tID=rna-XM_022474931.1;Parent=gene-LOC111128953;Dbxref=GeneID:111128953,Genbank:XM_022474931.1;Name=XM_022474931.1;gbkey=mRNA;gene=LOC111128953;model_evidence=Supporting evidence includes similarity to: 1 Protein;product=GATA zinc finger domain-containing protein 14-like;transcript_id=XM_022474931.1\r\n", "NC_035780.1\tGnomon\texon\t151859\t153368\t.\t+\t.\tID=exon-XM_022474931.1-1;Parent=rna-XM_022474931.1;Dbxref=GeneID:111128953,Genbank:XM_022474931.1;gbkey=mRNA;gene=LOC111128953;product=GATA zinc finger domain-containing protein 14-like;transcript_id=XM_022474931.1\r\n", "NC_035780.1\tGnomon\texon\t156764\t157536\t.\t+\t.\tID=exon-XM_022474931.1-2;Parent=rna-XM_022474931.1;Dbxref=GeneID:111128953,Genbank:XM_022474931.1;gbkey=mRNA;gene=LOC111128953;product=GATA zinc finger domain-containing protein 14-like;transcript_id=XM_022474931.1\r\n", "NC_035780.1\tGnomon\tCDS\t151859\t153368\t.\t+\t0\tID=cds-XP_022330639.1;Parent=rna-XM_022474931.1;Dbxref=GeneID:111128953,Genbank:XP_022330639.1;Name=XP_022330639.1;gbkey=CDS;gene=LOC111128953;product=GATA zinc finger domain-containing protein 14-like;protein_id=XP_022330639.1\r\n", "NC_035780.1\tGnomon\tCDS\t156764\t157536\t.\t+\t2\tID=cds-XP_022330639.1;Parent=rna-XM_022474931.1;Dbxref=GeneID:111128953,Genbank:XP_022330639.1;Name=XP_022330639.1;gbkey=CDS;gene=LOC111128953;product=GATA zinc finger domain-containing protein 14-like;protein_id=XP_022330639.1\r\n", 
"NC_035780.1\tGnomon\tgene\t163809\t183798\t.\t-\t.\tID=gene-LOC111105691;Dbxref=GeneID:111105691;Name=LOC111105691;gbkey=Gene;gene=LOC111105691;gene_biotype=protein_coding\r\n", "NC_035780.1\tGnomon\tmRNA\t163809\t183798\t.\t-\t.\tID=rna-XM_022440054.1;Parent=gene-LOC111105691;Dbxref=GeneID:111105691,Genbank:XM_022440054.1;Name=XM_022440054.1;gbkey=mRNA;gene=LOC111105691;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 9 samples with support for all annotated introns;product=uncharacterized LOC111105691;transcript_id=XM_022440054.1\r\n", "NC_035780.1\tGnomon\texon\t183732\t183798\t.\t-\t.\tID=exon-XM_022440054.1-1;Parent=rna-XM_022440054.1;Dbxref=GeneID:111105691,Genbank:XM_022440054.1;gbkey=mRNA;gene=LOC111105691;product=uncharacterized LOC111105691;transcript_id=XM_022440054.1\r\n", "NC_035780.1\tGnomon\texon\t163809\t164341\t.\t-\t.\tID=exon-XM_022440054.1-2;Parent=rna-XM_022440054.1;Dbxref=GeneID:111105691,Genbank:XM_022440054.1;gbkey=mRNA;gene=LOC111105691;product=uncharacterized LOC111105691;transcript_id=XM_022440054.1\r\n", "NC_035780.1\tGnomon\tCDS\t163835\t164266\t.\t-\t0\tID=cds-XP_022295762.1;Parent=rna-XM_022440054.1;Dbxref=GeneID:111105691,Genbank:XP_022295762.1;Name=XP_022295762.1;gbkey=CDS;gene=LOC111105691;product=uncharacterized protein LOC111105691;protein_id=XP_022295762.1\r\n", "NC_035780.1\tGnomon\tgene\t164820\t166793\t.\t+\t.\tID=gene-LOC111105685;Dbxref=GeneID:111105685;Name=LOC111105685;gbkey=Gene;gene=LOC111105685;gene_biotype=protein_coding\r\n", "NC_035780.1\tGnomon\tmRNA\t164820\t166793\t.\t+\t.\tID=rna-XM_022440042.1;Parent=gene-LOC111105685;Dbxref=GeneID:111105685,Genbank:XM_022440042.1;Name=XM_022440042.1;gbkey=mRNA;gene=LOC111105685;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 4 samples with support for all annotated introns;product=protein 
ANTAGONIST OF LIKE HETEROCHROMATIN PROTEIN 1-like;transcript_id=XM_022440042.1\r\n", "NC_035780.1\tGnomon\texon\t164820\t164941\t.\t+\t.\tID=exon-XM_022440042.1-1;Parent=rna-XM_022440042.1;Dbxref=GeneID:111105685,Genbank:XM_022440042.1;gbkey=mRNA;gene=LOC111105685;product=protein ANTAGONIST OF LIKE HETEROCHROMATIN PROTEIN 1-like;transcript_id=XM_022440042.1\r\n", "NC_035780.1\tGnomon\texon\t165620\t166793\t.\t+\t.\tID=exon-XM_022440042.1-2;Parent=rna-XM_022440042.1;Dbxref=GeneID:111105685,Genbank:XM_022440042.1;gbkey=mRNA;gene=LOC111105685;product=protein ANTAGONIST OF LIKE HETEROCHROMATIN PROTEIN 1-like;transcript_id=XM_022440042.1\r\n", "NC_035780.1\tGnomon\tCDS\t165746\t166681\t.\t+\t0\tID=cds-XP_022295750.1;Parent=rna-XM_022440042.1;Dbxref=GeneID:111105685,Genbank:XP_022295750.1;Name=XP_022295750.1;gbkey=CDS;gene=LOC111105685;product=protein ANTAGONIST OF LIKE HETEROCHROMATIN PROTEIN 1-like;protein_id=XP_022295750.1\r\n", "NC_035780.1\tGnomon\tgene\t169468\t170178\t.\t-\t.\tID=gene-LOC111105702;Dbxref=GeneID:111105702;Name=LOC111105702;gbkey=Gene;gene=LOC111105702;gene_biotype=lncRNA\r\n", "NC_035780.1\tGnomon\tlnc_RNA\t169468\t170178\t.\t-\t.\tID=rna-XR_002635081.1;Parent=gene-LOC111105702;Dbxref=GeneID:111105702,Genbank:XR_002635081.1;Name=XR_002635081.1;gbkey=ncRNA;gene=LOC111105702;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 3 samples with support for all annotated introns;product=uncharacterized LOC111105702;transcript_id=XR_002635081.1\r\n", "NC_035780.1\tGnomon\texon\t170129\t170178\t.\t-\t.\tID=exon-XR_002635081.1-1;Parent=rna-XR_002635081.1;Dbxref=GeneID:111105702,Genbank:XR_002635081.1;gbkey=ncRNA;gene=LOC111105702;product=uncharacterized LOC111105702;transcript_id=XR_002635081.1\r\n", 
"NC_035780.1\tGnomon\texon\t169907\t169960\t.\t-\t.\tID=exon-XR_002635081.1-2;Parent=rna-XR_002635081.1;Dbxref=GeneID:111105702,Genbank:XR_002635081.1;gbkey=ncRNA;gene=LOC111105702;product=uncharacterized LOC111105702;transcript_id=XR_002635081.1\r\n", "NC_035780.1\tGnomon\texon\t169622\t169675\t.\t-\t.\tID=exon-XR_002635081.1-3;Parent=rna-XR_002635081.1;Dbxref=GeneID:111105702,Genbank:XR_002635081.1;gbkey=ncRNA;gene=LOC111105702;product=uncharacterized LOC111105702;transcript_id=XR_002635081.1\r\n", "NC_035780.1\tGnomon\texon\t169468\t169508\t.\t-\t.\tID=exon-XR_002635081.1-4;Parent=rna-XR_002635081.1;Dbxref=GeneID:111105702,Genbank:XR_002635081.1;gbkey=ncRNA;gene=LOC111105702;product=uncharacterized LOC111105702;transcript_id=XR_002635081.1\r\n", "NC_035780.1\tGnomon\tgene\t190449\t193594\t.\t-\t.\tID=gene-LOC111133554;Dbxref=GeneID:111133554;Name=LOC111133554;gbkey=Gene;gene=LOC111133554;gene_biotype=protein_coding\r\n", "NC_035780.1\tGnomon\tmRNA\t190449\t193594\t.\t-\t.\tID=rna-XM_022482070.1;Parent=gene-LOC111133554;Dbxref=GeneID:111133554,Genbank:XM_022482070.1;Name=XM_022482070.1;gbkey=mRNA;gene=LOC111133554;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 3 samples with support for all annotated introns;product=putative uncharacterized protein DDB_G0277407;transcript_id=XM_022482070.1\r\n", "NC_035780.1\tGnomon\texon\t193257\t193594\t.\t-\t.\tID=exon-XM_022482070.1-1;Parent=rna-XM_022482070.1;Dbxref=GeneID:111133554,Genbank:XM_022482070.1;gbkey=mRNA;gene=LOC111133554;product=putative uncharacterized protein DDB_G0277407;transcript_id=XM_022482070.1\r\n", "NC_035780.1\tGnomon\texon\t190449\t190999\t.\t-\t.\tID=exon-XM_022482070.1-2;Parent=rna-XM_022482070.1;Dbxref=GeneID:111133554,Genbank:XM_022482070.1;gbkey=mRNA;gene=LOC111133554;product=putative uncharacterized protein DDB_G0277407;transcript_id=XM_022482070.1\r\n", 
"NC_035780.1\tGnomon\tCDS\t190493\t190924\t.\t-\t0\tID=cds-XP_022337778.1;Parent=rna-XM_022482070.1;Dbxref=GeneID:111133554,Genbank:XP_022337778.1;Name=XP_022337778.1;gbkey=CDS;gene=LOC111133554;product=putative uncharacterized protein DDB_G0277407;protein_id=XP_022337778.1\r\n", "NC_035780.1\tGnomon\tgene\t204243\t207743\t.\t-\t.\tID=gene-LOC111125466;Dbxref=GeneID:111125466;Name=LOC111125466;gbkey=Gene;gene=LOC111125466;gene_biotype=protein_coding\r\n", "NC_035780.1\tGnomon\tmRNA\t204243\t207743\t.\t-\t.\tID=rna-XM_022469388.1;Parent=gene-LOC111125466;Dbxref=GeneID:111125466,Genbank:XM_022469388.1;Name=XM_022469388.1;gbkey=mRNA;gene=LOC111125466;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 11 samples with support for all annotated introns;product=homeobox protein 2-like;transcript_id=XM_022469388.1\r\n", "NC_035780.1\tGnomon\texon\t207388\t207743\t.\t-\t.\tID=exon-XM_022469388.1-1;Parent=rna-XM_022469388.1;Dbxref=GeneID:111125466,Genbank:XM_022469388.1;gbkey=mRNA;gene=LOC111125466;product=homeobox protein 2-like;transcript_id=XM_022469388.1\r\n", "NC_035780.1\tGnomon\texon\t204243\t204795\t.\t-\t.\tID=exon-XM_022469388.1-2;Parent=rna-XM_022469388.1;Dbxref=GeneID:111125466,Genbank:XM_022469388.1;gbkey=mRNA;gene=LOC111125466;product=homeobox protein 2-like;transcript_id=XM_022469388.1\r\n", "NC_035780.1\tGnomon\tCDS\t204289\t204720\t.\t-\t0\tID=cds-XP_022325096.1;Parent=rna-XM_022469388.1;Dbxref=GeneID:111125466,Genbank:XP_022325096.1;Name=XP_022325096.1;gbkey=CDS;gene=LOC111125466;product=homeobox protein 2-like;protein_id=XP_022325096.1\r\n", "NC_035780.1\tGnomon\tgene\t214891\t215322\t.\t-\t.\tID=gene-LOC111128964;Dbxref=GeneID:111128964;Name=LOC111128964;gbkey=Gene;gene=LOC111128964;gene_biotype=protein_coding\r\n", 
"NC_035780.1\tGnomon\tmRNA\t214891\t215322\t.\t-\t.\tID=rna-XM_022474945.1;Parent=gene-LOC111128964;Dbxref=GeneID:111128964,Genbank:XM_022474945.1;Name=XM_022474945.1;gbkey=mRNA;gene=LOC111128964;model_evidence=Supporting evidence includes similarity to: 1 Protein;product=putative uncharacterized protein DDB_G0277407;transcript_id=XM_022474945.1\r\n", "NC_035780.1\tGnomon\texon\t214891\t215322\t.\t-\t.\tID=exon-XM_022474945.1-1;Parent=rna-XM_022474945.1;Dbxref=GeneID:111128964,Genbank:XM_022474945.1;gbkey=mRNA;gene=LOC111128964;product=putative uncharacterized protein DDB_G0277407;transcript_id=XM_022474945.1\r\n", "NC_035780.1\tGnomon\tCDS\t214891\t215322\t.\t-\t0\tID=cds-XP_022330653.1;Parent=rna-XM_022474945.1;Dbxref=GeneID:111128964,Genbank:XP_022330653.1;Name=XP_022330653.1;gbkey=CDS;gene=LOC111128964;product=putative uncharacterized protein DDB_G0277407;protein_id=XP_022330653.1\r\n", "NC_035780.1\tGnomon\tgene\t219451\t225076\t.\t-\t.\tID=gene-LOC111113860;Dbxref=GeneID:111113860;Name=LOC111113860;gbkey=Gene;gene=LOC111113860;gene_biotype=protein_coding\r\n", "NC_035780.1\tGnomon\tmRNA\t219451\t225076\t.\t-\t.\tID=rna-XM_022452155.1;Parent=gene-LOC111113860;Dbxref=GeneID:111113860,Genbank:XM_022452155.1;Name=XM_022452155.1;gbkey=mRNA;gene=LOC111113860;model_evidence=Supporting evidence includes similarity to: 10 Proteins%2C and 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 24 samples with support for all annotated introns;product=sulfotransferase family cytosolic 1B member 1-like;transcript_id=XM_022452155.1\r\n", "NC_035780.1\tGnomon\texon\t224748\t225076\t.\t-\t.\tID=exon-XM_022452155.1-1;Parent=rna-XM_022452155.1;Dbxref=GeneID:111113860,Genbank:XM_022452155.1;gbkey=mRNA;gene=LOC111113860;product=sulfotransferase family cytosolic 1B member 1-like;transcript_id=XM_022452155.1\r\n" ] } ], "source": [ "!head -n100 GCF_002022765.2_C_virginica-3.0_genomic.gff" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 
2. Prepare for feature track creation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Before I pull out feature tracks, I need to know which databases were used for annotation, which features I can expect and how many of them there are, and identify chromosome lengths." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2a. Annotation information" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 1 ##sequence-region NC_035784.1 1 98698416\r\n", " 1 ##sequence-region NC_035785.1 1 51258098\r\n", " 1 ##sequence-region NC_035786.1 1 57830854\r\n", " 1 ##sequence-region NC_035787.1 1 75944018\r\n", " 1 ##sequence-region NC_035788.1 1 104168038\r\n", " 1 ##sequence-region NC_035789.1 1 32650045\r\n", " 11 ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=6565\r\n", "1482188 Gnomon\r\n", "29345 RefSeq\r\n", "1739 tRNAscan-SE\r\n" ] } ], "source": [ "#Database identifiers for extracting features\n", "!cut -f2 GCF_002022765.2_C_virginica-3.0_genomic.gff | sort | uniq -c | tail" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 1 #!annotation-source NCBI Crassostrea virginica Annotation Release 100\n", " 1 #!genome-build C_virginica-3.0\n", " 1 #!genome-build-accession NCBI_Assembly:GCF_002022765.2\n", " 1 #!gff-spec-version 1.21\n", " 1 #!processor NCBI annotwriter\n", " 1 ###\n", " 1 ##gff-version 3\n", " 1 ##sequence-region NC_007175.2 1 17244\n", " 1 ##sequence-region NC_035780.1 1 65668440\n", " 1 ##sequence-region NC_035781.1 1 61752955\n", " 1 ##sequence-region NC_035782.1 1 77061148\n", " 1 ##sequence-region NC_035783.1 1 59691872\n", " 1 ##sequence-region NC_035784.1 1 98698416\n", " 1 ##sequence-region NC_035785.1 1 51258098\n", " 1 ##sequence-region NC_035786.1 1 57830854\n", " 1 ##sequence-region NC_035787.1 1 75944018\n", " 1 
##sequence-region NC_035788.1 1 104168038\n", " 1 ##sequence-region NC_035789.1 1 32650045\n", " 11 ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=6565\n", "645368 CDS\n", "29258 cDNA_match\n", "731916 exon\n", "38838 gene\n", "4750 lnc_RNA\n", "60201 mRNA\n", " 667 pseudogene\n", " 2 rRNA\n", " 11 region\n", " 587 tRNA\n", "1674 transcript\n" ] } ], "source": [ "#Count the number of unique features in the GFF\n", "!cut -f3 GCF_002022765.2_C_virginica-3.0_genomic.gff | sort | uniq -c" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2b. Chromosome lengths" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ">NC_035780.1 Crassostrea virginica isolate RU13XGHG1-28 chromosome 1, C_virginica-3.0, whole genome shotgun sequence\r\n", "tgacacatatataaagttgaagTCCATACGTAAGAAACTCTGTGAGATATTAACCGAAAACCTTTTGAATCTTTacgaaa\r\n", "aatatacatgttgcGCCAACTGGCGTAAATCAAAACCGGAAGCAGTAAGCATGTCGTGTTTAGTGTCTATCAAATGGACC\r\n", "GGGGGAGTTCTAGTACATATCCAAAGATAAGGGCAATACATAAAATACTCGCAAAGTTATTGACCGtcaaagttgatgta\r\n", "cttttagaaaaaaataatggaaaatgtggcTTTAGTGGAACGGCatcaatgtaaatttaaaatagcaggGTTTGCGTTTG\r\n", "AATTAAAACATCGTGTTTGGTGTCGTTAGAAAGGTCTATTCCAGTTCTATAACATATCTAAAGGTCAGGTCAATCCgtta\r\n", "atttataaaagagaTATGGGCGATCGCGGCATGAGTACTACATGACACAGAGTTACTTGCTCTTTGCTACTTCAGCGTTT\r\n", "CCGGAAGCGTAGTTTTTTTCGTGCGTTTATTCCTTGCAGATAGCCAAGCAATTCCACAAGAATTGAACTCATTTGGCATT\r\n", "AaacttcttgaaaaaataacaaattctttcttttctcatatCAGGTGGATgtcgagtactttgattctaccgggcataga\r\n", "gtagcaacagtactttcagtaccgtgattttgctaccaatcataggactgaaagtactgtgtatagaccaatcacagtac\r\n" ] } ], "source": [ "#Obtained the full FASTA from this link: https://www.ncbi.nlm.nih.gov/genome/?term=txid6565[orgn]\n", "#Check information\n", "!head GCF_002022765.2_C_virginica-3.0_genomic.fna" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "#Extract chr and sequence length information\n", "!awk 
'$0 ~ \">\" {print c; c=0;printf substr($0,2,14) \"\\t\"; } $0 !~ \">\" {c+=length($0);} END { print c; }' \\\n", "GCF_002022765.2_C_virginica-3.0_genomic.fna \\\n", "| sed 's/Cr//g' \\\n", "| awk '{print $1\"\\t\"$2}' \\\n", "| tail -n +2 \\\n", "> C_virginica-3.0-sequence-lengths.txt" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t65668440\r\n", "NC_035781.1\t61752955\r\n", "NC_035782.1\t77061148\r\n", "NC_035783.1\t59691872\r\n", "NC_035784.1\t98698416\r\n", "NC_035785.1\t51258098\r\n", "NC_035786.1\t57830854\r\n", "NC_035787.1\t75944018\r\n", "NC_035788.1\t104168038\r\n", "NC_035789.1\t32650045\r\n", "NC_007175.2\t17244\r\n" ] } ], "source": [ "#11 chr total including mitochondrial information\n", "!head -n11 C_virginica-3.0-sequence-lengths.txt" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "#New file with chr names only\n", "!cut -f1 C_virginica-3.0-sequence-lengths.txt \\\n", "> C_virginica-3.0-chr.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. Generate genome feature tracks" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I will extract CDS, exon, gene, lncRNA and mRNA tracks. I can then use those existing tracks to produce intron and intergenic tracks, as well as 1 kb upstream and downstream flanking regions with `bedtools`. I will also use the RepeatMasker output from NCBI for my transposable element track." ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "bedtools v2.30.0\r\n" ] } ], "source": [ "!{bedtoolsDirectory}/bedtools --version" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2a. Gene" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "#Isolate gene entries from multiple annotation databases. 
Tab must be included between database and feature\n", "#Sort output for downstream use\n", "#Include chromosome name information\n", "!grep -e \"Gnomon\tgene\" -e \"RefSeq\tgene\" -e \"tRNAscan-SE\tgene\" \\\n", "GCF_002022765.2_C_virginica-3.0_genomic.gff \\\n", "| {bedtoolsDirectory}/sortBed \\\n", "-faidx C_virginica-3.0-chr.txt \\\n", "> C_virginica-3.0-gene.gff" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\tGnomon\tgene\t13578\t14594\t.\t+\t.\tID=gene-LOC111116054;Dbxref=GeneID:111116054;Name=LOC111116054;gbkey=Gene;gene=LOC111116054;gene_biotype=lncRNA\n", "NC_035780.1\tGnomon\tgene\t28961\t33324\t.\t+\t.\tID=gene-LOC111126949;Dbxref=GeneID:111126949;Name=LOC111126949;gbkey=Gene;gene=LOC111126949;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t43111\t66897\t.\t-\t.\tID=gene-LOC111110729;Dbxref=GeneID:111110729;Name=LOC111110729;gbkey=Gene;gene=LOC111110729;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t85606\t95254\t.\t-\t.\tID=gene-LOC111112434;Dbxref=GeneID:111112434;Name=LOC111112434;gbkey=Gene;gene=LOC111112434;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t99840\t106460\t.\t+\t.\tID=gene-LOC111120752;Dbxref=GeneID:111120752;Name=LOC111120752;gbkey=Gene;gene=LOC111120752;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t108305\t110077\t.\t-\t.\tID=gene-LOC111128944;Dbxref=GeneID:111128944;Name=LOC111128944;gbkey=Gene;gene=LOC111128944;gene_biotype=protein_coding;partial=true;start_range=.,108305\n", "NC_035780.1\tGnomon\tgene\t151859\t157536\t.\t+\t.\tID=gene-LOC111128953;Dbxref=GeneID:111128953;Name=LOC111128953;gbkey=Gene;gene=LOC111128953;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t163809\t183798\t.\t-\t.\tID=gene-LOC111105691;Dbxref=GeneID:111105691;Name=LOC111105691;gbkey=Gene;gene=LOC111105691;gene_biotype=protein_coding\n", 
"NC_035780.1\tGnomon\tgene\t164820\t166793\t.\t+\t.\tID=gene-LOC111105685;Dbxref=GeneID:111105685;Name=LOC111105685;gbkey=Gene;gene=LOC111105685;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t169468\t170178\t.\t-\t.\tID=gene-LOC111105702;Dbxref=GeneID:111105702;Name=LOC111105702;gbkey=Gene;gene=LOC111105702;gene_biotype=lncRNA\n", " 38838 C_virginica-3.0-gene.gff\n" ] } ], "source": [ "!head C_virginica-3.0-gene.gff\n", "!wc -l C_virginica-3.0-gene.gff" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2b. CDS" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "!grep -e \"Gnomon\tCDS\" -e \"RefSeq\tCDS\" -e \"tRNAscan-SE\tCDS\" \\\n", "GCF_002022765.2_C_virginica-3.0_genomic.gff \\\n", "| {bedtoolsDirectory}/sortBed \\\n", "-faidx C_virginica-3.0-chr.txt \\\n", "> C_virginica-3.0-CDS.gff" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\tGnomon\tCDS\t30535\t31557\t.\t+\t0\tID=cds-XP_022327646.1;Parent=rna-XM_022471938.1;Dbxref=GeneID:111126949,Genbank:XP_022327646.1;Name=XP_022327646.1;gbkey=CDS;gene=LOC111126949;product=UNC5C-like protein;protein_id=XP_022327646.1\n", "NC_035780.1\tGnomon\tCDS\t31736\t31887\t.\t+\t0\tID=cds-XP_022327646.1;Parent=rna-XM_022471938.1;Dbxref=GeneID:111126949,Genbank:XP_022327646.1;Name=XP_022327646.1;gbkey=CDS;gene=LOC111126949;product=UNC5C-like protein;protein_id=XP_022327646.1\n", "NC_035780.1\tGnomon\tCDS\t31977\t32565\t.\t+\t1\tID=cds-XP_022327646.1;Parent=rna-XM_022471938.1;Dbxref=GeneID:111126949,Genbank:XP_022327646.1;Name=XP_022327646.1;gbkey=CDS;gene=LOC111126949;product=UNC5C-like protein;protein_id=XP_022327646.1\n", "NC_035780.1\tGnomon\tCDS\t32959\t33204\t.\t+\t0\tID=cds-XP_022327646.1;Parent=rna-XM_022471938.1;Dbxref=GeneID:111126949,Genbank:XP_022327646.1;Name=XP_022327646.1;gbkey=CDS;gene=LOC111126949;product=UNC5C-like 
protein;protein_id=XP_022327646.1\n", "NC_035780.1\tGnomon\tCDS\t43262\t44358\t.\t-\t2\tID=cds-XP_022303032.1;Parent=rna-XM_022447324.1;Dbxref=GeneID:111110729,Genbank:XP_022303032.1;Name=XP_022303032.1;gbkey=CDS;gene=LOC111110729;product=FMRFamide receptor-like isoform X1;protein_id=XP_022303032.1\n", "NC_035780.1\tGnomon\tCDS\t43262\t44358\t.\t-\t2\tID=cds-XP_022303041.1;Parent=rna-XM_022447333.1;Dbxref=GeneID:111110729,Genbank:XP_022303041.1;Name=XP_022303041.1;gbkey=CDS;gene=LOC111110729;product=FMRFamide receptor-like isoform X2;protein_id=XP_022303041.1\n", "NC_035780.1\tGnomon\tCDS\t45913\t45997\t.\t-\t0\tID=cds-XP_022303041.1;Parent=rna-XM_022447333.1;Dbxref=GeneID:111110729,Genbank:XP_022303041.1;Name=XP_022303041.1;gbkey=CDS;gene=LOC111110729;product=FMRFamide receptor-like isoform X2;protein_id=XP_022303041.1\n", "NC_035780.1\tGnomon\tCDS\t64123\t64219\t.\t-\t0\tID=cds-XP_022303032.1;Parent=rna-XM_022447324.1;Dbxref=GeneID:111110729,Genbank:XP_022303032.1;Name=XP_022303032.1;gbkey=CDS;gene=LOC111110729;product=FMRFamide receptor-like isoform X1;protein_id=XP_022303032.1\n", "NC_035780.1\tGnomon\tCDS\t85616\t85777\t.\t-\t0\tID=cds-XP_022305632.1;Parent=rna-XM_022449924.1;Dbxref=GeneID:111112434,Genbank:XP_022305632.1;Name=XP_022305632.1;gbkey=CDS;gene=LOC111112434;product=homeobox protein Hox-B7-like;protein_id=XP_022305632.1\n", "NC_035780.1\tGnomon\tCDS\t88423\t88589\t.\t-\t2\tID=cds-XP_022305632.1;Parent=rna-XM_022449924.1;Dbxref=GeneID:111112434,Genbank:XP_022305632.1;Name=XP_022305632.1;gbkey=CDS;gene=LOC111112434;product=homeobox protein Hox-B7-like;protein_id=XP_022305632.1\n", " 645368 C_virginica-3.0-CDS.gff\n" ] } ], "source": [ "!head C_virginica-3.0-CDS.gff\n", "!wc -l C_virginica-3.0-CDS.gff" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2c. 
Exon" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "!grep -e \"Gnomon\texon\" -e \"RefSeq\texon\" -e \"tRNAscan-SE\texon\" \\\n", "GCF_002022765.2_C_virginica-3.0_genomic.gff \\\n", "| {bedtoolsDirectory}/sortBed \\\n", "-faidx C_virginica-3.0-chr.txt \\\n", "> C_virginica-3.0-exon.gff" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\tGnomon\texon\t13578\t13603\t.\t+\t.\tID=exon-XR_002636969.1-1;Parent=rna-XR_002636969.1;Dbxref=GeneID:111116054,Genbank:XR_002636969.1;gbkey=ncRNA;gene=LOC111116054;product=uncharacterized LOC111116054;transcript_id=XR_002636969.1\n", "NC_035780.1\tGnomon\texon\t14237\t14290\t.\t+\t.\tID=exon-XR_002636969.1-2;Parent=rna-XR_002636969.1;Dbxref=GeneID:111116054,Genbank:XR_002636969.1;gbkey=ncRNA;gene=LOC111116054;product=uncharacterized LOC111116054;transcript_id=XR_002636969.1\n", "NC_035780.1\tGnomon\texon\t14557\t14594\t.\t+\t.\tID=exon-XR_002636969.1-3;Parent=rna-XR_002636969.1;Dbxref=GeneID:111116054,Genbank:XR_002636969.1;gbkey=ncRNA;gene=LOC111116054;product=uncharacterized LOC111116054;transcript_id=XR_002636969.1\n", "NC_035780.1\tGnomon\texon\t28961\t29073\t.\t+\t.\tID=exon-XM_022471938.1-1;Parent=rna-XM_022471938.1;Dbxref=GeneID:111126949,Genbank:XM_022471938.1;gbkey=mRNA;gene=LOC111126949;product=UNC5C-like protein;transcript_id=XM_022471938.1\n", "NC_035780.1\tGnomon\texon\t30524\t31557\t.\t+\t.\tID=exon-XM_022471938.1-2;Parent=rna-XM_022471938.1;Dbxref=GeneID:111126949,Genbank:XM_022471938.1;gbkey=mRNA;gene=LOC111126949;product=UNC5C-like protein;transcript_id=XM_022471938.1\n", "NC_035780.1\tGnomon\texon\t31736\t31887\t.\t+\t.\tID=exon-XM_022471938.1-3;Parent=rna-XM_022471938.1;Dbxref=GeneID:111126949,Genbank:XM_022471938.1;gbkey=mRNA;gene=LOC111126949;product=UNC5C-like protein;transcript_id=XM_022471938.1\n", 
"NC_035780.1\tGnomon\texon\t31977\t32565\t.\t+\t.\tID=exon-XM_022471938.1-4;Parent=rna-XM_022471938.1;Dbxref=GeneID:111126949,Genbank:XM_022471938.1;gbkey=mRNA;gene=LOC111126949;product=UNC5C-like protein;transcript_id=XM_022471938.1\n", "NC_035780.1\tGnomon\texon\t32959\t33324\t.\t+\t.\tID=exon-XM_022471938.1-5;Parent=rna-XM_022471938.1;Dbxref=GeneID:111126949,Genbank:XM_022471938.1;gbkey=mRNA;gene=LOC111126949;product=UNC5C-like protein;transcript_id=XM_022471938.1\n", "NC_035780.1\tGnomon\texon\t43111\t44358\t.\t-\t.\tID=exon-XM_022447324.1-3;Parent=rna-XM_022447324.1;Dbxref=GeneID:111110729,Genbank:XM_022447324.1;gbkey=mRNA;gene=LOC111110729;product=FMRFamide receptor-like%2C transcript variant X1;transcript_id=XM_022447324.1\n", "NC_035780.1\tGnomon\texon\t43111\t44358\t.\t-\t.\tID=exon-XM_022447333.1-2;Parent=rna-XM_022447333.1;Dbxref=GeneID:111110729,Genbank:XM_022447333.1;gbkey=mRNA;gene=LOC111110729;product=FMRFamide receptor-like%2C transcript variant X2;transcript_id=XM_022447333.1\n", " 731916 C_virginica-3.0-exon.gff\n" ] } ], "source": [ "!head C_virginica-3.0-exon.gff\n", "!wc -l C_virginica-3.0-exon.gff" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2d. 
lncRNA" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "!grep -e \"Gnomon\tlnc_RNA\" -e \"RefSeq\tlnc_RNA\" -e \"tRNAscan-SE\tlnc_RNA\" \\\n", "GCF_002022765.2_C_virginica-3.0_genomic.gff \\\n", "| {bedtoolsDirectory}/sortBed \\\n", "-faidx C_virginica-3.0-chr.txt \\\n", "> C_virginica-3.0-lncRNA.gff" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\tGnomon\tlnc_RNA\t13578\t14594\t.\t+\t.\tID=rna-XR_002636969.1;Parent=gene-LOC111116054;Dbxref=GeneID:111116054,Genbank:XR_002636969.1;Name=XR_002636969.1;gbkey=ncRNA;gene=LOC111116054;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 1 sample with support for all annotated introns;product=uncharacterized LOC111116054;transcript_id=XR_002636969.1\n", "NC_035780.1\tGnomon\tlnc_RNA\t169468\t170178\t.\t-\t.\tID=rna-XR_002635081.1;Parent=gene-LOC111105702;Dbxref=GeneID:111105702,Genbank:XR_002635081.1;Name=XR_002635081.1;gbkey=ncRNA;gene=LOC111105702;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 3 samples with support for all annotated introns;product=uncharacterized LOC111105702;transcript_id=XR_002635081.1\n", "NC_035780.1\tGnomon\tlnc_RNA\t900326\t903430\t.\t+\t.\tID=rna-XR_002636046.1;Parent=gene-LOC111111519;Dbxref=GeneID:111111519,Genbank:XR_002636046.1;Name=XR_002636046.1;gbkey=ncRNA;gene=LOC111111519;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 20 samples with support for all annotated introns;product=uncharacterized LOC111111519;transcript_id=XR_002636046.1\n", 
"NC_035780.1\tGnomon\tlnc_RNA\t1280831\t1282416\t.\t-\t.\tID=rna-XR_002638148.1;Parent=gene-LOC111124195;Dbxref=GeneID:111124195,Genbank:XR_002638148.1;Name=XR_002638148.1;gbkey=ncRNA;gene=LOC111124195;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 1 sample with support for all annotated introns;product=uncharacterized LOC111124195;transcript_id=XR_002638148.1\n", "NC_035780.1\tGnomon\tlnc_RNA\t1432944\t1458091\t.\t+\t.\tID=rna-XR_002639675.1;Parent=gene-LOC111135942;Dbxref=GeneID:111135942,Genbank:XR_002639675.1;Name=XR_002639675.1;gbkey=ncRNA;gene=LOC111135942;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 4 samples with support for all annotated introns;product=uncharacterized LOC111135942;transcript_id=XR_002639675.1\n", "NC_035780.1\tGnomon\tlnc_RNA\t1503802\t1513830\t.\t-\t.\tID=rna-XR_002636574.1;Parent=gene-LOC111114441;Dbxref=GeneID:111114441,Genbank:XR_002636574.1;Name=XR_002636574.1;gbkey=ncRNA;gene=LOC111114441;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 2 samples with support for all annotated introns;product=uncharacterized LOC111114441;transcript_id=XR_002636574.1\n", "NC_035780.1\tGnomon\tlnc_RNA\t1856841\t1863697\t.\t-\t.\tID=rna-XR_002636863.1;Parent=gene-LOC111115591;Dbxref=GeneID:111115591,Genbank:XR_002636863.1;Name=XR_002636863.1;gbkey=ncRNA;gene=LOC111115591;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 1 sample with support for all annotated introns;product=uncharacterized LOC111115591%2C transcript variant X1;transcript_id=XR_002636863.1\n", 
"NC_035780.1\tGnomon\tlnc_RNA\t1856841\t1863683\t.\t-\t.\tID=rna-XR_002636864.1;Parent=gene-LOC111115591;Dbxref=GeneID:111115591,Genbank:XR_002636864.1;Name=XR_002636864.1;gbkey=ncRNA;gene=LOC111115591;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments;product=uncharacterized LOC111115591%2C transcript variant X2;transcript_id=XR_002636864.1\n", "NC_035780.1\tGnomon\tlnc_RNA\t2161223\t2166803\t.\t+\t.\tID=rna-XR_002635698.1;Parent=gene-LOC111109763;Dbxref=GeneID:111109763,Genbank:XR_002635698.1;Name=XR_002635698.1;gbkey=ncRNA;gene=LOC111109763;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 23 samples with support for all annotated introns;product=uncharacterized LOC111109763;transcript_id=XR_002635698.1\n", "NC_035780.1\tGnomon\tlnc_RNA\t2928484\t2930094\t.\t-\t.\tID=rna-XR_002637875.1;Parent=gene-LOC111122009;Dbxref=GeneID:111122009,Genbank:XR_002637875.1;Name=XR_002637875.1;gbkey=ncRNA;gene=LOC111122009;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 5 samples with support for all annotated introns;product=uncharacterized LOC111122009;transcript_id=XR_002637875.1\n", " 4750 C_virginica-3.0-lncRNA.gff\n" ] } ], "source": [ "!head C_virginica-3.0-lncRNA.gff\n", "!wc -l C_virginica-3.0-lncRNA.gff" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2e. 
mRNA" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "!grep -e \"Gnomon\tmRNA\" -e \"RefSeq\tmRNA\" -e \"tRNAscan-SE\tmRNA\" \\\n", "GCF_002022765.2_C_virginica-3.0_genomic.gff \\\n", "| {bedtoolsDirectory}/sortBed \\\n", "-faidx C_virginica-3.0-chr.txt \\\n", "> C_virginica-3.0-mRNA.gff" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\tGnomon\tmRNA\t28961\t33324\t.\t+\t.\tID=rna-XM_022471938.1;Parent=gene-LOC111126949;Dbxref=GeneID:111126949,Genbank:XM_022471938.1;Name=XM_022471938.1;gbkey=mRNA;gene=LOC111126949;model_evidence=Supporting evidence includes similarity to: 3 Proteins%2C and 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 21 samples with support for all annotated introns;product=UNC5C-like protein;transcript_id=XM_022471938.1\n", "NC_035780.1\tGnomon\tmRNA\t43111\t66897\t.\t-\t.\tID=rna-XM_022447324.1;Parent=gene-LOC111110729;Dbxref=GeneID:111110729,Genbank:XM_022447324.1;Name=XM_022447324.1;gbkey=mRNA;gene=LOC111110729;model_evidence=Supporting evidence includes similarity to: 1 Protein%2C and 100%25 coverage of the annotated genomic feature by RNAseq alignments;product=FMRFamide receptor-like%2C transcript variant X1;transcript_id=XM_022447324.1\n", "NC_035780.1\tGnomon\tmRNA\t43111\t46506\t.\t-\t.\tID=rna-XM_022447333.1;Parent=gene-LOC111110729;Dbxref=GeneID:111110729,Genbank:XM_022447333.1;Name=XM_022447333.1;gbkey=mRNA;gene=LOC111110729;model_evidence=Supporting evidence includes similarity to: 1 Protein%2C and 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 14 samples with support for all annotated introns;product=FMRFamide receptor-like%2C transcript variant X2;transcript_id=XM_022447333.1\n", 
"NC_035780.1\tGnomon\tmRNA\t85606\t95254\t.\t-\t.\tID=rna-XM_022449924.1;Parent=gene-LOC111112434;Dbxref=GeneID:111112434,Genbank:XM_022449924.1;Name=XM_022449924.1;gbkey=mRNA;gene=LOC111112434;model_evidence=Supporting evidence includes similarity to: 7 Proteins%2C and 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 13 samples with support for all annotated introns;product=homeobox protein Hox-B7-like;transcript_id=XM_022449924.1\n", "NC_035780.1\tGnomon\tmRNA\t99840\t106460\t.\t+\t.\tID=rna-XM_022461698.1;Parent=gene-LOC111120752;Dbxref=GeneID:111120752,Genbank:XM_022461698.1;Name=XM_022461698.1;gbkey=mRNA;gene=LOC111120752;model_evidence=Supporting evidence includes similarity to: 10 Proteins%2C and 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 27 samples with support for all annotated introns;product=ribulose-phosphate 3-epimerase-like;transcript_id=XM_022461698.1\n", "NC_035780.1\tGnomon\tmRNA\t108305\t110077\t.\t-\t.\tID=rna-XM_022474921.1;Parent=gene-LOC111128944;Dbxref=GeneID:111128944,Genbank:XM_022474921.1;Name=XM_022474921.1;gbkey=mRNA;gene=LOC111128944;model_evidence=Supporting evidence includes similarity to: 2 Proteins%2C and 93%25 coverage of the annotated genomic feature by RNAseq alignments;partial=true;product=mucin-19-like;start_range=.,108305;transcript_id=XM_022474921.1\n", "NC_035780.1\tGnomon\tmRNA\t151859\t157536\t.\t+\t.\tID=rna-XM_022474931.1;Parent=gene-LOC111128953;Dbxref=GeneID:111128953,Genbank:XM_022474931.1;Name=XM_022474931.1;gbkey=mRNA;gene=LOC111128953;model_evidence=Supporting evidence includes similarity to: 1 Protein;product=GATA zinc finger domain-containing protein 14-like;transcript_id=XM_022474931.1\n", "NC_035780.1\tGnomon\tmRNA\t163809\t183798\t.\t-\t.\tID=rna-XM_022440054.1;Parent=gene-LOC111105691;Dbxref=GeneID:111105691,Genbank:XM_022440054.1;Name=XM_022440054.1;gbkey=mRNA;gene=LOC111105691;model_evidence=Supporting evidence includes similarity to: 
100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 9 samples with support for all annotated introns;product=uncharacterized LOC111105691;transcript_id=XM_022440054.1\n", "NC_035780.1\tGnomon\tmRNA\t164820\t166793\t.\t+\t.\tID=rna-XM_022440042.1;Parent=gene-LOC111105685;Dbxref=GeneID:111105685,Genbank:XM_022440042.1;Name=XM_022440042.1;gbkey=mRNA;gene=LOC111105685;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 4 samples with support for all annotated introns;product=protein ANTAGONIST OF LIKE HETEROCHROMATIN PROTEIN 1-like;transcript_id=XM_022440042.1\n", "NC_035780.1\tGnomon\tmRNA\t190449\t193594\t.\t-\t.\tID=rna-XM_022482070.1;Parent=gene-LOC111133554;Dbxref=GeneID:111133554,Genbank:XM_022482070.1;Name=XM_022482070.1;gbkey=mRNA;gene=LOC111133554;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 3 samples with support for all annotated introns;product=putative uncharacterized protein DDB_G0277407;transcript_id=XM_022482070.1\n", " 60201 C_virginica-3.0-mRNA.gff\n" ] } ], "source": [ "!head C_virginica-3.0-mRNA.gff\n", "!wc -l C_virginica-3.0-mRNA.gff" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2f. 
Non-coding sequences" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "#Find the complement to the exon track (non-coding sequences)\n", "#Create a BED file for IGV\n", "!{bedtoolsDirectory}/complementBed \\\n", "-i C_virginica-3.0-exon.gff \\\n", "-g C_virginica-3.0-sequence-lengths.txt \\\n", "> C_virginica-3.0-nonCDS.bed" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t0\t13577\n", "NC_035780.1\t13603\t14236\n", "NC_035780.1\t14290\t14556\n", "NC_035780.1\t14594\t28960\n", "NC_035780.1\t29073\t30523\n", "NC_035780.1\t31557\t31735\n", "NC_035780.1\t31887\t31976\n", "NC_035780.1\t32565\t32958\n", "NC_035780.1\t33324\t43110\n", "NC_035780.1\t44358\t45912\n", " 337305 C_virginica-3.0-nonCDS.bed\n" ] } ], "source": [ "!head C_virginica-3.0-nonCDS.bed\n", "!wc -l C_virginica-3.0-nonCDS.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2g. 
Intron" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "#Find the intersection between the non-coding sequences and genes (introns)\n", "!{bedtoolsDirectory}/intersectBed \\\n", "-a C_virginica-3.0-nonCDS.bed \\\n", "-b C_virginica-3.0-gene.gff -sorted \\\n", "> C_virginica-3.0-intron.bed" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t13603\t14236\n", "NC_035780.1\t14290\t14556\n", "NC_035780.1\t29073\t30523\n", "NC_035780.1\t31557\t31735\n", "NC_035780.1\t31887\t31976\n", "NC_035780.1\t32565\t32958\n", "NC_035780.1\t44358\t45912\n", "NC_035780.1\t46506\t64122\n", "NC_035780.1\t64334\t66868\n", "NC_035780.1\t85777\t88422\n", " 311341 C_virginica-3.0-intron.bed\n" ] } ], "source": [ "!head C_virginica-3.0-intron.bed\n", "!wc -l C_virginica-3.0-intron.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2h. Untranslated regions of exons" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "scrolled": true }, "outputs": [], "source": [ "#Obtain UTRs by subtracting CDS from exons\n", "!{bedtoolsDirectory}/subtractBed \\\n", "-a C_virginica-3.0-exon.gff \\\n", "-b C_virginica-3.0-CDS.gff \\\n", "-sorted \\\n", "-g C_virginica-3.0-sequence-lengths.txt \\\n", "> C_virginica-3.0-exonUTR.gff" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\tGnomon\texon\t13578\t13603\t.\t+\t.\tID=exon-XR_002636969.1-1;Parent=rna-XR_002636969.1;Dbxref=GeneID:111116054,Genbank:XR_002636969.1;gbkey=ncRNA;gene=LOC111116054;product=uncharacterized LOC111116054;transcript_id=XR_002636969.1\n", "NC_035780.1\tGnomon\texon\t14237\t14290\t.\t+\t.\tID=exon-XR_002636969.1-2;Parent=rna-XR_002636969.1;Dbxref=GeneID:111116054,Genbank:XR_002636969.1;gbkey=ncRNA;gene=LOC111116054;product=uncharacterized 
LOC111116054;transcript_id=XR_002636969.1\n", "NC_035780.1\tGnomon\texon\t14557\t14594\t.\t+\t.\tID=exon-XR_002636969.1-3;Parent=rna-XR_002636969.1;Dbxref=GeneID:111116054,Genbank:XR_002636969.1;gbkey=ncRNA;gene=LOC111116054;product=uncharacterized LOC111116054;transcript_id=XR_002636969.1\n", "NC_035780.1\tGnomon\texon\t28961\t29073\t.\t+\t.\tID=exon-XM_022471938.1-1;Parent=rna-XM_022471938.1;Dbxref=GeneID:111126949,Genbank:XM_022471938.1;gbkey=mRNA;gene=LOC111126949;product=UNC5C-like protein;transcript_id=XM_022471938.1\n", "NC_035780.1\tGnomon\texon\t30524\t30534\t.\t+\t.\tID=exon-XM_022471938.1-2;Parent=rna-XM_022471938.1;Dbxref=GeneID:111126949,Genbank:XM_022471938.1;gbkey=mRNA;gene=LOC111126949;product=UNC5C-like protein;transcript_id=XM_022471938.1\n", "NC_035780.1\tGnomon\texon\t33205\t33324\t.\t+\t.\tID=exon-XM_022471938.1-5;Parent=rna-XM_022471938.1;Dbxref=GeneID:111126949,Genbank:XM_022471938.1;gbkey=mRNA;gene=LOC111126949;product=UNC5C-like protein;transcript_id=XM_022471938.1\n", "NC_035780.1\tGnomon\texon\t43111\t43261\t.\t-\t.\tID=exon-XM_022447324.1-3;Parent=rna-XM_022447324.1;Dbxref=GeneID:111110729,Genbank:XM_022447324.1;gbkey=mRNA;gene=LOC111110729;product=FMRFamide receptor-like%2C transcript variant X1;transcript_id=XM_022447324.1\n", "NC_035780.1\tGnomon\texon\t43111\t43261\t.\t-\t.\tID=exon-XM_022447333.1-2;Parent=rna-XM_022447333.1;Dbxref=GeneID:111110729,Genbank:XM_022447333.1;gbkey=mRNA;gene=LOC111110729;product=FMRFamide receptor-like%2C transcript variant X2;transcript_id=XM_022447333.1\n", "NC_035780.1\tGnomon\texon\t45998\t46506\t.\t-\t.\tID=exon-XM_022447333.1-1;Parent=rna-XM_022447333.1;Dbxref=GeneID:111110729,Genbank:XM_022447333.1;gbkey=mRNA;gene=LOC111110729;product=FMRFamide receptor-like%2C transcript variant X2;transcript_id=XM_022447333.1\n", 
"NC_035780.1\tGnomon\texon\t64220\t64334\t.\t-\t.\tID=exon-XM_022447324.1-2;Parent=rna-XM_022447324.1;Dbxref=GeneID:111110729,Genbank:XM_022447324.1;gbkey=mRNA;gene=LOC111110729;product=FMRFamide receptor-like%2C transcript variant X1;transcript_id=XM_022447324.1\n", " 183389 C_virginica-3.0-exonUTR.gff\n" ] } ], "source": [ "!head C_virginica-3.0-exonUTR.gff\n", "!wc -l C_virginica-3.0-exonUTR.gff" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2i. Flanking regions (1 kb)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### All flanks" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "#Create 1 kb flanking regions\n", "#Subtract existing genes from artificial flanks\n", "!{bedtoolsDirectory}/flankBed \\\n", "-i C_virginica-3.0-gene.gff \\\n", "-g C_virginica-3.0-sequence-lengths.txt \\\n", "-b 1000 \\\n", "| {bedtoolsDirectory}/subtractBed \\\n", "-a - \\\n", "-b C_virginica-3.0-gene.gff \\\n", "> C_virginica-3.0-flanks.gff" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\tGnomon\tgene\t12578\t13577\t.\t+\t.\tID=gene-LOC111116054;Dbxref=GeneID:111116054;Name=LOC111116054;gbkey=Gene;gene=LOC111116054;gene_biotype=lncRNA\n", "NC_035780.1\tGnomon\tgene\t14595\t15594\t.\t+\t.\tID=gene-LOC111116054;Dbxref=GeneID:111116054;Name=LOC111116054;gbkey=Gene;gene=LOC111116054;gene_biotype=lncRNA\n", "NC_035780.1\tGnomon\tgene\t27961\t28960\t.\t+\t.\tID=gene-LOC111126949;Dbxref=GeneID:111126949;Name=LOC111126949;gbkey=Gene;gene=LOC111126949;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t33325\t34324\t.\t+\t.\tID=gene-LOC111126949;Dbxref=GeneID:111126949;Name=LOC111126949;gbkey=Gene;gene=LOC111126949;gene_biotype=protein_coding\n", 
"NC_035780.1\tGnomon\tgene\t42111\t43110\t.\t-\t.\tID=gene-LOC111110729;Dbxref=GeneID:111110729;Name=LOC111110729;gbkey=Gene;gene=LOC111110729;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t66898\t67897\t.\t-\t.\tID=gene-LOC111110729;Dbxref=GeneID:111110729;Name=LOC111110729;gbkey=Gene;gene=LOC111110729;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t84606\t85605\t.\t-\t.\tID=gene-LOC111112434;Dbxref=GeneID:111112434;Name=LOC111112434;gbkey=Gene;gene=LOC111112434;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t95255\t96254\t.\t-\t.\tID=gene-LOC111112434;Dbxref=GeneID:111112434;Name=LOC111112434;gbkey=Gene;gene=LOC111112434;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t98840\t99839\t.\t+\t.\tID=gene-LOC111120752;Dbxref=GeneID:111120752;Name=LOC111120752;gbkey=Gene;gene=LOC111120752;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t106461\t107460\t.\t+\t.\tID=gene-LOC111120752;Dbxref=GeneID:111120752;Name=LOC111120752;gbkey=Gene;gene=LOC111120752;gene_biotype=protein_coding\n", " 70041 C_virginica-3.0-flanks.gff\n" ] } ], "source": [ "!head C_virginica-3.0-flanks.gff\n", "!wc -l C_virginica-3.0-flanks.gff" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Upstream flanks" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "#Create 1 kb upstream flanking regions (-l) based on strand (-s)\n", "#Subtract existing genes from artificial flanks\n", "!{bedtoolsDirectory}/flankBed \\\n", "-i C_virginica-3.0-gene.gff \\\n", "-g C_virginica-3.0-sequence-lengths.txt \\\n", "-l 1000 \\\n", "-r 0 \\\n", "-s \\\n", "| {bedtoolsDirectory}/subtractBed \\\n", "-a - \\\n", "-b C_virginica-3.0-gene.gff \\\n", "> C_virginica-3.0-upstream.gff" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ 
"NC_035780.1\tGnomon\tgene\t12578\t13577\t.\t+\t.\tID=gene-LOC111116054;Dbxref=GeneID:111116054;Name=LOC111116054;gbkey=Gene;gene=LOC111116054;gene_biotype=lncRNA\n", "NC_035780.1\tGnomon\tgene\t27961\t28960\t.\t+\t.\tID=gene-LOC111126949;Dbxref=GeneID:111126949;Name=LOC111126949;gbkey=Gene;gene=LOC111126949;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t66898\t67897\t.\t-\t.\tID=gene-LOC111110729;Dbxref=GeneID:111110729;Name=LOC111110729;gbkey=Gene;gene=LOC111110729;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t95255\t96254\t.\t-\t.\tID=gene-LOC111112434;Dbxref=GeneID:111112434;Name=LOC111112434;gbkey=Gene;gene=LOC111112434;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t98840\t99839\t.\t+\t.\tID=gene-LOC111120752;Dbxref=GeneID:111120752;Name=LOC111120752;gbkey=Gene;gene=LOC111120752;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t110078\t111077\t.\t-\t.\tID=gene-LOC111128944;Dbxref=GeneID:111128944;Name=LOC111128944;gbkey=Gene;gene=LOC111128944;gene_biotype=protein_coding;partial=true;start_range=.,108305\n", "NC_035780.1\tGnomon\tgene\t150859\t151858\t.\t+\t.\tID=gene-LOC111128953;Dbxref=GeneID:111128953;Name=LOC111128953;gbkey=Gene;gene=LOC111128953;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t183799\t184798\t.\t-\t.\tID=gene-LOC111105691;Dbxref=GeneID:111105691;Name=LOC111105691;gbkey=Gene;gene=LOC111105691;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t193595\t194594\t.\t-\t.\tID=gene-LOC111133554;Dbxref=GeneID:111133554;Name=LOC111133554;gbkey=Gene;gene=LOC111133554;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t207744\t208743\t.\t-\t.\tID=gene-LOC111125466;Dbxref=GeneID:111125466;Name=LOC111125466;gbkey=Gene;gene=LOC111125466;gene_biotype=protein_coding\n", " 34817 C_virginica-3.0-upstream.gff\n" ] } ], "source": [ "!head C_virginica-3.0-upstream.gff\n", "!wc -l C_virginica-3.0-upstream.gff" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Downstream 
flanks" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "#Create 1 kb downstream flanking regions (-r) based on strand (-s)\n", "#Subtract existing genes from artificial flanks\n", "!{bedtoolsDirectory}/flankBed \\\n", "-i C_virginica-3.0-gene.gff \\\n", "-g C_virginica-3.0-sequence-lengths.txt \\\n", "-l 0 \\\n", "-r 1000 \\\n", "-s \\\n", "| {bedtoolsDirectory}/subtractBed \\\n", "-a - \\\n", "-b C_virginica-3.0-gene.gff \\\n", "> C_virginica-3.0-downstream.gff" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\tGnomon\tgene\t14595\t15594\t.\t+\t.\tID=gene-LOC111116054;Dbxref=GeneID:111116054;Name=LOC111116054;gbkey=Gene;gene=LOC111116054;gene_biotype=lncRNA\n", "NC_035780.1\tGnomon\tgene\t33325\t34324\t.\t+\t.\tID=gene-LOC111126949;Dbxref=GeneID:111126949;Name=LOC111126949;gbkey=Gene;gene=LOC111126949;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t42111\t43110\t.\t-\t.\tID=gene-LOC111110729;Dbxref=GeneID:111110729;Name=LOC111110729;gbkey=Gene;gene=LOC111110729;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t84606\t85605\t.\t-\t.\tID=gene-LOC111112434;Dbxref=GeneID:111112434;Name=LOC111112434;gbkey=Gene;gene=LOC111112434;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t106461\t107460\t.\t+\t.\tID=gene-LOC111120752;Dbxref=GeneID:111120752;Name=LOC111120752;gbkey=Gene;gene=LOC111120752;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t107305\t108304\t.\t-\t.\tID=gene-LOC111128944;Dbxref=GeneID:111128944;Name=LOC111128944;gbkey=Gene;gene=LOC111128944;gene_biotype=protein_coding;partial=true;start_range=.,108305\n", "NC_035780.1\tGnomon\tgene\t157537\t158536\t.\t+\t.\tID=gene-LOC111128953;Dbxref=GeneID:111128953;Name=LOC111128953;gbkey=Gene;gene=LOC111128953;gene_biotype=protein_coding\n",
"NC_035780.1\tGnomon\tgene\t162809\t163808\t.\t-\t.\tID=gene-LOC111105691;Dbxref=GeneID:111105691;Name=LOC111105691;gbkey=Gene;gene=LOC111105691;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t189449\t190448\t.\t-\t.\tID=gene-LOC111133554;Dbxref=GeneID:111133554;Name=LOC111133554;gbkey=Gene;gene=LOC111133554;gene_biotype=protein_coding\n", "NC_035780.1\tGnomon\tgene\t203243\t204242\t.\t-\t.\tID=gene-LOC111125466;Dbxref=GeneID:111125466;Name=LOC111125466;gbkey=Gene;gene=LOC111125466;gene_biotype=protein_coding\n", " 35224 C_virginica-3.0-downstream.gff\n" ] } ], "source": [ "!head C_virginica-3.0-downstream.gff\n", "!wc -l C_virginica-3.0-downstream.gff" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2j. Intergenic regions" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "#Find the complement of genes, then subtract flanks to obtain intergenic regions\n", "!{bedtoolsDirectory}/complementBed \\\n", "-i C_virginica-3.0-gene.gff -sorted \\\n", "-g C_virginica-3.0-sequence-lengths.txt \\\n", "| {bedtoolsDirectory}/subtractBed \\\n", "-a - \\\n", "-b C_virginica-3.0-flanks.gff \\\n", "> C_virginica-3.0-intergenic.bed" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t0\t12577\n", "NC_035780.1\t15594\t27960\n", "NC_035780.1\t34324\t42110\n", "NC_035780.1\t67897\t84605\n", "NC_035780.1\t96254\t98839\n", "NC_035780.1\t111077\t150858\n", "NC_035780.1\t158536\t162808\n", "NC_035780.1\t184798\t189448\n", "NC_035780.1\t194594\t203242\n", "NC_035780.1\t208743\t213890\n", " 23949 C_virginica-3.0-intergenic.bed\n" ] } ], "source": [ "!head C_virginica-3.0-intergenic.bed\n", "!wc -l C_virginica-3.0-intergenic.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2k. 
Transposable elements" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " % Total % Received % Xferd Average Speed Time Time Time Current\n", " Dload Upload Total Spent Left Speed\n", "100 8847k 100 8847k 0 0 3621k 0 0:00:02 0:00:02 --:--:-- 3623k\n" ] } ], "source": [ "!curl https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/022/765/GCF_002022765.2_C_virginica-3.0/GCF_002022765.2_C_virginica-3.0_rm.out.gz \\\n", "> C_virginica-3.0-rm.te.gz" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "!gunzip -k C_virginica-3.0-rm.te.gz" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " SW perc perc perc query position in query matching repeat position in repeat\r\n", "score div. del. ins. sequence begin end (left) repeat class/family begin end (left) ID\r\n", "\r\n", " 71 0.000 0.000 0.000 NC_035780.1 1473 1535 (65666905) + (TAACCC)n Simple_repeat 1 63 (0) 1\r\n", " 13 14.100 0.000 6.100 NC_035780.1 8261 8295 (65660145) + (CTCCT)n Simple_repeat 1 33 (0) 2\r\n", " 23 18.900 0.000 0.000 NC_035780.1 10552 10600 (65657840) + (TGAA)n Simple_repeat 1 49 (0) 3\r\n", " 37 0.000 0.000 0.000 NC_035780.1 11265 11298 (65657142) + (AAG)n Simple_repeat 1 34 (0) 4\r\n", " 72 0.000 0.000 0.000 NC_035780.1 12211 12271 (65656169) + (AG)n Simple_repeat 1 61 (0) 5\r\n", " 14 21.400 7.400 0.000 NC_035780.1 15431 15484 (65652956) + (TGTATG)n Simple_repeat 1 58 (0) 6\r\n", " 34 0.000 3.000 0.000 NC_035780.1 15520 15552 (65652888) + (GA)n Simple_repeat 1 34 (0) 7\r\n" ] } ], "source": [ "!head C_virginica-3.0-rm.te" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "#Convert RepeatMasker output to a BEDfile\n", "#Skip the 3 header lines (tail -n +4 starts printing at line 4)\n", "#Print columns 5-7 as a tab-delimited output\n", "#NOTE(review): RepeatMasker begin coordinates appear to be 1-based while BED starts are 0-based; column 6 is copied unchanged, so confirm whether the start needs a -1 shift\n", "!tail -n +4 C_virginica-3.0-rm.te 
\\\n", "| awk 'BEGIN{OFS= \"\\t\"} {print $5, $6, $7}' \\\n", "> C_virginica-3.0-rm.te.bed" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t1473\t1535\n", "NC_035780.1\t8261\t8295\n", "NC_035780.1\t10552\t10600\n", "NC_035780.1\t11265\t11298\n", "NC_035780.1\t12211\t12271\n", "NC_035780.1\t15431\t15484\n", "NC_035780.1\t15520\t15552\n", "NC_035780.1\t15585\t15619\n", "NC_035780.1\t16397\t16434\n", "NC_035780.1\t16631\t16653\n", " 344267 C_virginica-3.0-rm.te.bed\n" ] } ], "source": [ "!head C_virginica-3.0-rm.te.bed\n", "!wc -l C_virginica-3.0-rm.te.bed" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2l. C->T SNPs" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "../output/methylation-landscape/12M_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n", "../output/methylation-landscape/13M_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n", "../output/methylation-landscape/16F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n", "../output/methylation-landscape/19F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n", "../output/methylation-landscape/22F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n", "../output/methylation-landscape/23M_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n", "../output/methylation-landscape/29F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n", "../output/methylation-landscape/31M_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n", "../output/methylation-landscape/35F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n", "../output/methylation-landscape/36F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n", "../output/methylation-landscape/39F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n", "../output/methylation-landscape/3F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n", "../output/methylation-landscape/41F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n",
"../output/methylation-landscape/44F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n", "../output/methylation-landscape/48M_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n", "../output/methylation-landscape/50F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n", "../output/methylation-landscape/52F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n", "../output/methylation-landscape/53F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n", "../output/methylation-landscape/54F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n", "../output/methylation-landscape/59M_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n", "../output/methylation-landscape/64M_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n", "../output/methylation-landscape/6M_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n", "../output/methylation-landscape/76F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n", "../output/methylation-landscape/77F_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n", "../output/methylation-landscape/7M_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n", "../output/methylation-landscape/9M_R1_val_1_bismark_bt2_pe.SNP-results.vcf\r\n" ] } ], "source": [ "!find ../output/methylation-landscape/*vcf" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=56;ADF=0,0;ADR=35,21;AD=35,21;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:56:0,0:35,21:35,21:0,0,28,0,0,21,35,0:0,0,36,0,0,36,36,0:0.625,0.375\n", "NC_035780.1\t33152\t.\tC\tT\t1000\tPASS\tDP=50;ADF=0,0;ADR=25,25;AD=25,25;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:50:0,0:25,25:25,25:0,0,39,0,0,25,25,0:0,0,35,0,0,36,37,0:0.500,0.500\n", "NC_035780.1\t39853\t.\tC\tT\t1000\tPASS\tDP=80;ADF=0,0;ADR=48,32;AD=48,32;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:80:0,0:48,32:48,32:0,0,56,0,0,32,48,0:0,0,37,0,0,36,36,0:0.600,0.400\n", 
"NC_035780.1\t80456\t.\tC\tT\t108\tPASS\tDP=37;ADF=0,0;ADR=28,9;AD=28,9;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:37:0,0:28,9:28,9:0,0,38,0,0,9,28,0:0,0,36,0,0,36,36,0:0.757,0.243\n", "NC_035780.1\t80703\t.\tC\tT\t1000\tPASS\tDP=25;ADF=0,0;ADR=0,25;AD=0,25;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:25:0,0:0,25:0,25:0,0,14,0,0,25,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t90833\t.\tC\tT\t1000\tLow\tDP=7;ADF=0,0;ADR=0,7;AD=0,7;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:7:0,0:0,7:0,7:0,0,23,0,0,7,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t92393\t.\tC\tT\t1000\tPASS\tDP=47;ADF=0,0;ADR=19,28;AD=19,28;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:47:0,0:19,28:19,28:0,0,25,0,0,28,19,0:0,0,37,0,0,34,37,0:0.404,0.596\n", "NC_035780.1\t92606\t.\tC\tT\t15\tPASS\tDP=15;ADF=0,0;ADR=12,3;AD=12,3;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:15:0,0:12,3:12,3:0,0,11,0,0,3,12,0:0,0,35,0,0,37,37,0:0.800,0.200\n", "NC_035780.1\t102123\t.\tC\tT\t4\tPASS\tDP=23;ADF=0,0;ADR=20,3;AD=20,3;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:23:0,0:20,3:20,3:0,0,7,0,0,3,20,0:0,0,37,0,0,33,36,0:0.870,0.130\n", "NC_035780.1\t107783\t.\tC\tT\t1000\tPASS\tDP=25;ADF=0,0;ADR=14,11;AD=14,11;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:25:0,0:14,11:14,11:0,0,28,0,0,11,14,0:0,0,36,0,0,35,35,0:0.560,0.440\n", " 73105 ../output/methylation-landscape/12M_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t31025\t.\tC\tT\t1000\tPASS\tDP=59;ADF=0,0;ADR=26,33;AD=26,33;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:59:0,0:26,33:26,33:0,0,91,0,0,33,26,0:0,0,36,0,0,36,37,0:0.441,0.559\n", "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=40;ADF=0,0;ADR=0,40;AD=0,40;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:40:0,0:0,40:0,40:0,0,28,0,0,40,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t39712\t.\tC\tT\t1000\tPASS\tDP=40;ADF=0,0;ADR=25,15;AD=25,15;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:40:0,0:25,15:25,15:0,0,41,0,0,15,25,0:0,0,36,0,0,34,37,0:0.625,0.375\n", 
"NC_035780.1\t80703\t.\tC\tT\t1000\tPASS\tDP=18;ADF=0,0;ADR=0,18;AD=0,18;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:18:0,0:0,18:0,18:0,0,16,0,0,18,0,0:0,0,35,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t89426\t.\tC\tT\t32\tLow\tDP=3;ADF=0,0;ADR=0,3;AD=0,3;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:3:0,0:0,3:0,3:0,0,1,0,0,3,0,0:0,0,37,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t90833\t.\tC\tT\t1000\tPASS\tDP=12;ADF=0,0;ADR=0,12;AD=0,12;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:12:0,0:0,12:0,12:0,0,41,0,0,12,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t97572\t.\tC\tT\t1000\tPASS\tDP=62;ADF=0,0;ADR=42,20;AD=42,20;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:62:0,0:42,20:42,20:0,0,80,0,0,20,42,0:0,0,36,0,0,36,36,0:0.677,0.323\n", "NC_035780.1\t98247\t.\tC\tT\t1000\tPASS\tDP=26;ADF=0,0;ADR=11,15;AD=11,15;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:26:0,0:11,15:11,15:0,0,29,0,0,15,11,0:0,0,36,0,0,36,36,0:0.423,0.577\n", "NC_035780.1\t99098\t.\tC\tT\t109\tPASS\tDP=12;ADF=0,0;ADR=0,12;AD=0,12;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:12:0,0:0,12:0,12:0,0,4,0,0,12,0,0:0,0,37,0,0,34,0,0:0.000,1.000\n", "NC_035780.1\t102123\t.\tC\tT\t1000\tPASS\tDP=11;ADF=0,0;ADR=0,11;AD=0,11;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:11:0,0:0,11:0,11:0,0,7,0,0,11,0,0:0,0,34,0,0,37,0,0:0.000,1.000\n", " 73381 ../output/methylation-landscape/13M_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t3608\t.\tC\tT\t1000\tLow\tDP=7;ADF=0,0;ADR=0,7;AD=0,7;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:7:0,0:0,7:0,7:0,0,11,0,0,7,0,0:0,0,37,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t31025\t.\tC\tT\t1000\tPASS\tDP=33;ADF=0,0;ADR=17,16;AD=17,16;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:33:0,0:17,16:17,16:0,0,66,0,0,16,17,0:0,0,37,0,0,37,36,0:0.515,0.485\n", "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=37;ADF=0,0;ADR=0,37;AD=0,37;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:37:0,0:0,37:0,37:0,0,29,0,0,37,0,0:0,0,36,0,0,36,0,0:0.000,1.000\n", 
"NC_035780.1\t38869\t.\tC\tT\t1000\tLow\tDP=8;ADF=0,0;ADR=0,8;AD=0,8;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:8:0,0:0,8:0,8:0,0,12,0,0,8,0,0:0,0,37,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t80703\t.\tC\tT\t1000\tPASS\tDP=11;ADF=0,0;ADR=0,11;AD=0,11;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:11:0,0:0,11:0,11:0,0,10,0,0,11,0,0:0,0,37,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t90833\t.\tC\tT\t75\tLow\tDP=7;ADF=0,0;ADR=0,7;AD=0,7;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:7:0,0:0,7:0,7:0,0,52,0,0,7,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t91547\t.\tC\tT\t26\tLow\tDP=4;ADF=0,0;ADR=0,4;AD=0,4;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:4:0,0:0,4:0,4:0,0,1,0,0,4,0,0:0,0,37,0,0,34,0,0:0.000,1.000\n", "NC_035780.1\t92606\t.\tC\tT\t122\tLow\tDP=5;ADF=0,0;ADR=0,5;AD=0,5;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:5:0,0:0,5:0,5:0,0,8,0,0,5,0,0:0,0,34,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t99098\t.\tC\tT\t1000\tLow\tDP=8;ADF=0,0;ADR=0,8;AD=0,8;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:8:0,0:0,8:0,8:0,0,5,0,0,8,0,0:0,0,37,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t99902\t.\tC\tT\t1000\tPASS\tDP=33;ADF=0,0;ADR=20,13;AD=20,13;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:33:0,0:20,13:20,13:0,0,31,0,0,13,20,0:0,0,36,0,0,37,36,0:0.606,0.394\n", " 73148 ../output/methylation-landscape/16F_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t7349\t.\tC\tT\t1000\tPASS\tDP=30;ADF=0,0;ADR=0,30;AD=0,30;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:30:0,0:0,30:0,30:0,0,36,0,0,30,0,0:0,0,36,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t8033\t.\tC\tT\t94\tPASS\tDP=67;ADF=0,0;ADR=0,67;AD=0,67;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:67:0,0:0,67:0,67:0,0,8,0,0,67,0,0:0,0,36,0,0,35,0,0:0.000,1.000\n", "NC_035780.1\t26127\t.\tC\tT\t1000\tPASS\tDP=33;ADF=0,0;ADR=0,33;AD=0,33;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:33:0,0:0,33:0,33:0,0,38,0,0,33,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", 
"NC_035780.1\t31440\t.\tC\tT\t1000\tPASS\tDP=51;ADF=0,0;ADR=0,51;AD=0,51;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:51:0,0:0,51:0,51:0,0,46,0,0,51,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t31941\t.\tC\tT\t1000\tLow\tDP=8;ADF=0,0;ADR=0,8;AD=0,8;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:8:0,0:0,8:0,8:0,0,21,0,0,8,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=44;ADF=0,0;ADR=0,44;AD=0,44;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:44:0,0:0,44:0,44:0,0,25,0,0,44,0,0:0,0,37,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t37558\t.\tC\tT\t1000\tPASS\tDP=31;ADF=0,0;ADR=11,20;AD=11,20;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:31:0,0:11,20:11,20:0,0,45,0,0,20,11,0:0,0,36,0,0,35,36,0:0.355,0.645\n", "NC_035780.1\t38504\t.\tC\tT\t70\tPASS\tDP=22;ADF=0,0;ADR=16,6;AD=16,6;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:22:0,0:16,6:16,6:0,0,27,0,0,6,16,0:0,0,37,0,0,37,36,0:0.727,0.273\n", "NC_035780.1\t38869\t.\tC\tT\t1000\tPASS\tDP=10;ADF=0,0;ADR=0,10;AD=0,10;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:10:0,0:0,10:0,10:0,0,28,0,0,10,0,0:0,0,37,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t43027\t.\tC\tT\t105\tLow\tDP=4;ADF=0,0;ADR=0,4;AD=0,4;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:4:0,0:0,4:0,4:0,0,3,0,0,4,0,0:0,0,37,0,0,37,0,0:0.000,1.000\n", " 73242 ../output/methylation-landscape/19F_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t3608\t.\tC\tT\t60\tLow\tDP=5;ADF=0,0;ADR=2,3;AD=2,3;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:5:0,0:2,3:2,3:0,0,6,0,0,3,2,0:0,0,37,0,0,37,37,0:0.400,0.600\n", "NC_035780.1\t5192\t.\tC\tT\t154\tPASS\tDP=22;ADF=0,0;ADR=15,7;AD=15,7;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:22:0,0:15,7:15,7:0,0,13,0,0,7,15,0:0,0,36,0,0,37,37,0:0.682,0.318\n", "NC_035780.1\t7349\t.\tC\tT\t1000\tPASS\tDP=15;ADF=0,0;ADR=5,10;AD=5,10;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:15:0,0:5,10:5,10:0,0,20,0,0,10,5,0:0,0,37,0,0,37,35,0:0.333,0.667\n", 
"NC_035780.1\t8033\t.\tC\tT\t1000\tPASS\tDP=42;ADF=0,0;ADR=20,22;AD=20,22;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:42:0,0:20,22:20,22:0,0,2,0,0,22,20,0:0,0,31,0,0,36,37,0:0.476,0.524\n", "NC_035780.1\t26127\t.\tC\tT\t1000\tPASS\tDP=21;ADF=0,0;ADR=12,9;AD=12,9;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:21:0,0:12,9:12,9:0,0,20,0,0,9,12,0:0,0,37,0,0,37,37,0:0.571,0.429\n", "NC_035780.1\t31440\t.\tC\tT\t1000\tPASS\tDP=38;ADF=0,0;ADR=18,20;AD=18,20;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:38:0,0:18,20:18,20:0,0,38,0,0,20,18,0:0,0,36,0,0,36,36,0:0.474,0.526\n", "NC_035780.1\t31941\t.\tC\tT\t4\tLow\tDP=8;ADF=0,0;ADR=5,3;AD=5,3;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:8:0,0:5,3:5,3:0,0,22,0,0,3,5,0:0,0,37,0,0,33,37,0:0.625,0.375\n", "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=26;ADF=0,0;ADR=0,26;AD=0,26;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:26:0,0:0,26:0,26:0,0,20,0,0,26,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t39159\t.\tC\tT\t1000\tPASS\tDP=30;ADF=0,0;ADR=12,18;AD=12,18;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:30:0,0:12,18:12,18:0,0,19,0,0,18,12,0:0,0,36,0,0,37,36,0:0.400,0.600\n", "NC_035780.1\t44638\t.\tC\tT\t1000\tPASS\tDP=21;ADF=0,0;ADR=6,15;AD=6,15;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:21:0,0:6,15:6,15:0,0,40,0,0,15,6,0:0,0,36,0,0,37,35,0:0.286,0.714\n", " 66421 ../output/methylation-landscape/22F_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t7358\t.\tC\tT\t1000\tPASS\tDP=82;ADF=0,0;ADR=40,42;AD=40,42;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:82:0,0:40,42:40,42:0,0,31,0,0,42,40,0:0,0,37,0,0,36,36,0:0.488,0.512\n", "NC_035780.1\t31025\t.\tC\tT\t1000\tPASS\tDP=62;ADF=0,0;ADR=36,26;AD=36,26;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:62:0,0:36,26:36,26:0,0,90,0,0,26,36,0:0,0,36,0,0,37,36,0:0.581,0.419\n", "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=51;ADF=0,0;ADR=0,51;AD=0,51;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:51:0,0:0,51:0,51:0,0,24,0,0,51,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", 
"NC_035780.1\t38869\t.\tC\tT\t1000\tPASS\tDP=45;ADF=0,0;ADR=27,18;AD=27,18;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:45:0,0:27,18:27,18:0,0,72,0,0,18,27,0:0,0,36,0,0,37,36,0:0.600,0.400\n", "NC_035780.1\t43313\t.\tC\tT\t1000\tPASS\tDP=51;ADF=0,0;ADR=22,29;AD=22,29;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:51:0,0:22,29:22,29:0,0,49,0,0,29,22,0:0,0,36,0,0,37,36,0:0.431,0.569\n", "NC_035780.1\t53163\t.\tC\tT\t1000\tPASS\tDP=42;ADF=0,0;ADR=20,22;AD=20,22;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:42:0,0:20,22:20,22:0,0,19,0,0,22,20,0:0,0,36,0,0,37,36,0:0.476,0.524\n", "NC_035780.1\t66859\t.\tC\tT\t53\tPASS\tDP=24;ADF=0,0;ADR=19,5;AD=19,5;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:24:0,0:19,5:19,5:0,0,17,0,0,5,19,0:0,0,37,0,0,37,36,0:0.792,0.208\n", "NC_035780.1\t80493\t.\tC\tT\t1000\tPASS\tDP=13;ADF=0,0;ADR=0,13;AD=0,13;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:13:0,0:0,13:0,13:0,0,29,0,0,13,0,0:0,0,37,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t80664\t.\tC\tT\t1000\tPASS\tDP=23;ADF=0,0;ADR=16,7;AD=16,7;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:23:0,0:16,7:16,7:0,0,8,0,0,7,16,0:0,0,37,0,0,34,36,0:0.696,0.304\n", "NC_035780.1\t80703\t.\tC\tT\t59\tLow\tDP=7;ADF=0,0;ADR=0,7;AD=0,7;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:7:0,0:0,7:0,7:0,0,2,0,0,7,0,0:0,0,37,0,0,37,0,0:0.000,1.000\n", " 74657 ../output/methylation-landscape/23M_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t3608\t.\tC\tT\t1000\tLow\tDP=7;ADF=0,0;ADR=0,7;AD=0,7;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:7:0,0:0,7:0,7:0,0,6,0,0,7,0,0:0,0,37,0,0,35,0,0:0.000,1.000\n", "NC_035780.1\t9172\t.\tC\tT\t1000\tPASS\tDP=55;ADF=0,0;ADR=0,55;AD=0,55;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:55:0,0:0,55:0,55:0,0,23,0,0,55,0,0:0,0,36,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t18840\t.\tC\tT\t1000\tPASS\tDP=12;ADF=0,0;ADR=0,12;AD=0,12;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:12:0,0:0,12:0,12:0,0,22,0,0,12,0,0:0,0,35,0,0,37,0,0:0.000,1.000\n", 
"NC_035780.1\t25174\t.\tC\tT\t76\tPASS\tDP=39;ADF=0,0;ADR=32,7;AD=32,7;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:39:0,0:32,7:32,7:0,0,22,0,0,7,32,0:0,0,35,0,0,35,37,0:0.821,0.179\n", "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=34;ADF=0,0;ADR=2,32;AD=2,32;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:34:0,0:2,32:2,32:0,0,30,0,0,32,2,0:0,0,36,0,0,37,37,0:0.059,0.941\n", "NC_035780.1\t38869\t.\tC\tT\t1000\tPASS\tDP=23;ADF=0,0;ADR=5,18;AD=5,18;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:23:0,0:5,18:5,18:0,0,35,0,0,18,5,0:0,0,36,0,0,35,37,0:0.217,0.783\n", "NC_035780.1\t63726\t.\tC\tT\t1000\tPASS\tDP=14;ADF=0,0;ADR=7,7;AD=7,7;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:14:0,0:7,7:7,7:0,0,9,0,0,7,7,0:0,0,34,0,0,37,37,0:0.500,0.500\n", "NC_035780.1\t66048\t.\tC\tT\t1000\tPASS\tDP=17;ADF=0,0;ADR=7,10;AD=7,10;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:17:0,0:7,10:7,10:0,0,18,0,0,10,7,0:0,0,36,0,0,37,35,0:0.412,0.588\n", "NC_035780.1\t79381\t.\tC\tT\t1000\tPASS\tDP=17;ADF=0,0;ADR=4,13;AD=4,13;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:17:0,0:4,13:4,13:0,0,5,0,0,13,4,0:0,0,35,0,0,37,31,0:0.235,0.765\n", "NC_035780.1\t80493\t.\tC\tT\t1000\tPASS\tDP=12;ADF=0,0;ADR=0,12;AD=0,12;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:12:0,0:0,12:0,12:0,0,11,0,0,12,0,0:0,0,37,0,0,37,0,0:0.000,1.000\n", " 67732 ../output/methylation-landscape/29F_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=46;ADF=0,0;ADR=0,46;AD=0,46;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:46:0,0:0,46:0,46:0,0,21,0,0,46,0,0:0,0,36,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t49537\t.\tC\tT\t1000\tPASS\tDP=57;ADF=0,0;ADR=17,40;AD=17,40;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:57:0,0:17,40:17,40:0,0,27,0,0,40,17,0:0,0,35,0,0,37,37,0:0.298,0.702\n", "NC_035780.1\t80703\t.\tC\tT\t1000\tPASS\tDP=10;ADF=0,0;ADR=0,10;AD=0,10;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:10:0,0:0,10:0,10:0,0,7,0,0,10,0,0:0,0,34,0,0,37,0,0:0.000,1.000\n", 
"NC_035780.1\t90833\t.\tC\tT\t1000\tLow\tDP=8;ADF=0,0;ADR=0,8;AD=0,8;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:8:0,0:0,8:0,8:0,0,25,0,0,8,0,0:0,0,37,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t103481\t.\tC\tT\t1000\tPASS\tDP=24;ADF=0,0;ADR=0,24;AD=0,24;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:24:0,0:0,24:0,24:0,0,14,0,0,24,0,0:0,0,36,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t105650\t.\tC\tT\t83\tLow\tDP=6;ADF=0,0;ADR=2,4;AD=2,4;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:6:0,0:2,4:2,4:0,0,11,0,0,4,2,0:0,0,36,0,0,37,37,0:0.333,0.667\n", "NC_035780.1\t109517\t.\tC\tT\t1000\tPASS\tDP=40;ADF=0,0;ADR=0,40;AD=0,40;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:40:0,0:0,40:0,40:0,0,40,0,0,40,0,0:0,0,34,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t126029\t.\tC\tT\t89\tLow\tDP=5;ADF=0,0;ADR=0,5;AD=0,5;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:5:0,0:0,5:0,5:0,0,24,0,0,5,0,0:0,0,37,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t126637\t.\tC\tT\t1000\tPASS\tDP=51;ADF=0,0;ADR=40,11;AD=40,11;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:51:0,0:40,11:40,11:0,0,33,0,0,11,40,0:0,0,36,0,0,36,36,0:0.784,0.216\n", "NC_035780.1\t127764\t.\tC\tT\t9\tLow\tDP=3;ADF=0,0;ADR=0,3;AD=0,3;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t1/1:3:0,0:0,3:0,3:0,0,0,0,0,3,0,0:0,0,0,0,0,37,0,0:0.000,1.000\n", " 60927 ../output/methylation-landscape/31M_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t26922\t.\tC\tT\t1000\tPASS\tDP=40;ADF=0,0;ADR=20,20;AD=20,20;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:40:0,0:20,20:20,20:0,0,21,0,0,20,20,0:0,0,37,0,0,37,37,0:0.500,0.500\n", "NC_035780.1\t31025\t.\tC\tT\t1000\tPASS\tDP=16;ADF=0,0;ADR=0,16;AD=0,16;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:16:0,0:0,16:0,16:0,0,43,0,0,16,0,0:0,0,37,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=34;ADF=0,0;ADR=0,34;AD=0,34;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:34:0,0:0,34:0,34:0,0,36,0,0,34,0,0:0,0,35,0,0,37,0,0:0.000,1.000\n", 
"NC_035780.1\t49803\t.\tC\tT\t1000\tPASS\tDP=17;ADF=0,0;ADR=0,17;AD=0,17;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:17:0,0:0,17:0,17:0,0,42,0,0,17,0,0:0,0,36,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t66859\t.\tC\tT\t91\tLow\tDP=7;ADF=0,0;ADR=0,7;AD=0,7;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:7:0,0:0,7:0,7:0,0,3,0,0,7,0,0:0,0,33,0,0,35,0,0:0.000,1.000\n", "NC_035780.1\t80493\t.\tC\tT\t1000\tLow\tDP=8;ADF=0,0;ADR=0,8;AD=0,8;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:8:0,0:0,8:0,8:0,0,24,0,0,8,0,0:0,0,37,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t90833\t.\tC\tT\t137\tLow\tDP=8;ADF=0,0;ADR=3,5;AD=3,5;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:8:0,0:3,5:3,5:0,0,5,0,0,5,3,0:0,0,37,0,0,35,37,0:0.375,0.625\n", "NC_035780.1\t92047\t.\tC\tT\t1000\tLow\tDP=7;ADF=0,0;ADR=0,7;AD=0,7;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:7:0,0:0,7:0,7:0,0,15,0,0,7,0,0:0,0,36,0,0,34,0,0:0.000,1.000\n", "NC_035780.1\t97572\t.\tC\tT\t1000\tPASS\tDP=33;ADF=0,0;ADR=21,12;AD=21,12;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:33:0,0:21,12:21,12:0,0,23,0,0,12,21,0:0,0,36,0,0,35,37,0:0.636,0.364\n", "NC_035780.1\t100665\t.\tC\tT\t47\tPASS\tDP=13;ADF=0,0;ADR=9,4;AD=9,4;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:13:0,0:9,4:9,4:0,0,16,0,0,4,9,0:0,0,36,0,0,34,37,0:0.692,0.308\n", " 70248 ../output/methylation-landscape/35F_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t3608\t.\tC\tT\t1000\tPASS\tDP=17;ADF=0,0;ADR=10,7;AD=10,7;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:17:0,0:10,7:10,7:0,0,14,0,0,7,10,0:0,0,36,0,0,37,37,0:0.588,0.412\n", "NC_035780.1\t31025\t.\tC\tT\t1000\tPASS\tDP=34;ADF=0,0;ADR=21,13;AD=21,13;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:34:0,0:21,13:21,13:0,0,49,0,0,13,21,0:0,0,36,0,0,35,36,0:0.618,0.382\n", "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=43;ADF=0,0;ADR=0,43;AD=0,43;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:43:0,0:0,43:0,43:0,0,27,0,0,43,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", 
"NC_035780.1\t41708\t.\tC\tT\t1000\tPASS\tDP=15;ADF=0,0;ADR=8,7;AD=8,7;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:15:0,0:8,7:8,7:0,0,17,0,0,7,8,0:0,0,36,0,0,35,37,0:0.533,0.467\n", "NC_035780.1\t48987\t.\tC\tT\t112\tPASS\tDP=11;ADF=0,0;ADR=0,11;AD=0,11;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:11:0,0:0,11:0,11:0,0,4,0,0,11,0,0:0,0,37,0,0,34,0,0:0.000,1.000\n", "NC_035780.1\t63726\t.\tC\tT\t1000\tPASS\tDP=27;ADF=0,0;ADR=0,27;AD=0,27;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:27:0,0:0,27:0,27:0,0,9,0,0,27,0,0:0,0,34,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t80493\t.\tC\tT\t136\tPASS\tDP=26;ADF=0,0;ADR=18,8;AD=18,8;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:26:0,0:18,8:18,8:0,0,26,0,0,8,18,0:0,0,36,0,0,34,36,0:0.692,0.308\n", "NC_035780.1\t80664\t.\tC\tT\t43\tPASS\tDP=12;ADF=0,0;ADR=8,4;AD=8,4;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:12:0,0:8,4:8,4:0,0,17,0,0,4,8,0:0,0,35,0,0,37,36,0:0.667,0.333\n", "NC_035780.1\t80703\t.\tC\tT\t1000\tLow\tDP=8;ADF=0,0;ADR=0,8;AD=0,8;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:8:0,0:0,8:0,8:0,0,12,0,0,8,0,0:0,0,37,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t90833\t.\tC\tT\t63\tLow\tDP=3;ADF=0,0;ADR=0,3;AD=0,3;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:3:0,0:0,3:0,3:0,0,7,0,0,3,0,0:0,0,37,0,0,37,0,0:0.000,1.000\n", " 71113 ../output/methylation-landscape/36F_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t3608\t.\tC\tT\t1000\tLow\tDP=8;ADF=0,0;ADR=0,8;AD=0,8;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:8:0,0:0,8:0,8:0,0,11,0,0,8,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=37;ADF=0,0;ADR=0,37;AD=0,37;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:37:0,0:0,37:0,37:0,0,21,0,0,37,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t41708\t.\tC\tT\t1000\tPASS\tDP=22;ADF=0,0;ADR=12,10;AD=12,10;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:22:0,0:12,10:12,10:0,0,20,0,0,10,12,0:0,0,37,0,0,35,35,0:0.545,0.455\n", 
"NC_035780.1\t48987\t.\tC\tT\t153\tPASS\tDP=20;ADF=0,0;ADR=13,7;AD=13,7;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:20:0,0:13,7:13,7:0,0,13,0,0,7,13,0:0,0,35,0,0,35,36,0:0.650,0.350\n", "NC_035780.1\t63726\t.\tC\tT\t1000\tLow\tDP=9;ADF=0,0;ADR=1,8;AD=1,8;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:9:0,0:1,8:1,8:0,0,5,0,0,8,1,0:0,0,35,0,0,37,37,0:0.111,0.889\n", "NC_035780.1\t80493\t.\tC\tT\t1000\tPASS\tDP=23;ADF=0,0;ADR=0,23;AD=0,23;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:23:0,0:0,23:0,23:0,0,26,0,0,23,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t80664\t.\tC\tT\t1000\tPASS\tDP=17;ADF=0,0;ADR=0,17;AD=0,17;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:17:0,0:0,17:0,17:0,0,12,0,0,17,0,0:0,0,36,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t80703\t.\tC\tT\t51\tLow\tDP=3;ADF=0,0;ADR=0,3;AD=0,3;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:3:0,0:0,3:0,3:0,0,10,0,0,3,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t83801\t.\tC\tT\t9\tLow\tDP=3;ADF=0,0;ADR=0,3;AD=0,3;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t1/1:3:0,0:0,3:0,3:0,0,0,0,0,3,0,0:0,0,0,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t91547\t.\tC\tT\t15\tLow\tDP=5;ADF=0,0;ADR=0,5;AD=0,5;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t1/1:5:0,0:0,5:0,5:0,0,0,0,0,5,0,0:0,0,0,0,0,35,0,0:0.000,1.000\n", " 69483 ../output/methylation-landscape/39F_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=23;ADF=0,0;ADR=0,23;AD=0,23;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:23:0,0:0,23:0,23:0,0,21,0,0,23,0,0:0,0,37,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t80703\t.\tC\tT\t1000\tPASS\tDP=23;ADF=0,0;ADR=0,23;AD=0,23;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:23:0,0:0,23:0,23:0,0,10,0,0,23,0,0:0,0,36,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t89426\t.\tC\tT\t4\tLow\tDP=1;ADF=0,0;ADR=0,1;AD=0,1;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000\n", 
"NC_035780.1\t90833\t.\tC\tT\t1000\tPASS\tDP=20;ADF=0,0;ADR=0,20;AD=0,20;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:20:0,0:0,20:0,20:0,0,36,0,0,20,0,0:0,0,37,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t109517\t.\tC\tT\t1000\tPASS\tDP=17;ADF=0,0;ADR=0,17;AD=0,17;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:17:0,0:0,17:0,17:0,0,29,0,0,17,0,0:0,0,37,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t110152\t.\tC\tT\t1000\tPASS\tDP=30;ADF=0,0;ADR=2,28;AD=2,28;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:30:0,0:2,28:2,28:0,0,12,0,0,28,2,0:0,0,35,0,0,37,37,0:0.067,0.933\n", "NC_035780.1\t124935\t.\tC\tT\t82\tPASS\tDP=11;ADF=0,0;ADR=0,11;AD=0,11;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:11:0,0:0,11:0,11:0,0,3,0,0,11,0,0:0,0,37,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t126029\t.\tC\tT\t154\tLow\tDP=6;ADF=0,0;ADR=0,6;AD=0,6;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:6:0,0:0,6:0,6:0,0,13,0,0,6,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t126553\t.\tC\tT\t1000\tPASS\tDP=48;ADF=0,0;ADR=25,23;AD=25,23;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:48:0,0:25,23:25,23:0,0,54,0,0,23,25,0:0,0,36,0,0,35,37,0:0.521,0.479\n", "NC_035780.1\t172994\t.\tC\tT\t1000\tPASS\tDP=10;ADF=0,0;ADR=0,10;AD=0,10;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:10:0,0:0,10:0,10:0,0,7,0,0,10,0,0:0,0,37,0,0,36,0,0:0.000,1.000\n", " 64627 ../output/methylation-landscape/3F_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t8195\t.\tC\tT\t1000\tPASS\tDP=23;ADF=0,0;ADR=14,9;AD=14,9;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:23:0,0:14,9:14,9:0,0,0,0,0,9,14,0:0,0,0,0,0,37,37,0:0.609,0.391\n", "NC_035780.1\t26127\t.\tC\tT\t98\tPASS\tDP=13;ADF=0,0;ADR=8,5;AD=8,5;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:13:0,0:8,5:8,5:0,0,13,0,0,5,8,0:0,0,35,0,0,37,37,0:0.615,0.385\n", "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=14;ADF=0,0;ADR=0,14;AD=0,14;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:14:0,0:0,14:0,14:0,0,8,0,0,14,0,0:0,0,36,0,0,36,0,0:0.000,1.000\n", 
"NC_035780.1\t39853\t.\tC\tT\t1000\tPASS\tDP=38;ADF=0,0;ADR=18,20;AD=18,20;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:38:0,0:18,20:18,20:0,0,39,0,0,20,18,0:0,0,36,0,0,36,37,0:0.474,0.526\n", "NC_035780.1\t46597\t.\tC\tT\t92\tLow\tDP=7;ADF=0,0;ADR=3,4;AD=3,4;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:7:0,0:3,4:3,4:0,0,7,0,0,4,3,0:0,0,35,0,0,37,37,0:0.429,0.571\n", "NC_035780.1\t66859\t.\tC\tT\t27\tPASS\tDP=10;ADF=0,0;ADR=7,3;AD=7,3;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:10:0,0:7,3:7,3:0,0,12,0,0,3,7,0:0,0,37,0,0,37,37,0:0.700,0.300\n", "NC_035780.1\t80703\t.\tC\tT\t1000\tPASS\tDP=10;ADF=0,0;ADR=0,10;AD=0,10;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:10:0,0:0,10:0,10:0,0,9,0,0,10,0,0:0,0,36,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t90833\t.\tC\tT\t133\tPASS\tDP=11;ADF=0,0;ADR=5,6;AD=5,6;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:11:0,0:5,6:5,6:0,0,17,0,0,6,5,0:0,0,36,0,0,37,37,0:0.455,0.545\n", "NC_035780.1\t109517\t.\tC\tT\t1000\tPASS\tDP=35;ADF=0,0;ADR=2,33;AD=2,33;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:35:0,0:2,33:2,33:0,0,20,0,0,33,2,0:0,0,37,0,0,36,37,0:0.057,0.943\n", "NC_035780.1\t122812\t.\tC\tT\t1000\tPASS\tDP=21;ADF=0,0;ADR=11,10;AD=11,10;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:21:0,0:11,10:11,10:0,0,22,0,0,10,11,0:0,0,37,0,0,35,37,0:0.524,0.476\n", " 59597 ../output/methylation-landscape/41F_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t7349\t.\tC\tT\t1000\tPASS\tDP=67;ADF=0,0;ADR=34,33;AD=34,33;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:67:0,0:34,33:34,33:0,0,47,0,0,33,34,0:0,0,36,0,0,36,37,0:0.507,0.493\n", "NC_035780.1\t8033\t.\tC\tT\t1000\tPASS\tDP=100;ADF=0,0;ADR=46,54;AD=46,54;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:100:0,0:46,54:46,54:0,0,10,0,0,54,46,0:0,0,37,0,0,36,37,0:0.460,0.540\n", "NC_035780.1\t8295\t.\tC\tT\t1000\tPASS\tDP=106;ADF=0,0;ADR=55,51;AD=55,51;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:106:0,0:55,51:55,51:0,0,2,0,0,51,55,0:0,0,37,0,0,36,37,0:0.519,0.481\n", 
"NC_035780.1\t26127\t.\tC\tT\t1000\tPASS\tDP=53;ADF=0,0;ADR=32,21;AD=32,21;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:53:0,0:32,21:32,21:0,0,75,0,0,21,32,0:0,0,36,0,0,37,35,0:0.604,0.396\n", "NC_035780.1\t31035\t.\tC\tT\t1000\tPASS\tDP=51;ADF=0,0;ADR=30,21;AD=30,21;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:51:0,0:30,21:30,21:0,0,54,0,0,21,30,0:0,0,36,0,0,35,37,0:0.588,0.412\n", "NC_035780.1\t31440\t.\tC\tT\t1000\tPASS\tDP=92;ADF=0,0;ADR=49,43;AD=49,43;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:92:0,0:49,43:49,43:0,0,62,0,0,43,49,0:0,0,36,0,0,36,36,0:0.533,0.467\n", "NC_035780.1\t31941\t.\tC\tT\t17\tPASS\tDP=31;ADF=0,0;ADR=25,6;AD=25,6;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:31:0,0:25,6:25,6:0,0,36,0,0,6,25,0:0,0,37,0,0,37,36,0:0.806,0.194\n", "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=65;ADF=0,0;ADR=0,65;AD=0,65;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:65:0,0:0,65:0,65:0,0,60,0,0,65,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t38666\t.\tC\tT\t154\tPASS\tDP=33;ADF=0,0;ADR=19,14;AD=19,14;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:33:0,0:19,14:19,14:0,0,94,0,0,14,19,0:0,0,36,0,0,37,36,0:0.576,0.424\n", "NC_035780.1\t38869\t.\tC\tT\t1000\tPASS\tDP=37;ADF=0,0;ADR=0,37;AD=0,37;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:37:0,0:0,37:0,37:0,0,81,0,0,37,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", " 27486 ../output/methylation-landscape/44F_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=39;ADF=0,0;ADR=0,39;AD=0,39;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:39:0,0:0,39:0,39:0,0,21,0,0,39,0,0:0,0,37,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t48987\t.\tC\tT\t1000\tPASS\tDP=58;ADF=0,0;ADR=42,16;AD=42,16;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:58:0,0:42,16:42,16:0,0,43,0,0,16,42,0:0,0,36,0,0,36,36,0:0.724,0.276\n", "NC_035780.1\t63726\t.\tC\tT\t1000\tPASS\tDP=36;ADF=0,0;ADR=3,33;AD=3,33;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:36:0,0:3,33:3,33:0,0,12,0,0,33,3,0:0,0,35,0,0,36,33,0:0.083,0.917\n", 
"NC_035780.1\t80493\t.\tC\tT\t69\tPASS\tDP=18;ADF=0,0;ADR=13,5;AD=13,5;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:18:0,0:13,5:13,5:0,0,16,0,0,5,13,0:0,0,36,0,0,37,36,0:0.722,0.278\n", "NC_035780.1\t80664\t.\tC\tT\t1000\tPASS\tDP=14;ADF=0,0;ADR=5,9;AD=5,9;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:14:0,0:5,9:5,9:0,0,11,0,0,9,5,0:0,0,36,0,0,37,37,0:0.357,0.643\n", "NC_035780.1\t80703\t.\tC\tT\t1000\tLow\tDP=6;ADF=0,0;ADR=0,6;AD=0,6;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:6:0,0:0,6:0,6:0,0,6,0,0,6,0,0:0,0,37,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t91547\t.\tC\tT\t1000\tLow\tDP=6;ADF=0,0;ADR=0,6;AD=0,6;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:6:0,0:0,6:0,6:0,0,8,0,0,6,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t97572\t.\tC\tT\t1000\tPASS\tDP=50;ADF=0,0;ADR=35,15;AD=35,15;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:50:0,0:35,15:35,15:0,0,84,0,0,15,35,0:0,0,36,0,0,37,37,0:0.700,0.300\n", "NC_035780.1\t103481\t.\tC\tT\t1000\tPASS\tDP=35;ADF=0,0;ADR=2,33;AD=2,33;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:35:0,0:2,33:2,33:0,0,13,0,0,33,2,0:0,0,36,0,0,37,37,0:0.057,0.943\n", "NC_035780.1\t109517\t.\tC\tT\t1000\tPASS\tDP=53;ADF=0,0;ADR=0,53;AD=0,53;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:53:0,0:0,53:0,53:0,0,145,0,0,53,0,0:0,0,36,0,0,35,0,0:0.000,1.000\n", " 69882 ../output/methylation-landscape/48M_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t8195\t.\tC\tT\t24\tLow\tDP=8;ADF=0,0;ADR=0,8;AD=0,8;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t1/1:8:0,0:0,8:0,8:0,0,0,0,0,8,0,0:0,0,0,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t26127\t.\tC\tT\t94\tPASS\tDP=19;ADF=0,0;ADR=13,6;AD=13,6;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:19:0,0:13,6:13,6:0,0,22,0,0,6,13,0:0,0,37,0,0,37,37,0:0.684,0.316\n", "NC_035780.1\t31025\t.\tC\tT\t99\tPASS\tDP=28;ADF=0,0;ADR=21,7;AD=21,7;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:28:0,0:21,7:21,7:0,0,23,0,1,7,21,0:0,0,36,0,25,37,36,0:0.750,0.250\n", 
"NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=40;ADF=0,0;ADR=0,40;AD=0,40;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:40:0,0:0,40:0,40:0,0,24,0,0,40,0,0:0,0,37,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t37829\t.\tC\tT\t72\tPASS\tDP=19;ADF=0,0;ADR=12,7;AD=12,7;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:19:0,0:12,7:12,7:0,0,41,0,0,7,12,0:0,0,36,0,0,35,35,0:0.632,0.368\n", "NC_035780.1\t44638\t.\tC\tT\t1000\tPASS\tDP=34;ADF=0,0;ADR=8,26;AD=8,26;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:34:0,0:8,26:8,26:0,0,45,0,0,26,8,0:0,0,37,0,0,36,36,0:0.235,0.765\n", "NC_035780.1\t52446\t.\tC\tT\t1000\tPASS\tDP=25;ADF=0,0;ADR=8,17;AD=8,17;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:25:0,0:8,17:8,17:0,0,19,0,0,17,8,0:0,0,36,0,0,36,37,0:0.320,0.680\n", "NC_035780.1\t63726\t.\tC\tT\t1000\tLow\tDP=6;ADF=0,0;ADR=0,6;AD=0,6;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:6:0,0:0,6:0,6:0,0,9,0,0,6,0,0:0,0,37,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t79381\t.\tC\tT\t52\tPASS\tDP=17;ADF=0,0;ADR=13,4;AD=13,4;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:17:0,0:13,4:13,4:0,0,9,0,1,4,13,0:0,0,34,0,37,37,36,0:0.765,0.235\n", "NC_035780.1\t80493\t.\tC\tT\t1000\tPASS\tDP=22;ADF=0,0;ADR=9,13;AD=9,13;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:22:0,0:9,13:9,13:0,0,22,0,0,13,9,0:0,0,35,0,0,36,37,0:0.409,0.591\n", " 69416 ../output/methylation-landscape/50F_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t5855\t.\tC\tT\t1000\tPASS\tDP=58;ADF=0,0;ADR=27,31;AD=27,31;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:58:0,0:27,31:27,31:0,0,3,0,0,31,27,0:0,0,37,0,0,35,37,0:0.466,0.534\n", "NC_035780.1\t7349\t.\tC\tT\t1000\tPASS\tDP=69;ADF=0,0;ADR=47,22;AD=47,22;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:69:0,0:47,22:47,22:0,0,14,0,0,22,47,0:0,0,36,0,0,35,36,0:0.681,0.319\n", "NC_035780.1\t8033\t.\tC\tT\t1000\tPASS\tDP=44;ADF=0,0;ADR=20,24;AD=20,24;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:44:0,0:20,24:20,24:0,0,6,0,0,24,20,0:0,0,37,0,0,37,36,0:0.455,0.545\n", 
"NC_035780.1\t26127\t.\tC\tT\t20\tPASS\tDP=64;ADF=0,0;ADR=54,10;AD=54,10;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:64:0,0:54,10:54,10:1,0,54,0,0,10,54,0:37,0,36,0,0,36,36,0:0.844,0.156\n", "NC_035780.1\t31025\t.\tC\tT\t1000\tPASS\tDP=21;ADF=0,0;ADR=10,11;AD=10,11;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:21:0,0:10,11:10,11:0,0,37,0,0,11,10,0:0,0,37,0,0,36,35,0:0.476,0.524\n", "NC_035780.1\t31440\t.\tC\tT\t1000\tPASS\tDP=38;ADF=0,0;ADR=20,18;AD=20,18;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:38:0,0:20,18:20,18:0,0,38,0,0,18,20,0:0,0,37,0,0,36,36,0:0.526,0.474\n", "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=47;ADF=0,0;ADR=0,47;AD=0,47;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:47:0,0:0,47:0,47:0,0,25,0,0,47,0,0:0,0,36,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t37829\t.\tC\tT\t1000\tPASS\tDP=18;ADF=0,0;ADR=6,12;AD=6,12;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:18:0,0:6,12:6,12:0,0,35,0,0,12,6,0:0,0,36,0,0,35,37,0:0.333,0.667\n", "NC_035780.1\t44638\t.\tC\tT\t1000\tPASS\tDP=35;ADF=0,0;ADR=22,13;AD=22,13;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:35:0,0:22,13:22,13:0,0,60,0,0,13,22,0:0,0,37,0,0,35,35,0:0.629,0.371\n", "NC_035780.1\t52446\t.\tC\tT\t1000\tPASS\tDP=37;ADF=0,0;ADR=14,23;AD=14,23;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:37:0,0:14,23:14,23:0,0,38,0,0,23,14,0:0,0,37,0,0,36,37,0:0.378,0.622\n", " 73205 ../output/methylation-landscape/52F_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t3608\t.\tC\tT\t133\tLow\tDP=5;ADF=0,0;ADR=0,5;AD=0,5;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:5:0,0:0,5:0,5:0,0,6,0,0,5,0,0:0,0,35,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=45;ADF=0,0;ADR=0,45;AD=0,45;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:45:0,0:0,45:0,45:0,0,24,0,0,45,0,0:0,0,37,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t63726\t.\tC\tT\t1000\tPASS\tDP=26;ADF=0,0;ADR=13,13;AD=13,13;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:26:0,0:13,13:13,13:0,0,14,0,0,13,13,0:0,0,37,0,0,36,37,0:0.500,0.500\n", 
"NC_035780.1\t80703\t.\tC\tT\t1000\tPASS\tDP=20;ADF=0,0;ADR=0,20;AD=0,20;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:20:0,0:0,20:0,20:0,0,7,0,0,20,0,0:0,0,37,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t89426\t.\tC\tT\t29\tLow\tDP=4;ADF=0,0;ADR=0,4;AD=0,4;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:4:0,0:0,4:0,4:0,0,1,0,0,4,0,0:0,0,37,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t90833\t.\tC\tT\t1000\tPASS\tDP=18;ADF=0,0;ADR=0,18;AD=0,18;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:18:0,0:0,18:0,18:0,0,22,0,0,18,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t102123\t.\tC\tT\t9\tLow\tDP=3;ADF=0,0;ADR=0,3;AD=0,3;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t1/1:3:0,0:0,3:0,3:0,0,0,0,0,3,0,0:0,0,0,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t105650\t.\tC\tT\t30\tLow\tDP=6;ADF=0,0;ADR=3,3;AD=3,3;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:6:0,0:3,3:3,3:0,0,15,0,0,3,3,0:0,0,37,0,0,33,37,0:0.500,0.500\n", "NC_035780.1\t109517\t.\tC\tT\t1000\tPASS\tDP=39;ADF=0,0;ADR=0,39;AD=0,39;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:39:0,0:0,39:0,39:0,0,36,0,0,39,0,0:0,0,37,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t110152\t.\tC\tT\t1000\tPASS\tDP=46;ADF=0,0;ADR=32,14;AD=32,14;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:46:0,0:32,14:32,14:0,0,16,0,0,14,32,0:0,0,36,0,0,34,36,0:0.696,0.304\n", " 73212 ../output/methylation-landscape/53F_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t3608\t.\tC\tT\t1000\tPASS\tDP=19;ADF=0,0;ADR=10,9;AD=10,9;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:19:0,0:10,9:10,9:0,0,17,0,0,9,10,0:0,0,37,0,0,37,37,0:0.526,0.474\n", "NC_035780.1\t25900\t.\tC\tT\t97\tPASS\tDP=17;ADF=0,0;ADR=10,7;AD=10,7;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:17:0,0:10,7:10,7:0,0,37,0,0,7,10,0:0,0,37,0,0,37,37,0:0.588,0.412\n", "NC_035780.1\t31025\t.\tC\tT\t1000\tPASS\tDP=22;ADF=0,0;ADR=10,12;AD=10,12;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:22:0,0:10,12:10,12:0,0,38,0,0,12,10,0:0,0,37,0,0,37,36,0:0.455,0.545\n", 
"NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=43;ADF=0,0;ADR=0,43;AD=0,43;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:43:0,0:0,43:0,43:0,0,25,0,0,43,0,0:0,0,35,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t38869\t.\tC\tT\t1000\tPASS\tDP=16;ADF=0,0;ADR=7,9;AD=7,9;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:16:0,0:7,9:7,9:0,0,34,0,0,9,7,0:0,0,37,0,0,37,37,0:0.438,0.562\n", "NC_035780.1\t63726\t.\tC\tT\t1000\tLow\tDP=7;ADF=0,0;ADR=0,7;AD=0,7;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:7:0,0:0,7:0,7:1,0,10,0,0,7,0,0:37,0,35,0,0,32,0,0:0.000,1.000\n", "NC_035780.1\t66859\t.\tC\tT\t7\tPASS\tDP=30;ADF=0,0;ADR=25,5;AD=25,5;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:30:0,0:25,5:25,5:0,0,25,0,0,5,25,0:0,0,36,0,0,32,35,0:0.833,0.167\n", "NC_035780.1\t79381\t.\tC\tT\t73\tPASS\tDP=36;ADF=0,0;ADR=30,6;AD=30,6;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:36:0,0:30,6:30,6:0,0,12,0,0,6,30,0:0,0,37,0,0,35,36,0:0.833,0.167\n", "NC_035780.1\t80664\t.\tC\tT\t1000\tPASS\tDP=25;ADF=0,0;ADR=13,12;AD=13,12;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:25:0,0:13,12:13,12:0,0,18,0,0,12,13,0:0,0,36,0,0,37,36,0:0.520,0.480\n", "NC_035780.1\t80703\t.\tC\tT\t1000\tLow\tDP=9;ADF=0,0;ADR=0,9;AD=0,9;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:9:0,0:0,9:0,9:0,0,16,0,0,9,0,0:0,0,36,0,0,36,0,0:0.000,1.000\n", " 73875 ../output/methylation-landscape/54F_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t3608\t.\tC\tT\t128\tLow\tDP=7;ADF=0,0;ADR=0,7;AD=0,7;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:7:0,0:0,7:0,7:0,0,4,0,0,7,0,0:0,0,37,0,0,35,0,0:0.000,1.000\n", "NC_035780.1\t8195\t.\tC\tT\t1000\tPASS\tDP=37;ADF=0,0;ADR=29,8;AD=29,8;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:37:0,0:29,8:29,8:0,0,2,0,0,8,29,0:0,0,37,0,0,37,37,0:0.784,0.216\n", "NC_035780.1\t25174\t.\tC\tT\t1000\tPASS\tDP=41;ADF=0,0;ADR=28,13;AD=28,13;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:41:0,0:28,13:28,13:0,0,24,0,0,13,28,0:0,0,36,0,0,35,35,0:0.683,0.317\n", 
"NC_035780.1\t26127\t.\tC\tT\t1000\tPASS\tDP=36;ADF=0,0;ADR=24,12;AD=24,12;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:36:0,0:24,12:24,12:0,0,20,0,0,12,24,0:0,0,36,0,0,36,37,0:0.667,0.333\n", "NC_035780.1\t30853\t.\tC\tT\t1000\tPASS\tDP=34;ADF=0,0;ADR=17,17;AD=17,17;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:34:0,0:17,17:17,17:0,0,57,0,0,17,17,0:0,0,37,0,0,37,36,0:0.500,0.500\n", "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=22;ADF=0,0;ADR=0,22;AD=0,22;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:22:0,0:0,22:0,22:0,0,28,0,0,22,0,0:0,0,37,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t48987\t.\tC\tT\t106\tPASS\tDP=24;ADF=0,0;ADR=18,6;AD=18,6;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:24:0,0:18,6:18,6:0,0,11,0,0,6,18,0:0,0,34,0,0,35,36,0:0.750,0.250\n", "NC_035780.1\t80493\t.\tC\tT\t67\tPASS\tDP=15;ADF=0,0;ADR=0,15;AD=0,15;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:15:0,0:0,15:0,15:0,0,3,0,0,15,0,0:0,0,37,0,0,35,0,0:0.000,1.000\n", "NC_035780.1\t80703\t.\tC\tT\t63\tLow\tDP=3;ADF=0,0;ADR=0,3;AD=0,3;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:3:0,0:0,3:0,3:0,0,5,0,0,3,0,0:0,0,35,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t86394\t.\tC\tT\t60\tLow\tDP=6;ADF=0,0;ADR=3,3;AD=3,3;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:6:0,0:3,3:3,3:0,0,5,0,0,3,3,0:0,0,35,0,0,37,37,0:0.500,0.500\n", " 62245 ../output/methylation-landscape/59M_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t2841\t.\tC\tT\t156\tPASS\tDP=35;ADF=0,0;ADR=27,8;AD=27,8;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:35:0,0:27,8:27,8:0,0,13,0,0,8,27,0:0,0,37,0,0,37,37,0:0.771,0.229\n", "NC_035780.1\t3608\t.\tC\tT\t1000\tPASS\tDP=14;ADF=0,0;ADR=0,14;AD=0,14;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:14:0,0:0,14:0,14:0,0,14,0,0,14,0,0:0,0,35,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t28148\t.\tC\tT\t46\tLow\tDP=4;ADF=0,0;ADR=0,4;AD=0,4;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:4:0,0:0,4:0,4:0,0,24,0,0,4,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", 
"NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=43;ADF=0,0;ADR=0,43;AD=0,43;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:43:0,0:0,43:0,43:0,0,37,0,0,43,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t48987\t.\tC\tT\t159\tPASS\tDP=17;ADF=0,0;ADR=11,6;AD=11,6;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:17:0,0:11,6:11,6:0,0,3,0,0,6,11,0:0,0,37,0,0,35,37,0:0.647,0.353\n", "NC_035780.1\t53163\t.\tC\tT\t47\tPASS\tDP=25;ADF=0,0;ADR=21,4;AD=21,4;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:25:0,0:21,4:21,4:0,0,4,0,0,4,21,0:0,0,37,0,0,37,36,0:0.840,0.160\n", "NC_035780.1\t62330\t.\tC\tT\t1000\tPASS\tDP=61;ADF=0,0;ADR=36,25;AD=36,25;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:61:0,0:36,25:36,25:0,0,1,0,0,25,36,0:0,0,37,0,0,37,37,0:0.590,0.410\n", "NC_035780.1\t65678\t.\tC\tT\t153\tPASS\tDP=16;ADF=0,0;ADR=7,9;AD=7,9;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:16:0,0:7,9:7,9:0,0,47,0,0,9,7,0:0,0,36,0,0,37,37,0:0.438,0.562\n", "NC_035780.1\t66859\t.\tC\tT\t1000\tPASS\tDP=32;ADF=0,0;ADR=18,14;AD=18,14;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:32:0,0:18,14:18,14:0,0,7,0,0,14,18,0:0,0,35,0,0,36,37,0:0.562,0.438\n", "NC_035780.1\t79381\t.\tC\tT\t1000\tPASS\tDP=24;ADF=0,0;ADR=0,24;AD=0,24;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:24:0,0:0,24:0,24:0,0,12,0,0,24,0,0:0,0,37,0,0,36,0,0:0.000,1.000\n", " 69451 ../output/methylation-landscape/64M_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=40;ADF=0,0;ADR=0,40;AD=0,40;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:40:0,0:0,40:0,40:0,0,17,0,0,40,0,0:0,0,34,0,0,34,0,0:0.000,1.000\n", "NC_035780.1\t80493\t.\tC\tT\t1000\tPASS\tDP=15;ADF=0,0;ADR=8,7;AD=8,7;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:15:0,0:8,7:8,7:0,0,13,0,0,7,8,0:0,0,35,0,0,35,36,0:0.533,0.467\n", "NC_035780.1\t80703\t.\tC\tT\t101\tLow\tDP=4;ADF=0,0;ADR=0,4;AD=0,4;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:4:0,0:0,4:0,4:0,0,7,0,0,4,0,0:0,0,37,0,0,31,0,0:0.000,1.000\n", 
"NC_035780.1\t83801\t.\tC\tT\t6\tLow\tDP=2;ADF=0,0;ADR=0,2;AD=0,2;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t1/1:2:0,0:0,2:0,2:0,0,0,0,0,2,0,0:0,0,0,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t90833\t.\tC\tT\t1000\tLow\tDP=7;ADF=0,0;ADR=0,7;AD=0,7;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:7:0,0:0,7:0,7:0,0,18,0,0,7,0,0:0,0,34,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t103481\t.\tC\tT\t1000\tPASS\tDP=19;ADF=0,0;ADR=0,19;AD=0,19;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:19:0,0:0,19:0,19:0,0,9,0,0,19,0,0:0,0,37,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t109517\t.\tC\tT\t1000\tPASS\tDP=23;ADF=0,0;ADR=0,23;AD=0,23;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:23:0,0:0,23:0,23:0,0,86,0,0,23,0,0:0,0,35,0,0,35,0,0:0.000,1.000\n", "NC_035780.1\t116331\t.\tC\tT\t6\tLow\tDP=2;ADF=0,0;ADR=0,2;AD=0,2;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t1/1:2:0,0:0,2:0,2:0,0,0,0,0,2,0,0:0,0,0,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t126029\t.\tC\tT\t1000\tLow\tDP=8;ADF=0,0;ADR=0,8;AD=0,8;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:8:0,0:0,8:0,8:0,0,24,0,0,8,0,0:0,0,37,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t134234\t.\tC\tT\t4\tLow\tDP=1;ADF=0,0;ADR=0,1;AD=0,1;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000\n", " 63617 ../output/methylation-landscape/6M_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t3608\t.\tC\tT\t1000\tLow\tDP=9;ADF=0,0;ADR=0,9;AD=0,9;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:9:0,0:0,9:0,9:0,0,8,0,0,9,0,0:0,0,37,0,0,34,0,0:0.000,1.000\n", "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=46;ADF=0,0;ADR=0,46;AD=0,46;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:46:0,0:0,46:0,46:0,0,29,0,0,46,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t41708\t.\tC\tT\t15\tPASS\tDP=18;ADF=0,0;ADR=15,3;AD=15,3;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:18:0,0:15,3:15,3:0,0,8,0,0,3,15,0:0,0,36,0,0,37,37,0:0.833,0.167\n", 
"NC_035780.1\t43313\t.\tC\tT\t130\tPASS\tDP=17;ADF=0,0;ADR=10,7;AD=10,7;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:17:0,0:10,7:10,7:0,0,26,0,0,7,10,0:0,0,34,0,0,35,37,0:0.588,0.412\n", "NC_035780.1\t48987\t.\tC\tT\t1000\tPASS\tDP=26;ADF=0,0;ADR=12,14;AD=12,14;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:26:0,0:12,14:12,14:0,0,19,0,0,14,12,0:0,0,36,0,0,36,37,0:0.462,0.538\n", "NC_035780.1\t63726\t.\tC\tT\t94\tPASS\tDP=26;ADF=0,0;ADR=20,6;AD=20,6;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:26:0,0:20,6:20,6:0,0,13,0,0,6,20,0:0,0,35,0,0,33,36,0:0.769,0.231\n", "NC_035780.1\t80493\t.\tC\tT\t1000\tPASS\tDP=17;ADF=0,0;ADR=0,17;AD=0,17;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:17:0,0:0,17:0,17:0,0,30,0,0,17,0,0:0,0,36,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t80664\t.\tC\tT\t1000\tPASS\tDP=11;ADF=0,0;ADR=0,11;AD=0,11;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:11:0,0:0,11:0,11:0,0,10,0,0,11,0,0:0,0,36,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t83801\t.\tC\tT\t6\tLow\tDP=2;ADF=0,0;ADR=0,2;AD=0,2;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t1/1:2:0,0:0,2:0,2:0,0,0,0,0,2,0,0:0,0,0,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t84499\t.\tC\tT\t1000\tPASS\tDP=11;ADF=0,0;ADR=3,8;AD=3,8;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:11:0,0:3,8:3,8:0,0,3,0,0,8,3,0:0,0,37,0,0,36,37,0:0.273,0.727\n", " 68750 ../output/methylation-landscape/76F_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t9051\t.\tC\tT\t1000\tPASS\tDP=16;ADF=0,0;ADR=0,16;AD=0,16;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:16:0,0:0,16:0,16:0,0,13,0,0,16,0,0:0,0,34,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t26127\t.\tC\tT\t1000\tPASS\tDP=33;ADF=0,0;ADR=17,16;AD=17,16;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:33:0,0:17,16:17,16:0,0,33,0,0,16,17,0:0,0,36,0,0,36,37,0:0.515,0.485\n", "NC_035780.1\t31025\t.\tC\tT\t1000\tPASS\tDP=42;ADF=0,0;ADR=16,26;AD=16,26;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:42:0,0:16,26:16,26:0,0,78,0,0,26,16,0:0,0,37,0,0,35,37,0:0.381,0.619\n", 
"NC_035780.1\t31308\t.\tC\tT\t1000\tPASS\tDP=49;ADF=0,0;ADR=27,22;AD=27,22;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:49:0,0:27,22:27,22:0,0,53,0,0,22,27,0:0,0,37,0,0,36,37,0:0.551,0.449\n", "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=44;ADF=0,0;ADR=0,44;AD=0,44;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:44:0,0:0,44:0,44:0,0,32,0,0,44,0,0:0,0,36,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t63726\t.\tC\tT\t26\tPASS\tDP=18;ADF=0,0;ADR=0,18;AD=0,18;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:18:0,0:0,18:0,18:1,0,2,0,0,18,0,0:37,0,37,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t65678\t.\tC\tT\t1000\tPASS\tDP=17;ADF=0,0;ADR=8,9;AD=8,9;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:17:0,0:8,9:8,9:0,0,26,0,0,9,8,0:0,0,36,0,0,37,37,0:0.471,0.529\n", "NC_035780.1\t66859\t.\tC\tT\t1000\tPASS\tDP=18;ADF=0,0;ADR=8,10;AD=8,10;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:18:0,0:8,10:8,10:0,0,4,0,0,10,8,0:0,0,37,0,0,37,37,0:0.444,0.556\n", "NC_035780.1\t79381\t.\tC\tT\t1000\tPASS\tDP=39;ADF=0,0;ADR=12,27;AD=12,27;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:39:0,0:12,27:12,27:0,0,7,0,0,27,12,0:0,0,34,0,0,36,37,0:0.308,0.692\n", "NC_035780.1\t80493\t.\tC\tT\t1000\tPASS\tDP=29;ADF=0,0;ADR=0,29;AD=0,29;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:29:0,0:0,29:0,29:0,0,23,0,0,29,0,0:0,0,37,0,0,36,0,0:0.000,1.000\n", " 71886 ../output/methylation-landscape/77F_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t26154\t.\tC\tT\t1000\tPASS\tDP=43;ADF=0,0;ADR=26,17;AD=26,17;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:43:0,0:26,17:26,17:0,0,35,0,0,17,26,0:0,0,36,0,0,35,37,0:0.605,0.395\n", "NC_035780.1\t31025\t.\tC\tT\t1000\tPASS\tDP=43;ADF=0,0;ADR=19,24;AD=19,24;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:43:0,0:19,24:19,24:0,0,38,0,0,24,19,0:0,0,37,0,0,37,37,0:0.442,0.558\n", "NC_035780.1\t31035\t.\tC\tT\t1000\tPASS\tDP=42;ADF=0,0;ADR=24,18;AD=24,18;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:42:0,0:24,18:24,18:0,0,38,1,0,18,24,0:0,0,34,25,0,36,36,0:0.571,0.429\n", 
"NC_035780.1\t32616\t.\tC\tT\t125\tLow\tDP=9;ADF=0,0;ADR=4,5;AD=4,5;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:9:0,0:4,5:4,5:0,0,8,0,0,5,4,0:0,0,37,0,0,37,37,0:0.444,0.556\n", "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=37;ADF=0,0;ADR=0,37;AD=0,37;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:37:0,0:0,37:0,37:0,0,31,0,0,37,0,0:0,0,37,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t39712\t.\tC\tT\t41\tPASS\tDP=15;ADF=0,0;ADR=10,5;AD=10,5;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:15:0,0:10,5:10,5:0,0,30,0,0,5,10,0:0,0,37,0,0,37,35,0:0.667,0.333\n", "NC_035780.1\t45432\t.\tC\tT\t6\tLow\tDP=9;ADF=0,0;ADR=5,4;AD=5,4;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:9:0,0:5,4:5,4:0,0,34,0,0,4,5,0:0,0,36,0,0,37,37,0:0.556,0.444\n", "NC_035780.1\t62330\t.\tC\tT\t58\tPASS\tDP=50;ADF=0,0;ADR=44,6;AD=44,6;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:50:0,0:44,6:44,6:0,0,3,0,0,6,44,0:0,0,37,0,0,37,37,0:0.880,0.120\n", "NC_035780.1\t63726\t.\tC\tT\t82\tLow\tDP=9;ADF=0,0;ADR=0,9;AD=0,9;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:9:0,0:0,9:0,9:0,0,3,0,0,9,0,0:0,0,37,0,0,34,0,0:0.000,1.000\n", "NC_035780.1\t79381\t.\tC\tT\t50\tPASS\tDP=22;ADF=0,0;ADR=18,4;AD=18,4;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:22:0,0:18,4:18,4:0,0,6,0,0,4,18,0:0,0,37,0,0,37,36,0:0.818,0.182\n", " 67375 ../output/methylation-landscape/7M_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n", "NC_035780.1\t27826\t.\tC\tT\t91\tPASS\tDP=12;ADF=0,0;ADR=6,6;AD=6,6;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:12:0,0:6,6:6,6:0,0,30,0,0,6,6,0:0,0,37,0,0,35,37,0:0.500,0.500\n", "NC_035780.1\t31025\t.\tC\tT\t1000\tPASS\tDP=52;ADF=0,0;ADR=34,18;AD=34,18;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:52:0,0:34,18:34,18:0,0,41,0,0,18,34,0:0,0,36,0,0,35,37,0:0.654,0.346\n", "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=21;ADF=0,0;ADR=0,21;AD=0,21;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:21:0,0:0,21:0,21:0,0,20,0,0,21,0,0:0,0,37,0,0,37,0,0:0.000,1.000\n", 
"NC_035780.1\t39712\t.\tC\tT\t1000\tPASS\tDP=31;ADF=0,0;ADR=22,9;AD=22,9;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:31:0,0:22,9:22,9:0,0,21,0,0,9,22,0:0,0,36,0,0,34,36,0:0.710,0.290\n", "NC_035780.1\t53163\t.\tC\tT\t1000\tPASS\tDP=25;ADF=0,0;ADR=14,11;AD=14,11;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:25:0,0:14,11:14,11:0,0,0,0,0,11,14,0:0,0,0,0,0,34,36,0:0.560,0.440\n", "NC_035780.1\t63726\t.\tC\tT\t1000\tPASS\tDP=11;ADF=0,0;ADR=0,11;AD=0,11;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:11:0,0:0,11:0,11:1,0,8,0,0,11,0,0:37,0,37,0,0,36,0,0:0.000,1.000\n", "NC_035780.1\t65273\t.\tC\tT\t1000\tPASS\tDP=25;ADF=0,0;ADR=16,9;AD=16,9;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:25:0,0:16,9:16,9:0,0,20,0,0,9,16,0:0,0,36,0,0,37,36,0:0.640,0.360\n", "NC_035780.1\t65497\t.\tC\tT\t1000\tPASS\tDP=16;ADF=0,0;ADR=6,10;AD=6,10;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:16:0,0:6,10:6,10:0,0,6,0,0,10,6,0:0,0,37,0,0,37,37,0:0.375,0.625\n", "NC_035780.1\t66859\t.\tC\tT\t7\tPASS\tDP=17;ADF=0,0;ADR=14,3;AD=14,3;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:17:0,0:14,3:14,3:0,0,12,0,0,3,14,0:0,0,37,0,0,33,34,0:0.824,0.176\n", "NC_035780.1\t80493\t.\tC\tT\t45\tPASS\tDP=11;ADF=0,0;ADR=0,11;AD=0,11;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:11:0,0:0,11:0,11:0,0,2,0,0,11,0,0:0,0,37,0,0,36,0,0:0.000,1.000\n", " 64948 ../output/methylation-landscape/9M_R1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab\n" ] } ], "source": [ "%%bash\n", "\n", "for f in ../output/methylation-landscape/*vcf\n", "do\n", " /opt/homebrew/bin/intersectBed \\\n", " -u \\\n", " -a ${f} \\\n", " -b C_virginica-3.0-fuzznuc-CGmotif.gff \\\n", " | grep \"C\tT\" \\\n", " > ${f}_CT-SNPs.tab\n", " head ${f}_CT-SNPs.tab\n", " wc -l ${f}_CT-SNPs.tab\n", "done" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "for f in ../output/methylation-landscape/*CT-SNPs.tab\n", "do\n", " [ -f ${f} ] || continue\n", " mv \"${f}\" \"${f//_R1_val_1_bismark_bt2_pe.SNP-results.vcf/}\"\n", "\n", "done" ] }, { "cell_type": "code", 
"execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "../output/methylation-landscape/12M_CT-SNPs.tab\r\n", "../output/methylation-landscape/13M_CT-SNPs.tab\r\n", "../output/methylation-landscape/16F_CT-SNPs.tab\r\n", "../output/methylation-landscape/19F_CT-SNPs.tab\r\n", "../output/methylation-landscape/22F_CT-SNPs.tab\r\n", "../output/methylation-landscape/23M_CT-SNPs.tab\r\n", "../output/methylation-landscape/29F_CT-SNPs.tab\r\n", "../output/methylation-landscape/31M_CT-SNPs.tab\r\n", "../output/methylation-landscape/35F_CT-SNPs.tab\r\n", "../output/methylation-landscape/36F_CT-SNPs.tab\r\n", "../output/methylation-landscape/39F_CT-SNPs.tab\r\n", "../output/methylation-landscape/3F_CT-SNPs.tab\r\n", "../output/methylation-landscape/41F_CT-SNPs.tab\r\n", "../output/methylation-landscape/44F_CT-SNPs.tab\r\n", "../output/methylation-landscape/48M_CT-SNPs.tab\r\n", "../output/methylation-landscape/50F_CT-SNPs.tab\r\n", "../output/methylation-landscape/52F_CT-SNPs.tab\r\n", "../output/methylation-landscape/53F_CT-SNPs.tab\r\n", "../output/methylation-landscape/54F_CT-SNPs.tab\r\n", "../output/methylation-landscape/59M_CT-SNPs.tab\r\n", "../output/methylation-landscape/64M_CT-SNPs.tab\r\n", "../output/methylation-landscape/6M_CT-SNPs.tab\r\n", "../output/methylation-landscape/76F_CT-SNPs.tab\r\n", "../output/methylation-landscape/77F_CT-SNPs.tab\r\n", "../output/methylation-landscape/7M_CT-SNPs.tab\r\n", "../output/methylation-landscape/9M_CT-SNPs.tab\r\n" ] } ], "source": [ "!find ../output/methylation-landscape/*CT-SNPs.tab" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NC_035780.1\t32910\t.\tC\tT\t1000\tPASS\tDP=56;ADF=0,0;ADR=35,21;AD=35,21;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:56:0,0:35,21:35,21:0,0,28,0,0,21,35,0:0,0,36,0,0,36,36,0:0.625,0.375\n", 
"NC_035780.1\t33152\t.\tC\tT\t1000\tPASS\tDP=50;ADF=0,0;ADR=25,25;AD=25,25;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:50:0,0:25,25:25,25:0,0,39,0,0,25,25,0:0,0,35,0,0,36,37,0:0.500,0.500\n", "NC_035780.1\t39853\t.\tC\tT\t1000\tPASS\tDP=80;ADF=0,0;ADR=48,32;AD=48,32;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:80:0,0:48,32:48,32:0,0,56,0,0,32,48,0:0,0,37,0,0,36,36,0:0.600,0.400\n", "NC_035780.1\t80456\t.\tC\tT\t108\tPASS\tDP=37;ADF=0,0;ADR=28,9;AD=28,9;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:37:0,0:28,9:28,9:0,0,38,0,0,9,28,0:0,0,36,0,0,36,36,0:0.757,0.243\n", "NC_035780.1\t80703\t.\tC\tT\t1000\tPASS\tDP=25;ADF=0,0;ADR=0,25;AD=0,25;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:25:0,0:0,25:0,25:0,0,14,0,0,25,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t90833\t.\tC\tT\t1000\tLow\tDP=7;ADF=0,0;ADR=0,7;AD=0,7;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:7:0,0:0,7:0,7:0,0,23,0,0,7,0,0:0,0,36,0,0,37,0,0:0.000,1.000\n", "NC_035780.1\t92393\t.\tC\tT\t1000\tPASS\tDP=47;ADF=0,0;ADR=19,28;AD=19,28;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:47:0,0:19,28:19,28:0,0,25,0,0,28,19,0:0,0,37,0,0,34,37,0:0.404,0.596\n", "NC_035780.1\t92606\t.\tC\tT\t15\tPASS\tDP=15;ADF=0,0;ADR=12,3;AD=12,3;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:15:0,0:12,3:12,3:0,0,11,0,0,3,12,0:0,0,35,0,0,37,37,0:0.800,0.200\n", "NC_035780.1\t102123\t.\tC\tT\t4\tPASS\tDP=23;ADF=0,0;ADR=20,3;AD=20,3;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:23:0,0:20,3:20,3:0,0,7,0,0,3,20,0:0,0,37,0,0,33,36,0:0.870,0.130\n", "NC_035780.1\t107783\t.\tC\tT\t1000\tPASS\tDP=25;ADF=0,0;ADR=14,11;AD=14,11;\tGT:DP:ADF:ADR:AD:BSD:BSQ:ALFR\t0/1:25:0,0:14,11:14,11:0,0,28,0,0,11,14,0:0,0,36,0,0,35,35,0:0.560,0.440\n", " 1753029 all-CT-SNPs.tab\n" ] } ], "source": [ "#Combine C/T SNPs into one file\n", "!cat ../output/methylation-landscape/*CT-SNPs.tab >> all-CT-SNPs.tab\n", "!head all-CT-SNPs.tab\n", "!wc -l all-CT-SNPs.tab" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ 
"NC_007175.2\t12774\t.\tC\tT\n", "NC_007175.2\t15486\t.\tC\tT\n", "NC_007175.2\t16441\t.\tC\tT\n", "NC_007175.2\t2608\t.\tC\tT\n", "NC_007175.2\t6075\t.\tC\tT\n", "NC_007175.2\t6169\t.\tC\tT\n", "NC_007175.2\t6742\t.\tC\tT\n", "NC_007175.2\t7069\t.\tC\tT\n", "NC_007175.2\t7089\t.\tC\tT\n", "NC_007175.2\t7898\t.\tC\tT\n", " 517245 unique-CT-SNPs.tab\n" ] } ], "source": [ "#Take columns 1-5\n", "#Sort combined C/T SNPs\n", "#Only keep unique SNPs\n", "!awk '{print $1\"\\t\"$2\"\\t\"$3\"\\t\"$4\"\\t\"$5}' all-CT-SNPs.tab \\\n", "| sort \\\n", "| uniq \\\n", "> unique-CT-SNPs.tab\n", "!head unique-CT-SNPs.tab\n", "!wc -l unique-CT-SNPs.tab" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3. CG motifs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3a. Count CGs" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 14277725\r\n" ] } ], "source": [ "#Obtain a rough count of CGs in the genome\n", "!fgrep -o -i CG GCF_002022765.2_C_virginica-3.0_genomic.fna \\\n", "| wc -l" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Generated CG motif track with `fuzznuc` on [Galaxy](usegalaxy.org)\n", "\n", "![Screen Shot 2022-05-11 at 6 58 51 PM](https://user-images.githubusercontent.com/22335838/167960980-73121d80-0aff-45e3-a156-febef79bc2d3.png)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "##gff-version 2.0\r\n", "##date 2022-05-11\r\n", "##Type DNA NC_035780.1\r\n", "NC_035780.1\tfuzznuc\tmisc_feature\t29\t30\t2.000\t+\t.\tSequence \"NC_035780.1.1\" ; note \"*pat pattern1\"\r\n", "NC_035780.1\tfuzznuc\tmisc_feature\t55\t56\t2.000\t+\t.\tSequence \"NC_035780.1.2\" ; note \"*pat pattern1\"\r\n", "NC_035780.1\tfuzznuc\tmisc_feature\t76\t77\t2.000\t+\t.\tSequence \"NC_035780.1.3\" ; note \"*pat pattern1\"\r\n", 
"NC_035780.1\tfuzznuc\tmisc_feature\t94\t95\t2.000\t+\t.\tSequence \"NC_035780.1.4\" ; note \"*pat pattern1\"\r\n", "NC_035780.1\tfuzznuc\tmisc_feature\t104\t105\t2.000\t+\t.\tSequence \"NC_035780.1.5\" ; note \"*pat pattern1\"\r\n", "NC_035780.1\tfuzznuc\tmisc_feature\t117\t118\t2.000\t+\t.\tSequence \"NC_035780.1.6\" ; note \"*pat pattern1\"\r\n", "NC_035780.1\tfuzznuc\tmisc_feature\t135\t136\t2.000\t+\t.\tSequence \"NC_035780.1.7\" ; note \"*pat pattern1\"\r\n" ] } ], "source": [ "#Check Galaxy output\n", "!head C_virginica-3.0-fuzznuc-CGmotif.gff\n", "!wc -l C_virginica-3.0-fuzznuc-CGmotif.gff" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "### 3b. Count CG overlaps with all genome feature tracks" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 7778105\r\n" ] } ], "source": [ "#Genes\n", "!{bedtoolsDirectory}/intersectBed \\\n", "-u \\\n", "-a C_virginica-3.0-fuzznuc-CGmotif.gff \\\n", "-b C_virginica-3.0-gene.gff \\\n", "| wc -l" ] }, { "cell_type": "code", "execution_count": 76, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 1728303\r\n" ] } ], "source": [ "#CDS\n", "!{bedtoolsDirectory}/intersectBed \\\n", "-u \\\n", "-a C_virginica-3.0-fuzznuc-CGmotif.gff \\\n", "-b C_virginica-3.0-CDS.gff \\\n", "| wc -l" ] }, { "cell_type": "code", "execution_count": 77, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 2334303\r\n" ] } ], "source": [ "#Exon\n", "!{bedtoolsDirectory}/intersectBed \\\n", "-u \\\n", "-a C_virginica-3.0-fuzznuc-CGmotif.gff \\\n", "-b C_virginica-3.0-exon.gff \\\n", "| wc -l" ] }, { "cell_type": "code", "execution_count": 78, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 281715\r\n" ] } ], "source": [ "#lncRNA\n", "!{bedtoolsDirectory}/intersectBed \\\n", "-u \\\n", "-a C_virginica-3.0-fuzznuc-CGmotif.gff 
\\\n", "-b C_virginica-3.0-lncRNA.gff \\\n", "| wc -l" ] }, { "cell_type": "code", "execution_count": 79, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 7507167\r\n" ] } ], "source": [ "#mRNA\n", "!{bedtoolsDirectory}/intersectBed \\\n", "-u \\\n", "-a C_virginica-3.0-fuzznuc-CGmotif.gff \\\n", "-b C_virginica-3.0-mRNA.gff \\\n", "| wc -l" ] }, { "cell_type": "code", "execution_count": 80, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 12138514\r\n" ] } ], "source": [ "#nonCDS\n", "!{bedtoolsDirectory}/intersectBed \\\n", "-u \\\n", "-a C_virginica-3.0-fuzznuc-CGmotif.gff \\\n", "-b C_virginica-3.0-nonCDS.bed \\\n", "| wc -l" ] }, { "cell_type": "code", "execution_count": 81, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 5497597\r\n" ] } ], "source": [ "#Introns\n", "!{bedtoolsDirectory}/intersectBed \\\n", "-u \\\n", "-a C_virginica-3.0-fuzznuc-CGmotif.gff \\\n", "-b C_virginica-3.0-intron.bed \\\n", "| wc -l" ] }, { "cell_type": "code", "execution_count": 82, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 606308\r\n" ] } ], "source": [ "#Exon UTR\n", "!{bedtoolsDirectory}/intersectBed \\\n", "-u \\\n", "-a C_virginica-3.0-fuzznuc-CGmotif.gff \\\n", "-b C_virginica-3.0-exonUTR.gff \\\n", "| wc -l" ] }, { "cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 694265\r\n" ] } ], "source": [ "#Upstream flanks\n", "!{bedtoolsDirectory}/intersectBed \\\n", "-u \\\n", "-a C_virginica-3.0-fuzznuc-CGmotif.gff \\\n", "-b C_virginica-3.0-upstream.gff \\\n", "| wc -l" ] }, { "cell_type": "code", "execution_count": 84, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 616684\r\n" ] } ], "source": [ "#Downstream flanks\n", "!{bedtoolsDirectory}/intersectBed \\\n", "-u \\\n", "-a C_virginica-3.0-fuzznuc-CGmotif.gff 
\\\n", "-b C_virginica-3.0-downstream.gff \\\n", "| wc -l" ] }, { "cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 5417334\r\n" ] } ], "source": [ "#Intergenic regions\n", "!{bedtoolsDirectory}/intersectBed \\\n", "-u \\\n", "-a C_virginica-3.0-fuzznuc-CGmotif.gff \\\n", "-b C_virginica-3.0-intergenic.bed \\\n", "| wc -l" ] }, { "cell_type": "code", "execution_count": 86, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 611471\r\n" ] } ], "source": [ "#Transposable elements\n", "!{bedtoolsDirectory}/intersectBed \\\n", "-u \\\n", "-a C_virginica-3.0-fuzznuc-CGmotif.gff \\\n", "-b C_virginica-3.0-rm.te.bed \\\n", "| wc -l" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 1 }