{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Giles did a reassembly of the raw Oly NGS reads. \n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![](http://eagle.fish.washington.edu/cnidarian/skitch/Transcriptome_assembly_methods_-_roberts_sbr_gmail_com_-_Gmail_1B3CA0E5.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The first step was concatenating the read files with `cat`:\n", "\n", "```\n", "# Paired folder\n", "cat *_R2.fastq >> all.R2.fastq\n", "cat filtered_106A_Female_Mix_GATCAG_L004_R1.fastq \\\n", " filtered_106A_Male_Mix_TAGCTT_L004_R1.fastq \\\n", " filtered_108A_Female_Mix_GGCTAC_L004_R1.fastq >> all.R1.fastq\n", "\n", "# Unpaired folder\n", "cat *_R1.fastq >> tmp.R1.fastq\n", "\n", "# Then combining the two R1s\n", "cat tmp.R1.fastq >> all.R1.fastq\n", " \n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "\n", "```\n", "\n", "#!/bin/bash\n", "\n", "export PATH=/share/bioinformatics/samtools-1.2:${PATH}\n", "export PATH=/share/bioinformatics/bowtie2:${PATH}\n", "export PATH=/share/bioinformatics/bowtie:${PATH}\n", "export TRIN=/share/bioinformatics/trinityrnaseq-2.0.6\n", "\n", "${TRIN}/Trinity \\\n", " --seqType fq \\\n", " --max_memory 44G \\\n", " --left all.R1.fastq \\\n", " --right all.R2.fastq \\\n", " --CPU 6 \\\n", " --normalize_reads\n", "``` " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Results" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "```\n", "\n", "################################\n", "## Counts of transcripts, etc.\n", "################################\n", "Total trinity 'genes': 129057\n", "Total trinity transcripts: 213990\n", "Percent GC: 39.07\n", "\n", "########################################\n", "Stats based on ALL transcript contigs:\n", "########################################\n", "\n", " Contig N10: 5067\n", " Contig N20: 3576\n", " Contig N30: 2694\n", " Contig N40: 2075\n", " Contig N50: 1587\n", "\n", " Median contig length: 446\n", " 
Average contig: 886.10\n", " Total assembled bases: 189616017\n", "\n", "\n", "#####################################################\n", "## Stats based on ONLY LONGEST ISOFORM per 'GENE':\n", "#####################################################\n", "\n", " Contig N10: 4760\n", " Contig N20: 3213\n", " Contig N30: 2326\n", " Contig N40: 1709\n", " Contig N50: 1200\n", "\n", " Median contig length: 372\n", " Average contig: 722.55\n", " Total assembled bases: 93250071\n", " \n", "``` \n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ">TR1|c0_g1_i1 len=489 path=[467:0-488] [-1, 467, -2]\r\n", "GTTTTGACCAACGAGAGTCGGTACTGAAGATGATCACTTGAAATAAAATTAAATGATAAC\r\n", "GTTTTCTTAAAACCCTTCATGTCGGCGTGATACATCCGTAGTTGAACGCCGTCACGAAAG\r\n", "GTAAGCCAAAGGGGGGTGTTGCAGTCCATACCCACACGTATAATTTATTCCATAAATAGT\r\n", "AGCAGCATAAAGCACACAATGATAATTTAAAATACGTTTCATTCACCAATGAATTAAGTA\r\n", "AGAATTGGTGCTTTTCCCTAGGTCTATTTGTCTGATCTTTTGTATGTCTGTTTATTGCTA\r\n", "AGTTGTTTGAGCCACCTCTTTTCCACACCTTATTTTGCTTACTCCTAGACACCTGATCCC\r\n", "AACTCTGCTATATCCACAGGTCCGTGCATGCGCTACACTTAATATTGTATTTCTATGGGA\r\n", "AACATTAGATAAATCACTGTTCATAAGAATAATGCTGTTTATATGTACAATGTACTTTCT\r\n", "TAATAAAAC\r\n", ">TR1|c1_g1_i1 len=430 path=[815:0-429] [-1, 815, -2]\r\n", "GGCAAACACAGATCCCTGGACGCACTACAGGTGGGATCAGGTGTCTAGGAGTAAGTAGCC\r\n", "CCTGTCGACAGGTTATATCCGCCGTGAGTCCCATATTTTGATCGGGTAAACGGAGCAATT\r\n", "GTGATAAAAATTAGTATATACATGTATAGAACGGTCTAACAATTGCAATGAAACCCGTCA\r\n", "GACAGCATTTGGTCCAATGGCGGGTTGTATTGGCAAACTAGATCTTTATAACGACCATAA\r\n", "CATTTCCGAAATGTGGATATTAACCGAAACTGTAGAAACCCCTGTAACACCACCTTGTTT\r\n", "GTCAGTAGCATGTCTCGACTTCAAACTAATCCTGCGCAGAACAAGCTCTTGCGTATTTAG\r\n", "TACGTTTAGAGACATACACACCTTATGCGGGTAATGATGGAATATTGCTGCATGGGGGTG\r\n", "GGAGGTTGAC\r\n", ">TR2|c0_g1_i1 len=254 path=[497:0-145 498:146-163 499:164-253] [-1, 497, 498, 499, -2]\r\n" ] } ], "source": [ "!head -20 
/Users/sr320/data-genomic/tentacle/OlyO-v4-transcriptome.fasta" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "213990\r\n" ] } ], "source": [ "!fgrep -c \">\" /Users/sr320/data-genomic/tentacle/OlyO-v4-transcriptome.fasta" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.0" } }, "nbformat": 4, "nbformat_minor": 0 }