# Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008), # Using native and syntenically mapped cDNA alignments to improve de novo gene finding # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013 # No extrinsic information on sequences given. # Initialising the parameters using config directory /gscratch/scrubbed/samwhite/outputs/20200924_cbai_genome_v1.01_busco/cbai_genome_v1.01.fasta_augustus/config/ ... # Using protein profile unknown # --[3..23]--> unknown_A (33) <--[1..3]--> unknown_B (23) <--[0..1]--> unknown_C (16) <--[0..1]--> unknown_D (19) <--[0..38]--> unknown_E (62) <--[2..21]--> unknown_F (57) <--[0..9]--> unknown_G (11) <--[0..1]--> unknown_H (9) <--[0..4]--> unknown_I (7) <--[0..1]--> unknown_J (6) <--[0..2]--> unknown_K (8) <--[0..17]--> unknown_L (14) <--[0..1]--> unknown_M (16) <--[1..3]--> unknown_N (12) <--[0..1]--> unknown_O (22) <--[6..23]--> unknown_P (8) <--[9..99]-- # fly version. Using default transition matrix. # Looks like ./tmp/contig_2847cbai_genome_v1.01.fasta_145855183_.temp is in fasta format. # We have hints for 0 sequences and for 0 of the sequences in the input set. # # ----- prediction on sequence number 1 (length = 6082, name = contig_2847) ----- # # Predicted genes for sequence number 1 on both strands # start gene g1 contig_2847 AUGUSTUS gene 1516 3330 0.04 + . g1 contig_2847 AUGUSTUS transcript 1516 3330 0.04 + . g1.t1 contig_2847 AUGUSTUS exon 1516 2248 . + . transcript_id "g1.t1"; gene_id "g1"; contig_2847 AUGUSTUS start_codon 1847 1849 . + 0 transcript_id "g1.t1"; gene_id "g1"; contig_2847 AUGUSTUS intron 2249 2355 0.99 + . transcript_id "g1.t1"; gene_id "g1"; contig_2847 AUGUSTUS CDS 1847 2248 0.81 + 0 transcript_id "g1.t1"; gene_id "g1"; contig_2847 AUGUSTUS CDS 2356 3204 0.99 + 0 transcript_id "g1.t1"; gene_id "g1"; contig_2847 AUGUSTUS exon 2356 3330 . + . transcript_id "g1.t1"; gene_id "g1"; contig_2847 AUGUSTUS stop_codon 3202 3204 . + 0 transcript_id "g1.t1"; gene_id "g1"; contig_2847 AUGUSTUS tts 3330 3330 . + . transcript_id "g1.t1"; gene_id "g1"; # coding sequence = [atgcagacctcacggaaatattcctgcagggtgaaaacttcctacgccctcaggtggccgacgagcgtccaccttcacg # gcgaggcagcaggcggctgcttcggctgtttcaggaagaccttcgtgccggtggcgtgcatcaccgtgccgcccgacacgtggcagatgctccccggg # cacacgtgggaacacgagaagtgggtaaacacggaggccgagctccgcatcgacttcgggaagctggacgccatggacggcacgggacgtcgcctgaa # ggtgaaggcgaagcgcgggcagaagaccattgccaagggcaccttcgcaaacatcttcgacaacgttgaagatggcgactttgcgtttggtattgcgt # ttggcaaggatggcttatcgctgtccttgagggaggggaacaaaatggatgtacttccacccccgtcgatgtattatgaagaagaaagcagcgacgag # gagaatgtccaggatgacggattctgtgacatttctcttgacgacacaagtgacgacgacggcaacggcctctcgtctccgctggttgagacaagcga # cagtctcaaccagcagcttcagttgccgaccttctccatggtgcccgagacacccttgcctggacgtcaggacagcctcatgccctcgctgcccgccc # tgctgtggttcgccaaccaggcagtggccgtggcggcaccggctgtgtgcgtgtacggcgcggtggtgccactgctggaagacaccgtgcccttcctg # gctggcctgtcctcgcgccctgatgaggtgacgagcgtggtgcaggcggcgtgcgtggccttcgtgctggtcacgacaggctattggtacgctgccct # ggtggcgcggccctcctgcaggcggcggcacaggcagcgggataagtactgtgcggcctgcgaggaggaggtgccggtggacgagtggcactgcgagg # tgtgcggcgagtgcgtggagggcagacagcaccactcggtgctgctgaaccggtgcgtgggtgccgccaacgtggcgctttacaggacgctgatgaag # ctcctgaccactggcgccgcggcggtgctggtgctgttcctcctggaggcggtgatgggccagccgtggcgcctcctccaggtcgggctgttgttgtg # ctgtgccctgggctcggcacactgccgcaacgccgcggccaaagagcgagccaggcaggcttgtcgcggtgactggtccctgctcgaggagtga] # protein sequence = [MQTSRKYSCRVKTSYALRWPTSVHLHGEAAGGCFGCFRKTFVPVACITVPPDTWQMLPGHTWEHEKWVNTEAELRIDF # GKLDAMDGTGRRLKVKAKRGQKTIAKGTFANIFDNVEDGDFAFGIAFGKDGLSLSLREGNKMDVLPPPSMYYEEESSDEENVQDDGFCDISLDDTSDD # DGNGLSSPLVETSDSLNQQLQLPTFSMVPETPLPGRQDSLMPSLPALLWFANQAVAVAAPAVCVYGAVVPLLEDTVPFLAGLSSRPDEVTSVVQAACV # AFVLVTTGYWYAALVARPSCRRRHRQRDKYCAACEEEVPVDEWHCEVCGECVEGRQHHSVLLNRCVGAANVALYRTLMKLLTTGAAAVLVLFLLEAVM # GQPWRLLQVGLLLCCALGSAHCRNAAAKERARQACRGDWSLLEE] # end gene g1 ### # start gene g2 contig_2847 AUGUSTUS gene 3526 5576 0.03 + . g2 contig_2847 AUGUSTUS transcript 3526 5576 0.03 + . g2.t1 contig_2847 AUGUSTUS tss 3526 3526 . + . transcript_id "g2.t1"; gene_id "g2"; contig_2847 AUGUSTUS exon 3526 5576 . + . transcript_id "g2.t1"; gene_id "g2"; contig_2847 AUGUSTUS start_codon 4665 4667 . + 0 transcript_id "g2.t1"; gene_id "g2"; contig_2847 AUGUSTUS CDS 4665 4868 0.82 + 0 transcript_id "g2.t1"; gene_id "g2"; contig_2847 AUGUSTUS stop_codon 4866 4868 . + 0 transcript_id "g2.t1"; gene_id "g2"; contig_2847 AUGUSTUS tts 5576 5576 . + . transcript_id "g2.t1"; gene_id "g2"; # coding sequence = [atgatgatgataataatgataatgatgaaaagaatagtgataatgataataaggatccatttagtgttcaataaagata # gaaagcttgatatcttctttacatctctcgatcatcgctttaaggactggctgctaagggtttttattgtgaggcttatgacttttattatgcccctg # gccagtccgttgcgcatgtcaggatga] # protein sequence = [MMMIIMIMMKRIVIMIIRIHLVFNKDRKLDIFFTSLDHRFKDWLLRVFIVRLMTFIMPLASPLRMSG] # end gene g2 ### # command line: # /gscratch/srlab/programs/Augustus-3.3.2/bin/augustus --codingseq=1 --proteinprofile=/gscratch/srlab/sam/data/databases/BUSCO/metazoa_odb9/prfl/EOG091G0HOK.prfl --predictionStart=0 --predictionEnd=8051 --species=fly --progress=true ./tmp/contig_2847cbai_genome_v1.01.fasta_145855183_.temp