# Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008), # Using native and syntenically mapped cDNA alignments to improve de novo gene finding # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013 # No extrinsic information on sequences given. # Initialising the parameters using config directory /augustus/config/ ... # Using protein profile unknown # --[0..1843]--> unknown_A (38) <--[1..5]--> unknown_B (10) <--[0..1]--> unknown_C (13) <--[0..3]--> unknown_D (45) <--[0..2]--> unknown_E (18) <--[0..2]--> unknown_F (9) <--[4..5]--> unknown_G (22) <--[3..16]--> unknown_H (9) <--[0..5]--> unknown_I (10) <--[5..44]--> unknown_J (14) <--[0..3]--> unknown_K (13) <--[0..1]--> unknown_L (26) <--[3..12]--> unknown_M (60) <--[0..1]--> unknown_N (26) <--[2..69]--> unknown_O (29) <--[22..69]--> unknown_Q (19) <--[0..531]-- # BUSCO_20180911_busco_2432604931 version. Using default transition matrix. # admissible start codons and their probabilities: ATG(1), CTG(0), TTG(0) # Looks like ./tmp/Contig1872820180911_busco_2432604931_.temp is in fasta format. # We have hints for 0 sequences and for 0 of the sequences in the input set. # # ----- prediction on sequence number 1 (length = 17142, name = Contig18728) ----- # # Constraints/Hints: # (none) # Predicted genes for sequence number 1 on both strands # start gene g1 Contig18728 AUGUSTUS gene 1672 4461 0.65 - . g1 Contig18728 AUGUSTUS transcript 1672 4461 0.65 - . g1.t1 Contig18728 AUGUSTUS stop_codon 1672 1674 . - 0 transcript_id "g1.t1"; gene_id "g1"; Contig18728 AUGUSTUS CDS 1672 4461 0.65 - 0 transcript_id "g1.t1"; gene_id "g1"; Contig18728 AUGUSTUS start_codon 4459 4461 . - 0 transcript_id "g1.t1"; gene_id "g1"; # coding sequence = [atggagggtgaaagtattcatgcactgaagataccagagacaatccagtcaggagatgaagaatttgtcatagaaatca # actccgactacagtgatgaagaaacggatgatgatagagaaaagagaaatgaatgggaaatgggccagtatgatgagacagattctatggaacagtgt # actggacagaaacaccttacacataaaaatcaaagagattcccatctacagccagccaacaccgccaaagtgtccagtccactcaagataggaagaga # ccttgatggcaaggtgaatttcattggtttggagccttccaatgatgaatctagttctgatcacatgggaaatgacatgccttcaggacttgaaaata # ttaaaatcattaatgtttgcactcttgccgatgacttcgaacaagatactctggagaacccaagaatggagaacttcaactctgaacaatacatgaat # gctgaaggtgaccaccaagattctgatatgccttcaagaacatcaaacatgattattattaatgagcaacattcacaaagcactctgtccatgatgct # tgtatgcaagttttgtcatgatctttttcctactcatgaagcattacattgtcatgtcaaacagcacatggagaatgtcaaacagcataatatggaaa # acaaagacgaacacactgaagagaaacatattttcaagcaagcaccggataaaaaattacaaaagataaatgtaaggaaggaggacacggaggaagat # agatcacaccaagggaacaaatacgctgctaaaaaaagaaaaagccccgaaaatgacggtgttgagcgaaaatcagggagaatgaagaaaaagagaaa # ccatggactggttgatgaggagaacaaatgtgagaagtgcggcaagattttcattaaggagtatcaatatagatcacacgtttccttctgtaaaggga # agataactcagatggccacatgctttgaatgtaacaaaatgattccagctcatcaagagtatgaacatgctctaaaataccatgatatagtttgtcca # atatgtggtgtaaatcaaatgaaggaatctaaactgagaaaccatatgaagttcatgcacaagatttttaattacggaaaagatacgcagaaatctga # tataaagaatcagatttgtgtggtggaccaagtttattacaaaacatgtaatttatgtggtaaagtgtgcgcatcgtttgaagaattcaaggagcaca # gaaaagagcacagaactttgaaacaaatggagacagctgccatcttagaaaaaacgaaagaggaagaagacaaagctcaaaaaagtcaggatcagact # acaccggcgaagccaaagaaggagaaaccgaatcagtcctccgccatagagtgtgaaaactgcgaaaagaagttttcctctcgctaccagttagatag # ccatctgaagagatgttctgccaaagctgtcaaagtgttgttgtgtgatgtttgtgaactgctggtgccttacgatgaacataaagagcacgtcaaat # ctcatgaaatcatatgtgatttgtgtggaaaggtttgctatggtaaaaaaagttttaaatggcatagcatgcattgtccagcgaaacagaatatggtg # aaagttgctaaagaagatttgatgaaaactgctgaagatttgatgaaaactgaaactcagagacctggaaaatctagtataaacaagacaaagtggaa # aaaaaagaaaacgtggagatctattcagttgaccagaaatttgaagcgaaaatttactcaaaaacagaagatattcttggagaaagaaatgggaattg # atactaaaacattggaagaaagtcaagttataaatagtgaattagtaagtgatatgatcaaaagaggaggcgcaccacaggatagtgaaattgaggca # gttaaaacagcaatagttaatgaaggaatagaaaattctccaaaaagtatcaaattagagtcagaaaatcatgaggaggatagatttgatcaagacac # tcttgaagacagagataatagtgaaagaataatgagcactggtgattttagtgatagcaagactgagagctatcctgggaaaggtggcagtcagattc # ctgtggaaggggatgctgttgaatgtaacacgagaaaaaatgggataaatgaaaggagaaggaaaagaaggagacgaaatctttcgcgtctagggaac # agacgaaagaaagcaaggaaacagaaaacagatcaaacgcaggagggttttgttcacttatccaatgctgtgatggagggaaagtgtgaagaagttaa # cgcaaatataccaagggcagctggtgcaccatttgaagattcaacagaaaatgaaggcttttccagaaatacattaaggacaaaccctactaacattt # cacgagacacatcagaaacagaatctgatattgaggaggaagaggtatatattggaattatacaaaatcgtaaaattcctacttatgagccaacaggg # aattctgccatgttaaatttgaatgggatggaaaaaacttgtctggatgactggctgatacaaacttcaaactcacatgacaatgcccaagtctggat # ttgtcgacattgtggaaagcagttgggttctagggaagagggctttgatcacatgattagctgccatccaaccaacaaggccacaaaagaaatcctaa # aggaaatggagatgctaaggttgaagatgctatctaggtcacaaacagatgctgtgcttgaatga] # protein sequence = [MEGESIHALKIPETIQSGDEEFVIEINSDYSDEETDDDREKRNEWEMGQYDETDSMEQCTGQKHLTHKNQRDSHLQPA # NTAKVSSPLKIGRDLDGKVNFIGLEPSNDESSSDHMGNDMPSGLENIKIINVCTLADDFEQDTLENPRMENFNSEQYMNAEGDHQDSDMPSRTSNMII # INEQHSQSTLSMMLVCKFCHDLFPTHEALHCHVKQHMENVKQHNMENKDEHTEEKHIFKQAPDKKLQKINVRKEDTEEDRSHQGNKYAAKKRKSPEND # GVERKSGRMKKKRNHGLVDEENKCEKCGKIFIKEYQYRSHVSFCKGKITQMATCFECNKMIPAHQEYEHALKYHDIVCPICGVNQMKESKLRNHMKFM # HKIFNYGKDTQKSDIKNQICVVDQVYYKTCNLCGKVCASFEEFKEHRKEHRTLKQMETAAILEKTKEEEDKAQKSQDQTTPAKPKKEKPNQSSAIECE # NCEKKFSSRYQLDSHLKRCSAKAVKVLLCDVCELLVPYDEHKEHVKSHEIICDLCGKVCYGKKSFKWHSMHCPAKQNMVKVAKEDLMKTAEDLMKTET # QRPGKSSINKTKWKKKKTWRSIQLTRNLKRKFTQKQKIFLEKEMGIDTKTLEESQVINSELVSDMIKRGGAPQDSEIEAVKTAIVNEGIENSPKSIKL # ESENHEEDRFDQDTLEDRDNSERIMSTGDFSDSKTESYPGKGGSQIPVEGDAVECNTRKNGINERRRKRRRRNLSRLGNRRKKARKQKTDQTQEGFVH # LSNAVMEGKCEEVNANIPRAAGAPFEDSTENEGFSRNTLRTNPTNISRDTSETESDIEEEEVYIGIIQNRKIPTYEPTGNSAMLNLNGMEKTCLDDWL # IQTSNSHDNAQVWICRHCGKQLGSREEGFDHMISCHPTNKATKEILKEMEMLRLKMLSRSQTDAVLE] # end gene g1 ### # command line: # /augustus/bin/augustus --codingseq=1 --proteinprofile=eukaryota_odb9/prfl/EOG09370A22.prfl --predictionStart=0 --predictionEnd=34259 --species=BUSCO_20180911_busco_2432604931 ./tmp/Contig1872820180911_busco_2432604931_.temp