# Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008), # Using native and syntenically mapped cDNA alignments to improve de novo gene finding # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013 # No extrinsic information on sequences given. # Initialising the parameters using config directory /gscratch/scrubbed/samwhite/outputs/20200924_cbai_genome_v1.01_busco/cbai_genome_v1.01.fasta_augustus/config/ ... # Using protein profile unknown # --[11..184]--> unknown_B (41) <--[0..3]--> unknown_C (64) <--[0..1]--> unknown_D (15) <--[0..1]--> unknown_E (11) <--[3..38]--> unknown_F (45) <--[0..13]-- # fly version. Using default transition matrix. # Looks like ./tmp/contig_3880cbai_genome_v1.01.fasta_145855183_.temp is in fasta format. # We have hints for 0 sequences and for 0 of the sequences in the input set. # # ----- prediction on sequence number 1 (length = 6059, name = contig_3880) ----- # # Predicted genes for sequence number 1 on both strands # start gene g1 contig_3880 AUGUSTUS gene 831 6059 0.01 + . g1 contig_3880 AUGUSTUS transcript 831 6059 0.01 + . g1.t1 contig_3880 AUGUSTUS exon 831 1982 . + . transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS exon 2181 2751 . + . transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS start_codon 2191 2193 . + 0 transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS intron 2752 2898 0.01 + . transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS intron 2913 2975 0.01 + . transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS intron 3052 3868 0.01 + . transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS intron 3892 4648 0.01 + . transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS intron 4678 5815 0.01 + . transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS intron 5834 5910 0.01 + . transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS intron 5945 5997 0.01 + . transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS CDS 2191 2751 0.02 + 0 transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS CDS 2899 2912 0.01 + 0 transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS exon 2899 2912 . + . transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS CDS 2976 3051 0.01 + 1 transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS exon 2976 3051 . + . transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS CDS 3869 3891 0.01 + 0 transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS exon 3869 3891 . + . transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS CDS 4649 4677 0.01 + 1 transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS exon 4649 4677 . + . transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS CDS 5816 5833 0.01 + 2 transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS exon 5816 5833 . + . transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS CDS 5911 5944 0.01 + 2 transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS exon 5911 5944 . + . transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS CDS 5998 6031 0.01 + 1 transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS exon 5998 6059 . + . transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS stop_codon 6029 6031 . + 0 transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS tts 6059 6059 . + . transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS protein_match 2380 2502 2.72 + 0 target "unknown_B[1..41]"; target_start 63; transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS protein_match 2512 2703 2.98 + 0 target "unknown_C[1..64]"; target_start 107; transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS protein_match 2707 2751 2.06 + 0 target "unknown_D[1..15]"; target_start 172; transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS protein_match 2902 2912 1.21 + 0 target "unknown_E[1..4]"; target_start 188; transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS protein_match 2976 2997 2.23 + 1 target "unknown_E[4..11]"; target_start 188; transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS protein_match 3028 3051 1.32 + 0 target "unknown_F[1..8]"; target_start 209; transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS protein_match 3869 3891 1.32 + 2 target "unknown_F[9..16]"; target_start 208; transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS protein_match 4649 4677 1.23 + 0 target "unknown_F[17..26]"; target_start 208; transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS protein_match 5816 5833 1.31 + 1 target "unknown_F[26..32]"; target_start 208; transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS protein_match 5911 5944 1.51 + 1 target "unknown_F[32..43]"; target_start 208; transcript_id "g1.t1"; gene_id "g1"; contig_3880 AUGUSTUS protein_match 5998 6004 0.204 + 1 target "unknown_F[43..45]"; target_start 209; transcript_id "g1.t1"; gene_id "g1"; # coding sequence = [atgaagcagaagagaatcacaatgaaactgaagaccttcacgccgccatcaatggcaggacccagcgccaccaaggccc # ccaaggcgggcaccaaggacgctgccccgccctccgccgcgcccgagctgctgctgcggccacgcggcgctgtgataaccttccacagcgacgtggtg # gacccgcccgccaccctccttatgggctccgacaagcatgagaacgaccacctcctggcgcaggcgacggagcgcctcgtgttctttcatgtggacga # cctgccctcggcccacgtgtacctccagctggagcccggccaggccctgcgcgacgtgccccgcgtccttctgaacgacgcggcacagctgtgcaagg # ccaacagtgccaagggcaacaagctgggcaacgtggtggtggtgtacacgctgggatcgaacctgagcaagacccgccacatgaaggccggcgaggtg # ggcttcgtctgcgccaaggaggtgcgcaagatcctcgtcaaccggcgggacgacagggtcatggaccgcctcaataacaccaagaggaagtcccttga # ccccgcgcggcgggagcggcgcgaggaggatagttcagacgaagagtgtgatgaagagaaggaaaacaaggaggagacagccaatgtgaggcaagcaa # ctttgaaacagtcagagaataaaataaaaaagagggcaaggcgtgacctgaaggtttattttagcttgaaaaacatgtctttgatgggatttcgcttt # acgagtatgccaggtggaacctaa] # protein sequence = [MKQKRITMKLKTFTPPSMAGPSATKAPKAGTKDAAPPSAAPELLLRPRGAVITFHSDVVDPPATLLMGSDKHENDHLL # AQATERLVFFHVDDLPSAHVYLQLEPGQALRDVPRVLLNDAAQLCKANSAKGNKLGNVVVVYTLGSNLSKTRHMKAGEVGFVCAKEVRKILVNRRDDR # VMDRLNNTKRKSLDPARRERREEDSSDEECDEEKENKEETANVRQATLKQSENKIKKRARRDLKVYFSLKNMSLMGFRFTSMPGGT] # sequence of block unknown_B 63 [TLLMGSDKHENDHLLAQATERLVFFHVDDLPSAHVYLQLEP] 104 # sequence of block unknown_C 107 [LRDVPRVLLNDAAQLCKANSAKGNKLGNVVVVYTLGSNLSKTRHMKAGEVGFVCAKEVRKILVN] 171 # sequence of block unknown_D 172 [RDDRVMDRLNNTKRK] 187 # sequence of block unknown_E 188 [LDPARRERREE] 199 # sequence of block unknown_F 209 [KENKEETANVRQATLKQSENKIKKRARRDLKVYFSLKNMSLMGFR] 254 # end gene g1 ### # command line: # /gscratch/srlab/programs/Augustus-3.3.2/bin/augustus --codingseq=1 --proteinprofile=/gscratch/srlab/sam/data/databases/BUSCO/metazoa_odb9/prfl/EOG091G0LS0.prfl --predictionStart=0 --predictionEnd=7667 --species=fly --progress=true ./tmp/contig_3880cbai_genome_v1.01.fasta_145855183_.temp