#
# This is a constraints file for the transition matrix file
# trans_shadow_partial.pbl.
#
# This configuration file can be used when optimizing the transition matrix
# parameters for a new species or setting. It is an argument to
# optimize_augustus.pl when run in the transition matrix optimization
# mode, e.g.
#
#   optimize_augustus.pl --species=myspecies
#     --opt_trans_matrix=/path/augustus/config/species/myspecies/myspecies_trans_shadow_partial.pbl
#     --matrix_constraints=constraints_shadow_partial.txt train.gb
#
# Mario Stanke (9.12.2007)
#
#
# --------------------------------------------------------------------
# This is a list of the states whose optimization should be attempted. For
# the state numbers see the corresponding states file 'states_shadow.cfg'.
# States not in this list are simply skipped by optimize_augustus.pl.
# Use a list with one state number per line or use the keyword 'all'.
# The order determines the order in which the optimization cycles
# through the states.
#
[TRY]
0  # intergenic region
13 # ass 0
18 # ass 1
23 # ass 2
# the following states tune the overall frequency of exons (if not normed)
2  # initial exon 0
5  # internal exon 0
# --------------------------------------------------------------------
# This is a list of states s, such that the transition probabilities
# out of s are normed:
#
#   M[s][0] + M[s][1] + ... + M[s][71] = const.
#
# The constant is computed from the original
# trans_shadow_partial_utr.pbl file. It is 1.0 with few exceptions.
# NOTE(review): the header of this file names trans_shadow_partial.pbl,
# not trans_shadow_partial_utr.pbl -- confirm which file is meant here.
# For unnormed states the M[s][.] values don't form a probability
# distribution anymore. If you don't care -- AUGUSTUS doesn't.
# Use a list with one state number per line or use the keyword 'all'.
#
[NORMED]
0
13
18
23
# --------------------------------------------------------------------
# This section is for setting constraints between transition probabilities,
# as suggested by symmetry, e.g. strand symmetry (or by treating transitions
# in all reading frames the same). Theoretically, i.e. with infinite and
# representative training data, this should not be necessary. However, in
# the real finite world this is a little safeguard against overfitting.
#
[BINDINGS]
(0,1)=(0,24) # same frequency of single exon genes on both strands
(0,2)=(0,31) # same frequency of initial exons with len=0 (mod 3)
(0,3)=(0,30) # same frequency of initial exons with len=1 (mod 3)
(0,4)=(0,29) # same frequency of initial exons with len=2 (mod 3)
# The probability of terminating the CDS is independent of the reading frame.
(13,8)=(18,8)
(18,8)=(23,8)
(13,8)=(23,8)
# Equal exit probabilities out of initial exons for the three frames.
(2,10)=(3,15)
(3,15)=(4,20)
(2,10)=(4,20)
# Equal exit probabilities out of internal exons for the three frames
# and for both strands.
(5,10)=(6,15)
(5,10)=(7,20)
(5,10)=(26,36)
(5,10)=(27,41)
(5,10)=(28,46)
# The Markov chain of phases of successive introns should
# be the time-reversed chain for the reverse strand.
# NOTE(review): the right-hand side below opens one more parenthesis than it
# closes (the reverse( call is not closed) -- confirm against the syntax
# optimize_augustus.pl expects before changing it.
MC(((43,28),(43,26),(43,27)),((33,28),(33,26),(33,27)),((38,28),(38,26),(38,27)))=reverse(MC(((13,5),(13,6),(13,7)),((18,5),(18,6),(18,7)),((23,5),(23,6),(23,7)))