Package Bio :: Package Phylo :: Package Applications :: Module _Phyml
[hide private]
[frames] | no frames]

Source Code for Module Bio.Phylo.Applications._Phyml

  1  # Copyright 2011 by Eric Talevich.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its license. 
  3  # Please see the LICENSE file that should have been included as part of this 
  4  # package. 
  5  """Command-line wrapper for the tree inference program PhyML.""" 
  6  __docformat__ = "restructuredtext en" 
  7   
  8  from Bio._py3k import basestring 
  9   
 10  from Bio.Application import _Option, _Switch, AbstractCommandline 
 11   
 12   
13 -class PhymlCommandline(AbstractCommandline):
14 """Command-line wrapper for the tree inference program PhyML. 15 16 Homepage: http://www.atgc-montpellier.fr/phyml 17 18 Citations: 19 20 Guindon S, Gascuel O. 21 A simple, fast, and accurate algorithm to estimate large phylogenies by maximum 22 likelihood. 23 Systematic Biology, 2003 Oct;52(5):696-704. 24 PubMed PMID: 14530136. 25 26 Guindon S, Dufayard JF, Lefort V, Anisimova M, Hordijk W, Gascuel O. 27 New Algorithms and Methods to Estimate Maximum-Likelihood Phylogenies: Assessing 28 the Performance of PhyML 3.0. 29 Systematic Biology, 2010 59(3):307-21. 30 31 """ 32
33 - def __init__(self, cmd='phyml', **kwargs):
34 self.parameters = [ 35 _Option(['-i', '--input', 'input'], 36 """Name of the nucleotide or amino-acid sequence file in PHYLIP 37 format.""", 38 filename=True, 39 is_required=True, 40 equate=False, 41 ), 42 43 _Option(['-d', '--datatype', 'datatype'], 44 """Data type is 'nt' for nucleotide (default) and 'aa' for 45 amino-acid sequences.""", 46 checker_function=lambda x: x in ('nt', 'aa'), 47 equate=False, 48 ), 49 50 _Switch(['-q', '--sequential', 'sequential'], 51 "Changes interleaved format (default) to sequential format." 52 ), 53 54 _Option(['-n', '--multiple', 'multiple'], 55 "Number of data sets to analyse (integer).", 56 checker_function=(lambda x: 57 isinstance(x, int) or x.isdigit()), 58 equate=False, 59 ), 60 61 _Switch(['-p', '--pars', 'pars'], 62 """Use a minimum parsimony starting tree. 63 64 This option is taken into account when the '-u' option is absent 65 and when tree topology modifications are to be done. 66 """ 67 ), 68 69 _Option(['-b', '--bootstrap', 'bootstrap'], 70 """Number of bootstrap replicates, if value is > 0. 71 72 Otherwise: 73 74 0: neither approximate likelihood ratio test nor bootstrap 75 values are computed. 76 -1: approximate likelihood ratio test returning aLRT statistics. 77 -2: approximate likelihood ratio test returning Chi2-based 78 parametric branch supports. 79 -4: SH-like branch supports alone. 80 """, 81 equate=False, 82 ), 83 84 _Option(['-m', '--model', 'model'], 85 """Substitution model name. 86 87 Nucleotide-based models: 88 89 HKY85 (default) | JC69 | K80 | F81 | F84 | TN93 | GTR | custom 90 91 For the custom option, a string of six digits identifies the 92 model. For instance, 000000 corresponds to F81 (or JC69, 93 provided the distribution of nucleotide frequencies is uniform). 94 012345 corresponds to GTR. This option can be used for encoding 95 any model that is a nested within GTR. 96 97 Amino-acid based models: 98 99 LG (default) | WAG | JTT | MtREV | Dayhoff | DCMut | RtREV | 100 CpREV | VT | Blosum62 | MtMam | MtArt | HIVw | HIVb | custom 101 """, 102 checker_function=(lambda x: x in ( 103 # Nucleotide models: 104 'HKY85', 'JC69', 'K80', 'F81', 'F84', 'TN93', 'GTR', 105 # Amino acid models: 106 'LG', 'WAG', 'JTT', 'MtREV', 'Dayhoff', 'DCMut', 107 'RtREV', 'CpREV', 'VT', 'Blosum62', 'MtMam', 'MtArt', 108 'HIVw', 'HIVb') 109 or isinstance(x, int)), 110 equate=False, 111 ), 112 113 _Option(['-f', 'frequencies'], 114 """Character frequencies. 115 116 -f e, m, or "fA fC fG fT" 117 118 e : Empirical frequencies, determined as follows : 119 120 - Nucleotide sequences: (Empirical) the equilibrium base 121 frequencies are estimated by counting the occurence of the 122 different bases in the alignment. 123 - Amino-acid sequences: (Empirical) the equilibrium 124 amino-acid frequencies are estimated by counting the 125 occurence of the different amino-acids in the alignment. 126 127 m : ML/model-based frequencies, determined as follows : 128 129 - Nucleotide sequences: (ML) the equilibrium base 130 frequencies are estimated using maximum likelihood 131 - Amino-acid sequences: (Model) the equilibrium amino-acid 132 frequencies are estimated using the frequencies defined by 133 the substitution model. 134 135 "fA fC fG fT" : only valid for nucleotide-based models. 136 fA, fC, fG and fT are floating-point numbers that correspond 137 to the frequencies of A, C, G and T, respectively. 138 """, 139 filename=True, # ensure ".25 .25 .25 .25" stays quoted 140 equate=False, 141 ), 142 143 _Option(['-t', '--ts/tv', 'ts_tv_ratio'], 144 """Transition/transversion ratio. (DNA sequences only.) 145 146 Can be a fixed positive value (ex:4.0) or e to get the 147 maximum-likelihood estimate. 148 """, 149 equate=False, 150 ), 151 152 _Option(['-v', '--pinv', 'prop_invar'], 153 """Proportion of invariable sites. 154 155 Can be a fixed value in the range [0,1], or 'e' to get the 156 maximum-likelihood estimate. 157 """, 158 equate=False, 159 ), 160 161 _Option(['-c', '--nclasses', 'nclasses'], 162 """Number of relative substitution rate categories. 163 164 Default 1. Must be a positive integer. 165 """, 166 equate=False, 167 ), 168 169 _Option(['-a', '--alpha', 'alpha'], 170 """Distribution of the gamma distribution shape parameter. 171 172 Can be a fixed positive value, or 'e' to get the 173 maximum-likelihood estimate. 174 """, 175 equate=False, 176 ), 177 178 _Option(['-s', '--search', 'search'], 179 """Tree topology search operation option. 180 181 Can be one of: 182 183 NNI : default, fast 184 SPR : a bit slower than NNI 185 BEST : best of NNI and SPR search 186 """, 187 checker_function=lambda x: x in ('NNI', 'SPR', 'BEST'), 188 equate=False, 189 ), 190 191 # alt name: user_tree_file 192 _Option(['-u', '--inputtree', 'input_tree'], 193 "Starting tree filename. The tree must be in Newick format.", 194 filename=True, 195 equate=False, 196 ), 197 198 _Option(['-o', 'optimize'], 199 """Specific parameter optimisation. 200 201 tlr : tree topology (t), branch length (l) and 202 rate parameters (r) are optimised. 203 tl : tree topology and branch length are optimised. 204 lr : branch length and rate parameters are optimised. 205 l : branch length are optimised. 206 r : rate parameters are optimised. 207 n : no parameter is optimised. 208 """, 209 equate=False, 210 ), 211 212 _Switch(['--rand_start', 'rand_start'], 213 """Sets the initial tree to random. 214 215 Only valid if SPR searches are to be performed. 216 """, 217 ), 218 219 _Option(['--n_rand_starts', 'n_rand_starts'], 220 """Number of initial random trees to be used. 221 222 Only valid if SPR searches are to be performed. 223 """, 224 equate=False, 225 ), 226 227 _Option(['--r_seed', 'r_seed'], 228 """Seed used to initiate the random number generator. 229 230 Must be an integer. 231 """, 232 equate=False, 233 ), 234 235 _Switch(['--print_site_lnl', 'print_site_lnl'], 236 "Print the likelihood for each site in file *_phyml_lk.txt." 237 ), 238 239 _Switch(['--print_trace', 'print_trace'], 240 """Print each phylogeny explored during the tree search process 241 in file *_phyml_trace.txt.""" 242 ), 243 244 _Option(['--run_id', 'run_id'], 245 """Append the given string at the end of each PhyML output file. 246 247 This option may be useful when running simulations involving 248 PhyML. 249 """, 250 checker_function=lambda x: isinstance(x, basestring), 251 equate=False, 252 ), 253 254 # XXX should this always be set to True? 255 _Switch(['--quiet', 'quiet'], 256 "No interactive questions (for running in batch mode)." 257 ), 258 ] 259 AbstractCommandline.__init__(self, cmd, **kwargs)
260