Package Bio :: Package Phylo :: Package PAML :: Module codeml
[hide private]
[frames | no frames]

Source Code for Module Bio.Phylo.PAML.codeml

  1  # Copyright (C) 2011 by Brandon Invergo (b.invergo@gmail.com) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  # For using with statement in Python 2.5 or Jython 
  7  from __future__ import with_statement 
  8   
  9  import os 
 10  import os.path 
 11  from _paml import Paml, _relpath 
 12  import _parse_codeml 
 13   
 14   
class CodemlError(EnvironmentError):
    """CODEML has failed.

    Run with verbose = True to view CODEML's error message.
    """
class Codeml(Paml):
    """This class implements an interface to CODEML, part of the PAML package.

    On top of what the Paml base class provides, an instance tracks the
    tree file (``self.tree``), the control file name (``self.ctl_file``)
    and a dictionary of CODEML control-file options (``self._options``).
    """

    def __init__(self, alignment=None, tree=None, working_dir=None,
                 out_file=None):
        """Initialize the codeml instance.

        The user may optionally pass in strings specifying the locations
        of the input alignment and tree files, the working directory and
        the final output file. The alignment, working directory and
        output file are handed to Paml.__init__.

        Every CODEML option starts out as None; options that are still
        None are left out of the control file by write_ctl_file.

        Raises IOError if a tree file is given but does not exist.
        """
        Paml.__init__(self, alignment, working_dir, out_file)
        if tree is not None:
            if not os.path.exists(tree):
                raise IOError("The specified tree file does not exist.")
        # Stored even when None so that run() can detect a missing tree.
        self.tree = tree
        self.ctl_file = "codeml.ctl"
        self._options = {"noisy": None,
                         "verbose": None,
                         "runmode": None,
                         "seqtype": None,
                         "CodonFreq": None,
                         "ndata": None,
                         "clock": None,
                         "aaDist": None,
                         "aaRatefile": None,
                         "model": None,
                         "NSsites": None,
                         "icode": None,
                         "Mgene": None,
                         "fix_kappa": None,
                         "kappa": None,
                         "fix_omega": None,
                         "omega": None,
                         "fix_alpha": None,
                         "alpha": None,
                         "Malpha": None,
                         "ncatG": None,
                         "getSE": None,
                         "RateAncestor": None,
                         "Small_Diff": None,
                         "cleandata": None,
                         "fix_blength": None,
                         "method": None,
                         "rho": None,
                         "fix_rho": None}

    def write_ctl_file(self):
        """Dynamically build a CODEML control file from the options.

        The control file is written to the location specified by the
        ctl_file property of the codeml class. The seqfile, outfile and
        treefile entries are always written; any other option is written
        only if its value is not None.
        """
        # Make sure all paths are relative to the working directory
        self._set_rel_paths()
        # NOTE(review): self._rel_tree is only assigned by _set_rel_paths
        # when self.tree is not None.
        with open(self.ctl_file, 'w') as ctl_handle:
            ctl_handle.write("seqfile = %s\n" % self._rel_alignment)
            ctl_handle.write("outfile = %s\n" % self._rel_out_file)
            ctl_handle.write("treefile = %s\n" % self._rel_tree)
            for name, value in self._options.items():
                if value is None:
                    # If an option has a value of None, there's no need
                    # to write it in the control file; it's normally just
                    # commented out.
                    continue
                if name == "NSsites":
                    # NSsites is stored in Python as a list but in the
                    # control file it is specified as a series of numbers
                    # separated by spaces.
                    value = " ".join([str(site) for site in value])
                ctl_handle.write("%s = %s\n" % (name, value))

    def read_ctl_file(self, ctl_file):
        """Parse a control file and load the options into the Codeml instance.

        Anything after a "*" on a line is treated as a comment. The
        seqfile, treefile and outfile entries set the alignment, tree and
        out_file attributes; NSsites is stored as a list of ints; every
        other value is converted to float (if it contains a ".") or int
        where possible and otherwise kept as a string. Options that do
        not appear in the file are reset to None.

        Raises IOError if ctl_file is not a file, AttributeError for a
        line without "=", KeyError for an unknown option and TypeError
        for a non-integer NSsites entry.
        """
        if not os.path.isfile(ctl_file):
            raise IOError("File not found: %r" % ctl_file)
        temp_options = {}
        with open(ctl_file) as ctl_handle:
            for line in ctl_handle:
                line = line.strip()
                uncommented = line.split("*", 1)[0]
                if uncommented == "":
                    continue
                if "=" not in uncommented:
                    raise AttributeError(
                        "Malformed line in control file:\n%r" % line)
                # Split on the first "=" only, so a value that itself
                # contains "=" does not break the unpacking.
                option, value = uncommented.split("=", 1)
                option = option.strip()
                value = value.strip()
                if option == "seqfile":
                    self.alignment = value
                elif option == "treefile":
                    self.tree = value
                elif option == "outfile":
                    self.out_file = value
                elif option == "NSsites":
                    # Split on any run of whitespace so that repeated
                    # spaces or tabs between site classes are accepted.
                    tokens = value.split()
                    if not tokens:
                        raise TypeError("Invalid site class: %s" % value)
                    site_classes = []
                    for token in tokens:
                        try:
                            site_classes.append(int(token))
                        except ValueError:
                            raise TypeError(
                                "Invalid site class: %s" % token)
                    temp_options["NSsites"] = site_classes
                elif option not in self._options:
                    raise KeyError("Invalid option: %s" % option)
                else:
                    if "." in value:
                        convert = float
                    else:
                        convert = int
                    try:
                        converted_value = convert(value)
                    except ValueError:
                        # Not numeric: keep the raw string.
                        converted_value = value
                    temp_options[option] = converted_value
        # Options missing from the file are reset to None.
        for option in self._options:
            self._options[option] = temp_options.get(option)

    def print_options(self):
        """Print out all of the options and their current settings."""
        for name, value in self._options.items():
            if name == "NSsites" and value is not None:
                # NSsites is stored in Python as a list but in the
                # control file it is specified as a series of numbers
                # separated by spaces.
                value = " ".join([str(site) for site in value])
            # Single-argument form prints identically on Python 2 and 3.
            print("%s = %s" % (name, value))

    def _set_rel_paths(self):
        """Convert all file/directory locations to paths relative to the current working directory.

        CODEML requires that all paths specified in the control file be
        relative to the directory from which it is called rather than
        absolute paths.
        """
        Paml._set_rel_paths(self)
        if self.tree is not None:
            self._rel_tree = _relpath(self.tree, self.working_dir)

    def run(self, ctl_file=None, verbose=False, command="codeml",
            parse=True):
        """Run codeml using the current configuration and then parse the results.

        The run itself is delegated to Paml.run. If parse is true, the
        output file is then parsed with read() and the resulting
        dictionary is returned; otherwise None is returned. The arguments
        may be passed as either absolute or relative paths, despite the
        fact that CODEML requires relative paths.

        Raises ValueError if no tree file has been specified and IOError
        if the specified tree file does not exist.
        """
        if self.tree is None:
            raise ValueError("Tree file not specified.")
        if not os.path.exists(self.tree):
            raise IOError("The specified tree file does not exist.")
        Paml.run(self, ctl_file, verbose, command)
        if parse:
            return read(self.out_file)
        return None
def read(results_file):
    """Parse a CODEML results file.

    The file is read into memory and passed in turn through the
    parse_basics, parse_nssites, parse_pairwise and parse_distances
    functions of _parse_codeml, which build up the results dictionary.

    Returns the populated results dictionary.

    Raises IOError if results_file does not exist and ValueError if
    nothing could be parsed from it.
    """
    if not os.path.exists(results_file):
        raise IOError("Results file does not exist.")
    # The context manager closes the handle even if reading fails.
    with open(results_file) as handle:
        lines = handle.readlines()
    results = {}
    (results, multi_models, multi_genes) = _parse_codeml.parse_basics(
        lines, results)
    results = _parse_codeml.parse_nssites(lines, results, multi_models,
                                          multi_genes)
    results = _parse_codeml.parse_pairwise(lines, results)
    results = _parse_codeml.parse_distances(lines, results)
    if not results:
        raise ValueError("Invalid results file")
    return results