Package Bio :: Package Application
[hide private]
[frames] | no frames]

Source Code for Package Bio.Application

  1  # Copyright 2001-2004 Brad Chapman. 
  2  # Revisions copyright 2009-2010 by Peter Cock. 
  3  # All rights reserved. 
  4  # This code is part of the Biopython distribution and governed by its 
  5  # license.  Please see the LICENSE file that should have been included 
  6  # as part of this package. 
  7  """General mechanisms to access applications in Biopython. 
  8   
  9  This module is not intended for direct use. It provides the basic objects which 
 10  are subclassed by our command line wrappers, such as: 
 11   
 12   - Bio.Align.Applications 
 13   - Bio.Blast.Applications 
 14   - Bio.Emboss.Applications 
 15   - Bio.Sequencing.Applications 
 16   
 17  These modules provide wrapper classes for command line tools to help you 
 18  construct command line strings by setting the values of each parameter. 
 19  The finished command line strings are then normally invoked via the built-in 
 20  Python module subprocess. 
 21  """ 
 22  import os 
 23  import sys 
 24  import StringIO 
 25  import subprocess 
 26  import re 
 27   
 28  from subprocess import CalledProcessError as _ProcessCalledError 
 29   
 30  from Bio import File 
 31   
 32  #Use this regular expression to test the property names are going to 
 33  #be valid as Python properties or arguments 
 34  _re_prop_name = re.compile(r"[a-zA-Z][a-zA-Z0-9_]*") 
 35  assert _re_prop_name.match("t") 
 36  assert _re_prop_name.match("test") 
 37  assert _re_prop_name.match("_test") is None # we don't want private names 
 38  assert _re_prop_name.match("-test") is None 
 39  assert _re_prop_name.match("test_name") 
 40  assert _re_prop_name.match("test2") 
 41  #These are reserved names in Python itself, 
 42  _reserved_names = ["and", "del", "from", "not", "while", "as", "elif", 
 43                     "global", "or", "with", "assert", "else", "if", "pass", 
 44                     "yield", "break", "except", "import", "print", "class", 
 45                     "exec", "in", "raise", "continue", "finally", "is", 
 46                     "return", "def", "for", "lambda", "try"] 
 47  #These are reserved names due to the way the wrappers work 
 48  _local_reserved_names = ["set_parameter"] 
 49   
 50   
class ApplicationError(_ProcessCalledError):
    """Raised when an application returns a non-zero exit status.

    The exit status will be stored in the returncode attribute, similarly
    the command line string used in the cmd attribute, and (if captured)
    stdout and stderr as strings.

    This exception is a subclass of subprocess.CalledProcessError.

    >>> err = ApplicationError(-11, "helloworld", "", "Some error text")
    >>> err.returncode, err.cmd, err.stdout, err.stderr
    (-11, 'helloworld', '', 'Some error text')
    >>> print err
    Command 'helloworld' returned non-zero exit status -11, 'Some error text'
    >>> err
    ApplicationError(-11, 'helloworld', '', 'Some error text')

    """
    def __init__(self, returncode, cmd, stdout="", stderr=""):
        """Record the exit status, command line and any captured output.

        Arguments:
         - returncode -- exit status reported for the command.
         - cmd -- the command line string that was run.
         - stdout -- captured standard output (default empty string).
         - stderr -- captured standard error (default empty string).
        """
        self.returncode = returncode
        self.cmd = cmd
        self.stdout = stdout
        self.stderr = stderr

    def __str__(self):
        """Return a one line summary, quoting the first line of stderr."""
        # Get the first line of any stderr message.  This is best effort:
        # stderr may not be a string at all (e.g. None), in which case the
        # message is simply omitted.  Only Exception is caught so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        try:
            msg = self.stderr.lstrip().split("\n", 1)[0].rstrip()
        except Exception:
            msg = ""
        if msg:
            return "Command '%s' returned non-zero exit status %d, %r" \
                   % (self.cmd, self.returncode, msg)
        return "Command '%s' returned non-zero exit status %d" \
               % (self.cmd, self.returncode)

    def __repr__(self):
        """Return an unambiguous representation showing all four fields."""
        # %r (not %s) so that empty strings and embedded commas or spaces
        # remain distinguishable in the output.
        return "ApplicationError(%i, %r, %r, %r)" \
               % (self.returncode, self.cmd, self.stdout, self.stderr)
89 90
class AbstractCommandline(object):
    """Generic interface for constructing command line strings.

    This class shouldn't be called directly; it should be subclassed to
    provide an implementation for a specific application.

    For a usage example we'll show one of the EMBOSS wrappers. You can set
    options when creating the wrapper object using keyword arguments - or
    later using their corresponding properties:

    >>> from Bio.Emboss.Applications import WaterCommandline
    >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
    >>> cline
    WaterCommandline(cmd='water', gapopen=10, gapextend=0.5)

    You can instead manipulate the parameters via their properties, e.g.

    >>> cline.gapopen
    10
    >>> cline.gapopen = 20
    >>> cline
    WaterCommandline(cmd='water', gapopen=20, gapextend=0.5)

    You can clear a parameter you have already added by 'deleting' the
    corresponding property:

    >>> del cline.gapopen
    >>> cline.gapopen
    >>> cline
    WaterCommandline(cmd='water', gapextend=0.5)

    Once you have set the parameters you need, turn the object into a string:

    >>> str(cline)
    Traceback (most recent call last):
    ...
    ValueError: You must either set outfile (output filename), or enable filter or stdout (output to stdout).

    In this case the wrapper knows certain arguments are required to construct
    a valid command line for the tool. For a complete example,

    >>> from Bio.Emboss.Applications import WaterCommandline
    >>> water_cmd = WaterCommandline(gapopen=10, gapextend=0.5)
    >>> water_cmd.asequence = "asis:ACCCGGGCGCGGT"
    >>> water_cmd.bsequence = "asis:ACCCGAGCGCGGT"
    >>> water_cmd.outfile = "temp_water.txt"
    >>> print water_cmd
    water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
    >>> water_cmd
    WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)

    You would typically run the command line via a standard Python operating
    system call using the subprocess module for full control. For the simple
    case where you just want to run the command and get the output:

    stdout, stderr = water_cmd()
    """
    # Note the call example above is not a doctest as we can't handle EMBOSS
    # (or any other tool) being missing in the unit tests.

    def __init__(self, cmd, **kwargs):
        """Create a new instance of a command line wrapper object.

        Arguments:
         - cmd -- the program name or path, stored as self.program_name.
         - kwargs -- parameter names and values, each passed to
           set_parameter after the properties have been created.

        Raises AttributeError if the subclass has not defined
        self.parameters, and ValueError for duplicated aliases, unusable
        final parameter names, or unknown keyword arguments.
        """
        # Init method - should be subclassed!
        #
        # The subclass methods should look like this:
        #
        # def __init__(self, cmd="muscle", **kwargs):
        #     self.parameters = [...]
        #     AbstractCommandline.__init__(self, cmd, **kwargs)
        #
        # i.e. There should have an optional argument "cmd" to set the location
        # of the executable (with a sensible default which should work if the
        # command is on the path on Unix), and keyword arguments. It should
        # then define a list of parameters, all objects derived from the base
        # class _AbstractParameter.
        #
        # The keyword arguments should be any valid parameter name, and will
        # be used to set the associated parameter.
        self.program_name = cmd
        try:
            parameters = self.parameters
        except AttributeError:
            raise AttributeError("Subclass should have defined self.parameters")

        # Beware of binding-versus-assignment confusion issues: each factory
        # takes the name as an argument so that the returned lambda is bound
        # to that particular name rather than to the loop variable.  They do
        # not depend on the loop, so they are defined once up front.
        def getter(name):
            return lambda x: x._get_parameter(name)

        def setter(name):
            return lambda x, value: x.set_parameter(name, value)

        def deleter(name):
            return lambda x: x._clear_parameter(name)

        # Create properties for each parameter at run time
        aliases = set()
        for p in parameters:
            for name in p.names:
                if name in aliases:
                    raise ValueError("Parameter alias %s multiply defined"
                                     % name)
                aliases.add(name)
            # The last name is the one exposed as a property / keyword.
            name = p.names[-1]
            if _re_prop_name.match(name) is None:
                raise ValueError("Final parameter name %s cannot be used as "
                                 "an argument or property name in python"
                                 % repr(name))
            if name in _reserved_names:
                raise ValueError("Final parameter name %s cannot be used as "
                                 "an argument or property name because it is "
                                 "a reserved word in python" % repr(name))
            if name in _local_reserved_names:
                raise ValueError("Final parameter name %s cannot be used as "
                                 "an argument or property name due to the "
                                 "way the AbstractCommandline class works"
                                 % repr(name))

            doc = p.description
            if isinstance(p, _Switch):
                doc += "\n\nThis property controls the addition of the %s " \
                       "switch, treat this property as a boolean." % p.names[0]
            else:
                doc += "\n\nThis controls the addition of the %s parameter " \
                       "and its associated value. Set this property to the " \
                       "argument value required." % p.names[0]
            prop = property(getter(name), setter(name), deleter(name), doc)
            # The property is attached to the class (not the instance),
            # since properties only work as class attributes.
            setattr(self.__class__, name, prop)  # magic!
        # items() rather than iteritems() so this also works where dict
        # has no iteritems method.
        for key, value in kwargs.items():
            self.set_parameter(key, value)

    def _validate(self):
        """Make sure the required parameters have been set (PRIVATE).

        No return value - it either works or raises a ValueError.

        This is a separate method (called from __str__) so that subclasses may
        override it.
        """
        for p in self.parameters:
            # Check for missing required parameters:
            if p.is_required and not p.is_set:
                raise ValueError("Parameter %s is not set."
                                 % p.names[-1])
            # Also repeat the parameter validation here, just in case?

    def __str__(self):
        """Make the commandline string with the currently set options.

        Calls self._validate() first, so a ValueError is raised if a
        required parameter is missing.

        e.g.
        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
        >>> cline.asequence = "asis:ACCCGGGCGCGGT"
        >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
        >>> cline.outfile = "temp_water.txt"
        >>> print cline
        water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
        >>> str(cline)
        'water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5'
        """
        self._validate()
        commandline = "%s " % self.program_name
        # Each parameter's string form includes a trailing space:
        commandline += "".join([str(parameter) for parameter in self.parameters
                                if parameter.is_set])
        return commandline.strip()  # remove trailing space

    def __repr__(self):
        """Return a representation of the command line object for debugging.

        e.g.
        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
        >>> cline.asequence = "asis:ACCCGGGCGCGGT"
        >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
        >>> cline.outfile = "temp_water.txt"
        >>> print cline
        water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
        >>> cline
        WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)
        """
        answer = "%s(cmd=%s" % (self.__class__.__name__,
                                repr(self.program_name))
        for parameter in self.parameters:
            if parameter.is_set:
                if isinstance(parameter, _Switch):
                    # Switches carry no value, they are simply on.
                    answer += ", %s=True" % parameter.names[-1]
                else:
                    answer += ", %s=%s" \
                              % (parameter.names[-1], repr(parameter.value))
        answer += ")"
        return answer

    def _get_parameter(self, name):
        """Get a commandline option value.

        Returns is_set for a switch, otherwise the stored value.  Raises
        ValueError if no parameter has the given name.
        """
        for parameter in self.parameters:
            if name in parameter.names:
                if isinstance(parameter, _Switch):
                    return parameter.is_set
                else:
                    return parameter.value
        raise ValueError("Option name %s was not found." % name)

    def _clear_parameter(self, name):
        """Reset or clear a commandline option value.

        Raises ValueError if no parameter has the given name.
        """
        cleared_option = False
        for parameter in self.parameters:
            if name in parameter.names:
                parameter.value = None
                parameter.is_set = False
                cleared_option = True
        if not cleared_option:
            raise ValueError("Option name %s was not found." % name)

    def set_parameter(self, name, value=None):
        """Set a commandline option for a program.

        For a switch the value is treated as a boolean (None gives a
        warning and counts as False).  For other parameters a value which
        is not None is validated with the parameter's checker function and
        stored; in either case the parameter is marked as set.

        Raises ValueError if no parameter has the given name, or if the
        value fails validation.
        """
        set_option = False
        for parameter in self.parameters:
            if name in parameter.names:
                if isinstance(parameter, _Switch):
                    if value is None:
                        import warnings
                        warnings.warn("For a switch type argument like %s, "
                                      "we expect a boolean. None is treated "
                                      "as FALSE!" % parameter.names[-1])
                    parameter.is_set = bool(value)
                    set_option = True
                else:
                    if value is not None:
                        self._check_value(value, name,
                                          parameter.checker_function)
                        parameter.value = value
                    parameter.is_set = True
                    set_option = True
        if not set_option:
            raise ValueError("Option name %s was not found." % name)

    def _check_value(self, value, name, check_function):
        """Check whether the given value is valid.

        No return value - it either works or raises a ValueError.

        This uses the passed function 'check_function', which can either
        return a [0, 1] (bad, good) value or raise an error. Either way
        this function will raise an error if the value is not valid, or
        finish silently otherwise.  Nothing is checked when check_function
        is None.
        """
        if check_function is not None:
            is_good = check_function(value)  # May raise an exception
            assert is_good in [0, 1, True, False]
            if not is_good:
                raise ValueError("Invalid parameter value %r for parameter %s"
                                 % (value, name))

    def __setattr__(self, name, value):
        """Set attribute name to value (PRIVATE).

        This code implements a workaround for a user interface issue.
        Without this __setattr__ attribute-based assignment of parameters
        will silently accept invalid parameters, leading to known instances
        of the user assuming that parameters for the application are set,
        when they are not.

        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5, stdout=True)
        >>> cline.asequence = "a.fasta"
        >>> cline.bsequence = "b.fasta"
        >>> cline.csequence = "c.fasta"
        Traceback (most recent call last):
        ...
        ValueError: Option name csequence was not found.
        >>> print cline
        water -stdout -asequence=a.fasta -bsequence=b.fasta -gapopen=10 -gapextend=0.5

        This workaround uses a whitelist of object attributes, and sets the
        object attribute list as normal, for these. Other attributes are
        assumed to be parameters, and passed to the self.set_parameter method
        for validation and assignment.
        """
        if name in ['parameters', 'program_name']:  # Allowed attributes
            self.__dict__[name] = value
        else:
            self.set_parameter(name, value)  # treat as a parameter

    def __call__(self, stdin=None, stdout=True, stderr=True,
                 cwd=None, env=None):
        """Executes the command, waits for it to finish, and returns output.

        Runs the command line tool and waits for it to finish. If it returns
        a non-zero error level, an exception is raised. Otherwise a tuple
        (stdout, stderr) is returned.

        The optional stdin argument should be a string of data which will be
        passed to the tool as standard input.

        The optional stdout and stderr argument are treated as a booleans, and
        control if the output should be captured (True, default), or ignored
        by sending it to the null device to avoid wasting memory (False). In
        the latter case the corresponding returned item is empty (whatever
        false value subprocess reports for a stream it did not capture).

        The optional cwd argument is a string giving the working directory
        to run the command from. See Python's subprocess module documentation
        for more details.

        The optional env argument is a dictionary setting the environment
        variables to be used in the new process. By default the current
        process' environment variables are used. See Python's subprocess
        module documentation for more details.

        Default example usage:

        from Bio.Emboss.Applications import WaterCommandline
        water_cmd = WaterCommandline(gapopen=10, gapextend=0.5,
                                     stdout=True, auto=True,
                                     asequence="a.fasta", bsequence="b.fasta")
        print "About to run:\\n%s" % water_cmd
        std_output, err_output = water_cmd()

        This functionality is similar to subprocess.check_output() added in
        Python 2.7. In general if you require more control over running the
        command, use subprocess directly.

        As of Biopython 1.56, when the program called returns a non-zero error
        level, a custom ApplicationError exception is raised. This includes
        any stdout and stderr strings captured as attributes of the exception
        object, since they may be useful for diagnosing what went wrong.
        """
        # Build (and validate) the command line once; it is needed both to
        # run the tool and for any error report.
        # NOTE: this single string is handed to the shell on every platform
        # except Windows, so parameter values must not contain untrusted
        # text.
        command = str(self)
        # Handles on the null device which we opened and must close again.
        opened = []
        try:
            if stdout:
                stdout_arg = subprocess.PIPE
            else:
                # Opened for writing, since the child writes its output here.
                stdout_arg = open(os.devnull, "w")
                opened.append(stdout_arg)
            if stderr:
                stderr_arg = subprocess.PIPE
            else:
                stderr_arg = open(os.devnull, "w")
                opened.append(stderr_arg)
            # We may not need to supply any piped input, but we setup the
            # standard input pipe anyway as a work around for a python
            # bug if this is called from a Windows GUI program.  For
            # details, see http://bugs.python.org/issue1124861
            #
            # Using universal newlines is important on Python 3, this
            # gives unicode handles rather than bytes handles.
            child_process = subprocess.Popen(command, stdin=subprocess.PIPE,
                                             stdout=stdout_arg,
                                             stderr=stderr_arg,
                                             universal_newlines=True,
                                             cwd=cwd, env=env,
                                             shell=(sys.platform != "win32"))
            # Use .communicate as can get deadlocks with .wait(), see Bug 2804
            stdout_str, stderr_str = child_process.communicate(stdin)
        finally:
            # Always release the null device handles, even on failure.
            for handle in opened:
                handle.close()
        if not stdout:
            assert not stdout_str
        if not stderr:
            assert not stderr_str
        return_code = child_process.returncode
        if return_code:
            raise ApplicationError(return_code, command,
                                   stdout_str, stderr_str)
        return stdout_str, stderr_str
446 447
class _AbstractParameter(object):
    """A class to hold information about a parameter for a commandline.

    Do not use this directly, instead use one of the subclasses.

    This is a new-style class (it derives from object), matching
    AbstractCommandline.
    """
    def __init__(self):
        """Always raise NotImplementedError; subclasses supply their own."""
        raise NotImplementedError

    def __str__(self):
        """Always raise NotImplementedError; subclasses supply their own."""
        raise NotImplementedError
458 459
class _Option(_AbstractParameter):
    """Represent an option that can be set for a program.

    This holds UNIXish options like --append=yes and -a yes,
    where a value (here "yes") is generally expected.

    For UNIXish options like -kimura in clustalw which don't
    take a value, use the _Switch object instead.

    Attributes:

    o names -- a list of string names by which the parameter can be
    referenced (ie. ["-a", "--append", "append"]). The first name in
    the list is considered to be the one that goes on the commandline,
    for those parameters that print the option. The last name in the list
    is assumed to be a "human readable" name describing the option in one
    word.

    o description -- a description of the option.

    o is_filename -- True if this argument is a filename and should be
    automatically quoted if it contains spaces (set from the constructor's
    filename argument).

    o checker_function -- a reference to a function that will determine
    if a given value is valid for this parameter. This function can either
    raise an error when given a bad value, or return a [0, 1] decision on
    whether the value is correct.

    o equate -- should an equals sign be inserted if a value is used?

    o is_required -- a flag to indicate if the parameter must be set for
    the program to be run.

    o is_set -- if the parameter has been set

    o value -- the value of a parameter
    """
    def __init__(self, names, description, filename=False,
                 checker_function=None, is_required=False, equate=True):
        """Store the option's settings; it starts unset with no value."""
        self.names = names
        assert isinstance(description, basestring), \
               "%r for %s" % (description, names[-1])
        self.description = description
        self.is_filename = filename
        self.checker_function = checker_function
        self.is_required = is_required
        self.equate = equate

        self.value = None
        self.is_set = False

    def __str__(self):
        """Return the value of this option for the commandline.

        Includes a trailing space.
        """
        # Note: Before equate was handled explicitly, the old
        # code would do either "--name " or "--name=value ",
        # or " -name " or " -name value ".  This choice is now
        # made explicitly when setting up the option.
        flag = self.names[0]
        if self.value is None:
            # No value to show, so just the option name itself.
            return "%s " % flag
        if self.is_filename:
            text = _escape_filename(self.value)
        else:
            text = str(self.value)
        if self.equate:
            template = "%s=%s "
        else:
            template = "%s %s "
        return template % (flag, text)
530 531
class _Switch(_AbstractParameter):
    """Represent an optional argument switch for a program.

    This holds UNIXish options like -kimura in clustalw which don't
    take a value, they are either included in the command string
    or omitted.

    o names -- a list of string names by which the parameter can be
    referenced (ie. ["-a", "--append", "append"]). The first name in
    the list is considered to be the one that goes on the commandline,
    for those parameters that print the option. The last name in the list
    is assumed to be a "human readable" name describing the option in one
    word.

    o description -- a description of the option.

    o is_set -- if the parameter has been set

    NOTE - There is no value attribute, see is_set instead,
    """
    def __init__(self, names, description):
        """Store the names and description; the switch starts off."""
        self.names = names
        self.description = description
        # A switch is never required, and is off until explicitly set.
        self.is_required = False
        self.is_set = False

    def __str__(self):
        """Return the value of this option for the commandline.

        Includes a trailing space when the switch is set; an unset switch
        gives an empty string.
        """
        assert not hasattr(self, "value")
        if not self.is_set:
            return ""
        return "%s " % self.names[0]
568 569
class _Argument(_AbstractParameter):
    """Represent an argument on a commandline.

    Unlike an option, only the value itself is written to the command
    line string, without any of the names.
    """
    def __init__(self, names, description, filename=False,
                 checker_function=None, is_required=False):
        """Store the argument's settings; it starts unset with no value."""
        self.names = names
        assert isinstance(description, basestring), \
               "%r for %s" % (description, names[-1])
        self.description = description
        self.is_filename = filename
        self.checker_function = checker_function
        self.is_required = is_required
        self.value = None
        self.is_set = False

    def __str__(self):
        """Return the argument's value followed by a trailing space.

        With no value a single space is returned; a filename value is
        passed through _escape_filename first.
        """
        if self.value is None:
            return " "
        if self.is_filename:
            return "%s " % _escape_filename(self.value)
        return "%s " % self.value
592 593
def _escape_filename(filename):
    """Escape filenames with spaces by adding quotes (PRIVATE).

    Filenames without a space are returned unchanged.  Note this will
    not add quotes if they are already included:

    >>> print _escape_filename('example with spaces')
    "example with spaces"
    >>> print _escape_filename('"example with spaces"')
    "example with spaces"
    """
    # An alternative once considered here was to ask Windows (via win32api)
    # for the DOS style 8.3 short name of an existing file, which has no
    # spaces in it - but such a name is not portable between machines, or
    # even folders, so we just quote instead (works on Windows, Mac OS X
    # etc).
    needs_quoting = " " in filename
    already_quoted = filename.startswith('"') and filename.endswith('"')
    if needs_quoting and not already_quoted:
        return '"%s"' % filename
    return filename
625 626
def _test():
    """Run the Bio.Application module's doctests."""
    from doctest import testmod
    testmod(verbose=1)


if __name__ == "__main__":
    # Run the doctests when executed as a script
    _test()