1
2
3
4
5
6
7 """General mechanisms to access applications in Biopython.
8
9 This module is not intended for direct use. It provides the basic objects which
10 are subclassed by our command line wrappers, such as:
11
12 - Bio.Align.Applications
13 - Bio.Blast.Applications
14 - Bio.Emboss.Applications
15 - Bio.Sequencing.Applications
16
17 These modules provide wrapper classes for command line tools to help you
18 construct command line strings by setting the values of each parameter.
19 The finished command line strings are then normally invoked via the built-in
20 Python module subprocess.
21 """
22 import os
23 import sys
24 import StringIO
25 import subprocess
26 import re
27
28 from subprocess import CalledProcessError as _ProcessCalledError
29
30 from Bio import File
31
32
33
# Pattern used to check that a parameter's final name can be used as a Python
# keyword argument and property name: a letter followed by letters, digits or
# underscores.  re.match() only anchors at the start of the string, so the
# pattern is explicitly anchored at the end as well; otherwise a name such as
# "test-name" would be accepted on the strength of its valid "test" prefix.
_re_prop_name = re.compile(r"[a-zA-Z][a-zA-Z0-9_]*\Z")
assert _re_prop_name.match("t")
assert _re_prop_name.match("test")
assert _re_prop_name.match("_test") is None
assert _re_prop_name.match("-test") is None
assert _re_prop_name.match("test_name")
assert _re_prop_name.match("test2")
assert _re_prop_name.match("test-name") is None
assert _re_prop_name.match("test name") is None

# Python reserved words; these cannot be used as argument or property names.
_reserved_names = ["and", "del", "from", "not", "while", "as", "elif",
                   "global", "or", "with", "assert", "else", "if", "pass",
                   "yield", "break", "except", "import", "print", "class",
                   "exec", "in", "raise", "continue", "finally", "is",
                   "return", "def", "for", "lambda", "try"]

# Names rejected as parameter names because of how AbstractCommandline works
# (set_parameter is one of its own methods).
_local_reserved_names = ["set_parameter"]
49
50
class ApplicationError(_ProcessCalledError):
    """Raised when an application returns a non-zero exit status.

    The exit status will be stored in the returncode attribute, similarly
    the command line string used in the cmd attribute, and (if captured)
    stdout and stderr as strings.

    This exception is a subclass of subprocess.CalledProcessError.

    >>> err = ApplicationError(-11, "helloworld", "", "Some error text")
    >>> err.returncode, err.cmd, err.stdout, err.stderr
    (-11, 'helloworld', '', 'Some error text')
    >>> print(err)
    Command 'helloworld' returned non-zero exit status -11, 'Some error text'

    """

    def __init__(self, returncode, cmd, stdout="", stderr=""):
        """Store the exit status, command line and any captured output.

        Arguments:
         - returncode -- the exit status of the command (an integer).
         - cmd -- the command line string which was run.
         - stdout -- captured standard output, default empty string.
         - stderr -- captured standard error, default empty string.
        """
        self.returncode = returncode
        self.cmd = cmd
        self.stdout = stdout
        self.stderr = stderr

    def __str__(self):
        """Return a one line description of the failure.

        The first non-blank line of stderr (if there is one) is appended to
        the message, shown via repr.
        """
        # stderr may not be a text string (e.g. None), in which case the
        # message is simply omitted.  Catch Exception rather than using a
        # bare except so KeyboardInterrupt and SystemExit are not swallowed.
        try:
            msg = self.stderr.lstrip().split("\n", 1)[0].rstrip()
        except Exception:
            msg = ""
        if msg:
            return "Command '%s' returned non-zero exit status %d, %r" \
                   % (self.cmd, self.returncode, msg)
        else:
            return "Command '%s' returned non-zero exit status %d" \
                   % (self.cmd, self.returncode)

    def __repr__(self):
        """Return a debugging representation listing all four attributes."""
        return "ApplicationError(%i, %s, %s, %s)" \
               % (self.returncode, self.cmd, self.stdout, self.stderr)
89
90
class AbstractCommandline(object):
    """Generic interface for constructing command line strings.

    This class shouldn't be called directly; it should be subclassed to
    provide an implementation for a specific application.

    For a usage example we'll show one of the EMBOSS wrappers.  You can set
    options when creating the wrapper object using keyword arguments - or
    later using their corresponding properties:

    >>> from Bio.Emboss.Applications import WaterCommandline
    >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
    >>> cline
    WaterCommandline(cmd='water', gapopen=10, gapextend=0.5)

    You can instead manipulate the parameters via their properties, e.g.

    >>> cline.gapopen
    10
    >>> cline.gapopen = 20
    >>> cline
    WaterCommandline(cmd='water', gapopen=20, gapextend=0.5)

    You can clear a parameter you have already added by 'deleting' the
    corresponding property:

    >>> del cline.gapopen
    >>> cline.gapopen
    >>> cline
    WaterCommandline(cmd='water', gapextend=0.5)

    Once you have set the parameters you need, turn the object into a string:

    >>> str(cline)
    Traceback (most recent call last):
    ...
    ValueError: You must either set outfile (output filename), or enable filter or stdout (output to stdout).

    In this case the wrapper knows certain arguments are required to construct
    a valid command line for the tool.  For a complete example,

    >>> from Bio.Emboss.Applications import WaterCommandline
    >>> water_cmd = WaterCommandline(gapopen=10, gapextend=0.5)
    >>> water_cmd.asequence = "asis:ACCCGGGCGCGGT"
    >>> water_cmd.bsequence = "asis:ACCCGAGCGCGGT"
    >>> water_cmd.outfile = "temp_water.txt"
    >>> print(water_cmd)
    water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
    >>> water_cmd
    WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)

    You would typically run the command line via a standard Python operating
    system call using the subprocess module for full control.  For the simple
    case where you just want to run the command and get the output:

    stdout, stderr = water_cmd()
    """

    def __init__(self, cmd, **kwargs):
        """Create a new instance of a command line wrapper object.

        Arguments:
         - cmd -- the program name, used as the start of the command line.
         - kwargs -- parameter names and values, each one is passed to
           the set_parameter method.

        The subclass must assign self.parameters (the list of parameter
        objects) before calling this method.  A property is added to the
        class for the final name of each parameter.

        Raises AttributeError if self.parameters has not been defined, and
        ValueError if a parameter alias is defined more than once, if a
        final parameter name cannot be used as a Python argument/property
        name, or if a keyword argument does not match any parameter.
        """
        self.program_name = cmd
        try:
            parameters = self.parameters
        except AttributeError:
            raise AttributeError("Subclass should have defined self.parameters")

        # Factory functions, so that each property's functions are bound to
        # their own parameter name rather than sharing the loop variable.
        def getter(name):
            return lambda x: x._get_parameter(name)

        def setter(name):
            return lambda x, value: x.set_parameter(name, value)

        def deleter(name):
            return lambda x: x._clear_parameter(name)

        # Create properties for each parameter at run time
        aliases = set()
        for p in parameters:
            for name in p.names:
                if name in aliases:
                    raise ValueError("Parameter alias %s multiply defined"
                                     % name)
                aliases.add(name)
            # The last name is the one exposed as a keyword argument and
            # property, so it must be usable as a Python identifier.
            name = p.names[-1]
            if _re_prop_name.match(name) is None:
                raise ValueError("Final parameter name %s cannot be used as "
                                 "an argument or property name in python"
                                 % repr(name))
            if name in _reserved_names:
                raise ValueError("Final parameter name %s cannot be used as "
                                 "an argument or property name because it is "
                                 "a reserved word in python" % repr(name))
            if name in _local_reserved_names:
                raise ValueError("Final parameter name %s cannot be used as "
                                 "an argument or property name due to the "
                                 "way the AbstractCommandline class works"
                                 % repr(name))

            doc = p.description
            if isinstance(p, _Switch):
                doc += "\n\nThis property controls the addition of the %s " \
                       "switch, treat this property as a boolean." % p.names[0]
            else:
                doc += "\n\nThis controls the addition of the %s parameter " \
                       "and its associated value.  Set this property to the " \
                       "argument value required." % p.names[0]
            prop = property(getter(name), setter(name), deleter(name), doc)
            # Properties only work when defined on the class, not instance.
            setattr(self.__class__, name, prop)
        for key, value in kwargs.items():
            self.set_parameter(key, value)

    def _validate(self):
        """Make sure the required parameters have been set (PRIVATE).

        No return value - it either works or raises a ValueError.

        This is a separate method (called from __str__) so that subclasses may
        override it.
        """
        for p in self.parameters:
            if p.is_required and not p.is_set:
                raise ValueError("Parameter %s is not set."
                                 % p.names[-1])

    def __str__(self):
        """Make the commandline string with the currently set options.

        e.g.
        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
        >>> cline.asequence = "asis:ACCCGGGCGCGGT"
        >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
        >>> cline.outfile = "temp_water.txt"
        >>> print(cline)
        water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
        >>> str(cline)
        'water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5'
        """
        self._validate()
        # Each parameter's string form already includes its trailing space.
        pieces = ["%s " % self.program_name]
        for parameter in self.parameters:
            if parameter.is_set:
                pieces.append(str(parameter))
        return "".join(pieces).strip()

    def __repr__(self):
        """Return a representation of the command line object for debugging.

        e.g.
        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
        >>> cline.asequence = "asis:ACCCGGGCGCGGT"
        >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
        >>> cline.outfile = "temp_water.txt"
        >>> print(cline)
        water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
        >>> cline
        WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)
        """
        answer = "%s(cmd=%s" % (self.__class__.__name__, repr(self.program_name))
        for parameter in self.parameters:
            if parameter.is_set:
                if isinstance(parameter, _Switch):
                    answer += ", %s=True" % parameter.names[-1]
                else:
                    answer += ", %s=%s" \
                              % (parameter.names[-1], repr(parameter.value))
        answer += ")"
        return answer

    def _get_parameter(self, name):
        """Get a commandline option value.

        For a switch this is its boolean is_set flag, otherwise the stored
        value.  Raises ValueError if no parameter has the given name.
        """
        for parameter in self.parameters:
            if name in parameter.names:
                if isinstance(parameter, _Switch):
                    return parameter.is_set
                else:
                    return parameter.value
        raise ValueError("Option name %s was not found." % name)

    def _clear_parameter(self, name):
        """Reset or clear a commandline option value.

        Raises ValueError if no parameter has the given name.
        """
        cleared_option = False
        for parameter in self.parameters:
            if name in parameter.names:
                parameter.value = None
                parameter.is_set = False
                cleared_option = True
        if not cleared_option:
            raise ValueError("Option name %s was not found." % name)

    def set_parameter(self, name, value=None):
        """Set a commandline option for a program.

        For a switch the value is treated as a boolean (None gives a warning
        and is treated as False).  For other parameters a value other than
        None is checked with the parameter's checker function (if any) and
        stored; either way the parameter is marked as set.

        Raises ValueError if no parameter has the given name, or if the
        value is rejected by the checker function.
        """
        set_option = False
        for parameter in self.parameters:
            if name in parameter.names:
                if isinstance(parameter, _Switch):
                    if value is None:
                        import warnings
                        warnings.warn("For a switch type argument like %s, "
                                      "we expect a boolean.  None is treated "
                                      "as FALSE!" % parameter.names[-1])
                    parameter.is_set = bool(value)
                    set_option = True
                else:
                    if value is not None:
                        self._check_value(value, name, parameter.checker_function)
                        parameter.value = value
                    parameter.is_set = True
                    set_option = True
        if not set_option:
            raise ValueError("Option name %s was not found." % name)

    def _check_value(self, value, name, check_function):
        """Check whether the given value is valid.

        No return value - it either works or raises a ValueError.

        This uses the passed function 'check_function', which can either
        return a [0, 1] (bad, good) value or raise an error.  Either way
        this function will raise an error if the value is not valid, or
        finish silently otherwise.
        """
        if check_function is not None:
            is_good = check_function(value)  # May raise an exception
            assert is_good in [0, 1, True, False]
            if not is_good:
                raise ValueError("Invalid parameter value %r for parameter %s"
                                 % (value, name))

    def __setattr__(self, name, value):
        """Set attribute name to value (PRIVATE).

        This code implements a workaround for a user interface issue.
        Without this __setattr__ attribute-based assignment of parameters
        will silently accept invalid parameters, leading to known instances
        of the user assuming that parameters for the application are set,
        when they are not.

        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5, stdout=True)
        >>> cline.asequence = "a.fasta"
        >>> cline.bsequence = "b.fasta"
        >>> cline.csequence = "c.fasta"
        Traceback (most recent call last):
        ...
        ValueError: Option name csequence was not found.
        >>> print(cline)
        water -stdout -asequence=a.fasta -bsequence=b.fasta -gapopen=10 -gapextend=0.5

        This workaround uses a whitelist of object attributes, and sets the
        object attribute list as normal, for these.  Other attributes are
        assumed to be parameters, and passed to the self.set_parameter method
        for validation and assignment.
        """
        if name in ['parameters', 'program_name']:  # Allowed attributes
            self.__dict__[name] = value
        else:
            self.set_parameter(name, value)  # treat as a parameter

    def __call__(self, stdin=None, stdout=True, stderr=True,
                 cwd=None, env=None):
        """Executes the command, waits for it to finish, and returns output.

        Runs the command line tool and waits for it to finish.  If it returns
        a non-zero error level, an exception is raised.  Otherwise a tuple
        of the captured stdout and stderr is returned.

        The optional stdin argument should be a string of data which will be
        passed to the tool as standard input.

        The optional stdout and stderr argument are treated as a booleans, and
        control if the output should be captured (True, default), or ignored
        by sending it to /dev/null to avoid wasting memory (False).  In the
        latter case nothing is captured for that stream, and the matching
        item of the returned tuple is whatever subprocess reports for a
        stream which was not piped (None, rather than a string).

        The optional cwd argument is a string giving the working directory
        to run the command from.  See Python's subprocess module documentation
        for more details.

        The optional env argument is a dictionary setting the environment
        variables to be used in the new process.  By default the current
        process' environment variables are used.  See Python's subprocess
        module documentation for more details.

        Default example usage:

        from Bio.Emboss.Applications import WaterCommandline
        water_cmd = WaterCommandline(gapopen=10, gapextend=0.5,
                                     stdout=True, auto=True,
                                     asequence="a.fasta", bsequence="b.fasta")
        print("About to run:\\n%s" % water_cmd)
        std_output, err_output = water_cmd()

        This functionality is similar to subprocess.check_output() added in
        Python 2.7.  In general if you require more control over running the
        command, use subprocess directly.

        As of Biopython 1.56, when the program called returns a non-zero error
        level, a custom ApplicationError exception is raised.  This includes
        any stdout and stderr strings captured as attributes of the exception
        object, since they may be useful for diagnosing what went wrong.
        """
        # Handles on the null device which this call opened (and so must
        # close again once the child process has finished).
        devnull_handles = []
        try:
            # The null device must be opened for writing, since the child
            # process writes its output to it.
            if stdout:
                stdout_arg = subprocess.PIPE
            else:
                stdout_arg = open(os.devnull, "w")
                devnull_handles.append(stdout_arg)
            if stderr:
                stderr_arg = subprocess.PIPE
            else:
                stderr_arg = open(os.devnull, "w")
                devnull_handles.append(stderr_arg)

            # Standard input is always piped, so that any stdin string can
            # be supplied via communicate().
            #
            # NOTE(security): except on Windows the command line string is
            # run via the shell, so it must not be built from untrusted
            # input - only values flagged as filenames get quoted, and only
            # when they contain spaces.
            child_process = subprocess.Popen(str(self), stdin=subprocess.PIPE,
                                             stdout=stdout_arg, stderr=stderr_arg,
                                             universal_newlines=True,
                                             cwd=cwd, env=env,
                                             shell=(sys.platform != "win32"))
            # Use .communicate as can get deadlocks with .wait()
            stdout_str, stderr_str = child_process.communicate(stdin)
        finally:
            for handle in devnull_handles:
                handle.close()
        if not stdout:
            assert not stdout_str
        if not stderr:
            assert not stderr_str
        return_code = child_process.returncode
        if return_code:
            raise ApplicationError(return_code, str(self),
                                   stdout_str, stderr_str)
        return stdout_str, stderr_str
446
447
class _AbstractParameter(object):
    """A class to hold information about a parameter for a commandline.

    Do not use this directly, instead use one of the subclasses.
    """

    def __init__(self):
        """Not implemented here; each subclass provides its own."""
        raise NotImplementedError

    def __str__(self):
        """Not implemented here; each subclass provides its own."""
        raise NotImplementedError
458
459
class _Option(_AbstractParameter):
    """Represent an option that can be set for a program.

    This holds UNIXish options like --append=yes and -a yes,
    where a value (here "yes") is generally expected.

    For UNIXish options like -kimura in clustalw which don't
    take a value, use the _Switch object instead.

    Attributes:

    o names -- a list of string names by which the parameter can be
    referenced (ie. ["-a", "--append", "append"]). The first name in
    the list is considered to be the one that goes on the commandline,
    for those parameters that print the option. The last name in the list
    is assumed to be a "human readable" name describing the option in one
    word.

    o description -- a description of the option.

    o is_filename -- True if this argument is a filename and should be
    automatically quoted if it contains spaces (this is set with the
    filename argument when creating the object).

    o checker_function -- a reference to a function that will determine
    if a given value is valid for this parameter. This function can either
    raise an error when given a bad value, or return a [0, 1] decision on
    whether the value is correct.

    o equate -- should an equals sign be inserted if a value is used?

    o is_required -- a flag to indicate if the parameter must be set for
    the program to be run.

    o is_set -- if the parameter has been set

    o value -- the value of a parameter
    """

    def __init__(self, names, description, filename=False, checker_function=None,
                 is_required=False, equate=True):
        """Record the option's settings; it starts unset with no value."""
        self.names = names
        assert isinstance(description, basestring), \
               "%r for %s" % (description, names[-1])
        self.is_filename = filename
        self.checker_function = checker_function
        self.description = description
        self.equate = equate
        self.is_required = is_required

        self.is_set = False
        self.value = None

    def __str__(self):
        """Return the value of this option for the commandline.

        Includes a trailing space.
        """
        # With no value only the option name is given; otherwise the name
        # and value are joined with an equals sign or a space (see equate).
        if self.value is None:
            return "%s " % self.names[0]
        if self.is_filename:
            v = _escape_filename(self.value)
        else:
            v = str(self.value)
        if self.equate:
            return "%s=%s " % (self.names[0], v)
        else:
            return "%s %s " % (self.names[0], v)
530
531
class _Switch(_AbstractParameter):
    """Represent an optional argument switch for a program.

    This holds UNIXish options like -kimura in clustalw which don't
    take a value, they are either included in the command string
    or omitted.

    o names -- a list of string names by which the parameter can be
    referenced (ie. ["-a", "--append", "append"]). The first name in
    the list is considered to be the one that goes on the commandline,
    for those parameters that print the option. The last name in the list
    is assumed to be a "human readable" name describing the option in one
    word.

    o description -- a description of the option.

    o is_set -- if the parameter has been set

    NOTE - There is no value attribute, see is_set instead.
    """

    def __init__(self, names, description):
        """Record the switch's names and description; it starts unset."""
        self.names = names
        self.description = description
        self.is_set = False
        self.is_required = False

    def __str__(self):
        """Return the value of this option for the commandline.

        Includes a trailing space.
        """
        # A switch is either present or absent, it never carries a value.
        assert not hasattr(self, "value")
        if self.is_set:
            return "%s " % self.names[0]
        else:
            return ""
568
569
class _Argument(_AbstractParameter):
    """Represent an argument on a commandline.

    Unlike an option, only the value itself is written to the command
    line string, the names are not used there.
    """

    def __init__(self, names, description, filename=False,
                 checker_function=None, is_required=False):
        """Record the argument's settings; it starts unset with no value."""
        self.names = names
        assert isinstance(description, basestring), \
               "%r for %s" % (description, names[-1])
        self.is_filename = filename
        self.checker_function = checker_function
        self.description = description
        self.is_required = is_required
        self.is_set = False
        self.value = None

    def __str__(self):
        """Return the value of this argument for the commandline.

        Includes a trailing space (and is just a space if there is no value).
        Filenames containing spaces are quoted.
        """
        if self.value is None:
            return " "
        elif self.is_filename:
            return "%s " % _escape_filename(self.value)
        else:
            return "%s " % self.value
592
593
def _escape_filename(filename):
    """Escape filenames with spaces by adding quotes (PRIVATE).

    Note this will not add quotes if they are already included:

    >>> print(_escape_filename('example with spaces'))
    "example with spaces"
    >>> print(_escape_filename('"example with spaces"'))
    "example with spaces"

    Filenames without any spaces are returned unchanged.
    """
    if " " not in filename:
        return filename
    # We'll just quote it - works on Windows, Mac OS X etc
    if filename.startswith('"') and filename.endswith('"'):
        # Its already quoted
        return filename
    else:
        return '"%s"' % filename
625
626
def _test():
    """Run the Bio.Application module's doctests."""
    import doctest
    doctest.testmod(verbose=1)
631
if __name__ == "__main__":
    # Run the doctests when this module is executed as a script.
    _test()
635