1
2
3
4
5
6
7
8
9 """Code to interact with and run various EMBOSS programs.
10
11 These classes follow the AbstractCommandline interfaces for running
12 programs.
13 """
14
15 from Bio.Application import _Option, _Switch, AbstractCommandline
16
17
class _EmbossMinimalCommandLine(AbstractCommandline):
    """Base Commandline object for EMBOSS wrappers (PRIVATE).

    This is provided for subclassing, it deals with shared options
    common to all the EMBOSS tools:

     - auto          Turn off prompts
     - stdout        Write standard output
     - filter        Read standard input, write standard output
     - options       Prompt for standard and additional values
     - debug         Write debug output to program.dbg
     - verbose       Report some/full command line options
     - help          Report command line options. More
                     information on associated and general
                     qualifiers can be found with -help -verbose
     - warning       Report warnings
     - error         Report errors
     - fatal         Report fatal errors
     - die           Report dying program messages
    """

    def __init__(self, cmd=None, **kwargs):
        """Prepend the shared EMBOSS switches and initialise the base class.

        Arguments:
         - cmd - the EMBOSS executable to run; must not be None (asserted).
         - kwargs - forwarded unchanged to AbstractCommandline.__init__.

        Any option list a subclass has already stored in self.parameters
        is kept, with the shared switches placed in front of it.
        """
        assert cmd is not None
        extra_parameters = [
            _Switch(["-auto", "auto"],
                    """Turn off prompts.

                    Automatic mode disables prompting, so we recommend you set
                    this argument all the time when calling an EMBOSS tool from
                    Biopython.
                    """),
            _Switch(["-stdout", "stdout"],
                    "Write standard output."),
            _Switch(["-filter", "filter"],
                    "Read standard input, write standard output."),
            _Switch(["-options", "options"],
                    """Prompt for standard and additional values.

                    If you are calling an EMBOSS tool from within Biopython,
                    we DO NOT recommend using this option.
                    """),
            _Switch(["-debug", "debug"],
                    "Write debug output to program.dbg."),
            _Switch(["-verbose", "verbose"],
                    "Report some/full command line options"),
            _Switch(["-help", "help"],
                    """Report command line options.

                    More information on associated and general qualifiers can
                    be found with -help -verbose
                    """),
            _Switch(["-warning", "warning"],
                    "Report warnings."),
            _Switch(["-error", "error"],
                    "Report errors."),
            # The class docstring advertises "fatal" alongside the other
            # message-level qualifiers, so expose it as a switch as well.
            _Switch(["-fatal", "fatal"],
                    "Report fatal errors."),
            _Switch(["-die", "die"],
                    "Report dying program messages."),
        ]
        try:
            # Put the shared switches ahead of whatever tool-specific
            # options the subclass defined before calling us.
            self.parameters = extra_parameters + self.parameters
        except AttributeError:
            # The subclass did not define any options of its own.
            self.parameters = extra_parameters
        AbstractCommandline.__init__(self, cmd, **kwargs)
83
84
class _EmbossCommandLine(_EmbossMinimalCommandLine):
    """Base Commandline object for EMBOSS wrappers (PRIVATE).

    Intended for subclassing. On top of the shared options handled by
    _EmbossMinimalCommandLine (common to all the EMBOSS tools) it adds:

     - outfile            Output filename

    """

    def __init__(self, cmd=None, **kwargs):
        """Put the outfile option in front of any subclass options.

        cmd must not be None (asserted); kwargs are passed straight on to
        _EmbossMinimalCommandLine.__init__.
        """
        assert cmd is not None
        outfile_option = _Option(["-outfile", "outfile"],
                                 "Output filename",
                                 filename=True)
        try:
            # Options already declared by the concrete subclass, if any.
            declared = self.parameters
        except AttributeError:
            declared = []
        self.parameters = [outfile_option] + declared
        _EmbossMinimalCommandLine.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Require an output destination, then run the parent validation.

        Raises ValueError unless one of outfile, filter or stdout is set.
        """
        # An output file is only optional when the tool has been told to
        # write to standard output (via filter or stdout).
        if self.outfile or self.filter or self.stdout:
            return _EmbossMinimalCommandLine._validate(self)
        raise ValueError("You must either set outfile (output filename), "
                         "or enable filter or stdout (output to stdout).")
118
119
class Primer3Commandline(_EmbossCommandLine):
    """Commandline object for the Primer3 interface from EMBOSS.

    The precise set of supported arguments depends on your version of EMBOSS.
    This version accepts arguments current at EMBOSS 6.1.0, but in order to
    remain backwards compatible also supports the old argument names as well.

    e.g. Using EMBOSS 6.1.0 or later,

    >>> cline = Primer3Commandline(sequence="mysequence.fas", auto=True, hybridprobe=True)
    >>> cline.explainflag = True
    >>> cline.osizeopt=20
    >>> cline.psizeopt=200
    >>> cline.outfile = "myresults.out"
    >>> cline.bogusparameter = 1967  # Invalid parameter
    Traceback (most recent call last):
        ...
    ValueError: Option name bogusparameter was not found.
    >>> print(cline)
    eprimer3 -auto -outfile=myresults.out -sequence=mysequence.fas -hybridprobe=True -psizeopt=200 -osizeopt=20 -explainflag=True

    The equivalent for anyone still using an older version of EMBOSS would be:

    >>> cline = Primer3Commandline(sequence="mysequence.fas", auto=True, hybridprobe=True)
    >>> cline.explainflag = True
    >>> cline.oligosize=20  # Old EMBOSS, instead of osizeopt
    >>> cline.productosize=200  # Old EMBOSS, instead of psizeopt
    >>> cline.outfile = "myresults.out"
    >>> print(cline)
    eprimer3 -auto -outfile=myresults.out -sequence=mysequence.fas -hybridprobe=True -productosize=200 -oligosize=20 -explainflag=True

    """

    def __init__(self, cmd="eprimer3", **kwargs):
        """Declare the eprimer3 options and initialise the base class.

        Arguments:
         - cmd - the executable to run (default "eprimer3").
         - kwargs - option values given by name (as in the class examples),
           passed on to _EmbossCommandLine.__init__.

        Both the EMBOSS 6.1.0 option names and the older names they
        replaced are declared, so either generation of EMBOSS can be driven.
        """
        self.parameters = [
            _Option(["-sequence", "sequence"],
                    "Sequence to choose primers from.",
                    is_required=True),
            _Option(["-task", "task"],
                    "Tell eprimer3 what task to perform."),
            _Option(["-hybridprobe", "hybridprobe"],
                    "Find an internal oligo to use as a hyb probe."),
            _Option(["-numreturn", "numreturn"],
                    "Maximum number of primer pairs to return."),
            _Option(["-includedregion", "includedregion"],
                    "Subregion of the sequence in which to pick primers."),
            _Option(["-target", "target"],
                    "Sequence to target for flanking primers."),
            _Option(["-excludedregion", "excludedregion"],
                    "Regions to exclude from primer picking."),
            _Option(["-forwardinput", "forwardinput"],
                    "Sequence of a forward primer to check."),
            _Option(["-reverseinput", "reverseinput"],
                    "Sequence of a reverse primer to check."),
            _Option(["-gcclamp", "gcclamp"],
                    "The required number of Gs and Cs at the 3' of each primer."),
            _Option(["-osize", "osize"],
                    "Optimum length of a primer oligo."),
            _Option(["-minsize", "minsize"],
                    "Minimum length of a primer oligo."),
            _Option(["-maxsize", "maxsize"],
                    "Maximum length of a primer oligo."),
            _Option(["-otm", "otm"],
                    "Optimum melting temperature for a primer oligo."),
            _Option(["-mintm", "mintm"],
                    "Minimum melting temperature for a primer oligo."),
            _Option(["-maxtm", "maxtm"],
                    "Maximum melting temperature for a primer oligo."),
            _Option(["-maxdifftm", "maxdifftm"],
                    "Maximum difference in melting temperatures between "
                    "forward and reverse primers."),
            _Option(["-ogcpercent", "ogcpercent"],
                    "Optimum GC% for a primer."),
            _Option(["-mingc", "mingc"],
                    "Minimum GC% for a primer."),
            _Option(["-maxgc", "maxgc"],
                    "Maximum GC% for a primer."),
            _Option(["-saltconc", "saltconc"],
                    "Millimolar salt concentration in the PCR."),
            _Option(["-dnaconc", "dnaconc"],
                    "Nanomolar concentration of annealing oligos in the PCR."),
            _Option(["-maxpolyx", "maxpolyx"],
                    "Maximum allowable mononucleotide repeat length in a primer."),
            # PCR product size: old name followed by its 6.1.0 replacement.
            _Option(["-productosize", "productosize"],
                    """Optimum size for the PCR product (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -psizeopt
                    """),
            _Option(["-psizeopt", "psizeopt"],
                    """Optimum size for the PCR product.

                    Option added in EMBOSS 6.1.0, replacing -productosize
                    """),
            _Option(["-productsizerange", "productsizerange"],
                    """Acceptable range of length for the PCR product (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -prange
                    """),
            _Option(["-prange", "prange"],
                    """Acceptable range of length for the PCR product.

                    Option added in EMBOSS 6.1.0, replacing -productsizerange
                    """),
            # PCR product melting temperature.
            _Option(["-productotm", "productotm"],
                    """Optimum melting temperature for the PCR product (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -ptmopt
                    """),
            _Option(["-ptmopt", "ptmopt"],
                    """Optimum melting temperature for the PCR product.

                    Option added in EMBOSS 6.1.0, replacing -productotm
                    """),
            _Option(["-productmintm", "productmintm"],
                    """Minimum allowed melting temperature for the amplicon (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -ptmmin
                    """),
            _Option(["-ptmmin", "ptmmin"],
                    """Minimum allowed melting temperature for the amplicon.

                    Option added in EMBOSS 6.1.0, replacing -productmintm
                    """),
            _Option(["-productmaxtm", "productmaxtm"],
                    """Maximum allowed melting temperature for the amplicon (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -ptmmax
                    """),
            _Option(["-ptmmax", "ptmmax"],
                    """Maximum allowed melting temperature for the amplicon.

                    Option added in EMBOSS 6.1.0, replacing -productmaxtm
                    """),
            # Internal oligo: excluded region and input sequence.
            _Option(["-oexcludedregion", "oexcludedregion"],
                    """Do not pick internal oligos in this region.

                    Option added in EMBOSS 6.1.0, replacing -oligoexcludedregion.
                    """),
            _Option(["-oligoexcludedregion", "oligoexcludedregion"],
                    """Do not pick internal oligos in this region (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -oexcludedregion.
                    """),
            _Option(["-oligoinput", "oligoinput"],
                    "Sequence of the internal oligo."),
            # Internal oligo length.
            _Option(["-oligosize", "oligosize"],
                    """Optimum length of internal oligo (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -osizeopt.
                    """),
            _Option(["-osizeopt", "osizeopt"],
                    """Optimum length of internal oligo.

                    Option added in EMBOSS 6.1.0, replaces -oligosize
                    """),
            _Option(["-oligominsize", "oligominsize"],
                    """Minimum length of internal oligo (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -ominsize.
                    """),
            _Option(["-ominsize", "ominsize"],
                    """Minimum length of internal oligo.

                    Option added in EMBOSS 6.1.0, replaces -oligominsize
                    """),
            _Option(["-oligomaxsize", "oligomaxsize"],
                    """Maximum length of internal oligo (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -omaxsize.
                    """),
            _Option(["-omaxsize", "omaxsize"],
                    """Maximum length of internal oligo.

                    Option added in EMBOSS 6.1.0, replaces -oligomaxsize
                    """),
            # Internal oligo melting temperature.
            _Option(["-oligotm", "oligotm"],
                    """Optimum melting temperature of internal oligo (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -otmopt.
                    """),
            _Option(["-otmopt", "otmopt"],
                    """Optimum melting temperature of internal oligo.

                    Option added in EMBOSS 6.1.0, replacing -oligotm
                    """),
            _Option(["-oligomintm", "oligomintm"],
                    """Minimum melting temperature of internal oligo (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -otmmin.
                    """),
            _Option(["-otmmin", "otmmin"],
                    """Minimum melting temperature of internal oligo.

                    Option added in EMBOSS 6.1.0, replacing -oligomintm
                    """),
            _Option(["-oligomaxtm", "oligomaxtm"],
                    """Maximum melting temperature of internal oligo (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -otmmax.
                    """),
            _Option(["-otmmax", "otmmax"],
                    """Maximum melting temperature of internal oligo.

                    Option added in EMBOSS 6.1.0, replacing -oligomaxtm
                    """),
            # Internal oligo GC content.
            _Option(["-oligoogcpercent", "oligoogcpercent"],
                    """Optimum GC% for internal oligo (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -ogcopt.
                    """),
            _Option(["-ogcopt", "ogcopt"],
                    """Optimum GC% for internal oligo.

                    Option added in EMBOSS 6.1.0, replacing -oligoogcpercent
                    """),
            _Option(["-oligomingc", "oligomingc"],
                    """Minimum GC% for internal oligo (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -ogcmin.
                    """),
            _Option(["-ogcmin", "ogcmin"],
                    """Minimum GC% for internal oligo.

                    Option added in EMBOSS 6.1.0, replacing -oligomingc
                    """),
            _Option(["-oligomaxgc", "oligomaxgc"],
                    """Maximum GC% for internal oligo (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -ogcmax
                    """),
            _Option(["-ogcmax", "ogcmax"],
                    """Maximum GC% for internal oligo.

                    Option added in EMBOSS 6.1.0, replacing -oligomaxgc
                    """),
            # Internal oligo hybridisation conditions.
            _Option(["-oligosaltconc", "oligosaltconc"],
                    """Millimolar concentration of salt in the hybridisation (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -osaltconc
                    """),
            _Option(["-osaltconc", "osaltconc"],
                    """Millimolar concentration of salt in the hybridisation.

                    Option added in EMBOSS 6.1.0, replacing -oligosaltconc
                    """),
            _Option(["-oligodnaconc", "oligodnaconc"],
                    """Nanomolar concentration of internal oligo in the hybridisation (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -odnaconc
                    """),
            _Option(["-odnaconc", "odnaconc"],
                    """Nanomolar concentration of internal oligo in the hybridisation.

                    Option added in EMBOSS 6.1.0, replacing -oligodnaconc
                    """),
            # Internal oligo self-complementarity and repeats.
            _Option(["-oligoselfany", "oligoselfany"],
                    """Maximum allowable alignment score for self-complementarity (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -oanyself
                    """),
            _Option(["-oanyself", "oanyself"],
                    """Maximum allowable alignment score for self-complementarity.

                    Option added in EMBOSS 6.1.0, replacing -oligoselfany
                    """),
            _Option(["-oligoselfend", "oligoselfend"],
                    """Maximum allowable 3`-anchored global alignment score
                    for self-complementarity (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -oendself
                    """),
            _Option(["-oendself", "oendself"],
                    """Max 3`-anchored self-complementarity global alignment score.

                    Option added in EMBOSS 6.1.0, replacing -oligoselfend
                    """),
            _Option(["-oligomaxpolyx", "oligomaxpolyx"],
                    """Maximum length of mononucleotide repeat in internal oligo (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -opolyxmax
                    """),
            _Option(["-opolyxmax", "opolyxmax"],
                    """Maximum length of mononucleotide repeat in internal oligo.

                    Option added in EMBOSS 6.1.0, replacing -oligomaxpolyx
                    """),
            # Mispriming / mishybridisation libraries.
            _Option(["-mispriminglibraryfile", "mispriminglibraryfile"],
                    "File containing library of sequences to avoid amplifying"),
            _Option(["-maxmispriming", "maxmispriming"],
                    "Maximum allowed similarity of primers to sequences in "
                    "library specified by -mispriminglibraryfile"),
            _Option(["-oligomaxmishyb", "oligomaxmishyb"],
                    """Maximum alignment score for hybridisation of internal oligo to
                    library specified by -oligomishyblibraryfile (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -omishybmax
                    """),
            _Option(["-omishybmax", "omishybmax"],
                    """Maximum alignment score for hybridisation of internal oligo to
                    library specified by -mishyblibraryfile.

                    Option added in EMBOSS 6.1.0, replacing -oligomaxmishyb
                    """),
            _Option(["-oligomishyblibraryfile", "oligomishyblibraryfile"],
                    """Library file of seqs to avoid internal oligo hybridisation (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -mishyblibraryfile
                    """),
            _Option(["-mishyblibraryfile", "mishyblibraryfile"],
                    """Library file of seqs to avoid internal oligo hybridisation.

                    Option added in EMBOSS 6.1.0, replacing -oligomishyblibraryfile
                    """),
            _Option(["-explainflag", "explainflag"],
                    "Produce output tags with eprimer3 statistics"),
        ]
        _EmbossCommandLine.__init__(self, cmd, **kwargs)
445
446
448 """Commandline object for the primersearch program from EMBOSS.
449 """
def __init__(self, cmd="primersearch", **kwargs):
    """Declare the primersearch options, then defer to _EmbossCommandLine.

    cmd is the executable name (default "primersearch"); kwargs are
    passed through unchanged to _EmbossCommandLine.__init__.
    """
    # Each of the first two options is reachable under two flag/name pairs.
    target_sequences = _Option(
        ["-seqall", "-sequences", "sequences", "seqall"],
        "Sequence to look for the primer pairs in.",
        is_required=True)
    primer_file = _Option(
        ["-infile", "-primers", "primers", "infile"],
        "File containing the primer pairs to search for.",
        filename=True,
        is_required=True)
    mismatch_percent = _Option(
        ["-mismatchpercent", "mismatchpercent"],
        "Allowed percentage mismatch (any integer value, default 0).",
        is_required=True)
    sequence_type_options = [
        _Option(["-snucleotide", "snucleotide"],
                "Sequences are nucleotide (boolean)"),
        _Option(["-sprotein", "sprotein"],
                "Sequences are protein (boolean)"),
    ]
    self.parameters = ([target_sequences, primer_file, mismatch_percent]
                       + sequence_type_options)
    _EmbossCommandLine.__init__(self, cmd, **kwargs)
474
475
"""Commandline object for the fdnadist program from EMBOSS.

fdnadist is an EMBOSS wrapper for the PHYLIP program dnadist for
calculating distance matrices from DNA sequence files.
"""
def __init__(self, cmd="fdnadist", **kwargs):
    """Declare the fdnadist options, then defer to _EmbossCommandLine.

    cmd is the executable name (default "fdnadist"); kwargs are passed
    through unchanged to _EmbossCommandLine.__init__.
    """
    # One row per option, in command line order:
    # (name, help text, extra keyword arguments for _Option).
    # The flag is always "-" followed by the name.
    rows = (
        ("sequence", "seq file to use (phylip)",
         {"filename": True, "is_required": True}),
        ("method", "sub. model [f,k,j,l,s]", {"is_required": True}),
        ("gamma", "gamma [g, i,n]", {}),
        ("ncategories", "number of rate catergories (1-9)", {}),
        ("rate", "rate for each category", {}),
        ("categories", "File of substitution rate categories", {}),
        ("weights", "weights file", {}),
        ("gammacoefficient", "value for gamma (> 0.001)", {}),
        ("invarfrac", "proportoin of invariant sites", {}),
        ("ttratio", "ts/tv ratio", {}),
        ("freqsfrom", "use emprical base freqs", {}),
        ("basefreq", "specify basefreqs", {}),
        ("lower", "lower triangle matrix (y/N)", {}),
    )
    self.parameters = [_Option(["-" + name, name], text, **extra)
                       for name, text, extra in rows]
    _EmbossCommandLine.__init__(self, cmd, **kwargs)
514
515
"""Commandline object for the ftreedist program from EMBOSS.

ftreedist is an EMBOSS wrapper for the PHYLIP program treedist used for
calculating distance measures between phylogenetic trees.
"""
def __init__(self, cmd="ftreedist", **kwargs):
    """Declare the ftreedist options, then defer to _EmbossCommandLine.

    cmd is the executable name (default "ftreedist"); kwargs are passed
    through unchanged to _EmbossCommandLine.__init__.
    """
    # The input tree file is the only option with extra settings.
    option_list = [
        _Option(["-intreefile", "intreefile"],
                "tree file to score (phylip)",
                filename=True,
                is_required=True),
    ]
    # Remaining options are plain name/help pairs; flag is "-" + name.
    plain_options = (
        ("dtype", "distance type ([S]ymetric, [b]ranch score)"),
        ("pairing",
         "tree pairing method ([A]djacent pairs, all [p]ossible pairs)"),
        ("style", "output style - [V]erbose, [f]ill, [s]parse"),
        ("noroot", "treat trees as rooted [N/y]"),
        ("outgrno", "which taxon to root the trees with (starts from 0)"),
    )
    for name, text in plain_options:
        option_list.append(_Option(["-" + name, name], text))
    self.parameters = option_list
    _EmbossCommandLine.__init__(self, cmd, **kwargs)
539
540
"""Commandline object for the fneighbor program from EMBOSS.

fneighbor is an EMBOSS wrapper for the PHYLIP program neighbor used for
calculating neighbor-joining or UPGMA trees from distance matrices.
"""
def __init__(self, cmd="fneighbor", **kwargs):
    """Declare the fneighbor options, then defer to _EmbossCommandLine.

    cmd is the executable name (default "fneighbor"); kwargs are passed
    through unchanged to _EmbossCommandLine.__init__.
    """
    # The distance file is the only option with extra settings.
    option_list = [
        _Option(["-datafile", "datafile"],
                "dist file to use (phylip)",
                filename=True,
                is_required=True),
    ]
    # Remaining options are plain name/help pairs; flag is "-" + name.
    plain_options = (
        ("matrixtype", "is martrix [S]quare pr [u]pper or [l]ower"),
        ("treetype", "nj or UPGMA tree (n/u)"),
        ("outgrno", "taxon to use as OG"),
        ("jumble", "randommise input order (Y/n)"),
        ("seed", "provide a random seed"),
        ("trout", "write tree (Y/n)"),
        ("outtreefile", "filename for output tree"),
        ("progress", "print progress (Y/n)"),
        ("treeprint", "print tree (Y/n)"),
    )
    for name, text in plain_options:
        option_list.append(_Option(["-" + name, name], text))
    self.parameters = option_list
    _EmbossCommandLine.__init__(self, cmd, **kwargs)
572
573
575 """Commandline object for the fseqboot program from EMBOSS.
576
577 fseqboot is an EMBOSS wrapper for the PHYLIP program seqboot used to
578 pseudo-sample alignment files.
579 """
def __init__(self, cmd="fseqboot", **kwargs):
    """Declare the fseqboot options and initialise the base class.

    Arguments:
     - cmd - the executable to run (default "fseqboot").
     - kwargs - passed on to _EmbossCommandLine.__init__.

    The historical misspelled names "catergories" and "jusweights" are
    kept, and the correct spellings are accepted alongside them.
    """
    self.parameters = [
        _Option(["-sequence", "sequence"],
                "seq file to sample (phylip)",
                filename=True,
                is_required=True),
        # NOTE(review): relies on _Option accepting every entry of the
        # list as a name (as the four-name primersearch options do) and
        # on the last entry staying the attribute name - confirm against
        # Bio.Application before release.
        _Option(["-categories", "categories", "catergories"],
                "file of input categories"),
        _Option(["-weights", "weights"],
                "weights file"),
        _Option(["-test", "test"],
                "specify operation, default is bootstrap"),
        _Option(["-regular", "regular"],
                "absolute number to resample"),
        _Option(["-fracsample", "fracsample"],
                "fraction to resample"),
        _Option(["-rewriteformat", "rewriteformat"],
                "output format ([P]hylip, [n]exus, [x]ml)"),
        _Option(["-seqtype", "seqtype"],
                "output format ([D]na, [p]rotein, [r]na)"),
        # Previously carried the help text of an unrelated option
        # ("print progress (Y/n)").
        _Option(["-blocksize", "blocksize"],
                "block size for bootstrapping"),
        _Option(["-reps", "reps"],
                "how many replicates, defaults to 100"),
        # Same aliasing scheme as for -categories above.
        _Option(["-justweights", "justweights", "jusweights"],
                "what to write out [D]atasets or just [w]eights"),
        _Option(["-seed", "seed"],
                "specify random seed"),
        _Option(["-dotdiff", "dotdiff"],
                "Use dot-differencing? [Y/n]"),
    ]
    _EmbossCommandLine.__init__(self, cmd, **kwargs)
611
612
"""Commandline object for the fdnapars program from EMBOSS.

fdnapars is an EMBOSS version of the PHYLIP program dnapars, for
estimating trees from DNA sequences using parsimony. Calling this command
without providing a value for the option "-intreefile" will invoke
"interactive mode" (and as a result fail if called with subprocess) if
"-auto" is not set to true.
"""
def __init__(self, cmd="fdnapars", **kwargs):
    """Declare the fdnapars options, then defer to _EmbossCommandLine.

    cmd is the executable name (default "fdnapars"); kwargs are passed
    through unchanged to _EmbossCommandLine.__init__.
    """
    # The sequence file is the only option with extra settings.
    option_list = [
        _Option(["-sequence", "sequence"],
                "seq file to use (phylip)",
                filename=True,
                is_required=True),
    ]
    # Remaining options are plain name/help pairs; flag is "-" + name.
    plain_options = (
        ("intreefile", "Phylip tree file"),
        ("weights", "weights file"),
        ("maxtrees", "max trees to save during run"),
        ("thorough", "more thorough search (Y/n)"),
        ("rearrange", "Rearrange on jsut 1 best tree (Y/n)"),
        ("transversion", "Use tranversion parsimony (y/N)"),
        ("njumble",
         "number of times to randomise input order (default is 0)"),
        ("seed", "provide random seed"),
        ("outgrno", "Specify outgroup"),
        ("thresh", "Use threshold parsimony (y/N)"),
        ("threshold", "Threshold value"),
        ("trout", "Write trees to file (Y/n)"),
        ("outtreefile", "filename for output tree"),
        ("dotdiff", "Use dot-differencing? [Y/n]"),
    )
    for name, text in plain_options:
        option_list.append(_Option(["-" + name, name], text))
    self.parameters = option_list
    _EmbossCommandLine.__init__(self, cmd, **kwargs)
657
658
"""Commandline object for the fprotpars program from EMBOSS.

fprotpars is an EMBOSS version of the PHYLIP program protpars, for
estimating trees from protein sequences using parsimony. Calling this
command without providing a value for the option "-intreefile" will invoke
"interactive mode" (and as a result fail if called with subprocess) if
"-auto" is not set to true.
"""
def __init__(self, cmd="fprotpars", **kwargs):
    """Declare the fprotpars options, then defer to _EmbossCommandLine.

    cmd is the executable name (default "fprotpars"); kwargs are passed
    through unchanged to _EmbossCommandLine.__init__.
    """
    # Both the input sequences and the output tree file are mandatory.
    file_required = {"filename": True, "is_required": True}
    # One row per option, in command line order:
    # (name, help text, extra keyword arguments for _Option).
    rows = (
        ("sequence", "seq file to use (phylip)", file_required),
        ("intreefile", "Phylip tree file to score", {}),
        ("outtreefile", "phylip tree output file", file_required),
        ("weights", "weights file", {}),
        ("whichcode", "which genetic code, [U,M,V,F,Y]]", {}),
        ("njumble",
         "number of times to randomise input order (default is 0)", {}),
        ("seed", "provide random seed", {}),
        ("outgrno", "Specify outgroup", {}),
        ("thresh", "Use threshold parsimony (y/N)", {}),
        ("threshold", "Threshold value", {}),
        ("trout", "Write trees to file (Y/n)", {}),
        ("dotdiff", "Use dot-differencing? [Y/n]", {}),
    )
    self.parameters = [_Option(["-" + name, name], text, **extra)
                       for name, text, extra in rows]
    _EmbossCommandLine.__init__(self, cmd, **kwargs)
699
700
"""Commandline object for the fprotdist program from EMBOSS.

fprotdist is an EMBOSS wrapper for the PHYLIP program protdist used to
calculate distance matrices from protein sequences.
"""
def __init__(self, cmd="fprotdist", **kwargs):
    """Declare the fprotdist options and initialise the base class.

    Arguments:
     - cmd - the executable to run (default "fprotdist").
     - kwargs - passed on to _EmbossCommandLine.__init__.

    The rate categories file is now sent as "-categories", the spelling
    the other PHYLIP wrappers in this module use. The old misspelled
    names are still accepted.
    """
    self.parameters = [
        _Option(["-sequence", "sequence"],
                "seq file to use (phylip)",
                filename=True,
                is_required=True),
        _Option(["-ncategories", "ncategories"],
                "number of rate categories (1-9)"),
        _Option(["-rate", "rate"],
                "rate for each category"),
        # The first entry is the flag; it used to be the misspelled
        # "-catergories".
        # NOTE(review): relies on _Option accepting every entry of the
        # list as a name and on the last entry staying the attribute
        # name - confirm against Bio.Application before release.
        _Option(["-categories", "-catergories", "categories", "catergories"],
                "file of rates"),
        _Option(["-weights", "weights"],
                "weights file"),
        _Option(["-method", "method"],
                "sub. model [j,h,d,k,s,c]"),
        _Option(["-gamma", "gamma"],
                "gamma [g, i,c]"),
        _Option(["-gammacoefficient", "gammacoefficient"],
                "value for gamma (> 0.001)"),
        _Option(["-invarcoefficient", "invarcoefficient"],
                "float for variation of substitution rate among sites"),
        _Option(["-aacateg", "aacateg"],
                "Choose the category to use [G,C,H]"),
        _Option(["-whichcode", "whichcode"],
                "genetic code [c,m,v,f,y]"),
        _Option(["-ease", "ease"],
                "Prob change category (float between 0 and 1)"),
        _Option(["-ttratio", "ttratio"],
                "Transition/transversion ratio (0-1)"),
        _Option(["-basefreq", "basefreq"],
                "DNA base frequencies (space separated list)"),
    ]
    _EmbossCommandLine.__init__(self, cmd, **kwargs)
740
741
743 """Commandline object for the fconsense program from EMBOSS.
744
745 fconsense is an EMBOSS wrapper for the PHYLIP program consense used to
746 calculate consensus trees.
747 """
def __init__(self, cmd="fconsense", **kwargs):
    """Declare the fconsense options and initialise the base class.

    Arguments:
     - cmd - the executable to run (default "fconsense").
     - kwargs - passed on to _EmbossCommandLine.__init__.
    """
    self.parameters = [
        _Option(["-intreefile", "intreefile"],
                "file with phylip trees to make consensus from",
                filename=True,
                is_required=True),
        _Option(["-method", "method"],
                "consensus method [s, mr, MRE, ml]"),
        _Option(["-mlfrac", "mlfrac"],
                "cut-off freq for a branch to appear in consensus (0.5-1.0)"),
        _Option(["-root", "root"],
                "treat trees as rooted (YES, no)"),
        _Option(["-outgrno", "outgrno"],
                "OTU to use as outgroup (starts from 0)"),
        # Used to repeat the -root help text; -trout is described as
        # writing the tree file by the other wrappers in this module.
        _Option(["-trout", "trout"],
                "write trees to file (YES, no)"),
        _Option(["-outtreefile", "outtreefile"],
                "Phylip tree output file (optional)"),
    ]
    _EmbossCommandLine.__init__(self, cmd, **kwargs)
767
768
770 """Commandline object for the water program from EMBOSS.
771 """
def __init__(self, cmd="water", **kwargs):
    """Declare the water options, then defer to _EmbossCommandLine.

    cmd is the executable name (default "water"); kwargs are passed
    through unchanged to _EmbossCommandLine.__init__.
    """
    required = {"is_required": True}
    required_file = {"filename": True, "is_required": True}
    # One row per parameter, in command line order:
    # (factory, name, help text, extra keyword arguments).
    # The flag is always "-" followed by the name.
    rows = (
        (_Option, "asequence", "First sequence to align", required_file),
        (_Option, "bsequence", "Second sequence to align", required_file),
        (_Option, "gapopen", "Gap open penalty", required),
        (_Option, "gapextend", "Gap extension penalty", required),
        (_Option, "datafile", "Matrix file", {"filename": True}),
        (_Switch, "nobrief", "Display extended identity and similarity", {}),
        (_Switch, "brief", "Display brief identity and similarity", {}),
        (_Option, "similarity",
         "Display percent identity and similarity", {}),
        (_Option, "snucleotide", "Sequences are nucleotide (boolean)", {}),
        (_Option, "sprotein", "Sequences are protein (boolean)", {}),
        (_Option, "aformat",
         "Display output in a different specified output format", {}),
    )
    self.parameters = [factory(["-" + name, name], text, **extra)
                       for factory, name, text, extra in rows]
    _EmbossCommandLine.__init__(self, cmd, **kwargs)
804
805
807 """Commandline object for the needle program from EMBOSS.
808 """
def __init__(self, cmd="needle", **kwargs):
    """Declare the needle options and initialise the base class.

    Arguments:
     - cmd - the executable to run (default "needle").
     - kwargs - passed on to _EmbossCommandLine.__init__.
    """
    self.parameters = [
        _Option(["-asequence", "asequence"],
                "First sequence to align",
                filename=True,
                is_required=True),
        _Option(["-bsequence", "bsequence"],
                "Second sequence to align",
                filename=True,
                is_required=True),
        _Option(["-gapopen", "gapopen"],
                "Gap open penalty",
                is_required=True),
        _Option(["-gapextend", "gapextend"],
                "Gap extension penalty",
                is_required=True),
        _Option(["-datafile", "datafile"],
                "Matrix file",
                filename=True),
        # Help text used to read "Apply And gap penalties".
        _Option(["-endweight", "endweight"],
                "Apply end gap penalties"),
        _Option(["-endopen", "endopen"],
                "The score taken away when an end gap is created."),
        _Option(["-endextend", "endextend"],
                "The score added to the end gap penalty for each base or "
                "residue in the end gap."),
        _Switch(["-nobrief", "nobrief"],
                "Display extended identity and similarity"),
        _Switch(["-brief", "brief"],
                "Display brief identity and similarity"),
        _Option(["-similarity", "similarity"],
                "Display percent identity and similarity"),
        _Option(["-snucleotide", "snucleotide"],
                "Sequences are nucleotide (boolean)"),
        _Option(["-sprotein", "sprotein"],
                "Sequences are protein (boolean)"),
        _Option(["-aformat", "aformat"],
                "Display output in a different specified output format"),
    ]
    _EmbossCommandLine.__init__(self, cmd, **kwargs)
848
849
851 """Commandline object for the needleall program from EMBOSS.
852 """
def __init__(self, cmd="needleall", **kwargs):
    """Declare the needleall options and initialise the base class.

    Arguments:
     - cmd - the executable to run (default "needleall").
     - kwargs - passed on to _EmbossCommandLine.__init__.
    """
    self.parameters = [
        _Option(["-asequence", "asequence"],
                "First sequence to align",
                filename=True,
                is_required=True),
        _Option(["-bsequence", "bsequence"],
                "Second sequence to align",
                filename=True,
                is_required=True),
        _Option(["-gapopen", "gapopen"],
                "Gap open penalty",
                is_required=True),
        _Option(["-gapextend", "gapextend"],
                "Gap extension penalty",
                is_required=True),
        _Option(["-datafile", "datafile"],
                "Matrix file",
                filename=True),
        _Option(["-minscore", "minscore"],
                "Exclude alignments with scores below this threshold score."),
        _Option(["-errorfile", "errorfile"],
                "Error file to be written to."),
        # Help text used to read "Apply And gap penalties".
        _Option(["-endweight", "endweight"],
                "Apply end gap penalties"),
        _Option(["-endopen", "endopen"],
                "The score taken away when an end gap is created."),
        _Option(["-endextend", "endextend"],
                "The score added to the end gap penalty for each base or "
                "residue in the end gap."),
        _Switch(["-nobrief", "nobrief"],
                "Display extended identity and similarity"),
        _Switch(["-brief", "brief"],
                "Display brief identity and similarity"),
        _Option(["-similarity", "similarity"],
                "Display percent identity and similarity"),
        _Option(["-snucleotide", "snucleotide"],
                "Sequences are nucleotide (boolean)"),
        _Option(["-sprotein", "sprotein"],
                "Sequences are protein (boolean)"),
        _Option(["-aformat", "aformat"],
                "Display output in a different specified output format"),
    ]
    _EmbossCommandLine.__init__(self, cmd, **kwargs)
896
897
899 """Commandline object for the stretcher program from EMBOSS.
900 """
def __init__(self, cmd="stretcher", **kwargs):
    """Declare the stretcher options, then defer to _EmbossCommandLine.

    cmd is the executable name (default "stretcher"); kwargs are passed
    through unchanged to _EmbossCommandLine.__init__.
    """
    def accepts_int(value):
        # Checker handed to _Option for the two gap penalties.
        return isinstance(value, int)

    sequence_inputs = [
        _Option(["-asequence", "asequence"],
                "First sequence to align",
                filename=True,
                is_required=True),
        _Option(["-bsequence", "bsequence"],
                "Second sequence to align",
                filename=True,
                is_required=True),
    ]
    gap_penalties = [
        _Option(["-gapopen", "gapopen"],
                "Gap open penalty",
                is_required=True,
                checker_function=accepts_int),
        _Option(["-gapextend", "gapextend"],
                "Gap extension penalty",
                is_required=True,
                checker_function=accepts_int),
    ]
    remaining = [
        _Option(["-datafile", "datafile"],
                "Matrix file",
                filename=True),
        _Option(["-snucleotide", "snucleotide"],
                "Sequences are nucleotide (boolean)"),
        _Option(["-sprotein", "sprotein"],
                "Sequences are protein (boolean)"),
        _Option(["-aformat", "aformat"],
                "Display output in a different specified output format"),
    ]
    self.parameters = sequence_inputs + gap_penalties + remaining
    _EmbossCommandLine.__init__(self, cmd, **kwargs)
929
930
932 """Commandline object for the fuzznuc program from EMBOSS.
933 """
def __init__(self, cmd="fuzznuc", **kwargs):
    """Declare the fuzznuc options, then defer to _EmbossCommandLine.

    cmd is the executable name (default "fuzznuc"); kwargs are passed
    through unchanged to _EmbossCommandLine.__init__.
    """
    # The first three options are mandatory.
    mandatory = (
        ("sequence", "Sequence database USA"),
        ("pattern", "Search pattern, using standard IUPAC one-letter codes"),
        ("mismatch", "Number of mismatches"),
    )
    optional = (
        ("complement", "Search complementary strand"),
        ("rformat", "Specify the report format to output in."),
    )
    option_list = []
    for name, text in mandatory:
        option_list.append(_Option(["-" + name, name], text,
                                   is_required=True))
    for name, text in optional:
        option_list.append(_Option(["-" + name, name], text))
    self.parameters = option_list
    _EmbossCommandLine.__init__(self, cmd, **kwargs)
950
951
953 """Commandline object for the est2genome program from EMBOSS.
954 """
def __init__(self, cmd="est2genome", **kwargs):
    """Declare the est2genome options and defer to the base initialiser.

    Only est and genome are flagged as required.  cmd and the keyword
    arguments are forwarded to _EmbossCommandLine.__init__.
    """
    # (option name, help text) pairs; the flag is "-" + option name.
    required = [
        ("est", "EST sequence(s)"),
        ("genome", "Genomic sequence"),
    ]
    optional = [
        ("match", "Score for matching two bases"),
        ("mismatch", "Cost for mismatching two bases"),
        ("gappenalty",
         "Cost for deleting a single base in either sequence, "
         "excluding introns"),
        ("intronpenalty", "Cost for an intron, independent of length."),
        ("splicepenalty",
         "Cost for an intron, independent of length "
         "and starting/ending on donor-acceptor sites"),
        ("minscore",
         "Exclude alignments with scores below this threshold score."),
        ("reverse", "Reverse the orientation of the EST sequence"),
        ("splice", "Use donor and acceptor splice sites."),
        ("mode",
         "This determines the comparion mode. 'both', 'forward' "
         "'reverse'"),
        ("best",
         "You can print out all comparisons instead of just the best"),
        ("space", "for linear-space recursion."),
        ("shuffle", "Shuffle"),
        ("seed", "Random number seed"),
        ("align", "Show the alignment."),
        ("width", "Alignment width"),
    ]
    # Required options come first, exactly as in the declaration order.
    options = [_Option(["-" + name, name], text, is_required=True)
               for name, text in required]
    options += [_Option(["-" + name, name], text)
                for name, text in optional]
    self.parameters = options
    _EmbossCommandLine.__init__(self, cmd, **kwargs)
998
999
1001 """Commandline object for the etandem program from EMBOSS.
1002 """
def __init__(self, cmd="etandem", **kwargs):
    """Assemble the etandem option list, then run the base initialiser.

    sequence (treated as a filename), minrepeat and maxrepeat are flagged
    as required; the remaining options are optional.  cmd and the keyword
    arguments go to _EmbossCommandLine.__init__ unchanged.
    """
    options = []
    options.append(_Option(["-sequence", "sequence"], "Sequence",
                           is_required=True, filename=True))
    # The two repeat-size limits are both mandatory.
    options.append(_Option(["-minrepeat", "minrepeat"],
                           "Minimum repeat size", is_required=True))
    options.append(_Option(["-maxrepeat", "maxrepeat"],
                           "Maximum repeat size", is_required=True))
    options.extend([
        _Option(["-threshold", "threshold"], "Threshold score"),
        _Option(["-mismatch", "mismatch"], "Allow N as a mismatch"),
        _Option(["-uniform", "uniform"], "Allow uniform consensus"),
        _Option(["-rformat", "rformat"], "Output report format"),
    ])
    self.parameters = options
    _EmbossCommandLine.__init__(self, cmd, **kwargs)
1024
1025
1027 """Commandline object for the einverted program from EMBOSS.
1028 """
def __init__(self, cmd="einverted", **kwargs):
    """Declare the einverted options and defer to the base initialiser.

    sequence, gap, threshold, match and mismatch are flagged as
    required; maxrepeat is optional.  cmd and the keyword arguments are
    forwarded to _EmbossCommandLine.__init__.
    """
    self.parameters = [
        _Option(["-sequence", "sequence"],
                "Sequence",
                filename=True,
                is_required=True),
        # The gap penalty is a score, not a path, so unlike -sequence it
        # must not be flagged as a filename (the flag had been copied
        # over from the entry above).
        _Option(["-gap", "gap"],
                "Gap penalty",
                is_required=True),
        _Option(["-threshold", "threshold"],
                "Minimum score threshold",
                is_required=True),
        _Option(["-match", "match"],
                "Match score",
                is_required=True),
        _Option(["-mismatch", "mismatch"],
                "Mismatch score",
                is_required=True),
        _Option(["-maxrepeat", "maxrepeat"],
                "Maximum separation between the start and end of repeat"),
    ]
    _EmbossCommandLine.__init__(self, cmd, **kwargs)
1052
1053
1055 """Commandline object for the palindrome program from EMBOSS.
1056 """
def __init__(self, cmd="palindrome", **kwargs):
    """Set up the palindrome options and call the base initialiser.

    Every option declared here is flagged as required; only sequence is
    additionally treated as a filename.  cmd and the keyword arguments
    are forwarded to _EmbossCommandLine.__init__.
    """
    options = [_Option(["-sequence", "sequence"], "Sequence",
                       is_required=True, filename=True)]
    # Remaining options share the same shape: required, plain values.
    for name, text in (
            ("minpallen", "Minimum palindrome length"),
            ("maxpallen", "Maximum palindrome length"),
            ("gaplimit", "Maximum gap between repeats"),
            ("nummismatches", "Number of mismatches allowed"),
            ("overlap", "Report overlapping matches")):
        options.append(_Option(["-" + name, name], text, is_required=True))
    self.parameters = options
    _EmbossCommandLine.__init__(self, cmd, **kwargs)
1080
1081
1083 """Commandline object for the tranalign program from EMBOSS.
1084 """
def __init__(self, cmd="tranalign", **kwargs):
    """Declare the tranalign options and defer to the base initialiser.

    asequence, bsequence and outseq are required filenames; table is
    optional.  cmd and the keyword arguments are forwarded to
    _EmbossCommandLine.__init__.
    """
    # (option name, help text) for the three mandatory file arguments.
    files = [
        ("asequence", "Nucleotide sequences to be aligned."),
        ("bsequence", "Protein sequence alignment"),
        ("outseq", "Output sequence file."),
    ]
    options = [_Option(["-" + name, name], text,
                       filename=True, is_required=True)
               for name, text in files]
    options.append(_Option(["-table", "table"], "Code to use"))
    self.parameters = options
    _EmbossCommandLine.__init__(self, cmd, **kwargs)
1102
1103
1105 """Commandline object for the diffseq program from EMBOSS.
1106 """
def __init__(self, cmd="diffseq", **kwargs):
    """Declare the diffseq options and defer to the base initialiser.

    All options except rformat are flagged as required; the two input
    sequences and the two feature outputs are treated as filenames.
    cmd and the keyword arguments go to _EmbossCommandLine.__init__.
    """
    required_file = dict(filename=True, is_required=True)
    # (option name, help text, extra _Option keywords), in flag order.
    rows = [
        ("asequence", "First sequence to compare", required_file),
        ("bsequence", "Second sequence to compare", required_file),
        ("wordsize", "Word size to use for comparisons (10 default)",
         dict(is_required=True)),
        ("aoutfeat", "File for output of first sequence's features",
         required_file),
        ("boutfeat", "File for output of second sequence's features",
         required_file),
        ("rformat", "Output report file format", {}),
    ]
    self.parameters = [_Option(["-" + name, name], text, **extra)
                       for name, text, extra in rows]
    _EmbossCommandLine.__init__(self, cmd, **kwargs)
1132
1133
1135 """Commandline for EMBOSS iep: calculated isoelectric point and charge.
1136
1137 Example:
1138
1139 >>> from Bio.Emboss.Applications import IepCommandline
1140 >>> iep_cline = IepCommandline(sequence="proteins.faa",
1141 ... outfile="proteins.txt")
1142 >>> print iep_cline
1143 iep -outfile=proteins.txt -sequence=proteins.faa
1144
1145 You would typically run the command line with iep_cline() or via the
1146 Python subprocess module, as described in the Biopython tutorial.
1147 """
def __init__(self, cmd="iep", **kwargs):
    """Declare the iep options and defer to the base initialiser.

    Only sequence (a filename) is flagged as required.  cmd and the
    keyword arguments are forwarded to _EmbossCommandLine.__init__.
    """
    # Shared tail of the help text for the four count-style options.
    # NOTE(review): the whitespace inside the original multi-line help
    # strings is reproduced as seen in this view -- confirm on merge.
    count_note = "\n\nInteger 0 (default) or more.\n"
    counts = [
        ("amino", "Number of N-termini"),
        ("carboxyl", "Number of C-termini"),
        ("lysinemodified", "Number of modified lysines"),
        ("disulphides", "Number of disulphide bridges"),
    ]
    options = [_Option(["-sequence", "sequence"],
                       "Protein sequence(s) filename",
                       is_required=True, filename=True)]
    for name, summary in counts:
        options.append(_Option(["-" + name, name], summary + count_note))
    options.append(
        _Option(["-notermini", "notermini"],
                "Exclude (True) or include (False) charge at N and C terminus."))
    self.parameters = options
    _EmbossCommandLine.__init__(self, cmd, **kwargs)
1179
1180
1181
1183 """Commandline object for the seqret program from EMBOSS.
1184
1185 This tool allows you to interconvert between different sequence file
1186 formats (e.g. GenBank to FASTA). Combining Biopython's Bio.SeqIO module
1187 with seqret using a suitable intermediate file format can allow you to
1188 read/write to an even wider range of file formats.
1189
1190 This wrapper currently only supports the core functionality, things like
1191 feature tables (in EMBOSS 6.1.0 onwards) are not yet included.
1192 """
def __init__(self, cmd="seqret", **kwargs):
    """Declare the seqret options and call the minimal base initialiser.

    None of the options is flagged as required here.  cmd and the
    keyword arguments are forwarded to
    _EmbossMinimalCommandLine.__init__.
    """
    # Input and output sequence locations are both treated as filenames.
    files = [
        _Option(["-sequence", "sequence"], "Input sequence(s) filename",
                filename=True),
        _Option(["-outseq", "outseq"], "Output sequence file.",
                filename=True),
    ]
    # Format selectors for the input and the output respectively.
    formats = [
        _Option(["-sformat", "sformat"],
                "Input sequence(s) format (e.g. fasta, genbank)"),
        _Option(["-osformat", "osformat"],
                "Output sequence(s) format (e.g. fasta, genbank)"),
    ]
    self.parameters = files + formats
    _EmbossMinimalCommandLine.__init__(self, cmd, **kwargs)
1207
1209
1210
1211
1212 if not (self.outseq or self.filter or self.stdout):
1213 raise ValueError("You must either set outfile (output filename), "
1214 "or enable filter or stdout (output to stdout).")
1215 if not (self.sequence or self.filter or self.stdint):
1216 raise ValueError("You must either set sequence (input filename), "
1217 "or enable filter or stdin (input from stdin).")
1218 return _EmbossMinimalCommandLine._validate(self)
1219
1220
1222 """ Commandline object for the seqmatchall program from EMBOSS
1223
1224 e.g.
1225 >>> cline = SeqmatchallCommandline(sequence="opuntia.fasta", outfile="opuntia.txt")
1226 >>> cline.auto = True
1227 >>> cline.wordsize = 18
1228 >>> cline.aformat = "pair"
1229 >>> print cline
1230 seqmatchall -auto -outfile=opuntia.txt -sequence=opuntia.fasta -wordsize=18 -aformat=pair
1231
1232 """
def __init__(self, cmd="seqmatchall", **kwargs):
    """Declare the seqmatchall options and defer to the base initialiser.

    sequence is a required filename; wordsize and aformat are optional.
    cmd and the keyword arguments go to _EmbossCommandLine.__init__.
    """
    sequence = _Option(["-sequence", "sequence"],
                       "Readable set of sequences",
                       is_required=True, filename=True)
    wordsize = _Option(["-wordsize", "wordsize"],
                       "Word size (Integer 2 or more, default 4)")
    aformat = _Option(["-aformat", "aformat"],
                      "Display output in a different specified output format")
    # Declaration order is kept: sequence, wordsize, aformat.
    self.parameters = [sequence, wordsize, aformat]
    _EmbossCommandLine.__init__(self, cmd, **kwargs)
1245
1246
"""Run the doctests found in the Bio.Emboss.Applications module."""
# Imported here so doctest is only loaded when the tests are run.
from doctest import testmod
testmod()
1251
if __name__ == "__main__":
    # Executed as a script rather than imported: call the module's
    # _test() helper.
    _test()
1255