1
2
3
4
5 """Command line wrapper for the multiple alignment program MUSCLE.
6 """
7
8 __docformat__ = "epytext en"
9
10 from Bio.Application import _Option, _Switch, AbstractCommandline
11
12
14 r"""Command line wrapper for the multiple alignment program MUSCLE.
15
16 http://www.drive5.com/muscle/
17
18 Example:
19
20 >>> from Bio.Align.Applications import MuscleCommandline
21 >>> muscle_exe = r"C:\Program Files\Aligments\muscle3.8.31_i86win32.exe"
22 >>> in_file = r"C:\My Documents\unaligned.fasta"
23 >>> out_file = r"C:\My Documents\aligned.fasta"
24 >>> muscle_cline = MuscleCommandline(muscle_exe, input=in_file, out=out_file)
25 >>> print muscle_cline
26 C:\Program Files\Aligments\muscle3.8.31_i86win32.exe -in "C:\My Documents\unaligned.fasta" -out "C:\My Documents\aligned.fasta"
27
28 You would typically run the command line with muscle_cline() or via
29 the Python subprocess module, as described in the Biopython tutorial.
30
31 Citations:
32
33 Edgar, Robert C. (2004), MUSCLE: multiple sequence alignment with high
34 accuracy and high throughput, Nucleic Acids Research 32(5), 1792-97.
35
36 Edgar, R.C. (2004) MUSCLE: a multiple sequence alignment method with
37 reduced time and space complexity. BMC Bioinformatics 5(1): 113.
38
39 Last checked against version: 3.7, briefly against 3.8
40 """
def __init__(self, cmd="muscle", **kwargs):
    """Declare the MUSCLE command line parameters and initialise the wrapper.

    Builds C{self.parameters} as an ordered list of C{_Option} entries
    (arguments that carry a value) and C{_Switch} entries (bare flags),
    then passes C{cmd} and the keyword arguments on to
    C{AbstractCommandline.__init__}.

    @param cmd: Name or path of the MUSCLE executable (default "muscle").
    @param kwargs: Forwarded unchanged to C{AbstractCommandline.__init__};
        presumably initial values keyed by the parameter names declared
        below (e.g. C{input=...}, C{out=...}) -- confirm against
        Bio.Application.
    """
    # Allowed values for the enumerated options.  Kept as lists so the
    # membership checks accept any value type, hashable or not.
    CLUSTERING_ALGORITHMS = ["upgma", "upgmb", "neighborjoining"]
    DISTANCE_MEASURES_ITER1 = ["kmer6_6", "kmer20_3", "kmer20_4", "kbit20_3",
                               "kmer4_6"]
    # Iteration 2 accepts everything iteration 1 does, plus two
    # percent-identity based measures.
    DISTANCE_MEASURES_ITER2 = DISTANCE_MEASURES_ITER1 + \
                              ["pctid_kimura", "pctid_log"]
    OBJECTIVE_SCORES = ["sp", "ps", "dp", "xp", "spf", "spm"]
    TREE_ROOT_METHODS = ["pseudo", "midlongestspan", "minavgleafdist"]
    SEQUENCE_TYPES = ["protein", "nucleo", "auto"]
    WEIGHTING_SCHEMES = ["none", "clustalw", "henikoff", "henikoffpb",
                         "gsc", "threeway"]

    # Validators shared by many options below.
    def is_int(value):
        """Return True if value is an int instance."""
        return isinstance(value, int)

    def is_float(value):
        """Return True if value is a float instance (plain ints fail)."""
        return isinstance(value, float)

    def one_of(choices):
        """Return a checker accepting only values found in choices."""
        return lambda value: value in choices

    self.parameters = [
        # --- Input / output files and basic modes ---
        _Option(["-in", "in", "input"],
                "Input filename",
                filename=True,
                equate=False),
        _Option(["-out", "out"],
                "Output filename",
                filename=True,
                equate=False),
        _Switch(["-diags", "diags"],
                "Find diagonals (faster for similar sequences)"),
        _Switch(["-profile", "profile"],
                "Perform a profile alignment"),
        _Option(["-in1", "in1"],
                "First input filename for profile alignment",
                filename=True,
                equate=False),
        _Option(["-in2", "in2"],
                "Second input filename for a profile alignment",
                filename=True,
                equate=False),

        # --- Value options (alphabetical by MUSCLE option name) ---
        _Option(["-anchorspacing", "anchorspacing"],
                "Minimum spacing between anchor columns",
                checker_function=is_int,
                equate=False),
        _Option(["-center", "center"],
                "Center parameter - should be negative",
                checker_function=is_float,
                equate=False),
        _Option(["-cluster1", "cluster1"],
                "Clustering method used in iteration 1",
                checker_function=one_of(CLUSTERING_ALGORITHMS),
                equate=False),
        _Option(["-cluster2", "cluster2"],
                "Clustering method used in iteration 2",
                checker_function=one_of(CLUSTERING_ALGORITHMS),
                equate=False),
        # Fixed: this was the only option declared with equate=True; it
        # now matches every other value option in this list.
        _Option(["-diaglength", "diaglength"],
                "Minimum length of diagonal",
                checker_function=is_int,
                equate=False),
        _Option(["-diagmargin", "diagmargin"],
                "Discard this many positions at ends of diagonal",
                checker_function=is_int,
                equate=False),
        _Option(["-distance1", "distance1"],
                "Distance measure for iteration 1",
                checker_function=one_of(DISTANCE_MEASURES_ITER1),
                equate=False),
        _Option(["-distance2", "distance2"],
                "Distance measure for iteration 2",
                checker_function=one_of(DISTANCE_MEASURES_ITER2),
                equate=False),
        _Option(["-gapopen", "gapopen"],
                "Gap open score - negative number",
                checker_function=is_float,
                equate=False),
        _Option(["-hydro", "hydro"],
                "Window size for hydrophobic region",
                checker_function=is_int,
                equate=False),
        _Option(["-hydrofactor", "hydrofactor"],
                "Multiplier for gap penalties in hydrophobic regions",
                checker_function=is_float,
                equate=False),
        _Option(["-log", "log"],
                "Log file name",
                filename=True,
                equate=False),
        _Option(["-loga", "loga"],
                "Log file name (append to existing file)",
                filename=True,
                equate=False),
        _Option(["-maxdiagbreak", "maxdiagbreak"],
                "Maximum distance between two diagonals that allows "
                "them to merge into one diagonal",
                checker_function=is_int,
                equate=False),
        _Option(["-maxhours", "maxhours"],
                "Maximum time to run in hours",
                checker_function=is_float,
                equate=False),
        _Option(["-maxiters", "maxiters"],
                "Maximum number of iterations",
                checker_function=is_int,
                equate=False),
        _Option(["-maxtrees", "maxtrees"],
                "Maximum number of trees to build in iteration 2",
                checker_function=is_int,
                equate=False),
        _Option(["-minbestcolscore", "minbestcolscore"],
                "Minimum score a column must have to be an anchor",
                checker_function=is_float,
                equate=False),
        _Option(["-minsmoothscore", "minsmoothscore"],
                "Minimum smoothed score a column must have to "
                "be an anchor",
                checker_function=is_float,
                equate=False),
        _Option(["-objscore", "objscore"],
                "Objective score used by tree dependent refinement",
                checker_function=one_of(OBJECTIVE_SCORES),
                equate=False),
        _Option(["-root1", "root1"],
                "Method used to root tree in iteration 1",
                checker_function=one_of(TREE_ROOT_METHODS),
                equate=False),
        _Option(["-root2", "root2"],
                "Method used to root tree in iteration 2",
                checker_function=one_of(TREE_ROOT_METHODS),
                equate=False),
        _Option(["-seqtype", "seqtype"],
                "Sequence type",
                checker_function=one_of(SEQUENCE_TYPES),
                equate=False),
        _Option(["-smoothscoreceil", "smoothscoreceil"],
                "Maximum value of column score for smoothing",
                checker_function=is_float,
                equate=False),
        _Option(["-smoothwindow", "smoothwindow"],
                "Window used for anchor column smoothing",
                checker_function=is_int,
                equate=False),
        _Option(["-sueff", "sueff"],
                "Constant used in UPGMB clustering",
                checker_function=is_float,
                equate=False),
        # NOTE(review): tree1/tree2 take a path but are not declared with
        # filename=True like the other file options -- left unchanged.
        _Option(["-tree1", "tree1"],
                "Save Newick tree from iteration 1",
                equate=False),
        _Option(["-tree2", "tree2"],
                "Save Newick tree from iteration 2",
                equate=False),
        _Option(["-weight1", "weight1"],
                "Weighting scheme used in iteration 1",
                checker_function=one_of(WEIGHTING_SCHEMES),
                equate=False),
        _Option(["-weight2", "weight2"],
                "Weighting scheme used in iteration 2",
                checker_function=one_of(WEIGHTING_SCHEMES),
                equate=False),

        # --- Output format switches ---
        _Switch(["-clw", "clw"],
                "Write output in CLUSTALW format (with a MUSCLE header)"),
        _Switch(["-clwstrict", "clwstrict"],
                "Write output in CLUSTALW format with version 1.81 header"),
        _Switch(["-fasta", "fasta"],
                "Write output in FASTA format"),
        _Switch(["-html", "html"],
                "Write output in HTML format"),
        _Switch(["-msf", "msf"],
                "Write output in MSF format"),
        _Switch(["-phyi", "phyi"],
                "Write output in PHYLIP interleaved format"),
        _Switch(["-phys", "phys"],
                "Write output in PHYLIP sequential format"),

        # --- Per-format output files ---
        _Option(["-phyiout", "phyiout"],
                "Write PHYLIP interleaved output to specified filename",
                filename=True,
                equate=False),
        _Option(["-physout", "physout"],
                "Write PHYLIP sequential format to specified filename",
                filename=True,
                equate=False),
        _Option(["-htmlout", "htmlout"],
                "Write HTML output to specified filename",
                filename=True,
                equate=False),
        _Option(["-clwout", "clwout"],
                "Write CLUSTALW output (with MUSCLE header) to specified "
                "filename",
                filename=True,
                equate=False),
        _Option(["-clwstrictout", "clwstrictout"],
                "Write CLUSTALW output (with version 1.81 header) to "
                "specified filename",
                filename=True,
                equate=False),
        _Option(["-msfout", "msfout"],
                "Write MSF format output to specified filename",
                filename=True,
                equate=False),
        _Option(["-fastaout", "fastaout"],
                "Write FASTA format output to specified filename",
                filename=True,
                equate=False),

        # --- Algorithm behaviour switches ---
        _Switch(["-anchors", "anchors"],
                "Use anchor optimisation in tree dependent "
                "refinement iterations"),
        _Switch(["-noanchors", "noanchors"],
                "Do not use anchor optimisation in tree dependent "
                "refinement iterations"),
        _Switch(["-group", "group"],
                "Group similar sequences in output"),
        _Switch(["-stable", "stable"],
                "Do not group similar sequences in output "
                "(not supported in v3.8)"),

        # --- Profile scoring switches ---
        _Switch(["-le", "le"],
                "Use log-expectation profile score (VTML240)"),
        _Switch(["-sv", "sv"],
                "Use sum-of-pairs profile score (VTML240)"),
        _Switch(["-sp", "sp"],
                "Use sum-of-pairs protein profile score (PAM200)"),
        _Switch(["-spn", "spn"],
                "Use sum-of-pairs protein nucleotide profile score"),

        # --- Run control and reporting switches ---
        # Fixed: the description used to be a copy of the -spn text.
        _Switch(["-quiet", "quiet"],
                "Do not display progress messages"),
        _Switch(["-refine", "refine"],
                "Only do tree dependent refinement"),
        _Switch(["-core", "core"],
                "Catch exceptions"),
        _Switch(["-nocore", "nocore"],
                "Do not catch exceptions"),
        _Switch(["-verbose", "verbose"],
                "Write parameter settings and progress"),
        _Switch(["-version", "version"],
                "Write version string to stdout and exit"),
    ]
    # self.parameters is assigned before the base initialiser runs, which
    # receives cmd and the caller's keyword arguments.
    AbstractCommandline.__init__(self, cmd, **kwargs)
466
467
def _test():
    """Run the module's doctests (PRIVATE)."""
    # A single parenthesised argument prints identically whether print is
    # a statement (Python 2) or a function (Python 3).
    print("Running MUSCLE doctests...")
    import doctest
    doctest.testmod()
    print("Done")


# Only run the doctests when this file is executed directly as a script.
if __name__ == "__main__":
    _test()
477