1
2
3
4
5
6
7
8 """Definitions for interacting with BLAST related applications.
9
10 Obsolete wrappers for the old/classic NCBI BLAST tools (written in C):
11
12 - FastacmdCommandline
13 - BlastallCommandline
14 - BlastpgpCommandline
15 - RpsBlastCommandline
16
17 Wrappers for the new NCBI BLAST+ tools (written in C++):
18
19 - NcbiblastpCommandline - Protein-Protein BLAST
20 - NcbiblastnCommandline - Nucleotide-Nucleotide BLAST
21 - NcbiblastxCommandline - Translated Query-Protein Subject BLAST
22 - NcbitblastnCommandline - Protein Query-Translated Subject BLAST
23 - NcbitblastxCommandline - Translated Query-Translated Subject BLAST
24 - NcbipsiblastCommandline - Position-Specific Initiated BLAST
25 - NcbirpsblastCommandline - Reverse Position Specific BLAST
26 - NcbirpstblastnCommandline - Translated Reverse Position Specific BLAST
27 - NcbiblastformatterCommandline - Convert ASN.1 to other BLAST output formats
28
29 For further details, see:
30
31 Camacho et al. BLAST+: architecture and applications
32 BMC Bioinformatics 2009, 10:421
33 doi:10.1186/1471-2105-10-421
34 """
35 from Bio.Application import _Option, AbstractCommandline, _Switch
36
38 """Create a commandline for the fastacmd program from NCBI (OBSOLETE).
39
40 """
def __init__(self, cmd="fastacmd", **kwargs):
    """Declare the two mandatory fastacmd options and initialise the wrapper.

    cmd is the name (or path) of the executable, "fastacmd" by default.
    Any keyword arguments are handed on to AbstractCommandline.__init__.
    """
    database_option = _Option(["-d", "database"],
                              "The database to retrieve from.",
                              equate=False,
                              is_required=True)
    search_option = _Option(["-s", "search_string"],
                            "The id to search for.",
                            equate=False,
                            is_required=True)
    self.parameters = [database_option, search_option]
    AbstractCommandline.__init__(self, cmd, **kwargs)
53
54
56 """Base Commandline object for (classic) NCBI BLAST wrappers (PRIVATE).
57
58 This is provided for subclassing, it deals with shared options
59 common to all the BLAST tools (blastall, rpsblast, blastpgp).
60 """
62 assert cmd is not None
63 extra_parameters = [
64 _Switch(["--help", "help"],
65 "Print USAGE, DESCRIPTION and ARGUMENTS description; ignore other arguments."),
66 _Option(["-d", "database"],
67 "The database to BLAST against.",
68 is_required=True,
69 equate=False),
70 _Option(["-i", "infile"],
71 "The sequence to search with.",
72 filename=True,
73 is_required=True,
74 equate=False),
75 _Option(["-e", "expectation"],
76 "Expectation value cutoff.",
77 equate=False),
78 _Option(["-m", "align_view"],
79 "Alignment view. Integer 0-11. Use 7 for XML output.",
80 equate=False),
81 _Option(["-o", "align_outfile", "outfile"],
82 "Output file for alignment.",
83 filename=True,
84 equate=False),
85 _Option(["-y", "xdrop_extension"],
86 "Dropoff for blast extensions.",
87 equate=False),
88 _Option(["-F", "filter"],
89 "Filter query sequence with SEG? T/F",
90 equate=False),
91 _Option(["-X", "xdrop"],
92 "Dropoff value (bits) for gapped alignments.",
93 equate=False),
94 _Option(["-I", "show_gi"],
95 "Show GI's in deflines? T/F",
96 equate=False),
97 _Option(["-J", "believe_query"],
98 "Believe the query defline? T/F",
99 equate=False),
100 _Option(["-Z", "xdrop_final"],
101 "X dropoff for final gapped alignment.",
102 equate=False),
103 _Option(["-z", "db_length"],
104 "Effective database length.",
105 equate=False),
106 _Option(["-O", "seqalign_file"],
107 "seqalign file to output.",
108 filename=True,
109 equate=False),
110 _Option(["-v", "descriptions"],
111 "Number of one-line descriptions.",
112 equate=False),
113 _Option(["-b", "alignments"],
114 "Number of alignments.",
115 equate=False),
116 _Option(["-Y", "search_length"],
117 "Effective length of search space (use zero for the "
118 "real size).",
119 equate=False),
120 _Option(["-T", "html"],
121 "Produce HTML output? T/F",
122 equate=False),
123 _Option(["-U", "case_filter"],
124 "Use lower case filtering of FASTA sequence? T/F",
125 equate=False),
126 _Option(["-a", "nprocessors"],
127 "Number of processors to use.",
128 equate=False),
129 _Option(["-g", "gapped"],
130 "Whether to do a gapped alignment. T/F",
131 equate=False),
132 ]
133 try:
134
135
136 self.parameters = extra_parameters + self.parameters
137 except AttributeError:
138
139 self.parameters = extra_parameters
140 AbstractCommandline.__init__(self, cmd, **kwargs)
141
147
148
150 """Base Commandline object for NCBI BLAST wrappers (PRIVATE).
151
152 This is provided for subclassing, it deals with shared options
153 common to all the blastall and blastpgp tools (but not rpsblast).
154 """
def __init__(self, cmd=None, **kwargs):
    """Add the options shared by blastall and blastpgp, then chain upwards.

    cmd is the name (or path) of the executable and must not be None.
    Any keyword arguments are handed on to _BlastCommandLine.__init__.
    """
    assert cmd is not None
    shared_options = [
        _Option(["-G", "gap_open"], "Gap open penalty", equate=False),
        _Option(["-E", "gap_extend"], "Gap extension penalty", equate=False),
        _Option(["-A", "window_size"], "Multiple hits window size",
                equate=False),
        _Option(["-f", "hit_extend"], "Threshold for extending hits.",
                equate=False),
        _Option(["-K", "keep_hits"],
                " Number of best hits from a region to keep.",
                equate=False),
        _Option(["-W", "wordsize"], "Word size", equate=False),
        _Option(["-P", "passes"],
                "Hits/passes. Integer 0-2. 0 for multiple hit, 1 for single "
                "hit (does not apply to blastn)",
                equate=False),
    ]
    # A subclass may already have stored its own options on the instance;
    # if so the shared options are placed in front of them.
    try:
        subclass_options = self.parameters
    except AttributeError:
        subclass_options = []
    self.parameters = shared_options + subclass_options
    _BlastCommandLine.__init__(self, cmd, **kwargs)
189
190
192 """Create a commandline for the blastall program from NCBI (OBSOLETE).
193
194 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
195 are replacing blastall with separate tools blastn, blastp, blastx, tblastn
196 and tblastx.
197
198 Like blastall, this wrapper is now obsolete, and will be deprecated and
199 removed in a future release of Biopython.
200
201 >>> from Bio.Blast.Applications import BlastallCommandline
202 >>> cline = BlastallCommandline(program="blastx", infile="m_cold.fasta",
203 ... database="nr", expectation=0.001)
204 >>> cline
205 BlastallCommandline(cmd='blastall', database='nr', infile='m_cold.fasta', expectation=0.001, program='blastx')
206 >>> print cline
207 blastall -d nr -i m_cold.fasta -e 0.001 -p blastx
208
209 You would typically run the command line with cline() or via the Python
210 subprocess module, as described in the Biopython tutorial.
211 """
212
def __init__(self, cmd="blastall", **kwargs):
    """Declare the blastall-specific options and initialise the wrapper.

    cmd is the name (or path) of the executable, "blastall" by default.
    Any keyword arguments are handed on to
    _BlastAllOrPgpCommandLine.__init__.

    A PendingDeprecationWarning is issued on every construction because
    this wrapper is obsolete.
    """
    import warnings
    warnings.warn("Like blastall, this wrapper is now obsolete, and will be deprecated and removed in a future release of Biopython.", PendingDeprecationWarning)
    self.parameters = [
        _Option(["-p", "program"],
                "The blast program to use (e.g. blastp, blastn).",
                is_required=True,
                equate=False),
        _Option(["-q", "nuc_mismatch"],
                "Penalty for a nucleotide mismatch (blastn only).",
                equate=False),
        _Option(["-r", "nuc_match"],
                "Reward for a nucleotide match (blastn only).",
                equate=False),
        _Option(["-Q", "query_genetic_code"],
                "Query Genetic code to use.",
                equate=False),
        _Option(["-D", "db_genetic_code"],
                "DB Genetic code (for tblast[nx] only).",
                equate=False),
        _Option(["-M", "matrix"],
                "Matrix to use",
                equate=False),
        _Option(["-S", "strands"],
                "Query strands to search against database (for blast[nx], "
                "and tblastx). 3 is both, 1 is top, 2 is bottom.",
                equate=False),
        _Option(["-l", "restrict_gi"],
                "Restrict search of database to list of GI's.",
                equate=False),
        _Option(["-R", "checkpoint"],
                "PSI-TBLASTN checkpoint input file.",
                filename=True,
                equate=False),
        _Option(["-n", "megablast"],
                "MegaBlast search T/F.",
                equate=False),
        _Option(["-L", "region_length", "range_restriction"],
                "Location on query sequence (string format start,end).\n"
                "\n"
                "In older versions of BLAST, -L set the length of region\n"
                "used to judge hits (see -K parameter).",
                equate=False),
        # The historical keyword for -w is misspelt.  It is kept as the
        # last (primary) name so existing callers and repr output are
        # unchanged, and the correct spelling is accepted as an extra name.
        # NOTE(review): assumes middle names act as aliases, as the other
        # three-name options in this file rely on - confirm against
        # Bio.Application.
        _Option(["-w", "frame_shift_penalty", "frame_shit_penalty"],
                "Frame shift penalty (OOF algorithm for blastx).",
                equate=False),
        _Option(["-t", "largest_intron"],
                "Length of the largest intron allowed in a translated "
                "nucleotide sequence when linking multiple distinct "
                "alignments. (0 invokes default behavior; a negative value "
                "disables linking.)",
                equate=False),
        _Option(["-B", "num_concatenated_queries"],
                "Number of concatenated queries, for blastn and tblastn.",
                equate=False),
        _Option(["-V", "oldengine"],
                "Force use of the legacy BLAST engine.",
                equate=False),
        _Option(["-C", "composition_based"],
                "Use composition-based statistics for tblastn:\n"
                "D or d: default (equivalent to F)\n"
                "0 or F or f: no composition-based statistics\n"
                "1 or T or t: Composition-based statistics as in NAR 29:2994-3005, 2001\n"
                "2: Composition-based score adjustment as in Bioinformatics\n"
                "21:902-911, 2005, conditioned on sequence properties\n"
                "3: Composition-based score adjustment as in Bioinformatics\n"
                "21:902-911, 2005, unconditionally\n"
                "For programs other than tblastn, must either be absent or be\n"
                "D, F or 0.",
                equate=False),
        _Option(["-s", "smith_waterman"],
                "Compute locally optimal Smith-Waterman alignments (This "
                "option is only available for gapped tblastn.) T/F",
                equate=False),
    ]
    _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
294
295
297 """Create a commandline for the blastpgp program from NCBI (OBSOLETE).
298
299 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
300 are replacing blastpgp with a renamed tool psiblast. This module provides
301 NcbipsiblastCommandline as a wrapper for the new tool psiblast.
302
303 Like blastpgp (and blastall), this wrapper is now obsolete, and will be
304 deprecated and removed in a future release of Biopython.
305
306 >>> from Bio.Blast.Applications import BlastpgpCommandline
307 >>> cline = BlastpgpCommandline(help=True)
308 >>> cline
309 BlastpgpCommandline(cmd='blastpgp', help=True)
310 >>> print cline
311 blastpgp --help
312
313 You would typically run the command line with cline() or via the Python
314 subprocess module, as described in the Biopython tutorial.
315 """
def __init__(self, cmd="blastpgp", **kwargs):
    """Declare the blastpgp-specific options and initialise the wrapper.

    cmd is the name (or path) of the executable, "blastpgp" by default.
    Any keyword arguments are handed on to
    _BlastAllOrPgpCommandLine.__init__.

    A PendingDeprecationWarning is issued on every construction.
    """
    import warnings
    obsolete_notice = ("Like blastpgp (and blastall), this wrapper is now "
                       "obsolete, and will be deprecated and removed in a "
                       "future release of Biopython.")
    warnings.warn(obsolete_notice, PendingDeprecationWarning)
    blastpgp_options = [
        # Options that name files.
        _Option(["-C", "checkpoint_outfile"],
                "Output file for PSI-BLAST checkpointing.",
                equate=False, filename=True),
        _Option(["-R", "restart_infile"],
                "Input file for PSI-BLAST restart.",
                equate=False, filename=True),
        _Option(["-k", "hit_infile"],
                "Hit file for PHI-BLAST.",
                equate=False, filename=True),
        _Option(["-Q", "matrix_outfile"],
                "Output file for PSI-BLAST matrix in ASCII.",
                equate=False, filename=True),
        _Option(["-B", "align_infile"],
                "Input alignment file for PSI-BLAST restart.",
                equate=False, filename=True),
        # Plain value options.
        _Option(["-S", "required_start"],
                "Start of required region in query.",
                equate=False),
        _Option(["-H", "required_end"],
                "End of required region in query.",
                equate=False),
        _Option(["-j", "npasses"],
                "Number of passes",
                equate=False),
        _Option(["-N", "nbits_gapping"],
                "Number of bits to trigger gapping.",
                equate=False),
        _Option(["-c", "pseudocounts"],
                "Pseudocounts constants for multiple passes.",
                equate=False),
        _Option(["-h", "model_threshold"],
                "E-value threshold to include in multipass model.",
                equate=False),
        _Option(["-L", "region_length"],
                "Cost to decline alignment (disabled when zero).",
                equate=False),
        _Option(["-M", "matrix"],
                "Matrix (string, default BLOSUM62).",
                equate=False),
        _Option(["-p", "program"],
                "The blast program to use (e.g blastpgp, patseedp or seedp).",
                equate=False, is_required=True),
    ]
    self.parameters = blastpgp_options
    _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
371
372
374 """Create a commandline for the classic rpsblast program from NCBI (OBSOLETE).
375
376 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
377 are replacing the old rpsblast with a new version of the same name plus a
378 second tool rpstblastn, both taking different command line arguments. This
379 module provides NcbirpsblastCommandline and NcbirpstblastnCommandline as
380 wrappers for the new tools.
381
382 Like the old rpsblast (and blastall), this wrapper is now obsolete, and will
383 be deprecated and removed in a future release of Biopython.
384
385 >>> from Bio.Blast.Applications import RpsBlastCommandline
386 >>> cline = RpsBlastCommandline(help=True)
387 >>> cline
388 RpsBlastCommandline(cmd='rpsblast', help=True)
389 >>> print cline
390 rpsblast --help
391
392 You would typically run the command line with cline() or via the Python
393 subprocess module, as described in the Biopython tutorial.
394 """
def __init__(self, cmd="rpsblast", **kwargs):
    """Declare the classic rpsblast options and initialise the wrapper.

    cmd is the name (or path) of the executable, "rpsblast" by default.
    Any keyword arguments are handed on to _BlastCommandLine.__init__.

    A PendingDeprecationWarning is issued on every construction.
    """
    import warnings
    obsolete_notice = ("Like the old rpsblast (and blastall), this wrapper "
                       "is now obsolete, and will be deprecated and removed "
                       "in a future release of Biopython.")
    warnings.warn(obsolete_notice, PendingDeprecationWarning)
    rpsblast_options = [
        _Option(["-N", "nbits_gapping"],
                "Number of bits to trigger gapping.",
                equate=False),
        _Option(["-P", "multihit"],
                "0 for multiple hit, 1 for single hit",
                equate=False),
        _Option(["-l", "logfile"],
                "Logfile name.",
                equate=False, filename=True),
        _Option(["-p", "protein"],
                "Query sequence is protein. T/F",
                equate=False),
        _Option(["-L", "range_restriction"],
                "Location on query sequence (string format start,end).",
                equate=False),
    ]
    self.parameters = rpsblast_options
    _BlastCommandLine.__init__(self, cmd, **kwargs)
420
421
422
423
424
426 """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).
427
428 This is provided for subclassing, it deals with shared options
429 common to all the BLAST tools (blastn, rpsblast, rpstblastn, etc
430 AND blast_formatter).
431 """
def __init__(self, cmd=None, **kwargs):
    """Add the help and output-formatting options, then initialise.

    cmd is the name (or path) of the executable and must not be None.
    Any keyword arguments are handed on to AbstractCommandline.__init__.

    The options declared here are placed in front of any options a
    subclass has already stored in self.parameters.
    """
    assert cmd is not None
    extra_parameters = [
        # Core:
        _Switch(["-h", "h"],
                "Print USAGE and DESCRIPTION; ignore other arguments."),
        _Switch(["-help", "help"],
                "Print USAGE, DESCRIPTION and ARGUMENTS description; "
                "ignore other arguments."),
        _Switch(["-version", "version"],
                "Print version number; ignore other arguments."),
        # Output configuration options
        _Option(["-out", "out"],
                "Output file for alignment.",
                filename=True,
                equate=False),
        # Formatting options:
        _Option(["-outfmt", "outfmt"],
                "Alignment view. Integer 0-11. Use 5 for XML output "
                "(differs from classic BLAST which used 7 for XML).",
                equate=False),
        _Switch(["-show_gis", "show_gis"],
                "Show NCBI GIs in deflines?"),
        _Option(["-num_descriptions", "num_descriptions"],
                "Number of database sequences to show one-line descriptions for.\n"
                "\n"
                "Integer argument (at least zero). Default is 500.\n"
                "See also num_alignments.",
                equate=False),
        # The help text used to say "show num_alignments for" and pointed
        # back at itself; it now describes alignments and cross-references
        # the sibling num_descriptions option.
        _Option(["-num_alignments", "num_alignments"],
                "Number of database sequences to show alignments for.\n"
                "\n"
                "Integer argument (at least zero). Default is 200.\n"
                "See also num_descriptions.",
                equate=False),
        _Switch(["-html", "html"],
                "Produce HTML output? See also the outfmt option."),
        # Miscellaneous options
        _Switch(["-parse_deflines", "parse_deflines"],
                "Should the query and subject defline(s) be parsed?"),
    ]
    try:
        # Insert the shared options in front of any set by the subclass.
        self.parameters = extra_parameters + self.parameters
    except AttributeError:
        # The subclass did not define any options of its own.
        self.parameters = extra_parameters
    AbstractCommandline.__init__(self, cmd, **kwargs)
482
484 """Used by the BLAST+ _validate method (PRIVATE)."""
485 for a in incompatibles:
486 if self._get_parameter(a):
487 for b in incompatibles[a]:
488 if self._get_parameter(b):
489 raise ValueError("Options %s and %s are incompatible." \
490 % (a,b))
491
492
494 """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).
495
496 This is provided for subclassing, it deals with shared options
497 common to all the BLAST tools (blastn, rpsblast, rpstblastn, etc).
498 """
def __init__(self, cmd=None, **kwargs):
    """Add the search options shared by the BLAST+ search tools, then chain up.

    cmd is the name (or path) of the executable and must not be None.
    Any keyword arguments are handed on to
    _NcbibaseblastCommandline.__init__.
    """
    assert cmd is not None
    search_options = [
        # Input query options
        _Option(["-query", "query"],
                "The sequence to search with.",
                equate=False, filename=True),
        _Option(["-query_loc", "query_loc"],
                "Location on the query sequence (Format: start-stop)",
                equate=False),
        # General search options
        _Option(["-db", "db"],
                "The database to BLAST against.",
                equate=False),
        _Option(["-evalue", "evalue"],
                "Expectation value cutoff.",
                equate=False),
        _Option(["-word_size", "word_size"],
                "Word size for wordfinder algorithm.\n"
                "\n"
                "Integer. Minimum 2.",
                equate=False),
        # Query filtering options
        _Switch(["-lcase_masking", "lcase_masking"],
                "Use lower case filtering in query and subject sequence(s)?"),
        # Restrict search or results
        _Option(["-gilist", "gilist"],
                "Restrict search of database to list of GI's.\n"
                "\n"
                "Incompatible with: negative_gilist, seqidlist, remote, subject, subject_loc",
                equate=False, filename=True),
        _Option(["-negative_gilist", "negative_gilist"],
                "Restrict search of database to everything except the listed GIs.\n"
                "\n"
                "Incompatible with: gilist, seqidlist, remote, subject, subject_loc",
                equate=False, filename=True),
        _Option(["-seqidlist", "seqidlist"],
                "Restrict search of database to list of SeqID's.\n"
                "\n"
                "Incompatible with: gilist, negative_gilist, remote, subject, subject_loc",
                equate=False, filename=True),
        _Option(["-entrez_query", "entrez_query"],
                "Restrict search with the given Entrez query (requires remote).",
                equate=False),
        _Option(["-max_target_seqs", "max_target_seqs"],
                "Maximum number of aligned sequences to keep.\n"
                "\n"
                "Integer argument (at least one).",
                equate=False),
        # Statistical options
        _Option(["-dbsize", "dbsize"],
                "Effective length of the database (integer)",
                equate=False),
        _Option(["-searchsp", "searchsp"],
                "Effective length of the search space (integer)",
                equate=False),
        # Extension options
        _Option(["-xdrop_ungap", "xdrop_ungap"],
                "X-dropoff value (in bits) for ungapped extensions. Float.",
                equate=False),
        _Option(["-xdrop_gap", "xdrop_gap"],
                "X-dropoff value (in bits) for preliminary gapped extensions. Float.",
                equate=False),
        _Option(["-xdrop_gap_final", "xdrop_gap_final"],
                "X-dropoff value (in bits) for final gapped alignment. Float.",
                equate=False),
        _Option(["-window_size", "window_size"],
                "Multiple hits window size, use 0 to specify 1-hit algorithm. Integer.",
                equate=False),
        # Search strategy options
        _Option(["-import_search_strategy", "import_search_strategy"],
                "Search strategy to use.\n"
                "\n"
                "Incompatible with: export_search_strategy",
                equate=False, filename=True),
        _Option(["-export_search_strategy", "export_search_strategy"],
                "File name to record the search strategy used.\n"
                "\n"
                "Incompatible with: import_search_strategy",
                equate=False, filename=True),
        # Miscellaneous options
        _Option(["-num_threads", "num_threads"],
                "Number of threads to use in the BLAST search.\n"
                "\n"
                "Integer of at least one. Default is one.\n"
                "Incompatible with: remote",
                equate=False),
        _Switch(["-remote", "remote"],
                "Execute search remotely?\n"
                "\n"
                "Incompatible with: gilist, negative_gilist, subject_loc, num_threads, ..."),
    ]
    # A subclass may already have stored its own options on the instance;
    # if so the shared options are placed in front of them.
    try:
        subclass_options = self.parameters
    except AttributeError:
        subclass_options = []
    self.parameters = search_options + subclass_options
    _NcbibaseblastCommandline.__init__(self, cmd, **kwargs)
612
614 incompatibles = {"remote":["gilist", "negative_gilist", "num_threads"],
615 "import_search_strategy" : ["export_search_strategy"],
616 "gilist":["negative_gilist"],
617 "seqidlist":["gilist", "negative_gilist", "remote"]}
618 self._validate_incompatibilities(incompatibles)
619 if self.entrez_query and not self.remote :
620 raise ValueError("Option entrez_query requires remote option.")
621 AbstractCommandline._validate(self)
622
623
625 """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).
626
627 This is provided for subclassing, it deals with shared options
628 common to all the BLAST tools supporting two-sequence BLAST
629 (blastn, psiblast, etc) but not rpsblast or rpstblastn.
630 """
def __init__(self, cmd=None, **kwargs):
    """Add the gap, subject and hit-culling options, then chain upwards.

    cmd is the name (or path) of the executable and must not be None.
    Any keyword arguments are handed on to _NcbiblastCommandline.__init__.
    """
    assert cmd is not None
    two_sequence_options = [
        # General search options
        _Option(["-gapopen", "gapopen"],
                "Cost to open a gap (integer).",
                equate=False),
        _Option(["-gapextend", "gapextend"],
                "Cost to extend a gap (integer).",
                equate=False),
        # BLAST-2-Sequences options
        _Option(["-subject", "subject"],
                "Subject sequence(s) to search.\n"
                "\n"
                "Incompatible with: db, gilist, negative_gilist.\n"
                "See also subject_loc.",
                equate=False, filename=True),
        _Option(["-subject_loc", "subject_loc"],
                "Location on the subject sequence (Format: start-stop)\n"
                "\n"
                "Incompatible with: db, gilist, seqidlist, negative_gilist,\n"
                "db_soft_mask, db_hard_mask, remote.\n"
                "\n"
                "See also subject.",
                equate=False),
        # Restrict search or results
        _Option(["-culling_limit", "culling_limit"],
                "Hit culling limit (integer).\n"
                "\n"
                "If the query range of a hit is enveloped by that of at\n"
                "least this many higher-scoring hits, delete the hit.\n"
                "\n"
                "Incompatible with: best_hit_overhang, best_hit_score_edge.\n",
                equate=False),
        _Option(["-best_hit_overhang", "best_hit_overhang"],
                "Best Hit algorithm overhang value (recommended value: 0.1)\n"
                "\n"
                "Float between 0.0 and 0.5 inclusive.\n"
                "\n"
                "Incompatible with: culling_limit.",
                equate=False),
        _Option(["-best_hit_score_edge", "best_hit_score_edge"],
                "Best Hit algorithm score edge value (recommended value: 0.1)\n"
                "\n"
                "Float between 0.0 and 0.5 inclusive.\n"
                "\n"
                "Incompatible with: culling_limit.",
                equate=False),
    ]
    # A subclass may already have stored its own options on the instance;
    # if so the shared options are placed in front of them.
    try:
        subclass_options = self.parameters
    except AttributeError:
        subclass_options = []
    self.parameters = two_sequence_options + subclass_options
    _NcbiblastCommandline.__init__(self, cmd, **kwargs)
690
691
693 incompatibles = {"subject_loc":["db", "gilist", "negative_gilist", "seqidlist", "remote"],
694 "culling_limit":["best_hit_overhang","best_hit_score_edge"],
695 "subject":["db", "gilist", "negative_gilist", "seqidlist"]}
696 self._validate_incompatibilities(incompatibles)
697 _NcbiblastCommandline._validate(self)
698
699
class _NcbiblastMain2SeqCommandline(_Ncbiblast2SeqCommandline):
    """Shared base for the main BLAST+ search wrappers (PRIVATE).

    Intended for subclassing only.  It contributes the database masking
    options common to blastp, blastn, blastx, tblastx and tblastn, which
    psiblast, rpsblast and rpstblastn do not share.
    """

    def __init__(self, cmd=None, **kwargs):
        """Add the database masking options, then chain upwards.

        cmd is the name (or path) of the executable and must not be None.
        Any keyword arguments are handed on to
        _Ncbiblast2SeqCommandline.__init__.
        """
        assert cmd is not None
        masking_options = [
            # Restrict search or results
            _Option(["-db_soft_mask", "db_soft_mask"],
                    "Filtering algorithm for soft masking (integer).\n"
                    "\n"
                    "Filtering algorithm ID to apply to the BLAST database as soft masking.\n"
                    "\n"
                    "Incompatible with: db_hard_mask, subject, subject_loc",
                    equate=False),
            _Option(["-db_hard_mask", "db_hard_mask"],
                    "Filtering algorithm for hard masking (integer).\n"
                    "\n"
                    "Filtering algorithm ID to apply to the BLAST database as hard masking.\n"
                    "\n"
                    "Incompatible with: db_soft_mask, subject, subject_loc",
                    equate=False),
        ]
        # A subclass may already have stored its own options on the
        # instance; if so the masking options are placed in front of them.
        try:
            subclass_options = self.parameters
        except AttributeError:
            subclass_options = []
        self.parameters = masking_options + subclass_options
        _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Reject clashing masking options, then run the parent checks."""
        clashes = {
            "db_soft_mask": ["db_hard_mask", "subject", "subject_loc"],
            "db_hard_mask": ["db_soft_mask", "subject", "subject_loc"],
        }
        self._validate_incompatibilities(clashes)
        _Ncbiblast2SeqCommandline._validate(self)
740
742 """Create a commandline for the NCBI BLAST+ program blastp (for proteins).
743
744 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
745 replaced the old blastall tool with separate tools for each of the searches.
746 This wrapper therefore replaces BlastallCommandline with option -p blastp.
747
748 >>> from Bio.Blast.Applications import NcbiblastpCommandline
749 >>> cline = NcbiblastpCommandline(query="rosemary.pro", db="nr",
750 ... evalue=0.001, remote=True, ungapped=True)
751 >>> cline
752 NcbiblastpCommandline(cmd='blastp', query='rosemary.pro', db='nr', evalue=0.001, remote=True, ungapped=True)
753 >>> print cline
754 blastp -query rosemary.pro -db nr -evalue 0.001 -remote -ungapped
755
756 You would typically run the command line with cline() or via the Python
757 subprocess module, as described in the Biopython tutorial.
758 """
def __init__(self, cmd="blastp", **kwargs):
    """Declare the blastp-specific options and initialise the wrapper.

    cmd is the name (or path) of the executable, "blastp" by default.
    Any keyword arguments are handed on to
    _NcbiblastMain2SeqCommandline.__init__.
    """
    # Every value the comp_based_stats help text documents.  The previous
    # check was a substring test against "0Ft2TtDd": it rejected the
    # documented "f", accepted "" and multi-character fragments such as
    # "Ft", and raised TypeError for integers.
    comp_based_stats_values = ("0", "F", "f", "2", "T", "t", "D", "d")
    self.parameters = [
        # General search options:
        _Option(["-task", "task"],
                "Task to execute (string, blastp (default) or blastp-short).",
                checker_function=lambda value: value in ["blastp",
                                                         "blastp-short"],
                equate=False),
        # equate=False added for consistency with every other option in
        # this file, so the value follows the flag as a separate word.
        _Option(["-matrix", "matrix"],
                "Scoring matrix name (default BLOSUM62).",
                equate=False),
        _Option(["-threshold", "threshold"],
                "Minimum word score such that the word is added to the "
                "BLAST lookup table (float)",
                equate=False),
        _Option(["-comp_based_stats", "comp_based_stats"],
                "Use composition-based statistics (string, default 2, i.e. True).\n"
                "\n"
                "0, F or f: no composition-based statistics\n"
                "2, T or t, D or d : Composition-based score adjustment as in\n"
                "Bioinformatics 21:902-911, 2005, conditioned on sequence properties\n"
                "\n"
                "Note that tblastn also supports values of 1 and 3.",
                # str() lets the integers 0 and 2 through as well as their
                # string forms.
                checker_function=lambda value: (
                    str(value) in comp_based_stats_values),
                equate=False),
        # Query filtering options:
        _Option(["-seg", "seg"],
                "Filter query sequence with SEG (string).\n"
                "\n"
                'Format: "yes", "window locut hicut", or "no" to disable.\n'
                'Default is "12 2.2 2.5".',
                equate=False),
        # Extension options:
        _Switch(["-ungapped", "ungapped"],
                "Perform ungapped alignment only?"),
        # Miscellaneous options:
        _Switch(["-use_sw_tback", "use_sw_tback"],
                "Compute locally optimal Smith-Waterman alignments?"),
    ]
    _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
798
799
801 """Wrapper for the NCBI BLAST+ program blastn (for nucleotides).
802
803 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
804 replaced the old blastall tool with separate tools for each of the searches.
805 This wrapper therefore replaces BlastallCommandline with option -p blastn.
806
807 For example, to run a search against the "nt" nucleotide database using the
808 FASTA nucleotide file "m_code.fasta" as the query, with an expectation value
809 cut off of 0.001, saving the output to a file in XML format:
810
811 >>> from Bio.Blast.Applications import NcbiblastnCommandline
812 >>> cline = NcbiblastnCommandline(query="m_cold.fasta", db="nt", strand="plus",
813 ... evalue=0.001, out="m_cold.xml", outfmt=5)
814 >>> cline
815 NcbiblastnCommandline(cmd='blastn', out='m_cold.xml', outfmt=5, query='m_cold.fasta', db='nt', evalue=0.001, strand='plus')
816 >>> print cline
817 blastn -out m_cold.xml -outfmt 5 -query m_cold.fasta -db nt -evalue 0.001 -strand plus
818
819 You would typically run the command line with cline() or via the Python
820 subprocess module, as described in the Biopython tutorial.
821 """
822 - def __init__(self, cmd="blastn", **kwargs):
823 self.parameters = [
824
825 _Option(["-strand", "strand"],
826 """Query strand(s) to search against database/subject.
827
828 Values allowed are "both" (default), "minus", "plus".""",
829 checker_function=lambda value : value in ["both",
830 "minus",
831 "plus"],
832 equate=False),