Package Bio :: Package Blast :: Module Applications
[hide private]
[frames] | no frames]

Source Code for Module Bio.Blast.Applications

   1  # Copyright 2001 Brad Chapman. 
   2  # Revisions copyright 2009-2010 by Peter Cock. 
   3  # Revisions copyright 2010 by Phillip Garland. 
   4  # All rights reserved. 
   5  # This code is part of the Biopython distribution and governed by its 
   6  # license.  Please see the LICENSE file that should have been included 
   7  # as part of this package. 
   8  """Definitions for interacting with BLAST related applications. 
   9   
  10  Obsolete wrappers for the old/classic NCBI BLAST tools (written in C): 
  11   
  12  - FastacmdCommandline 
  13  - BlastallCommandline 
  14  - BlastpgpCommandline 
  15  - RpsBlastCommandline 
  16   
  17  Wrappers for the new NCBI BLAST+ tools (written in C++): 
  18   
  19  - NcbiblastpCommandline - Protein-Protein BLAST 
  20  - NcbiblastnCommandline - Nucleotide-Nucleotide BLAST 
  21  - NcbiblastxCommandline - Translated Query-Protein Subject BLAST 
  22  - NcbitblastnCommandline - Protein Query-Translated Subject BLAST 
  23  - NcbitblastxCommandline - Translated Query-Translated Subject BLAST 
  24  - NcbipsiblastCommandline - Position-Specific Initiated BLAST 
  25  - NcbirpsblastCommandline - Reverse Position Specific BLAST 
  26  - NcbirpstblastnCommandline - Translated Reverse Position Specific BLAST 
  27  - NcbiblastformatterCommandline - Convert ASN.1 to other BLAST output formats 
  28   
  29  For further details, see: 
  30   
  31  Camacho et al. BLAST+: architecture and applications 
  32  BMC Bioinformatics 2009, 10:421 
  33  doi:10.1186/1471-2105-10-421 
  34  """ 
  35  from Bio import BiopythonDeprecationWarning 
  36   
  37  from Bio.Application import _Option, AbstractCommandline, _Switch 
  38   
  39   
class FastacmdCommandline(AbstractCommandline):
    """Command line wrapper for the NCBI fastacmd tool (OBSOLETE).

    Two options are defined, both mandatory: the database to retrieve
    from (-d) and the id to search for (-s).
    """

    def __init__(self, cmd="fastacmd", **kwargs):
        database_option = _Option(["-d", "database"],
                                  "The database to retrieve from.",
                                  equate=False,
                                  is_required=True)
        search_option = _Option(["-s", "search_string"],
                                "The id to search for.",
                                equate=False,
                                is_required=True)
        self.parameters = [database_option, search_option]
        AbstractCommandline.__init__(self, cmd, **kwargs)
56 57
class _BlastCommandLine(AbstractCommandline):
    """Shared base for the classic NCBI BLAST wrappers (PRIVATE).

    Intended for subclassing only.  It supplies the options which the
    legacy tools blastall, blastpgp and rpsblast have in common.
    """

    def __init__(self, cmd=None, **kwargs):
        assert cmd is not None
        shared_options = [
            _Switch(["--help", "help"],
                    "Print USAGE, DESCRIPTION and ARGUMENTS description; "
                    "ignore other arguments."),
            _Option(["-d", "database"], "The database to BLAST against.",
                    equate=False, is_required=True),
            _Option(["-i", "infile"], "The sequence to search with.",
                    equate=False, is_required=True, filename=True),
            _Option(["-e", "expectation"], "Expectation value cutoff.",
                    equate=False),
            _Option(["-m", "align_view"],
                    "Alignment view. Integer 0-11. Use 7 for XML output.",
                    equate=False),
            _Option(["-o", "align_outfile", "outfile"],
                    "Output file for alignment.",
                    equate=False, filename=True),
            _Option(["-y", "xdrop_extension"],
                    "Dropoff for blast extensions.",
                    equate=False),
            _Option(["-F", "filter"], "Filter query sequence with SEG? T/F",
                    equate=False),
            _Option(["-X", "xdrop"],
                    "Dropoff value (bits) for gapped alignments.",
                    equate=False),
            _Option(["-I", "show_gi"], "Show GI's in deflines? T/F",
                    equate=False),
            _Option(["-J", "believe_query"], "Believe the query defline? T/F",
                    equate=False),
            _Option(["-Z", "xdrop_final"],
                    "X dropoff for final gapped alignment.",
                    equate=False),
            _Option(["-z", "db_length"], "Effective database length.",
                    equate=False),
            _Option(["-O", "seqalign_file"], "seqalign file to output.",
                    equate=False, filename=True),
            _Option(["-v", "descriptions"],
                    "Number of one-line descriptions.",
                    equate=False),
            _Option(["-b", "alignments"], "Number of alignments.",
                    equate=False),
            _Option(["-Y", "search_length"],
                    "Effective length of search space "
                    "(use zero for the real size).",
                    equate=False),
            _Option(["-T", "html"], "Produce HTML output? T/F",
                    equate=False),
            _Option(["-U", "case_filter"],
                    "Use lower case filtering of FASTA sequence? T/F",
                    equate=False),
            _Option(["-a", "nprocessors"], "Number of processors to use.",
                    equate=False),
            _Option(["-g", "gapped"],
                    "Whether to do a gapped alignment. T/F",
                    equate=False),
        ]
        # The shared options are placed first in case the subclass defined
        # arguments which must come last.  A subclass is expected to have
        # set self.parameters already; if it did not, only the shared
        # options are used.
        self.parameters = shared_options + getattr(self, "parameters", [])
        AbstractCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Run the normal validation unless the help switch is set."""
        # With --help the usually mandatory arguments (e.g. the database)
        # are not needed, so the check is skipped.
        if not self.help:
            AbstractCommandline._validate(self)
150 151
class _BlastAllOrPgpCommandLine(_BlastCommandLine):
    """Shared base for the blastall and blastpgp wrappers (PRIVATE).

    Intended for subclassing only.  It adds the options which blastall
    and blastpgp share with each other but not with rpsblast.
    """

    def __init__(self, cmd=None, **kwargs):
        assert cmd is not None
        shared_options = [
            _Option(["-G", "gap_open"], "Gap open penalty", equate=False),
            _Option(["-E", "gap_extend"], "Gap extension penalty",
                    equate=False),
            _Option(["-A", "window_size"], "Multiple hits window size",
                    equate=False),
            _Option(["-f", "hit_extend"], "Threshold for extending hits.",
                    equate=False),
            _Option(["-K", "keep_hits"],
                    " Number of best hits from a region to keep.",
                    equate=False),
            _Option(["-W", "wordsize"], "Word size", equate=False),
            _Option(["-P", "passes"],
                    "Hits/passes. Integer 0-2. 0 for multiple hit, 1 for "
                    "single hit (does not apply to blastn)",
                    equate=False),
        ]
        # Shared options first, in case the subclass has arguments which
        # must come last; fall back to just these if the subclass did not
        # set self.parameters.
        self.parameters = shared_options + getattr(self, "parameters", [])
        _BlastCommandLine.__init__(self, cmd, **kwargs)
192 193
class BlastallCommandline(_BlastAllOrPgpCommandLine):
    """Create a commandline for the blastall program from NCBI (DEPRECATED).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    are replacing blastall with separate tools blastn, blastp, blastx, tblastn
    and tblastx.

    Like blastall, this wrapper is now deprecated and will be removed in a
    future release of Biopython.  Creating an instance issues a
    BiopythonDeprecationWarning.

    >>> from Bio.Blast.Applications import BlastallCommandline
    >>> cline = BlastallCommandline(program="blastx", infile="m_cold.fasta",
    ...                             database="nr", expectation=0.001)
    >>> cline
    BlastallCommandline(cmd='blastall', database='nr', infile='m_cold.fasta', expectation=0.001, program='blastx')
    >>> print cline
    blastall -d nr -i m_cold.fasta -e 0.001 -p blastx

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """
    #TODO - This could use more checking for valid parameters to the program.

    def __init__(self, cmd="blastall", **kwargs):
        import warnings
        # stacklevel=2 attributes the warning to the code creating the
        # wrapper rather than to this line inside the library.
        warnings.warn("Like blastall, this wrapper is now deprecated and "
                      "will be removed in a future release of Biopython.",
                      BiopythonDeprecationWarning, stacklevel=2)
        self.parameters = [
            #Sorted in the same order as the output from blastall --help
            #which should make it easier to keep them up to date in future.
            #Note that some arguments are defined in the base classes (above).
            _Option(["-p", "program"],
                    "The blast program to use (e.g. blastp, blastn).",
                    is_required=True,
                    equate=False),
            _Option(["-q", "nuc_mismatch"],
                    "Penalty for a nucleotide mismatch (blastn only).",
                    equate=False),
            _Option(["-r", "nuc_match"],
                    "Reward for a nucleotide match (blastn only).",
                    equate=False),
            _Option(["-Q", "query_genetic_code"],
                    "Query Genetic code to use.",
                    equate=False),
            _Option(["-D", "db_genetic_code"],
                    "DB Genetic code (for tblast[nx] only).",
                    equate=False),
            _Option(["-M", "matrix"],
                    "Matrix to use",
                    equate=False),
            _Option(["-S", "strands"],
                    "Query strands to search against database (for blast[nx], "
                    "and tblastx). 3 is both, 1 is top, 2 is bottom.",
                    equate=False),
            _Option(["-l", "restrict_gi"],
                    "Restrict search of database to list of GI's.",
                    equate=False),
            _Option(["-R", "checkpoint"],
                    "PSI-TBLASTN checkpoint input file.",
                    filename=True,
                    equate=False),
            _Option(["-n", "megablast"],
                    "MegaBlast search T/F.",
                    equate=False),
            #The old name "region_length" is for consistency with our
            #old blastall function wrapper:
            _Option(["-L", "region_length", "range_restriction"],
                    """Location on query sequence (string format start,end).

                    In older versions of BLAST, -L set the length of region
                    used to judge hits (see -K parameter).""",
                    equate=False),
            _Option(["-w", "frame_shift_penalty"],
                    "Frame shift penalty (OOF algorithm for blastx).",
                    equate=False),
            _Option(["-t", "largest_intron"],
                    "Length of the largest intron allowed in a translated "
                    "nucleotide sequence when linking multiple distinct "
                    "alignments. (0 invokes default behavior; a negative value "
                    "disables linking.)",
                    equate=False),
            _Option(["-B", "num_concatenated_queries"],
                    "Number of concatenated queries, for blastn and tblastn.",
                    equate=False),
            _Option(["-V", "oldengine"],
                    "Force use of the legacy BLAST engine.",
                    equate=False),
            _Option(["-C", "composition_based"],
                    """Use composition-based statistics for tblastn:
                    D or d: default (equivalent to F)
                    0 or F or f: no composition-based statistics
                    1 or T or t: Composition-based statistics as in NAR 29:2994-3005, 2001
                    2: Composition-based score adjustment as in Bioinformatics
                    21:902-911, 2005, conditioned on sequence properties
                    3: Composition-based score adjustment as in Bioinformatics
                    21:902-911, 2005, unconditionally
                    For programs other than tblastn, must either be absent or be
                    D, F or 0.""",
                    equate=False),
            _Option(["-s", "smith_waterman"],
                    "Compute locally optimal Smith-Waterman alignments (This "
                    "option is only available for gapped tblastn.) T/F",
                    equate=False),
        ]
        _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
297 298
class BlastpgpCommandline(_BlastAllOrPgpCommandLine):
    """Create a commandline for the blastpgp program from NCBI (DEPRECATED).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    are replacing blastpgp with a renamed tool psiblast. This module provides
    NcbipsiblastCommandline as a wrapper for the new tool psiblast.

    Like blastpgp (and blastall), this wrapper is now deprecated and will be
    removed in a future release of Biopython.  Creating an instance issues a
    BiopythonDeprecationWarning.

    >>> from Bio.Blast.Applications import BlastpgpCommandline
    >>> cline = BlastpgpCommandline(help=True)
    >>> cline
    BlastpgpCommandline(cmd='blastpgp', help=True)
    >>> print cline
    blastpgp --help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """

    def __init__(self, cmd="blastpgp", **kwargs):
        import warnings
        # stacklevel=2 attributes the warning to the code creating the
        # wrapper rather than to this line inside the library.
        warnings.warn("Like blastpgp (and blastall), this wrapper is now "
                      "deprecated and will be removed in a future release "
                      "of Biopython.",
                      BiopythonDeprecationWarning, stacklevel=2)
        self.parameters = [
            _Option(["-C", "checkpoint_outfile"],
                    "Output file for PSI-BLAST checkpointing.",
                    filename=True,
                    equate=False),
            _Option(["-R", "restart_infile"],
                    "Input file for PSI-BLAST restart.",
                    filename=True,
                    equate=False),
            _Option(["-k", "hit_infile"],
                    "Hit file for PHI-BLAST.",
                    filename=True,
                    equate=False),
            _Option(["-Q", "matrix_outfile"],
                    "Output file for PSI-BLAST matrix in ASCII.",
                    filename=True,
                    equate=False),
            _Option(["-B", "align_infile"],
                    "Input alignment file for PSI-BLAST restart.",
                    filename=True,
                    equate=False),
            _Option(["-S", "required_start"],
                    "Start of required region in query.",
                    equate=False),
            _Option(["-H", "required_end"],
                    "End of required region in query.",
                    equate=False),
            _Option(["-j", "npasses"],
                    "Number of passes",
                    equate=False),
            _Option(["-N", "nbits_gapping"],
                    "Number of bits to trigger gapping.",
                    equate=False),
            _Option(["-c", "pseudocounts"],
                    "Pseudocounts constants for multiple passes.",
                    equate=False),
            _Option(["-h", "model_threshold"],
                    "E-value threshold to include in multipass model.",
                    equate=False),
            #Does the old name "region_length" for -L make sense?
            _Option(["-L", "region_length"],
                    "Cost to decline alignment (disabled when zero).",
                    equate=False),
            _Option(["-M", "matrix"],
                    "Matrix (string, default BLOSUM62).",
                    equate=False),
            _Option(["-p", "program"],
                    "The blast program to use (e.g blastpgp, patseedp or seedp).",
                    is_required=True,
                    equate=False),
        ]
        _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
374 375
class RpsBlastCommandline(_BlastCommandLine):
    """Create a commandline for the classic rpsblast program from NCBI (DEPRECATED).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    are replacing the old rpsblast with a new version of the same name plus a
    second tool rpstblastn, both taking different command line arguments. This
    module provides NcbirpsblastCommandline and NcbirpstblastnCommandline as
    wrappers for the new tools.

    Like the old rpsblast (and blastall), this wrapper is now deprecated and
    will be removed in a future release of Biopython.  Creating an instance
    issues a BiopythonDeprecationWarning.

    >>> from Bio.Blast.Applications import RpsBlastCommandline
    >>> cline = RpsBlastCommandline(help=True)
    >>> cline
    RpsBlastCommandline(cmd='rpsblast', help=True)
    >>> print cline
    rpsblast --help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """

    def __init__(self, cmd="rpsblast", **kwargs):
        import warnings
        # stacklevel=2 attributes the warning to the code creating the
        # wrapper rather than to this line inside the library.
        warnings.warn("Like the old rpsblast (and blastall), this wrapper "
                      "is now deprecated and will be removed in a future "
                      "release of Biopython.",
                      BiopythonDeprecationWarning, stacklevel=2)
        self.parameters = [
            #Note -N is also in blastpgp, but not blastall
            _Option(["-N", "nbits_gapping"],
                    "Number of bits to trigger gapping.",
                    equate=False),
            #Note blastall and blastpgp wrappers have -P with name "passes".
            #If this is the same thing, we should be consistent!
            _Option(["-P", "multihit"],
                    "0 for multiple hit, 1 for single hit",
                    equate=False),
            _Option(["-l", "logfile"],
                    "Logfile name.",
                    filename=True,
                    equate=False),
            _Option(["-p", "protein"],
                    "Query sequence is protein. T/F",
                    equate=False),
            _Option(["-L", "range_restriction"],
                    "Location on query sequence (string format start,end).",
                    equate=False),
        ]
        _BlastCommandLine.__init__(self, cmd, **kwargs)
423 424 ############################################################################## 425 # Legacy BLAST wrappers above, (new) BLAST+ wrappers below 426 ############################################################################## 427 428
class _NcbibaseblastCommandline(AbstractCommandline):
    """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).

    This is provided for subclassing, it deals with shared options
    common to all the BLAST tools (blastn, rpsblast, rpstblastn, etc
    AND blast_formatter).
    """

    def __init__(self, cmd=None, **kwargs):
        assert cmd is not None
        extra_parameters = [
            #Core:
            _Switch(["-h", "h"],
                    "Print USAGE and DESCRIPTION; ignore other arguments."),
            _Switch(["-help", "help"],
                    "Print USAGE, DESCRIPTION and ARGUMENTS description; "
                    "ignore other arguments."),
            _Switch(["-version", "version"],
                    "Print version number; ignore other arguments."),
            # Output configuration options
            _Option(["-out", "out"],
                    "Output file for alignment.",
                    filename=True,
                    equate=False),
            #Formatting options:
            _Option(["-outfmt", "outfmt"],
                    "Alignment view. Integer 0-11. Use 5 for XML output "
                    "(differs from classic BLAST which used 7 for XML).",
                    equate=False),
            #TODO - Document and test the column options
            _Switch(["-show_gis", "show_gis"],
                    "Show NCBI GIs in deflines?"),
            _Option(["-num_descriptions", "num_descriptions"],
                    """Number of database sequences to show one-line descriptions for.

                    Integer argument (at least zero). Default is 500.
                    See also num_alignments.""",
                    equate=False),
            _Option(["-num_alignments", "num_alignments"],
                    """Number of database sequences to show alignments for.

                    Integer argument (at least zero). Default is 250.
                    See also num_descriptions.""",
                    equate=False),
            _Switch(["-html", "html"],
                    "Produce HTML output? See also the outfmt option."),
            #Miscellaneous options
            _Switch(["-parse_deflines", "parse_deflines"],
                    "Should the query and subject defline(s) be parsed?"),
        ]
        try:
            #Insert extra parameters - at the start just in case there
            #are any arguments which must come last:
            self.parameters = extra_parameters + self.parameters
        except AttributeError:
            #Should we raise an error?  The subclass should have set this up!
            self.parameters = extra_parameters
        AbstractCommandline.__init__(self, cmd, **kwargs)

    def _validate_incompatibilities(self, incompatibles):
        """Used by the BLAST+ _validate method (PRIVATE).

        The incompatibles argument maps an option name to a list of option
        names which must not be used together with it.  Raises ValueError
        for the first clashing pair in which both options are set.
        """
        for a in incompatibles:
            if self._get_parameter(a):
                for b in incompatibles[a]:
                    if self._get_parameter(b):
                        raise ValueError("Options %s and %s are incompatible."
                                         % (a, b))
495 496
class _NcbiblastCommandline(_NcbibaseblastCommandline):
    """Shared base for the BLAST+ search tool wrappers (PRIVATE).

    Intended for subclassing only.  It adds the query, database,
    restriction, statistical, extension, search strategy and miscellaneous
    options which the BLAST+ search wrappers in this module share.
    """

    def __init__(self, cmd=None, **kwargs):
        assert cmd is not None
        search_options = [
            # --- Input query options ---
            # (Open question kept from the original: should query be
            # a required option?)
            _Option(["-query", "query"], "The sequence to search with.",
                    equate=False, filename=True),
            _Option(["-query_loc", "query_loc"],
                    "Location on the query sequence (Format: start-stop)",
                    equate=False),
            # --- General search options ---
            _Option(["-db", "db"], "The database to BLAST against.",
                    equate=False),
            _Option(["-evalue", "evalue"], "Expectation value cutoff.",
                    equate=False),
            _Option(["-word_size", "word_size"],
                    """Word size for wordfinder algorithm.

                    Integer. Minimum 2.""",
                    equate=False),
            # BLAST-2-Sequences options are added by a subclass, and the
            # formatting options by the base class.
            # --- Query filtering options ---
            # TODO - "-soft_masking <Boolean>" is not wrapped yet; it is
            # unclear whether it should be a switch or an option.
            _Switch(["-lcase_masking", "lcase_masking"],
                    "Use lower case filtering in query and subject "
                    "sequence(s)?"),
            # --- Restrict search or results ---
            _Option(["-gilist", "gilist"],
                    """Restrict search of database to list of GI's.

                    Incompatible with: negative_gilist, seqidlist, remote, subject, subject_loc""",
                    equate=False, filename=True),
            _Option(["-negative_gilist", "negative_gilist"],
                    """Restrict search of database to everything except the listed GIs.

                    Incompatible with: gilist, seqidlist, remote, subject, subject_loc""",
                    equate=False, filename=True),
            _Option(["-seqidlist", "seqidlist"],
                    """Restrict search of database to list of SeqID's.

                    Incompatible with: gilist, negative_gilist, remote, subject, subject_loc""",
                    equate=False, filename=True),
            _Option(["-entrez_query", "entrez_query"],
                    "Restrict search with the given Entrez query "
                    "(requires remote).",
                    equate=False),
            _Option(["-max_target_seqs", "max_target_seqs"],
                    """Maximum number of aligned sequences to keep.

                    Integer argument (at least one).""",
                    equate=False),
            # --- Statistical options ---
            _Option(["-dbsize", "dbsize"],
                    "Effective length of the database (integer)",
                    equate=False),
            _Option(["-searchsp", "searchsp"],
                    "Effective length of the search space (integer)",
                    equate=False),
            _Option(["-max_hsps_per_subject", "max_hsps_per_subject"],
                    "Override maximum number of HSPs per subject to save "
                    "for ungapped searches (integer)",
                    equate=False),
            # --- Extension options ---
            _Option(["-xdrop_ungap", "xdrop_ungap"],
                    "X-dropoff value (in bits) for ungapped extensions. "
                    "Float.",
                    equate=False),
            _Option(["-xdrop_gap", "xdrop_gap"],
                    "X-dropoff value (in bits) for preliminary gapped "
                    "extensions. Float.",
                    equate=False),
            _Option(["-xdrop_gap_final", "xdrop_gap_final"],
                    "X-dropoff value (in bits) for final gapped alignment. "
                    "Float.",
                    equate=False),
            _Option(["-window_size", "window_size"],
                    "Multiple hits window size, use 0 to specify 1-hit "
                    "algorithm. Integer.",
                    equate=False),
            # --- Search strategy options ---
            _Option(["-import_search_strategy", "import_search_strategy"],
                    """Search strategy to use.

                    Incompatible with: export_search_strategy""",
                    equate=False, filename=True),
            _Option(["-export_search_strategy", "export_search_strategy"],
                    """File name to record the search strategy used.

                    Incompatible with: import_search_strategy""",
                    equate=False, filename=True),
            # --- Miscellaneous options ---
            _Option(["-num_threads", "num_threads"],
                    """Number of threads to use in the BLAST search.

                    Integer of at least one. Default is one.
                    Incompatible with: remote""",
                    equate=False),
            _Switch(["-remote", "remote"],
                    """Execute search remotely?

                    Incompatible with: gilist, negative_gilist, subject_loc, num_threads, ..."""),
        ]
        # These options go first in case the subclass has arguments which
        # must come last; fall back to just these if the subclass did not
        # set self.parameters.
        self.parameters = search_options + getattr(self, "parameters", [])
        _NcbibaseblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check option clashes and the entrez_query/remote dependency."""
        clashes = {
            "remote": ["gilist", "negative_gilist", "num_threads"],
            "import_search_strategy": ["export_search_strategy"],
            "gilist": ["negative_gilist"],
            "seqidlist": ["gilist", "negative_gilist", "remote"],
        }
        self._validate_incompatibilities(clashes)
        entrez_without_remote = self.entrez_query and not self.remote
        if entrez_without_remote:
            raise ValueError("Option entrez_query requires remote option.")
        AbstractCommandline._validate(self)
629 630
class _Ncbiblast2SeqCommandline(_NcbiblastCommandline):
    """Shared base for BLAST+ wrappers with two-sequence support (PRIVATE).

    Intended for subclassing only.  It adds the options of the BLAST+
    tools which can compare a query against a subject sequence (blastn,
    psiblast, etc), which excludes rpsblast and rpstblastn.
    """

    def __init__(self, cmd=None, **kwargs):
        assert cmd is not None
        two_seq_options = [
            # --- General search options ---
            _Option(["-gapopen", "gapopen"], "Cost to open a gap (integer).",
                    equate=False),
            _Option(["-gapextend", "gapextend"],
                    "Cost to extend a gap (integer).",
                    equate=False),
            # --- BLAST-2-Sequences options ---
            _Option(["-subject", "subject"],
                    """Subject sequence(s) to search.

                    Incompatible with: db, gilist, negative_gilist.
                    See also subject_loc.""",
                    equate=False, filename=True),
            _Option(["-subject_loc", "subject_loc"],
                    """Location on the subject sequence (Format: start-stop)

                    Incompatible with: db, gilist, seqidlist, negative_gilist,
                    db_soft_mask, db_hard_mask, remote.

                    See also subject.""",
                    equate=False),
            # --- Restrict search or results ---
            _Option(["-culling_limit", "culling_limit"],
                    """Hit culling limit (integer).

                    If the query range of a hit is enveloped by that of at
                    least this many higher-scoring hits, delete the hit.

                    Incompatible with: best_hit_overhang, best_hit_score_edge.
                    """,
                    equate=False),
            _Option(["-best_hit_overhang", "best_hit_overhang"],
                    """Best Hit algorithm overhang value (recommended value: 0.1)

                    Float between 0.0 and 0.5 inclusive.

                    Incompatible with: culling_limit.""",
                    equate=False),
            _Option(["-best_hit_score_edge", "best_hit_score_edge"],
                    """Best Hit algorithm score edge value (recommended value: 0.1)

                    Float between 0.0 and 0.5 inclusive.

                    Incompatible with: culling_limit.""",
                    equate=False),
        ]
        # These options go first in case the subclass has arguments which
        # must come last; fall back to just these if the subclass did not
        # set self.parameters.
        self.parameters = two_seq_options + getattr(self, "parameters", [])
        _NcbiblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check the subject and culling clashes, then the inherited rules."""
        clashes = {
            "subject_loc": ["db", "gilist", "negative_gilist", "seqidlist",
                            "remote"],
            "culling_limit": ["best_hit_overhang", "best_hit_score_edge"],
            "subject": ["db", "gilist", "negative_gilist", "seqidlist"],
        }
        self._validate_incompatibilities(clashes)
        _NcbiblastCommandline._validate(self)
704 705
class _NcbiblastMain2SeqCommandline(_Ncbiblast2SeqCommandline):
    """Shared base for the main BLAST+ search tool wrappers (PRIVATE).

    Intended for subclassing only.  It adds the database masking options
    of blastp, blastn, blastx, tblastx and tblastn, which psiblast,
    rpsblast and rpstblastn do not have.
    """

    def __init__(self, cmd=None, **kwargs):
        assert cmd is not None
        masking_options = [
            # --- Restrict search or results ---
            _Option(["-db_soft_mask", "db_soft_mask"],
                    """Filtering algorithm for soft masking (integer).

                    Filtering algorithm ID to apply to the BLAST database as soft masking.

                    Incompatible with: db_hard_mask, subject, subject_loc""",
                    equate=False),
            _Option(["-db_hard_mask", "db_hard_mask"],
                    """Filtering algorithm for hard masking (integer).

                    Filtering algorithm ID to apply to the BLAST database as hard masking.

                    Incompatible with: db_soft_mask, subject, subject_loc""",
                    equate=False),
        ]
        # These options go first in case the subclass has arguments which
        # must come last; fall back to just these if the subclass did not
        # set self.parameters.
        self.parameters = masking_options + getattr(self, "parameters", [])
        _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check the masking option clashes, then the inherited rules."""
        clashes = {
            "db_soft_mask": ["db_hard_mask", "subject", "subject_loc"],
            "db_hard_mask": ["db_soft_mask", "subject", "subject_loc"],
        }
        self._validate_incompatibilities(clashes)
        _Ncbiblast2SeqCommandline._validate(self)
746 747
class NcbiblastpCommandline(_NcbiblastMain2SeqCommandline):
    """Create a commandline for the NCBI BLAST+ program blastp (for proteins).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p blastp.

    >>> from Bio.Blast.Applications import NcbiblastpCommandline
    >>> cline = NcbiblastpCommandline(query="rosemary.pro", db="nr",
    ...                               evalue=0.001, remote=True, ungapped=True)
    >>> cline
    NcbiblastpCommandline(cmd='blastp', query='rosemary.pro', db='nr', evalue=0.001, remote=True, ungapped=True)
    >>> print cline
    blastp -query rosemary.pro -db nr -evalue 0.001 -remote -ungapped

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """

    def __init__(self, cmd="blastp", **kwargs):
        self.parameters = [
            #General search options:
            _Option(["-task", "task"],
                    "Task to execute (string, blastp (default) or blastp-short).",
                    checker_function=lambda value: value in ["blastp",
                                                             "blastp-short"],
                    equate=False),
            # equate=False so the value is given as a separate argument
            # ("-matrix NAME"), like every other BLAST+ option here.
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum word score such that the word is added to the "
                    "BLAST lookup table (float)",
                    equate=False),
            # Compare against the explicit list of documented values.  A
            # substring test would accept "" or "Ft2", and the integers 0
            # and 2 are accepted via str().
            _Option(["-comp_based_stats", "comp_based_stats"],
                    """Use composition-based statistics (string, default 2, i.e. True).

                    0, F or f: no composition-based statistics
                    2, T or t, D or d : Composition-based score adjustment as in
                    Bioinformatics 21:902-911, 2005, conditioned on sequence properties

                    Note that tblastn also supports values of 1 and 3.""",
                    checker_function=lambda value: str(value) in (
                        "0", "F", "f", "2", "T", "t", "D", "d"),
                    equate=False),
            #Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5".""",
                    equate=False),
            #Extension options:
            _Switch(["-ungapped", "ungapped"],
                    "Perform ungapped alignment only?"),
            #Miscellaneous options:
            _Switch(["-use_sw_tback", "use_sw_tback"],
                    "Compute locally optimal Smith-Waterman alignments?"),
        ]
        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
805 806
807 -class NcbiblastnCommandline(_NcbiblastMain2SeqCommandline):
808 """Wrapper for the NCBI BLAST+ program blastn (for nucleotides). 809 810 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI 811 replaced the old blastall tool with separate tools for each of the searches. 812 This wrapper therefore replaces BlastallCommandline with option -p blastn. 813 814 For example, to run a search against the "nt" nucleotide database using the 815 FASTA nucleotide file "m_code.fasta" as the query, with an expectation value 816 cut off of 0.001, saving the output to a file in XML format: 817 818 >>> from Bio.Blast.Applications import NcbiblastnCommandline 819 >>> cline = NcbiblastnCommandline(query="m_cold.fasta", db="nt", strand="plus", 820 ... evalue=0.001, out="m_cold.xml", outfmt=5) 821 >>> cline 822 NcbiblastnCommandline(cmd='blastn', out='m_cold.xml', outfmt=5, query='m_cold.fasta', db='nt', evalue=0.001, strand='plus') 823 >>> print cline 824 blastn -out m_cold.xml -outfmt 5 -query m_cold.fasta -db nt -evalue 0.001 -strand plus 825 826 You would typically run the command line with cline() or via the Python 827 subprocess module, as described in the Biopython tutorial. 828 """
def __init__(self, cmd="blastn", **kwargs):
    """Register the blastn-specific options, then run the base initialiser.

    The options are stored on self.parameters before
    _NcbiblastMain2SeqCommandline.__init__ is called with cmd and any
    keyword arguments.
    """
    # Accepted values for the options that are restricted to a closed set.
    strand_choices = ("both", "minus", "plus")
    task_choices = ('blastn', 'blastn-short', 'dc-megablast',
                    'megablast', 'vecscreen')
    template_type_choices = ('coding', 'coding_and_optimal', 'optimal')
    # Template lengths are accepted as integers or as their string forms.
    template_length_choices = (16, 18, 21, '16', '18', '21')

    def valid_strand(value):
        return value in strand_choices

    def valid_task(value):
        return value in task_choices

    def valid_template_type(value):
        return value in template_type_choices

    def valid_template_length(value):
        return value in template_length_choices

    # Input query options:
    query_options = [
        _Option(["-strand", "strand"],
                """Query strand(s) to search against database/subject.

                Values allowed are "both" (default), "minus", "plus".""",
                checker_function=valid_strand,
                equate=False),
    ]

    # General search options:
    # TODO - "-use_index" (use MegaBLAST database index, boolean) is not
    # wrapped yet: does it need an argument or is it a switch?
    search_options = [
        _Option(["-task", "task"],
                """Task to execute (string, default 'megablast')

                Allowed values 'blastn', 'blastn-short', 'dc-megablast', 'megablast'
                (the default), or 'vecscreen'.""",
                checker_function=valid_task,
                equate=False),
        _Option(["-penalty", "penalty"],
                "Penalty for a nucleotide mismatch (integer, at most zero).",
                equate=False),
        _Option(["-reward", "reward"],
                "Reward for a nucleotide match (integer, at least zero).",
                equate=False),
        _Option(["-index_name", "index_name"],
                "MegaBLAST database index name.",
                equate=False),
    ]

    # Query filtering options:
    filtering_options = [
        _Option(["-dust", "dust"],
                """Filter query sequence with DUST (string).

                Format: 'yes', 'level window linker', or 'no' to disable.
                Default = '20 64 1'.
                """,
                equate=False),
        _Option(["-filtering_db", "filtering_db"],
                "BLAST database containing filtering elements (i.e. repeats).",
                equate=False),
        _Option(["-window_masker_taxid", "window_masker_taxid"],
                "Enable WindowMasker filtering using a Taxonomic ID (integer).",
                equate=False),
        _Option(["-window_masker_db", "window_masker_db"],
                "Enable WindowMasker filtering using this repeats database (string).",
                equate=False),
    ]

    # Restrict search or results:
    restriction_options = [
        _Option(["-perc_identity", "perc_identity"],
                "Percent identity (real, 0 to 100 inclusive).",
                equate=False),
    ]

    # Discontiguous MegaBLAST options (the two must be used together,
    # as their descriptions state):
    discontiguous_options = [
        _Option(["-template_type", "template_type"],
                """Discontiguous MegaBLAST template type (string).

                Allowed values: 'coding', 'coding_and_optimal' or 'optimal'
                Requires: template_length.""",
                checker_function=valid_template_type,
                equate=False),
        _Option(["-template_length", "template_length"],
                """Discontiguous MegaBLAST template length (integer).

                Allowed values: 16, 18, 21

                Requires: template_type.""",
                checker_function=valid_template_length,
                equate=False),
    ]

    # Extension options:
    extension_options = [
        _Switch(["-no_greedy", "no_greedy"],
                "Use non-greedy dynamic programming extension"),
        _Option(["-min_raw_gapped_score", "min_raw_gapped_score"],
                "Minimum raw gapped score to keep an alignment in the "
                "preliminary gapped and traceback stages (integer).",
                equate=False),
        _Switch(["-ungapped", "ungapped"],
                "Perform ungapped alignment only?"),
        _Option(["-off_diagonal_range", "off_diagonal_range"],
                """Number of off-diagonals to search for the 2nd hit (integer).

                Expects a positive integer, or 0 (default) to turn off.

                Added in BLAST 2.2.23+
                """,
                equate=False),
    ]

    # The concatenation keeps the options in their original order.
    self.parameters = (query_options + search_options + filtering_options
                       + restriction_options + discontiguous_options
                       + extension_options)
    _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
921
def _validate(self):
    """Check the blastn-specific option pairing, then run the base checks.

    Raises ValueError if exactly one of template_type and template_length
    has been set; the two options are only meaningful together.
    """
    # Comparing truth values is an exclusive-or: it is true exactly when
    # one of the two options is set and the other is not.
    if bool(self.template_type) != bool(self.template_length):
        raise ValueError("Options template_type and template_length "
                         "require each other.")
    _NcbiblastMain2SeqCommandline._validate(self)
927 928
class NcbiblastxCommandline(_NcbiblastMain2SeqCommandline):
    """Command line wrapper for blastx from the NCBI BLAST+ suite.

    blastx takes a nucleotide query and searches a protein database.  BLAST+
    is the C++ rewrite of BLAST, in which the NCBI replaced the old blastall
    tool with one program per kind of search, so this class takes the place
    of BlastallCommandline used with option -p blastx.

    >>> from Bio.Blast.Applications import NcbiblastxCommandline
    >>> cline = NcbiblastxCommandline(query="m_cold.fasta", db="nr", evalue=0.001)
    >>> cline
    NcbiblastxCommandline(cmd='blastx', query='m_cold.fasta', db='nr', evalue=0.001)
    >>> print cline
    blastx -query m_cold.fasta -db nr -evalue 0.001

    The command line is normally executed by calling cline(), or through the
    Python subprocess module; see the Biopython tutorial for details.
    """
    def __init__(self, cmd="blastx", **kwargs):
        """Register the blastx-specific options, then run the base initialiser."""
        strand_choices = ("both", "minus", "plus")

        def valid_strand(value):
            return value in strand_choices

        # Input query options:
        query_options = [
            _Option(["-strand", "strand"],
                    """Query strand(s) to search against database/subject.

                    Values allowed are "both" (default), "minus", "plus".""",
                    checker_function=valid_strand,
                    equate=False),
            _Option(["-query_gencode", "query_gencode"],
                    """Genetic code to use to translate query

                    Integer. Default is one.""",
                    equate=False),
        ]

        # General search options:
        search_options = [
            _Option(["-frame_shift_penalty", "frame_shift_penalty"],
                    """Frame shift penalty (integer, at least 1, default ignored) (OBSOLETE).

                    This was removed in BLAST 2.2.27+""",
                    equate=False),
            _Option(["-max_intron_length", "max_intron_length"],
                    """Maximum intron length (integer).

                    Length of the largest intron allowed in a translated nucleotide
                    sequence when linking multiple distinct alignments (a negative
                    value disables linking). Default zero.""",
                    equate=False),
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum word score such that the word is added to the "
                    "BLAST lookup table (float)",
                    equate=False),
            _Option(["-comp_based_stats", "comp_based_stats"],
                    """Use composition-based statistics for blastp, blastx, or tblastn:
                        D or d: default (equivalent to 2 )
                        0 or F or f: no composition-based statistics
                        1: Composition-based statistics as in NAR 29:2994-3005, 2001
                        2 or T or t : Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence properties
                        3: Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, unconditionally

                    For programs other than tblastn, must either be absent or be D, F or 0
                    Default = `2'
                    """,
                    equate=False),
        ]

        # Query filtering options:
        filtering_options = [
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5""",
                    equate=False),
        ]

        # Extension options:
        extension_options = [
            _Switch(["-ungapped", "ungapped"],
                    "Perform ungapped alignment only?"),
            _Switch(["-use_sw_tback", "use_sw_tback"],
                    "Compute locally optimal Smith-Waterman alignments?"),
        ]

        # The concatenation keeps the options in their original order.
        self.parameters = (query_options + search_options
                           + filtering_options + extension_options)
        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
1007 1008
class NcbitblastnCommandline(_NcbiblastMain2SeqCommandline):
    """Wrapper for the NCBI BLAST+ program tblastn.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p tblastn.

    >>> from Bio.Blast.Applications import NcbitblastnCommandline
    >>> cline = NcbitblastnCommandline(help=True)
    >>> cline
    NcbitblastnCommandline(cmd='tblastn', help=True)
    >>> print cline
    tblastn -help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """
    def __init__(self, cmd="tblastn", **kwargs):
        """Register the tblastn-specific options, then run the base initialiser.

        The options are stored on self.parameters before
        _NcbiblastMain2SeqCommandline.__init__ is called with cmd and any
        keyword arguments.
        """
        # Every value listed in the comp_based_stats description below.
        comp_based_stats_choices = ("0", "F", "f", "1", "2", "T", "t",
                                    "D", "d", "3")

        def valid_comp_based_stats(value):
            # Compare against whole values rather than searching a string
            # for a substring: this accepts the documented "f", rejects
            # fragments such as "Ft" or "", and (via str) lets integers
            # like 2 through instead of raising TypeError.
            return str(value) in comp_based_stats_choices

        self.parameters = [
            #General search options:
            _Option(["-db_gencode", "db_gencode"],
                    """Genetic code to use to translate database/subjects

                    Integer. Default is one.""",
                    equate=False),
            _Option(["-frame_shift_penalty", "frame_shift_penalty"],
                    """Frame shift penalty (integer, at least 1, default ignored) (OBSOLETE).

                    This was removed in BLAST 2.2.27+""",
                    equate=False),
            _Option(["-max_intron_length", "max_intron_length"],
                    """Maximum intron length (integer).

                    Length of the largest intron allowed in a translated nucleotide
                    sequence when linking multiple distinct alignments (a negative
                    value disables linking). Default zero.""",
                    equate=False),
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum word score such that the word is added to the BLAST lookup table (float)",
                    equate=False),
            _Option(["-comp_based_stats", "comp_based_stats"],
                    """Use composition-based statistics (string, default 2, i.e. True).

                    0, F or f: no composition-based statistics
                    1: Composition-based statistics as in NAR 29:2994-3005, 2001
                    2, T or t, D or d : Composition-based score adjustment as in
                       Bioinformatics 21:902-911, 2005, conditioned on sequence properties
                    3: Composition-based score adjustment as in Bioinformatics 21:902-911,
                       2005, unconditionally

                    Note that only tblastn supports values of 1 and 3.""",
                    checker_function=valid_comp_based_stats,
                    equate=False),
            #Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5""",
                    equate=False),
            #Extension options:
            _Switch(["-ungapped", "ungapped"],
                    "Perform ungapped alignment only?"),
            #Miscellaneous options:
            _Switch(["-use_sw_tback", "use_sw_tback"],
                    "Compute locally optimal Smith-Waterman alignments?"),
            #PSI-TBLASTN options:
            _Option(["-in_pssm", "in_pssm"],
                    """PSI-BLAST checkpoint file

                    Incompatible with: remote, query""",
                    filename=True,
                    equate=False),
            ]
        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
1087 1088
class NcbitblastxCommandline(_NcbiblastMain2SeqCommandline):
    """Wrapper for the NCBI BLAST+ program tblastx.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p tblastx.

    >>> from Bio.Blast.Applications import NcbitblastxCommandline
    >>> cline = NcbitblastxCommandline(help=True)
    >>> cline
    NcbitblastxCommandline(cmd='tblastx', help=True)
    >>> print cline
    tblastx -help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """
    def __init__(self, cmd="tblastx", **kwargs):
        """Register the tblastx-specific options, then run the base initialiser.

        The options are stored on self.parameters before
        _NcbiblastMain2SeqCommandline.__init__ is called with cmd and any
        keyword arguments.
        """
        self.parameters = [
            #Input query options:
            _Option(["-strand", "strand"],
                    """Query strand(s) to search against database/subject.

                    Values allowed are "both" (default), "minus", "plus".""",
                    checker_function=lambda value : value in ["both", "minus", "plus"],
                    equate=False),
            _Option(["-query_gencode", "query_gencode"],
                    """Genetic code to use to translate query

                    Integer. Default is one.""",
                    equate=False),
            #General search options:
            # db_gencode is the counterpart of query_gencode for the
            # database/subject side, so its help text must not say "query".
            _Option(["-db_gencode", "db_gencode"],
                    """Genetic code to use to translate database/subjects

                    Integer. Default is one.""",
                    equate=False),
            _Option(["-max_intron_length", "max_intron_length"],
                    """Maximum intron length (integer).

                    Length of the largest intron allowed in a translated nucleotide
                    sequence when linking multiple distinct alignments (a negative
                    value disables linking). Default zero.""",
                    equate=False),
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum word score such that the word is added to the "
                    "BLAST lookup table (float)",
                    equate=False),
            #Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5""",
                    equate=False),
            ]
        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
1150 1151
class NcbipsiblastCommandline(_Ncbiblast2SeqCommandline):
    """Wrapper for the NCBI BLAST+ program psiblast.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastpgp tool with a similar tool psiblast. This wrapper
    therefore replaces BlastpgpCommandline, the wrapper for blastpgp.

    >>> from Bio.Blast.Applications import NcbipsiblastCommandline
    >>> cline = NcbipsiblastCommandline(help=True)
    >>> cline
    NcbipsiblastCommandline(cmd='psiblast', help=True)
    >>> print cline
    psiblast -help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """
    def __init__(self, cmd="psiblast", **kwargs):
        """Register the psiblast-specific options, then run the base initialiser.

        The options are stored on self.parameters before
        _Ncbiblast2SeqCommandline.__init__ is called with cmd and any
        keyword arguments.
        """
        # Every value listed in the comp_based_stats description below.
        comp_based_stats_choices = ("0", "F", "f", "2", "T", "t", "D", "d")

        def valid_comp_based_stats(value):
            # Compare against whole values rather than searching a string
            # for a substring: this accepts the documented "f", rejects
            # fragments such as "Ft" or "", and (via str) lets integers
            # like 2 through instead of raising TypeError.
            return str(value) in comp_based_stats_choices

        self.parameters = [
            #General search options:
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum word score such that the word is added to the "
                    "BLAST lookup table (float)",
                    equate=False),
            _Option(["-comp_based_stats", "comp_based_stats"],
                    """Use composition-based statistics (string, default 2, i.e. True).

                    0, F or f: no composition-based statistics
                    2, T or t, D or d : Composition-based score adjustment
                    as in Bioinformatics 21:902-911, 2005, conditioned on
                    sequence properties

                    Note that tblastn also supports values of 1 and 3.""",
                    checker_function=valid_comp_based_stats,
                    equate=False),
            #Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5""",
                    equate=False),
            #Extension options:
            _Option(["-gap_trigger", "gap_trigger"],
                    "Number of bits to trigger gapping (float, default 22)",
                    equate=False),
            #Miscellaneous options:
            _Switch(["-use_sw_tback", "use_sw_tback"],
                    "Compute locally optimal Smith-Waterman alignments?"),
            #PSI-BLAST options:
            _Option(["-num_iterations", "num_iterations"],
                    """Number of iterations to perform, integer

                    Integer of at least one. Default is one.
                    Incompatible with: remote""",
                    equate=False),
            _Option(["-out_pssm", "out_pssm"],
                    "File name to store checkpoint file",
                    filename=True,
                    equate=False),
            _Option(["-out_ascii_pssm", "out_ascii_pssm"],
                    "File name to store ASCII version of PSSM",
                    filename=True,
                    equate=False),
            _Option(["-in_msa", "in_msa"],
                    """File name of multiple sequence alignment to restart
                    PSI-BLAST

                    Incompatible with: in_pssm, query""",
                    filename=True,
                    equate=False),
            _Option(["-msa_master_idx", "msa_master_idx"],
                    """Index of sequence to use as master in MSA.

                    Index (1-based) of sequence to use as the master in the
                    multiple sequence alignment. If not specified, the first
                    sequence is used.""",
                    equate=False),
            _Option(["-in_pssm", "in_pssm"],
                    """PSI-BLAST checkpoint file

                    Incompatible with: in_msa, query, phi_pattern""",
                    filename=True,
                    equate=False),
            #PSSM engine options:
            _Option(["-pseudocount", "pseudocount"],
                    """Pseudo-count value used when constructing PSSM

                    Integer. Default is zero.""",
                    equate=False),
            _Option(["-inclusion_ethresh", "inclusion_ethresh"],
                    """E-value inclusion threshold for pairwise alignments

                    Float. Default is 0.002.""",
                    equate=False),
            _Switch(["-ignore_msa_master", "ignore_msa_master"],
                    """Ignore the master sequence when creating PSSM

                    * Requires: in_msa
                    * Incompatible with: msa_master_idx, in_pssm, query,
                      query_loc, phi_pattern
                    """),
            #PHI-BLAST options:
            _Option(["-phi_pattern", "phi_pattern"],
                    """File name containing pattern to search

                    Incompatible with: in_pssm""",
                    filename=True,
                    equate=False),
            ]
        _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check psiblast's mutually exclusive options, then the base checks.

        The mapping below pairs each option with the options it must not be
        combined with; it is handed to self._validate_incompatibilities
        before the _Ncbiblast2SeqCommandline validation runs.
        """
        incompatibles = {"num_iterations":["remote"],
                         "in_msa":["in_pssm", "query"],
                         "in_pssm":["in_msa","query","phi_pattern"],
                         "ignore_msa_master":["msa_master_idx", "in_pssm",
                                              "query", "query_loc", "phi_pattern"],
                         }
        self._validate_incompatibilities(incompatibles)
        _Ncbiblast2SeqCommandline._validate(self)
1276 1277
class NcbirpsblastCommandline(_NcbiblastCommandline):
    """Command line wrapper for rpsblast from the NCBI BLAST+ suite.

    BLAST+ is the C++ rewrite of BLAST; in it the NCBI replaced the old
    rpsblast tool with a similar tool of the same name.  This class is the
    replacement for RpsBlastCommandline, which wrapped the old rpsblast.

    >>> from Bio.Blast.Applications import NcbirpsblastCommandline
    >>> cline = NcbirpsblastCommandline(help=True)
    >>> cline
    NcbirpsblastCommandline(cmd='rpsblast', help=True)
    >>> print cline
    rpsblast -help

    The command line is normally executed by calling cline(), or through the
    Python subprocess module; see the Biopython tutorial for details.
    """
    def __init__(self, cmd="rpsblast", **kwargs):
        """Register the rpsblast-specific options, then run the base initialiser."""
        # Query filtering options:
        seg_option = _Option(
            ["-seg", "seg"],
            """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5""",
            equate=False)

        # Restrict search or results:
        culling_limit_option = _Option(
            ["-culling_limit", "culling_limit"],
            """Hit culling limit (integer).

                    If the query range of a hit is enveloped by that of at
                    least this many higher-scoring hits, delete the hit.

                    Incompatible with: best_hit_overhang, best_hit_score_edge.
                    """,
            equate=False)
        best_hit_overhang_option = _Option(
            ["-best_hit_overhang", "best_hit_overhang"],
            """Best Hit algorithm overhang value (recommended value: 0.1)

                    Float between 0.0 and 0.5 inclusive.

                    Incompatible with: culling_limit.""",
            equate=False)
        best_hit_score_edge_option = _Option(
            ["-best_hit_score_edge", "best_hit_score_edge"],
            """Best Hit algorithm score edge value (recommended value: 0.1)

                    Float between 0.0 and 0.5 inclusive.

                    Incompatible with: culling_limit.""",
            equate=False)

        self.parameters = [
            seg_option,
            culling_limit_option,
            best_hit_overhang_option,
            best_hit_score_edge_option,
        ]
        _NcbiblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check that culling_limit is not mixed with the best-hit options.

        The incompatibility mapping is handed to
        self._validate_incompatibilities before the _NcbiblastCommandline
        validation runs.
        """
        self._validate_incompatibilities(
            {"culling_limit": ["best_hit_overhang", "best_hit_score_edge"]})
        _NcbiblastCommandline._validate(self)
1335 1336
class NcbirpstblastnCommandline(_NcbiblastCommandline):
    """Command line wrapper for rpstblastn from the NCBI BLAST+ suite.

    BLAST+ is the C++ rewrite of BLAST; in it the NCBI replaced the old
    rpsblast tool with a similar tool of the same name, plus a separate
    tool, rpstblastn, for Translated Reverse Position Specific BLAST.

    >>> from Bio.Blast.Applications import NcbirpstblastnCommandline
    >>> cline = NcbirpstblastnCommandline(help=True)
    >>> cline
    NcbirpstblastnCommandline(cmd='rpstblastn', help=True)
    >>> print cline
    rpstblastn -help

    The command line is normally executed by calling cline(), or through the
    Python subprocess module; see the Biopython tutorial for details.
    """
    def __init__(self, cmd="rpstblastn", **kwargs):
        """Register the rpstblastn-specific options, then run the base initialiser."""
        strand_choices = ("both", "minus", "plus")

        def valid_strand(value):
            return value in strand_choices

        # Input query options:
        query_options = [
            _Option(["-strand", "strand"],
                    """Query strand(s) to search against database/subject.

                    Values allowed are "both" (default), "minus", "plus".""",
                    checker_function=valid_strand,
                    equate=False),
            _Option(["-query_gencode", "query_gencode"],
                    """Genetic code to use to translate query

                    Integer. Default is one.""",
                    equate=False),
        ]

        # Query filtering options:
        filtering_options = [
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5""",
                    equate=False),
        ]

        # Extension options:
        extension_options = [
            _Switch(["-ungapped", "ungapped"],
                    "Perform ungapped alignment only?"),
        ]

        # The concatenation keeps the options in their original order.
        self.parameters = query_options + filtering_options + extension_options
        _NcbiblastCommandline.__init__(self, cmd, **kwargs)
1383 1384
class NcbiblastformatterCommandline(_NcbibaseblastCommandline):
    """Command line wrapper for blast_formatter from the NCBI BLAST+ suite.

    With BLAST 2.2.24+ (BLAST+ being the BLAST suite rewritten in C++
    instead of C) the NCBI added an ASN.1 output format to all the search
    tools and extended blast_formatter to accept it as input.  The
    blast_formatter command converts that ASN.1 output into the other
    output formats (XML, tabular, plain text, HTML).

    >>> from Bio.Blast.Applications import NcbiblastformatterCommandline
    >>> cline = NcbiblastformatterCommandline(archive="example.asn", outfmt=5, out="example.xml")
    >>> cline
    NcbiblastformatterCommandline(cmd='blast_formatter', out='example.xml', outfmt=5, archive='example.asn')
    >>> print cline
    blast_formatter -out example.xml -outfmt 5 -archive example.asn

    The command line is normally executed by calling cline(), or through the
    Python subprocess module; see the Biopython tutorial for details.

    This wrapper targets the blast_formatter shipped with BLAST 2.2.24+ (or
    later), the release in which the NCBI first announced the inclusion of
    this tool.  An earlier version existed in BLAST 2.2.23+ (and possibly in
    older releases), but it lacked the -archive option (-rid was a mandatory
    argument instead) and is not supported by this wrapper.
    """
    def __init__(self, cmd="blast_formatter", **kwargs):
        """Register the blast_formatter options, then run the base initialiser."""
        def at_least_one(value):
            return value >= 1

        # Input options
        input_options = [
            _Option(["-rid", "rid"],
                    "BLAST Request ID (RID), not compatible with archive arg",
                    equate=False),
            _Option(["-archive", "archive"],
                    "Archive file of results, not compatible with rid arg.",
                    filename=True,
                    equate=False),
        ]

        # Restrict search or results
        restriction_options = [
            _Option(["-max_target_seqs", "max_target_seqs"],
                    "Maximum number of aligned sequences to keep",
                    checker_function=at_least_one,
                    equate=False),
        ]

        # The concatenation keeps the options in their original order.
        self.parameters = input_options + restriction_options
        _NcbibaseblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check that rid and archive are not combined, then the base checks.

        The incompatibility mapping is handed to
        self._validate_incompatibilities before the
        _NcbibaseblastCommandline validation runs.
        """
        self._validate_incompatibilities({"rid": ["archive"]})
        _NcbibaseblastCommandline._validate(self)
1434 1435
def _test():
    """Run the Bio.Blast.Applications module's doctests, verbosely.

    doctest.testmod is called without a module argument, so it examines
    __main__, which is this module when the file is run as a script.
    """
    from doctest import testmod
    testmod(verbose=1)
if __name__ == "__main__":
    # Executed as a script rather than imported: run the doctests.
    _test()