Package Bio :: Package Blast :: Module Applications
[hide private]
[frames] | no frames]

Source Code for Module Bio.Blast.Applications

   1  # Copyright 2001 Brad Chapman. 
   2  # Revisions copyright 2009-2010 by Peter Cock. 
   3  # Revisions copyright 2010 by Phillip Garland. 
   4  # All rights reserved. 
   5  # This code is part of the Biopython distribution and governed by its 
   6  # license.  Please see the LICENSE file that should have been included 
   7  # as part of this package. 
   8  """Definitions for interacting with BLAST related applications. 
   9   
  10  Obsolete wrappers for the old/classic NCBI BLAST tools (written in C): 
  11   
  12  - FastacmdCommandline 
  13  - BlastallCommandline 
  14  - BlastpgpCommandline 
  15  - RpsBlastCommandline 
  16   
  17  Wrappers for the new NCBI BLAST+ tools (written in C++): 
  18   
  19  - NcbiblastpCommandline - Protein-Protein BLAST 
  20  - NcbiblastnCommandline - Nucleotide-Nucleotide BLAST 
  21  - NcbiblastxCommandline - Translated Query-Protein Subject BLAST 
  22  - NcbitblastnCommandline - Protein Query-Translated Subject BLAST 
  23  - NcbitblastxCommandline - Translated Query-Translated Subject BLAST 
  24  - NcbipsiblastCommandline - Position-Specific Initiated BLAST 
  25  - NcbirpsblastCommandline - Reverse Position Specific BLAST 
  26  - NcbirpstblastnCommandline - Translated Reverse Position Specific BLAST 
  27  - NcbiblastformatterCommandline - Convert ASN.1 to other BLAST output formats 
  28   
  29  For further details, see: 
  30   
  31  Camacho et al. BLAST+: architecture and applications 
  32  BMC Bioinformatics 2009, 10:421 
  33  doi:10.1186/1471-2105-10-421 
  34  """ 
  35  from Bio.Application import _Option, AbstractCommandline, _Switch 
  36   
class FastacmdCommandline(AbstractCommandline):
    """Command line wrapper for the NCBI fastacmd tool (OBSOLETE).

    Two options are defined, both mandatory: database (-d) names the
    database to retrieve from, and search_string (-s) gives the id to
    search for.
    """
    def __init__(self, cmd="fastacmd", **kwargs):
        database_option = _Option(["-d", "database"],
                                  "The database to retrieve from.",
                                  is_required=True,
                                  equate=False)
        search_option = _Option(["-s", "search_string"],
                                "The id to search for.",
                                is_required=True,
                                equate=False)
        # Set up the option list, then hand over to the base class.
        self.parameters = [database_option, search_option]
        AbstractCommandline.__init__(self, cmd, **kwargs)
53 54
class _BlastCommandLine(AbstractCommandline):
    """Shared base class for the classic NCBI BLAST wrappers (PRIVATE).

    Intended for subclassing only.  It defines the options which
    blastall, blastpgp and rpsblast have in common.  A subclass is
    expected to assign its own option list to self.parameters before
    calling this constructor; the shared options are put in front.
    """
    def __init__(self, cmd=None, **kwargs):
        assert cmd is not None
        # One row per option, in command line order:
        # (names, description, keyword arguments other than equate=False)
        option_table = [
            (["-d", "database"],
             "The database to BLAST against.",
             {"is_required": True}),
            (["-i", "infile"],
             "The sequence to search with.",
             {"filename": True, "is_required": True}),
            (["-e", "expectation"],
             "Expectation value cutoff.",
             {}),
            (["-m", "align_view"],
             "Alignment view. Integer 0-11. Use 7 for XML output.",
             {}),
            (["-o", "align_outfile", "outfile"],
             "Output file for alignment.",
             {"filename": True}),
            (["-y", "xdrop_extension"],
             "Dropoff for blast extensions.",
             {}),
            (["-F", "filter"],
             "Filter query sequence with SEG? T/F",
             {}),
            (["-X", "xdrop"],
             "Dropoff value (bits) for gapped alignments.",
             {}),
            (["-I", "show_gi"],
             "Show GI's in deflines? T/F",
             {}),
            (["-J", "believe_query"],
             "Believe the query defline? T/F",
             {}),
            (["-Z", "xdrop_final"],
             "X dropoff for final gapped alignment.",
             {}),
            (["-z", "db_length"],
             "Effective database length.",
             {}),
            (["-O", "seqalign_file"],
             "seqalign file to output.",
             {"filename": True}),
            (["-v", "descriptions"],
             "Number of one-line descriptions.",
             {}),
            (["-b", "alignments"],
             "Number of alignments.",
             {}),
            (["-Y", "search_length"],
             "Effective length of search space (use zero for the "
             "real size).",
             {}),
            (["-T", "html"],
             "Produce HTML output? T/F",
             {}),
            (["-U", "case_filter"],
             "Use lower case filtering of FASTA sequence? T/F",
             {}),
            (["-a", "nprocessors"],
             "Number of processors to use.",
             {}),
            (["-g", "gapped"],
             "Whether to do a gapped alignment. T/F",
             {}),
            ]
        # The help switch comes first, followed by the table rows.
        shared_options = [
            _Switch(["--help", "help"],
                    "Print USAGE, DESCRIPTION and ARGUMENTS description; ignore other arguments."),
            ]
        for names, description, extras in option_table:
            shared_options.append(_Option(names, description,
                                          equate=False, **extras))
        # Shared options go at the start, just in case any of the
        # subclass arguments must come last.  If the subclass did not
        # set self.parameters, only the shared options are used.
        self.parameters = shared_options + getattr(self, "parameters", [])
        AbstractCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Run the base class validation unless help was requested."""
        # With --help set, the normally mandatory arguments (like the
        # database) are not wanted, so the checks are skipped.
        if not self.help:
            AbstractCommandline._validate(self)
147 148
class _BlastAllOrPgpCommandLine(_BlastCommandLine):
    """Shared base class for the blastall and blastpgp wrappers (PRIVATE).

    Intended for subclassing only.  It adds the options which blastall
    and blastpgp share with each other but not with rpsblast.
    """
    def __init__(self, cmd=None, **kwargs):
        assert cmd is not None
        # (names, description) pairs in command line order; every one
        # is a plain option built with equate=False.
        option_table = [
            (["-G", "gap_open"],
             "Gap open penalty"),
            (["-E", "gap_extend"],
             "Gap extension penalty"),
            (["-A", "window_size"],
             "Multiple hits window size"),
            (["-f", "hit_extend"],
             "Threshold for extending hits."),
            (["-K", "keep_hits"],
             " Number of best hits from a region to keep."),
            (["-W", "wordsize"],
             "Word size"),
            (["-P", "passes"],
             "Hits/passes. Integer 0-2. 0 for multiple hit, "
             "1 for single hit (does not apply to blastn)"),
            ]
        shared_options = [_Option(names, description, equate=False)
                          for names, description in option_table]
        # Shared options go at the start, just in case any of the
        # subclass arguments must come last.  If the subclass did not
        # set self.parameters, only the shared options are used.
        self.parameters = shared_options + getattr(self, "parameters", [])
        _BlastCommandLine.__init__(self, cmd, **kwargs)
189 190
class BlastallCommandline(_BlastAllOrPgpCommandLine):
    """Create a commandline for the blastall program from NCBI (OBSOLETE).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    are replacing blastall with separate tools blastn, blastp, blastx, tblastn
    and tblastx.

    Like blastall, this wrapper is now obsolete, and will be deprecated and
    removed in a future release of Biopython.

    >>> from Bio.Blast.Applications import BlastallCommandline
    >>> cline = BlastallCommandline(program="blastx", infile="m_cold.fasta",
    ...                             database="nr", expectation=0.001)
    >>> cline
    BlastallCommandline(cmd='blastall', database='nr', infile='m_cold.fasta', expectation=0.001, program='blastx')
    >>> print cline
    blastall -d nr -i m_cold.fasta -e 0.001 -p blastx

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """
    #TODO - This could use more checking for valid parameters to the program.
    def __init__(self, cmd="blastall",**kwargs):
        # Issued on every instantiation; the wrapper itself still works.
        import warnings
        warnings.warn("Like blastall, this wrapper is now obsolete, and will be deprecated and removed in a future release of Biopython.", PendingDeprecationWarning)
        self.parameters = [
            #Sorted in the same order as the output from blastall --help
            #which should make it easier to keep them up to date in future.
            #Note that some arguments are defined in the base classes (above).
            _Option(["-p", "program"],
                    "The blast program to use (e.g. blastp, blastn).",
                    is_required=True,
                    equate=False),
            _Option(["-q", "nuc_mismatch"],
                    "Penalty for a nucleotide mismatch (blastn only).",
                    equate=False),
            _Option(["-r", "nuc_match"],
                    "Reward for a nucleotide match (blastn only).",
                    equate=False),
            _Option(["-Q", "query_genetic_code"],
                    "Query Genetic code to use.",
                    equate=False),
            _Option(["-D", "db_genetic_code"],
                    "DB Genetic code (for tblast[nx] only).",
                    equate=False),
            _Option(["-M", "matrix"],
                    "Matrix to use",
                    equate=False),
            _Option(["-S", "strands"],
                    "Query strands to search against database (for blast[nx], "
                    "and tblastx). 3 is both, 1 is top, 2 is bottom.",
                    equate=False),
            _Option(["-l", "restrict_gi"],
                    "Restrict search of database to list of GI's.",
                    equate=False),
            _Option(["-R", "checkpoint"],
                    "PSI-TBLASTN checkpoint input file.",
                    filename=True,
                    equate=False),
            _Option(["-n", "megablast"],
                    "MegaBlast search T/F.",
                    equate=False),
            #The old name "region_length" is for consistency with our
            #old blastall function wrapper:
            _Option(["-L", "region_length", "range_restriction"],
                    """Location on query sequence (string format start,end).

                    In older versions of BLAST, -L set the length of region
                    used to judge hits (see -K parameter).""",
                    equate=False),
            #The misspelt old name "frame_shit_penalty" is kept (in the
            #middle, like "region_length" above) so existing callers using
            #it should keep working; "frame_shift_penalty" is the corrected
            #name.  NOTE(review): confirm Bio.Application treats the middle
            #name as an alias for keyword arguments and set_parameter.
            _Option(["-w", "frame_shit_penalty", "frame_shift_penalty"],
                    "Frame shift penalty (OOF algorithm for blastx).",
                    equate=False),
            _Option(["-t", "largest_intron"],
                    "Length of the largest intron allowed in a translated "
                    "nucleotide sequence when linking multiple distinct "
                    "alignments. (0 invokes default behavior; a negative value "
                    "disables linking.)",
                    equate=False),
            _Option(["-B", "num_concatenated_queries"],
                    "Number of concatenated queries, for blastn and tblastn.",
                    equate=False),
            _Option(["-V", "oldengine"],
                    "Force use of the legacy BLAST engine.",
                    equate=False),
            _Option(["-C", "composition_based"],
                    """Use composition-based statistics for tblastn:
                    D or d: default (equivalent to F)
                    0 or F or f: no composition-based statistics
                    1 or T or t: Composition-based statistics as in NAR 29:2994-3005, 2001
                    2: Composition-based score adjustment as in Bioinformatics
                        21:902-911, 2005, conditioned on sequence properties
                    3: Composition-based score adjustment as in Bioinformatics
                        21:902-911, 2005, unconditionally
                    For programs other than tblastn, must either be absent or be
                    D, F or 0.""",
                    equate=False),
            _Option(["-s", "smith_waterman"],
                    "Compute locally optimal Smith-Waterman alignments (This "
                    "option is only available for gapped tblastn.) T/F",
                    equate=False),
            ]
        # The base classes prepend the options shared with the other
        # classic BLAST wrappers to self.parameters.
        _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
294 295
class BlastpgpCommandline(_BlastAllOrPgpCommandLine):
    """Wrapper for the classic NCBI blastpgp program (OBSOLETE).

    With BLAST+ (the rewrite of BLAST in C++ rather than C), the NCBI are
    replacing blastpgp with a renamed tool, psiblast.  This module offers
    NcbipsiblastCommandline as the wrapper for that new tool.

    Just like blastpgp (and blastall), this wrapper is now obsolete, and
    will be deprecated and removed in a future release of Biopython.

    >>> from Bio.Blast.Applications import BlastpgpCommandline
    >>> cline = BlastpgpCommandline(help=True)
    >>> cline
    BlastpgpCommandline(cmd='blastpgp', help=True)
    >>> print cline
    blastpgp --help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """
    def __init__(self, cmd="blastpgp",**kwargs):
        import warnings
        warnings.warn("Like blastpgp (and blastall), this wrapper is now obsolete, and will be deprecated and removed in a future release of Biopython.", PendingDeprecationWarning)
        # One row per option, in command line order:
        # (names, description, keyword arguments other than equate=False)
        option_table = [
            (["-C", "checkpoint_outfile"],
             "Output file for PSI-BLAST checkpointing.",
             {"filename": True}),
            (["-R", "restart_infile"],
             "Input file for PSI-BLAST restart.",
             {"filename": True}),
            (["-k", "hit_infile"],
             "Hit file for PHI-BLAST.",
             {"filename": True}),
            (["-Q", "matrix_outfile"],
             "Output file for PSI-BLAST matrix in ASCII.",
             {"filename": True}),
            (["-B", "align_infile"],
             "Input alignment file for PSI-BLAST restart.",
             {"filename": True}),
            (["-S", "required_start"],
             "Start of required region in query.",
             {}),
            (["-H", "required_end"],
             "End of required region in query.",
             {}),
            (["-j", "npasses"],
             "Number of passes",
             {}),
            (["-N", "nbits_gapping"],
             "Number of bits to trigger gapping.",
             {}),
            (["-c", "pseudocounts"],
             "Pseudocounts constants for multiple passes.",
             {}),
            (["-h", "model_threshold"],
             "E-value threshold to include in multipass model.",
             {}),
            # Does the old name "region_length" for -L make sense?
            (["-L", "region_length"],
             "Cost to decline alignment (disabled when zero).",
             {}),
            (["-M", "matrix"],
             "Matrix (string, default BLOSUM62).",
             {}),
            (["-p", "program"],
             "The blast program to use (e.g blastpgp, patseedp or seedp).",
             {"is_required": True}),
            ]
        self.parameters = [_Option(names, description, equate=False, **extras)
                           for names, description, extras in option_table]
        _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
371 372
class RpsBlastCommandline(_BlastCommandLine):
    """Create a commandline for the classic rpsblast program from NCBI (OBSOLETE).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    are replacing the old rpsblast with a new version of the same name plus a
    second tool rpstblastn, both taking different command line arguments. This
    module provides NcbirpsblastCommandline and NcbirpstblastnCommandline as
    wrappers for the new tools.

    Like the old rpsblast (and blastall), this wrapper is now obsolete, and will
    be deprecated and removed in a future release of Biopython.

    >>> from Bio.Blast.Applications import RpsBlastCommandline
    >>> cline = RpsBlastCommandline(help=True)
    >>> cline
    RpsBlastCommandline(cmd='rpsblast', help=True)
    >>> print cline
    rpsblast --help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """
    def __init__(self, cmd="rpsblast",**kwargs):
        # Issued on every instantiation; the wrapper itself still works.
        import warnings
        warnings.warn("Like the old rpsblast (and blastall), this wrapper is now obsolete, and will be deprecated and removed in a future release of Biopython.", PendingDeprecationWarning)
        self.parameters = [
            #Note -N is also in blastpgp, but not blastall
            _Option(["-N", "nbits_gapping"],
                    "Number of bits to trigger gapping.",
                    equate=False),
            #Note blastall and blastpgp wrappers have -P with name "passes".
            #If this is the same thing, we should be consistent!
            _Option(["-P", "multihit"],
                    "0 for multiple hit, 1 for single hit",
                    equate=False),
            _Option(["-l", "logfile"],
                    "Logfile name.",
                    filename=True,
                    equate=False),
            _Option(["-p", "protein"],
                    "Query sequence is protein. T/F",
                    equate=False),
            _Option(["-L", "range_restriction"],
                    "Location on query sequence (string format start,end).",
                    equate=False),
            ]
        # The base class prepends the options shared by all the classic
        # BLAST wrappers to self.parameters.
        _BlastCommandLine.__init__(self, cmd, **kwargs)
420 421 ############################################################################## 422 # Legacy BLAST wrappers above, (new) BLAST+ wrappers below 423 ############################################################################## 424
class _NcbibaseblastCommandline(AbstractCommandline):
    """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).

    This is provided for subclassing, it deals with shared options
    common to all the BLAST tools (blastn, rpsblast, etc AND
    blast_formatter).

    A subclass is expected to assign its own option list to
    self.parameters before calling this constructor; the shared options
    defined here are put in front of it.
    """
    def __init__(self, cmd=None, **kwargs):
        assert cmd is not None
        extra_parameters = [
            #Core:
            _Switch(["-h", "h"],
                    "Print USAGE and DESCRIPTION;  ignore other arguments."),
            _Switch(["-help", "help"],
                    "Print USAGE, DESCRIPTION and ARGUMENTS description; "
                    "ignore other arguments."),
            _Switch(["-version", "version"],
                    "Print version number;  ignore other arguments."),
            # Output configuration options
            _Option(["-out", "out"],
                    "Output file for alignment.",
                    filename=True,
                    equate=False),
            #Formatting options:
            _Option(["-outfmt", "outfmt"],
                    "Alignment view.  Integer 0-11.  Use 5 for XML output "
                    "(differs from classic BLAST which used 7 for XML).",
                    equate=False),
            #TODO - Document and test the column options
            _Switch(["-show_gis","show_gis"],
                    "Show NCBI GIs in deflines?"),
            _Option(["-num_descriptions","num_descriptions"],
                    """Number of database sequences to show one-line descriptions for.

                    Integer argument (at least zero). Default is 500.
                    See also num_alignments.""",
                    equate=False),
            # The help text used to describe itself in terms of
            # "num_alignments" and cross-reference itself; it now points
            # at the sibling option num_descriptions instead.
            _Option(["-num_alignments","num_alignments"],
                    """Number of database sequences to show alignments for.

                    Integer argument (at least zero). Default is 200.
                    See also num_descriptions.""",
                    equate=False),
            _Switch(["-html", "html"],
                    "Produce HTML output? See also the outfmt option."),
            #Miscellaneous options
            _Switch(["-parse_deflines", "parse_deflines"],
                    "Should the query and subject defline(s) be parsed?"),
            ]
        try:
            #Insert extra parameters - at the start just in case there
            #are any arguments which must come last:
            self.parameters = extra_parameters + self.parameters
        except AttributeError:
            #Should we raise an error?  The subclass should have set this up!
            self.parameters = extra_parameters
        AbstractCommandline.__init__(self, cmd, **kwargs)

    def _validate_incompatibilities(self, incompatibles):
        """Used by the BLAST+ _validate method (PRIVATE).

        The incompatibles argument is a dictionary mapping an option
        name to a list of option names which must not be used together
        with it.  A ValueError is raised for the first pair found where
        both options have a true value.
        """
        for a in incompatibles:
            if self._get_parameter(a):
                for b in incompatibles[a]:
                    if self._get_parameter(b):
                        raise ValueError("Options %s and %s are incompatible."
                                         % (a, b))
491 492
class _NcbiblastCommandline(_NcbibaseblastCommandline):
    """Shared base class for the BLAST+ search tool wrappers (PRIVATE).

    Intended for subclassing only.  It defines the query, database,
    filtering, statistics, extension and search strategy options which
    the BLAST+ search tools (blastn, rpsblast, etc) have in common.  A
    subclass is expected to assign its own option list to
    self.parameters before calling this constructor.
    """
    def __init__(self, cmd=None, **kwargs):
        assert cmd is not None
        shared_options = [
            # Input query options:
            _Option(["-query", "query"],
                    "The sequence to search with.",
                    filename=True,
                    equate=False), # Should this be required?
            _Option(["-query_loc", "query_loc"],
                    "Location on the query sequence (Format: start-stop)",
                    equate=False),
            # General search options:
            _Option(["-db", "db"],
                    "The database to BLAST against.",
                    equate=False),
            _Option(["-evalue", "evalue"],
                    "Expectation value cutoff.",
                    equate=False),
            _Option(["-word_size","word_size"],
                    """Word size for wordfinder algorithm.

                    Integer. Minimum 2.""",
                    equate=False),
            # BLAST-2-Sequences options are defined in a subclass, and
            # the formatting options in the base class.
            # Query filtering options:
            # TODO -soft_masking <Boolean>, is this a switch or an option?
            _Switch(["-lcase_masking", "lcase_masking"],
                    "Use lower case filtering in query and subject sequence(s)?"),
            # Restrict search or results:
            _Option(["-gilist", "gilist"],
                    """Restrict search of database to list of GI's.

                    Incompatible with: negative_gilist, seqidlist, remote, subject, subject_loc""",
                    filename=True,
                    equate=False),
            _Option(["-negative_gilist", "negative_gilist"],
                    """Restrict search of database to everything except the listed GIs.

                    Incompatible with: gilist, seqidlist, remote, subject, subject_loc""",
                    filename=True,
                    equate=False),
            _Option(["-seqidlist", "seqidlist"],
                    """Restrict search of database to list of SeqID's.

                    Incompatible with: gilist, negative_gilist, remote, subject, subject_loc""",
                    filename=True,
                    equate=False),
            _Option(["-entrez_query", "entrez_query"],
                    "Restrict search with the given Entrez query (requires remote).",
                    equate=False),
            _Option(["-max_target_seqs", "max_target_seqs"],
                    """Maximum number of aligned sequences to keep.

                    Integer argument (at least one).""",
                    equate=False),
            # Statistical options:
            _Option(["-dbsize", "dbsize"],
                    "Effective length of the database (integer)",
                    equate=False),
            _Option(["-searchsp", "searchsp"],
                    "Effective length of the search space (integer)",
                    equate=False),
            # Extension options:
            _Option(["-xdrop_ungap", "xdrop_ungap"],
                    "X-dropoff value (in bits) for ungapped extensions. Float.",
                    equate=False),
            _Option(["-xdrop_gap", "xdrop_gap"],
                    "X-dropoff value (in bits) for preliminary gapped extensions. Float.",
                    equate=False),
            _Option(["-xdrop_gap_final", "xdrop_gap_final"],
                    "X-dropoff value (in bits) for final gapped alignment. Float.",
                    equate=False),
            _Option(["-window_size", "window_size"],
                    "Multiple hits window size, use 0 to specify 1-hit algorithm. Integer.",
                    equate=False),
            # Search strategy options:
            _Option(["-import_search_strategy", "import_search_strategy"],
                    """Search strategy to use.

                    Incompatible with: export_search_strategy""",
                    filename=True,
                    equate=False),
            _Option(["-export_search_strategy", "export_search_strategy"],
                    """File name to record the search strategy used.

                    Incompatible with: import_search_strategy""",
                    filename=True,
                    equate=False),
            # Miscellaneous options:
            _Option(["-num_threads", "num_threads"],
                    """Number of threads to use in the BLAST search.

                    Integer of at least one. Default is one.
                    Incompatible with: remote""",
                    equate=False),
            _Switch(["-remote", "remote"],
                    """Execute search remotely?

                    Incompatible with: gilist, negative_gilist, subject_loc, num_threads, ..."""),
            ]
        # Shared options go at the start, just in case any of the
        # subclass arguments must come last.  If the subclass did not
        # set self.parameters, only the shared options are used.
        self.parameters = shared_options + getattr(self, "parameters", [])
        _NcbibaseblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check for clashing options, then run the generic validation.

        Raises ValueError if two mutually exclusive options are both
        set, or if entrez_query is used without remote.
        """
        conflicts = {}
        conflicts["remote"] = ["gilist", "negative_gilist", "num_threads"]
        conflicts["import_search_strategy"] = ["export_search_strategy"]
        conflicts["gilist"] = ["negative_gilist"]
        conflicts["seqidlist"] = ["gilist", "negative_gilist", "remote"]
        self._validate_incompatibilities(conflicts)
        if self.entrez_query:
            if not self.remote:
                raise ValueError("Option entrez_query requires remote option.")
        AbstractCommandline._validate(self)
622 623
class _Ncbiblast2SeqCommandline(_NcbiblastCommandline):
    """Shared base class for BLAST+ tools doing two-sequence BLAST (PRIVATE).

    Intended for subclassing only.  It adds the gap cost, subject
    sequence and hit culling options of the tools which support
    BLAST-2-Sequences (blastn, psiblast, etc), which excludes rpsblast
    and rpstblastn.
    """
    def __init__(self, cmd=None, **kwargs):
        assert cmd is not None
        shared_options = [
            # General search options:
            _Option(["-gapopen", "gapopen"],
                    "Cost to open a gap (integer).",
                    equate=False),
            _Option(["-gapextend", "gapextend"],
                    "Cost to extend a gap (integer).",
                    equate=False),
            # BLAST-2-Sequences options:
            _Option(["-subject", "subject"],
                    """Subject sequence(s) to search.

                    Incompatible with: db, gilist, negative_gilist.
                    See also subject_loc.""",
                    filename=True,
                    equate=False),
            _Option(["-subject_loc", "subject_loc"],
                    """Location on the subject sequence (Format: start-stop)

                    Incompatible with: db, gilist, seqidlist, negative_gilist,
                    db_soft_mask, db_hard_mask, remote.

                    See also subject.""",
                    equate=False),
            # Restrict search or results:
            _Option(["-culling_limit", "culling_limit"],
                    """Hit culling limit (integer).

                    If the query range of a hit is enveloped by that of at
                    least this many higher-scoring hits, delete the hit.

                    Incompatible with: best_hit_overhang, best_hit_score_edge.
                    """,
                    equate=False),
            _Option(["-best_hit_overhang", "best_hit_overhang"],
                    """Best Hit algorithm overhang value (recommended value: 0.1)

                    Float between 0.0 and 0.5 inclusive.

                    Incompatible with: culling_limit.""",
                    equate=False),
            _Option(["-best_hit_score_edge", "best_hit_score_edge"],
                    """Best Hit algorithm score edge value (recommended value: 0.1)

                    Float between 0.0 and 0.5 inclusive.

                    Incompatible with: culling_limit.""",
                    equate=False),
            ]
        # Shared options go at the start, just in case any of the
        # subclass arguments must come last.  If the subclass did not
        # set self.parameters, only the shared options are used.
        self.parameters = shared_options + getattr(self, "parameters", [])
        _NcbiblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check the two-sequence options for clashes, then defer upwards.

        Raises ValueError if two mutually exclusive options are both set.
        """
        conflicts = {}
        conflicts["subject_loc"] = ["db", "gilist", "negative_gilist",
                                    "seqidlist", "remote"]
        conflicts["culling_limit"] = ["best_hit_overhang",
                                      "best_hit_score_edge"]
        conflicts["subject"] = ["db", "gilist", "negative_gilist",
                                "seqidlist"]
        self._validate_incompatibilities(conflicts)
        _NcbiblastCommandline._validate(self)
698 699
class _NcbiblastMain2SeqCommandline(_Ncbiblast2SeqCommandline):
    """Shared base class for the main BLAST+ tool wrappers (PRIVATE).

    Intended for subclassing only.  It adds the database masking
    options shared by blastp, blastn, blastx, tblastx and tblastn,
    which psiblast, rpsblast and rpstblastn do not have.
    """
    def __init__(self, cmd=None, **kwargs):
        assert cmd is not None
        # Restrict search or results:
        soft_mask = _Option(["-db_soft_mask", "db_soft_mask"],
                    """Filtering algorithm for soft masking (integer).

                    Filtering algorithm ID to apply to the BLAST database as soft masking.

                    Incompatible with: db_hard_mask, subject, subject_loc""",
                    equate=False)
        hard_mask = _Option(["-db_hard_mask", "db_hard_mask"],
                    """Filtering algorithm for hard masking (integer).

                    Filtering algorithm ID to apply to the BLAST database as hard masking.

                    Incompatible with: db_soft_mask, subject, subject_loc""",
                    equate=False)
        # Shared options go at the start, just in case any of the
        # subclass arguments must come last.  If the subclass did not
        # set self.parameters, only the shared options are used.
        self.parameters = [soft_mask, hard_mask] \
                          + getattr(self, "parameters", [])
        _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check the masking options for clashes, then defer upwards.

        Raises ValueError if two mutually exclusive options are both set.
        """
        conflicts = {}
        conflicts["db_soft_mask"] = ["db_hard_mask", "subject", "subject_loc"]
        conflicts["db_hard_mask"] = ["db_soft_mask", "subject", "subject_loc"]
        self._validate_incompatibilities(conflicts)
        _Ncbiblast2SeqCommandline._validate(self)
740
class NcbiblastpCommandline(_NcbiblastMain2SeqCommandline):
    """Create a commandline for the NCBI BLAST+ program blastp (for proteins).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p blastp.

    >>> from Bio.Blast.Applications import NcbiblastpCommandline
    >>> cline = NcbiblastpCommandline(query="rosemary.pro", db="nr",
    ...                               evalue=0.001, remote=True, ungapped=True)
    >>> cline
    NcbiblastpCommandline(cmd='blastp', query='rosemary.pro', db='nr', evalue=0.001, remote=True, ungapped=True)
    >>> print cline
    blastp -query rosemary.pro -db nr -evalue 0.001 -remote -ungapped

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """
    def __init__(self, cmd="blastp", **kwargs):
        self.parameters = [
            #General search options:
            _Option(["-task", "task"],
                    "Task to execute (string, blastp (default) or blastp-short).",
                    checker_function=lambda value : value in ["blastp",
                                                              "blastp-short"],
                    equate=False),
            #Like every other option in this module, the matrix name is
            #given with equate=False (this was previously left out here).
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum word score such that the word is added to the "
                    "BLAST lookup table (float)",
                    equate=False),
            #The checker compares against the explicit list of documented
            #values.  The old substring test against "0Ft2TtDd" rejected
            #"f", accepted "" and "Ft", and failed with a TypeError for
            #integers; str() lets the integers 0 and 2 be used as well.
            _Option(["-comp_based_stats", "comp_based_stats"],
                    """Use composition-based statistics (string, default 2, i.e. True).

                    0, F or f: no composition-based statistics
                    2, T or t, D or d : Composition-based score adjustment as in
                    Bioinformatics 21:902-911, 2005, conditioned on sequence properties

                    Note that tblastn also supports values of 1 and 3.""",
                    checker_function=lambda value : str(value) in ["0", "F", "f",
                                                                   "2", "T", "t",
                                                                   "D", "d"],
                    equate=False),
            #Query filtering options:
            #The trailing full stop keeps the closing double quote of the
            #default value from being taken as part of the string delimiter.
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5".""",
                    equate=False),
            #Extension options:
            _Switch(["-ungapped", "ungapped"],
                    "Perform ungapped alignment only?"),
            #Miscellaneous options:
            _Switch(["-use_sw_tback", "use_sw_tback"],
                    "Compute locally optimal Smith-Waterman alignments?"),
            ]
        # The base classes prepend the options shared with the other
        # BLAST+ wrappers to self.parameters.
        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
798 799
800 -class NcbiblastnCommandline(_NcbiblastMain2SeqCommandline):
801 """Wrapper for the NCBI BLAST+ program blastn (for nucleotides). 802 803 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI 804 replaced the old blastall tool with separate tools for each of the searches. 805 This wrapper therefore replaces BlastallCommandline with option -p blastn. 806 807 For example, to run a search against the "nt" nucleotide database using the 808 FASTA nucleotide file "m_code.fasta" as the query, with an expectation value 809 cut off of 0.001, saving the output to a file in XML format: 810 811 >>> from Bio.Blast.Applications import NcbiblastnCommandline 812 >>> cline = NcbiblastnCommandline(query="m_cold.fasta", db="nt", strand="plus", 813 ... evalue=0.001, out="m_cold.xml", outfmt=5) 814 >>> cline 815 NcbiblastnCommandline(cmd='blastn', out='m_cold.xml', outfmt=5, query='m_cold.fasta', db='nt', evalue=0.001, strand='plus') 816 >>> print cline 817 blastn -out m_cold.xml -outfmt 5 -query m_cold.fasta -db nt -evalue 0.001 -strand plus 818 819 You would typically run the command line with cline() or via the Python 820 subprocess module, as described in the Biopython tutorial. 821 """
822 - def __init__(self, cmd="blastn", **kwargs):
823 self.parameters = [ 824 #Input query options: 825 _Option(["-strand", "strand"], 826 """Query strand(s) to search against database/subject. 827 828 Values allowed are "both" (default), "minus", "plus".""", 829 checker_function=lambda value : value in ["both", 830 "minus", 831 "plus"], 832 equate=False),