Package Bio :: Package Blast :: Module Record
[hide private]
[frames | no frames]

Source Code for Module Bio.Blast.Record

  1  # Copyright 1999-2000 by Jeffrey Chang.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Record classes to hold BLAST output. 
  7   
  8  Classes: 
  9  Blast              Holds all the information from a blast search. 
 10  PSIBlast           Holds all the information from a psi-blast search. 
 11   
 12  Header             Holds information from the header. 
 13  Description        Holds information about one hit description. 
 14  Alignment          Holds information about one alignment hit. 
 15  HSP                Holds information about one HSP. 
 16  MultipleAlignment  Holds information about a multiple alignment. 
 17  DatabaseReport     Holds information from the database report. 
 18  Parameters         Holds information from the parameters. 
 19   
 20  """ 
 21  # XXX finish printable BLAST output 
 22   
 23  from Bio.Seq import Seq 
 24  from Bio.SeqRecord import SeqRecord 
 25  from Bio.Align import MultipleSeqAlignment 
 26   
 27   
class Header(object):
    """Container for the fields read from a BLAST report header.

    Members:
    application         The name of the BLAST flavor that generated this data.
    version             Version of blast used.
    date                Date this data was generated.
    reference           Reference for blast.

    query               Name of query sequence.
    query_letters       Number of letters in the query sequence.  (int)

    database            Name of the database.
    database_sequences  Number of sequences in the database.  (int)
    database_letters    Number of letters in the database.  (int)

    """

    def __init__(self):
        """Start with empty strings for text fields and None for counts."""
        # Program identification (all immutable strings, so sharing is safe).
        self.application = self.version = self.date = self.reference = ''

        # Query details.
        self.query = ''
        self.query_letters = None

        # Database details.
        self.database = ''
        self.database_sequences = self.database_letters = None
57 58
class Description(object):
    """One entry of the hit descriptions section of a BLAST report.

    Members:
    title           Title of the hit.
    score           Number of bits.  (int)
    bits            Bit score.  (float)
    e               E value.  (float)
    num_alignments  Number of alignments for the same subject.  (int)
    """

    def __init__(self):
        """Start with an empty title and every numeric field unset (None)."""
        self.title = ''
        self.score = self.bits = self.e = self.num_alignments = None

    def __str__(self):
        """Return the title, score and E value as one space-separated line.

        The title is left-justified in 66 columns and the score is
        right-justified in 5 columns; the E value is appended as-is.
        """
        columns = [
            "%-66s" % (self.title,),
            "%5s" % (self.score,),
            "%s" % (self.e,),
        ]
        return " ".join(columns)
78 79
class Alignment(object):
    """One hit from the alignments section of a BLAST report.

    Members:
    title    Name.
    hit_id   Hit identifier.  (str)
    hit_def  Hit definition.  (str)
    length   Length.  (int)
    hsps     A list of HSP objects.

    """

    def __init__(self):
        """Start with empty text fields, no length and no HSPs."""
        self.title = self.hit_id = self.hit_def = ''
        self.length = None
        self.hsps = []

    def __str__(self):
        """Return the title lines followed by a "Length = ..." line.

        The title is split on newlines and every piece after the first
        is placed on its own line with a leading space.
        """
        pieces = self.title.split('\n') + ["Length = %s\n" % self.length]
        return '\n '.join(pieces)
102 103
class HSP(object):
    """Stores information about one hsp in an alignment hit.

    Members:
    score           BLAST score of hit.  (float)
    bits            Number of bits for that score.  (float)
    expect          Expect value.  (float)
    num_alignments  Number of alignments for same subject.  (int)
    identities      Number of identities (int) if using the XML parser.
                    Tuple of number of identities/total aligned (int, int)
                    if using the (obsolete) plain text parser.
    positives       Number of positives (int) if using the XML parser.
                    Tuple of number of positives/total aligned (int, int)
                    if using the (obsolete) plain text parser.
    gaps            Number of gaps (int) if using the XML parser.
                    Tuple of number of gaps/total aligned (int, int) if
                    using the (obsolete) plain text parser.
    align_length    Length of the alignment.  (int)
    strand          Tuple of (query, target) strand.
    frame           Tuple of 1 or 2 frame shifts, depending on the flavor.

    query           The query sequence.
    query_start     The start residue for the query sequence.  (1-based)
    query_end       The end residue for the query sequence.  (1-based)
    match           The match sequence.
    sbjct           The sbjct sequence.
    sbjct_start     The start residue for the sbjct sequence.  (1-based)
    sbjct_end       The end residue for the sbjct sequence.  (1-based)

    Not all flavors of BLAST return values for every attribute:
              score     expect     identities   positives    strand  frame
    BLASTP     X          X            X            X
    BLASTN     X          X            X            X          X
    BLASTX     X          X            X            X                  X
    TBLASTN    X          X            X            X                  X
    TBLASTX    X          X            X            X                 X/X

    Note: for BLASTX, the query sequence is shown as a protein sequence,
    but the numbering is based on the nucleotides.  Thus, the numbering
    is 3x larger than the number of amino acid residues.  A similar effect
    can be seen for the sbjct sequence in TBLASTN, and for both sequences
    in TBLASTX.

    Also, for negative frames, the sequence numbering starts from
    query_start and counts down.

    """

    def __init__(self):
        """Start with every score unset and every sequence empty."""
        self.score = None
        self.bits = None
        self.expect = None
        self.num_alignments = None
        self.identities = (None, None)
        self.positives = (None, None)
        self.gaps = (None, None)
        self.align_length = None
        self.strand = (None, None)
        self.frame = ()

        self.query = ''
        self.query_start = None
        self.query_end = None
        self.match = ''
        self.sbjct = ''
        self.sbjct_start = None
        self.sbjct_end = None

    def __str__(self):
        """Return a four-line summary: scores, query, match and sbjct.

        Alignments of 50 or more columns are abbreviated to their first
        45 and last 3 characters joined by "...".  The match line is
        indented so that it sits directly under the query and sbjct
        residues.

        score, bits, expect and align_length must be numbers; formatting
        them while they are still None raises TypeError.
        """
        lines = ["Score %i (%i bits), expectation %0.1e, alignment length %i"
                 % (self.score, self.bits, self.expect, self.align_length)]

        rows = [str(self.query), str(self.match), str(self.sbjct)]
        if self.align_length >= 50:
            # Too wide to show in full: keep the head and the tail only.
            rows = [row[:45] + "..." + row[-3:] for row in rows]
        query, match, sbjct = rows

        start_width = 8
        # The sequence on the Query/Sbjct lines begins after the six
        # character label, the right-justified start and one space; the
        # match line needs the same offset to stay aligned with them.
        match_indent = " " * (len("Query:") + start_width + 1)

        lines.append("Query:%s %s %s"
                     % (str(self.query_start).rjust(start_width),
                        query,
                        str(self.query_end)))
        lines.append(match_indent + match)
        lines.append("Sbjct:%s %s %s"
                     % (str(self.sbjct_start).rjust(start_width),
                        sbjct,
                        str(self.sbjct_end)))
        return "\n".join(lines)
198 199
class MultipleAlignment(object):
    """Holds information about a multiple alignment.

    Members:
    alignment  A list of tuples (name, start residue, sequence, end residue).

    The start residue is 1-based.  It may be blank, if that sequence is
    not aligned in the multiple alignment.

    """

    def __init__(self):
        """Start with no alignment rows."""
        self.alignment = []

    def to_generic(self, alphabet):
        """Retrieve generic alignment object for the given alignment.

        Instead of the tuples, this returns a MultipleSeqAlignment object
        from Bio.Align, through which you can manipulate and query
        the object.

        alphabet is the specified alphabet for the sequences in the code (for
        example IUPAC.IUPACProtein).

        Thanks to James Casbon for the code.
        """
        labels = []
        sequences = []
        block_count = 0
        row = 0
        for label, _start, fragment, _end in self.alignment:
            # A row named QUERY opens each alignment block.
            if label == 'QUERY':
                block_count += 1
                row = 0

            if block_count != 1:
                # Later blocks extend the rows collected from the first
                # block, matched up by their position within the block.
                sequences[row] += fragment
                row += 1
            else:
                # The first block defines the rows and their names.
                labels.append(label)
                sequences.append(fragment)

        generic = MultipleSeqAlignment([], alphabet)
        for label, sequence in zip(labels, sequences):
            record = SeqRecord(Seq(sequence, alphabet), label)
            generic.append(record)

        return generic
247 248
class Round(object):
    """Holds information from a PSI-BLAST round.

    Members:
    number              Round number.  (int)
    reused_seqs         Sequences in model, found again.  List of
                        Description objects.
    new_seqs            Sequences not found, or below threshold.  List of
                        Description.
    alignments          A list of Alignment objects.
    multiple_alignment  A MultipleAlignment object.
    """

    def __init__(self):
        """Start with no round number, empty lists and no multiple alignment."""
        self.number = None
        # Three separate list objects, one per attribute.
        self.reused_seqs, self.new_seqs, self.alignments = [], [], []
        self.multiple_alignment = None
265 266
class DatabaseReport(object):
    """Holds information about a database report.

    Members:
    database_name              List of database names.  (can have multiple dbs)
    num_letters_in_database    Number of letters in the database.
                               Initialised as an empty list.
    num_sequences_in_database  List of number of sequences in the database.
    posted_date                List of the dates the databases were posted.
    ka_params                  A tuple of (lambda, k, h) values.  (floats)
    gapped                     # XXX this isn't set right!
    ka_params_gap              A tuple of (lambda, k, h) values.  (floats)

    """

    def __init__(self):
        """Start with empty lists, unset Karlin-Altschul tuples and gapped = 0."""
        # Each attribute gets its own list object.
        self.database_name, self.posted_date = [], []
        self.num_letters_in_database, self.num_sequences_in_database = [], []
        self.ka_params = (None,) * 3
        self.gapped = 0
        self.ka_params_gap = (None,) * 3
288 289
class Parameters(object):
    """Holds information about the parameters.

    Members:
    matrix                       Name of the matrix.
    gap_penalties                Tuple of (open, extend) penalties.  (floats)
    sc_match                     Match score for nucleotide-nucleotide
                                 comparison
    sc_mismatch                  Mismatch penalty for nucleotide-nucleotide
                                 comparison
    num_hits                     Number of hits to the database.  (int)
    num_sequences                Number of sequences.  (int)
    num_good_extends             Number of extensions.  (int)
    num_seqs_better_e            Number of sequences better than e-value.  (int)
    hsps_no_gap                  Number of HSP's better, without gapping.  (int)
    hsps_prelim_gapped           Number of HSP's gapped in prelim test.  (int)
    hsps_prelim_gapped_attemped  Number of HSP's attempted in prelim.  (int)
    hsps_gapped                  Total number of HSP's gapped.  (int)
    query_length                 Length of the query.  (int)
    query_id                     Identifier of the query sequence.  (str)
    database_length              Number of letters in the database.  (int)
    effective_hsp_length         Effective HSP length.  (int)
    effective_query_length       Effective length of query.  (int)
    effective_database_length    Effective length of database.  (int)
    effective_search_space       Effective search space.  (int)
    effective_search_space_used  Effective search space used.  (int)
    frameshift                   Frameshift window.  Tuple of (int, float)
    threshold                    Threshold.  (int)
    window_size                  Window size.  (int)
    dropoff_1st_pass             Tuple of (score, bits).  (int, float)
    gap_x_dropoff                Tuple of (score, bits).  (int, float)
    gap_x_dropoff_final          Tuple of (score, bits).  (int, float)
    gap_trigger                  Tuple of (score, bits).  (int, float)
    blast_cutoff                 Tuple of (score, bits).  (int, float)
    """

    def __init__(self):
        """Set every parameter to its "not yet parsed" default."""
        unset_pair = (None, None)
        # (attribute, default) pairs; every default is immutable, so the
        # shared tuple is safe.  The table is local rather than a class
        # attribute so that subclasses with several bases cannot shadow it.
        defaults = (
            ('matrix', ''),
            ('gap_penalties', unset_pair),
            ('sc_match', None),
            ('sc_mismatch', None),
            ('num_hits', None),
            ('num_sequences', None),
            ('num_good_extends', None),
            ('num_seqs_better_e', None),
            ('hsps_no_gap', None),
            ('hsps_prelim_gapped', None),
            ('hsps_prelim_gapped_attemped', None),
            ('hsps_gapped', None),
            ('query_id', None),
            ('query_length', None),
            ('database_length', None),
            ('effective_hsp_length', None),
            ('effective_query_length', None),
            ('effective_database_length', None),
            ('effective_search_space', None),
            ('effective_search_space_used', None),
            ('frameshift', unset_pair),
            ('threshold', None),
            ('window_size', None),
            ('dropoff_1st_pass', unset_pair),
            ('gap_x_dropoff', unset_pair),
            ('gap_x_dropoff_final', unset_pair),
            ('gap_trigger', unset_pair),
            ('blast_cutoff', unset_pair),
        )
        for attribute, value in defaults:
            setattr(self, attribute, value)
352 353 354 #TODO - Add a friendly __str__ method to BLAST results
class Blast(Header, DatabaseReport, Parameters):
    """Saves the results from a blast search.

    Members:
    descriptions        A list of Description objects.
    alignments          A list of Alignment objects.
    multiple_alignment  A MultipleAlignment object.
    + members inherited from base classes

    """

    def __init__(self):
        """Run each base initialiser, then add the per-search containers."""
        # The bases do not chain to one another, so each one is called
        # explicitly, in declaration order.
        for base in (Header, DatabaseReport, Parameters):
            base.__init__(self)
        self.descriptions, self.alignments = [], []
        self.multiple_alignment = None
372 373
class PSIBlast(Header, DatabaseReport, Parameters):
    """Saves the results from a blastpgp search.

    Members:
    rounds     A list of Round objects.
    converged  Whether the search converged.
    + members inherited from base classes

    """

    def __init__(self):
        """Run each base initialiser, then add the PSI-BLAST specific state."""
        # The bases do not chain to one another, so each one is called
        # explicitly, in declaration order.
        for base in (Header, DatabaseReport, Parameters):
            base.__init__(self)
        self.rounds = []
        self.converged = 0
389