Package Bio :: Package Compass
[hide private]
[frames] | no frames]

Source Code for Package Bio.Compass

  1  # Copyright 2004 by James Casbon.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """ 
  7  Code to deal with COMPASS output, a program for profile/profile comparison. 
  8   
  9  Compass is described in: 
 10   
 11  Sadreyev R, Grishin N. COMPASS: a tool for comparison of multiple protein 
 12  alignments with assessment of statistical significance. J Mol Biol. 2003 Feb 
 13  7;326(1):317-36. 
 14   
 15  Tested with COMPASS 1.24. 
 16   
 17  Functions: 
 18  read          Reads a COMPASS file containing one COMPASS record 
 19  parse         Iterates over records in a COMPASS file. 
 20   
 21  Classes: 
 22  Record        One result of a COMPASS file 
 23  """ 
 24  import re 
 25   
 26   
27 -def read(handle):
28 record = None 29 try: 30 line = next(handle) 31 record = Record() 32 __read_names(record, line) 33 line = next(handle) 34 __read_threshold(record, line) 35 line = next(handle) 36 __read_lengths(record, line) 37 line = next(handle) 38 __read_profilewidth(record, line) 39 line = next(handle) 40 __read_scores(record, line) 41 except StopIteration: 42 if not record: 43 raise ValueError("No record found in handle") 44 else: 45 raise ValueError("Unexpected end of stream.") 46 for line in handle: 47 if not line.strip(): # skip empty lines 48 continue 49 __read_query_alignment(record, line) 50 try: 51 line = next(handle) 52 __read_positive_alignment(record, line) 53 line = next(handle) 54 __read_hit_alignment(record, line) 55 except StopIteration: 56 raise ValueError("Unexpected end of stream.") 57 return record
58 59
60 -def parse(handle):
61 record = None 62 try: 63 line = next(handle) 64 except StopIteration: 65 return 66 while True: 67 try: 68 record = Record() 69 __read_names(record, line) 70 line = next(handle) 71 __read_threshold(record, line) 72 line = next(handle) 73 __read_lengths(record, line) 74 line = next(handle) 75 __read_profilewidth(record, line) 76 line = next(handle) 77 __read_scores(record, line) 78 except StopIteration: 79 raise ValueError("Unexpected end of stream.") 80 for line in handle: 81 if not line.strip(): 82 continue 83 if "Ali1:" in line: 84 yield record 85 break 86 __read_query_alignment(record, line) 87 try: 88 line = next(handle) 89 __read_positive_alignment(record, line) 90 line = next(handle) 91 __read_hit_alignment(record, line) 92 except StopIteration: 93 raise ValueError("Unexpected end of stream.") 94 else: 95 yield record 96 break
97 98
99 -class Record(object):
100 """ 101 Hold information from one compass hit. 102 Ali1 is the query, Ali2 the hit. 103 """ 104
105 - def __init__(self):
106 self.query='' 107 self.hit='' 108 self.gap_threshold=0 109 self.query_length=0 110 self.query_filtered_length=0 111 self.query_nseqs=0 112 self.query_neffseqs=0 113 self.hit_length=0 114 self.hit_filtered_length=0 115 self.hit_nseqs=0 116 self.hit_neffseqs=0 117 self.sw_score=0 118 self.evalue=-1 119 self.query_start=-1 120 self.hit_start=-1 121 self.query_aln='' 122 self.hit_aln='' 123 self.positives=''
124
125 - def query_coverage(self):
126 """Return the length of the query covered in the alignment.""" 127 s = self.query_aln.replace("=", "") 128 return len(s)
129
130 - def hit_coverage(self):
131 """Return the length of the hit covered in the alignment.""" 132 s = self.hit_aln.replace("=", "") 133 return len(s)
134 135 # Everything below is private 136 137 __regex = {"names": re.compile("Ali1:\s+(\S+)\s+Ali2:\s+(\S+)\s+"), 138 "threshold": re.compile("Threshold of effective gap content in columns: (\S+)"), 139 "lengths": re.compile("length1=(\S+)\s+filtered_length1=(\S+)\s+length2=(\S+)\s+filtered_length2=(\S+)"), 140 "profilewidth": re.compile("Nseqs1=(\S+)\s+Neff1=(\S+)\s+Nseqs2=(\S+)\s+Neff2=(\S+)"), 141 "scores": re.compile("Smith-Waterman score = (\S+)\s+Evalue = (\S+)"), 142 "start": re.compile("(\d+)"), 143 "align": re.compile("^.{15}(\S+)"), 144 "positive_alignment": re.compile("^.{15}(.+)"), 145 } 146 147
148 -def __read_names(record, line):
149 """ 150 Ali1: 60456.blo.gz.aln Ali2: allscop//14984.blo.gz.aln 151 ------query----- -------hit------------- 152 """ 153 if not "Ali1:" in line: 154 raise ValueError("Line does not contain 'Ali1:':\n%s" % line) 155 m = __regex["names"].search(line) 156 record.query = m.group(1) 157 record.hit = m.group(2)
158 159
160 -def __read_threshold(record, line):
161 if not line.startswith("Threshold"): 162 raise ValueError("Line does not start with 'Threshold':\n%s" % line) 163 m = __regex["threshold"].search(line) 164 record.gap_threshold = float(m.group(1))
165 166
167 -def __read_lengths(record, line):
168 if not line.startswith("length1="): 169 raise ValueError("Line does not start with 'length1=':\n%s" % line) 170 m = __regex["lengths"].search(line) 171 record.query_length = int(m.group(1)) 172 record.query_filtered_length = float(m.group(2)) 173 record.hit_length = int(m.group(3)) 174 record.hit_filtered_length = float(m.group(4))
175 176
177 -def __read_profilewidth(record, line):
178 if not "Nseqs1" in line: 179 raise ValueError("Line does not contain 'Nseqs1':\n%s" % line) 180 m = __regex["profilewidth"].search(line) 181 record.query_nseqs = int(m.group(1)) 182 record.query_neffseqs = float(m.group(2)) 183 record.hit_nseqs = int(m.group(3)) 184 record.hit_neffseqs = float(m.group(4))
185 186
187 -def __read_scores(record, line):
188 if not line.startswith("Smith-Waterman"): 189 raise ValueError("Line does not start with 'Smith-Waterman':\n%s" % line) 190 m = __regex["scores"].search(line) 191 if m: 192 record.sw_score = int(m.group(1)) 193 record.evalue = float(m.group(2)) 194 else: 195 record.sw_score = 0 196 record.evalue = -1.0
197 198
199 -def __read_query_alignment(record, line):
200 m = __regex["start"].search(line) 201 if m: 202 record.query_start = int(m.group(1)) 203 m = __regex["align"].match(line) 204 assert m is not None, "invalid match" 205 record.query_aln += m.group(1)
206 207
208 -def __read_positive_alignment(record, line):
209 m = __regex["positive_alignment"].match(line) 210 assert m is not None, "invalid match" 211 record.positives += m.group(1)
212 213
214 -def __read_hit_alignment(record, line):
215 m = __regex["start"].search(line) 216 if m: 217 record.hit_start = int(m.group(1)) 218 m = __regex["align"].match(line) 219 assert m is not None, "invalid match" 220 record.hit_aln += m.group(1)
221