Package Bio :: Package Motif :: Module _Motif
[hide private]
[frames] | no frames]

Source Code for Module Bio.Motif._Motif

  1  # Copyright 2003-2009 by Bartek Wilczynski.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Implementation of sequence motifs (PRIVATE). 
  6  """ 
  7   
  8  from __future__ import print_function 
  9   
 10  from Bio._py3k import range 
 11   
 12  from Bio.Seq import Seq 
 13  from Bio.SubsMat import FreqTable 
 14  from Bio.Alphabet import IUPAC 
 15  import math 
 16  import random 
 17   
 18   
 19  __docformat__ = "restructuredtext en" 
 20   
 21   
22 -class Motif(object):
23 """ 24 A class representing sequence motifs. 25 """
26 - def __init__(self, alphabet=IUPAC.unambiguous_dna):
27 self.instances = [] 28 self.has_instances = False 29 self.counts = {} 30 self.has_counts = False 31 self.mask = [] 32 self._pwm_is_current = False 33 self._pwm = [] 34 self._log_odds_is_current = False 35 self._log_odds = [] 36 self.alphabet = alphabet 37 self.length = None 38 self.background = dict((n, 1.0 / len(self.alphabet.letters)) 39 for n in self.alphabet.letters) 40 self.beta = 1.0 41 self.info = None 42 self.name = ""
43
44 - def _check_length(self, len):
45 # TODO - Change parameter name (len clashes with built in function)? 46 if self.length is None: 47 self.length = len 48 elif self.length != len: 49 raise ValueError("You can't change the length of the motif " 50 "%r %r %r" % (self.length, self.instances, len))
51
52 - def _check_alphabet(self, alphabet):
53 if self.alphabet is None: 54 self.alphabet = alphabet 55 elif self.alphabet != alphabet: 56 raise ValueError("Wrong Alphabet")
57
58 - def add_instance(self, instance):
59 """ 60 adds new instance to the motif 61 """ 62 self._check_alphabet(instance.alphabet) 63 self._check_length(len(instance)) 64 if self.has_counts: 65 for i in range(self.length): 66 let = instance[i] 67 self.counts[let][i] += 1 68 69 if self.has_instances or not self.has_counts: 70 self.instances.append(instance) 71 self.has_instances = True 72 73 self._pwm_is_current = False 74 self._log_odds_is_current = False
75
76 - def set_mask(self, mask):
77 """ 78 sets the mask for the motif 79 80 The mask should be a string containing asterisks in the position of significant columns and spaces in other columns 81 """ 82 self._check_length(len(mask)) 83 self.mask = [] 84 for char in mask: 85 if char == "*": 86 self.mask.append(1) 87 elif char == " ": 88 self.mask.append(0) 89 else: 90 raise ValueError("Mask should contain only '*' or ' ' and not a '%s'" % char)
91
92 - def pwm(self, laplace=True):
93 """ 94 returns the PWM computed for the set of instances 95 96 if laplace=True (default), pseudocounts equal to self.background multiplied by self.beta are added to all positions. 97 """ 98 if self._pwm_is_current: 99 return self._pwm 100 # we need to compute new pwm 101 self._pwm = [] 102 for i in range(self.length): 103 dict = {} 104 # filling the dict with 0's 105 for letter in self.alphabet.letters: 106 if laplace: 107 dict[letter] = self.beta * self.background[letter] 108 else: 109 dict[letter] = 0.0 110 if self.has_counts: 111 # taking the raw counts 112 for letter in self.alphabet.letters: 113 dict[letter] += self.counts[letter][i] 114 elif self.has_instances: 115 # counting the occurences of letters in instances 116 for seq in self.instances: 117 # dict[seq[i]]=dict[seq[i]]+1 118 try: 119 dict[seq[i]] += 1 120 except KeyError: # we need to ignore non-alphabet letters 121 pass 122 self._pwm.append(FreqTable.FreqTable(dict, FreqTable.COUNT, self.alphabet)) 123 self._pwm_is_current = 1 124 return self._pwm
125
126 - def log_odds(self, laplace=True):
127 """ 128 returns the logg odds matrix computed for the set of instances 129 """ 130 131 if self._log_odds_is_current: 132 return self._log_odds 133 # we need to compute new pwm 134 self._log_odds = [] 135 pwm = self.pwm(laplace) 136 for i in range(self.length): 137 d = {} 138 for a in self.alphabet.letters: 139 d[a] = math.log(pwm[i][a] / self.background[a], 2) 140 self._log_odds.append(d) 141 self._log_odds_is_current = 1 142 return self._log_odds
143
144 - def ic(self):
145 """Method returning the information content of a motif. 146 """ 147 res = 0 148 pwm = self.pwm() 149 for i in range(self.length): 150 res += 2 151 for a in self.alphabet.letters: 152 if pwm[i][a] != 0: 153 res += pwm[i][a] * math.log(pwm[i][a], 2) 154 return res
155
156 - def exp_score(self, st_dev=False):
157 """ 158 Computes expected score of motif's instance and its standard deviation 159 """ 160 exs = 0.0 161 var = 0.0 162 pwm = self.pwm() 163 for i in range(self.length): 164 ex1 = 0.0 165 ex2 = 0.0 166 for a in self.alphabet.letters: 167 if pwm[i][a] != 0: 168 ex1 += pwm[i][a] * (math.log(pwm[i][a], 2) - math.log(self.background[a], 2)) 169 ex2 += pwm[i][a] * (math.log(pwm[i][a], 2) - math.log(self.background[a], 2)) ** 2 170 exs += ex1 171 var += ex2 - ex1 ** 2 172 if st_dev: 173 return exs, math.sqrt(var) 174 else: 175 return exs
176
177 - def search_instances(self, sequence):
178 """ 179 a generator function, returning found positions of instances of the motif in a given sequence 180 """ 181 if not self.has_instances: 182 raise ValueError("This motif has no instances") 183 for pos in range(0, len(sequence) - self.length + 1): 184 for instance in self.instances: 185 if str(instance) == str(sequence[pos:pos + self.length]): 186 yield (pos, instance) 187 break # no other instance will fit (we don't want to return multiple hits)
188
189 - def score_hit(self, sequence, position, normalized=0, masked=0):
190 """ 191 give the pwm score for a given position 192 """ 193 lo = self.log_odds() 194 score = 0.0 195 for pos in range(self.length): 196 a = sequence[position + pos] 197 if not masked or self.mask[pos]: 198 try: 199 score += lo[pos][a] 200 except: 201 pass 202 if normalized: 203 if not masked: 204 score /= self.length 205 else: 206 score /= len([x for x in self.mask if x]) 207 return score
208
209 - def search_pwm(self, sequence, normalized=0, masked=0, threshold=0.0, both=True):
210 """ 211 a generator function, returning found hits in a given sequence with the pwm score higher than the threshold 212 """ 213 if both: 214 rc = self.reverse_complement() 215 216 sequence = str(sequence).upper() 217 for pos in range(0, len(sequence) - self.length + 1): 218 score = self.score_hit(sequence, pos, normalized, masked) 219 if score > threshold: 220 yield (pos, score) 221 if both: 222 rev_score = rc.score_hit(sequence, pos, normalized, masked) 223 if rev_score > threshold: 224 yield (-pos, rev_score)
225
226 - def dist_pearson(self, motif, masked=0):
227 """ 228 return the similarity score based on pearson correlation for the given motif against self. 229 230 We use the Pearson's correlation of the respective probabilities. 231 """ 232 233 if self.alphabet != motif.alphabet: 234 raise ValueError("Cannot compare motifs with different alphabets") 235 236 max_p = -2 237 for offset in range(-self.length + 1, motif.length): 238 if offset < 0: 239 p = self.dist_pearson_at(motif, -offset) 240 else: # offset>=0 241 p = motif.dist_pearson_at(self, offset) 242 243 if max_p < p: 244 max_p = p 245 max_o = -offset 246 return 1 - max_p, max_o
247
248 - def dist_pearson_at(self, motif, offset):
249 sxx = 0 # \sum x^2 250 sxy = 0 # \sum x \cdot y 251 sx = 0 # \sum x 252 sy = 0 # \sum y 253 syy = 0 # \sum x^2 254 norm = max(self.length, offset + motif.length) 255 256 for pos in range(max(self.length, offset + motif.length)): 257 for l in self.alphabet.letters: 258 xi = self[pos][l] 259 yi = motif[pos - offset][l] 260 sx = sx + xi 261 sy = sy + yi 262 sxx = sxx + xi * xi 263 syy = syy + yi * yi 264 sxy = sxy + xi * yi 265 266 norm *= len(self.alphabet.letters) 267 s1 = (sxy - sx * sy * 1.0 / norm) 268 s2 = (norm * sxx - sx * sx * 1.0) * (norm * syy - sy * sy * 1.0) 269 return s1 / math.sqrt(s2)
270
271 - def dist_product(self, other):
272 """ 273 A similarity measure taking into account a product probability of generating overlaping instances of two motifs 274 """ 275 max_p = 0.0 276 for offset in range(-self.length + 1, other.length): 277 if offset < 0: 278 p = self.dist_product_at(other, -offset) 279 else: # offset>=0 280 p = other.dist_product_at(self, offset) 281 if max_p < p: 282 max_p = p 283 max_o = -offset 284 return 1 - max_p / self.dist_product_at(self, 0), max_o
285
286 - def dist_product_at(self, other, offset):
287 s = 0 288 for i in range(max(self.length, offset + other.length)): 289 f1 = self[i] 290 f2 = other[i - offset] 291 for n, b in self.background.items(): 292 s += b * f1[n] * f2[n] 293 return s / i
294
295 - def dist_dpq(self, other):
296 r"""Calculates the DPQ distance measure between motifs. 297 298 It is calculated as a maximal value of DPQ formula (shown using LaTeX 299 markup, familiar to mathematicians):: 300 301 \sqrt{\sum_{i=1}^{alignment.len()} \sum_{k=1}^alphabet.len() \ 302 \{ m1[i].freq(alphabet[k])*log_2(m1[i].freq(alphabet[k])/m2[i].freq(alphabet[k])) + 303 m2[i].freq(alphabet[k])*log_2(m2[i].freq(alphabet[k])/m1[i].freq(alphabet[k])) 304 } 305 306 over possible non-spaced alignemts of two motifs. See this reference: 307 308 D. M Endres and J. E Schindelin, "A new metric for probability 309 distributions", IEEE transactions on Information Theory 49, no. 7 310 (July 2003): 1858-1860. 311 """ 312 313 min_d = float("inf") 314 min_o = -1 315 d_s = [] 316 for offset in range(-self.length + 1, other.length): 317 # print("%2.3d"%offset) 318 if offset < 0: 319 d = self.dist_dpq_at(other, -offset) 320 overlap = self.length + offset 321 else: # offset>=0 322 d = other.dist_dpq_at(self, offset) 323 overlap = other.length - offset 324 overlap = min(self.length, other.length, overlap) 325 out = self.length + other.length - 2 * overlap 326 # print("%f %f %f" % (d,1.0*(overlap+out)/overlap,d*(overlap+out)/overlap)) 327 # d = d/(2*overlap) 328 d = (d / (out + overlap)) * (2 * overlap + out) / (2 * overlap) 329 # print(d) 330 d_s.append((offset, d)) 331 if min_d > d: 332 min_d = d 333 min_o = -offset 334 return min_d, min_o # ,d_s
335
336 - def dist_dpq_at(self, other, offset):
337 """ 338 calculates the dist_dpq measure with a given offset. 339 340 offset should satisfy 0<=offset<=len(self) 341 """ 342 def dpq(f1, f2, alpha): 343 s = 0 344 for n in alpha.letters: 345 avg = (f1[n] + f2[n]) / 2 346 s += f1[n] * math.log(f1[n] / avg, 2) + f2[n] * math.log(f2[n] / avg, 2) 347 return math.sqrt(s)
348 349 s = 0 350 for i in range(max(self.length, offset + other.length)): 351 f1 = self[i] 352 f2 = other[i - offset] 353 s += dpq(f1, f2, self.alphabet) 354 return s
355
356 - def _read(self, stream):
357 """Reads the motif from the stream (in AlignAce format). 358 359 the self.alphabet variable must be set beforehand. 360 If the last line contains asterisks it is used for setting mask 361 """ 362 363 while True: 364 ln = stream.readline() 365 if "*" in ln: 366 self.set_mask(ln.strip("\n\c")) 367 break 368 self.add_instance(Seq(ln.strip(), self.alphabet))
369
370 - def __str__(self, masked=False):
371 """ string representation of a motif. 372 """ 373 str = "".join(str(inst) + "\n" for inst in self.instances) 374 375 if masked: 376 for i in range(self.length): 377 if self.mask[i]: 378 str += "*" 379 else: 380 str += " " 381 str += "\n" 382 return str
383
384 - def __len__(self):
385 """return the length of a motif 386 387 Please use this method (i.e. invoke len(m)) instead of refering to the m.length directly. 388 """ 389 if self.length is None: 390 return 0 391 else: 392 return self.length
393
394 - def _write(self, stream):
395 """ 396 writes the motif to the stream 397 """ 398 399 stream.write(self.__str__())
400
401 - def _to_fasta(self):
402 """ 403 FASTA representation of motif 404 """ 405 if not self.has_instances: 406 self.make_instances_from_counts() 407 return "".join(">instance%d\n%s\n" % (i, inst) for i, inst in enumerate(self.instances))
408
409 - def reverse_complement(self):
410 """ 411 Gives the reverse complement of the motif 412 """ 413 res = Motif() 414 if self.has_instances: 415 for i in self.instances: 416 res.add_instance(i.reverse_complement()) 417 else: # has counts 418 res.has_counts = True 419 res.counts["A"] = self.counts["T"][:] 420 res.counts["T"] = self.counts["A"][:] 421 res.counts["G"] = self.counts["C"][:] 422 res.counts["C"] = self.counts["G"][:] 423 res.counts["A"].reverse() 424 res.counts["C"].reverse() 425 res.counts["G"].reverse() 426 res.counts["T"].reverse() 427 res.length = self.length 428 res.mask = self.mask 429 return res
430
431 - def _from_jaspar_pfm(self, stream, make_instances=False):
432 """ 433 reads the motif from Jaspar .pfm file 434 435 The instances are fake, but the pwm is accurate. 436 """ 437 return self._from_horiz_matrix(stream, letters="ACGT", make_instances=make_instances)
438
439 - def _from_vert_matrix(self, stream, letters=None, make_instances=False):
440 """reads a vertical count matrix from stream and fill in the counts. 441 """ 442 443 self.counts = {} 444 self.has_counts = True 445 if letters is None: 446 letters = self.alphabet.letters 447 self.length = 0 448 for i in letters: 449 self.counts[i] = [] 450 for ln in stream.readlines(): 451 rec = [float(x) for x in ln.strip().split()] 452 for k, v in zip(letters, rec): 453 self.counts[k].append(v) 454 self.length += 1 455 self.set_mask("*" * self.length) 456 if make_instances is True: 457 self.make_instances_from_counts() 458 return self
459
460 - def _from_horiz_matrix(self, stream, letters=None, make_instances=False):
461 """reads a horizontal count matrix from stream and fill in the counts. 462 """ 463 if letters is None: 464 letters = self.alphabet.letters 465 self.counts = {} 466 self.has_counts = True 467 468 for i in letters: 469 ln = stream.readline().strip().split() 470 # if there is a letter in the beginning, ignore it 471 if ln[0] == i: 472 ln = ln[1:] 473 # print(ln) 474 try: 475 self.counts[i] = [int(x) for x in ln] 476 except ValueError: # not integers 477 self.counts[i] = [float(x) for x in ln] 478 # print(counts[i]) 479 480 s = sum(self.counts[nuc][0] for nuc in letters) 481 l = len(self.counts[letters[0]]) 482 self.length = l 483 self.set_mask("*" * l) 484 if make_instances is True: 485 self.make_instances_from_counts() 486 return self
487
488 - def make_instances_from_counts(self):
489 """Creates "fake" instances for a motif created from a count matrix. 490 491 In case the sums of counts are different for different columnes, the 492 shorter columns are padded with background. 493 """ 494 alpha = "".join(self.alphabet.letters) 495 # col[i] is a column taken from aligned motif instances 496 col = [] 497 self.has_instances = True 498 self.instances = [] 499 s = sum(self.counts[nuc][0] for nuc in self.alphabet.letters) 500 for i in range(self.length): 501 col.append("") 502 for n in self.alphabet.letters: 503 col[i] = col[i] + n * (self.counts[n][i]) 504 if len(col[i]) < s: 505 print("WARNING, column too short %i %i" % (len(col[i]), s)) 506 col[i] += (alpha * s)[:(s - len(col[i]))] 507 # print("column %i, %s" % (i, col[i])) 508 # iterate over instances 509 for i in range(s): 510 inst = "" # start with empty seq 511 for j in range(self.length): # iterate over positions 512 inst += col[j][i] 513 # print("%i %s" % (i,inst) 514 inst = Seq(inst, self.alphabet) 515 self.add_instance(inst) 516 return self.instances
517
518 - def make_counts_from_instances(self):
519 """Creates the count matrix for a motif with instances. 520 521 """ 522 # make strings for "columns" of motifs 523 # col[i] is a column taken from aligned motif instances 524 counts = {} 525 for a in self.alphabet.letters: 526 counts[a] = [] 527 self.has_counts = True 528 s = len(self.instances) 529 for i in range(self.length): 530 ci = dict((a, 0) for a in self.alphabet.letters) 531 for inst in self.instances: 532 ci[inst[i]] += 1 533 for a in self.alphabet.letters: 534 counts[a].append(ci[a]) 535 self.counts = counts 536 return counts
537
538 - def _from_jaspar_sites(self, stream):
539 """ 540 reads the motif from Jaspar .sites file 541 542 The instances and pwm are OK. 543 """ 544 while True: 545 ln = stream.readline() # read the header "$>...." 546 if ln == "" or ln[0] != ">": 547 break 548 549 ln = stream.readline().strip() # read the actual sequence 550 i = 0 551 while ln[i] == ln[i].lower(): 552 i += 1 553 inst = "" 554 while i < len(ln) and ln[i] == ln[i].upper(): 555 inst += ln[i] 556 i += 1 557 inst = Seq(inst, self.alphabet) 558 self.add_instance(inst) 559 560 self.set_mask("*" * len(inst)) 561 return self
562
563 - def __getitem__(self, index):
564 """Returns the probability distribution over symbols at a given position, padding with background. 565 566 If the requested index is out of bounds, the returned distribution comes from background. 567 """ 568 if index in range(self.length): 569 return self.pwm()[index] 570 else: 571 return self.background
572
573 - def consensus(self):
574 """Returns the consensus sequence of a motif. 575 """ 576 res = "" 577 for i in range(self.length): 578 max_f = 0 579 max_n = "X" 580 for n in sorted(self[i]): 581 if self[i][n] > max_f: 582 max_f = self[i][n] 583 max_n = n 584 res += max_n 585 return Seq(res, self.alphabet)
586
587 - def anticonsensus(self):
588 """returns the least probable pattern to be generated from this motif. 589 """ 590 res = "" 591 for i in range(self.length): 592 min_f = 10.0 593 min_n = "X" 594 for n in sorted(self[i]): 595 if self[i][n] < min_f: 596 min_f = self[i][n] 597 min_n = n 598 res += min_n 599 return Seq(res, self.alphabet)
600
601 - def max_score(self):
602 """Maximal possible score for this motif. 603 604 returns the score computed for the consensus sequence. 605 """ 606 return self.score_hit(self.consensus(), 0)
607
608 - def min_score(self):
609 """Minimal possible score for this motif. 610 611 returns the score computed for the anticonsensus sequence. 612 """ 613 return self.score_hit(self.anticonsensus(), 0)
614
615 - def weblogo(self, fname, format="PNG", **kwds):
616 """ 617 uses the Berkeley weblogo service to download and save a weblogo of itself 618 619 requires an internet connection. 620 The parameters from ``**kwds`` are passed directly to the weblogo server. 621 """ 622 from Bio._py3k import urlopen, urlencode, Request 623 624 al = self._to_fasta() 625 url = 'http://weblogo.berkeley.edu/logo.cgi' 626 values = {'sequence': al, 627 'format': format, 628 'logowidth': '18', 629 'logoheight': '5', 630 'logounits': 'cm', 631 'kind': 'AUTO', 632 'firstnum': "1", 633 'command': 'Create Logo', 634 'smallsamplecorrection': "on", 635 'symbolsperline': 32, 636 'res': '96', 637 'res_units': 'ppi', 638 'antialias': 'on', 639 'title': '', 640 'barbits': '', 641 'xaxis': 'on', 642 'xaxis_label': '', 643 'yaxis': 'on', 644 'yaxis_label': '', 645 'showends': 'on', 646 'shrink': '0.5', 647 'fineprint': 'on', 648 'ticbits': '1', 649 'colorscheme': 'DEFAULT', 650 'color1': 'green', 651 'color2': 'blue', 652 'color3': 'red', 653 'color4': 'black', 654 'color5': 'purple', 655 'color6': 'orange', 656 'color1': 'black', 657 } 658 for k, v in kwds.items(): 659 values[k] = str(v) 660 661 data = urlencode(values) 662 req = Request(url, data) 663 response = urlopen(req) 664 with open(fname, "w") as f: 665 im = response.read() 666 f.write(im)
667
668 - def _to_transfac(self):
669 """Write the representation of a motif in TRANSFAC format 670 """ 671 res = "XX\nTY Motif\n" # header 672 try: 673 res += "ID %s\n" % self.name 674 except: 675 pass 676 res += "BF undef\nP0" 677 for a in self.alphabet.letters: 678 res += " %s" % a 679 res += "\n" 680 if not self.has_counts: 681 self.make_counts_from_instances() 682 for i in range(self.length): 683 if i < 9: 684 res += "0%d" % (i + 1) 685 else: 686 res += "%d" % (i + 1) 687 for a in self.alphabet.letters: 688 res += " %d" % self.counts[a][i] 689 res += "\n" 690 res += "XX\n" 691 return res
692
693 - def _to_vertical_matrix(self, letters=None):
694 """Return string representation of the motif as a matrix. 695 """ 696 if letters is None: 697 letters = self.alphabet.letters 698 self._pwm_is_current = False 699 pwm = self.pwm(laplace=False) 700 res = "" 701 for i in range(self.length): 702 res += "\t".join(str(pwm[i][a]) for a in letters) 703 res += "\n" 704 return res
705
706 - def _to_horizontal_matrix(self, letters=None, normalized=True):
707 """Return string representation of the motif as a matrix. 708 """ 709 if letters is None: 710 letters = self.alphabet.letters 711 res = "" 712 if normalized: # output PWM 713 self._pwm_is_current = False 714 mat = self.pwm(laplace=False) 715 for a in letters: 716 res += "\t".join(str(mat[i][a]) for i in range(self.length)) 717 res += "\n" 718 else: # output counts 719 if not self.has_counts: 720 self.make_counts_from_instances() 721 mat = self.counts 722 for a in letters: 723 res += "\t".join(str(mat[a][i]) for i in range(self.length)) 724 res += "\n" 725 return res
726
727 - def _to_jaspar_pfm(self):
728 """Returns the pfm representation of the motif 729 """ 730 return self._to_horizontal_matrix(normalized=False, letters="ACGT")
731
732 - def format(self, format):
733 """Returns a string representation of the Motif in a given format 734 735 Currently supported fromats: 736 - jaspar-pfm : JASPAR Position Frequency Matrix 737 - transfac : TRANSFAC like files 738 - fasta : FASTA file with instances 739 """ 740 741 formatters = { 742 "jaspar-pfm": self._to_jaspar_pfm, 743 "transfac": self._to_transfac, 744 "fasta": self._to_fasta, 745 } 746 747 try: 748 return formatters[format]() 749 except KeyError: 750 raise ValueError("Wrong format type")
751
752 - def scanPWM(self, seq):
753 """Matrix of log-odds scores for a nucleotide sequence. 754 755 scans a nucleotide sequence and returns the matrix of log-odds 756 scores for all positions. 757 758 - the result is a one-dimensional list or numpy array 759 - the sequence can only be a DNA sequence 760 - the search is performed only on one strand 761 """ 762 # TODO - Code itself tolerates ambiguous bases (as NaN). 763 if not isinstance(self.alphabet, IUPAC.IUPACUnambiguousDNA): 764 raise ValueError("PSSM has wrong alphabet: %s - Use only with DNA motifs" 765 % self.alphabet) 766 if not isinstance(seq.alphabet, IUPAC.IUPACUnambiguousDNA): 767 raise ValueError("Sequence has wrong alphabet: %r - Use only with DNA sequences" 768 % sequence.alphabet) 769 770 seq = str(seq) 771 772 # check if the fast C code can be used 773 try: 774 import _pwm 775 except ImportError: 776 # use the slower Python code otherwise 777 return self._pwm_calculate(seq) 778 779 # get the log-odds matrix into a proper shape 780 # (each row contains sorted (ACGT) log-odds values) 781 logodds = [[y[1] for y in sorted(x.items())] for x in self.log_odds()] 782 return _pwm.calculate(seq, logodds)
783
784 - def _pwm_calculate(self, sequence):
785 logodds = self.log_odds() 786 m = len(logodds) 787 s = len(sequence) 788 n = s - m + 1 789 result = [None] * n 790 for i in range(n): 791 score = 0.0 792 for j in range(m): 793 c = sequence[i + j] 794 temp = logodds[j].get(c) 795 if temp is None: 796 break 797 score += temp 798 else: 799 result[i] = score 800 return result
801