Package Bio :: Package Motif :: Module _Motif
[hide private]
[frames] | no frames]

Source Code for Module Bio.Motif._Motif

  1  # Copyright 2003-2009 by Bartek Wilczynski.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Implementation of sequence motifs (PRIVATE). 
  6  """ 
  7   
  8  from __future__ import print_function 
  9   
 10  from Bio._py3k import range 
 11   
 12  from Bio.Seq import Seq 
 13  from Bio.SubsMat import FreqTable 
 14  from Bio.Alphabet import IUPAC 
 15  import math 
 16  import random 
 17   
 18   
 19  __docformat__ = "restructuredtext en" 
 20   
21 -class Motif(object):
22 """ 23 A class representing sequence motifs. 24 """
25 - def __init__(self, alphabet=IUPAC.unambiguous_dna):
26 self.instances = [] 27 self.has_instances=False 28 self.counts = {} 29 self.has_counts=False 30 self.mask = [] 31 self._pwm_is_current = False 32 self._pwm = [] 33 self._log_odds_is_current = False 34 self._log_odds = [] 35 self.alphabet=alphabet 36 self.length=None 37 self.background=dict((n, 1.0/len(self.alphabet.letters)) 38 for n in self.alphabet.letters) 39 self.beta=1.0 40 self.info=None 41 self.name=""
42
43 - def _check_length(self, len):
44 # TODO - Change parameter name (len clashes with built in function)? 45 if self.length is None: 46 self.length = len 47 elif self.length != len: 48 raise ValueError("You can't change the length of the motif " 49 "%r %r %r" % (self.length, self.instances, len))
50
51 - def _check_alphabet(self, alphabet):
52 if self.alphabet is None: 53 self.alphabet=alphabet 54 elif self.alphabet != alphabet: 55 raise ValueError("Wrong Alphabet")
56
57 - def add_instance(self, instance):
58 """ 59 adds new instance to the motif 60 """ 61 self._check_alphabet(instance.alphabet) 62 self._check_length(len(instance)) 63 if self.has_counts: 64 for i in range(self.length): 65 let=instance[i] 66 self.counts[let][i]+=1 67 68 if self.has_instances or not self.has_counts: 69 self.instances.append(instance) 70 self.has_instances=True 71 72 self._pwm_is_current = False 73 self._log_odds_is_current = False
74
75 - def set_mask(self, mask):
76 """ 77 sets the mask for the motif 78 79 The mask should be a string containing asterisks in the position of significant columns and spaces in other columns 80 """ 81 self._check_length(len(mask)) 82 self.mask=[] 83 for char in mask: 84 if char=="*": 85 self.mask.append(1) 86 elif char==" ": 87 self.mask.append(0) 88 else: 89 raise ValueError("Mask should contain only '*' or ' ' and not a '%s'"%char)
90
91 - def pwm(self, laplace=True):
92 """ 93 returns the PWM computed for the set of instances 94 95 if laplace=True (default), pseudocounts equal to self.background multiplied by self.beta are added to all positions. 96 """ 97 if self._pwm_is_current: 98 return self._pwm 99 # we need to compute new pwm 100 self._pwm = [] 101 for i in range(self.length): 102 dict = {} 103 # filling the dict with 0's 104 for letter in self.alphabet.letters: 105 if laplace: 106 dict[letter]=self.beta*self.background[letter] 107 else: 108 dict[letter]=0.0 109 if self.has_counts: 110 # taking the raw counts 111 for letter in self.alphabet.letters: 112 dict[letter]+=self.counts[letter][i] 113 elif self.has_instances: 114 # counting the occurences of letters in instances 115 for seq in self.instances: 116 # dict[seq[i]]=dict[seq[i]]+1 117 try: 118 dict[seq[i]]+=1 119 except KeyError: # we need to ignore non-alphabet letters 120 pass 121 self._pwm.append(FreqTable.FreqTable(dict, FreqTable.COUNT, self.alphabet)) 122 self._pwm_is_current=1 123 return self._pwm
124
125 - def log_odds(self, laplace=True):
126 """ 127 returns the logg odds matrix computed for the set of instances 128 """ 129 130 if self._log_odds_is_current: 131 return self._log_odds 132 # we need to compute new pwm 133 self._log_odds = [] 134 pwm=self.pwm(laplace) 135 for i in range(self.length): 136 d = {} 137 for a in self.alphabet.letters: 138 d[a]=math.log(pwm[i][a]/self.background[a], 2) 139 self._log_odds.append(d) 140 self._log_odds_is_current=1 141 return self._log_odds
142
143 - def ic(self):
144 """Method returning the information content of a motif. 145 """ 146 res=0 147 pwm=self.pwm() 148 for i in range(self.length): 149 res+=2 150 for a in self.alphabet.letters: 151 if pwm[i][a]!=0: 152 res+=pwm[i][a]*math.log(pwm[i][a], 2) 153 return res
154
155 - def exp_score(self, st_dev=False):
156 """ 157 Computes expected score of motif's instance and its standard deviation 158 """ 159 exs=0.0 160 var=0.0 161 pwm=self.pwm() 162 for i in range(self.length): 163 ex1=0.0 164 ex2=0.0 165 for a in self.alphabet.letters: 166 if pwm[i][a]!=0: 167 ex1+=pwm[i][a]*(math.log(pwm[i][a], 2)-math.log(self.background[a], 2)) 168 ex2+=pwm[i][a]*(math.log(pwm[i][a], 2)-math.log(self.background[a], 2))**2 169 exs+=ex1 170 var+=ex2-ex1**2 171 if st_dev: 172 return exs, math.sqrt(var) 173 else: 174 return exs
175
176 - def search_instances(self, sequence):
177 """ 178 a generator function, returning found positions of instances of the motif in a given sequence 179 """ 180 if not self.has_instances: 181 raise ValueError("This motif has no instances") 182 for pos in range(0, len(sequence) - self.length + 1): 183 for instance in self.instances: 184 if str(instance) == str(sequence[pos:pos + self.length]): 185 yield (pos, instance) 186 break # no other instance will fit (we don't want to return multiple hits)
187
188 - def score_hit(self, sequence, position, normalized=0, masked=0):
189 """ 190 give the pwm score for a given position 191 """ 192 lo=self.log_odds() 193 score = 0.0 194 for pos in range(self.length): 195 a = sequence[position+pos] 196 if not masked or self.mask[pos]: 197 try: 198 score += lo[pos][a] 199 except: 200 pass 201 if normalized: 202 if not masked: 203 score/=self.length 204 else: 205 score/=len([x for x in self.mask if x]) 206 return score
207
208 - def search_pwm(self, sequence, normalized=0, masked=0, threshold=0.0, both=True):
209 """ 210 a generator function, returning found hits in a given sequence with the pwm score higher than the threshold 211 """ 212 if both: 213 rc = self.reverse_complement() 214 215 sequence = str(sequence).upper() 216 for pos in range(0, len(sequence) - self.length + 1): 217 score = self.score_hit(sequence, pos, normalized, masked) 218 if score > threshold: 219 yield (pos, score) 220 if both: 221 rev_score = rc.score_hit(sequence, pos, normalized, masked) 222 if rev_score > threshold: 223 yield (-pos, rev_score)
224
225 - def dist_pearson(self, motif, masked=0):
226 """ 227 return the similarity score based on pearson correlation for the given motif against self. 228 229 We use the Pearson's correlation of the respective probabilities. 230 """ 231 232 if self.alphabet != motif.alphabet: 233 raise ValueError("Cannot compare motifs with different alphabets") 234 235 max_p=-2 236 for offset in range(-self.length+1, motif.length): 237 if offset<0: 238 p = self.dist_pearson_at(motif, -offset) 239 else: # offset>=0 240 p = motif.dist_pearson_at(self, offset) 241 242 if max_p<p: 243 max_p=p 244 max_o=-offset 245 return 1-max_p, max_o
246
247 - def dist_pearson_at(self, motif, offset):
248 sxx = 0 # \sum x^2 249 sxy = 0 # \sum x \cdot y 250 sx = 0 # \sum x 251 sy = 0 # \sum y 252 syy = 0 # \sum x^2 253 norm=max(self.length, offset+motif.length) 254 255 for pos in range(max(self.length, offset+motif.length)): 256 for l in self.alphabet.letters: 257 xi = self[pos][l] 258 yi = motif[pos-offset][l] 259 sx = sx + xi 260 sy = sy + yi 261 sxx = sxx + xi * xi 262 syy = syy + yi * yi 263 sxy = sxy + xi * yi 264 265 norm *= len(self.alphabet.letters) 266 s1 = (sxy - sx*sy*1.0/norm) 267 s2 = (norm*sxx - sx*sx*1.0)*(norm*syy- sy*sy*1.0) 268 return s1/math.sqrt(s2)
269
270 - def dist_product(self, other):
271 """ 272 A similarity measure taking into account a product probability of generating overlaping instances of two motifs 273 """ 274 max_p=0.0 275 for offset in range(-self.length+1, other.length): 276 if offset<0: 277 p = self.dist_product_at(other, -offset) 278 else: # offset>=0 279 p = other.dist_product_at(self, offset) 280 if max_p<p: 281 max_p=p 282 max_o=-offset 283 return 1-max_p/self.dist_product_at(self, 0), max_o
284
285 - def dist_product_at(self, other, offset):
286 s=0 287 for i in range(max(self.length, offset+other.length)): 288 f1=self[i] 289 f2=other[i-offset] 290 for n, b in self.background.items(): 291 s+=b*f1[n]*f2[n] 292 return s/i
293
294 - def dist_dpq(self, other):
295 r"""Calculates the DPQ distance measure between motifs. 296 297 It is calculated as a maximal value of DPQ formula (shown using LaTeX 298 markup, familiar to mathematicians):: 299 300 \sqrt{\sum_{i=1}^{alignment.len()} \sum_{k=1}^alphabet.len() \ 301 \{ m1[i].freq(alphabet[k])*log_2(m1[i].freq(alphabet[k])/m2[i].freq(alphabet[k])) + 302 m2[i].freq(alphabet[k])*log_2(m2[i].freq(alphabet[k])/m1[i].freq(alphabet[k])) 303 } 304 305 over possible non-spaced alignemts of two motifs. See this reference: 306 307 D. M Endres and J. E Schindelin, "A new metric for probability 308 distributions", IEEE transactions on Information Theory 49, no. 7 309 (July 2003): 1858-1860. 310 """ 311 312 min_d=float("inf") 313 min_o=-1 314 d_s=[] 315 for offset in range(-self.length+1, other.length): 316 # print("%2.3d"%offset) 317 if offset<0: 318 d = self.dist_dpq_at(other, -offset) 319 overlap = self.length+offset 320 else: # offset>=0 321 d = other.dist_dpq_at(self, offset) 322 overlap = other.length-offset 323 overlap = min(self.length, other.length, overlap) 324 out = self.length+other.length - 2*overlap 325 # print("%f %f %f" % (d,1.0*(overlap+out)/overlap,d*(overlap+out)/overlap)) 326 # d = d/(2*overlap) 327 d = (d/(out+overlap))*(2*overlap+out)/(2*overlap) 328 # print(d) 329 d_s.append((offset, d)) 330 if min_d > d: 331 min_d = d 332 min_o = -offset 333 return min_d, min_o # ,d_s
334
335 - def dist_dpq_at(self, other, offset):
336 """ 337 calculates the dist_dpq measure with a given offset. 338 339 offset should satisfy 0<=offset<=len(self) 340 """ 341 def dpq(f1, f2, alpha): 342 s=0 343 for n in alpha.letters: 344 avg=(f1[n]+f2[n])/2 345 s+=f1[n]*math.log(f1[n]/avg, 2)+f2[n]*math.log(f2[n]/avg, 2) 346 return math.sqrt(s)
347 348 s=0 349 for i in range(max(self.length, offset+other.length)): 350 f1=self[i] 351 f2=other[i-offset] 352 s+=dpq(f1, f2, self.alphabet) 353 return s
354
355 - def _read(self, stream):
356 """Reads the motif from the stream (in AlignAce format). 357 358 the self.alphabet variable must be set beforehand. 359 If the last line contains asterisks it is used for setting mask 360 """ 361 362 while True: 363 ln = stream.readline() 364 if "*" in ln: 365 self.set_mask(ln.strip("\n\c")) 366 break 367 self.add_instance(Seq(ln.strip(), self.alphabet))
368
369 - def __str__(self, masked=False):
370 """ string representation of a motif. 371 """ 372 str = "".join(str(inst) + "\n" for inst in self.instances) 373 374 if masked: 375 for i in range(self.length): 376 if self.mask[i]: 377 str += "*" 378 else: 379 str += " " 380 str += "\n" 381 return str
382
383 - def __len__(self):
384 """return the length of a motif 385 386 Please use this method (i.e. invoke len(m)) instead of refering to the m.length directly. 387 """ 388 if self.length is None: 389 return 0 390 else: 391 return self.length
392
393 - def _write(self, stream):
394 """ 395 writes the motif to the stream 396 """ 397 398 stream.write(self.__str__())
399
400 - def _to_fasta(self):
401 """ 402 FASTA representation of motif 403 """ 404 if not self.has_instances: 405 self.make_instances_from_counts() 406 return "".join(">instance%d\n%s\n" % (i, inst) for i, inst in enumerate(self.instances))
407
408 - def reverse_complement(self):
409 """ 410 Gives the reverse complement of the motif 411 """ 412 res = Motif() 413 if self.has_instances: 414 for i in self.instances: 415 res.add_instance(i.reverse_complement()) 416 else: # has counts 417 res.has_counts=True 418 res.counts["A"]=self.counts["T"][:] 419 res.counts["T"]=self.counts["A"][:] 420 res.counts["G"]=self.counts["C"][:] 421 res.counts["C"]=self.counts["G"][:] 422 res.counts["A"].reverse() 423 res.counts["C"].reverse() 424 res.counts["G"].reverse() 425 res.counts["T"].reverse() 426 res.length=self.length 427 res.mask = self.mask 428 return res
429
430 - def _from_jaspar_pfm(self, stream, make_instances=False):
431 """ 432 reads the motif from Jaspar .pfm file 433 434 The instances are fake, but the pwm is accurate. 435 """ 436 return self._from_horiz_matrix(stream, letters="ACGT", make_instances=make_instances)
437
438 - def _from_vert_matrix(self, stream, letters=None, make_instances=False):
439 """reads a vertical count matrix from stream and fill in the counts. 440 """ 441 442 self.counts = {} 443 self.has_counts = True 444 if letters is None: 445 letters = self.alphabet.letters 446 self.length = 0 447 for i in letters: 448 self.counts[i] = [] 449 for ln in stream.readlines(): 450 rec = [float(x) for x in ln.strip().split()] 451 for k, v in zip(letters, rec): 452 self.counts[k].append(v) 453 self.length += 1 454 self.set_mask("*"*self.length) 455 if make_instances is True: 456 self.make_instances_from_counts() 457 return self
458
459 - def _from_horiz_matrix(self, stream, letters=None, make_instances=False):
460 """reads a horizontal count matrix from stream and fill in the counts. 461 """ 462 if letters is None: 463 letters=self.alphabet.letters 464 self.counts = {} 465 self.has_counts=True 466 467 for i in letters: 468 ln = stream.readline().strip().split() 469 # if there is a letter in the beginning, ignore it 470 if ln[0] == i: 471 ln = ln[1:] 472 # print(ln) 473 try: 474 self.counts[i] = [int(x) for x in ln] 475 except ValueError: # not integers 476 self.counts[i] = [float(x) for x in ln] 477 # print(counts[i]) 478 479 s = sum(self.counts[nuc][0] for nuc in letters) 480 l = len(self.counts[letters[0]]) 481 self.length=l 482 self.set_mask("*"*l) 483 if make_instances is True: 484 self.make_instances_from_counts() 485 return self
486
487 - def make_instances_from_counts(self):
488 """Creates "fake" instances for a motif created from a count matrix. 489 490 In case the sums of counts are different for different columnes, the 491 shorter columns are padded with background. 492 """ 493 alpha = "".join(self.alphabet.letters) 494 # col[i] is a column taken from aligned motif instances 495 col = [] 496 self.has_instances = True 497 self.instances = [] 498 s = sum(self.counts[nuc][0] for nuc in self.alphabet.letters) 499 for i in range(self.length): 500 col.append("") 501 for n in self.alphabet.letters: 502 col[i] = col[i] + n*(self.counts[n][i]) 503 if len(col[i]) < s: 504 print("WARNING, column too short %i %i" % (len(col[i]), s)) 505 col[i] += (alpha*s)[:(s-len(col[i]))] 506 # print("column %i, %s" % (i, col[i])) 507 # iterate over instances 508 for i in range(s): 509 inst = "" # start with empty seq 510 for j in range(self.length): # iterate over positions 511 inst += col[j][i] 512 # print("%i %s" % (i,inst) 513 inst = Seq(inst, self.alphabet) 514 self.add_instance(inst) 515 return self.instances
516
517 - def make_counts_from_instances(self):
518 """Creates the count matrix for a motif with instances. 519 520 """ 521 # make strings for "columns" of motifs 522 # col[i] is a column taken from aligned motif instances 523 counts={} 524 for a in self.alphabet.letters: 525 counts[a]=[] 526 self.has_counts=True 527 s = len(self.instances) 528 for i in range(self.length): 529 ci = dict((a, 0) for a in self.alphabet.letters) 530 for inst in self.instances: 531 ci[inst[i]]+=1 532 for a in self.alphabet.letters: 533 counts[a].append(ci[a]) 534 self.counts=counts 535 return counts
536
537 - def _from_jaspar_sites(self, stream):
538 """ 539 reads the motif from Jaspar .sites file 540 541 The instances and pwm are OK. 542 """ 543 while True: 544 ln = stream.readline() # read the header "$>...." 545 if ln=="" or ln[0]!=">": 546 break 547 548 ln=stream.readline().strip() # read the actual sequence 549 i=0 550 while ln[i]==ln[i].lower(): 551 i+=1 552 inst="" 553 while i<len(ln) and ln[i]==ln[i].upper(): 554 inst+=ln[i] 555 i+=1 556 inst=Seq(inst, self.alphabet) 557 self.add_instance(inst) 558 559 self.set_mask("*"*len(inst)) 560 return self
561
562 - def __getitem__(self, index):
563 """Returns the probability distribution over symbols at a given position, padding with background. 564 565 If the requested index is out of bounds, the returned distribution comes from background. 566 """ 567 if index in range(self.length): 568 return self.pwm()[index] 569 else: 570 return self.background
571
572 - def consensus(self):
573 """Returns the consensus sequence of a motif. 574 """ 575 res="" 576 for i in range(self.length): 577 max_f=0 578 max_n="X" 579 for n in sorted(self[i]): 580 if self[i][n]>max_f: 581 max_f=self[i][n] 582 max_n=n 583 res+=max_n 584 return Seq(res, self.alphabet)
585
586 - def anticonsensus(self):
587 """returns the least probable pattern to be generated from this motif. 588 """ 589 res="" 590 for i in range(self.length): 591 min_f=10.0 592 min_n="X" 593 for n in sorted(self[i]): 594 if self[i][n]<min_f: 595 min_f=self[i][n] 596 min_n=n 597 res+=min_n 598 return Seq(res, self.alphabet)
599
600 - def max_score(self):
601 """Maximal possible score for this motif. 602 603 returns the score computed for the consensus sequence. 604 """ 605 return self.score_hit(self.consensus(), 0)
606
607 - def min_score(self):
608 """Minimal possible score for this motif. 609 610 returns the score computed for the anticonsensus sequence. 611 """ 612 return self.score_hit(self.anticonsensus(), 0)
613
614 - def weblogo(self, fname, format="PNG", **kwds):
615 """ 616 uses the Berkeley weblogo service to download and save a weblogo of itself 617 618 requires an internet connection. 619 The parameters from ``**kwds`` are passed directly to the weblogo server. 620 """ 621 from Bio._py3k import urlopen, urlencode, Request 622 623 al= self._to_fasta() 624 url = 'http://weblogo.berkeley.edu/logo.cgi' 625 values = {'sequence': al, 626 'format': format, 627 'logowidth': '18', 628 'logoheight': '5', 629 'logounits': 'cm', 630 'kind': 'AUTO', 631 'firstnum': "1", 632 'command': 'Create Logo', 633 'smallsamplecorrection': "on", 634 'symbolsperline': 32, 635 'res': '96', 636 'res_units': 'ppi', 637 'antialias': 'on', 638 'title': '', 639 'barbits': '', 640 'xaxis': 'on', 641 'xaxis_label': '', 642 'yaxis': 'on', 643 'yaxis_label': '', 644 'showends': 'on', 645 'shrink': '0.5', 646 'fineprint': 'on', 647 'ticbits': '1', 648 'colorscheme': 'DEFAULT', 649 'color1': 'green', 650 'color2': 'blue', 651 'color3': 'red', 652 'color4': 'black', 653 'color5': 'purple', 654 'color6': 'orange', 655 'color1': 'black', 656 } 657 for k, v in kwds.items(): 658 values[k]=str(v) 659 660 data = urlencode(values) 661 req = Request(url, data) 662 response = urlopen(req) 663 with open(fname, "w") as f: 664 im=response.read() 665 f.write(im)
666
667 - def _to_transfac(self):
668 """Write the representation of a motif in TRANSFAC format 669 """ 670 res="XX\nTY Motif\n" # header 671 try: 672 res+="ID %s\n"%self.name 673 except: 674 pass 675 res+="BF undef\nP0" 676 for a in self.alphabet.letters: 677 res+=" %s"%a 678 res+="\n" 679 if not self.has_counts: 680 self.make_counts_from_instances() 681 for i in range(self.length): 682 if i<9: 683 res+="0%d"%(i+1) 684 else: 685 res+="%d"%(i+1) 686 for a in self.alphabet.letters: 687 res+=" %d"%self.counts[a][i] 688 res+="\n" 689 res+="XX\n" 690 return res
691
692 - def _to_vertical_matrix(self, letters=None):
693 """Return string representation of the motif as a matrix. 694 """ 695 if letters is None: 696 letters = self.alphabet.letters 697 self._pwm_is_current=False 698 pwm = self.pwm(laplace=False) 699 res = "" 700 for i in range(self.length): 701 res += "\t".join(str(pwm[i][a]) for a in letters) 702 res += "\n" 703 return res
704
705 - def _to_horizontal_matrix(self, letters=None, normalized=True):
706 """Return string representation of the motif as a matrix. 707 """ 708 if letters is None: 709 letters = self.alphabet.letters 710 res = "" 711 if normalized: # output PWM 712 self._pwm_is_current=False 713 mat=self.pwm(laplace=False) 714 for a in letters: 715 res += "\t".join(str(mat[i][a]) for i in range(self.length)) 716 res += "\n" 717 else: # output counts 718 if not self.has_counts: 719 self.make_counts_from_instances() 720 mat = self.counts 721 for a in letters: 722 res += "\t".join(str(mat[a][i]) for i in range(self.length)) 723 res += "\n" 724 return res
725
726 - def _to_jaspar_pfm(self):
727 """Returns the pfm representation of the motif 728 """ 729 return self._to_horizontal_matrix(normalized=False, letters="ACGT")
730
731 - def format(self, format):
732 """Returns a string representation of the Motif in a given format 733 734 Currently supported fromats: 735 - jaspar-pfm : JASPAR Position Frequency Matrix 736 - transfac : TRANSFAC like files 737 - fasta : FASTA file with instances 738 """ 739 740 formatters={ 741 "jaspar-pfm": self._to_jaspar_pfm, 742 "transfac": self._to_transfac, 743 "fasta": self._to_fasta, 744 } 745 746 try: 747 return formatters[format]() 748 except KeyError: 749 raise ValueError("Wrong format type")
750
751 - def scanPWM(self, seq):
752 """Matrix of log-odds scores for a nucleotide sequence. 753 754 scans a nucleotide sequence and returns the matrix of log-odds 755 scores for all positions. 756 757 - the result is a one-dimensional list or numpy array 758 - the sequence can only be a DNA sequence 759 - the search is performed only on one strand 760 """ 761 # TODO - Code itself tolerates ambiguous bases (as NaN). 762 if not isinstance(self.alphabet, IUPAC.IUPACUnambiguousDNA): 763 raise ValueError("PSSM has wrong alphabet: %s - Use only with DNA motifs" 764 % self.alphabet) 765 if not isinstance(seq.alphabet, IUPAC.IUPACUnambiguousDNA): 766 raise ValueError("Sequence has wrong alphabet: %r - Use only with DNA sequences" 767 % sequence.alphabet) 768 769 seq = str(seq) 770 771 # check if the fast C code can be used 772 try: 773 import _pwm 774 except ImportError: 775 # use the slower Python code otherwise 776 return self._pwm_calculate(seq) 777 778 # get the log-odds matrix into a proper shape 779 # (each row contains sorted (ACGT) log-odds values) 780 logodds=[[y[1] for y in sorted(x.items())] for x in self.log_odds()] 781 return _pwm.calculate(seq, logodds)
782
783 - def _pwm_calculate(self, sequence):
784 logodds = self.log_odds() 785 m = len(logodds) 786 s = len(sequence) 787 n = s - m + 1 788 result = [None] * n 789 for i in range(n): 790 score = 0.0 791 for j in range(m): 792 c = sequence[i+j] 793 temp = logodds[j].get(c) 794 if temp is None: 795 break 796 score += temp 797 else: 798 result[i] = score 799 return result
800