Package Bio :: Package NeuralNetwork :: Package Gene :: Module Pattern
[hide private]
[frames] | no frames]

Source Code for Module Bio.NeuralNetwork.Gene.Pattern

  1  # This code is part of the Biopython distribution and governed by its 
  2  # license.  Please see the LICENSE file that should have been included 
  3  # as part of this package. 
  4  # 
  5   
  6  """Generic functionality useful for all gene representations. 
  7   
  8  This module contains classes which can be used for all the different 
  9  types of patterns available for representing gene information (ie. motifs, 
 10  signatures and schemas). These are the general classes which should 
 11  handle any of the different specific patterns. 
 12  """ 
 13  # standard library 
 14  import random 
 15   
 16  # biopython 
 17  from Bio.Alphabet import _verify_alphabet 
 18  from Bio.Seq import Seq, MutableSeq 
 19   
 20  __docformat__ = "restructuredtext en" 
 21   
class PatternIO(object):
    """Allow reading and writing of patterns to files.

    This just defines a simple persistence class for patterns, making
    it easy to write them to a file and read them back.

    Each pattern occupies one line. A signature (a list or tuple of
    strings) is stored on a single line with its items joined by the
    ``separator`` attribute.
    """

    def __init__(self, alphabet=None):
        """Initialize the reader and writer class.

        Arguments:

        o alphabet - An optional argument specifying the alphabet
        which patterns should follow. If an alphabet is set it'll be used
        to verify that all patterns follow it.

        Attributes:

        o separator - A character to use in separating items in a signature
        when it is written to a file and read back. This character should
        not be in the possible alphabet of the sequences, or there will
        be trouble.
        """
        self._alphabet = alphabet

        self.separator = ";"

    def write(self, pattern_list, output_handle):
        """Write a list of patterns to the given handle, one per line.

        Arguments:

        o pattern_list - The patterns to write. List and tuple items are
        treated as signatures and their elements are joined with the
        separator; anything else is written as is.

        o output_handle - An object with a write() method accepting strings.
        """
        for pattern in pattern_list:
            # deal with signatures, concatenate them with the separator
            if isinstance(pattern, (list, tuple)):
                string_pattern = self.separator.join(pattern)
            # deal with the normal cases
            else:
                string_pattern = pattern

            output_handle.write("%s\n" % string_pattern)

    def write_seq(self, seq_pattern_list, output_handle):
        """Convenience function to write Seq objects to a file.

        This can take Seqs and MutableSeqs, and write them to a file
        as strings.

        Arguments:

        o seq_pattern_list - The Seq or MutableSeq objects to write.

        o output_handle - The handle passed on to write().

        Raises ValueError if an item is neither a Seq nor a MutableSeq;
        nothing is written in that case, since all items are converted
        before the first write.
        """
        # convert the seq patterns into just string patterns
        all_patterns = []

        for seq_pattern in seq_pattern_list:
            if isinstance(seq_pattern, MutableSeq):
                seq = seq_pattern.toseq()
                all_patterns.append(str(seq))
            elif isinstance(seq_pattern, Seq):
                all_patterns.append(str(seq_pattern))
            else:
                # wrap the offender in a 1-tuple: a bare tuple on the right
                # of % would be unpacked as the format arguments and raise
                # TypeError instead of the intended ValueError
                raise ValueError("Unexpected pattern type %r"
                                 % (seq_pattern,))

        self.write(all_patterns, output_handle)

    def read(self, input_handle):
        """Read patterns from the specified handle.

        Arguments:

        o input_handle - An object with a readline() method; it is read
        until readline() returns an empty value.

        Returns a list with one entry per line. A line containing the
        separator is returned as a tuple of its items, any other line as
        a string with trailing whitespace stripped.

        Raises ValueError if an alphabet was set and a pattern item does
        not pass the alphabet verification.
        """
        all_patterns = []

        while True:
            cur_line = input_handle.readline()

            if not cur_line:
                break

            cur_pattern = cur_line.rstrip()
            # split up signatures
            if self.separator in cur_pattern:
                cur_pattern = tuple(cur_pattern.split(self.separator))

            if self._alphabet is not None:
                # make single patterns (not signatures) into lists, so we
                # can check signatures and single patterns the same
                if isinstance(cur_pattern, tuple):
                    test_pattern = cur_pattern
                else:
                    test_pattern = [cur_pattern]

                for pattern_item in test_pattern:
                    pattern_seq = Seq(pattern_item, self._alphabet)
                    if not _verify_alphabet(pattern_seq):
                        raise ValueError("Pattern %s not matching alphabet %s"
                                         % (cur_pattern, self._alphabet))

            all_patterns.append(cur_pattern)

        return all_patterns
112 113
class PatternRepository(object):
    """This holds a list of specific patterns found in sequences.

    This is designed to be a general holder for a set of patterns and
    should be subclassed for specific implementations (ie. holding Motifs
    or Signatures).
    """

    def __init__(self, pattern_info):
        """Initialize a repository with patterns.

        Arguments:

        o pattern_info - A representation of all of the patterns found in
        a finder search. This should be a dictionary, where the keys
        are patterns, and the values are the number of times a pattern is
        found.

        The patterns are represented internally as a list of 2-tuples,
        where the first element is the number of times a pattern
        occurs, and the second is the pattern itself. This makes it easy
        to sort the list and return the top N patterns.
        """
        self._pattern_dict = pattern_info

        # create the list representation, most frequent patterns first
        self._pattern_list = [(self._pattern_dict[pattern_name], pattern_name)
                              for pattern_name in self._pattern_dict]
        self._pattern_list.sort(reverse=True)

    def get_all(self):
        """Retrieve all of the patterns in the repository.

        The patterns are returned in the repository order, ie. sorted
        from the highest count to the lowest.
        """
        return [pattern for _, pattern in self._pattern_list]

    def get_random(self, num_patterns):
        """Retrieve the specified number of patterns randomly.

        Randomly selects distinct patterns from the list and returns them.

        Arguments:

        o num_patterns - The total number of patterns to return. If this
        is larger than the number of patterns in the repository, every
        pattern is returned once (in random order) instead of looping
        forever looking for patterns that do not exist.
        """
        # never ask for more distinct patterns than we actually hold
        num_wanted = min(num_patterns, len(self._pattern_list))

        all_patterns = []
        while len(all_patterns) < num_wanted:
            # pick a pattern, and only add it if it is not already present
            new_pattern_info = random.choice(self._pattern_list)

            if new_pattern_info[1] not in all_patterns:
                all_patterns.append(new_pattern_info[1])

        return all_patterns

    def get_top_percentage(self, percent):
        """Return a percentage of the patterns.

        This returns the top 'percent' percentage of the patterns in the
        repository.

        Arguments:

        o percent - The fraction of the patterns to return; the number
        of patterns is multiplied by this value and truncated to an
        integer.
        """
        all_patterns = self.get_all()

        num_to_return = int(len(all_patterns) * percent)

        return all_patterns[:num_to_return]

    def get_top(self, num_patterns):
        """Return the specified number of most frequently occurring patterns.

        Arguments:

        o num_patterns - The number of patterns to return.
        """
        return [pattern for _, pattern in self._pattern_list[:num_patterns]]

    def get_differing(self, top_num, bottom_num):
        """Retrieve patterns that are at the extreme ranges.

        This returns both patterns at the top of the list (ie. the same as
        returned by get_top) and at the bottom of the list. This
        is especially useful for patterns that are the differences between
        two sets of patterns.

        The two ends are taken independently, so a pattern is returned
        twice if the top and bottom ranges overlap.

        Arguments:

        o top_num - The number of patterns to take from the top of the list.

        o bottom_num - The number of patterns to take from the bottom of
        the list.
        """
        # first get from the top of the list
        top_info = self._pattern_list[:top_num]

        # then from the bottom; a slice starting at -0 would be the whole
        # list, so asking for zero patterns has to be handled explicitly
        if bottom_num != 0:
            bottom_info = self._pattern_list[-bottom_num:]
        else:
            bottom_info = []

        return [pattern for _, pattern in top_info + bottom_info]

    def remove_polyA(self, at_percentage=.9):
        """Remove patterns which are likely due to polyA tails from the lists.

        This is just a helper function to remove patterns which are likely
        just due to polyA tails, and thus are not really great motifs.
        This will also get rid of stuff like ATATAT, which might be a
        useful motif, so use at your own discretion.

        Only the sorted pattern list is filtered; the counts reported by
        count() are not changed.

        XXX Could we write a more general function, based on info content
        or something like that?

        Arguments:

        o at_percentage - The percentage of A and T residues in a pattern
        that qualifies it for being removed.
        """
        kept_patterns = []
        for pattern_info in self._pattern_list:
            pattern = pattern_info[1]
            pattern_length = len(pattern)

            # zero-length patterns have no AT content to measure; keep them
            # rather than dividing by zero
            if pattern_length:
                at_count = pattern.count('A') + pattern.count('T')
                if float(at_count) / pattern_length > at_percentage:
                    # really AT rich, leave it out of the master list
                    continue

            kept_patterns.append(pattern_info)

        # update the master list in place in a single pass
        self._pattern_list[:] = kept_patterns

    def count(self, pattern):
        """Return the number of times the specified pattern is found.

        Patterns which are not in the repository have a count of 0.
        """
        try:
            return self._pattern_dict[pattern]
        except KeyError:
            return 0
261