Package Bio :: Package motifs :: Module transfac
[hide private]
[frames | no frames]

Source Code for Module Bio.motifs.transfac

  1  # Copyright 2003 by Bartek Wilczynski.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Parsing TRANSFAC files 
  7  """ 
  8   
  9  from Bio import motifs 
 10  from Bio.Alphabet import IUPAC 
 11   
 12  __docformat__ = "restructuredtext en" 
 13   
 14   
class Motif(motifs.Motif, dict):
    """Store the information of a single TRANSFAC motif.

    The class derives from both the Bio.motifs.Motif base class and from
    the built-in dict. Whatever the parser can express as an attribute of
    the base class is stored there (see Bio.motifs.Motif for a description
    of those attributes). Everything else is kept as (key, value) pairs in
    the dictionary part, keyed by the two-letter field code used in the
    TRANSFAC file. References are the exception: they are kept in the
    .references attribute.

    Fields commonly found in TRANSFAC files::

        AC:    Accession number
        AS:    Accession numbers, secondary
        BA:    Statistical basis
        BF:    Binding factors
        BS:    Factor binding sites underlying the matrix
               [sequence; SITE accession number; start position for matrix
               sequence; length of sequence used; number of gaps inserted;
               strand orientation.]
        CC:    Comments
        CO:    Copyright notice
        DE:    Short factor description
        DR:    External databases
               [database name: database accession number]
        DT:    Date created/updated
        HC:    Subfamilies
        HP:    Superfamilies
        ID:    Identifier
        NA:    Name of the binding factor
        OC:    Taxonomic classification
        OS:    Species/Taxon
        OV:    Older version
        PV:    Preferred version
        TY:    Type
        XX:    Empty line; these are not stored in the Record.

    The .references attribute is a list of dictionaries, one per
    reference, using these keys::

        RN:    Reference number
        RA:    Reference authors
        RL:    Reference data
        RT:    Reference title
        RX:    PubMed ID

    For more information, see the TRANSFAC documentation.
    """

    # Fields that may occur several times within one motif
    multiple_value_keys = set(('BF', 'BS', 'DR', 'DT', 'HC', 'HP', 'OV'))

    # Fields that belong to a literature reference
    reference_keys = set(('RA', 'RL', 'RT', 'RX'))
class Record(list):
    """Hold the contents of one TRANSFAC matrix table.

    A Record is a list whose items are the individual motifs.

    Attributes:
     - version: the version number, taken from the 'VV' field of the
       TRANSFAC file; None until it has been set.
    """

    def __init__(self):
        """Create an empty record with no version information."""
        self.version = None

    def __str__(self):
        """Return the text produced by write() for this record."""
        transfac_text = write(self)
        return transfac_text
85 86
def read(handle):
    """Parse a TRANSFAC matrix file and return its motifs as a Record.

    Arguments:
     - handle: an iterable of text lines (typically an open file). Each
       line is expected to carry a two-letter field key in its first two
       characters, with the field value starting at the fifth character.

    Returns a Record, i.e. a list of Motif objects. The value of a 'VV'
    line is stored in record.version. One Motif is created for every entry
    terminated by '//' that contains a count matrix (a 'P0' or 'PO' line
    followed by numbered rows); entries without a matrix are skipped.
    Fields listed in Motif.multiple_value_keys are collected in lists;
    other fields are stored as single strings. References are stored as a
    list of dictionaries in motif.references.

    Raises ValueError if the matrix header is not 'A C G T', if a matrix
    row is out of sequence or has fewer than four counts, if an 'RN' line
    is malformed or out of sequence, or if a reference field appears
    before the 'RN' line of its reference.
    """
    annotations = {}
    references = []
    reference = None  # the reference currently being filled in, if any
    counts = None
    record = Record()
    for line in handle:
        line = line.strip()
        key, value = line[:2], line[4:]
        if key == 'VV':
            # The line is also seen by the field dispatch further down.
            record.version = value
        elif key in ('P0', 'PO'):  # Old TRANSFAC files use PO instead of P0
            if value.split()[:4] != ['A', 'C', 'G', 'T']:
                raise ValueError(
                    "Expected the matrix columns A, C, G, T in that order; "
                    "found %r" % line)
            counts = dict((letter, []) for letter in "ACGT")
            length = 0
            for line in handle:
                # Strip here too: the line that ends the matrix is handed
                # to the field dispatch below, which must not see its
                # trailing newline.
                line = line.strip()
                key, value = line[:2], line[4:]
                try:
                    i = int(key)
                except ValueError:
                    # Not a numbered row, so the matrix has ended.
                    break
                length += 1
                if i != length:
                    raise ValueError(
                        "Expected matrix row %d; found %r" % (length, line))
                values = value.split()
                if len(values) < 4:
                    raise ValueError(
                        "Expected four counts in matrix row %d; found %r"
                        % (length, line))
                # Any column after the four counts is ignored.
                for letter, v in zip("ACGT", values):
                    counts[letter].append(float(v))
        if not line or line == 'XX':
            # Blank lines and 'XX' separators carry no information.
            pass
        elif key == 'RN':
            index, separator, accession = value.partition(";")
            if index[:1] != '[' or index[-1:] != ']':
                raise ValueError(
                    "Expected a reference number in square brackets; "
                    "found %r" % line)
            index = int(index[1:-1])
            if len(references) != index - 1:
                raise ValueError(
                    "Expected reference number %d; found %r"
                    % (len(references) + 1, line))
            reference = {key: value}
            references.append(reference)
        elif key == '//':
            if counts is not None:
                motif = Motif(alphabet=IUPAC.unambiguous_dna, counts=counts)
                motif.update(annotations)
                motif.references = references
                record.append(motif)
            # Start the next entry from a clean slate, so that nothing
            # (in particular the count matrix) leaks from this entry into
            # a later one.
            annotations = {}
            references = []
            reference = None
            counts = None
        elif key in Motif.reference_keys:
            if reference is None:
                raise ValueError(
                    "Found reference field %r before any RN line" % key)
            reference[key] = value
        elif key in Motif.multiple_value_keys:
            if key not in annotations:
                annotations[key] = []
            annotations[key].append(value)
        else:
            annotations[key] = value
    return record
142 143
def write(motifs):
    """Return the motifs as a string in TRANSFAC format.

    Arguments:
     - motifs: an iterable of motifs, for example a Record or a plain
       list. If it has a version attribute that is not None, the output
       starts with a 'VV' block holding that version.

    Every motif is written as one entry terminated by '//'. Within an
    entry the fields are grouped into sections, each followed by an 'XX'
    separator line; sections without any content are left out. Each field
    line consists of the two-letter key, two spaces, and the value, so
    that the value starts at the fifth character of the line.
    """
    blocks = []
    # A plain list of motifs has no version attribute.
    version = getattr(motifs, "version", None)
    if version is not None:
        blocks.append("VV  %s\nXX\n//\n" % version)
    multiple_value_keys = Motif.multiple_value_keys
    sections = (('AC', 'AS',),  # Accession
                ('ID',),        # ID
                ('DT', 'CO'),   # Date, copyright
                ('NA',),        # Name
                ('DE',),        # Short factor description
                ('TY',),        # Type
                ('OS', 'OC'),   # Organism
                ('HP', 'HC'),   # Superfamilies, subfamilies
                ('BF',),        # Binding factors
                ('P0',),        # Frequency matrix
                ('BA',),        # Statistical basis
                ('BS',),        # Factor binding sites
                ('CC',),        # Comments
                ('DR',),        # External databases
                ('OV', 'PV',),  # Versions
                )
    reference_field_order = ("RN", "RX", "RA", "RT", "RL")
    for motif in motifs:
        lines = []
        for section in sections:
            blank = False  # becomes True once the section has content
            for key in section:
                if key == 'P0':
                    # Frequency matrix
                    length = motif.length
                    if length == 0:
                        continue
                    sequence = motif.degenerate_consensus
                    counts = motif.counts
                    # The header letters sit above the right-aligned,
                    # six-character count columns of the rows below.
                    lines.append("P0      A      C      G      T")
                    for i in range(length):
                        line = "%02.d %6.20g %6.20g %6.20g %6.20g      %s" % (
                            i + 1,
                            counts['A'][i],
                            counts['C'][i],
                            counts['G'][i],
                            counts['T'][i],
                            sequence[i],
                        )
                        lines.append(line)
                    blank = True
                    continue
                try:
                    value = motif.get(key)
                except AttributeError:
                    # A motif without a get() method has no annotations.
                    value = None
                if value is not None:
                    if key in multiple_value_keys:
                        for v in value:
                            lines.append("%s  %s" % (key, v))
                    else:
                        lines.append("%s  %s" % (key, value))
                    blank = True
                if key == 'PV':
                    # The references close the last section; a motif
                    # without a references attribute has none to write.
                    for reference in getattr(motif, "references", ()):
                        for ref_key in reference_field_order:
                            ref_value = reference.get(ref_key)
                            if ref_value is None:
                                continue
                            lines.append("%s  %s" % (ref_key, ref_value))
                            blank = True
            if blank:
                lines.append('XX')
        # Finished this motif; glue the lines together
        lines.append("//")
        blocks.append("\n".join(lines) + "\n")
    # Finished all motifs; glue the blocks together
    return "".join(blocks)
242