Package Bio :: Package motifs :: Module transfac
[hide private]
[frames | no frames]

Source Code for Module Bio.motifs.transfac

  1  # Copyright 2003 by Bartek Wilczynski.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Parsing TRANSFAC files 
  7  """ 
  8   
  9  from Bio import motifs 
 10  from Bio.Alphabet import IUPAC 
 11   
 12  __docformat__ = "restructuredtext en" 
 13   
class Motif(motifs.Motif, dict):
    """A single motif as found in a TRANSFAC file.

    The class derives both from the Bio.motifs.Motif base class and from a
    Python dictionary.  Whatever the parser can express through the base
    class is stored in the attributes of that class (see Bio.motifs.Motif
    for their description).  Every other piece of information is kept as a
    (key, value) pair in the dictionary, the key being the two-letter field
    code used in the TRANSFAC file.  References are the exception: they are
    kept in the .references attribute.

    Fields commonly found in TRANSFAC files::

        AC:    Accession number
        AS:    Accession numbers, secondary
        BA:    Statistical basis
        BF:    Binding factors
        BS:    Factor binding sites underlying the matrix
               [sequence; SITE accession number; start position for matrix
                sequence; length of sequence used; number of gaps inserted;
                strand orientation.]
        CC:    Comments
        CO:    Copyright notice
        DE:    Short factor description
        DR:    External databases
               [database name: database accession number]
        DT:    Date created/updated
        HC:    Subfamilies
        HP:    Superfamilies
        ID:    Identifier
        NA:    Name of the binding factor
        OC:    Taxonomic classification
        OS:    Species/Taxon
        OV:    Older version
        PV:    Preferred version
        TY:    Type
        XX:    Empty line; these are not stored in the Record.

    The .references attribute is a list of dictionaries, one per reference,
    with the following keys::

        RN:    Reference number
        RA:    Reference authors
        RL:    Reference data
        RT:    Reference title
        RX:    PubMed ID

    For more information, see the TRANSFAC documentation.
    """

    # Field codes that may appear several times within one motif.
    multiple_value_keys = set(["BF", "BS", "DR", "DT", "HC", "HP", "OV"])

    # Field codes that describe a literature reference.
    reference_keys = set(["RA", "RL", "RT", "RX"])
class Record(list):
    """List of the motifs contained in one TRANSFAC matrix table.

    A Record behaves as a plain list whose items are the individual motifs.

    Attributes:
     o version:   The version number, i.e. the content of the 'VV' field
                  of the TRANSFAC file; None until it has been set.
    """

    def __init__(self):
        # Start out as an empty list with an unknown version.
        super(Record, self).__init__()
        self.version = None

    def __str__(self):
        # The string form of a record is its TRANSFAC representation.
        text = write(self)
        return text
84 85
def _read_counts(lines, header):
    """Read the rows of one count matrix from the line iterator `lines`.

    `header` is the value part of the P0/PO line and must start with the
    column labels A, C, G, T.  Rows are consumed until a line is found whose
    first two characters are not an integer.

    Returns a tuple (counts, line) where counts maps each of 'A', 'C', 'G',
    'T' to a list of floats (one per matrix row) and line is the stripped
    line that ended the matrix ('' if the input was exhausted), so that the
    caller can still process it.

    Raises AssertionError if the header is not 'A C G T ...' or if the rows
    are not numbered consecutively starting at 1.
    """
    assert header.split()[:4] == ['A', 'C', 'G', 'T']
    counts = {}
    for letter in "ACGT":
        counts[letter] = []
    length = 0
    for line in lines:
        # Strip here as well, so the terminating line is handed back in the
        # same form as every other line the caller sees.
        line = line.strip()
        try:
            position = int(line[:2])
        except ValueError:
            # The first non-numeric key marks the end of the matrix.
            return counts, line
        length += 1
        assert position == length
        # zip() stops after four columns; a trailing consensus letter on the
        # row is ignored.
        for letter, number in zip("ACGT", line[4:].split()):
            counts[letter].append(float(number))
    return counts, ''


def read(handle):
    """Parse TRANSFAC-formatted text and return a Record of Motif objects.

    Usage: record = read(handle)

    Arguments:
     o handle:    Any iterable yielding the lines of the file (an open file,
                  a list of lines, ...).

    Each line is split into a two-letter key (columns 0-1) and a value
    (column 4 onwards).  A 'VV' line sets record.version.  A 'P0' (or 'PO')
    line starts the count matrix.  Every '//' line closes an entry; an entry
    is turned into a Motif only if it contained a count matrix.  Blank lines
    and 'XX' separator lines are ignored.

    Raises AssertionError on a malformed matrix header, on matrix rows that
    are not numbered consecutively, and on 'RN' lines that are not numbered
    consecutively in the form '[n]'.  Raises ValueError if a reference field
    (RX, RA, RT, RL) occurs before any 'RN' line of the same entry.
    """
    annotations = {}
    references = []
    reference = None
    counts = None
    record = Record()
    # Use one shared iterator, so that reading the matrix rows continues
    # where the main loop stopped even if handle is a list of lines.
    lines = iter(handle)
    for line in lines:
        line = line.strip()
        key, value = line[:2], line[4:]
        if key in ('P0', 'PO'):  # Old TRANSFAC files use PO instead of P0
            counts, line = _read_counts(lines, value)
            # The line that ended the matrix still has to be processed.
            key, value = line[:2], line[4:]
        if not line or line == 'XX':
            continue
        if key == 'VV':
            record.version = value
        elif key == 'RN':
            index, separator, accession = value.partition(";")
            assert index[0] == '['
            assert index[-1] == ']'
            index = int(index[1:-1])
            assert len(references) == index - 1
            reference = {key: value}
            references.append(reference)
        elif key == '//':
            if counts is not None:
                motif = Motif(alphabet=IUPAC.unambiguous_dna, counts=counts)
                motif.update(annotations)
                motif.references = references
                record.append(motif)
            # Reset all per-entry state, so nothing leaks into the next
            # entry (in particular the count matrix and the last reference).
            annotations = {}
            references = []
            reference = None
            counts = None
        elif key in Motif.reference_keys:
            if reference is None:
                raise ValueError("Reference field %s found before any RN "
                                 "line" % key)
            reference[key] = value
        elif key in Motif.multiple_value_keys:
            if key not in annotations:
                annotations[key] = []
            annotations[key].append(value)
        else:
            annotations[key] = value
    return record
141 142
def write(motifs):
    """Return the TRANSFAC-formatted text for a collection of motifs.

    Arguments:
     o motifs:    An iterable of motifs, typically a Record.  If it has a
                  .version attribute that is not None, a 'VV' header entry
                  is written first.

    For every motif the known fields are written section by section, each
    non-empty section being followed by an 'XX' line, and the motif is
    closed with a '//' line.  The count matrix is written as a 'P0' block
    together with the degenerate consensus; references (motif.references,
    if present) are written after the 'PV' field.

    Every line is written as the two-character key, two spaces, and the
    value, because read() takes the value from column 4 onwards.

    Returns the complete text as one string.
    """
    blocks = []
    try:
        version = motifs.version
    except AttributeError:
        pass
    else:
        if version is not None:
            blocks.append("VV  %s\nXX\n//\n" % version)
    multiple_value_keys = Motif.multiple_value_keys
    sections = (('AC', 'AS',),  # Accession
                ('ID',),        # ID
                ('DT', 'CO'),   # Date, copyright
                ('NA',),        # Name
                ('DE',),        # Short factor description
                ('TY',),        # Type
                ('OS', 'OC'),   # Organism
                ('HP', 'HC'),   # Superfamilies, subfamilies
                ('BF',),        # Binding factors
                ('P0',),        # Frequency matrix
                ('BA',),        # Statistical basis
                ('BS',),        # Factor binding sites
                ('CC',),        # Comments
                ('DR',),        # External databases
                ('OV', 'PV',),  # Versions
                )
    reference_fields = ("RN", "RX", "RA", "RT", "RL")
    for motif in motifs:
        lines = []
        for section in sections:
            blank = False  # becomes True once the section wrote a line
            for key in section:
                if key == 'P0':
                    # Frequency matrix
                    length = motif.length
                    if length == 0:
                        continue
                    sequence = motif.degenerate_consensus
                    lines.append("P0      A      C      G      T")
                    for i in range(length):
                        # Two spaces after the row number keep the first
                        # count out of the key columns even if it is wide;
                        # for counts of up to five characters the layout is
                        # the same as with one space and a width of six.
                        line = ("%02d  %5.20g %6.20g %6.20g %6.20g      %s"
                                % (i + 1,
                                   motif.counts['A'][i],
                                   motif.counts['C'][i],
                                   motif.counts['G'][i],
                                   motif.counts['T'][i],
                                   sequence[i],
                                   ))
                        lines.append(line)
                    blank = True
                else:
                    try:
                        value = motif.get(key)
                    except AttributeError:
                        # Motif without dictionary behaviour: no annotations
                        value = None
                    if value is not None:
                        if key in multiple_value_keys:
                            for v in value:
                                lines.append("%s  %s" % (key, v))
                        else:
                            lines.append("%s  %s" % (key, value))
                        blank = True
                    if key == 'PV':
                        # References
                        try:
                            references = motif.references
                        except AttributeError:
                            pass
                        else:
                            for reference in references:
                                # A separate loop variable, so the section
                                # key is not overwritten.
                                for field in reference_fields:
                                    value = reference.get(field)
                                    if value is None:
                                        continue
                                    lines.append("%s  %s" % (field, value))
                                    blank = True
            if blank:
                lines.append('XX')
        # Finished this motif; glue the lines together
        lines.append("//")
        blocks.append("\n".join(lines) + "\n")
    # Finished all motifs; glue the blocks together
    return "".join(blocks)
241