Package Bio :: Package SwissProt :: Module KeyWList
[hide private]
[frames | no frames]

Source Code for Module Bio.SwissProt.KeyWList

 1  # Copyright 1999 by Jeffrey Chang.  All rights reserved. 
 2  # This code is part of the Biopython distribution and governed by its 
 3  # license.  Please see the LICENSE file that should have been included 
 4  # as part of this package. 
 5   
 6  """Code to parse the keywlist.txt file from SwissProt/UniProt 
 7   
 8  See: 
 9  http://www.expasy.ch/sprot/sprot-top.html 
10  ftp://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/complete/docs/keywlist.txt 
11   
12  Classes: 
13   
14      - Record            Stores the information about one keyword or one category 
15        in the keywlist.txt file. 
16   
17  Functions: 
18   
19      - parse             Parses the keywlist.txt file and returns an iterator to 
20        the records it contains. 
21  """ 
22   
23  from __future__ import print_function 
24   
25  __docformat__ = "restructuredtext en" 
26   
27   
class Record(dict):
    """Store one keyword or category entry of keywlist.txt as a dictionary.

    The keys in this dictionary are the line codes that can appear in the
    keywlist.txt file::

        ---------  ---------------------------  ----------------------
        Line code  Content                      Occurrence in an entry
        ---------  ---------------------------  ----------------------
        ID         Identifier (keyword)         Once; starts a keyword entry
        IC         Identifier (category)        Once; starts a category entry
        AC         Accession (KW-xxxx)          Once
        DE         Definition                   Once or more
        SY         Synonyms                     Optional; once or more
        GO         Gene ontology (GO) mapping   Optional; once or more
        HI         Hierarchy                    Optional; once or more
        WW         Relevant WWW site            Optional; once or more
        CA         Category                     Once per keyword entry; absent
                                                in category entries

    A new record already contains the keys DE, SY, GO, HI and WW, each
    mapped to its own empty list; the remaining keys are absent until set.
    """

    def __init__(self):
        """Create a record whose multi-line keys start as empty lists."""
        dict.__init__(self)
        # A fresh list per key, so records never share a value list.
        for keyword in ("DE", "SY", "GO", "HI", "WW"):
            self[keyword] = []
52 53
def parse(handle):
    """Iterate over the entries of a keywlist.txt file as Record objects.

    Arguments:
     - handle - an iterable of text lines, typically an open file.

    Everything before the first ``ID`` or ``IC`` line is treated as a
    header and skipped.  After that, each line made of a two-letter line
    code followed by three blanks contributes its remaining text
    (stripped) to the current record: ID, IC, AC and CA are stored as
    strings, while DE, SY, GO, HI and WW are collected in lists.  Lines
    with any other two-letter code are reported on standard output as
    ``Ignoring: ...``; lines without the three-blank separator are
    skipped silently.

    A line starting with ``//`` ends the entry: the DE and SY lists are
    each joined into a single space-separated string and the record is
    yielded.  An entry that is not terminated by ``//`` is never yielded.
    A line starting with a long run of dashes marks the footer; the rest
    of the input is consumed and discarded.
    """
    # Use one iterator for all three loops so that each loop continues
    # where the previous one stopped, even if handle is re-iterable
    # (for example a list of lines).
    lines = iter(handle)
    record = Record()
    # First, skip the header - the first ID/IC line opens the first record
    for line in lines:
        if line.startswith("ID   "):
            record["ID"] = line[5:].strip()
            break
        if line.startswith("IC   "):
            record["IC"] = line[5:].strip()
            break
    # Now parse the records
    for line in lines:
        if line.startswith("-------------------------------------"):
            # We have reached the footer
            break
        key = line[:2]
        if key == "//":
            # End of entry: flatten the free-text fields and hand it over
            record["DE"] = " ".join(record["DE"])
            record["SY"] = " ".join(record["SY"])
            yield record
            record = Record()
        elif line[2:5] == "   ":
            # Columns 3-5 must be three blanks; the value starts at column 6
            value = line[5:].strip()
            if key in ("ID", "IC", "AC", "CA"):
                record[key] = value
            elif key in ("DE", "SY", "GO", "HI", "WW"):
                record[key].append(value)
            else:
                print("Ignoring: %s" % line.strip())
    # Read the footer and throw it away
    for line in lines:
        pass
88