1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 """ SCOP: Structural Classification of Proteins.
19
20 The SCOP database aims to provide a manually constructed classification of
21 all know protein structures into a hierarchy, the main levels of which
22 are family, superfamily and fold.
23
24 * "SCOP":http://scop.mrc-lmb.cam.ac.uk/scop/
25
26 * "Introduction":http://scop.mrc-lmb.cam.ac.uk/scop/intro.html
27
28 * "SCOP parsable files":http://scop.mrc-lmb.cam.ac.uk/scop/parse/
29
30 The Scop object in this module represents the entire SCOP classification. It
31 can be built from the three SCOP parsable files, modified is so desired, and
32 converted back to the same file formats. A single SCOP domain (represented
33 by the Domain class) can be obtained from Scop using the domain's SCOP
34 identifier (sid).
35
36
37 nodeCodeDict -- A mapping between known 2 letter node codes and a longer
38 description. The known node types are 'cl' (class), 'cf'
39 (fold), 'sf' (superfamily), 'fa' (family), 'dm' (domain),
40 'sp' (species), 'px' (domain). Additional node types may
41 be added in the future.
42
43 This module also provides code to access SCOP over the WWW.
44
45 Functions:
46 search -- Access the main CGI script.
47 _open -- Internally used function.
48
49 """
50
51 import os
52 import re
53
54 import Des
55 import Cla
56 import Hie
57 import Residues
58 from Bio import SeqIO
59 from Bio.Seq import Seq
60
61
62 nodeCodeDict = { 'cl':'class', 'cf':'fold', 'sf':'superfamily',
63 'fa':'family', 'dm':'protein', 'sp':'species', 'px':'domain'}
64
65
66 _nodetype_to_code= { 'class': 'cl', 'fold': 'cf', 'superfamily': 'sf',
67 'family': 'fa', 'protein': 'dm', 'species': 'sp', 'domain': 'px'}
68
69 nodeCodeOrder = [ 'ro', 'cl', 'cf', 'sf', 'fa', 'dm', 'sp', 'px' ]
70
71 astralBibIds = [10,20,25,30,35,40,50,70,90,95,100]
72
73 astralEvs = [10, 5, 1, 0.5, 0.1, 0.05, 0.01, 0.005, 0.001, 1e-4, 1e-5, 1e-10, 1e-15,
74 1e-20, 1e-25, 1e-50]
75
76 astralEv_to_file = { 10: 'e+1', 5: 'e+0,7', 1: 'e+0', 0.5: 'e-0,3', 0.1: 'e-1',
77 0.05: 'e-1,3', 0.01: 'e-2', 0.005: 'e-2,3', 0.001: 'e-3',
78 1e-4: 'e-4', 1e-5: 'e-5', 1e-10: 'e-10', 1e-15: 'e-15',
79 1e-20: 'e-20', 1e-25: 'e-25', 1e-50: 'e-50' }
80
81 astralEv_to_sql = { 10: 'e1', 5: 'e0_7', 1: 'e0', 0.5: 'e_0_3', 0.1: 'e_1',
82 0.05: 'e_1_3', 0.01: 'e_2', 0.005: 'e_2_3', 0.001: 'e_3',
83 1e-4: 'e_4', 1e-5: 'e_5', 1e-10: 'e_10', 1e-15: 'e_15',
84 1e-20: 'e_20', 1e-25: 'e_25', 1e-50: 'e_50' }
85
86 try:
87
88 _cmp = cmp
89 except NameError:
91 """Implementation of cmp(x,y) for Python 3 (PRIVATE).
92
93 Based on Python 3 docs which say if you really need the cmp()
94 functionality, you could use the expression (a > b) - (a < b)
95 as the equivalent for cmp(a, b)
96 """
97 return (a > b) - (a < b)
98
99
101 """Order SCOP concise classification strings (sccs).
102
103 a.4.5.1 < a.4.5.11 < b.1.1.1
104
105 A sccs (e.g. a.4.5.11) compactly represents a domain's classification.
106 The letter represents the class, and the numbers are the fold,
107 superfamily, and family, respectively.
108
109 """
110
111 s1 = sccs1.split(".")
112 s2 = sccs2.split(".")
113
114 if s1[0] != s2[0]:
115 return _cmp(s1[0], s2[0])
116
117 s1 = list(map(int, s1[1:]))
118 s2 = list(map(int, s2[1:]))
119
120 return _cmp(s1,s2)
121
122
123 _domain_re = re.compile(r">?([\w_\.]*)\s+([\w\.]*)\s+\(([^)]*)\) (.*)")
124
125
126 -def parse_domain(str):
127 """Convert an ASTRAL header string into a Scop domain.
128
129 An ASTRAL (http://astral.stanford.edu/) header contains a concise
130 description of a SCOP domain. A very similar format is used when a
131 Domain object is converted into a string. The Domain returned by this
132 method contains most of the SCOP information, but it will not be located
133 within the SCOP hierarchy (i.e. The parent node will be None). The
134 description is composed of the SCOP protein and species descriptions.
135
136 A typical ASTRAL header looks like --
137 >d1tpt_1 a.46.2.1 (1-70) Thymidine phosphorylase {Escherichia coli}
138 """
139
140 m = _domain_re.match(str)
141 if (not m):
142 raise ValueError("Domain: "+ str)
143
144 dom = Domain()
145 dom.sid = m.group(1)
146 dom.sccs = m.group(2)
147 dom.residues = Residues.Residues(m.group(3))
148 if not dom.residues.pdbid:
149 dom.residues.pdbid= dom.sid[1:5]
150 dom.description = m.group(4).strip()
151
152 return dom
153
154
156 filename = "dir.%s.scop.txt_%s" % (filetype,version)
157 handle = open(os.path.join( scop_dir_path, filename))
158 return handle
159
160
162 """The entire SCOP hierarchy.
163
164 root -- The root node of the hierarchy
165 """
166 - def __init__(self, cla_handle=None, des_handle=None, hie_handle=None,
167 dir_path=None, db_handle=None, version=None):
168 """Build the SCOP hierarchy from the SCOP parsable files, or a sql backend.
169
170 If no file handles are given, then a Scop object with a single
171 empty root node is returned.
172
173 If a directory and version are given (with dir_path=.., version=...) or
174 file handles for each file, the whole scop tree will be built in memory.
175
176 If a MySQLdb database handle is given, the tree will be built as needed,
177 minimising construction times. To build the SQL database to the methods
178 write_xxx_sql to create the tables.
179
180 """
181 self._sidDict = {}
182 self._sunidDict = {}
183
184 if all(h is None for h in [cla_handle, des_handle, hie_handle, dir_path, db_handle]):
185 return
186
187 if dir_path is None and db_handle is None:
188 if cla_handle is None or des_handle is None or hie_handle is None:
189 raise RuntimeError("Need CLA, DES and HIE files to build SCOP")
190
191 sunidDict = {}
192
193 self.db_handle = db_handle
194 try:
195 if db_handle:
196
197 pass
198 else:
199
200 if dir_path:
201 if not version:
202 raise RuntimeError("Need SCOP version to find parsable files in directory")
203 if cla_handle or des_handle or hie_handle:
204 raise RuntimeError("Cannot specify SCOP directory and specific files")
205
206 cla_handle = _open_scop_file( dir_path, version, 'cla')
207 des_handle = _open_scop_file( dir_path, version, 'des')
208 hie_handle = _open_scop_file( dir_path, version, 'hie')
209
210 root = Node()
211 domains = []
212 root.sunid=0
213 root.type='ro'
214 sunidDict[root.sunid] = root
215 self.root = root
216 root.description = 'SCOP Root'
217
218
219 records = Des.parse(des_handle)
220 for record in records:
221 if record.nodetype =='px':
222 n = Domain()
223 n.sid = record.name
224 domains.append(n)
225 else :
226 n = Node()
227 n.sunid = record.sunid
228 n.type = record.nodetype
229 n.sccs = record.sccs
230 n.description = record.description
231
232 sunidDict[n.sunid] = n
233
234
235 records = Hie.parse(hie_handle)
236 for record in records:
237 if record.sunid not in sunidDict:
238 print record.sunid
239
240 n = sunidDict[record.sunid]
241
242 if record.parent != '' :
243
244 if record.parent not in sunidDict:
245 raise ValueError("Incomplete data?")
246
247 n.parent = sunidDict[record.parent]
248
249 for c in record.children:
250 if c not in sunidDict:
251 raise ValueError("Incomplete data?")
252 n.children.append(sunidDict[c])
253
254
255 sidDict = {}
256 records = Cla.parse(cla_handle)
257 for record in records:
258 n = sunidDict[record.sunid]
259 assert n.sccs == record.sccs
260 assert n.sid == record.sid
261 n.residues = record.residues
262 sidDict[n.sid] = n
263
264
265 self._sunidDict = sunidDict
266 self._sidDict = sidDict
267 self._domains = tuple(domains)
268
269 finally:
270 if dir_path:
271
272 if cla_handle:
273 cla_handle.close()
274 if des_handle:
275 des_handle.close()
276 if hie_handle:
277 hie_handle.close()
278
281
282 - def getDomainBySid(self, sid):
283 """Return a domain from its sid"""
284 if sid in self._sidDict:
285 return self._sidDict[sid]
286 if self.db_handle:
287 self.getDomainFromSQL(sid=sid)
288 if sid in self._sidDict:
289 return self._sidDict[sid]
290 else:
291 return None
292
294 """Return a node from its sunid"""
295 if sunid in self._sunidDict:
296 return self._sunidDict[sunid]
297 if self.db_handle:
298 self.getDomainFromSQL(sunid=sunid)
299 if sunid in self._sunidDict:
300 return self._sunidDict[sunid]
301 else:
302 return None
303
304 - def getDomains(self):
305 """Returns an ordered tuple of all SCOP Domains"""
306 if self.db_handle:
307 return self.getRoot().getDescendents('px')
308 else:
309 return self._domains
310
318
327
335
336 - def getDomainFromSQL(self, sunid=None, sid=None):
337 """Load a node from the SQL backend using sunid or sid"""
338 if sunid is None and sid is None:
339 return None
340
341 cur = self.db_handle.cursor()
342
343 if sid:
344 cur.execute("SELECT sunid FROM cla WHERE sid=%s", sid)
345 res = cur.fetchone()
346 if res is None:
347 return None
348 sunid = res[0]
349
350 cur.execute("SELECT * FROM des WHERE sunid=%s", sunid)
351 data = cur.fetchone()
352
353 if data is not None:
354 n = None
355
356
357 if data[1] != "px":
358 n = Node(scop=self)
359
360 cur.execute("SELECT child FROM hie WHERE parent=%s", sunid)
361 children = []
362 for c in cur.fetchall():
363 children.append(c[0])
364 n.children = children
365 else:
366 n = Domain(scop=self)
367 cur.execute("select sid, residues, pdbid from cla where sunid=%s",
368 sunid)
369
370 [n.sid,n.residues,pdbid] = cur.fetchone()
371 n.residues = Residues.Residues(n.residues)
372 n.residues.pdbid=pdbid
373 self._sidDict[n.sid] = n
374
375 [n.sunid,n.type,n.sccs,n.description] = data
376
377 if data[1] != 'ro':
378 cur.execute("SELECT parent FROM hie WHERE child=%s", sunid)
379 n.parent = cur.fetchone()[0]
380
381 n.sunid = int(n.sunid)
382
383 self._sunidDict[n.sunid] = n
384
397
399 """Get descendents of a node using the database backend. This avoids
400 repeated iteration of SQL calls and is therefore much quicker than
401 repeatedly calling node.getChildren().
402 """
403 if nodeCodeOrder.index(type) <= nodeCodeOrder.index(node.type):
404 return []
405
406 des_list = []
407
408
409 if node.type == 'ro':
410 for c in node.getChildren():
411 for d in self.getDescendentsFromSQL(c,type):
412 des_list.append(d)
413 return des_list
414
415 cur = self.db_handle.cursor()
416
417 if type != 'px':
418 cur.execute("SELECT DISTINCT des.sunid,des.type,des.sccs,description FROM \
419 cla,des WHERE cla."+node.type+"=%s AND cla."+type+"=des.sunid", (node.sunid))
420 data = cur.fetchall()
421 for d in data:
422 if int(d[0]) not in self._sunidDict:
423 n = Node(scop=self)
424 [n.sunid,n.type,n.sccs,n.description] = d
425 n.sunid=int(n.sunid)
426 self._sunidDict[n.sunid] = n
427
428 cur.execute("SELECT parent FROM hie WHERE child=%s", n.sunid)
429 n.parent = cur.fetchone()[0]
430
431 cur.execute("SELECT child FROM hie WHERE parent=%s", n.sunid)
432 children = []
433 for c in cur.fetchall():
434 children.append(c[0])
435 n.children = children
436
437 des_list.append( self._sunidDict[int(d[0])] )
438
439 else:
440 cur.execute("SELECT cla.sunid,sid,pdbid,residues,cla.sccs,type,description,sp\
441 FROM cla,des where cla.sunid=des.sunid and cla."+node.type+"=%s",
442 node.sunid)
443
444 data = cur.fetchall()
445 for d in data:
446 if int(d[0]) not in self._sunidDict:
447 n = Domain(scop=self)
448
449
450 [n.sunid,n.sid, pdbid,n.residues,n.sccs,n.type,n.description,
451 n.parent] = d[0:8]
452 n.residues = Residues.Residues(n.residues)
453 n.residues.pdbid = pdbid
454 n.sunid = int(n.sunid)
455 self._sunidDict[n.sunid] = n
456 self._sidDict[n.sid] = n
457
458 des_list.append( self._sunidDict[int(d[0])] )
459
460 return des_list
461
463 """Write HIE data to SQL database"""
464 cur = handle.cursor()
465
466 cur.execute("DROP TABLE IF EXISTS hie")
467 cur.execute("CREATE TABLE hie (parent INT, child INT, PRIMARY KEY (child),\
468 INDEX (parent) )")
469
470 for p in self._sunidDict.itervalues():
471 for c in p.children:
472 cur.execute("INSERT INTO hie VALUES (%s,%s)" % (p.sunid, c.sunid))
473
475 """Write CLA data to SQL database"""
476 cur = handle.cursor()
477
478 cur.execute("DROP TABLE IF EXISTS cla")
479 cur.execute("CREATE TABLE cla (sunid INT, sid CHAR(8), pdbid CHAR(4),\
480 residues VARCHAR(50), sccs CHAR(10), cl INT, cf INT, sf INT, fa INT,\
481 dm INT, sp INT, px INT, PRIMARY KEY (sunid), INDEX (SID) )")
482
483 for n in self._sidDict.itervalues():
484 c = n.toClaRecord()
485 cur.execute( "INSERT INTO cla VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)",
486 (n.sunid, n.sid, c.residues.pdbid, c.residues, n.sccs,
487 n.getAscendent('cl').sunid, n.getAscendent('cf').sunid,
488 n.getAscendent('sf').sunid, n.getAscendent('fa').sunid,
489 n.getAscendent('dm').sunid, n.getAscendent('sp').sunid,
490 n.sunid ))
491
493 """Write DES data to SQL database"""
494 cur = handle.cursor()
495
496 cur.execute("DROP TABLE IF EXISTS des")
497 cur.execute("CREATE TABLE des (sunid INT, type CHAR(2), sccs CHAR(10),\
498 description VARCHAR(255),\
499 PRIMARY KEY (sunid) )")
500
501 for n in self._sunidDict.itervalues():
502 cur.execute( "INSERT INTO des VALUES (%s,%s,%s,%s)",
503 ( n.sunid, n.type, n.sccs, n.description ) )
504
505
507 """ A node in the Scop hierarchy
508
509 sunid -- SCOP unique identifiers. e.g. '14986'
510
511 parent -- The parent node
512
513 children -- A list of child nodes
514
515 sccs -- SCOP concise classification string. e.g. 'a.1.1.2'
516
517 type -- A 2 letter node type code. e.g. 'px' for domains
518
519 description --
520
521 """
523 """Create a Node in the scop hierarchy. If a Scop instance is provided to the
524 constructor, this will be used to lookup related references using the SQL
525 methods. If no instance is provided, it is assumed the whole tree exists
526 and is connected."""
527 self.sunid=''
528 self.parent = None
529 self.children=[]
530 self.sccs = ''
531 self.type =''
532 self.description =''
533 self.scop=scop
534
543
545 """Return an Hie.Record"""
546 rec = Hie.Record()
547 rec.sunid = str(self.sunid)
548 if self.getParent():
549 rec.parent = str(self.getParent().sunid)
550 else:
551 rec.parent = '-'
552 for c in self.getChildren():
553 rec.children.append(str(c.sunid))
554 return rec
555
557 """Return a Des.Record"""
558 rec = Des.Record()
559 rec.sunid = str(self.sunid)
560 rec.nodetype = self.type
561 rec.sccs = self.sccs
562 rec.description = self.description
563 return rec
564
566 """Return a list of children of this Node"""
567 if self.scop is None:
568 return self.children
569 else:
570 return map( self.scop.getNodeBySunid, self.children )
571
573 """Return the parent of this Node"""
574 if self.scop is None:
575 return self.parent
576 else:
577 return self.scop.getNodeBySunid( self.parent )
578
580 """ Return a list of all decendent nodes of the given type. Node type can a
581 two letter code or longer description. e.g. 'fa' or 'family'
582 """
583 if node_type in _nodetype_to_code:
584 node_type = _nodetype_to_code[node_type]
585
586 nodes = [self]
587 if self.scop:
588 return self.scop.getDescendentsFromSQL(self,node_type)
589 while nodes[0].type != node_type:
590 if nodes[0].type == 'px':
591 return []
592 child_list = []
593 for n in nodes:
594 for child in n.getChildren():
595 child_list.append( child )
596 nodes = child_list
597
598 return nodes
599
601 """ Return the ancenstor node of the given type, or None.Node type can a
602 two letter code or longer description. e.g. 'fa' or 'family'"""
603 if node_type in _nodetype_to_code:
604 node_type = _nodetype_to_code[node_type]
605
606 if self.scop:
607 return self.scop.getAscendentFromSQL(self,node_type)
608 else:
609 n = self
610 if n.type == node_type:
611 return None
612
613 while n.type != node_type:
614 if n.type == 'ro':
615 return None
616 n = n.getParent()
617
618 return n
619
620
622 """ A SCOP domain. A leaf node in the Scop hierarchy.
623
624 sid -- The SCOP domain identifier. e.g. 'd5hbib_'
625
626 residues -- A Residue object. It defines the collection
627 of PDB atoms that make up this domain.
628 """
629 - def __init__(self,scop=None):
630 Node.__init__(self,scop=scop)
631 self.sid = ''
632 self.residues = None
633
635 s = []
636 s.append(self.sid)
637 s.append(self.sccs)
638 s.append("("+str(self.residues)+")")
639
640 if not self.getParent():
641 s.append(self.description)
642 else:
643 sp = self.getParent()
644 dm = sp.getParent()
645 s.append(dm.description)
646 s.append("{"+sp.description+"}")
647
648 return " ".join(s)
649
650 - def toDesRecord(self):
651 """Return a Des.Record"""
652 rec = Node.toDesRecord(self)
653 rec.name = self.sid
654 return rec
655
656 - def toClaRecord(self):
657 """Return a Cla.Record"""
658 rec = Cla.Record()
659 rec.sid = self.sid
660 rec.residues = self.residues
661 rec.sccs = self.sccs
662 rec.sunid = self.sunid
663
664 n = self
665 while n.sunid != 0:
666 rec.hierarchy[n.type] = str(n.sunid)
667 n = n.getParent()
668
669
670
671
672
673 return rec
674
675
677 """Abstraction of the ASTRAL database, which has sequences for all the SCOP domains,
678 as well as clusterings by percent id or evalue.
679 """
680
681 - def __init__( self, dir_path=None, version=None, scop=None,
682 astral_file=None, db_handle=None):
683 """
684 Initialise the astral database.
685
686 You must provide either a directory of SCOP files:
687
688 dir_path - string, the path to location of the scopseq-x.xx directory
689 (not the directory itself), and
690 version -a version number.
691
692 or, a FASTA file:
693
694 astral_file - string, a path to a fasta file (which will be loaded in memory)
695
696 or, a MYSQL database:
697
698 db_handle - a database handle for a MYSQL database containing a table
699 'astral' with the astral data in it. This can be created
700 using writeToSQL.
701 """
702
703 if astral_file is None and dir_path is None and db_handle is None:
704 raise RuntimeError("Need either file handle, or (dir_path + "
705 + "version) or database handle to construct Astral")
706 if not scop:
707 raise RuntimeError("Must provide a Scop instance to construct")
708
709 self.scop = scop
710 self.db_handle = db_handle
711
712 if not astral_file and not db_handle:
713 if dir_path is None or version is None:
714 raise RuntimeError("must provide dir_path and version")
715
716 self.version = version
717 self.path = os.path.join( dir_path, "scopseq-%s" % version)
718 astral_file = "astral-scopdom-seqres-all-%s.fa" % self.version
719 astral_file = os.path.join(self.path, astral_file)
720
721 if astral_file:
722
723 self.fasta_dict = SeqIO.to_dict(SeqIO.parse(open(astral_file), "fasta"))
724
725 self.astral_file = astral_file
726 self.EvDatasets = {}
727 self.EvDatahash = {}
728 self.IdDatasets = {}
729 self.IdDatahash = {}
730
732 """get domains clustered by evalue"""
733 if id not in self.EvDatasets:
734 if self.db_handle:
735 self.EvDatasets[id] = self.getAstralDomainsFromSQL(astralEv_to_sql[id])
736
737 else:
738 if not self.path:
739 raise RuntimeError("No scopseq directory specified")
740
741 file_prefix = "astral-scopdom-seqres-sel-gs"
742 filename = "%s-e100m-%s-%s.id" % (file_prefix, astralEv_to_file[id] ,
743 self.version)
744 filename = os.path.join(self.path,filename)
745 self.EvDatasets[id] = self.getAstralDomainsFromFile(filename)
746 return self.EvDatasets[id]
747
749 """get domains clustered by percent id"""
750 if id not in self.IdDatasets:
751 if self.db_handle:
752 self.IdDatasets[id] = self.getAstralDomainsFromSQL("id"+str(id))
753 else:
754 if not self.path:
755 raise RuntimeError("No scopseq directory specified")
756
757 file_prefix = "astral-scopdom-seqres-sel-gs"
758 filename = "%s-bib-%s-%s.id" % (file_prefix, id, self.version)
759 filename = os.path.join(self.path,filename)
760 self.IdDatasets[id] = self.getAstralDomainsFromFile(filename)
761 return self.IdDatasets[id]
762
763 - def getAstralDomainsFromFile(self,filename=None,file_handle=None):
764 """Get the scop domains from a file containing a list of sids"""
765 if file_handle is None and filename is None:
766 raise RuntimeError("You must provide a filename or handle")
767 if not file_handle:
768 file_handle = open(filename)
769 doms = []
770 while 1:
771 line = file_handle.readline()
772 if not line:
773 break
774 line = line.rstrip()
775 doms.append(line)
776 if filename:
777 file_handle.close()
778
779 doms = filter( lambda a: a[0]=='d', doms )
780 doms = map( self.scop.getDomainBySid, doms )
781 return doms
782
783 - def getAstralDomainsFromSQL(self, column):
784 """Load a set of astral domains from a column in the astral table of a MYSQL
785 database (which can be created with writeToSQL(...)"""
786 cur = self.db_handle.cursor()
787 cur.execute("SELECT sid FROM astral WHERE "+column+"=1")
788 data = cur.fetchall()
789 data = map( lambda x: self.scop.getDomainBySid(x[0]), data)
790
791 return data
792
794 """get the seq record of a given domain from its sid"""
795 if self.db_handle is None:
796 return self.fasta_dict[domain].seq
797 else:
798 cur = self.db_handle.cursor()
799 cur.execute("SELECT seq FROM astral WHERE sid=%s", domain)
800 return Seq(cur.fetchone()[0])
801
805
806 - def hashedDomainsById(self,id):
807 """Get domains clustered by sequence identity in a dict"""
808 if id not in self.IdDatahash:
809 self.IdDatahash[id] = {}
810 for d in self.domainsClusteredById(id):
811 self.IdDatahash[id][d] = 1
812 return self.IdDatahash[id]
813
814 - def hashedDomainsByEv(self,id):
815 """Get domains clustered by evalue in a dict"""
816 if id not in self.EvDatahash:
817 self.EvDatahash[id] = {}
818 for d in self.domainsClusteredByEv(id):
819 self.EvDatahash[id][d] = 1
820 return self.EvDatahash[id]
821
822 - def isDomainInId(self,dom,id):
823 """Returns true if the domain is in the astral clusters for percent ID"""
824 return dom in self.hashedDomainsById(id)
825
826 - def isDomainInEv(self,dom,id):
827 """Returns true if the domain is in the ASTRAL clusters for evalues"""
828 return dom in self.hashedDomainsByEv(id)
829
831 """Write the ASTRAL database to a MYSQL database"""
832 cur = db_handle.cursor()
833
834 cur.execute("DROP TABLE IF EXISTS astral")
835 cur.execute("CREATE TABLE astral (sid CHAR(8), seq TEXT, PRIMARY KEY (sid))")
836
837 for dom in self.fasta_dict:
838 cur.execute("INSERT INTO astral (sid,seq) values (%s,%s)",
839 (dom, self.fasta_dict[dom].seq.data))
840
841 for i in astralBibIds:
842 cur.execute("ALTER TABLE astral ADD (id"+str(i)+" TINYINT)")
843
844 for d in self.domainsClusteredById(i):
845 cur.execute("UPDATE astral SET id"+str(i)+"=1 WHERE sid=%s",
846 d.sid)
847
848 for ev in astralEvs:
849 cur.execute("ALTER TABLE astral ADD ("+astralEv_to_sql[ev]+" TINYINT)")
850
851 for d in self.domainsClusteredByEv(ev):
852 cur.execute("UPDATE astral SET "+astralEv_to_sql[ev]+"=1 WHERE sid=%s",
853 d.sid)
854
855
856 -def search(pdb=None, key=None, sid=None, disp=None, dir=None, loc=None,
857 cgi='http://scop.mrc-lmb.cam.ac.uk/scop/search.cgi', **keywds):
858 """search(pdb=None, key=None, sid=None, disp=None, dir=None, loc=None,
859 cgi='http://scop.mrc-lmb.cam.ac.uk/scop/search.cgi', **keywds)
860
861 Access search.cgi and return a handle to the results. See the
862 online help file for an explanation of the parameters:
863 http://scop.mrc-lmb.cam.ac.uk/scop/help.html
864
865 Raises an IOError if there's a network error.
866
867 """
868 params = {'pdb' : pdb, 'key' : key, 'sid' : sid, 'disp' : disp,
869 'dir' : dir, 'loc' : loc}
870 variables = {}
871 for k, v in params.iteritems():
872 if v is not None:
873 variables[k] = v
874 variables.update(keywds)
875 return _open(cgi, variables)
876
877
878 -def _open(cgi, params={}, get=1):
879 """_open(cgi, params={}, get=1) -> UndoHandle
880
881 Open a handle to SCOP. cgi is the URL for the cgi script to access.
882 params is a dictionary with the options to pass to it. get is a boolean
883 that describes whether a GET should be used. Does some
884 simple error checking, and will raise an IOError if it encounters one.
885
886 """
887 import urllib
888 import urllib2
889
890 options = urllib.urlencode(params)
891 try:
892 if get:
893 if options:
894 cgi += "?" + options
895 handle = urllib2.urlopen(cgi)
896 else:
897 handle = urllib2.urlopen(cgi, data=options)
898 except urllib2.HTTPError, exception:
899 raise exception
900 return handle
901