Package Bio :: Package Medline
[hide private]
[frames | no frames]

Source Code for Package Bio.Medline

  1  # Copyright 1999 by Jeffrey Chang.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Code to work with Medline from the NCBI. 
  7   
  8  Classes: 
  9   - Record           A dictionary holding Medline data. 
 10   
 11  Functions: 
 12   - read             Reads one Medline record 
 13   - parse            Allows you to iterate over a bunch of Medline records 
 14  """ 
 15   
 16  __docformat__ = "restructuredtext en" 
 17   
 18   
class Record(dict):
    """Dictionary subclass that stores the fields of one Medline record.

    Every value is filed under the mnemonic used for it in the Medline
    file. The mnemonics are interpreted as follows:

    ========= ==============================
    Mnemonic  Description
    --------- ------------------------------
    AB        Abstract
    CI        Copyright Information
    AD        Affiliation
    IRAD      Investigator Affiliation
    AID       Article Identifier
    AU        Author
    FAU       Full Author
    CN        Corporate Author
    DCOM      Date Completed
    DA        Date Created
    LR        Date Last Revised
    DEP       Date of Electronic Publication
    DP        Date of Publication
    EDAT      Entrez Date
    GS        Gene Symbol
    GN        General Note
    GR        Grant Number
    IR        Investigator Name
    FIR       Full Investigator Name
    IS        ISSN
    IP        Issue
    TA        Journal Title Abbreviation
    JT        Journal Title
    LA        Language
    LID       Location Identifier
    MID       Manuscript Identifier
    MHDA      MeSH Date
    MH        MeSH Terms
    JID       NLM Unique ID
    RF        Number of References
    OAB       Other Abstract
    OCI       Other Copyright Information
    OID       Other ID
    OT        Other Term
    OTO       Other Term Owner
    OWN       Owner
    PG        Pagination
    PS        Personal Name as Subject
    FPS       Full Personal Name as Subject
    PL        Place of Publication
    PHST      Publication History Status
    PST       Publication Status
    PT        Publication Type
    PUBM      Publishing Model
    PMC       PubMed Central Identifier
    PMID      PubMed Unique Identifier
    RN        Registry Number/EC Number
    NM        Substance Name
    SI        Secondary Source ID
    SO        Source
    SFM       Space Flight Mission
    STAT      Status
    SB        Subset
    TI        Title
    TT        Transliterated Title
    VI        Volume
    CON       Comment on
    CIN       Comment in
    EIN       Erratum in
    EFR       Erratum for
    CRI       Corrected and Republished in
    CRF       Corrected and Republished from
    PRIN      Partial retraction in
    PROF      Partial retraction of
    RPI       Republished in
    RPF       Republished from
    RIN       Retraction in
    ROF       Retraction of
    UIN       Update in
    UOF       Update of
    SPIN      Summary for patients in
    ORI       Original report in
    ========= ==============================
    """
102 103
def _join_text_fields(record, textkeys):
    """Collapse the list stored under each text key of record into one string."""
    for field in record:
        if field in textkeys:
            # Only values are replaced, so iterating the dict here is safe.
            record[field] = " ".join(record[field])


def parse(handle):
    """Read Medline records one by one from the handle.

    The handle is either a Medline file, a file-like object, or a list
    of lines describing one or more Medline records.

    Records are separated by blank lines. Each record is yielded as a
    Record keyed by field mnemonic. Fields listed in ``textkeys`` below
    are joined into a single space-separated string; every other field
    is a list with one string per line read for that field.

    A continuation line that appears before any field line of the
    current record raises KeyError, because there is no entry to extend.

    Typical usage::

        from Bio import Medline
        with open("mymedlinefile") as handle:
            records = Medline.parse(handle)
            for record in records:
                print(record['TI'])

    """
    # TODO - Turn that into a working doctest
    # These keys point to string values
    textkeys = ("ID", "PMID", "SO", "RF", "NI", "JC", "TA", "IS", "CY", "TT",
                "CA", "IP", "VI", "DP", "YR", "PG", "LID", "DA", "LR", "OWN",
                "STAT", "DCOM", "PUBM", "DEP", "PL", "JID", "SB", "PMC",
                "EDAT", "MHDA", "PST", "AB", "AD", "EA", "TI", "JT")
    handle = iter(handle)

    key = ""
    record = Record()
    for line in handle:
        line = line.rstrip()
        # Field lines carry the mnemonic in the first columns and the value
        # from column 6 on; continuation lines have those six columns blank.
        if line[:6] == "      ":  # continuation line
            if key == "MH":
                # Multi-line MESH term, want to append to last entry in list
                record[key][-1] += line[5:]  # including space using line[5:]
            else:
                record[key].append(line[6:])
        elif line:
            key = line[:4].rstrip()
            if key not in record:
                record[key] = []
            record[key].append(line[6:])
        elif record:
            # Blank line: the current record is complete.
            _join_text_fields(record, textkeys)
            yield record
            record = Record()
    if record:  # catch last one
        _join_text_fields(record, textkeys)
        yield record
154 155
def read(handle):
    """Return the first Medline record found in the handle.

    The handle may be a Medline file, a file-like object, or a list of
    lines describing a Medline record. Parsing is delegated to parse();
    only its first yielded record is returned, and StopIteration
    propagates when it yields nothing.

    Typical usage:

        >>> from Bio import Medline
        >>> with open("mymedlinefile") as handle:
        ...     record = Medline.read(handle)
        ...     print(record['TI'])

    """
    # TODO - Turn that into a working doctest
    return next(parse(handle))
173