Package rdkit :: Package Dbase :: Package Pubmed :: Module Records
[hide private]
[frames | no frames]

Source Code for Module rdkit.Dbase.Pubmed.Records

  1  # $Id$ 
  2  # 
  3  # Copyright (C) 2003-2006 Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  from xml.etree import ElementTree 
 12  # check the version of ElementTree.  We need at least version 1.2 
 13  # in order for the XPath-style parsing stuff to work 
 14  import re 
 15  vers = re.split("[a-zA-Z]",ElementTree.VERSION)[0] 
 16  if vers < '1.2': 
 17    raise ImportError('The PubMed record interface requires a version of ElementTree >= 1.2') 
 18   
 19   
class Record(object):
  """Base class wrapping a single ElementTree element.

  Subclasses list the attribute names they populate in `_fieldsOfInterest`;
  every one of those attributes is initialised to an empty string before
  the subclass fills in the values it can find.
  """
  # Empty by default so that the base class itself can be instantiated;
  # subclasses override this with their own list of attribute names.
  _fieldsOfInterest = []

  def __init__(self, element):
    """Store *element* and reset every field of interest to ''.

    Arguments:
      - element: the ElementTree element this record wraps
    """
    for field in self._fieldsOfInterest:
      setattr(self, field, '')
    self._element = element

  def toXML(self):
    """Return the wrapped element serialised as an XML text string.

    The element is serialised with the 'us-ascii' encoding (the default
    used by ElementTree.write) and the resulting bytes are decoded, so
    the return value is a str in which non-ASCII characters appear as
    numeric character references.  Writing straight into an io.StringIO
    fails because that serialisation produces bytes, not text.
    """
    return ElementTree.tostring(self._element, encoding='us-ascii').decode('ascii')
30
class SummaryRecord(Record):
  """Record built from the named 'Item' children of a summary element.

  Every 'Item' element whose 'Name' attribute appears in
  `_fieldsOfInterest` is copied onto the instance as an attribute of the
  same name, with the item's text as its value.
  """
  _fieldsOfInterest=['PubMedId','PubDate','Source','Authors',
                     'Title','Volume','Issue','Pages','Lang',
                     'HasAbstract','RecordStatus']

  def __init__(self, element):
    """Populate the fields of interest from *element*.

    Arguments:
      - element: the ElementTree element to search for 'Item' descendants

    `PubYear` is only set when `PubDate` ends up non-empty; it is the
    text of `PubDate` up to the first space.
    """
    Record.__init__(self, element)
    # Element.getiterator() was removed in Python 3.9; iter() is its
    # replacement and also walks the element and all of its descendants.
    for item in element.iter('Item'):
      # .get() so that an Item lacking a Name attribute is skipped
      # instead of raising KeyError
      name = item.get('Name')
      if name in self._fieldsOfInterest:
        setattr(self, name, item.text)
    if self.PubDate:
      self.PubYear = str(self.PubDate).split(' ')[0]
42
class JournalArticleRecord(Record):
  """Record built from an element containing a 'MedlineCitation' child.

  Besides the string attributes named in `_fieldsOfInterest`, instances
  carry:
    - authors: list of (last name, fore name, initials) tuples
    - keywords: list of strings, one per 'MeshHeading'
    - chemicals: list of strings, one per 'Chemical'
  """
  _fieldsOfInterest=['PubMedId','PubYear','Source','Authors',
                     'Title','Volume','Issue','Pages','Lang',
                     'Abstract']

  def __init__(self, element):
    """Extract the citation data from *element*.

    Arguments:
      - element: ElementTree element with a 'MedlineCitation' child that
        in turn holds 'Article' and 'Article/Journal/JournalIssue'
        elements (these three are accessed unconditionally)
    """
    Record.__init__(self, element)

    cite = self._element.find('MedlineCitation')
    self.PubMedId = cite.findtext('PMID')
    article = cite.find('Article')
    issue = article.find('Journal/JournalIssue')
    self.Volume = issue.findtext('Volume')
    self.Issue = issue.findtext('Issue')
    self.PubYear = issue.findtext('PubDate/Year')
    if not self.PubYear:
      # No explicit year: fall back to the text of MedlineDate up to the
      # first space.  If that is missing too, leave the field empty
      # rather than failing on None.
      txt = issue.findtext('PubDate/MedlineDate')
      self.PubYear = txt.split(' ')[0] if txt else ''
    # NOTE(review): str() turns a missing element (findtext -> None) into
    # the literal text 'None', exactly as the former unicode() calls did.
    self.Title = str(article.findtext('ArticleTitle'))
    self.Pages = article.findtext('Pagination/MedlinePgn')
    abstract = article.findtext('Abstract/AbstractText')
    if abstract:
      self.Abstract = str(abstract)

    self.authors = []
    tmp = []
    authorList = article.find('AuthorList')
    # An article without an AuthorList simply has no authors.
    if authorList is not None:
      # Element.getiterator() was removed in Python 3.9; use iter().
      for author in authorList.iter('Author'):
        last = str(author.findtext('LastName'))
        first = str(author.findtext('ForeName'))
        initials = str(author.findtext('Initials'))
        self.authors.append((last,first,initials))
        tmp.append('%s %s'%(last,initials))
    self.Authors=', '.join(tmp)
    journal = cite.findtext('MedlineJournalInfo/MedlineTA')
    if journal:
      self.Source = str(journal)

    self.ParseKeywords()
    self.ParseChemicals()

  def ParseKeywords(self):
    """Fill `self.keywords` from 'MedlineCitation/MeshHeadingList'.

    Each 'MeshHeading' contributes one string: the text of its
    'DescriptorName', followed by ' / <text>' for every 'QualifierName'
    found below the heading.
    """
    self.keywords = []
    headings = self.find('MedlineCitation/MeshHeadingList')
    # Compare against None explicitly: the truth value of an Element
    # reflects whether it has children and testing it is deprecated.
    if headings is not None:
      for heading in headings.iter('MeshHeading'):
        kw = str(heading.findtext('DescriptorName'))
        for qualifier in heading.iter('QualifierName'):
          kw += ' / %s'%(str(qualifier.text))
        self.keywords.append(kw)

  def ParseChemicals(self):
    """Fill `self.chemicals` from 'MedlineCitation/ChemicalList'.

    Each 'Chemical' contributes 'name <registry number>', or just the
    name when the registry number is missing, empty or '0'.
    """
    self.chemicals = []
    chemicals = self.find('MedlineCitation/ChemicalList')
    if chemicals is not None:
      for chemical in chemicals.iter('Chemical'):
        # Keep the values as text.  Encoding them to bytes would make the
        # comparison with '0' always unequal and would leak b'...' reprs
        # into the formatted strings.
        name = chemical.findtext('NameOfSubstance')
        rn = chemical.findtext('RegistryNumber')
        if rn and rn != '0':
          self.chemicals.append('%s <%s>'%(name,rn))
        else:
          self.chemicals.append('%s'%(name))

  # --------------------------------------------
  #
  #  We'll expose these ElementTree methods in case
  #  client code wants to pull extra info
  #
  def getiterator(self, key=None):
    """Return a list with the wrapped element and its descendants.

    When *key* is given, only elements with that tag are included.
    Implemented with Element.iter(), because Element.getiterator() was
    removed in Python 3.9.
    """
    return list(self._element.iter(key))

  def find(self, key):
    """Return the result of find(key) on the wrapped element."""
    return self._element.find(key)

  def findtext(self, key):
    """Return the result of findtext(key) on the wrapped element."""
    return self._element.findtext(key)

  def findall(self, key):
    """Return the result of findall(key) on the wrapped element."""
    return self._element.findall(key)
121
class LinkRecord(Record):
  """Record for a link element.

  `PubMedId` is the text of the wrapped element; `HasNeighbor` is 1 when
  the element's 'HasNeighbor' attribute equals 'Y' and 0 otherwise
  (including when the attribute is absent).
  """
  _fieldsOfInterest = []

  def __init__(self, element):
    """Read the id and the neighbor flag from *element*."""
    Record.__init__(self, element)
    node = self._element
    self.PubMedId = node.text
    flag = node.get('HasNeighbor', 'N')
    self.HasNeighbor = 1 if flag == 'Y' else 0
132