source: TI01-discovery/trunk/ingestAutomation/OAIBatch/Utilities.py @ 3785

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI01-discovery/trunk/ingestAutomation/OAIBatch/Utilities.py@3785
Revision 3785, 5.9 KB checked in by selatham, 11 years ago (diff)

Gather required utilities. Improve ElementTree imports in keywordAdder. Put host etc. in arguments for oai_ingest.

Line 
1from collections import deque # python 2.4
2#SEL 08/01/2007
3try: #python 2.5
4    from xml.etree import ElementTree as ET
5except ImportError:
6    try:
7        # if you've installed it yourself it comes this way
8        import elementtree.ElementTree as ET
9    except ImportError:
10        # if you've egged it this is the way it comes
11        from ndgUtils.elementtree import ElementTree as ET
12
13from ETxmlView import *
14import ConfigParser
15import os
16import re
17import urllib
18import logging
19
__NOCONFIG = 'Missing Config File'
# Single-underscore alias: inside a class body the name ``__NOCONFIG`` is
# mangled to ``_myConfig__NOCONFIG``, so the class has to use this name.
_NOCONFIG = __NOCONFIG


class myConfig:

    ''' Handle missing sections and variables in a config file a bit
    gracefully. Also instantiates a logger if the config file has a
    ``debugLog`` entry in its ``[logging]`` section.

    configfile -- path of the config file to read.
    logName    -- name passed to logging.getLogger (default 'NDGLOG').

    Raises IOError if configfile does not exist. '''

    def __init__(self, configfile, logName='NDGLOG'):
        self.config = ConfigParser.ConfigParser()
        if not os.path.exists(configfile):
            # A real exception instance: string exceptions are not
            # supported by current interpreters.
            raise IOError('%s: %s' % (_NOCONFIG, configfile))
        self.config.read(configfile)
        logfile = self.get('logging', 'debugLog', None)
        self.logfile = None  # deprecated
        self.logger = None
        if logfile is not None:
            logger = logging.getLogger(logName)
            # getLogger returns the same object for the same name, so only
            # attach a handler for this file once; otherwise every extra
            # myConfig instance would duplicate each log line.
            target = os.path.abspath(logfile)
            attached = False
            for existing in logger.handlers:
                if getattr(existing, 'baseFilename', None) == target:
                    attached = True
                    break
            if not attached:
                handler = logging.FileHandler(logfile)
                formatter = logging.Formatter(
                    '%(asctime)s %(levelname)s %(message)s')
                handler.setFormatter(formatter)
                logger.addHandler(handler)
            logger.setLevel(logging.INFO)
            self.logger = logger

    def get(self, section, key, default=None):
        ''' Return the config file value for key from section, or default
        if the section or key is missing or the value cannot be read. '''
        try:
            return self.config.get(section, key)
        except ConfigParser.Error:
            # Covers missing section, missing option and interpolation
            # problems without hiding unrelated programming errors.
            return default

    def log(self, string):
        ''' Log some debug information at INFO level. Does nothing when no
        logger was configured; a None message is logged as a placeholder. '''
        if self.logger is None:
            return
        if string is not None:
            self.logger.info(string)
        else:
            self.logger.info('empty log entry')

    def getLog(self):
        ''' Return the logger, or None if logging was not configured. '''
        return self.logger
60
class RingBuffer(deque):
    ''' A deque that keeps at most size_max items; adding to a full buffer
    discards the oldest (left-most) item.

    size_max -- maximum number of items retained. '''
    # deque is a python 2.4 class!
    # credit http://www.onlamp.com/pub/a/python/excerpt/pythonckbk_chap1/index1.html

    def __init__(self, size_max):
        deque.__init__(self)
        self.size_max = size_max

    def append(self, datum):
        ''' Add datum on the right, dropping the oldest item if the buffer
        would otherwise exceed size_max. '''
        deque.append(self, datum)
        if len(self) > self.size_max:
            self.popleft()

    def extend(self, data):
        ''' Append every item of data in order, honouring size_max.

        deque.extend does not go through the overridden append, so without
        this override extend() could grow the buffer past size_max. '''
        # Snapshot first so that extending a buffer with itself is safe.
        for datum in list(data):
            self.append(datum)

    def tolist(self):
        ''' Return the buffer contents, oldest first, as a plain list. '''
        return list(self)
73
def wrapGetText(element, xpathExpression, multiple=0):
    ''' Return the text found at xpathExpression below element, never None.

    element         -- an ElementTree element, or None.
    xpathExpression -- path handed to element.find / element.findall.
    multiple        -- if true, return a list with the text of every match;
                       otherwise return the text of the first match only.

    Missing elements and empty text are reported as ''. A None element
    gives '' (or [''] when multiple is set); with multiple set and no
    matches the result is an empty list. '''
    if element is None:
        if multiple:
            return ['']
        return ''
    if multiple:
        found = element.findall(xpathExpression)
    else:
        # find() returns None when nothing matches; handled below.
        found = [element.find(xpathExpression)]
    # Build a real list (not map()) so indexing works on Python 3 as well.
    texts = []
    for node in found:
        if node is None or node.text is None:
            texts.append('')
        else:
            texts.append(node.text)
    if multiple:
        return texts
    return texts[0]
95
def getURLdict(cgiFieldStorage):
    ''' Take a cgi field storage object and convert it to a dictionary.

    Each key maps to the field's value. A field that occurs more than once
    is returned by the storage as a list of fields, so it maps to the list
    of their values instead of failing on the missing .value attribute. '''
    result = {}
    for item in cgiFieldStorage:
        field = cgiFieldStorage[item]
        if isinstance(field, list):
            result[item] = [entry.value for entry in field]
        else:
            result[item] = field.value
    return result
##
## TODO: convert the following two methods into one class that can also
## handle XML directly if necessary.
##
def DIFid2NDGid(string):
    ''' Take a DIF identifier string and return an ElementTree <DIFid>
    element with schemeIdentifier, repositoryIdentifier and localIdentifier
    children.

    A string with at least three colon-separated parts is read as
    repository:scheme:local (any further parts are ignored). Anything else
    is kept whole as the localIdentifier, with scheme 'DIF' and repository
    'Unknown'. '''
    # Local import keeps this block self-contained.
    from xml.sax.saxutils import escape
    parts = string.split(':')
    if len(parts) >= 3:
        scheme, repository, local = parts[1], parts[0], parts[2]
    else:
        scheme, repository, local = 'DIF', 'Unknown', string
    # Escape the values so that '&', '<' or '>' in an identifier cannot
    # produce malformed XML.
    r = '''<DIFid><schemeIdentifier>%s</schemeIdentifier>
         <repositoryIdentifier>%s</repositoryIdentifier>
         <localIdentifier>%s</localIdentifier></DIFid>''' % (
        escape(scheme), escape(repository), escape(local))
    return ET.fromstring(r)
120
def EnumerateString(string):
    ''' Take a string and, if it has a number on the end, increment that
    number; otherwise add '1' on the end. Used to differentiate strings
    which would otherwise be identical.

    Leading zeros of the trailing number are not preserved
    ('a09' becomes 'a10'). '''
    def addNum(matchObj):
        return str(int(matchObj.group()) + 1)
    # Raw string so that \d is a regex escape, not a string escape.
    result, replaced = re.subn(r'\d+$', addNum, string)
    if not replaced:
        # No trailing digits to increment: start the numbering at 1.
        result = string + '1'
    return result
131
def dateParse(string, instruction):
    ''' Simple date manipulations on a string, if it is understood.

    string      -- a '-' separated date, year-mon-day or day-mon-year.
    instruction -- only 'YYYY' is supported: return the year.

    The year is taken to be the larger of the first and last components.
    A string that is not three '-' separated parts, or whose first or last
    part is not an integer, is returned unchanged. Any other instruction
    returns an explanatory message string. '''
    parts = string.split('-')
    if instruction != 'YYYY':
        return 'unknown instruction to dateParse %s' % instruction
    if len(parts) != 3:
        return string  # unknown format as yet ...
    try:
        first, last = int(parts[0]), int(parts[2])
    except ValueError:
        # Non-numeric components (e.g. a time suffix): treat the value as
        # an unknown format rather than failing.
        return string
    if first > last:
        return parts[0]
    return parts[2]
146
def idget(xml, dataType='DIF'):
    ''' Given an xml document (string), parse it and find the identifier
    within it. Supports dataTypes of 'DIF' ... (actually only DIF for now).

    Returns the text of the document's Entry_ID element.
    Raises TypeError for any other dataType; this is checked before the
    document is parsed. '''
    if dataType != 'DIF':
        raise TypeError('idget does not support datatype [%s]' % dataType)
    # loadET and nsdumb are not defined in this file; presumably they are
    # supplied by the ETxmlView star import -- confirm.
    et = loadET(xml)
    helper = nsdumb(et)
    return helper.getText(et, 'Entry_ID')
158
159import unittest
160
class TestCase(unittest.TestCase):
    """ Tests as required """

    # NOTE: both paths are relative, so they are resolved against the
    # current working directory when the tests run.
    configFile = 'examples/example.config'
    difFile = 'examples/neodc.eg1.dif'

    def setUp(self):
        """ Load the example config and read the example DIF document. """
        self.config = myConfig(self.configFile)
        f = open(self.difFile, 'r')
        try:
            self.difxml = f.read()
        finally:
            # Always release the file handle, even if the read fails.
            f.close()

    def testConfig(self):
        """ Print the 'icon' entry of the DISCOVERY section. """
        print('Discovery Icon [%s]' % self.config.get('DISCOVERY', 'icon'))

    def testidget(self):
        """ The example DIF document should yield its known identifier. """
        self.assertEqual(idget(self.difxml), 'NOCSDAT192')
188   
189
# Run the unit tests when this module is executed as a script.
if __name__ == "__main__":
    unittest.main()
192
193
194
Note: See TracBrowser for help on using the repository browser.