source: TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/Utilities.py @ 4027

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/Utilities.py@4027
Revision 4027, 5.8 KB checked in by cbyrom, 11 years ago (diff)

Add setup script for oai ingester + remove dependencies in this project
on local ndgUtils and csml project files - and remove these from this
codebase, subsequently.

Line 
1from collections import deque # python 2.4
2try: #python 2.5
3    from xml.etree import ElementTree as ET
4except:
5    #ActivePython-2.4                      #SEL 08/01/2007
6    import elementtree.ElementTree as ET   #SEL 08/01/2007
7       
8from ndgUtils.ETxmlView import *
9import ConfigParser
10import os
11import re
12import urllib
13import logging
14
__NOCONFIG='Missing Config File'
# Single-underscore alias: a double-underscore name referenced inside a class
# body is name-mangled (to _myConfig__NOCONFIG), so the class cannot use
# __NOCONFIG directly.
_NOCONFIG_MESSAGE=__NOCONFIG

class myConfig(object):

    ''' Handle missing sections and variables in a config file a bit gracefully. Also
    instantiates a logger if necessary.

    The logger is only created when the config file has a [logging] section
    with a debugLog entry naming the log file; otherwise self.logger is None
    and log() does nothing. '''

    def __init__(self,configfile,logName='NDGLOG'):
        ''' Read configfile and, if it names a debug log, set up a logger.

        configfile -- path of the config file to read
        logName    -- name handed to logging.getLogger

        Raises IOError if configfile does not exist. '''
        self.config=ConfigParser.ConfigParser()
        if not os.path.exists(configfile):
            raise IOError('%s: %s'%(_NOCONFIG_MESSAGE,configfile))
        self.config.read(configfile)
        logfile=self.get('logging','debugLog',None)
        self.logfile=None #deprecated
        self.logger=None
        if logfile is not None:
            logger=logging.getLogger(logName)
            # logging.getLogger returns the same object for the same name, so
            # only attach a file handler if this file is not already attached;
            # otherwise every extra myConfig instance duplicates each entry.
            target=os.path.abspath(logfile)
            attached=False
            for existing in logger.handlers:
                if getattr(existing,'baseFilename',None)==target:
                    attached=True
                    break
            if not attached:
                handler=logging.FileHandler(logfile)
                formatter=logging.Formatter('%(asctime)s %(levelname)s %(message)s')
                handler.setFormatter(formatter)
                logger.addHandler(handler)
            logger.setLevel(logging.INFO)
            self.logger=logger

    def get(self,section,key,default=None):
        ''' Return a config file value for key from section, or default if
        the section or key is missing (or the value cannot be interpolated) '''
        try:
            return self.config.get(section,key)
        except ConfigParser.Error:
            # NoSectionError, NoOptionError and the interpolation errors all
            # derive from ConfigParser.Error.
            return default

    def log(self,string):
        ''' Log some debug information at INFO level; a None message is
        recorded as 'empty log entry'. Does nothing if no logger was set up. '''
        if self.logger is None: return
        if string is not None:
            self.logger.info(string)
        else:
            self.logger.info('empty log entry')

    def getLog(self):
        ''' Return the logger instance (None if logging is not configured) '''
        return self.logger
55
class RingBuffer(deque):
    ''' A deque that keeps at most size_max items when filled via append():
    once the limit is exceeded the oldest (leftmost) item is dropped.
    Only append() enforces the limit. '''
    # deque is a python 2.4 class!
    # credit http://www.onlamp.com/pub/a/python/excerpt/pythonckbk_chap1/index1.html

    def __init__(self, size_max):
        ''' Start empty, remembering size_max as the capacity '''
        super(RingBuffer, self).__init__()
        self.size_max = size_max

    def append(self, datum):
        ''' Add datum on the right, discarding the leftmost item on overflow '''
        super(RingBuffer, self).append(datum)
        overflowed = len(self) > self.size_max
        if overflowed:
            self.popleft()

    def tolist(self):
        ''' Return the current contents, oldest first, as a plain list '''
        return [entry for entry in self]
68
def wrapGetText(element,xpathExpression,multiple=0):
    ''' Wraps a call to ET to get a text object in an error handler.

    element         -- element to search from (may be None)
    xpathExpression -- path handed to element.find / element.findall
    multiple        -- if true, return the text of every match as a list,
                       otherwise return the text of the first match

    Missing elements and elements without text yield ''. With multiple set,
    a None element gives [''] and an element with no matches gives []. '''
    if element is None:
        if multiple:
            return ['',]
        return ''
    if multiple:
        found=element.findall(xpathExpression)
    else:
        # find() returns None when nothing matches
        found=[element.find(xpathExpression),]
    texts=[]
    for node in found:
        # node is None for a failed find(); text is None for an empty element
        text=getattr(node,'text',None)
        if text is None:
            text=''
        texts.append(text)
    if multiple:
        return texts
    return texts[0]
90
def getURLdict(cgiFieldStorage):
    ''' takes a cgi field storage object and converts it to a dictionary

    Each key maps to the field's .value. When indexing the storage yields a
    list of fields (as cgi.FieldStorage does for a repeated field name), the
    key maps to the list of their values instead of raising AttributeError. '''
    result={}
    for item in cgiFieldStorage:
        field=cgiFieldStorage[item]
        if isinstance(field,list):
            result[item]=[entry.value for entry in field]
        else:
            result[item]=field.value
    return result
##
## TODO: convert the following two methods into one class that can handle
## XML directly too, if necessary.
##
def DIFid2NDGid(string):
    ''' Takes a DIF identifier string, parses it and produces an ET element
    <DIFid> with schemeIdentifier, repositoryIdentifier and localIdentifier
    children.

    A string of the form repository:scheme:local is split on ':' (only the
    first three parts are used). Anything with fewer than three parts is
    treated as a bare local identifier with scheme 'DIF' and repository
    'Unknown'. The identifier text is XML-escaped so that characters such as
    '&' and '<' no longer break the parse. '''
    # local import: the module does not import saxutils at file level
    from xml.sax.saxutils import escape
    s=string.split(':')
    if len(s)>=3:
        r='''<DIFid><schemeIdentifier>%s</schemeIdentifier>
         <repositoryIdentifier>%s</repositoryIdentifier>
         <localIdentifier>%s</localIdentifier></DIFid>'''%(
            escape(s[1]),escape(s[0]),escape(s[2]))
    else:
        r='''<DIFid><schemeIdentifier>DIF</schemeIdentifier>
        <repositoryIdentifier>Unknown</repositoryIdentifier>
        <localIdentifier>%s</localIdentifier></DIFid>'''%escape(string)
    return ET.fromstring(r)
115
def EnumerateString(string):
    ''' Takes a string, and if it's got a number on the end, increments it,
    otherwise adds a number on the end, used to differentiate strings which
    would otherwise be identical.

    Examples: 'abc' -> 'abc1', 'abc1' -> 'abc2', 'abc9' -> 'abc10'.
    The trailing number is handled as an integer, so leading zeros are not
    preserved ('007' -> '8'). '''
    def addNum(matchObj):
        # replace the matched run of digits with its value plus one
        return str(int(matchObj.group())+1)
    # raw string: '\d' in a plain literal is an invalid escape sequence
    incremented=re.sub(r'\d+$',addNum,string)
    if incremented==string:
        # nothing was substituted, i.e. there was no trailing number
        return string+'1'
    return incremented
126
def dateParse(string,instruction):
    ''' Simple date manipulations on a string, if it is understood ...
       if instruction = YYYY, return the year.

    For 'YYYY' the string is split on '-'; with exactly three numeric-looking
    outer parts (year,mon,day or day,mon,year) the larger of the first and
    last part is returned as the year. Any other shape, including outer
    parts that are not integers, is returned unchanged as an unknown format.
    Any other instruction returns an explanatory message string. '''
    s=string.split('-')
    if instruction!='YYYY':
        return 'unknown instruction to dateParse %s'%instruction
    if len(s)!=3:
        return string # unknown format as yet ...
    try:
        first,last=int(s[0]),int(s[2])
    except ValueError:
        # e.g. 'Jan-08-2007' or a timestamp suffix: not understood, so hand
        # the input back rather than crash
        return string
    if first>last:
        return s[0]
    return s[2]
141
def idget(xml,dataType='DIF'):
    ''' Given an xml document (string), parse it using ElementTree and
    find the identifier within it. Supports dataTypes of 'DIF' ...
    (actually only DIF for now).

    For 'DIF' the identifier is the text of the Entry_ID element, as
    returned by the nsdumb helper's getText. Raises TypeError for any other
    dataType; this is checked before the document is parsed.
    '''
    if dataType!='DIF':
        raise TypeError('idget does not support datatype [%s]'%dataType)
    # loadET and nsdumb are not defined in this file; presumably they come
    # from the ndgUtils.ETxmlView star import -- confirm.
    et=loadET(xml)
    helper=nsdumb(et)
    return helper.getText(et,'Entry_ID')
153
154import unittest
155
class TestCase(unittest.TestCase):
    """ Tests as required.

    The tests read the example config and DIF files named by the class
    attributes below; the paths are relative to the current directory. """

    configFile='examples/example.config'
    difFile='examples/neodc.eg1.dif'

    def setUp(self):
        """ Load the example config and read the example DIF document """
        self.config=myConfig(self.configFile)
        # open() rather than the Python-2-only file() builtin; try/finally
        # closes the handle even if the read fails
        f=open(self.difFile,'r')
        try:
            self.difxml=f.read()
        finally:
            f.close()

    def testConfig(self):
        """ Print the DISCOVERY icon setting (no assertion is made) """
        print('Discovery Icon [%s]'%self.config.get('DISCOVERY','icon'))

    def testidget(self):
        """ The example DIF should yield the identifier NOCSDAT192 """
        self.assertEqual(idget(self.difxml),'NOCSDAT192')
183   
184
# Run this module's unit tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
187
188
189
Note: See TracBrowser for help on using the repository browser.