source: TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/Utilities.py @ 4258

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/Utilities.py@4258
Revision 4258, 5.8 KB checked in by cbyrom, 11 years ago (diff)

Extend the idget function to cope with MDIP records, and use it to remove the need for the DIF and MDIP models, which are deleted from the codebase.
Line 
1from collections import deque # python 2.4
2try: #python 2.5
3    from xml.etree import ElementTree as ET
4except:
5    #ActivePython-2.4                      #SEL 08/01/2007
6    import elementtree.ElementTree as ET   #SEL 08/01/2007
7       
8from ndgUtils.ETxmlView import *
9import ConfigParser
10import os
11import re
12import urllib
13import logging
14
15__NOCONFIG='Missing Config File'
16
class myConfig:

    ''' Handle missing sections and variables in a config file a bit gracefully.
    Also instantiates a logger if the config file asks for one.

    The logger is only created when the [logging] section of the config file
    has a debugLog entry; that entry is used as the log file path. '''

    def __init__(self, configfile, logName='NDGLOG'):
        ''' Read configfile and, if configured, set up a file logger.

        configfile -- path of the config file to read
        logName    -- name passed to logging.getLogger for the optional logger

        Raises IOError if configfile does not exist. '''
        # Raise a real exception with the message inline: a module global whose
        # name starts with two underscores cannot be referenced from inside a
        # class body (the name gets mangled), and string exceptions are not
        # supported by modern Python.
        if not os.path.exists(configfile):
            raise IOError('Missing Config File: %s' % configfile)
        self.config = ConfigParser.ConfigParser()
        self.config.read(configfile)
        logfile = self.get('logging', 'debugLog', None)
        self.logfile = None  # deprecated
        self.logger = None
        if logfile is not None:
            # NOTE(review): getLogger returns the same logger object for the
            # same name, so building several myConfig instances with the same
            # logName attaches one more FileHandler each time.
            logger = logging.getLogger(logName)
            handler = logging.FileHandler(logfile)
            formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
            handler.setFormatter(formatter)
            logger.addHandler(handler)
            logger.setLevel(logging.INFO)
            self.logger = logger

    def get(self, section, key, default=None):
        ''' Return the config file value for key from section, or default if
        the section or key is missing (or the value cannot be interpolated). '''
        try:
            return self.config.get(section, key)
        except ConfigParser.Error:
            return default

    def log(self, string):
        ''' Log some debug information at INFO level; does nothing when no
        logger was configured. A string of None is logged as a placeholder. '''
        if self.logger is None:
            return
        if string is not None:
            self.logger.info(string)
        else:
            self.logger.info('empty log entry')

    def getLog(self):
        ''' Return the logger instance, or None if logging is not configured. '''
        return self.logger
55
class RingBuffer(deque):
    ''' A deque that discards its oldest entry once it grows past size_max.

    Only append() enforces the bound; the other deque methods are inherited
    unchanged. '''
    # deque is a python 2.4 class!
    # credit http://www.onlamp.com/pub/a/python/excerpt/pythonckbk_chap1/index1.html

    def __init__(self, size_max):
        ''' Create an empty buffer that keeps at most size_max items. '''
        super(RingBuffer, self).__init__()
        self.size_max = size_max

    def append(self, datum):
        ''' Add datum on the right, dropping the leftmost item on overflow. '''
        super(RingBuffer, self).append(datum)
        overflowing = len(self) > self.size_max
        if overflowing:
            self.popleft()

    def tolist(self):
        ''' Return the current contents, oldest first, as a plain list. '''
        contents = list(self)
        return contents
68
def wrapGetText(element, xpathExpression, multiple=0):
    ''' Wraps a call to ET to get element text, never returning None.

    element         -- an ElementTree element to search, or None
    xpathExpression -- path handed to element.find / element.findall
    multiple        -- if true, return the text of every match as a list;
                       otherwise return the text of the first match only

    A missing element, a missing match or an empty text node all yield ''.
    With multiple set, a None element gives [''] and no matches gives []. '''
    if element is None:
        if multiple:
            return ['']
        return ''
    if multiple:
        matches = element.findall(xpathExpression)
    else:
        # find() returns None when nothing matches; handled in the loop below.
        matches = [element.find(xpathExpression)]
    texts = []
    for node in matches:
        text = None
        if node is not None:
            text = node.text
        if text is None:
            text = ''
        texts.append(text)
    # A real list (not map()) so that indexing also works on Python 3.
    if multiple:
        return texts
    return texts[0]
90
def getURLdict(cgiFieldStorage):
    ''' Takes a cgi field storage object and converts it to a dictionary.

    Each key maps to the field's .value. A field submitted more than once is
    held by cgi.FieldStorage as a list of field objects; such a key maps to
    the list of their values instead of raising AttributeError. '''
    result = {}
    for item in cgiFieldStorage:
        field = cgiFieldStorage[item]
        if isinstance(field, list):
            result[item] = [entry.value for entry in field]
        else:
            result[item] = field.value
    return result
##
### convert the following two methods into one class that can handle
## xml directly too if necessary
##
def DIFid2NDGid(string):
    ''' Takes a DIF identifier string and produces an ET <DIFid> element.

    An identifier of the form repository:scheme:local becomes
        <DIFid><schemeIdentifier>scheme</schemeIdentifier>
               <repositoryIdentifier>repository</repositoryIdentifier>
               <localIdentifier>local</localIdentifier></DIFid>
    (any further ':'-separated parts are ignored). A string with fewer than
    three parts is kept whole as the localIdentifier, with scheme 'DIF' and
    repository 'Unknown'.

    The element is built directly rather than parsed from an interpolated
    XML string, so identifiers containing '&' or '<' no longer make the
    parser fail; the children therefore carry no whitespace text. '''
    s = string.split(':')
    if len(s) >= 3:
        scheme, repository, local = s[1], s[0], s[2]
    else:
        scheme, repository, local = 'DIF', 'Unknown', string
    root = ET.Element('DIFid')
    ET.SubElement(root, 'schemeIdentifier').text = scheme
    ET.SubElement(root, 'repositoryIdentifier').text = repository
    ET.SubElement(root, 'localIdentifier').text = local
    return root
115
def EnumerateString(string):
    ''' Takes a string, and if it's got a number on the end, increments it,
    otherwise adds a '1' on the end. Used to differentiate strings which
    would otherwise be identical.

    The trailing number is re-rendered with str(int(...)+1), so leading
    zeros are not preserved ('007' becomes '8'). '''
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    match = re.search(r'\d+$', string)
    if match is None:
        return string + '1'
    bumped = str(int(match.group()) + 1)
    # '$' may match just before a final newline, so keep whatever follows.
    return string[:match.start()] + bumped + string[match.end():]
126
def dateParse(string, instruction):
    ''' Simple date manipulations on a string, if it is understood ...

    string      -- a date written as three '-'-separated parts
    instruction -- only 'YYYY' is supported: return the year

    For 'YYYY' the year is taken to be the larger of the first and last
    parts (year-mon-day or day-mon-year). A string that is not three parts,
    or whose first or last part is not an integer, is returned unchanged.
    Any other instruction returns an explanatory message string. '''
    s = string.split('-')
    if instruction != 'YYYY':
        return 'unknown instruction to dateParse %s' % instruction
    if len(s) != 3:
        return string  # unknown format as yet ...
    try:
        first, last = int(s[0]), int(s[2])
    except ValueError:
        # Not numeric (e.g. a month name or a time suffix): treat as an
        # unknown format rather than crashing.
        return string
    if first > last:
        return s[0]
    return s[2]
141
def idget(xml, dataType='DIF'):
    ''' Given an xml document (string), parse it using ElementTree and
    find the identifier within it. Supports dataTypes of 'DIF' and 'MDIP'...

    xml      -- the document, as a string
    dataType -- 'DIF' (identifier read from Entry_ID) or
                'MDIP' (identifier read from DatasetIdentifier)

    Raises TypeError for any other dataType; this is checked before the
    document is parsed.
    '''
    idTags = {'DIF': 'Entry_ID', 'MDIP': 'DatasetIdentifier'}
    if dataType not in idTags:
        raise TypeError('idget does not support datatype [%s]' % dataType)
    # loadET and nsdumb are not defined in this file; presumably they come
    # from the ndgUtils.ETxmlView star import -- confirm.
    et = loadET(xml)
    helper = nsdumb(et)
    # Both data types search the tree parsed above (this is a plain function,
    # so there is no self.tree to look in).
    return helper.getText(et, idTags[dataType])
154
155import unittest
156
class TestCase(unittest.TestCase):
    """ Tests as required.

    The example config and DIF files are opened by relative path, so the
    tests expect to run from the directory holding 'examples/'.
    NOTE: an earlier, disabled variant of setUp read its input through
    pkg_resources for eggified installs; it is not active.
    """

    configFile = 'examples/example.config'
    difFile = 'examples/neodc.eg1.dif'

    def setUp(self):
        """ Load the example config and read the example DIF document. """
        self.config = myConfig(self.configFile)
        f = open(self.difFile, 'r')
        try:
            self.difxml = f.read()
        finally:
            # Close the handle even if the read fails.
            f.close()

    def testConfig(self):
        """ Print the DISCOVERY/icon setting (smoke test, no assertion). """
        print('Discovery Icon [%s]' % self.config.get('DISCOVERY', 'icon'))

    def testidget(self):
        """ The example DIF record must yield its Entry_ID. """
        self.assertEqual(idget(self.difxml), 'NOCSDAT192')
184   
185
186if __name__=="__main__":
187    unittest.main()
188
189
190
Note: See TracBrowser for help on using the repository browser.