source: TI07-MOLES/trunk/PythonCode/wsgi/ndgRetrieve.py @ 2448

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI07-MOLES/trunk/PythonCode/wsgi/ndgRetrieve.py@2448
Revision 2448, 10.2 KB checked in by lawrence, 12 years ago (diff)

Changes to make cleaner logging in discovery/browse ..

Line 
1from Utilities import myConfig
2from paste.request import parse_querystring
3from renderEntity import renderEntity
4import xmlHandler
5from DC import DC
6from DIF import DIF
7from stubB import stubB
8from ndgObject import ndgObject
9from MDIP import MDIP
10from ndgSearch import ndgSearch
11import time
12
13# any class with a get method can be used here, and then only
14# the instantiation within ndgRetrieve would need
15# modification to use it ...
16
17from DocumentRetrieve import DocumentRetrieve
# When true, ndgRetrieve._present lets rendering errors propagate (useful
# for tracebacks) instead of converting them into an html error paragraph.
debug=0
def retrieveFactory(global_config,**local_conf):

    ''' This factory is intended to be used by paste-deploy to return
    the ndgRetrieve wsgi application, configured to use the configDir
    to find local configuration information.

    global_config -- mapping of global settings; its 'configDir' entry is
                     only used when local_conf does not provide one
    local_conf    -- per-application settings; a 'configDir' entry here wins

    Returns an ndgRetrieve instance. If neither mapping provides
    'configDir', None is passed on to ndgRetrieve. '''

    if 'configDir' in local_conf:
        # index with the string key (the previous code used the unbound
        # name configDir here and so failed whenever the key was present)
        configDir=local_conf['configDir']
    else:
        try:
            configDir=global_config['configDir']
        except (KeyError,TypeError,AttributeError):
            # no usable global setting either
            configDir=None
    return ndgRetrieve(configDir)
34
class showDoc:
    ''' Given an xml document (xml), of format ('DIF','DC','NDG-B1','MDIP'), return
    either the document rendered as "pretty" xml for printing, or as html
    rendered by an appropriate application'''

    def __init__(self,config):
        ''' Keep the configuration and build the renderEntity used for html output '''
        self.config=config
        self.renderer=renderEntity(self.config)

    def get(self,xml,format,otype='html',ndgObj=None):
        ''' Return xml in the requested output type:
             - otype='xml'   : xml is handed back untouched,
             - otype='print' : the html attribute of an xmlHandler built from xml,
             - otype='html'  : the document wrapped in the class matching format
                               and passed through the renderer.
        ndgObj is only forwarded to the DIF wrapper.
        Raises TypeError for an unknown otype and ValueError for a format
        which cannot be rendered as html. '''
        if otype=='xml':
            return xml
        if otype=='print':
            return xmlHandler.xmlHandler(xml,string=1).html
        if otype!='html':
            raise TypeError('Invalid output type [%s]'%otype)

        # html output: pick the wrapper class for this document format
        if format=='DIF':
            doc=DIF(xml,ndgObj=ndgObj)
        elif format=='NDG-B1':
            doc=stubB(xml,self.config)
        elif format=='DC':
            doc=DC(xml)
        elif format=='MDIP':
            doc=MDIP(xml)
        else:
            raise ValueError('Unknown format [%s] for rendering'%format)
        return self.renderer.render(doc)
65
class ndgRetrieve:

    ''' This is the ndgRetrieve application which simply allows one to enter a uri
    of a document held in a LOCAL exist repository and return one of
       - the raw document (type=xml),
       - an html version fully rendered (type=html), or
       - a pretty printed version suitable for printing (type=print, which is
         what is used when no type is given)
    Note that this WSGI application SHOULD be wrapped by an ndgGatekeeper MIDDLEWARE application and
    not exposed directly to users (except in testing of course :-) '''

    def __init__(self,configDir):

        ''' Instantiate the wsgi document retrieval application.
        configDir is the directory holding ndgDiscovery.config; None or an
        empty string means the current working directory. '''
        import os.path
        self.configDir=configDir
        # join rather than concatenate so that a missing trailing slash
        # (or a configDir of None) still gives a usable path
        self.config=myConfig(os.path.join(configDir or '','ndgDiscovery.config'))
        self.logger=None
        self.requestor='None'

        self.shower=showDoc(self.config)
        # nb ... we don't instantiate the retrieval web service, since
        # we expect to potentially have multiple web services pointing
        # at multiple databases (eg. BADC + NEODC + NDG ) ...
        self.retrieveWS=DocumentRetrieve
        # cache of service instances, keyed by (repository, uses ndgSearch)
        self.WScursors={}

    def __call__(self,environ,start_response):

        ''' This is the function which implements the document retrieval.
        Standard WSGI signature; returns a one element list holding the
        encoded response body. A ValueError raised by _present (no uri, or
        a uri which cannot be parsed) is not handled here. '''
        # the logger is optional: _present falls back to print without one
        self.logger=environ.get('wsgilog.logger')
        self.requestor=environ.get('REMOTE_ADDR','None')
        inputs=dict(parse_querystring(environ))
        browse=0
        if environ.get('SCRIPT_NAME')=='/browse': browse=1

        # default content type is html (since the default may be an error
        # return ...); _present switches to text/xml for raw documents.
        ctype,r=self._present(inputs,'text/html',browse=browse)
        # only text needs encoding: encoding a byte string would first try
        # to decode it as ascii under python 2, and fail on non-ascii content
        if not isinstance(r,bytes):
            r=r.encode('utf-8')
        # charset is a parameter of Content-Type, not a header of its own
        start_response('200 OK',[('Content-Type','%s; charset=utf-8'%ctype)])
        return [r]

    def _present(self,inputs,ctype,browse=0):
        ''' This is the entry point for an inner retrieval loop for document retrieval.
        inputs -- dictionary of request arguments; the keys read are 'uri',
                  'type', 'repository', 'format' and 'original'
        ctype  -- content type to report unless raw xml is returned
        browse -- if true the NDG-B metadata collection is searched
        Returns (content type, document or html error paragraph).
        Raises ValueError carrying the prompt form when no uri is given;
        ndgObject is expected to raise ValueError for a bad uri. '''
        # we separate this out for ease of building test cases
        from xml.sax.saxutils import escape

        outputType=inputs.get('type','print')

        if 'uri' not in inputs:
            raise ValueError(self.__prompt())

        #this will raise a ValueError if there is a problem
        uri=ndgObject(inputs['uri'])
        uri.setConfig(self.config)
        self.uri=uri
        if 'repository' in inputs:
            db=inputs['repository']
        else:
            db=uri.repository
        if 'format' in inputs:
            format=inputs['format']
        else:
            format=uri.schema

        # which service is needed depends on the format as well as on the
        # repository, so both go into the cache key (keyed on the repository
        # alone, an 'original' request would pin ndgSearch for every later
        # request to that repository, and vice versa)
        useSearch=(db.upper()=='NDG' or format.upper()=='ORIGINAL')
        cursorKey=(db,useSearch)
        try:
            ws=self.WScursors[cursorKey]
        except KeyError:
            if useSearch:
                ws=ndgSearch()
            else:
                ws=self.retrieveWS(db)
            self.WScursors[cursorKey]=ws

        # originals cannot be rendered as html, so fall back to printable xml
        if 'original' in inputs and outputType=='html':
            outputType='print'  # sanity ...

        if browse or format=='NDG-B1':
            target='/db/ndg_B_metadata'
        else:
            target='/db/discovery/moles'

        try:
            #do the actual search:
            time1=time.time()
            r=ws.get(uri.repository,uri.schema,uri.localID,format=format,targetCollection=target)
            time2=time.time()-time1
            logString='Document retrieve [%s] for [%s] took [%ss]'%(uri.uri,self.requestor,time2)
            if self.logger:
                self.logger.info(logString)
            else:
                print(logString)
        except Exception as e:
            # the uri comes from the request, so escape before embedding in html
            return ctype,'<p> Unable to retrieve [%s], reason was [%s]</p>'%(
                escape('%s'%uri.uri),escape('%s'%e))

        # an integer result is a count of matches rather than a document
        if isinstance(r,int):
            return ctype,'<p> There are %s identifiers matching your request! </p>'%r

        if outputType=='xml':
            return 'text/xml',r

        if debug:
            # let rendering problems propagate so that they can be traced
            r=self.shower.get(r,format,otype=outputType,ndgObj=self.uri)
        else:
            try:
                r=self.shower.get(r,format,otype=outputType,ndgObj=self.uri)
            except Exception as e:
                r='<p> Unable to show document, reason was [%s]</p>'%escape('%s'%e)
        return ctype,r


    def __prompt(self):
        ''' If the URI is not an argument to the request, prompt the user for a URI '''
        h='''
        <form method='get' action=''>
        <p>
          Enter a Document Identifier to View: 
            <input type='text' name='uri'/><br/>
            (uri format: e.g. badc.nerc.ac.uk:DIF:xyz39) <br/>
            (Querying <input type='text' name='repository' value='gepidae.esc.rl.ac.uk'/>)<br/>
            Return: <input type="radio" name="type" value="xml"/> XML Raw
                <input type="radio" name="type" value="html"/> Formatted
                <input type="radio" name="type" value="print"/> XML Printable
          <input type='submit'/>
        </p>
        </form>
        '''
        return h
204   
205import unittest
206#testURI='noc.soton.ac.uk__DIF__NOCSDAT193'
207#testURI='ndg.noc.soton.ac.uk__NDG-B0__NOCSDAT274'
208#testURI='grid.bodc.nerc.ac.uk__DIF__EDMED1048034'
209#testURI='grid.bodc.nerc.ac.uk__DIF__EDMED1048008'
# (this one used to be assigned and then immediately overwritten)
#testURI='bgs.nerc.ac.uk__DIF__GEOP_EAST'
testURI='neodc.nerc.ac.uk__DIF__NEODC_NEXTMAP'
# NOTE(review): built at import time, so merely importing this module
# constructs an ndgRetrieve configured from the current working directory.
retriever=ndgRetrieve('./')
class TestCase(unittest.TestCase):
    ''' Retrieval checks run through the module level retriever.
    Methods named Atest... do not start with "test", so the default
    unittest loader does not run them. '''

    def _retrieve(self,inputs,browse=0):
        ''' Run inputs through retriever._present and fail the test if the
        result is an error paragraph (i.e. starts with <p>). Returns the
        (content type, document) pair otherwise. '''
        ctype,r=retriever._present(inputs,'text/html',browse=browse)
        if r[0:3]=='<p>':
            self.fail(r)
        return ctype,r

    def AtestOriginal(self):
        ''' Tests the ability to obtain an original document via xmlrpc '''
        self._retrieve({'repository':'glue.badc.rl.ac.uk','uri':testURI,
                        'format':'original','type':'html','original':'1'})

    def AtestSOAPOriginal(self):
        ''' Tests the ability to obtain an original document via soap '''
        self._retrieve({'repository':'ndg','uri':testURI,
                        'format':'original','type':'html','original':'1'})

    def AtestDIF(self):
        ''' Tests the ability to obtain a DIF document '''
        self._retrieve({'repository':'glue.badc.rl.ac.uk','uri':testURI,
                        'format':'DIF','type':'html'})

    def AtestMDIP(self):
        ''' Tests the ability to obtain an MDIP document '''
        self._retrieve({'repository':'glue.badc.rl.ac.uk','uri':testURI,
                        'format':'MDIP','type':'html'})

    def AtestISO(self):
        ''' Tests the ability to obtain an ISO document '''
        self._retrieve({'repository':'glue.badc.rl.ac.uk','uri':testURI,
                        'format':'ISO19139','type':'html'})

    def AtestDC(self):
        ''' Tests the ability to obtain a DC document '''
        self._retrieve({'repository':'glue.badc.rl.ac.uk','uri':testURI,
                        'format':'DC','type':'html'})

    def testCh(self):
        ''' test link to local repository for NDG-B0'''
        testURI='badc.nerc.ac.uk__NDG-B0__dataent_chablis'
        self._retrieve({'repository':'chinook.badc.rl.ac.uk','uri':testURI,
                        'format':'NDG-B0','type':'xml'},browse=1)

    def testCHstub(self):
        ''' test link to local repository, asking for the NDG-B1 form of an NDG-B0 record'''
        #testURI='badc.nerc.ac.uk__NDG-B0__dataent_chablis'
        testURI='badc.nerc.ac.uk__NDG-B0__activity_activity_faam'
        self._retrieve({'repository':'chinook.badc.rl.ac.uk','uri':testURI,
                        'format':'NDG-B1','type':'xml'},browse=1)
259       
# run the test cases above when this file is executed as a script
if __name__=="__main__":
    unittest.main()
262   
Note: See TracBrowser for help on using the repository browser.