source: TI07-MOLES/trunk/PythonCode/wsgi/ndgRetrieve.py @ 2522

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI07-MOLES/trunk/PythonCode/wsgi/ndgRetrieve.py@2522
Revision 2522, 10.8 KB checked in by lawrence, 12 years ago (diff)

Sundry minor improvements in layout etc.

Line 
1from Utilities import myConfig
2from paste.request import parse_querystring
3from renderEntity import renderEntity
4import xmlHandler
5from DC import DC
6from DIF import DIF
7from stubB import stubB
8from ndgObject import ndgObject
9from MDIP import MDIP
10from ndgSearch import ndgSearch
11import time
12import cgi
13
14# any class with a get method can be used here, and then only
15# the instantiation within ndgRetrieve would need
16# modification to use it ...
17
18from DocumentRetrieve import DocumentRetrieve
# Module-wide switch read by ndgRetrieve._present: when true, rendering
# errors propagate to the caller instead of being turned into an html
# error paragraph.
debug = 0
def retrieveFactory(global_config,**local_conf):

    ''' This factory is intended to be used by paste-deploy to return
    the ndgRetrieve wsgi application, configured to use the configDir
    to find local configuration information.

    A 'configDir' entry in local_conf takes precedence over one in
    global_config; if neither supplies it, None is handed to ndgRetrieve. '''

    if 'configDir' in local_conf:
        # the key is the *string* 'configDir'; indexing with the (still
        # unbound) local variable of that name could never succeed
        configDir=local_conf['configDir']
    else:
        try:
            configDir=global_config['configDir']
        except (KeyError,TypeError):
            # no such entry, or no usable global configuration at all
            configDir=None
    return ndgRetrieve(configDir)
35
class showDoc:
    ''' Given an xml document (xml), of format ('DIF','NDG-B1','DC','MDIP'),
    return either the raw document, the document rendered as "pretty" xml
    for printing, or html rendered by an appropriate application '''

    def __init__(self,config):
        ''' Keep the configuration and build the renderer used for html output '''
        self.config=config
        self.renderer=renderEntity(self.config)

    def get(self,xml,format,otype='html',ndgObj=None):
        ''' Return xml presented according to otype:
              - 'xml'   : the document exactly as passed in,
              - 'print' : the html attribute of an xmlHandler built from it,
              - 'html'  : the output of the renderer applied to the wrapper
                          object matching format (ndgObj is passed on to
                          the DIF wrapper only).
        Raises TypeError for any other otype, and ValueError when otype is
        'html' and format is not one of the known formats. The format
        argument is only consulted for html output. '''
        if otype=='xml':
            return xml
        if otype=='print':
            return xmlHandler.xmlHandler(xml,string=1).html
        if otype!='html':
            raise TypeError('Invalid output type [%s]'%otype)

        # html output: wrap the document in the class for its format
        if format=='DIF':
            entity=DIF(xml,ndgObj=ndgObj)
        elif format=='NDG-B1':
            entity=stubB(xml,self.config)
        elif format=='DC':
            entity=DC(xml)
        elif format=='MDIP':
            entity=MDIP(xml)
        else:
            raise ValueError('Unknown format [%s] for rendering'%format)
        return self.renderer.render(entity)
66
class ndgRetrieve:

    ''' This is the ndgRetrieve application which simply allows one to enter a uri
    of a document held in a LOCAL exist repository and return one of
       - the raw document (type=xml),
       - an html version fully rendered (type=html), or
       - a pretty printed version suitable for printing (type=print, which is
         what is used when no type is given)
    Note that this WSGI application SHOULD be wrapped by an ndgGatekeeper MIDDLEWARE application and
    not exposed directly to users (except in testing of course :-) '''

    def __init__(self,configDir):

        ''' Instantiate the wsgi document retrieval application, reading
        ndgDiscovery.config from configDir (None means the current directory) '''
        # function-scope import keeps this class self-contained
        import os
        self.configDir=configDir
        # os.path.join copes with configDir given with or without a trailing
        # separator; 'or ""' lets the None passed by retrieveFactory when
        # nothing is configured fall back to the working directory
        self.config=myConfig(os.path.join(configDir or '','ndgDiscovery.config'))
        self.logger=None
        self.requestor='None'

        self.shower=showDoc(self.config)
        # nb ... we don't instantiate the retrieval web service, since
        # we expect to potentially have multiple web services pointing
        # at multiple databases (eg. BADC + NEODC + NDG ) ...
        self.retrieveWS=DocumentRetrieve
        self.WScursors={}

    def __call__(self,environ,start_response):

        ''' This is the function which implements the document retrieval.
        The response status is always 200 OK; retrieval or rendering problems
        are reported as an html paragraph in the body. '''
        # the logger is optional (every use below is guarded), and WSGI
        # allows SCRIPT_NAME to be left out of environ when it is empty
        self.logger=environ.get('wsgilog.logger')
        self.requestor=environ.get('REMOTE_ADDR','None')
        inputs=dict(parse_querystring(environ))
        browse=0
        if environ.get('SCRIPT_NAME','')=='/browse': browse=1

        # text/html is the starting content type since the result may be an
        # error paragraph; _present switches it to text/xml for raw output
        ctype,r=self._present(inputs,'text/html',browse=browse)
        start_response('200 OK',self._headers(ctype))
        return [self._encode(r)]

    def _headers(self,ctype):
        ''' Return the response header list for a body of content type ctype '''
        # charset is a parameter of the Content-Type header, not a header
        # in its own right
        return [('Content-Type','%s; charset=utf-8'%ctype)]

    def _encode(self,r):
        ''' Return the response body r as a byte string '''
        # only text needs encoding: calling encode on something that is
        # already a byte string makes python 2 decode it as ascii first,
        # which fails on any non-ascii content
        if isinstance(r,bytes): return r
        return r.encode('utf-8')

    def _present(self,inputs,ctype,browse=0):
        ''' This is the entry point for an inner retrieval loop for document retrieval.

        inputs is the dictionary of request arguments ('uri' is required;
        'repository', 'format', 'type' and 'original' are optional) and ctype
        the content type to report unless raw xml is returned. Returns the
        tuple (content type, body). Raises ValueError carrying the html
        prompt form when no uri is given; ndgObject is expected to raise
        ValueError for a uri it cannot handle. '''
        # we separate this out for ease of building test cases
        ori,format=0,None

        if 'original' in inputs: ori=1
        outputType=inputs.get('type','print')

        if 'uri' not in inputs:
            # NOTE(review): the prompt form travels inside the exception, so
            # displaying it is presumably left to the wrapping middleware
            raise ValueError(self.__prompt())

        #this will raise a ValueError if there is a problem
        uri=ndgObject(inputs['uri'])
        uri.setConfig(self.config)
        self.uri=uri
        if 'repository' in inputs:
            db=inputs['repository']
        else:
            db=uri.repository
        if 'format' in inputs:
            format=inputs['format']
        else:
            format=self.uri.schema

        # one web service instance per repository, created on first use
        # NOTE(review): the cache is keyed on the repository alone although
        # the kind of service also depends on the format of the first request
        if db in self.WScursors:
            ws=self.WScursors[db]
        else:
            if db.upper()=='NDG' or format.upper()=='ORIGINAL':
                ws=ndgSearch()
            else:
                ws=self.retrieveWS(db)
            self.WScursors[db]=ws

        ok=1
        try:
            if ori and outputType=='html':
                outputType='print'  # sanity ...
            if browse or format=='NDG-B1':
                target='/db/ndg_B_metadata'
            else:
                target='/db/discovery/moles'

            #do the actual search:
            time1=time.time()
            r=ws.get(uri.repository,uri.schema,uri.localID,format=format,targetCollection=target)
            time2=time.time()-time1
            logString='Document retrieve [%s] for [%s] took [%ss]'%(uri.uri,self.requestor,time2)

            if self.logger:
                self.logger.info(logString)
            else:
                print(logString)

            # an integer result is a count of matches rather than a document
            if isinstance(r,int):
                r='<p> There are %s identifiers matching your request! </p>'%r
                ok=0
        except Exception as e:
            # the uri comes from the request, so escape it like the reason
            r='<p> Unable to retrieve [%s], reason was [%s]</p>'%(
                cgi.escape('%s'%uri.uri),cgi.escape(str(e)))
            ok=0

        if not ok: return ctype,r

        if outputType!='xml':
            if debug:
                # let rendering errors propagate when debugging
                r=self.shower.get(r,format,otype=outputType,ndgObj=self.uri)
            else:
                try:
                    # there is no logger outside a wsgi request; without
                    # this guard every render ended in the except clause
                    if self.logger:
                        self.logger.info('Doc--\n%s\n--\n'%r)
                    r=self.shower.get(r,format,otype=outputType,ndgObj=self.uri)
                except Exception as e:
                    r='<p> Unable to show document, reason was [%s]</p>'%cgi.escape(str(e))
        else:
            ctype='text/xml'
        return ctype,r


    def __prompt(self):
        ''' If the URI is not an argument to the request, prompt the user for a URI '''
        h='''
        <form method='get' action=''>
        <p>
          Enter a Document Identifier to View: 
            <input type='text' name='uri'/><br/>
            (uri format: e.g. badc.nerc.ac.uk:DIF:xyz39) <br/>
            (Querying <input type='text' name='repository' value='gepidae.esc.rl.ac.uk'/>)<br/>
            Return: <input type="radio" name="type" value="xml"/> XML Raw
                <input type="radio" name="type" value="html"/> Formatted
                <input type="radio" name="type" value="print"/> XML Printable
          <input type='submit'/>
        </p>
        </form>
        '''
        return h
206   
import unittest

# Record used by the (currently disabled) Atest* cases below; swap in one
# of the alternatives to exercise a different provider.
#testURI='noc.soton.ac.uk__DIF__NOCSDAT193'
#testURI='ndg.noc.soton.ac.uk__NDG-B0__NOCSDAT274'
#testURI='grid.bodc.nerc.ac.uk__DIF__EDMED1048034'
#testURI='grid.bodc.nerc.ac.uk__DIF__EDMED1048008'
#testURI='bgs.nerc.ac.uk__DIF__GEOP_EAST'
testURI = 'neodc.nerc.ac.uk__DIF__NEODC_NEXTMAP'

# Shared application instance for the tests, configured from the
# current directory. Note that it is built whenever this module is imported.
retriever = ndgRetrieve('./')
class TestCase(unittest.TestCase):
    ''' Tests driving the module level retriever through _present.
    unittest only collects methods whose names start with "test", so the
    "Atest..." methods are switched off until they are renamed. '''

    def _fetch(self,inputs,browse=0):
        ''' Return the body produced by the shared retriever for inputs '''
        ctype,r=retriever._present(inputs,'text/html',browse=browse)
        return r

    def _check(self,r):
        ''' Fail the test if r is an html error paragraph; return r otherwise '''
        # _present reports problems as a body starting with <p>
        if r[0:3]=='<p>': self.fail(r)
        return r

    def AtestOriginal(self):
        ''' Tests the ability to obtain an original document via xmlrpc '''
        inputs={'repository':'glue.badc.rl.ac.uk','uri':testURI,'format':'original','type':'html','original':'1'}
        self._check(self._fetch(inputs))

    def AtestSOAPOriginal(self):
        ''' Tests the ability to obtain an original document via soap '''
        inputs={'repository':'ndg','uri':testURI,'format':'original','type':'html','original':'1'}
        self._check(self._fetch(inputs))

    def AtestDIF(self):
        ''' Tests the ability to obtain a DIF document '''
        inputs={'repository':'glue.badc.rl.ac.uk','uri':testURI,'format':'DIF','type':'html'}
        self._check(self._fetch(inputs))

    def AtestMDIP(self):
        ''' Tests the ability to obtain an MDIP document '''
        inputs={'repository':'glue.badc.rl.ac.uk','uri':testURI,'format':'MDIP','type':'html'}
        self._check(self._fetch(inputs))

    def AtestISO(self):
        ''' Tests the ability to obtain an ISO document '''
        inputs={'repository':'glue.badc.rl.ac.uk','uri':testURI,'format':'ISO19139','type':'html'}
        self._check(self._fetch(inputs))

    def AtestDC(self):
        ''' Tests the ability to obtain a DC document '''
        inputs={'repository':'glue.badc.rl.ac.uk','uri':testURI,'format':'DC','type':'html'}
        self._check(self._fetch(inputs))

    def testCh(self):
        ''' test link to local repository for NDG-B0'''
        testURI='badc.nerc.ac.uk__NDG-B0__dataent_chablis'
        inputs={'repository':'chinook.badc.rl.ac.uk','uri':testURI,'format':'NDG-B0','type':'xml'}
        self._check(self._fetch(inputs,browse=1))

    def testBODC(self):
        ''' Tests the link to BODC exist '''
        uri='grid.bodc.nerc.ac.uk:DIF:PCDA47973RS2302'
        inputs={'repository':'grid.bodc.nerc.ac.uk','uri':uri,'format':'NDG-B1','type':'xml'}
        self._check(self._fetch(inputs,browse=1))

    def testCHstub(self):
        ''' test link to local repository for NDG-B1, keeping a copy of
        whatever came back in <uri>.xml in the working directory '''
        #testURI='badc.nerc.ac.uk__NDG-B0__dataent_chablis'
        #testURI='badc.nerc.ac.uk__NDG-B0__activity_activity_faam'
        #testURI='badc.nerc.ac.uk__NDG-B0__activity_activity_hadat'
        #testURI='badc.nerc.ac.uk__NDG-B0__activity_activity_cwvc'
        testURI='badc.nerc.ac.uk__NDG-B1__obs_1162914786499342'
        inputs={'repository':'chinook.badc.rl.ac.uk','uri':testURI,'format':'NDG-B1','type':'xml'}
        r=self._fetch(inputs,browse=1)
        # the copy is written before the check so that an error response
        # can be inspected too; the with block closes the file even if
        # the write itself fails
        with open('%s.xml'%testURI,'w') as f:
            f.write(r)
        self._check(r)
# Run the test cases above when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
277   
Note: See TracBrowser for help on using the repository browser.