source: TI07-MOLES/trunk/PythonCode/wsgi/ndgRetrieve.py @ 2433

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI07-MOLES/trunk/PythonCode/wsgi/ndgRetrieve.py@2433
Revision 2433, 9.6 KB checked in by lawrence, 12 years ago (diff)

Fixes for ticket:722, changing to internal unicode (probably not complete),
with decode on the edge coming out ... fixes 722 but may have some
other problems now ...

Line 
1from Utilities import myConfig
2from paste.request import parse_querystring
3from renderEntity import renderEntity
4import xmlHandler
5from DC import DC
6from DIF import DIF
7from stubB import stubB
8from ndgObject import ndgObject
9from MDIP import MDIP
10from ndgSearch import ndgSearch
11
12# any class with a get method can be used here, and then only
13# the instantiation within ndgRetrieve would need
14# modification to use it ...
15
16from DocumentRetrieve import DocumentRetrieve
# Module-wide debug switch: when true, ndgRetrieve._present lets errors from
# rendering a document propagate instead of turning them into an html message.
debug = 0
def retrieveFactory(global_config, **local_conf):

    ''' This factory is intended to be used by paste-deploy to return
    the ndgRetrieve wsgi application, configured to use the configDir
    to find local configuration information.

    A configDir given in local_conf takes precedence over one given in
    global_config; if neither provides it, None is handed to ndgRetrieve. '''

    if 'configDir' in local_conf:
        # the key is the literal string 'configDir'
        configDir = local_conf['configDir']
    else:
        try:
            configDir = global_config['configDir']
        except Exception:
            # best effort: any problem reading the global configuration
            # simply means that no directory was configured
            configDir = None
    return ndgRetrieve(configDir)
33
class showDoc:
    ''' Given an xml document (xml), of format ('DIF','DC','NDG-B1','MDIP'),
    return either the document rendered as "pretty" xml for printing, or as
    html rendered by an appropriate application, or the xml itself untouched '''

    def __init__(self, config):
        ''' Keep hold of the configuration and build the html renderer from it '''
        self.config = config
        self.renderer = renderEntity(self.config)

    def get(self, xml, format, otype='html', ndgObj=None):
        ''' Present the document xml according to otype:
           - 'xml'   returns the xml argument unchanged,
           - 'print' returns the html attribute of an xmlHandler built from xml,
           - 'html'  wraps xml in the DIF, stubB, DC or MDIP class selected by
                     format and returns what the renderer makes of it
                     (ndgObj is only handed to DIF).
        Raises ValueError for an html request with an unknown format, and
        TypeError for an unknown otype. '''
        if otype == 'xml':
            return xml
        if otype == 'print':
            return xmlHandler.xmlHandler(xml, string=1).html
        if otype != 'html':
            raise TypeError('Invalid output type [%s]' % otype)

        # html from here on: pick the wrapper class matching the format
        if format == 'DIF':
            entity = DIF(xml, ndgObj=ndgObj)
        elif format == 'NDG-B1':
            entity = stubB(xml, self.config)
        elif format == 'DC':
            entity = DC(xml)
        elif format == 'MDIP':
            entity = MDIP(xml)
        else:
            raise ValueError('Unknown format [%s] for rendering' % format)
        return self.renderer.render(entity)
64
class ndgRetrieve:

    ''' This is the ndgRetrieve application which simply allows one to enter a uri
    of a document held in a LOCAL exist repository and return one of
       - the raw document (type=xml),
       - an html version fully rendered (type=html), or
       - a pretty printed version suitable for printing (type=print, which is
         what is used when no type is given)
    Note that this WSGI application SHOULD be wrapped by an ndgGatekeeper MIDDLEWARE application and
    not exposed directly to users (except in testing of course :-) '''

    def __init__(self, configDir):

        ''' Instantiate the wsgi document retrieval application, reading
        ndgDiscovery.config from configDir '''
        import os  # only needed here, to build the configuration file path
        self.configDir = configDir
        # join rather than concatenate, so configDir works with or without a
        # trailing separator; no configDir (None) gives a relative file name
        self.config = myConfig(os.path.join(configDir or '', 'ndgDiscovery.config'))
        self.shower = showDoc(self.config)
        # nb ... we don't instantiate the retrieval web service, since
        # we expect to potentially have multiple web services pointing
        # at multiple databases (eg. BADC + NEODC + NDG ) ...
        self.retrieveWS = DocumentRetrieve
        # web service objects already created, keyed by repository name
        self.WScursors = {}

    def __call__(self, environ, start_response):

        ''' This is the function which implements the document retrieval:
        the query string is parsed into the inputs for _present, and the
        result is sent back as a 200 response. A SCRIPT_NAME of /browse
        switches _present into browse mode.
        Exceptions raised by _present (ValueError) are not caught here. '''

        inputs = dict(parse_querystring(environ))
        browse = 0
        if environ['SCRIPT_NAME'] == '/browse':
            browse = 1

        # html is handed in as the content type since the result may be an
        # error message; _present changes it to text/xml for raw documents
        ctype, r = self._present(inputs, 'text/html', browse=browse)
        return self._respond(ctype, r, start_response)

    def _respond(self, ctype, r, start_response):
        ''' Start a 200 response of content type ctype (declared as utf-8)
        and return the body r as a one element list of utf-8 bytes '''
        # charset is a parameter of Content-Type, not a header of its own
        start_response('200 OK', [('Content-Type', '%s; charset=utf-8' % ctype)])
        if isinstance(r, bytes):
            # already a byte string: encoding it again fails on non-ascii
            # content, so pass it through (assumes it is utf-8 -- TODO confirm)
            return [r]
        return [r.encode('utf-8')]

    def _present(self, inputs, ctype, browse=0):
        ''' This is the entry point for an inner retrieval loop for document retrieval.

        inputs is the dictionary of request arguments: uri (required), and
        optionally repository, format, type and original. ctype is the content
        type to report unless the raw xml is returned. browse selects the
        /db/ndg_B_metadata collection whatever the format.

        Returns the tuple (content type, text), where text is the document in
        the requested presentation, or an html paragraph describing a problem.
        Raises ValueError, with the html prompt form as its message, if there
        is no uri in inputs; per the original comment ndgObject also raises
        ValueError if there is a problem with the uri. '''
        # we separate this out for ease of building test cases
        ori = 'original' in inputs
        outputType = inputs.get('type', 'print')

        if 'uri' not in inputs:
            raise ValueError(self.__prompt())

        #this will raise a ValueError if there is a problem
        uri = ndgObject(inputs['uri'])
        uri.setConfig(self.config)
        self.uri = uri

        if 'repository' in inputs:
            db = inputs['repository']
        else:
            db = uri.repository
        if 'format' in inputs:
            format = inputs['format']
        else:
            format = self.uri.schema

        if db in self.WScursors:
            ws = self.WScursors[db]
        else:
            # NOTE(review): the cache is keyed on db alone although the kind
            # of service also depends on format; whichever request comes
            # first for a db decides what later requests get -- confirm
            # that this is intended
            if db.upper() == 'NDG' or format.upper() == 'ORIGINAL':
                ws = ndgSearch()
            else:
                ws = DocumentRetrieve(db)
            self.WScursors[db] = ws

        # sanity ... an original document is never rendered as html
        if ori and outputType == 'html':
            outputType = 'print'
        if browse or format == 'NDG-B1':
            target = '/db/ndg_B_metadata'
        else:
            target = '/db/discovery/moles'

        # NOTE(review): the messages below put request and exception text
        # into html without escaping it
        try:
            r = ws.get(uri.repository, uri.schema, uri.localID,
                       format=format, targetCollection=target)
        except Exception as e:
            return ctype, '<p> Unable to retrieve [%s], reason was [%s]</p>' % (uri.uri, e)
        if isinstance(r, int):
            # an integer is a count of matches, not a document
            return ctype, '<p> There are %s identifiers matching your request! </p>' % r

        if outputType == 'xml':
            return 'text/xml', r
        if debug:
            # let rendering problems surface with their traceback
            return ctype, self.shower.get(r, format, otype=outputType, ndgObj=self.uri)
        try:
            r = self.shower.get(r, format, otype=outputType, ndgObj=self.uri)
        except Exception as e:
            r = '<p> Unable to show document, reason was [%s]</p>' % e
        return ctype, r


    def __prompt(self):
        ''' If the URI is not an argument to the request, prompt the user for a URI
        (returns the html of the form used to ask for it) '''
        h='''
        <form method='get' action=''>
        <p>
          Enter a Document Identifier to View: 
            <input type='text' name='uri'/><br/>
            (uri format: e.g. badc.nerc.ac.uk:DIF:xyz39) <br/>
            (Querying <input type='text' name='repository' value='gepidae.esc.rl.ac.uk'/>)<br/>
            Return: <input type="radio" name="type" value="xml"/> XML Raw
                <input type="radio" name="type" value="html"/> Formatted
                <input type="radio" name="type" value="print"/> XML Printable
          <input type='submit'/>
        </p>
        </form>
        '''
        return h
190import unittest
191#testURI='noc.soton.ac.uk__DIF__NOCSDAT193'
192#testURI='ndg.noc.soton.ac.uk__NDG-B0__NOCSDAT274'
193#testURI='grid.bodc.nerc.ac.uk__DIF__EDMED1048034'
194#testURI='grid.bodc.nerc.ac.uk__DIF__EDMED1048008'
#testURI='bgs.nerc.ac.uk__DIF__GEOP_EAST'
# the document identifier used by the tests which do not set their own
testURI = 'neodc.nerc.ac.uk__DIF__NEODC_NEXTMAP'
# the application instance shared by all the test cases; it is built when the
# module is loaded, using the configuration found in the current directory
retriever = ndgRetrieve('./')
class TestCase(unittest.TestCase):
    ''' Test cases which push requests through the _present method of the
    module level retriever. Methods named Atest... do not match the prefix
    unittest looks for (test) and so are not run.
    NOTE(review): the tests presumably need the named repositories to be
    reachable -- confirm before running. '''

    def _retrieve(self, inputs, browse=0):
        ''' Present inputs via the shared retriever, and raise ValueError
        (carrying the text returned) if that text starts with <p>, which is
        how _present reports problems '''
        ctype, r = retriever._present(inputs, 'text/html', browse=browse)
        if r[0:3] == '<p>':
            raise ValueError(r)

    def AtestOriginal(self):
        ''' Tests the ability to obtain an original document via xmlrpc '''
        self._retrieve({'repository':'glue.badc.rl.ac.uk', 'uri':testURI,
                        'format':'original', 'type':'html', 'original':'1'})

    def AtestSOAPOriginal(self):
        ''' Tests the ability to obtain an original document via soap '''
        self._retrieve({'repository':'ndg', 'uri':testURI,
                        'format':'original', 'type':'html', 'original':'1'})

    def AtestDIF(self):
        ''' Tests the ability to obtain a DIF document '''
        self._retrieve({'repository':'glue.badc.rl.ac.uk', 'uri':testURI,
                        'format':'DIF', 'type':'html'})

    def AtestMDIP(self):
        ''' Tests the ability to obtain an MDIP document '''
        self._retrieve({'repository':'glue.badc.rl.ac.uk', 'uri':testURI,
                        'format':'MDIP', 'type':'html'})

    def AtestISO(self):
        ''' Tests the ability to obtain an ISO document '''
        self._retrieve({'repository':'glue.badc.rl.ac.uk', 'uri':testURI,
                        'format':'ISO19139', 'type':'html'})

    def AtestDC(self):
        ''' Tests the ability to obtain a DC document '''
        self._retrieve({'repository':'glue.badc.rl.ac.uk', 'uri':testURI,
                        'format':'DC', 'type':'html'})

    def testCh(self):
        ''' test link to local repository for NDG-B0'''
        uri = 'badc.nerc.ac.uk__NDG-B0__dataent_chablis'
        self._retrieve({'repository':'chinook.badc.rl.ac.uk', 'uri':uri,
                        'format':'NDG-B0', 'type':'xml'}, browse=1)

    def testCHstub(self):
        ''' test link to local repository for an NDG-B0 identifier
        requested in the NDG-B1 format '''
        #uri='badc.nerc.ac.uk__NDG-B0__dataent_chablis'
        uri = 'badc.nerc.ac.uk__NDG-B0__activity_activity_faam'
        self._retrieve({'repository':'chinook.badc.rl.ac.uk', 'uri':uri,
                        'format':'NDG-B1', 'type':'xml'}, browse=1)

if __name__ == "__main__":
    # run the test cases when this file is executed as a script
    unittest.main()
247   
Note: See TracBrowser for help on using the repository browser.