source: TI05-delivery/ows_framework/trunk/ows_server/ows_server/models/DocumentRetrieve.py @ 2860

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI05-delivery/ows_framework/trunk/ows_server/ows_server/models/DocumentRetrieve.py@2860
Revision 2860, 6.6 KB checked in by lawrence, 13 years ago (diff)

First step to remote ndg document retrieval

Line 
1from eXistInterface import ndg_eXist
2from ndg_xqueries import *
3from xml.dom import minidom
4import urllib2
5#from stripped_xqueries import strip_de_xquery
6
7# The MOLES document retrieval is a python port of :
8# TI07-MOLES/trunk/JavaCode/returnmolesxmldb/ndg/services/returnmoles/Main.java
9# Note that ndgRetreive essentially provides test cases for this code.
10
# Module-wide diagnostics switch: when true, DocumentRetrieve.get prints the
# detected object type and dumps any xquery that fails to return exactly one
# hit to 'xquery.<format>.fails.xq' in the current directory.
debug=0
12
def queryReplace(xquery,repository,localID,targetCollection):
    ''' Turn a template xquery into one that addresses a specific document.

    Only the first occurrence of the 'RepositoryID' and 'LocalID'
    placeholders is substituted, whereas every occurrence of
    'TargetCollection' is.  Substitutions are applied in that order and the
    resulting query string is returned. '''
    # (placeholder, replacement, max occurrences) - a count of -1 means "all"
    substitutions=(('RepositoryID',repository,1),
                   ('LocalID',localID,1),
                   ('TargetCollection',targetCollection,-1))
    for placeholder,value,count in substitutions:
        xquery=xquery.replace(placeholder,value,count)
    return xquery
19   
class DocumentRetrieve (ndg_eXist):
    ''' This class provides a document retrieval service via the NDG exist interfaces '''

    def __init__(self,repository,pwfile='passwords.txt'):
        ''' Connect to the eXist database named by repository, passing pwfile
        to the ndg_eXist base class as its password file. '''
        ndg_eXist.__init__(self,db=repository,passwordFile=pwfile)
        self.repository=repository

    def get(self,repository,schema,localID,format='NDG-B0',targetCollection='/db/discovery/moles'):
        ''' Return the single document identified by repository, schema and localID.

        schema selects how the document is located:
          - 'NDG-A0' or 'NumSim': the document is fetched directly with the
            corresponding xquery and format is ignored.
          - anything starting 'NDG-B', 'DIF' or 'MDIP': the document is
            assumed to be held as MOLES and is returned in the requested
            output format, one of DIF, DC, ISO19139, NDG-B0, NDG-B1 or MDIP.

        Note that the repository argument (not self.repository) is the value
        substituted into the queries.

        Raises ValueError if a query does not yield exactly one hit,
        TypeError for an unknown schema, an invalid output format or an
        object type which cannot be rendered in the requested format, and
        KeyError if the database reports an unrecognised object type.

        Every query session opened here is released before returning or
        raising. '''

        # We are making the assumption for now that everything is stored as
        # MOLES documents and that we can retrieve from MOLES anything we like
        # in another format.
        if schema in ['NDG-A0','NumSim']:
            xquery={'NDG-A0':csmlQuery,'NumSim':numsimQuery}[schema]
            if schema=='NumSim': xquery=xquery.replace('RepositoryID',repository)
            xquery=xquery.replace('TargetCollection',targetCollection)
            xquery=xquery.replace('LocalID',localID)
            sessionID,summary=self.executeQuery(xquery)
            # release the session whether or not we found the document
            try:
                if summary['hits']!=1:
                    raise ValueError('Unable to obtain %s document [%s] (hits=%s)'%(schema,localID,summary['hits']))
                return self.retrieve(sessionID,0,{})
            finally:
                self.sessionRelease(sessionID)

        elif (schema[0:5] == 'NDG-B' or schema[0:3]=='DIF' or schema[0:4] == 'MDIP'):

            # find out what type of object actually exists of this sort
            # 0 - None, 1 - Activity, 2 - DPT, 3 - ObsStn, 4 - DE
            # if the schema is a DIF, we expect to find a DE from the DIF ingestion to MOLES ...

            # the following xquery can be used to generate a listing of the database
            # contents ...
            # xquery=listingQuery

            # default code has targetCollection='/db/ndg_B_metadata', this gets us the right one ...
            xquery=queryReplace(ObjectTypeQuery,repository,localID,targetCollection)

            sessionID,summary=self.executeQuery(xquery)
            # this first session is only needed to read the object type, so
            # release it as soon as that is done (or has failed)
            try:
                hits=summary['hits']
                if hits!=1:
                    raise ValueError('%s documents returned for uri %s:%s:%s (in %s)'%(hits,repository,'NDG-B0',localID,targetCollection))

                # check output formats
                allowed = ['DIF','DC','ISO19139','NDG-B0','NDG-B1','MDIP']
                if format not in allowed:
                    raise TypeError('Invalid document output format [%s]'%format)

                # get output type
                r=self.retrieve(sessionID,0,{})
            finally:
                self.sessionRelease(sessionID)

            # typical response looks like this:
            #        <objectType xmlns="http://ndg.nerc.ac.uk/moles">1</objectType>
            xml=minidom.parseString(r)
            objectType=int(xml.getElementsByTagNameNS(
                            "http://ndg.nerc.ac.uk/moles", "objectType").item(0).firstChild.data)
            if debug: print('objectType=%s'%objectType)

            # Anything outside 1-4 used to surface as a bare "KeyError: 0" from
            # the lookup tables below; keep the exception type but say why.
            if objectType not in (1,2,3,4):
                raise KeyError('Unknown object type [%s] for uri %s:%s:%s (in %s)'%(objectType,repository,'NDG-B0',localID,targetCollection))

            # only data entities (type 4) can be rendered in the non-NDG formats
            if format.find('NDG')==-1 and objectType!=4:
                msg={1:'Activity',2:'Data Production Tool',3:'Observation Station'}[objectType]
                raise TypeError('Document URI type [%s] not valid for output format [%s]'%(msg,format))

            #select the correct xquery
            if format=='NDG-B1':
                # the stub-B query depends on what sort of object we have
                xquery={4:StubBDEQuery,
                        3:StubBObsStnQuery,
                        2:StubBDPTQuery,
                        1:StubBActQuery}[objectType]
            else:
                xquery={'DIF':DIFQuery,
                        'DC':DublinCoreDEQuery,
                        'ISO19139':ISO19139Query,
                        'NDG-B0':MOLESQuery,
                        'MDIP':MDIPQuery}[format]

            xquery=queryReplace(xquery,repository,localID,targetCollection)
            try:
                sessionID,summary=self.executeQuery(xquery)
            except Exception:
                # show the offending query, then re-raise with the original traceback
                print(xquery)
                raise

            try:
                hits=summary['hits']
                # should only be the one document in the result set
                if hits!=1:
                    if debug:
                        f=open('xquery.%s.fails.xq'%format,'w')
                        try:
                            f.write(xquery)
                        finally:
                            f.close()
                    raise ValueError('Actual Document Query returned [%s] hits - internal error!'%hits)

                # now let's get it and return it
                return self.retrieve(sessionID,0,{})
            finally:
                self.sessionRelease(sessionID)

        else:
            raise TypeError('Unknown Schema "%s" in URI'%schema)
117           
118           
class RestfulGet:
    ''' Provides a get method to obtain an xml document from a remote NDG repository '''

    def __init__(self,remoteHost,proxyServer=None):
        ''' Remember remoteHost (used as the prefix of every request URL) and
        build the urllib2 opener used for requests.

        If proxyServer is None the opener is given an empty proxy mapping;
        otherwise http requests go via proxyServer, to which 'http://' is
        prepended unless it already starts with 'http'. '''
        self.remoteHost=remoteHost
        if proxyServer is None:
            proxyHandler=urllib2.ProxyHandler({})
        else:
            proxy=proxyServer
            if proxy[0:4]!='http':proxy='http://'+proxy
            proxyHandler=urllib2.ProxyHandler({'http':proxy})
        self.opener=urllib2.build_opener(proxyHandler)

    def get(self,repository,schema,localID,format='NDG-B0',targetCollection=None):
        ''' Fetch a document from the remote host and return the response body.

        The request URL is
        <remoteHost>/view/<repository>__<schema>__<localID>?format=raw&outputSchema=<format>.
        targetCollection is accepted but not used here.

        Raises IOError, with a message starting 'Error obtaining remote
        file: ', if the request cannot be opened. '''
        # Imported here because the module header does not import socket,
        # which previously made the socket.error handler below a NameError.
        import socket

        #TODO what about security? Probably means we need to get the headers of our responses sorted ...
        url='%s/view/%s__%s__%s?format=raw&outputSchema=%s'%(self.remoteHost,repository,schema,localID,format)
        #http://docs.python.org/lib/urllib2-examples.html
        request=urllib2.Request(url)
        response='Error obtaining remote file: '
        try:
            f = self.opener.open(request)
            response=''
        except urllib2.URLError as e:
            if hasattr(e,'reason'):
                response+='No access to server [%s]'%e.reason
            elif hasattr(e,'code'):
                response+='Response code [%s]'%e.code
        except socket.error:
            response+='Network Socket problem'
        except Exception as e:
            response+='[%s]'%str(e)

        if response!='':
            raise IOError(response)

        # make sure the connection is closed once the body has been read
        try:
            return f.read()
        finally:
            f.close()
153   
Note: See TracBrowser for help on using the repository browser.