source: TI05-delivery/ows_framework/trunk/ows_server/ows_server/models/DocumentRetrieve.py @ 3075

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI05-delivery/ows_framework/trunk/ows_server/ows_server/models/DocumentRetrieve.py@3075
Revision 3075, 8.6 KB checked in by lawrence, 12 years ago (diff)

Oh the many ways ElementTree can appear ...

Line 
1from eXistInterface import ndg_eXist
2from ndg_xqueries import *
3from xml.dom import minidom
4try:
5    from xml.etree import ElementTree as ET
6except ImportError:
7    try:
8        import ElementTree as ET
9    except ImportError:
10        # For some reason, when ElementTree is installed with easy_install
11        # the package is called "elementtree".
12        import elementtree.ElementTree as ET
13import urllib2
14#from stripped_xqueries import strip_de_xquery
15
16# The MOLES document retrieval is a python port of :
17# TI07-MOLES/trunk/JavaCode/returnmolesxmldb/ndg/services/returnmoles/Main.java
18# Note that ndgRetreive essentially provides test cases for this code.
19
20debug=0
21
def queryReplace(xquery,repository,localID,targetCollection):
    ''' Turn a vanilla (template) xquery into one which fetches the actual
    requested document, by filling in its three placeholders.

    Only the first 'RepositoryID' and the first 'LocalID' are substituted;
    every 'TargetCollection' is substituted. The substitutions are applied
    in that order, each one working on the result of the previous one. '''
    withRepository=xquery.replace('RepositoryID',repository,1)
    withLocalID=withRepository.replace('LocalID',localID,1)
    return withLocalID.replace('TargetCollection', targetCollection)
28   
class DocumentRetrieve (ndg_eXist):
    ''' This class provides a document retrieval service via the NDG exist interfaces '''
    def __init__(self,repository,pwfile='passwords.txt'):
        ''' Initialise the underlying ndg_eXist interface with db=repository
        and passwordFile=pwfile, and remember the repository name. '''
        ndg_eXist.__init__(self,db=repository,passwordFile=pwfile)
        self.repository=repository

    def get(self,repository,schema,localID,format='NDG-B0',targetCollection='/db/discovery/moles'):
        ''' Return the document identified by repository, schema and localID.

        schema 'NDG-A0' or 'NumSim' is fetched with its own xquery; a schema
        starting with 'NDG-B', 'DIF' or 'MDIP' is generated from the MOLES
        record in the requested output format (one of DIF, DC, ISO19139,
        NDG-B0, NDG-B1, MDIP). The format argument is only used for the
        latter group.

        Raises ValueError when a query does not return exactly one hit, and
        TypeError for an unknown schema, an invalid output format, or an
        object type which cannot be delivered in the requested format. '''
        # We are making the assumption for now that everything is stored as
        # MOLES documents and that we can retrieve from MOLES anything we like
        # in another format.
        if schema in ['NDG-A0','NumSim']:
            return self._getStoredDocument(repository,schema,localID,targetCollection)
        elif (schema[0:5] == 'NDG-B' or schema[0:3]=='DIF' or schema[0:4] == 'MDIP'):
            return self._getMolesDocument(repository,localID,format,targetCollection)
        else:
            raise TypeError('Unknown Schema "%s" in URI'%schema)

    def _getStoredDocument(self,repository,schema,localID,targetCollection):
        ''' Fetch an NDG-A0 or NumSim document with its dedicated xquery. '''
        xquery={'NDG-A0':csmlQuery,'NumSim':numsimQuery}[schema]
        if schema=='NumSim': xquery=xquery.replace('RepositoryID',repository)
        xquery=xquery.replace('TargetCollection',targetCollection)
        xquery=xquery.replace('LocalID',localID)
        if debug: print(xquery)
        sessionID,summary=self.executeQuery(xquery)
        try:
            if summary['hits']!=1:
                raise ValueError('Unable to obtain %s document [%s] (hits=%s)'%(schema,localID,summary['hits']))
            return self.retrieve(sessionID,0,{})
        finally:
            # release the query session on the failure path as well
            self.sessionRelease(sessionID)

    def _getMolesDocument(self,repository,localID,format,targetCollection):
        ''' Work out what type of MOLES object the identifier refers to, then
        run the xquery which delivers it in the requested output format. '''
        # find out what type of object actually exists of this sort
        # 0 - None, 1 - Activity, 2 - DPT, 3 - ObsStn, 4 - DE
        # if the schema is a DIF, we expect to find a DE from the DIF ingestion to MOLES ...

        # the following xquery can be used to generate a listing of the database
        # contents ...
        # xquery=listingQuery

        # default code has targetCollection='/db/ndg_B_metadata, this gets us the right one ...
        xquery=queryReplace(ObjectTypeQuery,repository,localID,targetCollection)
        sessionID,summary=self.executeQuery(xquery)
        try:
            hits=summary['hits']
            if hits!=1:
                raise ValueError('%s documents returned for uri %s:%s:%s (in %s)'%(hits,repository,'NDG-B0',localID,targetCollection))

            # check output formats
            allowed = ['DIF','DC','ISO19139','NDG-B0','NDG-B1','MDIP']
            if format not in allowed:
                raise TypeError('Invalid document output format [%s]'%format)

            # get output type
            r=self.retrieve(sessionID,0,{})
        finally:
            # this session used to be overwritten by the second query
            # without ever being released
            self.sessionRelease(sessionID)

        # typical response looks like this:
        #        <objectType xmlns="http://ndg.nerc.ac.uk/moles">1</objectType>
        xml=minidom.parseString(r)
        objectType=int(xml.getElementsByTagNameNS(
                        "http://ndg.nerc.ac.uk/moles", "objectType").item(0).firstChild.data)
        if debug: print('objectType=%s'%objectType)

        # label used in error messages; .get() so that an unexpected type
        # (e.g. 0) gives a readable TypeError rather than a KeyError
        label={0:'None',1:'Activity',2:'Data Production Tool',
               3:'Observation Station'}.get(objectType,'Unknown')

        # only a DE (type 4) can be delivered in the non-NDG formats
        if format.find('NDG')==-1 and objectType!=4:
            raise TypeError('Document URI type [%s] not valid for output format [%s]'%(label,format))

        #select the correct xquery
        if format=='NDG-B1':
            # the stub query depends on the object type; only look it up when
            # NDG-B1 was actually requested, so that an unexpected object
            # type cannot break the other formats
            stubQueries={4:StubBDEQuery,#strip_de_xquery,#
                         3:StubBObsStnQuery,
                         2:StubBDPTQuery,
                         1:StubBActQuery}
            if objectType not in stubQueries:
                raise TypeError('Document URI type [%s] not valid for output format [%s]'%(label,format))
            xquery=stubQueries[objectType]
        else:
            xquery={'DIF':DIFQuery,
                    'DC':DublinCoreDEQuery,
                    'ISO19139':ISO19139Query,
                    'NDG-B0':MOLESQuery,
                    'MDIP':MDIPQuery}[format]

        xquery=queryReplace(xquery,repository,localID,targetCollection)
        try:
            sessionID,summary=self.executeQuery(xquery)
        except Exception:
            # show the failing query, then re-raise with the original traceback
            print(xquery)
            raise
        try:
            hits=summary['hits']
            # should only be the one document in the result set
            if hits!=1:
                if debug:
                    f=open('xquery.%s.fails.xq'%format,'w')
                    try:
                        f.write(xquery)
                    finally:
                        f.close()
                raise ValueError('Actual Document Query returned [%s] hits - internal error!'%hits)

            # now let's get it and return it
            return self.retrieve(sessionID,0,{})
        finally:
            self.sessionRelease(sessionID)
126           
class genericHTTP(object):
    ''' Provides a generic HTTP request class '''
    def __init__(self,proxyServer=None):
        ''' Build the url opener used by get.

        proxyServer is an optional proxy address for http requests; it is
        prefixed with 'http://' unless it already starts with 'http'. With
        no proxyServer the proxy handler is given an empty mapping. '''
        if proxyServer is None:
            proxyHandler=urllib2.ProxyHandler({})
        else:
            proxy=proxyServer
            if proxy[0:4]!='http':proxy='http://'+proxy
            proxyHandler=urllib2.ProxyHandler({'http':proxy})
        self.opener=urllib2.build_opener(proxyHandler)

    def get(self,url):
        ''' Return the content found at url.

        Any failure to open the url is raised as an IOError whose message
        starts with 'Error obtaining remote file: '. '''
        # imported here because this module does not import socket at the
        # top; without it the socket.error clause below raised a NameError
        import socket
        prefix='Error obtaining remote file: '
        request=urllib2.Request(url)
        try:
            f = self.opener.open(request)
        except urllib2.URLError as e:
            # test for 'code' first: an HTTPError always carries one, and on
            # some Python versions it exposes 'reason' as well, which used
            # to hide the status code
            if hasattr(e,'code'):
                raise IOError(prefix+'Response code [%s]'%e.code)
            if hasattr(e,'reason'):
                raise IOError(prefix+'No access to server [%s]'%e.reason)
            raise IOError(prefix)
        except socket.error:
            raise IOError(prefix+'Network Socket problem')
        except Exception as e:
            raise IOError(prefix+'[%s]'%str(e))
        try:
            return f.read()
        finally:
            # do not leave the connection open once the body has been read
            f.close()
157           
class ndgHTTP(genericHTTP):
    ''' Fetches xml documents from a remote NDG repository via its view url '''
    def __init__(self,remoteHost,proxyServer=None):
        ''' remoteHost is the leading part of every url requested;
        proxyServer is handed on to genericHTTP. '''
        self.remoteHost=remoteHost
        genericHTTP.__init__(self,proxyServer)

    def uriget(self,uri):
        ''' Fetch the document named by an NDG uri, in the default format. '''
        # NOTE(review): ndgObject is not imported explicitly in this module;
        # presumably it arrives via "from ndg_xqueries import *" - confirm.
        target=ndgObject(uri)
        return self.get(target.repository,target.schema,target.localID)

    def get(self,repository,schema,localID,format='NDG-B0',targetCollection=None):
        ''' Fetch repository__schema__localID from the remote host, asking
        for it in the given output format. '''
        # targetCollection exists only for compatibility with the
        # DocumentRetrieve API, it plays no part in the restful get ...
        #TODO what about security? Probably means we need to get the headers of our responses sorted ...
        urlParts=(self.remoteHost,repository,schema,localID,format)
        url='%s/view/%s__%s__%s?format=raw&outputSchema=%s'%urlParts
        #http://docs.python.org/lib/urllib2-examples.html
        return genericHTTP.get(self,url)

    def setSecurity(self,location,usercode,password):
        ''' Placeholder for setting usercode/password credentials at a
        specific location; currently does nothing. '''
        pass
178   
class ndgVocabPOX(genericHTTP):
    ''' Provides a POX interface to the vocab server '''
    def __init__(self,path="http://vocab.ndg.nerc.ac.uk/axis2/services/vocab/",proxyServer=None):
        ''' path is the base url of the vocab service (used as a prefix, so
        it should end with '/'); proxyServer is handed on to genericHTTP. '''
        genericHTTP.__init__(self,proxyServer)
        # honour the caller's path; this used to be hard-coded to the default
        self.path=path
        self.ns="http://vocab.ndg.nerc.ac.uk/"

    def getRelated(self,subject):
        ''' Get a related record

        Queries the vocab server for subject and returns
        [broader, narrower, synonyms], each a list of entry term strings.
        The lists are also stored on self.broader, self.narrower and
        self.synonyms, and the url requested on self.url. '''
        # NOTE(review): subject goes into the query string unescaped -
        # presumably callers pass url-safe text; confirm.
        url='%sgetRelatedRecordByCriteria?subjectText=%s&predicate=255&inferences=True&objectList=%slist/P211/current'%(self.path,subject,self.ns)
        self.url=url
        doc=genericHTTP.get(self,url)
        x=ET.fromstring(doc)
        self.broader=self._entryTerms(x,'broadMatch')
        self.narrower=self._entryTerms(x,'narrowMatch')
        self.synonyms=self._entryTerms(x,'exactMatch')
        return [self.broader,self.narrower,self.synonyms]

    def _entryTerms(self,tree,relation):
        ''' Return the entryTerm text of every match of the given kind. '''
        # findtext gives None for a missing entryTerm and '' for an empty
        # one; both become '' rather than raising AttributeError
        return [(match.findtext('{urn:vocab/types}entryTerm') or '')
                for match in tree.findall('*/{urn:vocab/types}%s'%relation)]
Note: See TracBrowser for help on using the repository browser.