source: TI05-delivery/ows_framework/trunk/ows_server/ows_server/models/DocumentRetrieve.py @ 3073

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI05-delivery/ows_framework/trunk/ows_server/ows_server/models/DocumentRetrieve.py@3073
Revision 3073, 8.4 KB checked in by lawrence, 12 years ago (diff)

Support for viewing xhtml description sections within stub-B

Line 
from eXistInterface import ndg_eXist
from ndg_xqueries import *
from xml.dom import minidom
try:
    from xml.etree import ElementTree as ET
except:
    import ElementTree as ET
import socket
import urllib2
9#from stripped_xqueries import strip_de_xquery
10
11# The MOLES document retrieval is a python port of :
12# TI07-MOLES/trunk/JavaCode/returnmolesxmldb/ndg/services/returnmoles/Main.java
13# Note that ndgRetreive essentially provides test cases for this code.
14
# Module-wide debug switch. When true, DocumentRetrieve.get prints the
# NDG-A0/NumSim xquery and the resolved objectType, and writes a document
# query that did not return exactly one hit to xquery.<format>.fails.xq.
debug=0
16
def queryReplace(xquery,repository,localID,targetCollection):
    ''' Turn a template xquery into one that selects the requested document.

    The first occurrence of 'RepositoryID' and of 'LocalID' is replaced by
    [repository] and [localID]; every occurrence of 'TargetCollection' is
    replaced by [targetCollection]. The substituted query is returned. '''
    # placeholders that are only substituted once, in this order
    singleShot=(('RepositoryID',repository),
                ('LocalID',localID))
    for placeholder,value in singleShot:
        xquery=xquery.replace(placeholder,value,1)
    return xquery.replace('TargetCollection',targetCollection)
23   
class DocumentRetrieve (ndg_eXist):
    ''' This class provides a document retrieval service via the NDG exist interfaces '''

    def __init__(self,repository,pwfile='passwords.txt'):
        ''' Initialise the underlying ndg_eXist interface with db=[repository]
        and passwordFile=[pwfile], and remember the repository name. '''
        ndg_eXist.__init__(self,db=repository,passwordFile=pwfile)
        self.repository=repository

    def get(self,repository,schema,localID,format='NDG-B0',targetCollection='/db/discovery/moles'):
        ''' Retrieve the document identified by repository/schema/localID.

        schema            -- 'NDG-A0' or 'NumSim' are fetched with their own
                             xquery; anything starting 'NDG-B', 'DIF' or 'MDIP'
                             is generated from the MOLES record.
        format            -- output format for the MOLES-derived schemas, one of
                             DIF, DC, ISO19139, NDG-B0, NDG-B1, MDIP (unused for
                             NDG-A0 and NumSim).
        targetCollection  -- value substituted for 'TargetCollection' in the
                             xquery templates.

        Returns whatever self.retrieve yields for the single hit.
        Raises ValueError when a query does not return exactly one hit, and
        TypeError for an unknown schema, an invalid output format, or an
        object type which cannot be delivered in the requested format. '''
        # We are making the assumption for now that everything is stored as
        # MOLES documents and that we can retrieve from MOLES anything we like
        # in another format.
        if schema in ('NDG-A0','NumSim'):
            return self._getDirect(repository,schema,localID,targetCollection)
        if schema[0:5]=='NDG-B' or schema[0:3]=='DIF' or schema[0:4]=='MDIP':
            return self._getViaMoles(repository,localID,format,targetCollection)
        raise TypeError('Unknown Schema "%s" in URI'%schema)

    def _getDirect(self,repository,schema,localID,targetCollection):
        ''' Fetch an NDG-A0 or NumSim document using its dedicated xquery '''
        xquery={'NDG-A0':csmlQuery,'NumSim':numsimQuery}[schema]
        if schema=='NumSim': xquery=xquery.replace('RepositoryID',repository)
        xquery=xquery.replace('TargetCollection',targetCollection)
        xquery=xquery.replace('LocalID',localID)
        if debug: print(xquery)
        sessionID,summary=self.executeQuery(xquery)
        try:
            if summary['hits']!=1:
                raise ValueError('Unable to obtain %s document [%s] (hits=%s)'%(schema,localID,summary['hits']))
            return self.retrieve(sessionID,0,{})
        finally:
            # release the query session on failure as well as on success
            self.sessionRelease(sessionID)

    def _getViaMoles(self,repository,localID,format,targetCollection):
        ''' Work out the MOLES object type of the record, then fetch it in [format] '''
        # find out what type of object actually exists of this sort
        # 0 - None, 1 - Activity, 2 - DPT, 3 - ObsStn, 4 - DE
        # if the schema is a DIF, we expect to find a DE from the DIF ingestion to MOLES ...

        # the following xquery can be used to generate a listing of the database
        # contents ...
        # xquery=listingQuery

        # default code has targetCollection='/db/ndg_B_metadata, this gets us the right one ...
        xquery=queryReplace(ObjectTypeQuery,repository,localID,targetCollection)
        sessionID,summary=self.executeQuery(xquery)
        try:
            hits=summary['hits']
            if hits!=1:
                raise ValueError('%s documents returned for uri %s:%s:%s (in %s)'%(hits,repository,'NDG-B0',localID,targetCollection))

            # check output formats
            allowed=('DIF','DC','ISO19139','NDG-B0','NDG-B1','MDIP')
            if format not in allowed:
                raise TypeError('Invalid document output format [%s]'%format)

            # get output type
            r=self.retrieve(sessionID,0,{})
        finally:
            # this session is finished with before the document query starts
            self.sessionRelease(sessionID)

        # typical response looks like this:
        #        <objectType xmlns="http://ndg.nerc.ac.uk/moles">1</objectType>
        dom=minidom.parseString(r)
        objectType=int(dom.getElementsByTagNameNS(
                        "http://ndg.nerc.ac.uk/moles","objectType").item(0).firstChild.data)
        if debug: print('objectType=%s'%objectType)

        if format.find('NDG')==-1 and objectType!=4:
            # only data entities can be delivered in the non-NDG formats;
            # .get() keeps an unexpected type code from masking this error
            typeNames={0:'None',1:'Activity',2:'Data Production Tool',3:'Observation Station'}
            raise TypeError('Document URI type [%s] not valid for output format [%s]'%(
                            typeNames.get(objectType,objectType),format))

        # select the correct xquery; the stub-B table is only consulted when
        # NDG-B1 was actually asked for
        if format=='NDG-B1':
            stubQueries={4:StubBDEQuery,#strip_de_xquery,#
                         3:StubBObsStnQuery,
                         2:StubBDPTQuery,
                         1:StubBActQuery}
            if objectType not in stubQueries:
                raise TypeError('No NDG-B1 query available for object type [%s]'%objectType)
            xquery=stubQueries[objectType]
        else:
            xquery={'DIF':DIFQuery,
                    'DC':DublinCoreDEQuery,
                    'ISO19139':ISO19139Query,
                    'NDG-B0':MOLESQuery,
                    'MDIP':MDIPQuery}[format]

        xquery=queryReplace(xquery,repository,localID,targetCollection)
        try:
            sessionID,summary=self.executeQuery(xquery)
        except Exception:
            # show the offending query, then let the original error propagate
            print(xquery)
            raise
        try:
            hits=summary['hits']
            # should only be the one document in the result set
            if hits!=1:
                if debug:
                    with open('xquery.%s.fails.xq'%format,'w') as f:
                        f.write(xquery)
                raise ValueError('Actual Document Query returned [%s] hits - internal error!'%hits)
            # now let's get it and return it
            return self.retrieve(sessionID,0,{})
        finally:
            self.sessionRelease(sessionID)
121           
class genericHTTP(object):
    ''' Provides a generic HTTP request class '''

    def __init__(self,proxyServer=None):
        ''' Build the urllib2 opener used by get().

        With no [proxyServer] the ProxyHandler is given an empty mapping;
        otherwise [proxyServer] is used as the http proxy, with 'http://'
        prepended when it does not already start with 'http'. '''
        if proxyServer is None:
            proxyHandler=urllib2.ProxyHandler({})
        else:
            proxy=proxyServer
            if proxy[0:4]!='http': proxy='http://'+proxy
            proxyHandler=urllib2.ProxyHandler({'http':proxy})
        self.opener=urllib2.build_opener(proxyHandler)

    def get(self,url):
        ''' Return the body of [url].

        Any failure to open the url is reported as an IOError whose message
        starts with 'Error obtaining remote file: '. Errors raised while
        reading the body are not translated. '''
        request=urllib2.Request(url)
        prefix='Error obtaining remote file: '
        try:
            f=self.opener.open(request)
        except urllib2.URLError as e:
            # URLError carries a reason, HTTPError carries a status code
            if hasattr(e,'reason'):
                raise IOError(prefix+'No access to server [%s]'%e.reason)
            if hasattr(e,'code'):
                raise IOError(prefix+'Response code [%s]'%e.code)
            raise IOError(prefix)
        except socket.error:
            raise IOError(prefix+'Network Socket problem')
        except Exception as e:
            raise IOError(prefix+'[%s]'%str(e))
        try:
            return f.read()
        finally:
            # do not leave the connection open once the body has been read
            f.close()
152           
class ndgHTTP(genericHTTP):
    ''' Fetches xml documents from a remote NDG repository over HTTP '''

    def __init__(self,remoteHost,proxyServer=None):
        ''' [remoteHost] is the prefix every request url is built on '''
        genericHTTP.__init__(self,proxyServer)
        self.remoteHost=remoteHost

    def uriget(self,uri):
        ''' Split [uri] with ndgObject and fetch the document it names '''
        parsed=ndgObject(uri)
        return self.get(parsed.repository,parsed.schema,parsed.localID)

    def get(self,repository,schema,localID,format='NDG-B0',targetCollection=None):
        ''' Fetch repository/schema/localID from the remote host's view
        service, asking for the raw document in output schema [format].
        targetCollection is accepted but not used. '''
        #last two arguments for compatibility with API, not for use in restful get ...
        #TODO what about security? Probably means we need to get the headers of our responses sorted ...
        documentID='%s__%s__%s'%(repository,schema,localID)
        url='%s/view/%s?format=raw&outputSchema=%s'%(self.remoteHost,documentID,format)
        #http://docs.python.org/lib/urllib2-examples.html
        return genericHTTP.get(self,url)

    def setSecurity(self,location,usercode,password):
        ''' Use a usercode password to set security credentials at a specific
        location (placeholder: currently does nothing) '''
        return None
173   
class ndgVocabPOX(genericHTTP):
    ''' Provides a POX interface to the vocab server '''

    def __init__(self,path="http://vocab.ndg.nerc.ac.uk/axis2/services/vocab/",proxyServer=None):
        ''' [path] is the base url of the vocab service (expected to end in '/');
        [proxyServer] is handed on to genericHTTP. '''
        genericHTTP.__init__(self,proxyServer)
        # use the service location the caller asked for, not just the default
        self.path=path
        self.ns="http://vocab.ndg.nerc.ac.uk/"

    def getRelated(self,subject):
        ''' Get a related record

        Queries the vocab server for [subject] and returns
        [broader, narrower, synonyms], each a list of entry terms (an empty
        string stands in for a term that is missing or has no text). The
        three lists and the url used are also stored on the instance.
        NOTE(review): subject is placed in the url as given - presumably the
        caller is expected to have url-quoted it; confirm. '''
        url='%sgetRelatedRecordByCriteria?subjectText=%s&predicate=255&inferences=True&objectList=%slist/P211/current'%(self.path,subject,self.ns)
        self.url=url
        doc=genericHTTP.get(self,url)
        root=ET.fromstring(doc)
        vocabTypes='{urn:vocab/types}'

        def entryTerms(matchTag):
            # findtext gives None when the entryTerm is absent, '' when it is empty
            return [(match.findtext(vocabTypes+'entryTerm') or '')
                    for match in root.findall('*/'+vocabTypes+matchTag)]

        self.broader=entryTerms('broadMatch')
        self.narrower=entryTerms('narrowMatch')
        self.synonyms=entryTerms('exactMatch')
        return [self.broader,self.narrower,self.synonyms]
Note: See TracBrowser for help on using the repository browser.