source: TI05-delivery/ows_framework/trunk/ows_server/ows_server/models/DocumentRetrieve.py @ 2976

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI05-delivery/ows_framework/trunk/ows_server/ows_server/models/DocumentRetrieve.py@2976
Revision 2976, 7.0 KB checked in by lawrence, 12 years ago (diff)

A whole raft of modifications

  • changing to the NERC icons
  • search support within Browse and NumSim
  • NumSim V10 support in viewing
  • older numsim docs will now not be found and displayed properly (which means a lot of updating is needed)
Line 
1from eXistInterface import ndg_eXist
2from ndg_xqueries import *
3from xml.dom import minidom
4import urllib2
5#from stripped_xqueries import strip_de_xquery
6
7# The MOLES document retrieval is a python port of :
8# TI07-MOLES/trunk/JavaCode/returnmolesxmldb/ndg/services/returnmoles/Main.java
9# Note that ndgRetreive essentially provides test cases for this code.
10
11debug=0
12
def queryReplace(xquery,repository,localID,targetCollection):
    ''' Turn a template xquery into one addressing a specific document.

    The first occurrence of the 'RepositoryID' placeholder is swapped for
    repository, then the first occurrence of 'LocalID' for localID, and
    finally every occurrence of 'TargetCollection' for targetCollection.
    The modified query string is returned. '''
    # these two placeholders are only substituted once each, in this order
    firstOnly=(('RepositoryID',repository),('LocalID',localID))
    for placeholder,value in firstOnly:
        xquery=xquery.replace(placeholder,value,1)
    # the collection placeholder is substituted wherever it appears
    return xquery.replace('TargetCollection',targetCollection)
19   
class DocumentRetrieve (ndg_eXist):
    ''' This class provides a document retrieval service via the NDG exist interfaces '''

    def __init__(self,repository,pwfile='passwords.txt'):
        ''' Initialise the underlying ndg_eXist interface, passing repository
        as its db argument and pwfile as its passwordFile argument, and
        remember the repository name on the instance. '''
        ndg_eXist.__init__(self,db=repository,passwordFile=pwfile)
        self.repository=repository

    def get(self,repository,schema,localID,format='NDG-B0',targetCollection='/db/discovery/moles'):
        ''' Return the document identified by repository, schema and localID.

        schema selects the query path:
          'NDG-A0' or 'NumSim'            - a single query (csmlQuery or numsimQuery)
          starting 'NDG-B', 'DIF', 'MDIP' - an object type query followed by
                                            the query for the requested format
        format is only consulted on the second path and must be one of
        'DIF', 'DC', 'ISO19139', 'NDG-B0', 'NDG-B1' or 'MDIP'.
        targetCollection replaces the 'TargetCollection' placeholder in the
        query templates.

        Raises ValueError when a query does not return exactly one hit, and
        TypeError for an unknown schema, an invalid output format, or an
        object type that cannot be rendered in the requested format.

        Every session id returned by executeQuery is handed back to
        sessionRelease, including when an error is raised.
        NOTE(review): this assumes sessionRelease is safe to call for any id
        returned by executeQuery (including zero hit results) - confirm
        against ndg_eXist. '''

        # We are making the assumption for now that everything is stored as
        # MOLES documents and that we can retrieve from MOLES anything we like
        # in another format.
        if schema in ['NDG-A0','NumSim']:
            xquery={'NDG-A0':csmlQuery,'NumSim':numsimQuery}[schema]
            if schema=='NumSim': xquery=xquery.replace('RepositoryID',repository)
            xquery=xquery.replace('TargetCollection',targetCollection)
            xquery=xquery.replace('LocalID',localID)
            # diagnostic output only; used to be printed on every request
            if debug: print(xquery)
            sessionID,summary=self.executeQuery(xquery)
            try:
                if summary['hits']!=1:
                    raise ValueError('Unable to obtain %s document [%s] (hits=%s)'%(schema,localID,summary['hits']))
                return self.retrieve(sessionID,0,{})
            finally:
                self.sessionRelease(sessionID)

        elif (schema[0:5] == 'NDG-B' or schema[0:3]=='DIF' or schema[0:4] == 'MDIP'):

            # find out what type of object actually exists of this sort
            # 0 - None, 1 - Activity, 2 - DPT, 3 - ObsStn, 4 - DE
            # if the schema is a DIF, we expect to find a DE from the DIF ingestion to MOLES ...

            # the following xquery can be used to generate a listing of the database
            # contents ...
            # xquery=listingQuery

            # default code has targetCollection='/db/ndg_B_metadata, this gets us the right one ...
            xquery=queryReplace(ObjectTypeQuery,repository,localID,targetCollection)

            sessionID,summary=self.executeQuery(xquery)
            try:
                hits=summary['hits']
                if hits!=1:
                    raise ValueError('%s documents returned for uri %s:%s:%s (in %s)'%(hits,repository,'NDG-B0',localID,targetCollection))

                # check output formats
                allowed = ['DIF','DC','ISO19139','NDG-B0','NDG-B1','MDIP']
                if format not in allowed:
                    raise TypeError('Invalid document output format [%s]'%format)

                # get output type
                r=self.retrieve(sessionID,0,{})
            finally:
                # this session is finished with before the document query starts
                self.sessionRelease(sessionID)

            # typical response looks like this:
            #        <objectType xmlns="http://ndg.nerc.ac.uk/moles">1</objectType>
            xml=minidom.parseString(r)
            objectType=int(xml.getElementsByTagNameNS(
                            "http://ndg.nerc.ac.uk/moles", "objectType").item(0).firstChild.data)
            if debug: print('objectType=%s'%objectType)

            # only data entities (4) can be rendered in the non NDG formats
            if format.find('NDG')==-1 and objectType!=4:
                typeNames={1:'Activity',2:'Data Production Tool',3:'Observation Station'}
                # fall back to the raw value so an unexpected type still gives
                # the intended TypeError rather than a KeyError
                msg=typeNames.get(objectType,objectType)
                raise TypeError('Document URI type [%s] not valid for output format [%s]'%(msg,format))

            #select the correct xquery
            if format=='NDG-B1':
                # the stub query depends on the object type; only looked up
                # when actually needed so other formats cannot fail here
                stubQueries={4:StubBDEQuery,#strip_de_xquery,#
                             3:StubBObsStnQuery,
                             2:StubBDPTQuery,
                             1:StubBActQuery}
                if objectType not in stubQueries:
                    raise TypeError('Document URI type [%s] not valid for output format [%s]'%(objectType,format))
                xquery=stubQueries[objectType]
            else:
                xquery={'DIF':DIFQuery,
                        'DC':DublinCoreDEQuery,
                        'ISO19139':ISO19139Query,
                        'NDG-B0':MOLESQuery,
                        'MDIP':MDIPQuery}[format]

            xquery=queryReplace(xquery,repository,localID,targetCollection)

            try:
                sessionID,summary=self.executeQuery(xquery)
            except Exception:
                # show the offending query, then let the original error (and
                # its traceback) propagate
                print(xquery)
                raise
            try:
                hits=summary['hits']
                # should only be the one document in the result set
                if hits!=1:
                    if debug:
                        f=open('xquery.%s.fails.xq'%format,'w')
                        try:
                            f.write(xquery)
                        finally:
                            f.close()
                    raise ValueError('Actual Document Query returned [%s] hits - internal error!'%hits)

                # now let's get it and return it
                return self.retrieve(sessionID,0,{})
            finally:
                self.sessionRelease(sessionID)

        raise TypeError('Unknown Schema "%s" in URI'%schema)
119           
class ndgHTTP:
    ''' Provides a get method to obtain an xml document from a remote NDG repository '''

    def __init__(self,remoteHost,proxyServer=None):
        ''' Remember remoteHost (used as the prefix of every request url) and
        build the urllib2 opener used by get.

        proxyServer, when given, is used as the proxy for http requests;
        'http://' is prepended if it does not already start with 'http'.
        When it is None the ProxyHandler is built from an empty mapping. '''
        self.remoteHost=remoteHost
        if proxyServer is None:
            proxyHandler=urllib2.ProxyHandler({})
        else:
            proxy=proxyServer
            if proxy[0:4]!='http':proxy='http://'+proxy
            proxyHandler=urllib2.ProxyHandler({'http':proxy})
        self.opener=urllib2.build_opener(proxyHandler)

    def uriget(self,uri):
        ''' Fetch the document addressed by an NDG uri, using the default
        arguments of get for everything else. '''
        # NOTE(review): ndgObject is not imported by name in the visible part
        # of this module; presumably it arrives via a star import - confirm
        n=ndgObject(uri)
        return self.get(n.repository,n.schema,n.localID)

    def get(self,repository,schema,localID,format='NDG-B0',targetCollection=None):
        ''' Return the body of
        <remoteHost>/view/<repository>__<schema>__<localID>?format=raw&outputSchema=<format>

        targetCollection is accepted only for compatibility with the API and
        is ignored here.

        Raises IOError, with a message starting 'Error obtaining remote
        file: ', if the url cannot be opened. '''
        # socket is not imported at module level in the visible source, so
        # bring it into scope here for the except clause below
        import socket
        #last two arguments for compatibility with API, not for use in restful get ...
        #TODO what about security? Probably means we need to get the headers of our responses sorted ...
        url='%s/view/%s__%s__%s?format=raw&outputSchema=%s'%(self.remoteHost,repository,schema,localID,format)
        #http://docs.python.org/lib/urllib2-examples.html
        request=urllib2.Request(url)
        prefix='Error obtaining remote file: '
        try:
            f = self.opener.open(request)
        except urllib2.URLError as e:
            # test for a status code first: an HTTPError may carry a reason
            # as well, and the code is the more useful thing to report
            if hasattr(e,'code'):
                raise IOError(prefix+'Response code [%s]'%e.code)
            if hasattr(e,'reason'):
                raise IOError(prefix+'No access to server [%s]'%e.reason)
            raise IOError(prefix)
        except socket.error:
            raise IOError(prefix+'Network Socket problem')
        except Exception as e:
            raise IOError(prefix+'[%s]'%str(e))
        # make sure the connection is closed once the body has been read
        try:
            return f.read()
        finally:
            f.close()

    def setSecurity(self,location,usercode,password):
        ''' Use a usercode password to set security credentials at a specific location '''
        # not implemented yet
        pass
163   
Note: See TracBrowser for help on using the repository browser.