source: exist/trunk/python/ndgUtils/DocumentRetrieve.py @ 3127

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/DocumentRetrieve.py@3127
Revision 3127, 6.6 KB checked in by lawrence, 12 years ago (diff)

This version of ndgUtils does build to an egg. Next we need to
get dif2moles round trip testing working ... and solve
the related URL problem, and then we can work on the changes
to ndgObject which arise ... before trying to replace
the ows_server code.

Line 
1from eXistInterface import ndg_eXist
2from ndgXqueries import ndgXqueries
3import urllib2
4try:
5    from xml.etree import ElementTree as ET
6except ImportError:
7    try:
8        import ElementTree as ET
9    except ImportError:
10        # For some reason, when I install ElementTree with easy_install it
11        # is called "elementtree".
12        import elementtree.ElementTree as ET
13
14debug=0
15   
class DocumentRetrieve (ndg_eXist):
    ''' This class provides a document retrieval service via the NDG exist interfaces '''

    def __init__(self,repository,pwfile='passwords.txt'):
        ''' Initialise the underlying ndg_eXist connection (repository is passed as
        db, pwfile as passwordFile) and set up the xquery library and the table
        which maps a requested schema onto the name of an xquery. '''
        ndg_eXist.__init__(self,db=repository,passwordFile=pwfile)
        self.repository=repository
        self.xq=ndgXqueries()
        self.knownQueries={'DIF':'moles2dif','DC':'moles2DC','ISO19139':'moles2iso19139',
                           'NDG-B0':'moles','NDG-B1':'molesObjectType','MDIP':'moles2mdip'}

    def get(self,repository,schema,localID,targetCollection='/db/discovery/moles'):
        ''' Return the single document identified by repository, schema and localID.

        schema is either 'NDG-A0' or 'NumSim' (which have their own xqueries), or
        one of the keys of self.knownQueries (served from moles).
        targetCollection is passed through to the xquery builder.

        Raises ValueError if a query does not return exactly one hit (or if the
        moles object type is not recognised) and TypeError if the schema is not
        known. Every query session is released again, whether or not the
        retrieval succeeded. '''

        # schemas which are stored in their own native format and which have
        # their own native xqueries
        nativeQueries={'NDG-A0':'csml','NumSim':'numsim'}

        if schema in nativeQueries:
            xquery=self.xq.actual(nativeQueries[schema],targetCollection,repository,localID)
            hits,doc=self._fetchSingle(xquery)
            if hits!=1:
                raise ValueError('Unable to obtain %s document [%s] (hits=%s)'%(schema,localID,hits))
            return doc

        if schema not in self.knownQueries:
            # this includes NDG-B* schemas which have no entry in knownQueries,
            # which used to fall over with a bare KeyError
            raise TypeError('Unknown Schema "%s" in URI'%schema)

        # These documents are stored in moles, and need to be converted or
        # extracted directly
        xqtype=self.knownQueries[schema]

        if xqtype=='molesObjectType':
            # We need to know what sort of moles thing it is to get the correct query
            xquery=self.xq.actual(xqtype,targetCollection,repository,localID)
            hits,doc=self._fetchSingle(xquery)
            if hits!=1:
                self.error('%s documents returned! '%hits,targetCollection,repository,'NDG-B0',localID)

            # the text of the root element is the object type code
            # (empty text is treated as 0)
            otype=int(ET.fromstring(doc).text or 0)
            stubQueries={4:'stubB_dataEntity',
                         3:'stubB_observationStation',
                         2:'stubB_DPT',
                         1:'stubB_activity'}
            if otype not in stubQueries:
                self.error('Unknown moles object type [%s]'%otype,targetCollection,repository,schema,localID)
            xqtype=stubQueries[otype]

        # now get the real xquery
        xquery=self.xq.actual(xqtype,targetCollection,repository,localID)
        try:
            hits,doc=self._fetchSingle(xquery)
        except Exception:
            # show the offending query, then let the original exception (with
            # its own type and traceback) carry on up
            print(xquery)
            raise

        # should only be the one document in the result set
        if hits!=1:
            self.error('Document Query returned [%s] hits'%hits,targetCollection,repository,schema,localID)
        return doc

    def _fetchSingle(self,xquery):
        ''' Execute xquery and return (hits, document); document is None unless
        there was exactly one hit. The query session is always released. '''
        sessionID,summary=self.executeQuery(xquery)
        try:
            hits=summary['hits']
            doc=None
            if hits==1:
                doc=self.retrieve(sessionID,0,{})
        finally:
            self.sessionRelease(sessionID)
        return hits,doc

    def error(self,string,t,r,s,l):
        ''' Raise a ValueError made of string followed by the repository (r),
        schema (s), local id (l) and target collection (t) involved. '''
        raise ValueError(string+' for %s:%s:%s in %s'%(r,s,l,t))
87           
class genericHTTP(object):
    ''' Provides a generic HTTP request class '''
    def __init__(self,proxyServer=None):
        ''' Build the url opener used by get. If proxyServer is given it is used
        as the http proxy ('http://' is prepended if it does not already start
        with 'http'); otherwise an empty proxy table is handed to urllib2. '''
        if proxyServer is None:
            proxyHandler=urllib2.ProxyHandler({})
        else:
            proxy=proxyServer
            if proxy[0:4]!='http':proxy='http://'+proxy
            proxyHandler=urllib2.ProxyHandler({'http':proxy})
        self.opener=urllib2.build_opener(proxyHandler)

    def get(self,url):
        ''' Return the content found at url.

        Any failure to open the url is reported as an IOError whose message
        starts with 'Error obtaining remote file: '. '''
        # imported here because the module itself never imports socket (or sys)
        import socket
        import sys
        request=urllib2.Request(url)
        response='Error obtaining remote file: '
        try:
            f=self.opener.open(request)
        except urllib2.URLError:
            # sys.exc_info keeps this handler valid syntax on old and new pythons
            e=sys.exc_info()[1]
            # test for code first: an HTTP error can carry both a code and a
            # reason, and the code is the more useful thing to report
            if hasattr(e,'code'):
                response+='Response code [%s]'%e.code
            elif hasattr(e,'reason'):
                response+='No access to server [%s]'%e.reason
        except socket.error:
            response+='Network Socket problem'
        except Exception:
            response+='[%s]'%str(sys.exc_info()[1])
        else:
            # make sure the connection is closed once the content has been read
            try:
                return f.read()
            finally:
                f.close()
        raise IOError(response)
118           
class ndgHTTP(genericHTTP):
    ''' Fetches xml documents from a remote NDG repository via its retrieve urls '''

    def __init__(self,remoteHost,proxyServer=None):
        ''' remoteHost is the prefix of every url requested; proxyServer is
        handed on to genericHTTP. '''
        genericHTTP.__init__(self,proxyServer)
        self.remoteHost=remoteHost

    def uriget(self,uri):
        ''' Split uri into repository, schema and localID using ndgObject and
        return the corresponding remote document. '''
        # NOTE(review): ndgObject is not imported in the visible part of this
        # module - confirm where it is expected to come from
        parsed=ndgObject(uri)
        return self.get(parsed.repository,parsed.schema,parsed.localID)

    def get(self,repository,schema,localID,**kw):
        ''' Return a remote ndg document (any keyword arguments are ignored) '''
        #TODO what about security? Probably means we need to get the headers of our responses sorted ...
        docParts=(self.remoteHost,repository,schema,localID)
        url='%s/retrieve/%s__%s__%s'%docParts
        print(url)
        #http://docs.python.org/lib/urllib2-examples.html
        return genericHTTP.get(self,url)

    def setSecurity(self,location,usercode,password):
        ''' Use a usercode password to set security credentials at a specific location
        (not implemented: this currently does nothing) '''
        pass
140   
class ndgVocabPOX(genericHTTP):
    ''' Provides a POX interface to the vocab server '''
    def __init__(self,path="http://vocab.ndg.nerc.ac.uk/axis2/services/vocab/",proxyServer=None):
        ''' path is the url prefix of the vocab service; the operation name is
        appended to it directly, so it needs its trailing '/'. proxyServer is
        handed on to genericHTTP. '''
        genericHTTP.__init__(self,proxyServer)
        # honour the caller's path (it used to be overwritten by the default)
        self.path=path
        self.ns="http://vocab.ndg.nerc.ac.uk/"

    def getRelated(self,subject):
        ''' Get a related record

        Returns [broader, narrower, synonyms], each a list of entry term
        strings, and also stores them (and the url used) on the instance as
        self.broader, self.narrower, self.synonyms and self.url. '''
        # NOTE(review): subject goes into the query string as given, so it
        # presumably has to be url-safe already - confirm against callers
        url='%sgetRelatedRecordByCriteria?subjectText=%s&predicate=255&inferences=True&objectList=%slist/P211/current'%(self.path,subject,self.ns)
        self.url=url
        doc=genericHTTP.get(self,url)
        x=ET.fromstring(doc)
        types='{urn:vocab/types}'

        def entryTerms(match):
            # a match with no entryTerm child, or an empty one, yields ''
            return [(i.findtext(types+'entryTerm') or '') for i in x.findall('*/'+types+match)]

        self.broader=entryTerms('broadMatch')
        self.narrower=entryTerms('narrowMatch')
        self.synonyms=entryTerms('exactMatch')
        return [self.broader,self.narrower,self.synonyms]
Note: See TracBrowser for help on using the repository browser.