source: exist/trunk/python/ndgUtils/DocumentRetrieve.py @ 3125

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/DocumentRetrieve.py@3125
Revision 3125, 6.5 KB checked in by lawrence, 12 years ago (diff)

ndgUtils taking shape with test cases etc

Line 
1from eXistInterface import ndg_eXist
2from ndgXqueries import ndgXqueries
3import urllib2
4try:
5    from xml.etree import ElementTree as ET
6except ImportError:
7    try:
8        import ElementTree as ET
9    except ImportError:
10        # For some reason when I install ElementTree with easy_install it
11        # is called "elementtree".
12        import elementtree.ElementTree as ET
13
14debug=0
15   
16class DocumentRetrieve (ndg_eXist):
17    ''' This class provides a document retrieval service via the NDG exist interfaces '''
18    def __init__(self,repository,pwfile='passwords.txt'):
19
20        ndg_eXist.__init__(self,db=repository,passwordFile=pwfile)
21        self.repository=repository
22        self.xq=ndgXqueries()
23        self.knownQueries={'DIF':'moles2dif','DC':'moles2DC','ISO19139':'moles2iso19139',
24                           'NDG-B0':'moles','NDG-B1':'molesObjectType','MDIP':'moles2mdip'}
25
26    def get(self,repository,schema,localID,targetCollection='/db/discovery/moles'):
27
28        if schema in ['NDG-A0','NumSim']:
29            ''' These documents are stored in their own native format and have their
30            own native xqueries '''
31            query={'NDG-A0':'csml','NumSim':'numsim'}[schema]
32            xquery=self.xq.actual(query,targetCollection,repository,localID)
33            id,summary=self.executeQuery(xquery)
34            if summary['hits']==1:
35                r=self.retrieve(id,0,{})
36                self.sessionRelease(id)
37            else:
38                raise ValueError('Unable to obtain %s document [%s] (hits=%s)'%(schema,localID,summary['hits']))
39            return r
40       
41        elif schema[0:5] == 'NDG-B' or schema in self.knownQueries:
42           
43            ''' These documents are stored in moles, and need to be converted or extracted
44            directly '''
45           
46            xqtype=self.knownQueries[schema]
47           
48            if xqtype=='molesObjectType':
49                #We need to know what sort of moles thing it is to get the correct query
50                xquery=self.xq.actual(xqtype,targetCollection,repository,localID) 
51                id,summary=self.executeQuery(xquery)
52                hits=summary['hits']
53           
54                if hits!=1: self.error('%s documents returned! '%hits,targetCollection,repository,'NDG-B0',localID)
55           
56                # get output type
57                r=self.retrieve(id,0,{})
58                xml=ET.fromstring(r)
59                otype=int(xml.text or 0)
60               
61                xqtype={4:'stubB_dataEntity',
62                        3:'stubB_observationStation',
63                        2:'stubB_DPT',
64                        1:'stubB_activity'}[otype]
65           
66            #now get the real xquery
67            xq=self.xq.actual(xqtype,targetCollection,repository,localID)
68           
69            try:
70                id,summary=self.executeQuery(xquery)
71            except Exception,e:
72                print xquery
73                raise Exception,e
74            hits=summary['hits']
75            # should only be the one document in the result set
76            if hits!=1: 
77                self.error('Document Query returned [%s] hits'%hits,targetCollection,repository,schema,localID)
78             
79            # now let's get it and return it
80            r=self.retrieve(id,0,{})
81        else:
82            raise TypeError('Unknown Schema "%s" in URI'%schema)
83        self.sessionRelease(id)
84        return r
85    def error(self,string,t,r,s,l):
86        raise ValueError,string+' for %s:%s:%s in %s'%(r,s,l,t)
87           
88class genericHTTP(object):
89    ''' Provides a generic HTTP request class '''
90    def __init__(self,proxyServer=None):
91        if proxyServer is None:
92            proxyHandler=urllib2.ProxyHandler({})
93        else:
94            proxy=proxyServer
95            if proxy[0:4]!='http':proxy='http://'+proxy
96            proxyHandler=urllib2.ProxyHandler({'http':proxy})
97        self.opener=urllib2.build_opener(proxyHandler)
98       
99    def get(self,url):
100        request=urllib2.Request(url)
101        response='Error obtaining remote file: '
102        try:
103            f = self.opener.open(request)
104            response=''
105        except urllib2.URLError,e:
106            if hasattr(e,'reason'):
107                response+='No access to server [%s]'%e.reason
108            elif hasattr(e,'code'):
109                response+='Response code [%s]'%e.code
110        except socket.error:
111            response+='Network Socket problem'
112        except Exception,e:
113            response+='[%s]'%str(e)
114        if response=='':
115            return f.read()
116        else:
117            raise IOError(response) 
118           
119class ndgHTTP(genericHTTP):
120    ''' Provides a get method to obtain an xml document from a remote NDG repository '''
121    def __init__(self,remoteHost,proxyServer=None):
122        self.remoteHost=remoteHost
123        genericHTTP.__init__(self,proxyServer)
124       
125    def uriget(self,uri):
126        n=ndgObject(uri)
127        return self.get(n.repository,n.schema,n.localID)
128
129    def get(self,repository,schema,localID,**kw):
130        ''' Return a remote ndg document '''
131        #TODO what about security? Probably means we need to get the headers of our responses sorted ...
132        url='%s/retrieve/%s__%s__%s'%(self.remoteHost,repository,schema,localID)
133        print url
134        #http://docs.python.org/lib/urllib2-examples.html
135        return genericHTTP.get(self,url)
136       
137    def setSecurity(self,location,usercode,password):
138        ''' Use a usercode password to set security credentials at a specific location '''
139        pass
140   
class ndgVocabPOX(genericHTTP):
    ''' Provides a POX interface to the vocab server '''

    def __init__(self,path="http://vocab.ndg.nerc.ac.uk/axis2/services/vocab/",proxyServer=None):
        ''' path is the url of the vocab service (the operation name is appended
        directly to it, so it needs its trailing slash); proxyServer is handed
        on to genericHTTP '''
        genericHTTP.__init__(self,proxyServer)
        # use the path we were given, rather than always the default server
        self.path=path
        self.ns="http://vocab.ndg.nerc.ac.uk/"

    def getRelated(self,subject):
        ''' Get a related record

        Queries the vocab server for subject and returns [broader,narrower,synonyms],
        each a list of entry terms; the same lists are left on self.broader,
        self.narrower and self.synonyms, and the url used on self.url. '''
        # NOTE(review): subject goes into the query string as it is given -
        # confirm whether callers are expected to url-quote it first
        url='%sgetRelatedRecordByCriteria?subjectText=%s&predicate=255&inferences=True&objectList=%slist/P211/current'%(self.path,subject,self.ns)
        self.url=url
        doc=genericHTTP.get(self,url)
        x=ET.fromstring(doc)
        self.broader=self._entryTerms(x,'broadMatch')
        self.narrower=self._entryTerms(x,'narrowMatch')
        self.synonyms=self._entryTerms(x,'exactMatch')
        return [self.broader,self.narrower,self.synonyms]

    def _entryTerms(self,tree,match):
        ''' Return the entryTerm text of every <match> element one level below
        a child of tree; a missing or empty entryTerm gives an empty string '''
        vt='{urn:vocab/types}'
        return [(i.findtext(vt+'entryTerm') or '') for i in tree.findall('*/'+vt+match)]
Note: See TracBrowser for help on using the repository browser.