source: exist/trunk/python/ndgUtils/DocumentRetrieve.py @ 4025

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/DocumentRetrieve.py@4025
Revision 4025, 6.6 KB checked in by cbyrom, 11 years ago (diff)

DocumentRetrieve - add granule atom support for get method.
ETxmlView - fix nsdumb.strip() - using lstrip() when should be using replace()
eXistConnector - add removeDoc method to allow cleanup of eXist when
deleting granules.

Line 
1from eXistInterface import ndg_eXist
2from ndgXqueries import ndgXqueries
3import urllib2
4try:
5    from xml.etree import ElementTree as ET
6except ImportError:
7    try:
8        import ElementTree as ET
9    except ImportError:
10        # For some reason when I install ElementTree with easyinstall it
11        # is called "elementree".
12        import elementtree.ElementTree as ET
13
14debug=0
15   
class DocumentRetrieve (ndg_eXist):
    ''' This class provides a document retrieval service via the NDG exist interfaces '''

    # Schemas whose documents are stored in their own native format, mapped to
    # the name of the native xquery used to fetch them.
    _nativeQueries={'NDG-A0':'csml','NumSim':'numsim','G_ATOM':'granuleAtom'}

    # Value returned by the 'molesObjectType' xquery, mapped to the stub-B
    # xquery that extracts that kind of moles object.
    _stubQueries={4:'stubB_dataEntity',
                  3:'stubB_observationStation',
                  2:'stubB_DPT',
                  1:'stubB_activity'}

    def __init__(self,repository,pwfile='passwords.txt'):
        ''' Connect to the eXist database [repository] using the credentials
        in [pwfile], and set up the table of moles-based output schemas '''
        ndg_eXist.__init__(self,db=repository,passwordFile=pwfile)
        self.repository=repository
        self.xq=ndgXqueries()
        self.knownQueries={'DIF':'moles2dif','DC':'moles2DC','ISO19139':'moles2iso19139',
                           'NDG-B0':'moles','NDG-B1':'molesObjectType','MDIP':'moles2mdip'}

    def get(self,repository,schema,localID,targetCollection='/db/discovery/moles'):
        ''' Return the single document identified by repository/schema/localID.

        schema is either one of the native formats (NDG-A0, NumSim, G_ATOM) or
        one of the keys of self.knownQueries (documents held in moles which are
        converted or extracted by an xquery).

        Raises TypeError for a schema that is not recognised, and ValueError
        if the query does not return exactly one document. '''

        if schema in self._nativeQueries:
            # These documents are stored in their own native format and have
            # their own native xqueries
            xquery=self.xq.actual(self._nativeQueries[schema],targetCollection,repository,localID)
            hits,doc=self._querySingle(xquery)
            if hits!=1:
                raise ValueError('Unable to obtain %s document [%s] (hits=%s)'%(schema,localID,hits))
            return doc

        # Anything else must be in the lookup table; checking the table (rather
        # than the 'NDG-B' prefix) means an unlisted NDG-Bx schema is reported
        # as unknown instead of failing with a KeyError.
        if schema not in self.knownQueries:
            raise TypeError('Unknown Schema "%s" in URI'%schema)

        # These documents are stored in moles, and need to be converted or
        # extracted directly
        xqtype=self.knownQueries[schema]
        if xqtype=='molesObjectType':
            # We need to know what sort of moles thing it is to get the correct query
            xqtype=self._stubQueryFor(repository,localID,targetCollection)

        # now get the real xquery
        xquery=self.xq.actual(xqtype,targetCollection,repository,localID)
        hits,doc=self._querySingle(xquery)
        # should only be the one document in the result set
        if hits!=1:
            self.error('Document Query returned [%s] hits'%hits,targetCollection,repository,schema,localID)
        return doc

    def _querySingle(self,xquery):
        ''' Execute xquery and return (hits, document). The document is only
        retrieved when there is exactly one hit, otherwise it is None. The
        query session is released whatever the outcome, so that a failed or
        ambiguous lookup does not leave a session open. '''
        if debug: print(xquery)
        sessionID,summary=self.executeQuery(xquery)
        try:
            hits=summary['hits']
            doc=None
            if hits==1:
                doc=self.retrieve(sessionID,0,{})
        finally:
            self.sessionRelease(sessionID)
        return hits,doc

    def _stubQueryFor(self,repository,localID,targetCollection):
        ''' Run the molesObjectType query for the document and return the name
        of the stub-B xquery appropriate to the kind of object it reports.
        Raises ValueError if the object type cannot be established. '''
        xquery=self.xq.actual('molesObjectType',targetCollection,repository,localID)
        hits,doc=self._querySingle(xquery)
        if hits!=1:
            self.error('%s documents returned! '%hits,targetCollection,repository,'NDG-B0',localID)
        # the result is a single element whose text is the numeric object type;
        # an empty element is treated as type 0, which has no stub query
        otype=int(ET.fromstring(doc).text or 0)
        if otype not in self._stubQueries:
            self.error('Unrecognised moles object type [%s]'%otype,targetCollection,repository,'NDG-B1',localID)
        return self._stubQueries[otype]

    def error(self,string,t,r,s,l):
        ''' Raise a ValueError consisting of string followed by the document
        identity: r(epository):s(chema):l(ocalID) in t(arget collection) '''
        raise ValueError(string+' for %s:%s:%s in %s'%(r,s,l,t))
88           
class genericHTTP(object):
    ''' Provides a generic HTTP request class '''
    def __init__(self,proxyServer=None):
        ''' Build a urllib2 opener. With no proxyServer an empty ProxyHandler
        is installed; otherwise http requests go via proxyServer, which has
        'http://' prepended if it does not already start with 'http'. '''
        if proxyServer is None:
            proxyHandler=urllib2.ProxyHandler({})
        else:
            proxy=proxyServer
            if not proxy.startswith('http'): proxy='http://'+proxy
            proxyHandler=urllib2.ProxyHandler({'http':proxy})
        self.opener=urllib2.build_opener(proxyHandler)

    def get(self,url):
        ''' Return the content found at url.

        Any failure to open the url is reported as an IOError whose message
        starts with 'Cannot obtain remote file: '. '''
        # socket is not imported at module level, so import it here; without
        # it the socket.error clause below would itself fail with a NameError.
        import socket
        prefix='Cannot obtain remote file: '
        request=urllib2.Request(url)
        try:
            f=self.opener.open(request)
        except urllib2.URLError as e:
            # An HTTPError carries both a code and (on recent interpreters) a
            # reason, so the code has to be tested first or the response code
            # would never be reported.
            if hasattr(e,'code'):
                detail='Response code [%s]'%e.code
            elif hasattr(e,'reason'):
                detail='No access to server [%s]'%e.reason
            else:
                detail=''
            raise IOError(prefix+detail)
        except socket.error:
            raise IOError(prefix+'Network Socket problem')
        except Exception as e:
            raise IOError(prefix+'[%s]'%str(e))
        # make sure the connection is closed even if the read fails
        try:
            return f.read()
        finally:
            f.close()
119           
class ndgHTTP(genericHTTP):
    ''' Provides a get method to obtain an xml document from a remote NDG repository '''
    def __init__(self,remoteHost,proxyServer=None):
        ''' remoteHost is the base url of the remote repository; proxyServer
        is handed on to genericHTTP '''
        self.remoteHost=remoteHost
        genericHTTP.__init__(self,proxyServer)

    def uriget(self,uri):
        ''' Return the remote document identified by an ndg uri '''
        # NOTE(review): ndgObject is not imported by any of the import lines
        # visible at the top of this module, so unless it is provided some
        # other way this call fails with a NameError - confirm where it
        # should be imported from.
        n=ndgObject(uri)
        return self.get(n.repository,n.schema,n.localID)

    def get(self,repository,schema,localID,**kw):
        ''' Return a remote ndg document. Extra keyword arguments are accepted
        but not used. '''
        #TODO what about security? Probably means we need to get the headers of our responses sorted ...
        url='%s/retrieve/%s__%s__%s'%(self.remoteHost,repository,schema,localID)
        # only report the url when the module debug flag is set, rather than
        # writing to stdout on every request
        if debug: print(url)
        #http://docs.python.org/lib/urllib2-examples.html
        return genericHTTP.get(self,url)

    def setSecurity(self,location,usercode,password):
        ''' Use a usercode password to set security credentials at a specific location '''
        # not implemented: the arguments are currently ignored
        pass
141   
class ndgVocabPOX(genericHTTP):
    ''' Provides a POX interface to the vocab server '''
    def __init__(self,path="http://vocab.ndg.nerc.ac.uk/axis2/services/vocab/",proxyServer=None):
        ''' path is the base url of the vocab service; proxyServer is handed
        on to genericHTTP '''
        genericHTTP.__init__(self,proxyServer)
        # honour the path argument (it used to be ignored in favour of the
        # default); the method name is appended directly, so make sure the
        # path ends with a slash
        if not path.endswith('/'): path=path+'/'
        self.path=path
        self.ns="http://vocab.ndg.nerc.ac.uk/"

    def getRelated(self,subject):
        ''' Get a related record.

        Queries the vocab server for subject and returns
        [broader,narrower,synonyms], each a list of entry terms. The lists and
        the url used are also left on the instance (self.broader,
        self.narrower, self.synonyms, self.url). '''
        url='%sgetRelatedRecordByCriteria?subjectText=%s&predicate=255&inferences=True&objectList=%slist/P211/current'%(self.path,subject,self.ns)
        self.url=url
        doc=genericHTTP.get(self,url)
        x=ET.fromstring(doc)

        def terms(match):
            # findtext with a default of '' gives an empty string both for an
            # empty entryTerm and for a match with no entryTerm at all
            return [i.findtext('{urn:vocab/types}entryTerm','')
                    for i in x.findall('*/{urn:vocab/types}%s'%match)]

        self.broader=terms('broadMatch')
        self.narrower=terms('narrowMatch')
        self.synonyms=terms('exactMatch')
        return [self.broader,self.narrower,self.synonyms]
161
Note: See TracBrowser for help on using the repository browser.