source: exist/trunk/python/ndgUtils/DocumentRetrieve.py @ 4532

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/DocumentRetrieve.py@4532
Revision 4532, 6.9 KB checked in by cbyrom, 11 years ago (diff)

Generalise atomDeploymentsList.xq to become atomTypeList.xq and adjust
surrounding code + tidy up some vocab data wrt deployment terms.

Line 
1from eXistInterface import ndg_eXist
2from ndgXqueries import ndgXqueries
3
4import urllib2, logging, socket
5try:
6    from xml.etree import ElementTree as ET
7except ImportError:
8    try:
9        import ElementTree as ET
10    except ImportError:
11        # For some reason when I install ElementTree with easyinstall it
12        # is called "elementree".
13        import elementtree.ElementTree as ET
14
15debug=0
16
17
def httpify(url):
    '''
    Ensure a url has an http(s) scheme prefix

    The url is returned untouched only when it already starts with
    'http://' or 'https://' (compared case-insensitively).  Anything else,
    including a bare host name that merely begins with the letters 'http'
    (e.g. 'httpbin.org'), gets 'http://' prepended.

    @param url: url or host name, with or without a scheme
    @return: the url, starting with an http or https scheme
    '''
    # Test for the full scheme separator, not just the first four letters,
    # so that hosts such as 'httpbin.org' are still given a scheme
    if not url.lower().startswith(('http://', 'https://')):
        url = 'http://' + url
    return url
25   
26       
class DocumentRetrieve(ndg_eXist):
    '''
    This class provides a document retrieval service via the NDG exist interfaces
    '''

    ATOM_TYPE = 'ATOM-TYPE'

    # XQuery to use for each moles object type code; the code is read from
    # the document returned by the 'NDG-B1' query (see get())
    _STUB_B_QUERIES = {4: 'stubB_dataEntity',
                       3: 'stubB_observationStation',
                       2: 'stubB_DPT',
                       1: 'stubB_activity'}

    def __init__(self, repository, pwfile='passwords.txt'):
        '''
        Initialise the ndg_eXist base class and the schema -> XQuery lookup
        @param repository: handed to ndg_eXist as its 'db' argument
        @keyword pwfile: handed to ndg_eXist as its 'passwordFile' argument
        '''
        logging.info("Using repository, '%s'", repository)

        ndg_eXist.__init__(self, db=repository, passwordFile=pwfile)
        logging.info("DB connection initialised")
        self.repository = repository
        self.xq = ndgXqueries()
        # schema name (as accepted by get()) -> name of the XQuery to run
        self.knownQueries = {'DIF': 'moles2dif',
                             'DC': 'moles2DC',
                             'ISO19139': 'moles2iso19139',
                             'NDG-B0': 'moles',
                             'NDG-B1': 'molesObjectType',
                             'MDIP': 'moles2mdip',
                             'NDG-A0': 'csml',
                             'NumSim': 'numsim',
                             'ATOM': 'atom',
                             'ATOM-BACKUP': 'atom',
                             self.ATOM_TYPE: 'atomTypeList'}

    def _retrieveDoc(self, schema, xqtype, targetCollection, repository, localID):
        '''
        Retrieve doc using specified XQuery type
        @param schema: schema name - only used in the error message
        @param xqtype: name of the XQuery to run
        @param targetCollection: collection passed on to the XQuery
        @param repository: repository passed on to the XQuery
        @param localID: local ID passed on to the XQuery
        @return: docName, docContents
        @raise ValueError: if the query does not match exactly one document
        '''
        logging.debug("Retrieving doc - type, '%s', coll, '%s', rep:'%s', localID:'%s'",
                      xqtype, targetCollection, repository, localID)
        xquery = self.xq.actual(xqtype, targetCollection, repository, localID)

        resultID, summary = self.executeQuery(xquery)
        try:
            if summary['hits'] != 1:
                raise ValueError('Unable to obtain single %s document [%s] (hits=%s)'
                                 % (schema, localID, summary['hits']))

            docName = summary['documents'][0][0]
            return docName, self.retrieve(resultID, 0, {})
        finally:
            # release the query result on every path, including the
            # 'wrong number of hits' error and a failing retrieve()
            self.sessionRelease(resultID)

    def get(self, repository, schema, localID, targetCollection='/db/discovery/moles', includeDocNameData=False):
        '''
        Retrieve the document identified by repository, schema and localID
        @param repository: repository part of the document identifier
        @param schema: one of the keys of self.knownQueries
        @param localID: local ID part of the document identifier
        @keyword targetCollection: collection to run the XQuery against
        @keyword includeDocNameData: if True, a dictionary is returned, instead of the dataset, with the key
        being the name of the document and the entry being the dataset
        @return: the retrieved document, or {docName: document}
        @raise TypeError: if the schema is not in self.knownQueries
        @raise ValueError: if a query does not match exactly one document
        @raise KeyError: if an 'NDG-B1' lookup yields an unknown object type
        '''
        logging.debug("Get called with rep:'%s', schema:'%s', localID:'%s', collection:'%s'",
                      repository, schema, localID, targetCollection)
        if schema not in self.knownQueries:
            raise TypeError('Unknown Schema "%s" in URI' % schema)

        xqtype = self.knownQueries[schema]

        if schema == 'NDG-B1':
            # this is a general moles object - so need to further establish the type of moles doc
            # it is to get the correct XQuery to use
            typeDoc = self._retrieveDoc(schema, xqtype, targetCollection,
                                        repository, localID)[1]
            # an empty element is treated as type 0, which is not a known type
            otype = int(ET.fromstring(typeDoc).text or 0)
            xqtype = self._STUB_B_QUERIES[otype]

        docName, r = self._retrieveDoc(schema, xqtype, targetCollection,
                                       repository, localID)
        if includeDocNameData:
            return {docName: r}
        return r

    def error(self, string, t, r, s, l):
        '''
        Raise a ValueError built from the message and the document details
        @raise ValueError: always; message is '<string> for r:s:l in t'
        '''
        raise ValueError(string + ' for %s:%s:%s in %s' % (r, s, l, t))
100
101           
class genericHTTP(object):
    ''' Provides a generic HTTP request class '''

    def __init__(self, proxyServer=None):
        '''
        Build the urllib2 opener used by get()
        @keyword proxyServer: proxy to use for 'http' requests; it is given
        an http prefix if it lacks one.  If None, ProxyHandler is given an
        empty proxy map.
        '''
        if proxyServer is None:
            proxyHandler = urllib2.ProxyHandler({})
        else:
            proxy = httpify(proxyServer)
            proxyHandler = urllib2.ProxyHandler({'http': proxy})
        self.opener = urllib2.build_opener(proxyHandler)

    def get(self, url):
        '''
        Fetch the contents of a url
        @param url: url to fetch; it is given an http prefix if it lacks one
        @return: the response body, as returned by the response's read()
        @raise IOError: if the url cannot be opened; the message starts
        with 'Cannot obtain remote file: '
        '''
        url = httpify(url)
        request = urllib2.Request(url)
        logging.info("Getting data from url: %s", url)

        # None means the url was opened; a string describes the failure
        problem = None
        try:
            f = self.opener.open(request)
        except urllib2.URLError as e:
            # HTTPError is a URLError subclass which can expose both 'code'
            # and 'reason', so 'code' has to be tested first or the
            # response code is never reported
            if hasattr(e, 'code'):
                problem = 'Response code [%s]' % (e.code,)
            elif hasattr(e, 'reason'):
                # wrap in a tuple: reason may itself be a tuple
                problem = 'No access to server [%s]' % (e.reason,)
            else:
                problem = ''
        except socket.error:
            problem = 'Network Socket problem'
        except Exception as e:
            problem = '[%s]' % str(e)

        if problem is not None:
            raise IOError('Cannot obtain remote file: ' + problem)

        try:
            return f.read()
        finally:
            # do not leave the connection open once the body has been read
            f.close()
134
135           
class ndgHTTP(genericHTTP):
    ''' Fetches xml documents from a remote NDG repository over HTTP '''

    def __init__(self, remoteHost, proxyServer=None):
        '''
        @param remoteHost: host (and any leading path) that the retrieve
        urls are built on
        @keyword proxyServer: handed on to genericHTTP
        '''
        genericHTTP.__init__(self, proxyServer)
        self.remoteHost = remoteHost

    def uriget(self, uri):
        ''' Return the remote ndg document identified by an ndg uri '''
        # NB, the import is deliberately done here: at module level it can
        # cause problems resolving imports when this module is itself used
        # from the ndgObject level
        import ndgObject
        parsed = ndgObject.ndgObject(uri)
        return self.get(parsed.repository, parsed.schema, parsed.localID)

    def get(self, repository, schema, localID, **kw):
        '''
        Return a remote ndg document
        @return: contents of <remoteHost>/retrieve/<repository>__<schema>__<localID>
        '''
        #TODO what about security? Probably means we need to get the headers of our responses sorted ...
        # extra keyword arguments are accepted but not used
        urlParts = (self.remoteHost, repository, schema, localID)
        return genericHTTP.get(self, '%s/retrieve/%s__%s__%s' % urlParts)

    def setSecurity(self, location, usercode, password):
        ''' Use a usercode password to set security credentials at a specific location '''
        # not implemented - the arguments are currently ignored
        return None
159
160   
class ndgVocabPOX(genericHTTP):
    ''' Provides a POX interface to the vocab server '''

    # namespace of the elements read from the vocab server response
    _TYPES_NS = '{urn:vocab/types}'

    def __init__(self, path="http://vocab.ndg.nerc.ac.uk/axis2/services/vocab/", proxyServer=None):
        '''
        @keyword path: base url of the vocab service
        @keyword proxyServer: handed on to genericHTTP
        '''
        genericHTTP.__init__(self, proxyServer)
        # use the path given by the caller rather than always the default
        # server; the operation name is appended directly, so make sure the
        # path ends with a slash
        if not path.endswith('/'):
            path += '/'
        self.path = path
        self.ns = "http://vocab.ndg.nerc.ac.uk/"

    def _entryTerms(self, root, matchType):
        '''
        Collect the entryTerm text of every match element of one type
        @param root: parsed response document
        @param matchType: 'broadMatch', 'narrowMatch' or 'exactMatch'
        @return: list of entry terms; an empty term is returned as ''
        '''
        matches = root.findall('*/%s%s' % (self._TYPES_NS, matchType))
        termTag = self._TYPES_NS + 'entryTerm'
        return [(match.find(termTag).text or '') for match in matches]

    def getRelated(self, subject):
        '''
        Get a related record
        @param subject: text to look up; it is placed in the query string
        as given
        @return: [broader, narrower, synonyms] - three lists of terms, also
        stored as self.broader, self.narrower and self.synonyms
        @raise IOError: if the vocab server cannot be reached
        '''
        # NOTE(review): subject is not url-encoded here; presumably callers
        # pass url-safe text - confirm
        url = '%sgetRelatedRecordByCriteria?subjectText=%s&predicate=255&inferences=True&objectList=%slist/P211/current' % (self.path, subject, self.ns)
        self.url = url
        doc = genericHTTP.get(self, url)
        root = ET.fromstring(doc)
        self.broader = self._entryTerms(root, 'broadMatch')
        self.narrower = self._entryTerms(root, 'narrowMatch')
        self.synonyms = self._entryTerms(root, 'exactMatch')
        return [self.broader, self.narrower, self.synonyms]
180
Note: See TracBrowser for help on using the repository browser.