source: exist/trunk/python/ndgUtils/DocumentRetrieve.py @ 4638

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/DocumentRetrieve.py@4638
Revision 4638, 7.2 KB checked in by cbyrom, 11 years ago (diff)

Adjust retrieval of backup atom docs; these will return more than one
search hit - so don't throw an exception.

Line 
1from eXistInterface import ndg_eXist
2from ndgXqueries import ndgXqueries
3
4import urllib2, logging, socket
5try:
6    from xml.etree import ElementTree as ET
7except ImportError:
8    try:
9        import ElementTree as ET
10    except ImportError:
        # For some reason, when ElementTree is installed with easy_install
        # the package is called "elementtree".
13        import elementtree.ElementTree as ET
14
# Module-level debug flag; it is not read by any code visible in this file.
debug=0
16
17
def httpify(url):
    '''
    Ensure a url has an http prefix

    The url is returned untouched when it already starts with an explicit
    'http://' or 'https://' scheme (compared case-insensitively); otherwise
    'http://' is prepended.  Checking for the full scheme, rather than just
    the first four characters, stops host names that merely begin with
    "http" (e.g. 'httpbin.org') from being mistaken for complete urls.

    @param url: url or bare host/path string
    @return: url string carrying an http(s) scheme
    '''
    # 'https://' is eight characters long, so the first eight characters
    # are enough to recognise either scheme
    if url[:8].lower().startswith(('http://', 'https://')):
        return url
    return 'http://' + url
25   
26       
class DocumentRetrieve(ndg_eXist):
    '''
    This class provides a document retrieval service via the NDG exist interfaces
    '''

    # Schema names used for the atom related lookups
    ATOM_TYPE = 'ATOM-TYPE'
    ATOM_BACKUP_TYPE = 'ATOM-BACKUP'
    ATOM = 'ATOM'

    def __init__(self, repository, pwfile='passwords.txt'):
        '''
        Initialise the eXist connection and the table of supported schemas
        @param repository: handed to the ndg_eXist constructor as its 'db' argument
        @keyword pwfile: handed to the ndg_eXist constructor as its 'passwordFile'
        argument
        '''
        logging.info("Using repository, '%s'", repository)

        ndg_eXist.__init__(self, db=repository, passwordFile=pwfile)
        logging.info("DB connection initialised")
        self.repository = repository
        self.xq = ndgXqueries()
        # Maps the schema name accepted by get() to the XQuery type name that
        # is passed on to ndgXqueries.actual()
        self.knownQueries = {'DIF': 'moles2dif',
                             'DC': 'moles2DC',
                             'ISO19139': 'moles2iso19139',
                             'NDG-B0': 'moles',
                             'NDG-B1': 'molesObjectType',
                             'MDIP': 'moles2mdip',
                             'NDG-A0': 'csml',
                             'NumSim': 'numsim',
                             self.ATOM: 'atom',
                             self.ATOM_BACKUP_TYPE: 'atom',
                             self.ATOM_TYPE: 'atomTypeList'}

    def _retrieveDoc(self, schema, xqtype, targetCollection, repository, localID):
        '''
        Retrieve doc using specified XQuery type
        @param schema: schema name; only used to relax the single hit check for
        backup atoms and in the error message
        @param xqtype: XQuery type name passed to ndgXqueries.actual()
        @param targetCollection: collection passed to ndgXqueries.actual()
        @param repository: repository passed to ndgXqueries.actual()
        @param localID: local ID passed to ndgXqueries.actual()
        @raise ValueError: if the query does not give exactly one hit (not
        enforced for the backup atom schema)
        @return: docName, docContents
        '''
        logging.debug("Retrieving doc - type, '%s', coll, '%s', rep:'%s', localID:'%s'",
                      xqtype, targetCollection, repository, localID)
        xquery = self.xq.actual(xqtype, targetCollection, repository, localID)

        sessionID, summary = self.executeQuery(xquery)
        try:
            # NB, backups will inevitably return lots of docs - only retrieve the top one
            # for the moment - since this is not really needed atm
            if summary['hits'] != 1 and schema != self.ATOM_BACKUP_TYPE:
                raise ValueError('Unable to obtain single %s document [%s] (hits=%s)'
                                 % (schema, localID, summary['hits']))

            docName = summary['documents'][0][0]
            doc = self.retrieve(sessionID, 0, {})
        finally:
            # release the query session on the failure paths too, so a bad
            # lookup does not leave it open
            self.sessionRelease(sessionID)
        return docName, doc

    def get(self, repository, schema, localID, targetCollection='/db/discovery/moles',
            includeDocNameData=False):
        '''
        Retrieve the document identified by repository, schema and localID
        @param repository: repository passed on to the XQuery
        @param schema: one of the keys of self.knownQueries
        @param localID: local ID passed on to the XQuery
        @keyword targetCollection: collection passed on to the XQuery
        @keyword includeDocNameData: if True, a dictionary is returned, instead of the dataset, with the key
        being the name of the document and the entry being the dataset
        @raise TypeError: if the schema is not a key of self.knownQueries
        @raise ValueError: if a single document cannot be obtained
        @raise KeyError: if an NDG-B1 lookup gives an object type code other
        than 1-4
        @return: the retrieved document, or {docName: document} if
        includeDocNameData is set
        '''
        logging.debug("Get called with rep:'%s', schema:'%s', localID:'%s', collection:'%s'",
                      repository, schema, localID, targetCollection)
        if schema not in self.knownQueries:
            raise TypeError('Unknown Schema "%s" in URI' % schema)

        xqtype = self.knownQueries[schema]

        if schema == 'NDG-B1':
            # this is a general moles object - so need to further establish the type of moles doc
            # it is to get the correct XQUery to use
            name, xml = self._retrieveDoc(schema, xqtype, targetCollection,
                                          repository, localID)
            # the root element text of the reply is read as an integer type
            # code; empty text is treated as 0, which has no stub query
            otype = int(ET.fromstring(xml).text or 0)

            xqtype = {4: 'stubB_dataEntity',
                      3: 'stubB_observationStation',
                      2: 'stubB_DPT',
                      1: 'stubB_activity'}[otype]

        docName, r = self._retrieveDoc(schema, xqtype, targetCollection,
                                       repository, localID)
        if includeDocNameData:
            return {docName: r}
        return r

    def error(self, string, t, r, s, l):
        '''
        Raise a ValueError built from the message and the document identifiers
        @param string: leading text of the error message
        @param t, r, s, l: values appended as ' for r:s:l in t' - presumably
        target collection, repository, schema and local ID (confirm with callers)
        @raise ValueError: always
        '''
        raise ValueError(string + ' for %s:%s:%s in %s' % (r, s, l, t))
105
106           
class genericHTTP(object):
    ''' Provides a generic HTTP request class '''

    def __init__(self,proxyServer=None):
        '''
        Build the urllib2 opener used for all requests
        @keyword proxyServer: proxy to use for http requests; an http prefix
        is added if missing.  When None, the opener is built with an empty
        proxy mapping.
        '''
        if proxyServer is None:
            proxyHandler = urllib2.ProxyHandler({})
        else:
            proxyHandler = urllib2.ProxyHandler({'http': httpify(proxyServer)})
        self.opener = urllib2.build_opener(proxyHandler)

    def get(self,url):
        '''
        Return the content found at the given url
        @param url: url to read; an http prefix is added if missing
        @raise IOError: if the url cannot be opened; the message starts with
        'Cannot obtain remote file: ' followed by the reason
        @return: the body of the response
        '''
        url = httpify(url)
        request = urllib2.Request(url)
        logging.info("Getting data from url: %s", url)

        problem = None
        try:
            remote = self.opener.open(request)
        except urllib2.HTTPError as e:
            # HTTPError is a URLError subclass and, depending on the
            # interpreter version, may also expose 'reason' - handle it first
            # so that the HTTP status code is always the thing reported
            problem = 'Response code [%s]' % e.code
        except urllib2.URLError as e:
            if hasattr(e, 'reason'):
                problem = 'No access to server [%s]' % e.reason
            elif hasattr(e, 'code'):
                problem = 'Response code [%s]' % e.code
            else:
                problem = ''
        except socket.error:
            problem = 'Network Socket problem'
        except Exception as e:
            problem = '[%s]' % str(e)

        if problem is not None:
            raise IOError('Cannot obtain remote file: ' + problem)

        try:
            return remote.read()
        finally:
            # always hand the connection back, even if the read fails
            remote.close()
139
140           
class ndgHTTP(genericHTTP):
    ''' Fetches xml documents from a remote NDG repository over HTTP '''

    def __init__(self,remoteHost,proxyServer=None):
        '''
        @param remoteHost: host prefix used when building retrieve urls
        @keyword proxyServer: passed to the genericHTTP constructor
        '''
        genericHTTP.__init__(self,proxyServer)
        self.remoteHost = remoteHost

    def uriget(self,uri):
        '''
        Return the remote document identified by an ndg uri
        @param uri: value handed to the ndgObject constructor, whose
        repository, schema and localID attributes select the document
        '''
        # NB, this import is deliberately local - importing ndgObject at the
        # module level can cause problems with resolving imports when this
        # module is used, e.g. from the ndgObject level
        import ndgObject
        parsed = ndgObject.ndgObject(uri)
        return self.get(parsed.repository, parsed.schema, parsed.localID)

    def get(self,repository,schema,localID,**kw):
        '''
        Return a remote ndg document
        @param repository, schema, localID: joined with '__' to form the last
        part of the retrieve url
        @keyword kw: accepted but not used
        '''
        #TODO what about security? Probably means we need to get the headers of our responses sorted ...
        urlParts = (self.remoteHost, repository, schema, localID)
        return genericHTTP.get(self, '%s/retrieve/%s__%s__%s' % urlParts)

    def setSecurity(self,location,usercode,password):
        ''' Use a usercode password to set security credentials at a specific location
        (not implemented - the arguments are currently ignored) '''
        return None
164
165   
class ndgVocabPOX(genericHTTP):
    ''' Provides a POX interface to the vocab server '''

    # Namespace of the elements read from the vocab server replies
    _TYPES_NS = '{urn:vocab/types}'

    def __init__(self,path="http://vocab.ndg.nerc.ac.uk/axis2/services/vocab/",proxyServer=None):
        '''
        @keyword path: base url of the vocab service; the operation name is
        appended directly, so it should end with a '/'
        @keyword proxyServer: passed to the genericHTTP constructor
        '''
        genericHTTP.__init__(self,proxyServer)
        # honour the caller's path rather than always using the default url
        self.path = path
        self.ns = "http://vocab.ndg.nerc.ac.uk/"

    def _entryTerms(self, tree, matchType):
        ''' Return the entryTerm text ('' when missing or empty) of every
        element named matchType found at the '*/' level of the tree '''
        entryTag = self._TYPES_NS + 'entryTerm'
        return [(match.findtext(entryTag) or '')
                for match in tree.findall('*/' + self._TYPES_NS + matchType)]

    def getRelated(self,subject):
        '''
        Get a related record
        Queries getRelatedRecordByCriteria for the subject, stores the url
        used in self.url and the terms found in self.broader, self.narrower
        and self.synonyms.
        @param subject: text inserted, as given, into the subjectText query
        argument
        @raise IOError: if the url cannot be read
        @return: [broader, narrower, synonyms] - three lists of entry terms
        '''
        url = '%sgetRelatedRecordByCriteria?subjectText=%s&predicate=255&inferences=True&objectList=%slist/P211/current'%(self.path,subject,self.ns)
        self.url = url
        doc = genericHTTP.get(self,url)
        tree = ET.fromstring(doc)
        self.broader = self._entryTerms(tree, 'broadMatch')
        self.narrower = self._entryTerms(tree, 'narrowMatch')
        self.synonyms = self._entryTerms(tree, 'exactMatch')
        return [self.broader,self.narrower,self.synonyms]
185
Note: See TracBrowser for help on using the repository browser.