source: exist/trunk/python/ndgUtils/DocumentRetrieve.py @ 4555

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/DocumentRetrieve.py@4555
Revision 4555, 6.9 KB checked in by cbyrom, 11 years ago (diff)

Move existbdclient to lib package + extend to make use of DocumentRetrieve to allow retrieval of atoms by ID + fix handling of
authors vs contributors when doing Atom to XML exports.

Line 
1from eXistInterface import ndg_eXist
2from ndgXqueries import ndgXqueries
3
4import urllib2, logging, socket
5try:
6    from xml.etree import ElementTree as ET
7except ImportError:
8    try:
9        import ElementTree as ET
10    except ImportError:
11        # For some reason when I install ElementTree with easy_install it
12        # is called "elementtree".
13        import elementtree.ElementTree as ET
14
# Module-level debug flag, left at 0; no code visible in this file reads it.
debug = 0
16
17
def httpify(url):
    '''
    Ensure a url has an http prefix

    @param url: URL, or bare host/path string
    @return: url unchanged if it already starts with 'http://' or 'https://'
    (compared case-insensitively), otherwise url with 'http://' prepended
    '''
    # Compare against the full scheme separator rather than just the first
    # four characters, so that hosts whose name merely begins with 'http'
    # (e.g. 'httpd.example.org') still get the prefix added.
    scheme = url[0:8].lower()
    if scheme[0:7] == 'http://' or scheme == 'https://':
        return url
    return 'http://' + url
25   
26       
class DocumentRetrieve(ndg_eXist):
    '''
    This class provides a document retrieval service via the NDG exist interfaces
    '''

    # Schema names that are also referenced symbolically in knownQueries
    ATOM_TYPE = 'ATOM-TYPE'
    ATOM = 'ATOM'

    def __init__(self, repository, pwfile='passwords.txt'):
        '''
        Initialise the underlying ndg_eXist connection and the lookup table
        that maps schema names to XQuery names.
        @param repository: passed to ndg_eXist as its 'db' argument
        @keyword pwfile: passed to ndg_eXist as its 'passwordFile' argument
        '''
        logging.info("Using repository, '%s'", repository)

        ndg_eXist.__init__(self, db=repository, passwordFile=pwfile)
        logging.info("DB connection initialised")
        self.repository = repository
        self.xq = ndgXqueries()
        # schema name (as accepted by get()) -> name of the XQuery to run
        self.knownQueries = {'DIF': 'moles2dif', 'DC': 'moles2DC',
                             'ISO19139': 'moles2iso19139',
                             'NDG-B0': 'moles', 'NDG-B1': 'molesObjectType',
                             'MDIP': 'moles2mdip',
                             'NDG-A0': 'csml', 'NumSim': 'numsim',
                             self.ATOM: 'atom',
                             'ATOM-BACKUP': 'atom',
                             self.ATOM_TYPE: 'atomTypeList'}

    def _retrieveDoc(self, schema, xqtype, targetCollection, repository, localID):
        '''
        Retrieve doc using specified XQuery type
        @param schema: schema name; only used in the error message
        @param xqtype: name of the XQuery handed to ndgXqueries.actual
        @param targetCollection: collection handed to ndgXqueries.actual
        @param repository: repository handed to ndgXqueries.actual
        @param localID: local ID handed to ndgXqueries.actual
        @raise ValueError: if the query does not produce exactly one hit
        @return: docName, docContents
        '''
        logging.debug("Retrieving doc - type, '%s', coll, '%s', rep:'%s', localID:'%s'",
                      xqtype, targetCollection, repository, localID)
        xquery = self.xq.actual(xqtype, targetCollection, repository, localID)

        queryID, summary = self.executeQuery(xquery)
        if summary['hits'] != 1:
            # NOTE(review): the query session is not released on this path,
            # exactly as before - confirm whether ndg_eXist expects
            # sessionRelease to be called here as well.
            raise ValueError('Unable to obtain single %s document [%s] (hits=%s)'
                             % (schema, localID, summary['hits']))

        docName = summary['documents'][0][0]
        try:
            docContents = self.retrieve(queryID, 0, {})
        finally:
            # Always hand the session back, even if the retrieve call fails
            self.sessionRelease(queryID)
        return docName, docContents

    def get(self, repository, schema, localID, targetCollection='/db/discovery/moles',
            includeDocNameData=False):
        '''
        Retrieve the document identified by repository/schema/localID.
        @param repository: repository part of the document identifier
        @param schema: schema name; must be a key of self.knownQueries
        @param localID: local ID part of the document identifier
        @keyword targetCollection: collection to run the query against
        @keyword includeDocNameData: if True, a dictionary is returned, instead of the dataset, with the key
        being the name of the document and the entry being the dataset
        @raise TypeError: if schema is not a key of self.knownQueries
        @raise KeyError: for 'NDG-B1', if the object type returned by the
        first query is not one of 1, 2, 3 or 4
        @raise ValueError: if a query does not produce exactly one hit
        @return: the document contents, or {docName: contents} when
        includeDocNameData is True
        '''
        logging.debug("Get called with rep:'%s', schema:'%s', localID:'%s', collection:'%s'",
                      repository, schema, localID, targetCollection)
        if schema not in self.knownQueries:
            raise TypeError('Unknown Schema "%s" in URI' % schema)

        xqtype = self.knownQueries[schema]

        if schema == 'NDG-B1':
            # this is a general moles object - so need to further establish the type of moles doc
            # it is to get the correct XQuery to use
            name, xml = self._retrieveDoc(schema, xqtype, targetCollection,
                                          repository, localID)
            xml = ET.fromstring(xml)
            # An empty element counts as type 0, which has no entry below
            otype = int(xml.text or 0)

            xqtype = {4: 'stubB_dataEntity',
                      3: 'stubB_observationStation',
                      2: 'stubB_DPT',
                      1: 'stubB_activity'}[otype]

        docName, r = self._retrieveDoc(schema, xqtype, targetCollection,
                                       repository, localID)
        if includeDocNameData:
            return {docName: r}
        return r

    def error(self, string, t, r, s, l):
        '''
        Raise a ValueError whose message is string followed by
        ' for r:s:l in t'.
        @raise ValueError: always
        '''
        # presumably r, s, l are repository, schema and localID and t is the
        # target collection - verify against callers
        raise ValueError(string + ' for %s:%s:%s in %s' % (r, s, l, t))
102
103           
104class genericHTTP(object):
105    ''' Provides a generic HTTP request class '''
106    def __init__(self,proxyServer=None):
107        if proxyServer is None:
108            proxyHandler=urllib2.ProxyHandler({})
109        else:
110            proxy=httpify(proxyServer)
111            proxyHandler=urllib2.ProxyHandler({'http':proxy})
112        self.opener=urllib2.build_opener(proxyHandler)
113       
114    def get(self,url):
115        url = httpify(url)
116        request=urllib2.Request(url)
117        logging.info("Getting data from url: %s" %url)
118        response='Cannot obtain remote file: '
119        try:
120            f = self.opener.open(request)
121            response=''
122        except urllib2.URLError,e:
123            if hasattr(e,'reason'):
124                response+='No access to server [%s]'%e.reason
125            elif hasattr(e,'code'):
126                response+='Response code [%s]'%e.code
127        except socket.error:
128            response+='Network Socket problem'
129        except Exception,e:
130            response+='[%s]'%str(e)
131           
132        if response=='':
133            return f.read()
134        else:
135            raise IOError(response) 
136
137           
class ndgHTTP(genericHTTP):
    ''' Fetches xml documents from a remote NDG repository over HTTP '''

    def __init__(self, remoteHost, proxyServer=None):
        '''
        @param remoteHost: host used as the base of every retrieve url
        @keyword proxyServer: handed on to genericHTTP
        '''
        self.remoteHost = remoteHost
        genericHTTP.__init__(self, proxyServer)

    def uriget(self, uri):
        '''
        Parse uri with ndgObject and fetch the document it identifies.
        '''
        # NB, this import is deliberately local: having it at the module
        # level can cause problems with resolving imports when using this
        # module - e.g. from the ndgObject level
        import ndgObject
        parsed = ndgObject.ndgObject(uri)
        return self.get(parsed.repository, parsed.schema, parsed.localID)

    def get(self, repository, schema, localID, **kw):
        '''
        Return a remote ndg document, fetched from
        <remoteHost>/retrieve/<repository>__<schema>__<localID>.
        Any extra keyword arguments are accepted but not used.
        '''
        #TODO what about security? Probably means we need to get the headers of our responses sorted ...
        parts = (self.remoteHost, repository, schema, localID)
        target = '%s/retrieve/%s__%s__%s' % parts
        return genericHTTP.get(self, target)

    def setSecurity(self, location, usercode, password):
        ''' Use a usercode password to set security credentials at a specific location '''
        # Not implemented: the arguments are currently ignored
        pass
161
162   
class ndgVocabPOX(genericHTTP):
    ''' Provides a POX interface to the vocab server '''

    # Namespace prefix of the match/entryTerm elements in the service response
    _TYPES_NS = '{urn:vocab/types}'

    def __init__(self, path="http://vocab.ndg.nerc.ac.uk/axis2/services/vocab/", proxyServer=None):
        '''
        @keyword path: base url of the vocab service; a trailing '/' is added
        if missing, since service names are appended directly to it
        @keyword proxyServer: handed on to genericHTTP
        '''
        genericHTTP.__init__(self, proxyServer)
        # Honour the caller's path (previously the default was always used)
        if not path.endswith('/'):
            path += '/'
        self.path = path
        self.ns = "http://vocab.ndg.nerc.ac.uk/"

    def _entryTerms(self, doc, matchType):
        '''
        Return the entryTerm text ('' when empty) of every element named
        matchType found one level below the children of doc.
        '''
        matches = doc.findall('*/%s%s' % (self._TYPES_NS, matchType))
        termTag = self._TYPES_NS + 'entryTerm'
        return [(m.find(termTag).text or '') for m in matches]

    def getRelated(self, subject):
        '''
        Get a related record
        @param subject: text placed in the subjectText query parameter
        @raise IOError: if the service cannot be reached (from genericHTTP.get)
        @return: [broader, narrower, synonyms] - three lists of entry terms,
        also stored on self.broader, self.narrower and self.synonyms
        '''
        # NOTE(review): subject is inserted into the query string as given,
        # without URL-encoding - confirm callers pass an already-safe value
        url = '%sgetRelatedRecordByCriteria?subjectText=%s&predicate=255&inferences=True&objectList=%slist/P211/current' % (self.path, subject, self.ns)
        self.url = url
        doc = genericHTTP.get(self, url)
        x = ET.fromstring(doc)
        self.broader = self._entryTerms(x, 'broadMatch')
        self.narrower = self._entryTerms(x, 'narrowMatch')
        self.synonyms = self._entryTerms(x, 'exactMatch')
        return [self.broader, self.narrower, self.synonyms]
182
Note: See TracBrowser for help on using the repository browser.