source: exist/trunk/python/ndgUtils/DocumentRetrieve.py @ 4444

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/DocumentRetrieve.py@4444
Revision 4444, 6.9 KB checked in by cbyrom, 12 years ago (diff)

Add new xquery to lookup deployment atoms associated with an atom ID +
add code to models to allow the use of this to retrieve this information
+ update tests + avoid doubly escaping special characters.

Line 
1from eXistInterface import ndg_eXist
2from ndgXqueries import ndgXqueries
3
4import urllib2, logging, socket
5try:
6    from xml.etree import ElementTree as ET
7except ImportError:
8    try:
9        import ElementTree as ET
10    except ImportError:
11        # For some reason, when ElementTree is installed with easy_install
12        # the package is named "elementtree".
13        import elementtree.ElementTree as ET
14
15debug=0
16
17
def httpify(url):
    '''
    Ensure a url has an http(s) scheme prefix

    The test is made against the complete 'http://' / 'https://' scheme and
    ignores case, so a bare host name that merely begins with the letters
    'http' (e.g. 'httpd.example.org') still gets a prefix, and an upper-case
    'HTTP://' url is not prefixed a second time.

    @param url: url, or bare host/path string
    @return: url unchanged when it already starts with 'http://' or
    'https://'; otherwise url with 'http://' prepended
    '''
    if url.lower().startswith(('http://', 'https://')):
        return url
    return 'http://' + url
25   
26       
class DocumentRetrieve(ndg_eXist):
    ''' This class provides a document retrieval service via the NDG exist interfaces '''

    ATOM_DEPLOYMENTS = 'ATOM-DEPLOYMENTS'

    # Maps the integer read from the 'molesObjectType' query result to the
    # name of the stub-B query used to fetch a document of that type
    _STUB_B_QUERIES = {4: 'stubB_dataEntity',
                       3: 'stubB_observationStation',
                       2: 'stubB_DPT',
                       1: 'stubB_activity'}

    def __init__(self, repository, pwfile='passwords.txt'):
        '''
        Initialise the eXist connection and the schema -> xquery name lookup
        @param repository: handed to ndg_eXist as its 'db' argument and kept
        as self.repository
        @keyword pwfile: handed to ndg_eXist as its 'passwordFile' argument
        '''
        logging.info("Using repository, '%s'", repository)

        ndg_eXist.__init__(self, db=repository, passwordFile=pwfile)
        logging.info("DB connection initialised")
        self.repository = repository
        self.xq = ndgXqueries()
        # schema name (as used in get()) -> name of the xquery to run
        self.knownQueries = {'DIF': 'moles2dif', 'DC': 'moles2DC',
                             'ISO19139': 'moles2iso19139',
                             'NDG-B0': 'moles', 'NDG-B1': 'molesObjectType',
                             'MDIP': 'moles2mdip',
                             'NDG-A0': 'csml', 'NumSim': 'numsim',
                             'ATOM': 'atom', 'ATOM-BACKUP': 'atom',
                             self.ATOM_DEPLOYMENTS: 'atomDeploymentsList'}

    def _retrieveDoc(self, schema, xqtype, targetCollection, repository, localID):
        '''
        Retrieve doc using specified XQuery type
        @param schema: schema name - only used in the error message
        @param xqtype: name of the xquery to build via self.xq.actual
        @param targetCollection: collection passed to the xquery builder
        @param repository: repository passed to the xquery builder
        @param localID: local ID passed to the xquery builder
        @return: docName, docContents
        @raise ValueError: if the query does not produce exactly one hit
        '''
        logging.debug("Retrieving doc - type, '%s', coll, '%s', rep:'%s', localID:'%s'",
                      xqtype, targetCollection, repository, localID)
        xquery = self.xq.actual(xqtype, targetCollection, repository, localID)

        sessionID, summary = self.executeQuery(xquery)
        try:
            if summary['hits'] != 1:
                raise ValueError('Unable to obtain single %s document [%s] (hits=%s)'
                                 % (schema, localID, summary['hits']))

            docName = summary['documents'][0][0]
            contents = self.retrieve(sessionID, 0, {})
        finally:
            # Hand the query id back on every path - including the error
            # paths above - so it is not left unreleased when we raise
            self.sessionRelease(sessionID)
        return docName, contents

    def get(self, repository, schema, localID, targetCollection='/db/discovery/moles', includeDocNameData=False):
        '''
        Retrieve the document identified by repository/schema/localID
        @param repository: repository part of the document identifier
        @param schema: one of the keys of self.knownQueries
        @param localID: local ID part of the document identifier
        @keyword targetCollection: collection passed on to the xquery
        @keyword includeDocNameData: if True, a dictionary is returned, instead of the dataset, with the key
        being the name of the document and the entry being the dataset
        @return: the document contents, or {docName: contents} - see above
        @raise TypeError: if schema is not a known schema
        @raise KeyError: for 'NDG-B1', if the object type read from the
        database is not one of the known stub-B types
        @raise ValueError: if a query does not produce exactly one hit
        '''
        logging.debug("Get called with rep:'%s', schema:'%s', localID:'%s', collection:'%s'",
                      repository, schema, localID, targetCollection)
        if schema not in self.knownQueries:
            raise TypeError('Unknown Schema "%s" in URI' % schema)

        xqtype = self.knownQueries[schema]

        if schema == 'NDG-B1':
            # this is a general moles object - so need to further establish the type of moles doc
            # it is to get the correct XQuery to use
            _typeDocName, typeDoc = self._retrieveDoc(schema, xqtype, targetCollection,
                                                      repository, localID)
            # an empty element is treated as type 0, which is not a known type
            otype = int(ET.fromstring(typeDoc).text or 0)
            xqtype = self._STUB_B_QUERIES[otype]

        docName, contents = self._retrieveDoc(schema, xqtype, targetCollection,
                                              repository, localID)
        if includeDocNameData:
            return {docName: contents}
        return contents

    def error(self, string, t, r, s, l):
        '''
        Raise a ValueError whose message is string followed by
        ' for r:s:l in t'
        NOTE(review): r, s, l and t look like repository, schema, localID
        and target collection - confirm against callers
        @raise ValueError: always
        '''
        raise ValueError(string + ' for %s:%s:%s in %s' % (r, s, l, t))
98
99           
class genericHTTP(object):
    ''' Provides a generic HTTP request class '''

    def __init__(self, proxyServer=None):
        '''
        Build the urllib2 opener used by get()
        @keyword proxyServer: proxy to use for http requests; an http prefix
        is added if missing. If None, an empty ProxyHandler is installed.
        '''
        if proxyServer is None:
            proxyHandler = urllib2.ProxyHandler({})
        else:
            proxy = httpify(proxyServer)
            proxyHandler = urllib2.ProxyHandler({'http': proxy})
        self.opener = urllib2.build_opener(proxyHandler)

    def get(self, url):
        '''
        Fetch the contents of a url
        @param url: url to retrieve; an http prefix is added if missing
        @return: the body read from the response
        @raise IOError: if the url cannot be opened; the message starts with
        'Cannot obtain remote file: ' followed by the reason
        '''
        url = httpify(url)
        request = urllib2.Request(url)
        logging.info("Getting data from url: %s", url)
        failure = 'Cannot obtain remote file: '
        try:
            f = self.opener.open(request)
        except urllib2.URLError as e:
            # report the reason if there is one, otherwise the response code
            if hasattr(e, 'reason'):
                failure += 'No access to server [%s]' % e.reason
            elif hasattr(e, 'code'):
                failure += 'Response code [%s]' % e.code
            raise IOError(failure)
        except socket.error:
            raise IOError(failure + 'Network Socket problem')
        except Exception as e:
            raise IOError(failure + '[%s]' % str(e))

        # Always close the response so the connection is not left open,
        # even if reading the body fails
        try:
            return f.read()
        finally:
            f.close()
132
133           
class ndgHTTP(genericHTTP):
    ''' Fetches xml documents from a remote NDG repository over http '''

    def __init__(self, remoteHost, proxyServer=None):
        '''
        @param remoteHost: host (and any leading path) that the
        '/retrieve/...' urls are built on
        @keyword proxyServer: passed through to genericHTTP
        '''
        self.remoteHost = remoteHost
        genericHTTP.__init__(self, proxyServer)

    def uriget(self, uri):
        '''
        Retrieve the document named by an ndg uri
        @param uri: uri handed to ndgObject to obtain repository, schema
        and localID
        @return: whatever get() returns for those three values
        '''
        # Kept as a function-level import on purpose: importing at module
        # level can cause import resolution problems for users of this
        # module - e.g. from the ndgObject level
        import ndgObject
        parsed = ndgObject.ndgObject(uri)
        return self.get(parsed.repository, parsed.schema, parsed.localID)

    def get(self, repository, schema, localID, **kw):
        '''
        Return a remote ndg document
        @param repository: repository part of the document identifier
        @param schema: schema part of the document identifier
        @param localID: local ID part of the document identifier
        @keyword kw: accepted but not used
        @return: the body returned by genericHTTP.get for the retrieve url
        '''
        #TODO what about security? Probably means we need to get the headers of our responses sorted ...
        urlParts = (self.remoteHost, repository, schema, localID)
        target = '%s/retrieve/%s__%s__%s' % urlParts
        return genericHTTP.get(self, target)

    def setSecurity(self, location, usercode, password):
        '''
        Use a usercode password to set security credentials at a specific location

        Placeholder only: the arguments are ignored and nothing is stored.
        '''
157
158   
class ndgVocabPOX(genericHTTP):
    ''' Provides a POX interface to the vocab server '''

    # Namespace prefix (ElementTree '{uri}' form) of the match and
    # entryTerm elements looked up in the server reply
    _TYPES_NS = '{urn:vocab/types}'

    def __init__(self, path="http://vocab.ndg.nerc.ac.uk/axis2/services/vocab/", proxyServer=None):
        '''
        @keyword path: base url of the vocab service; the operation name is
        appended directly to it, so it needs to end with '/'
        @keyword proxyServer: passed through to genericHTTP
        '''
        genericHTTP.__init__(self, proxyServer)
        # Honour the caller's path rather than always using the default url
        self.path = path
        self.ns = "http://vocab.ndg.nerc.ac.uk/"

    def getRelated(self, subject):
        '''
        Get a related record
        @param subject: text placed in the subjectText query parameter
        @return: [broader, narrower, synonyms] - three lists of entry terms,
        also stored as self.broader, self.narrower and self.synonyms; the
        url used is stored as self.url
        '''
        # NOTE(review): subject is inserted into the query string as-is;
        # presumably callers escape it where needed - confirm
        url = '%sgetRelatedRecordByCriteria?subjectText=%s&predicate=255&inferences=True&objectList=%slist/P211/current' % (self.path, subject, self.ns)
        self.url = url
        doc = genericHTTP.get(self, url)
        root = ET.fromstring(doc)
        self.broader = self._entryTerms(root, 'broadMatch')
        self.narrower = self._entryTerms(root, 'narrowMatch')
        self.synonyms = self._entryTerms(root, 'exactMatch')
        return [self.broader, self.narrower, self.synonyms]

    def _entryTerms(self, root, matchType):
        ''' Return the entryTerm text of each matchType element one level below root's children '''
        termTag = self._TYPES_NS + 'entryTerm'
        matches = root.findall('*/' + self._TYPES_NS + matchType)
        # findtext gives '' for an empty entryTerm, and the '' default covers
        # a match element that has no entryTerm child at all
        return [match.findtext(termTag, '') for match in matches]
178
Note: See TracBrowser for help on using the repository browser.