source: ndgCommon/trunk/ndg/common/src/dal/DocumentRetrieve.py @ 4793

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/dal/DocumentRetrieve.py@4793
Revision 4793, 7.1 KB checked in by cbyrom, 12 years ago (diff)

Checking in initial codebase for ndgUtils restructure.

Line 
1'''
2 Extend eXistInterface class - to add document retrieval functionality
3 
4 @author: B Lawrence?
5'''
6from ndg.common.src.clients.xmldb.eXist.eXistInterface import ndg_eXist
7from ndg.common.src.lib.ndgXqueries import ndgXqueries
8from xml.etree import ElementTree as ET
9import urllib2, logging, socket
10
11
def httpify(url):
    '''
    Ensure a url has an http(s) scheme prefix

    @param url: url, or bare host/path string
    @return: url unchanged if it already starts with 'http://' or 'https://'
    (compared case-insensitively), otherwise url prefixed with 'http://'
    '''
    # NB, test for the full scheme separator rather than just the first four
    # characters - otherwise hosts such as 'httpd.example.org' are wrongly
    # treated as already having a scheme
    if not url.lower().startswith(('http://', 'https://')):
        url = 'http://' + url
    return url
19   
20       
class DocumentRetrieve(ndg_eXist):
    '''
    This class provides a document retrieval service via the NDG exist interfaces
    '''

    ATOM_TYPE = 'ATOM-TYPE'
    ATOM_BACKUP_TYPE = 'ATOM-BACKUP'
    ATOM = 'ATOM'

    # XQuery names to use for a general moles ('NDG-B1') document, keyed on
    # the integer object type code returned by the 'molesObjectType' query
    _STUB_QUERIES = {4: 'stubB_dataEntity',
                     3: 'stubB_observationStation',
                     2: 'stubB_DPT',
                     1: 'stubB_activity'}

    def __init__(self, repository, pwfile='passwords.txt'):
        '''
        Set up the eXist connection and the lookup of known XQueries
        @param repository: db to connect to - passed to ndg_eXist as 'db'
        @keyword pwfile: password file - passed to ndg_eXist as 'passwordFile'
        '''
        logging.info("Using repository, '%s'", repository)

        ndg_eXist.__init__(self, db=repository, passwordFile=pwfile)
        logging.info("DB connection initialised")
        self.repository = repository
        self.xq = ndgXqueries()
        # map of schema name (as used in get()) to the name of the XQuery
        # used to retrieve a doc in that schema
        self.knownQueries = {'DIF': 'moles2dif',
                             'DC': 'moles2DC',
                             'ISO19139': 'moles2iso19139',
                             'NDG-B0': 'moles',
                             'NDG-B1': 'molesObjectType',
                             'MDIP': 'moles2mdip',
                             'NDG-A0': 'csml',
                             'NumSim': 'numsim',
                             self.ATOM: 'atom',
                             self.ATOM_BACKUP_TYPE: 'atom',
                             self.ATOM_TYPE: 'atomTypeList'}

    def _retrieveDoc(self, schema, xqtype, targetCollection, repository, localID):
        '''
        Retrieve doc using specified XQuery type
        @param schema: schema name - used to allow multiple hits for atom
        backups and in the error message
        @param xqtype: name of the XQuery to run
        @param targetCollection: collection passed to the XQuery
        @param repository: repository passed to the XQuery
        @param localID: local ID passed to the XQuery
        @raise ValueError: if the query does not return exactly one doc - or,
        for atom backups, does not return any docs
        @return: docName, docContents
        '''
        logging.debug("Retrieving doc - type, '%s', coll, '%s', rep:'%s', localID:'%s'",
                      xqtype, targetCollection, repository, localID)
        xquery = self.xq.actual(xqtype, targetCollection, repository, localID)

        queryID, summary = self.executeQuery(xquery)
        # NB, always release the query session - including when the hit check
        # or the retrieve fails - otherwise the session is leaked
        try:
            hits = summary['hits']
            # NB, backups will inevitably return lots of docs - only retrieve the top one
            # for the moment - since this is not really needed atm
            isBackupWithHits = schema == self.ATOM_BACKUP_TYPE and hits
            if hits != 1 and not isBackupWithHits:
                raise ValueError('Unable to obtain single %s document [%s] (hits=%s)'
                                 % (schema, localID, hits))

            docName = summary['documents'][0][0]
            docContents = self.retrieve(queryID, 0, {})
        finally:
            self.sessionRelease(queryID)
        return docName, docContents

    def get(self, repository, schema, localID, targetCollection='/db/discovery/moles',
            includeDocNameData=False):
        '''
        Retrieve a document in the specified schema
        @param repository: repository passed to the XQuery
        @param schema: schema name - must be a key of self.knownQueries
        @param localID: local ID of the doc to retrieve
        @keyword targetCollection: collection to run the XQuery against
        @keyword includeDocNameData: if True, a dictionary is returned, instead of the dataset, with the key
        being the name of the document and the entry being the dataset
        @raise TypeError: if the schema is not recognised
        @raise KeyError: if an 'NDG-B1' doc has an unrecognised object type
        @raise ValueError: if a single doc cannot be retrieved
        @return: doc contents - or {docName: contents} if includeDocNameData set
        '''
        logging.debug("Get called with rep:'%s', schema:'%s', localID:'%s', collection:'%s'",
                      repository, schema, localID, targetCollection)
        if schema not in self.knownQueries:
            raise TypeError('Unknown Schema "%s" in URI' % schema)

        xqtype = self.knownQueries[schema]

        if schema == 'NDG-B1':
            # this is a general moles object - so need to further establish the type of moles doc
            # it is to get the correct XQUery to use
            typeDocName, typeDoc = self._retrieveDoc(schema, xqtype, targetCollection,
                                                     repository, localID)
            # NB, an empty result is treated as type 0 - which is not a known type
            otype = int(ET.fromstring(typeDoc).text or 0)
            try:
                xqtype = self._STUB_QUERIES[otype]
            except KeyError:
                raise KeyError('Unknown moles object type, %s, for document [%s]'
                               % (otype, localID))

        docName, docContents = self._retrieveDoc(schema, xqtype, targetCollection,
                                                 repository, localID)
        if includeDocNameData:
            return {docName: docContents}
        return docContents

    def error(self, string, t, r, s, l):
        '''
        Raise a ValueError whose message is the input string followed by
        ' for r:s:l in t'
        @raise ValueError: always
        '''
        raise ValueError(string + ' for %s:%s:%s in %s' % (r, s, l, t))
99
100           
class genericHTTP(object):
    ''' Provides a generic HTTP request class '''
    def __init__(self, proxyServer=None):
        '''
        Set up the url opener used by get()
        @keyword proxyServer: proxy to use for http requests; an http prefix is
        added if missing.  If None, an empty proxy dict is used - which,
        according to the urllib2 docs, disables any autodetected proxy
        '''
        if proxyServer is None:
            proxies = {}
        else:
            proxies = {'http': httpify(proxyServer)}
        self.opener = urllib2.build_opener(urllib2.ProxyHandler(proxies))

    def get(self, url):
        '''
        Retrieve the contents of the specified url
        @param url: url to get - an http prefix is added if missing
        @raise IOError: if the url cannot be opened - the message starts
        'Cannot obtain remote file: ' followed by the reason
        @return: data read from the url
        '''
        url = httpify(url)
        request = urllib2.Request(url)
        logging.info("Getting data from url: %s", url)
        errorPrefix = 'Cannot obtain remote file: '
        try:
            f = self.opener.open(request)
        except urllib2.URLError as e:
            # NB, check for a reason first, then fall back to a response code;
            # if neither is available only the prefix is reported
            detail = ''
            if hasattr(e, 'reason'):
                detail = 'No access to server [%s]' % e.reason
            elif hasattr(e, 'code'):
                detail = 'Response code [%s]' % e.code
            raise IOError(errorPrefix + detail)
        except socket.error:
            raise IOError(errorPrefix + 'Network Socket problem')
        except Exception as e:
            raise IOError(errorPrefix + '[%s]' % str(e))

        # NB, ensure the response is closed once read - including if the read
        # itself fails - so the connection is not left open
        try:
            return f.read()
        finally:
            f.close()
133
134           
class ndgHTTP(genericHTTP):
    '''
    HTTP client with a get method for obtaining an xml document from a
    remote NDG repository
    '''
    def __init__(self, remoteHost, proxyServer=None):
        '''
        @param remoteHost: host used as the base of the retrieve urls
        @keyword proxyServer: proxy passed on to genericHTTP
        '''
        self.remoteHost = remoteHost
        genericHTTP.__init__(self, proxyServer)

    def uriget(self, uri):
        '''
        Get the remote document identified by an ndg uri
        @param uri: uri - parsed using ndgObject to obtain the repository,
        schema and localID
        @return: result of get() for the parsed uri
        '''
        # NB, having this import at the module level can cause problems
        # with resolving imports when using this module - e.g. from the
        # ndgObject level
        from ndg.common.src.models.ndgObject import ndgObject
        parsedURI = ndgObject(uri)
        repository = parsedURI.repository
        schema = parsedURI.schema
        localID = parsedURI.localID
        return self.get(repository, schema, localID)

    def get(self, repository, schema, localID, **kw):
        '''
        Return a remote ndg document
        @param repository: repository part of the retrieve url
        @param schema: schema part of the retrieve url
        @param localID: localID part of the retrieve url
        @keyword kw: accepted but not used
        @return: data returned by genericHTTP.get for the retrieve url
        '''
        #TODO what about security? Probably means we need to get the headers of our responses sorted ...
        urlParts = (self.remoteHost, repository, schema, localID)
        retrieveURL = '%s/retrieve/%s__%s__%s' % urlParts
        return genericHTTP.get(self, retrieveURL)

    def setSecurity(self, location, usercode, password):
        '''
        Use a usercode password to set security credentials at a specific location
        NB, not implemented - currently does nothing
        '''
        return None
158
159   
class ndgVocabPOX(genericHTTP):
    ''' Provides a POX interface to the vocab server '''

    # namespace prefix of the elements looked up in the vocab server response
    _TYPES_NS = '{urn:vocab/types}'

    def __init__(self, path="http://vocab.ndg.nerc.ac.uk/axis2/services/vocab/", proxyServer=None):
        '''
        @keyword path: base url of the vocab service; the operation name is
        appended directly, so this needs to end with a '/'
        @keyword proxyServer: proxy passed on to genericHTTP
        '''
        genericHTTP.__init__(self, proxyServer)
        # NB, use the path specified by the caller - rather than always using
        # the default location
        self.path = path
        self.ns = "http://vocab.ndg.nerc.ac.uk/"

    def _entryTerms(self, tree, matchType):
        '''
        Extract the entry terms for all matches of the specified type
        @param tree: ElementTree element of the parsed response
        @param matchType: name of the match element, e.g. 'broadMatch'
        @return: list of entryTerm text - with '' for any match whose
        entryTerm is empty or missing
        '''
        matches = tree.findall('*/%s%s' % (self._TYPES_NS, matchType))
        termPath = self._TYPES_NS + 'entryTerm'
        # NB, findtext returns None if the element is missing - so this avoids
        # the AttributeError that find(...).text would give in that case
        return [(match.findtext(termPath) or '') for match in matches]

    def getRelated(self, subject):
        '''
        Get a related record
        @param subject: subject text to look up - this is inserted into the
        url as is, so NOTE(review): callers presumably need to url-encode it
        @raise IOError: if the url cannot be retrieved
        @return: [broader, narrower, synonyms] lists of entry terms; these,
        and the url used, are also stored on the instance
        '''
        url = '%sgetRelatedRecordByCriteria?subjectText=%s&predicate=255&inferences=True&objectList=%slist/P211/current' % (self.path, subject, self.ns)
        self.url = url
        doc = genericHTTP.get(self, url)
        tree = ET.fromstring(doc)
        self.broader = self._entryTerms(tree, 'broadMatch')
        self.narrower = self._entryTerms(tree, 'narrowMatch')
        self.synonyms = self._entryTerms(tree, 'exactMatch')
        return [self.broader, self.narrower, self.synonyms]
179
Note: See TracBrowser for help on using the repository browser.