source: exist/trunk/python/ndgUtils/ndgHTTP.py @ 5371

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/ndgHTTP.py@5371
Revision 5371, 4.6 KB checked in by lawrence, 11 years ago (diff)

Fixes to the underlying utils for handling the vocab server interface.
Note that by moving this stuff to its own module, there might be
consequential changes in calling code.

Line 
1from xml.etree import ElementTree as ET
2import urllib2, logging, time, socket
3
def httpify(url):
    ''' Ensure a url has an http prefix.

    url -- a url string, with or without a leading scheme.

    Returns url unchanged when it already starts with 'http://' or
    'https://' (compared case-insensitively); otherwise returns it with
    'http://' prepended. '''
    # Test for the complete scheme separator rather than just the first four
    # characters, so that host names which merely begin with "http"
    # (e.g. "httpbin.org") still get the prefix.
    if url.lower().startswith(('http://', 'https://')):
        return url
    return 'http://%s' % url
8
class genericHTTP(object):
    ''' Provides a generic HTTP request class '''

    def __init__(self, proxyServer=None):
        ''' Initialise with an http handler, optionally using a proxy server.

        proxyServer -- proxy address such as 'host:port'. A leading
            'http://' or 'https://' and one trailing '/' are stripped.
            None or an empty string means no proxy is configured. '''
        if proxyServer:
            # Faff around to ensure we don't have an httpBLAH/ url.
            # Only a real scheme prefix is removed, so a host name that
            # happens to start with "http" is left intact.
            lowered = proxyServer.lower()
            for scheme in ('http://', 'https://'):
                if lowered.startswith(scheme):
                    proxyServer = proxyServer[len(scheme):]
                    break
            if proxyServer.endswith('/'):
                proxyServer = proxyServer[:-1]
        if proxyServer:
            logging.debug('HTTP connection using proxy %s' % proxyServer)
            proxyHandler = urllib2.ProxyHandler({'http': proxyServer})
        else:
            # An explicit empty mapping is passed so that no proxy is used.
            logging.debug('HTTP connection using no proxy')
            proxyHandler = urllib2.ProxyHandler({})
        self.opener = urllib2.build_opener(proxyHandler)

    def get(self, url):
        ''' HTTP GET the url argument and return the response body.

        url -- the address to fetch; passed through httpify first.

        Raises IOError, with a message starting 'Cannot obtain remote file: ',
        when the url cannot be opened. Errors raised while reading an already
        opened response are not translated and propagate as they are. '''
        url = httpify(url)
        request = urllib2.Request(url)
        logging.debug("Getting data from url: %s" % url)
        problem = ''
        try:
            f = self.opener.open(request)
        except urllib2.URLError as e:
            # Look for an HTTP status code first: an error carrying a code
            # may also carry a reason, and the code is the more specific.
            if hasattr(e, 'code'):
                problem = 'Response code [%s]' % e.code
            elif hasattr(e, 'reason'):
                problem = 'No access to server [%s]' % e.reason
        except socket.error:
            problem = 'Network Socket problem'
        except Exception as e:
            problem = '[%s]' % str(e)
        else:
            # Always release the connection, even if the read fails.
            try:
                return f.read()
            finally:
                f.close()
        raise IOError('Cannot obtain remote file: ' + problem)
46       
class ndgHTTP(genericHTTP):
    ''' HTTP client which fetches xml documents from a remote NDG repository '''

    def __init__(self, remoteHost, proxyServer=None):
        ''' Remember the repository host and initialise the generic HTTP layer '''
        self.remoteHost = remoteHost
        genericHTTP.__init__(self, proxyServer)

    def uriget(self, uri):
        ''' Return the remote document identified by an ndg uri '''
        # The import is deliberately local: at module level it can cause
        # problems resolving imports when this module is used, e.g. from
        # the ndgObject level.
        import ndgObject
        parsed = ndgObject.ndgObject(uri)
        repository, schema, localID = parsed.repository, parsed.schema, parsed.localID
        return self.get(repository, schema, localID)

    def get(self, repository, schema, localID, **kw):
        ''' Return a remote ndg document.

        The document is requested from the "retrieve" path of remoteHost,
        named from repository, schema and localID. Extra keyword arguments
        are accepted but not used. '''
        #TODO what about security? Probably means we need to get the headers of our responses sorted ...
        parts = (self.remoteHost, repository, schema, localID)
        return genericHTTP.get(self, '%s/retrieve/%s__%s__%s' % parts)

    def setSecurity(self, location, usercode, password):
        ''' Use a usercode password to set security credentials at a specific location.

        Not implemented: currently does nothing. '''
70       
71   
class ndgVocabPOX(genericHTTP):
    ''' Provides a Plain Old XML  interface to the vocab server '''

    # Namespace prefix of the elements read out of the server's replies.
    _TYPES_NS = '{urn:vocab/types}'

    def __init__(self, path="http://vocab.ndg.nerc.ac.uk/axis2/services/vocab/", proxyServer=None):
        ''' Set up the HTTP layer and the vocab service addresses.

        path -- base url of the vocab service; a trailing '/' is added when
            missing, because request urls are built by direct concatenation.
        proxyServer -- passed on to genericHTTP. '''
        genericHTTP.__init__(self, proxyServer)
        # Honour the caller-supplied path rather than a hard-coded one.
        if not path.endswith('/'):
            path += '/'
        self.path = path
        self.ns = "http://vocab.ndg.nerc.ac.uk/"
        self.onto = '%slist/P211/current' % self.ns

    def _entryTerms(self, root, matchName):
        ''' List the entryTerm text of each matchName element found in root '''
        matches = root.findall('*/%s%s' % (self._TYPES_NS, matchName))
        termTag = '%sentryTerm' % self._TYPES_NS
        # findtext gives None for a missing entryTerm, so both a missing and
        # an empty term end up as ''.
        return [(m.findtext(termTag) or '') for m in matches]

    def getRelated(self, subject, caseSensitive=True, withTarget=True):
        ''' Get a related record for a specific subject query.
              Note that with V1.1 of the VocabServer, inferences do nothing,
              caseSensitivity needs to be false, and
              withTarget limits searches to this one list, making things much faster

        Returns [broader, narrower, synonyms], three lists of entry terms,
        which are also stored as self.broader, self.narrower and
        self.synonyms. The raw reply and the request url are kept in
        self.xmlResult and self.url. '''
        # Inferences are always requested.
        inference = 'true'
        # NOTE(review): subject is placed in the query string without any
        # url-encoding -- confirm whether callers encode it themselves.
        url = '%sgetRelatedRecordByCriteria?subjectText=%s&predicate=255&inferences=%s&objectList=%s' % (
            self.path, subject, inference, self.onto)
        if not caseSensitive:
            url += '&caseSensitivity=false'
        if withTarget:
            url += '&subjectList=%s' % self.onto
        itime = time.time()
        doc = genericHTTP.get(self, url)
        etime = time.time() - itime
        logging.info('Search for %s took %ss' % (subject, etime))
        self.xmlResult = doc
        self.url = url
        logging.debug(doc)
        x = ET.fromstring(doc)
        self.broader = self._entryTerms(x, 'broadMatch')
        self.narrower = self._entryTerms(x, 'narrowMatch')
        self.synonyms = self._entryTerms(x, 'exactMatch')
        return [self.broader, self.narrower, self.synonyms]
103
if __name__=="__main__":
    # Manual smoke test: with debug logging switched on, query the vocab
    # server through a proxy for terms related to 'rain'.
    logging.basicConfig(level=logging.DEBUG)
    vocab = ndgVocabPOX(proxyServer='wwwcache3.rl.ac.uk:8080')
    vocab.getRelated('rain')
Note: See TracBrowser for help on using the repository browser.