source: ndgCommon/trunk/ndg/common/src/clients/http/vocabserverclient.py @ 5374

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/clients/http/vocabserverclient.py@5374
Revision 5374, 6.5 KB checked in by cbyrom, 11 years ago (diff)

Update vocab server 'getRelated' calls to include the required data
to allow proper invocation of services + adjust calls so that the
service is run for both vocab terms + free text inputs and the results
are combined + extend tests.

Line 
1'''
2Simple client to a vocab server - using a POX (plain old XML) interface
3 
4 @author: B Lawrence?, C Byrom, Tessella, Feb 09
5'''
6import logging, urlparse, urllib
7from xml.etree import ElementTree as ET
8from generichttpclient import GenericHTTPClient
9from ndg.common.src.models.ndgObject import ndgObject
10from ndg.common.src.lib.utilities import httpify
11from ndg.common.src.models.codetabletype import CodeTableType
12from ndg.common.src.models.codetablerecord import CodeTableRecord
13
class VocabServerClient(GenericHTTPClient):
    '''
    Provides a POX (plain old XML) interface to the vocab server

    Each public method builds a GET url from one of the query templates
    below, fetches it via readURL and parses the XML reply with ElementTree,
    looking elements up in the ndgObject.VOCAB_NS namespace.
    '''

    # Query templates - the first placeholder is always the service path
    GET_LIST_QUERY = '%sgetList?recordKey=%s'
    WHAT_LISTS_QUERY = '%swhatLists'
    SEARCH_VOCAB_QUERY = '%ssearchVocab?listKey=%s&searchTerm=%s'
    GET_RELATED_QUERY = '%sgetRelatedRecordByCriteria?subjectText=%s&predicate=255&inferences=True&objectList=%s&caseSensitivity=%s'

    def __init__(self,
                 path = "http://vocab.ndg.nerc.ac.uk/axis2/services/vocab/",
                 proxyServer = None):
        '''
        Set up the client
        @keyword path: base url of the vocab service - prepended to every query
        @keyword proxyServer: passed straight on to GenericHTTPClient
        '''
        logging.debug("Instantiating VocabServerClient")
        self.path = path

        # extract the hostname - NB, the parser assumes an http prefix so ensure
        # this is there
        parsedURL = urlparse.urlparse(httpify(path))
        self.ns = "%s://%s/" % (parsedURL.scheme, parsedURL.hostname)
        # default list used by getRelated as the object (and optional subject) list
        self.objectList = '%slist/P211/current' % self.ns

        super(VocabServerClient, self).__init__(proxyServer = proxyServer)
        logging.debug("VocabServerClient instantiated")


    def _readVocabDoc(self, url, errorMessage):
        '''
        Fetch a vocab server reply, parse it and check its error flag
        @param url: fully formed query url
        @param errorMessage: message template with one %s placeholder, filled
        in with the content of the reply's error element
        @raise SystemError: if the reply's error element is anything other
        than 'false' - NB, this includes a reply with no error element at all
        @return root Element of the parsed reply
        '''
        root = ET.fromstring(self.readURL(url))
        error = root.findtext('{%s}error' % ndgObject.VOCAB_NS)
        if error != 'false':
            raise SystemError(errorMessage % error)
        return root


    def getRelated(self, subject ,caseSensitive = "false", withTarget = True):
        '''
        Get the terms related to a subject
        @param subject: subject text to search for in vocab server - this is
        url-quoted here, so pass it in unencoded
        @keyword caseSensitive - currently the search only works if this is set to "false"
        @keyword withTarget: if True, the default object list is also sent as
        the subjectList, limiting the search - this makes things run much faster
        @return 2D array of results:
        [0] - broad result matches
        [1] - narrow result matches
        [2] - synonym (exact) matches
        The same three lists are also stored on the instance as broader,
        narrower and synonyms.
        TODO: not sure this actually works since it's difficult to get back
        any meaningful results.  Possibly things are the wrong way around - the
        results only return the subject - whereas the interesting info is in a
        parent entry term to this
        '''
        # avoid any spaces/funny non-html characters
        subject = urllib.quote(subject)
        logging.debug("Getting vocab data on subject term, '%s'", subject)
        url = self.GET_RELATED_QUERY % (self.path, subject, self.objectList, caseSensitive)

        if withTarget:
            url += '&subjectList=%s' % self.objectList

        root = ET.fromstring(self.readURL(url))
        vocabNS = ndgObject.VOCAB_NS
        termPath = '{%s}entryTerm' % vocabNS

        def entryTerms(matchType):
            # entry term text of every grandchild element of the given match type
            matches = root.findall('*/{%s}%s' % (vocabNS, matchType))
            return [match.findtext(termPath) for match in matches]

        self.broader = entryTerms('broadMatch')
        self.narrower = entryTerms('narrowMatch')
        self.synonyms = entryTerms('exactMatch')
        logging.debug("- returning info on subject term")
        return [self.broader, self.narrower, self.synonyms]


    def getList(self, listKey):
        '''
        Retrieve contents of a list - or a specific term in a list
        @param listKey: key to use to retrieve list - e.g.
        for list: http://vocab.ndg.nerc.ac.uk/list/P071/11
        for term: http://vocab.ndg.nerc.ac.uk/term/P071/11/CFV10N55
        @raise SystemError: if the returned doc does not report error = 'false'
        @return root Element of the parsed reply, with all data loaded
        '''
        logging.debug("Retrieving list info for key, '%s'", listKey)
        url = self.GET_LIST_QUERY % (self.path, listKey)
        et = self._readVocabDoc(url,
                                "Problem occurred whilst looking up vocab data: %s")

        logging.debug("- returning info in Elementtree object")
        return et


    def getListInfo(self, listKey):
        '''
        Retrieve info on a vocab list
        The full set of lists is requested from the server and scanned for
        the first entry whose listKey equals the input.
        @param listKey: url of the vocab list
        @raise SystemError: if vocab lookup fails
        @raise ValueError: if no list info found
        @return CodeTableType with data about specified list
        '''
        logging.debug("Retrieving info on list with key, '%s'", listKey)
        url = self.WHAT_LISTS_QUERY % (self.path)
        et = self._readVocabDoc(url,
                                "Problem occurred whilst looking up vocab list: %s")

        for element in et.findall('{%s}codeTableType' % ndgObject.VOCAB_NS):
            listInfo = CodeTableType(et = element)
            if listInfo.listKey == listKey:
                logging.debug("- found the specified list - returning data as CodeTableType")
                return listInfo

        raise ValueError("Vocab list, '%s' not found" % listKey)


    def searchVocabTerm(self, vocabListKey, searchTerm):
        '''
        Search the specified vocab list for terms matching the input searchTerm
        @param vocabListKey: url of the vocab list
        @param searchTerm: term to search on - NB, accepts '*' wildcard for any
        character.  This is url-quoted here, so pass it in unencoded
        @raise SystemError: if the returned doc does not report error = 'false'
        @return list of CodeTableRecords matching the search criteria - also
        stored on the instance as records
        '''
        logging.debug("Searching for vocab terms matching the input filter, '%s' in vocab list, '%s'",
                      searchTerm, vocabListKey)
        # free text must be encoded before going into the query string (as is
        # done for the subject in getRelated) - otherwise spaces, '&', '#' etc
        # corrupt the request.  Keep the '*' wildcard and '/' literal.
        quotedTerm = urllib.quote(searchTerm, safe = '/*')
        url = self.SEARCH_VOCAB_QUERY % (self.path, vocabListKey, quotedTerm)
        et = self._readVocabDoc(url,
                                "Problem occurred whilst searching vocab data: %s")

        recordElements = et.findall('{%s}codeTableRecord' % ndgObject.VOCAB_NS)
        self.records = [CodeTableRecord(et = element) for element in recordElements]

        logging.debug("- returning %s results", len(self.records))
        return self.records
Note: See TracBrowser for help on using the repository browser.