source: ndgCommon/trunk/ndg/common/src/clients/http/vocabserverclient.py @ 6860

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/clients/http/vocabserverclient.py@6860
Revision 6860, 8.1 KB checked in by sdonegan, 10 years ago (diff)

Added method to find a vocab by just base url with no version number

Line 
1'''
2Simple client to a vocab server - using a POX (plain old XML) interface
3 
4 @author: B Lawrence?, C Byrom, Tessella, Feb 09
5'''
6import logging, urlparse, urllib
7from xml.etree import ElementTree as ET
8from generichttpclient import GenericHTTPClient
9from ndg.common.src.models.ndgObject import ndgObject
10from ndg.common.src.lib.utilities import httpify
11from ndg.common.src.models.codetabletype import CodeTableType
12from ndg.common.src.models.codetablerecord import CodeTableRecord
13
class VocabServerClient(GenericHTTPClient):
    '''
    Provides a POX (plain old XML) interface to the vocab server.

    Every query is a plain HTTP GET built from one of the *_QUERY templates
    below; responses are parsed with ElementTree.  The actual HTTP access is
    done through self.readURL, which is not defined in this class (presumably
    inherited from GenericHTTPClient).
    '''

    # URL templates - the first placeholder is always the service base path
    GET_LIST_QUERY = '%sgetList?recordKey=%s'
    WHAT_LISTS_QUERY = '%swhatLists'
    SEARCH_VOCAB_QUERY = '%ssearchVocab?listKey=%s&searchTerm=%s'
    GET_RELATED_QUERY = '%sgetRelatedRecordByCriteria?subjectText=%s&predicate=255&inferences=True&objectList=%s&caseSensitivity=%s'

    def __init__(self,
                 path = "http://vocab.ndg.nerc.ac.uk/axis2/services/vocab/",
                 proxyServer = None):
        '''
        Set up the client
        @keyword path: base url of the vocab service; it is prepended verbatim
        to every query, so it should end with a '/'
        @keyword proxyServer: passed straight through to the parent constructor
        '''
        logging.debug("Instantiating VocabServerClient")
        self.path = path

        # extract the hostname - NB, the parser assumes an http prefix so ensure
        # this is there
        parsedURL = urlparse.urlparse(httpify(path))

        # NB, only scheme and hostname are kept - any port or path is dropped
        self.ns = "%s://%s/" %(parsedURL.scheme, parsedURL.hostname)
        self.objectList = '%slist/P211/current' %self.ns

        super(VocabServerClient, self).__init__(proxyServer = proxyServer)
        logging.debug("VocabServerClient instantiated")


    def _fetchDocument(self, url, errorPrefix):
        '''
        Read the input url, parse the response as XML and check the error flag
        @param url: fully formed query url
        @param errorPrefix: text placed before the server error in the
        exception message
        @raise SystemError: if the text of the 'error' element is anything
        other than 'false' - NB, this includes the element being absent
        @return root Element of the parsed response
        '''
        root = ET.fromstring(self.readURL(url))
        error = root.findtext('{%s}error' %ndgObject.VOCAB_NS)
        if error != 'false':
            raise SystemError("%s: %s" %(errorPrefix, error))
        return root


    def _iterListTypes(self):
        '''
        Run the whatLists query and lazily yield a CodeTableType for each
        codeTableType element returned - lazy, so that callers stopping at the
        first match do not construct objects for the remaining records
        @raise SystemError: if vocab lookup fails
        '''
        root = self._fetchDocument(self.WHAT_LISTS_QUERY %(self.path),
                                   "Problem occurred whilst looking up vocab list")
        for record in root.findall('{%s}codeTableType' %ndgObject.VOCAB_NS):
            yield CodeTableType(et = record)


    @staticmethod
    def _isListRootMatch(listRoot, candidateKey):
        '''
        Check whether listRoot occurs in candidateKey and ends on a '/'
        boundary (or at the very end of candidateKey)
        - so '.../list/P071' matches '.../list/P071/12' but '.../list/P07'
        does not.  The start of listRoot is deliberately not anchored so that
        keys given without the 'http://' prefix still match
        @param listRoot: list key, with or without a trailing version
        @param candidateKey: full list key to compare against
        @return True if matched; False otherwise - including when either
        input is empty or None
        '''
        if not listRoot or not candidateKey:
            return False

        root = listRoot.rstrip('/')
        if not root:
            return False

        # appending '/' to both sides lets one substring test cover both the
        # 'followed by a further path segment' and 'at end of key' cases
        return (root + '/') in (candidateKey + '/')


    def getRelated(self, subject ,caseSensitive = "false", withTarget = True):
        '''
        Get a related record
        @param subject: subject text to search for in vocab server
        @keyword caseSensitive - currently the search only works if this is set to "false"
        @keyword withTarget: if True, a subjectList parameter (set to the same
        list as objectList) is added to the query to limit the search
        - this makes things run much faster
        @return 2D array of results:
        [0] - broad result matches
        [1] - narrow result matches
        [2] - synonym matches
        Each entry is the entryTerm text of a match (None where a match has no
        entryTerm).  The three lists are also stored on the instance as
        self.broader, self.narrower and self.synonyms.
        NB, unlike the other lookups, the error flag of the response is not
        checked here.
        TODO: not sure this actually works since it's difficult to get back
        any meaningful results.  Possibly things are the wrong way around - the
        results only return the subject - whereas the interesting info is in a
        parent entry term to this
        '''
        # avoid any spaces/funny non-html characters
        subject = urllib.quote(subject)
        logging.debug("Getting vocab data on subject term, '%s'", subject)
        url = self.GET_RELATED_QUERY %(self.path, subject, self.objectList, caseSensitive)

        if withTarget:
            url += '&subjectList=%s' %self.objectList

        root = ET.fromstring(self.readURL(url))
        termPath = '{%s}entryTerm' %ndgObject.VOCAB_NS

        def entryTerms(matchType):
            # entry term text of every match element of the given type
            matches = root.findall('*/{%s}%s' %(ndgObject.VOCAB_NS, matchType))
            return [match.findtext(termPath) for match in matches]

        self.broader = entryTerms('broadMatch')
        self.narrower = entryTerms('narrowMatch')
        self.synonyms = entryTerms('exactMatch')
        logging.debug("- returning info on subject term")
        return [self.broader, self.narrower, self.synonyms]


    def getList(self, listKey):
        '''
        Retrieve contents of a list - or a specific term in a list
        @param listKey: key to use to retrieve list - e.g.
        for list: http://vocab.ndg.nerc.ac.uk/list/P071/11
        for term: http://vocab.ndg.nerc.ac.uk/term/P071/11/CFV10N55
        NB, the key is inserted into the query url as is, without escaping
        @raise SystemError if the returned doc features an error
        @return root Element of the parsed response, with all data loaded
        '''
        logging.debug("Retrieving list info for key, '%s'", listKey)

        root = self._fetchDocument(self.GET_LIST_QUERY %(self.path, listKey),
                                   "Problem occurred whilst looking up vocab data")

        logging.debug("- returning info in Elementtree object")
        return root


    def getListInfo(self, listKey):
        '''
        Retrieve info on a vocab list - the key must match exactly
        @param listKey: url of the vocab list
        @raise SystemError: if vocab lookup fails
        @raise ValueError: if no list info found
        @return CodeTableType with data about specified list
        '''
        logging.debug("Retrieving info on list with key, '%s'", listKey)

        for listType in self._iterListTypes():
            if listType.listKey == listKey:
                logging.debug("- found the specified list - returning data as CodeTableType")
                return listType

        raise ValueError("Vocab list, '%s' not found" %listKey)


    def getListInfoBroadMatch(self, listKey):
        '''
        Updated version of getListInfo - needed for dealing with NDG vocab lists - version number is upgraded each time a set of
        entries are accepted so no use keeping explicit version in original VTE config file i.e. vocab.ndg.nerc.ac.uk/list/P071/12
        therefore, drop the version and just use the unique list identifier to match from the whatLists operation... i.e. vocab.ndg.nerc.ac.uk/list/P071

        The key must be followed by a '/' (or be at the very end) in the
        server's list key, so a root of '.../list/P07' does not pick up
        '.../list/P071/12'.  The first matching list is returned.

        Retrieve info on a vocab list
        @param listKey: url of the vocab list, with or without the version
        @raise SystemError: if vocab lookup fails
        @raise ValueError: if no list info found
        @return CodeTableType with data about specified list
        '''
        logging.debug("Retrieving info on list with key, '%s'", listKey)

        for listType in self._iterListTypes():
            if self._isListRootMatch(listKey, listType.listKey):
                logging.debug("- found a matching list root - returning data as CodeTableType")
                return listType

        raise ValueError("Vocab list, '%s' not found" %listKey)


    def searchVocabTerm(self, vocabListKey, searchTerm):
        '''
        Search the specified vocab list for terms matching the input searchTerm
        @param vocabListKey: url of the vocab list
        @param searchTerm: term to search on - NB, accepts '*' wildcard for any character
        @raise SystemError: if the returned doc features an error
        @return list of CodeTableRecords matching the search criteria - also
        stored on the instance as self.records
        NOTE(review): neither input is url-escaped before being placed in the
        query (getRelated does escape its subject) - confirm whether callers,
        or readURL, are expected to handle this
        '''
        logging.debug("Searching for vocab terms matching the input filter, '%s' in vocab list, '%s'",
                      searchTerm, vocabListKey)

        root = self._fetchDocument(self.SEARCH_VOCAB_QUERY %(self.path, vocabListKey, searchTerm),
                                   "Problem occurred whilst searching vocab data")

        self.records = [CodeTableRecord(et = record)
                        for record in root.findall('{%s}codeTableRecord' %ndgObject.VOCAB_NS)]

        logging.debug("- returning %s results", len(self.records))
        return self.records
Note: See TracBrowser for help on using the repository browser.