source: ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/searchclient.py @ 5134

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/searchclient.py@5134
Revision 5134, 8.0 KB checked in by cbyrom, 12 years ago (diff)

Add search functionality for doing searches in 'browse' mode against
the local browse service + add templates to support this - making the
atom search result template more generic for re-use.

Line 
1'''
2 Basic search client for eXist XML DBs
3 
4 @author: C Byrom - Tessella, Feb 09
5'''
6from ndg.common.src.clients.xmldb.abstractsearchxmldbclient import AbstractSearchXMLDBClient
7from ndg.common.src.lib.utilities import loadConfigDetails, edict
8from xml.etree import cElementTree as ET
9import dbconstants as dc
10import logging, os
11from xmlrpcclient import XMLRPCClient
12
13
class SearchClient(AbstractSearchXMLDBClient):
    '''
    Basic search client for eXist XML DBs.

    Wraps a low level eXist client (currently only the XML-RPC one) and
    exposes document retrieval (getDocs) and summary searches (search).
    The constants ATOM_TARGET and DEFAULT_ALL_VAL and the 'resources'
    attribute used below are not defined in this class - presumably they are
    provided by AbstractSearchXMLDBClient (to be confirmed against that class).
    '''

    def __init__(self, dbHostName='chinook.badc.rl.ac.uk',
                 configFileName='passwords.txt',
                 clientType=dc.XML_RPC_CLIENT):
        '''
        Set up a search client for eXist DB
        @keyword dbHostName: eXist host to use - defaults to
        'chinook.badc.rl.ac.uk'
        @keyword configFileName: password file to use - NB, this should
        have contents of format (NB, space delimiter):
        dbName userID password
        Default is 'passwords.txt'.
        @keyword clientType: Type of client to use for eXist connection.
        Currently only dc.XML_RPC_CLIENT is supported - which is the default.
        @raise ValueError: if unrecognised eXist client type specified
        '''
        logging.debug("Initialising SearchClient")
        self.setupDBConnection(dbHostName, configFileName, clientType)
        logging.debug("SearchClient initialised")

    def setupDBConnection(self, dbName, pwFilename, clientType):
        '''
        Establish connection to eXist DB - using the specified DB name and
        password file
        @param dbName: hostname of the eXist DB
        @param pwFilename: name of file with password details - NB, this should
        have contents of format (NB, space delimiter):
        dbName userID password
        @param clientType: Type of client to use for eXist connection.
        Currently only dc.XML_RPC_CLIENT is supported.
        @raise ValueError: if unrecognised eXist client type specified
        '''
        logging.debug("Initialising connection to eXist DB ('%s')", dbName)
        # NB, the third value returned by loadConfigDetails is not used here -
        # the client is created with the dbName passed in
        userID, password, _host = loadConfigDetails(pwFilename, dbName)

        if clientType != dc.XML_RPC_CLIENT:
            raise ValueError("Unrecognised eXist client type: '%s'" % clientType)

        self.client = XMLRPCClient(dbName, userID, password)
        logging.debug("Connection set up")

    def getDocs(self, documents, format='original'):
        '''
        Return one or more complete documents
        @param documents: array of documents to retrieve
        @keyword format: format the retrieved document should be in.  Default,
        'original' returns documents in their original format.  NB, not
        currently implemented - the value is ignored.
        @return: dictionary with format, {docName: docContents}.  NB, documents
        for which the DB returns nothing (e.g. invalid doc paths) are silently
        left out of the dictionary - no exception is raised for them.
        '''
        logging.info("Getting docs, %s, from eXist DB...", documents)
        results = {}
        for doc in documents:
            # avoid duplicate lookups
            if doc in results:
                continue

            # NB, invalid doc paths won't return anything - so ignore these
            res = self.client.getDoc(doc)
            if res:
                results[doc] = res
        logging.info("Returning dict with docs in")
        return results

    def search(self, term, start=1, howmany=20,
               target=None, scope=None, bbox=None,
               dateRange=None, geoSearchType=None,
               providerID=None, atomTypeID=None,
               useChunked=True):
        '''
        Provides a search interface that mimics the WSDL search interface,
        except that the target used is the exist collection name, and scope,
        bbox, dateRange and geoSearchType are ignored, and a list of python
        summary objects is returned.

        As well as returning the results, the call updates the following
        attributes on the client: results, hits, start, howmany, error and
        serverSessionID.

        @param term: text to search for
        @keyword start: 1-based position of the first result to return
        @keyword howmany: maximum number of results to return
        @keyword target: self.ATOM_TARGET to search atoms, or one of
        'ndg_B_metadata' / 'NumSim' to use the matching summary xquery
        @keyword providerID: provider to restrict an atom search to
        @keyword atomTypeID: atom type to restrict an atom search to
        @keyword useChunked: NB, currently not passed on to the client - the
        query is always run with useChunked = False
        @raise KeyError: if the target is not one of the supported values
        @return: list of edict summaries (with keys id, name and type - plus
        providerID, created, href, editLink and subtype for atom searches);
        the list is empty if nothing was found
        '''
        # select the right query according to the docType requested
        if target == self.ATOM_TARGET:
            xquery = self.__createAtomSearch(providerID, atomTypeID, term)
        else:
            summaryQueries = {'ndg_B_metadata': 'molesSummary',
                              'NumSim': 'numsimSummary'}
            if target not in summaryQueries:
                raise KeyError("Unrecognised search target: '%s'" % (target,))
            xquery = self.resources.xq[summaryQueries[target]]
            xquery = xquery.replace('SEARCHSTRING', term)

        logging.info("Executing xquery search")
        logging.debug(" - query: '%s'", xquery)

        # NOTE(review): the useChunked argument is deliberately(?) not
        # forwarded here - chunking has always been switched off for this
        # call; confirm before changing
        results = self.client.runQuery(xquery, offset=start - 1,
                                       noResults=howmany,
                                       useChunked=False)
        logging.info("Search complete - processing results")

        self.serverSessionID = ''
        self.results = []

        if not results:
            errorMessage = 'No results for [%s]' % (term,)
            logging.info(errorMessage)
            self.hits = 0
            self.start = 0
            self.howmany = 0
            self.error = [errorMessage]
            return self.results

        self.error = None
        self.start = start
        self.howmany = howmany

        # list(element) is the supported replacement for the deprecated
        # Element.getchildren()
        summaries = list(ET.fromstring(results[0]))

        # NB, this counts every summary returned - including any atom feed
        # entries that are skipped below
        self.hits = len(summaries)

        isAtomSearch = (target == self.ATOM_TARGET)
        for summary in summaries:
            t = edict(id=summary.findtext('id'),
                      name=summary.findtext('name'),
                      type=summary.findtext('type'))

            # add extra stuff for atom search (NB, this could be added to all
            # but am unsure of implications elsewhere atm
            if isAtomSearch:
                # NB, the eXist 1.2.5->1.3 upgrade resulted in atom feed
                # entries being created as separate files - so skip these in
                # results sets (they won't have an href attribute)
                if not summary.findtext('href'):
                    continue

                for key in ('providerID', 'created', 'href', 'subtype'):
                    t[key] = summary.findtext(key)

                    # adjust href to point to atom editor instead of atom view
                    # (href is known to be non-empty at this point)
                    if key == 'href':
                        t['editLink'] = t[key].replace('view', 'editAtom')

            self.results.append(t)
        return self.results

    @staticmethod
    def _escapeXQueryString(value):
        '''
        Escape a value so it can be placed inside a single quoted XQuery
        string literal
        @param value: string to escape
        @return: value with '&' replaced by '&amp;' and each single quote
        doubled up
        '''
        # NB, '&' must be done first so the entity it produces is not touched
        return value.replace("&", "&amp;").replace("'", "''")

    def __createAtomSearch(self, providerID, atomTypeID, term):
        '''
        Create a valid xquery search for searching atoms
        @param providerID: provider to restrict the search to; ignored if
        empty or equal to self.DEFAULT_ALL_VAL
        @param atomTypeID: atom category label to restrict the search to;
        ignored if empty or equal to self.DEFAULT_ALL_VAL
        @param term: text to search for; ignored if empty
        @return: xquery string returning a <results> element with one
        <summary> child per matching atom entry
        '''
        logging.debug("Creating xquery for searching atoms")
        logging.debug("providerID: '%s', atomTypeID: '%s', term: '%s'",
                      providerID, atomTypeID, term)
        escape = self._escapeXQueryString

        # only add the required search clauses - NB, these increase the search
        # time significantly, so avoid unless really required
        whereClause = []
        if term:
            whereClause.append(". &= '" + escape(term) + "'")

        if providerID and providerID != self.DEFAULT_ALL_VAL:
            whereClause.append(
                "moles:entity/moles:molesISO/moles:providerID = '"
                + escape(providerID) + "'")

        if atomTypeID and atomTypeID != self.DEFAULT_ALL_VAL:
            whereClause.append(
                "atom:category/@label = '" + escape(atomTypeID) + "'")

        # with no restrictions, match every entry
        if not whereClause:
            whereClause.append(".")

        xquery = (
            "declare namespace moles='http://ndg.nerc.ac.uk/schema/moles2beta';\n"
            "declare namespace atom='http://www.w3.org/2005/Atom';\n"
            "<results>\n{\n"
            "for $DE in collection('/db/atoms')/atom:entry["
            + " and ".join(whereClause) +
            "] return <summary> "
            "<id>{$DE/atom:id/text()}</id> "
            "<name>{$DE/atom:title/text()}</name> "
            "<type>{string($DE/atom:category[@term='ATOM_TYPE']/@scheme)}</type> "
            "<subtype>{string($DE/atom:category[@term='ATOM_SUBTYPE']/@scheme)}</subtype> "
            "<href>{string($DE/atom:link[@rel='self']/@href)}</href> "
            "<providerID>{$DE/moles:entity/moles:molesISO/moles:providerID/text()}</providerID> "
            "<created>{$DE/moles:entity/moles:molesISO/moles:created/text()}</created> "
            "</summary>"
            "}"
            "</results>")

        logging.debug("Created xquery: '%s'", xquery)
        return xquery
Note: See TracBrowser for help on using the repository browser.