source: ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/searchclient.py @ 4923

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/searchclient.py@4923
Revision 4923, 7.6 KB checked in by cbyrom, 11 years ago (diff)

Create search client for eXist DB - implementing the generic search
interface.

Line 
1'''
2 Basic search client for eXist XML DBs
3 
4 @author: C Byrom - Tessella, Feb 09
5'''
6from ndg.common.src.clients.xmldb.abstractsearchxmldbclient import AbstractSearchXMLDBClient
7from ndg.common.src.lib.utilities import loadConfigDetails
8from xml.etree import cElementTree as ET
9import dbconstants as dc
10import logging, os
11from xmlrpcclient import XMLRPCClient
12
13
class SearchClient(AbstractSearchXMLDBClient):
    '''
    Basic search client for eXist XML DBs.

    Wraps an eXist client connection (currently only XML-RPC) and exposes
    document retrieval (getDocs) and xquery based searching (search).  After
    a call to search() the outcome is also available on the instance via the
    attributes results, hits, start, howmany and error.
    '''

    def __init__(self, dbHostName = 'chinook.badc.rl.ac.uk',
                 configFileName ='passwords.txt',
                 clientType = dc.XML_RPC_CLIENT):
        '''
        Set up a search client for eXist DB
        @keyword dbHostName: eXist host to use - defaults to
        'chinook.badc.rl.ac.uk'
        @keyword configFileName : password file to use - NB, this should
        have contents of format (NB, space delimiter):
        dbName userID password
        Default is 'passwords.txt'.
        @keyword clientType: Type of client to use for eXist connection.  Currently
        only supported by 'xmlrpc' - which is the default.
        @raise ValueError: if unrecognised eXist client type specified
        '''
        logging.debug("Initialising SearchClient")
        self.setupDBConnection(dbHostName, configFileName, clientType)
        logging.debug("SearchClient initialised")


    def setupDBConnection(self, dbName, pwFilename, clientType):
        '''
        Establish connection to eXist DB - using the specified DB name and
        password file.  The created client is stored as self.client
        @param dbName: hostname of the eXist DB
        @param pwFilename: name of file with password details - NB, this should
        have contents of format (NB, space delimiter):
        dbName userID password
        @param clientType: Type of client to use for eXist connection.  Currently
        only supported by 'xmlrpc' - which is the default.
        @raise ValueError: if unrecognised eXist client type specified
        '''
        logging.debug("Initialising connection to eXist DB ('%s')", dbName)
        # NB, the 'host' value from the config details is not used here - the
        # client is always pointed at dbName
        userID, password, host = loadConfigDetails(pwFilename, dbName)

        if clientType != dc.XML_RPC_CLIENT:
            raise ValueError("Unrecognised eXist client type: '%s'" %clientType)

        self.client = XMLRPCClient(dbName, userID, password)
        logging.debug("Connection set up")


    def getDocs(self, documents, format='original'):
        '''
        Return one or more complete document
        @param documents: array of documents to retrieve
        @keyword format: format the retrieved document should be in.  Default, 'original'
        returns documents in their original format. NB, not currently implemented.
        @return: dictionary with format, {docName: docContents}.  NB, documents
        for which the DB returns nothing (e.g. invalid doc paths) are silently
        left out of the dictionary - no error is raised for them
        '''
        # pass 'documents' as a logging argument rather than %-formatting it
        # directly - direct formatting fails if a tuple of docs is passed in
        logging.info("Getting docs, %s, from eXist DB...", documents)
        results = {}

        # track every doc already requested - including those that returned
        # nothing - so that no doc path is looked up more than once
        attempted = set()
        for doc in documents:
            if doc in attempted:
                continue
            attempted.add(doc)

            # NB, invalid doc paths won't return anything - so ignore these
            res = self.client.getDoc(doc)
            if res:
                results[doc] = res

        logging.info("Returning dict with docs in")
        return results


    def search(self, term, start=1, howmany=20,
               target=None, scope=None, bbox=None,
               dateRange=None, geoSearchType=None,
               providerID=None, atomTypeID=None,
               useChunked = True):
        '''
        Provides a search interface that mimics the WSDL search interface, except
        that the target used is the exist collection name, and scope, bbox,
        dateRange and geoSearchType are ignored, and a python summary object
        is returned
        @param term: search term to look for
        @keyword start: 1-based position of the first result to return
        @keyword howmany: maximum number of results to request
        @keyword target: type of search to do - either the atom target
        (self.ATOM_TARGET), 'ndg_B_metadata' or 'NumSim'
        @keyword providerID: restrict atom searches to this provider
        @keyword atomTypeID: restrict atom searches to this atom type
        @keyword useChunked: passed straight through to the client's runQuery
        @raise KeyError: if the target is not one of the supported values
        @return: list of dictionaries, one per result, with keys 'id', 'name'
        and 'type' - plus 'providerID', 'created', 'href' and 'subtype' for
        atom searches.  An empty list is returned if nothing was found.  NB,
        the same list is stored as self.results, with self.hits, self.start,
        self.howmany and self.error describing the outcome
        '''
        isAtomSearch = (target == self.ATOM_TARGET)

        #select the right query according to the docType requested
        if isAtomSearch:
            xquery = self.__createAtomSearch(providerID, atomTypeID, term)
        else:
            summaryQueries = {'ndg_B_metadata': 'molesSummary',
                              'NumSim': 'numsimSummary'}
            if target not in summaryQueries:
                raise KeyError("Unrecognised search target: '%s'" %(target,))

            # NOTE(review): the term is substituted unescaped since the quoting
            # used around SEARCHSTRING in the stored query is not visible here
            # - terms containing quotes may break the query; confirm
            xquery = self.resources.xq[summaryQueries[target]]
            xquery = xquery.replace('SEARCHSTRING', term)

        logging.info("Executing xquery search")
        logging.debug(" - query: '%s'", xquery)

        # NB, 'start' is 1-based whilst the client offset is 0-based
        results = self.client.runQuery(xquery, offset=start - 1,
                                       noResults = howmany,
                                       useChunked = useChunked)
        logging.info("Search complete - processing results")

        self.serverSessionID = ''
        self.results = []

        if not results:
            errorMessage = 'No results for [%s]'%term
            logging.info(errorMessage)
            self.hits = 0
            self.start = 0
            self.howmany = 0
            self.error = [errorMessage]
            return self.results

        self.hits = len(results)
        self.error = None
        self.start = start
        self.howmany = howmany

        for result in results:
            summary = ET.fromstring(result)
            entry = {}
            for key in ('id', 'name', 'type'):
                entry[key] = summary.find(key).text

            # add extra stuff for atom search (NB, this could be added to all
            # but am unsure of implications elsewhere atm
            if isAtomSearch:
                for key in ('providerID', 'created', 'href', 'subtype'):
                    entry[key] = summary.find(key).text

                # adjust href to point to atom editor instead of atom view
                if entry['href']:
                    entry['href'] = entry['href'].replace('view', 'editAtom')

            self.results.append(entry)
        return self.results


    @staticmethod
    def __escapeXQueryLiteral(value):
        '''
        Make a value safe to place inside a single quoted xquery string literal
        @param value: string to escape
        @return: the string with every '&' replaced by '&amp;' and every
        apostrophe doubled - so that it cannot end the literal early
        '''
        # NB, '&' must be done first so the inserted entities aren't re-escaped
        return value.replace("&", "&amp;").replace("'", "''")


    def __createAtomSearch(self, providerID, atomTypeID, term):
        '''
        Create a valid xquery search for searching atoms
        @param providerID: provider to restrict the search to; ignored if
        empty or equal to self.DEFAULT_ALL_VAL
        @param atomTypeID: atom type to restrict the search to; ignored if
        empty or equal to self.DEFAULT_ALL_VAL
        @param term: text to search the atoms for; ignored if empty
        @return: xquery string, returning a <summary> element for each atom
        in the '/db/atoms' collection that matches all the clauses used
        '''
        logging.debug("Creating xquery for searching atoms")
        logging.debug("providerID: '%s', atomTypeID: '%s', term: '%s'",
                      providerID, atomTypeID, term)
        escape = self.__escapeXQueryLiteral

        prolog = (
            "declare namespace moles='http://ndg.nerc.ac.uk/schema/moles2beta';\n"
            "declare namespace atom='http://www.w3.org/2005/Atom';\n"
            "for $DE in collection('/db/atoms')/atom:entry[")

        # only add the required search clauses - NB, these increase the search time
        # significantly, so avoid unless really required
        whereClause = []
        if term:
            whereClause.append(". &= '" + escape(term) + "'")

        if providerID and providerID != self.DEFAULT_ALL_VAL:
            whereClause.append(
                "moles:entity/moles:molesISO/moles:providerID = '"
                + escape(providerID) + "'")

        if atomTypeID and atomTypeID != self.DEFAULT_ALL_VAL:
            whereClause.append(
                "atom:category/@label = '" + escape(atomTypeID) + "'")

        # with no clauses at all, use a predicate that matches every entry
        if not whereClause:
            whereClause.append(".")

        returnClause = (
            "] return <summary> "
            "<id>{$DE/atom:id/text()}</id> "
            "<name>{$DE/atom:title/text()}</name> "
            "<type>{string($DE/atom:category[@term='ATOM_TYPE']/@scheme)}</type> "
            "<subtype>{string($DE/atom:category[@term='ATOM_SUBTYPE']/@scheme)}</subtype> "
            "<href>{string($DE/atom:link[@rel='self']/@href)}</href> "
            "<providerID>{$DE/moles:entity/moles:molesISO/moles:providerID/text()}</providerID> "
            "<created>{$DE/moles:entity/moles:molesISO/moles:created/text()}</created> "
            "</summary>")

        xquery = prolog + " and ".join(whereClause) + returnClause

        logging.debug("Created xquery: '%s'", xquery)
        return xquery
Note: See TracBrowser for help on using the repository browser.