1 | ''' |
---|
2 | Basic search client for eXist XML DBs |
---|
3 | |
---|
4 | @author: C Byrom - Tessella, Feb 09 |
---|
5 | ''' |
---|
6 | from ndg.common.src.clients.xmldb.abstractsearchxmldbclient import AbstractSearchXMLDBClient |
---|
7 | from ndg.common.src.lib.utilities import loadConfigDetails |
---|
8 | from xml.etree import cElementTree as ET |
---|
9 | import dbconstants as dc |
---|
10 | import logging, os |
---|
11 | from xmlrpcclient import XMLRPCClient |
---|
12 | |
---|
13 | |
---|
class SearchClient(AbstractSearchXMLDBClient):
    '''
    Basic search client for an eXist XML DB.

    Wraps a lower level eXist client (currently only the XML-RPC one) and
    offers document retrieval and xquery based searching on top of it.

    NB, ATOM_TARGET, DEFAULT_ALL_VAL and resources are read via self but are
    not defined in this class - presumably they are provided by
    AbstractSearchXMLDBClient (TODO confirm).
    '''

    # child elements copied out of every <summary> search result
    _SUMMARY_KEYS = ('id', 'name', 'type')
    # additional child elements copied out for atom searches only
    _ATOM_SUMMARY_KEYS = ('providerID', 'created', 'href', 'subtype')

    def __init__(self, dbHostName='chinook.badc.rl.ac.uk',
                 configFileName='passwords.txt',
                 clientType=dc.XML_RPC_CLIENT):
        '''
        Set up a search client for eXist DB
        @keyword dbHostName: eXist host to use - defaults to
        'chinook.badc.rl.ac.uk'
        @keyword configFileName: password file to use - NB, this should
        have contents of format (NB, space delimiter):
        dbName userID password
        Default is 'passwords.txt'.
        @keyword clientType: Type of client to use for eXist connection.
        Currently only dc.XML_RPC_CLIENT is supported - which is the default.
        @raise ValueError: if unrecognised eXist client type specified
        '''
        logging.debug("Initialising SearchClient")
        self.setupDBConnection(dbHostName, configFileName, clientType)
        logging.debug("SearchClient initialised")

    def setupDBConnection(self, dbName, pwFilename, clientType):
        '''
        Establish connection to eXist DB - using the specified DB name and
        password file.  On success the connection is stored as self.client.
        @param dbName: hostname of the eXist DB
        @param pwFilename: name of file with password details - NB, this
        should have contents of format (NB, space delimiter):
        dbName userID password
        @param clientType: Type of client to use for eXist connection.
        Currently only dc.XML_RPC_CLIENT is supported.
        @raise ValueError: if unrecognised eXist client type specified
        '''
        # lazy logging args - safe whatever type dbName happens to be
        logging.debug("Initialising connection to eXist DB ('%s')", dbName)

        # NB, the third value returned with the credentials is not used; the
        # client is created with dbName.  NOTE(review): confirm this value
        # is not meant to be passed to the client instead
        userID, password, _host = loadConfigDetails(pwFilename, dbName)

        if clientType != dc.XML_RPC_CLIENT:
            raise ValueError("Unrecognised eXist client type: '%s'"
                             % (clientType,))

        self.client = XMLRPCClient(dbName, userID, password)
        logging.debug("Connection set up")

    def getDocs(self, documents, format='original'):
        '''
        Return one or more complete documents
        @param documents: iterable of document paths to retrieve
        @keyword format: format the retrieved document should be in. Default,
        'original' returns documents in their original format. NB, not
        currently implemented - the value is ignored.
        @return: dictionary with format, {docName: docContents}.  NB,
        documents for which the DB returns nothing are silently left out of
        the dictionary - no error is raised for them.
        '''
        logging.info("Getting docs, %s, from eXist DB...", documents)
        results = {}
        # remember every path that has been tried - including those that
        # returned nothing - so that each path is only looked up once
        attempted = set()
        for doc in documents:
            if doc in attempted:
                continue
            attempted.add(doc)

            # NB, invalid doc paths won't return anything - so ignore these
            res = self.client.getDoc(doc)
            if res:
                results[doc] = res
        logging.info("Returning dict with docs in")
        return results

    def search(self, term, start=1, howmany=20,
               target=None, scope=None, bbox=None,
               dateRange=None, geoSearchType=None,
               providerID=None, atomTypeID=None,
               useChunked=True):
        '''
        Provides a search interface that mimics the WSDL search interface,
        except that the target used is the exist collection name, and scope,
        bbox, dateRange and geoSearchType are ignored, and a list of python
        summary dictionaries is returned.

        As a side effect the following are set on the instance: results,
        hits, start, howmany, error and serverSessionID.

        @param term: text to search for
        @keyword start: 1 based position of the first result to return
        @keyword howmany: maximum number of results to return
        @keyword target: self.ATOM_TARGET to search atoms, otherwise one of
        'ndg_B_metadata' or 'NumSim'
        @keyword providerID: provider to restrict an atom search to
        @keyword atomTypeID: atom type to restrict an atom search to
        @keyword useChunked: passed straight on to the client's runQuery
        @raise KeyError: if target is not the atom target and is not one of
        the names listed above - NB, this includes the default of None
        @return: list of dictionaries with keys 'id', 'name' and 'type' -
        plus 'providerID', 'created', 'href' and 'subtype' for atom searches.
        An empty list is returned if nothing was found.
        '''
        isAtomSearch = (target == self.ATOM_TARGET)

        # select the right query according to the docType requested
        if isAtomSearch:
            xquery = self.__createAtomSearch(providerID, atomTypeID, term)
        else:
            xqName = {'ndg_B_metadata': 'molesSummary',
                      'NumSim': 'numsimSummary'}[target]
            xquery = self.resources.xq[xqName]
            # NOTE(review): term is substituted unescaped since the quoting
            # used by the query template is not visible from here - confirm
            # the inputs are trusted or escape them to suit the template
            xquery = xquery.replace('SEARCHSTRING', term)

        logging.info("Executing xquery search")
        logging.debug(" - query: '%s'", xquery)

        results = self.client.runQuery(xquery, offset=start - 1,
                                       noResults=howmany,
                                       useChunked=useChunked)
        logging.info("Search complete - processing results")

        self.serverSessionID = ''
        self.results = []

        if not results:
            errorMessage = 'No results for [%s]' % (term,)
            logging.info(errorMessage)
            self.hits = 0
            self.start = 0
            self.howmany = 0
            self.error = [errorMessage]
            return self.results

        self.hits = len(results)
        self.error = None
        self.start = start
        self.howmany = howmany

        for result in results:
            self.results.append(self._parseSummary(result, isAtomSearch))
        return self.results

    def _parseSummary(self, summaryXML, isAtomSearch):
        '''
        Convert a single <summary> result document into a dictionary
        @param summaryXML: string with the XML of one search result
        @param isAtomSearch: if True, the extra atom specific elements are
        extracted too
        @return: dictionary of element name to element text; the value is
        None for elements which are empty or missing
        '''
        summary = ET.fromstring(summaryXML)
        details = {}
        for key in self._SUMMARY_KEYS:
            details[key] = self._childText(summary, key)

        # add extra stuff for atom search (NB, this could be added to all
        # but am unsure of implications elsewhere atm)
        if isAtomSearch:
            for key in self._ATOM_SUMMARY_KEYS:
                details[key] = self._childText(summary, key)

            # adjust href to point to atom editor instead of atom view
            if details['href']:
                details['href'] = details['href'].replace('view', 'editAtom')

        return details

    @staticmethod
    def _childText(element, tag):
        '''
        Return the text of the first child of element with the given tag -
        or None if there is no such child or it has no text
        '''
        child = element.find(tag)
        if child is None:
            return None
        return child.text

    @staticmethod
    def _quoteXQueryString(value):
        '''
        Return value as a single quoted xquery string literal, with embedded
        apostrophes doubled so that they cannot terminate the literal early.
        NB, '&' is deliberately left alone so that entity references supplied
        by the caller continue to work.
        '''
        return "'" + value.replace("'", "''") + "'"

    def __createAtomSearch(self, providerID, atomTypeID, term):
        '''
        Create a valid xquery search for searching atoms
        @param providerID: provider to restrict the search to - ignored if
        empty or equal to self.DEFAULT_ALL_VAL
        @param atomTypeID: atom type (category label) to restrict the search
        to - ignored if empty or equal to self.DEFAULT_ALL_VAL
        @param term: text to search for - ignored if empty
        @return: string containing the xquery
        '''
        logging.debug("Creating xquery for searching atoms")
        logging.debug("providerID: '%s', atomTypeID: '%s', term: '%s'",
                      providerID, atomTypeID, term)
        quote = self._quoteXQueryString

        # only add the required search clauses - NB, these increase the
        # search time significantly, so avoid unless really required
        whereClause = []
        if term:
            whereClause.append(". &= " + quote(term))

        if providerID and providerID != self.DEFAULT_ALL_VAL:
            whereClause.append(
                "moles:entity/moles:molesISO/moles:providerID = "
                + quote(providerID))

        if atomTypeID and atomTypeID != self.DEFAULT_ALL_VAL:
            whereClause.append("atom:category/@label = " + quote(atomTypeID))

        # with no restrictions at all, use a predicate matching every entry
        if not whereClause:
            whereClause.append(".")

        xquery = (
            "declare namespace moles='http://ndg.nerc.ac.uk/schema/moles2beta';\n"
            "declare namespace atom='http://www.w3.org/2005/Atom';\n"
            "for $DE in collection('/db/atoms')/atom:entry["
            + " and ".join(whereClause)
            + "] return <summary> "
            "<id>{$DE/atom:id/text()}</id> "
            "<name>{$DE/atom:title/text()}</name> "
            "<type>{string($DE/atom:category[@term='ATOM_TYPE']/@scheme)}</type> "
            "<subtype>{string($DE/atom:category[@term='ATOM_SUBTYPE']/@scheme)}</subtype> "
            "<href>{string($DE/atom:link[@rel='self']/@href)}</href> "
            "<providerID>{$DE/moles:entity/moles:molesISO/moles:providerID/text()}</providerID> "
            "<created>{$DE/moles:entity/moles:molesISO/moles:created/text()}</created> "
            "</summary>"
        )

        logging.debug("Created xquery: '%s'", xquery)
        return xquery