source: ndgCommon/trunk/ndg/common/src/clients/interfacesearchclient.py @ 4793

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/clients/interfacesearchclient.py@4832
Revision 4793, 7.5 KB checked in by cbyrom, 11 years ago (diff)

Checking in initial codebase for ndgUtils restructure.

Line 
'''
Basic interface for clients to implement for the searching
of metadata documents

@author: C Byrom - Tessella, Jan 09
'''
import logging
import os
import sys
from xml.etree import ElementTree as ET
8
class InterfaceSearchClient(object):
    '''
    Basic interface for clients that search for, and retrieve, metadata
    documents.

    State read by the methods of this class:
     - documents: names of the documents found by the latest search
       (None until something has populated it)
     - hits: number of hits found by the latest search
     - xmldb: DB client used by the atom lookup methods; it is never
       assigned in this class, so implementations must provide it

    NB, the hooks __setUpRequestObject, __runDocumentPresent and __xmlerror
    are referenced from inside this class but are not defined here; python
    name mangling resolves them as _InterfaceSearchClient__<name>.

    NOTE(review): loadET, NDGSearchError, AtomState and ec are used below
    but are not imported in the visible part of this module - confirm that
    they are brought into scope elsewhere.
    '''

    # Defaults, so that the retrieval methods can safely be called before
    # any search has been run
    documents = None
    hits = 0

    def __init__(self):
        '''
        Set up the client - currently this only logs the set up steps
        '''
        logging.info("Initialising connection to eXist DB")
        logging.info("eXist DB connection initialised")

    def search(self,term,start=1,howmany=20,target='All',scope=None,bbox=None,dateRange=None,geoSearchType='overlaps'):
        '''
        Get a list of documents for a specific term using a free text search
        NB, this implementation does no searching itself; apart from logging
        it only returns the current value of self.documents
        @param term: free text term to search for (only used for logging here)
        @return: self.documents
        '''
        logging.info("Running search - with term, '%s'", term)
        logging.info("Search complete")
        return self.documents

    def getDoc(self,document,format='original'):
        '''
        Return a single document from the backend database
        @param document: name of the document to retrieve
        @keyword format: format to retrieve the document in
        @return: first entry of the results returned by the backend
        '''
        logging.info("Retrieving document, '%s' in %s format", document, format)

        request = self.__setUpRequestObject([document], format)
        docs = self.__runDocumentPresent(request)
        logging.info("Document retrieved successfully")
        return docs[0]

    def getAllDocs(self,format='original'):
        '''
        Parse the list of documents, and retrieve them directly
        @keyword format: format to retrieve the documents in
        @return: the retrieved documents, or an empty list if self.documents
        has not been set
        '''
        logging.info("Retrieving all documents specified")
        logging.debug("- %s", self.documents)
        if self.documents is None:
            logging.info("No documents specified to retrieve - returning")
            return []

        request = self.__setUpRequestObject(self.documents, format)
        docs = self.__runDocumentPresent(request)
        logging.info("Documents retrieved successfully")
        return docs

    @staticmethod
    def _stripXmlSuffix(document):
        '''
        Remove a trailing '.xml' from a document name
        NB, str.rstrip('.xml') must not be used for this: it strips any run
        of the characters '.', 'x', 'm' and 'l', so 'climax.xml' would end
        up as 'clima'
        @param document: document name
        @return: name without the '.xml' suffix; unchanged if not present
        '''
        suffix = '.xml'
        if document.endswith(suffix):
            return document[:-len(suffix)]
        return document

    def getDocElement(self,document):
        '''
        Takes a document path (maybe from a previous call to ndgSearch) and
        extracts that document into an ElementTree instance
        @param document: name of the document to retrieve
        @return: result of loadET on the document or, if the load fails, the
        result of the __xmlerror hook for (path, document contents)
        '''
        # we stick it straight into element tree because we need to use et
        # to get the actual document we want, not the envelope xml elements
        doc = self.getDoc(document)
        path = self._stripXmlSuffix(document)
        try:
            return loadET(doc)
        except Exception:
            # deliberately best effort: report the failure via the error
            # hook, but let KeyboardInterrupt/SystemExit propagate
            return self.__xmlerror((path, doc))

    def getAllDocsAsElements(self):
        '''
        Get all the documents and load them into a list of ET instances
        @return: list with one entry per document - either the loaded
        document or, if the load failed, the result of the __xmlerror hook
        '''
        result = []
        for doc in self.getAllDocs():
            try:
                result.append(loadET(doc))
            except Exception:
                result.append(self.__xmlerror(doc))
        return result

    def getLabelledDocs(self,format='original'):
        '''
        Returns all the documents in sequence in a labelled list of strings
        @keyword format: format to retrieve the documents in
        @return: list of (document name without extension, document) tuples;
        empty if there are no hits
        @raise NDGSearchError: if the number of documents returned differs
        from the number requested
        '''
        logging.info("Retrieving all labelled documents")
        if self.hits == 0:
            return []

        responses = self.getAllDocs(format)
        # getAllDocs returns [] when no documents are set; mirror that here
        # rather than failing on len(None)
        documents = self.documents or []

        requested = len(documents)
        returned = len(responses)
        if requested != returned:
            logging.debug("Requested docs: %s\nActual results: %s",
                          documents, responses)
            # adjacent literals, not a backslash continuation inside the
            # string - that embeds the source indentation in the message
            raise NDGSearchError('Internal inconsistency in search '
                                 'return [hits:%s!=responses:%s]'
                                 % (requested, returned))

        results = [(os.path.splitext(name)[0], response)
                   for name, response in zip(documents, responses)]
        logging.info("Documents retrieved")
        return results

    def get(self,repository,schema,localID,format='DIF',**kw):
        '''
        Obtain a document via its NDG id split up
        @param repository: repository part of the NDG id
        @param schema: schema part of the NDG id
        @param localID: local ID part of the NDG id
        @keyword format: format to retrieve the document in
        @return: document named '<repository>__<schema>__<localID>.xml'
        '''
        # nb, extra keywords (e.g. targetCollection) are accepted, and
        # ignored, to provide the same API as the exist xmlrpc interface
        uri = '%s__%s__%s' % (repository, schema, localID)
        return self.getDoc(uri + '.xml', format)

    def _retrieveAtomList(self):
        '''
        Run the 'atomList' query over '/db/atoms' and parse the result
        @return: root element of the parsed result, or None if the query
        reported no hits
        '''
        xq = self.xmldb.xq.actual('atomList', '/db/atoms', '', '')
        queryID, summary = self.xmldb.executeQuery(xq)
        if summary['hits'] == 0:
            return None
        return ET.fromstring(self.xmldb.retrieve(queryID, 0))

    def getAllAtomIDs(self):
        '''
        Retrieve all the atom IDs in the atoms directory - NB, this can
        be a quick way of producing a cache of data to check - e.g. to avoid
        multiple calls to getAtomFileCollectionPath
        @return: ids - array of all atom IDs
        '''
        logging.info("Retrieving all atom ids")
        atomList = self._retrieveAtomList()
        # NB, compare with None - an element with no children is falsy
        if atomList is None:
            return []

        ids = [member.findtext('{http://www.w3.org/2005/Atom}repositoryID')
               for member in atomList]
        logging.debug("Found ids, '%s'", ids)
        return ids

    def getAllAtomCollections(self):
        '''
        Get all atom collection paths and store in a dictionary - for easy
        reference when doing lots of things at once
        @return: dict with key/val of atomID/collectionPath; empty if no
        atoms are found
        '''
        logging.info("Retrieving all atom collection paths")
        atomList = self._retrieveAtomList()
        if atomList is None:
            return {}

        colData = {}
        for member in atomList:
            collection = member.findtext('{http://www.w3.org/2005/Atom}fileName')
            if collection is None:
                logging.debug("- entry without a fileName found - skipping")
                continue
            # split into the collection path and the file name; the key is
            # the file name up to its first '.'
            collectionPath, _, fileName = collection.rpartition('/')
            colData[fileName.split('.')[0]] = collectionPath

        logging.debug("Finished looking up atom paths")
        return colData

    def getAtomPublicationState(self, atomID):
        '''
        Retrieve the publication state of the specified atom - by
        checking the collection it is in
        @param atomID: atom id to look up
        @return: AtomState for the atom.  NB, if the ID is not found, assume
        we're dealing with a new atom and set the state as the working state
        '''
        logging.debug("Finding atom publication state")
        path = self.getAtomFileCollectionPath(atomID)
        # path is None when the atom is not in the DB, so only search the
        # states when there is a path to check
        if path is not None:
            for state in AtomState.allStates.values():
                if ('/%s' % state.collectionPath) in path:
                    logging.debug("- state found: '%s'", state.title)
                    return state

        logging.debug("- state not found - returning WORKING state")
        return AtomState.WORKING_STATE

    def getAtomFileCollectionPath(self, atomID):
        '''
        Given an atom id, determine and return the collection path in eXist
        of the associated atom file
        @param atomID: atom id to look up
        @return: collection path (with a trailing '/'), if it exists,
        None, otherwise
        '''
        logging.info("Looking up collection path for atom ID, '%s'", atomID)
        xq = self.xmldb.xq['atomFullPath']
        xq = xq.replace('TargetCollection', ec.BASE_COLLECTION_PATH)
        xq = xq.replace('LocalID', atomID)

        queryID, summary = self.xmldb.executeQuery(xq)
        if summary['hits'] == 0:
            logging.info("No document found with the specified ID")
            return None

        docET = ET.fromstring(self.xmldb.retrieve(queryID, 0, {}))
        if docET.text is None:
            # nothing to build a path from - treat as 'not found'
            logging.info("No collection path found in the query result")
            return None

        collPath = docET.text + '/'
        logging.debug("Found collection path, '%s'", collPath)
        return collPath
215
Note: See TracBrowser for help on using the repository browser.