Changeset 4918 for ndgCommon


Ignore:
Timestamp:
10/02/09 16:27:27 (11 years ago)
Author:
cbyrom
Message:

Define interfaces for the following clients - to improve implementation
and extensibility:

  • search client
  • xml db crud client
  • xml db low level ops client (e.g. to be implemented with XML-RPC/REST interface)
  • atom client for xml db
Location:
ndgCommon/trunk/ndg/common/src/clients
Files:
3 added
1 deleted
1 edited

Legend:

Unmodified
Added
Removed
  • ndgCommon/trunk/ndg/common/src/clients/interfacesearchclient.py

    r4793 r4918  
    88 
    99class InterfaceSearchClient(object): 
     10 
     11    # target to use when specifying atom searches 
     12    ATOM_TARGET = 'Atom' 
    1013     
    11     def __init__(): 
     14    # value used when specifying '*' in searches 
     15    DEFAULT_ALL_VAL = 'All'  
     16     
     17    def search(self, term, start=1, howmany=20, target='All', 
     18               scope=None, bbox=None, dateRange=None, geoSearchType='overlaps'): 
     19        '''  
     20        Get a list of documents, with basic summary info for each doc, for a  
     21        specific term using a free text search 
     22        @param term: term to search on 
     23        @keyword start:  first record to return 
     24        @keyword howmany: number of records to return 
     25        @keyword target: target of search 
     26        @keyword scope: scope of search 
     27        @keyword bbox: bounding box for search data - NB, this should be an array with 
     28        content, [N, W, E, S] 
     29        @keyword dateRange: range of dates to use in search - an array with content,  
     30        [startDate, endDate] 
     31        @keyword geoSearchType: type of geospatial search to use - NB, currently only 
     32        'overlaps' accepted  
     33        @return: array of search results; if no results are found this array should be empty 
    1234        ''' 
    13         ''' 
    14         logging.info("Initialising connection to eXist DB") 
    15         logging.info("eXist DB connection initialised") 
    16  
    17     def search(self,term,start=1,howmany=20,target='All',scope=None,bbox=None,dateRange=None,geoSearchType='overlaps'): 
    18         ''' Get a list of documents for a specific term using a free text search''' 
    19         logging.info("Running search - with term, '%s'" %term) 
    20         logging.info("Search complete") 
    21         return self.documents 
     35        raise NotImplementedError("This is an interface class and this method has not " + \ 
     36                                  "been implemented yet") 
    2237 
    2338 
    24     def getDoc(self,document,format='original'): 
     39    def getDocs(self, documents, format='original'): 
    2540        '''  
    26         Return a single document from the backend database  
     41        Return one or more complete documents
     42        @param documents: array of documents to retrieve 
     43        @keyword format: format the retrieved document should be in.  Default, 'original' 
     44        returns documents in their original format   
     45        @raise ValueError if the requested document(s) cannot be found 
     46        @return: dictionary with format, {docName: docContents} 
    2747        ''' 
    28         logging.info("Retrieving document, '%s' in %s format" %(document, format)) 
     48        raise NotImplementedError("This is an interface class and this method has not " + \ 
     49                                  "been implemented yet") 
    2950 
    30         request = self.__setUpRequestObject([document], format) 
    31  
    32         docs = self.__runDocumentPresent(request) 
    33         logging.info("Document retrieved successfully") 
    34         return docs[0] 
    35  
    36                     
    37     def getAllDocs(self,format='original'):             
    38         '''  
    39         Parse the list of documents, and retrieve them directly  
    40         ''' 
    41         logging.info("Retrieving all documents specified") 
    42         logging.debug("- %s" %self.documents) 
    43         if self.documents is None: 
    44             logging.info("No documents specified to retrieve - returning") 
    45             return [] 
    46      
    47         request = self.__setUpRequestObject(self.documents, format) 
    48         docs = self.__runDocumentPresent(request) 
    49         logging.info("Documents retrieved successfully") 
    50         return docs 
    51  
    52              
    53     def getDocElement(self,document): 
    54         ''' Takes a document path (maybe from a previous call to ndgSearch) and extracts that document 
    55         into an ElementTree instance ''' 
    56         #we stick it straight into element tree because we need to use et to get the actual document 
    57         #we want, not the envelope xml elements  
    58          
    59         doc=self.getDoc(document) 
    60         path=document.rstrip('.xml') 
    61         try: 
    62             r=loadET(doc) 
    63             #return r.find(path) 
    64             return r 
    65         except: 
    66             return self.__xmlerror((path,doc)) 
    67  
    68              
    69     def getAllDocsAsElements(self): 
    70         '''  
    71         Get all the documents and load them into a list of ET instances  
    72         ''' 
    73         result=[] 
    74         docs=self.getAllDocs() 
    75         for doc in docs:  
    76             try: 
    77                 r=loadET(doc) 
    78                 result.append(r)#result.append(r.find(path)) 
    79             except: 
    80                 result.append(self.__xmlerror(doc)) 
    81         return result 
    82              
    83              
    84     def getLabelledDocs(self,format='original'): 
    85         '''  
    86         Returns all the documents in sequence in a labelled list of strings 
    87         ''' 
    88         logging.info("Retrieving all labelled documents") 
    89         if self.hits==0: return [] 
    90         #filenames=self.documents.Document 
    91         #so we know that the following call is the problem ... 
    92         responses=self.getAllDocs(format) 
    93          
    94         i=len(self.documents) 
    95         j=len(responses) 
    96         if i!=j: 
    97             logging.debug("Requested docs: %s\nActual results: %s" \ 
    98                           %(self.documents, responses)) 
    99             raise NDGSearchError('Internal inconsistency in search \ 
    100                 return [hits:%s!=responses:%s]'%(i,j)) 
    101         indices=range(i) 
    102         results=[] 
    103         for i in indices: 
    104             ff=os.path.splitext(self.documents[i]) 
    105             results.append((ff[0],responses[i])) 
    106         logging.info("Documents retrieved") 
    107         return results 
    108          
    109     def get(self,repository,schema,localID,format='DIF',**kw): 
    110         ''' Obtain a document via its NDG id split up '''
    111         #nb argument targetCollection is here to provide same API as exist xmlrpc interface 
    112         uri='%s__%s__%s'%(repository,schema,localID) 
    113         fileName=uri+'.xml' 
    114         return self.getDoc(fileName,format) 
    115          
    116          
    117  
    118  
    119  
    120     def getAllAtomIDs(self): 
    121         ''' 
    122         Retrieve all the atom IDs in the atoms directory - NB, this can 
    123         be a quick way of producing a cache of data to check - e.g. to avoid 
    124         multiple calls to getAtomFileCollectionPath 
    125         @return: ids - array of all atom IDs 
    126         ''' 
    127         logging.info("Retrieving all atom ids") 
    128         xq = self.xmldb.xq.actual('atomList', '/db/atoms', '', '') 
    129         id, doc = self.xmldb.executeQuery(xq) 
    130         if doc['hits'] == 0:  
    131             return [] 
    132          
    133         indices = range(doc['hits']) 
    134          
    135         doc = self.xmldb.retrieve(id, 0) 
    136         et = ET.fromstring(doc) 
    137         ids = [] 
    138         for member in et: 
    139             fn = member.findtext('{http://www.w3.org/2005/Atom}repositoryID') 
    140             ids.append(fn) 
    141         logging.debug("Found ids, '%s'" %ids) 
    142         return ids 
    143  
    144  
    145     def getAllAtomCollections(self): 
    146         ''' 
    147         Get all atom collection paths and store in a dictionary - for easy 
    148         reference when doing lots of things at once 
    149         @return: dict with key/val of atomID/collectionPath 
    150         ''' 
    151         logging.info("Retrieving all atom collection paths") 
    152         xq = self.xmldb.xq.actual('atomList', '/db/atoms', '', '') 
    153         id, doc = self.xmldb.executeQuery(xq) 
    154         if doc['hits'] == 0:  
    155             return [] 
    156          
    157         indices = range(doc['hits']) 
    158          
    159         doc = self.xmldb.retrieve(id, 0) 
    160         et = ET.fromstring(doc) 
    161         colData = {} 
    162         for member in et: 
    163             collection = member.findtext('{http://www.w3.org/2005/Atom}fileName') 
    164             fileName = collection.split('/')[-1] 
    165             fileName = fileName.split('.')[0] 
    166             dir = '/'.join(collection.split('/')[0:-1]) 
    167             colData[fileName] = dir 
    168  
    169         logging.debug("Finished looking up atom paths") 
    170         return colData 
    171  
    172  
    173     def getAtomPublicationState(self, atomID): 
    174         ''' 
    175         Retrieve the publication state of the specified atom - by  
    176         checking the collection it is in 
    177         @param atomID: atom id to look up
    178         @return: AtomState for the atom.  NB, if the ID is not found, assume 
    179         we're dealing with a new atom and set the state as the working state 
    180         ''' 
    181         logging.debug("Finding atom publication state") 
    182         path = self.getAtomFileCollectionPath(atomID) 
    183         for state in AtomState.allStates.values(): 
    184             if path.find('/%s' %state.collectionPath) > -1: 
    185                 logging.debug("- state found: '%s'" %state.title) 
    186                 return state 
    187          
    188         logging.debug("- state not found - returning WORKING state") 
    189         return AtomState.WORKING_STATE 
    190  
    191  
    192     def getAtomFileCollectionPath(self, atomID): 
    193         ''' 
    194         Given an atom id, determine and return the collection path in eXist 
    195         of the associated atom file 
    196         @param atomID: atom id to look up
    197         @return: collection path if it exists, None otherwise
    198         ''' 
    199         logging.info("Looking up collection path for atom ID, '%s'" %atomID) 
    200         xq = self.xmldb.xq['atomFullPath'] 
    201         xq = xq.replace('TargetCollection', ec.BASE_COLLECTION_PATH) 
    202         xq = xq.replace('LocalID', atomID) 
    203  
    204         id, doc = self.xmldb.executeQuery(xq) 
    205         if doc['hits'] == 0: 
    206             logging.info("No document found with the specified ID") 
    207             return None 
    208  
    209         doc = self.xmldb.retrieve(id,0,{}) 
    210  
    211         docET = ET.fromstring(doc) 
    212         collPath = docET.text + '/' 
    213         logging.debug("Found collection path, '%s'" %collPath) 
    214         return collPath 
    215  
Note: See TracChangeset for help on using the changeset viewer.