Ignore:
Timestamp:
18/02/09 10:46:59 (11 years ago)
Author:
cbyrom
Message:

Refactor ndgRetrieve, making it much more structured and turning it into an
object to allow simple re-use; improve documentation and logging; and
update unit tests.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • ndgCommon/trunk/ndg/common/src/dal/ndgRetrieve.py

    r4932 r4988  
    11''' 
    2  Given an uri (an instance of ndgObject), retrieve it, with security in place. 
     2 Class allowing the retrieval of ndg documents via their ndgObject, with  
     3 security in place. 
    34  
    4  @author: B Lawrence? 
     5 @author: B Lawrence?, C Byrom, Tessella Feb 09 
    56''' 
    67import cgi,time, logging 
    7 import DocumentRetrieve 
    88from ndg.common.src.clients.xmldb.eXist.searchclient import SearchClient 
     9from ndg.common.src.clients.http.httpsearchclient import HTTPSearchClient 
    910import ndg.common.src.clients.xmldb.eXist.dbconstants as dc 
    1011from ndg.common.src.clients.ws.discoveryserviceclient import DiscoveryServiceClient 
     
    1314 
    1415class NDGRetrieveError(Exception): 
    15     """Exception handling for NDG Retrieve method.""" 
     16    """ 
     17    Exception handling for NDG Retrieve method. 
     18    """ 
    1619    def __init__(self, msg): 
    1720        logging.error(msg) 
     
    1922 
    2023 
    21 def ndgRetrieve(uri, config, output='',remote=0, discovery=1): 
     24class NDGRetrieve(object): 
     25 
     26    def __init__(self, config, useRemoteEXist = False, useDiscovery = True): 
     27        ''' 
     28        Constructor for NDGRetrieve object 
     29        @param config: config object to use with object 
     30        @keyword useRemoteEXist: if True, set default behaviour to use a remote eXist 
     31        DB when retrieving data - via a (secured) restful http call.  Default = False 
     32        @keyword useDiscovery: if True, AND the schema is one of [DIF,MDIP,or ISO] 
     33        use the discovery ws to retrieve ndg docs 
     34        NB, cannot have both the useRemoteEXist and useDiscovery = True 
     35        - also, note, these are just the default values; they can be overridden on  
     36        calls to retrieveDoc()  
     37        ''' 
     38        logging.info("Instantiating NDGRetrieve") 
     39        self.config = config 
     40        self.__getEXistConfigData() 
     41        self.useRemoteEXist = useRemoteEXist 
     42        self.useDiscovery = useDiscovery 
     43        logging.info("NDGRetrieve instantiated") 
     44 
    2245     
    23     ''' Given an uri (an instance of ndgObject), retrieve it, with security in place. 
    24             If output is not '' it should be desired output schema! 
    25             If remote is non zero, then attempt to get the document 
    26         from a remote exist site via a (secured) restful http call 
    27             If discovery is non-zero AND the schema is in  
    28                 [DIF,MDIP,or ISO] 
    29         then use the discoveryserviceclient class to obtain the document rather 
    30         than direct from an eXist database.''' 
    31      
    32     def getws(config,uri,remote): 
    33         ''' Get a ws connection to the local exist database ''' 
    34         logging.debug("Getting WS connection to eXist DB") 
    35         # The logic here is that 
    36         #    this code is running on server, which supports repositories sr 
    37         #    this particular object needs the repository for uri.repository 
    38         #    which had better be the same as sr, and then we use the 
    39         #    actual exist repository er. 
    40         #    I'm sure this can be cleaned up further :-) 
    41         if remote and discovery: 
    42             raise ValueError, 'Invalid options to ndgRetrieve (remote AND discovery impossible)' 
    43         if discovery: 
    44             logging.debug("Using Discovery service") 
    45             if uri.schema in no.DISCOVERY_SCHEMES: 
    46                 ws = DiscoveryServiceClient() 
    47             else: 
    48                 raise NDGRetrieveError('Invalid schema (%s) with discovery \ 
    49                     option in ndgRetrieve' %uri.schema) 
     46    def __getClient(self, ndgObject, useRemoteEXist, useDiscovery): 
     47        '''  
     48        Set up the client to retrieve the ndg doc - NB, this can be 
     49        via a webservice client or an eXist client - which implement the 
     50        interfacesearchclient  
     51        @param ndgObject: ndgObject representing doc to retrieve 
     52        @param useRemoteEXist: if True, set default behaviour to use a remote eXist 
     53        DB when retrieving data - via a (secured) restful http call. 
     54        @param useDiscovery: if True, AND the schema is one of [DIF,MDIP,or ISO] 
     55        use the discovery ws to retrieve ndg docs 
     56        ''' 
     57        logging.debug("Getting approprate connection to retrieve document") 
     58        if useRemoteEXist and useDiscovery: 
     59            raise ValueError('Invalid options to NDGRetrieve (useRemoteEXist AND useDiscovery impossible)') 
     60         
     61        if useDiscovery: 
     62            return self.__getDiscoveryClient(ndgObject) 
     63 
     64        return self.__getEXistClient(ndgObject, useRemoteEXist) 
     65 
     66 
     67    def __getDiscoveryClient(self, ndgObject): 
     68        ''' 
     69        Return discovery client, if appropriate, for retrieving ndgObject 
     70        @param ndgObject: ndgObject associated with doc to retrieve 
     71        @raise NDGRetrieveError if object not of appropriate schema to use 
     72        the discovery service for 
     73        @return DiscoveryServiceClient 
     74        ''' 
     75        logging.debug("Getting Discovery service client") 
     76        if ndgObject.schema in no.DISCOVERY_SCHEMES: 
     77            client =  DiscoveryServiceClient() 
     78            logging.debug("- returning discovery service client") 
     79            return client 
    5080        else: 
    51             logging.debug("Using Browse service") 
     81            raise NDGRetrieveError('Invalid schema (%s) with useDiscovery \ 
     82                option in NDGRetrieve - should be one of %s'  
     83                %(ndgObject.schema, no.DISCOVERY_SCHEMES)) 
     84 
     85 
     86    def __getEXistClient(self, ndgObject, useRemoteEXist): 
     87        ''' 
     88        Return EXist client, if appropriate, for retrieving ndgObject 
     89        @param ndgObject: ndgObject associated with doc to retrieve 
     90        @param useRemoteEXist: if True, set default behaviour to use a remote eXist 
     91        DB when retrieving data - via a (secured) restful http call. 
     92        @raise NDGRetrieveError if configuration not set correctly 
     93        @return client with getNDGDoc method available to retrieve ndgObject 
     94        from an eXist DB 
     95        ''' 
     96        logging.debug("Getting Browse service client") 
     97 
     98        # lookup the repository associated with the ndgObject data - and compare 
     99        browseRepository = None 
     100        try: 
     101            browseRepository = self.config.get('NDG_B_SERVICE', ndgObject.repository) 
     102        except: 
     103            raise NDGRetrieveError("Config file not available or incomplete " + \ 
     104                "- cannot find data for NDG_B_SERVICE, %s" %ndgObject.repository) 
     105         
     106        logging.debug("NDG_B_SERVICE: '%s', default repository: '%s'"  
     107                      %(browseRepository, self.localRepository)) 
     108 
     109        # firstly check we are using a locally available repository 
     110        if browseRepository == self.localRepository: 
    52111            try: 
    53                 r=config.get('NDG_B_SERVICE',uri.repository) 
    54                 sr=config.get('DEFAULT','repository') 
    55                 logging.debug("NDG_B_SERVICE: '%s', default repository: '%s'" %(r,sr)) 
     112                logging.debug("Data available locally - in eXist repository, '%s'"  
     113                              %ndgObject.repository) 
     114                eXistDB = self.config.get('NDG_EXIST', ndgObject.repository) 
     115 
     116                logging.debug("Retrieving data from eXist DB, '%s'" %eXistDB) 
     117                client = SearchClient(dbHostName = eXistDB, configFileName = self.pwf) 
     118                logging.debug("Returning client to local eXist DB") 
     119                return client 
    56120            except: 
    57                 raise NDGRetrieveError("Config file not available or incomplete " + \ 
    58                     "- cannot find data for NDG_B_SERVICE, %s" %uri.repository) 
     121                raise NDGRetrieveError('Config file not available or has \ 
     122                    no [NDG_EXIST] setting') 
     123 
     124        # if not local, we need to be able to use remote lookups 
     125        if not useRemoteEXist: 
     126            raise NDGRetrieveError('The uri [%s] is not available on local repository [%s]' 
     127                                   %(ndgObject, self.localRepository)) 
     128 
     129        logging.debug("Using remote eXist DB to retrieve data") 
     130        if not self.proxyServer: 
     131            raise NDGRetrieveError('No proxyServer settings available in config; these ' + \ 
     132                                   'are required for looking up remote data') 
     133        client =  HTTPSearchClient(browseRepository, proxyServer = self.proxyServer) 
     134        logging.debug("Returning client to remote eXist DB") 
     135        return client 
     136         
     137 
     138    def __getEXistConfigData(self): 
     139        ''' 
     140        Lookup standard eXist configuration data required to retrieve the docs 
     141        ''' 
     142        self.localRepository = self.config.get('DEFAULT', 'repository') 
     143        self.pwf= self.config.get('NDG_EXIST','passwordFile') 
     144        self.proxyServer = self.config.get('DEFAULT','proxyServer') 
     145         
     146 
     147 
     148    def retrieveDoc(self, ndgObject, outputFormat = None, 
     149                    useRemoteEXist = None, useDiscovery = None): 
     150        ''' 
     151        Retrieve the doc represented by the specified ndgURI in the specified format 
     152        - using security when set up.  
     153        @param ndgObject: ndgObject representing doc to retrieve 
     154        @keyword outputFormat: string specifying output format to return doc in. 
     155        Default = None => doc is returned in original format  
     156        @keyword useRemoteEXist: if True, set default behaviour to use a remote eXist 
     157        DB when retrieving data - via a (secured) restful http call.  Default = False 
     158        @keyword useDiscovery: if True, AND the schema is one of [DIF,MDIP,or ISO] 
     159        use the discovery ws to retrieve ndg docs 
     160        NB, cannot have both the useRemoteEXist and useDiscovery = True 
     161        - also, note, if either keyword is left as None, the default value set  
     162        on the constructor is used for this call  
     163        @return: status, doc: status = 0 if fails, or 1 if succeeds. doc = retrieved 
     164        doc in string format 
     165        ''' 
     166        logging.info("Retrieving NDG doc with URI, %s" %ndgObject) 
     167        # allow users to adjust retrieval settings on a per retrieval basis 
     168        remote = self.useRemoteEXist 
     169        if useRemoteEXist is not None: 
     170            remote = useRemoteEXist 
    59171             
    60             if r <> sr: 
    61                 if not remote: 
    62                     return 0,'The uri [%s] is not available on [%s]'%(uri,sr) 
    63                 else: 
    64                     try: 
    65                         ps=config.get('DEFAULT','proxyServer') 
    66                     except: 
    67                         raise NDGRetrieveError('Config file not available \ 
    68                             or has no [proxyServer] setting') 
    69                 logging.debug("Retrieving data from remote proxy server, '%s'" %ps) 
    70                 ws = DocumentRetrieve.ndgHTTP(r,proxyServer=ps) 
    71             else: 
    72                 try: 
    73                     logging.info("Lookup up repository:'%s'" %uri.repository) 
    74                     er=config.get('NDG_EXIST',uri.repository) 
    75                     pwf=config.get('NDG_EXIST','passwordFile') 
    76                 except: 
    77                     raise NDGRetrieveError('Config file not available or has \ 
    78                         no [NDG_EXIST] setting') 
    79  
    80                 logging.debug("Retrieving data from repository, '%s'" %er) 
    81                 ws = SearchClient(dbHostName = er, 
    82                                   configFileName = pwf) 
    83         return 1,ws 
    84  
    85      
    86     status,ws=getws(config,uri,remote) 
    87    
    88     if not status: return status,ws 
    89             
    90     if uri.schema==no.NDGA0_DOC_TYPE: 
    91         target = dc.NDG_A_COLLECTION_PATH 
    92     elif uri.schema == no.NUMSIM_DOC_TYPE: 
    93         target = dc.NUMSIM_COLLECTION_PATH 
    94     elif uri.schema == no.ATOM_DOC_TYPE: 
    95         target = dc.ATOM_COLLECTION_PATH 
    96     elif uri.schema == no.ATOM_BACKUP_DOC_TYPE: 
    97         target = dc.BACKUP_COLLECTION_PATH 
    98     elif uri.schema == no.BROWSE_DIF_DOC_TYPE: 
    99         target = dc.DIF_COLLECTION_PATH  
    100     else: 
    101         target = dc.MOLES_COLLECTION_PATH 
    102     
    103     # strip trailing slashes, if required 
    104     if target.endswith('/'): 
    105         target = target[0:-1] 
    106          
    107     #do the actual retrieve: 
    108     e=None 
    109     try: 
    110         time1=time.time() 
    111          
    112         r=ws.getNDGDoc(uri.repository,uri.schema,uri.localID,targetCollection=target) 
    113         time2=time.time()-time1 
    114         logging.info('Document retrieve [%s] took [%s]'%(uri,time2)) 
    115     except Exception,e: 
    116         e=str(e) 
    117         r=e 
    118         logging.error('Document retrieve [%s] failed [%s]'%(uri,e)) 
    119      
    120  
    121     # did we get a sensible answer? 
    122     if isinstance(r,int): 
    123         return 0,'<p> There are %s identifiers matching your request! </p>'%r 
    124     if e is not None: 
    125         return 0,'Document retrieve [%s] failed [%s]'%(uri,cgi.escape(e)) 
    126      
    127     # we used to return an xmlHandler instance, but we don't do that any more ... 
    128     return 1,r 
    129      
     172        discovery = self.useDiscovery 
     173        if useDiscovery is not None: 
     174            discovery = useDiscovery 
     175             
     176        client = self.__getClient(ndgObject, remote, discovery) 
     177       
     178        if ndgObject.schema==no.NDGA0_DOC_TYPE: 
     179            target = dc.NDG_A_COLLECTION_PATH 
     180        elif ndgObject.schema == no.NUMSIM_DOC_TYPE: 
     181            target = dc.NUMSIM_COLLECTION_PATH 
     182        elif ndgObject.schema == no.ATOM_DOC_TYPE: 
     183            target = dc.ATOM_COLLECTION_PATH 
     184        elif ndgObject.schema == no.ATOM_BACKUP_DOC_TYPE: 
     185            target = dc.BACKUP_COLLECTION_PATH 
     186        elif ndgObject.schema == no.BROWSE_DIF_DOC_TYPE: 
     187            target = dc.DIF_COLLECTION_PATH  
     188        else: 
     189            target = dc.MOLES_COLLECTION_PATH 
     190        
     191        # strip trailing slashes, if required 
     192        if target.endswith('/'): 
     193            target = target[0:-1] 
     194             
     195        #do the actual retrieve: 
     196        error = None 
     197        try: 
     198            time1 = time.time() 
     199            doc = client.getNDGDoc(ndgObject.repository,ndgObject.schema, 
     200                                   ndgObject.localID, targetCollection=target) 
     201            time2 = time.time()-time1 
     202            logging.info('Document retrieve [%s] took [%s]'%(ndgObject, time2)) 
     203        except Exception,e: 
     204            error = str(e) 
     205            doc = error 
     206            logging.error('Document retrieve [%s] failed [%s]'%(ndgObject, error)) 
     207         
     208        if error is not None: 
     209            return 0,'Document retrieve [%s] failed [%s]'%(ndgObject, cgi.escape(error)) 
     210         
     211        return 1, doc 
     212         
Note: See TracChangeset for help on using the changeset viewer.