Changeset 4229


Ignore:
Timestamp:
24/09/08 15:26:06 (11 years ago)
Author:
cbyrom
Message:

Add support for listing and summarising atom docs via ndgDirectory and the eXistInterface class, and add the xquery files needed for this;
add a new method to retrieve the collection to which an atom with a specified ID belongs, including the associated xquery file;
extend the tests to exercise these new features; add a custom Atom error (AtomError);
improve preservation of key atom attributes when loading existing atoms into an Atom object; and improve how the default atom collection is determined.

Location:
exist/trunk/python/ndgUtils
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • exist/trunk/python/ndgUtils/eXistConnector.py

    r4209 r4229  
    123123     
    124124    def getDoc(self,collectionName,documentName): 
    125         ''' Lightweight interface to the getDocument method ''' 
    126         name='%s/%s'%(collectionName,documentName) 
    127         r=self.xmlrpc.getDocumentAsString(name,{}) 
    128         return r 
     125        '''  
     126    Lightweight interface to the getDocument method  
     127    ''' 
     128    # atoms have a more structured collection directory - as a result 
     129    # AtomList.xq returns the full path to the listed docs - so avoid 
     130    # concat'ing the collectionName for these 
     131        if documentName.startswith(collectionName): 
     132            name = documentName 
     133        else: 
     134            name='%s/%s'%(collectionName,documentName) 
     135        r=self.xmlrpc.getDocumentAsString(name,{}) 
     136        return r 
    129137     
    130138    def removeCollection(self,collectionPath): 
  • exist/trunk/python/ndgUtils/eXistInterface.py

    r4160 r4229  
    9090        return r 
    9191         
    92     def search(self,term,start=1,howmany=20,target=None,scope=None,bbox=None,dateRange=None,geoSearchType=None): 
     92    def search(self,term,start=1,howmany=20, \ 
     93               target=None,scope=None,bbox=None, \ 
     94               dateRange=None,geoSearchType=None): 
    9395        ''' Provides a search interface that mimics the WSDL search interface, except that 
    9496        the target used is the exist collection name, and scope, bbox and dateRange are ignored, 
    9597        and a python summary object is returned ''' 
    9698        #select the right query according to the docType 
    97         xqName={'ndg_B_metadata':'molesSummary','NumSim':'numsimSummary'}[target] 
     99        xqName={'ndg_B_metadata':'molesSummary',\ 
     100                'NumSim':'numsimSummary',\ 
     101                'Atom': 'AtomSummary'}[target] 
    98102        xquery=ndgXqueries()[xqName] 
    99103        xquery=xquery.replace('SEARCHSTRING',term) 
  • exist/trunk/python/ndgUtils/models/Atom.py

    r4219 r4229  
    2020from ndgUtils.vocabtermdata import VocabTermData as VTD 
    2121from ndgUtils.models import MolesEntity as ME 
     22 
     23 
     24class AtomError(Exception): 
     25    """ 
     26    Exception handling for Atom class. 
     27    """ 
     28    def __init__(self, msg): 
     29        logging.error(msg) 
     30        Exception.__init__(self, msg) 
     31 
    2232 
    2333class Person(): 
     
    193203 
    194204 
    195     def getEXistCollectionPath(self): 
     205    def getDefaultCollectionPath(self): 
    196206        ''' 
    197207        Determine the correct collection to use for the atom in eXist 
     
    206216            collectionPath += eXistConnector.DEPLOYMENT_COLLECTION_PATH 
    207217         
    208         if self.ME: 
    209             collectionPath += self.ME.providerID + "/" 
     218        if not self.ME.providerID: 
     219            raise AtomError("Error: cannot determine atom collection path because " + \ 
     220                            "the provider ID is not defined") 
     221             
     222        collectionPath += self.ME.providerID + "/" 
    210223        return collectionPath 
    211224 
     
    324337        molesLink.attrib["rel"] = "related" 
    325338         
    326         logging.debug(self.relatedLinks) 
    327339        for relatedLink in self.relatedLinks: 
    328340            root.append(relatedLink.toXML()) 
     
    481493                self.atomAuthors.append(author) 
    482494                 
    483             # NB, must lookup the ID following the ME lookup  
    484             # - since the provider ID from this is required to construct  
    485             # the browse URL 
    486             id = tree.findtext('id') 
    487             if id: 
    488                 id = id.split('__ATOM__')[1] 
    489                 self.setDatasetID(id) 
     495        self.atomID = tree.findtext('id') 
    490496 
    491497        self._parseCategoryData(tree.findall('category')) 
     
    631637        self.atomBrowseURL = linkData['self'][0].href 
    632638         
     639        self.datasetID = self.atomBrowseURL.split("__ATOM__")[-1] 
     640        self.atomName = self.datasetID + ".atom" 
     641         
    633642        # now remove this value and the associated moles doc link 
    634643        del linkData['self'] 
  • exist/trunk/python/ndgUtils/models/existdbclient.py

    r4220 r4229  
    88from ndgUtils.eXistInterface import ndg_eXist 
    99from ndgUtils.eXistConnector import eXistConnector as ec 
     10from ndgUtils.ndgXqueries import ndgXqueries 
     11 
     12try: 
     13    from xml.etree import ElementTree as ET 
     14except ImportError: 
     15    try: 
     16        import ElementTree as ET 
     17    except ImportError: 
     18        import elementtree.ElementTree as ET 
    1019 
    1120class eXistDBClient: 
    1221     
    1322     
    14     def __init__(self, configFile): 
     23    def __init__(self, configFile = None, eXistDBHostname = None): 
    1524        ''' 
    1625        Initialise a connection to the eXistDB 
    1726        ''' 
    1827        logging.info("Initialising connection to eXist DB") 
     28        self.eXistDBHostname = eXistDBHostname 
     29         
     30        inputs = {} 
    1931        # Firstly load in config data 
    20         self._loadDBDetails(configFile) 
    21          
     32        if configFile: 
     33            self._loadDBDetails(configFile) 
     34            inputs['passwordFile'] = configFile 
     35             
     36        if self.eXistDBHostname: 
     37            inputs['db'] = self.eXistDBHostname 
     38             
    2239        # Now set up the connection 
    23         self.xmldb = ndg_eXist(db = self.eXistDBHostname, passwordFile = configFile) 
     40        self.xmldb = ndg_eXist(**inputs) 
    2441         
    2542        # set up any collections required - NB, if these already exist they won't cause any files to be lost 
    26         self._setUpEXistCollections() 
     43        self._setUpEXistAtomCollections() 
    2744        logging.info("eXist DB connection initialised") 
    2845 
     
    4158 
    4259 
    43     def _setUpEXistCollections(self): 
     60    def _setUpEXistAtomCollections(self): 
    4461        ''' 
    4562        Set up the required eXist collections needed for running the granulator script 
     
    87104        ''' 
    88105        logging.info("Retrieving info for file, '%s'" %docPath) 
     106         
    89107        doc = self.xmldb.executeQuery('doc("' + docPath + '")') 
    90108         
     
    184202        @return: True, if file created successfully 
    185203        ''' 
    186         logging.info("Adding file, '%s' to eXist DB collection, '%s'" %(fileName, collection)) 
     204        logging.info("Adding file, '%s' to eXist DB collection, '%s'" \ 
     205                     %(fileName, collection)) 
    187206        logging.debug("data: %s" %xml) 
    188207 
     
    232251        self.createEXistFile(xml, collection, fileName) 
    233252 
     253 
     254    def getAtomFileCollectionPath(self, atomID): 
     255        ''' 
     256        Given an atom id, determine and return the collection path in eXist 
     257        of the associated atom file 
     258        @param atom: atom id to look up 
     259        @return: collection path, if it exists, None, otherwise  
     260        ''' 
     261        logging.info("Looking up collection path for atom ID, '%s'" %atomID) 
     262        xq = ndgXqueries()['atomFullPath'] 
     263        xq = xq.replace('TargetCollection', ec.BASE_COLLECTION_PATH) 
     264        xq = xq.replace('LocalID', atomID) 
     265 
     266        id, doc = self.xmldb.executeQuery(xq) 
     267        if doc['hits'] == 0: 
     268            logging.info("No document found with the specified ID") 
     269            return None 
     270 
     271        doc = self.xmldb.retrieve(id,0,{}) 
     272 
     273        docET = ET.fromstring(doc) 
     274        collPath = docET.text + '/' 
     275        logging.debug("Found collection path, '%s'" %collPath) 
     276        return collPath 
     277         
    234278             
    235279    def createAtomInExist(self, atom): 
     280        ''' 
     281        Create an atom in the eXist DB 
     282        @param atom: atom object to create in the DB 
     283        ''' 
    236284        logging.info("Creating atom in eXist") 
    237         eXistCollection = atom.getEXistCollectionPath() 
    238          
    239         # ensure this collection exists - otherwise an exception will be thrown 
    240         self.createCollections([eXistCollection]) 
    241         self.createOrUpdateEXistFile(atom.toPrettyXML(), \ 
    242                                      eXistCollection, atom.atomName) 
     285        eXistCollection = self.getAtomFileCollectionPath(atom.atomID) 
     286         
     287        # if collection not found, assume we're dealing with a new atom; get its 
     288        # default collection 
     289        if not eXistCollection: 
     290            eXistCollection = atom.getDefaultCollectionPath() 
     291        # create backup of atom if it already exists 
     292        else: 
     293            self.backupEXistFile(eXistCollection, atom.atomName) 
     294             
     295        self.createEXistFile(atom.toPrettyXML(), eXistCollection, atom.atomName) 
     296             
    243297        logging.info("Atom created in eXist") 
  • exist/trunk/python/ndgUtils/models/testexistdbclient.py

    r4220 r4229  
    1414     
    1515     # collection path for CSML docs 
    16     VALID_COLLECTION_PATH = eXistConnector.NDG_A_COLLECTION_PATH 
     16    VALID_COLLECTION_PATH = eXistConnector.BASE_COLLECTION_PATH 
    1717    VALID_FILE = "tst.xml" 
    1818    VALID_FILE_PATH = VALID_COLLECTION_PATH + VALID_FILE 
     
    2525        print "Setting up..." 
    2626        self.tidyUp = False    #     switch to indicate to tearDown that we need to delete the created eXist file 
    27         self.dbc = dbc.eXistDBClient(self.DBCONFIG_FILE) 
     27        self.dbc = dbc.eXistDBClient(configFile = self.DBCONFIG_FILE) 
    2828         
    2929         
    3030    def testSetUpEXistCollections(self): 
    31         self.dbc._setUpEXistCollections() 
     31        try: 
     32            self.dbc._setUpEXistAtomCollections() 
     33        except: 
     34            fail("Should not have thrown an exception") 
    3235         
    3336    def testLoadDBDetailsValid(self): 
     
    8992    def testBackupEXistFileMissing(self): 
    9093        self.assertRaises(SystemError, self.dbc.backupEXistFile, self.VALID_COLLECTION_PATH, self.VALID_FILE) 
     94 
     95             
     96    def testGetAtomFileCollectionPath(self): 
     97        self.tidyUp = True 
     98        self.assertEquals(self.dbc.createEXistFile(data.testdata.xmlString, self.VALID_COLLECTION_PATH, self.VALID_FILE), True) 
     99        self.dbc.getAtomFileCollectionPath(data.testdata.id) 
     100        #self.dbc.getAtomFileCollectionPath('tag:localhost:5000,2008-09-23:/view/neodc.nerc.ac.uk__ATOM__activity_11737124322917004') 
     101 
    91102     
    92103    def tearDown(self): 
  • exist/trunk/python/ndgUtils/ndgDirectory.py

    r3840 r4229  
    2020         
    2121        #get the right namespace 
    22         self.ns={'DIF':'http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/','MOLES':'http://ndg.nerc.ac.uk/moles','MDIP':'http://www.oceannet.org/mdip/xml'}[docType] 
     22        self.ns={'DIF':'http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/',\ 
     23                 'MOLES':'http://ndg.nerc.ac.uk/moles',\ 
     24                 'MDIP':'http://www.oceannet.org/mdip/xml',\ 
     25                 'ATOM': 'http://www.w3.org/2005/Atom'}[docType] 
    2326        #load all queries 
    2427        queries=ndgXqueries.ndgXqueries() 
    2528        #select the right query according to the docType 
    26         xqName={'DIF':'DIFList','MOLES':'molesList', 'MDIP':'MDIPList', 'moles':'molesList'}[docType] 
     29        xqName={'DIF':'DIFList','MOLES':'molesList', \ 
     30                'MDIP':'MDIPList', 'moles':'molesList', \ 
     31                'ATOM': 'atomList'}[docType] 
    2732        self.docType=docType 
    2833        xq=queries[xqName] 
    2934        # set the query up with the correct target collection 
    3035        self.xq=xq.replace('TargetCollection',self.targetCollection) 
    31         
     36         
    3237        #make sure we have a db available 
    3338        self.xmldb=ndg_eXist(db=existHost) 
     
    6469        of the specific member within the collection ''' 
    6570        q="'%s'"%self.targetCollection 
    66         if fileName is not None: q+=",'%s'"%fileName 
     71        if fileName: 
     72            # if we're dealing with atom docs, these include the full collection  
     73            # path already - so strip this off 
     74            if fileName.startswith(self.targetCollection): 
     75                fileBits = fileName.split('/') 
     76                fileName = fileBits[-1] 
     77                tc = '/' + '/'.join(fileBits[0:-1]) 
     78                q = "'%s', '%s'" %(tc, fileName) 
     79            else: 
     80                q+=",'%s'"%fileName 
    6781        xq="<created>{xmldb:created(%s)}</created>"%q 
    6882        r=self.xmldb.executeChunkedQuery(xq,1,1) 
  • exist/trunk/python/ndgUtils/ndgRetrieve.py

    r4187 r4229  
    6969            else: 
    7070                try: 
    71                     logging.info("Lookup up repository:-%s-" %uri.repository) 
     71                    logging.info("Lookup up repository:'%s'" %uri.repository) 
    7272                    er=config.get('NDG_EXIST',uri.repository) 
    7373                    pwf=config.get('NDG_EXIST','passwordFile') 
  • exist/trunk/python/ndgUtils/test_ndgDirectory.py

    r3161 r4229  
    55class TestCase(unittest.TestCase): 
    66     
     7    GLUE_DB = 'glue.badc.rl.ac.uk' 
     8    CHINOOK_DB = 'chinook.badc.rl.ac.uk' 
    79    def setUp(self):  
    810        tc='/db/discovery/original/DIF/badc.nerc.ac.uk' 
    9         host='glue.badc.rl.ac.uk' 
    10         self.ndgDir=ndgDirectory(tc,host,docType='DIF') 
     11        self.ndgDir=ndgDirectory(tc, self.GLUE_DB, docType='DIF') 
    1112     
    1213    def testDirectory(self): 
     
    2122        print self.ndgDir.created(self.ndgDir.members[0]['fileName']) 
    2223         
     24    def testAtomDocsDirectory(self): 
     25        self.ndgDir=ndgDirectory('/db/atoms', self.CHINOOK_DB,docType='ATOM') 
     26        self.assertTrue(len(self.ndgDir.members) > 0) 
     27        self.assertTrue(len(self.ndgDir.files) > 0) 
     28        self.assertTrue(len(self.ndgDir.entries) > 0) 
     29        self.assertNotEqual(self.ndgDir.get(self.ndgDir.members[0]['fileName']), None) 
     30         
     31    def testAtomCreationDates(self): 
     32        self.ndgDir=ndgDirectory('/db/atoms', self.CHINOOK_DB,docType='ATOM') 
     33        self.assertNotEqual(self.ndgDir.created(), None) 
     34        self.assertNotEqual(self.ndgDir.created(fileName=self.ndgDir.members[0]['fileName']), None) 
     35         
    2336 
    2437if __name__=="__main__": 
Note: See TracChangeset for help on using the changeset viewer.