Ignore:
Timestamp:
15/01/09 10:24:29 (11 years ago)
Author:
cbyrom
Message:

Create new class (and test suite) to do all the eXist DB setting up - and move this code from the
eXistdbclient to tidy things up.
Allow feed publishing to be done synchronously or asynchronously.
Improve loading of non-python resources - adding code to store the various schemata and indexes
required for setting up eXist
Improve parsing of atom URLs to allow browser host to change
Fix moles data in testconstants
Add tests for looking up associated atom data
Add indexing configuration files - to optimise atom processing in eXist + add code in initialiser class to set these up on eXist.
Add all the schemata required for atom schema validation in eXist + add code in initialiser class to set these up on eXist;
this allows eXist to be run without needing access beyond the firewall.

Location:
ndgCommon/trunk/ndg/common/src
Files:
1 added
9 edited

Legend:

Unmodified
Added
Removed
  • ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/eXistConnector.py

    r4793 r4810  
    5757    DIF_COLLECTION_PATH = "/db/DIF/" 
    5858    PROVIDER_FEED_PATH = BASE_COLLECTION_PATH + "providerFeeds/" 
     59    SCHEMATA_COLLECTION_PATH = BASE_COLLECTION_PATH + "schemata" 
     60    # collection for storing misc files - e.g. the org data snippets 
     61    RESOURCES_COLLECTION_PATH = BASE_COLLECTION_PATH + "resources" 
     62    EXIST_CONFIG_PATH = "/db/system/config/" 
    5963 
    6064    """Access class for eXist""" 
  • ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/existdbclient.py

    r4793 r4810  
    1212from ndg.common.src.dal.DocumentRetrieve import DocumentRetrieve as DR 
    1313from ndg.common.src.lib.atomutilities import * 
     14from ndg.common.src.lib.existinitialiser import EXistInitialiser 
    1415from xml.etree import ElementTree as ET 
    1516from threading import Thread 
     
    8485        # to do a rollback if there's an error encountered 
    8586        self.backupName = None 
     87 
     88        # if we preload the collections data, it'll be stored here 
     89        self.collections = None 
    8690         
    8791        # NB, there are two routes through here: if a config file is specified 
     
    105109         
    106110        if setUpDB: 
    107             # set up any collections required - NB, if these already exist they won't cause any files to be lost 
    108             self.__setUpEXistAtomCollections() 
    109              
    110             # add the schema required for atom validation 
    111             self.__addAtomSchema() 
    112  
    113             # add common data missing from atoms - but used to create output DIF records 
    114             self.__uploadOrgData() 
    115              
    116             # set up the required atom feeds 
    117             self.feedClient.setupBasicFeeds() 
    118          
    119         self.collections = None 
     111            # set up the eXist DB appropriately for use with ndgCommon 
     112            initialiser = EXistInitialiser(self) 
     113            initialiser.initialise() 
     114         
    120115        if loadCollectionData: 
    121116            self.collections = self.getAllAtomCollections() 
     
    132127        logging.debug("Getting atom schema data") 
    133128        if not self.atomSchema: 
    134             self.atomSchema = ec.BASE_COLLECTION_PATH + \ 
     129            self.atomSchema = ec.SCHEMATA_COLLECTION_PATH + '/' + \ 
    135130                self.xmldb.xq.ATOM_MOLES_SCHEMA  + '.xsd' 
    136131 
     
    138133 
    139134    AtomSchema = property(fget=__getSchema, doc="Atom schema path") 
    140  
    141  
    142     def __uploadOrgData(self): 
    143         '''         
    144         Upload the organisations DIF file to eXist - this is required for the atom->DIF output 
    145         ''' 
    146         logging.info("Uploading organisation data to eXist") 
    147         schemae = [self.xmldb.xq.ATOM_SCHEMA, self.xmldb.xq.MOLES_SCHEMA, self.xmldb.xq.ATOM_MOLES_SCHEMA] 
    148         for fileName, orgData in self.xmldb.xq.resources.items(): 
    149             self.createEXistFile(orgData, ec.BASE_COLLECTION_PATH, fileName) 
    150         logging.info("Data uploaded") 
    151135 
    152136 
     
    178162    def checkAtomSchemaCompliance(self, atomPath, atom = None, isDebug = False): 
    179163        ''' 
    180         Validate the specified atom in eXist with the atom schemae in eXist 
     164        Validate the specified atom in eXist with the atom schemata in eXist 
    181165        @param atomPath: path to the atom in eXist 
    182166        @keyword atom: if set to an atom, this will be created temporarily in eXist 
     
    247231        logging.info("Validation complete") 
    248232        return errors 
    249      
    250  
    251     def __setUpEXistAtomCollections(self): 
    252         ''' 
    253         Set up the required eXist collections needed for running the granulator script 
    254         ''' 
    255         logging.info("Ensuring required collections are available in eXist") 
    256         for col in [ec.BASE_COLLECTION_PATH, ec.BACKUP_COLLECTION_PATH]: 
    257             for type in [ec.OLD_COLLECTION_PATH, ec.PUBLISHED_COLLECTION_PATH, \ 
    258                          ec.SMALL_P_PUBLISHED_COLLECTION_PATH, ec.WORKING_COLLECTION_PATH]: 
    259                 self.xmldb.createCollection(col) 
    260                 self.xmldb.createCollection(col + type) 
    261                 self.xmldb.createCollection(col + type + ec.DE_COLLECTION_PATH) 
    262                 self.xmldb.createCollection(col + type + ec.DEPLOYMENT_COLLECTION_PATH) 
    263                 self.xmldb.createCollection(col + type + ec.DEPLOYMENTS_COLLECTION_PATH) 
    264                 self.xmldb.createCollection(col + type + ec.GRANULE_COLLECTION_PATH) 
    265  
    266         self.xmldb.createCollection(ec.DIF_COLLECTION_PATH) 
    267         self.xmldb.createCollection(ec.PROVIDER_FEED_PATH) 
    268         logging.info("Required collections available") 
    269          
    270  
    271     def __addAtomSchema(self): 
    272         ''' 
    273         Add the required atom schema to the atoms collection - to allow validation 
    274         of input atoms 
    275         ''' 
    276         logging.info("Adding atom schema to eXist") 
    277         schemata = [self.xmldb.xq.ATOM_SCHEMA, self.xmldb.xq.MOLES_SCHEMA, self.xmldb.xq.ATOM_MOLES_SCHEMA] 
    278         for schema in schemata: 
    279             xml = self.xmldb.xq.getSchema(schema) 
    280             self.createEXistFile(xml, ec.BASE_COLLECTION_PATH, schema + '.xsd') 
    281         logging.info("- schema added") 
     233 
    282234         
    283235 
     
    450402        logging.debug("data: %s" %xml) 
    451403 
    452         # create the collection, in case it doesn't already exist - NB, this won't overwrite anything 
    453         self.createCollections([collection]) 
    454         status = self.xmldb.storeXML(xml, collection + "/" + fileName, overwrite=1)     
     404        # create the collection, if it doesn't already exist - NB, this won't overwrite anything 
     405        if self.collections is None or not self.collections.get(collection): 
     406            self.createCollections([collection]) 
     407             
     408        status = self.xmldb.storeXML(xml, collection + "/" + fileName, overwrite=1) 
    455409        if not status: 
    456410            errorMessage = "Command to create file in eXist did not complete successfully - exiting" 
     
    643597        @keyword replaceAtom: if False and the atom is already available in eXist 
    644598        @param runAsynch: if True, if a backup of an existing file, do this 
    645         asynchronously in a separate thread 
     599        asynchronously in a separate thread + do the feed publishing and DIF 
     600        creating in a separate thread, too 
    646601        then raise a ValueError. 
    647602        ''' 
     
    705660        # feeds 
    706661        if atom.isPublished(): 
    707             self.runAsynchAtomPublish(atom) 
     662            if runAsynch: 
     663                self.runAsynchAtomPublish(atom) 
     664            else: 
     665                self.__publishAtom(atom) 
    708666            
    709667        return atom 
  • ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/existdbfeedclient.py

    r4793 r4810  
    2828    PROVIDERLEVEL_ATOM_FEED_TITLE = 'Moles Atom Data - for provider, %s' 
    2929    PROVIDERLEVEL_DIF_FEED_TITLE = 'Moles Atom Data - for provider, %s' 
     30    DIF_ENTRY_TITLE = 'DIF Record' 
    3031     
    3132    REST_BASE_URL = '/exist/rest' 
     
    175176        browseURL = atom.atomBrowseURL 
    176177        if isDIFRecord: 
    177             titleString = 'DIF Record' 
     178            titleString = self.DIF_ENTRY_TITLE 
    178179            browseURL = browseURL.replace('__%s__' %ndgObject.ATOM_DOC_TYPE,  
    179180                                          '__%s__' %ndgObject.BROWSE_DIF_DOC_TYPE) 
  • ndgCommon/trunk/ndg/common/src/lib/atomvalidator.py

    r4793 r4810  
    1515from ndg.common.src.models.vocabtermdata import isValidTermURI 
    1616from ndg.common.src.models.Atom import Atom 
    17 from ndg.common.src.lib.ndgXqueries import ndgXqueries 
    1817from ndg.common.src.lib.utilities import isValidUnicode, simpleURLCheck, strftime 
    1918 
     
    122121         
    123122        # now check the vocab terms 
    124         #self.__validateVocabData() 
     123        self.__validateVocabData() 
    125124         
    126125        # check the atom conforms to the schema 
  • ndgCommon/trunk/ndg/common/src/lib/existatomvalidator.py

    r4793 r4810  
    1010from ndg.common.src.models.Atom import Atom 
    1111from atomvalidator import AtomValidator 
    12 from ndgXqueries import ndgXqueries 
    1312 
    1413     
  • ndgCommon/trunk/ndg/common/src/lib/ndgXqueries.py

    r4793 r4810  
    1919    ATOM_SCHEMA = "atom1.0" 
    2020     
    21     def __init__(self,directory='xquery'): 
     21    def __init__(self, directory='common/xmldb/'): 
    2222        ''' Loads up xqueries from the internal package directory ''' 
    23         files=pkg_resources.resource_listdir('ndgUtils',directory) 
     23 
     24        xqueryDir = directory + 'xquery' 
     25        files=pkg_resources.resource_listdir('ndg', xqueryDir) 
    2426        # this next bit to get rid of .svn and anything else in testing 
    25         xq=[] 
    26         xsd = [] 
     27        self.xq = {} 
    2728        for f in files: 
    28             if f.find('.xq') != -1:  
    29                 xq.append(f) 
    30             elif f.find('.xsd') != -1: 
    31                 xsd.append(f) 
    32                  
    33         #remember that .rstrip doesn't quite do what you think it does :-) 
    34         self.xq = dict([(re.sub('.xq\Z','',f),\ 
    35                        pkg_resources.resource_string('ndgUtils','%s/%s'%(directory,f))) for f in xq]) 
    36         self.xsd = dict([(re.sub('.xsd\Z','',f),\ 
    37                pkg_resources.resource_string('ndgUtils',\ 
    38                                              '%s/%s'%(directory, f))) for f in xsd]) 
    39          
     29            if f.endswith('.xq'): 
     30                self.xq[re.sub('.xq\Z','',f)] = \ 
     31                    pkg_resources.resource_string('ndg','%s/%s'%(xqueryDir,f)) 
     32 
     33        schemaDir = directory + 'schema' 
     34        files=pkg_resources.resource_listdir('ndg', schemaDir) 
     35        self.xsd = {} 
     36        for f in files: 
     37            if f.endswith('.xsd'): 
     38                self.xsd[re.sub('.xsd\Z','',f)] = \ 
     39                    pkg_resources.resource_string('ndg','%s/%s'%(schemaDir, f)) 
     40                           
    4041        # add the various xquery libraries, too 
    41         self.xqlib=dict() 
    42         dirs = ['xqueryLib/Vocabs', 'xqueryLib/Utilities', 'xqueryLib/StubB'] 
    43         for dir in dirs: 
    44             files = pkg_resources.resource_listdir('ndgUtils', dir) 
     42        self.xqlib = {} 
     43        for dir in ['Vocabs', 'Utilities', 'StubB']: 
     44            xqueryLibDir = xqueryDir + '/lib/' + dir 
     45            files = pkg_resources.resource_listdir('ndg', xqueryLibDir) 
    4546            for f in files: 
    46                 if f.find('.xquery') == -1:  
    47                     continue 
    48  
    49                 self.xqlib[f] = pkg_resources.resource_string('ndgUtils','%s/%s'%(dir,f)) 
     47                if f.endswith('.xquery'):  
     48                    self.xqlib[f] = \ 
     49                        pkg_resources.resource_string('ndg','%s/%s'%(xqueryLibDir,f)) 
    5050         
    5151        # also add the organisation data files - NB, this class should be generalised 
    5252        # to make more sense... 
    53         files = pkg_resources.resource_listdir('ndgUtils', 'resources') 
     53        resourceDir = directory + 'resources' 
     54        files = pkg_resources.resource_listdir('ndg', resourceDir) 
    5455        self.resources = {} 
    5556        for f in files: 
    56             if f.find('.xml') == -1:  
    57                 continue 
    58             self.resources[f] = pkg_resources.resource_string('ndgUtils','resources/%s'%f) 
     57            if f.endswith('.xml'):  
     58                self.resources[f] = \ 
     59                    pkg_resources.resource_string('ndg','%s/%s' %(resourceDir, f)) 
     60 
     61        # lastly, add the indexes 
     62        self.indexes = {} 
     63        for dir in ['eXist']: 
     64            indexDir = directory + 'indexing' + '/' + dir 
     65            files = pkg_resources.resource_listdir('ndg', indexDir) 
     66            for f in files: 
     67                if f.endswith('.xconf'):  
     68                    self.indexes[f] = \ 
     69                        pkg_resources.resource_string('ndg','%s/%s'%(indexDir,f)) 
    5970         
    6071    def __setitem__(self,key,value): 
  • ndgCommon/trunk/ndg/common/src/models/Atom.py

    r4793 r4810  
    899899        self.datasetID = self.atomBrowseURL.split("__ATOM__")[-1] 
    900900        self.atomName = self.datasetID + ".atom" 
    901         self.ndgURI = self.atomBrowseURL.split(VTD.BROWSE_ROOT_URL)[1] 
     901        # NB, only split on the stem, since the browse host may not be 
     902        # the same as that defined in VTD 
     903        self.ndgURI = self.atomBrowseURL.split(VTD.BROWSE_STEM_URL)[-1] 
    902904         
    903905        # now remove this value and the associated moles doc link 
     
    12261228        ''' 
    12271229        logging.info("Looking up %s info" %type) 
    1228          
    12291230        self.allActivities = [] 
    12301231        self.allObs = [] 
  • ndgCommon/trunk/ndg/common/src/models/MolesEntity.py

    r4793 r4810  
    4343        ''' 
    4444        logging.debug("Adding responsible party data") 
    45         from ndgUtils.models.Atom import Person 
     45        from ndg.common.src.models.Atom import Person 
    4646        if type(partyVals) is not list: 
    4747            partyVals = [partyVals] 
     
    105105        authorElements = tree.findall('{%s}molesISO/{%s}responsibleParties/{%s}responsibleParty' \ 
    106106                                      %(ndgObject.MOLES_NS, ndgObject.MOLES_NS, ndgObject.MOLES_NS)) 
    107         from ndgUtils.models.Atom import Person 
     107        from ndg.common.src.models.Atom import Person 
    108108        for authorElement in authorElements: 
    109109            logging.debug("Adding atom author data") 
  • ndgCommon/trunk/ndg/common/src/models/vocabtermdata.py

    r4793 r4810  
    263263    VOCAB_SERVER_URL = 'http://vocab.ndg.nerc.ac.uk/clients/whatLists' 
    264264 
    265     #BROWSE_ROOT_URL = "http://snow.badc.rl.ac.uk:5000/view/" 
    266     BROWSE_ROOT_URL = "http://localhost:5000/view/" 
     265    # TODO: this should be more configurable, really 
     266    BROWSE_SERVER_URL = "http://ndg.nerc.ac.uk" 
     267    BROWSE_STEM_URL = "/view/" 
     268    BROWSE_ROOT_URL = BROWSE_SERVER_URL + BROWSE_STEM_URL 
    267269     
    268270    BADC_BROWSE_ROOT = 'http://badc.nerc.ac.uk/browse' 
Note: See TracChangeset for help on using the changeset viewer.