Changeset 4810
- Timestamp:
- 15/01/09 10:24:29 (12 years ago)
- Location:
- ndgCommon/trunk
- Files:
-
- 8 added
- 2 deleted
- 17 edited
Legend:
- Unmodified
- Added
- Removed
-
ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/eXistConnector.py
r4793 r4810 57 57 DIF_COLLECTION_PATH = "/db/DIF/" 58 58 PROVIDER_FEED_PATH = BASE_COLLECTION_PATH + "providerFeeds/" 59 SCHEMATA_COLLECTION_PATH = BASE_COLLECTION_PATH + "schemata" 60 # collection for storing misc files - e.g. the org data snippets 61 RESOURCES_COLLECTION_PATH = BASE_COLLECTION_PATH + "resources" 62 EXIST_CONFIG_PATH = "/db/system/config/" 59 63 60 64 """Access class for eXist""" -
ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/existdbclient.py
r4793 r4810 12 12 from ndg.common.src.dal.DocumentRetrieve import DocumentRetrieve as DR 13 13 from ndg.common.src.lib.atomutilities import * 14 from ndg.common.src.lib.existinitialiser import EXistInitialiser 14 15 from xml.etree import ElementTree as ET 15 16 from threading import Thread … … 84 85 # to do a rollback if there's an error encountered 85 86 self.backupName = None 87 88 # if we preload the collections data, it'll be stored here 89 self.collections = None 86 90 87 91 # NB, there are two routes through here: if a config file is specified … … 105 109 106 110 if setUpDB: 107 # set up any collections required - NB, if these already exist they won't cause any files to be lost 108 self.__setUpEXistAtomCollections() 109 110 # add the schema required for atom validation 111 self.__addAtomSchema() 112 113 # add common data missing from atoms - but used to create output DIF records 114 self.__uploadOrgData() 115 116 # set up the required atom feeds 117 self.feedClient.setupBasicFeeds() 118 119 self.collections = None 111 # set up the eXist DB appropriately for use with ndgCommon 112 initialiser = EXistInitialiser(self) 113 initialiser.initialise() 114 120 115 if loadCollectionData: 121 116 self.collections = self.getAllAtomCollections() … … 132 127 logging.debug("Getting atom schema data") 133 128 if not self.atomSchema: 134 self.atomSchema = ec. BASE_COLLECTION_PATH+ \129 self.atomSchema = ec.SCHEMATA_COLLECTION_PATH + '/' + \ 135 130 self.xmldb.xq.ATOM_MOLES_SCHEMA + '.xsd' 136 131 … … 138 133 139 134 AtomSchema = property(fget=__getSchema, doc="Atom schema path") 140 141 142 def __uploadOrgData(self):143 '''144 Upload the organisations DIF file to eXist - this is required for the atom->DIF output145 '''146 logging.info("Uploading organisation data to eXist")147 schemae = [self.xmldb.xq.ATOM_SCHEMA, self.xmldb.xq.MOLES_SCHEMA, self.xmldb.xq.ATOM_MOLES_SCHEMA]148 for fileName, orgData in self.xmldb.xq.resources.items():149 self.createEXistFile(orgData, ec.BASE_COLLECTION_PATH, fileName)150 logging.info("Data uploaded")151 135 152 136 … … 178 162 def checkAtomSchemaCompliance(self, atomPath, atom = None, isDebug = False): 179 163 ''' 180 Validate the specified atom in eXist with the atom schema ein eXist164 Validate the specified atom in eXist with the atom schemata in eXist 181 165 @param atomPath: path to the atom in eXist 182 166 @keyword atom: if set to an atom, this will be created temporarily in eXist … … 247 231 logging.info("Validation complete") 248 232 return errors 249 250 251 def __setUpEXistAtomCollections(self): 252 ''' 253 Set up the required eXist collections needed for running the granulator script 254 ''' 255 logging.info("Ensuring required collections are available in eXist") 256 for col in [ec.BASE_COLLECTION_PATH, ec.BACKUP_COLLECTION_PATH]: 257 for type in [ec.OLD_COLLECTION_PATH, ec.PUBLISHED_COLLECTION_PATH, \ 258 ec.SMALL_P_PUBLISHED_COLLECTION_PATH, ec.WORKING_COLLECTION_PATH]: 259 self.xmldb.createCollection(col) 260 self.xmldb.createCollection(col + type) 261 self.xmldb.createCollection(col + type + ec.DE_COLLECTION_PATH) 262 self.xmldb.createCollection(col + type + ec.DEPLOYMENT_COLLECTION_PATH) 263 self.xmldb.createCollection(col + type + ec.DEPLOYMENTS_COLLECTION_PATH) 264 self.xmldb.createCollection(col + type + ec.GRANULE_COLLECTION_PATH) 265 266 self.xmldb.createCollection(ec.DIF_COLLECTION_PATH) 267 self.xmldb.createCollection(ec.PROVIDER_FEED_PATH) 268 logging.info("Required collections available") 269 270 271 def __addAtomSchema(self): 272 ''' 273 Add the required atom schema to the atoms collection - to allow validation 274 of input atoms 275 ''' 276 logging.info("Adding atom schema to eXist") 277 schemata = [self.xmldb.xq.ATOM_SCHEMA, self.xmldb.xq.MOLES_SCHEMA, self.xmldb.xq.ATOM_MOLES_SCHEMA] 278 for schema in schemata: 279 xml = self.xmldb.xq.getSchema(schema) 280 self.createEXistFile(xml, ec.BASE_COLLECTION_PATH, schema + '.xsd') 281 logging.info("- schema added") 233 282 234 283 235 … … 450 402 logging.debug("data: %s" %xml) 451 403 452 # create the collection, in case it doesn't already exist - NB, this won't overwrite anything 453 self.createCollections([collection]) 454 status = self.xmldb.storeXML(xml, collection + "/" + fileName, overwrite=1) 404 # create the collection, if it doesn't already exist - NB, this won't overwrite anything 405 if self.collections is None or not self.collections.get(collection): 406 self.createCollections([collection]) 407 408 status = self.xmldb.storeXML(xml, collection + "/" + fileName, overwrite=1) 455 409 if not status: 456 410 errorMessage = "Command to create file in eXist did not complete successfully - exiting" … … 643 597 @keyword replaceAtom: if False and the atom is already available in eXist 644 598 @param runAsynch: if True, if a backup of an existing file, do this 645 asynchronously in a separate thread 599 asynchronously in a separate thread + do the feed publishing and DIF 600 creating in a separate thread, too 646 601 then raise a ValueError. 647 602 ''' … … 705 660 # feeds 706 661 if atom.isPublished(): 707 self.runAsynchAtomPublish(atom) 662 if runAsynch: 663 self.runAsynchAtomPublish(atom) 664 else: 665 self.__publishAtom(atom) 708 666 709 667 return atom -
ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/existdbfeedclient.py
r4793 r4810 28 28 PROVIDERLEVEL_ATOM_FEED_TITLE = 'Moles Atom Data - for provider, %s' 29 29 PROVIDERLEVEL_DIF_FEED_TITLE = 'Moles Atom Data - for provider, %s' 30 DIF_ENTRY_TITLE = 'DIF Record' 30 31 31 32 REST_BASE_URL = '/exist/rest' … … 175 176 browseURL = atom.atomBrowseURL 176 177 if isDIFRecord: 177 titleString = 'DIF Record'178 titleString = self.DIF_ENTRY_TITLE 178 179 browseURL = browseURL.replace('__%s__' %ndgObject.ATOM_DOC_TYPE, 179 180 '__%s__' %ndgObject.BROWSE_DIF_DOC_TYPE) -
ndgCommon/trunk/ndg/common/src/lib/atomvalidator.py
r4793 r4810 15 15 from ndg.common.src.models.vocabtermdata import isValidTermURI 16 16 from ndg.common.src.models.Atom import Atom 17 from ndg.common.src.lib.ndgXqueries import ndgXqueries18 17 from ndg.common.src.lib.utilities import isValidUnicode, simpleURLCheck, strftime 19 18 … … 122 121 123 122 # now check the vocab terms 124 #self.__validateVocabData()123 self.__validateVocabData() 125 124 126 125 # check the atom conforms to the schema -
ndgCommon/trunk/ndg/common/src/lib/existatomvalidator.py
r4793 r4810 10 10 from ndg.common.src.models.Atom import Atom 11 11 from atomvalidator import AtomValidator 12 from ndgXqueries import ndgXqueries13 12 14 13 -
ndgCommon/trunk/ndg/common/src/lib/ndgXqueries.py
r4793 r4810 19 19 ATOM_SCHEMA = "atom1.0" 20 20 21 def __init__(self, directory='xquery'):21 def __init__(self, directory='common/xmldb/'): 22 22 ''' Loads up xqueries from the internal package directory ''' 23 files=pkg_resources.resource_listdir('ndgUtils',directory) 23 24 xqueryDir = directory + 'xquery' 25 files=pkg_resources.resource_listdir('ndg', xqueryDir) 24 26 # this next bit to get rid of .svn and anything else in testing 25 xq=[] 26 xsd = [] 27 self.xq = {} 27 28 for f in files: 28 if f. find('.xq') != -1:29 xq.append(f)30 elif f.find('.xsd') != -1:31 xsd.append(f) 32 33 #remember that .rstrip doesn't quite do what you think it does :-)34 self.x q = dict([(re.sub('.xq\Z','',f),\35 pkg_resources.resource_string('ndgUtils','%s/%s'%(directory,f))) for f in xq])36 self.xsd = dict([(re.sub('.xsd\Z','',f),\37 pkg_resources.resource_string('ndgUtils',\38 '%s/%s'%(directory, f))) for f in xsd])39 29 if f.endswith('.xq'): 30 self.xq[re.sub('.xq\Z','',f)] = \ 31 pkg_resources.resource_string('ndg','%s/%s'%(xqueryDir,f)) 32 33 schemaDir = directory + 'schema' 34 files=pkg_resources.resource_listdir('ndg', schemaDir) 35 self.xsd = {} 36 for f in files: 37 if f.endswith('.xsd'): 38 self.xsd[re.sub('.xsd\Z','',f)] = \ 39 pkg_resources.resource_string('ndg','%s/%s'%(schemaDir, f)) 40 40 41 # add the various xquery libraries, too 41 self.xqlib =dict()42 dirs = ['xqueryLib/Vocabs', 'xqueryLib/Utilities', 'xqueryLib/StubB']43 for dir in dirs:44 files = pkg_resources.resource_listdir('ndg Utils', dir)42 self.xqlib = {} 43 for dir in ['Vocabs', 'Utilities', 'StubB']: 44 xqueryLibDir = xqueryDir + '/lib/' + dir 45 files = pkg_resources.resource_listdir('ndg', xqueryLibDir) 45 46 for f in files: 46 if f.find('.xquery') == -1: 47 continue 48 49 self.xqlib[f] = pkg_resources.resource_string('ndgUtils','%s/%s'%(dir,f)) 47 if f.endswith('.xquery'): 48 self.xqlib[f] = \ 49 pkg_resources.resource_string('ndg','%s/%s'%(xqueryLibDir,f)) 50 50 51 51 # also add the organisation data files - NB, this class should be generalised 52 52 # to make more sense... 53 files = pkg_resources.resource_listdir('ndgUtils', 'resources') 53 resourceDir = directory + 'resources' 54 files = pkg_resources.resource_listdir('ndg', resourceDir) 54 55 self.resources = {} 55 56 for f in files: 56 if f.find('.xml') == -1: 57 continue 58 self.resources[f] = pkg_resources.resource_string('ndgUtils','resources/%s'%f) 57 if f.endswith('.xml'): 58 self.resources[f] = \ 59 pkg_resources.resource_string('ndg','%s/%s' %(resourceDir, f)) 60 61 # lastly, add the indexes 62 self.indexes = {} 63 for dir in ['eXist']: 64 indexDir = directory + 'indexing' + '/' + dir 65 files = pkg_resources.resource_listdir('ndg', indexDir) 66 for f in files: 67 if f.endswith('.xconf'): 68 self.indexes[f] = \ 69 pkg_resources.resource_string('ndg','%s/%s'%(indexDir,f)) 59 70 60 71 def __setitem__(self,key,value): -
ndgCommon/trunk/ndg/common/src/models/Atom.py
r4793 r4810 899 899 self.datasetID = self.atomBrowseURL.split("__ATOM__")[-1] 900 900 self.atomName = self.datasetID + ".atom" 901 self.ndgURI = self.atomBrowseURL.split(VTD.BROWSE_ROOT_URL)[1] 901 # NB, only split on the stem, since the browse host may not be 902 # the same as that defined in VTD 903 self.ndgURI = self.atomBrowseURL.split(VTD.BROWSE_STEM_URL)[-1] 902 904 903 905 # now remove this value and the associated moles doc link … … 1226 1228 ''' 1227 1229 logging.info("Looking up %s info" %type) 1228 1229 1230 self.allActivities = [] 1230 1231 self.allObs = [] -
ndgCommon/trunk/ndg/common/src/models/MolesEntity.py
r4793 r4810 43 43 ''' 44 44 logging.debug("Adding responsible party data") 45 from ndg Utils.models.Atom import Person45 from ndg.common.src.models.Atom import Person 46 46 if type(partyVals) is not list: 47 47 partyVals = [partyVals] … … 105 105 authorElements = tree.findall('{%s}molesISO/{%s}responsibleParties/{%s}responsibleParty' \ 106 106 %(ndgObject.MOLES_NS, ndgObject.MOLES_NS, ndgObject.MOLES_NS)) 107 from ndg Utils.models.Atom import Person107 from ndg.common.src.models.Atom import Person 108 108 for authorElement in authorElements: 109 109 logging.debug("Adding atom author data") -
ndgCommon/trunk/ndg/common/src/models/vocabtermdata.py
r4793 r4810 263 263 VOCAB_SERVER_URL = 'http://vocab.ndg.nerc.ac.uk/clients/whatLists' 264 264 265 #BROWSE_ROOT_URL = "http://snow.badc.rl.ac.uk:5000/view/" 266 BROWSE_ROOT_URL = "http://localhost:5000/view/" 265 # TODO: this should be more configurable, really 266 BROWSE_SERVER_URL = "http://ndg.nerc.ac.uk" 267 BROWSE_STEM_URL = "/view/" 268 BROWSE_ROOT_URL = BROWSE_SERVER_URL + BROWSE_STEM_URL 267 269 268 270 BADC_BROWSE_ROOT = 'http://badc.nerc.ac.uk/browse' -
ndgCommon/trunk/ndg/common/unittests/clients/xmldb/eXist/testexistdbclient.py
r4793 r4810 147 147 self.assertEquals(None, newClient._eXistDBClient__lookupEXistFile(molesSchema)) 148 148 149 def testSetUpEXistCollections(self):150 try:151 self.utils.dbc._eXistDBClient__setUpEXistAtomCollections()152 except:153 self.fail("Should not have thrown an exception")154 155 149 def testLoadDBDetailsValid(self): 156 150 self.utils.dbc._eXistDBClient__loadDBDetails(self.confFile) -
ndgCommon/trunk/ndg/common/unittests/clients/xmldb/eXist/testexistdbfeedclient.py
r4793 r4810 8 8 from ndg.common.src.clients.xmldb.eXist.eXistConnector import eXistConnector as ec 9 9 from ndg.common.src.clients.xmldb.eXist.existdbfeedclient import eXistDBFeedClient as feedClient, FeedClientError 10 from ndg.common.src.lib.ndgXqueries import ndgXqueries11 10 from ndg.common.src.models.Atom import Atom 12 11 from ndg.common.src.models.vocabtermdata import VocabTermData as VTD -
ndgCommon/trunk/ndg/common/unittests/lib/test_xmlhandler2.py
r4793 r4810 1 1 import unittest, ConfigParser 2 2 from ndg.common.src.lib.xmlHandler2 import xmlHandler 3 from ndg.common.src.lib.ndgXqueries import ndgXqueries4 3 from ndg.common.src.clients.xmldb.eXist.eXistInterface import ndg_eXist 5 4 from ndg.common.src.models.ndgObject import ndgObject -
ndgCommon/trunk/ndg/common/unittests/models/testatom.py
r4793 r4810 11 11 import ndg.common.unittests.testconstants as tc 12 12 from ndg.common.src.models import AtomState 13 from ndg.common.unittests.testutils import testUtils as tu 14 from ndg.common.src.dal.DocumentRetrieve import DocumentRetrieve 13 15 14 16 class testatom(unittest.TestCase): … … 29 31 self.link2.href = 'www.pete.com' 30 32 self.link2.rel = 'self' 33 self.createdAtoms = [] # array to store paths to atoms created - in order to then delete them 34 self.utils = tu(tc.DBCONFIG_FILE) 31 35 32 36 … … 210 214 self.assertEqual(AtomState.OLD_STATE, self.atom.state) 211 215 self.assertFalse(self.atom.isPublished()) 212 216 217 def testLookupAssociatedDEData(self): 218 atom = self.utils.createAtomInEXist(tc.invalidXmlString) 219 self.createdAtoms.append(atom.getFullPath()) 220 dr = DocumentRetrieve(tc.VALID_REPOSITORY, 221 pwfile = tc.DBCONFIG_FILE) 222 223 atom.lookupAssociatedData(VTD.DE_TERM, dr) 224 self.utils.dbc.deleteAtomInExist(atom) 225 226 def testLookupAssociatedDeploymentData(self): 227 atom = self.utils.createAtomInEXist(tc.invalidXmlString) 228 self.createdAtoms.append(atom.getFullPath()) 229 dr = DocumentRetrieve(tc.VALID_REPOSITORY, 230 pwfile = tc.DBCONFIG_FILE) 231 232 atom.lookupAssociatedData(VTD.DEPLOYMENT_TERM, dr) 233 self.utils.dbc.deleteAtomInExist(atom) 234 235 def testLookupAssociatedDeploymentDataWithLookupIndirectReferences(self): 236 atom = self.utils.createAtomInEXist(tc.invalidXmlString) 237 self.createdAtoms.append(atom.getFullPath()) 238 dr = DocumentRetrieve(tc.VALID_REPOSITORY, 239 pwfile = tc.DBCONFIG_FILE) 240 241 atom.lookupAssociatedData(VTD.DEPLOYMENT_TERM, dr, 242 lookupIndirectReferences=True) 243 self.utils.dbc.deleteAtomInExist(atom) 244 245 def testLookupAssociatedDEDataWithLookupIndirectReferences(self): 246 atom = self.utils.createAtomInEXist(tc.invalidXmlString) 247 self.createdAtoms.append(atom.getFullPath()) 248 dr = DocumentRetrieve(tc.VALID_REPOSITORY, 249 pwfile = tc.DBCONFIG_FILE) 250 251 atom.lookupAssociatedData(VTD.DE_TERM, dr, 252 lookupIndirectReferences=True) 253 self.utils.dbc.deleteAtomInExist(atom) 254 213 255 214 256 -
ndgCommon/trunk/ndg/common/unittests/models/testdif.py
r4793 r4810 19 19 def setUp(self): 20 20 ''' Load example config and DIF files for testing ''' 21 from ndg Utils.xmlHandler2 import xmlHandler21 from ndg.common.src.lib.xmlHandler2 import xmlHandler 22 22 x=xmlHandler(tc.DIF_FILE) 23 23 self.dif=DIF(x.tree,et=1) … … 37 37 def testDifficult(self): 38 38 ''' Grab a test example from the internet and load it ''' 39 from ndgUtils import ndgRetrieve, xmlHandler2 39 from ndg.common.src.dal import ndgRetrieve 40 from ndg.common.src.lib import xmlHandler2 40 41 #testURI='grid.bodc.nerc.ac.uk__DIF__EDMED1048008' 41 42 testURI='neodc.nerc.ac.uk__DIF__NEODC_NEXTMAP' -
ndgCommon/trunk/ndg/common/unittests/testconstants.py
r4793 r4810 13 13 __revision__ = '$Id: $' 14 14 15 from ndg Utils.eXistConnector import eXistConnector as ec16 from ndg Utils.models.vocabtermdata import VocabTermData as VTD15 from ndg.common.src.clients.xmldb.eXist.eXistConnector import eXistConnector as ec 16 from ndg.common.src.models.vocabtermdata import VocabTermData as VTD 17 17 import os, datetime 18 18 … … 107 107 108 108 MOLES_ENTITY_STRING = '<moles:entity type="granule"><moles:molesISO><moles:responsibleParties>' + \ 109 '<moles:responsibleParty>< name>calum byrom</name><uri>www.cb.org</uri><role>Metadata maintainer</role></moles:responsibleParty>' + \110 '<moles:responsibleParty>< name>dom lowe</name><uri>www.badc.rl.ac.uk</uri><role>Metadata maintainer</role></moles:responsibleParty>' + \109 '<moles:responsibleParty><moles:name>calum byrom</moles:name><moles:uri>www.cb.org</moles:uri><moles:role>Metadata maintainer</moles:role></moles:responsibleParty>' + \ 110 '<moles:responsibleParty><moles:name>dom lowe</moles:name><moles:uri>www.badc.rl.ac.uk</moles:uri><moles:role>Metadata maintainer</moles:role></moles:responsibleParty>' + \ 111 111 '</moles:responsibleParties><moles:datasetLanguage>English</moles:datasetLanguage>' + \ 112 112 '<moles:datasetLanguage>English</moles:datasetLanguage>' + \ … … 153 153 '<contributor><name>Brian Bandy</name><uri>www.cb.org</uri></contributor>' + \ 154 154 '<moles:entity type="granule"><moles:molesISO><moles:responsibleParties>' + \ 155 '<moles:responsibleParty>< name>calum byrom</name><uri>www.cb.org</uri><role>Metadata maintainer</role></moles:responsibleParty>' + \156 '<moles:responsibleParty>< name>dom lowe</name><uri>www.badc.rl.ac.uk</uri><role>Metadata maintainer</role></moles:responsibleParty>' + \155 '<moles:responsibleParty><moles:name>calum byrom</moles:name><moles:uri>www.cb.org</moles:uri><moles:role>Metadata maintainer</moles:role></moles:responsibleParty>' + \ 156 '<moles:responsibleParty><moles:name>dom lowe</moles:name><moles:uri>www.badc.rl.ac.uk</moles:uri><moles:role>Metadata maintainer</moles:role></moles:responsibleParty>' + \ 157 157 '</moles:responsibleParties><moles:datasetLanguage>English</moles:datasetLanguage>' + \ 158 158 '<moles:providerID>badc.nerc.ac.uk</moles:providerID>' + \ -
ndgCommon/trunk/ndg/common/xmldb/schema/molesAtom1.0.xsd
r4793 r4810 11 11 <!-- NB, to use this schema, need to set the schemaLocations to point to 12 12 a valid, accessible version of moles2.0.xsd and atom1.0.xsd --> 13 <xs:import namespace="http://www.w3.org/XML/1998/namespace" schemaLocation="http://www.w3.org/2001/03/xml.xsd" /> 14 <xs:import namespace="http://www.georss.org/georss/10" schemaLocation="http://georss.cn/xml/1.0/georss.xsd" /> 13 <!-- The xml schema is commented out since it doesn't appear to be needed when running through xmlrpc 14 - if you are running in an exist client, it is needed, tho' - NB, could not get things working with 15 this schema stored locally --> 16 <!-- xs:import namespace="http://www.w3.org/XML/1998/namespace" schemaLocation="xml.xsd" /--> 17 <xs:import namespace="http://www.georss.org/georss/10" schemaLocation="georss.xsd" /> 15 18 <xs:import namespace="http://ndg.nerc.ac.uk/schema/moles2beta" schemaLocation="moles2.0.xsd" /> 16 19 -
ndgCommon/trunk/setup.py
r4793 r4810 6 6 packages=find_packages(), 7 7 test_suite='nose.collector', 8 package_data = {'ndgUtils':['xquery/*.xq','xqueryLib/*/*.xquery']},9 8 install_requires=["ZSI", "feedparser"], # for Discovery Service Client 10 9 author = 'Calum Byrom',
Note: See TracChangeset
for help on using the changeset viewer.