source: ndgCommon/trunk/ndg/common/src/lib/granulite.py @ 4964

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/lib/granulite.py@4991
Revision 4964, 30.0 KB checked in by cbyrom, 11 years ago (diff)

Adjust granulite to ensure that granule atoms are validated before
they are created - and add code to properly deal with validation errors.

Line 
1#!/usr/bin/env python
2'''
Data model representing a granulite file - together with utilities to
augment atom data.
5
6 @author: C Byrom, Tessella Nov 08
7'''
8import os, sys, string, getopt, logging, re, datetime, cgi
9import ndg.common.src.clients.xmldb.eXist.dbconstants as dc
10from ndg.common.src.models import Atom
11import ndg.common.src.lib.utilities as utilities
12from ndg.common.src.lib.atomvalidator import AtomValidator
13from ndg.common.src.models.vocabtermdata import VocabTermData as VTD
14from ndg.common.src.clients.xmldb.eXist.atomclient import AtomClient
15
class granulite(object):
    '''
    Granulite data model - parses a granulite config file and applies its
    contents to a granule Atom, storing the results in an eXist DB
    '''
    # expected sections in a granulite file
    AUTHORS_SECTION = "authors"
    FILE_SECTION = "files"
    PARAMETERS_SECTION = "parameters"
    COVERAGE_SECTION = "coverage"
    CSML_SECTION = "csml_file"
    CDML_SECTION = "cdml_file"
    GRANULE_INFO_SECTION = "granule_info"
    GRANULE_AUTHORS_SECTION = "granule_authors"
    SUMMARY_SECTION = "summary"
    DATA_ENTITY_SECTION = "data_entity_id"
    LOGO_SECTION = "logo"
    RELATED_LINKS_SECTION = "related_links"
   
    # need to distinguish coverage data between spatial and temporal data - use these variables to do so
    TEMPORAL_DATA = "temporal_data"
    SPATIAL_DATA = "spatial_data"

    # group the data together as either single valued or array valued - to ease setting attributes on Atom
    # NOTE(review): these are class-level (shared) mutable objects; the code only
    # reads or rebinds them on self, so this is currently safe - but moving them
    # into __init__ would be more robust. Confirm before relying on per-instance state.
    singleVals = {} 
    arrayVals = {AUTHORS_SECTION:'authors', \
                 FILE_SECTION:'files', GRANULE_AUTHORS_SECTION:'atomAuthors', \
                 SUMMARY_SECTION:'summary'}
       
    # config file with eXist DB details
    DBCONFIG_FILE = "exist.config"

    # default title given to CSML files by csmlscan
    DEFAULT_CSML_TITLE = "NAME OF DATASET GOES HERE"
   
    # flag to use when running in test mode
    TEST_MODE = "granulite_test_mode"
   
    # eXist DB client (set in __init__)
    _eXist = None
   
    # flag to mark mode of operation
    _isOverride = False

    # info on specified CDML + CSML input files
    _cdmlFileName = None
    _cdmlTimeAxis = None
    _datasetID = None
    _csmlFileName = None

    # info on the datasets to attach the granule to - NB, class-level list,
    # but only ever rebound (never mutated) on instances
    _dataEntityIDs = []
   
    # standard output delimiter
    LINE_SEPARATOR = "-----------------------------"
    def __init__(self, granulite, granuleAtom = None, \
                  eXistClient = None, csmlOrCdmlFile = None, \
                  aggregateCoverage = False, useCSMLID = False, 
                  timeAxis = 'time', datasetID = None, replaceAtom = False, 
                  deleteMode = False):
        '''
        Constructor for granulite object - NB, the csml/cdml keywords allow use
        in a webapp - outside of this, this data should be specified in the granulite
        file
       
        @param granulite: either the granulite filename or contents
        @keyword granuleAtom: an existing granule atom - to add granulite data to
        - NB, if not specified, a new atom is used
        @keyword eXistClient: an eXist client implementing the InterfaceXMLDBAtomClient interface
        @keyword csmlOrCdmlFile: a cgi.FieldStorage object with CSML or CDML data
        @keyword aggregateCoverage: if set to True, only coverage data that extends the
        atom coverage data will be added
        - NOTE(review): this keyword is currently not stored or used anywhere in
        this constructor - confirm whether it should be
        @keyword useCSMLID: if True, use the CSML doc ID as the dataset ID - NB,
        this should only be True if creating a new atom - e.g. from a granulite
        @keyword timeAxis: if using a CDML file, specify the time axis to use - 'time'
        is the default
        @keyword datasetID: if using a CDML file, specify the ID of the dataset to use
        - otherwise one will be randomly generated
        @keyword replaceAtom: if True, and if a granule atom is found with the ID of
        the atom to be created by the granulite, automatically overwrite the older
        atom
        @keyword deleteMode: if True the granule atom and any references to it are
        deleted from eXist - when processGranulite are ran
        '''
        logging.info("Creating granulite data model")
       
        self._granulite = granulite
       
        # set up connection to eXist - use the client passed in, if any,
        # otherwise create one from the standard config file
        if eXistClient:
            self._eXist = eXistClient
        else:
            self._eXist = AtomClient(configFile = self.DBCONFIG_FILE)
       
        # override CSML/CDML data specified in the granulite file with data input directly
        self.ingestGranuliteFiles = True
        # NB, empty FieldStorage fields end up as empty strings
        if csmlOrCdmlFile is not None:
            self.ingestGranuliteFiles = False
            if csmlOrCdmlFile != '':
                if not isinstance(csmlOrCdmlFile, cgi.FieldStorage):
                    raise ValueError("Specified CSML/CDML file is not a cgi.FieldStorage object")
                self.__addCSMLOrCDMLData(csmlOrCdmlFile.filename, csmlOrCdmlFile.value)
       

        # use the atom passed in, if any - otherwise build a skeleton granule atom
        if granuleAtom:
            if not isinstance(granuleAtom, Atom.Atom):
                raise ValueError("Specified granule atom is not an Atom object")
            self._atom = granuleAtom
        else:
            # create a skeleton granule
            self.__initialiseGranule()
           
        self._replaceAtom = replaceAtom
        self._deleteMode = deleteMode
        self.useCSMLID = useCSMLID
        self._cdmlTimeAxis = timeAxis
        self._datasetID = datasetID
       
        # AtomValidator to use for validation - only load when needed
        self.validator = None
           
        logging.info("Granulite data model set up")
141       
142    def __getSectionName(self, str):
143        '''
144        Checks an input string to see if it contains a section title; if so
145        return this title name, otherwise return None
146        @param str: string to parse for section name
147        @return: section name, if found, None otherwise
148        '''
149        sectionName = None
150        if str.count('::') == 1:
151            sectionName = str.partition('::')[0]
152        return sectionName
153
154
155    def __getGranuliteDetails(self):
156        '''
157        Load the granulite config file and extract the required data 
158        '''
159        logging.info("Retrieving data from granulite config file")
160       
161        # assume we've already read in the file if it is multi-line
162        newLine = None
163        # cope with unix and dos end of lines
164        if self._granulite.find('\n') > -1:
165            newLine = '\n'
166        elif self._granulite.find('\r') > -1:
167            newLine = '\r'
168       
169        if newLine:
170            granuliteData = self._granulite.split(newLine)
171        else:
172            # Check this file exists
173            if not os.path.isfile(self._granulite):
174                raise ValueError("ERROR: Could not find the config file, %s; please specify " \
175                         "a valid file" %self._granulite)
176               
177            granulite_file = open(self._granulite, "r")
178            granuliteData = granulite_file.readlines()
179            granulite_file.close()
180       
181        # create a dictionary of all data in file - then use this to get the required data
182        granulite_data = {}
183       
184        # initialise vocab term data lookup object
185        self.VTD = VTD()
186
187        sectionName = "" # variable to indicate what section we're currently parsing in the granulite file data
188        logging.info("Parsing granulite file...")
189        for line in granuliteData:
190           
191            line = line.strip()
192            # avoid comments
193            if (line.startswith('#')):
194                continue
195           
196            # avoid empty lines - except in the summary section where these may be appropriate
197            if not line and sectionName != self.SUMMARY_SECTION:
198                continue
199           
200            section = self.__getSectionName(line)
201
202            # if a section name is returned, we're at a new section of the file - so change the dictionary key
203            if section:
204                sectionName = section
205                continue
206           
207            # cope with case where there is rubbish header info in the file
208            if not sectionName:
209                continue
210           
211            # initialise dictionary array if required
212            if sectionName not in granulite_data:
213                granulite_data[sectionName] = []
214           
215            if sectionName != self.COVERAGE_SECTION:
216                logging.debug("Adding %s data: -%s-" %(sectionName, line))
217                granulite_data[sectionName].append(line)
218            else:
219                # if it's coverage data, determine if it is spatial or temporal data
220                coverageType = self.SPATIAL_DATA
221                if line.lower().find('time') > -1:
222                    coverageType = self.TEMPORAL_DATA
223               
224                # initialise dictionary array if required
225                if coverageType not in granulite_data:
226                    granulite_data[coverageType] = []
227                logging.debug("Adding %s data: %s" %(coverageType, line))
228                granulite_data[coverageType].append(line)
229                   
230        logging.info("Finished parsing granulite file")
231        logging.info("- returning dict of ingested data")
232        return granulite_data
233       
234       
235    def __applyCoreGranuliteDetails(self, granulite_data):
236        '''
237        Apply the essential data read in from the granulite file to the atom data model
238        being processed by the granulite
239        @param granulite_data: the dict produced by __getGranuliteDetails
240        '''
241        # add the general granule info
242        if self.GRANULE_INFO_SECTION not in granulite_data:
243            raise ValueError("Need granule_info section in granulite input file")
244       
245        data = utilities.getTripleData(granulite_data[self.GRANULE_INFO_SECTION][0])
246        if not data[0]:
247            raise ValueError("Provider ID is missing for granule; please add this info to the " + \
248                             self.GRANULE_INFO_SECTION + " section of the granulite config file")
249        self._atom.addMolesEntityData(None, data[0], None)
250        self._atom.setDatasetID(data[1])
251        self._atom.title = data[2]
252
253        # NB, if running from the web, ignore the CSML/CDML files specified in the granulite
254        if self.ingestGranuliteFiles:
255            # check for CSML/CDML file input - these data are changed before adding to the granule
256            # - since the file will be stored and referenced in eXist by then
257            if not self._csmlFileName:
258                if self.CSML_SECTION in granulite_data:
259                    self.__addCSMLOrCDMLData(granulite_data[self.CSML_SECTION][0], None)
260   
261            if not self._cdmlFileName:
262                if self.CDML_SECTION in granulite_data:
263                    if self._csmlFileName:
264                        raise ValueError("Cannot specify both CDML and CSML file in granulite config file" + \
265                                         "\nNB, CSML file is generated from the specified CDML file")
266               
267                    data = utilities.getTripleData(granulite_data[self.CDML_SECTION][0])
268                    self.__addCSMLOrCDMLData(data[0], None)
269                    self._datasetID = data[1]
270                    self._cdmlTimeAxis = data[2]
271       
272       
273    def __applyGranuliteDetails(self, granulite_data):
274        '''
275        Apply the data read in from the granulite file to the atom data model
276        being processed by the granulite
277        @param granulite_data: the dict produced by __getGranuliteDetails
278        '''
279        logging.info("Applying granulite data to atom")
280        # add the single value attributes to the granule
281        for attribute in self.singleVals:
282            if attribute in granulite_data:
283                self._atom.setAttribute(self.singleVals[attribute], \
284                                        granulite_data[attribute][0])
285
286        # NB, explicitly set the related links before running the arrayVals loop, since
287        # this will replace any file Links created if it occurs after these have been
288        # created (was originally in arrayVals, but this doesn't enforce any specific ordering)
289        if self.RELATED_LINKS_SECTION in granulite_data:
290            self._atom.setAttribute('relatedLinks', \
291                                    granulite_data[self.RELATED_LINKS_SECTION])
292
293        # now add the arrays data
294        for attribute in self.arrayVals:
295            if attribute in granulite_data:
296                self._atom.setAttribute(self.arrayVals[attribute], \
297                                        granulite_data[attribute])
298       
299        if self.LOGO_SECTION in granulite_data:
300            for logo in granulite_data[self.LOGO_SECTION]:
301                self._atom.addRelatedLinks(logo + " | Logo | " + \
302                                           self.VTD.getTermCurrentVocabURL(VTD.LOGO_TERM))
303
304        # add the parameters data via the method - since this does some tidying up of the data
305        if self.PARAMETERS_SECTION in granulite_data:
306            self._atom.addParameters(granulite_data[self.PARAMETERS_SECTION])
307           
308        if self.DATA_ENTITY_SECTION in granulite_data:
309            self._dataEntityIDs = granulite_data[self.DATA_ENTITY_SECTION] 
310
311        # now add any coverage data
312        if self.SPATIAL_DATA in granulite_data:
313            self._extractSpatialData(granulite_data[self.SPATIAL_DATA][0])
314        if self.TEMPORAL_DATA in granulite_data:
315            self._extractTemporalData(granulite_data[self.TEMPORAL_DATA][0])
316
317        logging.info("All input data set up")
318       
319
320    def _extractSpatialData(self, geomString):
321        '''
322        Extract bounding box info from the specified geometry string
323        @param geomString: A string holding geometry info
324        NB, currently the method supports parsing of POLYGONs
325        '''
326        if geomString.upper().find('POLYGON') > -1:
327            logging.debug("Spatial data specified in POLYGON format - extracting data from this")
328            vals = re.findall('([\-\d\.]+)', geomString)
329            # assume we're dealing with a rectangle normal to the equator...
330            if len(vals) == 10:
331                self._atom.minX = vals[0]
332                self._atom.minY = vals[1]
333                self._atom.maxX = vals[4]
334                self._atom.maxY = vals[5]
335        else:
336            errorMessage = "Spatial coverage data not stored in POLYGON format - please correct and rerun"
337            logging.error(errorMessage)
338            raise ValueError(errorMessage)
339
340
341    def _extractTemporalData(self, dateRangeString):
342        '''
343        Extract temporal info from the specified daterange string
344        @param dateRangeString: A string holding temporal info
345        NB, currently the method supports parsing of TIMERANGE objects
346        '''
347        if dateRangeString.upper().find('TIMERANGE') == -1:
348            errorMessage = "Temporal data not stored in TIMERANGE() format - please correct and rerun"
349            logging.error(errorMessage)
350            raise ValueError(errorMessage)
351       
352        vals = re.findall('([0-9][0-9\-:TZ ]+)', dateRangeString)
353        if vals:
354            logging.debug("Adding start time: %s" %vals[0])
355            self._atom.t1 = vals[0]
356            if len(vals) > 1:
357                logging.debug("Adding finish time: %s" %vals[1])
358                self._atom.t2 = vals[1]
359     
360   
361    def __addGranuleToDataEntityRecords(self):
362        '''
363        Augment any data entity records, specified in the granulite, with a reference to
364        the granule
365        '''
366        logging.info("Adding granule info to data entities")
367        if not self._dataEntityIDs:
368            logging.info("No data entity IDs were specified in granulite - so will not add granule data to eXist DB")
369            return
370       
371        # now set up the granule links to the data entities specified
372        for entityID in self._dataEntityIDs:
373            data = utilities.getTripleData(entityID)
374            dataEntityID = data[0]
375            dataEntityProviderID = data[1]
376           
377            self.__updateDataEntity(dataEntityID, dataEntityProviderID)
378
379        logging.info("Granule data added to data entities")
380     
381   
    def __removeGranuleFromDataEntityRecords(self):
        '''
        Remove references to the granule in any data entity records
        '''
        logging.info("Removing granule info from data entities")
        logging.info("Retrieving data entities with references to granule")
        # populate self._atom.dataEntities with the DEs referencing this granule
        self._atom.lookupAssociatedData(VTD.DE_TERM, self._eXist,
                                        lookupIndirectReferences = True)
       
        # now remove the granule link from each data entity found
        # NB, an empty provider ID is passed - presumably the lookup matches
        # on the DE ID (de.rel) alone - TODO confirm
        for de in self._atom.dataEntities:
            self.__updateDataEntity(de.rel, "", removeLink = True)
        logging.info("Granule data removed from data entities")
395
396
    def __updateDataEntity(self, dataEntityID, providerID, removeLink = False):
        '''
        Retrieve the specified data entity and add a link to the current
        data granule, if required, then update the atom in eXist
        @param dataEntityID: ID of the data entity to augment with granule link
        @param providerID: provider ID of the data entity to augment with granule link
        @keyword removeLink: If True, remove the link to the current data granule
        - otherwise add it
        @raise ValueError: if no data entity with the specified ID can be found
        '''
        logging.debug("Retrieving data entity atom - to update associated granule info")
        doc = self._eXist.buildAndRunQuery('atom', 
                                           dc.ATOM_COLLECTION_PATH, 
                                           providerID, 
                                           dataEntityID)
       
        if not doc:
            errorMessage = "Could not find data entity, %s - please retry with valid data" %dataEntityID
            logging.error(errorMessage)
            raise ValueError(errorMessage)
        logging.debug("DE retrieved")
       
        # parse the first returned doc into an Atom data model
        de = Atom.Atom(xmlString=str(doc[0]))
        # remember the original link count - used below to detect whether
        # the add/remove actually changed anything
        noLinks = len(de.relatedLinks)
       
        # link format: "<browse URL> | <title> | <granule vocab term URL>"
        linkData = "%s | %s | %s " %(self._atom.atomBrowseURL,
                                     self._atom.title,
                                     self._atom.VTD.getTermCurrentVocabURL(VTD.GRANULE_TERM))
        if removeLink:
            logging.debug("- now removing link to granule")
            link = Atom.Link()
            link.fromString(linkData)
            de.removeRelatedLinks(link)
            logging.debug("Link removed")
        else:
            logging.debug("- now adding link to granule")
            de.addRelatedLinks(linkData)
            logging.debug("Link added")
           
        # only save if need be - i.e. if the link count actually changed
        if len(de.relatedLinks) == noLinks:
            logging.info("- data entity unchanged - skipping")
            return
       
        # NB, createAtom is used here to write the updated DE back - presumably
        # it overwrites the existing doc - TODO confirm against the client
        logging.debug("Now, save the updated DE atom back to eXist")
        self._eXist.createAtom(de)
        logging.debug("DE atom updated")
443
444
    def __initialiseGranule(self):
        '''
        Create a skeleton granule Atom object - ready to populate with data
        '''
        self._atom = Atom.Atom(VTD.GRANULE_TERM)
450
451
452    def __processCSMLFile(self):
453        logging.info("Processing CSML file")
454        # only keep file name, if full path specified
455        fileName = self._csmlFileName
456        fileName = fileName.split('/')[-1]
457        fileName = fileName.split('\\')[-1]
458       
459        self._atom.addCSMLData(self._csmlFileName, \
460                               self._csmlContent, useCSMLID = self.useCSMLID)
461        logging.info("Adding CSML file to eXist")
462        self._eXist.createOrUpdateDoc(self._csmlContent,
463                                      dc.NDG_A_COLLECTION_PATH + \
464                                      self._atom.ME.providerID + '/',
465                                      fileName)
466        logging.info("CSML file added to eXist")
467        logging.info("Finished processing CSML file")
468           
469
470    def __processCDMLFile(self):
471        logging.info("Processing CDML file")
472        # only keep file name, if full path specified
473        fileName = self._cdmlFileName
474        fileName = fileName.split('/')[-1]
475        fileName = fileName.split('\\')[-1]
476       
477        # firstly, save the doc to eXist
478        # remove DOCTYPE tags - as these will prevent eXist from ingesting the doc
479        self._cdmlContent = re.sub(r'<!DOCTYPE.*>', '', self._cdmlContent)
480        logging.info("CDML file loaded")
481
482        logging.info("Adding CDML file to eXist")
483        self._eXist.createOrUpdateDoc(self._cdmlContent,
484                                      dc.NDG_A_COLLECTION_PATH + \
485                                      self._atom.ME.providerID + '/',
486                                      fileName)
487        self._atom.cdmlFile = dc.NDG_A_COLLECTION_PATH + fileName
488       
489        # create a local copy of the CDML file - NB, this is required if running
490        # from web app
491        fn = os.path.basename(str(datetime.datetime.today().microsecond) + fileName)
492        cdmlFile = open(fn, 'wb')
493        cdmlFile.write(self._cdmlContent)
494        cdmlFile.close()
495        message = 'The file "' + fn + '" was uploaded successfully'
496       
497        logging.info("Create CSML file from the CDML file - NB, this will be stored in eXist too " + \
498                     "and will be used to extract dataset information from")
499        csmlFileName = utilities.createCSMLFile(fn, self._cdmlTimeAxis, datasetID = self._datasetID)
500        os.remove(fn)
501       
502        logging.info("CSML file successfully created - now processing this")
503        self.__addCSMLOrCDMLData(csmlFileName, None)
504       
505        # NB, can remove the CSML file now since the contents are read into memory
506        os.remove(csmlFileName)
507        logging.info("Finished processing CDML file")
508
509
510    def __addCSMLOrCDMLData(self, fileName, fileContent):
511        '''
512        Given an unknown file type, determine whether it is CSML or CDML; if it
513        is either, ingest the data appropriately; if not, just skip. NB, CDML docs
514        are converted into CSML ones to allow the data ingest
515        @param fileName: name of the file to ingest data from
516        @param fileContent: content of the file - NB, if this is set to None and the
517        file, fileName, is available locally, CsmlParser.Dataset will read in the file
518        directly
519        '''
520        logging.info("Determining file type to add data from")
521        if not fileContent:
522            logging.info("- NB, file contents not provided - attempt to load")
523            try:
524                f = open(fileName, 'r')
525                fileContent = f.read()
526                f.close
527            except IOError, e:
528                logging.error(e.message)
529               
530            if not fileContent:
531                raise ValueError("Could not load data from file, '%s'" %fileName)
532
533        # if we're dealing with a CDML file, process this to produce a CSML file
534        if utilities.isCSMLFile(fileContent):
535            self._csmlFileName = fileName
536            self._csmlContent = fileContent
537        elif utilities.isCDMLFile(fileContent):
538            self._cdmlFileName = fileName
539            self._cdmlContent = fileContent
540        else:
541            raise ValueError("Unrecognised file type, '%s'" %fileName)
542       
543        logging.info("Finished determining file type")
544       
545
546
547    def processCSMLOrCDMLFile(self):
548        '''
549        If a CSML or CDML file has been specified, process it - i.e.:
550        - extract required data
551        - add to eXist
552        @return atom: Atom object of created data granule with CSML/CDML data
553        added
554        '''
555        logging.info("Processing CSML/CDML data")
556        # NB, if a CDML file is specified, a CSML file will be created from it and this will be used to
557        # extract the required dataset info
558        if self._cdmlFileName:
559            self.__processCDMLFile()
560           
561        if self._csmlFileName:
562            self.__processCSMLFile()
563        logging.info("Finished processing CSML/CDML data")
564        return self._atom
565
566       
    def processGranulite(self, replaceAtom = False):
        '''
        Complete the required granulite actions
        - ingest granulite data + add to granule atom
        - ingest CSML or CDML data + add to granule atom
        - save CSML/CDML data to eXist
        - add granule atom to eXist
        - add references to granule atom to specified data entity atoms
        @keyword replaceAtom: if True, allow granule atoms to replace existing
        atoms with the same ID - if False, throw a DuplicationError
        @raise DuplicationError: if replacement is not allowed and an atom is
        found with the same ID as that to be created
        @raise SystemError: if updating the data entities fails - after rolling
        back the granule atom changes
        @return atom: Atom object of created data granule
        '''
        logging.info("Processing granulite data")
        # load in the granulite details
        inputs = self.__getGranuliteDetails()
       
        # apply the basic, essential data
        self.__applyCoreGranuliteDetails(inputs)
       
        # check for CSML or CDML file and load details
        self.processCSMLOrCDMLFile()

        # apply any granulite data; NB, this overrides/augments the
        # CSML/CDML data by default
        self.__applyGranuliteDetails(inputs)
       
        if self._deleteMode:
            logging.info("In delete mode - deleting granule atom and any references")
            self.deleteGranuleAndDEReferences()
           
        else:
            # validate the newly created atom - before importing it
            self.__validateGranuleAtom()
           
            # add the granule to eXist - if this exists already a DuplicationError
            # will be thrown if backups are not allowed
            doReplace = replaceAtom or self._replaceAtom
            logging.info("Creating granule atom, '%s', in eXist DB" %self._atom.atomName)
            self._eXist.createAtom(self._atom, replaceAtom = doReplace)
           
            # if the atom already exists, keep track of the backed up file - incase it
            # needs to be restored
            self.backupName = self._eXist.backupName
       
            # now add the granule data to the data entity in eXist
            # NB, if problems occur here, rollback changes
            try:
                # if the atom existed before, clear out the links to it before we start
                if self.backupName:
                    self.__removeGranuleFromDataEntityRecords()
                   
                self.__addGranuleToDataEntityRecords()
            except Exception, e:
                # rollback: delete the new granule + its DE links, then restore
                # any backed up atom and its original DE links
                errorMessage = "Exception thrown whilst updating data entities - detail: " + \
                    str(e.message) + \
                    "\n - will now roll back changes to granule atom to leave system in original state"
                logging.error(errorMessage)
                self.deleteGranuleAndDEReferences()

                # if the atom existed before, restore it and recreate the old DE links
                if self.backupName:
                    self._eXist.restoreBackup(self._eXist.backupName)
                    self.__restoreGranuleToDataEntityRecords()
                raise SystemError(errorMessage)
               
        logging.info("granulite processing complete")
        return self._atom
636
637
    def __validateGranuleAtom(self):
        '''
        Validate the granule atom created, using the standard AtomValidator
        @raise atomvalidator.ValidationError: if any errors are found
        '''
        # lazily create the validator on first use - then reuse it
        if not self.validator:
            self.validator = AtomValidator(None, 
                                           atomClient = self._eXist,
                                           newLineChar="<br/>")
           
        self.validator.setAtom(self._atom)
        self.validator.validateAtom()
650       
651
    def deleteGranuleAndDEReferences(self):
        '''
        Delete the granule atom and any references in data entities to it
        '''
        # remove the atom doc first, then strip links to it from any DEs
        self.__deleteGranule()
        self.__removeGranuleFromDataEntityRecords()
658
659
    def __deleteGranule(self):
        '''
        Remove the granule atom doc from eXist - NB, references to the granule
        in DEs are removed separately, by __removeGranuleFromDataEntityRecords
        '''
        logging.info("Deleting granule...")
        logging.info("Remove granule atom from eXist")
        self._eXist.deleteDoc(self._atom.getFullPath())
        logging.info("Granule deleted")
669
670
671    def __restoreGranuleToDataEntityRecords(self):
672        '''
673        If a granulite ingest has failed whilst replacing an existing granule,
674        restore the original DE links to this granule
675        '''
676        logging.info("Restoring original links with data entities")
677        # clear out any DEs from the current granulite input
678        self._dataEntityIDs = []
679       
680        # now add back in the original DE links
681        for de in self._atom.dataEntities:
682            deLink = "%s | %s | %s " %(de.href, de.title, de.rel)
683            self._dataEntityIDs.append(deLink)
684       
685        self.__addGranuleToDataEntityRecords()
686        logging.info("Finished restoring links")
687       
688
# command line entry point: granulite.py [-v|-d] [-r] [-x] <granulite file>
# NB, this is a Python 2 script (print statements, raw_input)
if __name__=="__main__":
    opts, args = getopt.getopt(sys.argv[1:], '-rxvd')
    if len(args) < 1:
        print "Error: Please specify a granulite data file to process"
        sys.exit(2)
       
    # default to warnings only; -v/-d raise the verbosity below
    loggingLevel = logging.WARNING
    isReplace = False
    isDelete = False
    for o, a in opts:
        if o == "-v":
            print " - Verbose mode ON"
            loggingLevel = logging.INFO
        elif o == "-d":
            print " - Debug mode ON"
            loggingLevel = logging.DEBUG
        elif o == "-r":
            print " - Replace mode ON"
            isReplace = True
        elif o == "-x":
            print " - Delete mode ON"
            isDelete = True
   
    logging.basicConfig(level = loggingLevel,
                        format='%(asctime)s %(filename)s:%(lineno)d %(levelname)s %(message)s')
    g = granulite(args[0], replaceAtom = isReplace, deleteMode = isDelete)
   
    try:
        g.processGranulite()
    # NOTE(review): 'edc' is never imported in this module, so if a
    # DuplicateError is actually raised this except clause will fail with a
    # NameError - confirm the intended module and add the missing import
    except edc.DuplicateError, e:
        # allow the user to specify if they want to overwrite the duplicated atom
        print e.message
        # NB, 'input' shadows the builtin here - harmless in this short script
        input = raw_input("- do you want to overwrite this with the granulite data? (y/n) ")
        if input.strip().upper() == 'Y':
            print "OK - replacing old granule atom..."
            g.processGranulite(replaceAtom = True)
        else:
            print "Exiting...."
Note: See TracBrowser for help on using the repository browser.