source: exist/trunk/python/ndgUtils/lib/granulite.py @ 4696

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/lib/granulite.py@4696
Revision 4696, 28.5 KB, checked in by cbyrom, 11 years ago

Adjust the Atom and MolesEntity data models to use namespaces properly when
dealing with xpath queries - rather than having these stripped out. This
avoids problems when namespaces are given arbitrary names and is a more
exact, hence more robust, approach.
Create a new test class to hold the xmlhandler2 tests separately.
Add a delete function to granulite - to allow data granules, and their
connections to data entities, to be removed - and add 'roll back' functionality
to cope with scenarios where a granulite replace/delete fails to complete
properly. Add new methods to the existdbclient to support the restore/delete/backup
functionality.
Extend the test suite to exercise the new functionality.
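As a rough illustration of the new delete and roll-back behaviour, the granulite below can be driven from the command line or programmatically. This is a minimal sketch only: the import path follows the repository layout and the granulite file name is made up, while the keywords and flags are those defined in the module.

    from ndgUtils.lib.granulite import granulite

    # delete an existing granule atom and remove its references from data entities
    # - equivalent to running: python granulite.py -x example.granulite
    g = granulite('example.granulite', deleteMode = True)
    g.processGranulite()

    # re-ingest, overwriting any atom with the same ID; if updating the data
    # entities fails part way through, processGranulite rolls the changes back
    # - equivalent to running: python granulite.py -r example.granulite
    g = granulite('example.granulite', replaceAtom = True)
    g.processGranulite()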

#!/usr/bin/env python
'''
Data model representing a granulite file - together with utilities to
augment atom data

 @author: C Byrom, Tessella Nov 08
'''
import os, sys, string, getopt, logging, re, datetime, cgi
from ndgUtils.eXistConnector import eXistConnector
from ndgUtils.models import Atom
import ndgUtils.lib.utilities as utilities
from ndgUtils.models.vocabtermdata import VocabTermData as VTD
import ndgUtils.lib.existdbclient as edc

class granulite(object):
    '''
    Granulite data model
    '''
    # expected sections in a granulite file
    AUTHORS_SECTION = "authors"
    FILE_SECTION = "files"
    PARAMETERS_SECTION = "parameters"
    COVERAGE_SECTION = "coverage"
    CSML_SECTION = "csml_file"
    CDML_SECTION = "cdml_file"
    GRANULE_INFO_SECTION = "granule_info"
    GRANULE_AUTHORS_SECTION = "granule_authors"
    SUMMARY_SECTION = "summary"
    DATA_ENTITY_SECTION = "data_entity_id"
    LOGO_SECTION = "logo"
    RELATED_LINKS_SECTION = "related_links"

    # need to distinguish between spatial and temporal coverage data - use these variables to do so
    TEMPORAL_DATA = "temporal_data"
    SPATIAL_DATA = "spatial_data"

    # group the data together as either single valued or array valued - to ease setting attributes on Atom
    singleVals = {}
    arrayVals = {AUTHORS_SECTION:'authors', \
                 FILE_SECTION:'files', GRANULE_AUTHORS_SECTION:'atomAuthors', \
                 SUMMARY_SECTION:'summary'}

    # config file with eXist DB details
    DBCONFIG_FILE = "exist.config"

    # default title given to CSML files by csmlscan
    DEFAULT_CSML_TITLE = "NAME OF DATASET GOES HERE"

    # flag to use when running in test mode
    TEST_MODE = "granulite_test_mode"

    # eXist DB client
    _eXist = None

    # flag to mark mode of operation
    _isOverride = False

    # info on specified CDML + CSML
    _cdmlFileName = None
    _cdmlTimeAxis = None
    _datasetID = None
    _csmlFileName = None

    # info on the datasets to attach the granule to
    _dataEntityIDs = []

    # standard output delimiter
    LINE_SEPARATOR = "-----------------------------"


    def __init__(self, granulite, granuleAtom = None, \
                  eXistClient = None, csmlOrCdmlFile = None, \
                  aggregateCoverage = False, useCSMLID = False,
                  timeAxis = 'time', datasetID = None, replaceAtom = False,
                  deleteMode = False):
        '''
        Constructor for the granulite object - NB, the csml/cdml keywords allow use
        in a webapp; outside of a webapp, this data should be specified in the
        granulite file

        @param granulite: either the granulite filename or contents
        @keyword granuleAtom: an existing granule atom - to add granulite data to
        - NB, if not specified, a new atom is used
        @keyword eXistClient: an eXist connection
        @keyword csmlOrCdmlFile: a cgi.FieldStorage object with CSML or CDML data
        @keyword aggregateCoverage: if set to True, only coverage data that extends the
        atom coverage data will be added
        @keyword useCSMLID: if True, use the CSML doc ID as the dataset ID - NB,
        this should only be True if creating a new atom - e.g. from a granulite
        @keyword timeAxis: if using a CDML file, specify the time axis to use - 'time'
        is the default
        @keyword datasetID: if using a CDML file, specify the ID of the dataset to use
        - otherwise one will be randomly generated
        @keyword replaceAtom: if True, and if a granule atom is found with the ID of
        the atom to be created by the granulite, automatically overwrite the older
        atom
        @keyword deleteMode: if True, the granule atom and any references to it are
        deleted from eXist when processGranulite is run
        '''
        logging.info("Creating granulite data model")

        self._granulite = granulite

        # set up connection to eXist
        if eXistClient:
            self._eXist = eXistClient
        else:
            self._eXist = edc.eXistDBClient(configFile = self.DBCONFIG_FILE)

        # override CSML/CDML data specified in the granulite file with data input directly
        self.ingestGranuliteFiles = True
        # NB, empty FieldStorage fields end up as empty strings
        if csmlOrCdmlFile is not None:
            self.ingestGranuliteFiles = False
            if csmlOrCdmlFile != '':
                if not isinstance(csmlOrCdmlFile, cgi.FieldStorage):
                    raise ValueError("Specified CSML/CDML file is not a cgi.FieldStorage object")
                self.__addCSMLOrCDMLData(csmlOrCdmlFile.filename, csmlOrCdmlFile.value)


        if granuleAtom:
            if not isinstance(granuleAtom, Atom.Atom):
                raise ValueError("Specified granule atom is not an Atom object")
            self._atom = granuleAtom
        else:
            # create a skeleton granule
            self.__initialiseGranule()

        self._replaceAtom = replaceAtom
        self._deleteMode = deleteMode
        self.useCSMLID = useCSMLID
        self._cdmlTimeAxis = timeAxis
        self._datasetID = datasetID

        logging.info("Granulite data model set up")


    def __getSectionName(self, str):
        '''
        Checks an input string to see if it contains a section title; if so
        return this title name, otherwise return None
        @param str: string to parse for section name
        @return: section name, if found, None otherwise
        '''
        sectionName = None
        if str.count('::') == 1:
            sectionName = str.partition('::')[0]
        return sectionName


    def __getGranuliteDetails(self):
        '''
        Load the granulite config file and extract the required data
        '''
        logging.info("Retrieving data from granulite config file")

        # assume we've already read in the file if it is multi-line
        newLine = None
        # cope with unix and dos end of lines
        if self._granulite.find('\n') > -1:
            newLine = '\n'
        elif self._granulite.find('\r') > -1:
            newLine = '\r'

        if newLine:
            granuliteData = self._granulite.split(newLine)
        else:
            # Check this file exists
            if not os.path.isfile(self._granulite):
                raise ValueError("ERROR: Could not find the config file, %s; please specify " \
                         "a valid file" %self._granulite)

            granulite_file = open(self._granulite, "r")
            granuliteData = granulite_file.readlines()
            granulite_file.close()

        # create a dictionary of all data in file - then use this to get the required data
        granulite_data = {}

        # initialise vocab term data lookup object
        self.VTD = VTD()

        sectionName = "" # variable to indicate what section we're currently parsing in the granulite file data
        logging.info("Parsing granulite file...")
        for line in granuliteData:

            line = line.strip()
            # avoid comments
            if (line.startswith('#')):
                continue

            # avoid empty lines - except in the summary section where these may be appropriate
            if not line and sectionName != self.SUMMARY_SECTION:
                continue

            section = self.__getSectionName(line)

            # if a section name is returned, we're at a new section of the file - so change the dictionary key
            if section:
                sectionName = section
                continue

            # cope with case where there is rubbish header info in the file
            if not sectionName:
                continue

            # initialise dictionary array if required
            if sectionName not in granulite_data:
                granulite_data[sectionName] = []

            if sectionName != self.COVERAGE_SECTION:
                logging.debug("Adding %s data: -%s-" %(sectionName, line))
                granulite_data[sectionName].append(line)
            else:
                # if it's coverage data, determine if it is spatial or temporal data
                coverageType = self.SPATIAL_DATA
                if line.lower().find('time') > -1:
                    coverageType = self.TEMPORAL_DATA

                # initialise dictionary array if required
                if coverageType not in granulite_data:
                    granulite_data[coverageType] = []
                logging.debug("Adding %s data: %s" %(coverageType, line))
                granulite_data[coverageType].append(line)

        logging.info("Finished parsing granulite file")
        logging.info("- returning dict of ingested data")
        return granulite_data


    def __applyCoreGranuliteDetails(self, granulite_data):
        '''
        Apply the essential data read in from the granulite file to the atom data model
        being processed by the granulite
        @param granulite_data: the dict produced by __getGranuliteDetails
        '''
        # add the general granule info
        if self.GRANULE_INFO_SECTION not in granulite_data:
            raise ValueError("Need granule_info section in granulite input file")

        data = utilities.getTripleData(granulite_data[self.GRANULE_INFO_SECTION][0])
        if not data[0]:
            raise ValueError("Provider ID is missing for granule; please add this info to the " + \
                             self.GRANULE_INFO_SECTION + " section of the granulite config file")
        self._atom.addMolesEntityData(None, data[0], None)
        self._atom.setDatasetID(data[1])
        self._atom.title = data[2]

        # NB, if running from the web, ignore the CSML/CDML files specified in the granulite
        if self.ingestGranuliteFiles:
            # check for CSML/CDML file input - these data are changed before adding to the granule
            # - since the file will be stored and referenced in eXist by then
            if not self._csmlFileName:
                if self.CSML_SECTION in granulite_data:
                    self.__addCSMLOrCDMLData(granulite_data[self.CSML_SECTION][0], None)

            if not self._cdmlFileName:
                if self.CDML_SECTION in granulite_data:
                    if self._csmlFileName:
                        raise ValueError("Cannot specify both CDML and CSML file in granulite config file" + \
                                         "\nNB, CSML file is generated from the specified CDML file")

                    data = utilities.getTripleData(granulite_data[self.CDML_SECTION][0])
                    self.__addCSMLOrCDMLData(data[0], None)
                    self._datasetID = data[1]
                    self._cdmlTimeAxis = data[2]


    def __applyGranuliteDetails(self, granulite_data):
        '''
        Apply the data read in from the granulite file to the atom data model
        being processed by the granulite
        @param granulite_data: the dict produced by __getGranuliteDetails
        '''
        logging.info("Applying granulite data to atom")
        # add the single value attributes to the granule
        for attribute in self.singleVals:
            if attribute in granulite_data:
                self._atom.setAttribute(self.singleVals[attribute], \
                                        granulite_data[attribute][0])

        # NB, explicitly set the related links before running the arrayVals loop, since
        # this will replace any file Links created if it occurs after these have been
        # created (was originally in arrayVals, but this doesn't enforce any specific ordering)
        if self.RELATED_LINKS_SECTION in granulite_data:
            self._atom.setAttribute('relatedLinks', \
                                    granulite_data[self.RELATED_LINKS_SECTION])

        # now add the arrays data
        for attribute in self.arrayVals:
            if attribute in granulite_data:
                self._atom.setAttribute(self.arrayVals[attribute], \
                                        granulite_data[attribute])

        if self.LOGO_SECTION in granulite_data:
            for logo in granulite_data[self.LOGO_SECTION]:
                self._atom.addRelatedLinks(logo + " | Logo | " + \
                                           self.VTD.getTermCurrentVocabURL(VTD.LOGO_TERM))

        # add the parameters data via the method - since this does some tidying up of the data
        if self.PARAMETERS_SECTION in granulite_data:
            self._atom.addParameters(granulite_data[self.PARAMETERS_SECTION])

        if self.DATA_ENTITY_SECTION in granulite_data:
            self._dataEntityIDs = granulite_data[self.DATA_ENTITY_SECTION]

        # now add any coverage data
        if self.SPATIAL_DATA in granulite_data:
            self._extractSpatialData(granulite_data[self.SPATIAL_DATA][0])
        if self.TEMPORAL_DATA in granulite_data:
            self._extractTemporalData(granulite_data[self.TEMPORAL_DATA][0])

        logging.info("All input data set up")


    def _extractSpatialData(self, geomString):
        '''
        Extract bounding box info from the specified geometry string
        @param geomString: A string holding geometry info
        NB, currently the method supports parsing of POLYGONs
        '''
        if geomString.upper().find('POLYGON') > -1:
            logging.debug("Spatial data specified in POLYGON format - extracting data from this")
            vals = re.findall('([\-\d\.]+)', geomString)
            # assume we're dealing with a rectangle normal to the equator...
            if len(vals) == 10:
                self._atom.minX = vals[0]
                self._atom.minY = vals[1]
                self._atom.maxX = vals[4]
                self._atom.maxY = vals[5]
        else:
            errorMessage = "Spatial coverage data not stored in POLYGON format - please correct and rerun"
            logging.error(errorMessage)
            raise ValueError(errorMessage)


    def _extractTemporalData(self, dateRangeString):
        '''
        Extract temporal info from the specified daterange string
        @param dateRangeString: A string holding temporal info
        NB, currently the method supports parsing of TIMERANGE objects
        '''
        if dateRangeString.upper().find('TIMERANGE') == -1:
            errorMessage = "Temporal data not stored in TIMERANGE() format - please correct and rerun"
            logging.error(errorMessage)
            raise ValueError(errorMessage)

        vals = re.findall('([0-9][0-9\-:TZ ]+)', dateRangeString)
        if vals:
            logging.debug("Adding start time: %s" %vals[0])
            self._atom.t1 = vals[0]
            if len(vals) > 1:
                logging.debug("Adding finish time: %s" %vals[1])
                self._atom.t2 = vals[1]


    def __addGranuleToDataEntityRecords(self):
        '''
        Augment any data entity records, specified in the granulite, with a reference to
        the granule
        '''
        logging.info("Adding granule info to data entities")
        if not self._dataEntityIDs:
            logging.info("No data entity IDs were specified in granulite - so will not add granule data to cedarmoles DB")
            return

        # now set up the granule links to the data entities specified
        for entityID in self._dataEntityIDs:
            data = utilities.getTripleData(entityID)
            dataEntityID = data[0]
            dataEntityProviderID = data[1]

            self.__updateDataEntity(dataEntityID)

        logging.info("Granule data added to data entities")


    def __removeGranuleFromDataEntityRecords(self):
        '''
        Remove references to the granule in any data entity records
        '''
        logging.info("Removing granule info from data entities")

        logging.info("Retrieving data entities with references to granule")
        self._atom.lookupAssociatedData(VTD.DE_TERM, self._eXist.xmldb, \
                                    lookupIndirectReferences = True)

        # now set up the granule links to the data entities specified
        for de in self._atom.dataEntities:
            self.__updateDataEntity(de.rel, removeLink = True)
        logging.info("Granule data removed from data entities")


    def __updateDataEntity(self, dataEntityID, removeLink = False):
        '''
        Retrieve the specified data entity and add a link to the current
        data granule, if required, then update the atom in eXist
        @param dataEntityID: ID of the data entity to augment with granule link
        @keyword removeLink: If True, remove the link to the current data granule
        - otherwise add it
        '''
        logging.debug("Retrieving data entity atom - to update associated granule info")
        doc = self._eXist.getAtom('dataent_' + dataEntityID)
        logging.debug("DE retrieved")
        de = Atom.Atom(xmlString=str(doc))
        noLinks = len(de.relatedLinks)

        linkData = "%s | %s | %s " %(self._atom.atomBrowseURL,
                                     self._atom.title,
                                     self._atom.VTD.getTermCurrentVocabURL(VTD.GRANULE_TERM))
        if removeLink:
            logging.debug("- now removing link to granule")
            link = Atom.Link()
            link.fromString(linkData)
            de.removeRelatedLinks(link)
            logging.debug("Link removed")
        else:
            logging.debug("- now adding link to granule")
            de.addRelatedLinks(linkData)
            logging.debug("Link added")

        # only save if need be
        if len(de.relatedLinks) == noLinks:
            logging.info("- data entity unchanged - skipping")
            return

        logging.debug("Now, save the updated DE atom back to eXist")
        self._eXist.createAtomInExist(de)
        logging.debug("DE atom updated")


    def __initialiseGranule(self):
        '''
        Create an Atom object ready to populate with data
        '''
        self._atom = Atom.Atom(VTD.GRANULE_TERM)


    def __processCSMLFile(self):
        logging.info("Processing CSML file")
        # only keep file name, if full path specified
        fileName = self._csmlFileName
        fileName = fileName.split('/')[-1]
        fileName = fileName.split('\\')[-1]

        self._atom.addCSMLData(self._csmlFileName, \
                               self._csmlContent, useCSMLID = self.useCSMLID)
        logging.info("Adding CSML file to eXist")
        self._eXist.createOrUpdateEXistFile(self._csmlContent, \
                                        eXistConnector.NDG_A_COLLECTION_PATH + \
                                        self._atom.ME.providerID + '/', \
                                        fileName)
        logging.info("CSML file added to eXist")
        logging.info("Finished processing CSML file")


    def __processCDMLFile(self):
        logging.info("Processing CDML file")
        # only keep file name, if full path specified
        fileName = self._cdmlFileName
        fileName = fileName.split('/')[-1]
        fileName = fileName.split('\\')[-1]

        # firstly, save the doc to eXist
        # remove DOCTYPE tags - as these will prevent eXist from ingesting the doc
        self._cdmlContent = re.sub(r'<!DOCTYPE.*>', '', self._cdmlContent)
        logging.info("CDML file loaded")

        logging.info("Adding CDML file to eXist")
        self._eXist.createOrUpdateEXistFile(self._cdmlContent, \
                                            eXistConnector.NDG_A_COLLECTION_PATH + \
                                            self._atom.ME.providerID + '/',\
                                            fileName)
        self._atom.cdmlFile = eXistConnector.NDG_A_COLLECTION_PATH + fileName

        # create a local copy of the CDML file - NB, this is required if running
        # from web app
        fn = os.path.basename(str(datetime.datetime.today().microsecond) + fileName)
        cdmlFile = open(fn, 'wb')
        cdmlFile.write(self._cdmlContent)
        cdmlFile.close()
        message = 'The file "' + fn + '" was uploaded successfully'

        logging.info("Create CSML file from the CDML file - NB, this will be stored in eXist too " + \
                     "and will be used to extract dataset information from")
        csmlFileName = utilities.createCSMLFile(fn, self._datasetID, self._cdmlTimeAxis)
        os.remove(fn)

        logging.info("CSML file successfully created - now processing this")
        self.__addCSMLOrCDMLData(csmlFileName, None)

        # NB, can remove the CSML file now since the contents are read into memory
        os.remove(csmlFileName)
        logging.info("Finished processing CDML file")


    def __addCSMLOrCDMLData(self, fileName, fileContent):
        '''
        Given an unknown file type, determine whether it is CSML or CDML; if it
        is either, ingest the data appropriately; if not, just skip. NB, CDML docs
        are converted into CSML ones to allow the data ingest
        @param fileName: name of the file to ingest data from
        @param fileContent: content of the file - NB, if this is set to None and the
        file, fileName, is available locally, CsmlParser.Dataset will read in the file
        directly
        '''
        logging.info("Determining file type to add data from")
        if not fileContent:
            logging.info("- NB, file contents not provided - attempt to load")
            try:
                f = open(fileName, 'r')
                fileContent = f.read()
                f.close()
            except IOError, e:
                logging.error(e.message)

            if not fileContent:
                raise ValueError("Could not load data from file, '%s'" %fileName)

        # if we're dealing with a CDML file, process this to produce a CSML file
        if utilities.isCSMLFile(fileContent):
            self._csmlFileName = fileName
            self._csmlContent = fileContent
        elif utilities.isCDMLFile(fileContent):
            self._cdmlFileName = fileName
            self._cdmlContent = fileContent
        else:
            raise ValueError("Unrecognised file type, '%s'" %fileName)

        logging.info("Finished determining file type")


    def processCSMLOrCDMLFile(self):
        '''
        If a CSML or CDML file has been specified, process it - i.e.:
        - extract required data
        - add to eXist
        @return atom: Atom object of created data granule with CSML/CDML data
        added
        '''
        logging.info("Processing CSML/CDML data")
        # NB, if a CDML file is specified, a CSML file will be created from it and this will be used to
        # extract the required dataset info
        if self._cdmlFileName:
            self.__processCDMLFile()

        if self._csmlFileName:
            self.__processCSMLFile()
        logging.info("Finished processing CSML/CDML data")
        return self._atom

    def processGranulite(self, replaceAtom = False):
        '''
        Complete the required granulite actions
        - ingest granulite data + add to granule atom
        - ingest CSML or CDML data + add to granule atom
        - save CSML/CDML data to eXist
        - add granule atom to eXist
        - add references to granule atom to specified data entity atoms
        @keyword replaceAtom: if True, allow granule atoms to replace existing
        atoms with the same ID - if False, throw a DuplicationError
        @raise DuplicationError: if replaceAtom is False and an atom is found
        with the same ID as that to be created
        @return atom: Atom object of created data granule
        '''
        logging.info("Processing granulite data")
        # load in the granulite details
        inputs = self.__getGranuliteDetails()

        # apply the basic, essential data
        self.__applyCoreGranuliteDetails(inputs)

        # check for CSML or CDML file and load details
        self.processCSMLOrCDMLFile()

        # apply any granulite data; NB, this overrides/augments the
        # CSML/CDML data by default
        self.__applyGranuliteDetails(inputs)

        if self._deleteMode:
            logging.info("In delete mode - deleting granule atom and any references")
            self.__deleteGranuleAndDEReferences()

        else:
            # add the granule to eXist - if this exists already a DuplicationError
            # will be thrown if backups are not allowed
            doReplace = replaceAtom or self._replaceAtom
            logging.info("Creating granule atom, '%s', in eXist DB" %self._atom.atomName)
            self._eXist.createAtomInExist(self._atom, replaceAtom = doReplace)

            # if the atom already exists, keep track of the backed up file - in case it
            # needs to be restored
            self.backupName = self._eXist.backupName

            # now add the granule data to the data entity in eXist
            # NB, if problems occur here, rollback changes
            try:
                # if the atom existed before, clear out the links to it before we start
                if self.backupName:
                    self.__removeGranuleFromDataEntityRecords()

                self.__addGranuleToDataEntityRecords()
            except:
                logging.error("Exception thrown whilst updating data entities - detail: ")
                logging.error(sys.exc_info())
                logging.error("Will now roll back changes to granule atom to leave system in original state")
                self.__deleteGranuleAndDEReferences()

                # if the atom existed before, restore it and recreate the old DE links
                if self.backupName:
                    self._eXist.restoreBackup(self._eXist.backupName)
                    self.__restoreGranuleToDataEntityRecords()

        logging.info("granulite processing complete")
        return self._atom


    def __deleteGranuleAndDEReferences(self):
        '''
        Delete the granule atom and any references in data entities to it
        '''
        self.__deleteGranule()
        self.__removeGranuleFromDataEntityRecords()


    def __deleteGranule(self):
        '''
        Delete granule data - remove granule atom from eXist and all references to
        granule in DEs
        '''
        logging.info("Deleting granule...")
        logging.info("Remove granule atom from eXist")
        self._eXist.deleteAtomInExist(self._atom)
        logging.info("Granule deleted")


    def __restoreGranuleToDataEntityRecords(self):
        '''
        If a granulite ingest has failed whilst replacing an existing granule,
        restore the original DE links to this granule
        '''
        logging.info("Restoring original links with data entities")
        # clear out any DEs from the current granulite input
        self._dataEntityIDs = []

        # now add back in the original DE links
        for de in self._atom.dataEntities:
            deLink = "%s | %s | %s " %(de.href, de.title, de.rel)
            self._dataEntityIDs.append(deLink)

        self.__addGranuleToDataEntityRecords()
        logging.info("Finished restoring links")


if __name__=="__main__":
    opts, args = getopt.getopt(sys.argv[1:], '-rxvd')
    if len(args) < 1:
        print "Error: Please specify a granulite data file to process"
        sys.exit(2)

    loggingLevel = logging.WARNING
    isReplace = False
    isDelete = False
    for o, a in opts:
        if o == "-v":
            print " - Verbose mode ON"
            loggingLevel = logging.INFO
        elif o == "-d":
            print " - Debug mode ON"
            loggingLevel = logging.DEBUG
        elif o == "-r":
            print " - Replace mode ON"
            isReplace = True
        elif o == "-x":
            print " - Delete mode ON"
            isDelete = True

    logging.basicConfig(level = loggingLevel,
                        format='%(asctime)s %(filename)s:%(lineno)d %(levelname)s %(message)s')
    g = granulite(args[0], replaceAtom = isReplace, deleteMode = isDelete)

    try:
        g.processGranulite()
    except edc.DuplicateError, e:
        # allow the user to specify if they want to overwrite the duplicated atom
        print e.message
        input = raw_input("- do you want to overwrite this with the granulite data? (y/n) ")
        if input.strip().upper() == 'Y':
            print "OK - replacing old granule atom..."
            g.processGranulite(replaceAtom = True)
        else:
            print "Exiting...."
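
For reference, a minimal granulite input file for the module above might look roughly as follows. This is an illustrative sketch only: the section names come from the constants defined in the class, but the provider and dataset values are invented and the pipe-separated triple layout is an assumption based on the triple format used elsewhere in this module.

    # comment lines start with '#'; a section header is a name ending in '::'
    granule_info::
    example.provider.ac.uk | example_granule_1 | Example granule title

    data_entity_id::
    example_data_entity | example.provider.ac.uk

    coverage::
    POLYGON((-10.0 50.0, -10.0 60.0, 5.0 60.0, 5.0 50.0, -10.0 50.0))
    TIMERANGE(2001-01-01T00:00:00Z, 2001-12-31T23:59:59Z)

    summary::
    A short free-text summary of the data granule.

    csml_file::
    /path/to/dataset_csml.xml

With a file like this, processGranulite() parses each section into a dict, applies the core granule_info data to the atom, ingests the CSML file, applies the remaining sections and finally links the granule to the listed data entities in eXist.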