source: ndgCommon/trunk/ndg/common/src/lib/granulite.py @ 5219

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/lib/granulite.py@5219
Revision 5219, 32.1 KB checked in by cbyrom, 12 years ago (diff)

Various updates to test suite and a small fix to allow granulite to
process atom summaries properly as a string.

Line 
1#!/usr/bin/env python
2'''
Data model representing a granulite file - together with utilities to
4augment atom data with
5
6 @author: C Byrom, Tessella Nov 08
7'''
8import os, sys, string, getopt, logging, re, datetime, cgi
9import ndg.common.src.clients.xmldb.eXist.dbconstants as dc
10from ndg.common.src.models import Atom
11import ndg.common.src.lib.utilities as utilities
12from ndg.common.src.lib.atomvalidator import AtomValidator
13from ndg.common.src.models.vocabtermdata import VocabTermData as VTD
14from ndg.common.src.clients.xmldb.eXist.atomclient import AtomClient
15from ndg.common.src.clients.xmldb.abstractxmldbatomclient import DuplicateError
16
class granulite(object):
    '''
    Granulite data model - parses a granulite config file and uses it to
    populate (or delete) a granule atom held in an eXist DB
    '''
    # expected sections in a granulite file
    AUTHORS_SECTION = "authors"
    FILE_SECTION = "files"
    PARAMETERS_SECTION = "parameters"
    COVERAGE_SECTION = "coverage"
    CSML_SECTION = "csml_file"
    CDML_SECTION = "cdml_file"
    GRANULE_INFO_SECTION = "granule_info"
    GRANULE_AUTHORS_SECTION = "granule_authors"
    SUMMARY_SECTION = "summary"
    DATA_ENTITY_SECTION = "data_entity_id"
    LOGO_SECTION = "logo"
    RELATED_LINKS_SECTION = "related_links"
   
    # need to distinguish coverage data between spatial and temporal data - use these variables to do so
    # NB, other_spatial is for spatial resolution and vertical extent data
    TEMPORAL_DATA = "temporal_data"
    SPATIAL_DATA = "spatial_data"
    OTHER_SPATIAL_DATA = "other_spatial"

    # group the data together as either single valued or array valued - to ease setting attributes on Atom
    # - each maps a granulite section name to the corresponding Atom attribute name
    singleVals = {} 
    arrayVals = {AUTHORS_SECTION:'authors', \
                 FILE_SECTION:'files', GRANULE_AUTHORS_SECTION:'atomAuthors', \
                 SUMMARY_SECTION:'summary'}
       
    # config file with eXist DB details
    DBCONFIG_FILE = "exist.config"

    # default title given to CSML files by csmlscan
    DEFAULT_CSML_TITLE = "NAME OF DATASET GOES HERE"
   
    # flag to use when running in test mode
    TEST_MODE = "granulite_test_mode"
   
    # eXist DB client
    _eXist = None
   
    # flag to mark mode of operation
    _isOverride = False

    # info on specified CDML + CSML
    _cdmlFileName = None
    _cdmlTimeAxis = None
    _datasetID = None
    _csmlFileName = None

    # info on the datasets to attach the granule to
    # NOTE(review): mutable class-level default - shared across instances until
    # an instance assignment shadows it; confirm no cross-instance leakage
    _dataEntityIDs = []
   
    # standard output delimiter
    LINE_SEPARATOR = "-----------------------------"
73       
74       
75    def __init__(self, granulite, granuleAtom = None, \
76                  eXistClient = None, csmlOrCdmlFile = None, \
77                  aggregateCoverage = False, useCSMLID = False, 
78                  timeAxis = 'time', datasetID = None, replaceAtom = False, 
79                  deleteMode = False):
80        '''
81        Constructor for granulite object - NB, the csml/cdml keywords allow use
82        in a webapp - outside of this, this data should be specified in the granulite
83        file
84       
85        @param granulite: either the granulite filename or contents
86        @keyword granuleAtom: an existing granule atom - to add granulite data to
87        - NB, if not specified, a new atom is used
88        @keyword eXistClient: an eXist client implementing the InterfaceXMLDBAtomClient interface
89        @keyword csmlOrCdmlFile: a cgi.FieldStorage object with CSML or CDML data
90        @keyword aggregateCoverage: if set to True, only coverage data that extends the
91        atom coverage data will be added
92        @keyword useCSMLID: if True, use the CSML doc ID as the dataset ID - NB,
93        this should only be True if creating a new atom - e.g. from a granulite
94        @keyword timeAxis: if using a CDML file, specify the time axis to use - 'time'
95        is the default
96        @keyword datasetID: if using a CDML file, specify the ID of the dataset to use
97        - otherwise one will be randomly generated
98        @keyword replaceAtom: if True, and if a granule atom is found with the ID of
99        the atom to be created by the granulite, automatically overwrite the older
100        atom
101        @keyword deleteMode: if True the granule atom and any references to it are
102        deleted from eXist - when processGranulite are ran
103        '''
104        logging.info("Creating granulite data model")
105       
106        self._granulite = granulite
107       
108        # set up connection to eXist
109        if eXistClient:
110            self._eXist = eXistClient
111        else:
112            self._eXist = AtomClient(configFileName = self.DBCONFIG_FILE)
113       
114        # override CSML/CDML data specified in the granulite file with data input directly
115        self.ingestGranuliteFiles = True
116        # NB, empty FieldStorage fields end up as empty strings
117        if csmlOrCdmlFile is not None:
118            self.ingestGranuliteFiles = False
119            if csmlOrCdmlFile != '':
120                if not isinstance(csmlOrCdmlFile, cgi.FieldStorage):
121                    raise ValueError("Specified CSML/CDML file is not a cgi.FieldStorage object")
122                self.__addCSMLOrCDMLData(csmlOrCdmlFile.filename, csmlOrCdmlFile.value)
123       
124
125        if granuleAtom:
126            if not isinstance(granuleAtom, Atom.Atom):
127                raise ValueError("Specified granule atom is not an Atom object")
128            self._atom = granuleAtom
129        else:
130            # create a skeleton granule
131            self.__initialiseGranule()
132           
133        self._replaceAtom = replaceAtom
134        self._deleteMode = deleteMode
135        self.useCSMLID = useCSMLID
136        self._cdmlTimeAxis = timeAxis
137        self._datasetID = datasetID
138       
139        # AtomValidator to use for validation - only load when needed
140        self.validator = None
141           
142        logging.info("Granulite data model set up")
143
144       
145    def __getSectionName(self, str):
146        '''
147        Checks an input string to see if it contains a section title; if so
148        return this title name, otherwise return None
149        @param str: string to parse for section name
150        @return: section name, if found, None otherwise
151        '''
152        sectionName = None
153        if str.count('::') == 1:
154            sectionName = str.partition('::')[0]
155        return sectionName
156
157
    def __getGranuliteDetails(self):
        '''
        Load the granulite config file (or use its contents directly, if a
        multi-line string was provided) and extract the required data
        @raise ValueError: if a file name was provided and the file does not exist
        @return: dict keyed on section name, each value being a list of the
        stripped data lines found in that section
        '''
        logging.info("Retrieving data from granulite config file")
       
        # assume we've already read in the file if it is multi-line
        self.newLine = None
        # cope with unix and dos end of lines
        if self._granulite.find('\n') > -1:
            self.newLine = '\n'
        elif self._granulite.find('\r') > -1:
            self.newLine = '\r'
       
        if self.newLine:
            granuliteData = self._granulite.split(self.newLine)
        else:
            # single-line input - treat it as a file name
            # Check this file exists
            if not os.path.isfile(self._granulite):
                raise ValueError("ERROR: Could not find the config file, %s; please specify " \
                         "a valid file" %self._granulite)
               
            granulite_file = open(self._granulite, "r")
            granuliteData = granulite_file.readlines()
            granulite_file.close()
       
        # create a dictionary of all data in file - then use this to get the required data
        granulite_data = {}
       
        # initialise vocab term data lookup object
        self.VTD = VTD()

        sectionName = "" # variable to indicate what section we're currently parsing in the granulite file data
        logging.info("Parsing granulite file...")
        for line in granuliteData:
           
            line = line.strip()
            # avoid comments
            if (line.startswith('#')):
                continue
           
            # avoid empty lines - except in the summary section where these may be appropriate
            if not line and sectionName != self.SUMMARY_SECTION:
                continue
           
            section = self.__getSectionName(line)

            # if a section name is returned, we're at a new section of the file - so change the dictionary key
            if section:
                sectionName = section
                continue
           
            # cope with case where there is rubbish header info in the file
            # - i.e. data lines before the first section marker are discarded
            if not sectionName:
                continue
           
            # initialise dictionary array if required
            if sectionName not in granulite_data:
                granulite_data[sectionName] = []
           
            if sectionName != self.COVERAGE_SECTION:
                logging.debug("Adding %s data: -%s-" %(sectionName, line))
                granulite_data[sectionName].append(line)
            else:
                # if it's coverage data, determine if it is spatial or temporal data
                coverageType = self.SPATIAL_DATA
                if line.lower().find('time') > -1:
                    coverageType = self.TEMPORAL_DATA
                elif line.lower().startswith(self.OTHER_SPATIAL_DATA.lower()):
                    coverageType = self.OTHER_SPATIAL_DATA
               
                # initialise dictionary array if required
                if coverageType not in granulite_data:
                    granulite_data[coverageType] = []
                logging.debug("Adding %s data: %s" %(coverageType, line))
                granulite_data[coverageType].append(line)
                   
        logging.info("Finished parsing granulite file")
        logging.info("- returning dict of ingested data")
        return granulite_data
238       
239       
    def __applyCoreGranuliteDetails(self, granulite_data):
        '''
        Apply the essential data read in from the granulite file to the atom data model
        being processed by the granulite
        @param granulite_data: the dict produced by __getGranuliteDetails
        @raise ValueError: if the mandatory granule_info section (or its provider
        ID) is missing, or if both CSML and CDML files are specified
        '''
        # add the general granule info
        if self.GRANULE_INFO_SECTION not in granulite_data:
            raise ValueError("Need granule_info section in granulite input file")
       
        # the granule_info line is a triple of (provider ID, dataset ID, title)
        data = utilities.getTripleData(granulite_data[self.GRANULE_INFO_SECTION][0])
        if not data[0]:
            raise ValueError("Provider ID is missing for granule; please add this info to the " + \
                             self.GRANULE_INFO_SECTION + " section of the granulite config file")
        self._atom.addMolesEntityData(None, data[0], None)
        self._atom.setDatasetID(data[1])
        self._atom.title = data[2]

        # NB, if running from the web, ignore the CSML/CDML files specified in the granulite
        if self.ingestGranuliteFiles:
            # check for CSML/CDML file input - these data are changed before adding to the granule
            # - since the file will be stored and referenced in eXist by then
            if not self._csmlFileName:
                if self.CSML_SECTION in granulite_data:
                    self.__addCSMLOrCDMLData(granulite_data[self.CSML_SECTION][0], None)
   
            if not self._cdmlFileName:
                if self.CDML_SECTION in granulite_data:
                    if self._csmlFileName:
                        raise ValueError("Cannot specify both CDML and CSML file in granulite config file" + \
                                         "\nNB, CSML file is generated from the specified CDML file")
               
                    # the cdml_file line is a triple of (file name, dataset ID, time axis)
                    data = utilities.getTripleData(granulite_data[self.CDML_SECTION][0])
                    self.__addCSMLOrCDMLData(data[0], None)
                    self._datasetID = data[1]
                    self._cdmlTimeAxis = data[2]
277       
    def __applyGranuliteDetails(self, granulite_data):
        '''
        Apply the data read in from the granulite file to the atom data model
        being processed by the granulite
        @param granulite_data: the dict produced by __getGranuliteDetails
        '''
        logging.info("Applying granulite data to atom")
        # add the single value attributes to the granule
        for attribute in self.singleVals:
            if attribute in granulite_data:
                self._atom.setAttribute(self.singleVals[attribute], \
                                        granulite_data[attribute][0])

        # NB, explicitly set the related links before running the arrayVals loop, since
        # this will replace any file Links created if it occurs after these have been
        # created (was originally in arrayVals, but this doesn't enforce any specific ordering)
        if self.RELATED_LINKS_SECTION in granulite_data:
            self._atom.setAttribute('relatedLinks', \
                                    granulite_data[self.RELATED_LINKS_SECTION],
                                    escapeSpecials = False)

        # now add the arrays data
        for attribute in self.arrayVals:
            if attribute in granulite_data:
                val = granulite_data[attribute]
                # the summary is treated as a single text blob - so rejoin its
                # lines using the newline style detected on ingest
                if attribute == self.SUMMARY_SECTION:
                    val = (self.newLine or '\n').join(val)
                   
                self._atom.setAttribute(self.arrayVals[attribute], val)
       
        if self.LOGO_SECTION in granulite_data:
            # logos are represented as related links with the Logo vocab term
            for logo in granulite_data[self.LOGO_SECTION]:
                self._atom.addRelatedLinks(logo + " | Logo | " + \
                                           self.VTD.getTermCurrentVocabURL(VTD.LOGO_TERM))

        # add the parameters data via the method - since this does some tidying up of the data
        if self.PARAMETERS_SECTION in granulite_data:
            self._atom.addParameters(granulite_data[self.PARAMETERS_SECTION])
           
        if self.DATA_ENTITY_SECTION in granulite_data:
            self._dataEntityIDs = granulite_data[self.DATA_ENTITY_SECTION] 

        # now add any coverage data
        if self.SPATIAL_DATA in granulite_data:
            self._extractSpatialData(granulite_data[self.SPATIAL_DATA][0])
        if self.TEMPORAL_DATA in granulite_data:
            self._extractTemporalData(granulite_data[self.TEMPORAL_DATA][0])
        if self.OTHER_SPATIAL_DATA in granulite_data:
            # triple of (label, spatial resolution, vertical extent)
            data = utilities.getTripleData(granulite_data[self.OTHER_SPATIAL_DATA][0])
            self._atom.ME.spatialResolution = data[1]
            self._atom.ME.verticalExtent = data[2]

        logging.info("All input data set up")
331       
332
333    def _extractSpatialData(self, geomString):
334        '''
335        Extract bounding box info from the specified geometry string
336        @param geomString: A string holding geometry info
337        NB, currently the method supports parsing of POLYGONs
338        '''
339        if geomString.upper().find('POLYGON') > -1:
340            logging.debug("Spatial data specified in POLYGON format - extracting data from this")
341            vals = re.findall('([\-\d\.]+)', geomString)
342            # we're just going to extract max/min vals to create a rectangle normal to equator
343            # - NB, the data is specified in pairs of values
344            # NB, firstly need to convert data into floats to do comparisons
345            newVals = []
346            for val in vals:
347                newVals.append(float(val))
348           
349            if len(newVals) >= 8:
350                # start with some initial values
351                self._atom.minX = newVals[0]
352                self._atom.minY = newVals[1]
353                self._atom.maxX = newVals[0]
354                self._atom.maxY = newVals[1]
355               
356                for i in range(len(newVals) - 1):
357                    # process array in pairs - so skip every second int
358                    if i % 2:
359                        continue
360                   
361                    if newVals[i] < self._atom.minX:
362                        self._atom.minX = newVals[i]
363                    if newVals[i] > self._atom.maxX:
364                        self._atom.maxX = newVals[i]
365                    if newVals[i + 1] < self._atom.minY:
366                        self._atom.minY = newVals[i + 1]
367                    if newVals[i + 1] > self._atom.maxY:
368                        self._atom.maxY = newVals[i + 1]
369            else:
370                logging.debug("- insufficient data points to specify POLYGON - skipping")
371        else:
372            errorMessage = "Spatial coverage data not stored in POLYGON format - please correct and rerun"
373            logging.error(errorMessage)
374            raise ValueError(errorMessage)
375
376
377    def _extractTemporalData(self, dateRangeString):
378        '''
379        Extract temporal info from the specified daterange string
380        @param dateRangeString: A string holding temporal info
381        NB, currently the method supports parsing of TIMERANGE objects
382        '''
383        if dateRangeString.upper().find('TIMERANGE') == -1:
384            errorMessage = "Temporal data not stored in TIMERANGE() format - please correct and rerun"
385            logging.error(errorMessage)
386            raise ValueError(errorMessage)
387       
388        vals = re.findall('([0-9][0-9\-:TZ ]+)', dateRangeString)
389        if vals:
390            logging.debug("Adding start time: %s" %vals[0])
391            self._atom.t1 = vals[0]
392            if len(vals) > 1:
393                logging.debug("Adding finish time: %s" %vals[1])
394                self._atom.t2 = vals[1]
395     
396   
    def __addGranuleToDataEntityRecords(self):
        '''
        Augment any data entity records, specified in the granulite, with a reference to
        the granule - NB, this is a no-op if no data entity IDs were specified
        '''
        logging.info("Adding granule info to data entities")
        if not self._dataEntityIDs:
            logging.info("No data entity IDs were specified in granulite - so will not add granule data to eXist DB")
            return
       
        # now set up the granule links to the data entities specified
        for entityID in self._dataEntityIDs:
            # each entry is a triple - only the first two parts, the entity ID
            # and its provider ID, are used here
            data = utilities.getTripleData(entityID)
            dataEntityID = data[0]
            dataEntityProviderID = data[1]
           
            self.__updateDataEntity(dataEntityID, dataEntityProviderID)

        logging.info("Granule data added to data entities")
416     
417   
    def __removeGranuleFromDataEntityRecords(self):
        '''
        Remove references to the granule in any data entity records
        '''
        logging.info("Removing granule info from data entities")
        logging.info("Retrieving data entities with references to granule")
        # populate self._atom.dataEntities with the DEs that reference this
        # granule - including indirect references
        self._atom.lookupAssociatedData(VTD.DE_TERM, self._eXist,
                                        lookupIndirectReferences = True)
       
        # now remove the granule link from each data entity found
        for de in self._atom.dataEntities:
            self.__updateDataEntity(de.rel, "", removeLink = True)
        logging.info("Granule data removed from data entities")
431
432
    def __updateDataEntity(self, dataEntityID, providerID, removeLink = False):
        '''
        Retrieve the specified data entity and add a link to the current
        data granule, if required, then update the atom in eXist
        @param dataEntityID: ID of the data entity to augment with granule link
        @param providerID: provider ID of the data entity to augment with granule link
        @keyword removeLink: If True, remove the link to the current data granule
        - otherwise add it
        @raise ValueError: if the data entity cannot be found in eXist
        '''
        logging.debug("Retrieving data entity atom - to update associated granule info")
        doc = self._eXist.buildAndRunQuery('atom', 
                                           dc.ATOM_COLLECTION_PATH, 
                                           providerID, 
                                           dataEntityID)
       
        if not doc:
            errorMessage = "Could not find data entity, %s - please retry with valid data" %dataEntityID
            logging.error(errorMessage)
            raise ValueError(errorMessage)
        logging.debug("DE retrieved")
       
        de = Atom.Atom(xmlString=str(doc[0]))
        # remember the original link count - to detect whether anything changed
        noLinks = len(de.relatedLinks)
       
        # link to this granule, as a 'url | title | vocab term' triple
        linkData = "%s | %s | %s " %(self._atom.atomBrowseURL,
                                     self._atom.title,
                                     self._atom.VTD.getTermCurrentVocabURL(VTD.GRANULE_TERM))
        if removeLink:
            logging.debug("- now removing link to granule")
            link = Atom.Link()
            link.fromString(linkData)
            de.removeRelatedLinks(link)
            logging.debug("Link removed")
        else:
            logging.debug("- now adding link to granule")
            de.addRelatedLinks(linkData)
            logging.debug("Link added")
           
        # only save if need be
        if len(de.relatedLinks) == noLinks:
            logging.info("- data entity unchanged - skipping")
            return
       
        logging.debug("Now, save the updated DE atom back to eXist")
        # NB, createAtom is also used to update the existing DE doc here
        self._eXist.createAtom(de)
        logging.debug("DE atom updated")
479
480
    def __initialiseGranule(self):
        '''
        Create a skeleton granule Atom object - ready to populate with data
        '''
        self._atom = Atom.Atom(VTD.GRANULE_TERM)
486
487
    def __processCSMLFile(self):
        '''
        Add the CSML data to the granule atom and store the CSML doc in eXist
        - under the atom provider's collection
        '''
        logging.info("Processing CSML file")
        # only keep file name, if full path specified
        # - cope with both unix and windows style separators
        fileName = self._csmlFileName
        fileName = fileName.split('/')[-1]
        fileName = fileName.split('\\')[-1]
       
        self._atom.addCSMLData(self._csmlFileName, \
                               self._csmlContent, useCSMLID = self.useCSMLID)
        logging.info("Adding CSML file to eXist")
        self._eXist.createOrUpdateDoc(self._csmlContent,
                                      dc.NDG_A_COLLECTION_PATH + \
                                      self._atom.ME.providerID + '/',
                                      fileName)
        logging.info("CSML file added to eXist")
        logging.info("Finished processing CSML file")
504           
505
    def __processCDMLFile(self):
        '''
        Store the CDML doc in eXist, then generate a CSML file from it - the
        generated CSML is ingested via __addCSMLOrCDMLData and the temporary
        local files are removed afterwards
        '''
        logging.info("Processing CDML file")
        # only keep file name, if full path specified
        # - cope with both unix and windows style separators
        fileName = self._cdmlFileName
        fileName = fileName.split('/')[-1]
        fileName = fileName.split('\\')[-1]
       
        # firstly, save the doc to eXist
        # remove DOCTYPE tags - as these will prevent eXist from ingesting the doc
        self._cdmlContent = re.sub(r'<!DOCTYPE.*>', '', self._cdmlContent)
        logging.info("CDML file loaded")

        logging.info("Adding CDML file to eXist")
        self._eXist.createOrUpdateDoc(self._cdmlContent,
                                      dc.NDG_A_COLLECTION_PATH + \
                                      self._atom.ME.providerID + '/',
                                      fileName)
        self._atom.cdmlFile = dc.NDG_A_COLLECTION_PATH + fileName
       
        # create a local copy of the CDML file - NB, this is required if running
        # from web app
        # - prefix with the current microsecond to reduce the risk of name clashes
        fn = os.path.basename(str(datetime.datetime.today().microsecond) + fileName)
        cdmlFile = open(fn, 'wb')
        cdmlFile.write(self._cdmlContent)
        cdmlFile.close()
        # NOTE(review): 'message' is never used - looks like a leftover from a
        # web upload flow; confirm it can be removed
        message = 'The file "' + fn + '" was uploaded successfully'
       
        logging.info("Create CSML file from the CDML file - NB, this will be stored in eXist too " + \
                     "and will be used to extract dataset information from")
        csmlFileName = utilities.createCSMLFile(fn, self._cdmlTimeAxis, datasetID = self._datasetID)
        os.remove(fn)
       
        logging.info("CSML file successfully created - now processing this")
        self.__addCSMLOrCDMLData(csmlFileName, None)
       
        # NB, can remove the CSML file now since the contents are read into memory
        os.remove(csmlFileName)
        logging.info("Finished processing CDML file")
544
545
546    def __addCSMLOrCDMLData(self, fileName, fileContent):
547        '''
548        Given an unknown file type, determine whether it is CSML or CDML; if it
549        is either, ingest the data appropriately; if not, just skip. NB, CDML docs
550        are converted into CSML ones to allow the data ingest
551        @param fileName: name of the file to ingest data from
552        @param fileContent: content of the file - NB, if this is set to None and the
553        file, fileName, is available locally, CsmlParser.Dataset will read in the file
554        directly
555        '''
556        logging.info("Determining file type to add data from")
557        if not fileContent:
558            logging.info("- NB, file contents not provided - attempt to load")
559            try:
560                f = open(fileName, 'r')
561                fileContent = f.read()
562                f.close
563            except IOError, e:
564                logging.error(e.message)
565               
566            if not fileContent:
567                raise ValueError("Could not load data from file, '%s'" %fileName)
568
569        # if we're dealing with a CDML file, process this to produce a CSML file
570        if utilities.isCSMLFile(fileContent):
571            self._csmlFileName = fileName
572            self._csmlContent = fileContent
573        elif utilities.isCDMLFile(fileContent):
574            self._cdmlFileName = fileName
575            self._cdmlContent = fileContent
576        else:
577            raise ValueError("Unrecognised file type, '%s'" %fileName)
578       
579        logging.info("Finished determining file type")
580       
581
582
    def processCSMLOrCDMLFile(self):
        '''
        If a CSML or CDML file has been specified, process it - i.e.:
        - extract required data
        - add to eXist
        @return atom: Atom object of created data granule with CSML/CDML data
        added
        '''
        logging.info("Processing CSML/CDML data")
        # NB, if a CDML file is specified, a CSML file will be created from it and this will be used to
        # extract the required dataset info
        if self._cdmlFileName:
            self.__processCDMLFile()
           
        # NB, __processCDMLFile sets _csmlFileName - so this branch also runs
        # for CDML input
        if self._csmlFileName:
            self.__processCSMLFile()
        logging.info("Finished processing CSML/CDML data")
        return self._atom
601
602       
    def processGranulite(self, replaceAtom = False, initialiseAtom = False):
        '''
        Complete the required granulite actions
        - ingest granulite data + add to granule atom
        - ingest CSML or CDML data + add to granule atom
        - save CSML/CDML data to eXist
        - add granule atom to eXist
        - add references to granule atom to specified data entity atoms
        @keyword replaceAtom: if True, allow granule atoms to replace existing
        atoms with the same ID - if False, throw a DuplicateError
        @keyword initialiseAtom: if True, use a new atom object - otherwise
        continue to use what is currently loaded
        @raise DuplicateError: if replaceAtom is False (both here and on the
        constructor) and an atom is found with the same ID as that to be created 
        @return atom: Atom object of created data granule
        '''
        logging.info("Processing granulite data")
        if initialiseAtom:
            self.__initialiseGranule()

        # load in the granulite details
        inputs = self.__getGranuliteDetails()
       
        # apply the basic, essential data
        self.__applyCoreGranuliteDetails(inputs)
       
        # check for CSML or CDML file and load details
        self.processCSMLOrCDMLFile()

        # apply any granulite data; NB, this overrides/augments the
        # CSML/CDML data by default
        self.__applyGranuliteDetails(inputs)
       
        if self._deleteMode:
            logging.info("In delete mode - deleting granule atom and any references")
            self.deleteGranuleAndDEReferences()
           
        else:
            # validate the newly created atom - before importing it
            self.__validateGranuleAtom()
           
            # add the granule to eXist - if this exists already a DuplicateError
            # will be thrown if replacement is not allowed
            doReplace = replaceAtom or self._replaceAtom
            logging.info("Creating granule atom, '%s', in eXist DB" %self._atom.atomName)
            self._eXist.createAtom(self._atom, replaceAtom = doReplace)
           
            # if the atom already exists, keep track of the backed up file - incase it
            # needs to be restored
            self.backupName = self._eXist.backupName
       
            # now add the granule data to the data entity in eXist
            # NB, if problems occur here, rollback changes
            try:
                # if the atom existed before, clear out the links to it before we start
                if self.backupName:
                    self.__removeGranuleFromDataEntityRecords()
                   
                self.__addGranuleToDataEntityRecords()
            except Exception, e:
                errorMessage = "Exception thrown whilst updating data entities - detail: " + \
                    str(e.message) + \
                    "\n - will now roll back changes to granule atom to leave system in original state"
                logging.error(errorMessage)
                self.deleteGranuleAndDEReferences()

                # if the atom existed before, restore it and recreate the old DE links
                if self.backupName:
                    self._eXist.restoreBackup(self._eXist.backupName)
                    self.__restoreGranuleToDataEntityRecords()
                raise SystemError(errorMessage)
               
        logging.info("granulite processing complete")
        return self._atom
677
678
    def __validateGranuleAtom(self):
        '''
        Validate the granule atom created, using the standard AtomValidator
        @raise atomvalidator.ValidationError: if any errors are found
        '''
        # lazily create the validator - it is only needed at this point
        if not self.validator:
            self.validator = AtomValidator(None, 
                                           atomClient = self._eXist,
                                           newLineChar="<br/>")
           
        self.validator.setAtom(self._atom)
        self.validator.validateAtom()
691       
692
    def deleteGranuleAndDEReferences(self):
        '''
        Delete the granule atom and any references in data entities to it
        '''
        self.__deleteGranule()
        self.__removeGranuleFromDataEntityRecords()
699
700
    def __deleteGranule(self):
        '''
        Delete granule data - remove the granule atom doc from eXist
        NB, references to the granule in DEs are removed separately - see
        deleteGranuleAndDEReferences
        '''
        logging.info("Deleting granule...")
        logging.info("Remove granule atom from eXist")
        self._eXist.deleteDoc(self._atom.getFullPath())
        logging.info("Granule deleted")
710
711
    def __restoreGranuleToDataEntityRecords(self):
        '''
        If a granulite ingest has failed whilst replacing an existing granule,
        restore the original DE links to this granule
        '''
        logging.info("Restoring original links with data entities")
        # clear out any DEs from the current granulite input
        self._dataEntityIDs = []
       
        # now add back in the original DE links
        # - rebuild each entry as a 'href | title | rel' triple
        for de in self._atom.dataEntities:
            deLink = "%s | %s | %s " %(de.href, de.title, de.rel)
            self._dataEntityIDs.append(deLink)
       
        self.__addGranuleToDataEntityRecords()
        logging.info("Finished restoring links")
728       
729
730if __name__=="__main__":
731    opts, args = getopt.getopt(sys.argv[1:], '-rxvd')
732    if len(args) < 1:
733        print "Error: Please specify a granulite data file to process"
734        sys.exit(2)
735       
736    loggingLevel = logging.WARNING
737    isReplace = False
738    isDelete = False
739    for o, a in opts:
740        if o == "-v":
741            print " - Verbose mode ON"
742            loggingLevel = logging.INFO
743        elif o == "-d":
744            print " - Debug mode ON"
745            loggingLevel = logging.DEBUG
746        elif o == "-r":
747            print " - Replace mode ON"
748            isReplace = True
749        elif o == "-x":
750            print " - Delete mode ON"
751            isDelete = True
752   
753    logging.basicConfig(level = loggingLevel,
754                        format='%(asctime)s %(filename)s:%(lineno)d %(levelname)s %(message)s')
755    g = granulite(args[0], replaceAtom = isReplace, deleteMode = isDelete)
756   
757    try:
758        g.processGranulite()
759    except DuplicateError, e:
760        # allow the user to specify if they want to overwrite the duplicated atom
761        print e.message
762        input = raw_input("- do you want to overwrite this with the granulite data? (y/n) ")
763        if input.strip().upper() == 'Y':
764            print "OK - replacing old granule atom..."
765            g = granulite(args[0], replaceAtom = isReplace, deleteMode = isDelete)
766            g.processGranulite(replaceAtom = True)
767        else:
768            print "Exiting...."
Note: See TracBrowser for help on using the repository browser.