source: exist/trunk/python/ndgUtils/lib/atomvalidator.py @ 4491

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/lib/atomvalidator.py@4491
Revision 4491, 10.7 KB checked in by cbyrom, 12 years ago (diff)

Create new class, AtomValidator, to act as a utility class for the Atom class - allowing validation of the following data:
i) External links

ii) Vocab data
iii) Schema compliance
iv) unicode compliance - with utf-8 encoding
v) data consistency within the atom data model


Store an instance in a global MILK variable for easy re-use by the atom-editor. Also, remove redundant geoUtilities class.

Line 
1#!/usr/bin/env python
2'''
3 Helper class to use with the Atom data model - for data validation
4 Validates:
5 i) External links
6 ii) Vocab data
7 iii) Schema compliance
8 iv) unicode compliance - with utf-8 encoding
9 v) data consistency within the atom data model
10 
11 @author: C Byrom, Tessella Nov 2008
12'''
13import logging, traceback, datetime
14import ndgUtils.models.existdbclient as edc
15from ndgUtils.vocabtermdata import VocabTermData as VTD
16from ndgUtils.models.Atom import Atom
17from ndgUtils.ndgXqueries import ndgXqueries
18from ndgUtils.lib.utilities import isValidUnicode, checkURL, strftime
19
20
class ValidationError(Exception):
    """
    Exception raised when data validation fails.

    Carries the dictionary of validation errors so that callers can inspect
    the individual problems via unpack_errors().  The generic message and each
    entry of the error dictionary are written to the error log when the
    exception is constructed.
    """
    def __init__(self, errorDict):
        '''
        @param errorDict: dictionary of validation errors; each value is
        logged at error level and the dictionary itself is retained
        '''
        msg = "Data validation error"
        logging.error(msg)
        Exception.__init__(self, msg)
        # NB, values() is used rather than itervalues() since the latter only
        # exists on python 2 dictionaries
        for val in errorDict.values():
            logging.error(val)
        self._errorDict = errorDict

    def unpack_errors(self):
        '''
        @return: the error dictionary the exception was created with
        '''
        return self._errorDict
35   
36   
class AtomValidator(object):
    '''
    Helper class for validating atom data.

    Runs a series of checks against an atom (external links, vocab terms,
    schema compliance, data model consistency and unicode compliance) and
    collects any problems found in the self.errors dictionary.  Each key of
    this dictionary is either one of the error type constants defined below or
    the name of the atom attribute at fault; each value is a list of error
    messages.
    '''
    # eXist DB client - only set up if a DB config file is provided
    _eXist = None

    # standard output delimiter
    LINE_SEPARATOR = "-----------------------------"

    # constants to use as error dict keys
    BROKEN_LINKS = 1
    INVALID_VOCAB_TERM = 2
    SCHEMA_VALIDATION_FAILURE = 4

    # link 'rel' values that are accepted without being checked as a URL
    VALID_RELS = ["self", "related"]

    NEW_LINE = "\n"


    def __init__(self, atom, dbConfigFile = None, raiseException = False, \
                 newLineChar= NEW_LINE):
        '''
        Set up validator object - with atom to validate
        @param atom: Atom object to validate
        @keyword dbConfigFile: config file to use with eXist DB connection
        @keyword raiseException: if True, raise a ValidationError following a failed validation
        - NB, if not used, errors can be retrieved from the self.errors field
        @keyword newLineChar: string used to separate the different parts of a
        multi-part error message
        '''
        logging.info("Setting up atomValidation object")
        self._atom = atom
        self._nl = newLineChar

        # set up connection to the eXist DB - if a config file is available
        if dbConfigFile:
            self.setUpEXistDBConnection(dbConfigFile)

        # setup the dictionary to store errors
        self.raiseException = raiseException
        self.errors = {}
        logging.info("atomValidator initialised")


    def setUpEXistDBConnection(self, dbConfFile):
        '''
        Get the default eXist DB connection - by reading in data from the db config file
        @param dbConfFile: config file to use with the eXist DB connection
        '''
        logging.info("Setting up connection to eXist DB")
        self._eXist = edc.eXistDBClient(configFile = dbConfFile, loadCollectionData=True)
        logging.info("eXist DB connection now set up")


    def setAtom(self, atom):
        '''
        Set the atom to use the validator with - clearing out any errors
        stored from a previous validation
        @param atom: an Atom object to validate
        @raise ValueError: if the input is not an Atom object
        '''
        if not isinstance(atom, Atom):
            raise ValueError("Input object is not an Atom object")
        logging.info("Setting new atom with validator (id=%s)" %atom.atomID)
        self.errors = {} # clear out any existing errors
        self._atom = atom


    def validateAtom(self):
        '''
        Validate the contents of the atom currently set on the validator.  Any
        problems found are stored in self.errors.  If no atom is set, an error
        is logged and no validation is done.
        @raise ValidationError: if errors are found and the validator was set
        up with raiseException = True
        '''
        if not self._atom:
            logging.error("No atom specified to validate - skipping")
            # NB, without an atom none of the checks below can be ran
            return

        logging.info("Validating atom, '%s'" %self._atom.atomID)
        # firstly, check the links point to valid uris
        self.__validateLinks()

        # now check the vocab terms
        self.__validateVocabData()

        # check the atom conforms to the schema
        self.__validateSchemaCompliance()

        # validate the actual atom content - for more specific checks on data
        self.__validateAtomContent()

        # lastly check for non-unicode compliant characters
        self.__validateUnicode()

        logging.info("Atom validation completed")

        # report the outcome - and escalate to an exception, if required
        if self.errors:
            logging.info("- atom is invalid")

            if self.raiseException:
                logging.warning("Errors found in atom data: %s" %self.errors)
                raise ValidationError(self.errors)
        else:
            logging.info("- atom is valid")


    def __validateAtomContent(self):
        '''
        Check the data content of the atom is consistent - i.e. that the title
        is set and the spatial and temporal coverage data is complete and
        correctly formatted.  Any problems found are added to the error dict
        under the keys, 'title', 'spatialcoverage' and 'temporalrange'
        '''
        logging.info("Validating the atom data model consistency")
        if not self._atom.title:
            self.__addError('title', "Title attribute cannot be empty")

        self.__validateSpatialCoverage()
        self.__validateTemporalRange()

        logging.info("Atom model consistency validation completed")


    def __validateSpatialCoverage(self):
        '''
        Check the bounding box values of the atom - if any are set, all four
        must be present and convertible to a float
        '''
        coords = [self._atom.minX, self._atom.maxX, self._atom.minY, self._atom.maxY]
        if not (coords[0] or coords[1] or coords[2] or coords[3]):
            # no spatial data specified - nothing to check
            return

        missingVals = False
        incorrectFormat = False
        for val in coords:
            if val is None or val == '':
                missingVals = True
                continue
            try:
                float(val)
            except (TypeError, ValueError):
                incorrectFormat = True

        spatialError = ""
        if missingVals:
            spatialError += "Incomplete spatial coverage data.%s"  %self._nl
        if incorrectFormat:
            spatialError += "Spatial coverage data not in numerical format."

        if spatialError:
            self.__addError('spatialcoverage', spatialError)


    def __validateTemporalRange(self):
        '''
        Check the start (t1) and end (t2) dates of the atom - each one that is
        set must match the atom's YEAR_FORMAT and, if both are set, the start
        date must not be after the end date
        '''
        if not (self._atom.t1 or self._atom.t2):
            # no temporal data specified - nothing to check
            return

        timeErrors = ''
        d1 = None
        d2 = None
        if self._atom.t1:
            try:
                d1 = datetime.datetime.strptime(self._atom.t1, self._atom.YEAR_FORMAT)
            except (TypeError, ValueError):
                timeErrors += "Incorrect start date format - '%s' - c.f. '2008-04-12'. %s" \
                    %(self._atom.t1, self._nl)
        if self._atom.t2:
            try:
                d2 = datetime.datetime.strptime(self._atom.t2, self._atom.YEAR_FORMAT)
            except (TypeError, ValueError):
                timeErrors += "Incorrect end date format - '%s' - c.f. '2008-04-12'. %s" \
                    %(self._atom.t2, self._nl)

        if d1 and d2 and d1 > d2:
            # NB, the strftime utility is used here rather than the datetime
            # method of the same name
            timeErrors += "Inconsistent date range - '%s' is not before '%s'" \
                %(strftime(d1, self._atom.YEAR_FORMAT), strftime(d2, self._atom.YEAR_FORMAT))

        if timeErrors:
            self.__addError('temporalrange', timeErrors)


    def __validateUnicode(self):
        '''
        Do a quick check over all the string attributes set directly on the
        atom to look for non utf-8 compliant characters.  Errors are added to
        the error dict using the name of the attribute as the key
        '''
        logging.info("Validating unicode UTF-8 compliance")
        for key, val in self._atom.__dict__.items():
            if not val or not isinstance(val, basestring):
                continue
            if not isValidUnicode(val):
                # NB, use the standard error store so that the dict values
                # are always lists of messages
                self.__addError(key, "Illegal unicode found in string: '%s'." %val)
        logging.info("Completed validating unicode UTF-8 compliance")


    def __validateLinks(self):
        '''
        Check the external links contained in the atom and ensure they are valid
        '''
        logging.info("Validating atom links")
        for link in self._atom.relatedLinks:
            if not checkURL(link.href):
                self.__addError(self.BROKEN_LINKS, "Broken link: '%s'" %link.href)

        logging.info("Completed link validation")


    def __validateVocabData(self):
        '''
        Check the vocab data contained in the atom and ensure they are valid -
        i.e. the scheme of each parameter and any link 'rel' value not listed
        in VALID_RELS must pass the URL check
        '''
        logging.info("Validating atom vocab data")
        for category in self._atom.parameters:
            if not checkURL(category.scheme):
                self.__addError(self.INVALID_VOCAB_TERM, \
                                "Invalid vocab term: '%s'" %category.scheme)

        # also check the terms used in the links
        for link in self._atom.relatedLinks:
            if link.rel in self.VALID_RELS:
                continue
            if not checkURL(link.rel):
                self.__addError(self.INVALID_VOCAB_TERM, \
                                "Invalid vocab term: '%s'" %link.rel)
        logging.info("Completed vocab data validation")


    def __validateSchemaCompliance(self):
        '''
        Validate the atom, against the atom xsd, using eXist validation facilities.
        Any exception raised whilst doing the check is logged and stored as a
        schema validation failure - rather than being propagated
        '''
        logging.info("Validating schema compliance")
        atomPath = self._atom.getDefaultCollectionPath() + self._atom.atomName
        try:
            errors = self._eXist.checkAtomSchemaCompliance(atomPath, atom = self._atom)
            for error in errors:
                self.__addError(self.SCHEMA_VALIDATION_FAILURE, error)

        except Exception as e:
            # NB, format the exception itself - the 'message' attribute is
            # deprecated and is empty for exceptions with several arguments
            errorMessage = "Problem experienced when validating against schema:%s'%s'" \
                %(self._nl, e)
            logging.error(errorMessage)
            logging.error(traceback.format_exc())
            self.__addError(self.SCHEMA_VALIDATION_FAILURE, errorMessage)
        logging.info("Completed validating schema compliance")


    def __addError(self, errorLabel, errorMessage):
        '''
        Add an error with the specified label and message to the error dict;
        messages with the same label are collected in a list
        @param errorLabel: type of error to add
        @param errorMessage: error message to add
        '''
        logging.debug("Adding error to error list")
        logging.debug(errorMessage)

        self.errors.setdefault(errorLabel, []).append(errorMessage)
        logging.debug("Error added")


    def logErrors(self):
        '''
        Outputs any errors caught during validation to log
        '''
        logging.info("The following errors were encountered when validating the atoms:")
        logging.info('')
        logging.info("- atom ID '%s'" %self._atom.atomID)
        logging.info("--------------------------------------")
        for errors in self.errors.values():
            for error in errors:
                logging.info(error)
        logging.info("--------------------------------------")
Note: See TracBrowser for help on using the repository browser.