source: exist/trunk/python/ndgUtils/lib/existatomvalidator.py @ 4627

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/lib/existatomvalidator.py@4627
Revision 4627, 5.0 KB checked in by cbyrom, 11 years ago (diff)

Improve input parameter checking + improve scoping of methods + add more structure to validator.

  • Property svn:executable set to *
Line 
1#!/usr/bin/env python
2'''
3 Command line tool to validate the
4 atoms in a specified eXist DB
5 
6 @author: C Byrom, Tessella Nov 2008
7'''
8import os, sys, getopt, logging
9import ndgUtils.lib.existdbclient as edc
10from ndgUtils.vocabtermdata import VocabTermData as VTD
11from ndgUtils.models.Atom import Atom
12from ndgUtils.lib.atomvalidator import AtomValidator
13from ndgUtils.ndgXqueries import ndgXqueries
14
15   
class eXistAtomValidator:
    '''
    Command line tool for validating the atoms held in an eXist atom collection.

    Creating an instance parses the command line options, configures logging
    and creates the underlying AtomValidator; validateCollection() then
    validates every atom whose collection path contains 'Published' and
    reports any errors that were found.
    '''
    # config file with eXist DB details
    DBCONFIG_FILE = "passwords.txt"

    # standard output delimiter
    LINE_SEPARATOR = "-----------------------------"

    def __validateAtom(self, atomPath):
        '''
        Retrieve an atom from the specified path and validate the contents
        @param atomPath: path to the atom in the eXist DB
        @raise ValueError: if an atom with the same ID has already been validated
        '''
        logging.info("Validating atom, '%s'", atomPath)
        logging.info("- retrive atom from DB...")
        atomString = self.validator._eXist.getEXistFile(atomPath)
        atom = Atom(xmlString=str(atomString))
        atomID = atom.atomID

        # every validated atom gets an entry in the errors dict (see below), so
        # finding the ID here means two atoms in the DB share the same ID
        if atomID in self._errors:
            raise ValueError("Atom with duplicate ID (%s) encountered - this needs to be fixed in the DB" % atomID)

        self.validator.setAtom(atom)
        self.validator.validateAtom()

        # always record the outcome - an empty dict means the atom is valid.
        # A copy is stored in case the validator reuses its errors dict for
        # the next atom.
        self._errors[atomID] = dict(self.validator.errors or {})

        logging.info("Atom validation completed")
        if self._errors[atomID]:
            logging.info("- atom is invalid")
        else:
            logging.info("- atom is valid")

    def __displayErrors(self):
        '''
        Display any errors caught during validation; the report is also
        written to the file, 'errors.log', in the current directory
        '''
        # set up an additional handler to output results to file
        hdlr = logging.FileHandler('errors.log', 'w')
        hdlr.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
        hdlr.setLevel(logging.INFO)
        rootLogger = logging.getLogger('')
        rootLogger.addHandler(hdlr)

        try:
            # NB, the report is logged at WARNING level: the default logging
            # level of this tool is WARNING, so INFO messages would be dropped
            # (leaving an empty report) unless the -v or -d option was used
            logging.warning("The following errors were encountered when validating the atoms:")
            for atomID, errorDict in self._errors.items():
                # atoms without errors have an empty entry - nothing to report
                if not errorDict:
                    continue
                logging.warning('')
                logging.warning(" atom ID '%s'", atomID)
                logging.warning("--------------------------------------")
                for errors in errorDict.values():
                    for error in errors:
                        logging.warning(error)
                logging.warning("--------------------------------------")
        finally:
            # detach and close the file handler so that the report is flushed
            # and repeated calls do not stack up handlers on the root logger
            rootLogger.removeHandler(hdlr)
            hdlr.close()

    def _setUpOptions(self):
        '''
        Determine the logging level to use and configure this appropriately.
        If an unrecognised option is specified, the error and the usage
        information are printed and the script exits (via usage()).
        '''
        try:
            opts, _unusedArgs = getopt.getopt(sys.argv[1:], "vd")
        except getopt.GetoptError as err:
            # print help information and exit:
            print(str(err))  # will print something like "option -a not recognized"
            self.usage()     # NB, this calls sys.exit - so does not return

        loggingLevel = logging.WARNING
        for o, a in opts:
            if o == "-v":
                print(" - Verbose mode ON")
                loggingLevel = logging.INFO
            elif o == "-d":
                print(" - Debug mode ON")
                loggingLevel = logging.DEBUG

        print(self.LINE_SEPARATOR)
        logging.basicConfig(level=loggingLevel,
                            format='%(asctime)s %(filename)s:%(lineno)d %(levelname)s %(message)s')

    def usage(self):
        '''
        Display input params for the script and exit with status code 2
        '''
        print("Usage: python eXistAtomValidator.py [OPTION]")
        print(" - where options are:")
        print(" -v - verbose mode for output logging")
        print(" -d - debug mode for output logging")
        print("\neXist DB details should be stored in a config file called, '%s'" % self.DBCONFIG_FILE)
        sys.exit(2)

    def __init__(self):
        '''
        Main entry point for script: parse the command line options, set up
        logging and create the atom validator used to check the atoms
        '''
        print(self.LINE_SEPARATOR)
        print("RUNNING: eXistAtomValidator.py")

        self._setUpOptions()

        self.validator = AtomValidator(None, self.DBCONFIG_FILE, loadAllCollections=True,
                                       raiseException=False)

        # setup the dictionary to store errors; this is keyed on atom ID and
        # holds an entry (empty if the atom is valid) for every validated atom
        self._errors = {}

    def validateCollection(self):
        '''
        Validate all the atoms in the 'Published' collections of the eXist DB
        and display any errors that were found
        @raise ValueError: if two of the validated atoms share the same ID
        '''
        # NB, we've loaded all the atom collection data so now step through this
        for atom, collection in self.validator._eXist.collections.items():
            if 'Published' in collection:
                self.__validateAtom(collection + '/' + atom + '.atom')

        # valid atoms have empty entries in the errors dict, so check the
        # contents of the entries rather than just the presence of entries
        if any(self._errors.values()):
            self.__displayErrors()
        logging.info("eXistAtomValidator processing complete")
139       
140   
if __name__ == "__main__":
    # NB, the command line options are parsed by the eXistAtomValidator
    # constructor, so they are not parsed (and the results discarded) here
    validator = eXistAtomValidator()
    validator.validateCollection()
Note: See TracBrowser for help on using the repository browser.