source: ndgCommon/trunk/ndg/common/src/tools/existatomvalidator.py @ 4970

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/tools/existatomvalidator.py@5883
Revision 4970, 5.0 KB checked in by cbyrom, 11 years ago (diff)

Various fixes, tidy ups and simplifications to ndgCommon codebase.

Line 
1#!/usr/bin/env python
2'''
3 Class to validate the atoms in a specified eXist DB
4 
5 @author: C Byrom, Tessella Nov 2008
6'''
7import os, sys, getopt, logging
8import ndg.common.src.xmldb.clients.eXist.existdbclient as edc
9from ndg.common.src.models.vocabtermdata import VocabTermData as VTD
10from ndg.common.src.models.Atom import Atom
11from ndg.common.src.lib.atomvalidator import AtomValidator
12
13   
class eXistAtomValidator(object):
    '''
    Command line tool for validating the atoms held in an eXist DB.

    Each atom found in a collection whose path contains 'Published' is
    retrieved, passed through an AtomValidator and any resulting errors are
    collected, keyed by atom ID, for display once the run completes.

    NB, the syntax used here is valid for both python 2.6+ and python 3.
    '''
    # config file with eXist DB details
    DBCONFIG_FILE = "passwords.txt"

    # standard output delimiter
    LINE_SEPARATOR = "-----------------------------"

    def __init__(self):
        '''
        Main entry point for script: parse the command line options, set up
        logging and create the AtomValidator used to check each atom
        '''
        print(self.LINE_SEPARATOR)
        print("RUNNING: eXistAtomValidator.py")

        self._setUpOptions()

        self.validator = AtomValidator(None, dbConfigFile=self.DBCONFIG_FILE,
                                       loadAllCollections=True,
                                       raiseException=False)

        # errors found during validation, keyed by atom ID; only atoms
        # that actually have errors get an entry
        self._errors = {}

        # IDs of every atom validated so far - kept separately from the
        # errors dict so that duplicates are spotted for valid atoms too
        self._seenAtomIDs = set()

    def __validateAtom(self, atomPath):
        '''
        Retrieve an atom from the specified path and validate the contents
        @param atomPath: path to the atom in the eXist DB
        @raise ValueError: if an atom with the same ID has already been
        validated during this run
        '''
        logging.info("Validating atom, '%s'", atomPath)
        logging.info("- retrive atom from DB...")
        atomString = self.validator._eXist.getEXistFile(atomPath)
        atom = Atom(xmlString=str(atomString))

        # the errors dict is keyed on atom ID, so a repeated ID would
        # silently overwrite the earlier results - treat this as fatal
        if atom.atomID in self._seenAtomIDs:
            raise ValueError("Atom with duplicate ID (%s) encountered - this needs to be fixed in the DB" % atom.atomID)
        self._seenAtomIDs.add(atom.atomID)

        self.validator.setAtom(atom)
        self.validator.validateAtom()

        # only record an entry when there is something to report
        validationErrors = self.validator.errors
        if validationErrors:
            self._errors[atom.atomID] = validationErrors

        logging.info("Atom validation completed")

        if validationErrors:
            logging.info("- atom is invalid")
        else:
            logging.info("- atom is valid")

    def __displayErrors(self):
        '''
        Display any errors caught during validation
        '''
        # set up an additional logger to output results to file
        # NB, the handler is left attached to the root logger, so any later
        # log messages are also written to errors.log
        hdlr = logging.FileHandler('errors.log', 'w')
        formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        hdlr.setFormatter(formatter)
        hdlr.setLevel(logging.INFO)
        logging.getLogger('').addHandler(hdlr)

        logging.info("The following errors were encountered when validating the atoms:")
        for atomID, errorDict in self._errors.items():
            # skip any atom recorded without errors
            if not errorDict:
                continue
            logging.info('')
            logging.info(" atom ID '%s'", atomID)
            logging.info("--------------------------------------")
            # each value in the dict is a collection of error messages
            for errors in errorDict.values():
                for error in errors:
                    logging.info(error)
            logging.info("--------------------------------------")

    def _setUpOptions(self):
        '''
        Determine the logging level to use and configure this appropriately

        Recognised options are -v (INFO level) and -d (DEBUG level); with
        neither, WARNING level is used.  If both are given, whichever comes
        last on the command line wins.  An unrecognised option causes the
        usage message to be displayed and the script to exit.
        '''
        try:
            opts, args = getopt.getopt(sys.argv[1:], "vd")
        except getopt.GetoptError as err:
            # print help information and exit:
            print(str(err))  # will print something like "option -a not recognized"
            self.usage()     # NB, this calls sys.exit - it does not return

        loggingLevel = logging.WARNING
        for o, a in opts:
            if o == "-v":
                print(" - Verbose mode ON")
                loggingLevel = logging.INFO
            elif o == "-d":
                print(" - Debug mode ON")
                loggingLevel = logging.DEBUG

        print(self.LINE_SEPARATOR)
        logging.basicConfig(level=loggingLevel,
                            format='%(asctime)s %(filename)s:%(lineno)d %(levelname)s %(message)s')

    def usage(self):
        '''
        Display input params for the script, then exit with status 2
        '''
        print("Usage: python eXistAtomValidator.py [OPTION]")
        print(" - where options are:")
        print(" -v - verbose mode for output logging")
        print(" -d - debug mode for output logging")
        print("\neXist DB details should be stored in a config file called, '%s'" % self.DBCONFIG_FILE)
        sys.exit(2)

    def validateCollection(self):
        '''
        Validate every atom held in a 'Published' collection and, if any
        errors were found, display them
        @raise ValueError: if two of the atoms share the same ID
        '''
        # NB, we've loaded all the atom collection data so now step through this
        for atom, collection in self.validator._eXist.collections.items():
            if 'Published' in collection:
                self.__validateAtom(collection + '/' + atom + '.atom')

        if self._errors:
            self.__displayErrors()
        logging.info("eXistAtomValidator processing complete")
if __name__ == "__main__":
    # NB, the command line options are parsed by eXistAtomValidator itself
    # (see _setUpOptions), so they are not parsed again here
    validator = eXistAtomValidator()
    validator.validateCollection()
Note: See TracBrowser for help on using the repository browser.