source: exist/trunk/python/ndgUtils/lib/existatomvalidator.py @ 4555

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/lib/existatomvalidator.py@4555
Revision 4555, 5.0 KB checked in by cbyrom, 11 years ago (diff)

Move existdbclient to lib package + extend to make use of DocumentRetrieve to allow retrieval of atoms by ID + fix handling of
authors vs contributors when doing Atom to XML exports.

  • Property svn:executable set to *
Line 
1#!/usr/bin/env python
2'''
3 Command line tool to validate the
4 atoms in a specified eXist DB
5 
6 @author: C Byrom, Tessella Nov 2008
7'''
8import os, sys, getopt, logging
9import ndgUtils.lib.existdbclient as edc
10from ndgUtils.vocabtermdata import VocabTermData as VTD
11from ndgUtils.models.Atom import Atom
12from ndgUtils.lib.atomvalidator import AtomValidator
13from ndgUtils.ndgXqueries import ndgXqueries
14
15   
class eXistAtomValidator:
    '''
    Command line tool for validating the atoms in an eXist atom collection

    Instantiating the class runs the whole tool: the command line options
    are parsed, an AtomValidator is created from the DB config file, every
    atom held in a collection whose name contains 'Published' is validated
    and any errors found are then logged (and written to 'errors.log').
    '''
    # config file with eXist DB details
    DBCONFIG_FILE = "passwords.txt"

    # standard output delimiter
    LINE_SEPARATOR = "-----------------------------"

    # constants to use as error dict keys
    # NB, these are not referenced within this class itself
    BROKEN_LINKS = 1
    INVALID_VOCAB_TERM = 2
    SCHEMA_VALIDATION_FAILURE = 4

    # NB, not referenced within this class itself
    VALID_RELS = ["self", "related"]


    def __validateAtom(self, atomPath):
        '''
        Retrieve an atom from the specified path and validate the contents
        @param atomPath: path to the atom in the eXist DB
        @raise ValueError: if an atom with the same ID has already been
        validated during this run
        '''
        logging.info("Validating atom, '%s'", atomPath)
        logging.info("- retrive atom from DB...")
        atomString = self.validator._eXist.getEXistFile(atomPath)
        atom = Atom(xmlString=str(atomString))
        atomID = atom.atomID

        # every validated atom gets an entry in the errors dict (see below),
        # so an existing key means this ID has been seen before
        if atomID in self._errors:
            raise ValueError("Atom with duplicate ID (%s) encountered - this needs to be fixed in the DB" % atomID)

        self.validator.setAtom(atom)
        self.validator.validateAtom()

        # always record the outcome, even when there are no errors, so that
        # duplicate IDs are detected for valid atoms too; empty entries are
        # skipped when the errors are displayed
        atomErrors = self.validator.errors
        self._errors[atomID] = atomErrors

        logging.info("Atom validation completed")

        if atomErrors:
            logging.info("- atom is invalid")
        else:
            logging.info("- atom is valid")


    def __displayErrors(self):
        '''
        Display any errors caught during validation

        The errors are logged via the root logger and, in addition, written
        to the file 'errors.log' (overwritten on each run).
        '''
        # set up an additional logger to output results to file
        hdlr = logging.FileHandler('errors.log', 'w')
        formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        hdlr.setFormatter(formatter)
        hdlr.setLevel(logging.INFO)
        logging.getLogger('').addHandler(hdlr)

        # NOTE(review): everything below is logged at INFO level, whereas
        # _setUpOptions defaults the root logger to WARNING - so the report
        # only appears when -v or -d is given; confirm this is intended
        logging.info("The following errors were encountered when validating the atoms:")
        for atomID, errorDict in self._errors.items():
            # atoms that validated cleanly have an empty entry
            if not errorDict:
                continue
            logging.info('')
            logging.info(" atom ID '%s'", atomID)
            logging.info("--------------------------------------")
            for errors in errorDict.values():
                for error in errors:
                    logging.info(error)
            logging.info("--------------------------------------")


    def _setUpOptions(self):
        '''
        Determine the logging level to use and configure this appropriately

        Recognised options are -v (INFO level) and -d (DEBUG level); with
        neither, the WARNING level is used.  An unrecognised option causes
        the usage message to be displayed and the script to exit.
        '''
        try:
            opts, _args = getopt.getopt(sys.argv[1:], "vd")
        except getopt.GetoptError as err:
            # print help information and exit:
            print(str(err)) # will print something like "option -a not recognized"
            # NB, usage() exits the script, so opts is never used unset
            self.usage()

        loggingLevel = logging.WARNING
        for o, _a in opts:
            if o == "-v":
                print(" - Verbose mode ON")
                loggingLevel = logging.INFO
            elif o == "-d":
                print(" - Debug mode ON")
                loggingLevel = logging.DEBUG

        print(self.LINE_SEPARATOR)
        logging.basicConfig(level=loggingLevel,
                        format='%(asctime)s %(filename)s:%(lineno)d %(levelname)s %(message)s')


    def usage(self):
        '''
        Display input params for the script, then exit with status 2
        '''
        print("Usage: python eXistAtomValidator.py [OPTION]")
        print(" - where options are:")
        print(" -v - verbose mode for output logging")
        print(" -d - debug mode for output logging")
        print("\neXist DB details should be stored in a config file called, '%s'" % self.DBCONFIG_FILE)
        sys.exit(2)


    def __init__(self):
        '''
        Main entry point for script
        '''
        print(self.LINE_SEPARATOR)
        print("RUNNING: eXistAtomValidator.py")

        self._setUpOptions()

        self.validator = AtomValidator(None, self.DBCONFIG_FILE, loadAllCollections=True)

        # setup the dictionary to store errors - keyed by atom ID; atoms
        # with no errors have an empty entry
        self._errors = {}
        # NB, we've loaded all the atom collection data so now step through this
        for atom, collection in self.validator._eXist.collections.items():
            if 'Published' in collection:
                self.__validateAtom(collection + '/' + atom + '.atom')

        # only produce the report if at least one atom actually has errors
        if any(self._errors.values()):
            self.__displayErrors()
        logging.info("eXistAtomValidator processing complete")
144       
145   
if __name__ == "__main__":
    # NB, the command line options are parsed by the eXistAtomValidator
    # class itself (in _setUpOptions), so no parsing is needed here;
    # instantiating the class runs the tool
    eXistAtomValidator()
149   
Note: See TracBrowser for help on using the repository browser.