source: TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/run_all_ingest.py @ 6188

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/run_all_ingest.py
Revision 6188, 5.6 KB checked in by sdonegan, 10 years ago (diff)

debug

  • Property svn:executable set to *
Line 
1#!/usr/bin/env python
2"""
3"""
4import os, sys, logging,time,string
5
lineSeparator = "-----------------------------"

# Configure root logging once for the whole run: INFO and above, with each
# record showing timestamp, source file and line, level and message.
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(filename)s:%(lineno)d %(levelname)s %(message)s')

# Start-of-run banner so separate runs are easy to tell apart in the log.
logging.info(lineSeparator)
logging.info("RUNNING: run_all_ingest.py")
logging.info(lineSeparator)
15
16from oai_document_ingester import oai_document_ingester
17
# Pick up path info from the processing config file.
# NOTE(review): configFilePath is not assigned anywhere in the visible part of
# this script; it has to be defined before this point, otherwise the check
# below fails with NameError instead of the intended error message.
if not os.path.isfile(configFilePath):
    sys.exit("ERROR: Could not find the processing config file")

# Keys that are read from the config file.  Each relevant line has the form
# "<key> <value>", separated by whitespace; anything else is ignored.
_PROCESSING_CONFIG_KEYS = (
    'code_directory',
    'base_directory',
    'reporting_directory',
    'passwords_file',
    'datcentre_configs',
)

processingConfig = {}

processing_config_file = open(configFilePath, "r")
try:
    for line in processing_config_file:
        words = line.split()
        if not words or words[0] not in _PROCESSING_CONFIG_KEYS:
            continue
        if len(words) < 2:
            # A known key without a value used to abort the run with a bare
            # IndexError; report it and carry on so the problem is visible.
            logging.warning("Ignoring config key '%s': no value given", words[0])
            continue
        processingConfig[words[0]] = words[1]
finally:
    # Always release the file handle, even if a line cannot be processed.
    processing_config_file.close()
40
41
42
# Number of datacentre configs the ingester was invoked for, and how many of
# those invocations raised.
run_counter = 0
error_counter = 0

# The directory holding the code and datacentre configs is taken from the
# processing config file.
current_dir = processingConfig['code_directory']
logging.info("Running ingest script for all config files in the current run directory (%s)", current_dir)

# Create the file for the summary report on all ingests; the reporting
# directory is also taken from the processing config file.
reportingDir = processingConfig['reporting_directory']
# os.path.join yields the same path as plain concatenation when the configured
# directory ends with '/', and still yields a valid path when it does not.
summaryFileName = os.path.join(reportingDir, 'runAllIngestSummary.txt')
summaryFile = open(summaryFileName, 'w')

# Every file in <code_directory>/datacentre_config is listed; the ones whose
# name ends with config_suffix are treated as datacentre config files.
config_suffix = '_config.properties'
filenames = os.listdir(os.path.join(current_dir, 'datacentre_config'))

ingester = oai_document_ingester()
# Maps the keyword that starts a line of a datacentre's ingest summary file to
# the label written in front of its value in the combined report.
_SUMMARY_LABELS = {
    'TOTAL_PROCESSED': "Number of processed files: ",
    'INGESTED_Created': "Number of ingested (created) files: ",
    'INGESTED_Updated': "Number of ingested (updated) files: ",
    'DELETED': "Number of deleted files: ",
    'PROBLEM_FILES': "Number of problem files: ",
    'PROBLEM_FILE': "Problem file name: ",
}


def _copy_ingest_summary(report_path, out_file):
    """Copy the recognised entries of one ingest summary file into out_file.

    Each line of report_path is split on whitespace.  When its first word is
    a key of _SUMMARY_LABELS, the matching label followed by the second word
    is written to out_file, followed by an empty line.  All other lines are
    ignored.

    Raises IOError if report_path cannot be opened, and IndexError if a
    recognised keyword has no value after it.
    """
    report = open(report_path, "r")
    try:
        for line in report:
            words = line.split()
            if not words:
                continue
            label = _SUMMARY_LABELS.get(words[0])
            if label is not None:
                out_file.write(label + words[1] + "\n")
                out_file.write("\n")
    finally:
        # Close exactly once, whether or not parsing succeeded.
        report.close()


for filename in filenames:
    if not filename.endswith(config_suffix):
        continue
    logging.info("Found config file: %s", filename)

    # get the datacentre from the filename
    datacentre = filename.replace(config_suffix, '')

    # Backup configs are not ingested; skip them before anything is written
    # so the summary does not get a header with no results under it.
    if 'backup' in datacentre:
        continue

    summaryFile.write("\n=====================================================================\n")
    summaryFile.write("Ingest report for data centre: " + datacentre + " at " + str(time.asctime()) + "\n")

    # now invoke the ingest script
    logging.info("Running the ingest script for datacentre: %s", datacentre)
    try:
        run_counter += 1
        ingester.processDataCentre(datacentre)
    except (Exception, SystemExit):
        # One failing datacentre must not stop the others.  SystemExit is
        # listed so that a sys.exit() raised inside the ingester is still
        # survived, as it was with the previous bare except, while Ctrl-C
        # (KeyboardInterrupt) now stops the run.
        logging.exception("Exception thrown - detail: ")
        logging.info("Continue processing next datacentre config file...")
        error_counter += 1

    # summarise harvest info from report doc now produced by ingester program
    recOpFileName = os.path.join(reportingDir, datacentre + "_ingestSummary.txt")

    try:
        _copy_ingest_summary(recOpFileName, summaryFile)
    except Exception:
        # Missing or malformed per-datacentre report: note it in the combined
        # summary and move on to the next config file.
        summaryFile.write("Could not extract summary info for " + datacentre + " ingest!!\n")
119           
summaryFile.close()

# Mail the combined summary report.
if os.path.exists(summaryFileName):
    commandline = "cat " + summaryFileName + " | mail -s 'TRITON discovery ingest report' steve.donegan@stfc.ac.uk"
    os.system(commandline)

logging.info("run_all_ingest.py complete - processed %s config files", run_counter)
logging.info("ingest report at: " + summaryFileName)

# If any ingest raised, log it and send a separate problem notification.
if error_counter > 0:
    errorTxt = "WARNING: %s errors were encountered during the run - check logfiles for more details" % error_counter
    logging.error(errorTxt)
    # The text is double-quoted so the shell passes it to echo as one argument.
    commandline = "echo \"" + errorTxt + "\" | mail -s 'TRITON discovery ingest report:PROBLEM' steve.donegan@stfc.ac.uk"
    # Previously the command was built but never run, so the problem mail
    # was never sent.
    os.system(commandline)
133
Note: See TracBrowser for help on using the repository browser.