source: TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/run_all_ingest.py @ 4888

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/run_all_ingest.py@4888
Revision 4888, 3.8 KB checked in by sdonegan, 12 years ago (diff)

Create a tagged release of the stable developed version for ingestion and ingestion reporting. Note: this predates Calum's restructuring of ndgUtils and the update to use the atom feed. The two will need to be merged at some stage.

  • Property svn:executable set to *
Line 
#!/usr/bin/env python
"""
Run the OAI document ingester for every datacentre config file and mail a
combined summary report.

The script looks in ``datacentre_config/`` under the current working
directory for files ending in ``_config.properties``.  For each one (except
those whose datacentre name contains ``backup``) it:

  * calls ``oai_document_ingester(<datacentre>)``, logging and counting any
    failure but carrying on with the next config file;
  * reads ``data/<datacentre>_ingestSummary.txt`` and copies the PROCESSED,
    INGESTED, PROBLEM_NUM and PROBLEM_FILE values into
    ``data/runAllIngestSummary.txt``.

When all config files have been handled the summary file is piped to the
``mail`` command.
"""
# NOTE: sys and string are no longer referenced below; the imports are kept
# so nothing that relied on them being loaded by this script changes.
import os
import sys
import logging
import time
import string

from oai_document_ingester import oai_document_ingester


run_counter = 0
error_counter = 0

lineSeparator = "-----------------------------"

# First word of a line in a per-datacentre report -> text written in front of
# the second word of that line in the combined summary.
REPORT_LABELS = {
    'PROCESSED': "Number of processed files: ",
    'INGESTED': "Number of ingested files: ",
    'PROBLEM_NUM': "Number of problem files: ",
    'PROBLEM_FILE': "Problem file name: ",
}

# configure logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(filename)s:%(lineno)d %(levelname)s %(message)s')
logging.info(lineSeparator)
logging.info("RUNNING: run_all_ingest.py")
logging.info(lineSeparator)


current_dir = os.getcwd() + "/"  # this is the base dir that the script is run from
logging.info("Running ingest script for all config files in the current run directory (%s)", current_dir)

# file for the summary report on all ingests
summaryFileName = "data/runAllIngestSummary.txt"

# iterate over all files in the config directory and process any config ones
config_suffix = '_config.properties'

# "with" guarantees the summary is flushed and closed before it is mailed,
# even if something unexpected escapes the loop.
with open(summaryFileName, 'w') as summaryFile:
    filenames = os.listdir(current_dir + 'datacentre_config/')

    for filename in filenames:
        if not filename.endswith(config_suffix):
            continue
        logging.info("Found config file: %s", filename)

        # get the datacentre from the filename
        datacentre = filename.replace(config_suffix, '')

        # Skip backup configs *before* writing anything, otherwise the mailed
        # report contains a header for a datacentre that was never ingested.
        if 'backup' in datacentre:
            continue

        summaryFile.write("\n=====================================================================\n")
        summaryFile.write("Ingest report for data centre: " + datacentre + " at " + str(time.asctime()) + "\n")

        # now invoke the ingest script
        logging.info("Running the ingest script for datacentre: %s", datacentre)
        run_counter += 1
        try:
            oai_document_ingester(datacentre)
        except (Exception, SystemExit):
            # One failing datacentre must not stop the others.  SystemExit is
            # caught as well in case the ingester bails out via sys.exit()
            # (not visible from this file - TODO confirm); KeyboardInterrupt
            # is deliberately left alone so Ctrl-C still stops the whole run.
            logging.error("Exception thrown - detail: ", exc_info=True)
            logging.info("Continue processing next datacentre config file...")
            error_counter += 1

        # summarise harvest info from report doc now produced by ingester program
        recOpFileName = "data/" + datacentre + "_ingestSummary.txt"

        try:
            with open(recOpFileName, "r") as ingest_report_file:
                for line in ingest_report_file:
                    words = line.split()
                    if not words:
                        continue
                    label = REPORT_LABELS.get(words[0])
                    if label is not None:
                        # A keyword line without a value raises IndexError,
                        # which is reported via the handler below.
                        summaryFile.write(label + words[1] + "\n")
        except Exception:
            # Missing/unreadable report or a malformed line: note it in the
            # summary and move on to the next datacentre.
            summaryFile.write("Could not extract summary info for " + datacentre + " ingest!!\n")


if os.path.exists(summaryFileName):
    # summaryFileName is a fixed literal, so building a shell string is safe here.
    commandline = "cat " + summaryFileName + " | mail -s 'ignis discovery ingest report' s.j.donegan@rl.ac.uk"
    os.system(commandline)

logging.info("run_all_ingest.py complete - processed %s config files", run_counter)
logging.info("ingest report at: " + summaryFileName)

if error_counter > 0:
    logging.error("WARNING: %s errors were encountered during the run - check logfiles for more details", error_counter)
Note: See TracBrowser for help on using the repository browser.