source: TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/run_all_ingest.py @ 6154

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/run_all_ingest.py@6154
Revision 6154, 4.7 KB checked in by sdonegan, 10 years ago (diff)

all bugs in controller script seem to be fixed...

  • Property svn:executable set to *
RevLine 
#!/usr/bin/env python
"""
Controller script: run the OAI document ingester once for every
``*_config.properties`` file found in the ``datacentre_config`` directory,
collate the per-datacentre ingest summaries into a single report file and
mail that report out.
"""
import os
import sys
import logging
import time
import string

lineSeparator = "-----------------------------"

# Configure logging before anything else is imported or logged.
log_format = '%(asctime)s %(filename)s:%(lineno)d %(levelname)s %(message)s'
logging.basicConfig(level=logging.INFO, format=log_format)

# Start-of-run banner.
for banner_line in (lineSeparator, "RUNNING: run_all_ingest.py", lineSeparator):
    logging.info(banner_line)

from oai_document_ingester import oai_document_ingester

# Number of datacentres attempted / number whose ingest raised.
run_counter = error_counter = 0

# NOTE: pinned to the production buildout directory; the earlier behaviour
# was to use the directory the script is run from (os.getcwd() + "/").
current_dir = '/home/badc/buildouts/oai_document_ingester/ingestAutomation-upgrade/OAIBatch/'
logging.info("Running ingest script for all config files in the current run directory (%s)",
             current_dir)

# Combined summary report covering every ingest in this run.
# Base dir where production reporting and output should go:
reportingDir = '/home/badc/discovery_docs/ingestDocs/data/'
summaryFileName = os.path.join(reportingDir, 'runAllIngestSummary.txt')
summaryFile = open(summaryFileName, 'w')

# Candidate config files; only names ending in config_suffix are processed.
config_suffix = '_config.properties'
filenames = os.listdir(os.path.join(current_dir, 'datacentre_config/'))

ingester = oai_document_ingester()
# Maps the first word of a line in a datacentre's "<name>_ingestSummary.txt"
# file to the label written for it in the combined summary report.  The second
# word on the line is the value appended after the label.
report_labels = {
    'TOTAL_PROCESSED': "Number of processed files: ",
    'INGESTED_Created': "Number of ingested (created) files: ",
    'INGESTED_Updated': "Number of ingested (updated) files: ",
    'DELETED': "Number of deleted files: ",
    'PROBLEM_FILES': "Number of problem files: ",
    'PROBLEM_FILE': "Problem file name: ",
}

# Process every datacentre config file found in the config directory.
for filename in filenames:
    if not filename.endswith(config_suffix):
        continue
    logging.info("Found config file: %s", filename)

    # The datacentre name is the filename minus the trailing suffix (strip
    # only the suffix at the end, not any earlier occurrence of it).
    datacentre = filename[:-len(config_suffix)]

    # Backup configs are never ingested; skip them before writing a report
    # header so the summary does not contain empty sections for them.
    if 'backup' in datacentre:
        continue

    summaryFile.write("\n=====================================================================\n")
    summaryFile.write("Ingest report for data centre: " + datacentre + " at " + time.asctime() + "\n")

    # Now invoke the ingest script.
    logging.info("Running the ingest script for datacentre: %s", datacentre)
    run_counter += 1
    try:
        ingester.processDataCentre(datacentre)
    except (Exception, SystemExit):
        # Best effort: one failing datacentre must not stop the rest.
        # SystemExit is caught as well in case the ingester bails out via
        # sys.exit(); KeyboardInterrupt is deliberately allowed to propagate
        # so the operator can still abort the whole run.
        logging.exception("Exception thrown - detail: ")
        logging.info("Continue processing next datacentre config file...")
        error_counter += 1

    # Summarise harvest info from the report doc now produced by the ingester.
    recOpFileName = reportingDir + datacentre + "_ingestSummary.txt"

    try:
        # 'with' guarantees the report file is closed, including when it is
        # empty or a line is malformed.
        with open(recOpFileName, "r") as ingest_report_file:
            for line in ingest_report_file:
                words = line.split()
                if not words:
                    continue
                label = report_labels.get(words[0])
                if label is not None:
                    summaryFile.write(label + words[1] + "\n")
                    summaryFile.write("\n")
    except Exception:
        # Missing/unreadable report, or a keyword line with no value.
        logging.exception("Could not read ingest summary file %s", recOpFileName)
        summaryFile.write("Could not extract summary info for " + datacentre + " ingest!!\n")

# Flush the combined report to disk before mailing it.
summaryFile.close()

# Mail the combined summary report.
if os.path.exists(summaryFileName):
    commandline = "cat " + summaryFileName + " | mail -s 'TRITON discovery ingest report' steve.donegan@stfc.ac.uk"
    os.system(commandline)

logging.info("run_all_ingest.py complete - processed %s config files", run_counter)
logging.info("ingest report at: " + summaryFileName)

# If any datacentre ingest raised, log a warning and mail a separate alert.
if error_counter > 0:
    errorTxt = "WARNING: %s errors were encountered during the run - check logfiles for more details" % error_counter
    logging.error(errorTxt)
    # Quote the text so the shell passes it to echo as a single argument.
    commandline = 'echo "' + errorTxt + '"' + " | mail -s 'TRITON discovery ingest report:PROBLEM' steve.donegan@stfc.ac.uk"
    # The command used to be built but never run, so no alert was ever sent.
    os.system(commandline)
Note: See TracBrowser for help on using the repository browser.