Changeset 3912 for TI01-discovery
- Timestamp: 19/05/08 17:21:46 (13 years ago)
- Location: TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch
- Files: 4 edited
Legend:
- Unmodified
- Added
- Removed
TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/FileUtilities.py
r3844 r3912 24 24 f.close() 25 25 except: 26 sys.exit("ERROR: Problem encountered when creating file, %s" %fileName)26 raise SystemError, "ERROR: Problem encountered when creating file, %s" %fileName 27 27 28 28 -
TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/PostgresRecord.py
r3869 r3912 27 27 @param 28 28 ''' 29 # TODO MDIP transforms do not work very well for lots of files - so currently hiding these 29 30 documentTypes = ['MOLES', 'DIF', 'DC', 'ISO19139']#, 'MDIP'] 30 31 … … 182 183 xqName = "mdip2moles" 183 184 else: 184 sys.exit("ERROR: No XQuery exists to transform input document type, %s, into moles format" \185 %self.docType )185 raise TypeError, "ERROR: No XQuery exists to transform input document type, %s, into moles format" \ 186 %self.docType 186 187 187 188 # now run the appropriate transform and set the attribute … … 215 216 keywordAdder.main(tmpDir, tmpKeywordsDir, self._datacentre_groups) 216 217 217 sys.exit()218 218 # Now load in the converted file 219 219 f=open(tmpKeywordsDir + "/" + tmpFile, 'r') … … 223 223 # Finally, tidy up temp dirs 224 224 self._fileUtils.cleanDir(tmpDir) 225 self._fileUtils.clea rDir(tmpKeywordsDir)225 self._fileUtils.cleanDir(tmpKeywordsDir) 226 226 logging.info("Completed adding keywords") 227 227 -
TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/molesReadWrite.py
r3910 r3912 192 192 continue 193 193 194 frag = None195 194 if hasattr(self, item): 196 195 if isinstance(self.__dict__[item], molesElement): -
TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/oai_document_ingester.py
r3862 r3912 41 41 ID=d.id 42 42 else: 43 sys.exit("Only handles DIF or MDIP here.")43 raise TypeError, "Only handles DIF or MDIP here." 44 44 45 45 logging.info("Found identifier: " + ID) … … 58 58 59 59 logging.info("Adding file, " + filename + ", to postgres DB") 60 discoveryID = self.getID(filename)61 60 62 61 # first of all create a PostgresRecord - this object represents all the data required 63 62 # for a DB entry 63 dao = None 64 64 try: 65 discoveryID = self.getID(filename) 66 65 67 record = PostgresRecord(filename, self._NDG_dataProvider, \ 66 68 self._datacentre_groups, self._datacentre_namespace, \ … … 77 79 logging.error(sys.exc_info()) 78 80 81 if dao: 82 logging.info("Removing record and its associated info from DB") 83 logging.info("- to allow clean ingestion on rerun") 84 try: 85 dao.deleteOriginalRecord() 86 except: 87 logging.error("Problem encountered when removing record: ") 88 logging.error(sys.exc_info()) 89 logging.error("NB, this record will need to be cleared manually from DB to ensure all relevant data is ingested") 90 91 self._no_problem_files += 1 92 logging.info("Continue processing other files") 79 93 80 94 … … 165 179 Display input params for the script 166 180 ''' 167 logging.info("Usage: python oai_document_ingester.py [OPTION] <datacentre>")168 logging.info(" - where:\n <datacentre> is the data centre to ingest data from; and options are:")169 logging.info(" -v - verbose mode for output logging")170 logging.info(" -d - debug mode for output logging")181 print "Usage: python oai_document_ingester.py [OPTION] <datacentre>" 182 print " - where:\n <datacentre> is the data centre to ingest data from; and options are:" 183 print " -v - verbose mode for output logging" 184 print " -d - debug mode for output logging" 171 185 sys.exit(2) 172 186 … … 177 191 ''' 178 192 self.lineSeparator = "-----------------------------" 179 logging.info(self.lineSeparator)180 logging.info("RUNNING: oai_document_ingester.py")193 print self.lineSeparator 194 
print "RUNNING: oai_document_ingester.py" 181 195 182 196 # check for verbose option … … 185 199 except getopt.GetoptError, err: 186 200 # print help information and exit: 187 logging.info(str(err)) # will print something like "option -a not recognized"201 print str(err) # will print something like "option -a not recognized" 188 202 189 203 loggingLevel = logging.WARNING 190 204 for o, a in opts: 191 205 if o == "-v": 192 logging.info(" - Verbose mode ON")206 print " - Verbose mode ON" 193 207 loggingLevel = logging.INFO 194 208 elif o == "-d": 195 logging.info(" - Debug mode ON")209 print " - Debug mode ON" 196 210 loggingLevel = logging.DEBUG 197 211 212 print self.lineSeparator 198 213 logging.basicConfig(level=loggingLevel, 199 format='%(asctime)s %(filename)s:%(lineno)d %(levelname)s %(message)s') 200 201 logging.info(self.lineSeparator) 214 format='%(asctime)s %(filename)s:%(lineno)d %(levelname)s %(message)s') 202 215 203 216 if datacentre is None: … … 207 220 fileUtils = FileUtilities() 208 221 209 status = 0210 222 numfilesproc = 0 211 223 self._no_files_ingested = 0 212 self._base_dir = os.getcwd() + "/"# this is the base dir that the script is ran from 224 self._no_problem_files = 0 225 self._base_dir = os.getcwd() + "/" # this is the base dir that the script is ran from 213 226 214 227 data_dir = self._base_dir + "data/" + datacentre # dir relating to the specified datacentre docs … … 238 251 239 252 # The directory to put things for a tape backup (should already exist) 240 #backupdir = '/disks/glue1/oaiBackup/' 241 # TODO: uncomment above on live system 242 backupdir = data_dir + "/backups/" 253 backupdir = '/disks/glue1/oaiBackup/' 243 254 244 255 # the following dirs define where the specific documents should go … … 266 277 if filename.endswith('.xml'): 267 278 original_filename = originals_dir + filename 268 ident=self.getID(original_filename) 279 try: 280 ident=self.getID(original_filename) 281 except Exception, detail: 282 logging.error("Could not 
retrieve ID from file, %s" %filename) 283 logging.error("Detail: %s" %detail) 284 logging.info("Continue with next file") 285 continue 269 286 270 287 if self._NDG_dataProvider: … … 280 297 try: 281 298 SchemaNameSpace(original_filename, new_filename, self._datacentre_format) 282 except: 283 sys.exit("ERROR: SchemaNameSpace failed on file %s" %original_filename) 299 except Exception, detail: 300 logging.error("SchemaNameSpace failed on file %s" %original_filename) 301 logging.error("Detail: %s" %detail) 302 logging.info("Continue with next file") 303 continue 284 304 numfilesproc += 1 285 305 else: … … 309 329 fileUtils.makeBackUp(originals_dir, this_backupdir) 310 330 311 #Clear out the original harvest records area and FINALMOLES331 #Clear out the original harvest records area and discovery dir 312 332 fileUtils.cleanDir(originals_dir) 313 333 fileUtils.cleanDir(discovery_dir) 314 # TODO: uncomment following line when live on system 315 #fileUtils.cleanDir(self._harvest_home) 334 335 logging.info("oai_document_ingest processing complete:") 336 if self._no_problem_files == 0: 337 logging.info("All files successfully processed - cleaning harvest directory") 338 fileUtils.cleanDir(self._harvest_home) 339 else: 340 logging.error("Problems experienced with %s files" %self._no_problem_files) 341 logging.error("- harvest directory will not be cleared until these have been fixed and the script has been reran") 316 342 317 343 logging.info(self.lineSeparator) 318 344 logging.info("INFO: Number of files processed = %s" %numfilesproc) 319 345 logging.info("INFO: Number of files ingested = %s" %self._no_files_ingested) 320 if status == 0: 321 logging.info("INFO: Procedure oai_document_ingester.py completed") 322 else: 323 logging.error("ERROR: Procedure oai_document_ingester.py FAILED with status %s" %status) 324 logging.info(self.lineSeparator) 346 logging.info(self.lineSeparator) 347 print "Script finished running." 
325 348 326 349 327 350 if __name__=="__main__": 328 logging.basicConfig(level=logging.INFO,329 format='%(asctime)s %(filename)s:%(lineno)d %(levelname)s %(message)s')330 351 opts, args = getopt.getopt(sys.argv[1:], '-vd') 331 352 if len(args) < 1:
Note: See TracChangeset for help on using the changeset viewer.