Changeset 8000
- Timestamp: 02/12/11 14:47:11 (9 years ago)
- Location: TI01-discovery-Ingest/trunk/v4.3.0/ingestAutomation-upgrade/OAIBatch
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
TI01-discovery-Ingest/trunk/v4.3.0/ingestAutomation-upgrade/OAIBatch/PostgresDAO.py
r7889 r8000 122 122 123 123 self.getRecordID() 124 returnCode=0 # 0 if failed, 1 if update, 2 if create 125 126 if self._record.db_id: 127 if self.updateRecord(): 128 returnCode = 1 129 else: 124 returnCode=None # 0 if failed, 1 if update, 2 if create 125 msg = None 126 127 try: 128 if self._record.db_id: 129 if self.updateRecord(): 130 returnCode = 1 131 else: 130 132 #create the record! 131 if self.createRecord(): 132 returnCode = 2 133 if self.createRecord(): 134 returnCode = 2 135 136 except Exception, error: 137 138 returnCode = 0 139 140 if (type(error).__name__ == 'UnicodeEncodeError') or (type(error).__name__ == 'UnicodeDecodeError'): 141 #get the position of the dodgy characters & message 142 convertMsg = 'Error with character encoding in this field (%s): %s'%(type(error).__name__,error) 143 logging.warn(convertMsg) 144 msg = convertMsg 145 146 else: 147 logging.error("Error detected with parsing and inserting record into the database") 148 msg = "Possible error with sql content (bad characters?)" 133 149 134 return returnCode 150 return returnCode, msg 135 151 136 152 … … 514 530 #Get rid of any xml comment characters - causing problems 515 531 originalXMLdoc = re.sub("<!--.*-->","",self._record.originalFormat) 516 517 532 518 533 sqlCmd = "SELECT create_document('" + self._record.shortFilename + "', '" + \ 519 534 self._record.discovery_id + "', '" + self.dpwsID + "', '" + self._record.docType + "', '" + \ … … 522 537 self._record.dataset_name + "', '" + self._record.datacentre_name + "', '" + \ 523 538 self._record.dataset_lastEdit + "', '" + self._record.datasetStartNom + "', '" + self._record.datasetEndNom + "' );" 524 525 539 526 540 #sort out any nulls.. -
TI01-discovery-Ingest/trunk/v4.3.0/ingestAutomation-upgrade/OAIBatch/abstractdocumentingester.py
r7980 r8000 24 24 from SchematronValidate import SchematronValidate as Schematron 25 25 26 class SqlException(Exception):pass 27 26 28 class AbstractDocumentIngester(object): 27 29 ''' … … 209 211 210 212 try: 211 213 #import pdb 214 #pdb.set_trace() 212 215 #if record is valid (vital elements checked in ExtractISO - validDoc is True if everything ok.. 213 216 … … 217 220 218 221 if self.isoDataModel.validDoc is True: 219 222 220 223 #record whats attempting to be ingested 221 224 record = PostgresRecord(filename, self._NDG_dataProvider,self._datacentre_groups, self._datacentre_namespace, … … 233 236 # Finally, write the new record 234 237 # 0 if failed, 1 if update, 2 if create 235 returnCode = dao.createOrUpdateRecord() 238 returnCode, msg = dao.createOrUpdateRecord() 239 240 if returnCode == 0: 241 logging.error("Detected problem on update: msg") 242 243 #most likely problem is a character encoding problem.. to get around this (& t be sure) sort the encoding of filename to be on the safe side: 244 datasetSafeName,cnvMsg = record.characterEncoding(self.isoDataModel.datasetName[0][0]) 245 246 self.ingestProblemMessage= "%s (Content problems: %s)" %(datasetSafeName,msg) 247 #import pdb; pdb.set_trace() 248 raise ValueError,self.ingestProblemMessage 249 #raise SqlException() 236 250 237 251 if dao.processingWarning != '': … … 252 266 raise ValueError,self.ingestProblemMessage 253 267 254 except: 255 268 except Exception: 256 269 257 270 #if error encountered, add to failure lisr … … 259 272 originalRecordFilename = self.inputFileOrigFinal[shortFilename] 260 273 self.updateFailList.append(originalRecordFilename) 274 261 275 262 276 logging.error("Exception thrown - detail: ") … … 274 288 logging.error(sys.exc_info()) 275 289 logging.error("NB, this record will need to be cleared manually from DB to ensure all relevant data is ingested") 276 290 #import pdb; 291 #pdb.set_trace() 277 292 self._no_problem_files += 1 278 293 … … 834 849 if self.ingestProcessID is not 
None and self.procID is None: 835 850 sqlStatusCmd = "select update_ingest_info (%s, ' (ingesting %s of %s)');" %(self.ingestProcessID,counter,numfilesproc) 851 836 852 self.pgc_IngestLog.runSQLCommand(sqlStatusCmd) 837 853 … … 843 859 #record all problem messages 844 860 #ingestProbMsg = ingestProbMsg + self.ingestProblemMessage + "\n" 861 845 862 if self.ingestProblemMessage != '': 846 ingestProbMsg = ingestProbMsg + self.ingestProblemMessage 847 self.problemMessageList[self.originalsDiscoveryFilesMap[filename]] = self.ingestProblemMessage 863 ingestProbMsg = ingestProbMsg + self.ingestProblemMessage 864 #Need to record datasetname here as well as the filename - filenames are usually useless (i.e. CSW harvest) 865 #key = '(%s(%s)' %(self.originalsDiscoveryFilesMap[filename], self.isoDataModel.datasetName[0][0]) 866 key = self.originalsDiscoveryFilesMap[filename] 867 self.problemMessageList[key] = self.ingestProblemMessage 848 868 849 869 else: … … 922 942 if len(self.updateFailList) > 0: 923 943 message = message + "\nBelow are comments recorded on problem files...\n\n" 924 925 for badFile in self. updateFailList:944 945 for badFile in self.problemMessageList.keys(): #self.updateFailList: 926 946 #recOpFile.write("PROBLEM_FILE " + badFile + "\n") 927 947 928 948 #NOTE: there may not be a problem message for every file that failed to ingest.. 
929 if badFile in self.problemMessageList.keys():930 931 message = message + self.problemMessageList[badFile] + "\n"949 #if badFile in self.problemMessageList.keys(): 950 message = message +"\nPROBLEM_FILE " + badFile + "\n" 951 message = message + self.problemMessageList[badFile] + "\n\n" 932 952 933 953 … … 952 972 if self._no_problem_files > 0: 953 973 logging.info("There are %s problem files - recording these in stats logging db" %self._no_problem_files) 974 954 975 955 976 #note sometimes message will not always be recorded, so just record name of file … … 971 992 972 993 #if process id has been supplied, then update the logging db with a "start_ingest" (only for DPWS ops) 994 #import pdb; pdb.set_trace() 973 995 if self.procID is None: 974 996 sqlStatusCmd = "select update_ingest_status (%s, 'end_ingest');" %self.ingestProcessID
Note: See TracChangeset for help on using the changeset viewer.