Changeset 3814 for TI01-discovery
- Timestamp:
- 23/04/08 09:52:51 (13 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/PostgresDAO.py
r3808 r3814 4 4 C Byrom Apr 08 5 5 ''' 6 import sys, os, logging 6 7 import molesReadWrite as MRW 7 import sys8 import os9 8 import db_funcs 10 9 … … 24 23 self._connection = db_connect() 25 24 self._record = record 25 self.id = None 26 26 27 27 … … 32 32 @return: id of record, if it exists, '-1' if it doesn't 33 33 ''' 34 sql = "select id from spatiotemp where id = '" + self._record.id + "';" 35 id = db_funcs.runSQLCommand(self._connection, sql) 36 print "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXKKKK", id 37 self.id = id 38 return id 34 logging.info("Looking up record, " + self._record.discovery_id + " in DB") 35 if self.id is not None & self.id > 0: 36 logging.info("Already looked up record - ID is " + self.id) 37 return self.id 38 39 sql = "SELECT original_document_id FROM ORIGINAL_DOCUMENT where discovery_id = '" + self._record.discovery_id + "';" 40 self.id = db_funcs.runSQLCommand(self._connection, sql) 41 print "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXKKKK", self.id 42 39 43 40 44 def createOrUpdateRecord(self): … … 46 50 else: 47 51 updateRecord() 52 48 53 49 54 def createRecord(self): … … 51 56 Create a record in the postgres DB based on the specified PostgresRecord object 52 57 ''' 58 logging.info("Creating a new record in the DB for the metada document") 53 59 # firstly store the actual documents contained by the record - i.e. the original doc + 54 60 # the various transforms required 61 _insertOriginalRecord() 55 62 _insertMetadataRecords() 56 63 57 64 # Now add the spatiotemporal data 58 65 _insertSpatioTemporalData() 66 logging.info("New record created") 59 67 60 68 … … 63 71 Update a record in the postgres DB based on the specified PostgresRecord object 64 72 ''' 65 # firstly update the actual documents contained by the record - i.e. the original doc + 66 # the various transforms required 67 _updateMetadataRecords(record) 68 69 # Now update the spatiotemporal data 70 _updateSpatioTemporalData(record) 71 72 73 logging.info("Record already existing in DB - performing updates") 74 # firstly, check the document is actually new - i.e. not just a repeat of the same data 75 if _checkIsUpdatedRecord(): 76 77 # firstly update the actual documents contained by the record - i.e. the original doc + 78 # the various transforms required 79 _updateMetadataRecords(record) 80 81 # Now update the spatiotemporal data 82 _updateSpatioTemporalData(record) 83 84 85 def _checkIsUpdatedRecord(self): 86 ''' 87 Looks up an existing record and checks it is not identical to the new record; if it is 88 incremement the harvest_count value and don't process again 89 @return: True if doc contains changes, False otherwise 90 ''' 91 logging.info("Checking the updated document actually contains changes") 92 sql = "SELECT harvest_count FROM ORIGINAL_DOCUMENT where original_document_id = " + self.id + \ 93 " AND original_document = " + self._record.originalFormat + "';" 94 count = db_funcs.runSQLCommand(self._connection, sql) 95 96 # NB, if the document is not identical, the sql command will not find anything 97 if count > 0: 98 logging.info("Ingested document is identical to document currently in DB - incrementing harvest_count") 99 count += 1 100 sql = "UPDATE ORIGINAL_DOCUMENT SET harvest_count = " + count + " WHERE original_document_id = " + self.id 101 count = db_funcs.runSQLCommand(self._connection, sql) 102 return False 103 104 logging.info("Ingested document is different to that in the current DB") 105 return True 106 73 107 74 108 def _insertSpatioTemporalData(): … … 77 111 specified in the PostgresRecord object 78 112 ''' 113 logging.info("Adding spatiotemporal data to DB record") 79 114 # NB, build the query up in two parts so that we only include the correct data 80 115 sqlStart = "" … … 104 139 sqlEnd = ", " + sqlEnd 105 140 sqlCmd = "INSERT INTO spatiotemp (id, original_doc_id" + sqlStart + ") " + sqlEnd + ");" 106 print sqlCmd107 141 db_funcs.runSQLCommand(self._connection, sqlCmd) 142 logging.info("Spatiotemporal data added to DB record") 108 143 109 144 … … 114 149 ''' 115 150 # NB, build the query up in two parts so that we only include the correct data 151 logging.info("Updating spatiotemporal data to DB record") 116 152 sqlStart = "" 117 153 sqlEnd = " WHERE original_doc_id='" + self._record.id + "';" … … 135 171 sqlCmd = "UPDATE spatiotemp SET (" + sqlStart + "update_time= now()) " + sqlEnd + ");" 136 172 137 print sqlCmd138 173 db_funcs.runSQLCommand(self._connection, sqlCmd) 174 logging.info("Spatiotemporal data updated for DB record") 139 175 140 176 … … 143 179 Insert the original metadata doc into the postgres DB 144 180 ''' 145 print "INFO: Inserting new original document in Postgres DB"181 logging.info("Inserting new original document in Postgres DB") 146 182 sqlCmd = "INSERT INTO ORIGINAL_DOCUMENT (original_document_id, original_document_name, original_format, " \ 147 183 "original_document, ts_vector, create_date, harvest_count, scn) VALUES (" \ … … 152 188 self.id = id 153 189 self._record.db_id = id 154 155 190 logging.info("Original document inserted in Postgres DB") 191 156 192 157 193 def _updateOriginalRecord(): … … 159 195 Update the original doc in the postgres DB 160 196 ''' 161 print "INFO: Updating original document in Postgres DB"197 logging.info("Updating original document in Postgres DB") 162 198 sqlCmd = "UPDATE ORIGINAL_DOCUMENT SET (original_document_name = '" + self._record.filename + "', " \ 163 199 "original_format = '" + self._record.originalFormat + "', " \ … … 168 204 169 205 db_funcs.runSQLCommand(self._connection, sqlCmd) 206 logging.info("Original document updated in Postgres DB") 170 207 171 208 … … 174 211 Insert the metadata docs into the postgres DB 175 212 ''' 176 print "INFO: Inserting transformed documents for original document, %s, in Postgres DB", self._record.filename213 logging.info("Inserting transformed documents for original document, %s, in Postgres DB", self._record.filename) 177 214 if self._record.db_id is None: 178 215 print "No DB ID for the original record exists; cannot add associated transformed docs" … … 185 222 "DEFAULT, '" + self._record.db_id + "', '" + \ 186 223 docType + "', '" + doc + "'), current_date, 1);" 187 224 225 db_funcs.runSQLCommand(self._connection, sqlCmd) 226 227 logging.info("Transformed records created in DB") 188 228 189 229 … … 192 232 Update the metadata docs into the postgres DB 193 233 ''' 194 print "INFO: Inserting transformed documents for original document, %s, in Postgres DB", self._record.filename234 logging.info("Updating transformed documents for original document, %s, in Postgres DB", self._record.filename) 195 235 if self._record.db_id is None: 196 236 print "No DB ID for the original record exists; cannot update associated transformed docs" … … 201 241 "', update_date = current_date) WHERE original_record_id = " + \ 202 242 self._record.db_id + " AND transformed_format = '" + docType + "';" 203 243 244 db_funcs.runSQLCommand(self._connection, sqlCmd) 245 246 logging.info("Transformed records updated in DB") 247 204 248 205 249 def setRecord(self, record): … … 209 253 ''' 210 254 self._record = record 211 self.getSpatioTemporalData()212 self.doRecordTransforms()
Note: See TracChangeset
for help on using the changeset viewer.