Ignore:
Timestamp:
20/01/09 16:33:33 (11 years ago)
Author:
cbyrom
Message:

Add new ingest script - to allow ingest of DIF docs from an eXist-hosted
atom feed. NB, this required a restructure of the original OAI harvester
to allow re-use of shared code - by abstracting this out into a new class,
abstractdocumentingester.

Add new documentation and tidy up the codebase, removing dependencies where possible to simplify things.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/PostgresDAO.py

    r4224 r4854  
    55''' 
    66import sys, os, logging 
    7 import db_funcs 
    87from SpatioTemporalData import * 
     8from ndg.common.src.clients.reldb.postgres.postgresclient import PostgresClient as pgc 
    99 
    1010class PostgresDAO: 
    1111     
    12     def __init__(self, record, connection=None): 
     12    def __init__(self, record, pgClient = None): 
    1313        ''' 
    1414        Constructor - to initialise the DAL and do some initial setting up 
    15         @param record: the PostgresRecord object to add or update in the DB  
     15        @param record: the PostgresRecord object to add or update in the DB 
     16        @keyword pgClient: a postgresClient object to use to connect to postgres 
     17        - NB, if not set, will try to set one up by looking for a pg.config file 
     18        locally  
    1619        ''' 
    1720        if record == "": 
     
    2124 
    2225        # setup a connection to the db - if none specified 
    23         if connection is None: 
    24             connection = db_funcs.db_connect() 
    25         self._connection = connection 
     26        if pgClient is None: 
     27            self.pgc = pgc(configFile, pgc.DEFAULT_CONFIG_FILE) 
     28        else: 
     29            self.pgc = pgClient 
     30 
    2631        self._record = record 
    2732         
     
    4045        sql = "SELECT original_document_id FROM ORIGINAL_DOCUMENT where discovery_id = '" + \ 
    4146            self._record.discovery_id + "';" 
    42         dbId = db_funcs.runSQLCommand(self._connection, sql) 
     47        dbId = self.pgc.runSQLCommand(sql) 
    4348        if dbId: 
    4449            self._record.db_id = dbId[0][0] 
     
    113118        sql = "SELECT harvest_count, scn FROM ORIGINAL_DOCUMENT where original_document_id = " + \ 
    114119            str(self._record.db_id) + " AND original_document = '" + self._record.originalFormat + "';" 
    115         results = db_funcs.runSQLCommand(self._connection, sql) 
     120        results = self.pgc.runSQLCommand(sql) 
    116121 
    117122        # NB, if the document is not identical, the sql command will not find anything 
     
    121126            sql = "SELECT scn FROM ORIGINAL_DOCUMENT where original_document_id = " + \ 
    122127                    str(self._record.db_id) + ";" 
    123             results = db_funcs.runSQLCommand(self._connection, sql) 
     128            results = self.pgc.runSQLCommand(sql) 
    124129            self._record.scn = results[0][0] 
    125130            return True 
     
    135140        sql = "UPDATE ORIGINAL_DOCUMENT SET harvest_count = " + str(count) + \ 
    136141            " WHERE original_document_id = " + str(self._record.db_id) 
    137         db_funcs.runSQLCommand(self._connection, sql) 
     142        self.pgc.runSQLCommand(sql) 
    138143        return False 
    139144 
     
    149154            str(self._record.db_id) + ";"       
    150155 
    151         db_funcs.runSQLCommand(self._connection, sqlCmd) 
     156        self.pgc.runSQLCommand(sqlCmd) 
    152157        logging.info("Spatiotemporal data deleted successfully") 
    153158 
     
    164169        sqlCmd = "SELECT add_spatiotemporal_row('" + str(self._record.db_id) + "', '" + \ 
    165170            str(coords.north) + "', '" + str(coords.south) + "', '" + str(coords.west) + "', '" + \ 
    166             str(coords.east) + "', " 
    167              
    168              
    169          
    170         # cope with null dates appropriately 
    171         if timeRange.start == "null": 
    172             sqlCmd += timeRange.start + ", " 
    173         else: 
    174             sqlCmd += "'" + timeRange.start + "', " 
    175              
    176         if timeRange.end == "null": 
    177             sqlCmd += timeRange.end 
    178         else: 
    179             sqlCmd += "'" + timeRange.end + "'" 
    180          
    181         sqlCmd += ");"       
    182  
    183         db_funcs.runSQLCommand(self._connection, sqlCmd) 
     171            str(coords.east) + "', '" + timeRange.start + "', '" + timeRange.end + "');" 
     172             
     173        # fix any null strings 
     174        sqlCmd = sqlCmd.replace("'null'", "null") 
     175 
     176        self.pgc.runSQLCommand(sqlCmd) 
    184177        logging.info("Spatiotemporal row added successfully") 
    185178         
     
    243236            self._record.getParametersInfo() + "', '" + self._record.getScopeInfo() + "');"  
    244237 
    245         id = db_funcs.runSQLCommand(self._connection, sqlCmd) 
     238        id = self.pgc.runSQLCommand(sqlCmd) 
    246239        if len(id) == 0: 
    247240            raise ValueError, 'No PK ID returned from INSERT to DB' 
     
    258251        sqlCmd = "SELECT delete_document('" + str(self._record.db_id) + "');"  
    259252 
    260         db_funcs.runSQLCommand(self._connection, sqlCmd) 
     253        self.pgc.runSQLCommand(sqlCmd) 
    261254        logging.info("Original document deleted from Postgres DB") 
    262255         
     
    272265            self._record.originalFormat + "', '" + self._record.getAuthorsInfo() + "', '" + \ 
    273266            self._record.getParametersInfo() + "', '" + self._record.getScopeInfo() + "', '" + str(self._record.scn) + "');" 
    274         db_funcs.runSQLCommand(self._connection, sqlCmd) 
     267        self.pgc.runSQLCommand(sqlCmd) 
    275268         
    276269        # increment scn - saves doing an additional db call 
     
    295288                docType + "', '" + doc + "', current_timestamp, 1);" 
    296289             
    297             db_funcs.runSQLCommand(self._connection, sqlCmd) 
     290            self.pgc.runSQLCommand(sqlCmd) 
    298291         
    299292        logging.info("Transformed records created in DB") 
     
    314307                str(self._record.db_id) + " AND transformed_format = '" + docType + "';" 
    315308 
    316             db_funcs.runSQLCommand(self._connection, sqlCmd) 
     309            self.pgc.runSQLCommand(sqlCmd) 
    317310     
    318311        logging.info("Transformed records updated in DB") 
Note: See TracChangeset for help on using the changeset viewer.