Changeset 4948


Ignore:
Timestamp:
11/02/09 10:05:08 (11 years ago)
Author:
cbyrom
Message:

Adjust ingest scripts to use the new NDG common client suite, and add new
test data to make the tests more universally applicable.

Location:
TI01-discovery/branches/ingestAutomation-upgrade
Files:
1 added
3 edited
1 copied

Legend:

Unmodified
Added
Removed
  • TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/abstractdocumentingester.py

    r4854 r4948  
    99from time import strftime 
    1010from SchemaNameSpace import SchemaNameSpace 
    11 from ndg.common.src.lib.ndgXqueries import ndgXqueries 
     11from ndg.common.src.lib.ndgresources import ndgResources 
    1212import ndg.common.src.lib.fileutilities as FileUtilities 
    1313from ndg.common.src.clients.reldb.postgres.postgresclient import PostgresClient as pgc 
     
    329329                - NB, extract the xquery libraries locally for easy reference 
    330330                ''' 
    331                 self._xq=ndgXqueries() 
     331                self._xq = ndgResources() 
    332332                for libFile in self._xq.xqlib: 
    333                         FileUtilities.createFile(libFile, self._xq.xqlib[libFile]) 
     333                        # NB, we don't want the full path to the files - just the filename 
     334                        fileName = libFile.split('/')[-1] 
     335                        FileUtilities.createFile(fileName, self._xq.xqlib[libFile]) 
  • TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/feeddocumentingester.py

    r4854 r4948  
    88from ndg.common.src.lib.utilities import formatDateYYYYMMDD, YEAR_FORMAT 
    99import ndg.common.src.lib.fileutilities as FileUtilities 
    10 from ndg.common.src.clients.xmldb.eXist.existdbfeedclient import eXistDBFeedClient as feedClient 
     10from ndg.common.src.clients.xmldb.eXist.feedclient import FeedClient as feedClient 
    1111from abstractdocumentingester import AbstractDocumentIngester 
    1212from ndg.common.src.models.vocabtermdata import VocabTermData as VTD 
    1313from ndg.common.src.lib.atomutilities import * 
    14 from ndg.common.src.models.ndgObject import ndgObject as ndgObject 
    15 from ndg.common.src.dal.DocumentRetrieve import DocumentRetrieve as DR 
    16 from ndg.common.src.clients.xmldb.eXist.eXistConnector import eXistConnector as ec 
     14from ndg.common.src.models.ndgObject import ndgObject 
     15from ndg.common.src.clients.xmldb.eXist.searchclient import SearchClient 
     16import ndg.common.src.clients.xmldb.eXist.dbconstants as dc 
    1717 
    1818class FeedDocumentIngester(AbstractDocumentIngester): 
     
    3131        TEMP_DIR_NAME = "feedPollTempDir" 
    3232 
    33         def __getEXistDBConnection(self, eXistDBHostname='chinook.badc.rl.ac.uk'): 
     33        def __getDBConnection(self, dBHostname, configFileName): 
    3434                ''' 
    3535                Get the default DB connection - by reading in data from the db config file 
    3636                - to eXist DB to get feed docs from 
    37                 @keyword eXistDBHostname: the hostname of the eXist DB to use for the ingest 
    38                 - defaults to chinook 
     37                @param dBHostname: the hostname of the eXist DB to use for the ingest 
     38                @param configFileName: name of config file to get DB details from 
    3939                ''' 
    4040                logging.info("Setting up connection to eXist DB") 
    41                 self.dr = DR(eXistDBHostname) 
     41                self.dbClient = SearchClient(dbHostName = dBHostname, 
     42                                                                         configFileName = configFileName) 
    4243                logging.info("Connection to eXist now set up") 
    4344 
     
    102103                self._setupDataCentreDirs() 
    103104                 
    104                 docData = self.dr.get(no.repository,no.schema,no.localID, 
    105                                                           targetCollection=ec.DIF_COLLECTION_PATH, 
    106                                                           includeDocNameData=True) 
    107  
    108                 difFilename, contents = docData.items()[0] 
     105                doc = self.dbClient.getNDGDoc(no.repository, no.schema, no.localID, 
     106                                      targetCollection=dc.DIF_COLLECTION_PATH) 
     107 
    109108                # create a temporary file to allow the ingest to do the various 
    110109                # xquery transforms on 
    111                 difFilename = self.originals_dir + difFilename 
    112                 FileUtilities.createFile(difFilename, contents) 
     110                difFilename = self.originals_dir + no.localID + '.dif' 
     111                FileUtilities.createFile(difFilename, doc) 
    113112 
    114113                self._convertAndIngestFiles(self.originals_dir, self.discovery_dir) 
     
    210209 
    211210 
    212         def __init__(self, interval = None, ingestFromDate = None, \ 
    213                                  eXistDBHostname = 'chinook.badc.rl.ac.uk', eXistPortNo = '8080', 
     211        def __init__(self, interval = None, ingestFromDate = None,  
     212                                 eXistDBHostname = 'chinook.badc.rl.ac.uk',  
     213                                 eXistPortNo = '8080', 
     214                                 configFileName = 'passwords.txt',  
    214215                                 dataCentrePoll = None): 
    215216                ''' 
     
    221222        @keyword eXistDBHostname: name of eXist DB to use 
    222223        @keyword eXistPortNo: Port number that the eXist DB is exposed by - defaults 
     224        @keyword configFileName : password file to use - NB, this should 
     225        have contents of format (NB, space delimiter): 
     226        dbName userID password  
     227        Default is 'passwords.txt'. 
    223228        to '8080' if not set  
    224229        @keyword dataCentrePoll: to specify a datacentre to poll data for - e.g. 
     
    237242                self.eXistDBHostname = eXistDBHostname 
    238243                self.eXistPortNo = eXistPortNo 
     244                self.configFileName = configFileName 
    239245                self.refreshTime = 0. 
    240246                self.entryNumber = 0 
     
    266272                #self.__setupFeedClient() 
    267273                self._getPostgresDBConnection() 
    268                 self.__getEXistDBConnection(eXistDBHostname = self.eXistDBHostname) 
     274                self.__getDBConnection(self.eXistDBHostname, self.configFileName) 
    269275                self._setupXQueries() 
    270276         
  • TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/testfeeddocumentingester.py

    r4854 r4948  
    77from feeddocumentingester import FeedDocumentIngester 
    88from ndg.common.src.models.vocabtermdata import VocabTermData as VTD 
    9 import ndgutilstests.testconstants as tc 
    10 from ndgutilstests.testutils import testUtils as tu 
     9import ndg.common.unittests.testconstants as tc 
     10from ndg.common.unittests.testutils import testUtils as tu 
    1111 
    1212class testFeedDocumentIngester(unittest.TestCase): 
     
    2626        self.createdAtoms = []    # array to store paths to atoms created - in order to then delete them 
    2727 
    28     def offtestSetPollInterval(self): 
     28    def testSetPollInterval(self): 
    2929        try: 
    3030            self.ingester.setPollInterval(self.VALID_INTERVAL) 
     
    3333            self.fail("Should not have thrown an exception") 
    3434 
    35     def offtestInvalidSetPollInterval(self): 
     35    def testInvalidSetPollInterval(self): 
    3636        try: 
    3737            self.ingester.setPollInterval(self.INVALID_INTERVAL) 
    3838            self.fail("Should have thrown an exception") 
    3939        except Exception, e: 
    40             self.assertTrue(e.message.find('not a float or an int') > -1) 
     40            self.assertTrue(e.message.find('invalid literal for float') > -1) 
    4141 
    42     def offtestSetIngestFromDate(self): 
     42    def testSetIngestFromDate(self): 
    4343        try: 
    4444            self.ingester.setIngestFromDate(self.VALID_DATE) 
     
    4747            self.fail("Should not have thrown an exception") 
    4848 
    49     def offtestInvalidSetIngestFromDate(self): 
     49    def testInvalidSetIngestFromDate(self): 
    5050        try: 
    5151            self.ingester.setIngestFromDate(self.INVALID_DATE) 
     
    5353        except Exception, e: 
    5454            self.assertTrue(e.message.find('not in ISO8601 format') > -1) 
    55  
    56  
    57     def offtestIngestRESTfulAtomDocument(self): 
    58         try: 
    59             # firstly create doc in eXist to ensure there is an entry to ingest 
    60             # NB, for this test to work, we need use a data entity - since this 
    61             # is the only type of document that we'll ingest via the feed 
    62             atom = tu(tc.EXIST_DBCONFIG_FILE).createAtomInEXist(tc.xmlString) 
    63             self.createdAtoms.append(atom.getFullPath()) 
    64  
    65             # ensure a feed client is available 
    66             self.ingester.setupPoll() 
    67  
    68             self.ingester.ingestRESTfulAtomDocument(atom.getFullPath()) 
    69         except Exception, e: 
    70             print e.message 
    71             self.fail("Should not have thrown an exception") 
    7255 
    7356 
     
    7962            atom = tu(tc.EXIST_DBCONFIG_FILE).createAtomInEXist(tc.xmlString) 
    8063            self.createdAtoms.append(atom.getFullPath()) 
    81             import pdb 
    82             pdb.set_trace() 
    8364 
    8465            # ensure a feed client is available 
     
    9576        for path in self.createdAtoms: 
    9677            # delete the test file, in case it has been used in a test 
    97             self.utils.deleteAtomInExist(path) 
     78            self.utils.deleteDoc(path) 
    9879 
    9980if __name__ == '__main__': 
Note: See TracChangeset for help on using the changeset viewer.