Changeset 5040


Timestamp: 26/02/09 11:55:50 (11 years ago)
Author: sdonegan
Message: Debug new ingest classes - previous commit had problems with mdip records.
Location: TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch
Files: 7 edited

  • TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/DIF.py

    r5027 → r5040

    @@ -81,5 +81,5 @@
                self.datasetTitle=helper.getText(self.tree,'Data_Set_Citation/Dataset_Title')

    -       logging.info('EXTRA INFORMATION for ORDERING= dataset title:  ' + self.datasetTitle + '  dataset creation date: ' + self.metadataCreationDate + '  datacentre name: ' + self.datacentreName)
    +       #logging.info('EXTRA INFORMATION for ORDERING= dataset title:  ' + self.datasetTitle + '  dataset creation date: ' + self.metadataCreationDate + '  datacentre name: ' + self.datacentreName)

            #Note that entity.constraints.html is about access control on the metadata,
  • TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/DatasetBasicParameters.py

    r5028 → r5040

    @@ -19,5 +19,5 @@ (whitespace-only change to the blank line)
            logging.info("Retrieving identifier for metadata record " + filename + " in format: " + format)
            xml=file(filename).read()
    -
    +
            self._datacentre_format = format

  • TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/MDIP.py

    r5027 → r5040

    @@ -29,12 +29,12 @@
            #add in extra stuff for new DatasetBasicParameters object
            self.metadataCreationDate=helper.getText(self.tree,'DateOfUpdateOfMetadata')
    -       self.datacentreName=helper.getText(self.tree,'Distributor/DistributorName/DistributorNameCode')
    +       self.datacentreName=helper.getText(self.tree,'Distributor/DistributorName/DistributorNameName')

            if len(helper.getText(self.tree,'Title')) < 1:
    +           self.datasetTitle=helper.getText(self.tree,'DatasetIdentifier')
    +       else:
                self.datasetTitle=helper.getText(self.tree,'Title')
    -       else:
    -           self.datasetTitle=helper.getText(self.tree,'DatasetIdentifier')

    -       logging.info('EXTRA INFORMATION for ORDERING= dataset title:  ' + self.datasetTitle + '  dataset creation date: ' + self.metadataCreationDate + '  datacentre name: ' + self.datacentreName)
    +       #logging.info('EXTRA INFORMATION for ORDERING= dataset title:  ' + self.datasetTitle + '  dataset creation date: ' + self.metadataCreationDate + '  datacentre name: ' + self.datacentreName)



    @@ -43,5 +43,5 @@
        import sys
        f=sys.argv[1]
    -   print 'file:[%s]'%f
    +   #print 'file:[%s]'%f
        ff=file(f)
        xml=ff.read()
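
    The functional fix in this file is the title fallback: before this change the branches were reversed, so an MDIP record with an empty Title still had that empty Title used, and DatasetIdentifier was only consulted when a Title was present. Below is a minimal standalone sketch of the corrected logic, using xml.etree.ElementTree as a stand-in for the repository's helper.getText; the get_text helper and the sample XML are illustrative assumptions, not code from the changeset.

    import xml.etree.ElementTree as ET

    def get_text(tree, path):
        # Illustrative stand-in for helper.getText: element text, or '' when absent.
        elem = tree.find(path)
        return elem.text.strip() if elem is not None and elem.text else ''

    def dataset_title(tree):
        # Corrected fallback order: use Title when present, otherwise DatasetIdentifier.
        if len(get_text(tree, 'Title')) < 1:
            return get_text(tree, 'DatasetIdentifier')
        return get_text(tree, 'Title')

    record = ET.fromstring('<MDIP><Title/><DatasetIdentifier>example-id</DatasetIdentifier></MDIP>')
    print(dataset_title(record))   # prints 'example-id' because Title is empty
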
  • TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/People.py

    r4027 → r5040

    @@ -33,5 +33,5 @@
            self.data={'personName':Name(None),'personEmail':'','personPhone':'','orgURL':'','orgName':'','searchlink':''}
        def toHTML(self,prefix=''):
    -       print self.data
    +       #print self.data
            if self.elem is None: return ''
            html=prefix #para start?
  • TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/PostgresDAO.py

    r5027 → r5040

    @@ -294,6 +294,5 @@
            Insert the original metadata doc into the postgres DB
            '''
    -
    -       logging.info("cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc 1")
    +
            logging.info("Inserting new original document in Postgres DB")
            sqlCmd = "SELECT create_document('" + self._record.shortFilename + "', '" + \

    @@ -303,9 +302,9 @@
                self._record.dataset_name + "', '" + self._record.datacentre_name + "', '" + self._record.dataset_lastEdit + "');"

    -       logging.info("SELECT create_document('" + self._record.shortFilename + "', '" + \
    +       '''logging.info("SELECT create_document('" + self._record.shortFilename + "', '" + \
                self._record.discovery_id + "', '" + self._record.docType + "', '" + \
                self._record.originalFormat + "', '" + self._record.getAuthorsInfo() + "', '" + \
                self._record.getParametersInfo() + "', '" + self._record.getScopeInfo() + "', '" + \
    -           self._record.dataset_name + "', '" + self._record.datacentre_name + "', '" + self._record.dataset_lastEdit + "');" )
    +           self._record.dataset_name + "', '" + self._record.datacentre_name + "', '" + self._record.dataset_lastEdit + "');" )'''

            id = db_funcs.runSQLCommand(self._connection, sqlCmd)
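
    For context, create_document is a Postgres stored procedure and the call above is assembled by string concatenation before being run through the project's db_funcs.runSQLCommand wrapper. As a hedged aside, the same ten-argument call could be issued as a parameterised query; the sketch below uses plain psycopg2 rather than the repository's own DB layer, and the function name and argument tuple are illustrative assumptions.

    import psycopg2

    def insert_original_document(conn, record_values):
        # record_values: the ten fields in the same order as the concatenated sqlCmd
        # above (shortFilename, discovery_id, docType, originalFormat, authors,
        # parameters, scope, dataset_name, datacentre_name, dataset_lastEdit).
        cur = conn.cursor()
        cur.execute(
            "SELECT create_document(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s);",
            record_values)
        new_id = cur.fetchone()[0]
        conn.commit()
        return new_id

    Binding the values as parameters avoids the quoting problems that quotes or apostrophes in titles and author strings can cause for the concatenated form.
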
  • TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/geoUtilities.py

    r4908 → r5040

    @@ -77,5 +77,5 @@
            ''' Test time coverage class '''
            tc=TimeCoverage(('01-01-2001','01-02-2001','Complete'))
    -       print tc
    +
            self.assertEqual(tc[2],'Complete')

  • TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/oai_document_ingester.py

    r5027 → r5040

    @@ -9,6 +9,6 @@
    from time import strftime
    from SchemaNameSpace import SchemaNameSpace
    -from DIF import DIF
    -from MDIP import MDIP
    +#from DIF import DIF
    +#from MDIP import MDIP
    from DatasetBasicParameters import DatasetBasicParameters
    import ndgUtils

    @@ -26,27 +26,7 @@
            data in the correct form in the DB
            '''
    -
    -       def getID(self, filename):
    -               '''
    -               Gets the identifier out of an input metadata xml record.
    -               Copes with DIF and MDIP currently.
    -               @param filename - name of document file being processed
    -               @return: ID - id to use to refer to the document
    -               '''
    -               logging.info("Retrieving identifier for metadata record " + filename)
    -               xml=file(filename).read()
    -               if self._datacentre_format == "DIF":
    -                   d=DIF(xml)
    -                   ID=d.entryID
    -               elif self._datacentre_format == "MDIP":
    -                   d=MDIP(xml)
    -                   ID=d.id
    -               else:
    -                   raise TypeError, "Only handles DIF or MDIP here."
    -
    -               return ID
    -
    +

    -       def addFileToPostgresDB(self, filename):
    +       def addFileToPostgresDB(self, filename,basicParameters):
                    '''
                    Add a file to the postgres DB - extracting and storing all the required

    @@ -70,7 +50,5 @@
                    dao = None
                    try:
    -
    -                       #Update: add some extra vas in for ingest to aid in search/present ordering of datasets
    -                       basicParameters=DatasetBasicParameters(filename,self._datacentre_format)
    +
                            discoveryID = basicParameters.datasetID
                            datasetName = basicParameters.datasetName

    @@ -78,5 +56,4 @@
                            datasetLastEditUpdateDate = basicParameters.metadataCreationDate

    -                       logging.info("gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg 1")

                            record = PostgresRecord(filename, self._NDG_dataProvider, \

    @@ -85,9 +62,7 @@
                                                        self._xq, self._datacentre_format)

    -                       logging.info("gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg 2")

                            # Now create the data access object to interface to the DB
                            dao = PostgresDAO(record, self._dbConnection)
    -                       logging.info("gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg 3")

                            # Finally, write the new record

    @@ -149,5 +124,5 @@ (whitespace-only change to the 'format' line)
                        if words[0] == 'groups':
                            self._datacentre_groups = words[1:]
    -                   if words[0] == 'format':
    +                   if words[0] == 'format':
                            self._datacentre_format = words[1]
                        if words[0] == 'namespace':

    @@ -267,6 +242,6 @@
                    self._no_files_ingested = 0
                    self._no_problem_files = 0
    -               #self._base_dir = "/usr/local/WSClientsIngestUpdate/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/" #os.getcwd() + "/" # this is the base dir that the script is ran from
    -               self._base_dir = "/disks/aer1/users/sdonegan/PROJECTS/ndgWorkspace/discoveryIngestIgnisPRODUCTION/OAIBatch/"
    +               self._base_dir = "/usr/local/WSClientsIngestUpdate/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/" #os.getcwd() + "/" # this is the base dir that the script is ran from
    +               #self._base_dir = "/disks/aer1/users/sdonegan/PROJECTS/ndgWorkspace/discoveryIngestIgnisPRODUCTION/OAIBatch/"
                    data_dir = self._base_dir + "data/" + datacentre  # dir relating to the specified datacentre docs


    @@ -336,11 +311,14 @@
                    logging.info(self.lineSeparator)
                    logging.info("Renaming files:")
    -
    +
                    for filename in os.listdir(originals_dir):
                            if filename.endswith('.xml'):
                                    original_filename = originals_dir + filename
    -
    +
    +                               #Use new class to get basic parameters from input xml doc to pass around (supplants getID method)
    +                               basicParameters=DatasetBasicParameters(original_filename,self._datacentre_format)
    +
                                    try:
    -                                       ident=self.getID(original_filename)
    +                                       ident=basicParameters.datasetID #self.getID(original_filename)

                                    except Exception, detail:

    @@ -397,5 +375,5 @@
                    filenames = os.listdir(discovery_dir)
                    for filename in filenames:
    -                       self.addFileToPostgresDB(discovery_dir + filename)
    +                       self.addFileToPostgresDB(discovery_dir + filename,basicParameters)

                    #Make copies of discovery and oai/originals and DIF2MOLES areas to backup area for tape backups
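
    The overall shape of the change in oai_document_ingester.py is that each record is now parsed once into a DatasetBasicParameters object, which replaces the deleted getID method and is passed into addFileToPostgresDB. The sketch below condenses that calling pattern into a single hypothetical free function; the signatures DatasetBasicParameters(filename, format) and the datasetID attribute come from the diff, while the loop and directory handling are simplified assumptions (the real script renames files in one pass and ingests from discovery_dir in a later pass).

    import os
    import logging
    from DatasetBasicParameters import DatasetBasicParameters

    def ingest_directory(discovery_dir, datacentre_format, add_file_to_postgres_db):
        # Parse each xml record once and pass the resulting object around,
        # instead of re-reading the file in a separate getID() call.
        for filename in os.listdir(discovery_dir):
            if not filename.endswith('.xml'):
                continue
            full_path = os.path.join(discovery_dir, filename)
            basic_parameters = DatasetBasicParameters(full_path, datacentre_format)
            ident = basic_parameters.datasetID   # supplants self.getID(full_path)
            logging.info("Ingesting record " + ident)
            add_file_to_postgres_db(full_path, basic_parameters)
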