Changeset 5027


Ignore:
Timestamp:
24/02/09 14:53:22 (11 years ago)
Author:
sdonegan
Message:

Added extra fields to ingest into original document table to aid in search result ordering

Location:
TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch
Files:
7 edited

Legend:

Unmodified
Added
Removed
  • TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/DIF.py

    r4027 r5027  
    7171        self.name=helper.getText(self.tree,'Entry_Title') 
    7272        self.abbreviation=self.name[0:min(5,len(self.name))] 
     73         
     74        #add some extra parameters here to extract further columns to aid in ranking & ordering         
     75        self.metadataCreationDate=helper.getText(self.tree,'DIF_Creation_Date') 
     76        self.datacentreName=helper.getText(self.tree,'Data_Center/Data_Center_Name/Short_Name') 
     77         
     78        if len(helper.getText(self.tree,'Data_Set_Citation/Dataset_Title')) < 1: 
     79            self.datasetTitle=self.name 
     80        else: 
     81            self.datasetTitle=helper.getText(self.tree,'Data_Set_Citation/Dataset_Title') 
     82         
     83        logging.info('EXTRA INFORMATION for ORDERING= dataset title:  ' + self.datasetTitle + '  dataset creation date: ' + self.metadataCreationDate + '  datacentre name: ' + self.datacentreName) 
    7384         
    7485        #Note that entity.constraints.html is about access control on the metadata, 
  • TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/MDIP.py

    r4027 r5027  
    2626        #print helper 
    2727        self.id=helper.getText(self.tree,'DatasetIdentifier') 
     28         
     29        #add in extra stuff for new DatasetBasicParameters object 
     30        self.metadataCreationDate=helper.getText(self.tree,'DateOfUpdateOfMetadata') 
     31        self.datacentreName=helper.getText(self.tree,'Distributor/DistributorName/DistributorNameCode') 
     32         
     33        if len(helper.getText(self.tree,'Title')) < 1: 
     34            self.datasetTitle=helper.getText(self.tree,'Title') 
     35        else: 
     36            self.datasetTitle=helper.getText(self.tree,'DatasetIdentifier') 
     37         
     38        logging.info('EXTRA INFORMATION for ORDERING= dataset title:  ' + self.datasetTitle + '  dataset creation date: ' + self.metadataCreationDate + '  datacentre name: ' + self.datacentreName) 
     39         
     40         
    2841        print 'id [%s]'%id 
    2942if __name__=="__main__": 
  • TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/PostgresDAO.py

    r4956 r5027  
    257257            spatialData = self._record.getSpatialData() 
    258258        except: 
    259             print "doobt" 
     259            print "ouch" 
    260260                 
    261261        # check if we have any spatiotemporal data to add; escape if not 
     
    294294        Insert the original metadata doc into the postgres DB 
    295295        ''' 
     296         
     297        logging.info("cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc 1") 
    296298        logging.info("Inserting new original document in Postgres DB") 
    297299        sqlCmd = "SELECT create_document('" + self._record.shortFilename + "', '" + \ 
    298300            self._record.discovery_id + "', '" + self._record.docType + "', '" + \ 
    299301            self._record.originalFormat + "', '" + self._record.getAuthorsInfo() + "', '" + \ 
    300             self._record.getParametersInfo() + "', '" + self._record.getScopeInfo() + "');"  
    301  
     302            self._record.getParametersInfo() + "', '" + self._record.getScopeInfo() + "', '" + \ 
     303            self._record.dataset_name + "', '" + self._record.datacentre_name + "', '" + self._record.dataset_lastEdit + "');"  
     304 
     305        logging.info("SELECT create_document('" + self._record.shortFilename + "', '" + \ 
     306            self._record.discovery_id + "', '" + self._record.docType + "', '" + \ 
     307            self._record.originalFormat + "', '" + self._record.getAuthorsInfo() + "', '" + \ 
     308            self._record.getParametersInfo() + "', '" + self._record.getScopeInfo() + "', '" + \ 
     309            self._record.dataset_name + "', '" + self._record.datacentre_name + "', '" + self._record.dataset_lastEdit + "');" ) 
     310         
    302311        id = db_funcs.runSQLCommand(self._connection, sqlCmd) 
    303312        if len(id) == 0: 
     
    329338            self._record.discovery_id + "', '" + self._record.docType + "', '" + \ 
    330339            self._record.originalFormat + "', '" + self._record.getAuthorsInfo() + "', '" + \ 
    331             self._record.getParametersInfo() + "', '" + self._record.getScopeInfo() + "', '" + str(self._record.scn) + "');" 
     340            self._record.getParametersInfo() + "', '" + self._record.getScopeInfo() + "', '" + str(self._record.scn) + \ 
     341            self._record.dataset_name + "', '" + self._record.datacentre_name + "', '" + self._record.dataset_lastEdit +"');" 
     342             
    332343        db_funcs.runSQLCommand(self._connection, sqlCmd) 
    333344         
  • TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/PostgresRecord.py

    r4956 r5027  
    3838    ndg_data_provider_vocab = "http://vocab.ndg.nerc.ac.uk/term/N010" 
    3939         
    40     def __init__(self, filename, ndg_dataprovider, datacentre_groups, datacentre_namespace, discovery_id, xq, docType): 
     40    def __init__(self, filename, ndg_dataprovider, datacentre_groups, datacentre_namespace, discovery_id,datasetName,datacentreName,datasetLastEditUpdateDate, xq, docType): 
    4141        logging.info("Setting up Postgres record for file, " + filename) 
    4242        self.filename = filename 
     
    5656        self._xq = xq 
    5757        self.docType = docType 
     58         
     59        self.dataset_name = datasetName 
     60        self.dataset_lastEdit = datasetLastEditUpdateDate 
     61        self.datacentre_name = datacentreName 
    5862 
    5963        self._molesFormat = None    # initialise this, so we can guarantee a value - to avoid using getattr 
  • TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/RecordToDelete.py

    r4956 r5027  
    3131        self.db_id = None    # the DB ID of the record, for easy reference when it is created 
    3232         
    33         #use this class to work out correct filenames and create a quasi record object 
    34          
    35          
    36         #get discoveryID 
    3733         
    3834     
  • TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/oai_document_ingester.py

    r4963 r5027  
    1111from DIF import DIF 
    1212from MDIP import MDIP 
     13from DatasetBasicParameters import DatasetBasicParameters 
    1314import ndgUtils 
    1415from ndgUtils.ndgXqueries import ndgXqueries 
     
    4546         
    4647                return ID 
    47  
    48          
     48                
     49         
    4950        def addFileToPostgresDB(self, filename): 
    5051                ''' 
     
    7071                try: 
    7172                         
    72                         discoveryID = self.getID(filename) 
     73                        #Update: add some extra vars in for ingest to aid in search/present ordering of datasets 
     74                        basicParameters=DatasetBasicParameters(filename,self._datacentre_format) 
     75                        discoveryID = basicParameters.datasetID 
     76                        datasetName = basicParameters.datasetName 
     77                        datacentreName = basicParameters.datacentreName 
     78                        datasetLastEditUpdateDate = basicParameters.metadataCreationDate 
     79                         
     80                        logging.info("gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg 1") 
    7381                         
    7482                        record = PostgresRecord(filename, self._NDG_dataProvider, \ 
    7583                                                            self._datacentre_groups, self._datacentre_namespace, \ 
    76                                                             discoveryID, self._xq, self._datacentre_format) 
    77          
     84                                                            discoveryID,datasetName,datacentreName,datasetLastEditUpdateDate, \ 
     85                                                            self._xq, self._datacentre_format) 
     86         
     87                        logging.info("gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg 2")                         
    7888                         
    7989                        # Now create the data access object to interface to the DB 
    8090                        dao = PostgresDAO(record, self._dbConnection) 
     91                        logging.info("gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg 3")                         
    8192                         
    8293                        # Finally, write the new record 
     
    215226                try: 
    216227                    opts, args = getopt.getopt(sys.argv[1:], "vdi") 
     228                    
    217229                except getopt.GetoptError, err: 
    218230                    # print help information and exit: 
     
    237249                         
    238250                        print " - Use INDIVIDUAL file: " + indFileToIngest + " to load" 
    239                          
    240                    
     251                 
     252                  
    241253                print self.lineSeparator 
    242254                logging.basicConfig(level=loggingLevel, 
     
    255267                self._no_files_ingested = 0 
    256268                self._no_problem_files = 0 
    257                 self._base_dir = "/usr/local/WSClientsIngestUpdate/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/" #os.getcwd() + "/" # this is the base dir that the script is ran from 
    258                          
     269                #self._base_dir = "/usr/local/WSClientsIngestUpdate/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/" #os.getcwd() + "/" # this is the base dir that the script is ran from 
     270                self._base_dir = "/disks/aer1/users/sdonegan/PROJECTS/ndgWorkspace/discoveryIngestIgnisPRODUCTION/OAIBatch/" 
    259271                data_dir = self._base_dir + "data/" + datacentre  # dir relating to the specified datacentre docs 
    260272                 
  • TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/run_all_ingest.py

    r4961 r5027  
    1313 
    1414# configure logging 
    15 logging.basicConfig(level=logging.INFO, 
     15logging.basicConfig(level=logging.ERROR, 
    1616                    format='%(asctime)s %(filename)s:%(lineno)d %(levelname)s %(message)s') 
    1717logging.info(lineSeparator) 
     
    2121 
    2222#this hardwired to proglue BADC usage ops 
    23 current_dir = "/usr/local/WSClientsIngestUpdate/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/"# this is the base dir that the script is ran from 
     23#current_dir = "/usr/local/WSClientsIngestUpdate/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/"# this is the base dir that the script is ran from 
     24current_dir = "/disks/aer1/users/sdonegan/PROJECTS/ndgWorkspace/discoveryIngestIgnisPRODUCTION/OAIBatch/" 
    2425logging.info("Running ingest script for all config files in the current run directory (%s)" %current_dir) 
    2526 
     
    8788 
    8889if os.path.exists(summaryFileName): 
    89     commandline = "cat " + summaryFileName + " | mail -s 'PROGLUE Discovery Ingest report' steve.donegan@stfc.ac.uk" 
     90    commandline = "cat " + summaryFileName + " | mail -s 'IGNIS Discovery Ingest report' steve.donegan@stfc.ac.uk" 
    9091    os.system(commandline) 
    9192 
Note: See TracChangeset for help on using the changeset viewer.