Changeset 5040 for TI01-discovery/tags
- Timestamp: 26/02/09 11:55:50 (11 years ago)
- Location: TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch
- Files: 7 edited
Legend:
- Unmodified
- Added
- Removed
-
TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/DIF.py
r5027 r5040 81 81 self.datasetTitle=helper.getText(self.tree,'Data_Set_Citation/Dataset_Title') 82 82 83 logging.info('EXTRA INFORMATION for ORDERING= dataset title: ' + self.datasetTitle + ' dataset creation date: ' + self.metadataCreationDate + ' datacentre name: ' + self.datacentreName)83 #logging.info('EXTRA INFORMATION for ORDERING= dataset title: ' + self.datasetTitle + ' dataset creation date: ' + self.metadataCreationDate + ' datacentre name: ' + self.datacentreName) 84 84 85 85 #Note that entity.constraints.html is about access control on the metadata, -
TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/DatasetBasicParameters.py
r5028 r5040 19 19 logging.info("Retrieving identifier for metadata record " + filename + " in format: " + format) 20 20 xml=file(filename).read() 21 21 22 22 self._datacentre_format = format 23 23 -
TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/MDIP.py
r5027 r5040 29 29 #add in extra stuff for new DatasetBasicParameters object 30 30 self.metadataCreationDate=helper.getText(self.tree,'DateOfUpdateOfMetadata') 31 self.datacentreName=helper.getText(self.tree,'Distributor/DistributorName/DistributorName Code')31 self.datacentreName=helper.getText(self.tree,'Distributor/DistributorName/DistributorNameName') 32 32 33 33 if len(helper.getText(self.tree,'Title')) < 1: 34 self.datasetTitle=helper.getText(self.tree,'DatasetIdentifier') 35 else: 34 36 self.datasetTitle=helper.getText(self.tree,'Title') 35 else:36 self.datasetTitle=helper.getText(self.tree,'DatasetIdentifier')37 37 38 logging.info('EXTRA INFORMATION for ORDERING= dataset title: ' + self.datasetTitle + ' dataset creation date: ' + self.metadataCreationDate + ' datacentre name: ' + self.datacentreName)38 #logging.info('EXTRA INFORMATION for ORDERING= dataset title: ' + self.datasetTitle + ' dataset creation date: ' + self.metadataCreationDate + ' datacentre name: ' + self.datacentreName) 39 39 40 40 … … 43 43 import sys 44 44 f=sys.argv[1] 45 print 'file:[%s]'%f45 #print 'file:[%s]'%f 46 46 ff=file(f) 47 47 xml=ff.read() -
TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/People.py
r4027 r5040 33 33 self.data={'personName':Name(None),'personEmail':'','personPhone':'','orgURL':'','orgName':'','searchlink':''} 34 34 def toHTML(self,prefix=''): 35 print self.data35 #print self.data 36 36 if self.elem is None: return '' 37 37 html=prefix #para start? -
TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/PostgresDAO.py
r5027 r5040 294 294 Insert the original metadata doc into the postgres DB 295 295 ''' 296 297 logging.info("cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc 1") 296 298 297 logging.info("Inserting new original document in Postgres DB") 299 298 sqlCmd = "SELECT create_document('" + self._record.shortFilename + "', '" + \ … … 303 302 self._record.dataset_name + "', '" + self._record.datacentre_name + "', '" + self._record.dataset_lastEdit + "');" 304 303 305 logging.info("SELECT create_document('" + self._record.shortFilename + "', '" + \304 '''logging.info("SELECT create_document('" + self._record.shortFilename + "', '" + \ 306 305 self._record.discovery_id + "', '" + self._record.docType + "', '" + \ 307 306 self._record.originalFormat + "', '" + self._record.getAuthorsInfo() + "', '" + \ 308 307 self._record.getParametersInfo() + "', '" + self._record.getScopeInfo() + "', '" + \ 309 self._record.dataset_name + "', '" + self._record.datacentre_name + "', '" + self._record.dataset_lastEdit + "');" ) 308 self._record.dataset_name + "', '" + self._record.datacentre_name + "', '" + self._record.dataset_lastEdit + "');" )''' 310 309 311 310 id = db_funcs.runSQLCommand(self._connection, sqlCmd) -
TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/geoUtilities.py
r4908 r5040 77 77 ''' Test time coverage class ''' 78 78 tc=TimeCoverage(('01-01-2001','01-02-2001','Complete')) 79 print tc79 80 80 self.assertEqual(tc[2],'Complete') 81 81 -
TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/oai_document_ingester.py
r5027 r5040 9 9 from time import strftime 10 10 from SchemaNameSpace import SchemaNameSpace 11 from DIF import DIF12 from MDIP import MDIP11 #from DIF import DIF 12 #from MDIP import MDIP 13 13 from DatasetBasicParameters import DatasetBasicParameters 14 14 import ndgUtils … … 26 26 data in the correct form in the DB 27 27 ''' 28 29 def getID(self, filename): 30 ''' 31 Gets the identifier out of an input metadata xml record. 32 Copes with DIF and MDIP currently. 33 @param filename - name of document file being processed 34 @return: ID - id to use to refer to the document 35 ''' 36 logging.info("Retrieving identifier for metadata record " + filename) 37 xml=file(filename).read() 38 if self._datacentre_format == "DIF": 39 d=DIF(xml) 40 ID=d.entryID 41 elif self._datacentre_format == "MDIP": 42 d=MDIP(xml) 43 ID=d.id 44 else: 45 raise TypeError, "Only handles DIF or MDIP here." 46 47 return ID 48 28 49 29 50 def addFileToPostgresDB(self, filename ):30 def addFileToPostgresDB(self, filename,basicParameters): 51 31 ''' 52 32 Add a file to the postgres DB - extracting and storing all the required … … 70 50 dao = None 71 51 try: 72 73 #Update: add some extra vas in for ingest to aid in search/present ordering of datasets 74 basicParameters=DatasetBasicParameters(filename,self._datacentre_format) 52 75 53 discoveryID = basicParameters.datasetID 76 54 datasetName = basicParameters.datasetName … … 78 56 datasetLastEditUpdateDate = basicParameters.metadataCreationDate 79 57 80 logging.info("gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg 1")81 58 82 59 record = PostgresRecord(filename, self._NDG_dataProvider, \ … … 85 62 self._xq, self._datacentre_format) 86 63 87 logging.info("gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg 2")88 64 89 65 # Now create the data access object to interface to the DB 90 66 dao = PostgresDAO(record, self._dbConnection) 91 
logging.info("gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg 3")92 67 93 68 # Finally, write the new record … … 149 124 if words[0] == 'groups': 150 125 self._datacentre_groups = words[1:] 151 if words[0] == 'format': 126 if words[0] == 'format': 152 127 self._datacentre_format = words[1] 153 128 if words[0] == 'namespace': … … 267 242 self._no_files_ingested = 0 268 243 self._no_problem_files = 0 269 #self._base_dir = "/usr/local/WSClientsIngestUpdate/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/" #os.getcwd() + "/" # this is the base dir that the script is ran from270 self._base_dir = "/disks/aer1/users/sdonegan/PROJECTS/ndgWorkspace/discoveryIngestIgnisPRODUCTION/OAIBatch/"244 self._base_dir = "/usr/local/WSClientsIngestUpdate/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/" #os.getcwd() + "/" # this is the base dir that the script is ran from 245 #self._base_dir = "/disks/aer1/users/sdonegan/PROJECTS/ndgWorkspace/discoveryIngestIgnisPRODUCTION/OAIBatch/" 271 246 data_dir = self._base_dir + "data/" + datacentre # dir relating to the specified datacentre docs 272 247 … … 336 311 logging.info(self.lineSeparator) 337 312 logging.info("Renaming files:") 338 313 339 314 for filename in os.listdir(originals_dir): 340 315 if filename.endswith('.xml'): 341 316 original_filename = originals_dir + filename 342 317 318 #Use new class to get basic parameters from input xml doc to pass around (supplants getID method) 319 basicParameters=DatasetBasicParameters(original_filename,self._datacentre_format) 320 343 321 try: 344 ident= self.getID(original_filename)322 ident=basicParameters.datasetID #self.getID(original_filename) 345 323 346 324 except Exception, detail: … … 397 375 filenames = os.listdir(discovery_dir) 398 376 for filename in filenames: 399 self.addFileToPostgresDB(discovery_dir + filename )377 self.addFileToPostgresDB(discovery_dir + filename,basicParameters) 400 378 401 379 #Make copies 
of discovery and oai/originals and DIF2MOLES areas to backup area for tape backups
Note: See TracChangeset for help on using the changeset viewer.