- Timestamp: 20/10/09 11:35:29 (11 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/Utilities.py
r5714 r5847 75 75 76 76 self.paths = self.appendNameSpace(self.URLpaths()) 77 78 for path in self.paths: 77 79 78 for path in self.paths:79 80 80 nodes = self.root.findall(path) 81 81 82 82 for node in nodes: 83 84 83 currentURL = urllib.quote(node.text) 85 86 84 redirectURL = self.redirectBaseURL + currentURL + '&docID=' + urllib.quote(docID) + '&docTitle=' + urllib.quote(docTitle) 87 88 85 node.text = redirectURL 89 86 90 87 except: 91 88 print "Cannot perform identityTransform to rewrite ndg redirect urls!" 92 89 logging.warn("Cannot perform identityTransform to rewrite ndg redirect urls!") 93 90 … … 123 120 '''method to handle xpath to title for acceptable formats''' 124 121 def datasetTitle(self): 125 if (self.format == 'DIF') | (self.format == 'dif'):122 if self.format == 'DIF': 126 123 return ['Entry_Title'] 127 elif (self.format == 'MDIP') | (self.format =='mdip'):124 elif self.format == 'MDIP': 128 125 return ['Title'] 129 126 130 127 '''method to handle xpath for id if for reqd format ''' 131 128 def getIdPath(self): 132 if (self.format == 'DIF') | (self.format == 'dif'):129 if self.format == 'DIF': 133 130 return ['Entry_ID'] 134 elif (self.format == 'MDIP') | (self.format =='mdip'):131 elif self.format == 'MDIP': 135 132 return ['DatasetIdentifier'] 136 133 137 134 '''method to handle xpath for expected URLS if for reqd format ''' 138 135 def URLpaths(self): 139 if (self.format == 'DIF') | (self.format == 'dif'):140 return ['Related_URL/URL','Data_Center/Data_Center_URL' , 'Data_Set_Citation/Online_Resource']141 elif (self.format == 'MDIP') | (self.format =='mdip'):136 if self.format == 'DIF': 137 return ['Related_URL/URL','Data_Center/Data_Center_URL'] 138 elif self.format == 'MDIP': 142 139 return ['OnlineResource','Distributor/Web'] 143 140 144 141 '''method to handle default namespaces for reqd format ''' 145 142 def returnNS(self): 146 if (self.format == 'DIF') | (self.format == 'dif'):143 if self.format == 'DIF': 147 144 return 
'{http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/}' #Note that ns has to be encapsulated in {}'s! 148 elif (self.format == 'MDIP') | (self.format =='mdip'):145 elif self.format == 'MDIP': 149 146 return '{http://www.oceannet.org/mdip/xml}' 150 147 … … 267 264 self.datasetID=helper.getText(et,'Entry_ID') 268 265 self.datasetName = helper.getText(et,'Data_Set_Citation/Dataset_Title') 269 self.datacentreName = helper.getText(et,'Data_Center/Data_Center_Name/Short_Name') 270 self.metadataCreationDate=helper.getText(et,'DIF_Creation_Date') 266 self.datacentreName = helper.getText(et,'Data_Center/Data_Center_Name/Short_Name') 271 267 self.datasetStartDateNom = helper.getText(et,'Temporal_Coverage/Start_Date') 272 268 self.datasetEndDateNom = helper.getText(et,'Temporal_Coverage/Stop_Date') 273 269 270 #need to make sure that latest date, eother from creation or last revision is present. 271 if helper.getText(et,'Last_DIF_Revision_Date') != '': 272 self.metadataCreationDate=helper.getText(et,'Last_DIF_Revision_Date') 273 else: 274 self.metadataCreationDate=helper.getText(et,'DIF_Creation_Date') 275 274 276 #Fudge to get around some DC's using "entry_title" and others "dataset_title". grrr. 275 if self.datasetName == '': 276 self.datasetName = helper.getText(et,'Entry_Title')277 277 if self.datasetName == '': 278 self.datasetName == helper.getText(et,'Entry_Title') 279 278 280 #TODO amend this - just a fudge to ingest records from crappy badc/neodc whilst pipeline down;.. 279 281 if self.datasetEndDateNom == '': 280 282 self.datasetEndDateNom = helper.getText(et,'Temporal_Coverage/End_Date') 281 283 282 elif ((self._datacentre_format == 'MDIP') or (self._datacentre_format == 'mdip')): 283 284 elif self._datacentre_format == 'MDIP': 284 285 #return helper.getText(self.tree,'DatasetIdentifier') 285 286 self.datasetID=helper.getText(et,'DatasetIdentifier')
Note: See TracChangeset for help on using the changeset viewer.