source: TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/MDIP.py @ 5040

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/MDIP.py@5040
Revision 5040, 2.0 KB checked in by sdonegan, 11 years ago (diff)

Debug new ingest classes - previous commit had problems with mdip records.

Line 
1try: #python 2.5
2    from xml.etree import ElementTree as ET
3except ImportError:
4    try:
5        # if you've installed it yourself it comes this way
6        import ElementTree as ET
7    except ImportError:
8        # if you've egged it this is the way it comes
9        from elementtree import ElementTree as ET
# Workaround for ElementTree's habit of serialising namespaces with generated prefixes (ns0, ns1, ns2, ...):
11#ET._namespace_map.update({'http://www.oceannet.org/mdip/xml': 'mdip', 'http://www.w3.org/1999/xlink':'xlink'})
12
13from ndgUtils.ETxmlView import loadET, nsdumb
class MDIP:
    ''' Supports the MDIP interim format for extraction of the (unique) identifier.

    Attributes set by the constructor (each text value comes from nsdumb.getText):
        tree                 -- whatever loadET() returns for the input
        id                   -- text of 'DatasetIdentifier'
        metadataCreationDate -- text of 'DateOfUpdateOfMetadata'
        datacentreName       -- text of 'Distributor/DistributorName/DistributorNameName'
        datasetTitle         -- text of 'Title', or the identifier when the title is empty
    '''
    def __init__(self,file):
        ''' Parse an MDIP record and pull out the fields used for ordering/reporting.

        file -- the MDIP document in whatever form loadET() accepts.  NOTE(review):
                despite the name, the __main__ driver in this module passes the XML
                text itself rather than a path or file object -- confirm against loadET.
                (The name shadows a Python 2 builtin but is kept for caller compatibility.)
        '''
        # we use loadET to protect ourselves from scummy characters and unicode problems
        self.tree=loadET(file)
        # we use nsdumb in case the namespace causes difficulties ...
        helper=nsdumb(self.tree)
        self.id=helper.getText(self.tree,'DatasetIdentifier')

        # extra fields for the new DatasetBasicParameters object
        self.metadataCreationDate=helper.getText(self.tree,'DateOfUpdateOfMetadata')
        self.datacentreName=helper.getText(self.tree,'Distributor/DistributorName/DistributorNameName')

        # Look the title up once; records with an empty title fall back to the identifier.
        title=helper.getText(self.tree,'Title')
        if not title:
            self.datasetTitle=self.id
        else:
            self.datasetTitle=title

        # Report the identifier that was extracted.  The previous code formatted the
        # builtin function `id` here instead of self.id.  The single-argument
        # parenthesised form prints the same text on Python 2 and Python 3.
        print('id [%s]' % self.id)
if __name__=="__main__":
    # Command-line smoke test: build an MDIP object from the file named in argv[1].
    import sys
    if len(sys.argv) < 2:
        # Fail with a usage message rather than a bare IndexError traceback.
        sys.exit('usage: %s <mdip-xml-file>' % sys.argv[0])
    f=sys.argv[1]
    # open() replaces the Python-2-only file() builtin.  try/finally (rather than
    # a `with` block) guarantees the handle is closed while still running on the
    # Python 2.5 interpreters this module's import fallbacks cater for.
    ff=open(f)
    try:
        xml=ff.read()
    finally:
        ff.close()
    # MDIP is handed the document text, not the path.
    x=MDIP(xml)
Note: See TracBrowser for help on using the repository browser.