source: TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/DIF.py @ 5852

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI01-discovery/tags/stable-TI01-ingestAutomation_Proglue_upgradesAndReporting/temp/OAIBatch/DIF.py@5852
Revision 5852, 7.9 KB checked in by sdonegan, 10 years ago (diff)

Update creation date handling to mimic r5847 in MSI thread

Line 
1# python class to support methods on a DIF ... to conform with
2# renderEntity etc ...
3#
4from Utilities import *
5from geoUtilities import *
6from People import *
7from AccessControl import AccessControl
8from ndgUtils.ETxmlView import loadET, nsdumb
9from renderEntity import renderEntity
10try: #python 2.5
11    from xml.etree import ElementTree as ET
12except ImportError:
13    try:
14        # if you've installed it yourself it comes this way
15        import ElementTree as ET
16    except ImportError:
17        # if you've egged it this is the way it comes
18        from elementtree import ElementTree as ET
19
class DIFService:
    ''' A DIF only knows about a related URL.

    Holds the three parts of a related-url entry and renders them as an
    HTML anchor via str(). '''
    def __init__(self,c,u,d):
        ''' Take a related url tuple (content_type,url,description) and store it.

        c -- content type, stored as self.contentType
        u -- the url itself, stored as self.url
        d -- free text description, stored as self.description

        The values are stored exactly as given; no modification is applied here. '''
        self.contentType,self.url,self.description=c,u,d
    def __str__(self):
        ''' Return an HTML anchor pointing at the url, with the content type as
        the tooltip (title) and the description as the visible link text.

        NOTE: the values are interpolated as-is (no HTML escaping), so callers
        that render untrusted metadata should escape them beforehand. '''
        # The arguments must be a single tuple: without the parentheses only
        # the first value is bound to the format string and a TypeError is raised.
        return '<a href="%s" title="%s">%s</a>'%(self.url,self.contentType,self.description)
28   
class DIF:
    ''' Supports the NASA GCMD DIF format for python operations,
    note ... not a complete implementation, currently minimum to
    show a reasonable piece of content.

    After construction the instance exposes (among others) entryID, abstract,
    name, abbreviation, metadataCreationDate, datacentreName, datasetTitle,
    constraints, parameters, bbox, timeCoverage, paleoKeywords, centre,
    curator, authors, date, title, briefCitation, services and binding. '''

    # Longest prefix of an unparseable document echoed back in the error message
    _MAX_ERROR_SNIPPET=300

    def __init__(self,xml,et=0,debug=0,ndgObject=None):
        '''Initialise a python dif instance based on an xml document (expected
        to be an input string if et=0, otherwise an ElementTree instance).

        xml       -- the DIF document (string, or ElementTree instance if et is true)
        et        -- true if xml is already an ElementTree instance
        debug     -- if true, append the entry id and raw xml to 'difs.log'
        ndgObject -- optional object; if it has a non-None discoveryURL a
                     'DISCOVERY' binding is created from it

        Raises TypeError if the input is of the wrong type, and ValueError if
        a string cannot be parsed or the document holds no DIF element. '''
        if et:
            try:
                self.tree=xml
                self.xml=ET.tostring(xml)
            except Exception:
                raise TypeError('DIF input is not a valid ElementTree instance')
        else:
            try:
                self.tree=loadET(xml)
                self.xml=xml
            except Exception: # for some reason we can't parse the document, and it's worth knowing why
                if isinstance(xml,str):
                    # only shorten (and flag with '...') documents that really are too long
                    if len(xml)>self._MAX_ERROR_SNIPPET:
                        xml=xml[:self._MAX_ERROR_SNIPPET]+'\n...'
                    raise ValueError('DIF input cannot be parsed into an ElementTree instance:\n%s'%xml)
                else:
                    raise TypeError('DIF input of type [%s] needs to be a string!'%type(xml))

        self.debug=debug

        # if this is an ndgObject that'll have been sorted externally ...
        self.ndgObject=ndgObject

        # now try and interpret it

        helper=nsdumb(self.tree)
        self.metadataType='DIF'
        # the DIF element may be wrapped in another document; if so, dig it out
        if helper.strip(self.tree.tag)!=self.metadataType:
            self.tree=helper.find(self.tree,self.metadataType)
            if self.tree is None:
                raise ValueError('DIF input does not include a DIF element:\n%s'%self.xml)

        self.entryID=helper.getText(self.tree,'Entry_ID')
        self.abstract=helper.getText(self.tree,'Summary')
        self.name=helper.getText(self.tree,'Entry_Title')
        self.abbreviation=self.name[:5]

        #add some extra parameters here to extract further columns to aid in ranking & ordering
        #updated to mimic trunk MSI dev at r5847 20/10/09
        # Prefer the last revision date, falling back to the creation date when
        # no revision date is recorded.
        # NOTE(review): the earlier test here compared len(...) with '' (never
        # true) and had its branches swapped - confirm this ordering is the
        # intended one.
        revisionDate=helper.getText(self.tree,'Last_DIF_Revision_Date')
        if revisionDate:
            self.metadataCreationDate=revisionDate
        else:
            self.metadataCreationDate=helper.getText(self.tree,'DIF_Creation_Date')

        self.datacentreName=helper.getText(self.tree,'Data_Center/Data_Center_Name/Short_Name')

        # fall back on the entry title if the citation carries no dataset title
        citationTitle=helper.getText(self.tree,'Data_Set_Citation/Dataset_Title')
        if citationTitle:
            self.datasetTitle=citationTitle
        else:
            self.datasetTitle=self.name

        #Note that entity.constraints.html is about access control on the metadata,
        #and so we don't populate this here ...
        self.constraints=AccessControl(None)

        #need entity.parameters, entity.bbox, entity.timeCoverage, entity.curator, entity.creators

        # each parameter becomes a '/' separated path of its keyword levels,
        # with the separators left by empty trailing levels removed
        levels=['Category','Topic','Term','Variable','Detailed_Variable']
        self.parameters=[]
        for parameter in helper.findall(self.tree,'Parameters'):
            path='/'.join([helper.getText(parameter,level) for level in levels])
            self.parameters.append(path.rstrip('/'))

        #load up information about spatial bounding box
        self.bbox=Bounding(self.tree,entity='DIF',getter=helper.getText)

        #load up information about temporal extent
        tc=(
            helper.getText(self.tree,'Temporal_Coverage/Start_Date'),
            helper.getText(self.tree,'Temporal_Coverage/Stop_Date'),
            helper.getText(self.tree,'Data_Set_Progress') )
        self.timeCoverage=TimeCoverage(tc)

        #load up those silly paleo keywords
        self.paleoKeywords=[
            helper.getText(e,'Chronostratigraphic_Unit')
            for e in helper.findall(self.tree,'Paleo_Temporal_Coverage')]

        #Data curator information
        self.centre=DIFcontact(helper.find(self.tree,'Data_Center'),ctype='centre',helper=helper)
        self.curator=DIFcontact(self.tree)

        #Data Creators
        self.creators=[]
        # use author here because a full dif entry for creator wont necessarily exist in citation ...
        self.authors=DIFAuthors(self.tree,helper)
        self.date=dateParse(helper.getText(self.tree,'Data_Set_Citation/Dataset_Release_Date'),'YYYY')
        if self.date=='': self.date='XXXX'
        self.title=citationTitle
        self.briefCitation=None
        if (self.authors!='' and self.date!='' and self.title!=''):
            self.briefCitation='%s (%s): %s'%(self.authors,self.date,self.title)

        #services: one DIFService per Related_URL element
        self.services=[]
        for item in helper.findall(self.tree,'Related_URL'):
            self.services.append(
                DIFService(
                 helper.getText(item,'URL_Content_type'),
                 helper.getText(item,'URL'),
                 helper.getText(item,'Description') ))

        # a binding only exists when we were handed an ndgObject with a discovery url
        self.binding=None
        if self.ndgObject is not None and self.ndgObject.discoveryURL is not None:
            self.binding=DIFService('DISCOVERY',self.ndgObject.discoveryURL,'Discovery record')

        if self.debug:
            f=open('difs.log','a')
            try:
                f.write('%s##\n%s\n##################################\n'%(self.entryID,self.xml))
            finally:
                # make sure the log is closed even if the write fails
                f.close()

    def toHTML(self,config):
        ''' Return the result of rendering this DIF with a renderEntity built
        from config, or a short placeholder paragraph if there is no tree. '''
        if self.tree is not None:
            renderer=renderEntity(config)
            return renderer.render(self)
        else:
            return '<p>No Valid DIF</p>'
169
170
171import unittest
172
class TestCase(unittest.TestCase):
    """
    Smoke tests for the DIF class: load an example DIF document, print a few
    extracted entries and write the rendered HTML to 'difOutput.html'.
    """

    inputFile = 'examples/neodc.eg1.dif'
    configFile='examples/example.config'

    def setUp(self):
        ''' Load example config and DIF files for testing '''
        f=open(self.inputFile,'r')
        try:
            xml=f.read()
        finally:
            # don't leak the handle, whatever happens during the read
            f.close()
        self.dif=DIF(xml)
        # myConfig is not defined in this file - presumably it arrives via one
        # of the star imports at the top of the module (TODO confirm)
        self.config=myConfig(self.configFile)

    def testEntries(self):
        ''' Testing the DIF object can be loaded and some key entries extracted '''
        print('Entry ID [%s]'%self.dif.entryID)
        print('Author [%s]'%self.dif.authors)

    def testrenderDIF(self):
        ''' Testing the conversion to html '''
        print(self.dif.timeCoverage)
        html=self.dif.toHTML(self.config)
        g=open('difOutput.html','w')
        try:
            g.write(html)
        finally:
            # closing flushes the output so the file is complete on disk
            g.close()
198
if __name__ == '__main__':
    # Run as a script: hand over to unittest's command line runner
    unittest.main()
201
202       
203       
Note: See TracBrowser for help on using the repository browser.