source: exist/trunk/python/ndgUtils/ndgObject.py @ 4782

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/ndgObject.py@5371
Revision 4782, 7.6 KB checked in by cbyrom, 11 years ago (diff)

Add code to allow publishing of data to multiple feeds - to offer
different levels of granularity to subscribe to (with appropriate feed
titles) + add code to
allow feed publication to be done asynchronously + extend to
run atom to DIF transforms at point of publish for atom docs
in the published state (i.e. with state of 'published' or 'Published').
Add code to ingest required organisation data when setting up
the eXist DB - and provide a cache of this data to avoid unnecessary
lookups. Also extend the eXist DB setup code to initialise the
various new feeds added.
Add utility methods to check if an eXist collection is new and to
establish the publication state of an atom doc - and to change
the state of this doc (NB, need to do a doc delete and doc create
for this - can't see a doc 'move' function in eXist). Also add
code to do the atom to DIF transform direct in the eXist DB.
Improve error handling when publishing feed data.

Line 
1import ndgRetrieve, xmlHandler2, logging
2
3class ndgObject:
4    ''' This class instantiates an ndgObject which describes the various ways
5        of obtaining itself, primarily
6                (1) a downloadable xml representation from a repository,
7                (2) a printable xml representation '''
8
9    # The various different document types:
10    MOLES_DOC_TYPE = 'NDG-B1'
11    DIF_DOC_TYPE = 'DIF'
12    DC_DOC_TYPE = 'DC'
13    ISO_DOC_TYPE = 'ISO19139'
14    NDGB1_DOC_TYPE = 'NDG-B1'
15    NDGB0_DOC_TYPE = 'NDG-B0'
16    MDIP_DOC_TYPE = 'MDIP'
17    NDGA0_DOC_TYPE = 'NDG-A0'
18    NUMSIM_DOC_TYPE = 'NumSim'
19    ATOM_DOC_TYPE = 'ATOM'
20    ATOM_BACKUP_DOC_TYPE = 'ATOM-BACKUP'
21   
22    # NB, DIF records are stored in local eXist DB to allow feeds to serve them
23    BROWSE_DIF_DOC_TYPE = 'BROWSE-DIF'
24
25        # various namespaces used in the docs
26    ATOM_NS = 'http://www.w3.org/2005/Atom'
27    DIF_NS = 'http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/'
28    MOLES_NS = 'http://ndg.nerc.ac.uk/schema/moles2beta'
29    GEOSS_NS = 'http://www.georss.org/georss/10'
30    GML_NS = 'http://www.opengis.net/gml'
31    CSML_NS = 'http://ndg.nerc.ac.uk/csml'
32    XHTML_NS = "http://www.w3.org/1999/xhtml"
33    CDML_DTD = 'http://www-pcmdi.llnl.gov/software/cdms/cdml.dtd'
34       
35    # Group the doc types according to the source they should be retrieved from
36    DISCOVERY_SCHEMES = [DIF_DOC_TYPE, DC_DOC_TYPE, \
37                         MDIP_DOC_TYPE, ISO_DOC_TYPE, 'ISO']
38    BROWSE_SCHEMES = [MOLES_DOC_TYPE, NDGB1_DOC_TYPE, \
39                      NUMSIM_DOC_TYPE, ATOM_DOC_TYPE, \
40                      ATOM_BACKUP_DOC_TYPE, BROWSE_DIF_DOC_TYPE]
41   
42    def __init__(self,uri,config=None):
43        ''' Parse the uri and prepare for obtaining the actual content'''
44        logging.debug("Initialising ndgObject with uri: '%s'" %uri)
45        #Dom had problem with unicode coming in here ... dunno why @@@@
46        uri=str(uri) 
47       
48        # a priori, assume we can't get content for this object
49        self.gettable=-1 
50       
51        # handle all the known ways of doing an NDG URI ...
52        bits=uri.split(':')
53        bits2=uri.split('__')
54        ok=1
55        if len(bits)==3:
56            repository,schema,localID=bits
57            self.uri=uri.replace(':','__')
58        elif len(bits2)==3:
59            repository,schema,localID=bits2
60            self.uri=uri
61        elif len(bits2)>3:
62            repository,schema,localID=bits2[0],bits2[1],'__'.join(bits2[2:])
63            self.uri=uri
64        else:
65            bits=uri.split('/')
66            if len(bits)==2:
67                schema='NDG-B0'
68                repository,localID=bits
69                self.uri=None  #
70                ok=0  # I reckon we shouldn't ever see any of these again ...
71                # but if we do, the uri will need fixing too ...
72            else: ok=0
73       
74        if not ok:
75            # after all that, we don't think it's an NDG URI ...
76            raise ValueError,'The identifier [%s] is not a valid NDG style URI'%uri
77
78        # yes, it is an NDG URI ...
79        self.repository,self.schema,self.localID=repository,schema,localID
80        logging.debug("Extracted valid NDG values from URI: repository: '%s', schema: '%s', localID: '%s'" \
81                      %(repository, schema, localID))
82        self.setConfig(config)
83
84    def setDefaultProperties(self):
85        '''
86        Set default properties for object
87        '''
88        self.discoveryURL,self.baseURL,self.xmlURL,\
89            self.printableURL =None,None,None,None
90        self.useDiscoveryService = 1
91        self.BURL=None
92
93       
94    def setConfig(self, config):
95        '''
96        Set up the configuration for retrieving this document
97        '''
98        logging.debug("Setting up configuration for retrieving document")
99        self.config=config
100        if config is None:
101            self.setDefaultProperties()
102            return
103
104        self.server=self.config.get('DISCOVERY','default')
105        server=self.server
106        qs=None
107        # This NDG object may itself be a discovery record, which makes life easy, but
108        # it might not be, in which case we have to build up all the possible views upon it.
109        # But remember only data entity b records have discovery records ...
110        self.viewService='%s/view/'%server
111        discoveryBASE='%s%s__%s__%s'%(self.viewService,self.repository,self.schema,self.localID)
112
113        # set default return format - if not set, just return in original format
114        fmt = self.config.get('DISCOVERY','formatDefault')
115        logging.info("Default discovery format set to: %s" %fmt)
116           
117        self.BURL=None
118        # We'll build the following even if it can't be used (as would be the case for
119        # a non data entity B record or an A record) because it's a useful template.
120        if self.schema!=fmt: 
121            qs=('outputSchema',fmt)
122        self.discoveryURL=self.__buildURL(discoveryBASE,[qs])
123
124        # config file should have details on the service to use for the repository
125        # - if not, default to 'unknown'
126        servicehost = self.config.get('NDG_B_SERVICE',self.repository)
127        if not servicehost:
128            servicehost = 'unknown'
129           
130        # If this record is itself a discovery record, then we don't have much more to do
131        if self.schema in self.DISCOVERY_SCHEMES:
132            self.xmlURL=self.__buildURL(
133                discoveryBASE.replace('/view/','/retrieve/'),[qs,('format','raw')])
134            self.printableURL=self.__buildURL(discoveryBASE,[qs,('format','xml')])
135            self.URL=self.discoveryURL
136            if servicehost != 'unknown':
137                self.gettable=1
138        elif self.schema in self.BROWSE_SCHEMES:
139            # One day we'll use a service binding to get this
140            # This is a mapping from the ndg repository id to an actual repository id
141            # understood by the ndg exist interface
142            blank=self.config.get('NDG_B_SERVICE','instance')
143            url=blank.replace('SERVICEHOST',servicehost)
144            url=url.replace('URI',self.uri)
145            self.URL=url
146            self.xmlURL=url.replace('/view/','/retrieve/')+'?format=raw'
147            self.printableURL=url+'?format=xml'
148            if servicehost !='unknown': 
149                self.gettable=0
150            self.useDiscoveryService = 0
151        else:
152            #currently we don't know how to get this one
153            self.URL='unknown'
154            self.setDefaultProperties()
155
156        logging.debug("Set up URL: '%s', discoveryURL: '%s'" %(self.URL, self.discoveryURL))
157           
158        #now, we'll build a stub-B url as well, in case that comes in handy
159        if self.schema!='NumSim' and self.gettable<>-1:
160            if self.schema:
161                self.BURL=discoveryBASE.replace(self.schema,'NDG-B1')
162            if server:
163                self.BURL=self.BURL.replace(server,servicehost)
164            logging.debug("Set up stub-B url: '%s'" %self.BURL)
165
166
167    def __buildURL(self,base,queryStuff):
168        ss=''
169        for i in queryStuff: 
170            if i!=None:ss+='&%s=%s'%(i[0],i[1])
171        if ss!='':ss='?'+ss[1:]
172        return base+ss
173           
174    def get(self,securityCredentials=None):
175        ''' Actually retrieve an ORIGINAL xml record corresponding to an ndg URI
176        NB, not sure if this method is actually used anywhere - since get is
177        usually done via ndgRetrieve
178        '''
179        if self.config is None: return None
180        status,xmlh=ndgRetrieve.ndgRetrieve(self,self.config, \
181                                            discovery = self.useDiscoveryService)
182        if status:
183            x=xmlHandler2.xmlHandler(xmlh,string=1)
184            return x.xmls
185        else:
186            return status
187       
188    def __str__(self):
189        return self.uri
190
Note: See TracBrowser for help on using the repository browser.