source: exist/trunk/python/ndgUtils/ndgRetrieve.py @ 4782

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/ndgRetrieve.py@5371
Revision 4782, 4.7 KB checked in by cbyrom, 11 years ago (diff)

Add code to allow publishing of data to multiple feeds - to offer
different levels of granularity to subscribe to (with appropriate feed
titles) + add code to
allow feed publication to be done asynchronously + extend to
run atom to DIF transforms at point of publish for atom docs
in the published state (i.e. with state of 'published' or 'Published').
Add code to ingest required organisation data when setting up
the eXist DB - and provide cache of this data to avoid unnecessary
lookups. Also extend the eXist DB setup code to initialise the
various new feeds added.
Add utility methods to check if an eXist collection is new and to
establish the publication state of an atom doc - and to change
the state of this doc (NB, need to do a doc delete and doc create
for this - can't see a doc 'move' function in eXist). Also add
code to do the atom to DIF transform direct in the eXist DB.
Improve error handling when publishing feed data.

Line 
1import DocumentRetrieve
2from ndgSearch import ndgSearch
3import cgi,time, logging
4import ndgObject as no
5
# Collection paths handed to the retrieve call as ``targetCollection``;
# ndgRetrieve selects one of them from the schema of the requested uri.
ATOM_COLLECTION = "/db/atoms"
ATOM_BACKUP_COLLECTION = "/db/atoms_backup"
DIF_COLLECTION = "/db/DIF"
MOLES_COLLECTION = "/db/ndg_B_metadata"
NDGA0_COLLECTION = "/db/ndg_A_metadata"
NUMSIM_COLLECTION = "/db/NumSim"
12
class NDGRetrieveError(Exception):
    """Error raised by the NDG retrieve code.

    Constructing the exception also writes the message to the root
    logger at ERROR level, so every failure is logged even when a caller
    catches and discards the exception.
    """

    def __init__(self, msg):
        # Log first, then let the base class store the message in ``args``.
        logging.error(msg)
        super(NDGRetrieveError, self).__init__(msg)
18
19
def _escape_markup(text):
    """Escape ``&``, ``<`` and ``>`` in *text* so it can be embedded in HTML.

    ``cgi.escape`` is deprecated since Python 3.2 and removed in 3.8, so
    ``html.escape`` is used when it is importable; ``quote=False`` keeps the
    result identical to what ``cgi.escape(text)`` produced.
    """
    try:
        from html import escape
    except ImportError:
        # Python 2 has no stdlib ``html`` module: keep the original call.
        return cgi.escape(text)
    return escape(text, quote=False)


def ndgRetrieve(uri, config, output='', remote=0, discovery=1):
    """Retrieve the document identified by *uri*.

    Parameters:
        uri        object exposing ``repository``, ``schema`` and ``localID``
                   (described by the original author as an ndgObject instance).
        config     object with a ``get(section, option)`` method; the sections
                   read are NDG_B_SERVICE, DEFAULT and NDG_EXIST.
        output     intended to name the desired output schema; it is not read
                   anywhere in this function.
        remote     if non-zero, a uri whose repository is not served here is
                   fetched through ``DocumentRetrieve.ndgHTTP`` using the
                   configured proxy server.
        discovery  if non-zero, the document is fetched through ``ndgSearch``;
                   only schemas listed in ``ndgObject.DISCOVERY_SCHEMES`` are
                   accepted.

    Returns:
        A ``(status, payload)`` tuple.  ``(1, document)`` on success;
        ``(0, message)`` when the uri is not available locally and *remote*
        is false, when the service returns an int (reported as the number of
        matching identifiers), or when the retrieve call raises.

    Raises:
        ValueError        if both *remote* and *discovery* are non-zero.
        NDGRetrieveError  if the schema is not valid for discovery, or a
                          required config entry cannot be read.
    """

    def getws(config, uri, remote):
        """Choose the service object for this retrieve.

        Returns ``(1, ws)`` or, when the uri belongs to another repository
        and remote access was not requested, ``(0, message)``.
        """
        logging.debug("Getting WS connection to eXist DB")
        if remote and discovery:
            raise ValueError(
                'Invalid options to ndgRetrieve (remote AND discovery impossible)')

        if discovery:
            logging.debug("Using Discovery service")
            if uri.schema not in no.ndgObject.DISCOVERY_SCHEMES:
                raise NDGRetrieveError(
                    'Invalid schema (%s) with discovery option in ndgRetrieve'
                    % uri.schema)
            return 1, ndgSearch()

        logging.debug("Using Browse service")
        # The logic here is that this code runs on a server supporting the
        # default repository ``sr``; the object needs the service ``r``
        # registered for uri.repository.  When the two agree the local eXist
        # repository ``er`` is used, otherwise only a remote call can help.
        try:
            r = config.get('NDG_B_SERVICE', uri.repository)
            sr = config.get('DEFAULT', 'repository')
        except Exception:
            raise NDGRetrieveError(
                "Config file not available or incomplete "
                "- cannot find data for NDG_B_SERVICE, %s" % uri.repository)
        logging.debug("NDG_B_SERVICE: '%s', default repository: '%s'" % (r, sr))

        if r != sr:
            if not remote:
                return 0, 'The uri [%s] is not available on [%s]' % (uri, sr)
            try:
                ps = config.get('DEFAULT', 'proxyServer')
            except Exception:
                raise NDGRetrieveError(
                    'Config file not available or has no [proxyServer] setting')
            logging.debug("Retrieving data from remote proxy server, '%s'" % ps)
            return 1, DocumentRetrieve.ndgHTTP(r, proxyServer=ps)

        try:
            logging.info("Lookup up repository:'%s'" % uri.repository)
            er = config.get('NDG_EXIST', uri.repository)
            pwf = config.get('NDG_EXIST', 'passwordFile')
        except Exception:
            raise NDGRetrieveError(
                'Config file not available or has no [NDG_EXIST] setting')
        logging.debug("Retrieving data from repository, '%s'" % er)
        return 1, DocumentRetrieve.DocumentRetrieve(er, pwfile=pwf)

    status, ws = getws(config, uri, remote)
    if not status:
        # ``ws`` carries the explanatory message in this case.
        return status, ws

    # Map the document schema onto the collection that holds it.
    if uri.schema == no.ndgObject.NDGA0_DOC_TYPE:
        target = NDGA0_COLLECTION
    elif uri.schema == no.ndgObject.NUMSIM_DOC_TYPE:
        target = NUMSIM_COLLECTION
    elif uri.schema == no.ndgObject.ATOM_DOC_TYPE:
        target = ATOM_COLLECTION
    elif uri.schema == no.ndgObject.ATOM_BACKUP_DOC_TYPE:
        target = ATOM_BACKUP_COLLECTION
    else:
        target = MOLES_COLLECTION

    # Do the actual retrieve, timing it for the log.
    try:
        started = time.time()
        r = ws.get(uri.repository, uri.schema, uri.localID,
                   targetCollection=target)
        elapsed = time.time() - started
        logging.info('Document retrieve [%s] took [%s]' % (uri, elapsed))
    except Exception as err:
        # Keep the text in its own name: Python 3 unbinds ``err`` when the
        # handler exits.
        failure = str(err)
        logging.error('Document retrieve [%s] failed [%s]' % (uri, failure))
        return 0, 'Document retrieve [%s] failed [%s]' % (
            uri, _escape_markup(failure))

    # Did we get a sensible answer?  An int is treated as a match count
    # rather than a document.
    if isinstance(r, int):
        return 0, '<p> There are %s identifiers matching your request! </p>' % r

    # We used to return an xmlHandler instance, but we don't do that any more.
    return 1, r
119   
Note: See TracBrowser for help on using the repository browser.