source: ndgCommon/trunk/ndg/common/src/dal/ndgRetrieve.py @ 4834

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/dal/ndgRetrieve.py@4834
Revision 4834, 4.9 KB checked in by cbyrom, 11 years ago (diff)

Add support for retrieving DIF documents produced in provider eXist DB

  • i.e. not via discovery service - to allow retrieval of DIF docs produced when atom docs are published; this will aid harvesting of info from feeds.

Line 
1'''
2 Given an uri (an instance of ndgObject), retrieve it, with security in place.
3 
4 @author: B Lawrence?
5'''
6import cgi,time, logging
7import DocumentRetrieve
8from ndg.common.src.clients.ws.ndgSearch import ndgSearch
9from ndg.common.src.models.ndgObject import ndgObject as no
10
# eXist collection paths. ndgRetrieve passes exactly one of these to the
# retrieval client as ``targetCollection``, chosen from the schema of the
# requested uri; MOLES_COLLECTION is the fallback when no other schema matches.
MOLES_COLLECTION = "/db/ndg_B_metadata"
ATOM_COLLECTION = "/db/atoms"
ATOM_BACKUP_COLLECTION = "/db/atoms_backup"
NUMSIM_COLLECTION = "/db/NumSim"
NDGA0_COLLECTION = "/db/ndg_A_metadata"
DIF_COLLECTION = "/db/DIF"
17
class NDGRetrieveError(Exception):
    """Error raised by ndgRetrieve for bad configuration or an invalid schema.

    Constructing the exception has a side effect: the message is written to
    the root logger at ERROR level, so every instance is logged even if the
    caller later swallows it.
    """

    def __init__(self, msg):
        # Record the problem first, then hand the message to the base class
        # so that str(error) and error.args carry it as usual.
        logging.error(msg)
        super(NDGRetrieveError, self).__init__(msg)
23
24
25def ndgRetrieve(uri, config, output='',remote=0, discovery=1):
26   
27    ''' Given an uri (an instance of ndgObject), retrieve it, with security in place.
28            If output is not '' it should be desired output schema!
29            If remote is non zero, then attempt to get the document
30        from a remote exist site via a (secured) restful http call
31            If discovery is non-zero AND the schema is in
32                [DIF,MDIP,or ISO]
33        then use the ndgSearch interface to obtain the document rather
34        than direct from an eXist database.'''
35   
36    def getws(config,uri,remote):
37        ''' Get a ws connection to the local exist database '''
38        logging.debug("Getting WS connection to eXist DB")
39        # The logic here is that
40        #    this code is running on server, which supports repositories sr
41        #    this particular object needs the repository for uri.repository
42        #    which had better be the same as sr, and then we use the
43        #    actual exist repository er.
44        #    I'm sure this can be cleaned up further :-)
45        if remote and discovery:
46            raise ValueError, 'Invalid options to ndgRetrieve (remote AND discovery impossible)'
47        if discovery:
48            logging.debug("Using Discovery service")
49            if uri.schema in no.DISCOVERY_SCHEMES:
50                ws=ndgSearch()
51            else:
52                raise NDGRetrieveError('Invalid schema (%s) with discovery \
53                    option in ndgRetrieve' %uri.schema)
54        else:
55            logging.debug("Using Browse service")
56            try:
57                r=config.get('NDG_B_SERVICE',uri.repository)
58                sr=config.get('DEFAULT','repository')
59                logging.debug("NDG_B_SERVICE: '%s', default repository: '%s'" %(r,sr))
60            except:
61                raise NDGRetrieveError("Config file not available or incomplete " + \
62                    "- cannot find data for NDG_B_SERVICE, %s" %uri.repository)
63           
64            if r <> sr:
65                if not remote:
66                    return 0,'The uri [%s] is not available on [%s]'%(uri,sr)
67                else:
68                    try:
69                        ps=config.get('DEFAULT','proxyServer')
70                    except:
71                        raise NDGRetrieveError('Config file not available \
72                            or has no [proxyServer] setting')
73                logging.debug("Retrieving data from remote proxy server, '%s'" %ps)
74                ws = DocumentRetrieve.ndgHTTP(r,proxyServer=ps)
75            else:
76                try:
77                    logging.info("Lookup up repository:'%s'" %uri.repository)
78                    er=config.get('NDG_EXIST',uri.repository)
79                    pwf=config.get('NDG_EXIST','passwordFile')
80                except:
81                    raise NDGRetrieveError('Config file not available or has \
82                        no [NDG_EXIST] setting')
83
84                logging.debug("Retrieving data from repository, '%s'" %er)
85                ws=DocumentRetrieve.DocumentRetrieve(er,pwfile=pwf)
86        return 1,ws
87   
88    status,ws=getws(config,uri,remote)
89 
90    if not status: return status,ws
91           
92    if uri.schema==no.NDGA0_DOC_TYPE:
93        target = NDGA0_COLLECTION
94    elif uri.schema == no.NUMSIM_DOC_TYPE:
95        target = NUMSIM_COLLECTION
96    elif uri.schema == no.ATOM_DOC_TYPE:
97        target = ATOM_COLLECTION
98    elif uri.schema == no.ATOM_BACKUP_DOC_TYPE:
99        target = ATOM_BACKUP_COLLECTION
100    elif uri.schema == no.BROWSE_DIF_DOC_TYPE:
101        target = DIF_COLLECTION
102    else:
103        target = MOLES_COLLECTION
104   
105    #do the actual retrieve:
106    e=None
107    try:
108        time1=time.time()
109        r=ws.get(uri.repository,uri.schema,uri.localID,targetCollection=target)
110        time2=time.time()-time1
111        logging.info('Document retrieve [%s] took [%s]'%(uri,time2))
112    except Exception,e:
113        e=str(e)
114        r=e
115        logging.error('Document retrieve [%s] failed [%s]'%(uri,e))
116   
117
118    # did we get a sensible answer?
119    if isinstance(r,int):
120        return 0,'<p> There are %s identifiers matching your request! </p>'%r
121    if e is not None:
122        return 0,'Document retrieve [%s] failed [%s]'%(uri,cgi.escape(e))
123   
124    # we used to return an xmlHandler instance, but we don't do that any more ...
125    return 1,r
126   
Note: See TracBrowser for help on using the repository browser.