source: exist/trunk/python/ndgUtils/ndgRetrieve.py @ 4229

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/ndgRetrieve.py@4229
Revision 4229, 4.7 KB checked in by cbyrom, 11 years ago (diff)

Add support for doing lists and summaries of atom docs via ndgDirectory and the existInterface class + add necessary xquery files for this + add new method to
allow retrieval of the collection to which an atom with a specified ID belongs - including the associated xquery file + extend tests to exercise these new features
+ add custom Atom error + improve preservation of key atom attributes when loading existing atoms into an Atom object + improve determining default atom collection

Line 
1import DocumentRetrieve,xmlHandler2
2from ndgSearch import ndgSearch
3import cgi,time, logging
4import ndgObject as no
5
# eXist collection paths handed to the retrieval backend as
# ``targetCollection``.  ndgRetrieve() picks one of them from the
# schema of the requested uri.

# Fallback collection, used when the schema matches none of the
# document types tested in ndgRetrieve().
MOLES_COLLECTION = '/db/ndg_B_metadata'
# Selected for no.ndgObject.ATOM_DOC_TYPE.
ATOM_COLLECTION = '/db/atoms'
# Selected for no.ndgObject.ATOM_BACKUP_DOC_TYPE.
ATOM_BACKUP_COLLECTION = '/db/atoms_backup'
# Selected for no.ndgObject.NUMSIM_DOC_TYPE.
NUMSIM_COLLECTION = '/db/NumSim'
# Selected for no.ndgObject.NDGA0_DOC_TYPE.
NDGA0_COLLECTION = '/db/ndg_A_metadata'
11
class NDGRetrieveError(Exception):
    """Error raised when ndgRetrieve cannot set up a document retrieval.

    Constructing the exception has a side effect: the message is written
    to the root logger at ERROR level before the base class is
    initialised, so every instance is logged even if it is later caught
    and discarded.
    """

    def __init__(self, msg):
        # Log first so the failure is recorded at the point of creation.
        logging.error(msg)
        # Explicit base-class call (rather than super()) is kept from the
        # original so behaviour does not depend on the interpreter version.
        Exception.__init__(self, msg)
17
18
def ndgRetrieve(uri, config, output='', remote=0, discovery=1):
    ''' Given an uri (an instance of ndgObject), retrieve it, with security
    in place.

    Arguments:
        uri       - object identifying the document; its ``schema``,
                    ``repository`` and ``localID`` attributes are read here.
        config    - configuration object queried via ``get(section, option)``
                    (presumably a ConfigParser instance - confirm with caller).
        output    - documented as the desired output schema when not '',
                    but it is not used anywhere in this function.
        remote    - if non-zero, attempt to get the document from a remote
                    exist site via a (secured) restful http call.
        discovery - if non-zero AND the schema is one of
                    no.ndgObject.DISCOVERY_SCHEMES, use the ndgSearch
                    interface to obtain the document rather than going
                    direct to an eXist database.

    Returns a ``(status, payload)`` tuple:
        (1, document)  on success, where document is whatever the backend's
                       ``get`` returned;
        (0, message)   when the uri is not served locally and remote is
                       off, when the backend returned an int (reported as
                       a count of matching identifiers), or when the
                       backend raised (exception text is HTML-escaped).

    Raises:
        ValueError       - remote and discovery are both non-zero.
        NDGRetrieveError - schema not valid for discovery, or a required
                           configuration entry could not be read.
    '''

    def getws(config, uri, remote):
        ''' Get a ws connection to the local exist database.

        Returns (1, connection) on success or (0, message) when the uri
        belongs to another repository and remote access was not requested.
        '''
        logging.debug("Getting WS connection to eXist DB")
        # The logic here is that
        #    this code is running on server, which supports repositories sr
        #    this particular object needs the repository for uri.repository
        #    which had better be the same as sr, and then we use the
        #    actual exist repository er.
        if remote and discovery:
            raise ValueError(
                'Invalid options to ndgRetrieve (remote AND discovery impossible)')

        if discovery:
            logging.debug("Using Discovery service")
            if uri.schema not in no.ndgObject.DISCOVERY_SCHEMES:
                # Adjacent literals instead of a backslash continuation, so
                # the source indentation no longer leaks into the message.
                raise NDGRetrieveError(
                    'Invalid schema (%s) with discovery '
                    'option in ndgRetrieve' % uri.schema)
            return 1, ndgSearch()

        logging.debug("Using Browse service")
        try:
            service = config.get('NDG_B_SERVICE', uri.repository)
            server_repository = config.get('DEFAULT', 'repository')
        except Exception:
            # "except Exception" rather than a bare except, so SystemExit
            # and KeyboardInterrupt are not turned into config errors.
            raise NDGRetrieveError(
                "Config file not available or incomplete "
                "- cannot find data for NDG_B_SERVICE, %s" % uri.repository)
        logging.debug("NDG_B_SERVICE: '%s', default repository: '%s'",
                      service, server_repository)

        if service != server_repository:
            # The document lives elsewhere: only reachable via the proxy.
            if not remote:
                return 0, 'The uri [%s] is not available on [%s]' % (
                    uri, server_repository)
            try:
                proxy = config.get('DEFAULT', 'proxyServer')
            except Exception:
                raise NDGRetrieveError(
                    'Config file not available '
                    'or has no [proxyServer] setting')
            logging.debug("Retrieving data from remote proxy server, '%s'",
                          proxy)
            return 1, DocumentRetrieve.ndgHTTP(service, proxyServer=proxy)

        # The document is held by the local eXist instance.
        logging.info("Lookup up repository:'%s'", uri.repository)
        try:
            exist_host = config.get('NDG_EXIST', uri.repository)
            password_file = config.get('NDG_EXIST', 'passwordFile')
        except Exception:
            raise NDGRetrieveError(
                'Config file not available or has '
                'no [NDG_EXIST] setting')
        logging.debug("Retrieving data from repository, '%s'", exist_host)
        return 1, DocumentRetrieve.DocumentRetrieve(exist_host,
                                                    pwfile=password_file)

    status, ws = getws(config, uri, remote)
    if not status:
        # ws carries the explanatory message in this case.
        return status, ws

    # Choose the eXist collection from the document schema; anything
    # unrecognised falls back to the MOLES collection.
    if uri.schema == no.ndgObject.NDGA0_DOC_TYPE:
        target = NDGA0_COLLECTION
    elif uri.schema == no.ndgObject.NUMSIM_DOC_TYPE:
        target = NUMSIM_COLLECTION
    elif uri.schema == no.ndgObject.ATOM_DOC_TYPE:
        target = ATOM_COLLECTION
    elif uri.schema == no.ndgObject.ATOM_BACKUP_DOC_TYPE:
        target = ATOM_BACKUP_COLLECTION
    else:
        target = MOLES_COLLECTION

    # do the actual retrieve:
    started = time.time()
    try:
        document = ws.get(uri.repository, uri.schema, uri.localID,
                          targetCollection=target)
    except Exception as exc:
        # Deliberate best-effort: any backend failure is reported to the
        # caller as a (0, message) result instead of propagating.  The text
        # is kept in its own name because the "as" target is cleared when
        # the handler exits on Python 3.
        error = str(exc)
        logging.error('Document retrieve [%s] failed [%s]', uri, error)
        return 0, 'Document retrieve [%s] failed [%s]' % (
            uri, cgi.escape(error))
    logging.info('Document retrieve [%s] took [%s]',
                 uri, time.time() - started)

    # did we get a sensible answer?
    if isinstance(document, int):
        return 0, ('<p> There are %s identifiers matching your request! </p>'
                   % document)

    # we used to return an xmlHandler instance, but we don't do that any more ...
    return 1, document
118   
Note: See TracBrowser for help on using the repository browser.