source: exist/trunk/python/ndgUtils/eXistInterface.py @ 4229

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/eXistInterface.py@4229
Revision 4229, 4.7 KB checked in by cbyrom, 11 years ago (diff)

Add support for doing lists and summaries of atom docs via ndgDirectory and the existInterface class + add necessary xquery files for this + add new method to
allow retrieval of the collection to which an atom with a specified ID belongs - including the associated xquery file + extend tests to exercise these new features
+ add custom Atom error + improve preservation of key atom attributes when loading existing atoms into an Atom object + improve determining default atom collection

Line 
import logging
import os

from eXistConnector import *
from ndgXqueries import ndgXqueries
import xmlHandler2
4
class ndg_eXist(eXistConnector):
    ''' Adds ndg methods to a "standard" exist Connector '''

    def __init__(self, db='glue.badc.rl.ac.uk', passwordFile='passwords.txt'):
        ''' Look up the credentials for db in passwordFile and initialise the
        underlying eXistConnector with them.

        Each non-blank line of the password file holds exactly three fields
        separated by single spaces: host, userid and password.

        Raises IOError if the password file cannot be opened, and ValueError
        if a line is malformed or no entry exists for db. '''
        logging.debug("Initialising connection to eXist DB")
        try:
            f = open(passwordFile, 'r')
        except IOError as e:
            raise IOError('%s [looking for %s in %s]' % (e, passwordFile, os.getcwd()))

        pw = {}
        # "with" guarantees the handle is closed even when a line fails to parse
        with f:
            for lineNo, line in enumerate(f, 1):
                line = line.strip()
                if not line:
                    # tolerate blank lines (e.g. a trailing newline at end of file)
                    continue
                fields = line.split(' ')
                if len(fields) != 3:
                    # the line itself is not echoed: it may contain a password
                    raise ValueError('Malformed entry on line %d of %s '
                                     '(expected "host userid password")'
                                     % (lineNo, passwordFile))
                host, userid, password = fields
                pw[host] = (userid, password)

        if db not in pw:
            raise ValueError('Unable to find eXist password for repository [%s]' % db)
        userid, password = pw[db]
        eXistConstants = InstanceObject(host=db,
                                        userid=userid,
                                        password=password,
                                        base_path="/exist/servlet",
                                        xmlrpc_base_path="/exist/xmlrpc",
                                        port=8080)

        eXistConnector.__init__(self, eXistConstants)
        # maps a query session id onto the position of the next document
        # that retrieveNext will fetch
        self.ids = {}
        logging.debug("Connection initialised")

    @staticmethod
    def _escapeApos(text):
        ''' Return text made safe for embedding in a single-quoted XQuery
        string literal, by doubling any apostrophes.

        Only the apostrophe is escaped, because it is the one character that
        would terminate the literal. An ampersand is passed through untouched.
        NOTE(review): presumably callers that need a literal "&" already send
        it as an entity reference - confirm before escaping it here. '''
        # '%s' formatting accepts non-string values just as the queries
        # built in this class always have
        return ('%s' % (text,)).replace("'", "''")

    def __buildquery(self, query, target=None):
        ''' Create an NDG full text query.

        With a target, the query returns the document URI of every document
        in collection /db/<target> matching the query text; without one it
        matches against any top level element. '''
        term = self._escapeApos(query)
        if target is None:
            return "/*[. &='%s']" % term
        # NOTE(review): the ';' after the let clause does not look like
        # standard FLWOR syntax - confirm the eXist version in use accepts it
        return '''let $hits := collection('/db/%s')//root()[. &= '%s'] ;
                 for $i in $hits
                    return <document>{document-uri($i)}</document>''' % (
            self._escapeApos(target), term)

    def __buildParamSearch(self, param, value, target):
        ''' Create a query returning the target elements whose param
        sub-path matches the value text.

        target and param are spliced into the query as path steps, so they
        are not escaped; only value sits inside a string literal. '''
        return "for $x in document()//%s where $x[.%s &= '%s'] return $x" % (
            target, param, self._escapeApos(value))

    def full_text(self, query, target=None):
        ''' Carry out a full text search within the "target" collection.

        Returns the (session id, summary) pair produced by executeQuery, and
        starts a position counter for the session at 0 for retrieveNext. '''
        sessionID, summary = self.executeQuery(self.__buildquery(query, target))
        self.ids[sessionID] = 0
        return sessionID, summary

    def retrieveNext(self, id, pos=None):
        ''' Takes a sessionID from an existing query and gets the next document.

        If pos is given, the session's position counter is reset to it first.
        Returns None when the session is unknown or the retrieval faults. '''
        if pos is not None:
            self.ids[id] = pos
        try:
            r = self.retrieve(id, self.ids[id])
        except (xmlrpclib.Fault, KeyError):
            return None
        # only advance once the document has actually been fetched
        self.ids[id] += 1
        return r

    def sessionRelease(self, id):
        ''' Releases a session and removes the position counter.

        Returns 1 if the session was released, 0 if the release failed (in
        which case the position counter is left in place). '''
        try:
            self.release(id)
        except Exception:
            # deliberate best-effort: report failure rather than raise, but
            # let KeyboardInterrupt / SystemExit propagate
            return 0
        # sessions that were never registered by full_text have no counter;
        # that must not turn a successful release into a reported failure
        self.ids.pop(id, None)
        return 1

    def chunkedFullText(self, query, start=1, number=10, target='DIF'):
        ''' Execute a chunked full text query and return the result set '''
        return self.executeChunkedQuery(self.__buildquery(query, target),
                                        start, number, params={})

    def getDIF(self, entryID):
        ''' Get a specific DIF document from a repository by using the entryID.

        Returns the document when exactly one DIF matches; otherwise returns
        the number of hits. The query session is released in either case. '''
        xq = "for $DE in collection()/DIF[Entry_ID='%s'] return $DE" % (
            self._escapeApos(entryID),)
        sessionID, summary = self.executeQuery(xq)
        try:
            if summary['hits'] == 1:
                return self.retrieve(sessionID, 0, {})
            return summary['hits']
        finally:
            # do not leave the session open on the server, whatever the outcome
            self.sessionRelease(sessionID)

    def search(self, term, start=1, howmany=20,
               target=None, scope=None, bbox=None,
               dateRange=None, geoSearchType=None):
        ''' Provides a search interface that mimics the WSDL search interface, except that
        the target used is the exist collection name, and scope, bbox, dateRange and
        geoSearchType are ignored, and a python summary object is returned.

        target must be one of 'ndg_B_metadata', 'NumSim' or 'Atom'; anything
        else (including the default None) raises KeyError.

        Sets self.hits, self.start, self.howmany, self.error, self.results and
        self.serverSessionID, and returns self.results. '''
        # select the right query according to the docType
        summaryQueries = {'ndg_B_metadata': 'molesSummary',
                          'NumSim': 'numsimSummary',
                          'Atom': 'AtomSummary'}
        if target not in summaryQueries:
            raise KeyError('Unsupported search target [%s]; expected one of: %s'
                           % (target, ', '.join(sorted(summaryQueries))))
        # NOTE(review): term is substituted verbatim because the quoting
        # around SEARCHSTRING lives in the xquery files, which are not
        # visible here - confirm how it should be escaped
        xquery = ndgXqueries()[summaryQueries[target]].replace('SEARCHSTRING', term)
        r = self.executeChunkedQuery(xquery, start, howmany)
        x = xmlHandler2.xmlHandler(str(r), string=1)
        h = x.tree.get('hits')
        self.results = []
        self.serverSessionID = ''
        if h is None:
            # no "hits" attribute on the result root is treated as an empty result
            self.hits = 0
            self.start = 0
            self.howmany = 0
            self.error = ['No results for [%s]' % term, ]
        else:
            self.hits = int(h)
            self.error = None
            self.start = int(x.tree.get('start'))
            self.howmany = int(x.tree.get('count'))
            for s in x.tree.findall('summary'):
                self.results.append(edict(id=s.find('id').text,
                                          name=s.find('name').text,
                                          type=s.find('type').text))
        return self.results
Note: See TracBrowser for help on using the repository browser.