source: TI07-MOLES/trunk/PythonCode/browse/portal/cgi/browse/ndgSearch.py @ 1671

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI07-MOLES/trunk/PythonCode/browse/portal/cgi/browse/ndgSearch.py@1671
Revision 1671, 5.0 KB checked in by lawrence, 14 years ago (diff)

New NDG discovery interface, including CSS, still a few
bits to do, but functional as WSGI now.

Line 
1# these imports are autogenerated by wsdl2py ...
2from DiscoveryService_services_types import *
3from DiscoveryService_services import *
4
5#normal imports
6import sys, time
7try: #python 2.5
8    import xml.etree.ElementTree as ET
9except:
10    import ElementTree as ET
11   
12from ETxmlView import loadET,et2text
13
14from xml.dom import expatbuilder
class ExpatReaderClass:
    ''' Reader adaptor exposing xml.dom.expatbuilder through the
    fromString / fromStream pair of names. An instance of ndgSearch hands
    this class to the service locator as its ``readerclass``. '''

    @staticmethod
    def fromString(*args, **kwargs):
        ''' Parse XML held in a string; arguments are passed straight through
        to expatbuilder.parseString and its result is returned unchanged. '''
        return expatbuilder.parseString(*args, **kwargs)

    @staticmethod
    def fromStream(*args, **kwargs):
        ''' Parse XML from a file name or file-like object; arguments are
        passed straight through to expatbuilder.parse and its result is
        returned unchanged. '''
        return expatbuilder.parse(*args, **kwargs)
18 
19
class ndgSearch:
    ''' Provides a client to the NDG discovery service methods exposed by the
    Discovery.wsdl.

    Attributes set by the methods below:
        server          -- service port obtained from DiscoveryServiceLocator
        documents       -- document names from the last successful search (or None)
        hits            -- hit count reported by the last successful search (or None)
        serverSessionID -- result id reported by the last successful search (or None)
        status          -- list of status messages from the last search / bulk load
        docs            -- list of (name,content) tuples from the last getAllDocs call
        logger          -- optional logger; when set, timings and load failures
                           are reported through it
    '''

    def __init__(self,logger=None):
        ''' Get an instance of the service.

        logger -- optional object with logging.Logger style info()/error()
                  methods; None disables logging. '''
        #how do we get a different backend provider?
        loc=DiscoveryServiceLocator()
        self.server=loc.getDiscovery(readerclass=ExpatReaderClass)
        #tracefile=sys.stdout)
        self.documents=None
        self.hits=None
        self.serverSessionID=None
        self.logger=logger
        # Initialised here so they can be read before any search has been made
        self.status=[]
        self.docs=[]

    @staticmethod
    def _stripXmlSuffix(name):
        ''' Return name without one trailing ".xml" (unchanged if absent).

        str.rstrip('.xml') is not usable for this: it removes any trailing run
        of the characters ".", "x", "m" and "l", so "index.xml" became "inde". '''
        suffix='.xml'
        if name.endswith(suffix):
            return name[:-len(suffix)]
        return name

    def _present(self,document):
        ''' Issue a doPresent request for one document in 'ndgDoc' format and
        return the raw service response. '''
        request=doPresentRequest()
        request._document=document
        request._format='ndgDoc'
        return self.server.doPresent(request)

    def search(self,term,start=1,howmany=20,scope=None):
        ''' Get a list of documents for a specific term using a free text search.

        term    -- free text search term
        start   -- value sent as the request start position
        howmany -- value sent as the requested number of documents
        scope   -- optional scope value; only sent when not None

        Returns self.documents. On a successful response the stored documents,
        hits and serverSessionID are replaced; on an unsuccessful response
        they are left as they were, so the value returned is whatever an
        earlier search stored (None if there was none). self.status is always
        replaced by a one element list holding the response status message. '''

        #create a request object
        request=doFullTextSearchRequest()
        request._term=term
        request._start=start
        request._howMany=howmany
        if scope is not None:
            request._scope=scope

        #do the search (timed only for the benefit of the log)
        itime=time.time()
        response=self.server.doFullTextSearch(request)
        if self.logger:
            etime=time.time()-itime
            self.logger.info('Search Request [%s] took [%ss]'%(term,etime))

        if response._status:
            self.serverSessionID=response._resultId
            self.hits=response._hits
            self.documents=response._documents._document
        self.status=[response._statusMessage,]
        return self.documents

    def getDoc(self,document):
        ''' Return a document from the backend database.

        Returns the document content when the service reports success,
        otherwise the service status message. '''
        searchResult=self._present(document)
        if searchResult._status:
            return searchResult._document
        return searchResult._statusMessage

    def getAllDocs(self):
        ''' Fetch every document named in self.documents and return a list of
        (name,content) tuples.

        The name is kept to make it easy to get the actual document out later
        (avoiding xpath issues). Documents the service fails to present are
        left out of the result and recorded in self.status as
        (name,statusMessage) tuples. Returns [] when no search result is held. '''
        docs=[]
        self.status=[]

        if self.documents is None:
            self.docs=docs
            return []

        itime=time.time()
        for doc in self.documents:
            result=self._present(doc)
            if result._status:
                docs.append((doc,result._document))
            else:
                self.status.append((doc,result._statusMessage))
        if self.logger:
            etime=time.time()-itime
            self.logger.info('Document Load [n=%s] took [%ss]'%(len(self.documents),etime))
        self.docs=docs
        return self.docs

    def __xmlerror(self,docmessage):
        ''' Build an <Error><Document/><Message/></Error> element.

        docmessage -- (documentName,message) tuple

        The element is assembled with the ElementTree API rather than by
        formatting and re-parsing a string: the message is normally the very
        content that failed to parse, so any "<" or "&" in it made the old
        string based construction raise as well. Both values are stored as
        text. The failure is reported through the logger when there is one,
        otherwise printed as before. '''
        if self.logger:
            self.logger.error('Unable to load document [%s]: %s'%docmessage)
        else:
            print(docmessage)
        name,message=docmessage
        error=ET.Element('Error')
        # '%s' formatting keeps the old behaviour of accepting non-string values
        ET.SubElement(error,'Document').text='%s'%(name,)
        ET.SubElement(error,'Message').text='%s'%(message,)
        return error

    def getDocElement(self,document):
        ''' Takes a document path (maybe from a previous call to ndgSearch) and
        extracts that document into an ElementTree instance.

        Returns whatever loadET produces for the document content; if loading
        raises, returns an <Error> element naming the document (without its
        ".xml" suffix) and carrying the content that could not be loaded. '''
        #we stick it straight into element tree because we need to use et to get the actual document
        #we want, not the envelope xml elements
        doc=self.getDoc(document)
        try:
            return loadET(doc)
        except Exception:
            return self.__xmlerror((self._stripXmlSuffix(document),doc))

    def getAllDocsAsElements(self):
        ''' Get all the documents and load them into a list of ET instances.

        One entry is returned per document fetched by getAllDocs, in the same
        order; a document that cannot be loaded is represented by an <Error>
        element for that document. '''
        result=[]
        for name,content in self.getAllDocs():
            try:
                element=loadET(content)
            except Exception:
                # name comes from the loop variable, so the error always
                # refers to the document that actually failed
                element=self.__xmlerror((self._stripXmlSuffix(name),content))
            result.append(element)
        return result
132           
if __name__=="__main__":
    # Command line smoke test: search for the term given as the first
    # argument (default 'temperature'), list the documents found and then
    # fetch the second one as an ElementTree instance.

    args=sys.argv
    if len(args)>1:
        term=args[1]
    else:
        term='temperature'

    s=ndgSearch()
    howmany=10
    docs=s.search(term,start=1,howmany=howmany)
    print(s.status)
    print('Asked for  %s  documents, and got:'%howmany)
    # search() returns None when no search has succeeded, so guard the loop
    if docs:
        for j,i in enumerate(docs):
            print('%s : %s'%(j+1,i))

    #docs=s.getAllDocs()
    #print docs[0]
    #print s.status

    # Only fetch the second document when the search actually returned one
    if s.documents and len(s.documents)>1:
        doc=s.documents[1]
        r=s.getDocElement(doc)
        #print et2text(r)
    else:
        print('Fewer than two documents returned; skipping document retrieval')
Note: See TracBrowser for help on using the repository browser.