source: mauRepo/HPFos/trunk/hpfos/HPFos/osImpl/myimpl.py @ 8582

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/mauRepo/HPFos/trunk/hpfos/HPFos/osImpl/myimpl.py@8582
Revision 8582, 21.7 KB checked in by mnagni, 7 years ago (diff)

Incomplete - #22558: Formatting of start/end times to conform to the OpenSearch time extension
 http://team.ceda.ac.uk/trac/ceda/ticket/22558

Searching the CEDA_ObservationCollection on all the given parameters (start, end, q, bbox) is now much faster.

  • Property svn:mime-type set to text/plain
Line 
1'''
2BSD Licence
3Copyright (c) 2012, Science & Technology Facilities Council (STFC)
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without modification,
7are permitted provided that the following conditions are met:
8
9    * Redistributions of source code must retain the above copyright notice,
10        this list of conditions and the following disclaimer.
11    * Redistributions in binary form must reproduce the above copyright notice,
12        this list of conditions and the following disclaimer in the documentation
13        and/or other materials provided with the distribution.
14    * Neither the name of the Science & Technology Facilities Council (STFC)
15        nor the names of its contributors may be used to endorse or promote
16        products derived from this software without specific prior written permission.
17
18THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
22BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
23OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29Created on 5 May 2012
30
31@author: Maurizio Nagni
32'''
33from datetime import datetime, date
34from hpfos.libs.postgisutil import create_st_setSRID
35from ceda_markup.opensearch.osquery import OSQuery
36from ceda_markup.atom.atom import createID, createUpdated, \
37    createPublished, createEntry
38from ceda_markup.atom.info import createTitle, HTML_TYPE, createContent
39from ceda_markup.dc.dc import createDate
40from ceda_markup.gml.gml import createBeginPosition, createEndPosition, \
41    createTimePeriod, createValidTime, createEnvelope, createLowerCorner, \
42    createUpperCorner, createPosList, createLinearRing, createExterior, \
43    createPolygon, GML_NAMESPACE
44from ceda_markup.georss.georss import createWhere
45from ceda_markup.atom.link import REL_SEARCH, REL_ALTERNATE, createLink
46from ceda_markup.opensearch import filter_results, COUNT_DEFAULT, \
47    START_INDEX_DEFAULT, START_PAGE_DEFAULT, create_autodiscovery_link
48from ceda_markup.opensearch.template.osresponse import Result, Subresult
49from ceda_markup.opensearch.template.atom import OSAtomResponse
50from ceda_markup.opensearch.template.html import OSHTMLResponse
51from ea_model.iso_19108_2006_temporal_schema.temporal_objects.tm_instant \
52    import TM_Instant
53from ceda_markup.georss import create_where_from_postgis
54from ea_model.ceda_metadatamodel.ceda_observationcollection.ceda_observationcollection \
55    import CEDA_ObservationCollection
56from ea_model.ceda_metadatamodel.ceda_observation.ceda_observation \
57    import CEDA_Observation
58from ea_model.ceda_metadatamodel.ceda_result.ceda_result import CEDA_Result
59from ceda_markup.opensearch.os_param import OSParam
60from hpfos import __version__, __revision__
61from ceda_markup.atom.atom import ATOM_NAMESPACE
62from hpfos.HPFos.osImpl.commons import get_document, get_xml_document, \
63    from_pt_to_string, tm_InstantToDatetime, string_to_datetime
64from ceda_markup.opensearch.os_request import OS_NAMESPACE
65from xml.etree.ElementTree import _ElementInterface
66from ea_model.iso_19115_2006_metadata_corrigendum.\
67    extent_information.ex_geographicboundingbox import EX_GeographicBoundingBox
68
# Names of request-context keys and entity kinds.
# NOTE: FILE_ID deliberately keeps the same value as GUID, as in the
# original definition.
GUID = 'guid'
FILE_ID = 'guid'
COLLECTION = 'collection'
OBSERVATION = 'observation'
RESULT = 'result'
BBOX = 'bbox'
DUMMY_GUID = 'dummy_guid'

# Location of the fatcat OpenSearch service and the proxy used to reach it.
FATCAT_HOST = 'citest1.jc.rl.ac.uk'
FATCAT_ROOT_PATH = 'fatcatOS'
PROXY_URL = 'http://wwwcache.rl.ac.uk:8080'

# Authority title identifying the identifier that carries a CEDA title.
CEDA_TITLE = 'ceda_title'

HPFOS_VERSION = __version__
HPFOS_REVISION = __revision__

# A revision still equal to the literal 'REVISION' is reported as 'unknown'
# (presumably an unsubstituted build placeholder -- TODO confirm).
HPFOS_ID = '(v. %s rev. %s)' % (
    HPFOS_VERSION,
    HPFOS_REVISION if HPFOS_REVISION != 'REVISION' else 'unknown')

HPFOS_TITLE = 'Discovery feed for Search Services %s' % (HPFOS_ID)
92
93
94
class MyOSAtomResponse(OSAtomResponse):
    '''
        Atom response builder of the HPFos OpenSearch service.

        Digests search results (a CEDA_ObservationCollection, a tuple whose
        second item lists instance ids, a plain iterable of instances, or an
        Atom feed element) into ``Result``/``Subresult`` objects and renders
        the subresults as Atom entries.
    '''

    def __init__(self):
        '''
            Constructor
        '''
        super(MyOSAtomResponse, self).__init__()

    @staticmethod
    def _stripped_text(element):
        '''
            Returns the stripped text of an XML element.
            **Parameters**
                * element: an element, or None
            **Returns**
                The stripped text, or None when the element is missing
                or carries no text
        '''
        if element is None or element.text is None:
            return None
        return element.text.strip()

    @staticmethod
    def _align_temporal(left, right):
        '''
            Makes two temporal values mutually comparable.
            A ``datetime`` cannot be ordered against a plain ``date``, so when
            exactly one of the values is a ``datetime`` and the other a plain
            ``date`` the ``datetime`` is truncated to its date. In any other
            case the values are returned untouched.
            **Parameters**
                * left: a date, a datetime or any other value
                * right: a date, a datetime or any other value
            **Returns**
                The (left, right) pair, possibly truncated
        '''
        left_is_dt = isinstance(left, datetime)
        right_is_dt = isinstance(right, datetime)
        if left_is_dt and not right_is_dt and isinstance(right, date):
            return left.date(), right
        if right_is_dt and not left_is_dt and isinstance(left, date):
            return left, right.date()
        return left, right

    @staticmethod
    def _first_bounding_box(result):
        '''
            Returns the first geographic extent of ``result`` when it is an
            EX_GeographicBoundingBox, None in any other case.
        '''
        extents = getattr(result, 'geographicExtent', None)
        if extents is None or len(extents) == 0:
            return None
        first = extents[0]
        if not isinstance(first, EX_GeographicBoundingBox):
            return None
        return first

    def _digest_fatcat_atom(self, context, results):
        '''
            Converts an Atom feed element into a Result.
            **Parameters**
                * context: the request context (read for paging values)
                * results: the feed root element
            **Returns**
                A Result holding one Subresult per feed entry
            **Raises**
                AttributeError if an entry has no id/title or the feed has
                no totalResults element
        '''
        count, start_index, start_page = self._importCountAndPage(context)
        subresults = []
        for entry in results.findall('{%s}entry' % (ATOM_NAMESPACE)):
            iid = entry.find('{%s}id' % (ATOM_NAMESPACE)).text.strip()
            ititle = entry.find('{%s}title' % (ATOM_NAMESPACE)).text.strip()
            kwargs = {}

            # Optional temporal extent; an empty element is simply skipped
            for key in ('beginPosition', 'endPosition'):
                value = self._stripped_text(
                    entry.find('.//{%s}%s' % (GML_NAMESPACE, key)))
                if value is not None:
                    kwargs[key] = value

            # Optional geometry: a posList wins over an Envelope
            pos_list = self._stripped_text(
                entry.find('.//{%s}posList' % (GML_NAMESPACE)))
            if pos_list is not None:
                kwargs['geometry'] = 'POLYGON((%s))' % pos_list
            else:
                envelope = entry.find('.//{%s}Envelope' % (GML_NAMESPACE))
                if envelope is not None:
                    lc = self._stripped_text(
                        envelope.find('./{%s}lowerCorner' % (GML_NAMESPACE)))
                    uc = self._stripped_text(
                        envelope.find('./{%s}upperCorner' % (GML_NAMESPACE)))
                    if lc is not None and uc is not None:
                        kwargs['geometry'] = _create_box2d(lc, uc)

            # All the links are collected here; generate_entries() keeps only
            # the rel='enclosure' ones.
            # On python >= 2.7 the selection could be done directly with
            # "./{%s}link[@rel='enclosure']"
            kwargs['enclosure'] = entry.findall("./{%s}link" % (ATOM_NAMESPACE))
            kwargs['description'] = 'no description'
            subresults.append(Subresult(iid, ititle,
                                        datetime.now().isoformat(), **kwargs))

        tot_results = int(results.find('{%s}totalResults'
                                       % (OS_NAMESPACE)).text.replace('\n','').strip())
        return Result(count, start_index, start_page, tot_results,
                      subresult = subresults, title=HPFOS_TITLE)

    def _check_text_filter(self, text, title, description):
        '''
            Tells if at least one whitespace-separated word of ``text`` is
            contained in ``title`` or in ``description``.
        '''
        return any(word in title or word in description
                   for word in text.split())

    def _check_start_filter(self, startTime, phenomenonTime):
        '''
            Tells if the phenomenon time is later than ``startTime``.
            **Parameters**
                * startTime: a date or a datetime
                * phenomenonTime: a TM_Instant, an object with a ``begin``
                  attribute, a date/datetime, or a list of those (only the
                  first item is considered)
            **Returns**
                True if the phenomenon time follows startTime or if
                phenomenonTime is an empty list, False otherwise
        '''
        pt = phenomenonTime
        if isinstance(phenomenonTime, list):
            if len(phenomenonTime) == 0:
                return True
            pt = phenomenonTime[0]

        if isinstance(pt, TM_Instant):
            pt = tm_InstantToDatetime(pt)
        elif hasattr(pt, 'begin'):
            pt = tm_InstantToDatetime(pt.begin)

        # date and datetime cannot be ordered against each other
        pt, startTime = self._align_temporal(pt, startTime)
        return pt > startTime

    def _check_end_filter(self, endTime, phenomenonTime):
        '''
            Tells if ``endTime`` is earlier than the phenomenon time.
            **Parameters**
                * endTime: a date or a datetime
                * phenomenonTime: a TM_Instant, an object with an ``end``
                  attribute, a date/datetime, or a list of those (only the
                  first item is considered)
            **Returns**
                True if endTime precedes the phenomenon time or if
                phenomenonTime is an empty list, False otherwise
        '''
        # NOTE(review): the comparison accepts times *after* endTime;
        # confirm this is the intended sense of the filter.
        pt = phenomenonTime
        if isinstance(phenomenonTime, list):
            if len(phenomenonTime) == 0:
                return True
            pt = phenomenonTime[0]

        if isinstance(pt, TM_Instant):
            pt = tm_InstantToDatetime(pt)
        elif hasattr(pt, 'end'):
            pt = tm_InstantToDatetime(pt.end)

        # date and datetime cannot be ordered against each other
        pt, endTime = self._align_temporal(pt, endTime)
        return endTime < pt

    def apply_query_params(self, context, results):
        '''
            Wraps each instance in ``results`` in a Subresult.
            Instances for which no GUID can be retrieved are skipped.
            **Parameters**
                * context: the request context; ``context['moles3EPB']`` is
                  used to retrieve the GUID of each instance
                * results: an iterable of instances, or None
            **Returns**
                A list of Subresult (empty if results is None)
        '''
        # A cleaner implementation would require calls to
        # db's merge_period_instant_views() but actually it does not collect
        # infos about CEDA_Results
        subresults = []
        if results is None:
            return subresults
        for result in results:
            result_guid = context['moles3EPB'].retrieveGUIDFromInstance(result)
            if result_guid is None:
                continue
            ititle = self._extractTitle(result)

            # When several phenomenon times are available only the first
            # one is reported
            phenomenonTime = result.phenomenonTime
            if isinstance(phenomenonTime, list) and len(phenomenonTime) > 0:
                phenomenonTime = phenomenonTime[0]

            kwargs = {}
            kwargs['beginPosition'], kwargs['endPosition'] = \
                from_pt_to_string(phenomenonTime)
            kwargs['geometry'] = self._new_extract_geographic_extent(result)
            kwargs['description'] = result.description

            subresults.append(Subresult(result_guid.id, ititle,
                                        datetime.now().isoformat(), **kwargs))
        return subresults

    def digest_search_results(self, results, context):
        '''
            Converts the outcome of a search into a Result.
            **Parameters**
                * results: a CEDA_ObservationCollection (its ``member`` are
                  used), an XML element (digested as an Atom feed), a tuple
                  whose second item is a sequence of rows having the instance
                  id as first column, or an iterable of instances
                * context: the request context
            **Returns**
                A Result instance
        '''
        if isinstance(results, CEDA_ObservationCollection):
            results = results.member
        elif isinstance(results, _ElementInterface):
            return self._digest_fatcat_atom(context, results)

        count, start_index, start_page = self._importCountAndPage(context)

        instances = results
        if isinstance(results, tuple):
            rows = results[1]
            if rows is None:
                rows = []
            ids = [row[0] for row in rows]
            instances = context['moles3EPB'].get_instance_by_ids(ids)

        subresults = filter_results(instances, count, start_index, start_page)
        subresults = self.apply_query_params(context, subresults)

        try:
            tot_results = len(instances)
        except TypeError:
            # None or an unsized iterable
            tot_results = 0

        return Result(count, start_index, start_page, tot_results,
                      subresult = subresults, title=HPFOS_TITLE)

    def _new_extract_geographic_extent(self, result):
        '''
            Returns the first geographic extent of ``result`` as a
            'BOX2D(south west, north east)' string, or None if the instance
            has no extent or the first one is not an EX_GeographicBoundingBox.
        '''
        ge = self._first_bounding_box(result)
        if ge is None:
            return None
        return _create_box2d('%s %s' % (ge.southBoundLatitude, ge.westBoundLongitude),
                             '%s %s' % (ge.northBoundLatitude, ge.eastBoundLongitude))

    def _extract_geographic_extent(self, result):
        '''
            Returns the first geographic extent of ``result`` as a
            (lowerCorner, upperCorner) pair where each corner is a
            [latitude, longitude] list, or (None, None) if the instance has no
            extent or the first one is not an EX_GeographicBoundingBox.
        '''
        ge = self._first_bounding_box(result)
        if ge is None:
            # always a pair, so that callers can safely unpack it
            return None, None
        return [ge.southBoundLatitude, ge.westBoundLongitude], \
                                    [ge.northBoundLatitude, ge.eastBoundLongitude]

    def generateEntryLinks(self, entry, atomroot, path, linkid = None):
        '''
            Appends to ``entry`` two autodiscovery links, the first with
            rel=REL_ALTERNATE and the second with rel=REL_SEARCH.
        '''
        for rel in (REL_ALTERNATE, REL_SEARCH):
            entry.append(create_autodiscovery_link(atomroot, path,
                                                   self.extension, linkid,
                                                   None, rel = rel))

    def generate_entries(self, atomroot, subresults, path):
        '''
            Appends to ``atomroot`` one Atom entry per subresult.
            **Parameters**
                * atomroot: the root element of the feed
                * subresults: a list of Subresult; if it is a non empty list
                  whose first item is an XML element, the items are appended
                  as they are
                * path: the URL prefix used to build entry ids and links
        '''
        if isinstance(subresults, list) \
                and len(subresults) > 0 \
                and isinstance(subresults[0], _ElementInterface):
            for entry in subresults:
                atomroot.append(entry)
            return

        # Entries are attached to the root only once all of them have been
        # successfully built
        entries = []
        for subresult in subresults:
            atomID = createID(path + subresult.id + '/' + self.extension,
                              root = atomroot)
            ititle = createTitle(root = atomroot,
                                 body = subresult.title,
                                 itype = HTML_TYPE)
            atomContent = createContent(root = atomroot,
                                        body = subresult.description,
                                        itype = HTML_TYPE)
            atomUpdated = createUpdated(subresult.updated, root = atomroot)
            # TODO: placeholder value, the real publication date is missing
            atomPublished = createPublished('TO_BE_DONE_2011-01-21T11:05:29.511Z',
                                            root = atomroot)
            entry = createEntry(atomID, ititle, atomUpdated,
                                published=atomPublished,
                                content=atomContent, root = atomroot)

            self._append_valid_time(subresult, entry, atomroot,
                                    getattr(subresult, 'beginPosition', None),
                                    getattr(subresult, 'endPosition', None))

            # TODO: placeholder value, the real date interval is missing
            idate = createDate(root = atomroot,
                               body = 'TO_BE_DONE_2002-10-18T08:07:37.387Z/2012-03-29T07:12:20.735Z')
            entry.append(idate)

            geometry = getattr(subresult, 'geometry', None)
            if geometry is not None:
                entry.append(create_where_from_postgis(geometry, atomroot))

            self.generateEntryLinks(entry, atomroot, path, subresult.id)

            enclosures = getattr(subresult, 'enclosure', None)
            if enclosures is not None:
                for enclosure in enclosures:
                    if enclosure.get('rel', None) == 'enclosure':
                        entry.append(createLink(enclosure.get('href'),
                                                rel = 'enclosure',
                                                root = atomroot,
                                                itype = enclosure.get('type'),
                                                length = enclosure.get('length')))

            entries.append(entry)

        for entry in entries:
            atomroot.append(entry)

    def _append_where(self, geometry, entry, atomroot):
        '''
            Appends to ``entry`` a georss "where" element for ``geometry``.
            **Parameters**
                * geometry: a 'BOX2D(lc, uc)' or 'POLYGON((...))' string;
                  any other value is ignored
                * entry: the Atom entry to extend
                * atomroot: the root element of the feed
        '''
        if not geometry:
            return

        if geometry.startswith('BOX2D('):
            # corners may be separated by ', ' hence the strip()
            lc, uc = [corner.strip() for corner in geometry[6:-1].split(',')]
            lowerCorner = createLowerCorner(atomroot, values = lc)
            upperCorner = createUpperCorner(atomroot, values = uc)
            where_body = createEnvelope(lowerCorner, upperCorner, atomroot)
        elif geometry.startswith('POLYGON(('):
            values = [float(val) for val
                      in geometry[9:-2].replace(',', ' ').split()]
            posList = createPosList(root = atomroot,
                                    values = values,
                                    srsDimension = '2')
            linearRing = createLinearRing(root = atomroot, body = posList)
            exterior = createExterior(root = atomroot, body = linearRing)
            where_body = createPolygon(root = atomroot, body = exterior)
        else:
            # unsupported geometry: nothing to append
            return

        entry.append(createWhere(root = atomroot, body = where_body))

    def _append_valid_time(self, subresult, entry, atomroot,
                           beginPosition, endPosition):
        '''
            Appends to ``entry`` a gml validTime element built from the
            subresult begin/end positions. Nothing is appended when both
            ``beginPosition`` and ``endPosition`` are None.
        '''
        if beginPosition is not None:
            beginPosition = createBeginPosition(root = atomroot,
                                                body = subresult.beginPosition)
        if endPosition is not None:
            endPosition = createEndPosition(root = atomroot,
                                            body = subresult.endPosition)
        timePeriod = createTimePeriod(root = atomroot,
                                      begin = beginPosition, end = endPosition)
        validTime = createValidTime(root = atomroot, body = timePeriod)
        if beginPosition is not None or endPosition is not None:
            entry.append(validTime)

    def _importCountAndPage(self, context):
        '''
            Reads the paging values from the request context.
            **Parameters**
                * context: a mapping possibly holding 'count', 'startIndex'
                  and 'startPage'
            **Returns**
                The (count, start_index, start_page) integers; a missing or
                not integer-like value is replaced by its default
        '''
        def _int_param(name, default):
            try:
                return int(context[name])
            except (KeyError, TypeError, ValueError):
                return default

        return (_int_param('count', COUNT_DEFAULT),
                _int_param('startIndex', START_INDEX_DEFAULT),
                _int_param('startPage', START_PAGE_DEFAULT))

    def _extractTitle(self, cedaObj):
        '''
            Returns the code of the first identifier of ``cedaObj`` whose
            authority title is CEDA_TITLE, or None if there is none.
        '''
        for ident in getattr(cedaObj, 'identifier', None) or []:
            authority = getattr(ident, 'authority', None)
            if getattr(authority, 'title', None) == CEDA_TITLE:
                return ident.code
        return None
414
class MyOSHTMLResponse(OSHTMLResponse):
    '''
        HTML flavour of the OpenSearch response.
    '''

    def __init__(self):
        '''
            Constructor, delegates everything to the parent class
        '''
        super(MyOSHTMLResponse, self).__init__()

    def generateResponse(self, result, queries, ospath, **kwargs):
        '''
            Returns ``result`` followed by the " HTML!" marker; ``queries``,
            ``ospath`` and the keyword arguments are not used.
        '''
        marker = " HTML!"
        return result + marker
428       
class MyOSQuery(OSQuery):
    '''
        OpenSearch query of the HPFos service: declares the supported
        parameters and runs the search against the MOLES3 backend
        (``context['moles3EPB']``) or the fatcat service.
    '''

    def __init__(self):
        '''
            Constructor
        '''
        geo_ns = "http://a9.com/-/opensearch/extensions/geo/1.0/"
        time_ns = "http://a9.com/-/opensearch/extensions/time/1.0/"
        params = [OSParam("count", "count"),
                  OSParam("startPage", "startPage"),
                  OSParam("startIndex", "startIndex"),
                  OSParam("q", "searchTerms"),
                  OSParam("uid", "uid", namespace = geo_ns),
                  OSParam(BBOX, 'box', namespace = geo_ns),
                  OSParam("start", "start", namespace = time_ns),
                  OSParam("stop", "end", namespace = time_ns)]
        super(MyOSQuery, self).__init__(params)

    @staticmethod
    def _context_value(context, key):
        '''
            Returns ``context[key]``, or None when the key is missing.
        '''
        if key in context:
            return context[key]
        return None

    @staticmethod
    def _parse_bbox(raw):
        '''
            Converts a 'c0,c1,c2,c3' string into the object returned by
            create_st_setSRID.
            **Parameters**
                * raw: four comma separated numbers, or None
            **Returns**
                The create_st_setSRID result, or None when raw is None, does
                not hold exactly four items or holds a non numeric item
        '''
        if raw is None:
            return None
        coords = raw.split(',')
        if len(coords) != 4:
            return None
        try:
            # float, not int: coordinates are usually decimal numbers
            return create_st_setSRID(*[float(coord) for coord in coords])
        except (TypeError, ValueError):
            return None

    def do_search(self, context):
        '''
            Executes the search described by the request context.
            **Parameters**
                * context: a mapping holding the request parameters
                  (BBOX, GUID, 'start', 'stop', 'q') and the 'moles3EPB'
                  backend
            **Returns**
                * the parsed fatcat feed if the guid contains 'FID' or refers
                  to a CEDA_Observation having a 'search' source
                * a (CEDA_ObservationCollection, rows) tuple if no guid
                  is given
                * the outcome of searchSelectiveLoadByInstance if the guid
                  refers to a CEDA_ObservationCollection
                * None in any other case
        '''
        ibbox = self._parse_bbox(self._context_value(context, BBOX))

        guid = self._context_value(context, GUID)
        # the guid may be present but None, so check before searching in it
        if guid is not None and 'FID' in guid:
            return self._extractFatcatEntities(guid)

        start = self._context_value(context, 'start')
        if start is not None:
            start = string_to_datetime(start)
        stop = self._context_value(context, 'stop')
        if stop is not None:
            stop = string_to_datetime(stop)

        if guid is None:
            res = context['moles3EPB'].\
                getObservationCollections_(bbox = ibbox,
                                           keywords = self._context_value(context, 'q'),
                                           start = start,
                                           stop = stop)
            return (CEDA_ObservationCollection, res)

        obj = context['moles3EPB'].getInstanceFromGUID(guid)
        if obj is None:
            return None
        if isinstance(obj, CEDA_ObservationCollection):
            return context['moles3EPB'].searchSelectiveLoadByInstance(obj, 'member')
        if isinstance(obj, CEDA_Observation):
            result = getattr(obj, 'result', None)
            for source in getattr(result, 'source', None) or []:
                if source.function == 'search':
                    return self._extractFatcatEntities(source.description)
        return None

    def _extractFatcatEntities(self, fc_resource_id):
        '''
            Fetches from FATCAT_HOST the Atom document found at
            '/<FATCAT_ROOT_PATH>/search/<fc_resource_id>/atom/'.
        '''
        path = '/%s/search/%s/atom/' % (FATCAT_ROOT_PATH, str(fc_resource_id))
        return find_fatcat_atom_entity(host = FATCAT_HOST, path = path)
498   
def find_fatcat_atom_entity(host = 'localhost', path = '', port = 80):
    '''
        Retrieves a document through the PROXY_URL proxy and hands it to
        get_xml_document.
        **Parameters**
            * host: the server name
            * path: the document path on the server
            * port: the server port
        **Returns**
            Whatever get_xml_document returns for the retrieved document
    '''
    return get_xml_document(get_document(host, path, port, proxy = PROXY_URL))
502
503def _create_box2d(lc, uc):
504    '''
505        Creates a postgis-like BOX2D string.
506        **Parameters**
507            * lc: the box lower corner as a two floats-as-string space separated pair
508            * uc: the box upper corner as a two floats-as-string space separated pair
509        **Returns**
510            A string formatted as 'BOX2D(lc1 lc2, uc1 uc2)'
511    '''   
512    return 'BOX2D(%s, %s)' % (lc, uc)
Note: See TracBrowser for help on using the repository browser.