source: mauRepo/HPFos/trunk/hpfos/HPFos/osImpl/myimpl.py @ 8717

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/mauRepo/HPFos/trunk/hpfos/HPFos/osImpl/myimpl.py@8717
Revision 8717, 19.9 KB checked in by mnagni, 6 years ago (diff)

Incomplete - # 22576: Filtering on Files in a particular Result returns all Files rather than a subset.
 http://team.ceda.ac.uk/trac/ceda/ticket/22576

Should fix all the remaining bugs

  • Property svn:mime-type set to text/plain
Line 
1'''
2BSD Licence
3Copyright (c) 2012, Science & Technology Facilities Council (STFC)
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without modification,
7are permitted provided that the following conditions are met:
8
9    * Redistributions of source code must retain the above copyright notice,
10        this list of conditions and the following disclaimer.
11    * Redistributions in binary form must reproduce the above copyright notice,
12        this list of conditions and the following disclaimer in the documentation
13        and/or other materials provided with the distribution.
14    * Neither the name of the Science & Technology Facilities Council (STFC)
15        nor the names of its contributors may be used to endorse or promote
16        products derived from this software without specific prior written permission.
17
18THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
22BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
23OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29Created on 5 May 2012
30
31@author: Maurizio Nagni
32'''
33from datetime import datetime
34from hpfos.libs.postgisutil import create_st_setSRID
35from ceda_markup.opensearch.osquery import OSQuery
36from ceda_markup.atom.atom import createID, createUpdated, \
37    createPublished, createEntry
38from ceda_markup.atom.info import createTitle, HTML_TYPE, createContent
39from ceda_markup.dc.dc import createDate
40from ceda_markup.gml.gml import createBeginPosition, createEndPosition, \
41    createTimePeriod, createValidTime, GML_NAMESPACE
42from ceda_markup.atom.link import REL_SEARCH, REL_ALTERNATE, createLink
43from ceda_markup.opensearch import filter_results, COUNT_DEFAULT, \
44    START_INDEX_DEFAULT, START_PAGE_DEFAULT, create_autodiscovery_link
45from ceda_markup.opensearch.template.osresponse import Result, Subresult
46from ceda_markup.opensearch.template.atom import OSAtomResponse
47from ceda_markup.opensearch.template.html import OSHTMLResponse
48from sqlalchemy.orm.collections import InstrumentedList
49from ceda_markup.georss import create_where_from_postgis
50from urllib import urlencode
51from hpfos.HPFos.exception import FatcatException
52from ea_model.ceda_metadatamodel.ceda_observationcollection.ceda_observationcollection \
53    import CEDA_ObservationCollection
54from ea_model.ceda_metadatamodel.ceda_observation.ceda_observation \
55    import CEDA_Observation
56from ceda_markup.opensearch.os_param import OSParam
57from hpfos import __version__, __revision__
58from ceda_markup.atom.atom import ATOM_NAMESPACE
59from hpfos.HPFos.osImpl.commons import get_document, get_xml_document, \
60    from_pt_to_string, string_to_datetime
61from ceda_markup.opensearch.os_request import OS_NAMESPACE
62from xml.etree.ElementTree import _ElementInterface
63from ea_model.iso_19115_2006_metadata_corrigendum.\
64    extent_information.ex_geographicboundingbox import EX_GeographicBoundingBox
65
# Names of request/context keys and entity kinds.
# GUID and BBOX are read from the request context in MyOSQuery.do_search.
GUID = 'guid'
FILE_ID = 'guid'
COLLECTION = 'collection'
OBSERVATION = 'observation'
RESULT = 'result'
BBOX = 'bbox'
DUMMY_GUID = 'dummy_guid'

# Endpoint of the Fatcat OpenSearch service and the HTTP proxy used to reach it.
FATCAT_HOST = 'citest1.jc.rl.ac.uk'
FATCAT_ROOT_PATH = 'fatcatOS'
PROXY_URL = 'http://wwwcache.rl.ac.uk:8080'

# Authority title that marks the identifier holding an object's title
# (see extract_title).
CEDA_TITLE = 'ceda_title'
HPFOS_VERSION = __version__
HPFOS_REVISION = __revision__

# A revision equal to the literal 'REVISION' is reported as 'unknown'
# (presumably an unsubstituted build placeholder - confirm).
HPFOS_ID = '(v. %s rev. %s)' % (
    HPFOS_VERSION,
    HPFOS_REVISION if HPFOS_REVISION != 'REVISION' else 'unknown')

HPFOS_TITLE = 'Discovery feed for Search Services %s' % (HPFOS_ID)
89
def append_valid_time(subresult, entry, atomroot,
                      begin_position, end_position):
    '''
        Appends a gml validTime element to an atom entry.
        The element is appended only when at least one of the two
        positions has been created.
        **Parameters**
            * subresult: object exposing beginPosition/endPosition attributes
            * entry: the atom entry element the validTime is appended to
            * atomroot: the root element passed to the ceda_markup factories
            * begin_position: None to skip the creation of the begin position
            * end_position: None to skip the creation of the end position
    '''
    begin_element = None
    if begin_position is not None:
        begin_element = createBeginPosition(root=atomroot,
                                            body=subresult.beginPosition)

    end_element = None
    if end_position is not None:
        end_element = createEndPosition(root=atomroot,
                                        body=subresult.endPosition)

    # The period and the validTime are always built, even if they may
    # end up not being attached to the entry.
    period = createTimePeriod(root=atomroot,
                              begin=begin_element, end=end_element)
    valid_time = createValidTime(root=atomroot, body=period)

    if begin_element is None and end_element is None:
        return
    entry.append(valid_time)
104
def extract_title(ceda_obj):
    '''
        Returns the code of the first identifier whose authority title
        equals CEDA_TITLE.
        **Parameters**
            * ceda_obj: any object; it is inspected only if it has an
              'identifier' attribute
        **Returns**
            The identifier code, or None if no identifier matches
    '''
    if not hasattr(ceda_obj, 'identifier'):
        return None

    for identifier in ceda_obj.identifier:
        authority_title = identifier.authority.title
        if authority_title == CEDA_TITLE:
            return identifier.code
    return None
110
def generate_url_id(url, iid = None):
    '''
        Builds the search URL for the service or for one of its resources.
        **Parameters**
            * url: the base URL
            * iid: the resource id; if None the generic search URL is built
        **Returns**
            '<url>/search' if iid is None, otherwise '<url>/search/<iid>'
    '''
    if iid is not None:
        return "%s/search/%s" % (url, iid)
    return "%s/search" % (url)
116
def new_extract_geographic_extent(result):
    '''
        Converts the first geographic extent of an object to a BOX2D string.
        **Parameters**
            * result: any object; only its 'geographicExtent' attribute,
              if present, is read
        **Returns**
            The string built by _create_box2d from the
            'south west' and 'north east' corners, or None if the object
            has no extent or the first extent is not an
            EX_GeographicBoundingBox
    '''
    extents = getattr(result, 'geographicExtent', None)
    if extents is None or len(extents) == 0:
        return None

    # Only the first extent is considered.
    first_extent = extents[0]
    if not isinstance(first_extent, EX_GeographicBoundingBox):
        return None

    lower_corner = '%s %s' % (first_extent.southBoundLatitude,
                              first_extent.westBoundLongitude)
    upper_corner = '%s %s' % (first_extent.northBoundLatitude,
                              first_extent.eastBoundLongitude)
    return _create_box2d(lower_corner, upper_corner)
131
def digest_fatcat_atom(context, results):
    '''
        Converts the entries of an atom document into Subresult instances.
        **Parameters**
            * context: not used by this function
            * results: an element exposing findall(); its atom 'entry'
              children are digested
        **Returns**
            A list of Subresult, one per entry, in document order
    '''
    entry_tag = '{%s}entry' % (ATOM_NAMESPACE)
    id_tag = '{%s}id' % (ATOM_NAMESPACE)
    title_tag = '{%s}title' % (ATOM_NAMESPACE)
    link_path = "./{%s}link" % (ATOM_NAMESPACE)
    begin_path = './/{%s}beginPosition' % (GML_NAMESPACE)
    end_path = './/{%s}endPosition' % (GML_NAMESPACE)
    pos_list_path = './/{%s}posList' % (GML_NAMESPACE)
    envelope_path = './/{%s}Envelope' % (GML_NAMESPACE)
    lower_corner_path = './{%s}lowerCorner' % (GML_NAMESPACE)
    upper_corner_path = './{%s}upperCorner' % (GML_NAMESPACE)

    subresults = []
    for entry in results.findall(entry_tag):
        iid = entry.find(id_tag).text.strip()
        ititle = entry.find(title_tag).text.strip()
        fields = {}

        begin = entry.find(begin_path)
        if begin is not None:
            fields['beginPosition'] = begin.text.strip()

        end = entry.find(end_path)
        if end is not None:
            fields['endPosition'] = end.text.strip()

        # A posList takes precedence over an Envelope.
        pos_list = entry.find(pos_list_path)
        if pos_list is not None:
            fields['geometry'] = 'POLYGON((%s))' % pos_list.text.strip()
        else:
            envelope = entry.find(envelope_path)
            if envelope is not None:
                lower = envelope.find(lower_corner_path).text.strip()
                upper = envelope.find(upper_corner_path).text.strip()
                fields['geometry'] = _create_box2d(lower, upper)

        # Every atom link is collected here; with python >= 2.7 the path
        # could be restricted to "./{%s}link[@rel='enclosure']".
        fields['enclosure'] = entry.findall(link_path)
        fields['description'] = 'no description'
        subresults.append(Subresult(iid, ititle,
                                    datetime.now().isoformat(), **fields))

    return subresults
172
def apply_query_params(context, results):
    '''
        Converts model instances into Subresult instances.
        Instances for which no GUID can be retrieved are skipped.
        **Parameters**
            * context: the request context; context['moles3EPB'] must expose
              retrieveGUIDFromInstance()
            * results: an iterable of model instances, or None
        **Returns**
            A list of Subresult; empty if results is None
    '''
    # A cleaner implementation would require calls to
    # db's merge_period_instant_views() but actually it does not collect
    # infos about CEDA_Results

    subresults = []
    if results is None:
        return subresults

    for result in results:
        result_guid = context['moles3EPB'].retrieveGUIDFromInstance(result)
        if result_guid is None:
            continue
        ititle = extract_title(result)

        kwargs = {}

        # The check has to be done on phenomenonTime itself: testing
        # 'result' (a model instance, never a list) left a list-valued
        # phenomenonTime unwrapped.
        phenomenon_time = result.phenomenonTime
        if isinstance(phenomenon_time, list) and len(phenomenon_time) > 0:
            phenomenon_time = phenomenon_time[0]
        kwargs['beginPosition'], kwargs['endPosition'] = \
            from_pt_to_string(phenomenon_time)

        kwargs['geometry'] = new_extract_geographic_extent(result)
        kwargs['description'] = result.description

        item = Subresult(result_guid.id, ititle, datetime.now().isoformat(),
                         **kwargs)
        subresults.append(item)
    return subresults
208
def import_count_and_page(context):
    '''
        Reads the paging parameters from the request context.
        A missing value, or one that cannot be converted to int, is
        replaced by the corresponding default.
        **Parameters**
            * context: an object exposing a dict-like get()
        **Returns**
            The tuple (count, startIndex, startPage)
    '''
    paging_defaults = (('count', COUNT_DEFAULT),
                       ('startIndex', START_INDEX_DEFAULT),
                       ('startPage', START_PAGE_DEFAULT))

    values = []
    for key, fallback in paging_defaults:
        try:
            values.append(int(context.get(key, fallback)))
        except (ValueError, TypeError):
            values.append(fallback)

    return tuple(values)
228
class MyOSAtomResponse(OSAtomResponse):
    '''
    Atom flavour of the OpenSearch response: digests the outcome of a
    search into a Result and renders its subresults as atom entries.
    '''

    def __init__(self):
        super(MyOSAtomResponse, self).__init__()

    def digest_search_results(self, results, context):
        '''
            Normalizes the search outcome into a Result.
            **Parameters**
                * results: one of
                    - a CEDA_ObservationCollection (its members are used)
                    - a tuple whose second item is a sequence of rows, the
                      first column of each row being an instance id
                    - an ElementTree element holding an atom feed
                * context: the request context
            **Returns**
                A Result holding the paged subresults
        '''
        instances = None
        tot_results = 0
        if type(results) == CEDA_ObservationCollection:
            # NOTE(review): tot_results is left to 0 on this branch -
            # confirm whether it should be the number of members.
            instances = results.member
        elif type(results) == tuple:
            ids = [iid[0] for iid in results[1]]
            instances = context['moles3EPB'].get_instance_by_ids(ids)
            try:
                tot_results = len(instances)
            except TypeError:
                # instances has no length (e.g. None): keep 0
                pass
        elif isinstance(results, _ElementInterface):
            instances = digest_fatcat_atom(context, results)
            total_element = results.find('{%s}totalResults' % (OS_NAMESPACE))
            tot_results = int(total_element.text.replace('\n', '').strip())

        count, start_index, start_page = import_count_and_page(context)
        subresults = filter_results(instances, count, start_index, start_page)

        # Exact type checks are kept on purpose (InstrumentedList is handled
        # separately from list). The first item is inspected only if it
        # exists: an empty sequence used to raise IndexError here.
        first_type = None
        if type(instances) in (list, InstrumentedList) and len(instances) > 0:
            first_type = type(instances[0])

        if (type(instances) == list
                and first_type == CEDA_ObservationCollection) \
            or (type(instances) == InstrumentedList
                and first_type == CEDA_Observation):
            subresults = apply_query_params(context, subresults)

        title = HPFOS_TITLE
        if context.get('exception', None):
            # An exception stored in the context replaces the feed title
            title = context.get('exception').value

        return Result(count, start_index, start_page, tot_results,
                      subresult = subresults, title = title)

    def generateEntryLinks(self, entry, atomroot, path, params_model, context):
        '''
            Appends to the entry the 'alternate' and 'search'
            autodiscovery links.
        '''
        entry.append(create_autodiscovery_link(atomroot, path,
                                               params_model, context,
                                               self.extension,
                                               rel = REL_ALTERNATE))

        entry.append(create_autodiscovery_link(atomroot, path,
                                               params_model, context,
                                               extension = self.extension,
                                               rel = REL_SEARCH))

    def generate_url(self, os_host_urlURL, context):
        '''
            Returns the search URL for the 'guid' found in the context,
            or the generic search URL if there is none.
        '''
        return generate_url_id(os_host_urlURL, context.get('guid'))

    def generate_entries(self, atomroot, subresults, path, params_model, context):
        '''
            Appends one atom entry per subresult to atomroot.
            **Parameters**
                * atomroot: the element the entries are appended to
                * subresults: None (nothing is done), a list of ready-made
                  elements (appended as they are) or an iterable of
                  Subresult-like objects
                * path: the base URL used to build the entries' ids
                * params_model: forwarded to create_autodiscovery_link
                * context: the request context
        '''
        if subresults is None:
            return

        # Entries which are already elements are attached untouched
        if isinstance(subresults, list) \
                and len(subresults) > 0 \
                and isinstance(subresults[0], _ElementInterface):
            for entry in subresults:
                atomroot.append(entry)
            return

        entries = []

        for subresult in subresults:
            entry_path = generate_url_id(path, subresult.id)
            atom_id = createID(entry_path + '/' + self.extension, root = atomroot)
            ititle = createTitle(root = atomroot,
                                 body = subresult.title,
                                 itype = HTML_TYPE)
            atom_content = createContent(root = atomroot,
                                         body = subresult.description,
                                         itype = HTML_TYPE)
            atom_updated = createUpdated(subresult.updated, root = atomroot)
            # The published date is still a placeholder
            atom_published = createPublished('TO_BE_DONE_2011-01-21T11:05:29.511Z',
                                             root = atomroot)
            entry = createEntry(atom_id, ititle, atom_updated,
                                published=atom_published,
                                content=atom_content, root = atomroot)

            begin_position = getattr(subresult, 'beginPosition', None)
            end_position = getattr(subresult, 'endPosition', None)
            append_valid_time(subresult, entry, atomroot,
                              begin_position, end_position)

            # The dc:date is still a placeholder
            idate = createDate(root = atomroot,
                body = 'TO_BE_DONE_2002-10-18T08:07:37.387Z/2012-03-29T07:12:20.735Z')
            entry.append(idate)

            if getattr(subresult, 'geometry', None) is not None:
                where = create_where_from_postgis(subresult.geometry, atomroot)
                entry.append(where)

            self.generateEntryLinks(entry, atomroot, entry_path,
                                    params_model, context)

            enclosures = getattr(subresult, 'enclosure', None)
            if enclosures is not None:
                # The collected links are of any kind: only the
                # enclosures are copied to the entry
                for enclosure in enclosures:
                    if enclosure.get('rel', None) == 'enclosure':
                        entry.append(createLink(enclosure.get('href'),
                                                rel = 'enclosure',
                                                root = atomroot,
                                                itype = enclosure.get('type'),
                                                length = enclosure.get('length')))

            entries.append(entry)

        # Attach the entries only after all of them have been built
        for entry in entries:
            atomroot.append(entry)
352
class MyOSHTMLResponse(OSHTMLResponse):
    '''
    HTML flavour of the OpenSearch response.
    '''

    def __init__(self):
        '''
        Initializes the parent response; no extra state is added.
        '''
        super(MyOSHTMLResponse, self).__init__()

    def generateResponse(self, result, queries, ospath, **kwargs):
        '''
        Returns the given result with the ' HTML!' suffix appended.
        queries, ospath and kwargs are not used.
        '''
        suffix = " HTML!"
        return result + suffix
366       
class MyOSQuery(OSQuery):
    '''
    OpenSearch query of the HPFos service: declares the supported
    parameters and runs the search on the MOLES3 db or on Fatcat.
    '''

    def __init__(self):
        '''
            Registers the supported OpenSearch parameters.
        '''
        geo_namespace = "http://a9.com/-/opensearch/extensions/geo/1.0/"
        time_namespace = "http://a9.com/-/opensearch/extensions/time/1.0/"
        params = [
            OSParam("count", "count", namespace = OS_NAMESPACE),
            OSParam("startPage", "startPage", namespace = OS_NAMESPACE),
            OSParam("startIndex", "startIndex", namespace = OS_NAMESPACE),
            OSParam("q", "searchTerms", namespace = OS_NAMESPACE),
            OSParam("uid", "uid", namespace = geo_namespace),
            OSParam(BBOX, 'box', namespace = geo_namespace),
            OSParam("start", "start", namespace = time_namespace),
            OSParam("stop", "end", namespace = time_namespace),
        ]
        super(MyOSQuery, self).__init__(params)

    def do_search(self, context):
        '''
            Runs the search described by the request context.
            **Parameters**
                * context: the request context; 'start', 'stop', 'q' and
                  'moles3EPB' are read by key, BBOX and GUID are optional
            **Returns**
                * the Fatcat atom document if the guid contains 'FID'
                * the tuple (CEDA_ObservationCollection, rows) if no guid
                  is given
                * what searchSelectiveLoadByInstance returns if the guid
                  identifies a CEDA_ObservationCollection
                * the Fatcat atom document of the first 'search' source
                  answering without a FatcatException if the guid
                  identifies a CEDA_Observation
                * None in any other case
        '''
        ibbox = None
        if BBOX in context and context[BBOX] is not None:
            coords = context[BBOX].split(',')
            if len(coords) == 4:
                # A bbox which is not made of integers is ignored.
                # NOTE(review): decimal coordinates are discarded as well -
                # confirm whether create_st_setSRID accepts floats.
                try:
                    corners = [int(coord) for coord in coords]
                except ValueError:
                    corners = None
                if corners is not None:
                    ibbox = create_st_setSRID(*corners)

        # The guid may be missing or explicitly None: in both cases the
        # 'FID' test has to be skipped ('in' on None raises TypeError).
        guid = context.get(GUID)
        if guid is not None and 'FID' in guid:
            # NOTE(review): the request's query parameters are not
            # forwarded here, unlike the CEDA_Observation branch - confirm.
            return extract_fatcat_entities(guid)

        start = None
        stop = None
        if context['start'] is not None:
            start = string_to_datetime(context['start'])
        if context['stop'] is not None:
            stop = string_to_datetime(context['stop'])

        if guid is None:
            res = context['moles3EPB'].\
                getObservationCollections_(bbox = ibbox,
                                           keywords = context['q'],
                                           start = start,
                                           stop = stop)
            return (CEDA_ObservationCollection, res)

        obj = context['moles3EPB'].getInstanceFromGUID(guid)
        if obj is None:
            return None
        if type(obj) == CEDA_ObservationCollection:
            return context['moles3EPB'].searchSelectiveLoadByInstance(obj, 'member')
        elif type(obj) == CEDA_Observation:
            # Sometimes returns empty because the Result associated with the
            # CedaObservation has not a pointer to the file catalog but a
            # URL link. The link may point either to a file or to a directory.
            for source in obj.result.source:
                if source.function == 'search':
                    try:
                        return extract_fatcat_entities(source.description,
                                                   self.recreate_query(context))
                    except FatcatException as e:
                        # Remember the failure and try the next source
                        context['exception'] = e
        return None

    def recreate_query(self, context):
        '''
            Collects the parameters of this query having a non-empty value
            in the context.
            **Returns**
                A list of (parameter name, value) pairs
        '''
        ret = []
        for param in self.params_model:
            if context.get(param.par_name, None):
                ret.append((param.par_name, context.get(param.par_name)))
        return ret
452       
def extract_fatcat_entities(fc_resource_id, query = ''):
    '''
        Builds the OpenSearch atom request for a Fatcat resource and
        submits it to FATCAT_HOST.
        **Parameters**
            * fc_resource_id: the Fatcat resource id
            * query: the request parameters, in any form accepted by
              urlencode (by default no parameter)
        **Returns**
            What find_fatcat_atom_entity returns for the built path
    '''
    resource_id = str(fc_resource_id)
    query_string = urlencode(query)
    path = '/%s/search/%s/atom?%s' % (FATCAT_ROOT_PATH, resource_id,
                                      query_string)
    return find_fatcat_atom_entity(host = FATCAT_HOST, path = path)
459   
def find_fatcat_atom_entity(host = 'localhost', path = '', port = 80):
    '''
        Retrieves a document through PROXY_URL and hands it to
        get_xml_document.
        **Parameters**
            * host: the host to contact
            * path: the path, query string included, to request
            * port: the port to contact
        **Returns**
            What get_xml_document returns for the retrieved document
    '''
    return get_xml_document(get_document(host, path, port, proxy = PROXY_URL))
463
464def _create_box2d(lc, uc):
465    '''
466        Creates a postgis-like BOX2D string.
467        **Parameters**
468            * lc: the box lower corner as a two floats-as-string space separated pair
469            * uc: the box upper corner as a two floats-as-string space separated pair
470        **Returns**
471            A string formatted as 'BOX2D(lc1 lc2, uc1 uc2)'
472    '''   
473    return 'BOX2D(%s, %s)' % (lc, uc)
Note: See TracBrowser for help on using the repository browser.