source: mauRepo/MolesManager/trunk/cedaMoles/MolesManager/moles3epb.py @ 8522

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/mauRepo/MolesManager/trunk/cedaMoles/MolesManager/moles3epb.py@8522
Revision 8522, 20.5 KB checked in by mnagni, 9 years ago (diff)

Incomplete - # 22528: Migration of FAtCat Open Search link for HPFeld
 http://team.ceda.ac.uk/trac/ceda/ticket/22528
Incomplete - # 22534: Add version number to the gui page
 http://team.ceda.ac.uk/trac/ceda/ticket/22534

Line 
1'''
2BSD Licence
3Copyright (c) 2012, Science & Technology Facilities Council (STFC)
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without modification,
7are permitted provided that the following conditions are met:
8
9    * Redistributions of source code must retain the above copyright notice,
10        this list of conditions and the following disclaimer.
11    * Redistributions in binary form must reproduce the above copyright notice,
12        this list of conditions and the following disclaimer in the documentation
13        and/or other materials provided with the distribution.
14    * Neither the name of the Science & Technology Facilities Council (STFC)
15        nor the names of its contributors may be used to endorse or promote
16        products derived from this software without specific prior written permission.
17
18THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
22BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
23OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29Created on 10 Jan 2012
30
31@author: mnagni
32'''
import logging
from datetime import datetime

from ea_model.moles3_4.observationcollection.mo_observationcollection \
    import MO_ObservationCollection
from ea_model.moles3_4.observation.mo_observation import MO_Observation
from sqlalchemy import Table, Column, ForeignKey, Integer, String, event
from sqlalchemy.orm import mapper
from sqlalchemy.orm.collections import InstrumentedList
from ea_model.iso_19115_2006_metadata_corrigendum.\
    reference_system_information.md_identifier import MD_Identifier
from ea_model.iso_19115_2006_metadata_corrigendum.\
    citation_and_responsible_party_information.ci_citation import CI_Citation
from ea_model.iso_19115_2006_metadata_corrigendum.\
    extent_information.ex_geographicboundingbox import EX_GeographicBoundingBox
from ea_model.ceda_metadatamodel.ceda_observationcollection.\
    ceda_observationcollection import CEDA_ObservationCollection
from ea_model.ceda_metadatamodel.ceda_observation.\
    ceda_observation import CEDA_Observation
from ea_model.ceda_metadatamodel.ceda_project.ceda_project import CEDA_Project
from ascore.utils import synchAttributes

from cedaMoles.libs.epb import EPB
from cedaMoles.MolesManager.ceda_guid import CedaGUID
from cedaMoles.libs.postgisutil import create_st_setSRID, unifyGeometriesAsBBox
from cedaMoles.MolesManager.codelist import MM_RoleValue, getCLValue
from cedaMoles.libs.migration.processor.commons import from_pt_to_string
from cedaMoles.MolesManager.db.partyIndexes import associateMOParty_indexes
from cedaMoles.MolesManager.db.after_flush import afterFlush
59
class Moles3EPBFactory(EPB):
    """
        Factory of Moles3EPB instances.

        On construction it applies the CEDA specific customizations to the
        database reachable through the given db_manager: the ceda_guid
        table and mapping, the MO party indexes and the full-text search
        support on md_identifier.
    """

    _log = logging.getLogger(__name__)

    def __init__(self, db_manager):
        """
            @param db_manager: forwarded to EPB.__init__; afterwards read
                    by this class as self._db_manager
        """
        super(Moles3EPBFactory, self).__init__(db_manager)
        self._init_ceda_customization()

    def _init_ceda_customization(self):
        """
            Runs, in order, the three CEDA customization steps.
        """
        self._associate_ceda_guid()
        associateMOParty_indexes(self._db_manager.metadata)
        self._initSearchIndexes()

    def _associate_ceda_guid(self):
        """
            Declares the ceda_guid table, maps CedaGUID onto it and asks the
            metadata to create the tables.
        """
        guid_table = Table(
            'ceda_guid', self._db_manager.metadata,
            Column('id', String, primary_key=True),
            Column('ceda_observationcollection', Integer,
                   ForeignKey('ceda_observationcollection.id')),
            Column('ceda_observation', Integer,
                   ForeignKey('ceda_observation.id')),
            Column('ceda_project', Integer, ForeignKey('ceda_project.id')))
        mapper(CedaGUID, guid_table)
        self._db_manager.metadata.create_all()

    def _initSearchIndexes(self):
        """
            Best-effort creation of the full-text search column, index and
            trigger on md_identifier.

            The column is added with raw SQL because sqlalchemy must not know
            about it. Every statement is attempted on its own, so an already
            existing column no longer prevents the creation of a missing
            index or trigger. A failing statement is logged and never
            propagated to the caller.
        """
        statements = (
            # The tsvector column holding the searchable text
            "alter table md_identifier add column code_search_vector tsvector",
            # This indexes the tsvector column
            "create index md_identifier_code_search_index on md_identifier using gin(code_search_vector)",
            # This sets up the trigger that keeps the tsvector column up to date.
            "create trigger md_identifier_code_search_update before update or insert on md_identifier \
                for each row execute procedure tsvector_update_trigger('code_search_vector', 'pg_catalog.english', code)",
        )
        for statement in statements:
            try:
                self._db_manager.engine.execute(statement)
            except Exception:
                # Expected whenever the object already exists; kept at debug
                # level so a regular start up stays quiet.
                self._log.debug("Search index statement not applied: %s",
                                statement, exc_info=True)

    def createEPB(self):
        """
            Creates a Moles3EPB bound to a session obtained from
            self._get_session(); afterFlush is registered as the session's
            'after_flush' listener.
            @return: a new Moles3EPB instance
        """
        session = self._get_session()
        event.listen(session, 'after_flush', afterFlush)
        return Moles3EPB(session)
99
class Moles3EPB(object):
    """
        Session bound facade over the EPB helpers plus the MOLES3/CEDA
        specific queries. Every operation uses the session received at
        construction time.
    """

    _log = logging.getLogger(__name__)

    def __init__(self, session):
        """
            @param session: the session used by all the methods of the instance
        """
        self._session = session

    def close(self):
        """
            Closes the underlying session.
            @return: the value returned by the session's close()
        """
        return self._session.close()

    def searchEager(self, clazz, inst_id):
        """
            Delegates to EPB.searchEager using the instance session.
            @param clazz: the class type to search for
            @param inst_id: the instance id for which the search is done
            @return: the value returned by EPB.searchEager
        """
        return EPB.searchEager(clazz, inst_id, self._session)

    def _controlledCommit(self):
        """
            Commits the session without ever propagating a failure.

            A failed commit is logged and the session is rolled back so it
            is not left holding the failed transaction.
        """
        try:
            self._session.commit()
        except Exception:
            self._log.exception("Commit failed, rolling back the session")
            try:
                self._session.rollback()
            except Exception:
                # Keep the "never raises" contract of this method
                self._log.exception("Rollback after a failed commit failed")

    def rollback(self):
        """
            Rolls back the session by delegating to EPB.rollback.
        """
        EPB.rollback(self._session)

    def persistInstance(self, instance):
        """
            Adds a new migration object.
            **Parameters**
                * instance: the object to add

            **Returns**
                The value returned by EPB.persistInstance, documented as an
                updated, session independent, object instance reflecting
                the new persisted object
        """
        return EPB.persistInstance(instance, self._session)

    def mergeInstance(self, instance):
        """
            Copies the state of an instance onto the persistent instance
            with the same identifier.
            **Parameters**
                * instance: the object to merge

            **Returns**
                An updated object instance reflecting the new persisted object
        """
        return EPB.mergeInstance(instance, self._session)

    def refresh(self, instance):
        """
            Expires and refreshes the attributes on the given instance.
            **Parameters**
                * instance: the object to refresh
        """
        EPB.refresh(instance, self._session)

    def deleteInstance(self, instance, commit=False):
        """
            Deletes an object.

            **Parameters**
            * `object` **instance**
                the object to delete
            * `bool` **commit**
                Defines if the delete operation has to be immediately committed.
                    Default is `False`
        """
        EPB.deleteInstance(instance, self._session, commit)

    def expunge(self, instance):
        """
            Expunges an object from the session
            **Parameters**
                * instance: the object to expunge
        """
        EPB.expunge(instance, self._session)

    def _mergeOrAddInSession(self, ceda_object):
        """
            Merges the object in the session; if the merge fails the object
            is added to the session instead.
            @param ceda_object: the object to attach to the session
            @return: the merged instance or, after a failed merge, the given object
        """
        try:
            return self._session.merge(ceda_object)
        except Exception:
            # Not a bare except: KeyboardInterrupt/SystemExit must propagate
            self._session.add(ceda_object)
            return ceda_object

    def updateCedaObject(self, ceda_object, cols_to_update):
        """
            Update, eventually add to the session, and commit a CEDA Object in MOLES3 db.
            @param ceda_object: the CEDA object to update
            @param cols_to_update: a dictionary containing the columns to update
                    for the given ceda_object and the desired value. None values and
                    unknown attributes are skipped. If the attribute is a list of
                    objects the new instances are appended only if they do not
                    exist in the actual list
            @return: None; the commit goes through _controlledCommit, so a
                    failure is not propagated
        """
        coll = self._mergeOrAddInSession(ceda_object)
        if coll is not None:
            for k, v in cols_to_update.items():
                if v is None or not hasattr(coll, k):
                    continue
                coll_k = getattr(coll, k)
                if isinstance(coll_k, (list, InstrumentedList)):
                    # Work on a copy: v may be a live collection which could
                    # change while its items are merged
                    if isinstance(v, (list, InstrumentedList)):
                        new_items = list(v)
                    else:
                        new_items = [v]
                    for item in new_items:
                        el = self._mergeOrAddInSession(item)
                        if el not in coll_k:
                            coll_k.append(el)
                else:
                    setattr(coll, k, self._mergeOrAddInSession(v))
        synchAttributes(coll)
        self._controlledCommit()

    def getUnifyObservationCollectionGEAsBBox(self, collection):
        """
            Returns the union of the collections.member's GeographicExtent(s)
            @param collection: a CEDA_ObservationCollection instance
            @return: the value returned by unifyGeometriesAsBBox or an empty
                    list if the collection has no 'member' attribute
        """
        bboxes = []
        if not hasattr(collection, 'member'):
            return bboxes
        for member in collection.member:
            for ge in member.geographicExtent:
                bboxes.append(getGeograpicExtentGeometry(ge))
        return unifyGeometriesAsBBox(bboxes, self)

    def retrieveGUIDFromInstance(self, instance):
        """
            Returns the CedaGUID object associated with the given instance.
            @param instance: an instance of CEDA_Observation,
                    CEDA_ObservationCollection or CEDA_Project
            @return: the first matching CedaGUID, None if there is none or if
                    the instance is None, has no id or is of another type
        """
        if instance is None or not hasattr(instance, 'id'):
            return None
        # The exact type is compared on purpose: subclasses are not matched
        instance_type = type(instance)
        if instance_type == CEDA_ObservationCollection:
            guid_column = CedaGUID.ceda_observationcollection
        elif instance_type == CEDA_Observation:
            guid_column = CedaGUID.ceda_observation
        elif instance_type == CEDA_Project:
            guid_column = CedaGUID.ceda_project
        else:
            return None
        return self._session.query(CedaGUID).\
            filter(guid_column == instance.id).first()

    def _indexByGUID(self, instances):
        """
            Builds a dictionary {CedaGUID.id: instance} skipping the
            instances without an associated CedaGUID.
            @param instances: an iterable of mapped instances
        """
        ret = {}
        for item in instances:
            guid = self.retrieveGUIDFromInstance(item)
            if guid is not None:
                ret[guid.id] = item
        return ret

    def observationCollectionHasObservation(self, obs_coll_id, obs_id):
        """
            Checks if a CEDA_ObservationCollection contains a given CEDA_Observation.
            @param obs_coll_id: the CEDA_ObservationCollection id
            @param obs_id: the CEDA_Observation id
            @return: True if the collection contains the given observation,
                    False otherwise (also when either id is unknown)
        """
        coll = self._session.query(CEDA_ObservationCollection).\
            filter(CEDA_ObservationCollection.id == obs_coll_id).first()
        if coll is None:
            # An unknown collection cannot contain anything
            return False
        obs = self._session.query(CEDA_Observation).\
            filter(CEDA_Observation.id == obs_id).first()
        return obs is not None and obs in coll.member

    def observationAuthor(self, observation):
        """
            Looks up the CEDA_Observation author.
            @param observation: the CEDA_Observation inside which look for the author
            @return: the party of the first relatedParty having the author role,
                    None if there is none
        """
        # TODO: marked "TO FIX" by the original author, reason not recorded
        author_role = getCLValue(MM_RoleValue.cl_author)
        for partyInfo in observation.relatedParty:
            if partyInfo.role == author_role:
                return partyInfo.party
        return None

    def extractObservationByTitleKeywords(self, keywords):
        """
            Looks for the CEDA_Observation whose title
            (observation.identifier.code) matches the given terms
            @param keywords: the search terms, handed to to_tsquery
            @return: a dictionary where the keys are the GUID ids and the
                    values the matching CEDA_Observation
        """
        # code_search_vector is a tsvector column. To search for terms, you
        # use the @@ operator against a tsquery, here built by to_tsquery.
        # NOTE(review): the previous comment named plainto_tsquery while the
        # code calls to_tsquery; the two parse their input differently, so
        # confirm which one suits the "space separated terms" callers send.
        q = self._session.query(CEDA_Observation). \
            join(MO_Observation).join(MO_Observation.identifier). \
            filter('md_identifier.code_search_vector @@ to_tsquery(:terms)')
        # This binds the :terms placeholder to the search string. User input
        # should always be put into queries this way to prevent SQL injection.
        q = q.params(terms=keywords)
        return self._indexByGUID(q.all())

    def extractCollectionIdentifierByTitle(self, i_title):
        """
            Searches for the MD_Identifier of a CEDA_ObservationCollection
            whose authority title contains the given text
            @param i_title: the text to look for in the CI_Citation title
            @return: a query over (CEDA_ObservationCollection, MD_Identifier)
        """
        return self._session.query(CEDA_ObservationCollection, MD_Identifier). \
            join(MO_ObservationCollection).join(MO_ObservationCollection.identifier). \
            join(MD_Identifier.authority).filter(CI_Citation.title.like('%' + i_title + '%'))

    def extractObservationsForProject(self, project):
        """
            Searches for the CEDA_Observation associated with a CEDA_Project
            @param project: a CEDA_Project instance
            @return: a query over the associated CEDA_Observation
        """
        return self._session.query(CEDA_Observation). \
            join(CEDA_Observation, MO_Observation.inSupportOf).filter(CEDA_Project.id == project.id)

    def extractProjectObservationCollections(self, project):
        """
            Searches for the Observation_Collections associated with a CEDA_Project
            @param project: a CEDA_Project instance
            @return: a query over MO_ObservationCollection
        """
        # NOTE(review): the query logic below is unchanged but looks
        # unfinished: one() hands back a single result, yet `obsers` is then
        # used through `.any(...)` and `.c.id` as if it were a selectable.
        # Confirm the intended query before relying on this method.
        mo_obs = self._session.query(MO_Observation).join(CEDA_Project).\
            filter(CEDA_Project.id == project.id).subquery()
        obsers = self._session.query(CEDA_Observation).\
            join(mo_obs, CEDA_Observation.id == mo_obs.c.id).one()

        # Alternative attempt left disabled by the original author:
        #   cos = self._session.query(CEDA_ObservationCollection).all()
        #   co = self._session.query(MO_ObservationCollection).\
        #       join(MO_ObservationCollection.member).\
        #       filter(MO_ObservationCollection.member.contains(obsers))
        observations = self._session.query(MO_ObservationCollection).\
            join(CEDA_Observation).\
            filter(obsers.any(CEDA_Observation.id == obsers.c.id))
        self._log.debug("observation:%s", observations.count())
        return observations

    def search(self, clazz, inst_id=None):
        """
            Delegates to EPB.search using the instance session.
            @param clazz: the class type to search for
            @param inst_id: the instance id, None by default
            @return: the value returned by EPB.search
        """
        return EPB.search(clazz, inst_id, self._session)

    def search_object_and_gui(self, clazz, inst_id=None):
        """
            Runs EPB.search and indexes the results by GUID.
            @param clazz: the class type to search for
            @param inst_id: the instance id, None by default
            @return: a dictionary {CedaGUID.id: instance}; the instances
                    without a CedaGUID are left out
        """
        result = EPB.search(clazz, inst_id, self._session)
        return self._indexByGUID(result.all())

    def searchSelectiveLoad(self, clazz, inst_id, attributes):
        """
            Searches a required instance by id loading selectively
            the specified fields. The parameter "attributes" is a single string or a list of attributes
            owned by the instance of "clazz". Furthermore such list may contain
            also the children of the main attributes. For example "attrs" may look
            like
            ['resultAccumulation', 'identifier.authority', 'resultTime.position.dateTime8601.month',
                      'relatedParty.party', 'result.source.function', 'permission',
                      'geographicExtent', 'phenomenonTime', 'keywords', 'description',
                      'inSupportOf.abstract', 'dataLineage']
            the first parameter refers to the main class so is equivalent to
            clazz.resultAccumulation
            the second parameter is equivalent to invoke
            clazz.identifier.authority
            As single string "attributes" could be as well just 'identifier.authority'
            @param clazz: the class type to search for
            @param inst_id: the instance id for which the search is done
            @param attributes: a single string or a list of attributes to load
            @return: the required instance
        """
        return EPB.searchSelectiveLoad(clazz, inst_id, attributes, self._session)

    def loadAttributes(self, instance, attributes):
        """
            Loads the given attributes on an instance. The parameter "attributes" is a single string or a list of attributes
            owned by the instance. Furthermore such list may contain
            also the children of the main attributes. For example "attrs" may look
            like
            ['resultAccumulation', 'identifier.authority', 'resultTime.position.dateTime8601.month',
                      'relatedParty.party', 'result.source.function', 'permission',
                      'geographicExtent', 'phenomenonTime', 'keywords', 'description',
                      'inSupportOf.abstract', 'dataLineage']
            the first parameter refers to the main class so is equivalent to
            instance.resultAccumulation
            the second parameter is equivalent to invoke
            instance.identifier.authority
            As single string "attributes" could be as well just 'identifier.authority'
            @param instance: an instance containing the appropriate id
            @param attributes: the attribute value required
            @return: the instance merged in the session, after
                    EPB.loadAttributes has been applied to it
        """
        instance = self._session.merge(instance)
        EPB.loadAttributes(instance, attributes, self._session)
        return instance

    def executeNative(self, sqlNative):
        """
            Delegates to EPB.executeNative using the instance session.
            @param sqlNative: the native SQL to execute
            @return: the value returned by EPB.executeNative
        """
        return EPB.executeNative(sqlNative, self._session)
401
402
def getGeograpicExtentGeometry(ge):
    '''
        Builds the postgis geometry for a geographic extent.
        @param ge: an EX_GeographicExtent instance
        @return: the value returned by create_st_setSRID for an
                EX_GeographicBoundingBox, None for any other kind of extent
    '''
    if not isinstance(ge, EX_GeographicBoundingBox):
        return None
    west, south = ge.westBoundLongitude, ge.southBoundLatitude
    east, north = ge.eastBoundLongitude, ge.northBoundLatitude
    return create_st_setSRID(west, south, east, north)
413
414def _tmpstrftime(dt):
415    """
416        Returns a string from a datastring. This function is necessary because
417        python <3.2 strftime method is not able to handle date < 1900
418        @param dt: a datetime object
419    """
420    return "%s-%s-%s" % (dt.year, dt.month, dt.day)
421
def unify_observation_collection_phenomenon_time(collection):
    """
        Returns the time period covered by the collection.member's phenomenonTime(s)
        @param collection: a CEDA_ObservationCollection instance
        @return: a tuple (startDate, endDate) of strings: the earliest start
                and the latest end found among the members. Each element is
                None when no member provides the corresponding date.
    """
    date_format = '%Y-%m-%d'
    starts = []
    ends = []
    for member in collection.member:
        if member.phenomenonTime is None:
            continue

        # from_pt_to_string is indexed as a (start, end) pair of strings,
        # each of which may be None
        pt_string = from_pt_to_string(member.phenomenonTime)
        if pt_string[0] is not None:
            starts.append(datetime.strptime(pt_string[0], date_format))
        if pt_string[1] is not None:
            ends.append(datetime.strptime(pt_string[1], date_format))

    # min()/max() pick the boundary dates without sorting the whole lists
    start = _tmpstrftime(min(starts)) if starts else None
    end = _tmpstrftime(max(ends)) if ends else None
    return start, end
Note: See TracBrowser for help on using the repository browser.