source: mauRepo/MolesManager/trunk/cedaMoles/MolesManager/moles3epb.py @ 8537

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/mauRepo/MolesManager/trunk/cedaMoles/MolesManager/moles3epb.py@8537
Revision 8537, 20.7 KB checked in by mnagni, 7 years ago (diff)

Incomplete - # 22528: Migration of FAtCat Open Search link for HPFeld
 http://team.ceda.ac.uk/trac/ceda/ticket/22528

Line 
1'''
2BSD Licence
3Copyright (c) 2012, Science & Technology Facilities Council (STFC)
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without modification,
7are permitted provided that the following conditions are met:
8
9    * Redistributions of source code must retain the above copyright notice,
10        this list of conditions and the following disclaimer.
11    * Redistributions in binary form must reproduce the above copyright notice,
12        this list of conditions and the following disclaimer in the documentation
13        and/or other materials provided with the distribution.
14    * Neither the name of the Science & Technology Facilities Council (STFC)
15        nor the names of its contributors may be used to endorse or promote
16        products derived from this software without specific prior written permission.
17
18THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
22BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
23OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29Created on 10 Jan 2012
30
31@author: mnagni
32'''
33from cedaMoles.libs.epb import EPB
34from ea_model.moles3_4.observationcollection.mo_observationcollection \
35    import MO_ObservationCollection
36from ea_model.moles3_4.observation.mo_observation import MO_Observation
37from sqlalchemy import Table, Column, ForeignKey, Integer, String, event
38from sqlalchemy.orm import mapper
39from cedaMoles.MolesManager.ceda_guid import CedaGUID
40from sqlalchemy.orm.collections import InstrumentedList
41from ea_model.iso_19115_2006_metadata_corrigendum.\
42    reference_system_information.md_identifier import MD_Identifier
43from ea_model.iso_19115_2006_metadata_corrigendum.\
44    citation_and_responsible_party_information.ci_citation import CI_Citation
45from ea_model.iso_19115_2006_metadata_corrigendum.\
46    extent_information.ex_geographicboundingbox import EX_GeographicBoundingBox
47from cedaMoles.libs.postgisutil import create_st_setSRID, unifyGeometriesAsBBox
48from ea_model.ceda_metadatamodel.ceda_observationcollection.\
49    ceda_observationcollection import CEDA_ObservationCollection
50from ea_model.ceda_metadatamodel.ceda_observation.\
51    ceda_observation import CEDA_Observation
52from ea_model.ceda_metadatamodel.ceda_project.ceda_project import CEDA_Project
53from cedaMoles.MolesManager.codelist import MM_RoleValue, getCLValue
54from cedaMoles.libs.migration.processor.commons import from_pt_to_string
55from datetime import datetime
56from ascore.utils import synchAttributes
57from cedaMoles.MolesManager.db.partyIndexes import associateMOParty_indexes
58from cedaMoles.MolesManager.db.after_flush import afterFlush
59
class Moles3EPBFactory(EPB):
    """
        Factory producing Moles3EPB instances.

        At construction time it applies the CEDA specific customisations to
        the database reachable through the given db_manager: the 'ceda_guid'
        table/mapping, the party indexes and the full text search support on
        md_identifier.
    """

    def __init__(self, db_manager):
        """
            @param db_manager: the database manager handed to the EPB base
                class; this class reads its 'metadata' and 'engine' attributes
        """
        super(Moles3EPBFactory, self).__init__(db_manager)
        self._init_ceda_customization()

    def _init_ceda_customization(self):
        """
            Runs, in order, every CEDA specific database customisation.
        """
        self._associate_ceda_guid()
        associateMOParty_indexes(self._db_manager.metadata)
        self._initSearchIndexes()

    def _associate_ceda_guid(self):
        """
            Declares the 'ceda_guid' table, maps it onto CedaGUID and
            asks the metadata to create the declared tables.
        """
        guid_table = Table(
            'ceda_guid', self._db_manager.metadata,
            Column('id', String, primary_key=True),
            Column('ceda_observationcollection',
                   Integer, ForeignKey('ceda_observationcollection.id')),
            Column('ceda_observation',
                   Integer, ForeignKey('ceda_observation.id')),
            Column('ceda_project',
                   Integer, ForeignKey('ceda_project.id')))
        mapper(CedaGUID, guid_table)
        self._db_manager.metadata.create_all()

    def _initSearchIndexes(self):
        """
            Adds, outside of the sqlalchemy model, the tsvector column, the
            gin index and the update trigger used for the text search on
            md_identifier.code.

            The operation is best effort: a statement which fails (typically
            because the object already exists) is logged at debug level and
            never raises. Each statement is attempted on its own so a failure
            on one of them does not prevent the others from being executed.
        """
        # Local import: keeps this block self-contained.
        import logging
        logger = logging.getLogger(__name__)

        #To Be Done - CHECK IF THE COLUMN ALREADY EXISTS!
        # We don't want sqlalchemy to know about this column so we add it externally.
        statements = (
            "alter table md_identifier \
                add column code_search_vector tsvector",
            # This indexes the tsvector column
            "create index \
                md_identifier_code_search_index on md_identifier using gin(code_search_vector)",
            # This sets up the trigger that keeps the tsvector column up to date.
            "create trigger \
                md_identifier_code_search_update \
                before update or insert on md_identifier \
                for each row execute procedure \
                tsvector_update_trigger('code_search_vector', \
                                        'pg_catalog.english', code)",
        )
        for statement in statements:
            try:
                self._db_manager.engine.execute(statement)
            except Exception as ex:
                logger.debug("Search index setup statement skipped (%s): %s",
                             ex, statement)

    def createEPB(self):
        """
            Creates a new Moles3EPB bound to a session obtained from
            _get_session(); the afterFlush callback is registered on the
            session 'after_flush' event.
            @return: a new Moles3EPB instance
        """
        session = self._get_session()
        event.listen(session, 'after_flush', afterFlush)
        return Moles3EPB(session)
108
class Moles3EPB(object):
    """
        Session bound access point to the MOLES3 database.

        Most of the methods delegate to the generic EPB helpers passing the
        session received at construction time.
    """

    def __init__(self, session):
        """
            @param session: the session used by all the operations of this instance
        """
        self._session = session

    def close(self):
        """
            Closes the underlying session.
            @return: the value returned by the session close()
        """
        return self._session.close()

    def searchEager(self, clazz, inst_id):
        """
            Delegates to EPB.searchEager using the instance session.
            @param clazz: the class type to search for
            @param inst_id: the instance id for which the search is done
        """
        return EPB.searchEager(clazz, inst_id, self._session)

    def _controlledCommit(self):
        """
            Commits the session without ever raising: on failure the error
            is printed and the session is rolled back.
        """
        try:
            self._session.commit()
        except Exception as e:
            # Single argument print(...) behaves the same on python 2 and 3
            print(e)
            # After a failed commit the session has to be rolled back before
            # it can be used again; the rollback is guarded as well to
            # preserve the "never raises" behaviour callers rely on.
            try:
                self._session.rollback()
            except Exception as rollback_error:
                print(rollback_error)

    def rollback(self):
        """
        Rolls back the session (delegates to EPB.rollback).
        """
        EPB.rollback(self._session)

    def persistInstance(self, instance):
        """
        Adds a new migration object.
        **Parameters**
            * instance: the object to add

        **Returns**
            Whatever EPB.persistInstance returns
            (NOTE(review): expected to be the updated, session independent,
            instance reflecting the new persisted object - confirm)
        """
        return EPB.persistInstance(instance, self._session)

    def mergeInstance(self, instance):
        """
        Copy the state of an instance onto the persistent instance with the same identifier.
        **Parameters**
            * instance: the object to add

        **Returns**
            An updated object instance reflecting the new persisted object
        """
        return EPB.mergeInstance(instance, self._session)

    def refresh(self, instance):
        """
        Expires and refresh the attributes on the given instance.
        **Parameters**
            * instance: the object to refresh
        """
        EPB.refresh(instance, self._session)

    def deleteInstance(self, instance, commit=False):
        """
            Deletes an object.

            **Parameters**
            * `object` **instance**
                the object to delete
            * `bool` **commit**
                Defines if the delete operation has to be immediately committed.
                    Default is `False`
        """
        EPB.deleteInstance(instance, self._session, commit)

    def expunge(self, instance):
        """
        Expunges an object from the session
        **Parameters**
            * instance: the object to expunge
        """
        EPB.expunge(instance, self._session)

    def _mergeOrAddInSession(self, ceda_object):
        """
            Merges the object into the session; if the merge fails the
            object is added to the session instead.
            @param ceda_object: the object to merge or add
            @return: the merged instance, or the given object when it had to be added
        """
        try:
            return self._session.merge(ceda_object)
        except Exception:
            # 'Exception' rather than a bare except: KeyboardInterrupt and
            # SystemExit must not be turned into a session.add().
            self._session.add(ceda_object)
            return ceda_object

    def updateCedaObject(self, ceda_object, cols_to_update):
        """
            Update, eventually add to the session, and commit a CEDA Object in MOLES3 db.
            @param ceda_object: the CEDA object to update
            @param cols_to_update: a dictionary containing the columns to update
                for the given ceda_object and the desired value. Items whose value
                is None, or whose key is not an attribute of the object, are ignored.
                If the attribute is a list of objects the new instances are appended
                only if do not exist in the actual list
            @return: None
        """
        coll = self._mergeOrAddInSession(ceda_object)
        if coll is not None:
            for k, v in cols_to_update.items():
                if v is None or not hasattr(coll, k):
                    continue
                coll_k = getattr(coll, k)
                # InstrumentedList is a list subclass, one check covers both
                if isinstance(coll_k, list):
                    # iterate over a copy: the target list may grow meanwhile
                    new_items = list(v) if isinstance(v, list) else [v]
                    for item in new_items:
                        el = self._mergeOrAddInSession(item)
                        if el not in coll_k:
                            coll_k.append(el)
                else:
                    setattr(coll, k, self._mergeOrAddInSession(v))
        synchAttributes(coll)
        self._controlledCommit()

    def getUnifyObservationCollectionGEAsBBox(self, collection):
        """
            Returns the union of the collections.member's GeographicExtension(s)
            @param collection: an CEDA_ObservationCollection instance
            @return: the value returned by unifyGeometriesAsBBox, or an empty
                list if the collection has no 'member' attribute
        """
        bboxes = []
        if not hasattr(collection, 'member'):
            return bboxes
        for member in collection.member:
            for ge in member.geographicExtent:
                # NOTE(review): getGeograpicExtentGeometry returns None for
                # extents which are not bounding boxes and such None is
                # appended as well - confirm unifyGeometriesAsBBox copes with it
                bboxes.append(getGeograpicExtentGeometry(ge))

        return unifyGeometriesAsBBox(bboxes, self)

    def retrieveGUIDFromInstance(self, instance):
        """
            Returns the CedaGUID object associated with the given instance.
            @param instance: an instance of CEDA_Observation,
                CEDA_ObservationCollection or CEDA_Project
            @return: the first matching CedaGUID; None if there is none or
                if the instance is None, has no id or is of another type
        """
        if instance is None or not hasattr(instance, 'id'):
            return None
        if isinstance(instance, CEDA_ObservationCollection):
            guid_column = CedaGUID.ceda_observationcollection
        elif isinstance(instance, CEDA_Observation):
            guid_column = CedaGUID.ceda_observation
        elif isinstance(instance, CEDA_Project):
            guid_column = CedaGUID.ceda_project
        else:
            return None
        return self._session.query(CedaGUID).\
            filter(guid_column == instance.id).first()

    def _indexByGUID(self, instances):
        """
            Builds a dictionary of the given instances keyed by their GUID id.
            Instances without an associated CedaGUID are left out.
            @param instances: an iterable of CEDA objects
            @return: a dictionary {guid.id: instance}
        """
        ret = {}
        for item in instances:
            guid = self.retrieveGUIDFromInstance(item)
            if guid is not None:
                ret[guid.id] = item
        return ret

    def observationCollectionHasObservation(self, obs_coll_id, obs_id):
        """
            Checks if a CEDA_Collection contains a given CEDA_Observation.
            @param obs_coll_id: the CEDA_ObservationCollection id
            @param obs_id: the CEDA_Observation id
            @return: True if the collection contains the given observation,
                False otherwise (also when the collection or the observation do not exist)
        """
        coll = self._session.query(CEDA_ObservationCollection).\
            filter(CEDA_ObservationCollection.id == obs_coll_id).first()
        if coll is None:
            # without this guard 'coll.member' raises AttributeError
            return False
        obs = self._session.query(CEDA_Observation).\
            filter(CEDA_Observation.id == obs_id).first()
        return obs is not None and obs in coll.member

    def observationAuthor(self, observation):
        """
            Looks for the CEDA_Observation author.
            @param observation: the CEDA_Observation inside which look for the author
            @return: the party of the first relatedParty having the author role,
                None if there is no such relatedParty
        """
        # TO FIX!!!
        author_role = getCLValue(MM_RoleValue.cl_author)
        for partyInfo in observation.relatedParty:
            if partyInfo.role == author_role:
                return partyInfo.party
        return None

    def extractObservationByTitleKeywords(self, keywords):
        """
            Looks for CEDA_Observation containing a specific title (observation.identifier.code)
            @param keywords: the search terms, handed as they are to the PostgreSQL to_tsquery function
            @returns: dictionary where the keys are the GUID of the CEDA_Observation in the value
        """
        # code_search_vector is a tsvector column. To search for terms, you use the
        # @@ operator. to_tsquery turns a string into a query that can be
        # used with @@. So this adds a where clause like "WHERE code_search_vector
        # @@ to_tsquery(<search string>)"
        # NOTE(review): to_tsquery expects tsquery syntax (operators between
        # the terms) while this method used to be documented as accepting a
        # plain space separated string, which is what plainto_tsquery takes -
        # confirm which one the callers need.
        q = self._session.query(CEDA_Observation). \
            join(MO_Observation).join(MO_Observation.identifier). \
            filter('md_identifier.code_search_vector @@ to_tsquery(:terms)')
        # This binds the :terms placeholder to the searchterms string. User input
        # should always be put into queries this way to prevent SQL injection.
        q = q.params(terms=keywords)

        return self._indexByGUID(q.all())

    def extractCollectionIdentifierByTitle(self, i_title):
        """
            Searches for an MD_Identifier from a CEDA_ObservationCollection
            whose authority title contains the given text
            @param i_title: the CEDA_ObservationCollection.identifier.authority.title text to search for
            @return: a query (not yet executed) yielding
                (CEDA_ObservationCollection, MD_Identifier) tuples
        """
        return self._session.query(CEDA_ObservationCollection, MD_Identifier). \
            join(MO_ObservationCollection).join(MO_ObservationCollection.identifier). \
            join(MD_Identifier.authority).filter(CI_Citation.title.like('%' + i_title + '%'))

    def extractObservationsForProject(self, project):
        """
            Searches for the CEDA_Observation associated with a CEDA_Project
            @param project: a CEDA_Project instance
            @return: a query (not yet executed) on CEDA_Observation
        """
        # NOTE(review): the join target is CEDA_Observation itself while the
        # filter is expressed on CEDA_Project - confirm the join is the intended one.
        return self._session.query(CEDA_Observation). \
            join(CEDA_Observation, MO_Observation.inSupportOf).filter(CEDA_Project.id == project.id)

    def extractProjectObservationCollections(self, project):
        """
            Searches for the Observation_Collections associated with a CEDA_Project
            @param project: a CEDA_Project instance
            @return: a query (not yet executed) on MO_ObservationCollection
        """
        mo_obs = self._session.query(MO_Observation).join(CEDA_Project).\
            filter(CEDA_Project.id == project.id).subquery()
        obsers = self._session.query(CEDA_Observation).\
            join(mo_obs, CEDA_Observation.id == mo_obs.c.id).one()

        # NOTE(review): 'obsers' is the single result of .one() yet below it
        # is used like a selectable ('.any', '.c'); this looks unfinished -
        # confirm the intended query before relying on this method.
        observations = self._session.query(MO_ObservationCollection).\
            join(CEDA_Observation).\
            filter(obsers.any(CEDA_Observation.id == obsers.c.id))
        print("observation:" + str(observations.count()))
        return observations

    def search(self, clazz, inst_id=None):
        """
            Delegates to EPB.search using the instance session.
            @param clazz: the class type to search for
            @param inst_id: the instance id for which the search is done
        """
        return EPB.search(clazz, inst_id, self._session)

    def search_object_and_gui(self, clazz, inst_id=None):
        """
            Runs EPB.search and indexes the found instances by their GUID.
            @param clazz: the class type to search for
            @param inst_id: the instance id for which the search is done
            @return: a dictionary {guid.id: instance}; instances without a
                CedaGUID are left out
        """
        result = EPB.search(clazz, inst_id, self._session)
        return self._indexByGUID(result.all())

    def searchSelectiveLoad(self, clazz, inst_id, attributes):
        """
            Searches a required instance by id loading selectively
            the specified fields. The parameter "attributes" is a single string or a list of attributes
            owned by the instance of "clazz". Furthermore such list may contain
            also the children of the main attributes. For example "attrs" may look
            like
            ['resultAccumulation', 'identifier.authority', 'resultTime.position.dateTime8601.month',
                      'relatedParty.party', 'result.source.function', 'permission',
                      'geographicExtent', 'phenomenonTime', 'keywords', 'description',
                      'inSupportOf.abstract', 'dataLineage']
            the first parameter refers to the main class so is equivalent to
            clazz.resultAccumulation
            the second parameter is equivalent to invoke
            clazz.identifier.authority
            As single string "attributes" could be as well just 'identifier.authority'
            @param clazz: the class type to search for
            @param inst_id: the instance id for which the search is done
            @param attributes: a single string or a list of attributes to load
            @return the required instance
        """
        return EPB.searchSelectiveLoad(clazz, inst_id, attributes, self._session)

    def loadAttributes(self, instance, attributes):
        """
            Returns the attribute of an instance. The parameter "attributes" is a single string or a list of attributes
            owned by the instance of "clazz". Furthermore such list may contain
            also the children of the main attributes. For example "attrs" may look
            like
            ['resultAccumulation', 'identifier.authority', 'resultTime.position.dateTime8601.month',
                      'relatedParty.party', 'result.source.function', 'permission',
                      'geographicExtent', 'phenomenonTime', 'keywords', 'description',
                      'inSupportOf.abstract', 'dataLineage']
            the first parameter refers to the main class so is equivalent to
            clazz.resultAccumulation
            the second parameter is equivalent to invoke
            clazz.identifier.authority
            As single string "attributes" could be as well just 'identifier.authority'
            @param instance: an instance containing the appropriate id
            @param attributes: the attribute value required
            @return: the instance merged in the session, filled with the required attributes.
        """
        instance = self._session.merge(instance)
        EPB.loadAttributes(instance, attributes, self._session)
        return instance

    def executeNative(self, sqlNative):
        """
            Delegates to EPB.executeNative using the instance session.
            @param sqlNative: the native SQL to execute
        """
        return EPB.executeNative(sqlNative, self._session)
410
411
def getGeograpicExtentGeometry(ge):
    """
        Builds the postgis geometry matching an EX_GeographicExtent.
        Only EX_GeographicBoundingBox instances are handled.
        @param ge: an EX_GeographicExtent instance
        @return: the value returned by create_st_setSRID for the west, south,
            east and north bounds of the box; None if ge is not an
            EX_GeographicBoundingBox
    """
    if not isinstance(ge, EX_GeographicBoundingBox):
        return None
    west, south = ge.westBoundLongitude, ge.southBoundLatitude
    east, north = ge.eastBoundLongitude, ge.northBoundLatitude
    return create_st_setSRID(west, south, east, north)
422
423def _tmpstrftime(dt):
424    """
425        Returns a string from a datastring. This function is necessary because
426        python <3.2 strftime method is not able to handle date < 1900
427        @param dt: a datetime object
428    """
429    return "%s-%s-%s" % (dt.year, dt.month, dt.day)
430
def unify_observation_collection_phenomenon_time(collection):
    """
        Computes the overall time period covered by the phenomenonTime(s)
        of the collection members. Members without phenomenonTime are ignored.
        @param collection: an CEDA_ObservationCollection instance
        @return: a tuple (startDate, endDate) of strings: the earliest start
            and the latest end; each one is None if no member supplies it
    """
    date_format = '%Y-%m-%d'
    starts = []
    ends = []
    for member in collection.member:
        period = member.phenomenonTime
        if period is None:
            continue

        bounds = from_pt_to_string(period)
        if bounds[0] is not None:
            starts.append(datetime.strptime(bounds[0], date_format))
        if bounds[1] is not None:
            ends.append(datetime.strptime(bounds[1], date_format))

    # the earliest of the starts and the latest of the ends
    earliest = _tmpstrftime(min(starts)) if starts else None
    latest = _tmpstrftime(max(ends)) if ends else None
    return earliest, latest
Note: See TracBrowser for help on using the repository browser.