source: mauRepo/MolesManager/trunk/cedaMoles/MolesManager/moles3epb.py @ 8538

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/mauRepo/MolesManager/trunk/cedaMoles/MolesManager/moles3epb.py@8538
Revision 8538, 20.7 KB checked in by mnagni, 7 years ago (diff)

Incomplete - # 22528: Migration of FAtCat Open Search link for HPFeld
 http://team.ceda.ac.uk/trac/ceda/ticket/22528

RevLine 
[8077]1'''
[8358]2BSD Licence
3Copyright (c) 2012, Science & Technology Facilities Council (STFC)
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without modification,
7are permitted provided that the following conditions are met:
8
9    * Redistributions of source code must retain the above copyright notice,
10        this list of conditions and the following disclaimer.
11    * Redistributions in binary form must reproduce the above copyright notice,
12        this list of conditions and the following disclaimer in the documentation
13        and/or other materials provided with the distribution.
14    * Neither the name of the Science & Technology Facilities Council (STFC)
15        nor the names of its contributors may be used to endorse or promote
16        products derived from this software without specific prior written permission.
17
18THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
22BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
23OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
[8077]29Created on 10 Jan 2012
30
31@author: mnagni
32'''
[8486]33from cedaMoles.libs.epb import EPB
[8514]34from ea_model.moles3_4.observationcollection.mo_observationcollection \
35    import MO_ObservationCollection
[8205]36from ea_model.moles3_4.observation.mo_observation import MO_Observation
[8482]37from sqlalchemy import Table, Column, ForeignKey, Integer, String, event
[8236]38from sqlalchemy.orm import mapper
[8486]39from cedaMoles.MolesManager.ceda_guid import CedaGUID
[8323]40from sqlalchemy.orm.collections import InstrumentedList
[8514]41from ea_model.iso_19115_2006_metadata_corrigendum.\
42    reference_system_information.md_identifier import MD_Identifier
43from ea_model.iso_19115_2006_metadata_corrigendum.\
44    citation_and_responsible_party_information.ci_citation import CI_Citation
45from ea_model.iso_19115_2006_metadata_corrigendum.\
46    extent_information.ex_geographicboundingbox import EX_GeographicBoundingBox
[8486]47from cedaMoles.libs.postgisutil import create_st_setSRID, unifyGeometriesAsBBox
[8514]48from ea_model.ceda_metadatamodel.ceda_observationcollection.\
49    ceda_observationcollection import CEDA_ObservationCollection
50from ea_model.ceda_metadatamodel.ceda_observation.\
51    ceda_observation import CEDA_Observation
[8409]52from ea_model.ceda_metadatamodel.ceda_project.ceda_project import CEDA_Project
[8486]53from cedaMoles.MolesManager.codelist import MM_RoleValue, getCLValue
[8496]54from cedaMoles.libs.migration.processor.commons import from_pt_to_string
[8445]55from datetime import datetime
56from ascore.utils import synchAttributes
[8486]57from cedaMoles.MolesManager.db.partyIndexes import associateMOParty_indexes
58from cedaMoles.MolesManager.db.after_flush import afterFlush
[8077]59
class Moles3EPBFactory(EPB):
    """
    Factory of Moles3EPB instances.

    On construction it applies the CEDA specific database customizations
    (GUID table, party indexes, full-text search index on md_identifier)
    and afterwards hands out session-bound Moles3EPB objects.
    """

    def __init__(self, db_manager):
        """
            @param db_manager: the database manager handed to EPB.__init__;
            this class reads its `metadata` and `engine` attributes through
            self._db_manager (presumably set by EPB.__init__ - confirm)
        """
        super(Moles3EPBFactory, self).__init__(db_manager)
        self._init_ceda_customization()

    def _init_ceda_customization(self):
        """
            Runs, in order, the GUID table setup, the party indexes setup
            and the search indexes setup.
        """
        self._associate_ceda_guid()
        associateMOParty_indexes(self._db_manager.metadata)
        self._initSearchIndexes()

    def _associate_ceda_guid(self):
        """
            Declares the 'ceda_guid' table, maps CedaGUID onto it and asks
            the metadata to create the missing tables.
        """
        metadata = self._db_manager.metadata
        guid_table = Table(
            'ceda_guid', metadata,
            Column('id', String, primary_key=True),
            Column('ceda_observationcollection',
                   Integer, ForeignKey('ceda_observationcollection.id')),
            Column('ceda_observation',
                   Integer, ForeignKey('ceda_observation.id')),
            Column('ceda_project',
                   Integer, ForeignKey('ceda_project.id')))
        mapper(CedaGUID, guid_table)
        metadata.create_all()

    def _initSearchIndexes(self):
        """
            Adds, best effort, the full-text search support to md_identifier:
            a tsvector column, a GIN index over it and the trigger keeping
            the column up to date.

            The column is added externally because sqlalchemy is not meant
            to know about it. Nothing checks whether the objects already
            exist (still to be done), so a failing statement is expected on
            an already initialized database: it is logged at debug level
            and never propagated.
        """
        # Local import: the module import block is left untouched.
        import logging
        log = logging.getLogger(__name__)

        statements = (
            # The tsvector column
            "alter table md_identifier \
                add column code_search_vector tsvector",
            # This indexes the tsvector column
            "create index \
                md_identifier_code_search_index on md_identifier using gin(code_search_vector)",
            # This sets up the trigger that keeps the tsvector column up to date.
            "create trigger \
                md_identifier_code_search_update \
                before update or insert on md_identifier \
                for each row execute procedure \
                tsvector_update_trigger('code_search_vector', \
                                        'pg_catalog.english', code)",
        )
        # Each statement is attempted on its own, so that the failure of an
        # earlier one (e.g. the column already exists) does not prevent the
        # later ones from being attempted.
        for statement in statements:
            try:
                self._db_manager.engine.execute(statement)
            except Exception as ex:
                log.debug("md_identifier search setup statement skipped: %s", ex)

    def createEPB(self):
        """
            Creates a new Moles3EPB bound to a session obtained from
            self._get_session(); afterFlush is registered as listener of the
            session 'after_flush' event.
            @return: a Moles3EPB instance
        """
        session = self._get_session()
        event.listen(session, 'after_flush', afterFlush)
        return Moles3EPB(session)
[8088]108
class Moles3EPB(object):
    """
    Session-bound access point to the MOLES3 database.

    Every method works on the session received by the constructor; most of
    them simply delegate to the matching EPB helper passing that session.
    """

    def __init__(self, session):
        """
            @param session: the session used by all the methods of this instance
        """
        self._session = session

    def close(self):
        """
            Closes the underlying session.
            @return: whatever session.close() returns
        """
        return self._session.close()

    def searchEager(self, clazz, inst_id):
        """
            Delegates to EPB.searchEager using this instance session.
            @param clazz: the class type to search for
            @param inst_id: the instance id for which the search is done
        """
        return EPB.searchEager(clazz, inst_id, self._session)

    def _controlledCommit(self):
        """
            Commits the session; a failure is printed and not propagated.
        """
        try:
            self._session.commit()
        except Exception as e:
            # Parenthesized form: valid on python 3 and prints the same on python 2
            print(e)

    def rollback(self):
        """
        Rolls back the session through EPB.rollback (according to the
        original note a new transaction is opened afterwards; that logic
        lives in EPB).
        """
        EPB.rollback(self._session)

    def persistInstance(self, instance):
        """
        Adds a new migration object (delegates to EPB.persistInstance).
        **Parameters**
            * instance: the object to add

        **Returns**
            Nothing: the result of EPB.persistInstance is not propagated.
        """
        EPB.persistInstance(instance, self._session)

    def mergeInstance(self, instance):
        """
        Copy the state of an instance onto the persistent instance with the same identifier.
        **Parameters**
            * instance: the object to merge

        **Returns**
            The result of EPB.mergeInstance (described by the original author
            as an updated object instance reflecting the persisted object)
        """
        return EPB.mergeInstance(instance, self._session)

    def refresh(self, instance):
        """
        Expires and refresh the attributes on the given instance.
        **Parameters**
            * instance: the object to refresh
        """
        EPB.refresh(instance, self._session)

    def deleteInstance(self, instance, commit=False):
        """
            Deletes an object.

            **Parameters**
            * `object` **instance**
                the object to delete
            * `bool` **commit**
                Defines if the delete operation has to be immediately committed.
                    Default is `False`
        """
        EPB.deleteInstance(instance, self._session, commit)

    def expunge(self, instance):
        """
        Expunges an object from the session
        **Parameters**
            * instance: the object to expunge
        """
        EPB.expunge(instance, self._session)

    def _mergeOrAddInSession(self, ceda_object):
        """
            Merges the object into the session; if the merge fails the
            object is added to the session instead.
            @param ceda_object: the object to merge or add
            @return: the merged instance or, after a failed merge, the given object
        """
        try:
            return self._session.merge(ceda_object)
        except Exception:
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through
            self._session.add(ceda_object)
            return ceda_object

    def updateCedaObject(self, ceda_object, cols_to_update):
        """
            Update, eventually add to the session, and commit a CEDA Object in MOLES3 db.
            @param ceda_object: the CEDA object to update
            @param cols_to_update: a dictionary containing the columns to update
            for the given ceda_object and the desired value. Entries whose value
            is None, or whose key is not an attribute of the object, are skipped.
            If the attribute is a list of objects the new instances are appended
            only if they do not exist in the actual list
            @return: None (the updated instance is not returned)
        """
        coll = self._mergeOrAddInSession(ceda_object)
        if coll is not None:
            for k, v in cols_to_update.items():
                if v is None or not hasattr(coll, k):
                    continue
                coll_k = getattr(coll, k)
                if isinstance(coll_k, (list, InstrumentedList)):
                    # Work on a copy: v may be the very list being extended
                    if isinstance(v, (list, InstrumentedList)):
                        new_items = list(v)
                    else:
                        new_items = [v]
                    for item in new_items:
                        el = self._mergeOrAddInSession(item)
                        if el not in coll_k:
                            coll_k.append(el)
                else:
                    setattr(coll, k, self._mergeOrAddInSession(v))
        synchAttributes(coll)
        self._controlledCommit()

    def getUnifyObservationCollectionGEAsBBox(self, collection):
        """
            Returns the union of the collection.member's GeographicExtension(s)
            @param collection: a CEDA_ObservationCollection instance
            @return: an empty list if the collection has no 'member' attribute,
            otherwise the result of unifyGeometriesAsBBox
        """
        bboxes = []
        if not hasattr(collection, 'member'):
            return bboxes
        for member in collection.member:
            for ge in member.geographicExtent:
                # NOTE(review): getGeograpicExtentGeometry returns None for an
                # extent which is not a bounding box, so None may end up in the
                # list - confirm unifyGeometriesAsBBox copes with it
                bboxes.append(getGeograpicExtentGeometry(ge))
        return unifyGeometriesAsBBox(bboxes, self)

    def retrieveGUIDFromInstance(self, instance):
        """
            Returns the CedaGUID object associated with the given instance.
            @param instance: an instance of CEDA_ObservationCollection,
            CEDA_Observation or CEDA_Project
            @return: the first matching CedaGUID, None if there is no match,
            if the instance is None, has no 'id' or is of another type
        """
        if instance is None or not hasattr(instance, 'id'):
            return None
        if isinstance(instance, CEDA_ObservationCollection):
            guid_column = CedaGUID.ceda_observationcollection
        elif isinstance(instance, CEDA_Observation):
            guid_column = CedaGUID.ceda_observation
        elif isinstance(instance, CEDA_Project):
            guid_column = CedaGUID.ceda_project
        else:
            return None
        return self._session.query(CedaGUID).\
            filter(guid_column == instance.id).first()

    def _index_by_guid(self, items):
        """
            Builds a dictionary of the given instances keyed by the id of
            their CedaGUID; instances without a CedaGUID are left out.
            @param items: an iterable of instances
            @return: a dictionary {guid.id: instance}
        """
        ret = {}
        for item in items:
            guid = self.retrieveGUIDFromInstance(item)
            if guid is not None:
                ret[guid.id] = item
        return ret

    def observationCollectionHasObservation(self, obs_coll_id, obs_id):
        """
            Checks if a CEDA_Collection contains a given CEDA_Observation.
            @param obs_coll_id: the CEDA_ObservationCollection id
            @param obs_id: the CEDA_Observation id
            @return: True if the collection contains the given observation,
            False otherwise (also when either id matches nothing)
        """
        coll = self._session.query(CEDA_ObservationCollection).\
            filter(CEDA_ObservationCollection.id == obs_coll_id).first()
        obs = self._session.query(CEDA_Observation).\
            filter(CEDA_Observation.id == obs_id).first()
        # first() returns None on no match: coll.member would then raise
        if coll is None or obs is None:
            return False
        return obs in coll.member

    def observationAuthor(self, observation):
        """
            Looks for the CEDA_Observation author.
            @param observation: the CEDA_Observation inside which look for the author
            @return: the party of the first relatedParty whose role is the
            'author' codelist value, None if there is none
        """
        # TODO: flagged "TO FIX" by the original author, no detail given
        author_role = getCLValue(MM_RoleValue.cl_author)
        for partyInfo in observation.relatedParty:
            if partyInfo.role == author_role:
                return partyInfo.party
        return None

    def extractObservationByTitleKeywords(self, keywords):
        """
            Looks for CEDA_Observation containing a specific title (observation.identifier.code)
            @param keywords: the search terms string, bound to to_tsquery
            @return: a dictionary where the keys are the GUID ids and the
            values the matching CEDA_Observation
        """
        # code_search_vector is a tsvector column. To search for terms, you use
        # the @@ operator against a tsquery built from the search string.
        # NOTE(review): the original comments describe plainto_tsquery while
        # the query uses to_tsquery, which expects tsquery syntax (operators
        # between the terms) rather than plain space separated words - confirm
        # what the callers pass.
        q = self._session.query(CEDA_Observation). \
            join(MO_Observation).join(MO_Observation.identifier). \
            filter('md_identifier.code_search_vector @@ to_tsquery(:terms)')
        # This binds the :terms placeholder to the search terms string. User input
        # should always be put into queries this way to prevent SQL injection.
        q = q.params(terms=keywords)
        return self._index_by_guid(q.all())

    def extractCollectionIdentifierByTitle(self, i_title):
        """
            Searches for an MD_Identifier from a CEDA_ObservationCollection
            whose authority title contains the given text
            @param i_title: the text to search for in CI_Citation.title
            @return: a query over (CEDA_ObservationCollection, MD_Identifier) pairs
        """
        return self._session.query(CEDA_ObservationCollection, MD_Identifier). \
            join(MO_ObservationCollection).join(MO_ObservationCollection.identifier). \
            join(MD_Identifier.authority).filter(CI_Citation.title.like('%' + i_title + '%'))

    def extractObservationsForProject(self, project):
        """
            Searches for the CEDA_Observation associated with a CEDA_Project
            @param project: a CEDA_Project instance
            @return: a query over the associated CEDA_Observation
        """
        return self._session.query(CEDA_Observation). \
            join(CEDA_Observation, MO_Observation.inSupportOf).filter(CEDA_Project.id == project.id)

    def extractProjectObservationCollections(self, project):
        """
            Searches for the Observation_Collections associated with a CEDA_Project
            @param project: a CEDA_Project instance
            @return: a query over MO_ObservationCollection
        """
        # NOTE(review): `obsers` is the result of .one(), yet below it is used
        # like a selectable (.any, .c): this method looks unfinished - confirm
        # the intended query before relying on it.
        mo_obs = self._session.query(MO_Observation).join(CEDA_Project).\
            filter(CEDA_Project.id == project.id).subquery()
        obsers = self._session.query(CEDA_Observation).\
            join(mo_obs, CEDA_Observation.id == mo_obs.c.id).one()

        observations = self._session.query(MO_ObservationCollection).\
            join(CEDA_Observation).\
            filter(obsers.any(CEDA_Observation.id == obsers.c.id))
        print("observation:" + str(observations.count()))
        return observations

    def search(self, clazz, inst_id=None):
        """
            Delegates to EPB.search using this instance session.
            @param clazz: the class type to search for
            @param inst_id: the instance id, None by default
        """
        return EPB.search(clazz, inst_id, self._session)

    def search_object_and_gui(self, clazz, inst_id=None):
        """
            Runs EPB.search and indexes the results by GUID.
            @param clazz: the class type to search for
            @param inst_id: the instance id, None by default
            @return: a dictionary {guid.id: instance}; instances without a
            CedaGUID are left out
        """
        result = EPB.search(clazz, inst_id, self._session)
        return self._index_by_guid(result.all())

    def searchSelectiveLoad(self, clazz, inst_id, attributes):
        """
            Searches a required instance by id loading selectively
            the specified fields. The parameter "attributes" is a single string or a list of attributes
            owned by the instance of "clazz". Furthermore such list may contain
            also the children of the main attributes. For example "attrs" may look
            like
            ['resultAccumulation', 'identifier.authority', 'resultTime.position.dateTime8601.month',
                      'relatedParty.party', 'result.source.function', 'permission',
                      'geographicExtent', 'phenomenonTime', 'keywords', 'description',
                      'inSupportOf.abstract', 'dataLineage']
            the first parameter refers to the main class so is equivalent to
            clazz.resultAccumulation
            the second parameter is equivalent to invoke
            clazz.identifier.authority
            As single string "attributes" could be as well just 'identifier.authority'
            @param clazz: the class type to search for
            @param inst_id: the instance id for which the search is done
            @param attributes: a single string or a list of attributes to load
            @return: the result of EPB.searchSelectiveLoad (the required instance)
        """
        return EPB.searchSelectiveLoad(clazz, inst_id, attributes, self._session)

    def loadAttributes(self, instance, attributes):
        """
            Loads the attributes of an instance. The parameter "attributes" is a single string or a list of attributes
            owned by the instance. Furthermore such list may contain
            also the children of the main attributes. For example "attrs" may look
            like
            ['resultAccumulation', 'identifier.authority', 'resultTime.position.dateTime8601.month',
                      'relatedParty.party', 'result.source.function', 'permission',
                      'geographicExtent', 'phenomenonTime', 'keywords', 'description',
                      'inSupportOf.abstract', 'dataLineage']
            the first parameter refers to the main class so is equivalent to
            instance.resultAccumulation
            the second parameter is equivalent to invoke
            instance.identifier.authority
            As single string "attributes" could be as well just 'identifier.authority'
            @param instance: an instance containing the appropriate id
            @param attributes: the attribute value required
            @return: the instance merged into the session, after EPB.loadAttributes
            has been applied to it
        """
        instance = self._session.merge(instance)
        EPB.loadAttributes(instance, attributes, self._session)
        return instance

    def executeNative(self, sqlNative):
        """
            Delegates to EPB.executeNative using this instance session.
            @param sqlNative: the native SQL to execute
        """
        return EPB.executeNative(sqlNative, self._session)
410
411
def getGeograpicExtentGeometry(ge):
    '''
        Creates the appropriate postgis geometry from an EX_GeographicExtent
        @param ge: an EX_GeographicExtent instance
        @return: the result of create_st_setSRID (described by the original
        author as a postgis text geometry) when ge is an
        EX_GeographicBoundingBox, None for any other kind of extent
    '''
    if not isinstance(ge, EX_GeographicBoundingBox):
        return None
    # Corners are passed as west, south, east, north
    return create_st_setSRID(ge.westBoundLongitude, ge.southBoundLatitude,
                             ge.eastBoundLongitude, ge.northBoundLatitude)
422
[8474]423def _tmpstrftime(dt):
424    """
425        Returns a string from a datastring. This function is necessary because
426        python <3.2 strftime method is not able to handle date < 1900
427        @param dt: a datetime object
428    """
429    return "%s-%s-%s" % (dt.year, dt.month, dt.day)
430
def unify_observation_collection_phenomenon_time(collection):
    """
        Returns the time period covered by the collection.member's phenomenonTime(s)
        @param collection: a CEDA_ObservationCollection instance
        @return: a tuple (startDate, endDate) of strings: the earliest start
        and the latest end among the members; an element is None when no
        member supplies it (also when the collection has no members)
    """
    date_format = '%Y-%m-%d'
    starts = []
    ends = []
    # A collection without a (non empty) 'member' simply yields (None, None)
    for member in getattr(collection, 'member', None) or ():
        pt = member.phenomenonTime
        if pt is None:
            continue
        # from_pt_to_string result is read as (start string, end string)
        pt_string = from_pt_to_string(pt)
        if pt_string[0] is not None:
            starts.append(datetime.strptime(pt_string[0], date_format))
        if pt_string[1] is not None:
            ends.append(datetime.strptime(pt_string[1], date_format))
    # takes the earliest start and the latest end
    start = _tmpstrftime(min(starts)) if starts else None
    end = _tmpstrftime(max(ends)) if ends else None
    return start, end
Note: See TracBrowser for help on using the repository browser.