source: mauRepo/MolesManager/trunk/src/MolesManager/moles3epb.py @ 8474

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/mauRepo/MolesManager/trunk/src/MolesManager/moles3epb.py@8474
Revision 8474, 19.5 KB checked in by mnagni, 8 years ago (diff)

Fixed:
date handling (dates before 1900)
None value in updateCedaObject

Line 
1'''
2BSD Licence
3Copyright (c) 2012, Science & Technology Facilities Council (STFC)
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without modification,
7are permitted provided that the following conditions are met:
8
9    * Redistributions of source code must retain the above copyright notice,
10        this list of conditions and the following disclaimer.
11    * Redistributions in binary form must reproduce the above copyright notice,
12        this list of conditions and the following disclaimer in the documentation
13        and/or other materials provided with the distribution.
14    * Neither the name of the Science & Technology Facilities Council (STFC)
15        nor the names of its contributors may be used to endorse or promote
16        products derived from this software without specific prior written permission.
17
18THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
22BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
23OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29Created on 10 Jan 2012
30
31@author: mnagni
32'''
import logging
from datetime import datetime

from sqlalchemy import Table, Column, ForeignKey, Integer, String
from sqlalchemy.orm import mapper
from sqlalchemy.orm.collections import InstrumentedList

from libs.epb import EPB
from ea_model.moles3_4.observationcollection.mo_observationcollection import MO_ObservationCollection
from ea_model.moles3_4.observation.mo_observation import MO_Observation
from MolesManager.ceda_guid import CedaGUID
from ea_model.iso_19115_2006_metadata_corrigendum.reference_system_information.md_identifier import MD_Identifier
from ea_model.iso_19115_2006_metadata_corrigendum.citation_and_responsible_party_information.ci_citation import CI_Citation
from ea_model.iso_19115_2006_metadata_corrigendum.extent_information.ex_geographicboundingbox import EX_GeographicBoundingBox
from libs.postgisutil import unifyGeometries, create_st_setSRID, getBox2D,\
    unifyGeometriesAsBBox
from ea_model.ceda_metadatamodel.ceda_observationcollection.ceda_observationcollection import CEDA_ObservationCollection
from ea_model.ceda_metadatamodel.ceda_observation.ceda_observation import CEDA_Observation
from ea_model.ceda_metadatamodel.ceda_project.ceda_project import CEDA_Project
from ea_model.moles3_4.utilities.mo_responsiblepartyinfo import MO_ResponsiblePartyInfo
from ea_model.moles3_4.utilities.ceda_rolevalue import CEDA_RoleValue
from MolesManager.codelist import MM_RoleValue, getCLValue
from libs.migration.processor.commons import fromPhenomenonTimeToString
from ascore.utils import synchAttributes
54
class Moles3EPBFactory(EPB):
    """
        Factory of Moles3EPB instances bound to one database manager.

        Building the factory also applies the CEDA customization of the
        database: the ceda_guid table/mapping and the full text search
        support on md_identifier.
    """

    def __init__(self, dbManager):
        """
            @param dbManager: the object giving access to the database; this class
            uses its "metadata", "engine" and "createDbSession()" members
        """
        self._dbManager = dbManager
        self._initCEDA_Customization()

    def _initCEDA_Customization(self):
        """
            Runs, in order, the GUID table association and the search indexes setup.
        """
        self._associateCEDA_GUID()
        self._initSearchIndexes()

    def _associateCEDA_GUID(self):
        """
            Declares the ceda_guid table, maps CedaGUID onto it and asks the
            metadata to create the declared tables.
        """
        guid_table = Table('ceda_guid', self._dbManager.metadata,
                           Column('id', String, primary_key=True),
                           Column('ceda_observationcollection', Integer, ForeignKey('ceda_observationcollection.id')),
                           Column('ceda_observation', Integer, ForeignKey('ceda_observation.id')))
        mapper(CedaGUID, guid_table)
        self._dbManager.metadata.create_all()

    def _initSearchIndexes(self):
        """
            Adds to md_identifier a tsvector column, its gin index and the
            trigger keeping the column up to date.

            The column is added with raw SQL because sqlalchemy must not know
            about it. The setup is best effort: every statement is attempted
            on its own and a failure is logged, never raised, so that one
            failing statement does not prevent the following ones.
            NOTE(review): a failure presumably means that the column, index or
            trigger already exists -- there is still no explicit existence check.
        """
        log = logging.getLogger(__name__)
        statements = (
            # The column holding the search vector
            "alter table md_identifier add column code_search_vector tsvector",
            # This indexes the tsvector column
            "create index md_identifier_code_search_index on md_identifier using gin(code_search_vector)",
            # This sets up the trigger that keeps the tsvector column up to date.
            "create trigger md_identifier_code_search_update before update or insert on md_identifier \
                for each row execute procedure tsvector_update_trigger('code_search_vector', 'pg_catalog.english', code)",
        )
        for statement in statements:
            try:
                self._dbManager.engine.execute(statement)
            except Exception:
                log.debug("Search index setup statement not applied: %s",
                          statement, exc_info=True)

    def _getSession(self):
        """
            @return: a new session created by the database manager or None if
            no database manager is set
        """
        if self._dbManager is not None:
            return self._dbManager.createDbSession()
        return None

    def createEPB(self):
        """
            @return: a new Moles3EPB wrapping a newly created session
        """
        return Moles3EPB(self._getSession())
97
class Moles3EPB(object):
    """
        Access point to the MOLES3 database bound to a single session.
        Every method works on the session received by the constructor.
    """

    def __init__(self, session):
        """
            @param session: the session used by all the operations of this instance
        """
        self._session = session

    def close(self):
        """
            Closes the underlying session.
            @return: the value returned by the session close()
        """
        return self._session.close()

    def searchEager(self, clazz, inst_id):
        """
            Delegates to EPB.searchEager using the instance session.
            @param clazz: the class type to search for
            @param inst_id: the instance id for which the search is done
            @return: the value returned by EPB.searchEager
        """
        return EPB.searchEager(clazz, inst_id, self._session)

    def _controlledCommit(self):
        """
            Commits the session without propagating a failure to the caller.
            A failed commit is logged and the session is rolled back: until
            rollback() is called a session whose commit failed refuses any
            further operation.
        """
        try:
            self._session.commit()
        except Exception:
            log = logging.getLogger(__name__)
            log.exception("Commit failed, rolling back the session")
            try:
                self._session.rollback()
            except Exception:
                # keeps the "never raises" behaviour of this method
                log.exception("Rollback after the failed commit failed as well")

    def persistInstance(self, instance):
        """
            Persists an instance delegating to EPB.persistInstance with the
            instance session. Nothing is returned.
            @param instance: the object to persist
        """
        EPB.persistInstance(instance, self._session)

    def _mergeOrAddInSession(self, ceda_object):
        """
            Merges the object in the session; if the merge fails the object is
            added to the session instead.
            @param ceda_object: the object to attach to the session
            @return: the merged instance or, when the merge failed, the given object
        """
        try:
            return self._session.merge(ceda_object)
        except Exception:
            self._session.add(ceda_object)
            return ceda_object

    def updateCedaObject(self, ceda_object, cols_to_update):
        """
            Update, eventually add to the session, and commit a CEDA Object in MOLES3 db.
            Nothing is returned.
            @param ceda_object: the CEDA object to update
            @param cols_to_update: a dictionary containing the columns to update for the given ceda_object and the desired value.
            Entries whose value is None, or naming an attribute the object does not have, are ignored.
            If the attribute is a list of objects the new instances are appended only if do not exist in the actual list
        """
        coll = self._mergeOrAddInSession(ceda_object)
        if coll is not None:
            for k, v in cols_to_update.items():
                if v is None or not hasattr(coll, k):
                    continue
                coll_k = getattr(coll, k)
                # InstrumentedList is a list subclass, so one check covers both
                if isinstance(coll_k, list):
                    new_items = list(v) if isinstance(v, list) else [v]
                    for item in new_items:
                        el = self._mergeOrAddInSession(item)
                        if el not in coll_k:
                            coll_k.append(el)
                else:
                    setattr(coll, k, self._mergeOrAddInSession(v))
        synchAttributes(coll)
        self._controlledCommit()

    def getUnifyObservationCollectionGEAsBBox(self, collection):
        """
            Returns the union of the collections.member's GeographicExtension(s)
            @param collection: a CEDA_ObservationCollection instance
            @return: the value returned by unifyGeometriesAsBBox
        """
        bboxes = []
        for member in collection.member:
            for ge in member.geographicExtent:
                # NOTE: this is None for any extent which is not a bounding box
                bboxes.append(getGeograpicExtentGeometry(ge))
        return unifyGeometriesAsBBox(bboxes, self)

    def getUnifyObservationCollectionPhenomenonTime(self, collection):
        """
            Returns the time period of the collections.member's phenomenonTime(s)
            NOTE(review): here member.phenomenonTime is iterated as a sequence
            while the module level function with the same name treats it as a
            single value -- confirm which one matches the model.
            @param collection: a CEDA_ObservationCollection instance
            @return: a tuple (startDate, endDate) of datetime objects; an element
            is None when no corresponding date is available
        """
        ptStart = []
        ptEnd = []
        for member in collection.member:
            for pt in member.phenomenonTime:
                ptString = fromPhenomenonTimeToString(pt)
                if ptString[0] is not None:
                    ptStart.append(datetime.strptime(ptString[0], '%Y-%m-%d'))
                if ptString[1] is not None:
                    ptEnd.append(datetime.strptime(ptString[1], '%Y-%m-%d'))
        # takes the earlier start date and the latest end date
        start = min(ptStart) if ptStart else None
        end = max(ptEnd) if ptEnd else None
        return start, end

    def retrieveGUIDFromInstance(self, instance):
        """
            Returns the CedaGUID object associated with the given instance.
            @param instance: an instance of CEDA_Observation or CEDA_ObservationCollection
            @return: the first matching CedaGUID or None if there is none, if the
            instance is None, has no id or is of another type
        """
        if instance is None or not hasattr(instance, 'id'):
            return None
        if isinstance(instance, CEDA_ObservationCollection):
            return self._session.query(CedaGUID).filter(CedaGUID.ceda_observationcollection==instance.id).first()
        if isinstance(instance, CEDA_Observation):
            return self._session.query(CedaGUID).filter(CedaGUID.ceda_observation==instance.id).first()
        return None

    def observationCollectionHasObservation(self, obs_coll_id, obs_id):
        """
            Checks if a CEDA_Collection contains a given CEDA_Observation.
            @param obs_coll_id: the CEDA_ObservationCollection id
            @param obs_id: the CEDA_Observation id
            @return: True if the collection contains the given observation, False otherwise
            (also when the collection or the observation does not exist)
        """
        coll = self._session.query(CEDA_ObservationCollection).filter(CEDA_ObservationCollection.id==obs_coll_id).first()
        if coll is None:
            # an unknown collection cannot contain anything
            return False
        obs = self._session.query(CEDA_Observation).filter(CEDA_Observation.id==obs_id).first()
        return obs is not None and obs in coll.member

    def observationAuthor(self, observation):
        """
            Looks for the author of a CEDA_Observation.
            @param observation: the CEDA_Observation inside which look for the author
            @return: the party of the first relatedParty whose role is the author
            code list value, None if there is no such relatedParty
        """
        # TODO: flagged "TO FIX" by the original author, the reason is not recorded
        author_role = getCLValue(MM_RoleValue.cl_author)
        for partyInfo in observation.relatedParty:
            if partyInfo.role == author_role:
                return partyInfo.party
        return None

    def extractObservationByTitleKeywords(self, keywords):
        """
            Looks for CEDA_Observation containing a specific title (observation.identifier.code)
            @param keywords: the terms to search for, given to to_tsquery as they are
            @return: a list of the CEDA_Observation satisfying the query
        """
        # code_search_vector is a ts_vector column. To search for terms, you use the
        # @@ operator. to_tsquery turns a string into a query that can be
        # used with @@. So this adds a where clause like "WHERE code_search_vector
        # @@ to_tsquery(<search string>)"
        # NOTE(review): to_tsquery expects the tsquery syntax (terms joined by
        # operators like &); plain space separated terms would need
        # plainto_tsquery -- confirm what the callers pass.
        q = self._session.query(CEDA_Observation). \
            join(MO_Observation).join(MO_Observation.identifier). \
            filter('md_identifier.code_search_vector @@ to_tsquery(:terms)')
        # This binds the :terms placeholder to the searchterms string. User input
        # should always be put into queries this way to prevent SQL injection.
        q = q.params(terms=keywords)
        return q.all()

    def extractCollectionIdentifierByTitle(self, i_title):
        """
            Searches for the MD_Identifier of a CEDA_ObservationCollection whose
            authority title contains a given text
            @param i_title: the text the CI_Citation.title has to contain
            @return: a query, not yet executed, over the
            (CEDA_ObservationCollection, MD_Identifier) pairs having such title
        """
        return self._session.query(CEDA_ObservationCollection, MD_Identifier). \
            join(MO_ObservationCollection).join(MO_ObservationCollection.identifier). \
            join(MD_Identifier.authority).filter(CI_Citation.title.like('%' + i_title + '%'))

    def extractObservationsForProject(self, project):
        """
            Searches for the CEDA_Observation associated with a CEDA_Project
            @param project: a CEDA_Project instance
            @return: a query, not yet executed, over the associated CEDA_Observation
        """
        return self._session.query(CEDA_Observation). \
            join(CEDA_Observation, MO_Observation.inSupportOf).filter(CEDA_Project.id == project.id)

    def extractProjectObservationCollections(self, project):
        """
            Searches for the Observation_Collections associated with a CEDA_Project
            @param project: a CEDA_Project instance
            @return: a query over the associated MO_ObservationCollection
        """
        mo_obs = self._session.query(MO_Observation).join(CEDA_Project).filter(CEDA_Project.id == project.id).subquery()
        obsers = self._session.query(CEDA_Observation).join(mo_obs, CEDA_Observation.id == mo_obs.c.id).one()
        # NOTE(review): "obsers" is the single entity returned by one(), while
        # the filter below uses it like a subquery/relationship (any(), .c) --
        # this looks unfinished, confirm the intended query.
        observations = self._session.query(MO_ObservationCollection).join(CEDA_Observation). \
            filter(obsers.any(CEDA_Observation.id==obsers.c.id))
        logging.getLogger(__name__).debug("observation:%s", observations.count())
        return observations

    def search(self, clazz, inst_id):
        """
            Delegates to EPB.search using the instance session.
            @param clazz: the class type to search for
            @param inst_id: the instance id for which the search is done
            @return: the value returned by EPB.search
        """
        return EPB.search(clazz, inst_id, self._session)

    def searchSelectiveLoad(self, clazz, inst_id, attributes):
        """
            Searches a required instance by id loading selectively \
            the specified fields. The parameter "attributes" is a single string or a list of attributes
            owned by the instance of "clazz". Furthermore such list may contain
            also the children of the main attributes. For example "attrs" may look
            like
            ['resultAccumulation', 'identifier.authority', 'resultTime.position.dateTime8601.month', \
                      'relatedParty.party', 'result.source.function', 'permission', \
                      'geographicExtent', 'phenomenonTime', 'keywords', 'description', \
                      'inSupportOf.abstract', 'dataLineage']
            the first parameter refers to the main class so is equivalent to
            clazz.resultAccumulation
            the second parameter is equivalent to invoke
            clazz.identifier.authority
            As single string "attributes" could be as well just 'identifier.authority'
            The query always runs on the instance session.
            @param clazz: the class type to search for
            @param inst_id: the instance id for which the search is done
            @param attributes: a single string or a list of attributes to load
            @return the required instance
        """
        return EPB.searchSelectiveLoad(clazz, inst_id, attributes, self._session)

    def loadAttributes(self, instance, attributes):
        """
            Returns the attribute of an instance. The parameter "attributes" is a single string or a list of attributes
            owned by the instance of "clazz". Furthermore such list may contain
            also the children of the main attributes. For example "attrs" may look
            like
            ['resultAccumulation', 'identifier.authority', 'resultTime.position.dateTime8601.month', \
                      'relatedParty.party', 'result.source.function', 'permission', \
                      'geographicExtent', 'phenomenonTime', 'keywords', 'description', \
                      'inSupportOf.abstract', 'dataLineage']
            the first parameter refers to the main class so is equivalent to
            clazz.resultAccumulation
            the second parameter is equivalent to invoke
            clazz.identifier.authority
            As single string "attributes" could be as well just 'identifier.authority'
            The given instance is first merged in the instance session.
            @param instance: an instance containing the appropriate id
            @param attributes: the attribute value required
            @return: the merged instance filled with the required attributes.
        """
        instance = self._session.merge(instance)
        EPB.loadAttributes(instance, attributes, self._session)
        return instance

    def executeNative(self, sqlNative):
        """
            Delegates to EPB.executeNative using the instance session.
            @param sqlNative: the native SQL to execute
            @return: the value returned by EPB.executeNative
        """
        return EPB.executeNative(sqlNative, self._session)
356
357
def getGeograpicExtentGeometry(ge):
    '''
        Builds the PostGIS geometry corresponding to an EX_GeographicExtent.
        Only EX_GeographicBoundingBox instances are supported.
        @param ge: an EX_GeographicExtent instance
        @return: the value returned by create_st_setSRID for the box bounds,
        None if ge is not an EX_GeographicBoundingBox
    '''
    if not isinstance(ge, EX_GeographicBoundingBox):
        return None
    west = ge.westBoundLongitude
    south = ge.southBoundLatitude
    east = ge.eastBoundLongitude
    north = ge.northBoundLatitude
    return create_st_setSRID(west, south, east, north)
368
369def _tmpstrftime(dt):
370    """
371        Returns a string from a datastring. This function is necessary because
372        python <3.2 strftime method is not able to handle date < 1900
373        @param dt: a datetime object
374    """
375    return "%s-%s-%s" % (dt.year, dt.month, dt.day)
376
def getUnifyObservationCollectionPhenomenonTime(collection):
    """
        Computes the overall period covered by the phenomenonTime of the
        members of a collection. Members without phenomenonTime are skipped.
        @param collection: a CEDA_ObservationCollection instance
        @return: a tuple (startDate, endDate) of strings: the earliest start
        and the latest end; an element is None when no such date is available
    """
    date_format = '%Y-%m-%d'
    begins = []
    finishes = []
    for observation in collection.member:
        phenomenon_time = observation.phenomenonTime
        if phenomenon_time is not None:
            period = fromPhenomenonTimeToString(phenomenon_time)
            if period[0] is not None:
                begins.append(datetime.strptime(period[0], date_format))
            if period[1] is not None:
                finishes.append(datetime.strptime(period[1], date_format))

    # the earliest of the starts and the latest of the ends
    earliest = _tmpstrftime(min(begins)) if begins else None
    latest = _tmpstrftime(max(finishes)) if finishes else None
    return earliest, latest
Note: See TracBrowser for help on using the repository browser.