1 | ''' |
---|
2 | BSD Licence |
---|
3 | Copyright (c) 2012, Science & Technology Facilities Council (STFC) |
---|
4 | All rights reserved. |
---|
5 | |
---|
6 | Redistribution and use in source and binary forms, with or without modification, |
---|
7 | are permitted provided that the following conditions are met: |
---|
8 | |
---|
9 | * Redistributions of source code must retain the above copyright notice, |
---|
10 | this list of conditions and the following disclaimer. |
---|
11 | * Redistributions in binary form must reproduce the above copyright notice, |
---|
12 | this list of conditions and the following disclaimer in the documentation |
---|
13 | and/or other materials provided with the distribution. |
---|
14 | * Neither the name of the Science & Technology Facilities Council (STFC) |
---|
15 | nor the names of its contributors may be used to endorse or promote |
---|
16 | products derived from this software without specific prior written permission. |
---|
17 | |
---|
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
---|
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
---|
20 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
---|
21 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS |
---|
22 | BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, |
---|
23 | OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
---|
24 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
---|
25 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
---|
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
---|
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
---|
28 | |
---|
29 | Created on 10 Jan 2012 |
---|
30 | |
---|
31 | @author: mnagni |
---|
32 | ''' |
---|
33 | from libs.epb import EPB |
---|
34 | from ea_model.moles3_4.observationcollection.mo_observationcollection import MO_ObservationCollection |
---|
35 | from ea_model.moles3_4.observation.mo_observation import MO_Observation |
---|
36 | from sqlalchemy import Table, Column, ForeignKey, Integer, String |
---|
37 | from sqlalchemy.orm import mapper |
---|
38 | from MolesManager.ceda_guid import CedaGUID |
---|
39 | from sqlalchemy.orm.collections import InstrumentedList |
---|
40 | from ea_model.iso_19115_2006_metadata_corrigendum.reference_system_information.md_identifier import MD_Identifier |
---|
41 | from ea_model.iso_19115_2006_metadata_corrigendum.citation_and_responsible_party_information.ci_citation import CI_Citation |
---|
42 | from ea_model.iso_19115_2006_metadata_corrigendum.extent_information.ex_geographicboundingbox import EX_GeographicBoundingBox |
---|
43 | from libs.postgisutil import unifyGeometries, create_st_setSRID, getBox2D,\ |
---|
44 | unifyGeometriesAsBBox |
---|
45 | from ea_model.ceda_metadatamodel.ceda_observationcollection.ceda_observationcollection import CEDA_ObservationCollection |
---|
46 | from ea_model.ceda_metadatamodel.ceda_observation.ceda_observation import CEDA_Observation |
---|
47 | from ea_model.ceda_metadatamodel.ceda_project.ceda_project import CEDA_Project |
---|
48 | from ea_model.moles3_4.utilities.mo_responsiblepartyinfo import MO_ResponsiblePartyInfo |
---|
49 | from ea_model.moles3_4.utilities.ceda_rolevalue import CEDA_RoleValue |
---|
50 | from MolesManager.codelist import MM_RoleValue, getCLValue |
---|
51 | from libs.migration.processor.commons import fromPhenomenonTimeToString |
---|
52 | from datetime import datetime |
---|
53 | from ascore.utils import synchAttributes |
---|
54 | |
---|
class Moles3EPBFactory(EPB):
    """
    Factory of Moles3EPB instances.

    At construction time it declares and maps the 'ceda_guid' table and then
    tries to install the full text search column, index and trigger on the
    'md_identifier' table.
    """

    def __init__(self, dbManager):
        """
        @param dbManager: the object exposing the 'metadata', 'engine' and 'createDbSession()'
                used by this factory
        """
        self._dbManager = dbManager
        self._initCEDA_Customization()

    def _initCEDA_Customization(self):
        """
            Applies the CEDA additions: the GUID table first, the search indexes afterwards.
        """
        self._associateCEDA_GUID()
        self._initSearchIndexes()

    def _associateCEDA_GUID(self):
        """
            Declares the 'ceda_guid' table on the manager's metadata, maps CedaGUID onto it
            and calls create_all() on the metadata.
        """
        guid_table = Table(
            'ceda_guid', self._dbManager.metadata,
            Column('id', String, primary_key=True),
            Column('ceda_observationcollection', Integer, ForeignKey('ceda_observationcollection.id')),
            Column('ceda_observation', Integer, ForeignKey('ceda_observation.id')))
        mapper(CedaGUID, guid_table)
        self._dbManager.metadata.create_all()

    def _initSearchIndexes(self):
        """
            Best-effort installation of the full text search support on md_identifier:
            a tsvector column, a GIN index over it and the trigger keeping it up to date.

            Each statement is executed on its own, so that a step which fails (presumably
            because the object already exists) does not prevent the following ones from
            being attempted. Failures are never propagated; they are logged at debug level.
        """
        # Function-scope import: the module does not import logging at the top.
        import logging
        logger = logging.getLogger(__name__)

        # To Be Done - check if the column already exists instead of relying on the failure.
        # We don't want sqlalchemy to know about this column so we add it externally.
        statements = (
            "alter table md_identifier add column code_search_vector tsvector",
            # This indexes the tsvector column
            "create index md_identifier_code_search_index on md_identifier using gin(code_search_vector)",
            # This sets up the trigger that keeps the tsvector column up to date.
            "create trigger md_identifier_code_search_update before update or insert on md_identifier "
            "for each row execute procedure tsvector_update_trigger('code_search_vector', 'pg_catalog.english', code)",
        )
        for statement in statements:
            try:
                self._dbManager.engine.execute(statement)
            except Exception as e:
                # Deliberately not re-raised: the setup has always been optional.
                logger.debug("Search index setup step skipped (%s): %s", statement, e)

    def _getSession(self):
        """
            @return: a new session from the db manager, or None if no db manager is set
        """
        if self._dbManager is not None:
            return self._dbManager.createDbSession()
        return None

    def createEPB(self):
        """
            @return: a Moles3EPB wrapping the value returned by _getSession()
        """
        return Moles3EPB(self._getSession())
---|
97 | |
---|
class Moles3EPB(object):
    """
    Persistence facade for the MOLES3 model: every operation runs on the
    session received at construction time.
    """

    def __init__(self, session):
        """
            @param session: the session used by all the methods of this instance
        """
        self._session = session

    def close(self):
        """
            Closes the underlying session.
            @return: whatever the session's close() returns
        """
        return self._session.close()

    def searchEager(self, clazz, inst_id):
        """
            Delegates to EPB.searchEager passing this instance's session.
        """
        return EPB.searchEager(clazz, inst_id, self._session)

    def _controlledCommit(self):
        """
            Commits the session without propagating a commit failure.
            On failure the error is printed and the session is rolled back,
            otherwise the session could not be used for further operations.
        """
        try:
            self._session.commit()
        except Exception as e:
            print(e)
            self._session.rollback()

    def persistInstance(self, instance):
        """
            Adds the given instance to the session and commits.
            @param instance: the object to persist
        """
        self._session.add(instance)
        self._session.commit()

    def updateCedaObject(self, ceda_object, cols_to_update):
        """
            Update, eventually add to the session, and commit a CEDA Object in MOLES3 db.
            @param ceda_object: the CEDA object to update
            @param cols_to_update: a dictionary containing the columns to update for the given ceda_object and the desired value.
            If the attribute is a list of objects the new instances are appended only if do not exist in the actual list
            @return: the given instance with the updated attributes, or None if it could
            neither be merged nor added to the session.
        """
        coll = None
        try:
            if ceda_object in self._session:
                coll = self._session.merge(ceda_object)
            else:
                self._session.add(ceda_object)
                coll = ceda_object
        except Exception as e:
            print(e)

        if coll is not None:
            for k, v in cols_to_update.items():
                if not hasattr(coll, k):
                    continue
                try:
                    val = self._session.merge(v)
                except Exception:
                    # merge failed (presumably v is not a mapped instance): use the raw value
                    val = v
                coll_k = getattr(coll, k)
                if isinstance(coll_k, (list, InstrumentedList)):
                    # list attribute: append only the items which are not already there
                    if isinstance(val, (list, InstrumentedList)):
                        new_items = list(val)
                    else:
                        new_items = [val]
                    for item in new_items:
                        if item not in coll_k:
                            coll_k.append(item)
                else:
                    setattr(coll, k, val)
            # NOTE(review): the original nesting of this call was not recoverable;
            # it is invoked only when there is an object to synchronize - confirm.
            synchAttributes(coll)
        self._controlledCommit()
        return coll

    def getUnifyObservationCollectionGEAsBBox(self, collection):
        """
            Returns the union of the collections.member's GeographicExtension(s)
            @param collection: a CEDA_ObservationCollection instance
            @return: the value returned by unifyGeometriesAsBBox for the members' geometries
        """
        # getGeograpicExtentGeometry returns None for extents which are not bounding
        # boxes; such entries are passed on as they are.
        bboxes = [getGeograpicExtentGeometry(ge)
                  for member in collection.member
                  for ge in member.geographicExtent]
        return unifyGeometriesAsBBox(bboxes, self)

    def getUnifyObservationCollectionPhenomenonTime(self, collection):
        """
            Returns the time period of the collections.member's phenomenonTime(s)
            @param collection: a CEDA_ObservationCollection instance
            @return: a tuple (startDate, endDate) of datetime objects; each element is
            None when no member provides the corresponding date
        """
        dateFormat = '%Y-%m-%d'
        ptStart = []
        ptEnd = []
        for member in collection.member:
            for pt in member.phenomenonTime:
                ptString = fromPhenomenonTimeToString(pt)
                if ptString[0] is not None:
                    ptStart.append(datetime.strptime(ptString[0], dateFormat))
                if ptString[1] is not None:
                    ptEnd.append(datetime.strptime(ptString[1], dateFormat))
        # takes the earliest start and the latest end
        start = min(ptStart) if ptStart else None
        end = max(ptEnd) if ptEnd else None
        return start, end

    def retrieveGUIDFromInstance(self, instance):
        """
            Returns the CedaGUID object associated with the given instance.
            @param instance: an instance of CEDA_Observation or CEDA_ObservationCollection
            @return: the first matching CedaGUID, or None if there is none or the instance
            is None, has no 'id' or is of another type
        """
        if instance is None or not hasattr(instance, 'id'):
            return None
        if isinstance(instance, CEDA_ObservationCollection):
            return self._session.query(CedaGUID).filter(CedaGUID.ceda_observationcollection == instance.id).first()
        if isinstance(instance, CEDA_Observation):
            return self._session.query(CedaGUID).filter(CedaGUID.ceda_observation == instance.id).first()
        return None

    def observationCollectionHasObservation(self, obs_coll_id, obs_id):
        """
            Checks if a CEDA_Collection contains a given CEDA_Observation.
            @param obs_coll_id: the CEDA_ObservationCollection id
            @param obs_id: the CEDA_Observation id
            @return: True if the collection contains the given observation, False otherwise
            (also when either id does not match any stored object)
        """
        coll = self._session.query(CEDA_ObservationCollection).filter(CEDA_ObservationCollection.id == obs_coll_id).first()
        obs = self._session.query(CEDA_Observation).filter(CEDA_Observation.id == obs_id).first()
        if coll is None or obs is None:
            # first() returns None when nothing matches: nothing can be contained
            return False
        return obs in coll.member

    def observationAuthor(self, observation):
        """
            Looks for the CEDA_Observation author.
            @param observation: the CEDA_Observation inside which look for the author
            @return: the 'party' of the first relatedParty whose role is the author role,
            None if there is no such party
        """
        # TO FIX!!!
        author_role = getCLValue(MM_RoleValue.cl_author)
        for partyInfo in observation.relatedParty:
            if partyInfo.role == author_role:
                return partyInfo.party
        return None

    def extractObservationByTitleKeywords(self, keywords):
        """
            Looks for CEDA_Observation whose identifier code (observation.identifier.code)
            matches the given full text query.
            @param keywords: the terms to search for
            @return: a list of the CEDA_Observation satisfying the query
        """
        # code_search_vector is a tsvector column. To search for terms, you use the
        # @@ operator against a tsquery built from the :terms placeholder.
        # NOTE(review): the original comment described plainto_tsquery, while the
        # statement uses to_tsquery, which expects tsquery syntax and not a plain
        # space separated string - confirm against the callers.
        q = self._session.query(CEDA_Observation). \
            join(MO_Observation).join(MO_Observation.identifier). \
            filter('md_identifier.code_search_vector @@ to_tsquery(:terms)')
        # This binds the :terms placeholder to the keywords string. User input
        # should always be put into queries this way to prevent SQL injection.
        q = q.params(terms=keywords)
        return q.all()

    def extractCollectionIdentifierByTitle(self, i_title):
        """
            Searches for an MD_Identifier from a CEDA_ObservationCollection whose authority
            title contains the given text.
            @param i_title: the text the CI_Citation.title has to contain
            @return: a query over (CEDA_ObservationCollection, MD_Identifier) pairs
        """
        return self._session.query(CEDA_ObservationCollection, MD_Identifier). \
            join(MO_ObservationCollection).join(MO_ObservationCollection.identifier). \
            join(MD_Identifier.authority).filter(CI_Citation.title.like('%' + i_title + '%'))

    def extractObservationsForProject(self, project):
        """
            Searches for the CEDA_Observation associated with a CEDA_Project
            @param project: a CEDA_Project instance
            @return: a query over the associated CEDA_Observation
        """
        return self._session.query(CEDA_Observation). \
            join(CEDA_Observation, MO_Observation.inSupportOf).filter(CEDA_Project.id == project.id)

    def extractProjectObservationCollections(self, project):
        """
            Searches for the Observation_Collections associated with a CEDA_Project
            @param project: a CEDA_Project instance
            @return: a query over the associated MO_ObservationCollection
        """
        mo_obs = self._session.query(MO_Observation).join(CEDA_Project).filter(CEDA_Project.id == project.id).subquery()
        obsers = self._session.query(CEDA_Observation).join(mo_obs, CEDA_Observation.id == mo_obs.c.id).one()

        # NOTE(review): 'obsers' is the single object returned by one(), yet it is used
        # below through '.any(...)' and '.c.id' as if it were a selectable - this
        # looks wrong and has to be confirmed before relying on this method.
        observations = self._session.query(MO_ObservationCollection).join(CEDA_Observation). \
            filter(obsers.any(CEDA_Observation.id == obsers.c.id))
        print("observation:" + str(observations.count()))
        return observations

    def search(self, clazz, inst_id):
        """
            Delegates to EPB.search passing this instance's session.
            @param clazz: the class type to search for
            @param inst_id: the instance id for which the search is done
        """
        return EPB.search(clazz, inst_id, self._session)

    def searchSelectiveLoad(self, clazz, inst_id, attributes):
        """
            Searches a required instance by id loading selectively
            the specified fields. The parameter "attributes" is a single string or a list of attributes
            owned by the instance of "clazz". Furthermore such list may contain
            also the children of the main attributes. For example "attrs" may look
            like
            ['resultAccumulation', 'identifier.authority', 'resultTime.position.dateTime8601.month',
            'relatedParty.party', 'result.source.function', 'permission',
            'geographicExtent', 'phenomenonTime', 'keywords', 'description',
            'inSupportOf.abstract', 'dataLineage']
            the first parameter refers to the main class so is equivalent to
            clazz.resultAccumulation
            the second parameter is equivalent to invoke
            clazz.identifier.authority
            As single string "attributes" could be as well just 'identifier.authority'
            @param clazz: the class type to search for
            @param inst_id: the instance id for which the search is done
            @param attributes: a single string or a list of attributes to load
            @return the value returned by EPB.searchSelectiveLoad for this instance's session
        """
        return EPB.searchSelectiveLoad(clazz, inst_id, attributes, self._session)

    def loadAttributes(self, instance, attributes):
        """
            Merges the instance into the session and loads the requested attributes.
            The parameter "attributes" is a single string or a list of attributes
            owned by the instance. Furthermore such list may contain
            also the children of the main attributes. For example "attrs" may look
            like
            ['resultAccumulation', 'identifier.authority', 'resultTime.position.dateTime8601.month',
            'relatedParty.party', 'result.source.function', 'permission',
            'geographicExtent', 'phenomenonTime', 'keywords', 'description',
            'inSupportOf.abstract', 'dataLineage']
            the first parameter refers to the main class so is equivalent to
            clazz.resultAccumulation
            the second parameter is equivalent to invoke
            clazz.identifier.authority
            As single string "attributes" could be as well just 'identifier.authority'
            @param instance: an instance containing the appropriate id
            @param attributes: the attribute value required
            @return: the merged instance on which EPB.loadAttributes has been invoked
        """
        instance = self._session.merge(instance)
        EPB.loadAttributes(instance, attributes, self._session)
        return instance

    def executeNative(self, sqlNative):
        """
            Delegates to EPB.executeNative passing this instance's session.
            @param sqlNative: the native statement to execute
        """
        return EPB.executeNative(sqlNative, self._session)
---|
360 | |
---|
361 | |
---|
def getGeograpicExtentGeometry(ge):
    '''
        Builds the postgis geometry matching an EX_GeographicExtent.
        Only EX_GeographicBoundingBox instances are supported; the four bounds
        are handed to create_st_setSRID in west, south, east, north order.
        @param ge: an EX_GeographicExtent instance
        @return: the value produced by create_st_setSRID, None for any other kind of extent
    '''
    if not isinstance(ge, EX_GeographicBoundingBox):
        return None
    west = ge.westBoundLongitude
    south = ge.southBoundLatitude
    east = ge.eastBoundLongitude
    north = ge.northBoundLatitude
    return create_st_setSRID(west, south, east, north)
---|
372 | |
---|
def getUnifyObservationCollectionPhenomenonTime(collection):
    """
        Computes the overall period covered by the phenomenonTime of the
        collection's members; members without a phenomenonTime are ignored.
        @param collection: a CEDA_ObservationCollection instance
        @return: a tuple (startDate, endDate) of 'YYYY-MM-DD' strings, where an
        element is None if no member supplies the corresponding date
    """
    dateFormat = '%Y-%m-%d'
    starts = []
    ends = []
    for member in collection.member:
        phenomenonTime = member.phenomenonTime
        if phenomenonTime is None:
            continue
        period = fromPhenomenonTimeToString(phenomenonTime)
        if period[0] is not None:
            starts.append(datetime.strptime(period[0], dateFormat))
        if period[1] is not None:
            ends.append(datetime.strptime(period[1], dateFormat))

    # the earliest of the starts and the latest of the ends delimit the period
    start = min(starts).strftime(dateFormat) if starts else None
    end = max(ends).strftime(dateFormat) if ends else None
    return start, end
---|