source: mauRepo/MolesManager/trunk/cedaMoles/libs/migration/processor/dataEntity.py @ 8554

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/mauRepo/MolesManager/trunk/cedaMoles/libs/migration/processor/dataEntity.py@8554
Revision 8554, 15.9 KB checked in by mnagni, 7 years ago (diff)

Incomplete - # 22551: List resources / display single file
 http://team.ceda.ac.uk/trac/ceda/ticket/22551

Line 
1'''
2BSD Licence
3Copyright (c) 2012, Science & Technology Facilities Council (STFC)
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without modification,
7are permitted provided that the following conditions are met:
8
9    * Redistributions of source code must retain the above copyright notice,
10        this list of conditions and the following disclaimer.
11    * Redistributions in binary form must reproduce the above copyright notice,
12        this list of conditions and the following disclaimer in the documentation
13        and/or other materials provided with the distribution.
14    * Neither the name of the Science & Technology Facilities Council (STFC)
15        nor the names of its contributors may be used to endorse or promote
16        products derived from this software without specific prior written permission.
17
18THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
22BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
23OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29Created on 15 Nov 2011
30
31@author: Maurizio Nagni
32'''
33from cedaMoles.libs.migration.exception.exceptions import MigrationObjectException, NoAssociatedAuthor,\
34    migrationObjectDescription, NoAssociatedDeployments, \
35    NoGeographicalExtensionException, NoCitationException
36from cedaMoles.libs.migration.processor.commons import find_deployments_in_de, \
37    create_md_identifier, extract_content,\
38    has_mo_same_hash, create_ci_citation, create_ci_date, find_published_date, \
39    isoDateTimeStringToTimeDate, find_updated_date, create_date, \
40    calculate_hash, find_doi_in_migration_document, \
41    has_mo_been_processed, get_atom_document_hash_by_mo, extract_title, extract_summary, \
42    create_ex_geographic_boundingbox, from_date_string_to_pt, \
43    compare_phenomenon_times, compareGeographicBoundingBoxes,\
44    unify_observation_collection_phenomenon_time
45from cedaMoles.libs.migration.processor.deployment import DeploymentProcessor
46from logging import StreamHandler
47import logging
48from ea_model.iso_19115_2006_metadata_corrigendum.\
49citation_and_responsible_party_information.ci_datetypecode import CI_DateTypeCode
50from cedaMoles.MolesManager.ceda_guid import CedaGUID
51from cedaMoles.MolesManager.codelist import MM_ObservationPublicationStateValue, \
52    getCLValue
53from ea_model.ceda_metadatamodel.ceda_observationcollection.ceda_observationcollection \
54    import CEDA_ObservationCollection
# Authority title marking the MD_Identifier which carries the collection title
CEDA_TITLE = 'ceda_title'


class DataEntityProcessor(object):
    '''
        Processes a DataEntityMigration item. Note that each DataEntity is associated
        to a "dataent_xxxx" file in Moles2
    '''
    log = logging.getLogger('DataEntityProcessor')
    log.addHandler(StreamHandler())
    log.setLevel(logging.INFO)

    def __init__(self, dataEntityMigration, epbRepo):
        '''
            Initializes the class
            @param dataEntityMigration: the DataEntityMigration instance
            @param epbRepo: an instance of EPBRepo
        '''
        self._dataEntityMigration = dataEntityMigration
        # Both flags are evaluated once, at construction time
        self._dataEntityHasSameHash = \
            has_mo_same_hash(self._dataEntityMigration)
        self._dataEntityHasBeenProcessed = \
            has_mo_been_processed(self._dataEntityMigration)
        self.epbRepo = epbRepo
        # Collects the exceptions/messages returned by process()
        self._report = []

    @staticmethod
    def _parseBBox(bbox):
        '''
            Splits a bounding box string into its four coordinates.
            The first four characters and the last one are discarded
            (e.g. the "BOX(" prefix and the ")" suffix of "BOX(a b,c d)");
            what is left has to be two comma separated pairs of
            whitespace separated numbers.
            @param bbox: the bounding box string
            @return: a tuple of four floats, in the order they appear in the string
            @raise ValueError: if the string has not the layout described above
        '''
        first_corner, second_corner = bbox[4:-1].split(',')
        first_x, first_y = first_corner.split()
        second_x, second_y = second_corner.split()
        return float(first_x), float(first_y), float(second_x), float(second_y)

    def _assignGeographicExtent(self, ceda_observationCollection):
        '''
            Updates the collection geographicExtent with the bounding box
            returned by moles3EPB.getUnifyObservationCollectionGEAsBBox.
            If no bounding box is available a NoGeographicalExtensionException
            is appended to the report and nothing else is done.
            @param ceda_observationCollection: the CEDA_ObservationCollection to update
        '''
        bbox = self.epbRepo.moles3EPB.\
            getUnifyObservationCollectionGEAsBBox(ceda_observationCollection)

        if bbox is None:
            self._report.append(
                NoGeographicalExtensionException(self._dataEntityMigration))
            return

        # NOTE(review): the values are passed in the order they appear in the
        # string; the original code named them east, north, west, south.
        # If bbox is a PostGIS "BOX(xmin ymin,xmax ymax)" the first pair would
        # rather be west/south - confirm against the parameter order of
        # create_ex_geographic_boundingbox.
        geographicExtent = \
            create_ex_geographic_boundingbox(*self._parseBBox(bbox))

        current_extents = ceda_observationCollection.geographicExtent
        # Updates only when nothing is stored yet or the first stored extent differs
        if len(current_extents) == 0 \
                or not compareGeographicBoundingBoxes(geographicExtent,
                                                      current_extents[0]):
            self.epbRepo.moles3EPB.updateCedaObject(
                ceda_observationCollection,
                {'geographicExtent': geographicExtent})

    def _assignPhenomenonTime(self, ceda_observationCollection):
        '''
            Updates the collection phenomenonTime with the period built from
            the values returned by unify_observation_collection_phenomenon_time.
            The update is done only if the collection has no phenomenonTime
            yet or has exactly one which differs from the new one.
            @param ceda_observationCollection: the CEDA_ObservationCollection to update
        '''
        start, end = \
            unify_observation_collection_phenomenon_time(ceda_observationCollection)
        pt = from_date_string_to_pt(start, end)
        if pt is None:
            return

        current_times = ceda_observationCollection.phenomenonTime
        if current_times is None:
            return

        if len(current_times) == 0 \
                or (len(current_times) == 1
                    and not compare_phenomenon_times(current_times[0], pt)):
            self.epbRepo.moles3EPB.updateCedaObject(
                ceda_observationCollection, {'phenomenonTime': pt})

    def _assignDescription(self, ceda_observationCollection):
        '''
            Copies the summary extracted from the migration object, if any,
            into the collection description.
            @param ceda_observationCollection: the CEDA_ObservationCollection to update
        '''
        description = extract_summary(self._dataEntityMigration)
        if description:
            ceda_observationCollection.description = description

    def _processTitle(self, ceda_observationCollection):
        '''
            Synchronizes the CEDA_TITLE identifier of the collection with the
            title extracted from the migration object.
            @param ceda_observationCollection: the CEDA_ObservationCollection to update
        '''
        ititle = extract_title(self._dataEntityMigration)
        for ident in ceda_observationCollection.identifier or ():
            if ident.authority.title != CEDA_TITLE:
                continue
            if ident.code == ititle:
                # The stored title is already up to date
                return
            # NOTE(review): after this in-place change the method still falls
            # through to the "create new" section below, as the original
            # code did - confirm whether a return is intended here.
            ident.code = ititle

        # Else create new
        i_citation = create_ci_citation(title=CEDA_TITLE)
        newIdentifier = create_md_identifier(code=ititle, authority=i_citation)

        if self._dataEntityHasBeenProcessed:
            message = 'The _processTitle update is skipped because not implemented'
            self._report.append(message)
            self.log.warning(message)
            return

        self.epbRepo.moles3EPB.updateCedaObject(ceda_observationCollection,
                                                {'identifier': newIdentifier})

    def _processCitation(self, ceda_observationCollection):
        '''
            Adds to the collection an identifier carrying the Moles2 citation,
            dated with the published/updated dates of the migration document.
            If the document has no citation a NoCitationException is
            appended to the report.
            @param ceda_observationCollection: the CEDA_ObservationCollection to update
        '''
        contentDict = extract_content(self._dataEntityMigration)
        if 'citation' not in contentDict:
            self._report.append(NoCitationException(self._dataEntityMigration))
            self.log.info(
                "The migration object %s has not associated cedacat:citation",
                migrationObjectDescription(self._dataEntityMigration))
            return

        ci_dates = []
        doc_date = find_published_date(self._dataEntityMigration)
        if doc_date:
            i_date = create_date(isoDateTimeStringToTimeDate(doc_date))
            ci_dates.append(
                create_ci_date(getCLValue(CI_DateTypeCode.cl_publication),
                               date=i_date))

        doc_date = find_updated_date(self._dataEntityMigration)
        if doc_date:
            i_date = create_date(isoDateTimeStringToTimeDate(doc_date))
            ci_dates.append(
                create_ci_date(getCLValue(CI_DateTypeCode.cl_revision),
                               date=i_date))

        i_citation = create_ci_citation(title='ceda_moles2_citation',
                                        date=ci_dates)
        newIdentifier = create_md_identifier(code=contentDict['citation'],
                                             authority=i_citation)

        if self._dataEntityHasBeenProcessed:
            self.log.warning('The _processCitation update is skipped '
                             'because not implemented')
            return

        self.epbRepo.moles3EPB.updateCedaObject(ceda_observationCollection,
                                                {'identifier': newIdentifier})

    def _execute(self, ceda_observationCollection):
        '''
            Fills the given CEDA_ObservationCollection using the
            self._dataEntityMigration object and, if it is processed for the
            first time, persists it and stores its ID and the document hash
            in the related DataEntityMigration object. Finally verifies that
            a CedaGUID exists for the collection.
            The method returns nothing; the collection is modified in place.
            @param ceda_observationCollection: the CEDA_ObservationCollection to process
        '''
        if not self._dataEntityHasSameHash:
            self._processTitle(ceda_observationCollection)
            self._assignDescription(ceda_observationCollection)
            self._processCitation(ceda_observationCollection)

        migrationEPB = self.epbRepo.migrationEPB
        moles3EPB = self.epbRepo.moles3EPB

        # Is a first time process?
        if not has_mo_been_processed(self._dataEntityMigration):
            docHash = get_atom_document_hash_by_mo(self._dataEntityMigration)
            ceda_observationCollection.publicationState \
                = getCLValue(MM_ObservationPublicationStateValue.cl_working)
            moles3EPB.persistInstance(ceda_observationCollection)
            migrationEPB.updateMigrationObject(
                self._dataEntityMigration,
                {'ceda_observation_coll_id': ceda_observationCollection.id,
                 'doc_hash': docHash})

        # Has to update the hash?
        # has_mo_been_processed is called again on purpose: the cached flag
        # was computed before the update done just above.
        if not self._dataEntityHasSameHash \
                and has_mo_been_processed(self._dataEntityMigration):
            docHash = get_atom_document_hash_by_mo(self._dataEntityMigration)
            migrationEPB.updateMigrationObject(self._dataEntityMigration,
                                               {'doc_hash': docHash})

        # Has a proper CEDAGUID?
        if moles3EPB.retrieveGUIDFromInstance(ceda_observationCollection) is None:
            # Adds the CedaGUID
            ceda_guid = CedaGUID()
            ceda_guid.id = calculate_hash(self._dataEntityMigration.data_ent_id)
            ceda_guid.ceda_observationcollection = ceda_observationCollection.id
            moles3EPB.persistInstance(ceda_guid)
            self.log.info("GUID for this ObservationCollection: %s",
                          ceda_guid.id)

    def _processDOI(self, deploymentMigration, ceda_observation,
                    deProcessor, single_deployment):
        '''
            Assigns a DOI to the observation using the given DeploymentProcessor.
            The DOI is searched in the deployment document; only if the data
            entity has a single deployment and none is found there, it is
            searched in the data entity document as well.
            @param deploymentMigration: the DeploymentMigration instance
            @param ceda_observation: the observation receiving the DOI
            @param deProcessor: the processor whose assignDOI is called
            @param single_deployment: True if the data entity has one deployment only
        '''
        doi = find_doi_in_migration_document(deploymentMigration)
        if single_deployment and doi is None:
            doi = find_doi_in_migration_document(self._dataEntityMigration)
        deProcessor.assignDOI(ceda_observation, doi)

    def _assignObservationCollectionToProject(self, obsColl, ceda_observation):
        '''
            Appends the collection to the observationCollection list of the
            project referenced by ceda_observation.inSupportOf, unless that
            reference is None or an element with the same id is already listed.
            @param obsColl: the observation collection
            @param ceda_observation: the observation whose inSupportOf is updated
        '''
        project = ceda_observation.inSupportOf
        if project is None:
            return

        if any(proj_coll.id == obsColl.id
               for proj_coll in project.observationCollection):
            return

        project.observationCollection.append(obsColl)

    def _processDeploymentMigration(self, deploymentMigration, single_deployment):
        '''
            Processes one deployment with a DeploymentProcessor and then
            tries to assign a DOI to the resulting observation.
            @param deploymentMigration: the DeploymentMigration instance
            @param single_deployment: True if the data entity has one deployment only
            @return: the observation returned by DeploymentProcessor.process
            @raise NoAssociatedAuthor: re-raised as it is
            @raise MigrationObjectException: wraps any other exception raised
                    while processing the deployment
        '''
        deProcessor = DeploymentProcessor(self._dataEntityMigration,
                                          deploymentMigration, self.epbRepo)
        try:
            self.log.info("Processing deployment: %s",
                          migrationObjectDescription(deploymentMigration))
            obs_ex_report, ceda_observation = deProcessor.process()
            self._report.extend(obs_ex_report)
        except NoAssociatedAuthor:
            # A bare raise keeps the original traceback
            raise
        except Exception as ex:
            raise MigrationObjectException(ex)

        try:
            self._processDOI(deploymentMigration, ceda_observation,
                             deProcessor, single_deployment)
        except Exception as ex:
            # The DOI assignment is best-effort: a failure must not stop the
            # migration, but is logged instead of being silently dropped
            self.log.warning("Cannot assign the DOI for deployment %s: %s",
                             migrationObjectDescription(deploymentMigration), ex)

        return ceda_observation

    def process(self):
        '''
            Migrates the data entity: retrieves (or creates) the associated
            CEDA_ObservationCollection, processes all its deployments and
            finally updates the collection geographic extent and phenomenon time.
            @return: the list of exceptions/messages collected during the process
        '''
        obs_coll = None
        self.log.info("Processing dataEntity: %s",
                      migrationObjectDescription(self._dataEntityMigration))
        try:
            coll_id = self._dataEntityMigration.ceda_observation_coll_id
            if coll_id:
                obs_coll = self.epbRepo.moles3EPB.searchSelectiveLoad(
                    CEDA_ObservationCollection, coll_id, ['identifier'])
            else:
                obs_coll = CEDA_ObservationCollection()
            self._execute(obs_coll)
        except Exception as ex:
            # Without a collection the deployments cannot be processed
            self._report.append(ex)
            return self._report

        # retrieves the associated deployment links from the data_entity
        deployments_links = find_deployments_in_de(self._dataEntityMigration)
        # retrieves the DeploymentMigration sorted by creation date
        deployment_migrations = self.epbRepo.migrationEPB.\
            getAllDeploymentsMigrationByDataEntitySortedByDate(
                self._dataEntityMigration, deployments_links)

        if deployment_migrations is None or len(deployment_migrations) == 0:
            self._report.append(NoAssociatedDeployments(self._dataEntityMigration))
            return self._report

        single_deployment = len(deployment_migrations) == 1
        for deployment_migration in deployment_migrations:
            # A failing deployment is reported but does not stop the others
            try:
                ceda_observation = self._processDeploymentMigration(
                    deployment_migration, single_deployment)
                # Is a first time process?
                if not self.epbRepo.moles3EPB.observationCollectionHasObservation(
                        obs_coll.id, ceda_observation.id):
                    self.epbRepo.moles3EPB.updateCedaObject(
                        obs_coll, {'member': ceda_observation})

                self._assignObservationCollectionToProject(obs_coll,
                                                           ceda_observation)
            except Exception as ex:
                self._report.append(ex)

        self._assignGeographicExtent(obs_coll)
        self._assignPhenomenonTime(obs_coll)

        return self._report
Note: See TracBrowser for help on using the repository browser.