source: mauRepo/MolesManager/trunk/cedaMoles/libs/migration/processor/dataEntity.py @ 8537

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/mauRepo/MolesManager/trunk/cedaMoles/libs/migration/processor/dataEntity.py@8537
Revision 8537, 16.0 KB checked in by mnagni, 7 years ago (diff)

Incomplete - # 22528: Migration of FAtCat Open Search link for HPFeld
 http://team.ceda.ac.uk/trac/ceda/ticket/22528

Line 
1'''
2BSD Licence
3Copyright (c) 2012, Science & Technology Facilities Council (STFC)
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without modification,
7are permitted provided that the following conditions are met:
8
9    * Redistributions of source code must retain the above copyright notice,
10        this list of conditions and the following disclaimer.
11    * Redistributions in binary form must reproduce the above copyright notice,
12        this list of conditions and the following disclaimer in the documentation
13        and/or other materials provided with the distribution.
14    * Neither the name of the Science & Technology Facilities Council (STFC)
15        nor the names of its contributors may be used to endorse or promote
16        products derived from this software without specific prior written permission.
17
18THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
22BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
23OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29Created on 15 Nov 2011
30
31@author: Maurizio Nagni
32'''
33from cedaMoles.libs.migration.exception.exceptions import MigrationObjectException, NoAssociatedAuthor,\
34    migrationObjectDescription, NoAssociatedDeployments, \
35    NoGeographicalExtensionException, NoCitationException
36from cedaMoles.libs.migration.processor.commons import find_deployments_in_de, \
37    create_md_identifier, extract_content,\
38    has_mo_same_hash, create_ci_citation, create_ci_date, find_published_date, \
39    isoDateTimeStringToTimeDate, find_updated_date, create_date, \
40    calculate_hash, find_doi_in_migration_document, \
41    has_mo_been_processed, get_atom_document_hash_by_mo, extract_title, extract_summary, \
42    create_ex_geographic_boundingbox, from_date_string_to_pt, \
43    compare_phenomenon_times, compareGeographicBoundingBoxes
44from cedaMoles.libs.migration.processor.deployment import DeploymentProcessor
45from logging import StreamHandler
46import logging
47from ea_model.iso_19115_2006_metadata_corrigendum.\
48citation_and_responsible_party_information.ci_datetypecode import CI_DateTypeCode
49from cedaMoles.MolesManager.ceda_guid import CedaGUID
50from cedaMoles.MolesManager.codelist import MM_ObservationPublicationStateValue, \
51    getCLValue
52from ea_model.ceda_metadatamodel.ceda_observationcollection.ceda_observationcollection \
53    import CEDA_ObservationCollection
54from cedaMoles.MolesManager.moles3epb import unify_observation_collection_phenomenon_time
# Title given to the authority citation of the MD_Identifier that carries the
# observation collection title (see DataEntityProcessor._processTitle).
CEDA_TITLE = 'ceda_title'
56
class DataEntityProcessor(object):
    '''
        Processes a DataEntityMigration item. Note that each DataEntity is associated
        to a "dataent_xxxx" file in Moles2.

        The entry point is process(): it creates (or reloads) the
        CEDA_ObservationCollection bound to the migration object, processes
        every associated deployment and returns the list of problems
        collected along the way.
    '''
    log = logging.getLogger('DataEntityProcessor')
    log.addHandler(StreamHandler())
    log.setLevel(logging.INFO)

    def __init__(self, dataEntityMigration, epbRepo):
        '''
            Initializes the class
            @param dataEntityMigration: the DataEntityMigration instance
            @param epbRepo: an instance of EPBRepo
        '''
        self._dataEntityMigration = dataEntityMigration
        # Both flags are evaluated once, at construction time
        self._dataEntityHasSameHash = \
            has_mo_same_hash(self._dataEntityMigration)
        self._dataEntityHasBeenProcessed = \
            has_mo_been_processed(self._dataEntityMigration)
        self.epbRepo = epbRepo
        # Exceptions and messages collected while processing; returned by process()
        self._report = []

    def _assignGeographicExtent(self, ceda_observationCollection):
        '''
            Updates the collection geographicExtent with the bounding box
            unifying its observations. If no bounding box is available a
            NoGeographicalExtensionException is appended to the report.
            @param ceda_observationCollection: the CEDA_ObservationCollection to update
        '''
        bbox = self.epbRepo.moles3EPB.\
            getUnifyObservationCollectionGEAsBBox(ceda_observationCollection)
        if bbox is None:
            self._report.append(NoGeographicalExtensionException(self._dataEntityMigration))
            return

        # Drops the first four characters and the last one, then reads two
        # comma separated "x y" pairs.
        # NOTE(review): presumably the text looks like 'BOX(x y,x y)';
        # confirm the corner order against the EPB implementation, the
        # upper/lower names below are the original ones.
        upperCornerData, lowerCornerData = bbox[4:-1].split(',')
        east, north = upperCornerData.split()
        west, south = lowerCornerData.split()
        geographicExtent = create_ex_geographic_boundingbox(float(east),
                                                            float(north),
                                                            float(west),
                                                            float(south))

        # Writes only if there is no extent yet or the first one differs
        current = ceda_observationCollection.geographicExtent
        if len(current) == 0 \
                or not compareGeographicBoundingBoxes(geographicExtent, current[0]):
            self.epbRepo.moles3EPB.updateCedaObject(ceda_observationCollection,
                                                    {'geographicExtent': geographicExtent})

    def _assignPhenomenonTime(self, ceda_observationCollection):
        '''
            Updates the collection phenomenonTime with the period unifying
            the phenomenon times of its observations.
            @param ceda_observationCollection: the CEDA_ObservationCollection to update
        '''
        start, end = unify_observation_collection_phenomenon_time(ceda_observationCollection)
        dateString = start
        if end is not None:
            dateString = '%s/%s' % (start, end)
        pt = from_date_string_to_pt(dateString)

        current = ceda_observationCollection.phenomenonTime
        if pt is None or current is None:
            return

        # Writes only if there is no time yet or the single existing one differs
        if len(current) == 0 \
                or (len(current) == 1 and not compare_phenomenon_times(current[0], pt)):
            self.epbRepo.moles3EPB.updateCedaObject(ceda_observationCollection,
                                                    {'phenomenonTime': pt})

    def _assignDescription(self, ceda_observationCollection):
        '''
            Copies the migration document summary, if any, into the
            collection description.
            @param ceda_observationCollection: the CEDA_ObservationCollection to update
        '''
        description = extract_summary(self._dataEntityMigration)
        if description:
            ceda_observationCollection.description = description

    def _processTitle(self, ceda_observationCollection):
        '''
            Stores the migration document title in the collection identifier
            whose authority title is CEDA_TITLE, creating that identifier if
            necessary.
            @param ceda_observationCollection: the CEDA_ObservationCollection to update
        '''
        ititle = extract_title(self._dataEntityMigration)
        if ceda_observationCollection.identifier:
            for ident in ceda_observationCollection.identifier:
                if ident.authority.title == CEDA_TITLE:
                    if ident.code == ititle:
                        # The title is already up to date
                        return
                    # NOTE(review): after this in-place update the method
                    # still falls through to the "create new" section below;
                    # confirm whether a return was intended here.
                    ident.code = ititle

        # Else create new
        i_citation = create_ci_citation(title=CEDA_TITLE)
        newIdentifier = create_md_identifier(code=ititle, authority=i_citation)

        if self._dataEntityHasBeenProcessed:
            message = 'The _processTitle update is skipped because not implemented'
            self._report.append(message)
            DataEntityProcessor.log.warning(message)
            return

        self.epbRepo.moles3EPB.updateCedaObject(ceda_observationCollection,
                                                {'identifier': newIdentifier})

    def _processCitation(self, ceda_observationCollection):
        '''
            Adds to the collection an identifier holding the Moles2 citation
            text, dated with the document published/updated dates. If the
            document has no citation a NoCitationException is appended to
            the report.
            @param ceda_observationCollection: the CEDA_ObservationCollection to update
        '''
        contentDict = extract_content(self._dataEntityMigration)
        if 'citation' not in contentDict:
            self._report.append(NoCitationException(self._dataEntityMigration))
            DataEntityProcessor.log.info(
                "The migration object %s has not associated cedacat:citation",
                migrationObjectDescription(self._dataEntityMigration))
            return

        ci_dates = []
        doc_date = find_published_date(self._dataEntityMigration)
        if doc_date:
            i_date = create_date(isoDateTimeStringToTimeDate(doc_date))
            ci_dates.append(create_ci_date(getCLValue(CI_DateTypeCode.cl_publication),
                                           date=i_date))

        doc_date = find_updated_date(self._dataEntityMigration)
        if doc_date:
            i_date = create_date(isoDateTimeStringToTimeDate(doc_date))
            ci_dates.append(create_ci_date(getCLValue(CI_DateTypeCode.cl_revision),
                                           date=i_date))

        i_citation = create_ci_citation(title='ceda_moles2_citation', date=ci_dates)
        newIdentifier = create_md_identifier(code=contentDict['citation'],
                                             authority=i_citation)

        if self._dataEntityHasBeenProcessed:
            DataEntityProcessor.log.warning(
                'The _processCitation update is skipped because not implemented')
            return

        self.epbRepo.moles3EPB.updateCedaObject(ceda_observationCollection,
                                                {'identifier': newIdentifier})

    def _execute(self, ceda_observationCollection):
        """
            Fills and persists the given CEDA_ObservationCollection using the
            self._dataEntityMigration object.
            On a first time process the instance is persisted and its ID,
            together with the document hash, is stored in the related
            DataEntityMigration object. Finally a CedaGUID is created if the
            collection has none.
            @param ceda_observationCollection: the CEDA_ObservationCollection to process
        """
        if not self._dataEntityHasSameHash:
            self._processTitle(ceda_observationCollection)
            self._assignDescription(ceda_observationCollection)
            self._processCitation(ceda_observationCollection)

        # Is a first time process?
        if not has_mo_been_processed(self._dataEntityMigration):
            docHash = get_atom_document_hash_by_mo(self._dataEntityMigration)
            ceda_observationCollection.publicationState \
                = getCLValue(MM_ObservationPublicationStateValue.cl_working)
            self.epbRepo.moles3EPB.persistInstance(ceda_observationCollection)
            self.epbRepo.migrationEPB.updateMigrationObject(
                self._dataEntityMigration,
                {'ceda_observation_coll_id': ceda_observationCollection.id,
                 'doc_hash': docHash})

        # Has to update the hash? has_mo_been_processed is deliberately
        # called again instead of reusing the flag cached in __init__: the
        # migration object may have just been updated above.
        if not self._dataEntityHasSameHash \
                and has_mo_been_processed(self._dataEntityMigration):
            docHash = get_atom_document_hash_by_mo(self._dataEntityMigration)
            self.epbRepo.migrationEPB.updateMigrationObject(
                self._dataEntityMigration, {'doc_hash': docHash})

        # Has a proper CEDAGUID?
        if self.epbRepo.moles3EPB.retrieveGUIDFromInstance(ceda_observationCollection) \
                is None:
            # Adds the CedaGUID
            ceda_guid = CedaGUID()
            ceda_guid.id = calculate_hash(self._dataEntityMigration.data_ent_id)
            ceda_guid.ceda_observationcollection = ceda_observationCollection.id
            self.epbRepo.moles3EPB.persistInstance(ceda_guid)
            DataEntityProcessor.log.info("GUID for this ObservationCollection: %s",
                                         ceda_guid.id)

    def _processDOI(self, deploymentMigration, ceda_observation,
                    deProcessor, single_deployment):
        '''
            Assigns a DOI to the observation. The DOI is searched in the
            deployment document; only when the data entity has a single
            deployment, the data entity document is used as fallback.
            @param deploymentMigration: the DeploymentMigration instance
            @param ceda_observation: the observation receiving the DOI
            @param deProcessor: the DeploymentProcessor used to assign the DOI
            @param single_deployment: True if the data entity has one deployment only
        '''
        doi = find_doi_in_migration_document(deploymentMigration)
        if single_deployment and doi is None:
            doi = find_doi_in_migration_document(self._dataEntityMigration)
        deProcessor.assignDOI(ceda_observation, doi)

    def _assignObservationCollectionToProject(self, obsColl, ceda_observation):
        '''
            Appends the collection to the observation collections of the
            project referenced by ceda_observation.inSupportOf, unless a
            collection with the same id is already there.
            @param obsColl: the observation collection to add
            @param ceda_observation: the observation whose project is updated
        '''
        project = ceda_observation.inSupportOf
        if project is None:
            return

        for proj_coll in project.observationCollection:
            if proj_coll.id == obsColl.id:
                return

        project.observationCollection.append(obsColl)

    def _processDeploymentMigration(self, deploymentMigration, single_deployment):
        '''
            Processes one deployment and then tries to assign its DOI.
            @param deploymentMigration: the DeploymentMigration instance
            @param single_deployment: True if the data entity has one deployment only
            @return: the observation returned by the DeploymentProcessor
            @raise NoAssociatedAuthor: propagated as it is
            @raise MigrationObjectException: wrapping any other exception
        '''
        deProcessor = DeploymentProcessor(self._dataEntityMigration,
                                          deploymentMigration, self.epbRepo)
        try:
            description = migrationObjectDescription(deploymentMigration)
            DataEntityProcessor.log.info("Processing deployment: %s", description)
            obs_ex_report, ceda_observation = deProcessor.process()
            self._report.extend(obs_ex_report)
            try:
                self._processDOI(deploymentMigration, ceda_observation,
                                 deProcessor, single_deployment)
            except Exception as ex:
                # The DOI assignment stays best-effort, but the failure is
                # recorded instead of being silently discarded
                self._report.append(ex)
                DataEntityProcessor.log.warning(
                    "Unable to assign the DOI for deployment %s: %s",
                    description, ex)
        except NoAssociatedAuthor:
            # Bare raise keeps the original traceback
            raise
        except Exception as ex:
            raise MigrationObjectException(ex)

        return ceda_observation

    def process(self):
        '''
            Processes the DataEntityMigration: creates or reloads the related
            CEDA_ObservationCollection, processes all the associated
            deployments, links the resulting observations to the collection
            and finally updates the collection geographic extent and
            phenomenon time.
            @return: the list of exceptions/messages collected during the process
        '''
        DataEntityProcessor.log.info("Processing dataEntity: %s",
                                     migrationObjectDescription(self._dataEntityMigration))
        try:
            coll_id = self._dataEntityMigration.ceda_observation_coll_id
            if coll_id:
                obs_coll = self.epbRepo.moles3EPB.searchSelectiveLoad(
                    CEDA_ObservationCollection, coll_id, ['identifier'])
            else:
                obs_coll = CEDA_ObservationCollection()
            self._execute(obs_coll)
        except Exception as ex:
            self._report.append(ex)
            return self._report

        # retrieves the associated deployment links from the data_entity
        deployments_links = find_deployments_in_de(self._dataEntityMigration)
        # retrieves the DeploymentMigration items (sorted by date, as the EPB method name says)
        deployment_migrations = self.epbRepo.migrationEPB.\
            getAllDeploymentsMigrationByDataEntitySortedByDate(self._dataEntityMigration,
                                                               deployments_links)

        if deployment_migrations is None or len(deployment_migrations) == 0:
            self._report.append(NoAssociatedDeployments(self._dataEntityMigration))
            return self._report

        single_deployment = len(deployment_migrations) == 1
        for deployment_migration in deployment_migrations:
            # A failing deployment is reported and does not stop the others
            try:
                ceda_observation = self._processDeploymentMigration(deployment_migration,
                                                                    single_deployment)
                # Is a first time process?
                if not self.epbRepo.moles3EPB.observationCollectionHasObservation(
                        obs_coll.id, ceda_observation.id):
                    self.epbRepo.moles3EPB.updateCedaObject(obs_coll,
                                                            {'member': ceda_observation})

                self._assignObservationCollectionToProject(obs_coll, ceda_observation)
            except Exception as ex:
                self._report.append(ex)

        self._assignGeographicExtent(obs_coll)
        self._assignPhenomenonTime(obs_coll)

        return self._report
Note: See TracBrowser for help on using the repository browser.