source: mauRepo/MolesManager/trunk/src/libs/migration/processor/deployment.py @ 8201

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/mauRepo/MolesManager/trunk/src/libs/migration/processor/deployment.py@8201
Revision 8201, 23.8 KB checked in by mnagni, 8 years ago (diff)

Complete - # 22390: AMEND MOLES2 -> CEDA Moles Author and Co-author migration (plus PI assignment) to CEDA Observation record
 http://team.ceda.ac.uk/trac/ceda/ticket/22390
Complete - # 22397: CEDA Observation geographicExtent
 http://team.ceda.ac.uk/trac/ceda/ticket/22397

Line 
1'''
2Created on 15 Nov 2011
3
4@author: mnagni
5'''
6from libs.migration.processor.commons import findMolesLineage,\
7    createMO_ResponsiblePartyInfo,\
8    DO_BADC, DO_NEODC, findAuthorsInResource, CEDA,\
9    createMD_Identifier, extractSummary, extractQuality, \
10    hasAtomDocumentSameHash, getAtomDocumentHashByMO, extractTitle,\
11        createCEDA_Result,\
12    createEX_GeographicBoundingBox, extractGeographicExtentInMigrationDocument, findDownloadLinksInMigrationDocument,\
13    findDOIInMigrationDocument, extractContent, createCI_Citation, createCI_Date, createDate,\
14    createTM_Position, createTM_Instant, extractMolesCreationDate,\
15    createDateTime, isoDateTimeStringToTimeDate, extractMolesProviderID,\
16    DO_UKSSDC, createMO_ResponsiblePartyInfo_, createMO_Organization,\
17    createCI_Contact, createCI_Address, createCI_OnlineResource,\
18    createCI_Telephone, extractMolesTemporalRange, isoDateStringToTimeDate,\
19    createTM_Period, createCI_Individual, findAccessLinksInMigrationDocument,\
20    findLinksInDeployment, createMD_LegalConstraints
21from ea_model.moles3_4.utilities.mo_rolevalue import MO_RoleValue
22from ea_model.ceda_metadatamodel.ceda_observation.ceda_observation import CEDA_Observation
23from libs.migration.MigrationEPB import MigrationEPB
24from libs.epb import EPB
25from libs.migration.processor.deployment_data import DeploymentDataProcessor
26from ea_model.moles3_4.utilities.mo_publicationstatevalue import MO_PublicationStateValue
27from libs.migration.exception.exceptions import NoDataLineage,\
28    NoAssociatedAuthor
29from MolesManager.moles3epb import Moles3EPB
30from libs.migration.authors import authors
31from logging import StreamHandler
32import logging
33from datetime import date
34from ea_model.iso_19115_2006_metadata_corrigendum.citation_and_responsible_party_information.ci_onlinefunctioncode import CI_OnLineFunctionCode
35from libs.migration.processor.check.check import moles3Append
36from ea_model.iso_19115_2006_metadata_corrigendum.citation_and_responsible_party_information.ci_datetypecode import CI_DateTypeCode
37from ea_model.iso_19115_2006_metadata_corrigendum.constraint_information.md_restrictioncode import MD_RestrictionCode
38from copy import deepcopy
39
class DeploymentProcessor(object):
    '''
        Migrates a deployment element in a CEDA_Observation entity
    '''
    publisherName = 'CEDA'

    log = logging.getLogger('DeploymentProcessor')
    log.addHandler(StreamHandler())
    log.setLevel(logging.DEBUG)

    def __init__(self, dataEntityMigration, deploymentMigration, migrationSessions):
        '''
            Initializes the class
            @param dataEntityMigration: a DataEntityMigration instance
            @param deploymentMigration: the DeploymentMigration instance
            @param migrationSessions: a MigrationSessions instance
        '''
        self._dataEntityMigration = dataEntityMigration
        self._deploymentMigration = deploymentMigration
        # A document counts as "unchanged" only when a hash was already stored for it
        self._dataEntityHasSameHash = hasAtomDocumentSameHash(self._dataEntityMigration) \
            and self._dataEntityMigration.doc_hash is not None
        self._deploymentHasSameHash = hasAtomDocumentSameHash(self._deploymentMigration) \
            and self._deploymentMigration.doc_hash is not None

        self._migrationSessions = migrationSessions

    def _existsCEDAasPublisher(self):
        '''
            Checks if self._ceda_observation has a publisher party whose name is
            DeploymentProcessor.publisherName
            @return: True if such a party exists, False otherwise
        '''
        # NOTE(review): nothing in this class assigns self._ceda_observation, and
        # _addResponsiblePartyInfo reads a 'party' attribute (not 'parties') on the
        # same kind of object - confirm before relying on this method.
        for rp in self._ceda_observation.relatedParty:
            if rp.role != MO_RoleValue.cl_publisher:
                continue
            for party in rp.parties:
                if party.name == DeploymentProcessor.publisherName:
                    return True
        return False

    def _extractResponsiblePartyInfo(self, authors = None, parsedParties = None):
        '''
            Creates one author and one co-investigator MO_ResponsiblePartyInfo
            @param authors: the deployment's authors; None is treated as an empty list
            @param parsedParties: a list to fill with the created MO_ResponsiblePartyInfos.
                    If omitted a new list is created
            @return: the filled list
        '''
        # A fresh list per call: a mutable default would be shared among all the calls
        if parsedParties is None:
            parsedParties = []
        if authors is None:
            authors = []
        # First is assumed to be the author, the others are co-investigators
        parsedParties.append(createMO_ResponsiblePartyInfo(MO_RoleValue.cl_author, authors[:1]))
        parsedParties.append(createMO_ResponsiblePartyInfo(MO_RoleValue.cl_coinvestigator, authors[1:]))
        return parsedParties

    def _addResponsiblePartyInfo(self, oldResponsiblePartyInfos, newResponsiblePartyInfo):
        '''
            Merges a party info into a list of party infos, keeping one item per role
            @param oldResponsiblePartyInfos: the list to update in place
            @param newResponsiblePartyInfo: the item to merge. If an item with the same
                    role already exists the new parties are appended to it, otherwise
                    the new item is appended to the list
        '''
        for oldPartyInfo in oldResponsiblePartyInfos:
            if oldPartyInfo.role == newResponsiblePartyInfo.role:
                for np in newResponsiblePartyInfo.party:
                    oldPartyInfo.party.append(np)
                return
        oldResponsiblePartyInfos.append(newResponsiblePartyInfo)

    def _appendResponsibleParty(self, observation, names, creation_function, role):
        '''
            Appends to observation.relatedParty one party info for each name
            @param observation: the object owning the relatedParty list
            @param names: the names of the parties to create
            @param creation_function: a callable accepting a 'name' keyword and
                    returning the party
            @param role: the role assigned to each created party info
        '''
        for nm in names:
            i_party = creation_function(name = nm)
            observation.relatedParty.append(createMO_ResponsiblePartyInfo_(role, [i_party]))

    def _appendAuthorRecord(self, observation, key, whereAreAuthors, mainRole):
        '''
            Appends to the observation the parties described by authors[key]
            @param observation: the object owning the relatedParty list
            @param key: the key to look up in the imported authors table
            @param whereAreAuthors: the migration object the key comes from
            @param mainRole: the role given to the main individual/organization;
                    co-authors always get MO_RoleValue.cl_coinvestigator
            @raise NoAssociatedAuthor: if the key is not in the authors table
        '''
        if key not in authors:
            raise NoAssociatedAuthor(key, migrationObject = whereAreAuthors)

        ret = self._extractIndividualsAndOrganizations(authors[key], whereAreAuthors)
        self._appendResponsibleParty(observation, ret['ind'], createCI_Individual, mainRole)
        self._appendResponsibleParty(observation, ret['org'], createMO_Organization, mainRole)
        self._appendResponsibleParty(observation, ret['co_ind'], createCI_Individual, \
                                     MO_RoleValue.cl_coinvestigator)
        self._appendResponsibleParty(observation, ret['co_org'], createMO_Organization, \
                                     MO_RoleValue.cl_coinvestigator)

    def _assignName(self, observation):
        '''
            Appends to observation.relatedParty the CEDA curator, the authors and,
            if any, the contributors.
            @param observation: the object owning the relatedParty list
            @raise NoAssociatedAuthor: if the document's authors or contributors
                    are not in the imported authors table
        '''
        whereAreAuthors = self._deploymentMigration
        doc_authors = findAuthorsInResource(self._deploymentMigration)
        # When the deployment's author is a data centre the data entity is used instead
        if doc_authors['authors'] in [DO_BADC, DO_NEODC]:
            doc_authors = findAuthorsInResource(self._dataEntityMigration)
            whereAreAuthors = self._dataEntityMigration

        i_party = createMO_Organization(name = CEDA)
        observation.relatedParty.append(createMO_ResponsiblePartyInfo_(MO_RoleValue.cl_curator, [i_party]))

        self._appendAuthorRecord(observation, doc_authors['authors'], whereAreAuthors, \
                                 MO_RoleValue.cl_author)

        if doc_authors['contributors'] is None:
            return

        # Every party coming from the contributors is a co-investigator
        self._appendAuthorRecord(observation, doc_authors['contributors'], whereAreAuthors, \
                                 MO_RoleValue.cl_coinvestigator)

    def _classifyParty(self, ret, party_type, name, prefix, whereAreAuthors):
        '''
            Appends a name to ret[prefix + party_type] when party_type is 'ind' or
            'org' and logs a message when the name is 'unknown'
        '''
        if party_type in ('ind', 'org'):
            ret[prefix + party_type].append(name)
        if name == 'unknown':
            DeploymentProcessor.log.info("%s %s %s has unknown author", \
                                         whereAreAuthors.doc_status, whereAreAuthors.doc_owner, \
                                         whereAreAuthors.doc_name)

    def _extractIndividualsAndOrganizations(self, tmp_auth, whereAreAuthors):
        '''
            Splits an item of the authors table by kind of party
            @param tmp_auth: a dictionary with keys 'type', 'author',
                    'co_author_type' and 'co_author'; the last two are parallel lists
            @param whereAreAuthors: the migration object the item refers to. It is
                    used only to log the 'unknown' authors
            @return: a dictionary with keys 'ind', 'org', 'co_ind', 'co_org', each one
                    a list of names. The main author goes in 'ind'/'org', the
                    co-authors in 'co_ind'/'co_org'
            @raise Exception: if whereAreAuthors is None
        '''
        if whereAreAuthors is None:
            raise Exception("migrationObject is None")

        ret = {'ind': [], 'org': [], 'co_ind': [], 'co_org': []}
        try:
            self._classifyParty(ret, tmp_auth['type'], tmp_auth['author'], '', whereAreAuthors)
            # The lists are walked pairwise: looking up the position with list.index()
            # always resolves to the first co-author of a given type
            for co_type, co_name in zip(tmp_auth['co_author_type'], tmp_auth['co_author']):
                self._classifyParty(ret, co_type, co_name, 'co_', whereAreAuthors)
        except Exception:
            # Best effort: a malformed item is reported and what has been
            # collected so far is returned
            DeploymentProcessor.log.exception("Cannot fully parse the author item %r", tmp_auth)
        return ret

    def _assignLineage(self, observation):
        '''
            Adds to the observation the lineage found in the data entity document
            @param observation: the observation to update
            @raise NoDataLineage: if the data entity has no lineage
        '''
        data_lineage = findMolesLineage(self._dataEntityMigration)
        if data_lineage is None:
            raise NoDataLineage(self._dataEntityMigration)
        Moles3EPB.addDataLineageToObservation(data_lineage, observation, self._migrationSessions.molesSession)

    def _assignTitle(self, observation):
        '''
            Adds the document title to the observation as an identifier whose
            authority is the 'ceda_title' citation
            @param observation: the observation to update
        '''
        doc_title = extractTitle(self._deploymentMigration)

        # Titles starting with this prefix are replaced by the data entity's title
        if doc_title.startswith(('deployment_', 'Deployment_')):
            doc_title = extractTitle(self._dataEntityMigration)
        auth = createCI_Citation(title = 'ceda_title')
        Moles3EPB.addIdentifierToObservation(createMD_Identifier(code = doc_title, authority = auth), \
                                             observation, self._migrationSessions.molesSession)

    def _assignDescription(self, observation):
        '''
            Adds to the observation the summary extracted from the deployment and
            data entity documents
            @param observation: the observation to update
        '''
        summary = extractSummary(self._deploymentMigration, self._dataEntityMigration)
        Moles3EPB.addDescriptionToObservation(summary, observation, self._migrationSessions.molesSession)

    def _assignQuality(self, observation):
        '''
            Adds to the observation the quality extracted from the data entity document
            @param observation: the observation to update
        '''
        doc_quality = extractQuality(self._dataEntityMigration)
        Moles3EPB.addDocumentQuality(doc_quality, observation, self._migrationSessions.molesSession)

    def updateObservation(self):
        '''
            Delegates to EPB.searchOrCreate using the deployment's ceda_observation_id
            @return: the object returned by EPB.searchOrCreate
        '''
        ceda_observation = EPB.searchOrCreate(CEDA_Observation, self._migrationSessions.molesSession, \
                                              self._deploymentMigration.ceda_observation_id)
        return ceda_observation

    def _assignResult(self, observation):
        '''
            Sets observation.result from the download links of the deployment or,
            if it has none, of the data entity
            @param observation: the observation to update
        '''
        # TODO - Check that if is an update or not!
        download = findDownloadLinksInMigrationDocument(self._deploymentMigration)
        if len(download) == 0:
            download = findDownloadLinksInMigrationDocument(self._dataEntityMigration)
            content = extractContent(self._dataEntityMigration)
        else:
            content = extractContent(self._deploymentMigration)

        archive_prefixes = ('http://badc.nerc.ac.uk/browse', 'http://neodc.nerc.ac.uk/browse')
        # NOTE(review): observation.result is overwritten at each iteration, so only
        # the last link is retained - confirm this is intended.
        for dwn in download:
            int_description = None
            int_applicationProfile = None
            if 'formats' in content:
                int_applicationProfile = content['formats']
            if dwn['href'].startswith(archive_prefixes):
                int_description = "download directly from archive"
            observation.result = createCEDA_Result(linkage = dwn['href'], name = dwn['title'], \
                                                   function = CI_OnLineFunctionCode.cl_download, \
                                                   description = int_description, \
                                                   applicationProfile = int_applicationProfile)

    def _assignGeographicExtent(self, observation):
        '''
            Appends to the observation the bounding box found in the deployment or,
            if it has none, in the data entity. Does nothing if neither has one
            @param observation: the observation to update
        '''
        # TODO - Check that if is an update or not!
        ge = extractGeographicExtentInMigrationDocument(self._deploymentMigration)
        if not ge:
            ge = extractGeographicExtentInMigrationDocument(self._dataEntityMigration)
        if ge:
            bounding_box = createEX_GeographicBoundingBox(ge['east'], ge['north'], ge['west'], ge['south'])
            moles3Append(observation, 'geographicExtent', bounding_box)

    def _assignCreationDate(self, observation):
        '''
            Sets observation.resultTime to the deployment's creation date
            @param observation: the observation to update
        '''
        # TODO - Check that if is an update or not!
        creation_date = extractMolesCreationDate(self._deploymentMigration)
        py_datetime = isoDateTimeStringToTimeDate(creation_date)
        date_time = createDateTime(py_datetime)
        tm_position = createTM_Position(dateTime8601 = date_time)
        observation.resultTime = createTM_Instant(tm_position)

    def _assignDOI(self, observation):
        '''
            Appends to the observation an identifier for the DOI found in the
            deployment or, if it has none, in the data entity. Does nothing if
            neither has one
            @param observation: the observation to update
        '''
        # TODO - Check that if is an update or not!
        doi = findDOIInMigrationDocument(self._deploymentMigration)
        if not doi:
            doi = findDOIInMigrationDocument(self._dataEntityMigration)
        if not doi:
            return

        # Publication dates hard-coded for the two DOIs listed here
        known_publication_dates = {
            '10.5285/E8F43A51-0198-4323-A926-FE69225D57DD': date(2011, 4, 1),
            '10.5285/78114093-E2BD-4601-8AE5-3551E62AEF2B': date(2011, 11, 29),
        }

        identifier = createMD_Identifier(code = doi['href'])
        ci_date = None
        py_date = known_publication_dates.get(doi['href'])
        if py_date is not None:
            ci_date = createCI_Date(CI_DateTypeCode.cl_publication, date = createDate(py_date))
        if ci_date:
            identifier.authority = createCI_Citation("", date = ci_date)
        moles3Append(observation, 'identifier', identifier)

    def _createDataCentre(self, name, linkage, website_name, delivery_point, email, voice):
        '''
            Creates the MO_Organization describing a data centre hosted at
            Harwell Oxford (OX11 0QX, UK)
            @param name: the organization's name
            @param linkage: the URL of the organization's website
            @param website_name: the name of the online resource
            @param delivery_point: the address' delivery point
            @param email: the address' electronic mail
            @param voice: the telephone number
            @return: the object returned by createMO_Organization
        '''
        i_onlineResources = createCI_OnlineResource(linkage = linkage, name = website_name)
        i_address = createCI_Address(deliveryPoint = [delivery_point], \
                                     electronicMailAddress=[email], postalCode='OX11 0QX', \
                                     country='UK', city='Harwell Oxford')
        i_phone = createCI_Telephone(voice=[voice])
        contact = createCI_Contact(phone=i_phone, address=i_address, onlineResource=i_onlineResources)
        return createMO_Organization(name = name, contactInfo = [contact])

    def _assignPublisherCurator(self, observation):
        '''
            Appends to observation.relatedParty the deployment's provider as
            publisher, custodian, distributor, point of contact, metadata owner and
            curator. Does nothing if the provider is none of DO_BADC, DO_NEODC,
            DO_UKSSDC
            @param observation: the object owning the relatedParty list
        '''
        provider_id = extractMolesProviderID(self._deploymentMigration)
        party = None
        if provider_id == DO_BADC:
            party = self._createDataCentre(
                "NERC - British Atmospheric Data Centre", 'http://badc.rl.ac.uk', \
                'British Atmospheric Data Centre Website', \
                'British Atmospheric Data Centre, STFC Rutherford Appleton Laboratory', \
                'badc@rl.ac.uk', '+44(0)1235 446432')
        elif provider_id == DO_NEODC:
            # The name was formerly a copy of the BADC one
            party = self._createDataCentre(
                'NERC - Earth Observation Data Centre', 'http://www.neodc.rl.ac.uk', \
                'NERC Earth Observation Data Centre website', \
                'NERC - Earth Observation Data Centre, STFC Rutherford Appleton Laboratory', \
                'neodc@rl.ac.uk', '+44(0)1235 446432')
        elif provider_id == DO_UKSSDC:
            party = self._createDataCentre(
                'NERC - UK Solar System Data Centre', 'http://www.ukssdc.rl.ac.uk', \
                'UK Solar System Data Centre website', \
                'UK Solar System Data Centre, STFC Rutherford Appleton Laboratory', \
                'support@rl.ac.uk', '+44(0)1235 445173')

        if not party:
            return

        observation.relatedParty.append(createMO_ResponsiblePartyInfo_(MO_RoleValue.cl_publisher, [party]))
        # Each further role gets its own copy of the party
        for role in (MO_RoleValue.cl_custodian, MO_RoleValue.cl_distributor, \
                     MO_RoleValue.cl_pointofcontact, MO_RoleValue.cl_metadataowner, \
                     MO_RoleValue.cl_curator):
            observation.relatedParty.append(createMO_ResponsiblePartyInfo_(role, deepcopy([party])))

    def _assignPhenomenonTime(self, observation):
        '''
            Sets observation.phenomenonTime from the deployment's temporal range.
            A 'begin/end' range becomes a TM_Period, a single date a TM_Position.
            Does nothing if the deployment has no temporal range
            @param observation: the observation to update
        '''
        doc_phenomenon_time = extractMolesTemporalRange(self._deploymentMigration)
        if not doc_phenomenon_time:
            return

        if '/' in doc_phenomenon_time:
            period = doc_phenomenon_time.split('/')
            begin_date = createDate(isoDateStringToTimeDate(period[0]))
            begin_tm_instant = createTM_Instant(createTM_Position(date8601 = begin_date))

            end_date = createDate(isoDateStringToTimeDate(period[1]))
            end_tm_instant = createTM_Instant(createTM_Position(date8601 = end_date))

            observation.phenomenonTime = createTM_Period(begin_tm_instant, end_tm_instant)
        else:
            # NOTE(review): here a TM_Position is assigned while the period's ends are
            # wrapped in a TM_Instant - confirm this is intended.
            single_date = createDate(isoDateStringToTimeDate(doc_phenomenon_time))
            observation.phenomenonTime = createTM_Position(date8601 = single_date)

    def _assignPermission(self, observation):
        '''
            Sets observation.permission from the access/download links of the
            deployment and of the data entity. A single access link produces a
            restricted constraint; without access links a single download link of
            the data entity produces an open access statement
            @param observation: the observation to update
        '''
        access_link = findAccessLinksInMigrationDocument(self._deploymentMigration)
        dwn_link = findDownloadLinksInMigrationDocument(self._deploymentMigration)
        if len(access_link) == 0:
            access_link = findAccessLinksInMigrationDocument(self._dataEntityMigration)

        i_accessConstraints = []
        i_use_limitation = []

        if len(access_link) == 0:
            # NOTE(review): the open access statement is set only when the deployment
            # has no download link of its own - confirm this is intended.
            if len(dwn_link) == 0:
                dwn_link = findDownloadLinksInMigrationDocument(self._dataEntityMigration)
                if dwn_link and len(dwn_link) == 1:
                    i_use_limitation.append("These data are open access and available through %s." \
                                            % (dwn_link[0]['href']) )
                    # 'accessConstrains' is the keyword used by the existing call
                    observation.permission = createMD_LegalConstraints(useLimitation = i_use_limitation, \
                                                                       accessConstrains = i_accessConstraints)
        elif len(access_link) == 1:
            i_use_limitation.append("Access to these data is restricted. To obtain access please apply for access at: %s" \
                                    % (access_link[0]['href']))
            i_accessConstraints.append(MD_RestrictionCode.cl_restricted)
            observation.permission = createMD_LegalConstraints(useLimitation = i_use_limitation, \
                                                               accessConstrains = i_accessConstraints)

    def _assignMoles2Link(self, ceda_observation):
        '''
            Appends to the observation an identifier, whose authority is the
            'moles2url' citation, holding the URL built from the deployment's
            doc_owner and doc_name
            @param ceda_observation: the observation to update
        '''
        i_code = 'http://badc.nerc.ac.uk/view/%s__ATOM__%s' \
            % (self._deploymentMigration.doc_owner, self._deploymentMigration.doc_name)
        i_code = i_code.replace('.atom', '')
        i_authority = createCI_Citation('moles2url')
        identifier = createMD_Identifier(code = i_code, authority = i_authority)
        moles3Append(ceda_observation, 'identifier', identifier)

    def createNewObservation(self):
        '''
            Creates a CEDA_Observation in the 'working' publication state, fills it
            from the deployment/data entity documents, adds it through Moles3EPB
            and associates it to the deployment
            @return: the new CEDA_Observation
        '''
        ceda_observation = CEDA_Observation()
        ceda_observation.publicationState = MO_PublicationStateValue.cl_working
        self._assignQuality(ceda_observation)
        self._assignLineage(ceda_observation)
        self._assignDescription(ceda_observation)
        self._assignName(ceda_observation)
        self._assignTitle(ceda_observation)
        # _assignResult is currently not called
        self._assignGeographicExtent(ceda_observation)
        self._assignDOI(ceda_observation)
        self._assignCreationDate(ceda_observation)
        self._assignPublisherCurator(ceda_observation)
        self._assignPhenomenonTime(ceda_observation)
        self._assignPermission(ceda_observation)
        self._assignMoles2Link(ceda_observation)
        Moles3EPB.addCedaObject(ceda_observation, self._migrationSessions.molesSession, True)
        MigrationEPB.associateObservationToDeployment(self._deploymentMigration, ceda_observation.id, \
                                                      self._migrationSessions.molesSession, True)
        return ceda_observation

    def _getObservation(self):
        '''
            Delegates to EPB.search using the deployment's ceda_observation_id
            @return: the object returned by EPB.search
        '''
        return EPB.search(CEDA_Observation, self._deploymentMigration.ceda_observation_id, \
                          self._migrationSessions.molesSession)

    def process(self):
        '''
            Retrieves, updates or creates the CEDA_Observation associated to the
            deployment, then assigns its procedure and refreshes the deployment's
            stored hash
            @return: the CEDA_Observation
        '''
        # Moles3 object exists...
        if self._deploymentMigration.ceda_observation_id:
            # ...and the deployment document has not changed
            if hasAtomDocumentSameHash(self._deploymentMigration):
                ceda_observation = self._getObservation()
            else:
                ceda_observation = self.updateObservation()
        else:
            #... does not exist so create it
            ceda_observation = self.createNewObservation()

        # process the CEDA_Observation.procedure
        deploymentDataProcessor = DeploymentDataProcessor(self._deploymentMigration, self._migrationSessions)
        links = findLinksInDeployment(self._deploymentMigration)
        procedure = deploymentDataProcessor.createProcess(links)

        # Temporary commented because CEDA_Project.subProject is not correctly mapped to the DB
        #project = deploymentDataProcessor.createProject(links)
        project = None
        if procedure:
            ceda_observation.procedure = procedure

        if project:
            ceda_observation.inSupportOf = project

        if procedure or project:
            self._migrationSessions.molesSession.commit()

        if not self._deploymentHasSameHash:
            self._deploymentMigration.doc_hash = getAtomDocumentHashByMO(self._deploymentMigration)
            # NOTE(review): _commitMigration is not defined in this class as it is
            # visible here - confirm where it is provided.
            self._commitMigration()

        return ceda_observation
Note: See TracBrowser for help on using the repository browser.