source: ndgCommon/trunk/ndg/common/src/lib/atomutilities.py @ 4934

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/lib/atomutilities.py@4934
Revision 4934, 5.8 KB checked in by cbyrom, 11 years ago (diff)

Update lib code to cope with new client suite + remove dependency on
xmlHandler2 and remove this + various other tidy ups.

Line 
1'''
2Various helper methods for use with atom data  - mainly for
3adding missing info when doing an atom -> dif transform
4
5@author: C Byrom, Tessella Jan 09
6'''
7import logging
8from xml.etree import ElementTree as ET
9from ndg.common.src.models.ndgObject import ndgObject
10from ndg.common.src.lib.utilities import findElementIndex
11import ndg.common.src.clients.xmldb.eXist.dbconstants as dc
12
def addStandardKeywords(doc):
    '''
    Add keywords common to all data sets to a DIF doc

    One DIF Keyword element is created per standard keyword and inserted
    into the document root just after the index that findElementIndex
    reports for the Parameters element (called with isLast = True).
    @param doc: string representation of DIF doc
    @return doc in string form with added data
    @raise ValueError: if no Parameters element can be found in the doc
    '''
    logging.info("Adding common keywords")
    difDoc = ET.fromstring(doc)

    elementName = '{' + ndgObject.DIF_NS + '}Parameters'
    index = findElementIndex(difDoc, elementName, isLast = True)

    # a negative index is treated as 'element not found'
    if index < 0:
        errorMessage = "Could not find '%s' element in doc to insert keywords after - skipping doc" %elementName
        logging.error(errorMessage)
        raise ValueError(errorMessage)

    # advance the insertion point after each insert so that, if further
    # keywords are added to the list, they keep their listed order rather
    # than ending up reversed
    insertAt = index + 1
    for keyword in ['NERC_DDC']: #'climatologyMeteorologyAtmosphere'
        keywordElement = ET.Element('{' + ndgObject.DIF_NS + '}Keyword')
        keywordElement.text = keyword
        difDoc.insert(insertAt, keywordElement)
        insertAt += 1

    logging.info("Keywords added")
    return ET.tostring(difDoc)
37
38
def expandParametersData(doc):
    '''
    The parameters data is taken from the atom category elements - and these need
    to be properly looked up on the vocab server to give their proper values

    For each Parameters element directly under the document root, the Topic
    child text is expected in the form URI---TERM.  When it has exactly
    that form, the text is replaced by the title of the matching vocab term
    or, if the lookup fails, by the URI followed by ' (NEED TO RESOLVE)'.
    Parameters elements with a missing or empty Topic, or with Topic text in
    any other form, are left untouched.
    @param doc: string representation of DIF doc
    @return doc in string form with added data
    '''
    logging.info("Expanding Parameters data")
    difDoc = ET.fromstring(doc)

    nsPrefix = '{' + ndgObject.DIF_NS + '}'
    parametersName = nsPrefix + 'Parameters'
    topicName = nsPrefix + 'Topic'
    for element in difDoc:
        if element.tag != parametersName:
            continue
        logging.debug("Parameters element found")

        # extract the topic data - this is where the vocab data is stored
        # NB, the data is concated as URI---TERM
        topicElement = element.find(topicName)
        # find() returns None when there is no Topic child - skip these
        # rather than failing on the attribute access
        if topicElement is None or not topicElement.text:
            continue

        data = topicElement.text.split('---')
        if len(data) != 2:
            continue

        uri, term = data
        try:
            # NOTE(review): vocabUtil is not among the imports visible at
            # the top of this module - confirm it is defined, otherwise this
            # lookup always raises NameError and the fallback below is used
            VTI = vocabUtil.getTermItemfromURIAndTerm(uri, term)
            topicElement.text = VTI.title
        except Exception:
            # NB, this will currently fail since the vocab data is incomplete
            # - record why, so that the failure is not completely silent
            logging.debug("Could not resolve vocab term for '%s'", uri,
                          exc_info = True)
            topicElement.text = uri + " (NEED TO RESOLVE)"

    logging.info("Parameters expanded")
    return ET.tostring(difDoc)
72
73
def addOrgData(doc, targetCollection, dataCentre):
    '''
    Add organisation data to a DIF doc

    The dataCentre element is inserted into the document root just after the
    index that findElementIndex reports for the Data_Set_Language element.
    If targetCollection contains the text 'Published', a Version element
    (currently fixed at '1.0') is also appended to the Data_Set_Citation
    element.
    @param doc: string representation of DIF doc
    @param targetCollection: Collection that the data is from
    @param dataCentre: elementree element representing the DIF:Data_Center
    element - with the required organisation data to add
    @return doc in string form with added data
    @raise ValueError: if the doc has no Data_Set_Language element or, for
    Published data, no Data_Set_Citation element
    '''
    logging.info("Adding organisation data")
    difDoc = ET.fromstring(doc)

    elementName = '{' + ndgObject.DIF_NS + '}Data_Set_Language'
    index = findElementIndex(difDoc, elementName)

    # a negative index is treated as 'element not found'
    if index < 0:
        errorMessage = "Could not find '%s' element in doc to insert organisation data after - skipping doc" %elementName
        logging.error(errorMessage)
        raise ValueError(errorMessage)

    difDoc.insert(index + 1, dataCentre)

    # add additional tag for Published (not published) data
    # - NB, the match is case sensitive
    if 'Published' in targetCollection:
        logging.info("Adding published version info")
        # TODO: when proper versioning is implemented in the Atoms, this
        # tag should extract the relevant info
        citationName = '{' + ndgObject.DIF_NS + '}Data_Set_Citation'
        citationElement = difDoc.find(citationName)
        # find() returns None when the element is absent - report this in
        # the same way as the missing element above, rather than failing
        # with an AttributeError on the append
        if citationElement is None:
            errorMessage = "Could not find '%s' element in doc to add version info to - skipping doc" %citationName
            logging.error(errorMessage)
            raise ValueError(errorMessage)

        versionElement = ET.Element('{' + ndgObject.DIF_NS + '}Version')
        versionElement.text = '1.0'
        citationElement.append(versionElement)
        logging.info("Finished adding version info")

    logging.info("Finished adding organisation info")
    return ET.tostring(difDoc)
111
112
def getDataCentreDIFElement(repositoryID, atomClient):
    '''
    Organisation data needs to be added to DIF records created from atoms
    - this data is stored in the eXist DB - and is retrieved using this method
    @param repositoryID: ID of the organisation where the data is stored.  Currently
    valid IDs are 'badc.nerc.ac.uk' and 'neodc.nerc.ac.uk'
    @param atomClient: xml db client implementing the InterfaceXMLDBClient interface
    @return: Elementtree Element containing the specified Data_Centre element data
    - NB, this is the result of an Elementtree find() on the first query
    result, so it is None if that result has no Data_Center child element
    @raise: ValueError if unrecognised repository ID is specified, or if the
    query returns no organisation data for it
    '''
    logging.info("Getting organisation data for repository, '%s'" %repositoryID)
    if repositoryID not in ('badc.nerc.ac.uk', 'neodc.nerc.ac.uk'):
        # NB, the placeholder must be '%s' - a '$s' typo here makes the
        # formatting itself fail with a TypeError, masking the ValueError
        raise ValueError("Unrecognised repository ID, '%s'" %repositoryID)

    # NB, using the query() method (via the useChunked keyword) here results
    # in the namespaces being expanded - which ET doesn't like
    orgData = atomClient.buildAndRunQuery('dif', dc.RESOURCES_COLLECTION_PATH,
                                          repositoryID, 'organisation',
                                          useChunked = False)

    if not orgData:
        raise ValueError("Could not find organisation data for provider, '%s'" %repositoryID)

    logging.info("Data retrieved - turning this into Elementtree Element data")

    # strip out the content we want - only the first query result is used
    tree = ET.fromstring(orgData[0])
    dataCentre = tree.find('{' + ndgObject.DIF_NS + '}Data_Center')
    logging.info("- returning Element data for data centre")
    return dataCentre
Note: See TracBrowser for help on using the repository browser.