1 | ''' |
---|
2 | Class representing data in atom format - allowing moles data to be stored and accessed in a web feed compatible way |
---|
3 | |
---|
4 | @author: C Byrom, Tessella Jun 2008 |
---|
5 | ''' |
---|
6 | import sys, logging, re, datetime |
---|
7 | from xml.sax.saxutils import escape, unescape |
---|
8 | from xml.etree import cElementTree as ET |
---|
9 | import csml.parser as CsmlParser |
---|
10 | import ndg.common.src.clients.xmldb.eXist.dbconstants as dc |
---|
11 | from ndg.common.src.lib.ETxmlView import et2text |
---|
12 | import ndg.common.src.lib.utilities as utilities |
---|
13 | from ndg.common.src.models.vocabtermdata import VocabTermData as VTD |
---|
14 | from ndg.common.src.models import MolesEntity as ME |
---|
15 | from ndg.common.src.models import Deployment as Deployment |
---|
16 | from ndg.common.src.models import AtomState |
---|
17 | from ndg.common.src.models.ndgObject import ndgObject |
---|
18 | |
---|
class AtomError(Exception):
    """
    Error type raised by the Atom class.

    The message is written to the error log at construction time, i.e. before
    the exception is actually raised by the caller.
    """
    def __init__(self, msg):
        # log first, so the problem is recorded even if a caller swallows it
        logging.error(msg)
        super(AtomError, self).__init__(msg)
---|
26 | |
---|
27 | |
---|
class Person(object):
    '''
    Class representing atom author type data - with name, uri and role attributes
    @keyword personType: Type of person to create - specified using the Person.._Type
    values.  Default is AUTHOR_TYPE.
    @keyword namespace: a two value array of format, ['short_namespace_name', 'full_namespace_name']
    - e.g. ['moles', 'http://ndg.nerc.ac.uk/schema/moles2beta']

    Two Person objects compare equal when their name, uri, role and type all
    match - this is what makes 'if person in personArray' work.
    '''
    AUTHOR_TYPE = 0
    CONTRIBUTOR_TYPE = 1
    RESPONSIBLE_PARTY_TYPE = 2
    # NB, indexed by the ..._TYPE values above
    ELEMENT_NAMES = ["author", "contributor", "responsibleParty"]

    def __init__(self, personType = AUTHOR_TYPE, namespace = None):
        self.type = personType
        if namespace:
            self.ns_shortname = namespace[0]
            self.ns_fullname = namespace[1]
        else:
            # no prefix - i.e. element lives in the default (atom) namespace
            self.ns_shortname = ""
            self.ns_fullname = ndgObject.ATOM_NS

        self.name = ""
        self.uri = ""
        self.role = ""

        # NB, the atom format specifies slightly different data contents
        self.uriTagName = "email"
        # NB, responsible party data is always stored in the moles section
        # - so this overrides any namespace passed in
        if self.type == self.RESPONSIBLE_PARTY_TYPE:
            self.ns_shortname = 'moles'
            self.ns_fullname = ndgObject.MOLES_NS
            self.uriTagName = "uri"

    def __str__(self):
        '''
        @return: triple string, 'name | uri | role' - or an empty string if
        none of these are set
        '''
        if self.hasValue():
            return self.name + " | " + self.uri + " | " + self.role
        return ""

    def hasValue(self):
        '''
        @return: True if any of name, uri or role are set; False otherwise
        '''
        if self.name or self.uri or self.role:
            return True
        return False

    def fromString(self, personString):
        '''
        Populate name, uri and role from a triple string
        @param personString: string passed to utilities.getTripleData
        '''
        (self.name, self.uri, self.role) = utilities.getTripleData(personString)

    def fromETElement(self, personTag):
        '''
        Populate name, role and uri from the child elements of an element
        @param personTag: ElementTree element to read the data from
        '''
        self.name = unescape(personTag.findtext('{%s}name' %self.ns_fullname) or "")
        self.role = unescape(personTag.findtext('{%s}role' %self.ns_fullname) or "")
        self.uri = unescape(personTag.findtext('{%s}%s' %(self.ns_fullname, self.uriTagName)) or "")
        logging.debug("Added name: '%s', role: '%s', %s: '%s'" \
                      %(self.name, self.role, self.uriTagName, self.uri))

    def toXML(self):
        '''
        Create an element for the person - NB, sub-elements are only added
        for the fields which have a value
        @return: ElementTree element representing the person
        '''
        prefix = ""
        if self.ns_shortname:
            prefix = self.ns_shortname + ':'

        author = ET.Element(prefix + self.ELEMENT_NAMES[self.type])

        if self.name:
            name = ET.SubElement(author, prefix + "name")
            name.text = escape(self.name)

        if self.uri:
            uri = ET.SubElement(author, prefix + self.uriTagName)
            uri.text = escape(self.uri)

        if self.role:
            role = ET.SubElement(author, prefix + "role")
            role.text = escape(self.role)

        return author

    def __eq__(self, other):
        '''
        Value comparison - NB, __cmp__ is ignored by python 3, so without
        this 'person in personArray' silently becomes an identity check
        '''
        if self is other:
            return True
        if not isinstance(other, Person):
            return False
        return self.uri == other.uri and self.name == other.name and \
            self.role == other.role and self.type == other.type

    def __ne__(self, other):
        return not self.__eq__(other)

    # NB, defining __eq__ makes a python 3 class unhashable; instances were
    # previously hashable by identity, so keep that to avoid breaking any
    # existing use in sets/dict keys
    __hash__ = object.__hash__

    def __cmp__(self, person1):
        '''
        Override comparison to allow proper object comparison when checking
        if Person objects are in an array already - i.e. if person in personArray...
        NB, only the 'equal'/'not equal' distinction is meaningful here
        '''
        if not person1:
            return -1

        if self == person1:
            return 0
        return 1
---|
118 | |
---|
119 | |
---|
class Link(object):
    '''
    Class representing an atom link - with href, title and rel attributes

    Two Link objects compare equal when their href, title and rel all match
    - this is what makes 'if link in linkArray' work.
    '''

    def __init__(self):
        self.href = ""
        self.title = ""
        self.rel = ""

    def fromString(self, linkString):
        '''
        Populate href, title and rel from a triple string
        @param linkString: string passed to utilities.getTripleData
        '''
        (self.href, self.title, self.rel) = utilities.getTripleData(linkString, doEscape=False)
        # ensure no funny characters are included on data ingest
        self.title = utilities.escapeSpecialCharacters(self.title)

    def fromETElement(self, linkTag):
        '''
        Populate href, rel and title from the attributes of an element
        @param linkTag: ElementTree element to read the attributes from
        '''
        # remove any url quoting when reading in from XML - to avoid need for
        # correction on display
        self.href = unescape(linkTag.attrib.get('href') or "")
        self.rel = unescape(linkTag.attrib.get('rel') or "")
        self.title = unescape(linkTag.attrib.get('title') or "")

    def toXML(self):
        '''
        @return: ElementTree 'link' element with href, title and rel attributes
        '''
        # ensure the xml element doesn't contain things like '&' - which will
        # cause problems when running xqueries
        link = ET.Element("link")
        link.attrib["href"] = escape(self.href)
        link.attrib["title"] = escape(self.title)
        link.attrib["rel"] = escape(self.rel)
        return link

    def hasValue(self):
        '''
        @return: True if the link has a href or a title; False otherwise
        '''
        # NB, just a rel on its own is meaningless - so ignore
        if self.href or self.title:
            return True
        return False

    def __str__(self):
        '''
        @return: triple string, 'href | title | rel' - or an empty string if
        none of these are set
        '''
        if self.href or self.title or self.rel:
            return self.href + " | " + self.title + " | " + self.rel
        return ""

    def isChildAtom(self):
        '''
        Determines whether the link refers to another atom - e.g. a link to
        a data granule
        @return True, if so; False otherwise
        '''
        # NB, endswith accepts a tuple of suffixes - True if any of them match
        childTerms = (VTD.GRANULE_TERM, VTD.DEPLOYMENT_TERM, VTD.ACTIVITY_TERM,
                      VTD.DPT_TERM, VTD.OBS_TERM)
        return self.rel.endswith(childTerms)

    def __eq__(self, other):
        '''
        Value comparison - NB, __cmp__ is ignored by python 3, so without
        this 'link in linkArray' silently becomes an identity check
        '''
        if self is other:
            return True
        if not isinstance(other, Link):
            return False
        return self.href == other.href and self.title == other.title and \
            self.rel == other.rel

    def __ne__(self, other):
        return not self.__eq__(other)

    # NB, defining __eq__ makes a python 3 class unhashable; instances were
    # previously hashable by identity, so keep that to avoid breaking any
    # existing use in sets/dict keys
    __hash__ = object.__hash__

    def __cmp__(self, link1):
        '''
        Override comparison to allow proper object comparison when checking
        if Link objects are in an array already - i.e. if link in linkArray...
        NB, only the 'equal'/'not equal' distinction is meaningful here
        '''
        if not link1:
            return -1

        if self == link1:
            return 0
        return 1
---|
191 | |
---|
192 | |
---|
class Category(object):
    '''
    An atom category - holding term, scheme and label values, all of which
    default to empty strings
    '''
    def __init__(self):
        self.term = ""
        self.scheme = ""
        self.label = ""

    def fromString(self, linkString, escapeSpecialCharacters=True):
        '''
        Populate the category from a triple string of format,
        'label | scheme | term'
        @param linkString: triple string to create category with
        @keyword escapeSpecialCharacters: passed on to utilities.getTripleData
        as its doEscape keyword (default True)
        '''
        (self.label, self.scheme, self.term) = utilities.getTripleData(
            linkString, doEscape=escapeSpecialCharacters)

        # the values end up in XML attributes - i.e. already wrapped in double
        # quotes - so swap any embedded double quotes for apostrophes
        for fieldName in ("label", "scheme", "term"):
            cleaned = getattr(self, fieldName).replace("\"", "'")
            setattr(self, fieldName, cleaned)

    def fromETElement(self, linkTag):
        '''
        Populate the category from the attributes of an element; attributes
        which are missing are set to empty strings
        @param linkTag: ElementTree element to read the attributes from
        '''
        attributes = linkTag.attrib
        self.term = unescape(attributes.get('term') or "")
        self.label = unescape(attributes.get('label') or "")
        self.scheme = unescape(attributes.get('scheme') or "")

    def toXML(self):
        '''
        @return: ElementTree 'category' element with escaped term, scheme and
        label attributes
        '''
        categoryElement = ET.Element("category")
        categoryElement.set("term", escape(self.term))
        categoryElement.set("scheme", escape(self.scheme))
        categoryElement.set("label", escape(self.label))
        return categoryElement

    def hasValue(self):
        '''
        @return: True if any of scheme, label or term are set; False otherwise
        '''
        return bool(self.scheme or self.label or self.term)
---|
235 | |
---|
236 | |
---|
class Atom(object):
    '''
    Class representing data in atom format
    '''

    # labels for use with the atom categories - NB, these are written out as
    # the category 'term' for the atom type and subtype respectively
    ATOM_TYPE = "ATOM_TYPE"
    ATOM_SUBTYPE = "ATOM_SUBTYPE"

    # labels for use with the templates to set/extract specific inputs
    ONLINE_REF_LABEL = "online_ref"
    PARAMETER_LABEL = "parameter"
    ATOM_REF_LABEL = "atom_ref"
    DELIMITER = "---"
    REMOVE_LABEL = "remove"

    # format to use for t1-t2 date range
    # NB, despite the name, this is a full year-month-day strftime pattern
    YEAR_FORMAT = '%Y-%m-%d'

    # subtype name, when not defined
    SUB_TYPE_NOT_DEFINED_NAME = "Not currently defined"
---|
255 | |
---|
def __init__(self, atomType = None, vocabTermData = None, ndgObject = None, \
             xmlString = None, state = AtomState.WORKING_STATE, **inputs):
    '''
    Constructor - initialise the atom variables
    @keyword atomType: type of atom to set up
    @keyword vocabTermData: instance of VocabTermData object to use with atom
    - if not set, a new instance is created
    @keyword ndgObject: instance of ndgObject to use with atom - NB, within
    this method the name hides the module level ndgObject import
    @keyword xmlString: XML representation of atom - will be parsed to populate
    the atom data
    @keyword state: AtomState object representing the state of the atom
    @param inputs: a dict with vals to set directly against the object fields
    - NB, these are also passed to, and set on, the MolesEntity object
    '''
    logging.info("Initialising atom")
    if atomType:
        logging.info(" - of type '%s'" %atomType)
    self.atomTypeID = atomType

    # some data have further subtypes specified
    self.subtypeID = None # this should be the termID
    self.subtype = None   # and this should be the fully formed vocab URL

    self.ndgObject = ndgObject

    self.atomName = None
    self.files = []
    self.author = Person()
    self.contributors = []
    self.atomAuthors = []
    self.parameters = []
    self.spatialData = []
    self.temporalData = []
    self.relatedLinks = []
    self.summary = ""
    self.content = []
    # NB, this deployments data duplicates other atom data - and is only used for a
    # convenient way to collect the info (by lookupAssociatedData()) for use in templates
    self.deployments = []
    # ditto for the following field
    self.dataEntities = []

    self.csmlFile = None
    self.cdmlFile = None
    # general variable to use for setting the atom content - NB, if a csmlFile is specified
    # (either directly or via a cdmlFile specification), this will be the content by default
    # for this purpose
    self.contentFile = None
    self.title = None
    self.datasetID = None # NB, the dataset id ends up in the atomName - <path><datasetID>.atom
    self.atomID = None

    # boundary box info - to replace spatial/temporalData?
    self.minX = None
    self.minY = None
    self.maxX = None
    self.maxY = None
    self.t1 = None
    self.t2 = None

    self.ME = ME.MolesEntity(**inputs)

    # date when the atom was first ingested
    self.publishedDate = None

    # last update date
    self.updatedDate = None

    # assume atom in working state by default - this is used to define what collection
    # in eXist the atom is stored in
    self.state = state

    # use the vocab term data passed in, if available, otherwise set up
    # a new instance
    if vocabTermData:
        self.VTD = vocabTermData
    else:
        self.VTD = VTD()

    if xmlString:
        self.fromString(xmlString)

    # retain old title, in case it has changed - NB, this will be done by applying
    # the inputs dict - and might require other atoms to be updated
    self.oldTitle = self.title

    # if inputs passed in as dict, add these now
    if inputs:
        logging.info("Adding info to atom from input dict")
        logging.debug(inputs)

        # avoid the initial case being caught - i.e. when there is no title at all
        # NB, use 'in' rather than dict.has_key - the latter is not
        # available in python 3
        if 'title' in inputs:
            newTitle = inputs.get('title')
            if not self.title:
                self.oldTitle = newTitle

        self.__dict__.update(inputs)
        self.ME.__dict__.update(inputs)

        # NB, this doesn't trigger the Content Property, so do this
        # explicitly, if need be
        if 'Content' in inputs:
            self.Content = inputs.get('Content')
        # the author input is a string - so convert it into a Person
        if 'author' in inputs:
            name = inputs.get('author')
            author = Person()
            author.fromString(name)
            self.author = author

    if self.atomTypeID:
        self.atomTypeName = self.VTD.TERM_DATA[self.atomTypeID].title

    self.deploymentsURL = ""
    self.dataEntitiesURL = ""

    logging.info("Atom initialised")
---|
370 | |
---|
371 | |
---|
def addOnlineReferences(self, links):
    '''
    Replace the online reference data held by the atom with the input links
    - NB, online references share the relatedLinks array with the links used
    for atom associations, so only the links for which isChildAtom() is true
    are retained from the existing array
    @param links: a Link or array of Links to add to the relatedLinks attribute
    - if this is empty, the atom is left unchanged
    '''
    logging.debug("Adding online references")
    if not links:
        return

    # wrap a single link up, so it can be handled as an array
    if type(links) is not list:
        links = [links]

    # keep the atom associations, drop everything else, then add the new refs
    retained = [existing for existing in self.relatedLinks if existing.isChildAtom()]
    retained.extend(links)
    self.relatedLinks = retained
    logging.debug("Online references added")
---|
395 | |
---|
396 | |
---|
def addUniqueRelatedLinks(self, links):
    '''
    Pass the input links, together with the atom's relatedLinks array, on
    to addUniqueLinks
    @param links: a Link or array of Links to add to the relatedLinks attribute
    '''
    currentLinks = self.relatedLinks
    self.addUniqueLinks(currentLinks, links)
---|
403 | |
---|
404 | |
---|
def removeRelatedLinks(self, linksToDelete):
    '''
    Rebuild the atom's related links array without the input links
    - NB, any entry in the existing array which is not a Link object is
    also left out of the rebuilt array (a warning is logged)
    @param linksToDelete: Link, or array of Link objects, to remove from atom
    - if this is empty, the atom is left unchanged
    '''
    logging.debug("Removing related links from atom")
    if not linksToDelete:
        return

    # wrap a single link up, so it can be handled as an array
    if type(linksToDelete) is not list:
        linksToDelete = [linksToDelete]

    survivors = []
    for candidate in self.relatedLinks:
        if type(candidate) is not Link:
            logging.warning("Link is not of 'Link' object type (type='%s') - skipping" %type(candidate))
            continue

        if candidate not in linksToDelete:
            survivors.append(candidate)
            continue

        logging.debug("- found link to remove")

    self.relatedLinks = survivors
    logging.debug("Links removed")
---|
429 | |
---|
def getPublicationStatePath(self):
    '''
    Work out the collection matching the publication state of the atom
    - i.e. the base atom collection path followed by the collection path
    of the atom's state object
    @return: collection path for the publication state of the atom
    '''
    logging.debug("Getting collection path for atom publication state")
    statePath = dc.ATOM_COLLECTION_PATH + self.state.collectionPath
    logging.debug("Returning publication state collection, '%s'" %statePath)
    return statePath
---|
439 | |
---|
440 | |
---|
def getDefaultEntityCollectionPath(self):
    '''
    Work out the collection matching the entity type of the atom - this is
    the publication state collection followed by a type specific collection
    @return: collection path for the data type of the atom
    '''
    logging.debug("Getting collection path for atom entity type")
    statePath = self.getPublicationStatePath()

    typeID = self.atomTypeID
    if typeID == VTD.DE_TERM:
        entityPath = statePath + dc.DE_COLLECTION_PATH
    elif typeID == VTD.GRANULE_TERM:
        entityPath = statePath + dc.GRANULE_COLLECTION_PATH
    elif typeID == VTD.ACTIVITY_TERM and self.subtypeID == VTD.DEPLOYMENT_TERM:
        # NB, activities with the deployment subtype use the DEPLOYMENTS
        # (plural) constant...
        entityPath = statePath + dc.DEPLOYMENTS_COLLECTION_PATH
    else:
        # ...whilst everything else falls back to the DEPLOYMENT (singular) one
        entityPath = statePath + dc.DEPLOYMENT_COLLECTION_PATH

    logging.debug("Returning entity collection, '%s'" %entityPath)
    return entityPath
---|
461 | |
---|
462 | |
---|
def getDefaultCollectionPath(self):
    '''
    Work out the collection to use for the atom in eXist - this is the
    entity collection path, followed by the provider ID and a trailing '/'
    @return: default collection path for the atom
    @raise AtomError: if the moles entity has no provider ID set
    '''
    logging.debug("Getting default collection path for atom")
    entityPath = self.getDefaultEntityCollectionPath()

    providerID = self.ME.providerID
    if not providerID:
        raise AtomError("Error: cannot determine atom collection path because " + \
                        "the provider ID is not defined")

    fullPath = entityPath + providerID + "/"
    logging.debug("Returning collection, '%s'" %fullPath)
    return fullPath
---|
476 | |
---|
477 | |
---|
def __addAtomTypeDataXML(self, root):
    '''
    Add the atom type, and subtype data, if available, to atom categories
    - and lookup and add the appropriate vocab term data
    @param root: element to append the category elements to
    '''
    if self.atomTypeID:
        logging.info("Adding atom type info to XML output")
        category = Category()
        category.label = self.atomTypeID
        # look up the appropriate vocab term data
        category.scheme = self.VTD.getTermCurrentVocabURL(self.atomTypeID)
        category.term = self.ATOM_TYPE
        root.append(category.toXML())

    if self.subtypeID:
        logging.info("Adding atom subtype info to XML output")
        # NB, use a category of its own here, rather than reusing the type
        # one - which does not exist if the atom type is not set
        subCategory = Category()
        subCategory.label = self.subtypeID
        # look up the appropriate vocab term data
        subCategory.scheme = self.VTD.getTermCurrentVocabURL(self.subtypeID)
        subCategory.term = self.ATOM_SUBTYPE
        root.append(subCategory.toXML())
---|
501 | |
---|
def addMolesEntityData(self, abbreviation, provider_id, object_creation_time):
    '''
    Set the abbreviation, provider ID and created date on the moles entity
    @param abbreviation: value to use for the entity abbreviation
    @param provider_id: value to use for the entity providerID
    @param object_creation_time: creation time - this is run through
    utilities.getISO8601Date before being stored as the createdDate
    '''
    logging.debug('Adding moles entity information')
    entity = self.ME
    entity.abbreviation = abbreviation
    entity.providerID = provider_id
    entity.createdDate = utilities.getISO8601Date(object_creation_time)
    logging.debug('Moles entity information added')
---|
511 | |
---|
512 | |
---|
def addAuthors(self, authors):
    '''
    Add author data appropriately to the atom
    NB, these will overwrite any existing authors of the same type
    - authors go to the author attribute, contributors to the contributors
    array and responsible parties to the moles entity responsibleParties array
    @param authors: list of Person objects with the author data
    @raise AtomError: if more than one author of type, Person.AUTHOR_TYPE, is
    specified - or if an author has an unrecognised type
    '''
    logging.debug('Adding authors data to Atom')
    # types for which data has already been added during this call
    typesAdded = set()
    for author in authors:
        # NB, we're only allowed one atom author
        if author.type == Person.AUTHOR_TYPE:
            self.author = author

            if author.type in typesAdded:
                raise AtomError("Error: an atom can only have one author specified")
            typesAdded.add(author.type)
            continue

        if author.type == Person.CONTRIBUTOR_TYPE:
            authorArray = self.contributors
        elif author.type == Person.RESPONSIBLE_PARTY_TYPE:
            authorArray = self.ME.responsibleParties
        else:
            # NB, without this check, the array from the previous loop
            # iteration would be cleared out and added to by mistake
            raise AtomError("Error: unrecognised author type, '%s'" %author.type)

        # check if this is the first addition - if so, clear out the
        # array in advance
        if author.type not in typesAdded:
            logging.debug("Clearing out author array")
            # NB, need to be careful to clear the array, not create a ref
            # to a new array
            del authorArray[:]
            typesAdded.add(author.type)

        if author.hasValue() and author not in authorArray:
            logging.debug("Adding author (type:'%s', name:'%s', uri:'%s', role:'%s')" \
                          %(author.type, author.name, author.uri, author.role))
            authorArray.append(author)

    logging.debug('Finished adding authors data')
---|
551 | |
---|
552 | |
---|
553 | def _isNewParameter(self, param): |
---|
554 | ''' |
---|
555 | Check if a parameter is already specified in the atom, return False if |
---|
556 | so, otherwise return True |
---|
557 | ''' |
---|
558 | for p in self.parameters: |
---|
559 | if p.term == param.term and \ |
---|
560 | p.scheme == param.scheme and \ |
---|
561 | p.label == param.label: |
---|
562 | return False |
---|
563 | return True |
---|
564 | |
---|
565 | |
---|
def addRelatedLinks(self, linkVals):
    '''
    Convert a related link in string format to an object, via objectify(),
    and add this to the relatedLinks array
    NB, the link is left out if an equal one is already in the array

    @param linkVals: string of format, 'uri | title | vocabServerURL'
    '''
    newLink = self.objectify(linkVals, 'relatedLinks')
    if newLink in self.relatedLinks:
        return
    self.relatedLinks.append(newLink)
---|
576 | |
---|
577 | |
---|
def addParameters(self, params):
    '''
    Add parameters to the atom's parameters array - each one is tidied up,
    converted to a Category and only added if it is not already included
    @param params: parameter string, or array of parameter strings, to add
    '''
    # a lone string would otherwise be looped over one character at a time
    if type(params) is str:
        params = [params]

    for rawParam in params:
        tidied = utilities.tidyUpParameters(rawParam)
        newCategory = Category()
        # NB, the data has been tidied above, so switch off the escaping here
        newCategory.fromString(tidied, escapeSpecialCharacters=False)

        # skip anything the atom already has
        if not self._isNewParameter(newCategory):
            continue

        logging.debug("Adding new parameter: %s" %tidied)
        self.parameters.append(newCategory)
---|
598 | |
---|
599 | |
---|
600 | def _linksToXML(self, root): |
---|
601 | ''' |
---|
602 | Add required links to the input element |
---|
603 | @param root: element to add links to - NB, should be the root element of the atom |
---|
604 | ''' |
---|
605 | selfLink = ET.SubElement(root, "link") |
---|
606 | selfLink.attrib["href"] = self.atomBrowseURL |
---|
607 | selfLink.attrib["rel"] = "self" |
---|
608 | |
---|
609 | for relatedLink in self.relatedLinks: |
---|
610 | if relatedLink.hasValue(): |
---|
611 | root.append(relatedLink.toXML()) |
---|
612 | |
---|
def toXML(self):
    '''
    Convert the atom into XML representation and return this
    NB, this also sets the publishedDate and updatedDate attributes of the
    atom to the current time, if they are not already set
    @return: xml version of atom - as an ElementTree object
    '''
    logging.info("Creating formatted XML version of Atom")
    root = ET.Element("entry")
    root.attrib["xmlns"] = ndgObject.ATOM_NS
    root.attrib["xmlns:moles"] = ndgObject.MOLES_NS
    root.attrib["xmlns:georss"] = ndgObject.GEOSS_NS
    root.attrib["xmlns:gml"] = ndgObject.GML_NS
    # NB, avoid calling the local variable 'id' - this hides the builtin
    idElement = ET.SubElement(root, "id")
    idElement.text = self.atomID
    title = ET.SubElement(root, "title")
    title.text = escape(self.title or "")
    self._linksToXML(root)

    if self.author and self.author.hasValue():
        root.append(self.author.toXML())

    for contributor in self.contributors:
        root.append(contributor.toXML())

    # add parameters data
    for param in self.parameters:
        if param.hasValue():
            root.append(param.toXML())

    # add the type and subtype data
    self.__addAtomTypeDataXML(root)

    summary = ET.SubElement(root, "summary")
    # NB, guard against the summary being unset - as is done for the title
    # - since escape() cannot cope with None
    summary.text = escape(self.summary or "")

    # add link to content, if required - NB, can only have one content element in atom
    # - and this is mandatory
    content = ET.SubElement(root, "content")
    contentFile = self.contentFile or self.csmlFile or self.cdmlFile
    if contentFile:
        content.attrib["type"] = "application/xml"
        content.attrib["src"] = contentFile
    else:
        content.attrib["type"] = "xhtml"
        div = ET.SubElement(content, 'xhtml:div')
        div.attrib["xmlns:xhtml"] = ndgObject.XHTML_NS

        div.text = self.Content

    # if there's a published date already defined, assume we're doing an update now
    # NB, update element is mandatory
    # NB, the 'Z' suffix states that the time is UTC - so use the UTC time,
    # rather than the local time
    currentDate = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
    if not self.publishedDate:
        self.publishedDate = currentDate

    updated = ET.SubElement(root, "updated")
    if not self.updatedDate:
        self.updatedDate = currentDate
    updated.text = self.updatedDate

    published = ET.SubElement(root, "published")
    published.text = self.publishedDate

    # add the moles entity section, if it is required
    if self.ME:
        root.append(self.ME.toXML())

    # add temporal range data, if available - format is 't1' or 't1/t2'
    temporalRange = ET.SubElement(root, "moles:temporalRange")
    if self.t1:
        temporalRange.text = escape(self.t1)
        # NB, t2 is only meaningful as the end of a range starting at t1
        # - and there is no text to append to if t1 is not set
        if self.t2:
            temporalRange.text += "/" + escape(self.t2)

    # add spatial range data, if available
    self._addSpatialData(root)

    tree = ET.ElementTree(root)
    logging.info("XML version of Atom created")
    return tree
---|
692 | |
---|
693 | |
---|
def __getContent(self):
    '''
    Getter for the Content property
    @return: the lines of the content array, each followed by a newline, as
    a single string - or 'Metadata document' if the array is empty
    '''
    logging.debug("Getting content data")
    # NB, there must be content specified in an atom
    if not self.content:
        return "Metadata document"

    return "".join([line + "\n" for line in self.content])

def __setContent(self, content):
    '''
    Setter for the Content property - the content array is always cleared,
    then refilled with the input split up on newlines
    @param content: content string - if this is empty the array is left empty
    '''
    logging.debug("Adding content data")
    self.content = []
    if content:
        self.content.extend(content.split('\n'))

Content = property(fset=__setContent, fget=__getContent, doc="Atom content")
---|
716 | |
---|
717 | |
---|
718 | def fromString(self, xmlString): |
---|
719 | ''' |
---|
720 | Initialise Atom object using an xmlString |
---|
721 | @param xmlString: representation of atom as an XML string |
---|
722 | ''' |
---|
723 | logging.info("Ingesting data from XML string") |
---|
724 | logging.debug("Create elementtree instance with XML string") |
---|
725 | tree = ET.fromstring(xmlString) |
---|
726 | title = tree.findtext('{%s}title' %ndgObject.ATOM_NS) |
---|
727 | if title: |
---|
728 | logging.debug("Adding title data") |
---|
729 | self.title = unescape(title) |
---|
730 | |
---|
731 | summary = tree.findtext('{%s}summary' %ndgObject.ATOM_NS) |
---|
732 | if summary: |
---|
733 | self.summary = unescape(summary) |
---|
734 | |
---|
735 | authorElement = tree.find('{%s}author' %ndgObject.ATOM_NS) |
---|
736 | if authorElement: |
---|
737 | logging.debug("Adding author data") |
---|
738 | author = Person() |
---|
739 | author.fromETElement(authorElement) |
---|
740 | self.author = author |
---|
741 | |
---|
742 | contributorElements = tree.findall('{%s}contributor' %ndgObject.ATOM_NS) |
---|
743 | for contributorElement in contributorElements: |
---|
744 | logging.debug("Adding contributor data") |
---|
745 | contributor = Person(personType = Person.CONTRIBUTOR_TYPE) |
---|
746 | contributor.fromETElement(contributorElement) |
---|
747 | self.contributors.append(contributor) |
---|
748 | |
---|
749 | molesElement = tree.find('{%s}entity' %ndgObject.MOLES_NS) |
---|
750 | if molesElement: |
---|
751 | self.ME.fromET(molesElement) |
---|
752 | |
---|
753 | atomID = tree.findtext('{%s}id' %ndgObject.ATOM_NS) |
---|
754 | self.__parseAtomID(atomID) |
---|
755 | |
---|
756 | self._parseCategoryData(tree.findall('{%s}category' %ndgObject.ATOM_NS)) |
---|
757 | |
---|
758 | self._parseLinksData(tree.findall('{%s}link' %ndgObject.ATOM_NS)) |
---|
759 | |
---|
760 | contentTag = tree.find('{%s}content' %ndgObject.ATOM_NS) |
---|
761 | if contentTag != None: |
---|
762 | logging.debug("Found content tag - checking for CSML/CDML file data") |
---|
763 | file = contentTag.attrib.get('src') |
---|
764 | if file: |
---|
765 | # NB, the path will reveal more reliably whether we're dealing with CSML and CDML files |
---|
766 | if file.upper().find('CSML') > -1: |
---|
767 | logging.debug("Adding CSML file data") |
---|
768 | self.csmlFile = file |
---|
769 | elif file.upper().find('CDML') > -1: |
---|
770 | logging.debug("Adding CDML file data") |
---|
771 | self.cdmlFile = file |
---|
772 | self.contentFile = file |
---|
773 | else: |
---|
774 | logging.debug("No file data - adding contents of element instead") |
---|
775 | # the div ns is sometimes handled differently - cope with both |
---|
776 | # options |
---|
777 | |
---|
778 | tag = '{%s}div'%ndgObject.XHTML_NS |
---|
779 | divEl = contentTag.find(tag) |
---|
780 | |
---|
781 | if divEl is None: |
---|
782 | tag = '{%s}div'%ndgObject.ATOM_NS |
---|
783 | divEl = contentTag.find(tag) |
---|
784 | |
---|
785 | if divEl is not None: |
---|
786 | div = divEl.text |
---|
787 | |
---|
788 | # NB, this can contain xhtml, so check for children |
---|
789 | for child in divEl.getchildren(): |
---|
790 | div += ET.tostring(child) |
---|
791 | |
---|
792 | # NB, Elementtree tends to revert the namespace of the xhtml |
---|
793 | # elements to the parent Atom NS - so switch this back |
---|
794 | if div: |
---|
795 | div = div.replace(ndgObject.ATOM_NS, ndgObject.XHTML_NS) |
---|
796 | |
---|
797 | self.Content = div |
---|
798 | |
---|
799 | range = tree.findtext('{%s}temporalRange' %ndgObject.MOLES_NS) |
---|
800 | if range: |
---|
801 | logging.debug("Adding temporal range data") |
---|
802 | timeData = range.split('/') |
---|
803 | self.t1 = unescape(timeData[0]) |
---|
804 | if len(timeData) > 1: |
---|
805 | self.t2 = unescape(timeData[1]) |
---|
806 | |
---|
807 | where = tree.find('{%s}where' %ndgObject.GEOSS_NS) |
---|
808 | if where: |
---|
809 | # NB, this parser won't mind if we're dealing with Envelope or EnvelopeWithTimePeriod |
---|
810 | minBBox = where.findall('.//{%s}lowerCorner' %ndgObject.GML_NS) |
---|
811 | if minBBox: |
---|
812 | logging.debug("Adding min spatial range data") |
---|
813 | minBBox = minBBox[0] |
---|
814 | spatialData = minBBox.text.split() |
---|
815 | self.minX = unescape(spatialData[0]) |
---|
816 | if len(spatialData) > 1: |
---|
817 | self.minY = unescape(spatialData[1]) |
---|
818 | |
---|
819 | maxBBox = where.findall('.//{%s}upperCorner' %ndgObject.GML_NS) |
---|
820 | if maxBBox: |
---|
821 | maxBBox = maxBBox[0] |
---|
822 | logging.debug("Adding max spatial range data") |
---|
823 | spatialData = maxBBox.text.split() |
---|
824 | self.maxX = unescape(spatialData[0]) |
---|
825 | if len(spatialData) > 1: |
---|
826 | self.maxY = unescape(spatialData[1]) |
---|
827 | |
---|
828 | publishedDate = tree.findtext('{%s}published' %ndgObject.ATOM_NS) |
---|
829 | if publishedDate: |
---|
830 | logging.debug("Adding published date") |
---|
831 | self.publishedDate = publishedDate |
---|
832 | |
---|
833 | updatedDate = tree.findtext('{%s}updated' %ndgObject.ATOM_NS) |
---|
834 | if updatedDate: |
---|
835 | logging.debug("Adding updated date") |
---|
836 | self.updatedDate = updatedDate |
---|
837 | |
---|
838 | logging.info("Completed data ingest") |
---|
839 | |
---|
840 | |
---|
def _parseCategoryData(self, categories):
    '''
    Turn atom <category> elements into Category objects and file each one
    under the matching atom attribute:
    - term equal to self.ATOM_TYPE: sets atomTypeID and atomTypeName
    - term equal to self.ATOM_SUBTYPE: sets subtypeID and subtype
    - anything else: appended to self.parameters
    @param categories: iterable of category elements, each of which is handed
    to Category.fromETElement
    '''
    logging.debug("Adding category/parameters data")
    for categoryElement in categories:
        parsed = Category()
        parsed.fromETElement(categoryElement)

        if parsed.term == self.ATOM_TYPE:
            logging.debug("Found atom type data")
            self.atomTypeID = parsed.label
            # NB, the type name is looked up from the vocab term data using the label
            self.atomTypeName = self.VTD.TERM_DATA[parsed.label].title
        elif parsed.term == self.ATOM_SUBTYPE:
            logging.debug("Found atom subtype data")
            self.subtypeID = parsed.label
            self.subtype = parsed.scheme
        else:
            # not type/subtype info - so treat as a plain parameter
            self.parameters.append(parsed)
---|
859 | |
---|
860 | |
---|
def __parseAtomID(self, atomID):
    '''
    Store the atom ID and derive the dataset ID (and, from that, the atom
    name info) from it
    - the dataset ID is whatever follows the last '__ATOM__' marker in the ID;
    if the marker is absent, the whole ID is used
    @param atomID: an atom ID in the 'tag' format
    '''
    logging.debug("Extracting atom info from ID, '%s'" % atomID)
    self.atomID = atomID
    # NB, rpartition gives (head, separator, tail) - the tail is the dataset ID
    self.datasetID = atomID.rpartition("__ATOM__")[2]
    self._generateAtomName(self.datasetID)
    logging.debug("- all info extracted")
---|
872 | |
---|
873 | |
---|
def setDatasetID(self, datasetID):
    '''
    Assign a new dataset ID to the atom and refresh the values derived from it
    - the atom name info (via _generateAtomName) and the atom ID (via createAtomID)
    @param datasetID: ID to set for the atom
    '''
    self.datasetID = datasetID
    # NB, the name must be generated before the ID is created
    self._generateAtomName(datasetID)
    newAtomID = self.createAtomID(datasetID)
    self.atomID = newAtomID
---|
882 | |
---|
883 | |
---|
def createAtomID(self, datasetID):
    '''
    Build a 'tag:' style ID for the atom from its browse URL and today's date
    NB, see http://diveintomark.org/archives/2004/05/28/howto-atom-id
    - format is: tag:<host>,<YYYY-MM-DD>:/<path of browse URL, with any '#' removed>
    @param datasetID: ID of atom's dataset - only used to generate the atom
    name/browse URL if the browse URL has not been set yet
    @return: unique ID
    '''
    logging.info("Creating unique ID for atom")
    if not self.atomBrowseURL:
        self._generateAtomName(datasetID)

    # drop the scheme and any '#' chars, then break the rest up on '/'
    pathParts = self.atomBrowseURL.split('://')[1].replace('#', '').split('/')
    # avoid the port colon - as this breaks the ID format
    hostName = pathParts[0].split(':')[0]
    today = datetime.datetime.today().strftime("%Y-%m-%d")

    atomID = "tag:%s,%s:/%s" % (hostName, today, "/".join(pathParts[1:]))
    logging.info("- unique ID created for atom")
    logging.debug(" - '%s'" % atomID)
    return atomID
---|
904 | |
---|
905 | |
---|
def _generateAtomName(self, datasetID):
    '''
    Work out the atom's file name, NDG URI and browse URL from the dataset ID
    - atomName is '<datasetID>.atom'
    - ndgURI is '<providerID>__ATOM__<datasetID>'
    - atomBrowseURL is VTD.BROWSE_ROOT_URL followed by the ndgURI
    @param datasetID: ID of atom's dataset
    @raise ValueError: if the moles entity has no provider ID set
    '''
    # NB, the name is set before the provider ID is checked
    self.atomName = datasetID + ".atom"

    providerID = self.ME.providerID
    if not providerID:
        raise ValueError("Provider ID has not been specified for atom - please add this and retry")

    uri = providerID + "__ATOM__" + datasetID
    self.ndgURI = uri
    self.atomBrowseURL = VTD.BROWSE_ROOT_URL + self.ndgURI
---|
916 | |
---|
917 | |
---|
def _parseLinksData(self, links):
    '''
    Extract links and atom data from array of link elements in the XML representation of the atom
    - the 'self' link is used to set atomBrowseURL, datasetID, atomName and ndgURI
    - all other links (bar any 'related' link pointing at the associated moles
    doc) are appended to self.relatedLinks
    @param links: an array of <link> elements
    @raise ValueError: if there is no link with a rel value of 'self'
    '''
    # firstly, get all data to start with, so we can properly process it afterwards
    # - group the links by their rel value
    # NB, dict.has_key() is not available in python 3 - so use setdefault/in,
    # which work in both python 2 and 3
    linkData = {}
    logging.debug("Getting link data")
    for linkTag in links:
        link = Link()
        link.fromETElement(linkTag)
        linkData.setdefault(link.rel, []).append(link)

    # there should be one self referencing link - which will provide info on the atom itself
    if 'self' not in linkData:
        errorMessage = "Atom does not have self referencing link - " + \
            "cannot ascertain datasetID without this - please fix"
        logging.error(errorMessage)
        raise ValueError(errorMessage)

    # this is the link describing the atom itself - remove it from the
    # remaining data so it isn't added as a related link
    selfLinks = linkData.pop('self')
    self.atomBrowseURL = selfLinks[0].href

    self.datasetID = self.atomBrowseURL.split("__ATOM__")[-1]
    self.atomName = self.datasetID + ".atom"
    # NB, only split on the stem, since the browse host may not be
    # the same as that defined in VTD
    self.ndgURI = self.atomBrowseURL.split(VTD.BROWSE_STEM_URL)[-1]

    # now remove the associated moles doc link
    molesDoc = self.atomBrowseURL.replace('ATOM', 'NDG-B1')
    if 'related' in linkData:
        linkData['related'] = [related for related in linkData['related']
                               if related.href != molesDoc]

    # now add the remaining links to the atom
    for groupedLinks in linkData.values():
        for link in groupedLinks:
            logging.debug("Adding link data %s" % link)
            self.relatedLinks.append(link)
---|
968 | |
---|
969 | |
---|
def _addSpatialData(self, element):
    '''
    Add spatial coverage element to an input element
    - adds a georss:where/gml:Envelope structure with lowerCorner (minX minY)
    and upperCorner (maxX maxY) children
    NB, nothing is added if minX has not been specified
    @param element: element to add coverage data to
    '''
    logging.info("Adding spatial data to Atom")
    # NB, a coordinate of 0 is a valid value (e.g. a box starting on the
    # Greenwich meridian) - so a plain truth test is not enough here; only
    # None/empty values mean 'not specified'
    if not self.minX and self.minX != 0:
        logging.info("No spatial data specified")
        return

    bbox = ET.SubElement(element, "georss:where")
    envelope = ET.SubElement(bbox, "gml:Envelope")
    lc = ET.SubElement(envelope, "gml:lowerCorner")
    lc.text = escape(str(self.minX) + " " + str(self.minY))
    uc = ET.SubElement(envelope, "gml:upperCorner")
    uc.text = escape(str(self.maxX) + " " + str(self.maxY))
---|
985 | |
---|
986 | |
---|
def setAttribute(self, attributeName, attributeValue, escapeSpecials = True):
    '''
    Set the value of an atom attribute, converting string inputs into the
    appropriate objects (via objectify) on the way
    - 'authors': the first value becomes the atom author and any further
    values become the contributors
    - 'atomAuthors': values are added to the moles entity responsible parties
    - 'files': values are passed to addUniqueRelatedLinks
    - anything else is set directly as an attribute of the atom
    NB, an empty/None value for 'authors' or 'atomAuthors' is ignored
    @param attributeName: name of the attribute whose value to set
    @param attributeValue: value to set the attribute to - either a single
    value or a list of values
    @keyword escapeSpecials: if true, utilities.escapeSpecialCharacters is called
    on each value. Default = True
    '''
    logging.debug("Setting attribute, %s, to %s" %(attributeName, attributeValue))

    # NB, need to cope with both single values and arrays - so process
    # everything as a list and unwrap single values again at the end
    isList = True
    if attributeValue:
        if not isinstance(attributeValue, list):
            attributeValue = [attributeValue]
            isList = False

        newVals = []
        for val in attributeValue:
            if escapeSpecials:
                # NOTE(review): the result of this call is discarded, so the
                # un-escaped value is what gets stored - unless the helper
                # works by side effect. Left as is, since switching on the
                # escaping would change the stored data - TODO confirm intent
                utilities.escapeSpecialCharacters(val)
            newVals.append(self.objectify(val, attributeName))
        attributeValue = newVals

    # handle the special case of authors; only one author is allowed per atom
    # - the others should be treated as contributors
    if attributeName == "authors":
        # NB, guard against empty input - there is nothing to index into
        if attributeValue:
            self.author = attributeValue[0]
            if len(attributeValue) > 1:
                self.contributors = attributeValue[1:]
    elif attributeName == "atomAuthors":
        # NB, guard against empty input - extend() cannot cope with None
        if attributeValue:
            self.ME.responsibleParties.extend(attributeValue)
    elif attributeName == "files":
        self.addUniqueRelatedLinks(attributeValue)
    else:
        if not isList:
            attributeValue = attributeValue[0]
        setattr(self, attributeName, attributeValue)
---|
1029 | |
---|
1030 | |
---|
def objectify(self, objectVals, attributeName):
    '''
    Convert a string input into the object type used for the named attribute
    - 'relatedLinks' and 'files' give a Link; 'atomAuthors' and 'authors' give
    a Person; the object is populated via its fromString method
    - for 'authors', the Person is created as an author if the atom has no
    author with a value yet (in which case it is also set as the atom author),
    and as a contributor otherwise
    - non-string inputs, and strings for any other attribute, are returned
    unchanged
    @param objectVals: a '|' delimited string of values
    @param attributeName: name of attribute the values belong to
    @return: the created object, or the unchanged input
    '''
    if type(objectVals) != str:
        return objectVals

    converted = None
    if attributeName == "relatedLinks":
        converted = Link()
    elif attributeName == "atomAuthors":
        converted = Person(personType = Person.RESPONSIBLE_PARTY_TYPE)
    elif attributeName == "authors":
        # NB, ensure there is only one author tag - extra authors are contributors
        if self.author and self.author.hasValue():
            authorType = Person.CONTRIBUTOR_TYPE
        else:
            authorType = Person.AUTHOR_TYPE
        converted = Person(personType = authorType)
    elif attributeName == 'files':
        converted = Link()
        # the file name is wrapped up with the metadata source vocab info
        vocabURL = self.VTD.getTermCurrentVocabURL(VTD.METADATA_SOURCE_TERM)
        objectVals = '%s|%s|%s' % (vocabURL, objectVals, VTD.METADATA_SOURCE_TERM)

    if not converted:
        return objectVals

    converted.fromString(objectVals)
    # NB, need to set it now, just in case we don't set it before coming back
    if attributeName == "authors" and not (self.author and self.author.hasValue()):
        self.author = converted
    return converted
---|
1065 | |
---|
1066 | |
---|
def toPrettyXML(self):
    '''
    Produce the atom as a formatted XML string, complete with XML declaration
    - the atom is converted with toXML and its root element formatted with et2text
    @return: formatted XML as a string
    '''
    xmlTree = self.toXML()

    # create the string - and add XML version tag in front of it
    logging.debug("Converting the elementtree object into a string")
    formatted = "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n" + et2text(xmlTree.getroot())

    logging.info("Created formatted version of XML object")
    return formatted
---|
1081 | |
---|
1082 | |
---|
def getLinksOfType(self, termID):
    '''
    Collect the atom related links which match the specified term ID
    - if the term ID is self.ONLINE_REF_LABEL, every link which is not a child
    atom is returned
    - otherwise, links whose rel value ends (ignoring case) with the term ID
    are returned
    @param termID: the termID to look for in the related links - NB, this is
    matched to the end of the link.rel value
    @return links: array of Link objects with matching term type
    '''
    logging.debug("Getting atom links of type, '%s'" % termID)
    found = []
    for candidate in self.relatedLinks:
        if termID == self.ONLINE_REF_LABEL:
            # special case - we only want the online ref type links returned
            if candidate.isChildAtom():
                continue
        elif not (candidate and candidate.rel and
                  candidate.rel.lower().endswith(termID.lower())):
            continue

        logging.debug("- found link with matching term type %s" % candidate)
        found.append(candidate)

    logging.debug("Returning matched links")
    return found
---|
1107 | |
---|
1108 | |
---|
def getLogos(self):
    '''
    Return related links that are logos
    - i.e. those whose rel value ends (ignoring case) with VTD.LOGO_TERM
    NB, links without a rel value are skipped - in the same way as
    getLinksOfType does - rather than causing an AttributeError
    @return: array of Links containing the logos for the atom
    '''
    logoSuffix = VTD.LOGO_TERM.lower()
    return [link for link in self.relatedLinks
            if link is not None and link.rel and link.rel.lower().endswith(logoSuffix)]
---|
1119 | |
---|
1120 | |
---|
def isGranule(self):
    '''
    Check whether the atom type ID is the granule term
    @return: True if atomTypeID equals VTD.GRANULE_TERM, False otherwise
    '''
    return bool(self.atomTypeID == VTD.GRANULE_TERM)
---|
1125 | |
---|
1126 | |
---|
def isDE(self):
    '''
    Check whether the atom type ID is the data entity (DE) term
    @return: True if atomTypeID equals VTD.DE_TERM, False otherwise
    '''
    return bool(self.atomTypeID == VTD.DE_TERM)
---|
1131 | |
---|
def isDeployment(self):
    '''
    Check whether the atom subtype ID is the deployment term
    @return: True if subtypeID is set and equals VTD.DEPLOYMENT_TERM,
    False otherwise
    '''
    return bool(self.subtypeID and self.subtypeID == VTD.DEPLOYMENT_TERM)
---|
1136 | |
---|
def isDeployable(self):
    '''
    Check whether the atom type is one of the 'deployable' types, i.e.
    - an activity (VTD.ACTIVITY_TERM) whose subtype is not VTD.DEPLOYMENT_TERM
    - VTD.DPT_TERM
    - VTD.OBS_TERM
    @return: True if the atom matches one of the above, False otherwise
    '''
    # activities count - unless they are themselves deployments
    if self.atomTypeID == VTD.ACTIVITY_TERM and self.subtypeID != VTD.DEPLOYMENT_TERM:
        return True
    if self.atomTypeID == VTD.DPT_TERM:
        return True
    return bool(self.atomTypeID == VTD.OBS_TERM)
---|
1143 | |
---|
def isPublished(self):
    '''
    Report whether the atom doc is in a published state
    - NB, the decision is delegated to the atom's state object
    @return: the result of self.state.isPublishedState()
    '''
    currentState = self.state
    return currentState.isPublishedState()
---|
1150 | |
---|
1151 | |
---|
def addCSMLData(self, csmlName, csmlContent, aggregateCoverage=False, useCSMLID=False):
    '''
    Parse CSML data and add extracted info to the atom
    - sets the csmlFile and contentFile attributes to the csml file name
    - optionally uses the CSML doc ID to set the atom dataset ID
    - overrides the atom title with the CSML dataset name, if one is specified
    - updates the atom spatial (minX/minY/maxX/maxY) and temporal (t1/t2) coverage
    - adds a parameter entry for each CSML feature whose parameter has an href
    @param csmlName: name of the csml file
    @param csmlContent: content of the csml file - NB, if this is set to None and the
    file, csmlName, is available locally, CsmlParser.Dataset will read in the file
    directly
    @keyword aggregateCoverage: if set to True, only coverage data that extends the
    atom coverage data will be added
    @keyword useCSMLID: if True, use the CSML doc ID as the dataset ID - NB,
    this should only be True if creating a new atom - e.g. from a granulite
    @return csmlDoc: the CsmlParser.Dataset object with the csml data in
    '''
    logging.info("Creating CSML data model")
    self.csmlFile = csmlName
    self.contentFile = csmlName
    # use the supplied content if there is any - otherwise fall back to the file name
    content = csmlContent or csmlName

    csmlDoc = CsmlParser.Dataset(file=content)

    logging.info("Extracting info from CSML file")
    logging.debug("Got dataset ID: %s" %csmlDoc.id)
    if useCSMLID:
        logging.debug(" - using this ID for the atom")
        # the dataset ID is the granule term and the CSML ID, joined by an underscore
        self.setDatasetID(VTD.GRANULE_TERM + '_' + csmlDoc.id)

    title = csmlDoc.name.CONTENT
    logging.debug("Got dataset name (title): '%s'" %title)
    # NB, if a title is specified (and not as the default value), it automatically is used in
    # place of anything in the granulite file
    if title and title != "NAME OF DATASET GOES HERE":
        logging.info("Title, '%s', extracted from CSML file" %title)
        if self.title:
            logging.info("- NB, this will override the title specified in the granulite file ('%s')" \
                         %self.title)
        self.title = title

    # bbox1 is used below for the spatial data, bbox2 only for the time limits
    bbox1 = csmlDoc.getBoundingBox()
    bbox2 = csmlDoc.getCSMLBoundingBox()

    # NB, this local name would hide the 'time' module within this method,
    # if that module were imported by this file
    time = None
    if bbox2:
        time = bbox2.getTimeLimits()

    # now check for other parameters to add to granule
    # Firstly, extract the bounding envelope
    if bbox1:
        # bbox1 is read by index as: [0] -> w, [1] -> s, [2] -> e, [3] -> n
        w, e = utilities.normaliseLongitude(bbox1[0],bbox1[2])
        n, s = (bbox1[3], bbox1[1])

        # NB, when not aggregating, the atom values are always overwritten; when
        # aggregating, they are only replaced if unset or if the new value
        # extends the existing box
        # NOTE(review): 'not self.maxY' etc. is also true for a stored
        # coordinate of 0, so such a value is treated as unset and overwritten
        # when aggregating - confirm whether this is intended
        if not aggregateCoverage or (not self.maxY or float(n) > float(self.maxY)):
            self.maxY = n

        if not aggregateCoverage or (not self.minY or float(s) < float(self.minY)):
            self.minY = s

        if not aggregateCoverage or (not self.minX or float(w) < float(self.minX)):
            self.minX = w

        if not aggregateCoverage or (not self.maxX or float(e) > float(self.maxX)):
            self.maxX = e

        logging.debug("Got bounding box data from file: (%s, %s) , (%s, %s)" \
                      %(w, s, e, n))

        logging.debug("Updated atom bounding box data: (%s, %s) , (%s, %s)" \
                      %(self.minX, self.minY, self.maxX, self.maxY))
    else:
        logging.debug("No valid bounding box data found")

    if time:
        # time[0] is used as the start and time[1] as the end of the range
        # NOTE(review): YEAR_FORMAT is not defined in this method - presumably
        # a module level constant defined elsewhere in this file; verify
        t1 = utilities.formatDateYYYYMMDD(time[0])
        # as for the bounding box; when aggregating, only move the start date earlier
        if not aggregateCoverage or \
            (not self.t1 or datetime.datetime.strptime(t1, YEAR_FORMAT) < \
             datetime.datetime.strptime(self.t1, YEAR_FORMAT)):
            self.t1 = t1

        t2 = time[1]
        # NB, the end date may be missing - including as the literal string 'None'
        if t2 and t2 != 'None':
            t2 = utilities.formatDateYYYYMMDD(t2)
            # when aggregating, only move the end date later
            if not aggregateCoverage or \
                (not self.t2 or datetime.datetime.strptime(t2, YEAR_FORMAT) > \
                 datetime.datetime.strptime(self.t2, YEAR_FORMAT)):
                self.t2 = t2

        logging.debug("Got time range: %s -> %s" %(self.t1, self.t2))
    else:
        logging.debug("No valid time range data found")

    #create parameter summaries:
    #set up list to hold the parameters data
    parameters = []
    for feature in csmlDoc.featureCollection.featureMembers:
        # NB, features whose parameter has no href are ignored
        if hasattr(feature.parameter, 'href'):
            # build up a ' | ' delimited string of: description, href, name
            # - with an empty string used for a missing description or name
            paramTriple = ""
            if hasattr(feature, 'description'):
                paramTriple = feature.description.CONTENT
            paramTriple += " | " + feature.parameter.href

            term = ""
            if hasattr(feature, 'name'):
                term = feature.name.CONTENT

            paramTriple += " | " + term

            logging.debug("Got parameter info: %s" %paramTriple)
            parameters.append(paramTriple)

    # update the atom with the extracted parameters
    logging.info("Adding CSML parameters to granule atom")
    self.addParameters(parameters)
    logging.info("Finished adding CSML data")
    return csmlDoc
---|
1265 | |
---|
1266 | |
---|
def lookupAssociatedData(self, type, searchClient, lookupIndirectReferences=False):
    '''
    Check through the atom links and retrieve any associated data of the
    specified type
    - for VTD.DEPLOYMENT_TERM, self.deployments is populated with Deployment
    objects, and the activities, obs and dpts links of each deployment are
    collected (without duplicates) in allActivities, allObs and allDpts
    - for VTD.DE_TERM, self.dataEntities is populated with a Link (title,
    browse URL and dataset ID) for each referenced atom
    NB, if the requested data has already been looked up, nothing is retrieved
    @param type: type of associated data to lookup - currently VTD.DEPLOYMENT_TERM
    or VTD.DE_TERM - NB, this name hides the 'type' builtin within this method
    @param searchClient: Client implementing the AbstractSearchXMLDBClient class
    @keyword lookupIndirectReferences: if True, the atom ID is used to search
    defined deployments to find those which reference it, otherwise only
    deployments data featured in the atom related links are processed
    @raise ValueError: if type is neither VTD.DE_TERM nor VTD.DEPLOYMENT_TERM
    '''
    logging.info("Looking up %s info" %type)
    # NOTE(review): these lists are cleared on every call - including calls
    # which raise or return early below, because the type is not recognised or
    # the data has already been looked up - confirm this is intended
    self.allActivities = []
    self.allObs = []
    self.allDpts = []

    if type != VTD.DE_TERM and type != VTD.DEPLOYMENT_TERM:
        raise ValueError('Unrecognised associated data type: %s' %type)

    # avoid duplicating lookup effort
    if (type == VTD.DEPLOYMENT_TERM and self.deployments) or \
        (type == VTD.DE_TERM and self.dataEntities):
        logging.info("- this info has already been looked up - returning")
        return

    # firstly, collect all the references to the info required
    if lookupIndirectReferences:
        logging.info("Looking up indirect references")

        # if we're looking up DE data for deployments data, need to have the
        # deployments info looked up first
        if type == VTD.DE_TERM and self.isDeployable() and not self.deployments:
            self.lookupAssociatedData(VTD.DEPLOYMENT_TERM, searchClient,
                                      lookupIndirectReferences = lookupIndirectReferences)

        logging.info("Looking up references to this atom from other %s" %type)

        # NB, if we're looking up deployments info, we only look up references
        # to this atom - if we're looking up DEs, we need to look up references
        # to the deployments referenced by this atom
        urls = [self.atomBrowseURL]

        if type == VTD.DE_TERM and self.isDeployable():
            urls = []
            for dep in self.deployments:
                urls.append(dep.browseURL)

        # in this branch, links is a list of XML strings - one per child
        # element of each returned results doc
        links = []
        for url in urls:
            doc = searchClient.getNDGDoc(type, ndgObject.ASSOCIATED_ATOM_DOC_TYPE, url,
                                         targetCollection = dc.ATOM_COLLECTION_PATH)
            # now need to turn this results set into actual atoms
            tree = ET.fromstring(doc)
            for atom in tree:
                logging.debug("- found reference in %s" %type)
                links.append(ET.tostring(atom))

        logging.info("Finished looking up indirect references")
    else:
        # in this branch, links is whatever getLinksOfType returns (Link objects)
        # NOTE(review): the deployment term is used here even when type is
        # VTD.DE_TERM - in which case the loop below passes str(link) to Atom
        # as an XML string - confirm this is intended
        links = self.getLinksOfType(self.VTD.DEPLOYMENT_TERM)

    # now retrieve the references and extract the required data
    logging.info("Retrieving info from %s references" %type)
    if type == VTD.DEPLOYMENT_TERM:
        logging.info("Extracting links data to deployment entitites")
        self.deployments = []
        for link in links:
            if lookupIndirectReferences:
                # the indirect lookup has already provided the atom XML
                deploymentAtom = link
            else:
                # otherwise retrieve the atom doc, using the ID from the end of the link href
                localID = link.href.split("__ATOM__")[-1]
                deploymentAtom = searchClient.getNDGDoc('',
                                                        'ATOM', localID,
                                                        targetCollection = dc.ATOM_COLLECTION_PATH)

            deployment = Deployment.Deployment(Atom(xmlString=str(deploymentAtom)))
            self.deployments.append(deployment)

            # gather up the links of all the deployments - avoiding duplicates
            self.addUniqueLinks(self.allActivities, deployment.activities)
            self.addUniqueLinks(self.allObs, deployment.obs)
            self.addUniqueLinks(self.allDpts, deployment.dpts)
    else:
        # for DE data, just store the title + link in a Link object
        self.dataEntities = []
        logging.info("Extracting links data to data entitites")
        for data in links:
            atom = Atom(xmlString=str(data))
            link = Link()
            link.title = atom.title
            link.href = atom.atomBrowseURL
            # NB, the rel value is used to hold the dataset ID here
            link.rel = atom.datasetID

            # NB, different deployments may be used by the same DE - so
            # avoid duplication
            self.addUniqueLinks(self.dataEntities, link)

    logging.info("Finished looking up %s info" %type)
---|
1364 | |
---|
1365 | |
---|
def addUniqueLinks(self, dataArray, links):
    '''
    Append links to the specified array - leaving out any it already contains
    NB, the array is updated in place; anything which is not exactly of type
    Link is skipped, with a warning logged
    @param dataArray: a list, potentially already containing links
    @param links: a Link or array of Links to add to the dataArray
    '''
    logging.debug("Adding new links")
    if not links:
        return

    # cope with a single link being passed in
    candidates = links if type(links) is list else [links]

    for candidate in candidates:
        if type(candidate) is Link:
            if candidate not in dataArray:
                logging.debug("- adding unique link")
                dataArray.append(candidate)
        else:
            logging.warning("Link is not of 'Link' object type (type='%s') - skipping" % type(candidate))
    logging.debug("Finished adding links")
---|
1387 | |
---|
1388 | |
---|
def getFullPath(self):
    '''
    Give the full path to the atom in eXist - i.e. the default collection
    path followed by the atom name
    @return fullPath: string - collection + filename of atom in eXist, or
    None if the atom has no name
    '''
    logging.debug("Getting full path to atom")
    # NB, name assigned when atom created in eXist - so if not defined, not
    # in eXist
    if not self.atomName:
        logging.debug("Atom doesn't currently exist in eXist - return 'None'")
        return None

    logging.debug("Return full path to atom in eXist")
    collectionPath = self.getDefaultCollectionPath()
    return collectionPath + self.atomName
---|
1402 | |
---|
1403 | |
---|
def getSubTypePrettyName(self):
    '''
    Give the subtype of the atom in a human readable form
    - the subtype ID is tidied up using self.VTD.tidySubTypeTitle; if no
    subtype ID is set, self.SUB_TYPE_NOT_DEFINED_NAME is used instead
    @return: sub type of atom as a verbose string
    '''
    logging.debug("Getting human readable version of atom subtype")
    fallbackName = self.SUB_TYPE_NOT_DEFINED_NAME
    prettyName = self.VTD.tidySubTypeTitle(self.subtypeID) if self.subtypeID else fallbackName

    logging.debug("- returning subtype: '%s'" % prettyName)
    return prettyName
---|