Changeset 5192


Ignore:
Timestamp:
15/04/09 09:19:32 (10 years ago)
Author:
cbyrom
Message:

Escape/unescape special XML characters on export/import of data from
the Category and Link data models, to simplify processing; ensure
values are escaped when running XML validation; simplify handling
of lists/single values when setting atom attributes; fix handling
of chunked query execution; tidy up various imports and update
tests.

Location:
ndgCommon/trunk/ndg/common
Files:
9 edited

Legend:

Unmodified
Added
Removed
  • ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/atomclient.py

    r5184 r5192  
    119119            if atom: 
    120120                logging.info("Creating temporary file in eXist to do validation against") 
    121                 fileName = atom.datasetID + str(datetime.datetime.today().microsecond) 
     121                fileName = atom.datasetID + '.' + str(datetime.datetime.today().microsecond) 
    122122                self.createDoc(atom.toPrettyXML(), 
    123123                               atom.getDefaultCollectionPath(),  
  • ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/feedclient.py

    r5098 r5192  
    55 @author: C Byrom - Tessella 09 
    66''' 
    7 import logging, httplib, urllib2, os 
     7import logging, httplib 
    88import feedparser 
    99from ndg.common.src.models.vocabtermdata import VocabTermData as VTD 
  • ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/xmlrpcclient.py

    r5010 r5192  
    102102            while result is None: 
    103103                try: 
    104                     result = self.__executeChunkedQuery(xquery, offset,  
    105                                                         noResults, params) 
     104                    return self.__executeChunkedQuery(xquery, offset,  
     105                                                      noResults, params) 
    106106                except xmlrpclib.Fault, f: 
    107107                    if attempt < 3: 
  • ndgCommon/trunk/ndg/common/src/lib/atomvalidator.py

    r5170 r5192  
    1111 @author: C Byrom, Tessella Nov 2008 
    1212''' 
    13 import logging, traceback, datetime, xmlrpclib, socket, urllib 
     13import logging, traceback, datetime, xmlrpclib, socket 
     14from xml.sax.saxutils import unescape 
    1415from ndg.common.src.clients.xmldb.eXist.atomclient import AtomClient 
    1516from ndg.common.src.models.vocabtermdata import isValidTermURI 
     
    276277            if link.hasValue(): 
    277278                try: 
    278                     # unquote the link, just in case it has been encoded 
    279                     url = urllib.unquote(link.href) 
     279                    # unescape the link, just in case it has been encoded 
     280                    url = unescape(link.href) 
    280281                    # don't lookup link, if it has already been validated before 
    281282                    if url in self._validLinks: 
     
    345346        try: 
    346347            errors = self._atomClient.checkAtomSchemaCompliance(atomPath, atom = self._atom, 
    347                                                            isDebug = self._isDebug) 
     348                                                                isDebug = self._isDebug) 
    348349            for error in errors: 
    349350                self.__addError(self.SCHEMA_VALIDATION_FAILURE, error) 
  • ndgCommon/trunk/ndg/common/src/lib/granulite.py

    r5185 r5192  
    294294        if self.RELATED_LINKS_SECTION in granulite_data: 
    295295            self._atom.setAttribute('relatedLinks', \ 
    296                                     granulite_data[self.RELATED_LINKS_SECTION]) 
     296                                    granulite_data[self.RELATED_LINKS_SECTION], 
     297                                    escapeSpecials = False) 
    297298 
    298299        # now add the arrays data 
  • ndgCommon/trunk/ndg/common/src/lib/utilities.py

    r5143 r5192  
    517517    # avoid processing the url, if it has been set 
    518518    # - NB, special characters are escaped by getTripleData 
    519     data = getTripleData(params_string) 
     519    data = getTripleData(params_string, doEscape=False) 
    520520    newData = [] 
    521521    for r in [data[0], data[2]]: 
  • ndgCommon/trunk/ndg/common/src/models/Atom.py

    r5177 r5192  
    44 @author: C Byrom, Tessella Jun 2008 
    55''' 
    6 import sys, logging, re, datetime, urllib 
     6import sys, logging, re, datetime 
     7from xml.sax.saxutils import escape, unescape 
    78from xml.etree import cElementTree as ET 
    89import csml.parser as CsmlParser 
     
    128129 
    129130    def fromString(self, linkString): 
    130         (self.href, self.title, self.rel) = utilities.getTripleData(linkString) 
     131        (self.href, self.title, self.rel) = utilities.getTripleData(linkString, doEscape=False) 
     132        # ensure no funny characters are included on data ingest 
     133        self.title = utilities.escapeSpecialCharacters(self.title) 
    131134         
    132135    def fromETElement(self, linkTag): 
     136        # remove any url quoting when reading in from XML - to avoid need for 
     137        # correction on display 
    133138        self.href = linkTag.attrib.get('href') or "" 
     139        if self.href: 
     140            self.href = unescape(self.href) 
    134141        self.rel = linkTag.attrib.get('rel') or "" 
     142        if self.rel: 
     143            self.rel = unescape(self.rel) 
    135144        self.title = linkTag.attrib.get('title') or "" 
    136145 
    137146    def toXML(self): 
     147        # ensure the xml element doesn't contain things like '&' - which will 
     148        # cause problems when running xqueries 
    138149        link = ET.Element("link") 
    139         link.attrib["href"] = self.href 
     150        link.attrib["href"] = escape(self.href) 
    140151        link.attrib["title"] = self.title 
    141         link.attrib["rel"] = self.rel 
     152        link.attrib["rel"] = escape(self.rel) 
    142153        return link 
    143154 
     
    212223    def fromETElement(self, linkTag): 
    213224        self.term = linkTag.attrib.get('term') or "" 
     225        if self.term: 
     226            self.term = unescape(self.term) 
    214227        self.label = linkTag.attrib.get('label') or "" 
     228        if self.label: 
     229            self.label = unescape(self.label) 
    215230        self.scheme = linkTag.attrib.get('scheme') or "" 
     231        if self.scheme: 
     232            self.scheme = unescape(self.scheme) 
    216233 
    217234    def toXML(self): 
    218235        link = ET.Element("category") 
    219         link.attrib["term"] = self.term 
    220         link.attrib["scheme"] = self.scheme 
    221         link.attrib["label"] = self.label 
     236        link.attrib["term"] = escape(self.term) 
     237        link.attrib["scheme"] = escape(self.scheme) 
     238        link.attrib["label"] = escape(self.label) 
    222239        return link 
    223240     
     
    379396         
    380397        newLinks.extend(links) 
    381         for link in newLinks: 
    382            link.href = urllib.quote(link.href, '%') 
    383398        self.relatedLinks = newLinks 
    384399        logging.debug("Online references added") 
     
    580595            category = Category() 
    581596            # NB, data already tidied up here, so set keyword to avoid this happening again 
    582             category.fromString(param, escapeSpecialCharacters=True) 
     597            category.fromString(param, escapeSpecialCharacters=False) 
    583598 
    584599            # now check for uniqueness 
     
    969984 
    970985         
    971     def setAttribute(self, attributeName, attributeValue): 
     986    def setAttribute(self, attributeName, attributeValue, escapeSpecials = True): 
    972987        ''' 
    973988        Set the value of an atom attribute - and do some basic tidying up of the string content 
     
    975990        @param attributeName: name of the attribute whose value to set 
    976991        @param attributeValue: value to set the attribute to   
     992        @keyword escapeSpecials: if true, escape any special characters in the attribute 
     993        content.  Default = True 
    977994        ''' 
    978995        logging.debug("Setting attribute, %s, to %s" %(attributeName, attributeValue)) 
     
    981998        # escape any special characters if a value has been specified 
    982999        # NB, need to cope with both single values and arrays 
     1000        isList = True 
    9831001        if attributeValue: 
    984             if type(attributeValue) is list: 
    985                 newVals = [] 
    986                 for val in attributeValue: 
    987                     newVals.append(self.objectify(utilities.escapeSpecialCharacters(val), attributeName)) 
    988                 attributeValue = newVals 
    989                      
    990             else: 
    991                 attributeValue = self.objectify(utilities.escapeSpecialCharacters(attributeValue), attributeName) 
     1002            if not isinstance(attributeValue, list): 
     1003                attributeValue = [attributeValue] 
     1004                isList = False 
     1005                 
     1006            newVals = [] 
     1007            for val in attributeValue: 
     1008                data = val 
     1009                if escapeSpecials: 
     1010                    utilities.escapeSpecialCharacters(val) 
     1011                newVals.append(self.objectify(data, attributeName)) 
     1012            attributeValue = newVals 
    9921013 
    9931014        # handle the special case of authors; only one author is allowed per atom 
     
    9981019                setattr(self, "contributors", attributeValue[1:]) 
    9991020        elif attributeName == "atomAuthors": 
    1000             if isinstance(attributeValue, list): 
    1001                 for val in attributeValue: 
    1002                     self.ME.responsibleParties.append(val) 
    1003             else: 
    1004                 self.ME.responsibleParties.append(attributeValue) 
     1021            self.ME.responsibleParties.extend(attributeValue) 
    10051022        elif attributeName == "files": 
    10061023            self.addUniqueRelatedLinks(attributeValue) 
    10071024        else: 
     1025            if not isList: 
     1026                attributeValue = attributeValue[0] 
    10081027            setattr(self, attributeName, attributeValue) 
    10091028 
  • ndgCommon/trunk/ndg/common/src/models/vocabtermdata.py

    r5180 r5192  
    55 @author: C Byrom, Tessella Jul 2008 
    66''' 
    7 import logging, time, re, urllib 
     7import logging, time, re 
    88from ndg.common.src.lib.utilities import openURLWithDefaultProxy 
    99     
  • ndgCommon/trunk/ndg/common/unittests/clients/xmldb/eXist/testatomclient.py

    r5012 r5192  
    1313from ndg.common.src.models.vocabtermdata import VocabTermData as VTD 
    1414import ndg.common.src.lib.utilities as utils 
     15from ndg.common.src.models.myconfig import myConfig 
    1516 
    1617class TestCase(unittest.TestCase):  
     
    2425                                 setUpDB = True) 
    2526        self.createdAtoms = []    # array to store paths to atoms created - in order to then delete them 
     27        self.cf = myConfig(tc.NDG_CONFIG_FILE) 
    2628        self.utils = tu(tc.EXIST_DBCONFIG_FILE) 
    2729         
     
    160162        createdAtom.state = AtomState.PUBLISHED_STATE 
    161163        providerCollection = dc.PROVIDER_FEED_PATH + createdAtom.ME.providerID 
    162         feed = self.client.feedClient.getAtomFeed(providerCollection) 
     164        feed = self.client.feedClient.getAtomFeed(providerCollection, proxyServer = self.cf.get('DEFAULT','proxyServer')) 
    163165         
    164166        if self.client.isNewCollection(providerCollection): 
Note: See TracChangeset for help on using the changeset viewer.