source: nappy/trunk/nappy/nc_interface/na_content_collector.py @ 3469

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/nappy/trunk/nappy/nc_interface/na_content_collector.py@3469
Revision 3469, 26.1 KB checked in by astephen, 12 years ago (diff)

Some temporary scripts to test functionality before formalising into unit tests.

Line 
1#   Copyright (C) 2004 CCLRC & NERC( Natural Environment Research Council ).
2#   This software may be distributed under the terms of the
3#   Q Public License, version 1.0 or later. http://ndg.nerc.ac.uk/public_docs/QPublic_license.txt
4
5"""
na_content_collector.py
=======================

Holds the class NAContentCollector that builds the content of a NASA Ames file object from a set of CDMS variables and global attributes.
10
11"""
12
13# Imports from python standard library
14import sys
15import time
16import re
17
18# Import from nappy package
19from nappy.na_error import na_error
20import nappy.utils
21import nappy.cdms_utils.axis_utils
22import nappy.cdms_utils.var_utils
23import nappy.utils.common_utils
24import nappy.na_file.na_core
25
# Package-level configuration: the NetCDF -> NASA Ames attribute map and the
# nappy version string (the latter is written into the history comment).
config_dict = nappy.utils.getConfigDict()
nc_to_na_map = config_dict["nc_to_na_map"]
version = nappy.utils.getVersion()

# Import external packages (if available).
# Test for the Windows platform names explicitly: a substring search for "win"
# also matches "darwin" and would wrongly refuse to run on Mac OS X.
if sys.platform.startswith("win") or sys.platform == "cygwin":
    raise na_error.NAPlatformError("Windows does not support CDMS. CDMS is required to convert to CDMS objects and NetCDF.")
try:
    import cdms
    import Numeric
except ImportError:
    # Only a failed import is translated; any other error raised while the
    # packages initialise is left to propagate unchanged.
    raise Exception("Could not import third-party software. Nappy requires the CDMS and Numeric packages to be installed to convert to CDMS and NetCDF.")

cdms.setAutoBounds("off")

DEBUG = nappy.utils.getDebug()
41
42class NAContentCollector(nappy.na_file.na_core.NACore):
43    """
44    Class to build a NASA Ames File object from a set of
45    CDMS variables and global attributes (optional).
46    """
47   
48    def __init__(self, variables, global_attributes={}, requested_ffi=None):
49        """
50        Sets up instance variables and calls appropriate methods to
51        generate sections of NASA Ames file object.
52
53        Input arguments are:
54          * variables - list/tuple of actual CDMS variables
55          * global_attributes - dictionary of user-defined globals to include.
56
57        Typical usage:
58        >>> x = NAContentCollector(["temp", "precip"])
59        >>> x.collectNAContent()
60        >>> if x.found_na == True:
61        ...     print x.na_dict, x.var_ids, x.unused_vars
62        """
63        self.output_message = []
64        self.na_dict = {}
65        self.vars = variables
66
67        # Note that self.var_ids will be a list containing:
68        #    [ordered_vars,  auxiliary_vars,   rank_zero_vars]
69        self.var_ids = None
70        self.globals = global_attributes       
71        self.requested_ffi = requested_ffi
72        self.rank_zero_vars = []
73        self.rank_zero_var_ids = []
74
75        # Create a flag to check if anything found
76        self.found_na = False
77
78
79    def collectNAContent(self):
80        """
81        Collect NASA Ames content. Save the contents to the following instance
82        attributes:
83         * self.na_dict
84         * self.var_ids
85         * self.unused_vars
86        """
87        (self.ordered_vars, aux_vars) = self._analyseVariables()
88     
89        if self.ordered_vars == []:
90            print "WARNING: No NASA Ames content created."
91            self.unused_vars = []
92        else:
93            self.var_ids = [[var.id for var in self.ordered_vars],
94                            [var.id for var in aux_vars], 
95                            self.rank_zero_var_ids]
96            self.na_dict["NLHEAD"] = "-999"
97            self._defineNAVars(self.ordered_vars)
98            self._defineNAaux_vars(aux_vars)
99            self._defineNAGlobals()
100            self._defineNAComments()
101            self._defineGeneralHeader()
102            # Quick fudge to cope with 1001 issue
103            if self.na_dict["FFI"] == 1001: 
104                self.na_dict["X"] = self.na_dict["X"][0]
105
106            self.found_na = True
107
108
109    def _analyseVariables(self):
110        """
111        Method to examine the content of CDMS variables to return
112        a tuple of two lists containing variables and auxiliary variables
113        for the NASA Ames file object.
114        Variables not compatible with the first file are put in self.unused_vars
115        """
116        self.unused_vars = []
117        ffis_limited = False
118
119        highest_rank = -1
120        best_var = None
121        count = 0
122
123        # Need to get highest ranked variable (most dimensions) so that we can work out FFI
124        for var in self.vars:
125            msg = "Analysing: %s" % var.id
126            print msg
127            self.output_message.append(msg)
128            count = count + 1
129            # get rank
130            rank = var.rank()
131
132            # Deal with singleton variables
133            if rank == 0: 
134                self.rank_zero_vars.append(var)
135                self.rank_zero_var_ids.append(var.id)
136                continue
137           
138            # Update highest if highest found or if equals highest with bigger size
139            if rank > highest_rank or (rank == highest_rank and var.size() > best_var.size()):
140                highest_rank = rank
141                best_var = var
142                best_var_index = count
143
144        # If all are zero ranked variables or no vars identified/found then we cannot write any to NASA Ames and return ([], [])
145        if len(self.rank_zero_vars) == len(self.vars) or best_var is None: 
146            return ([], [])
147
148        # Now start to sort the variables into main and auxiliary
149        vars_for_na = [best_var]
150        aux_vars_for_na = []
151        shape = best_var.shape
152        number_of_dims = len(shape)
153        self.na_dict["NIV"] = number_of_dims
154
155        # Get the axes for the main variable being used
156        best_var_axes = best_var.getAxisList()
157       
158        # Get other variable info
159        rest_of_the_vars = self.vars[:best_var_index - 1] + self.vars[best_var_index:]
160
161        for var in rest_of_the_vars:
162
163            if var.id in self.rank_zero_var_ids: continue
164
165            # What to do with variables that have different number of dimensions or different shape
166            if len(var.shape) != number_of_dims or var.shape != shape: 
167                # Could it be an auxiliary variable?
168                if len(var.shape) != 1: 
169                    self.unused_vars.append(var)
170                    continue
171
172                first_axis = var.getAxis(0)
173                # Check if axis is identical to first axis of main best variable, if so, can be auxiliary var
174                if nappy.cdms_utils.axis_utils.areAxesIdentical(best_var_axes[0], first_axis) == False: 
175                    # If not identical, then it might still qualify as an auxiliary every n time points - valid for 1020
176                    if len(var.shape) == 1 and nappy.cdms_utils.axis_utils.isAxisRegularlySpacedSubsetOf(first_axis, best_var_axes[0]) == True:
177                        ffis_limited = [1020]
178                    else:
179                        self.unused_vars.append(var)
180                        continue
181
182                # I think it is an auxiliary variable
183                aux_vars_for_na.append(var) 
184
185            else:
186                this_var_axes = var.getAxisList()
187
188                # Loop through dimensions
189                for i in range(number_of_dims):           
190                    if nappy.cdms_utils.axis_utils.areAxesIdentical(best_var_axes[i], this_var_axes[i]) == False:
191                        self.unused_vars.append(var)
192                        continue
193
194                # OK, I think the current variable is compatible to write with the best variable along with a NASA Ames file
195                vars_for_na.append(var)
196               
197        # Send vars_for_na AND aux_vars_for_na to a method to check if they have previously been mapped
198        # from NASA Ames. In which case we'll write them back in the order they were initially read from the input file.
199        (vars_for_na, aux_vars_for_na) = self._reorderVarsIfPreviouslyNA(vars_for_na, aux_vars_for_na)
200
201        # Get the FFI
202        self.na_dict["FFI"] = self._decideFileFormatIndex(number_of_dims, aux_vars_for_na, ffis_limited)
203        return (vars_for_na, aux_vars_for_na)
204
205
206    def _reorderVarsIfPreviouslyNA(self, vars_for_na, aux_vars_for_na):
207        """
208        Re-order if they previously came from NASA Ames files (i.e. including the
209        attribute 'nasa_ames_var_number'). Return re-ordered or unchanged pair of
210        (vars_for_na, aux_vars_for_na).
211        """
212        # THIS SHOULD REALLY BE DONE IN A LOOP
213        # First do the main variables
214        ordered_vars = [None] * 1000 # Make a long list to put vars in
215        # Create a list of other variables to collect up any that are not labelled as nasa ames variables
216        other_vars = []
217        for var in vars_for_na:
218            if hasattr(var, "nasa_ames_var_number"):
219                ordered_vars[var.nasa_ames_var_number[0]] = var
220            else:
221                other_vars.append(var)
222
223        # Remake vars_for_na now in new order and clean out any that are "None"
224        vars_for_na = []
225        for var in ordered_vars:
226            if var != None: 
227                vars_for_na.append(var)
228
229        vars_for_na = vars_for_na + other_vars
230
231        # Now re-order the Auxiliary variables if they previously came from NASA
232        ordered_aux_vars = [None] * 1000
233        other_aux_vars = []
234
235        for var in aux_vars_for_na:
236            if hasattr(var, "nasa_ames_aux_var_number"):
237                ordered_aux_vars[var.nasa_ames_aux_var_number[0]] = var
238            else:
239                other_aux_vars.append(var)
240
241        # Remake aux_vars_for_na now in order
242        aux_vars_for_na = []
243        for var in ordered_aux_vars:
244            if var != None: 
245                aux_vars_for_na.append(var)
246
247        aux_vars_for_na = aux_vars_for_na + other_aux_vars     
248        return (vars_for_na, aux_vars_for_na)
249
250
251    def _decideFileFormatIndex(self, number_of_dims, aux_vars_for_na, ffis_limited=False):
252        """
253        Based on the number of dimensions and the NASA Ames dictionary return
254        the File Format Index.
255        If there is a choice then make the most sensible selection.
256        If the user has specified a 'requested_ffi' then try and deliver
257        that. Raise an error if not possible.
258        """
259        # If ffis_limited is set then must use one of those
260        if self.requested_ffi and ffis_limited:
261            if self.requested_ffi not in ffis_limited:
262                raise Exception("Cannot write this data to FFI '" + str(self.requested_ffi) + "', can only write to: " + str(ffis_limited) + ".")
263            else:
264                return self.requested_ffi
265
266        # Base the sub-selection on number of dimensions
267        if number_of_dims > 1:
268            ffi = 10 + (number_of_dims * 1000)
269        elif number_of_dims > 4:
270            raise Exception("Cannot write variables defined against greater than 4 axes in NASA Ames format.")
271        else:
272            if len(aux_vars_for_na) > 0 or (self.na_dict.has_key("NAUXV") and self.na_dict["NAUXV"] > 0):
273                ffi = 1010
274            else:
275                ffi = 1001
276
277        if self.requested_ffi and ffi != self.requested_ffi:
278            raise Exception("Cannot write this data to FFI '" + str(self.requested_ffi) + "', can only write to: " + str(ffi) + ".")
279        return ffi
280
281
282    def _defineNAVars(self, vars):
283        """
284        Method to define NASA Ames file object variables and their
285        associated metadata.
286        """
287        self.na_dict["NV"] = len(vars)
288        self.na_dict["VNAME"] = []
289        self.na_dict["VMISS"] = []
290        self.na_dict["VSCAL"] = []
291        self.na_dict["V"] = []
292
293        for var in vars:
294            name = nappy.cdms_utils.var_utils.getBestName(var)
295            self.na_dict["VNAME"].append(name)
296            miss = nappy.cdms_utils.var_utils.getMissingValue(var)
297            if type(miss) not in (type(1.2), type(1), type(1L)): 
298                miss = miss[0]
299            self.na_dict["VMISS"].append(miss)
300            self.na_dict["VSCAL"].append(1)
301            # Populate the variable list with the array
302            self.na_dict["V"].append(var._data)
303
304            if not self.na_dict.has_key("X"):
305                self.na_dict["NXDEF"] = []
306                self.na_dict["NX"] = []
307
308                # Create independent variable information
309                self.ax0 = var.getAxis(0)
310                self.na_dict["X"] = [self.ax0[:].tolist()]
311                print self.na_dict["X"]
312                self.na_dict["XNAME"] = [nappy.cdms_utils.var_utils.getBestName(self.ax0)]
313                if len(self.ax0) == 1:
314                    self.na_dict["DX"] = [0]
315                else:
316                    incr = self.ax0[1] - self.ax0[0]
317                    # Set default increment as gap between first two
318                    self.na_dict["DX"] = [incr]
319                    # Now overwrite it as zero if non-uniform interval in axis
320                    for i in range(1, len(self.ax0)):
321                        if (self.ax0[i] - self.ax0[i - 1]) != incr:
322                            self.na_dict["DX"] = [0]
323                            break
324
325                # Now add the rest of the axes to the self.na_dict objects
326                for axis in var.getAxisList()[1:]:
327                    self._appendAxisDefinition(axis)
328
329
330    def _defineNAaux_vars(self, aux_vars):
331        """
332        Method to define NASA Ames file object auxiliary variables and their
333        associated metadata.
334        """
335        self.na_dict["NAUXV"] = len(aux_vars)
336        self.na_dict["ANAME"] = []
337        self.na_dict["AMISS"] = []
338        self.na_dict["ASCAL"] = []
339        self.na_dict["A"] = []
340
341        for var in aux_vars:
342            name = nappy.cdms_utils.var_utils.getBestName(var)
343            self.na_dict["ANAME"].append(name)
344            miss = nappy.cdms_utils.var_utils.getMissingValue(var)
345            if type(miss) != type(1.1):  miss = miss[0]
346            self.na_dict["AMISS"].append(miss)
347            self.na_dict["ASCAL"].append(1)
348            # Populate the variable list with the array
349            self.na_dict["A"].append(var._data)
350
351    def _appendAxisDefinition(self, axis):
352        """
353        Method to create the appropriate NASA Ames file object
354        items associated with an axis (independent variable in
355        NASA Ames). It appends to the various self.na_dict containers.
356        """
357        length = len(axis)
358
359        self.na_dict["NX"].append(length)
360        self.na_dict["XNAME"].append(nappy.cdms_utils.var_utils.getBestName(axis))
361        # If only one item in axis values
362        if length < 2:
363            self.na_dict["DX"].append(0)
364            self.na_dict["NXDEF"].append(length)
365            self.na_dict["X"].append(axis[:].tolist())       
366            return
367   
368        incr = axis[1] - axis[0]
369        for i in range(1, length):
370            if (axis[i] - axis[i - 1]) != incr:
371                self.na_dict["DX"].append(0)
372                self.na_dict["NXDEF"].append(length)
373                self.na_dict["X"].append(axis.tolist())
374                break
375        else: # If did not break out of the loop
376            max_length = length
377            if length > 3: 
378                max_length = 3
379            self.na_dict["DX"].append(incr)
380            self.na_dict["NXDEF"].append(max_length)
381            self.na_dict["X"].append(axis[:max_length])
382
383    def _defineNAGlobals(self):
384        """
385        Maps CDMS (NetCDF) global attributes into NASA Ames Header fields.
386        """
387        # Check if we should add to it with locally set rules
388        local_attributes = nappy.utils.getConfigDict()["local_attributes"]
389        for att, value in local_attributes.items():
390            if not nc_to_na_map.has_key(att):
391                nc_to_na_map[key] = value
392
393        self.extra_comments = [[],[],[]]  # Normal comments, special comments, other comments
394        convention_or_reference_comments = []
395
396        for key in self.globals.keys():
397            if key != "first_valid_date_of_data" and type(self.globals[key]) \
398                                       not in (type("s"), type(1.1), type(1)):
399                continue
400
401            # Loop through keys of header/comment items to map
402            if key in nc_to_na_map.keys():
403                if key == "history":
404                    time_string = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))
405                    history = "History:\t%s - Converted to NASA Ames format using nappy-%s.\n\t%s" % \
406                                                 (time_string, version, self.globals[key])
407                    history = history.split("\n") 
408                    self.history = []
409                    for h in history:
410                        if h[:8] != "History:" and h[:1] != "\t": 
411                            h = "\t" + h
412                        self.history.append(h) 
413                   
414                elif key == "institution":
415                    # If fields came from NA then extract appropriate fields.
416                    match = re.match(r"(.*)\s+\(ONAME from NASA Ames file\);\s+(.*)\s+\(ORG from NASA Ames file\)\.", 
417                             self.globals[key])
418                    if match:
419                        self.na_dict["ONAME"] = match.groups()[0]
420                        self.na_dict["ORG"] = match.groups()[1]
421                    else:
422                        self.na_dict["ONAME"] = self.globals[key]
423                        self.na_dict["ORG"] = self.globals[key]           
424                    # NOTE: should probably do the following search and replace on all string lines
425                    self.na_dict["ONAME"] = self.na_dict["ONAME"].replace("\n", "  ")
426                    self.na_dict["ORG"] = self.na_dict["ORG"].replace("\n", "  ")
427                                   
428                elif key == "comment":
429                    # Need to work out if they are actually comments from NASA Ames in the first place
430                    comment_lines = self.globals[key].split("\n")
431                    normal_comments = []
432                    normal_comm_flag = None
433                    special_comments = []
434                    special_comm_flag = None
435
436                    for line in comment_lines:
437                        if line.find("###NASA Ames Special Comments follow###") > -1:
438                            special_comm_flag = 1
439                        elif line.find("###NASA Ames Special Comments end###") > -1:
440                            special_comm_flag = None
441                        elif line.find("###NASA Ames Normal Comments follow###") > -1:
442                            normal_comm_flag = 1
443                        elif line.find("###NASA Ames Normal Comments end###") > -1:
444                            normal_comm_flag = None     
445                        elif special_comm_flag == 1:
446                            special_comments.append(line)
447                        elif normal_comm_flag == 1:
448                            normal_comments.append(line)
449                        elif line.find("###Data Section begins on the next line###") > -1:
450                            pass
451                        else:
452                            normal_comments.append(line)           
453                   
454                    self.extra_comments = [special_comments, normal_comments, []]                   
455                                   
456                elif key == "first_valid_date_of_data":
457                    self.na_dict["DATE"] = self.globals[key]
458               
459                elif key in ("Conventions", "references"):
460                    #convention_or_reference_comments.append("%s:   %s" % (key, self.globals[key]))
461                    self.extra_comments[2].append("%s:   %s" % (key, self.globals[key]))
462                else:
463                    self.na_dict[nc_to_na_map[key]] = self.globals[key]
464            else:
465                self.extra_comments[2].append("%s:   %s" % (key, self.globals[key]))
466        return
467
468
469    def _defineNAComments(self, normal_comments=[], special_comments=[]):
470        """
471        Defines the Special and Normal comments sections in the NASA Ames file
472        object - including information gathered from the defineNAGlobals method.
473        """
474       
475        if hasattr(self, "ncom"):  normal_comments = self.ncom + normal_comments
476
477        NCOM = []
478        for ncom in normal_comments:
479            NCOM.append(ncom)
480
481        if len(NCOM) > 0:   NCOM.append("")
482       
483        if len(self.extra_comments[2]) > 0:
484            for excom in self.extra_comments[2]:
485                NCOM.append(excom)
486       
487        if len(self.extra_comments[1]) > 0: 
488            NCOM.append("Additional Global Attributes defined in the source file and not translated elsewhere:")
489            for excom in self.extra_comments[1]:
490                NCOM.append(excom)
491
492        if hasattr(self, "history"):
493            for h in self.history:
494                NCOM.append(h)
495       
496        if len(NCOM) > 0:
497            NCOM.insert(0, "###NASA Ames Normal Comments follow###")
498            NCOM.append("")
499            NCOM.append("###NASA Ames Normal Comments end###")
500            NCOM.append("###Data Section begins on the next line###")
501
502        spec_comm_flag = None
503        SCOM = []
504        special_comments = self.extra_comments[0]
505        if len(special_comments) > 0: 
506            SCOM = ["###NASA Ames Special Comments follow###"]
507
508            spec_comm_flag = 1
509        for scom in special_comments:
510            SCOM.append(scom)
511
512        used_var_atts = ("id",  "missing_value", "fill_value", "units", 
513                   "nasa_ames_var_number", "nasa_ames_aux_var_number")
514        var_comm_flag = None
515
516        # Create a string for the Special comments to hold rank-zero vars
517        rank_zero_vars_string = []
518
519        for var in self.rank_zero_vars:
520            rank_zero_vars_string.append("\tVariable %s: %s" % (var.id, nappy.cdms_utils.var_utils.getBestName(var)))
521
522            for att in var.attributes.keys():
523                value = var.attributes[att]
524
525                if type(value) in (type("s"), type(1.0), type(1)):
526
527                    rank_zero_vars_string.append("\t\t%s = %s" % (att, var.attributes[att]))
528
529        if len(rank_zero_vars_string) > 0:
530            rank_zero_vars_string.insert(0, "###Singleton Variables defined in the source file follow###")
531            rank_zero_vars_string.append("###Singleton Variables defined in the source file end###")
532
533        for var in self.ordered_vars:
534            varflag = "unused"
535            name = nappy.cdms_utils.var_utils.getBestName(var)
536
537            for scom,value in var.attributes.items():
538                if type(value) in (type([]), type(Numeric.array([0]))) and len(value) == 1:
539                    value = value[0]
540
541                if type(value) in (type("s"), type(1.1), type(1)) and scom not in used_var_atts:
542                    if varflag == "unused":
543                        if var_comm_flag == None:
544                            var_comm_flag = 1
545
546                    if spec_comm_flag == None:
547                        SCOM = ["###NASA Ames Special Comments follow###"] + rank_zero_vars_string
548                        SCOM.append("Additional Variable Attributes defined in the source file and not translated elsewhere:")
549                        SCOM.append("###Variable attributes from source (NetCDF) file follow###")
550                        varflag = "using" 
551
552                    SCOM.append("\tVariable %s: %s" % (var.id, name))
553                    SCOM.append("\t\t%s = %s" % (scom, value))
554
555        if var_comm_flag == 1: 
556            SCOM.append("###Variable attributes from source (NetCDF) file end###")
557        if spec_comm_flag == 1:
558            SCOM.append("###NASA Ames Special Comments end###")
559
560        # Strip out empty lines (or returns)
561        NCOM_cleaned = []
562        SCOM_cleaned = []
563
564        for c in NCOM:
565            if c.strip() not in ("", " ", "  "):
566                # Replace new lines within one attribute with a newline and tab so easier to read
567                lines = c.split("\n")
568                for line in lines:
569                    if line != lines[0]: 
570                        line = "\t" + line
571                    NCOM_cleaned.append(line)
572                       
573        for c in SCOM:
574            if c.strip() not in ("", " ", "  "):               
575                        # Replace new lines within one attribute with a newline and tab so easier to read
576                lines = c.split("\n")
577                for line in lines:
578                    if line != lines[0]: 
579                        line = "\t" + line
580                    SCOM_cleaned.append(line)
581                   
582        self.na_dict["NCOM"] = NCOM_cleaned
583        self.na_dict["NNCOML"] = len(self.na_dict["NCOM"])
584        self.na_dict["SCOM"] = SCOM_cleaned
585        self.na_dict["NSCOML"] = len(self.na_dict["SCOM"])
586        return
587
588
589    def _defineGeneralHeader(self, header_items={}):
590        """
591        Defines known header items and overwrites any with header_items
592        key/value pairs.
593        """
594        # Check if DATE field previously known in NASA Ames file
595        time_now = time.strftime("%Y %m %d", time.localtime(time.time())).split()
596        if not self.na_dict.has_key("RDATE"):
597            self.na_dict["RDATE"] = time_now
598       
599        if self.ax0.isTime():
600            # Get first date in list
601            try:
602                (unit, start_date) = re.match("(\w+)\s+?since\s+?(\d+-\d+-\d+)", self.ax0.units).groups()           
603                comptime = cdtime.s2c(start_date)
604                first_day = comptime.add(self.na_dict["X"][0][0], getattr(cdtime, unit.capitalize()))
605                self.na_dict["DATE"] = str(first_day).split(" ")[0].replace("-", " ").split()
606            except:
607                msg = "Nappy Warning: Could not get the first date in the file. You will need to manually edit the output file."
608                print msg
609                self.output_message.append(msg)
610                self.na_dict["DATE"] = ("DATE", "NOT", "KNOWN")
611        else: 
612            if not self.na_dict.has_key("DATE"):
613                msg = "Nappy Warning: Could not get the first date in the file. You will need to manually edit the output file."
614                print msg
615                self.output_message.append(msg)
616                self.na_dict["DATE"] = ("DATE", "NOT", "KNOWN")
617            else:
618                pass # i.e. use existing DATE
619
620        self.na_dict["IVOL"] = 1
621        self.na_dict["NVOL"] = 1
622        for key in header_items.keys():
623             self.na_dict[key] = header_items[key]
Note: See TracBrowser for help on using the repository browser.