source: nappy/trunk/nappy/nc_interface/na_content_collector.py @ 3423

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/nappy/trunk/nappy/nc_interface/na_content_collector.py@3423
Revision 3423, 24.7 KB checked in by astephen, 12 years ago (diff)

Improved FFI identification.

Line 
1#   Copyright (C) 2004 CCLRC & NERC( Natural Environment Research Council ).
2#   This software may be distributed under the terms of the
3#   Q Public License, version 1.0 or later. http://ndg.nerc.ac.uk/public_docs/QPublic_license.txt
4
5"""
6na_content_collector.py
7=======================
8
9Holds the class NAContentCollector that collects the content of a NASA Ames
10file object from a set of CDMS variables and global attributes.
11"""
12
13# Imports from python standard library
14import sys
15import time
16import re
17
18# Import from nappy package
19from nappy.na_error import na_error
20import nappy.utils
21import nappy.cdms_utils.axis_utils
22import nappy.cdms_utils.var_utils
23import nappy.utils.common_utils
24import nappy.na_file.na_core
25
# Mapping of NetCDF global attribute names to NASA Ames header item names,
# read from the nappy configuration.
nc_to_na_map = nappy.utils.getConfigDict()["nc_to_na_map"]

# Import external packages (if available)
# Use startswith() rather than a substring search: "win" is also a substring
# of "darwin" (Mac OS X) and "cygwin", which must not be rejected as Windows.
if sys.platform.startswith("win"):
    raise na_error.NAPlatformError("Windows does not support CDMS. CDMS is required to convert to CDMS objects and NetCDF.")
try:
    import cdms
    import Numeric
except ImportError:
    # Only a failed import should be reported as missing software
    raise Exception("Could not import third-party software. Nappy requires the CDMS and Numeric packages to be installed to convert to CDMS and NetCDF.")

cdms.setAutoBounds("off")

DEBUG = nappy.utils.getDebug()
39
class NAContentCollector(nappy.na_file.na_core.NACore):
    """
    Class to build a NASA Ames File object from a set of
    CDMS variables and global attributes (optional).

    After calling collectNAContent() the results are held in:
      * self.na_dict        - dictionary of NASA Ames header items and data
      * self.var_ids        - [ordered_var_ids, aux_var_ids, rank_zero_var_ids]
                              (None if no usable variable was found)
      * self.unused_vars    - variables not written alongside the main variable
                              (auxiliary variables are listed here as well)
      * self.output_message - list of messages reported while collecting
    """

    def __init__(self, variables, global_attributes=None):
        """
        Sets up instance variables used by collectNAContent().

        Input arguments are:
          * variables - list/tuple of actual CDMS variables
          * global_attributes - dictionary of user-defined globals to include
            (defaults to an empty dictionary).

        Typical usage (temp and precip being CDMS variables):
        >>> x = NAContentCollector([temp, precip])
        >>> x.collectNAContent()
        >>> x.na_dict, x.var_ids, x.unused_vars
        """
        self.output_message = []
        self.na_dict = {}
        self.vars = variables

        # Note that self.var_ids will be a list containing:
        #    [ordered_vars,  auxiliary_vars,   rank_zero_vars]
        self.var_ids = None

        # A fresh dictionary per instance (never share a mutable default)
        if global_attributes is None:
            global_attributes = {}
        self.globals = global_attributes

        self.rank_zero_vars = []
        self.rank_zero_var_ids = []
        self.unused_vars = []
        self.ordered_vars = None

    def collectNAContent(self):
        """
        Collect NASA Ames content. Save the contents to the following instance
        attributes:
         * self.na_dict
         * self.var_ids
         * self.unused_vars

        If no usable variable is found self.na_dict is not populated and
        self.unused_vars is an empty list.
        """
        (self.ordered_vars, aux_vars) = self._analyseVariables()
        if self.ordered_vars is None:
            self.unused_vars = []
            return

        self.var_ids = [[var.id for var in self.ordered_vars],
                        [var.id for var in aux_vars],
                        self.rank_zero_var_ids]
        # Placeholder value for the number of header lines.
        # NOTE(review): presumably replaced when the file is written - confirm.
        self.na_dict["NLHEAD"] = "-999"
        # Order matters: the comments need self.extra_comments from the globals
        # and the general header needs self.ax0 and na_dict["X"] from the variables.
        self._defineNAVars(self.ordered_vars)
        self._defineNAaux_vars(aux_vars)
        self._defineNAGlobals()
        self._defineNAComments()
        self._defineGeneralHeader()
        # Quick fudge to cope with 1001 issue: X is held as a flat list
        # rather than a list of lists for this FFI.
        if self.na_dict["FFI"] == 1001:
            self.na_dict["X"] = self.na_dict["X"][0]

    def _analyseVariables(self):
        """
        Method to examine the content of CDMS variables to return
        a tuple of two lists containing variables and auxiliary variables
        for the NASA Ames file object.
        Variables not compatible with the first file are put in self.unused_vars.

        Side effects: resets and fills self.unused_vars, self.rank_zero_vars
        and self.rank_zero_var_ids; sets self.na_dict["NIV"] and
        self.na_dict["FFI"].

        Returns (None, None) if there is no variable with at least one
        dimension.
        """
        self.unused_vars = []
        # Start afresh so that calling this method twice does not duplicate
        # the singleton variables.
        self.rank_zero_vars = []
        self.rank_zero_var_ids = []

        highest_rank = -1
        best_var = None
        best_var_index = None

        # Need to get highest ranked variable (most dimensions) so that we can work out FFI
        for (index, var) in enumerate(self.vars):
            msg = "Analysing: %s" % var.id
            print(msg)
            self.output_message.append(msg)
            rank = var.rank()

            # Deal with singleton variables
            if rank == 0:
                self.rank_zero_vars.append(var)
                self.rank_zero_var_ids.append(var.id)
                continue

            # Update highest if highest found or if equals highest with bigger size
            if rank > highest_rank or (rank == highest_rank and var.size() > best_var.size()):
                highest_rank = rank
                best_var = var
                best_var_index = index

        # If all are zero ranked variables or no vars were given then we cannot
        # write any to NASA Ames. Test identity: "== None" on an array-like
        # variable may be evaluated element-wise.
        if best_var is None:
            print("No usable variables found.")
            return (None, None)

        # Now start to sort the variables into main and auxiliary
        vars_for_na = [best_var]
        aux_vars_for_na = []
        shape = best_var.shape
        number_of_dims = len(shape)
        self.na_dict["NIV"] = number_of_dims

        # Get the axes for the main variable being used
        best_var_axes = best_var.getAxisList()

        for (index, var) in enumerate(self.vars):
            if index == best_var_index:
                continue
            if var.id in self.rank_zero_var_ids:
                continue

            if var.shape != shape:
                # Different shape: it can only be used as an auxiliary variable,
                # i.e. 1-D and defined against the first axis of the main variable.
                if len(var.shape) == 1 and \
                   nappy.cdms_utils.axis_utils.areAxesIdentical(best_var_axes[0], var.getAxis(0)):
                    aux_vars_for_na.append(var)
                # Auxiliary vars also go in the unused var bin because they
                # might be useful later on in their own right.
                self.unused_vars.append(var)
                continue

            # Same shape: usable only if every axis matches that of the main variable
            if self._axesMatch(best_var_axes, var.getAxisList(), number_of_dims):
                vars_for_na.append(var)
            else:
                self.unused_vars.append(var)

        # Send vars_for_na AND aux_vars_for_na to a method to check if they have previously been mapped
        # from NASA Ames. In which case we'll write them back in the order they were initially read from the input file.
        (vars_for_na, aux_vars_for_na) = self._reorderVarsIfPreviouslyNA(vars_for_na, aux_vars_for_na)

        # Get the FFI
        self.na_dict["FFI"] = self._decideFileFormatIndex(number_of_dims, aux_vars_for_na)
        return (vars_for_na, aux_vars_for_na)

    def _axesMatch(self, axes_a, axes_b, number_of_dims):
        """
        Returns True if the first number_of_dims axes in axes_a and axes_b
        are pair-wise identical according to areAxesIdentical().
        """
        for i in range(number_of_dims):
            if not nappy.cdms_utils.axis_utils.areAxesIdentical(axes_a[i], axes_b[i]):
                return False
        return True

    def _reorderVarsIfPreviouslyNA(self, vars_for_na, aux_vars_for_na):
        """
        Re-order if they previously came from NASA Ames files (i.e. including the
        attribute 'nasa_ames_var_number'). Return re-ordered or unchanged pair of
        (vars_for_na, aux_vars_for_na).

        Within each list the numbered variables come first, in ascending order
        of their number, followed by the un-numbered variables in their
        original order.
        """
        vars_for_na = self._orderByNAIndex(vars_for_na, "nasa_ames_var_number")
        aux_vars_for_na = self._orderByNAIndex(aux_vars_for_na, "nasa_ames_aux_var_number")
        return (vars_for_na, aux_vars_for_na)

    def _orderByNAIndex(self, variables, index_attribute):
        """
        Returns a new list holding the items of variables that have the
        attribute index_attribute sorted by the first element of that
        attribute, followed by all other items in their original order.
        """
        numbered = []
        others = []
        for var in variables:
            if hasattr(var, index_attribute):
                # The running count keeps the sort stable and ensures that the
                # variables themselves are never compared with each other.
                numbered.append((getattr(var, index_attribute)[0], len(numbered), var))
            else:
                others.append(var)

        numbered.sort()
        return [item[2] for item in numbered] + others

    def _decideFileFormatIndex(self, number_of_dims, aux_vars_for_na):
        """
        Based on the number of dimensions and the NASA Ames dictionary return
        the File Format Index:
          * 2, 3 or 4 dimensions - 2010, 3010 or 4010
          * fewer than 2 dimensions - 1010 if there are auxiliary variables
            (in aux_vars_for_na or counted in self.na_dict["NAUXV"]), else 1001.

        Raises an Exception for more than 4 dimensions.
        """
        if number_of_dims in (2, 3, 4):
            return 10 + (number_of_dims * 1000)

        if number_of_dims > 4:
            raise Exception("Cannot write variables defined against greater than 4 axes in NASA Ames format.")

        if len(aux_vars_for_na) > 0 or self.na_dict.get("NAUXV", 0) > 0:
            return 1010
        return 1001

    def _getScalarMissingValue(self, var):
        """
        Returns the missing value of var as a scalar. If the value returned
        by getMissingValue() is not a plain number its first element is used.
        """
        miss = nappy.cdms_utils.var_utils.getMissingValue(var)
        if not isinstance(miss, (float, int, long)):
            miss = miss[0]
        return miss

    def _getAxisIncrement(self, axis):
        """
        Returns the constant interval between consecutive values of axis, or
        0 if the axis has fewer than two values or the interval is not constant.
        """
        length = len(axis)
        if length < 2:
            return 0

        incr = axis[1] - axis[0]
        for i in range(2, length):
            if (axis[i] - axis[i - 1]) != incr:
                return 0
        return incr

    def _defineNAVars(self, vars):
        """
        Method to define NASA Ames file object variables and their
        associated metadata (NV, VNAME, VMISS, VSCAL and V).

        The independent variable items (X, XNAME, DX, NX and NXDEF) are
        defined from the axes of the first variable, if "X" is not already
        in self.na_dict. The first axis is kept as self.ax0.
        """
        self.na_dict["NV"] = len(vars)
        self.na_dict["VNAME"] = []
        self.na_dict["VMISS"] = []
        self.na_dict["VSCAL"] = []
        self.na_dict["V"] = []

        for var in vars:
            self.na_dict["VNAME"].append(nappy.cdms_utils.var_utils.getBestName(var))
            self.na_dict["VMISS"].append(self._getScalarMissingValue(var))
            self.na_dict["VSCAL"].append(1)
            # Populate the variable list with the array
            self.na_dict["V"].append(var._data)

            if "X" in self.na_dict:
                continue

            self.na_dict["NXDEF"] = []
            self.na_dict["NX"] = []

            # Create independent variable information; the first axis is
            # always written out in full. DX is 0 for a non-uniform interval.
            self.ax0 = var.getAxis(0)
            self.na_dict["X"] = [self.ax0[:].tolist()]
            self.na_dict["XNAME"] = [nappy.cdms_utils.var_utils.getBestName(self.ax0)]
            self.na_dict["DX"] = [self._getAxisIncrement(self.ax0)]

            # Now add the rest of the axes to the self.na_dict objects
            for axis in var.getAxisList()[1:]:
                self._appendAxisDefinition(axis)

    def _defineNAaux_vars(self, aux_vars):
        """
        Method to define NASA Ames file object auxiliary variables and their
        associated metadata (NAUXV, ANAME, AMISS, ASCAL and A).
        """
        self.na_dict["NAUXV"] = len(aux_vars)
        self.na_dict["ANAME"] = []
        self.na_dict["AMISS"] = []
        self.na_dict["ASCAL"] = []
        self.na_dict["A"] = []

        for var in aux_vars:
            self.na_dict["ANAME"].append(nappy.cdms_utils.var_utils.getBestName(var))
            # Same rule as for the main variables so that an integer missing
            # value is accepted as well as a float.
            self.na_dict["AMISS"].append(self._getScalarMissingValue(var))
            self.na_dict["ASCAL"].append(1)
            # Populate the variable list with the array
            self.na_dict["A"].append(var._data)

    def _appendAxisDefinition(self, axis):
        """
        Method to create the appropriate NASA Ames file object
        items associated with an axis (independent variable in
        NASA Ames). It appends exactly one entry to each of the NX, XNAME,
        DX, NXDEF and X containers in self.na_dict:
          * uniformly spaced axis - DX is the interval and only the first
            (up to 3) values are stored in X
          * otherwise (single value, non-uniform or zero interval) - DX is 0
            and all values are stored in X.
        """
        length = len(axis)
        self.na_dict["NX"].append(length)
        self.na_dict["XNAME"].append(nappy.cdms_utils.var_utils.getBestName(axis))

        incr = self._getAxisIncrement(axis)
        if incr == 0:
            number_defined = length
        else:
            number_defined = min(length, 3)

        self.na_dict["DX"].append(incr)
        self.na_dict["NXDEF"].append(number_defined)
        self.na_dict["X"].append(axis[:number_defined].tolist())

    def _getNappyVersion(self):
        """
        Returns the nappy version string used in the history comment.
        """
        # NOTE(review): no name "version" is imported or defined in this
        # module, so "version.version" raises a NameError as the file stands.
        # Fall back to a placeholder rather than failing the conversion;
        # the proper fix is to import the nappy version at module level.
        try:
            return version.version
        except NameError:
            return "unknown"

    def _formatHistory(self, previous_history):
        """
        Returns a list of history comment lines: a new "History:" entry for
        this conversion followed by the lines of previous_history, each
        continuation line starting with a tab.
        """
        time_string = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))
        history = "History:\t%s - Converted to NASA Ames format using nappy-%s.\n\t%s" % \
                                     (time_string, self._getNappyVersion(), previous_history)
        lines = []
        for h in history.split("\n"):
            if h[:8] != "History:" and h[:1] != "\t":
                h = "\t" + h
            lines.append(h)
        return lines

    def _splitNAComments(self, comment):
        """
        Splits the text of a "comment" global attribute into a tuple of
        (special_comments, normal_comments) lists of lines.

        Lines between the "###NASA Ames Special Comments follow###"/"end###"
        markers are special comments. All other lines are normal comments.
        The marker lines themselves are dropped, as is the "###Data Section
        begins on the next line###" marker when found outside either section.
        """
        special_comments = []
        normal_comments = []
        in_special = False
        in_normal = False

        for line in comment.split("\n"):
            if line.find("###NASA Ames Special Comments follow###") > -1:
                in_special = True
            elif line.find("###NASA Ames Special Comments end###") > -1:
                in_special = False
            elif line.find("###NASA Ames Normal Comments follow###") > -1:
                in_normal = True
            elif line.find("###NASA Ames Normal Comments end###") > -1:
                in_normal = False
            elif in_special:
                special_comments.append(line)
            elif in_normal:
                normal_comments.append(line)
            elif line.find("###Data Section begins on the next line###") > -1:
                pass
            else:
                normal_comments.append(line)

        return (special_comments, normal_comments)

    def _defineNAGlobals(self):
        """
        Maps CDMS (NetCDF) global attributes into NASA Ames Header fields.

        Sets self.extra_comments to [special comments, normal comments,
        other comments] for use by _defineNAComments(), and self.history if
        a mapped "history" attribute is present. Attributes that are not in
        nc_to_na_map are recorded as "other" comments.

        Note that the module-level nc_to_na_map is extended with any
        "local_attributes" from the configuration that it does not hold yet.
        """
        # Check if we should add to it with locally set rules
        local_attributes = nappy.utils.getConfigDict()["local_attributes"]
        for att, value in local_attributes.items():
            if att not in nc_to_na_map:
                nc_to_na_map[att] = value

        self.extra_comments = [[], [], []]  # Special comments, normal comments, other comments

        for key in self.globals.keys():
            value = self.globals[key]

            # Only simple (string or number) attributes can be mapped
            if key != "first_valid_date_of_data" and type(value) not in (str, float, int):
                continue

            if key not in nc_to_na_map:
                self.extra_comments[2].append("%s:   %s" % (key, value))
                continue

            if key == "history":
                self.history = self._formatHistory(value)

            elif key == "institution":
                institution = str(value)
                # If fields came from NA then extract appropriate fields.
                match = re.match(r"(.*)\s+\(ONAME from NASA Ames file\);\s+(.*)\s+\(ORG from NASA Ames file\)\.",
                                 institution)
                if match:
                    (oname, org) = match.groups()
                else:
                    oname = org = institution
                # NOTE: should probably do the following search and replace on all string lines
                self.na_dict["ONAME"] = oname.replace("\n", "  ")
                self.na_dict["ORG"] = org.replace("\n", "  ")

            elif key == "comment":
                # Need to work out if they are actually comments from NASA Ames in the first place
                (special_comments, normal_comments) = self._splitNAComments(str(value))
                # Assign by index so that "other" comments already collected
                # from attributes seen earlier in the loop are not discarded.
                self.extra_comments[0] = special_comments
                self.extra_comments[1] = normal_comments

            elif key == "first_valid_date_of_data":
                self.na_dict["DATE"] = value

            elif key in ("Conventions", "references"):
                self.extra_comments[2].append("%s:   %s" % (key, value))

            else:
                self.na_dict[nc_to_na_map[key]] = value

    def _cleanCommentLines(self, comments):
        """
        Returns a list of comment lines without the empty (or white space
        only) items of comments. An item holding new lines is split into
        several lines, each continuation line starting with a tab so that
        it is easier to read.
        """
        cleaned = []
        for comment in comments:
            if not comment.strip():
                continue
            for (i, line) in enumerate(comment.split("\n")):
                # Decide by position: a continuation line may have the same
                # text as the first line.
                if i > 0:
                    line = "\t" + line
                cleaned.append(line)
        return cleaned

    def _getRankZeroVarLines(self):
        """
        Returns the special comment lines describing the singleton (rank-zero)
        variables and their simple attributes, wrapped in marker lines.
        Returns an empty list if there are no singleton variables.
        """
        lines = []
        for var in self.rank_zero_vars:
            lines.append("\tVariable %s: %s" % (var.id, nappy.cdms_utils.var_utils.getBestName(var)))
            # The attributes must be listed per variable, inside this loop
            for att in var.attributes.keys():
                value = var.attributes[att]
                if type(value) in (str, float, int):
                    lines.append("\t\t%s = %s" % (att, value))

        if len(lines) > 0:
            lines.insert(0, "###Singleton Variables defined in the source file follow###")
            lines.append("###Singleton Variables defined in the source file end###")
        return lines

    def _getVarAttributeLines(self):
        """
        Returns the special comment lines listing, for each variable in
        self.ordered_vars, the simple attributes that are not translated
        elsewhere. Each variable is named once, before its attributes.
        """
        used_var_atts = ("id", "missing_value", "fill_value", "units",
                         "nasa_ames_var_number", "nasa_ames_aux_var_number")
        lines = []
        for var in self.ordered_vars:
            name = nappy.cdms_utils.var_utils.getBestName(var)
            name_written = False

            for att, value in var.attributes.items():
                # Unwrap single-item lists and arrays
                if type(value) in (type([]), type(Numeric.array([0]))) and len(value) == 1:
                    value = value[0]

                if type(value) in (str, float, int) and att not in used_var_atts:
                    if not name_written:
                        lines.append("\tVariable %s: %s" % (var.id, name))
                        name_written = True
                    lines.append("\t\t%s = %s" % (att, value))
        return lines

    def _defineNAComments(self, normal_comments=None, special_comments=None):
        """
        Defines the Special and Normal comments sections in the NASA Ames file
        object - including information gathered from the defineNAGlobals method.

        Sets NCOM, NNCOML, SCOM and NSCOML in self.na_dict. Requires
        self.extra_comments (see _defineNAGlobals) and self.ordered_vars.

        Input arguments are:
          * normal_comments - list of lines put at the start of the normal
            comments (after those in self.ncom, if that attribute exists).
          * special_comments - NOTE(review): accepted for compatibility but
            not used; the special comments are always taken from
            self.extra_comments[0], as in the previous implementation.
        """
        if normal_comments is None:
            normal_comments = []
        if hasattr(self, "ncom"):
            normal_comments = list(self.ncom) + list(normal_comments)

        # Normal comments
        NCOM = list(normal_comments)
        NCOM.extend(self.extra_comments[2])

        # NOTE(review): this heading precedes the normal comments parsed from
        # the "comment" attribute (extra_comments[1]) whereas the unmapped
        # attributes (extra_comments[2]) are written without a heading.
        # Kept as found - confirm which list the heading is meant for.
        if len(self.extra_comments[1]) > 0:
            NCOM.append("Additional Global Attributes defined in the source file and not translated elsewhere:")
            NCOM.extend(self.extra_comments[1])

        if hasattr(self, "history"):
            NCOM.extend(self.history)

        if len(NCOM) > 0:
            NCOM.insert(0, "###NASA Ames Normal Comments follow###")
            NCOM.append("###NASA Ames Normal Comments end###")
            NCOM.append("###Data Section begins on the next line###")

        # Special comments: existing special comments, then the singleton
        # variables, then the variable attributes not translated elsewhere.
        SCOM = list(self.extra_comments[0]) + self._getRankZeroVarLines()

        var_att_lines = self._getVarAttributeLines()
        if len(var_att_lines) > 0:
            SCOM.append("Additional Variable Attributes defined in the source file and not translated elsewhere:")
            SCOM.append("###Variable attributes from source (NetCDF) file follow###")
            SCOM.extend(var_att_lines)
            SCOM.append("###Variable attributes from source (NetCDF) file end###")

        # The section markers are written once, whatever the section holds
        if len(SCOM) > 0:
            SCOM.insert(0, "###NASA Ames Special Comments follow###")
            SCOM.append("###NASA Ames Special Comments end###")

        # Strip out empty lines (or returns)
        self.na_dict["NCOM"] = self._cleanCommentLines(NCOM)
        self.na_dict["NNCOML"] = len(self.na_dict["NCOM"])
        self.na_dict["SCOM"] = self._cleanCommentLines(SCOM)
        self.na_dict["NSCOML"] = len(self.na_dict["SCOM"])

    def _getFirstDateFromTimeAxis(self):
        """
        Returns the date of the first value of the time axis self.ax0 as a
        list of strings split from the date part of the component time, or
        None if it cannot be worked out.

        The units of the axis must look like "<unit> since <y>-<m>-<d>".
        """
        try:
            # cdtime is not imported at module level in this file, so it is
            # imported here; if it is unavailable the date is reported as
            # not known by the caller.
            import cdtime
            (unit, start_date) = re.match(r"(\w+)\s+?since\s+?(\d+-\d+-\d+)", self.ax0.units).groups()
            comptime = cdtime.s2c(start_date)
            first_day = comptime.add(self.na_dict["X"][0][0], getattr(cdtime, unit.capitalize()))
            return str(first_day).split(" ")[0].replace("-", " ").split()
        except Exception:
            # Best effort: any failure means the date is not known
            return None

    def _defineGeneralHeader(self, header_items=None):
        """
        Defines known header items and overwrites any with header_items
        key/value pairs.

        Sets RDATE (today, unless already defined), DATE, IVOL and NVOL in
        self.na_dict. DATE is taken from the first value of the first axis if
        that is a time axis. If that is not possible an existing DATE is kept;
        if there is none a warning is reported and DATE is set to the
        placeholder ("DATE", "NOT", "KNOWN").

        Requires self.ax0 and self.na_dict["X"] (see _defineNAVars).
        """
        if header_items is None:
            header_items = {}

        # Check if DATE field previously known in NASA Ames file
        if "RDATE" not in self.na_dict:
            self.na_dict["RDATE"] = time.strftime("%Y %m %d", time.localtime(time.time())).split()

        first_date = None
        if self.ax0.isTime():
            first_date = self._getFirstDateFromTimeAxis()

        if first_date is not None:
            self.na_dict["DATE"] = first_date
        elif "DATE" not in self.na_dict:
            msg = "Nappy Warning: Could not get the first date in the file. You will need to manually edit the output file."
            print(msg)
            self.output_message.append(msg)
            self.na_dict["DATE"] = ("DATE", "NOT", "KNOWN")

        self.na_dict["IVOL"] = 1
        self.na_dict["NVOL"] = 1
        for key in header_items.keys():
            self.na_dict[key] = header_items[key]
Note: See TracBrowser for help on using the repository browser.