source: nappy/trunk/nappy/nc_interface/nc_to_na.py @ 4693

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/nappy/trunk/nappy/nc_interface/nc_to_na.py@4693
Revision 4693, 14.2 KB checked in by astephen, 12 years ago (diff)

Updated nappy code to allow us to update header info better.

Line 
1#   Copyright (C) 2004 CCLRC & NERC( Natural Environment Research Council ).
2#   This software may be distributed under the terms of the
3#   Q Public License, version 1.0 or later. http://ndg.nerc.ac.uk/public_docs/QPublic_license.txt
4
5"""
6nc_to_na.py
7=============
8
9Holds the class NCToNA (sub-classing CDMSToNA) that converts a NetCDF file to
10one or more NASA Ames files.
11
12"""
13
# Imports from python standard library
import os
import sys

# Import from nappy package
import nappy
from nappy.na_error import na_error
import nappy.utils
import nappy.utils.common_utils
import nappy.nc_interface.cdms_to_na
import nappy.nc_interface.na_content_collector
24
# Import external packages (if available)

# CDMS is not available on Windows.  The platform name is matched by prefix
# because a substring search for "win" also matches "darwin" (macOS), which
# wrongly stopped this module from loading there.
if sys.platform.startswith(("win", "cygwin")):
    raise na_error.NAPlatformError("Windows does not support CDMS. CDMS is required to convert to CDMS objects and NetCDF.")

# Prefer cdms2 and fall back to the older cdms package.  Only ImportError is
# caught so that any other failure raised while importing is not hidden.
try:
    import cdms2 as cdms
except ImportError:
    try:
        import cdms
    except ImportError:
        raise Exception("Could not import third-party software. Nappy requires the CDMS and Numeric packages to be installed to convert to CDMS and NetCDF.")

cdms.setAutoBounds("off")
38
# Module-level settings read once, at import time, from the nappy configuration
DEBUG = nappy.utils.getDebug()
default_delimiter = nappy.utils.getDefault("default_delimiter")
default_float_format = nappy.utils.getDefault("default_float_format")
comment_override_rule = nappy.utils.getDefault("comment_override_rule")
# NOTE(review): bool() of any non-empty string is True - confirm getDefault()
# does not return the text "False" for this setting.
add_column_headers = bool(nappy.utils.getDefault("add_column_headers"))

# NASA Ames header items that writeNAFiles() is allowed to override
permitted_overwrite_metadata = (
    "DATE", "RDATE", "ANAME", "MNAME", "ONAME",
    "ORG", "SNAME", "VNAME", "SCOM", "NCOM",
)
# Override values for these items are split on whitespace into a list
items_as_lists = ["DATE", "RDATE", "ANAME", "VNAME"]
50
51
52class NCToNA(nappy.nc_interface.cdms_to_na.CDMSToNA):
53    """
54    Converts a NetCDF file to one or more NASA Ames files.
55    """
56
57    def __init__(self, nc_file, var_ids=None, na_items_to_override={}, 
58            only_return_file_names=False, exclude_vars=[],
59            requested_ffi=None,
60            ):
61        """
62        Sets up instance variables.
63        Typical usage is:
64        >>>    import nappy.nc_interface.nc_to_na as nc_to_na
65        >>>    c = nc_to_na.NCToNA("old_file.nc")
66        >>>    c.convert()
67        >>>    c.writeNAFiles("new_file.na", delimiter=",")
68
69        OR:
70        >>>    c = nc_to_na.NCToNA("old_file.nc")
71        >>>    file_names = c.constructNAFileNames()
72        """
73        self.nc_file = nc_file
74
75        # Now need to read CDMS file so parent class methods are compatible
76        (cdms_variables, global_attributes) = self._readCDMSFile(var_ids, exclude_vars)
77        nappy.nc_interface.cdms_to_na.CDMSToNA.__init__(self, cdms_variables, global_attributes=global_attributes, 
78                                                        na_items_to_override=na_items_to_override, 
79                                                        only_return_file_names=only_return_file_names,
80                                                        requested_ffi=requested_ffi)
81 
82
83    def _readCDMSFile(self, var_ids=None, exclude_vars=[]):
84        """
85        Reads the file and returns all the CDMS variables in a list as well
86        as the global attributes: (cdms_variable_list, global_atts_list)
87        If var_ids is defined then only get those.
88        """
89        fin = cdms.open(self.nc_file)
90        cdms_variables = []
91
92        # Make sure var_ids is a list
93        if type(var_ids) == type("string"):
94            var_ids = [var_ids]
95
96        for var_id in fin.listvariables():
97            if var_ids == None or var_id in var_ids:
98                if var_ids not in exclude_vars:
99                    cdms_variables.append(fin(var_id))
100
101        globals = fin.attributes.items()
102        return (cdms_variables, globals) 
103
104    def constructNAFileNames(self, na_file=None):
105        """
106        Works out what the file names of the output NA files will be and
107        returns a list of them.
108        """
109        self.convert()
110
111        file_names = []
112        # create file name if not given
113        if na_file == None:
114            base_name = self.nc_file
115            if base_name[-3:] == ".nc":
116                base_name = base_name[:-3]
117            na_file = base_name + ".na"
118
119        file_counter = 1
120        # Now, create some valid file names
121        for this_na_dict in self.na_dict_list:
122            if len(self.na_dict_list) == 1:
123                suffix = ""
124            else:
125                suffix = "_%s" % file_counter
126
127            # Create file name
128            name_parts = na_file.split(".")   
129            new_name = (".".join(name_parts[:-1])) + suffix + "." + name_parts[-1]
130            file_names.append(new_name)
131            file_counter += 1
132           
133        return file_names
134
135    def writeNAFiles(self, na_file=None, delimiter=default_delimiter, annotation=False,
136                     float_format=default_float_format, size_limit=None, no_header=False):
137        """
138        Writes the self.na_dict_list content to one or more NASA Ames files.
139        Output file names are based on the self.nc_file name unless specified
140        in the na_file_name argument in which case that provides the main name
141        that is appended to if multiple output file names are required.
142
143        TODO: no_header is NOT implemented.
144        """
145        self.convert() # just in case not already called
146
147        # Gets a list of NA file_names that will be produced.
148        file_names = self.constructNAFileNames(na_file)
149
150        # Set up some counters: file_counter is the expected number of files.
151        #      full_file_counter includes any files that have been split across multiple output NA files
152        #              because size_limit exceeded.
153        file_counter = 1
154        full_file_counter = 1
155
156        # Get any NASA Ames dictionary values that should be overwritten with local values
157        local_attributes = nappy.utils.getLocalAttributesConfigDict()
158        local_na_atts = local_attributes["na_attributes"]
159
160        # define final override list by using defaults then locally provided changes
161        overriders = local_na_atts
162        for (okey, ovalue) in self.na_items_to_override.items():
163            overriders[okey] = ovalue
164
165        # Now loop through writing the outputs
166        for na_dict_and_var_ids in self.na_dict_list:
167            file_name = file_names[file_counter - 1]
168            msg = "\nWriting output NASA Ames file: %s" % file_name
169            if DEBUG: print msg
170            self.output_message.append(msg)
171
172            # Set up current na dict
173            (this_na_dict, vars_to_write) = na_dict_and_var_ids
174
175            # Override content of NASA Ames if they are permitted
176            for key in overriders.keys():
177
178                if key in permitted_overwrite_metadata:   
179                    if key in items_as_lists:
180                        new_item = overriders[key].split()                 
181                    else:
182                        new_item = overriders[key]
183
184                    # Do specific overwrite for comments by inserting lines at start
185                    if key in ("SCOM", "NCOM"):
186
187                        # Use rule defined in config file in terms of where to put new comments
188                        if comment_override_rule == "replace":
189                            comments_list = new_item[:]
190                        elif comment_override_rule == "insert":
191                            comments_list = new_item[:] + this_na_dict.get(key, [])
192                        elif comment_override_rule == "extend": 
193                            comments_list = this_na_dict.get(key, []) + new_item[:]
194
195                        this_na_dict[key] = comments_list
196                        this_na_dict["N%sL" % key] = len(comments_list)
197                        #print "COMMMENTS:", comments_list
198                        #print "Added to comments:", key
199                         
200                    elif not this_na_dict.has_key(key) or new_item != this_na_dict[key]:
201                        this_na_dict[key] = new_item
202                        msg = "Metadata overwritten in output file: '%s' is now '%s'" % (key, this_na_dict[key])
203                        if DEBUG: print msg
204                        self.output_message.append(msg)
205
206            # For certain FFIs create final Normal comments as a list of column headers before data section
207            if add_column_headers == True:
208                self._updateWithColumnHeaders(this_na_dict, delimiter)
209       
210            file_list = []
211            # Cope with size limits if specified and FFI is 1001
212            # Seems to be writing different chunks of a too long array to different na_dicts to then write to separate files.
213            if size_limit is not None and (this_na_dict["FFI"] == 1001 and len(this_na_dict["V"][0]) > size_limit):
214                files_written = self._writeNAFileSubsetsWithinSizeLimit(this_na_dict, file_name, delimiter=delimiter,
215                                                                        float_format=float_format, size_limit=size_limit,
216                                                                        annotation=annotation)
217                file_list.extend(files_written)
218
219            # If not having to split file into multiple outputs (normal condition)
220            else:               
221                x = nappy.openNAFile(file_name, 'w', this_na_dict)
222                x.write(delimiter=delimiter, float_format=float_format, annotation=annotation)
223                x.close()
224                file_list.append(file_name)
225
226            # Report on what has been written
227            msg = "\nWrote the following variables:" + "\n\t" + ("\n\t".join(vars_to_write[0]))
228            if DEBUG: print msg
229            self.output_message.append(msg)
230       
231            msg = ""
232            aux_var_count = vars_to_write[1]
233            if len(aux_var_count) > 0:
234                msg = "\nWrote the following auxiliary variables:" + "\n\t" + ("\n\t".join(aux_var_count))     
235           
236            singleton_var_count = vars_to_write[2]
237            if len(singleton_var_count) > 0:
238                msg = "\nWrote the following Singleton variables:" + "\n\t" + ("\n\t".join(singleton_var_count))
239
240            if len(file_list) > 0:
241                msg = msg + ("\n\nNASA Ames file(s) written successfully: \n%s" % "\n".join(file_list))
242
243            full_file_counter += len(file_list)
244            file_counter += 1
245
246            if DEBUG: print msg
247            self.output_message.append(msg)
248           
249        full_file_count = full_file_counter - 1
250        if full_file_count == 1:
251            plural = ""
252        else:
253            plural = "s"             
254        msg = "\n%s file%s written." % (full_file_count, plural)
255   
256        if DEBUG: print msg
257        self.output_message.append(msg)
258        self.output_files_written = file_list
259
260        return self.output_message
261
262    def _writeNAFileSubsetsWithinSizeLimit(self, this_na_dict, file_name, delimiter, 
263                      float_format, size_limit, annotation):
264        """
265        If self.size_limit is specified and FFI is 1001 we can chunk the output into
266        different files in a NASA Ames compliant way.
267        Returns list of file names of outputs written.
268        """
269        file_names = []
270        var_list = this_na_dict["V"]
271        array_length = len(var_list[0])
272        nvol_info = divmod(array_length, size_limit)
273        nvol = nvol_info[0]
274
275        # create the number of volumes (files) that need to be written.
276        if nvol_info[1] > 0: nvol = nvol + 1
277
278        start = 0
279        letter_count = 0
280        ivol = 0
281
282        # Loop through until full array length has been written to a set of files.
283        while start < array_length:
284            ivol = ivol + 1
285            end = start + size_limit
286
287            if end > array_length:
288                end = array_length
289
290            current_block = []
291            # Write new V array
292            for v in var_list:
293                current_block.append(v[start:end])
294
295            # Adjust X accordingly in the na dictionary, because independent variable has been reduced in size
296            na_dict_copy = nappy.utils.common_utils.modifyNADictCopy(this_na_dict, current_block, 
297                                                                      start, end, ivol, nvol)
298            # Append a letter to the file name for writing this block to
299            file_name_plus_letter = "%s-%.3d.na" % (file_name[:-3], ivol)
300            file_list.append(file_name_plus_letter)
301
302            # Write data to output file
303            x = nappy.openNAFile(file_name_plus_letter, 'w', na_dict_copy)
304            x.write(delimiter=delimiter, float_format=float_format, annotation=annotation)
305            x.close()
306
307            msg = "\nOutput files split on size limit: %s\nFilename used: %s" % (size_limit, file_name_plus_letter)
308            if DEBUG: print msg
309            self.output_message.append(msg)
310            letter_count = letter_count + 1
311            start = end
312
313            file_names.append(file_name_plus_letter) 
314
315        return file_names
316
317
318    def _updateWithColumnHeaders(self, na_dict, delimiter):
319        """
320        Updates the NCOM and NCOML parts of the na_dict so that
321        the last normal comments line is in fact a set of headers
322        for the data section. E.g.:
323
324           UTs     Spd  Direc
325           30446.9 305  2592
326           20447.9 204  2596
327
328        The 'delimiter' argument is used to separate out the arguments.
329
330        This option is only compatible with a limited range of FFIs and
331        only works if there are no Auxiliary variables defined.
332        """
333        ffi = na_dict["FFI"]
334        compatible_ffis = (1001, 1010, 2110)
335
336        if ffi not in compatible_ffis or na_dict["NAUXV"] > 0:
337            print "WARNING: Column Headers are not written for FFIs other than: " + str(compatible_ffis)
338            return
339
340        if ffi in (1001, 2110):
341            col_names = [na_dict["XNAME"][0]]
342        elif ffi in (1010,):
343            col_names = []
344
345        col_names.extend(na_dict["VNAME"])
346        col_names_line = ",".join(col_names)
347        na_dict["NCOM"].append(col_names_line) 
348        na_dict["NNCOML"] = len(na_dict["NCOM"])
349        return 
Note: See TracBrowser for help on using the repository browser.