source: nappy/trunk/nappy/nc_interface/nc_to_na.py @ 5894

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/nappy/trunk/nappy/nc_interface/nc_to_na.py@5894
Revision 5894, 16.5 KB checked in by astephen, 10 years ago (diff)

minor changes to make sure date parsed properly.

Line 
1#   Copyright (C) 2004 CCLRC & NERC( Natural Environment Research Council ).
2#   This software may be distributed under the terms of the
3#   Q Public License, version 1.0 or later. http://ndg.nerc.ac.uk/public_docs/QPublic_license.txt
4
5"""
6nc_to_na.py
7=============
8
9Holds the class NCToNA (sub-classing CDMSToNA) that converts a NetCDF file to
10one or more NASA Ames files.
11
12"""
13
14# Imports from python standard library
15import sys
16import logging
17
18# Import from nappy package
19import nappy
20from nappy.na_error import na_error
21import nappy.utils
22import nappy.utils.common_utils
23import nappy.nc_interface.cdms_to_na
24import nappy.nc_interface.na_content_collector
25
# Import external packages (if available)
# NOTE: the platform string is tested explicitly rather than by searching for
# the substring "win", because "darwin" (macOS) also contains "win" and was
# therefore wrongly rejected as Windows. Cygwin is still rejected, as before.
if sys.platform.startswith("win") or sys.platform == "cygwin":
    raise na_error.NAPlatformError("Windows does not support CDMS. CDMS is required to convert to CDMS objects and NetCDF.")

# Prefer cdms2, fall back to the older cdms package. Only ImportError is
# handled so that unrelated failures (and KeyboardInterrupt) are not hidden.
try:
    import cdms2 as cdms
except ImportError:
    try:
        import cdms
    except ImportError:
        raise Exception("Could not import third-party software. Nappy requires the CDMS and Numeric packages to be installed to convert to CDMS and NetCDF.")

cdms.setAutoBounds("off")

# Define global variables (defaults read from the nappy configuration)
DEBUG = nappy.utils.getDebug()
default_delimiter = nappy.utils.getDefault("default_delimiter")
default_float_format = nappy.utils.getDefault("default_float_format")
comment_override_rule = nappy.utils.getDefault("comment_override_rule")
# NOTE(review): bool() of a non-empty string is always True; confirm that
# getDefault() does not return the text "False" for this option.
add_column_headers = bool(nappy.utils.getDefault("add_column_headers"))

config_dict = nappy.utils.getConfigDict()
header_partitions = config_dict["header_partitions"]
hp = header_partitions

# NASA Ames header items that may be overridden when writing output, and the
# subset of those whose override values are whitespace-split into lists
permitted_overwrite_metadata = ("DATE",  "RDATE", "ANAME", "MNAME",
           "ONAME", "ORG", "SNAME", "VNAME", "SCOM", "NCOM")
items_as_lists = ["DATE", "RDATE", "ANAME", "VNAME"]

logging.basicConfig()
log = logging.getLogger(__name__)
58
class NCToNA(nappy.nc_interface.cdms_to_na.CDMSToNA):
    """
    Converts a NetCDF file to one or more NASA Ames files.
    """

    def __init__(self, nc_file, var_ids=None, na_items_to_override=None,
            only_return_file_names=False, exclude_vars=None,
            requested_ffi=None,
            ):
        """
        Sets up instance variables.

        Reads the variables and global attributes from 'nc_file' and hands
        them, together with the remaining arguments, to CDMSToNA.__init__.

        'var_ids' optionally restricts the variables read (a single id or a
        list of ids); 'exclude_vars' lists variable ids to leave out.
        'na_items_to_override' defaults to an empty dictionary and
        'exclude_vars' to an empty list.

        Typical usage is:
        >>>    import nappy.nc_interface.nc_to_na as nc_to_na
        >>>    c = nc_to_na.NCToNA("old_file.nc")
        >>>    c.convert()
        >>>    c.writeNAFiles("new_file.na", delimiter=",")

        OR:
        >>>    c = nc_to_na.NCToNA("old_file.nc")
        >>>    file_names = c.constructNAFileNames()
        """
        self.nc_file = nc_file

        # Fresh containers per call: mutable default arguments would be
        # shared between every instance created without these arguments.
        if na_items_to_override is None:
            na_items_to_override = {}
        if exclude_vars is None:
            exclude_vars = []

        # Now need to read CDMS file so parent class methods are compatible
        (cdms_variables, global_attributes) = self._readCDMSFile(var_ids, exclude_vars)
        nappy.nc_interface.cdms_to_na.CDMSToNA.__init__(self, cdms_variables, global_attributes=global_attributes,
                                                        na_items_to_override=na_items_to_override,
                                                        only_return_file_names=only_return_file_names,
                                                        requested_ffi=requested_ffi)

    def _readCDMSFile(self, var_ids=None, exclude_vars=None):
        """
        Reads the file and returns all the CDMS variables in a list as well
        as the global attributes: (cdms_variable_list, global_atts_list)
        If var_ids is defined then only get those.
        Any variable whose id is in exclude_vars is skipped.
        """
        if exclude_vars is None:
            exclude_vars = []

        # NOTE(review): the file object is not closed here (unchanged from the
        # original); confirm whether the returned variables need it open.
        fin = cdms.open(self.nc_file)

        # Make sure var_ids is a list
        if isinstance(var_ids, str):
            var_ids = [var_ids]

        cdms_variables = []
        for var_id in fin.listvariables():
            if var_ids is not None and var_id not in var_ids:
                continue
            # Test the individual id; testing the whole var_ids selection
            # against exclude_vars meant exclusions were never applied.
            if var_id in exclude_vars:
                continue
            cdms_variables.append(fin(var_id))

        global_attributes = fin.attributes.items()
        return (cdms_variables, global_attributes)

    def constructNAFileNames(self, na_file=None):
        """
        Works out what the file names of the output NA files will be and
        returns a list of them.

        If 'na_file' is not given the name is derived from self.nc_file by
        replacing a trailing ".nc" with ".na". When more than one NASA Ames
        dictionary is to be written, "_1", "_2", ... is inserted before the
        file extension.
        """
        # Local import so this method does not depend on the module's
        # top-level import list.
        import os.path

        self.convert()

        # create file name if not given
        if na_file is None:
            base_name = self.nc_file
            if base_name.endswith(".nc"):
                base_name = base_name[:-3]
            na_file = base_name + ".na"

        # splitext() only splits on a dot in the final path component and
        # returns an empty extension when there is none, so names without an
        # extension or with dots in a directory name are handled correctly.
        (stem, extension) = os.path.splitext(na_file)
        multiple_outputs = len(self.na_dict_list) > 1

        # Now, create some valid file names
        file_names = []
        for (file_counter, _unused_na_dict) in enumerate(self.na_dict_list, 1):
            if multiple_outputs:
                suffix = "_%s" % file_counter
            else:
                suffix = ""
            file_names.append(stem + suffix + extension)

        return file_names

    def writeNAFiles(self, na_file=None, delimiter=default_delimiter, annotation=False,
                     float_format=default_float_format, size_limit=None, no_header=False):
        """
        Writes the self.na_dict_list content to one or more NASA Ames files.
        Output file names are based on the self.nc_file name unless specified
        in the na_file argument in which case that provides the main name
        that is appended to if multiple output file names are required.

        If 'size_limit' is given, an FFI 1001 output whose first variable
        array is longer than 'size_limit' is split across several files.

        Sets self.output_files_written to the list of every file written and
        returns self.output_message (the list of messages produced).

        TODO: no_header is NOT implemented.
        """
        self.convert() # just in case not already called

        # Gets a list of NA file_names that will be produced.
        file_names = self.constructNAFileNames(na_file)

        # Get any NASA Ames dictionary values that should be overwritten with local values
        local_attributes = nappy.utils.getLocalAttributesConfigDict()

        # define final override list by using defaults then locally provided changes.
        # A copy is taken so the configuration dictionary itself is not modified.
        overriders = dict(local_attributes["na_attributes"])
        overriders.update(self.na_items_to_override)

        # Every file written, including the extra files created when an
        # output is split because size_limit was exceeded.
        all_files_written = []

        # Now loop through writing the outputs
        for (index, na_dict_and_var_ids) in enumerate(self.na_dict_list):
            file_name = file_names[index]
            msg = "\nWriting output NASA Ames file: %s" % file_name
            if DEBUG: log.debug(msg)
            self.output_message.append(msg)

            # Set up current na dict
            (this_na_dict, vars_to_write) = na_dict_and_var_ids

            # Override content of NASA Ames if they are permitted
            for (key, override_value) in overriders.items():
                if key not in permitted_overwrite_metadata:
                    continue

                if key in items_as_lists:
                    new_item = override_value.split()
                    if key in ("DATE", "RDATE"):
                        new_item = [int(list_item) for list_item in new_item]
                else:
                    new_item = override_value

                # Do specific overwrite for comments by inserting lines at start
                if key in ("SCOM", "NCOM"):

                    # Use rule defined in config file in terms of where to put new comments
                    if comment_override_rule == "replace":
                        comments_list = new_item[:]
                    elif comment_override_rule in ("insert", "extend"):
                        new_comments = new_item[:]
                        existing_comments = this_na_dict.get(key, [])
                        comments_list = self._cleanWrapComments(existing_comments, new_comments, key, comment_override_rule)
                    else:
                        raise Exception("Did not recognise comment_override_rule: " + str(comment_override_rule))

                    this_na_dict[key] = comments_list
                    this_na_dict["N%sL" % key] = len(comments_list)

                elif key not in this_na_dict or new_item != this_na_dict[key]:
                    this_na_dict[key] = new_item
                    msg = "Metadata overwritten in output file: '%s' is now '%s'" % (key, this_na_dict[key])
                    if DEBUG: log.debug(msg)
                    self.output_message.append(msg)

            # For certain FFIs create final Normal comments as a list of column headers before data section
            if add_column_headers:
                self._updateWithColumnHeaders(this_na_dict, delimiter)

            file_list = []
            # Cope with size limits if specified and FFI is 1001: the array is
            # written in chunks to separate files.
            must_split = (size_limit is not None and this_na_dict["FFI"] == 1001
                          and len(this_na_dict["V"][0]) > size_limit)

            if must_split:
                files_written = self._writeNAFileSubsetsWithinSizeLimit(this_na_dict, file_name, delimiter=delimiter,
                                                                        float_format=float_format, size_limit=size_limit,
                                                                        annotation=annotation)
                file_list.extend(files_written)

            # If not having to split file into multiple outputs (normal condition)
            else:
                log.info("Output NA file name: %s" % file_name)
                x = nappy.openNAFile(file_name, 'w', this_na_dict)
                try:
                    x.write(delimiter=delimiter, float_format=float_format, annotation=annotation)
                finally:
                    # Always release the output file, even if writing fails
                    x.close()
                file_list.append(file_name)

            all_files_written.extend(file_list)

            # Report on what has been written
            msg = "\nWrote the following variables:" + "\n  " + ("\n  ".join(vars_to_write[0]))
            if DEBUG: log.debug(msg)
            self.output_message.append(msg)

            # Build one message from the auxiliary, singleton and file reports.
            # Each part is appended so an earlier part is never overwritten.
            msg = ""
            aux_vars = vars_to_write[1]
            if len(aux_vars) > 0:
                msg = msg + "\nWrote the following auxiliary variables:" + "\n  " + ("\n  ".join(aux_vars))

            singleton_vars = vars_to_write[2]
            if len(singleton_vars) > 0:
                msg = msg + "\nWrote the following Singleton variables:" + "\n  " + ("\n  ".join(singleton_vars))

            if len(file_list) > 0:
                msg = msg + ("\n\nNASA Ames file(s) written successfully: \n%s" % "\n".join(file_list))

            if DEBUG: log.debug(msg)
            self.output_message.append(msg)

        full_file_count = len(all_files_written)
        if full_file_count == 1:
            plural = ""
        else:
            plural = "s"
        msg = "\n%s file%s written." % (full_file_count, plural)

        if DEBUG: log.debug(msg)
        self.output_message.append(msg)
        self.output_files_written = all_files_written

        return self.output_message

    def _writeNAFileSubsetsWithinSizeLimit(self, this_na_dict, file_name, delimiter,
                      float_format, size_limit, annotation):
        """
        Writes the content of this_na_dict to a series of files, each holding
        at most 'size_limit' points of the variable arrays. The caller uses
        this for FFI 1001 only, so that the output can be chunked into
        different files in a NASA Ames compliant way.

        Output names are built from 'file_name' with its last three characters
        (expected to be ".na") replaced by "-001.na", "-002.na", ...

        Returns list of file names of outputs written.
        Raises ValueError if size_limit is less than 1.
        """
        # A zero or negative limit would never advance through the array
        if size_limit < 1:
            raise ValueError("size_limit must be a positive integer, got: %s" % size_limit)

        var_list = this_na_dict["V"]
        array_length = len(var_list[0])

        # Work out the number of volumes (files) that need to be written,
        # counting a final partly-filled volume.
        (nvol, remainder) = divmod(array_length, size_limit)
        if remainder > 0:
            nvol = nvol + 1

        file_names = []
        ivol = 0
        start = 0

        # Loop through until full array length has been written to a set of files.
        while start < array_length:
            ivol = ivol + 1
            end = min(start + size_limit, array_length)

            # Take the same slice of every variable array
            current_block = [v[start:end] for v in var_list]

            # Adjust X accordingly in the na dictionary, because independent variable has been reduced in size
            na_dict_copy = nappy.utils.common_utils.modifyNADictCopy(this_na_dict, current_block,
                                                                      start, end, ivol, nvol)
            # Append the volume number to the file name for writing this block to
            file_name_plus_letter = "%s-%.3d.na" % (file_name[:-3], ivol)

            # Write data to output file
            x = nappy.openNAFile(file_name_plus_letter, 'w', na_dict_copy)
            try:
                x.write(delimiter=delimiter, float_format=float_format, annotation=annotation)
            finally:
                x.close()

            msg = "\nOutput files split on size limit: %s\nFilename used: %s" % (size_limit, file_name_plus_letter)
            if DEBUG: log.debug(msg)
            self.output_message.append(msg)

            file_names.append(file_name_plus_letter)
            start = end

        return file_names

    def _updateWithColumnHeaders(self, na_dict, delimiter):
        """
        Updates the NCOM and NNCOML parts of the na_dict so that
        the last normal comments line is in fact a set of headers
        for the data section. E.g.:

           UTs     Spd  Direc
           30446.9 305  2592
           20447.9 204  2596

        The header names are always joined with a comma; the 'delimiter'
        argument is accepted but is not currently used.

        This option is only compatible with a limited range of FFIs and
        only works if there are no Auxiliary variables defined.
        """
        ffi = na_dict["FFI"]
        compatible_ffis = (1001, 1010, 2110)

        if ffi not in compatible_ffis or na_dict["NAUXV"] > 0:
            log.debug("Column Headers are not written for FFIs other than: %s" % str(compatible_ffis))
            return

        # FFIs 1001 and 2110 start the header with the first XNAME entry;
        # the remaining compatible FFI (1010) uses the variable names only.
        if ffi in (1001, 2110):
            col_names = [na_dict["XNAME"][0]]
        else:
            col_names = []

        col_names.extend(na_dict["VNAME"])
        col_names_line = ",".join(col_names)
        na_dict["NCOM"].append(col_names_line)
        na_dict["NNCOML"] = len(na_dict["NCOM"])
        return

    def _cleanWrapComments(self, existing_comments, new_comments, key, comment_override_rule):
        """
        Combines new_comments with existing_comments where comments are
        either Special or Normal. 'key' defines this being defined as either
        "SCOM" or "NCOM". 'comment_override_rule' is either "insert" (new_comments first)
        or "extend" (existing_comments first).

        A start marker line and an end marker line (as defined in the header
        partitions configuration) are removed from existing_comments before
        combining and put back around the combined result.

        Returns a new list of combined_comments, or the other list unchanged
        if either input is empty.
        """
        if not existing_comments:   return new_comments
        if not new_comments:        return existing_comments

        # Strip start header if used
        c1 = key[0].lower()
        start_line = hp[c1 + "c_start"]
        start_used = False

        if existing_comments[0] == start_line:
            existing_comments = existing_comments[1:]
            start_used = start_line

        # Now check last line. The list may have become empty if it held
        # only the start marker, so test for content before indexing.
        end_line = hp[c1 + "c_end"]
        end_used = False

        if existing_comments and existing_comments[-1] == end_line:
            existing_comments = existing_comments[:-1]
            end_used = end_line

        # Check for alternative last line in NCOM
        if not end_used and key == "NCOM" and existing_comments:
            end_line2 = hp["data_next"]
            if existing_comments[-1] == end_line2:
                existing_comments = existing_comments[:-1]
                end_used = end_line2

        # Now put back together
        if comment_override_rule == "insert":
            combined_comments = new_comments + existing_comments
        else:
            combined_comments = existing_comments + new_comments

        if start_used:
            combined_comments.insert(0, start_used)
        if end_used:
            combined_comments.append(end_used)

        return combined_comments
409
Note: See TracBrowser for help on using the repository browser.