Changeset 3390


Ignore:
Timestamp:
12/02/08 18:40:24 (12 years ago)
Author:
astephen
Message:
 
File:
1 edited

Legend:

Unmodified
Added
Removed
  • nappy/trunk/nappy/nc_interface/cdms_to_na.py

    r3383 r3390  
    1313# Imports from python standard library 
    1414import sys 
    15 import os 
    16 import time 
    17 import string 
    18 import re 
    1915 
    2016# Import from nappy package 
     17import nappy 
    2118from nappy.na_error import na_error 
    2219import nappy.utils 
     
    2421import nappy.cdms_utils.var_utils 
    2522import nappy.na_file.na_core 
    26  
     23import nappy.nc_interface.na_content_collector 
    2724 
    2825# Import external packages (if available) 
     
    3633cdms.setAutoBounds("off")  
    3734 
    38 cdms2na - 200 lines of code to do main conversion, needs to be split out into other stuff. 
    39  * getVariableCollections(f and varlist) --> (ordered_vars, other_vars) 
    40  * buildNADicts() 
    41  * writeToOutputFiles() 
    42  
     35# Define global variables 
     36permitted_overwrite_metadata = ("DATE",  "RDATE", "ANAME", "MNAME", 
     37           "ONAME", "ORG", "SNAME", "VNAME") 
     38items_as_lists = ["DATE", "RDATE", "ANAME", "VNAME"] 
     39var_limit = 5000 # surely never going to get this many vars in a file! 
     40DEBUG = nappy.utils.DEBUG  
    4341 
    4442class NCToNA: 
     
    4745    """ 
    4846 
    49     def __init__(self, nc_file, na_file_names=None, na_vars={}, variables=None,  
    50             only_return_file_names="no", delimiter="    ", float_format="%g",  
    51             size_limit=None): 
     47    def __init__(self, cdms_variables, global_atts={}, na_items_to_override={},  
     48                 only_return_file_names=False): 
     49        """ 
     50        Sets up instance variables.       
     51        """ 
     52        self.cdms_variables = cdms_variables 
     53        self.global_atts = global_atts 
     54        self.na_items_to_override = na_items_to_override 
     55        self.only_return_file_names = only_return_file_names 
     56        self.converted = False 
     57        self.output_message = [] 
     58     
     59    def convert(self): 
     60        """ 
     61        Reads the CDMS objects and converts them to a set of dictionaries that 
     62        provide the structure for a NA File object. 
     63        Returns [(na_dict, var_ids), (na_dict, var_ids), ....] 
     64        All these na_dict dictionaries can be readily written to a NA File object. 
    5265 
     66        Note that NASA Ames is not as flexible as NetCDF so you cannot just send any  
     67        set of variables to write to a NASA Ames file. Essentially there is one 
     68        multi-dimensional structure and all variables must be defined against it. 
    5369 
     70        Otherwise variables must be auxiliary variables within that structure (i.e. only 
     71        defined once per the least changing dimension). 
    5472        """ 
    55         Sets up instance variables. Note that the argument 'na_file' has a relaxed definition 
    56         and can be either a NASA Ames file object or the name of a NASA Ames file. 
    57         Typical usage is: 
    58         >>>    import nappy.nc_interface.nc_to_na as nc_to_na 
    59         >>>    c = nc_to_na("old_file.nc")  
    60         >>>    c.convert() 
    61         >>>    c.write("new_file.na")          
     73        if self.converted == True: 
     74            print "Already converted to NA dictionary objects." 
     75            return self.na_dict_list 
     76         
     77        msg = "Reading data from: %s\n" % self.nc_file 
     78        if DEBUG: print msg 
     79        self.output_message.append(msg) 
     80 
     81        # Convert any singleton variables to CDMS variables 
     82        variables = self._convertSingletonVars(self.cdms_variables) 
     83 
     84        # Re-order variables if they have the attribute "nasa_ames_var_number" which means they came from a NASA Ames file originally 
     85        variables = self._reorderVars(variables) 
     86 
     87        # Make first call to collector class that creates NA dict from CDMS variables and global atts dicts 
     88        collector = NAContentCollector(variables, self.global_atts) 
     89        collected_dict = collector.collectNAContent() 
     90        # NOTE: collected_dict has attributes: na_dict, var_ids, unused_vars 
     91 
     92        # Return if no files returned 
     93        if collected.var_ids == None: 
     94            msg = "\nNo files created after variables parsed." 
     95            if DEBUG: print msg 
     96            self.output_message.append(msg) 
     97            return  
     98 
     99        # Set up a list to collect multiple calls to content collector 
     100        na_dict_list = [] 
     101        na_dict_list.append((collected_dict.na_dict, collected_dict.var_ids)) 
     102 
     103        # If there are variables that were not captured (i.e. unused) by NAContentCollector then loop through these 
     104        # in attempt to convert all to a set of na_dicts 
     105        while len(collector.unused_vars) > 0: 
     106            collector = NAContentCollector(collector.unused_vars, self.global_atts) 
     107            collected_dict = collector.collectNAContent()            
     108            self.output_message += collector.output_message 
     109            # Append to list if more variables were captured 
     110            if collector.var_ids != None:   
     111                na_dict_list.append((collected_dict.na_dict, collected_dict.var_ids)) 
     112 
     113        self.na_dict_list = na_dict_list 
     114        self.converted = True 
     115        return self.na_dict_list 
     116 
     117    def _convertSingletonVars(self, variables): 
    62118        """ 
     119        Loops through variables to convert singleton variables (i.e. Masked Arrays/Numeric Arrays)  
     120        to proper CDMS variables. Then code won't break when asking for rank attribute later. 
     121        Returns a list of CDMS variable objects 
     122        """ 
     123        vars = [] 
    63124 
     125        for variable in variables: 
     126            var_obj = variable 
    64127 
    65  
    66  
    67  
    68  
    69  
    70  
    71  
    72  
    73  
    74  
    75  
    76         """ 
    77         Set up instance variables, and call 
    78     Main conversion function that calls the appropriate classes and functions 
    79     to write a NASA Ames file. 
    80     """ 
    81     if type(na_file_names) == type("string"):  
    82         na_file_names = [na_file_names] 
    83      
    84     # Get which NASA Ames internal variables are allowed to be overwritten in the output files (i.e. by user inputs) 
    85     allowed_overwrite_metadata = ("DATE",  "RDATE", "ANAME", "MNAME", 
    86            "ONAME", "ORG", "SNAME", "VNAME") 
    87     array_args = ["DATE", "RDATE", "ANAME", "VNAME"] 
    88     output_message = [] 
    89     msg = "Reading data from: %s\n" % infilename 
    90     print msg 
    91     output_message.append(msg) 
    92     cdms_file = cdms.open(infilename) 
    93     globals = cdms_file.attributes 
    94      
    95     vars = [] 
    96     if not variables: 
    97         variables = cdms_file.listvariables() 
    98  
    99     for variable in variables: 
    100         var_obj = cdms_file(variable) 
    101  
    102         # Deal with singleton variables 
    103         if not hasattr(var_obj, "rank"): 
    104                 var_metadata = cdms_file[variable].attributes 
    105                 var_value = var_obj 
    106                 var_obj = cdms.createVariable(Numeric.array(var_obj),  
     128            # If singleton variable then convert into proper CDMS variables so code doesn't break later 
     129            if not hasattr(var_obj, "rank"): 
     130                var_metadata = var_obj.attributes        
     131                var_value = var_obj 
     132                        var_obj = cdms.createVariable(Numeric.array(var_obj),  
    107133                    id=nappy.cdms_utils.var_utils.getBestName(var_metadata).replace(" ", "_"),  
    108134                    attributes=var_metadata) 
    109                 var_obj.value = var_obj._data[0]                  
    110         vars.append(var_obj) 
    111          
    112     # Re-order variables if they have the attribute 'nasa_ames_var_number' 
    113     ordered_vars = [None] * 1000 
    114     other_vars = [] 
    115     for var in vars: 
    116         var_metadata = cdms_file[var] 
    117         if hasattr(var_metadata, "nasa_ames_var_number"): 
    118             num = var_metadata.nasa_ames_var_number 
    119             ordered_vars[num] = var 
    120         else: 
    121             other_vars.append(var) 
     135                        var_obj.value = var_obj._data[0]                  
     136                 
     137            vars.append(var_obj) 
     138            return vars 
     139 
     140    def _reorderVars(self, variables): 
     141        """ 
     142        Returns a reordered list of variables. Any that have the attribute  
     143        "nasa_ames_var_number" get ordered first in the list (according to numbering). 
     144        """ 
     145        # Set up a long list (longer than number of vars) 
     146        if len(variables) > var_limit: 
     147            raise Exception("Can only handle converting less than " + `var_limit` + " variables in any batch.") 
     148 
     149        # Collect up those that are ordered and unordered 
     150        ordered_vars = [None] * var_limit 
     151        unordered_vars = [] 
     152        for var in variables: 
     153            var_metadata = var.attributes 
     154            if hasattr(var_metadata, "nasa_ames_var_number"): 
     155                num = var_metadata.nasa_ames_var_number 
     156                ordered_vars[num] = var 
     157            else: 
     158                unordered_vars.append(var) 
    122159     
    123     vars = [] 
    124     for var in ordered_vars: 
    125         if var != None: 
    126             vars.append(var) 
     160        vars = [] 
     161        # Clear any None values in ordered_vars and place in final vars list 
     162        for var in ordered_vars + unordered_vars: 
     163            if var != None: vars.append(var) 
    127164             
    128     vars = vars + other_vars 
    129      
    130     builder = NAContentCollector(vars, globals) 
     165        return vars 
    131166 
    132                 collectNAContent 
    133  
    134  
    135     built_na_dicts = [[builder.na_dict, builder.var_ids]] 
    136  
    137     if builder.var_ids == None: 
    138         msg = "\nNo files created after variables parsed." 
    139         print msg 
    140         output_message.append(msg) 
    141         return output_message 
    142  
    143     while len(builder.unused_vars) > 0: 
    144         builder = NAContentCollector(builder.unused_vars, globals) 
    145  
    146               collectNAContent 
    147  
    148         output_message = output_message + builder.output_message 
    149         if builder.var_ids != None:  built_na_dicts.append([builder.na_dict, builder.var_ids]) 
    150  
    151     # Return only file_names if only want to know them now. 
    152     ncount = 1 
    153     file_names = [] 
    154     if only_return_file_names == "yes":  
    155         for i in built_na_dicts: 
    156             if len(built_na_dicts) == 1: 
    157                 suffix = "" 
    158             else: 
    159                 suffix = "_%s" % ncount 
    160             name_parts = output_file_names[0].split(".")     
    161             new_name = (".".join(name_parts[:-1])) + suffix + "." + name_parts[-1] 
    162             file_names.append(new_name) 
    163         ncount = ncount + 1 
    164              
    165         return file_names 
    166                  
    167     msg = "\n%s files to write" % len(built_na_dicts) 
    168     print msg 
    169     output_message.append(msg) 
    170  
    171     count = 1 
    172     ncount = 1 
    173     for i in built_na_dicts: 
    174         if len(output_file_names) == 1: 
    175             if len(built_na_dicts) == 1: 
    176                 suffix = "" 
    177             else: 
    178                 suffix = "_%s" % ncount 
    179             name_parts = output_file_names[0].split(".")     
    180             new_name = (".".join(name_parts[:-1])) + suffix + "." + name_parts[-1] 
    181         else: 
    182             new_name = output_file_names[count - 1] 
    183   
    184         msg = "\nWriting output NASA Ames file: %s" % new_name 
    185         print msg 
    186         output_message.append(msg) 
    187          
    188         builtNADict = i[0] 
    189         for key in na_vars.keys(): 
    190             if key in allowed_overwrite_metadata: 
    191              
    192                 if key in array_args: 
    193                     newItem = na_vars[key].split()                  
    194                 else: 
    195                     newItem = na_vars[key] 
    196                                      
    197                 if newItem != builtNADict[key]: 
    198                     builtNADict[key] = newItem 
    199                     msg = "Metadata overwritten in output file: '%s' is now '%s'" % (key, builtNADict[key]) 
    200                     print msg 
    201                     output_message.append(msg) 
    202          
    203         fileList = [] 
    204         # Cope with size limits if specified and FFI is 1001 
    205         if size_limit and (builtNADict["FFI"] == 1001 and len(builtNADict["V"][0]) > size_limit): 
    206             varList = builtNADict["V"] 
    207             arrayLength = len(varList[0]) 
    208             nvolInfo = divmod(arrayLength, size_limit) 
    209             nvol = nvolInfo[0] 
    210             if nvolInfo[1] > 0: nvol = nvol + 1 
    211             start = 0 
    212             letterCount = 0 
    213             ivol = 0 
    214             while start < arrayLength: 
    215                 ivol = ivol + 1 
    216                 end = start + size_limit 
    217                 if end > arrayLength: 
    218                     end = arrayLength 
    219                 currentBlock = [] 
    220                 # Write new V array 
    221                 for v in varList: 
    222                     currentBlock.append(v[start:end]) 
    223  
    224                 # Adjust X accordingly 
    225                 NADictCopy = nappy.utils.common_utils.modifyNADictCopy(builtNADict, currentBlock, start, end, ivol, nvol) 
    226                  
    227                 # Write data to output file 
    228                 new_namePlusLetter = "%s-%.3d.na" % (new_name[:-3], ivol) 
    229                 fileList.append(new_namePlusLetter) 
    230                 general.openNAFile(new_namePlusLetter, 'w', NADictCopy, delimiter=delimiter, float_format=float_format) 
    231                 msg = "\nOutput files split on size limit: %s\nFilename used: %s" % (size_limit, new_namePlusLetter) 
    232                 print msg 
    233                 output_message.append(msg) 
    234                 letterCount = letterCount + 1 
    235                 start = end 
    236  
    237  
    238         else:            
    239             general.openNAFile(new_name, 'w', builtNADict, delimiter=delimiter, float_format=float_format) 
    240  
    241         msg = "\nWrote the following variables:" + "\n\t" + ("\n\t".join(i[1][0])) 
    242         print msg 
    243         output_message.append(msg) 
    244          
    245         if len(i[1][1]) > 0: 
    246             msg = "\nWrote the following auxiliary variables:" 
    247             msg = msg + "\n\t" + ("\n\t".join(i[1][1]))  
    248              
    249         if len(i[1][2]) > 0: 
    250             msg = "\nWrote the following Singleton variables:" 
    251             msg = msg + "\n\t" + ("\n\t".join(i[1][2])) 
    252  
    253         if len(fileList) > 0: 
    254             msg = msg + ("\n\nNASA Ames files written successfully: \n%s" % "\n".join(fileList)) 
    255             count = count + len(fileList) 
    256         else: 
    257             msg = msg + "\n\nNASA Ames file written successfully: %s" % new_name 
    258             count = count + 1 
    259         ncount = ncount + 1 
    260  
    261         print msg 
    262         output_message.append(msg) 
    263              
    264     if (count - 1) == 1: 
    265         plural = "" 
    266     else: 
    267         plural = "s"           
    268     msg = "\n%s file%s written." % ((count - 1), plural) 
    269     print msg 
    270     output_message.append(msg) 
    271     return output_message 
    272  
Note: See TracChangeset for help on using the changeset viewer.