Mercurial > repos > ecology > timeseries_extraction
comparison xarray_netcdf2netcdf.py @ 0:dd19259f7da5 draft default tip
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/data_manipulation/xarray/ commit fd8ad4d97db7b1fd3876ff63e14280474e06fdf7
| author | ecology |
|---|---|
| date | Sun, 31 Jul 2022 21:18:44 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:dd19259f7da5 |
|---|---|
| 1 #!/usr/bin/env python3 | |
| 2 # | |
| 3 # Apply operations on selected variables | |
| 4 # - scale | |
| 5 # one can also select the range of time (for timeseries) | |
| 6 # to apply these operations over the range only | |
| 7 # when a range of time is selected and when scaling, one | |
| 8 # can choose to save the entire timeseries or | |
| 9 # the selected range only. | |
| 10 # when scaling, one can add additional filters on dimensions | |
| 11 # (typically used to filter over latitudes and longitudes) | |
| 12 | |
| 13 | |
| 14 import argparse | |
| 15 import re | |
| 16 import warnings | |
| 17 from pathlib import Path | |
| 18 | |
| 19 import xarray as xr # noqa: E402 | |
| 20 | |
| 21 | |
class netCDF2netCDF ():
    """Subset, mask and scale variables of a netCDF input, then save it.

    Typical use is ``obj.compute()`` followed by ``obj.save()``.

    Parameters
    ----------
    infile : str
        Input path; a comma-separated list is opened as a multi-file
        dataset.
    varname : str or None
        Comma-separated variable names to scale (and to write, unless
        ``write_all`` is set). ``None`` or the string ``'None'`` means
        "no variable selected".
    scale : str, float or None
        Multiplicative factor applied to the selected variables;
        empty/None means 1.
    output : str or None
        Output filename; None falls back to ``output.netcdf``.
    write_all : bool
        Write the whole dataset instead of only the selected variables.
    keep_attributes : bool
        When true, sets xarray's global ``keep_attrs`` option.
    filter_list : iterable of str or None
        Filters of the form ``dimension#operator#value[#value_or_method]``
        where operator is one of ``sl``, ``to``, ``from``, ``is``.
    where_config : str or None
        Path of a text file holding a where-condition; ignored when it
        is empty, None or not an existing file.
    other : str or None
        Replacement value (converted to float) used where the condition
        is false.
    sel : bool
        True selects by coordinate value (``Dataset.sel``), False by
        integer position (``Dataset.isel``).
    drop : bool
        Passed as ``drop=True`` to ``Dataset.where``.
    verbose : bool
        Print the configuration and the resulting selection.
    """

    def __init__(self, infile, varname, scale="",
                 output="output.netcdf",
                 write_all=False,
                 keep_attributes=True,
                 filter_list="",
                 where_config="",
                 other="",
                 sel=False,
                 drop=False,
                 verbose=False):
        self.drop = drop
        # where_config is None when --where is not given, and Path("")
        # resolves to "." (which exists), so guard both before opening.
        self.where = ""
        if where_config:
            where_path = Path(where_config)
            if where_path.is_file():
                with open(where_path) as where_file:
                    self.where = where_file.read().replace("\n", "")
        self.other = other
        self.sel = sel
        # Several comma-separated inputs are kept as a list, a single
        # input stays a plain string (compute() dispatches on that).
        file_names = infile.split(",")
        self.infile = file_names if len(file_names) > 1 else infile
        self.verbose = verbose
        if varname == 'None' or varname is None:
            self.varname = varname
        else:
            self.varname = varname.split(",")
        self.write_all = write_all
        self.keep_attributes = keep_attributes
        if self.keep_attributes:
            xr.set_options(keep_attrs=True)
        self.filter = filter_list
        # dimension name -> slice or scalar / lookup method for scalars
        self.selection = {}
        self.method = {}
        if scale == "" or scale is None:
            self.scale = 1
        else:
            self.scale = float(scale)
        self.output = "output.netcdf" if output is None else output
        # initialization
        self.dset = None
        self.subset = None
        if self.verbose:
            print("infile: ", self.infile)
            print("varname: ", self.varname)
            print("filter_list: ", self.filter)
            print("scale: ", self.scale)
            print("write_all: ", self.write_all)
            print("keep_attributes: ", self.keep_attributes)
            print("sel: ", self.sel)
            print("output: ", self.output)

    def apply_selection(self):
        """Apply ``self.selection`` to ``self.ds`` by coordinate value.

        Slices are passed to ``sel`` as they are; scalar selections also
        get the lookup method recorded for that dimension.
        """
        self.dset = self.ds
        for key, value in self.selection.items():
            if isinstance(value, slice):
                self.dset = self.dset.sel({key: value})
            else:
                self.dset = self.dset.sel(
                    {key: value},
                    method=self.method.get(key)
                )

    def dimension_selection(self, single_filter):
        """Parse one ``dimension#operator#value[#extra]`` filter.

        Values are converted to float in value-selection mode and to int
        otherwise. The result is stored in ``self.selection`` (and, for
        the ``is`` operator in value-selection mode, ``self.method``).
        Unknown operators are ignored.
        """
        split_filter = single_filter.split('#')
        dimension_varname = split_filter[0]
        op = split_filter[1]
        convert = float if self.sel else int
        ll = convert(split_filter[2])
        if op == 'sl':
            rl = convert(split_filter[3])
            self.selection[dimension_varname] = slice(ll, rl)
        elif op == 'to':
            self.selection[dimension_varname] = slice(None, ll)
        elif op == 'from':
            self.selection[dimension_varname] = slice(ll, None)
        elif op == 'is':
            self.selection[dimension_varname] = ll
            if self.sel:
                # The lookup method is optional: a missing or empty
                # field behaves like an explicit 'None'.
                method = split_filter[3] if len(split_filter) > 3 else ''
                if not method or 'None' in method:
                    self.method[dimension_varname] = None
                else:
                    self.method[dimension_varname] = method

    def filter_selection(self):
        """Apply all filters, then scale the selected variables."""
        # self.filter is None when --filter was not given.
        for single_filter in self.filter or []:
            self.dimension_selection(single_filter)

        if self.sel:
            self.apply_selection()
        else:
            self.dset = self.ds.isel(self.selection)

        if self.varname != 'None' and self.varname is not None:
            for var in self.varname:
                self.dset[var] = self.dset[var] * self.scale

    def compute(self):
        """Open the input, apply the where-condition, filters and scale.

        Does nothing when ``self.dset`` has already been computed.
        """
        if self.dset is None:
            if isinstance(self.infile, list):
                self.ds = xr.open_mfdataset(self.infile)
            else:
                self.ds = xr.open_dataset(self.infile)
            if self.where != "":
                if self.drop:
                    if self.verbose:
                        print("Where with drop=True")
                    self.ds = self.ds.where(
                        self.eval_where(self.where),
                        drop=True
                    )
                elif self.other is not None and self.other != "":
                    if self.verbose:
                        print("Where with other=", float(self.other))
                    self.ds = self.ds.where(
                        self.eval_where(self.where),
                        other=float(self.other)
                    )
                else:
                    self.ds = self.ds.where(
                        self.eval_where(self.where)
                    )
            self.filter_selection()
            if self.verbose:
                print(self.selection)

    def save(self):
        """Write the selected variables (or everything) to ``self.output``."""
        only_selected = (
            self.varname != 'None'
            and self.varname is not None
            and not self.write_all
        )
        if only_selected:
            self.dset[self.varname].to_netcdf(self.output)
        else:
            self.dset.to_netcdf(self.output)

    def is_float(self, element) -> bool:
        """Return True when ``element`` can be converted with ``float()``."""
        try:
            float(element)
        except (TypeError, ValueError):
            return False
        return True

    def eval_where(self, where_condition):
        """Turn a textual condition into a mask evaluated on ``self.ds``.

        Names of data variables and coordinates of ``self.ds`` found in
        the condition are replaced by a lookup in the dataset. The
        condition is only evaluated when, once those names and the
        characters ``> < = & | ( ) !`` are removed, every remaining
        token is a float literal.

        Returns the evaluated condition, or None when the condition is
        rejected by that check.
        """
        names = set(self.ds.keys()) | set(self.ds.coords.keys())
        # Longest names first so that 'latitude' wins over 'lat'.
        names = sorted(
            (name for name in names if isinstance(name, str) and name),
            key=len,
            reverse=True
        )
        wcond = where_condition
        check_cond = where_condition
        if names:
            # Match whole names only, in a single pass, so that a name
            # contained in another one (or in already substituted text)
            # is never rewritten.
            name_pattern = re.compile(
                r"(?<![\w.])(?:"
                + "|".join(re.escape(name) for name in names)
                + r")(?!\w)"
            )
            # Item access also works for names that clash with Dataset
            # attributes or are not valid Python identifiers.
            wcond = name_pattern.sub(
                lambda match: " ds[" + repr(match.group(0)) + "] ",
                where_condition
            )
            check_cond = name_pattern.sub("", where_condition)
        to_remove = "[><=&|()]"
        check_cond = re.sub(to_remove, "", check_cond).replace("!", "")
        check_cond = re.sub(' +', ' ', check_cond).strip()
        if not all(self.is_float(num) for num in check_cond.split(" ")):
            return None
        # SECURITY: eval() runs text read from the where file. The check
        # above limits it to dataset names, numbers and comparison /
        # logical operators; builtins are withheld as a further guard.
        return eval(wcond.strip(), {"__builtins__": {}}, {"ds": self.ds})
| 201 | |
| 202 | |
if __name__ == '__main__':
    # Command-line entry point: parse the arguments, then run the
    # subsetting/scaling and write the result.
    warnings.filterwarnings("ignore")
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'input',
        help='input filename in netCDF format '
             '(comma-separated list for several files)'
    )
    parser.add_argument(
        'varname',
        help='Comma-separated variable name(s) to scale and write, '
             'or None (case sensitive)'
    )
    parser.add_argument(
        '--filter',
        nargs="*",
        help='Filter list variable#operator#value_s#value_e'
    )
    parser.add_argument(
        '--where',
        help='filename with where condition to be evaluated'
    )
    parser.add_argument(
        '--output',
        help='Output filename to store the resulting netCDF file'
    )
    parser.add_argument(
        '--scale',
        help='scale factor to apply to selection (float)'
    )
    parser.add_argument(
        '--other',
        help='Value to use for locations where condition is False (float)'
    )
    parser.add_argument(
        "--write_all",
        help="write all data to netCDF",
        action="store_true")
    parser.add_argument(
        "--keep_attributes",
        help="Keep all attributes",
        action="store_true")
    parser.add_argument(
        "-v", "--verbose",
        help="switch on verbose mode",
        action="store_true")
    parser.add_argument(
        "--selection",
        help="select by values",
        action="store_true")
    parser.add_argument(
        "--drop",
        help="drop values where condition is not met",
        action="store_true")
    args = parser.parse_args()

    # Printed unconditionally (not only in verbose mode); kept as is so
    # the tool's standard output does not change.
    print("args.selection", args.selection)
    dset = netCDF2netCDF(infile=args.input, varname=args.varname,
                         scale=args.scale, output=args.output,
                         write_all=args.write_all,
                         sel=args.selection,
                         keep_attributes=args.keep_attributes,
                         filter_list=args.filter,
                         where_config=args.where,
                         drop=args.drop, other=args.other,
                         verbose=args.verbose)
    dset.compute()
    dset.save()
