Mercurial > repos > ecology > xarray_mapplot
comparison xarray_select.py @ 2:adc0367f7cb1 draft default tip
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/data_manipulation/xarray/ commit fd8ad4d97db7b1fd3876ff63e14280474e06fdf7
| author | ecology |
|---|---|
| date | Sun, 31 Jul 2022 21:17:52 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:65621a6c85e9 | 2:adc0367f7cb1 |
|---|---|
| 1 # xarray tool for: | |
| 2 # - getting metadata information | |
| 3 # - select data and save results in csv file for further post-processing | |
| 4 | |
| 5 import argparse | |
| 6 import os | |
| 7 import warnings | |
| 8 | |
| 9 import geopandas as gdp | |
| 10 | |
| 11 import pandas as pd | |
| 12 | |
| 13 from shapely.geometry import Point | |
| 14 from shapely.ops import nearest_points | |
| 15 | |
| 16 import xarray as xr | |
| 17 | |
| 18 | |
| 19 class XarraySelect (): | |
| 20 def __init__(self, infile, select="", outfile="", outputdir="", | |
| 21 latname="", latvalN="", latvalS="", lonname="", | |
| 22 lonvalE="", lonvalW="", filter_list="", coords="", | |
| 23 time="", verbose=False, no_missing=False, | |
| 24 tolerance=None): | |
| 25 self.infile = infile | |
| 26 self.select = select | |
| 27 self.outfile = outfile | |
| 28 self.outputdir = outputdir | |
| 29 self.latname = latname | |
| 30 if tolerance != "" and tolerance is not None: | |
| 31 self.tolerance = float(tolerance) | |
| 32 else: | |
| 33 self.tolerance = -1 | |
| 34 if latvalN != "" and latvalN is not None: | |
| 35 self.latvalN = float(latvalN) | |
| 36 else: | |
| 37 self.latvalN = "" | |
| 38 if latvalS != "" and latvalS is not None: | |
| 39 self.latvalS = float(latvalS) | |
| 40 else: | |
| 41 self.latvalS = "" | |
| 42 self.lonname = lonname | |
| 43 if lonvalE != "" and lonvalE is not None: | |
| 44 self.lonvalE = float(lonvalE) | |
| 45 else: | |
| 46 self.lonvalE = "" | |
| 47 if lonvalW != "" and lonvalW is not None: | |
| 48 self.lonvalW = float(lonvalW) | |
| 49 else: | |
| 50 self.lonvalW = "" | |
| 51 self.filter = filter_list | |
| 52 self.time = time | |
| 53 self.coords = coords | |
| 54 self.verbose = verbose | |
| 55 self.no_missing = no_missing | |
| 56 # initialization | |
| 57 self.dset = None | |
| 58 self.gset = None | |
| 59 if self.verbose: | |
| 60 print("infile: ", self.infile) | |
| 61 print("outfile: ", self.outfile) | |
| 62 print("select: ", self.select) | |
| 63 print("outfile: ", self.outfile) | |
| 64 print("outputdir: ", self.outputdir) | |
| 65 print("latname: ", self.latname) | |
| 66 print("latvalN: ", self.latvalN) | |
| 67 print("latvalS: ", self.latvalS) | |
| 68 print("lonname: ", self.lonname) | |
| 69 print("lonvalE: ", self.lonvalE) | |
| 70 print("lonvalW: ", self.lonvalW) | |
| 71 print("filter: ", self.filter) | |
| 72 print("time: ", self.time) | |
| 73 print("coords: ", self.coords) | |
| 74 | |
| 75 def rowfilter(self, single_filter): | |
| 76 split_filter = single_filter.split('#') | |
| 77 filter_varname = split_filter[0] | |
| 78 op = split_filter[1] | |
| 79 ll = float(split_filter[2]) | |
| 80 if (op == 'bi'): | |
| 81 rl = float(split_filter[3]) | |
| 82 if filter_varname == self.select: | |
| 83 # filter on values of the selected variable | |
| 84 if op == 'bi': | |
| 85 self.dset = self.dset.where( | |
| 86 (self.dset <= rl) & (self.dset >= ll) | |
| 87 ) | |
| 88 elif op == 'le': | |
| 89 self.dset = self.dset.where(self.dset <= ll) | |
| 90 elif op == 'ge': | |
| 91 self.dset = self.dset.where(self.dset >= ll) | |
| 92 elif op == 'e': | |
| 93 self.dset = self.dset.where(self.dset == ll) | |
| 94 else: # filter on other dimensions of the selected variable | |
| 95 if op == 'bi': | |
| 96 self.dset = self.dset.sel({filter_varname: slice(ll, rl)}) | |
| 97 elif op == 'le': | |
| 98 self.dset = self.dset.sel({filter_varname: slice(None, ll)}) | |
| 99 elif op == 'ge': | |
| 100 self.dset = self.dset.sel({filter_varname: slice(ll, None)}) | |
| 101 elif op == 'e': | |
| 102 self.dset = self.dset.sel({filter_varname: ll}, | |
| 103 method='nearest') | |
| 104 | |
| 105 def selection(self): | |
| 106 if self.dset is None: | |
| 107 self.ds = xr.open_dataset(self.infile) | |
| 108 self.dset = self.ds[self.select] # select variable | |
| 109 if self.time: | |
| 110 self.datetime_selection() | |
| 111 if self.filter: | |
| 112 self.filter_selection() | |
| 113 | |
| 114 self.area_selection() | |
| 115 if self.gset.count() > 1: | |
| 116 # convert to dataframe if several rows and cols | |
| 117 self.gset = self.gset.to_dataframe().dropna(how='all'). \ | |
| 118 reset_index() | |
| 119 self.gset.to_csv(self.outfile, header=True, sep='\t') | |
| 120 else: | |
| 121 data = { | |
| 122 self.latname: [self.gset[self.latname].values], | |
| 123 self.lonname: [self.gset[self.lonname].values], | |
| 124 self.select: [self.gset.values] | |
| 125 } | |
| 126 | |
| 127 df = pd.DataFrame(data, columns=[self.latname, self.lonname, | |
| 128 self.select]) | |
| 129 df.to_csv(self.outfile, header=True, sep='\t') | |
| 130 | |
| 131 def datetime_selection(self): | |
| 132 split_filter = self.time.split('#') | |
| 133 time_varname = split_filter[0] | |
| 134 op = split_filter[1] | |
| 135 ll = split_filter[2] | |
| 136 if (op == 'sl'): | |
| 137 rl = split_filter[3] | |
| 138 self.dset = self.dset.sel({time_varname: slice(ll, rl)}) | |
| 139 elif (op == 'to'): | |
| 140 self.dset = self.dset.sel({time_varname: slice(None, ll)}) | |
| 141 elif (op == 'from'): | |
| 142 self.dset = self.dset.sel({time_varname: slice(ll, None)}) | |
| 143 elif (op == 'is'): | |
| 144 self.dset = self.dset.sel({time_varname: ll}, method='nearest') | |
| 145 | |
| 146 def filter_selection(self): | |
| 147 for single_filter in self.filter: | |
| 148 self.rowfilter(single_filter) | |
| 149 | |
| 150 def area_selection(self): | |
| 151 | |
| 152 if self.latvalS != "" and self.lonvalW != "": | |
| 153 # Select geographical area | |
| 154 self.gset = self.dset.sel({self.latname: | |
| 155 slice(self.latvalS, self.latvalN), | |
| 156 self.lonname: | |
| 157 slice(self.lonvalW, self.lonvalE)}) | |
| 158 elif self.latvalN != "" and self.lonvalE != "": | |
| 159 # select nearest location | |
| 160 if self.no_missing: | |
| 161 self.nearest_latvalN = self.latvalN | |
| 162 self.nearest_lonvalE = self.lonvalE | |
| 163 else: | |
| 164 # find nearest location without NaN values | |
| 165 self.nearest_location() | |
| 166 if self.tolerance > 0: | |
| 167 self.gset = self.dset.sel({self.latname: self.nearest_latvalN, | |
| 168 self.lonname: self.nearest_lonvalE}, | |
| 169 method='nearest', | |
| 170 tolerance=self.tolerance) | |
| 171 else: | |
| 172 self.gset = self.dset.sel({self.latname: self.nearest_latvalN, | |
| 173 self.lonname: self.nearest_lonvalE}, | |
| 174 method='nearest') | |
| 175 else: | |
| 176 self.gset = self.dset | |
| 177 | |
| 178 def nearest_location(self): | |
| 179 # Build a geopandas dataframe with all first elements in each dimension | |
| 180 # so we assume null values correspond to a mask that is the same for | |
| 181 # all dimensions in the dataset. | |
| 182 dsel_frame = self.dset | |
| 183 for dim in self.dset.dims: | |
| 184 if dim != self.latname and dim != self.lonname: | |
| 185 dsel_frame = dsel_frame.isel({dim: 0}) | |
| 186 # transform to pandas dataframe | |
| 187 dff = dsel_frame.to_dataframe().dropna().reset_index() | |
| 188 # transform to geopandas to collocate | |
| 189 gdf = gdp.GeoDataFrame(dff, | |
| 190 geometry=gdp.points_from_xy(dff[self.lonname], | |
| 191 dff[self.latname])) | |
| 192 # Find nearest location where values are not null | |
| 193 point = Point(self.lonvalE, self.latvalN) | |
| 194 multipoint = gdf.geometry.unary_union | |
| 195 queried_geom, nearest_geom = nearest_points(point, multipoint) | |
| 196 self.nearest_latvalN = nearest_geom.y | |
| 197 self.nearest_lonvalE = nearest_geom.x | |
| 198 | |
| 199 def selection_from_coords(self): | |
| 200 fcoords = pd.read_csv(self.coords, sep='\t') | |
| 201 for row in fcoords.itertuples(): | |
| 202 self.latvalN = row[0] | |
| 203 self.lonvalE = row[1] | |
| 204 self.outfile = (os.path.join(self.outputdir, | |
| 205 self.select + '_' + | |
| 206 str(row.Index) + '.tabular')) | |
| 207 self.selection() | |
| 208 | |
| 209 | |
| 210 if __name__ == '__main__': | |
| 211 warnings.filterwarnings("ignore") | |
| 212 parser = argparse.ArgumentParser() | |
| 213 | |
| 214 parser.add_argument( | |
| 215 'infile', | |
| 216 help='netCDF input filename' | |
| 217 ) | |
| 218 parser.add_argument( | |
| 219 '--select', | |
| 220 help='Variable name to select' | |
| 221 ) | |
| 222 parser.add_argument( | |
| 223 '--latname', | |
| 224 help='Latitude name' | |
| 225 ) | |
| 226 parser.add_argument( | |
| 227 '--latvalN', | |
| 228 help='North latitude value' | |
| 229 ) | |
| 230 parser.add_argument( | |
| 231 '--latvalS', | |
| 232 help='South latitude value' | |
| 233 ) | |
| 234 parser.add_argument( | |
| 235 '--lonname', | |
| 236 help='Longitude name' | |
| 237 ) | |
| 238 parser.add_argument( | |
| 239 '--lonvalE', | |
| 240 help='East longitude value' | |
| 241 ) | |
| 242 parser.add_argument( | |
| 243 '--lonvalW', | |
| 244 help='West longitude value' | |
| 245 ) | |
| 246 parser.add_argument( | |
| 247 '--tolerance', | |
| 248 help='Maximum distance between original and selected value for ' | |
| 249 ' inexact matches e.g. abs(index[indexer] - target) <= tolerance' | |
| 250 ) | |
| 251 parser.add_argument( | |
| 252 '--coords', | |
| 253 help='Input file containing Latitude and Longitude' | |
| 254 'for geographical selection' | |
| 255 ) | |
| 256 parser.add_argument( | |
| 257 '--filter', | |
| 258 nargs="*", | |
| 259 help='Filter list variable#operator#value_s#value_e' | |
| 260 ) | |
| 261 parser.add_argument( | |
| 262 '--time', | |
| 263 help='select timeseries variable#operator#value_s[#value_e]' | |
| 264 ) | |
| 265 parser.add_argument( | |
| 266 '--outfile', | |
| 267 help='csv outfile for storing results of the selection' | |
| 268 '(valid only when --select)' | |
| 269 ) | |
| 270 parser.add_argument( | |
| 271 '--outputdir', | |
| 272 help='folder name for storing results with multiple selections' | |
| 273 '(valid only when --select)' | |
| 274 ) | |
| 275 parser.add_argument( | |
| 276 "-v", "--verbose", | |
| 277 help="switch on verbose mode", | |
| 278 action="store_true" | |
| 279 ) | |
| 280 parser.add_argument( | |
| 281 "--no_missing", | |
| 282 help="""Do not take into account possible null/missing values | |
| 283 (only valid for single location)""", | |
| 284 action="store_true" | |
| 285 ) | |
| 286 args = parser.parse_args() | |
| 287 | |
| 288 p = XarraySelect(args.infile, args.select, args.outfile, args.outputdir, | |
| 289 args.latname, args.latvalN, args.latvalS, args.lonname, | |
| 290 args.lonvalE, args.lonvalW, args.filter, | |
| 291 args.coords, args.time, args.verbose, | |
| 292 args.no_missing, args.tolerance) | |
| 293 if args.select: | |
| 294 p.selection() |
