Mercurial > repos > ecology > xarray_metadata_info
comparison xarray_info.py @ 5:6cdf25648dc6 draft default tip
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/data_manipulation/xarray/ commit fd8ad4d97db7b1fd3876ff63e14280474e06fdf7
| author | ecology |
|---|---|
| date | Sun, 31 Jul 2022 21:17:01 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 4:9e37554c1b7a | 5:6cdf25648dc6 |
|---|---|
| 1 # xarray tool for: | |
| 2 # - getting metadata information | |
| 3 # - select data and save results in csv file for further post-processing | |
| 4 | |
| 5 import argparse | |
| 6 import csv | |
| 7 import os | |
| 8 import warnings | |
| 9 | |
| 10 import xarray as xr | |
| 11 | |
| 12 | |
class XarrayInfo():
    """Extract metadata from a netCDF file opened with xarray.

    Three independent reports are available:

    - ``info()``: xarray's textual dataset description, written to
      ``outfile_info``.
    - ``summary()``: a tab-separated table with one row per data variable
      and per coordinate, written to ``outfile_summary``.
    - ``get_coords_info()``: one ``<coordinate>.tabular`` file per
      coordinate, written into the ``coords_info`` folder.
    """

    def __init__(self, infile, outfile_info="", outfile_summary="",
                 verbose=False, coords_info=None):
        """Store the input/output locations; no file is opened here.

        Parameters
        ----------
        infile : path of the netCDF input file.
        outfile_info : output path used by ``info()``.
        outfile_summary : output path used by ``summary()``.
        verbose : when true, print the configured paths.
        coords_info : output folder used by ``get_coords_info()``.
        """
        self.infile = infile
        self.outfile_info = outfile_info
        self.outfile_summary = outfile_summary
        self.coords_info = coords_info
        self.verbose = verbose
        # initialization (not used by any method of this class)
        self.dset = None
        self.gset = None
        if self.verbose:
            print("infile: ", self.infile)
            print("outfile_info: ", self.outfile_info)
            print("outfile_summary: ", self.outfile_summary)
            print("coords_info: ", self.coords_info)

    def info(self):
        """Write xarray's ``Dataset.info`` description to ``outfile_info``."""
        # Context managers close both the output file and the dataset even
        # when xarray raises part-way through.
        with open(self.outfile_info, 'w') as out:
            with xr.open_dataset(self.infile) as ds:
                ds.info(out)

    @staticmethod
    def _summary_row(name, da):
        """Return ``[name, ndim, dim0, size0, dim1, size1, ...]`` for *da*.

        *da* only needs a ``sizes`` mapping of dimension name -> length.
        """
        row = [name, len(da.sizes)]
        for dim_name, dim_size in da.sizes.items():
            row.append(dim_name)
            row.append(dim_size)
        return row

    def summary(self):
        """Write a tab-separated dimension summary to ``outfile_summary``.

        The header holds ``VariableName``, ``NumberOfDimensions`` and one
        ``Dim<i>Name``/``Dim<i>Size`` pair per dataset dimension. Data
        variables are listed first, then coordinates; a row only contains
        as many name/size pairs as the variable has dimensions.
        """
        # newline='' lets the csv module control line endings itself.
        with open(self.outfile_summary, 'w', newline='') as out:
            with xr.open_dataset(self.infile) as ds:
                writer = csv.writer(out, delimiter='\t')
                header = ['VariableName', 'NumberOfDimensions']
                # Only the number of dataset dimensions is needed here.
                for idx in range(len(ds.dims)):
                    header.append('Dim' + str(idx) + 'Name')
                    header.append('Dim' + str(idx) + 'Size')
                writer.writerow(header)
                for name, da in ds.data_vars.items():
                    writer.writerow(self._summary_row(name, da))
                for name, da in ds.coords.items():
                    writer.writerow(self._summary_row(name, da))

    def get_coords_info(self):
        """Dump every coordinate to ``<coords_info>/<name>.tabular``.

        Each file is tab-separated, has no header line and is indexed by a
        running 0-based row number.
        """
        with xr.open_dataset(self.infile) as ds:
            for c in ds.coords:
                filename = os.path.join(self.coords_info,
                                        c.strip() +
                                        '.tabular')
                # NOTE(review): a 0-dimensional (scalar) coordinate may not
                # convert to a pandas object with a length - confirm
                # whether such inputs need to be supported.
                values = ds.coords[c].to_pandas()
                values.index = range(len(values))
                values.to_csv(filename, header=False, sep='\t')
| 70 | |
| 71 | |
if __name__ == '__main__':
    # Command-line entry point: parse the arguments, then run the
    # requested report(s) on the input file.
    warnings.filterwarnings("ignore")

    cli = argparse.ArgumentParser()
    cli.add_argument('infile', help='netCDF input filename')
    cli.add_argument(
        '--info',
        help='Output filename where metadata information is stored')
    cli.add_argument(
        '--summary',
        help='Output filename where data summary information is stored')
    cli.add_argument(
        '--coords_info',
        help=('output-folder where for each coordinate, '
              'coordinate values  are being printed in the '
              'corresponding outputfile'))
    cli.add_argument("-v", "--verbose",
                     action="store_true",
                     help="switch on verbose mode")
    args = cli.parse_args()

    tool = XarrayInfo(infile=args.infile,
                      outfile_info=args.info,
                      outfile_summary=args.summary,
                      verbose=args.verbose,
                      coords_info=args.coords_info)

    # NOTE(review): --coords_info is only honoured when --info is absent
    # (if/elif); --summary is handled independently of both.
    if args.info:
        tool.info()
    elif args.coords_info:
        tool.get_coords_info()

    if args.summary:
        tool.summary()
