Mercurial repository: kls286/chap_test_20230328
changeset 0:cbbe42422d56 draft
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/CHAP/__main__.py Tue Mar 28 15:07:30 2023 +0000
@@ -0,0 +1,4 @@
+from CHAP.runner import main
+
+if __name__ == '__main__':
+    main()
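Since CHAP/__main__.py only delegates to CHAP.runner.main, the pipeline can be started with python -m CHAP or programmatically; a minimal sketch (any command-line handling lives in CHAP.runner.main, which is not part of this hunk):

# Equivalent to `python -m CHAP`; argument parsing is left to CHAP.runner.main.
from CHAP.runner import main

main()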
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/CHAP/async.py Tue Mar 28 15:07:30 2023 +0000
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+#-*- coding: utf-8 -*-
+#pylint: disable=
+"""
+File : async.py
+Author : Valentin Kuznetsov <vkuznet AT gmail dot com>
+Description: AsyncProcessor module
+"""
+
+# system modules
+import asyncio
+
+# local modules
+from CHAP.processor import Processor, PrintProcessor
+
+
+async def task(mgr, doc):
+    """
+    Process given data using provided task manager
+    """
+    return mgr.process(doc)
+
+
+async def executeTasks(mgr, docs):
+    """
+    Process given set of documents using provided task manager
+    """
+    coRoutines = [task(mgr, d) for d in docs]
+    await asyncio.gather(*coRoutines)
+
+
+class AsyncProcessor(Processor):
+    """
+    AsyncProcessor processes given data via the asyncio module
+    """
+    def __init__(self, mgr):
+        super().__init__()
+        self.mgr = mgr
+
+    def _process(self, docs):
+        """
+        Internal method to process given data documents
+        """
+        asyncio.run(executeTasks(self.mgr, docs))
+
+def example():
+    """
+    Helper function to demonstrate usage of AsyncProcessor
+    """
+    docs = [1,2,3]
+    mgr = PrintProcessor()
+    processor = AsyncProcessor(mgr)
+    processor.process(docs)
+
+if __name__ == '__main__':
+    example()
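A usage sketch for the module above, mirroring its example() helper. Because async became a reserved keyword in Python 3.7, CHAP/async.py cannot be imported with a plain import statement, so importlib is used here; SquareProcessor is a hypothetical subclass purely for illustration.

import importlib

from CHAP.processor import Processor

# 'async' is a keyword in Python 3.7+, so load the module by name instead.
AsyncProcessor = importlib.import_module('CHAP.async').AsyncProcessor

class SquareProcessor(Processor):
    """Hypothetical processor that squares a numeric document."""
    def _process(self, data):
        return data * data

# Each document is handed to SquareProcessor.process concurrently; note that
# AsyncProcessor.process itself returns None (gathered results are discarded).
AsyncProcessor(SquareProcessor()).process([1, 2, 3])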
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/CHAP/models/basemodel.py Tue Mar 28 15:07:30 2023 +0000
@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+#-*- coding: utf-8 -*-
+#pylint: disable=
+"""
+File : basemodel.py
+Author : Valentin Kuznetsov <vkuznet AT gmail dot com>
+Description: BaseModel module
+"""
+
+# system modules
+import logging
+
+
+class BaseModel():
+    """
+    BaseModel docstring
+    """
+    def __init__(self, filename=None, **kwds):
+        self.logger = logging.getLogger(__name__)
+        self.construct(filename, **kwds)
+        self.map = dict(name=__name__)
+
+    def construct(self, filename=None, **kwds):
+        """
+        construct model from a file name or keyword arguments
+
+        :param filename: input file name
+        :param **kwds: named arguments
+        :return: BaseModel object
+        """
+        print('construct API calls: ', end='')
+        if filename and filename.endswith('yaml'):
+            self.construct_from_yaml(filename)
+        elif filename and filename != '':
+            self.construct_from_file(filename)
+        else:
+            self.construct_from_config(**kwds)
+
+    @classmethod
+    def construct_from_config(cls, **config):
+        """
+        construct from config object
+
+        :param **config: named arguments
+        :return: BaseModel object
+        """
+        print(f'construct_from_config: {config}')
+
+    @classmethod
+    def construct_from_yaml(cls, filename):
+        """
+        construct from YAML file
+
+        :param filename: input file name
+        :return: BaseModel object
+        """
+        print(f'construct_from_yaml: {filename}')
+
+    @classmethod
+    def construct_from_file(cls, filename):
+        """
+        construct from file name
+
+        :param filename: input file name
+        :return: BaseModel object
+        """
+        print(f'construct_from_file: {filename}')
+
+    def getMap(self):
+        """
+        return model map
+
+        :return: map object
+        """
+        return self.map
+
+
+if __name__ == '__main__':
+    print('### should construct from file.yaml')
+    base = BaseModel('file.yaml')
+    print('### should construct from file.txt')
+    base = BaseModel('file.txt')
+    print('### should construct from config')
+    base = BaseModel(param='file.txt', arg='bla')
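A brief sketch of the dispatch performed by construct() above; the file names are hypothetical and are only printed, never opened, by these placeholder class methods (assuming CHAP.models is importable as a package):

from CHAP.models.basemodel import BaseModel

BaseModel('settings.yaml')        # name ends in 'yaml' -> construct_from_yaml
BaseModel('settings.txt')         # any other non-empty name -> construct_from_file
base = BaseModel(param='value')   # no file name -> construct_from_config(param='value')
print(base.getMap())              # {'name': 'CHAP.models.basemodel'}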
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CHAP/models/edd.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,218 @@ +from msnctools.general import create_mask +from msnctools.material import Material +from msnctools.scanparsers import SMBMCAScanParser as ScanParser +import numpy as np +from pathlib import PosixPath +from pydantic import (BaseModel, + confloat, + conint, + conlist, + constr, + FilePath, + validator) +from scipy.interpolate import interp1d +from typing import Optional + + +class MCACeriaCalibrationConfig(BaseModel): + '''Class representing metadata required to perform a Ceria calibration for an + MCA detector. + + :ivar spec_file: Path to the SPEC file containing the CeO2 scan + :ivar scan_number: Number of the CeO2 scan in `spec_file` + :ivar scan_step_index: Index of the scan step to use for calibration, + optional. If not specified, the calibration routine will be performed on + the average of all MCA spectra for the scan. + + :ivar flux_file: csv file containing station beam energy in eV (column 0) + and flux (column 1) + + :ivar detector_name: name of the MCA to calibrate + :ivar num_bins: number of channels on the MCA to calibrate + :ivar max_energy_kev: maximum channel energy of the MCA in keV + + :ivar hexrd_h5_material_file: path to a HEXRD materials.h5 file containing an + entry for the material properties. + :ivar hexrd_h5_material_name: Name of the material entry in + `hexrd_h5_material_file`, defaults to `'CeO2'`. + :ivar lattice_parameter_angstrom: lattice spacing in angstrom to use for + the cubic CeO2 crystal, defaults to `5.41153`. + + :ivar tth_max: detector rotation about hutch x axis, defaults to `90`. + :ivar hkl_tth_tol: minimum resolvable difference in 2&theta between two + unique HKL peaks, defaults to `0.15`. + + :ivar fit_include_bin_ranges: list of MCA channel index ranges whose data + will be included in the calibration routine + :ivar fit_hkls: list of unique HKL indices to fit peaks for in the + calibration routine + + :ivar tth_initial_guess: initial guess for 2&theta + :ivar slope_initial_guess: initial guess for detector channel energy + correction linear slope, defaults to `1.0`. + :ivar intercept_initial_guess: initial guess for detector channel energy + correction y-intercept, defaults to `0.0`. + + :ivar tth_calibrated: calibrated value for 2&theta, defaults to None + :ivar slope_calibrated: calibrated value for detector channel energy + correction linear slope, defaults to `None` + :ivar intercept_calibrated: calibrated value for detector channel energy + correction y-intercept, defaluts to None + + :ivar max_iter: maximum number of iterations of the calibration routine, + defaults to `10`. + :ivar tune_tth_tol: stop iteratively tuning 2&theta when an iteration + produces a change in the tuned value of 2&theta that is smaller than this + value, defaults to `1e-8`. 
+ ''' + + spec_file: FilePath + scan_number: conint(gt=0) + scan_step_index: Optional[conint(ge=0)] + + flux_file: FilePath + + detector_name: constr(strip_whitespace=True, min_length=1) + num_bins: conint(gt=0) + max_energy_kev: confloat(gt=0) + + hexrd_h5_material_file: FilePath + hexrd_h5_material_name: constr(strip_whitespace=True, min_length=1) = 'CeO2' + lattice_parameter_angstrom: confloat(gt=0) = 5.41153 + + tth_max: confloat(gt=0, allow_inf_nan=False) = 90.0 + hkl_tth_tol: confloat(gt=0, allow_inf_nan=False) = 0.15 + + fit_include_bin_ranges: conlist(min_items=1, + item_type=conlist(item_type=conint(ge=0), + min_items=2, + max_items=2)) + fit_hkls: conlist(item_type=conint(ge=0), min_items=1) + + tth_initial_guess: confloat(gt=0, le=tth_max, allow_inf_nan=False) + slope_initial_guess: float = 1.0 + intercept_initial_guess: float = 0.0 + tth_calibrated: Optional[confloat(gt=0, allow_inf_nan=False)] + slope_calibrated: Optional[confloat(allow_inf_nan=False)] + intercept_calibrated: Optional[confloat(allow_inf_nan=False)] + + max_iter: conint(gt=0) = 10 + tune_tth_tol: confloat(ge=0) = 1e-8 + + @validator('fit_include_bin_ranges', each_item=True) + def validate_include_bin_range(cls, value, values): + '''Ensure no bin ranges are outside the boundary of the detector''' + + num_bins = values.get('num_bins') + value[1] = min(value[1], num_bins) + return(value) + + def mca_data(self): + '''Get the 1D array of MCA data to use for calibration. + + :return: MCA data + :rtype: np.ndarray + ''' + + scanparser = ScanParser(self.spec_file, self.scan_number) + if self.scan_step_index is None: + data = scanparser.get_all_detector_data(self.detector_name) + if scanparser.spec_scan_npts > 1: + data = np.average(data, axis=1) + else: + data = data[0] + else: + data = scanparser.get_detector_data(self.detector_name, self.scan_step_index) + + return(np.array(data)) + + def mca_mask(self): + '''Get a boolean mask array to use on MCA data before fitting. + + :return: boolean mask array + :rtype: numpy.ndarray + ''' + + mask = None + bin_indices = np.arange(self.num_bins) + for bin_range in self.fit_include_bin_ranges: + mask = create_mask(bin_indices, + bounds=bin_range, + exclude_bounds=False, + current_mask=mask) + + return(mask) + + def flux_correction_interpolation_function(self): + '''Get an interpolation function to correct MCA data for relative energy + flux of the incident beam. + + :return: energy flux correction interpolation function + :rtype: scipy.interpolate._polyint._Interpolator1D + ''' + + flux = np.loadtxt(self.flux_file) + energies = flux[:,0]/1.e3 + relative_intensities = flux[:,1]/np.max(flux[:,1]) + interpolation_function = interp1d(energies, relative_intensities) + return(interpolation_function) + + def material(self): + '''Get CeO2 as a `msnctools.materials.Material` object. + + :return: CeO2 material + :rtype: msnctools.material.Material + ''' + + material = Material(material_name=self.hexrd_h5_material_name, + material_file=self.hexrd_h5_material_file, + lattice_parameters_angstroms=self.lattice_parameter_angstrom) + # The following kwargs will be needed if we allow the material to be + # built using xrayutilities (for now, we only allow hexrd to make the + # material): + # sgnum=225, + # atoms=['Ce4p', 'O2mdot'], + # pos=[(0.,0.,0.), (0.25,0.75,0.75)], + # enrgy=50000.) # Why do we need to specify an energy to get HKLs when using xrayutilities? 
+ return(material) + + def unique_ds(self): + '''Get a list of unique HKLs and their lattice spacings + + :return: unique HKLs and their lattice spacings in angstroms + :rtype: np.ndarray, np.ndarray + ''' + + unique_hkls, unique_ds = self.material().get_unique_ds(tth_tol=self.hkl_tth_tol, tth_max=self.tth_max) + + return(unique_hkls, unique_ds) + + def fit_ds(self): + '''Get a list of HKLs and their lattice spacings that will be fit in the + calibration routine + + :return: HKLs to fit and their lattice spacings in angstroms + :rtype: np.ndarray, np.ndarray + ''' + + unique_hkls, unique_ds = self.unique_ds() + + fit_hkls = np.array([unique_hkls[i] for i in self.fit_hkls]) + fit_ds = np.array([unique_ds[i] for i in self.fit_hkls]) + + return(fit_hkls, fit_ds) + + def dict(self): + '''Return a representation of this configuration in a dictionary that is + suitable for dumping to a YAML file (one that converts all instances of + fields with type `PosixPath` to `str`). + + :return: dictionary representation of the configuration. + :rtype: dict + ''' + + d = super().dict() + for k,v in d.items(): + if isinstance(v, PosixPath): + d[k] = str(v) + return(d)
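A construction sketch for MCACeriaCalibrationConfig; every path and value below is hypothetical, the referenced files must exist for pydantic's FilePath validation to pass, and msnctools must be installed for the helper methods to run:

from CHAP.models.edd import MCACeriaCalibrationConfig

calibration_config = MCACeriaCalibrationConfig(
    spec_file='samples/ceo2/spec.log',            # hypothetical paths and values
    scan_number=1,
    flux_file='samples/flux.csv',
    detector_name='mca1',
    num_bins=2048,
    max_energy_kev=150.0,
    hexrd_h5_material_file='samples/materials.h5',
    fit_include_bin_ranges=[[650, 850], [1000, 1250]],
    fit_hkls=[0, 1, 2],
    tth_initial_guess=7.5)

# Boolean mask selecting only the included channel ranges out of the 2048 bins.
mask = calibration_config.mca_mask()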
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CHAP/models/integration.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,515 @@ +import copy +from functools import cache, lru_cache +import json +import logging +import os +from time import time +from typing import Literal, Optional + +# from multiprocessing.pool import ThreadPool +# from nexusformat.nexus import (NXdata, +# NXdetector, +# NXfield, +# NXprocess, +# NXroot) +import numpy as np +from pydantic import (BaseModel, + validator, + constr, + conlist, + conint, + confloat, + FilePath) +#import pyFAI, pyFAI.multi_geometry, pyFAI.units +from pyFAI import load as pyfai_load +from pyFAI.multi_geometry import MultiGeometry +from pyFAI.units import AZIMUTHAL_UNITS, RADIAL_UNITS +#from pyspec.file.tiff import TiffFile + +#from .map import MapConfig, SpecScans + + +class Detector(BaseModel): + """ + Detector class to represent a single detector used in the experiment. + + :param prefix: Prefix of the detector in the SPEC file. + :type prefix: str + :param poni_file: Path to the poni file. + :type poni_file: str + :param mask_file: Optional path to the mask file. + :type mask_file: str, optional + """ + prefix: constr(strip_whitespace=True, min_length=1) + poni_file: FilePath + mask_file: Optional[FilePath] + @validator('poni_file', allow_reuse=True) + def validate_poni_file(cls, poni_file): + """ + Validate the poni file by checking if it's a valid PONI file. + + :param poni_file: Path to the poni file. + :type poni_file: str + :raises ValueError: If poni_file is not a valid PONI file. + :returns: Absolute path to the poni file. + :rtype: str + """ + poni_file = os.path.abspath(poni_file) + try: + ai = azimuthal_integrator(poni_file) + except: + raise(ValueError(f'{poni_file} is not a valid PONI file')) + else: + return(poni_file) + @validator('mask_file', allow_reuse=True) + def validate_mask_file(cls, mask_file, values): + """ + Validate the mask file. If a mask file is provided, it checks if it's a valid TIFF file. + + :param mask_file: Path to the mask file. + :type mask_file: str or None + :param values: A dictionary of the Detector fields. + :type values: dict + :raises ValueError: If mask_file is provided and it's not a valid TIFF file. + :raises ValueError: If `'poni_file'` is not provided in `values`. + :returns: Absolute path to the mask file or None. 
+ :rtype: str or None + """ + if mask_file is None: + return(mask_file) + else: + mask_file = os.path.abspath(mask_file) + poni_file = values.get('poni_file') + if poni_file is None: + raise(ValueError('Cannot validate mask file without a PONI file.')) + else: + try: + mask_array = get_mask_array(mask_file, poni_file) + except BaseException as e: + raise(ValueError(f'Unable to open {mask_file} as a TIFF file')) + else: + return(mask_file) + @property + def azimuthal_integrator(self): + return(azimuthal_integrator(self.poni_file)) + @property + def mask_array(self): + return(get_mask_array(self.mask_file, self.poni_file)) + +@cache +def azimuthal_integrator(poni_file:str): + if not isinstance(poni_file, str): + poni_file = str(poni_file) + return(pyfai_load(poni_file)) +@cache +def get_mask_array(mask_file:str, poni_file:str): + if mask_file is not None: + if not isinstance(mask_file, str): + mask_file = str(mask_file) + + from pyspec.file.tiff import TiffFile + with TiffFile(mask_file) as tiff: + mask_array = tiff.asarray() + else: + mask_array = np.zeros(azimuthal_integrator(poni_file).detector.shape) + return(mask_array) + +class IntegrationConfig(BaseModel): + """ + Class representing the configuration for a raw detector data integration. + + :ivar tool_type: type of integration tool; always set to "integration" + :type tool_type: str, optional + :ivar title: title of the integration + :type title: str + :ivar integration_type: type of integration, one of "azimuthal", "radial", or "cake" + :type integration_type: str + :ivar detectors: list of detectors used in the integration + :type detectors: List[Detector] + :ivar radial_units: radial units for the integration, defaults to `'q_A^-1'` + :type radial_units: str, optional + :ivar radial_min: minimum radial value for the integration range + :type radial_min: float, optional + :ivar radial_max: maximum radial value for the integration range + :type radial_max: float, optional + :ivar radial_npt: number of points in the radial range for the integration + :type radial_npt: int, optional + :ivar azimuthal_units: azimuthal units for the integration + :type azimuthal_units: str, optional + :ivar azimuthal_min: minimum azimuthal value for the integration range + :type azimuthal_min: float, optional + :ivar azimuthal_max: maximum azimuthal value for the integration range + :type azimuthal_max: float, optional + :ivar azimuthal_npt: number of points in the azimuthal range for the integration + :type azimuthal_npt: int, optional + :ivar error_model: error model for the integration, one of "poisson" or "azimuthal" + :type error_model: str, optional + """ + tool_type: Literal['integration'] = 'integration' + title: constr(strip_whitespace=True, min_length=1) + integration_type: Literal['azimuthal', 'radial', 'cake'] + detectors: conlist(item_type=Detector, min_items=1) + radial_units: str = 'q_A^-1' + radial_min: confloat(ge=0) + radial_max: confloat(gt=0) + radial_npt: conint(gt=0) = 1800 + azimuthal_units: str = 'chi_deg' + azimuthal_min: confloat(ge=-180) = -180 + azimuthal_max: confloat(le=360) = 180 + azimuthal_npt: conint(gt=0) = 3600 + error_model: Optional[Literal['poisson', 'azimuthal']] + sequence_index: Optional[conint(gt=0)] + @validator('radial_units', allow_reuse=True) + def validate_radial_units(cls, radial_units): + """ + Validate the radial units for the integration. 
+ + :param radial_units: unvalidated radial units for the integration + :type radial_units: str + :raises ValueError: if radial units are not one of the recognized radial units + :return: validated radial units + :rtype: str + """ + if radial_units in RADIAL_UNITS.keys(): + return(radial_units) + else: + raise(ValueError(f'Invalid radial units: {radial_units}. Must be one of {", ".join(RADIAL_UNITS.keys())}')) + @validator('azimuthal_units', allow_reuse=True) + def validate_azimuthal_units(cls, azimuthal_units): + """ + Validate that `azimuthal_units` is one of the keys in the + `pyFAI.units.AZIMUTHAL_UNITS` dictionary. + + :param azimuthal_units: The string representing the unit to be validated. + :type azimuthal_units: str + :raises ValueError: If `azimuthal_units` is not one of the keys in `pyFAI.units.AZIMUTHAL_UNITS` + :return: The original supplied value, if is one of the keys in `pyFAI.units.AZIMUTHAL_UNITS`. + :rtype: str + """ + if azimuthal_units in AZIMUTHAL_UNITS.keys(): + return(azimuthal_units) + else: + raise(ValueError(f'Invalid azimuthal units: {azimuthal_units}. Must be one of {", ".join(AZIMUTHAL_UNITS.keys())}')) + def validate_range_max(range_name:str): + """Validate the maximum value of an integration range. + + :param range_name: The name of the integration range (e.g. radial, azimuthal). + :type range_name: str + :return: The callable that performs the validation. + :rtype: callable + """ + def _validate_range_max(cls, range_max, values): + """Check if the maximum value of the integration range is greater than its minimum value. + + :param range_max: The maximum value of the integration range. + :type range_max: float + :param values: The values of the other fields being validated. + :type values: dict + :raises ValueError: If the maximum value of the integration range is not greater than its minimum value. + :return: The validated maximum range value + :rtype: float + """ + range_min = values.get(f'{range_name}_min') + if range_min < range_max: + return(range_max) + else: + raise(ValueError(f'Maximum value of integration range must be greater than minimum value of integration range ({range_name}_min={range_min}).')) + return(_validate_range_max) + _validate_radial_max = validator('radial_max', allow_reuse=True)(validate_range_max('radial')) + _validate_azimuthal_max = validator('azimuthal_max', allow_reuse=True)(validate_range_max('azimuthal')) + def validate_for_map_config(self, map_config:BaseModel): + """ + Validate the existence of the detector data file for all scan points in `map_config`. + + :param map_config: The `MapConfig` instance to validate against. + :type map_config: MapConfig + :raises RuntimeError: If a detector data file could not be found for a scan point occurring in `map_config`. 
+ :return: None + :rtype: None + """ + for detector in self.detectors: + for scans in map_config.spec_scans: + for scan_number in scans.scan_numbers: + scanparser = scans.get_scanparser(scan_number) + for scan_step_index in range(scanparser.spec_scan_npts): + # Make sure the detector data file exists for all scan points + try: + detector_data_file = scanparser.get_detector_data_file(detector.prefix, scan_step_index) + except: + raise(RuntimeError(f'Could not find data file for detector prefix {detector.prefix} on scan number {scan_number} in spec file {scans.spec_file}')) + def get_azimuthal_adjustments(self): + """To enable a continuous range of integration in the azimuthal direction + for radial and cake integration, obtain adjusted values for this + `IntegrationConfig`'s `azimuthal_min` and `azimuthal_max` values, the + angle amount by which those values were adjusted, and the proper location + of the discontinuity in the azimuthal direction. + + :return: Adjusted chi_min, adjusted chi_max, chi_offset, chi_discontinuity + :rtype: tuple[float,float,float,float] + """ + return(get_azimuthal_adjustments(self.azimuthal_min, self.azimuthal_max)) + def get_azimuthal_integrators(self): + """Get a list of `AzimuthalIntegrator`s that correspond to the detector + configurations in this instance of `IntegrationConfig`. + + The returned `AzimuthalIntegrator`s are (if need be) artificially rotated + in the azimuthal direction to achieve a continuous range of integration + in the azimuthal direction. + + :returns: A list of `AzimuthalIntegrator`s appropriate for use by this + `IntegrationConfig` tool + :rtype: list[pyFAI.azimuthalIntegrator.AzimuthalIntegrator] + """ + chi_min, chi_max, chi_offset, chi_disc = self.get_azimuthal_adjustments() + return(get_azimuthal_integrators(tuple([detector.poni_file for detector in self.detectors]), chi_offset=chi_offset)) + def get_multi_geometry_integrator(self): + """Get a `MultiGeometry` integrator suitable for use by this instance of + `IntegrationConfig`. + + :return: A `MultiGeometry` integrator + :rtype: pyFAI.multi_geometry.MultiGeometry + """ + poni_files = tuple([detector.poni_file for detector in self.detectors]) + radial_range = (self.radial_min, self.radial_max) + azimuthal_range = (self.azimuthal_min, self.azimuthal_max) + return(get_multi_geometry_integrator(poni_files, self.radial_units, radial_range, azimuthal_range)) + def get_azimuthally_integrated_data(self, spec_scans:BaseModel, scan_number:int, scan_step_index:int): + """Return azimuthally-integrated data for the scan step specified. + + :param spec_scans: An instance of `SpecScans` containing the scan step requested. + :type spec_scans: SpecScans + :param scan_number: The number of the scan containing the scan step requested. + :type scan_number: int + :param scan_step_index: The index of the scan step requested. + :type scan_step_index: int + :return: A 1D array of azimuthally-integrated raw detector intensities. 
+ :rtype: np.ndarray + """ + detector_data = spec_scans.get_detector_data(self.detectors, scan_number, scan_step_index) + integrator = self.get_multi_geometry_integrator() + lst_mask = [detector.mask_array for detector in self.detectors] + result = integrator.integrate1d(detector_data, lst_mask=lst_mask, npt=self.radial_npt, error_model=self.error_model) + if result.sigma is None: + return(result.intensity) + else: + return(result.intensity, result.sigma) + def get_radially_integrated_data(self, spec_scans:BaseModel, scan_number:int, scan_step_index:int): + """Return radially-integrated data for the scan step specified. + + :param spec_scans: An instance of `SpecScans` containing the scan step requested. + :type spec_scans: SpecScans + :param scan_number: The number of the scan containing the scan step requested. + :type scan_number: int + :param scan_step_index: The index of the scan step requested. + :type scan_step_index: int + :return: A 1D array of radially-integrated raw detector intensities. + :rtype: np.ndarray + """ + # Handle idiosyncracies of azimuthal ranges in pyFAI + # Adjust chi ranges to get a continuous range of iintegrated data + chi_min, chi_max, chi_offset, chi_disc = self.get_azimuthal_adjustments() + # Perform radial integration on a detector-by-detector basis. + I_each_detector = [] + variance_each_detector = [] + integrators = self.get_azimuthal_integrators() + for i,(integrator,detector) in enumerate(zip(integrators,self.detectors)): + detector_data = spec_scans.get_detector_data([detector], scan_number, scan_step_index)[0] + result = integrator.integrate_radial(detector_data, self.azimuthal_npt, + unit=self.azimuthal_units, azimuth_range=(chi_min,chi_max), + radial_unit=self.radial_units, radial_range=(self.radial_min,self.radial_max), + mask=detector.mask_array) #, error_model=self.error_model) + I_each_detector.append(result.intensity) + if result.sigma is not None: + variance_each_detector.append(result.sigma**2) + # Add the individual detectors' integrated intensities together + I = np.nansum(I_each_detector, axis=0) + # Ignore data at values of chi for which there was no data + I = np.where(I==0, np.nan, I) + if len(I_each_detector) != len(variance_each_detector): + return(I) + else: + # Get the standard deviation of the summed detectors' intensities + sigma = np.sqrt(np.nansum(variance_each_detector, axis=0)) + return(I, sigma) + def get_cake_integrated_data(self, spec_scans:BaseModel, scan_number:int, scan_step_index:int): + """Return cake-integrated data for the scan step specified. + + :param spec_scans: An instance of `SpecScans` containing the scan step requested. + :type spec_scans: SpecScans + :param scan_number: The number of the scan containing the scan step requested. + :type scan_number: int + :param scan_step_index: The index of the scan step requested. + :type scan_step_index: int + :return: A 2D array of cake-integrated raw detector intensities. 
+ :rtype: np.ndarray + """ + detector_data = spec_scans.get_detector_data(self.detectors, scan_number, scan_step_index) + integrator = self.get_multi_geometry_integrator() + lst_mask = [detector.mask_array for detector in self.detectors] + result = integrator.integrate2d(detector_data, lst_mask=lst_mask, + npt_rad=self.radial_npt, npt_azim=self.azimuthal_npt, + method='bbox', + error_model=self.error_model) + if result.sigma is None: + return(result.intensity) + else: + return(result.intensity, result.sigma) + def get_integrated_data(self, spec_scans:BaseModel, scan_number:int, scan_step_index:int): + """Return integrated data for the scan step specified. + + :param spec_scans: An instance of `SpecScans` containing the scan step requested. + :type spec_scans: SpecScans + :param scan_number: The number of the scan containing the scan step requested. + :type scan_number: int + :param scan_step_index: The index of the scan step requested. + :type scan_step_index: int + :return: An array of integrated raw detector intensities. + :rtype: np.ndarray + """ + if self.integration_type == 'azimuthal': + return(self.get_azimuthally_integrated_data(spec_scans, scan_number, scan_step_index)) + elif self.integration_type == 'radial': + return(self.get_radially_integrated_data(spec_scans, scan_number, scan_step_index)) + elif self.integration_type == 'cake': + return(self.get_cake_integrated_data(spec_scans, scan_number, scan_step_index)) + + @property + def integrated_data_coordinates(self): + """ + Return a dictionary of coordinate arrays for navigating the dimension(s) + of the integrated data produced by this instance of `IntegrationConfig`. + + :return: A dictionary with either one or two keys: 'azimuthal' and/or + 'radial', each of which points to a 1-D `numpy` array of coordinate + values. + :rtype: dict[str,np.ndarray] + """ + if self.integration_type == 'azimuthal': + return(get_integrated_data_coordinates(radial_range=(self.radial_min,self.radial_max), + radial_npt=self.radial_npt)) + elif self.integration_type == 'radial': + return(get_integrated_data_coordinates(azimuthal_range=(self.azimuthal_min,self.azimuthal_max), + azimuthal_npt=self.azimuthal_npt)) + elif self.integration_type == 'cake': + return(get_integrated_data_coordinates(radial_range=(self.radial_min,self.radial_max), + radial_npt=self.radial_npt, + azimuthal_range=(self.azimuthal_min,self.azimuthal_max), + azimuthal_npt=self.azimuthal_npt)) + @property + def integrated_data_dims(self): + """Return a tuple of the coordinate labels for the integrated data + produced by this instance of `IntegrationConfig`. + """ + directions = list(self.integrated_data_coordinates.keys()) + dim_names = [getattr(self, f'{direction}_units') for direction in directions] + return(dim_names) + @property + def integrated_data_shape(self): + """Return a tuple representing the shape of the integrated data + produced by this instance of `IntegrationConfig` for a single scan step. + """ + return(tuple([len(coordinate_values) for coordinate_name,coordinate_values in self.integrated_data_coordinates.items()])) + +@cache +def get_azimuthal_adjustments(chi_min:float, chi_max:float): + """ + Fix chi discontinuity at 180 degrees and return the adjusted chi range, + offset, and discontinuty. + + If the discontinuity is crossed, obtain the offset to artificially rotate + detectors to achieve a continuous azimuthal integration range. + + :param chi_min: The minimum value of the azimuthal range. 
+ :type chi_min: float + :param chi_max: The maximum value of the azimuthal range. + :type chi_max: float + :return: The following four values: the adjusted minimum value of the + azimuthal range, the adjusted maximum value of the azimuthal range, the + value by which the chi angle was adjusted, the position of the chi + discontinuity. + """ + # Fix chi discontinuity at 180 degrees for now. + chi_disc = 180 + # If the discontinuity is crossed, artificially rotate the detectors to + # achieve a continuous azimuthal integration range + if chi_min < chi_disc and chi_max > chi_disc: + chi_offset = chi_max - chi_disc + else: + chi_offset = 0 + return(chi_min-chi_offset, chi_max-chi_offset, chi_offset, chi_disc) +@cache +def get_azimuthal_integrators(poni_files:tuple, chi_offset=0): + """ + Return a list of `AzimuthalIntegrator` objects generated from PONI files. + + :param poni_files: Tuple of strings, each string being a path to a PONI file. : tuple + :type poni_files: tuple + :param chi_offset: The angle in degrees by which the `AzimuthalIntegrator` objects will be rotated, defaults to 0. + :type chi_offset: float, optional + :return: List of `AzimuthalIntegrator` objects + :rtype: list[pyFAI.azimuthalIntegrator.AzimuthalIntegrator] + """ + ais = [] + for poni_file in poni_files: + ai = copy.deepcopy(azimuthal_integrator(poni_file)) + ai.rot3 += chi_offset * np.pi/180 + ais.append(ai) + return(ais) +@cache +def get_multi_geometry_integrator(poni_files:tuple, radial_unit:str, radial_range:tuple, azimuthal_range:tuple): + """Return a `MultiGeometry` instance that can be used for azimuthal or cake + integration. + + :param poni_files: Tuple of PONI files that describe the detectors to be + integrated. + :type poni_files: tuple + :param radial_unit: Unit to use for radial integration range. + :type radial_unit: str + :param radial_range: Tuple describing the range for radial integration. + :type radial_range: tuple[float,float] + :param azimuthal_range:Tuple describing the range for azimuthal integration. + :type azimuthal_range: tuple[float,float] + :return: `MultiGeometry` instance that can be used for azimuthal or cake + integration. + :rtype: pyFAI.multi_geometry.MultiGeometry + """ + chi_min, chi_max, chi_offset, chi_disc = get_azimuthal_adjustments(*azimuthal_range) + ais = copy.deepcopy(get_azimuthal_integrators(poni_files, chi_offset=chi_offset)) + multi_geometry = MultiGeometry(ais, + unit=radial_unit, + radial_range=radial_range, + azimuth_range=(chi_min,chi_max), + wavelength=sum([ai.wavelength for ai in ais])/len(ais), + chi_disc=chi_disc) + return(multi_geometry) +@cache +def get_integrated_data_coordinates(azimuthal_range:tuple=None, azimuthal_npt:int=None, radial_range:tuple=None, radial_npt:int=None): + """ + Return a dictionary of coordinate arrays for the specified radial and/or + azimuthal integration ranges. + + :param azimuthal_range: Tuple specifying the range of azimuthal angles over + which to generate coordinates, in the format (min, max), defaults to + None. + :type azimuthal_range: tuple[float,float], optional + :param azimuthal_npt: Number of azimuthal coordinate points to generate, + defaults to None. + :type azimuthal_npt: int, optional + :param radial_range: Tuple specifying the range of radial distances over + which to generate coordinates, in the format (min, max), defaults to + None. + :type radial_range: tuple[float,float], optional + :param radial_npt: Number of radial coordinate points to generate, defaults + to None. 
+ :type radial_npt: int, optional + :return: A dictionary with either one or two keys: 'azimuthal' and/or + 'radial', each of which points to a 1-D `numpy` array of coordinate + values. + :rtype: dict[str,np.ndarray] + """ + integrated_data_coordinates = {} + if azimuthal_range is not None and azimuthal_npt is not None: + integrated_data_coordinates['azimuthal'] = np.linspace(*azimuthal_range, azimuthal_npt) + if radial_range is not None and radial_npt is not None: + integrated_data_coordinates['radial'] = np.linspace(*radial_range, radial_npt) + return(integrated_data_coordinates)
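The coordinate helper at the end of the module can be exercised on its own; a sketch with illustrative ranges (importing CHAP.models.integration requires pyFAI to be installed):

from CHAP.models.integration import get_integrated_data_coordinates

# Coordinate grid for a cake integration: 1800 radial x 3600 azimuthal points.
coordinates = get_integrated_data_coordinates(
    radial_range=(0.0, 10.0), radial_npt=1800,
    azimuthal_range=(-180.0, 180.0), azimuthal_npt=3600)
print(coordinates['radial'].shape, coordinates['azimuthal'].shape)   # (1800,) (3600,)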
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CHAP/models/map.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,519 @@ +from functools import cache, lru_cache +import os +from typing import Literal, Optional, Union + +import numpy as np +from pydantic import (BaseModel, + conint, + conlist, + confloat, + constr, + FilePath, + PrivateAttr, + ValidationError, + validator) +from pyspec.file.spec import FileSpec + +class Sample(BaseModel): + """ + Class representing a sample metadata configuration. + + :ivar name: The name of the sample. + :type name: str + :ivar description: A description of the sample. + :type description: Optional[str] + """ + name: constr(min_length=1) + description: Optional[str] + +class SpecScans(BaseModel): + """ + Class representing a set of scans from a single SPEC file. + + :ivar spec_file: Path to the SPEC file. + :type spec_file: str + :ivar scan_numbers: List of scan numbers to use. + :type scan_numbers: list[int] + """ + spec_file: FilePath + scan_numbers: conlist(item_type=conint(gt=0), min_items=1) + @validator('spec_file', allow_reuse=True) + def validate_spec_file(cls, spec_file): + """ + Validate the specified SPEC file. + + :param spec_file: Path to the SPEC file. + :type spec_file: str + :raises ValueError: If the SPEC file is invalid. + :return: Absolute path to the SPEC file, if it is valid. + :rtype: str + """ + try: + spec_file = os.path.abspath(spec_file) + sspec_file = FileSpec(spec_file) + except: + raise(ValueError(f'Invalid SPEC file {spec_file}')) + else: + return(spec_file) + @validator('scan_numbers', allow_reuse=True) + def validate_scan_numbers(cls, scan_numbers, values): + """ + Validate the specified list of scan numbers. + + :param scan_numbers: List of scan numbers. + :type scan_numbers: list of int + :param values: Dictionary of values for all fields of the model. + :type values: dict + :raises ValueError: If a specified scan number is not found in the SPEC file. + :return: List of scan numbers. + :rtype: list of int + """ + spec_file = values.get('spec_file') + if spec_file is not None: + spec_scans = FileSpec(spec_file) + for scan_number in scan_numbers: + scan = spec_scans.get_scan_by_number(scan_number) + if scan is None: + raise(ValueError(f'There is no scan number {scan_number} in {spec_file}')) + return(scan_numbers) + + @property + def scanparsers(self): + '''A list of `ScanParser`s for each of the scans specified by the SPEC + file and scan numbers belonging to this instance of `SpecScans` + ''' + return([self.get_scanparser(scan_no) for scan_no in self.scan_numbers]) + + def get_scanparser(self, scan_number): + """This method returns a `ScanParser` for the specified scan number in + the specified SPEC file. + + :param scan_number: Scan number to get a `ScanParser` for + :type scan_number: int + :return: `ScanParser` for the specified scan number + :rtype: ScanParser + """ + return(get_scanparser(self.spec_file, scan_number)) + def get_index(self, scan_number:int, scan_step_index:int, map_config): + """This method returns a tuple representing the index of a specific step + in a specific spec scan within a map. 
+ + :param scan_number: Scan number to get index for + :type scan_number: int + :param scan_step_index: Scan step index to get index for + :type scan_step_index: int + :param map_config: Map configuration to get index for + :type map_config: MapConfig + :return: Index for the specified scan number and scan step index within + the specified map configuration + :rtype: tuple + """ + index = () + for independent_dimension in map_config.independent_dimensions: + coordinate_index = list(map_config.coords[independent_dimension.label]).index(independent_dimension.get_value(self, scan_number, scan_step_index)) + index = (coordinate_index, *index) + return(index) + def get_detector_data(self, detectors:list, scan_number:int, scan_step_index:int): + """ + Return the raw data from the specified detectors at the specified scan + number and scan step index. + + :param detectors: List of detector prefixes to get raw data for + :type detectors: list[str] + :param scan_number: Scan number to get data for + :type scan_number: int + :param scan_step_index: Scan step index to get data for + :type scan_step_index: int + :return: Data from the specified detectors for the specified scan number + and scan step index + :rtype: list[np.ndarray] + """ + return(get_detector_data(tuple([detector.prefix for detector in detectors]), self.spec_file, scan_number, scan_step_index)) +@cache +def get_available_scan_numbers(spec_file:str): + scans = FileSpec(spec_file).scans + scan_numbers = list(scans.keys()) + return(scan_numbers) +@cache +def get_scanparser(spec_file:str, scan_number:int): + if scan_number not in get_available_scan_numbers(spec_file): + return(None) + else: + return(ScanParser(spec_file, scan_number)) +@lru_cache(maxsize=10) +def get_detector_data(detector_prefixes:tuple, spec_file:str, scan_number:int, scan_step_index:int): + detector_data = [] + scanparser = get_scanparser(spec_file, scan_number) + for prefix in detector_prefixes: + image_data = scanparser.get_detector_data(prefix, scan_step_index) + detector_data.append(image_data) + return(detector_data) + +class PointByPointScanData(BaseModel): + """Class representing a source of raw scalar-valued data for which a value + was recorded at every point in a `MapConfig`. + + :ivar label: A user-defined label for referring to this data in the NeXus + file and in other tools. + :type label: str + :ivar units: The units in which the data were recorded. + :type units: str + :ivar data_type: Represents how these data were recorded at time of data + collection. + :type data_type: Literal['spec_motor', 'scan_column', 'smb_par'] + :ivar name: Represents the name with which these raw data were recorded at + time of data collection. + :type name: str + """ + label: constr(min_length=1) + units: constr(strip_whitespace=True, min_length=1) + data_type: Literal['spec_motor', 'scan_column', 'smb_par'] + name: constr(strip_whitespace=True, min_length=1) + @validator('label') + def validate_label(cls, label): + """Validate that the supplied `label` does not conflict with any of the + values for `label` reserved for certain data needed to perform + corrections. + + :param label: The value of `label` to validate + :type label: str + :raises ValueError: If `label` is one of the reserved values. + :return: The original supplied value `label`, if it is allowed. 
+ :rtype: str + """ + #if (not issubclass(cls,CorrectionsData)) and label in CorrectionsData.__fields__['label'].type_.__args__: + if (not issubclass(cls,CorrectionsData)) and label in CorrectionsData.reserved_labels(): + raise(ValueError(f'{cls.__name__}.label may not be any of the following reserved values: {CorrectionsData.reserved_labels()}')) + return(label) + def validate_for_station(self, station:str): + """Validate this instance of `PointByPointScanData` for a certain choice + of station (beamline). + + :param station: The name of the station (in 'idxx' format). + :type station: str + :raises TypeError: If the station is not compatible with the value of the + `data_type` attribute for this instance of PointByPointScanData. + :return: None + :rtype: None + """ + if station.lower() not in ('id1a3', 'id3a') and self.data_type == 'smb_par': + raise(TypeError(f'{self.__class__.__name__}.data_type may not be "smb_par" when station is "{station}"')) + def validate_for_spec_scans(self, spec_scans:list[SpecScans], scan_step_index:Union[Literal['all'],int]='all'): + """Validate this instance of `PointByPointScanData` for a list of + `SpecScans`. + + :param spec_scans: A list of `SpecScans` whose raw data will be checked + for the presence of the data represented by this instance of + `PointByPointScanData` + :type spec_scans: list[SpecScans] + :param scan_step_index: A specific scan step index to validate, defaults + to `'all'`. + :type scan_step_index: Union[Literal['all'],int], optional + :raises RuntimeError: If the data represented by this instance of + `PointByPointScanData` is missing for the specified scan steps. + :return: None + :rtype: None + """ + for scans in spec_scans: + for scan_number in scans.scan_numbers: + scanparser = scans.get_scanparser(scan_number) + if scan_step_index == 'all': + scan_step_index_range = range(scanparser.spec_scan_npts) + else: + scan_step_index_range = range(scan_step_index,scan_step_index+1) + for scan_step_index in scan_step_index_range: + try: + self.get_value(scans, scan_number, scan_step_index) + except: + raise(RuntimeError(f'Could not find data for {self.name} (data_type "{self.data_type}") on scan number {scan_number} in spec file {scans.spec_file}')) + def get_value(self, spec_scans:SpecScans, scan_number:int, scan_step_index:int): + """Return the value recorded for this instance of `PointByPointScanData` + at a specific scan step. + + :param spec_scans: An instance of `SpecScans` in which the requested scan step occurs. + :type spec_scans: SpecScans + :param scan_number: The number of the scan in which the requested scan step occurs. + :type scan_number: int + :param scan_step_index: The index of the requested scan step. + :type scan_step_index: int + :return: The value recorded of the data represented by this instance of + `PointByPointScanData` at the scan step requested + :rtype: float + """ + if self.data_type == 'spec_motor': + return(get_spec_motor_value(spec_scans.spec_file, scan_number, scan_step_index, self.name)) + elif self.data_type == 'scan_column': + return(get_spec_counter_value(spec_scans.spec_file, scan_number, scan_step_index, self.name)) + elif self.data_type == 'smb_par': + return(get_smb_par_value(spec_scans.spec_file, scan_number, self.name)) +@cache +def get_spec_motor_value(spec_file:str, scan_number:int, scan_step_index:int, spec_mnemonic:str): + """Return the value recorded for a SPEC motor at a specific scan step. + + :param spec_file: Location of a SPEC file in which the requested scan step occurs. 
+ :type spec_scans: str + :param scan_number: The number of the scan in which the requested scan step occurs. + :type scan_number: int + :param scan_step_index: The index of the requested scan step. + :type scan_step_index: int + :param spec_mnemonic: The menmonic of a SPEC motor. + :type spec_mnemonic: str + :return: The value of the motor at the scan step requested + :rtype: float + """ + scanparser = get_scanparser(spec_file, scan_number) + if spec_mnemonic in scanparser.spec_scan_motor_mnes: + motor_i = scanparser.spec_scan_motor_mnes.index(spec_mnemonic) + if scan_step_index >= 0: + scan_step = np.unravel_index(scan_step_index, scanparser.spec_scan_shape, order='F') + motor_value = scanparser.spec_scan_motor_vals[motor_i][scan_step[motor_i]] + else: + motor_value = scanparser.spec_scan_motor_vals[motor_i] + else: + motor_value = scanparser.get_spec_positioner_value(spec_mnemonic) + return(motor_value) +@cache +def get_spec_counter_value(spec_file:str, scan_number:int, scan_step_index:int, spec_column_label:str): + """Return the value recorded for a SPEC counter at a specific scan step. + + :param spec_file: Location of a SPEC file in which the requested scan step occurs. + :type spec_scans: str + :param scan_number: The number of the scan in which the requested scan step occurs. + :type scan_number: int + :param scan_step_index: The index of the requested scan step. + :type scan_step_index: int + :param spec_column_label: The label of a SPEC data column. + :type spec_column_label: str + :return: The value of the counter at the scan step requested + :rtype: float + """ + scanparser = get_scanparser(spec_file, scan_number) + if scan_step_index >= 0: + return(scanparser.spec_scan_data[spec_column_label][scan_step_index]) + else: + return(scanparser.spec_scan_data[spec_column_label]) +@cache +def get_smb_par_value(spec_file:str, scan_number:int, par_name:str): + """Return the value recorded for a specific scan in SMB-tyle .par file. + + :param spec_file: Location of a SPEC file in which the requested scan step occurs. + :type spec_scans: str + :param scan_number: The number of the scan in which the requested scan step occurs. + :type scan_number: int + :param par_name: The name of the column in the .par file + :type par_name: str + :return: The value of the .par file value for the scan requested. + :rtype: float + """ + scanparser = get_scanparser(spec_file, scan_number) + return(scanparser.pars[par_name]) +def validate_data_source_for_map_config(data_source, values): + import_scanparser(values.get('station'), values.get('experiment_type')) + data_source.validate_for_station(values.get('station')) + data_source.validate_for_spec_scans(values.get('spec_scans')) + return(data_source) + +class CorrectionsData(PointByPointScanData): + """Class representing the special instances of `PointByPointScanData` that + are used by certain kinds of `CorrectionConfig` tools. + + :ivar label: One of the reserved values required by `CorrectionConfig`, + `'presample_intensity'`, `'postsample_intensity'`, or + `'dwell_time_actual'`. + :type label: Literal['presample_intensity','postsample_intensity','dwell_time_actual'] + :ivar units: The units in which the data were recorded. + :type units: str + :ivar data_type: Represents how these data were recorded at time of data + collection. + :type data_type: Literal['scan_column', 'smb_par'] + :ivar name: Represents the name with which these raw data were recorded at + time of data collection. 
+ :type name: str + """ + label: Literal['presample_intensity','postsample_intensity','dwell_time_actual'] + data_type: Literal['scan_column','smb_par'] + @classmethod + def reserved_labels(cls): + """Return a list of all the labels reserved for corrections-related + scalar data. + + :return: A list of reserved labels + :rtype: list[str] + """ + return(list(cls.__fields__['label'].type_.__args__)) +class PresampleIntensity(CorrectionsData): + """Class representing a source of raw data for the intensity of the beam that + is incident on the sample. + + :ivar label: Must be `"presample_intensity"` + :type label: Literal["presample_intensity"] + :ivar units: Must be `"counts"` + :type units: Literal["counts"] + :ivar data_type: Represents how these data were recorded at time of data + collection. + :type data_type: Literal['scan_column', 'smb_par'] + :ivar name: Represents the name with which these raw data were recorded at + time of data collection. + :type name: str + """ + label: Literal['presample_intensity'] = 'presample_intensity' + units: Literal['counts'] = 'counts' +class PostsampleIntensity(CorrectionsData): + """Class representing a source of raw data for the intensity of the beam that + has passed through the sample. + + :ivar label: Must be `"postsample_intensity"` + :type label: Literal["postsample_intensity"] + :ivar units: Must be `"counts"` + :type units: Literal["counts"] + :ivar data_type: Represents how these data were recorded at time of data + collection. + :type data_type: Literal['scan_column', 'smb_par'] + :ivar name: Represents the name with which these raw data were recorded at + time of data collection. + :type name: str + """ + label: Literal['postsample_intensity'] = 'postsample_intensity' + units: Literal['counts'] = 'counts' +class DwellTimeActual(CorrectionsData): + """Class representing a source of raw data for the actual dwell time at each + scan point in SPEC (with some scan types, this value can vary slightly + point-to-point from the dwell time specified in the command). + + :ivar label: Must be `"dwell_time_actual"` + :type label: Literal["dwell_time_actual"] + :ivar units: Must be `"counts"` + :type units: Literal["counts"] + :ivar data_type: Represents how these data were recorded at time of data + collection. + :type data_type: Literal['scan_column', 'smb_par'] + :ivar name: Represents the name with which these raw data were recorded at + time of data collection. + :type name: str + """ + label: Literal['dwell_time_actual'] = 'dwell_time_actual' + units: Literal['s'] = 's' + +class MapConfig(BaseModel): + """Class representing an experiment consisting of one or more SPEC scans. + + :ivar title: The title for the map configuration. + :type title: str + :ivar station: The name of the station at which the map was collected. + :type station: Literal['id1a3','id3a','id3b'] + :ivar spec_scans: A list of the spec scans that compose the map. + :type spec_scans: list[SpecScans] + :ivar independent_dimensions: A list of the sources of data representing the + raw values of each independent dimension of the map. + :type independent_dimensions: list[PointByPointScanData] + :ivar presample_intensity: A source of point-by-point presample beam + intensity data. Required when applying a CorrectionConfig tool. + :type presample_intensity: Optional[PresampleIntensity] + :ivar dwell_time_actual: A source of point-by-point actual dwell times for + spec scans. Required when applying a CorrectionConfig tool. 
+ :type dwell_time_actual: Optional[DwellTimeActual] + :ivar presample_intensity: A source of point-by-point postsample beam + intensity data. Required when applying a CorrectionConfig tool with + `correction_type="flux_absorption"` or + `correction_type="flux_absorption_background"`. + :type presample_intensity: Optional[PresampleIntensity] + :ivar scalar_data: A list of the sources of data representing other scalar + raw data values collected at each point ion the map. In the NeXus file + representation of the map, datasets for these values will be included. + :type scalar_values: Optional[list[PointByPointScanData]] + """ + title: constr(strip_whitespace=True, min_length=1) + station: Literal['id1a3','id3a','id3b'] + experiment_type: Literal['SAXSWAXS', 'EDD', 'XRF'] + sample: Sample + spec_scans: conlist(item_type=SpecScans, min_items=1) + independent_dimensions: conlist(item_type=PointByPointScanData, min_items=1) + presample_intensity: Optional[PresampleIntensity] + dwell_time_actual: Optional[DwellTimeActual] + postsample_intensity: Optional[PostsampleIntensity] + scalar_data: Optional[list[PointByPointScanData]] = [] + _coords: dict = PrivateAttr() + _validate_independent_dimensions = validator('independent_dimensions', each_item=True, allow_reuse=True)(validate_data_source_for_map_config) + _validate_presample_intensity = validator('presample_intensity', allow_reuse=True)(validate_data_source_for_map_config) + _validate_dwell_time_actual = validator('dwell_time_actual', allow_reuse=True)(validate_data_source_for_map_config) + _validate_postsample_intensity = validator('postsample_intensity', allow_reuse=True)(validate_data_source_for_map_config) + _validate_scalar_data = validator('scalar_data', each_item=True, allow_reuse=True)(validate_data_source_for_map_config) + @validator('experiment_type') + def validate_experiment_type(cls, value, values): + '''Ensure values for the station and experiment_type fields are compatible''' + station = values.get('station') + if station == 'id1a3': + allowed_experiment_types = ['SAXSWAXS', 'EDD'] + elif station == 'id3a': + allowed_experiment_types = ['EDD'] + elif station == 'id3b': + allowed_experiment_types = ['SAXSWAXS', 'XRF'] + else: + allowed_experiment_types = [] + if value not in allowed_experiment_types: + raise(ValueError(f'For station {station}, allowed experiment types are {allowed_experiment_types} (suuplied experiment type {value} is not allowed)')) + return(value) + @property + def coords(self): + """Return a dictionary of the values of each independent dimension across + the map. + + :returns: A dictionary ofthe map's coordinate values. 
+ :rtype: dict[str,list[float]] + """ + try: + return(self._coords) + except: + coords = {} + for independent_dimension in self.independent_dimensions: + coords[independent_dimension.label] = [] + for scans in self.spec_scans: + for scan_number in scans.scan_numbers: + scanparser = scans.get_scanparser(scan_number) + for scan_step_index in range(scanparser.spec_scan_npts): + coords[independent_dimension.label].append(independent_dimension.get_value(scans, scan_number, scan_step_index)) + coords[independent_dimension.label] = np.unique(coords[independent_dimension.label]) + self._coords = coords + return(self._coords) + @property + def dims(self): + """Return a tuple of the independent dimension labels for the map.""" + return([point_by_point_scan_data.label for point_by_point_scan_data in self.independent_dimensions[::-1]]) + @property + def shape(self): + """Return the shape of the map -- a tuple representing the number of + unique values of each dimension across the map. + """ + return(tuple([len(values) for key,values in self.coords.items()][::-1])) + @property + def all_scalar_data(self): + """Return a list of all instances of `PointByPointScanData` for which + this map configuration will collect dataset-like data (as opposed to + axes-like data). + + This will be any and all of the items in the corrections-data-related + fields, as well as any additional items in the optional `scalar_data` + field.""" + return([getattr(self,l,None) for l in CorrectionsData.reserved_labels() if getattr(self,l,None) is not None] + self.scalar_data) + +def import_scanparser(station, experiment_type): + if station.lower() in ('id1a3', 'id3a'): + if experiment_type == 'SAXSWAXS': + from msnctools.scanparsers import SMBLinearScanParser + globals()['ScanParser'] = SMBLinearScanParser + elif experiment_type == 'EDD': + from msnctools.scanparsers import SMBMCAScanParser + globals()['ScanParser'] = SMBMCAScanParser + else: + raise(ValueError(f'Invalid experiment_type: {experiment_type}')) + elif station.lower() == 'id3b': + if experiment_type == 'SAXSWAXS': + from msnctools.scanparsers import FMBSAXSWAXSScanParser + globals()['ScanParser'] = FMBSAXSWAXSScanParser + elif experiment_type == 'XRF': + from msnctools.scanparsers import FMBXRFScanParser + globals()['ScanParser'] = FMBXRFScanParser + else: + raise(ValueError(f'Invalid experiment_type: {experiment_type}')) + else: + raise(ValueError(f'Invalid station: {station}'))
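A short sketch of the station/experiment dispatch in import_scanparser; the SPEC file path is hypothetical and msnctools must be installed:

from CHAP.models.map import import_scanparser, get_scanparser

# Bind the module-level ScanParser name to msnctools' SMBMCAScanParser
# for an EDD experiment collected at station id3a.
import_scanparser('id3a', 'EDD')

# get_scanparser returns None when the scan number is absent from the file.
scanparser = get_scanparser('samples/sample1/spec.log', 1)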
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/CHAP/models/workflow.py Tue Mar 28 15:07:30 2023 +0000
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+#-*- coding: utf-8 -*-
+#pylint: disable=
+"""
+File : workflow.py
+Author : Valentin Kuznetsov <vkuznet AT gmail dot com>
+Description: Workflow module
+"""
+
+# local modules
+from basemodel import BaseModel
+
+
+class Workflow(BaseModel):
+    """
+    Workflow docstring
+    """
+    def __init__(self, filename=None, **kwds):
+        super().__init__(filename, **kwds)
+        self.map['workflow'] = __name__
+        print('create Workflow calls: ', end='')
+
+
+class EDDWorkflow(Workflow):
+    """
+    EDDWorkflow
+    """
+    def __init__(self, filename=None, **kwds):
+        super().__init__(filename, **kwds)
+        self.map['workflow'] = 'edd'
+        print('create EDDWorkflow')
+
+class SAXWWorkflow(Workflow):
+    """
+    SAXWWorkflow
+    """
+    def __init__(self, filename=None, **kwds):
+        super().__init__(filename, **kwds)
+        self.map['workflow'] = 'saxw'
+        print('create SAXWWorkflow')
+
+if __name__ == '__main__':
+    print('--- create EDDWorkflow from config')
+    wflow = EDDWorkflow()
+    print('map', wflow.map)
+    print('--- create SAXWWorkflow from file.txt')
+    wflow = SAXWWorkflow('file.txt')
+    print('map', wflow.map)
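A sketch of how an additional workflow would follow the same pattern; XRFWorkflow is hypothetical and, like workflow.py itself, it assumes basemodel.py is importable from the working directory:

from workflow import Workflow

class XRFWorkflow(Workflow):
    """Hypothetical workflow illustrating the subclassing pattern."""
    def __init__(self, filename=None, **kwds):
        super().__init__(filename, **kwds)
        self.map['workflow'] = 'xrf'
        print('create XRFWorkflow')

wflow = XRFWorkflow()
print('map', wflow.map)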
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/CHAP/pipeline.py Tue Mar 28 15:07:30 2023 +0000
@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+#-*- coding: utf-8 -*-
+#pylint: disable=
+"""
+File : pipeline.py
+Author : Valentin Kuznetsov <vkuznet AT gmail dot com>
+Description:
+"""
+
+# system modules
+import logging
+from time import time
+
+class Pipeline():
+    """
+    Pipeline represents a generic pipeline class
+    """
+    def __init__(self, items=None, kwds=None):
+        """
+        Pipeline class constructor
+
+        :param items: list of objects
+        :param kwds: list of method args for individual objects
+        """
+        self.__name__ = self.__class__.__name__
+
+        self.items = items
+        self.kwds = kwds
+
+        self.logger = logging.getLogger(self.__name__)
+        self.logger.propagate = False
+
+    def execute(self):
+        """
+        execute API
+        """
+
+        t0 = time()
+        self.logger.info(f'Executing "execute"\n')
+
+        data = None
+        for item, kwargs in zip(self.items, self.kwds):
+            if hasattr(item, 'read'):
+                self.logger.info(f'Calling "read" on {item}')
+                data = item.read(**kwargs)
+            if hasattr(item, 'process'):
+                self.logger.info(f'Calling "process" on {item}')
+                data = item.process(data, **kwargs)
+            if hasattr(item, 'write'):
+                self.logger.info(f'Calling "write" on {item}')
+                data = item.write(data, **kwargs)
+
+        self.logger.info(f'Executed "execute" in {time()-t0:.3f} seconds')
+
+class PipelineObject():
+    """
+    PipelineObject represents a generic pipeline object
+    """
+    def __init__(self, reader, writer, processor, fitter):
+        """
+        PipelineObject class constructor
+        """
+        self.reader = reader
+        self.writer = writer
+        self.processor = processor
+
+    def read(self, filename):
+        """
+        read object API
+        """
+        return self.reader.read(filename)
+
+    def write(self, data, filename):
+        """
+        write object API
+        """
+        return self.writer.write(data, filename)
+
+    def process(self, data):
+        """
+        process object API
+        """
+        return self.processor.process(data)
+
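A minimal sketch of Pipeline.execute with one reader-like stage followed by the bundled PrintProcessor; ListReader is hypothetical and only illustrates the read/process/write duck typing used above:

from CHAP.pipeline import Pipeline
from CHAP.processor import PrintProcessor

class ListReader():
    """Hypothetical reader stage returning a fixed document list."""
    def read(self):
        return ['doc1', 'doc2']

# Each pipeline item is paired with a dict of keyword arguments for its calls.
pipeline = Pipeline(items=[ListReader(), PrintProcessor()], kwds=[{}, {}])
pipeline.execute()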
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CHAP/processor.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,948 @@ +#!/usr/bin/env python +#-*- coding: utf-8 -*- +#pylint: disable= +""" +File : processor.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: Processor module +""" + +# system modules +import argparse +import json +import logging +import sys +from time import time + +# local modules +# from pipeline import PipelineObject + +class Processor(): + """ + Processor represent generic processor + """ + def __init__(self): + """ + Processor constructor + """ + self.__name__ = self.__class__.__name__ + self.logger = logging.getLogger(self.__name__) + self.logger.propagate = False + + def process(self, data): + """ + process data API + """ + + t0 = time() + self.logger.info(f'Executing "process" with type(data)={type(data)}') + + data = self._process(data) + + self.logger.info(f'Finished "process" in {time()-t0:.3f} seconds\n') + + return(data) + + def _process(self, data): + # If needed, extract data from a returned value of Reader.read + if isinstance(data, list): + if all([isinstance(d,dict) for d in data]): + data = data[0]['data'] + # process operation is a simple print function + data += "process part\n" + # and we return data back to pipeline + return data + + +class TFaaSImageProcessor(Processor): + ''' + A Processor to get predictions from TFaaS inference server. + ''' + def process(self, data, url, model, verbose=False): + """ + process data API + """ + + t0 = time() + self.logger.info(f'Executing "process" with url {url} model {model}') + + data = self._process(data, url, model, verbose) + + self.logger.info(f'Finished "process" in {time()-t0:.3f} seconds\n') + + return(data) + + def _process(self, data, url, model, verbose): + '''Print and return the input data. + + :param data: Input image data, either file name or actual image data + :type data: object + :return: `data` + :rtype: object + ''' + from MLaaS.tfaas_client import predictImage + from pathlib import Path + self.logger.info(f"input data {type(data)}") + if isinstance(data, str) and Path(data).is_file(): + imgFile = data + data = predictImage(url, imgFile, model, verbose) + else: + rdict = data[0] + import requests + img = rdict['data'] + session = requests.Session() + rurl = url + '/predict/image' + payload = dict(model=model) + files = dict(image=img) + self.logger.info(f"HTTP request {rurl} with image file and {payload} payload") + req = session.post(rurl, files=files, data=payload ) + data = req.content + data = data.decode("utf-8").replace('\n', '') + self.logger.info(f"HTTP response {data}") + + return(data) + +class URLResponseProcessor(Processor): + def _process(self, data): + '''Take data returned from URLReader.read and return a decoded version of + the content. + + :param data: input data (output of URLReader.read) + :type data: list[dict] + :return: decoded data contents + :rtype: object + ''' + + data = data[0] + + content = data['data'] + encoding = data['encoding'] + + self.logger.debug(f'Decoding content of type {type(content)} with {encoding}') + + try: + content = content.decode(encoding) + except: + self.logger.warning(f'Failed to decode content of type {type(content)} with {encoding}') + + return(content) + +class PrintProcessor(Processor): + '''A Processor to simply print the input data to stdout and return the + original input data, unchanged in any way. + ''' + + def _process(self, data): + '''Print and return the input data. 
+ + :param data: Input data + :type data: object + :return: `data` + :rtype: object + ''' + + print(f'{self.__name__} data :') + + if callable(getattr(data, '_str_tree', None)): + # If data is likely an NXobject, print its tree representation + # (since NXobjects' str representations are just their nxname -- not + # very helpful). + print(data._str_tree(attrs=True, recursive=True)) + else: + print(str(data)) + + return(data) + +class NexusToNumpyProcessor(Processor): + '''A class to convert the default plottable data in an `NXobject` into an + `numpy.ndarray`. + ''' + + def _process(self, data): + '''Return the default plottable data signal in `data` as an + `numpy.ndarray`. + + :param data: input NeXus structure + :type data: nexusformat.nexus.tree.NXobject + :raises ValueError: if `data` has no default plottable data signal + :return: default plottable data signal in `data` + :rtype: numpy.ndarray + ''' + + default_data = data.plottable_data + + if default_data is None: + default_data_path = data.attrs['default'] + default_data = data.get(default_data_path) + if default_data is None: + raise(ValueError(f'The structure of {data} contains no default data')) + + default_signal = default_data.attrs.get('signal') + if default_signal is None: + raise(ValueError(f'The signal of {default_data} is unknown')) + default_signal = default_signal.nxdata + + np_data = default_data[default_signal].nxdata + + return(np_data) + +class NexusToXarrayProcessor(Processor): + '''A class to convert the default plottable data in an `NXobject` into an + `xarray.DataArray`.''' + + def _process(self, data): + '''Return the default plottable data signal in `data` as an + `xarray.DataArray`. + + :param data: input NeXus structure + :type data: nexusformat.nexus.tree.NXobject + :raises ValueError: if metadata for `xarray` is absen from `data` + :return: default plottable data signal in `data` + :rtype: xarray.DataArray + ''' + + from xarray import DataArray + + default_data = data.plottable_data + + if default_data is None: + default_data_path = data.attrs['default'] + default_data = data.get(default_data_path) + if default_data is None: + raise(ValueError(f'The structure of {data} contains no default data')) + + default_signal = default_data.attrs.get('signal') + if default_signal is None: + raise(ValueError(f'The signal of {default_data} is unknown')) + default_signal = default_signal.nxdata + + signal_data = default_data[default_signal].nxdata + + axes = default_data.attrs['axes'] + coords = {} + for axis_name in axes: + axis = default_data[axis_name] + coords[axis_name] = (axis_name, + axis.nxdata, + axis.attrs) + + dims = tuple(axes) + + name = default_signal + + attrs = default_data[default_signal].attrs + + return(DataArray(data=signal_data, + coords=coords, + dims=dims, + name=name, + attrs=attrs)) + +class XarrayToNexusProcessor(Processor): + '''A class to convert the data in an `xarray` structure to an + `nexusformat.nexus.NXdata`. + ''' + + def _process(self, data): + '''Return `data` represented as an `nexusformat.nexus.NXdata`. 
+ + :param data: The input `xarray` structure + :type data: typing.Union[xarray.DataArray, xarray.Dataset] + :return: The data and metadata in `data` + :rtype: nexusformat.nexus.NXdata + ''' + + from nexusformat.nexus import NXdata, NXfield + + signal = NXfield(value=data.data, name=data.name, attrs=data.attrs) + + axes = [] + for name, coord in data.coords.items(): + axes.append(NXfield(value=coord.data, name=name, attrs=coord.attrs)) + axes = tuple(axes) + + return(NXdata(signal=signal, axes=axes)) + +class XarrayToNumpyProcessor(Processor): + '''A class to convert the data in an `xarray.DataArray` structure to an + `numpy.ndarray`. + ''' + + def _process(self, data): + '''Return just the signal values contained in `data`. + + :param data: The input `xarray.DataArray` + :type data: xarray.DataArray + :return: The data in `data` + :rtype: numpy.ndarray + ''' + + return(data.data) + +class MapProcessor(Processor): + '''Class representing a process that takes a map configuration and returns a + `nexusformat.nexus.NXentry` representing that map's metadata and any + scalar-valued raw data requseted by the supplied map configuration. + ''' + + def _process(self, data): + '''Process the output of a `Reader` that contains a map configuration and + return a `nexusformat.nexus.NXentry` representing the map. + + :param data: Result of `Reader.read` where at least one item has the + value `'MapConfig'` for the `'schema'` key. + :type data: list[dict[str,object]] + :return: Map data & metadata (SPEC only, no detector) + :rtype: nexusformat.nexus.NXentry + ''' + + map_config = self.get_map_config(data) + nxentry = self.__class__.get_nxentry(map_config) + + return(nxentry) + + def get_map_config(self, data): + '''Get an instance of `MapConfig` from a returned value of `Reader.read` + + :param data: Result of `Reader.read` where at least one item has the + value `'MapConfig'` for the `'schema'` key. + :type data: list[dict[str,object]] + :raises Exception: If a valid `MapConfig` cannot be constructed from `data`. + :return: a valid instance of `MapConfig` with field values taken from `data`. 
+ :rtype: MapConfig + ''' + + from CHAP.models.map import MapConfig + + map_config = False + if isinstance(data, list): + for item in data: + if isinstance(item, dict): + if item.get('schema') == 'MapConfig': + map_config = item.get('data') + break + + if not map_config: + raise(ValueError('No map configuration found')) + + return(MapConfig(**map_config)) + + @staticmethod + def get_nxentry(map_config): + '''Use a `MapConfig` to construct a `nexusformat.nexus.NXentry` + + :param map_config: a valid map configuration + :type map_config: MapConfig + :return: the map's data and metadata contained in a NeXus structure + :rtype: nexusformat.nexus.NXentry + ''' + + from nexusformat.nexus import (NXcollection, + NXdata, + NXentry, + NXfield, + NXsample) + import numpy as np + + nxentry = NXentry(name=map_config.title) + + nxentry.map_config = json.dumps(map_config.dict()) + + nxentry[map_config.sample.name] = NXsample(**map_config.sample.dict()) + + nxentry.attrs['station'] = map_config.station + + nxentry.spec_scans = NXcollection() + for scans in map_config.spec_scans: + nxentry.spec_scans[scans.scanparsers[0].scan_name] = \ + NXfield(value=scans.scan_numbers, + dtype='int8', + attrs={'spec_file':str(scans.spec_file)}) + + nxentry.data = NXdata() + nxentry.data.attrs['axes'] = map_config.dims + for i,dim in enumerate(map_config.independent_dimensions[::-1]): + nxentry.data[dim.label] = NXfield(value=map_config.coords[dim.label], + units=dim.units, + attrs={'long_name': f'{dim.label} ({dim.units})', + 'data_type': dim.data_type, + 'local_name': dim.name}) + nxentry.data.attrs[f'{dim.label}_indices'] = i + + signal = False + auxilliary_signals = [] + for data in map_config.all_scalar_data: + nxentry.data[data.label] = NXfield(value=np.empty(map_config.shape), + units=data.units, + attrs={'long_name': f'{data.label} ({data.units})', + 'data_type': data.data_type, + 'local_name': data.name}) + if not signal: + signal = data.label + else: + auxilliary_signals.append(data.label) + + if signal: + nxentry.data.attrs['signal'] = signal + nxentry.data.attrs['auxilliary_signals'] = auxilliary_signals + + for scans in map_config.spec_scans: + for scan_number in scans.scan_numbers: + scanparser = scans.get_scanparser(scan_number) + for scan_step_index in range(scanparser.spec_scan_npts): + map_index = scans.get_index(scan_number, scan_step_index, map_config) + for data in map_config.all_scalar_data: + nxentry.data[data.label][map_index] = data.get_value(scans, scan_number, scan_step_index) + + return(nxentry) + +class IntegrationProcessor(Processor): + '''Class for integrating 2D detector data + ''' + + def _process(self, data): + '''Integrate the input data with the integration method and keyword + arguments supplied and return the results. + + :param data: input data, including raw data, integration method, and + keyword args for the integration method. + :type data: tuple[typing.Union[numpy.ndarray, list[numpy.ndarray]], + callable, + dict] + :param integration_method: the method of a + `pyFAI.azimuthalIntegrator.AzimuthalIntegrator` or + `pyFAI.multi_geometry.MultiGeometry` that returns the desired + integration results. 
+ :return: integrated raw data + :rtype: pyFAI.containers.IntegrateResult + ''' + + detector_data, integration_method, integration_kwargs = data + + return(integration_method(detector_data, **integration_kwargs)) + +class IntegrateMapProcessor(Processor): + '''Class representing a process that takes a map and integration + configuration and returns a `nexusformat.nexus.NXprocess` containing a map of + the integrated detector data requested. + ''' + + def _process(self, data): + '''Process the output of a `Reader` that contains a map and integration + configuration and return a `nexusformat.nexus.NXprocess` containing a map + of the integrated detector data requested + + :param data: Result of `Reader.read` where at least one item has the + value `'MapConfig'` for the `'schema'` key, and at least one item has + the value `'IntegrationConfig'` for the `'schema'` key. + :type data: list[dict[str,object]] + :return: integrated data and process metadata + :rtype: nexusformat.nexus.NXprocess + ''' + + map_config, integration_config = self.get_configs(data) + nxprocess = self.get_nxprocess(map_config, integration_config) + + return(nxprocess) + + def get_configs(self, data): + '''Return valid instances of `MapConfig` and `IntegrationConfig` from the + input supplied by `MultipleReader`. + + :param data: Result of `Reader.read` where at least one item has the + value `'MapConfig'` for the `'schema'` key, and at least one item has + the value `'IntegrationConfig'` for the `'schema'` key. + :type data: list[dict[str,object]] + :raises ValueError: if `data` cannot be parsed into map and integration configurations. + :return: valid map and integration configuration objects. + :rtype: tuple[MapConfig, IntegrationConfig] + ''' + + self.logger.debug('Getting configuration objects') + t0 = time() + + from CHAP.models.map import MapConfig + from CHAP.models.integration import IntegrationConfig + + map_config = False + integration_config = False + if isinstance(data, list): + for item in data: + if isinstance(item, dict): + schema = item.get('schema') + if schema == 'MapConfig': + map_config = item.get('data') + elif schema == 'IntegrationConfig': + integration_config = item.get('data') + + if not map_config: + raise(ValueError('No map configuration found')) + if not integration_config: + raise(ValueError('No integration configuration found')) + + map_config = MapConfig(**map_config) + integration_config = IntegrationConfig(**integration_config) + + self.logger.debug(f'Got configuration objects in {time()-t0:.3f} seconds') + + return(map_config, integration_config) + + def get_nxprocess(self, map_config, integration_config): + '''Use a `MapConfig` and `IntegrationConfig` to construct a + `nexusformat.nexus.NXprocess` + + :param map_config: a valid map configuration + :type map_config: MapConfig + :param integration_config: a valid integration configuration + :type integration_config" IntegrationConfig + :return: the integrated detector data and metadata contained in a NeXus + structure + :rtype: nexusformat.nexus.NXprocess + ''' + + self.logger.debug('Constructing NXprocess') + t0 = time() + + from nexusformat.nexus import (NXdata, + NXdetector, + NXfield, + NXprocess) + import numpy as np + import pyFAI + + nxprocess = NXprocess(name=integration_config.title) + + nxprocess.map_config = json.dumps(map_config.dict()) + nxprocess.integration_config = json.dumps(integration_config.dict()) + + nxprocess.program = 'pyFAI' + nxprocess.version = pyFAI.version + + for k,v in integration_config.dict().items(): + if 
k == 'detectors': + continue + nxprocess.attrs[k] = v + + for detector in integration_config.detectors: + nxprocess[detector.prefix] = NXdetector() + nxprocess[detector.prefix].local_name = detector.prefix + nxprocess[detector.prefix].distance = detector.azimuthal_integrator.dist + nxprocess[detector.prefix].distance.attrs['units'] = 'm' + nxprocess[detector.prefix].calibration_wavelength = detector.azimuthal_integrator.wavelength + nxprocess[detector.prefix].calibration_wavelength.attrs['units'] = 'm' + nxprocess[detector.prefix].attrs['poni_file'] = str(detector.poni_file) + nxprocess[detector.prefix].attrs['mask_file'] = str(detector.mask_file) + nxprocess[detector.prefix].raw_data_files = np.full(map_config.shape, '', dtype='|S256') + + nxprocess.data = NXdata() + + nxprocess.data.attrs['axes'] = (*map_config.dims, *integration_config.integrated_data_dims) + for i,dim in enumerate(map_config.independent_dimensions[::-1]): + nxprocess.data[dim.label] = NXfield(value=map_config.coords[dim.label], + units=dim.units, + attrs={'long_name': f'{dim.label} ({dim.units})', + 'data_type': dim.data_type, + 'local_name': dim.name}) + nxprocess.data.attrs[f'{dim.label}_indices'] = i + + for i,(coord_name,coord_values) in enumerate(integration_config.integrated_data_coordinates.items()): + if coord_name == 'radial': + type_ = pyFAI.units.RADIAL_UNITS + elif coord_name == 'azimuthal': + type_ = pyFAI.units.AZIMUTHAL_UNITS + coord_units = pyFAI.units.to_unit(getattr(integration_config, f'{coord_name}_units'), type_=type_) + nxprocess.data[coord_units.name] = coord_values + nxprocess.data.attrs[f'{coord_units.name}_indices'] = i+len(map_config.coords) + nxprocess.data[coord_units.name].units = coord_units.unit_symbol + nxprocess.data[coord_units.name].attrs['long_name'] = coord_units.label + + nxprocess.data.attrs['signal'] = 'I' + nxprocess.data.I = NXfield(value=np.empty((*tuple([len(coord_values) for coord_name,coord_values in map_config.coords.items()][::-1]), *integration_config.integrated_data_shape)), + units='a.u', + attrs={'long_name':'Intensity (a.u)'}) + + integrator = integration_config.get_multi_geometry_integrator() + if integration_config.integration_type == 'azimuthal': + integration_method = integrator.integrate1d + integration_kwargs = { + 'lst_mask': [detector.mask_array for detector in integration_config.detectors], + 'npt': integration_config.radial_npt + } + elif integration_config.integration_type == 'cake': + integration_method = integrator.integrate2d + integration_kwargs = { + 'lst_mask': [detector.mask_array for detector in integration_config.detectors], + 'npt_rad': integration_config.radial_npt, + 'npt_azim': integration_config.azimuthal_npt, + 'method': 'bbox' + } + + integration_processor = IntegrationProcessor() + integration_processor.logger.setLevel(self.logger.getEffectiveLevel()) + integration_processor.logger.addHandler(self.logger.handlers[0]) + lst_args = [] + for scans in map_config.spec_scans: + for scan_number in scans.scan_numbers: + scanparser = scans.get_scanparser(scan_number) + for scan_step_index in range(scanparser.spec_scan_npts): + map_index = scans.get_index(scan_number, scan_step_index, map_config) + detector_data = scans.get_detector_data(integration_config.detectors, scan_number, scan_step_index) + result = integration_processor.process((detector_data, integration_method, integration_kwargs)) + nxprocess.data.I[map_index] = result.intensity + for detector in integration_config.detectors: + nxprocess[detector.prefix].raw_data_files[map_index] = 
scanparser.get_detector_data_file(detector.prefix, scan_step_index) + + self.logger.debug(f'Constructed NXprocess in {time()-t0:.3f} seconds') + + return(nxprocess) + +class MCACeriaCalibrationProcessor(Processor): + '''Class representing the procedure to use a CeO2 scan to obtain tuned values + for the bragg diffraction angle and linear correction parameters for MCA + channel energies for an EDD experimental setup. + ''' + + def _process(self, data): + '''Return tuned values for 2&theta and linear correction parameters for + the MCA channel energies. + + :param data: input configuration for the raw data & tuning procedure + :type data: list[dict[str,object]] + :return: original configuration dictionary with tuned values added + :rtype: dict[str,float] + ''' + + calibration_config = self.get_config(data) + + tth, slope, intercept = self.calibrate(calibration_config) + + calibration_config.tth_calibrated = tth + calibration_config.slope_calibrated = slope + calibration_config.intercept_calibrated = intercept + + return(calibration_config.dict()) + + def get_config(self, data): + '''Get an instance of the configuration object needed by this + `Processor` from a returned value of `Reader.read` + + :param data: Result of `Reader.read` where at least one item has the + value `'MCACeriaCalibrationConfig'` for the `'schema'` key. + :type data: list[dict[str,object]] + :raises Exception: If a valid config object cannot be constructed from `data`. + :return: a valid instance of a configuration object with field values + taken from `data`. + :rtype: MCACeriaCalibrationConfig + ''' + + from CHAP.models.edd import MCACeriaCalibrationConfig + + calibration_config = False + if isinstance(data, list): + for item in data: + if isinstance(item, dict): + if item.get('schema') == 'MCACeriaCalibrationConfig': + calibration_config = item.get('data') + break + + if not calibration_config: + raise(ValueError('No MCA ceria calibration configuration found in input data')) + + return(MCACeriaCalibrationConfig(**calibration_config)) + + def calibrate(self, calibration_config): + '''Iteratively calibrate 2&theta by fitting selected peaks of an MCA + spectrum until the computed strain is sufficiently small. Use the fitted + peak locations to determine linear correction parameters for the MCA's + channel energies. + + :param calibration_config: object configuring the CeO2 calibration procedure + :type calibration_config: MCACeriaCalibrationConfig + :return: calibrated values of 2&theta and linear correction parameters + for MCA channel energies : tth, slope, intercept + :rtype: float, float, float + ''' + + from msnctools.fit import Fit, FitMultipeak + import numpy as np + from scipy.constants import physical_constants + + hc = physical_constants['Planck constant in eV/Hz'][0] * \ + physical_constants['speed of light in vacuum'][0] * \ + 1e7 # We'll work in keV and A, not eV and m. 
+ + # Collect raw MCA data of interest + mca_data = calibration_config.mca_data() + mca_bin_energies = np.arange(0, calibration_config.num_bins) * \ + (calibration_config.max_energy_kev / calibration_config.num_bins) + + # Mask out the corrected MCA data for fitting + mca_mask = calibration_config.mca_mask() + fit_mca_energies = mca_bin_energies[mca_mask] + fit_mca_intensities = mca_data[mca_mask] + + # Correct raw MCA data for variable flux at different energies + flux_correct = calibration_config.flux_correction_interpolation_function() + mca_intensity_weights = flux_correct(fit_mca_energies) + fit_mca_intensities = fit_mca_intensities / mca_intensity_weights + + # Get the HKLs and lattice spacings that will be used for fitting + tth = calibration_config.tth_initial_guess + fit_hkls, fit_ds = calibration_config.fit_ds() + c_1 = fit_hkls[:,0]**2 + fit_hkls[:,1]**2 + fit_hkls[:,2]**2 + + for iter_i in range(calibration_config.max_iter): + + ### Perform the uniform fit first ### + + # Get expected peak energy locations for this iteration's starting + # value of tth + fit_lambda = 2.0 * fit_ds * np.sin(0.5*np.radians(tth)) + fit_E0 = hc / fit_lambda + + # Run the uniform fit + best_fit, residual, best_values, best_errors, redchi, success = \ + FitMultipeak.fit_multipeak(fit_mca_intensities, + fit_E0, + x=fit_mca_energies, + fit_type='uniform') + + # Extract values of interest from the best values for the uniform fit + # parameters + uniform_fit_centers = [best_values[f'peak{i+1}_center'] for i in range(len(calibration_config.fit_hkls))] + # uniform_a = best_values['scale_factor'] + # uniform_strain = np.log(uniform_a / calibration_config.lattice_parameter_angstrom) + # uniform_tth = tth * (1.0 + uniform_strain) + # uniform_rel_rms_error = np.linalg.norm(residual) / np.linalg.norm(fit_mca_intensities) + + ### Next, perform the unconstrained fit ### + + # Use the peak locations found in the uniform fit as the initial + # guesses for peak locations in the unconstrained fit + best_fit, residual, best_values, best_errors, redchi, success = \ + FitMultipeak.fit_multipeak(fit_mca_intensities, + uniform_fit_centers, + x=fit_mca_energies, + fit_type='unconstrained') + + # Extract values of interest from the best values for the + # unconstrained fit parameters + unconstrained_fit_centers = np.array([best_values[f'peak{i+1}_center'] for i in range(len(calibration_config.fit_hkls))]) + unconstrained_a = 0.5 * hc * np.sqrt(c_1) / (unconstrained_fit_centers * abs(np.sin(0.5*np.radians(tth)))) + unconstrained_strains = np.log(unconstrained_a / calibration_config.lattice_parameter_angstrom) + unconstrained_strain = np.mean(unconstrained_strains) + unconstrained_tth = tth * (1.0 + unconstrained_strain) + # unconstrained_rel_rms_error = np.linalg.norm(residual) / np.linalg.norm(fit_mca_intensities) + + + # Update tth for the next iteration of tuning + prev_tth = tth + tth = unconstrained_tth + + # Stop tuning tth at this iteration if differences are small enough + if abs(tth - prev_tth) < calibration_config.tune_tth_tol: + break + + # Fit line to expected / computed peak locations from the last + # unconstrained fit. 
+ fit = Fit.fit_data(fit_E0,'linear', x=unconstrained_fit_centers, nan_policy='omit') + slope = fit.best_values['slope'] + intercept = fit.best_values['intercept'] + + return(float(tth), float(slope), float(intercept)) + +class MCADataProcessor(Processor): + '''Class representing a process to return data from a MCA, restuctured to + incorporate the shape & metadata associated with a map configuration to + which the MCA data belongs, and linearly transformed according to the + results of a ceria calibration. + ''' + + def _process(self, data): + '''Process configurations for a map and MCA detector(s), and return the + raw MCA data collected over the map. + + :param data: input map configuration and results of ceria calibration + :type data: list[dict[str,object]] + :return: calibrated and flux-corrected MCA data + :rtype: nexusformat.nexus.NXentry + ''' + + map_config, calibration_config = self.get_configs(data) + nxroot = self.get_nxroot(map_config, calibration_config) + + return(nxroot) + + def get_configs(self, data): + '''Get instances of the configuration objects needed by this + `Processor` from a returned value of `Reader.read` + + :param data: Result of `Reader.read` where at least one item has the + value `'MapConfig'` for the `'schema'` key, and at least one item has + the value `'MCACeriaCalibrationConfig'` for the `'schema'` key. + :type data: list[dict[str,object]] + :raises Exception: If valid config objects cannot be constructed from `data`. + :return: valid instances of the configuration objects with field values + taken from `data`. + :rtype: tuple[MapConfig, MCACeriaCalibrationConfig] + ''' + + from CHAP.models.map import MapConfig + from CHAP.models.edd import MCACeriaCalibrationConfig + + map_config = False + calibration_config = False + if isinstance(data, list): + for item in data: + if isinstance(item, dict): + schema = item.get('schema') + if schema == 'MapConfig': + map_config = item.get('data') + elif schema == 'MCACeriaCalibrationConfig': + calibration_config = item.get('data') + + if not map_config: + raise(ValueError('No map configuration found in input data')) + if not calibration_config: + raise(ValueError('No MCA ceria calibration configuration found in input data')) + + return(MapConfig(**map_config), MCACeriaCalibrationConfig(**calibration_config)) + + def get_nxroot(self, map_config, calibration_config): + '''Get a map of the MCA data collected by the scans in `map_config`. The + MCA data will be calibrated and flux-corrected according to the + parameters included in `calibration_config`. The data will be returned + along with relevant metadata in the form of a NeXus structure. 
+ + :param map_config: the map configuration + :type map_config: MapConfig + :param calibration_config: the calibration configuration + :type calibration_config: MCACeriaCalibrationConfig + :return: a map of the calibrated and flux-corrected MCA data + :rtype: nexusformat.nexus.NXroot + ''' + + from nexusformat.nexus import (NXdata, + NXdetector, + NXentry, + NXinstrument, + NXroot) + import numpy as np + + nxroot = NXroot() + + nxroot[map_config.title] = MapProcessor.get_nxentry(map_config) + nxentry = nxroot[map_config.title] + + nxentry.instrument = NXinstrument() + nxentry.instrument.detector = NXdetector() + nxentry.instrument.detector.calibration_configuration = json.dumps(calibration_config.dict()) + + nxentry.instrument.detector.data = NXdata() + nxdata = nxentry.instrument.detector.data + nxdata.raw = np.empty((*map_config.shape, calibration_config.num_bins)) + nxdata.raw.attrs['units'] = 'counts' + nxdata.channel_energy = calibration_config.slope_calibrated * \ + np.arange(0, calibration_config.num_bins) * \ + (calibration_config.max_energy_kev / calibration_config.num_bins) + \ + calibration_config.intercept_calibrated + nxdata.channel_energy.attrs['units'] = 'keV' + + for scans in map_config.spec_scans: + for scan_number in scans.scan_numbers: + scanparser = scans.get_scanparser(scan_number) + for scan_step_index in range(scanparser.spec_scan_npts): + map_index = scans.get_index(scan_number, scan_step_index, map_config) + nxdata.raw[map_index] = scanparser.get_detector_data(calibration_config.detector_name, scan_step_index) + + nxentry.data.makelink(nxdata.raw, name=calibration_config.detector_name) + nxentry.data.makelink(nxdata.channel_energy, name=f'{calibration_config.detector_name}_channel_energy') + if isinstance(nxentry.data.attrs['axes'], str): + nxentry.data.attrs['axes'] = [nxentry.data.attrs['axes'], f'{calibration_config.detector_name}_channel_energy'] + else: + nxentry.data.attrs['axes'] += [f'{calibration_config.detector_name}_channel_energy'] + nxentry.data.attrs['signal'] = calibration_config.detector_name + + return(nxroot) + +class StrainAnalysisProcessor(Processor): + '''Class representing a process to compute a map of sample strains by fitting + bragg peaks in 1D detector data and analyzing the difference between measured + peak locations and expected peak locations for the sample measured. + ''' + + def _process(self, data): + '''Process the input map detector data & configuration for the strain + analysis procedure, and return a map of sample strains. + + :param data: results of `MutlipleReader.read` containing input map + detector data and strain analysis configuration + :type data: dict[list[str,object]] + :return: map of sample strains + :rtype: xarray.Dataset + ''' + + strain_analysis_config = self.get_config(data) + + return(data) + + def get_config(self, data): + '''Get instances of the configuration objects needed by this + `Processor` from a returned value of `Reader.read` + + :param data: Result of `Reader.read` where at least one item has the + value `'StrainAnalysisConfig'` for the `'schema'` key. + :type data: list[dict[str,object]] + :raises Exception: If valid config objects cannot be constructed from `data`. + :return: valid instances of the configuration objects with field values + taken from `data`. 
+ :rtype: StrainAnalysisConfig + ''' + + strain_analysis_config = False + if isinstance(data, list): + for item in data: + if isinstance(item, dict): + schema = item.get('schema') + if item.get('schema') == 'StrainAnalysisConfig': + strain_analysis_config = item.get('data') + + if not strain_analysis_config: + raise(ValueError('No strain analysis configuration found in input data')) + + return(strain_analysis_config) + + +class OptionParser(): + '''User based option parser''' + def __init__(self): + self.parser = argparse.ArgumentParser(prog='PROG') + self.parser.add_argument("--data", action="store", + dest="data", default="", help="Input data") + self.parser.add_argument("--processor", action="store", + dest="processor", default="Processor", help="Processor class name") + self.parser.add_argument('--log-level', choices=logging._nameToLevel.keys(), + dest='log_level', default='INFO', help='logging level') + +def main(): + '''Main function''' + optmgr = OptionParser() + opts = optmgr.parser.parse_args() + clsName = opts.processor + try: + processorCls = getattr(sys.modules[__name__],clsName) + except: + print(f'Unsupported processor {clsName}') + sys.exit(1) + + processor = processorCls() + processor.logger.setLevel(getattr(logging, opts.log_level)) + log_handler = logging.StreamHandler() + log_handler.setFormatter(logging.Formatter('{name:20}: {message}', style='{')) + processor.logger.addHandler(log_handler) + data = processor.process(opts.data) + + print(f"Processor {processor} operates on data {data}") + +if __name__ == '__main__': + main()
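As a usage note, the processors above only override `_process`; `Processor.process` wraps it with timing and logging. A minimal sketch with a hypothetical subclass:
```
# Minimal sketch: a hypothetical processor that upper-cases string data.
from CHAP.processor import Processor

class UpperCaseProcessor(Processor):
    def _process(self, data):
        # data is whatever the previous pipeline item returned
        return data.upper() if isinstance(data, str) else data

print(UpperCaseProcessor().process('some text'))   # -> 'SOME TEXT'
```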
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/CHAP/reader.py Tue Mar 28 15:07:30 2023 +0000
@@ -0,0 +1,209 @@
+#!/usr/bin/env python
+"""
+File : reader.py
+Author : Valentin Kuznetsov <vkuznet AT gmail dot com>
+Description: generic Reader module
+"""
+
+# system modules
+import argparse
+import json
+import logging
+import sys
+from time import time
+
+# local modules
+# from pipeline import PipelineObject
+
+class Reader():
+    """
+    Reader represent generic file reader
+    """
+
+    def __init__(self):
+        """
+        Constructor of Reader class
+        """
+        self.__name__ = self.__class__.__name__
+        self.logger = logging.getLogger(self.__name__)
+        self.logger.propagate = False
+
+    def read(self, type_=None, schema=None, encoding=None, **_read_kwargs):
+        '''Read API
+
+        Wrapper to read, format, and return the data requested.
+
+        :param type_: the expected type of data read from `filename`, defaults
+            to `None`
+        :type type_: type, optional
+        :param schema: the expected schema of the data read from `filename`,
+            defaults to `None`
+        :type schema: str, optional
+        :param _read_kwargs: keyword arguments to pass to `self._read`, defaults
+            to `{}`
+        :type _read_kwargs: dict, optional
+        :return: list with one item: a dictionary containing the data read from
+            `filename`, the name of this `Reader`, and the values of `type_` and
+            `schema`.
+        :rtype: list[dict[str,object]]
+        '''
+
+        t0 = time()
+        self.logger.info(f'Executing "read" with type={type_}, schema={schema}, kwargs={_read_kwargs}')
+
+        data = [{'name': self.__name__,
+                 'data': self._read(**_read_kwargs),
+                 'type': type_,
+                 'schema': schema,
+                 'encoding': encoding}]
+
+        self.logger.info(f'Finished "read" in {time()-t0:.3f} seconds\n')
+        return(data)
+
+    def _read(self, filename):
+        '''Read and return the data requested from `filename`
+
+        :param filename: Name of file to read from
+        :return: the contents of `filename`
+        '''
+
+        if not filename:
+            self.logger.warning('No file name is given, will skip read operation')
+            return None
+
+        with open(filename) as file:
+            data = file.read()
+        return(data)
+
+class MultipleReader(Reader):
+    def read(self, readers):
+        '''Return results from multiple `Reader`s.
+
+        :param readers: a list of dictionaries, each mapping the name of a
+            `Reader` class in this module to the keyword arguments used when
+            calling its `read` method.
+        :type readers: list[dict]
+        :return: The results of calling `Reader.read(**kwargs)` for each item
+            configured in `readers`.
+        :rtype: list[dict[str,object]]
+        '''
+
+        t0 = time()
+        self.logger.info(f'Executing "read" with {len(readers)} Readers')
+
+        data = []
+        for reader_config in readers:
+            reader_name = list(reader_config.keys())[0]
+            reader_class = getattr(sys.modules[__name__], reader_name)
+            reader = reader_class()
+            reader_kwargs = reader_config[reader_name]
+
+            data.extend(reader.read(**reader_kwargs))
+
+        self.logger.info(f'Finished "read" in {time()-t0:.3f} seconds\n')
+
+        return(data)
+
+class YAMLReader(Reader):
+    def _read(self, filename):
+        '''Return a dictionary from the contents of a YAML file.
+
+        :param filename: name of the YAML file to read from
+        :return: the contents of `filename`
+        :rtype: dict
+        '''
+
+        import yaml
+
+        with open(filename) as file:
+            data = yaml.safe_load(file)
+        return(data)
+
+class BinaryFileReader(Reader):
+    def _read(self, filename):
+        '''Return the content of a given file.
+
+        :param filename: name of the binary file to read from
+        :return: the content of `filename`
+        :rtype: bytes
+        '''
+        with open(filename, 'rb') as file:
+            data = file.read()
+        return(data)
+
+class NexusReader(Reader):
+    def _read(self, filename, nxpath='/'):
+        '''Return the NeXus object stored at `nxpath` in the NeXus file
+        `filename`.
+
+        :param filename: name of the NeXus file to read from
+        :type filename: str
+        :param nxpath: path to a specific location in the NeXus file to read
+            from, defaults to `'/'`
+        :type nxpath: str, optional
+        :raises nexusformat.nexus.NeXusError: if `filename` is not a NeXus
+            file or `nxpath` is not in `filename`.
+        :return: the NeXus structure indicated by `filename` and `nxpath`.
+        :rtype: nexusformat.nexus.NXobject
+        '''
+
+        from nexusformat.nexus import nxload
+
+        nxobject = nxload(filename)[nxpath]
+        return(nxobject)
+
+class URLReader(Reader):
+    def _read(self, url, headers={}):
+        '''Make an HTTP(S) GET request to the provided URL and return the
+        results. Headers for the request are optional.
+
+        :param url: the URL to read
+        :type url: str
+        :param headers: headers to attach to the request, defaults to `{}`
+        :type headers: dict, optional
+        :return: the content of the response
+        :rtype: object
+        '''
+
+        import requests
+
+        resp = requests.get(url, headers=headers)
+        data = resp.content
+
+        self.logger.debug(f'Response content: {data}')
+
+        return(data)
+
+class OptionParser():
+    '''User based option parser'''
+    def __init__(self):
+        self.parser = argparse.ArgumentParser(prog='PROG')
+        self.parser.add_argument("--filename", action="store",
+            dest="filename", default="", help="Input file")
+        self.parser.add_argument("--reader", action="store",
+            dest="reader", default="Reader", help="Reader class name")
+        self.parser.add_argument('--log-level', choices=logging._nameToLevel.keys(),
+            dest='log_level', default='INFO', help='logging level')
+
+def main():
+    '''Main function'''
+    optmgr = OptionParser()
+    opts = optmgr.parser.parse_args()
+    clsName = opts.reader
+    try:
+        readerCls = getattr(sys.modules[__name__],clsName)
+    except:
+        print(f'Unsupported reader {clsName}')
+        sys.exit(1)
+
+    reader = readerCls()
+    reader.logger.setLevel(getattr(logging, opts.log_level))
+    log_handler = logging.StreamHandler()
+    log_handler.setFormatter(logging.Formatter('{name:20}: {message}', style='{'))
+    reader.logger.addHandler(log_handler)
+    data = reader.read(filename=opts.filename)
+
+    print(f"Reader {reader} reads from {opts.filename}, data {data}")
+
+if __name__ == '__main__':
+    main()
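For reference, a minimal sketch of `MultipleReader`: each entry maps the name of a `Reader` class defined in this module to the keyword arguments for its `read` call. The file name, URL, and schema value below are placeholders.
```
# Minimal sketch of MultipleReader; file name, URL and schema are placeholders.
from CHAP.reader import MultipleReader

readers = [
    {'YAMLReader': {'filename': 'map.yaml', 'schema': 'MapConfig'}},
    {'URLReader': {'url': 'https://example.org/data.bin'}},
]
data = MultipleReader().read(readers)
# data is a list of dicts with 'name', 'data', 'type', 'schema' and 'encoding' keys
```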
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CHAP/runner.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,82 @@ +#!/usr/bin/env python +#-*- coding: utf-8 -*- +#pylint: disable= +""" +File : runner.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: +""" + +# system modules +import argparse +import logging +import os +import sys +import yaml + +# local modules +from CHAP.pipeline import Pipeline + + +class OptionParser(): + def __init__(self): + "User based option parser" + self.parser = argparse.ArgumentParser(prog='PROG') + self.parser.add_argument("--config", action="store", + dest="config", default="", help="Input configuration file") + self.parser.add_argument('--log-level', choices=logging._nameToLevel.keys(), + dest='log_level', default='INFO', help='logging level') + +def main(): + "Main function" + optmgr = OptionParser() + opts = optmgr.parser.parse_args() + runner(opts) + +def runner(opts): + """ + Main runner function + + :param opts: opts is an instance of argparse.Namespace which contains all input parameters + """ + + logger = logging.getLogger(__name__) + log_level = getattr(logging, opts.log_level.upper()) + logger.setLevel(log_level) + log_handler = logging.StreamHandler() + log_handler.setFormatter(logging.Formatter('{name:20}: {message}', style='{')) + logger.addHandler(log_handler) + + config = {} + with open(opts.config) as file: + config = yaml.safe_load(file) + logger.info(f'Input configuration: {config}\n') + pipeline_config = config.get('pipeline', []) + objects = [] + kwds = [] + for item in pipeline_config: + # load individual object with given name from its module + if isinstance(item, dict): + name = list(item.keys())[0] + kwargs = item[name] + else: + name = item + kwargs = {} + modName, clsName = name.split('.') + module = __import__(f'CHAP.{modName}', fromlist=[clsName]) + obj = getattr(module, clsName)() + obj.logger.setLevel(log_level) + obj.logger.addHandler(log_handler) + logger.info(f'Loaded {obj}') + objects.append(obj) + kwds.append(kwargs) + pipeline = Pipeline(objects, kwds) + pipeline.logger.setLevel(log_level) + pipeline.logger.addHandler(log_handler) + logger.info(f'Loaded {pipeline} with {len(objects)} items\n') + logger.info(f'Calling "execute" on {pipeline}') + pipeline.execute() + + +if __name__ == '__main__': + main()
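For reference, a sketch of a configuration file in the form this runner parses: each `pipeline` entry is either a bare `module.Class` name or a one-key mapping from that name to the keyword arguments passed on to the object's `read`, `process`, or `write` call. The file names and schema below are placeholders; such a file would be passed to `runner.py --config`.
```
pipeline:
  - reader.YAMLReader:
      filename: map.yaml
      schema: MapConfig
  - processor.PrintProcessor
  - writer.YAMLWriter:
      filename: data.out
      force_overwrite: true
```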
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CHAP/writer.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,243 @@ +#!/usr/bin/env python +""" +File : writer.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: generic Writer module +""" + +# system modules +import argparse +import json +import logging +import os +import sys +from time import time + +# local modules +# from pipeline import PipelineObject + +class Writer(): + """ + Writer represent generic file writer + """ + + def __init__(self): + """ + Constructor of Writer class + """ + self.__name__ = self.__class__.__name__ + self.logger = logging.getLogger(self.__name__) + self.logger.propagate = False + + def write(self, data, filename, **_write_kwargs): + """ + write API + + :param filename: Name of file to write to + :param data: data to write to file + :return: data written to file + """ + + t0 = time() + self.logger.info(f'Executing "write" with filename={filename}, type(data)={type(data)}, kwargs={_write_kwargs}') + + data = self._write(data, filename, **_write_kwargs) + + self.logger.info(f'Finished "write" in {time()-t0:.3f} seconds\n') + + return(data) + + def _write(self, data, filename): + with open(filename, 'a') as file: + file.write(data) + return(data) + +class YAMLWriter(Writer): + def _write(self, data, filename, force_overwrite=False): + '''If `data` is a `dict`, write it to `filename`. + + :param data: the dictionary to write to `filename`. + :type data: dict + :param filename: name of the file to write to. + :type filename: str + :param force_overwrite: flag to allow data in `filename` to be + overwritten if it already exists. + :type force_overwrite: bool + :raises TypeError: if `data` is not a `dict` + :raises RuntimeError: if `filename` already exists and + `force_overwrite` is `False`. + :return: the original input data + :rtype: dict + ''' + + import yaml + + if not isinstance(data, (dict, list)): + raise(TypeError(f'{self.__name__}.write: input data must be a dict or list.')) + + if not force_overwrite: + if os.path.isfile(filename): + raise(RuntimeError(f'{self.__name__}: {filename} already exists.')) + + with open(filename, 'w') as outf: + yaml.dump(data, outf, sort_keys=False) + + return(data) + +class ExtractArchiveWriter(Writer): + def _write(self, data, filename): + '''Take a .tar archive represented as bytes in `data` and write the + extracted archive to files. + + :param data: the archive data + :type data: bytes + :param filename: the name of a directory to which the archive files will + be written + :type filename: str + :return: the original `data` + :rtype: bytes + ''' + + from io import BytesIO + import tarfile + + tar = tarfile.open(fileobj=BytesIO(data)) + tar.extractall(path=filename) + + return(data) + + +class NexusWriter(Writer): + def _write(self, data, filename, force_overwrite=False): + '''Write `data` to a NeXus file + + :param data: the data to write to `filename`. + :param filename: name of the file to write to. + :param force_overwrite: flag to allow data in `filename` to be + overwritten, if it already exists. 
+ :return: the original input data + ''' + + from nexusformat.nexus import NXobject + import xarray as xr + + if isinstance(data, NXobject): + nxstructure = data + + elif isinstance(data, xr.Dataset): + nxstructure = self.get_nxdata_from_dataset(data) + + elif isinstance(data, xr.DataArray): + nxstructure = self.get_nxdata_from_dataarray(data) + + else: + raise(TypeError(f'{self.__name__}.write: unknown data format: {type(data).__name__}')) + + mode = 'w' if force_overwrite else 'w-' + nxstructure.save(filename, mode=mode) + + return(data) + + + def get_nxdata_from_dataset(self, dset): + '''Return an instance of `nexusformat.nexus.NXdata` that represents the + data and metadata attributes contained in `dset`. + + :param dset: the input dataset to represent + :type data: xarray.Dataset + :return: `dset` represented as an instance of `nexusformat.nexus.NXdata` + :rtype: nexusformat.nexus.NXdata + ''' + + from nexusformat.nexus import NXdata, NXfield + + nxdata_args = {'signal':None, 'axes':()} + + for var in dset.data_vars: + data_var = dset[var] + nxfield = NXfield(data_var.data, + name=data_var.name, + attrs=data_var.attrs) + if nxdata_args['signal'] is None: + nxdata_args['signal'] = nxfield + else: + nxdata_args[var] = nxfield + + for coord in dset.coords: + coord_var = dset[coord] + nxfield = NXfield(coord_var.data, + name=coord_var.name, + attrs=coord_var.attrs) + nxdata_args['axes'] = (*nxdata_args['axes'], nxfield) + + nxdata = NXdata(**nxdata_args) + nxdata.attrs['xarray_attrs'] = json.dumps(dset.attrs) + + return(nxdata) + + def get_nxdata_from_dataarray(self, darr): + '''Return an instance of `nexusformat.nexus.NXdata` that represents the + data and metadata attributes contained in `darr`. + + :param darr: the input dataset to represent + :type darr: xarray.DataArray + :return: `darr` represented as an instance of `nexusformat.nexus.NXdata` + :rtype: nexusformat.nexus.NXdata + ''' + + from nexusformat.nexus import NXdata, NXfield + + nxdata_args = {'signal':None, 'axes':()} + + nxdata_args['signal'] = NXfield(darr.data, + name=darr.name, + attrs=darr.attrs) + + + for coord in darr.coords: + coord_var = darr[coord] + nxfield = NXfield(coord_var.data, + name=coord_var.name, + attrs=coord_var.attrs) + nxdata_args['axes'] = (*nxdata_args['axes'], nxfield) + + nxdata = NXdata(**nxdata_args) + nxdata.attrs['xarray_attrs'] = json.dumps(darr.attrs) + + return(nxdata) + + +class OptionParser(): + '''User based option parser''' + def __init__(self): + self.parser = argparse.ArgumentParser(prog='PROG') + self.parser.add_argument("--data", action="store", + dest="data", default="", help="Input data") + self.parser.add_argument("--filename", action="store", + dest="filename", default="", help="Output file") + self.parser.add_argument("--writer", action="store", + dest="writer", default="Writer", help="Writer class name") + self.parser.add_argument('--log-level', choices=logging._nameToLevel.keys(), + dest='log_level', default='INFO', help='logging level') + +def main(): + '''Main function''' + optmgr = OptionParser() + opts = optmgr.parser.parse_args() + clsName = opts.writer + try: + writerCls = getattr(sys.modules[__name__],clsName) + except: + print(f'Unsupported writer {clsName}') + sys.exit(1) + + writer = writerCls() + writer.logger.setLevel(getattr(logging, opts.log_level)) + log_handler = logging.StreamHandler() + log_handler.setFormatter(logging.Formatter('{name:20}: {message}', style='{')) + writer.logger.addHandler(log_handler) + data = writer.write(opts.data, opts.filename) + 
print(f"Writer {writer} writes to {opts.filename}, data {data}") + +if __name__ == '__main__': + main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ChessAnalysisPipeline.egg-info/PKG-INFO Tue Mar 28 15:07:30 2023 +0000
@@ -0,0 +1,44 @@
+Metadata-Version: 2.1
+Name: ChessAnalysisPipeline
+Version: 0.0.1
+Summary: CHESS analysis pipeline framework
+Home-page: https://github.com/CHESSComputing/ChessAnalysisPipeline
+Author: Keara Soloway, Rolf Verberg, Valentin Kuznetsov
+Author-email:
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+
+### Pipeline
+This package contains a proof-of-concept pipeline framework for workflow
+execution. It requires proper configuration of the pipeline in terms of classes, e.g.
+```
+# pipeline definition as a sequence of objects
+pipeline:
+  - reader.Reader
+  - processor.Processor
+  - fitter.Fitter
+  - processor.Processor
+  - writer.Writer
+  - fitter.Fitter
+  - writer.Writer
+
+# specific object parameters, e.g. our reader accepts fileName=data.csv
+reader.Reader:
+  fileName: data.csv
+
+
+# specific object parameters, e.g. our writer accepts fileName=data.out
+writer.Writer:
+  fileName: data.out
+```
+
+Then you may execute this pipeline as follows:
+```
+./runner.py --config config.yaml
+```
+and check the output in the `data.out` file.
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ChessAnalysisPipeline.egg-info/SOURCES.txt Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,49 @@ +.gitignore +Galaxy.md +LICENSE +README.md +chap.xml +environment.yml +setup.py +CHAP/__init__.py +CHAP/__main__.py +CHAP/async.py +CHAP/pipeline.py +CHAP/processor.py +CHAP/reader.py +CHAP/runner.py +CHAP/writer.py +CHAP/models/basemodel.py +CHAP/models/edd.py +CHAP/models/integration.py +CHAP/models/map.py +CHAP/models/workflow.py +ChessAnalysisPipeline.egg-info/PKG-INFO +ChessAnalysisPipeline.egg-info/SOURCES.txt +ChessAnalysisPipeline.egg-info/dependency_links.txt +ChessAnalysisPipeline.egg-info/requires.txt +ChessAnalysisPipeline.egg-info/top_level.txt +MLaaS/README.md +MLaaS/__init__.py +MLaaS/demo.sh +MLaaS/ktrain.py +MLaaS/mnist_img.py +MLaaS/tfaas_client.py +examples/edd/ceria_calibration_config.yaml +examples/edd/map.yaml +examples/edd/pipeline.yaml +examples/edd/strain_analysis_config.yaml +examples/inference/pipeline.yaml +examples/saxswaxs/integration_saxs_azimuthal.yaml +examples/saxswaxs/integration_waxs_azimuthal.yaml +examples/saxswaxs/map_1d.yaml +examples/saxswaxs/map_2d.yaml +examples/saxswaxs/pipeline.yaml +examples/sin2psi/integration.yaml +examples/sin2psi/map.yaml +examples/sin2psi/pipeline.yaml +examples/sin2psi/strain_analysis_config.yaml +scripts/CHAP +test-data/config.yaml +test-data/data.csv +test-data/data.out \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ChessAnalysisPipeline.egg-info/dependency_links.txt Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,1 @@ +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ChessAnalysisPipeline.egg-info/requires.txt Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,1 @@ +PyYAML
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ChessAnalysisPipeline.egg-info/top_level.txt Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,2 @@ +CHAP +MLaaS
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Galaxy.md Tue Mar 28 15:07:30 2023 +0000
@@ -0,0 +1,48 @@
+# Install Planemo
+```
+python -m venv planemo
+. planemo/bin/activate
+pip install planemo
+```
+
+### Initialize chap.xml
+```
+planemo tool_init --id 'chap' --name 'CHESS Analysis Pipeline' --example_command 'runner.py --config config.yaml' --example_input config.yaml --cite_url 'https://github.com/CHESSComputing/ChessAnalysisPipeline' --help_from_command 'runner.py --help' --test_case --example_output data.out
+```
+This command will output:
+```
+Tool written to chap.xml
+No test-data directory, creating one.
+Copying test-file config.yaml
+```
+
+Perform linting:
+```
+planemo l
+Linting tool /Users/vk/Work/CHESS/ChessPipeline/chap.xml
+Applying linter tests... CHECK
+.. CHECK: 1 test(s) found.
+Applying linter output... CHECK
+.. INFO: 1 outputs found.
+Applying linter inputs... CHECK
+.. INFO: Found 2 input parameters.
+Applying linter help... CHECK
+.. CHECK: Tool contains help section.
+.. CHECK: Help contains valid reStructuredText.
+Applying linter general... CHECK
+.. CHECK: Tool defines a version [0.1.0+galaxy0].
+.. CHECK: Tool defines a name [CHESS Analysis Pipeline].
+.. CHECK: Tool defines an id [chap].
+.. CHECK: Tool specifies profile version [21.05].
+Applying linter command... CHECK
+.. INFO: Tool contains a command.
+Applying linter citations... CHECK
+.. CHECK: Found 1 likely valid citations.
+Applying linter tool_xsd... CHECK
+.. INFO: File validates against XML schema.
+```
+
+Now we can start the server with the following command: `planemo s`;
+it will take a while. Once it finishes, we may visit
+`http://127.0.0.1:9090` to see our Galaxy hub along with
+our pipeline tool.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/LICENSE Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Valentin Kuznetsov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MLaaS/README.md Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,380 @@ +## MLaaS end-to-end example using MNIST dataset +MLaaS stands for Machine Learning as a Service, and here we'll provide +end-to-end example based on MNIST dataset using +Python [Keras](https://keras.io/) ML framework for training +part, and [TFaas](https://github.com/vkuznet/TFaaS) ML framework +for inference part. + +### Requirements (environment) +To proceed with ML trainig we need to acquire MNIST dataset. +We will assume that you have a box where recent version of python is installed, +please note that instructions were tested with `Python 3.10.10` + +``` +# create mnist_env, here python refers to python 3.10.10 +python -m venv mnist_env + +# download mnist dataset for training purposes in numpy gziped arrays +curl -ksLO https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz + +# download MNIST dataset for training purposes in pkl.gz data-format +curl -ksLO https://s3.amazonaws.com/img-datasets/mnist.pkl.gz + +# download MNIST images +# download MNIST actual images which we will use within inference +curl -O http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz +``` + +### Train ML model +Below you can see fully tested Keras mased ML codebase to train +simple convolutional neural network over MNIST dataset (save +this code as `ktrain.py`): +``` +#!/usr/bin/env python +#-*- coding: utf-8 -*- +#pylint: disable= +""" +File : ktrain.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: Keras based ML network to train over MNIST dataset +""" + +# system modules +import os +import sys +import json +import gzip +import pickle +import argparse + +# third-party modules +import numpy as np +import tensorflow as tf +from tensorflow import keras +from tensorflow.keras import layers +from tensorflow.keras import backend as K +from tensorflow.python.tools import saved_model_utils + + +def modelGraph(model_dir): + """ + Provide input/output names used by TF Graph along with graph itself + The code is based on TF saved_model_cli.py script. + """ + input_names = [] + output_names = [] + tag_sets = saved_model_utils.get_saved_model_tag_sets(model_dir) + for tag_set in sorted(tag_sets): + print('%r' % ', '.join(sorted(tag_set))) + meta_graph_def = saved_model_utils.get_meta_graph_def(model_dir, tag_set[0]) + for key in meta_graph_def.signature_def.keys(): + meta = meta_graph_def.signature_def[key] + if hasattr(meta, 'inputs') and hasattr(meta, 'outputs'): + inputs = meta.inputs + outputs = meta.outputs + input_signatures = list(meta.inputs.values()) + input_names = [signature.name for signature in input_signatures] + if len(input_names) > 0: + output_signatures = list(meta.outputs.values()) + output_names = [signature.name for signature in output_signatures] + return input_names, output_names, meta_graph_def + +def readData(fin, num_classes): + """ + Helper function to read MNIST data and provide it to + upstream code, e.g. 
to the training layer + """ + # Load the data and split it between train and test sets +# (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() + f = gzip.open(fin, 'rb') + if sys.version_info < (3,): + mnist_data = pickle.load(f) + else: + mnist_data = pickle.load(f, encoding='bytes') + f.close() + (x_train, y_train), (x_test, y_test) = mnist_data + + # Scale images to the [0, 1] range + x_train = x_train.astype("float32") / 255 + x_test = x_test.astype("float32") / 255 + # Make sure images have shape (28, 28, 1) + x_train = np.expand_dims(x_train, -1) + x_test = np.expand_dims(x_test, -1) + print("x_train shape:", x_train.shape) + print(x_train.shape[0], "train samples") + print(x_test.shape[0], "test samples") + + + # convert class vectors to binary class matrices + y_train = keras.utils.to_categorical(y_train, num_classes) + y_test = keras.utils.to_categorical(y_test, num_classes) + return x_train, y_train, x_test, y_test + + +def train(fin, fout=None, model_name=None, epochs=1, batch_size=128, h5=False): + """ + train function for MNIST + """ + # Model / data parameters + num_classes = 10 + input_shape = (28, 28, 1) + + # create ML model + model = keras.Sequential( + [ + keras.Input(shape=input_shape), + layers.Conv2D(32, kernel_size=(3, 3), activation="relu"), + layers.MaxPooling2D(pool_size=(2, 2)), + layers.Conv2D(64, kernel_size=(3, 3), activation="relu"), + layers.MaxPooling2D(pool_size=(2, 2)), + layers.Flatten(), + layers.Dropout(0.5), + layers.Dense(num_classes, activation="softmax"), + ] + ) + + model.summary() + print("model input", model.input, type(model.input), model.input.__dict__) + print("model output", model.output, type(model.output), model.output.__dict__) + model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]) + + # train model + x_train, y_train, x_test, y_test = readData(fin, num_classes) + model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1) + + # evaluate trained model + score = model.evaluate(x_test, y_test, verbose=0) + print("Test loss:", score[0]) + print("Test accuracy:", score[1]) + print("save model to", fout) + writer(fout, model_name, model, input_shape, h5) + +def writer(fout, model_name, model, input_shape, h5=False): + """ + Writer provide write function for given model + """ + if not fout: + return + model.save(fout) + if h5: + model.save('{}/{}'.format(fout, h5), save_format='h5') + pbModel = '{}/saved_model.pb'.format(fout) + pbtxtModel = '{}/saved_model.pbtxt'.format(fout) + convert(pbModel, pbtxtModel) + + # get meta-data information about our ML model + input_names, output_names, model_graph = modelGraph(model_name) + print("### input", input_names) + print("### output", output_names) + # ML uses (28,28,1) shape, i.e. 
28x28 black-white images + # if we'll use color images we'll use shape (28, 28, 3) + img_channels = input_shape[2] # last item represent number of colors + meta = {'name': model_name, + 'model': 'saved_model.pb', + 'labels': 'labels.txt', + 'img_channels': img_channels, + 'input_name': input_names[0].split(':')[0], + 'output_name': output_names[0].split(':')[0], + 'input_node': model.input.name, + 'output_node': model.output.name + } + with open(fout+'/params.json', 'w') as ostream: + ostream.write(json.dumps(meta)) + with open(fout+'/labels.txt', 'w') as ostream: + for i in range(0, 10): + ostream.write(str(i)+'\n') + with open(fout + '/model.graph', 'wb') as ostream: + ostream.write(model_graph.SerializeToString()) + +def convert(fin, fout): + """ + convert input model.pb into output model.pbtxt + Based on internet search: + - https://www.tensorflow.org/guide/saved_model + - https://www.programcreek.com/python/example/123317/tensorflow.core.protobuf.saved_model_pb2.SavedModel + """ + import google.protobuf + from tensorflow.core.protobuf import saved_model_pb2 + import tensorflow as tf + + saved_model = saved_model_pb2.SavedModel() + + with open(fin, 'rb') as f: + saved_model.ParseFromString(f.read()) + + with open(fout, 'w') as f: + f.write(google.protobuf.text_format.MessageToString(saved_model)) + + +class OptionParser(): + def __init__(self): + "User based option parser" + self.parser = argparse.ArgumentParser(prog='PROG') + self.parser.add_argument("--fin", action="store", + dest="fin", default="", help="Input MNIST file") + self.parser.add_argument("--fout", action="store", + dest="fout", default="", help="Output models area") + self.parser.add_argument("--model", action="store", + dest="model", default="mnist", help="model name") + self.parser.add_argument("--epochs", action="store", + dest="epochs", default=1, help="number of epochs to use in ML training") + self.parser.add_argument("--batch_size", action="store", + dest="batch_size", default=128, help="batch size to use in training") + self.parser.add_argument("--h5", action="store", + dest="h5", default="mnist", help="h5 model file name") + +def main(): + "Main function" + optmgr = OptionParser() + opts = optmgr.parser.parse_args() + train(opts.fin, opts.fout, + model_name=opts.model, + epochs=opts.epochs, + batch_size=opts.batch_size, + h5=opts.h5) + +if __name__ == '__main__': + main() +``` + +### Training process +We will train our model using the following command (for simplicity we skip +warning messages from TF and irrelevant printouts): +``` +# here fout=mnist represents mnist directory where we'll stored our trained model +# and model=mnist is the name of the model we'll use later in inference +./ktrain.py --fin=./mnist.pkl.gz --fout=mnist --model=mnist +... 
+x_train shape: (60000, 28, 28, 1)
+60000 train samples
+10000 test samples
+Model: "sequential"
+_________________________________________________________________
+ Layer (type)                Output Shape              Param #
+=================================================================
+ conv2d (Conv2D)             (None, 26, 26, 32)        320
+
+ max_pooling2d (MaxPooling2D  (None, 13, 13, 32)       0
+ )
+
+ conv2d_1 (Conv2D)           (None, 11, 11, 64)        18496
+
+ max_pooling2d_1 (MaxPooling  (None, 5, 5, 64)         0
+ 2D)
+
+ flatten (Flatten)           (None, 1600)              0
+
+ dropout (Dropout)           (None, 1600)              0
+
+ dense (Dense)               (None, 10)                16010
+
+=================================================================
+Total params: 34,826
+Trainable params: 34,826
+Non-trainable params: 0
+_________________________________________________________________
+
+422/422 [==============================] - 37s 84ms/step - loss: 0.3645 - accuracy: 0.8898 - val_loss: 0.0825 - val_accuracy: 0.9772
+Test loss: 0.09409885853528976
+Test accuracy: 0.9703999757766724
+save model to mnist
+
+### input ['serving_default_input_1:0']
+### output ['StatefulPartitionedCall:0']
+```
+When this process is over you'll find a `mnist` directory with the following
+content:
+```
+shell# ls mnist
+
+assets  keras_metadata.pb  model.graph  saved_model.pb  variables
+fingerprint.pb  labels.txt  params.json  saved_model.pbtxt
+```
+- `saved_model.pb` represents the trained ML model in protobuf data-format
+- `saved_model.pbtxt` represents the trained ML model in text protobuf representation
+- `labels.txt` contains our image labels
+- `params.json` contains meta-data used by TFaaS and has the following content:
+```
+cat mnist/params.json | jq
+{
+  "name": "mnist",
+  "model": "saved_model.pb",
+  "labels": "labels.txt",
+  "img_channels": 1,
+  "input_name": "serving_default_input_1",
+  "output_name": "StatefulPartitionedCall",
+  "input_node": "input_1",
+  "output_node": "dense/Softmax:0"
+}
+```
+Here you see that our ML model is called `mnist`, the model itself is stored
+in the `saved_model.pb` file, and, more importantly, `params.json` records the
+input and output tensor names and nodes which TFaaS needs in order to serve
+our predictions.
+
+### Inference server
+Now, it is time to start our inference server. You can find its code in the `src/go` area.
+To build the code you need:
+```
+# download TF library and includes for your OS, e.g. macOS build
+curl -ksLO https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-darwin-x86_64-2.11.0.tar.gz
+# or linux build
+curl -ksLO https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-linux-x86_64-2.11.0.tar.gz
+# or linux GPU build
+curl -ksLO https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-gpu-linux-x86_64-2.11.0.tar.gz
+
+# provide TF include area location to go build command
+# here /opt/tensorflow/include is where the TF includes are located
+export CGO_CPPFLAGS="-I/opt/tensorflow/include"
+
+# compile the code
+make
+
+# it will produce tfaas executable
+
+# to run the code we need to setup `DYLD_LIBRARY_PATH`
+export DYLD_LIBRARY_PATH=/opt/tensorflow/lib
+./tfaas -config config.json
+```
+where `config.json` has the following form (please refer to the
+[TFaaS](https://github.com/vkuznet/TFaaS) repository for more details):
+```
+{
+  "port": 8083,
+  "modelDir": "models",
+  "staticDir": "static",
+  "configProto": "",
+  "base": "",
+  "serverKey": "",
+  "serverCrt": "",
+  "verbose": 1
+}
+```
+
+### Serving predictions with TFaaS inference server
+Finally, we are ready for the inference part.
+- upload your ML model to TFaaS server +``` +# create tarball of your mnist ML trained model +tar cfz mnist.tar.gz mnist + +# upload tarball to TFaaS server +curl -v -X POST -H "Content-Encoding: gzip" \ + -H "Content-Type: application/octet-stream" \ + --data-binary @./mnist.tar.gz \ + http://localhost:8083/upload + +# check your model presence +curl http://localhost:8083/models + +# generate image from MNIST dataset you want to use for prediction +# img1.png will contain number 1, img4.png will contain number 4 +./mnist_img.py --fout img1.png --imgid=3 +./mnist_img.py --fout img4.png --imgid=2 + +# ask for prediction of your image +curl http://localhost:8083/predict/image -F 'image=@./img1.png' -F 'model=mnist' +[0,1,0,0,0,0,0,0,0,0] + +curl http://localhost:8083/predict/image -F 'image=@./img4.png' -F 'model=mnist' +[0,0,0,0,1,0,0,0,0,0] +```
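+
+For convenience, the same calls can also be issued programmatically. Below is
+a minimal sketch (not part of this repository) that mirrors the curl commands
+above using the third-party Python [requests](https://requests.readthedocs.io/)
+package; the endpoint paths, form fields, and the `mnist.tar.gz`/`img1.png`
+file names are simply the ones used in the examples above:
+```
+#!/usr/bin/env python
+"""Sketch: talk to a local TFaaS server via the endpoints shown above."""
+import requests
+
+TFAAS = "http://localhost:8083"
+
+def upload_model(tarball):
+    "Upload a gzipped model tarball, mirroring the curl /upload call"
+    with open(tarball, "rb") as istream:
+        headers = {"Content-Encoding": "gzip",
+                   "Content-Type": "application/octet-stream"}
+        resp = requests.post(f"{TFAAS}/upload", data=istream.read(), headers=headers)
+    resp.raise_for_status()
+
+def predict_image(fname, model="mnist"):
+    "Ask for a prediction of a single image, mirroring /predict/image"
+    with open(fname, "rb") as istream:
+        resp = requests.post(f"{TFAAS}/predict/image",
+                             files={"image": istream}, data={"model": model})
+    resp.raise_for_status()
+    return resp.json()
+
+if __name__ == "__main__":
+    upload_model("mnist.tar.gz")
+    print(predict_image("img1.png"))
+```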
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MLaaS/demo.sh Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,179 @@ +#!/bin/bash +# TFaaS host name +turl=http://localhost:8083 +# location of files +tdir=/home/vk/chess/TFaaS/models +# we need the following files +model_tarball=$tdir/model.tar.gz +params_json=$tdir/vk/params.json +input_json=$tdir/vk/input.json +upload_json=$tdir/vk/upload.json +model_pb=$tdir/vk/model.pb +labels_txt=$tdir/vk/labels.txt +tfaas_client=$tdir/tfaas_client.py +hey=$tdir/hey-tools/hey_amd64 + +echo "### obtain any existing ML model" +echo +echo "$tfaas_client --url=$turl --models" +echo +$tfaas_client --url=$turl --models +sleep 1 +echo + +echo "### upload new ML model" +echo +echo "cat $upload_json" +echo +cat $upload_json +echo +echo "$tfaas_client --url=$turl --upload=$upload_json" +$tfaas_client --url=$turl --upload=$upload_json +echo + +echo "### view if our model exists" +echo +echo "$tfaas_client --url=$turl --models" +echo +$tfaas_client --url=$turl --models +sleep 2 +echo + +echo "### view if our model exists, but use jq tool to get better view over JSON" +echo +echo "$tfaas_client --url=$turl --models | jq" +echo +$tfaas_client --url=$turl --models | jq +sleep 2 +echo + +echo "### let's obtain some prediction" +echo +echo "cat $input_json" +echo +cat $input_json +echo +echo "$tfaas_client --url=$turl --predict=$input_json" +echo +$tfaas_client --url=$turl --predict=$input_json +sleep 2 +echo + +echo "### let's delete our ML model named vk" +echo +echo "$tfaas_client --url=$turl --delete=vk" +echo +$tfaas_client --url=$turl --delete=vk +sleep 1 +echo + +echo "### lets view again available models" +echo +echo "$tfaas_client --url=$turl --models" +echo +$tfaas_client --url=$turl --models +sleep 2 +echo + +echo "### now let's use curl as CLI tool to communicate with TFaaS" +echo +sleep 5 + +echo "### Let's view our models" +echo +echo "curl -s $turl/models" +echo +curl -s $turl/models +sleep 1 +echo + +echo "### let's send POST HTTP request with our parameters to upload ML model" +echo "### we provide $params_json" +echo +cat $params_json +echo +echo "### we provide $model_pb TF model" +echo +ls -al $model_pb +echo +echo "### and we provide our labels in $labels_txt file" +echo +cat $labels_txt +echo +echo "### now we make curl call" +echo +echo "curl -s -X POST $turl/upload -F 'name=vk' -F 'params=@$params_json' -F 'model=@$model_pb' -F 'labels=@$labels_txt'" +echo +curl -s -X POST $turl/upload -F 'name=vk' -F "params=@$params_json" -F "model=@$model_pb" -F "labels=@$labels_txt" +sleep 1 +echo + +echo "### Now we can view our models" +echo +echo "curl -s $turl/models | jq" +echo +curl -s $turl/models | jq +echo +sleep 2 + +echo "### And we can obtain our predictions using /json API" +echo +echo "curl -s -X POST $turl/json -H "Content-type: application/json" -d@$input_json" +echo +curl -s -X POST $turl/json -H "Content-type: application/json" -d@$input_json +sleep 1 +echo + +echo "### Now we can delete ML model using /delete end-point" +echo +echo "curl -s -X DELETE $turl/delete -F 'model=vk'" +echo +curl -s -X DELETE $turl/delete -F 'model=vk' +sleep 1 +echo + +echo "### Now we can view our models" +echo +echo "curl -s $turl/models" +echo +curl -s $turl/models +echo +sleep 1 + +$tfaas_client --url=$turl --upload=$upload_json + +echo +echo "### now let's use tar ball and upload it" +echo +ls -al $model_tarball +tar tvfz $model_tarball +sleep 5 + +echo "curl -v -X POST -H \"Content-Encoding: gzip\" -H \"content-type: application/octet-stream\" --data-binary 
@$model_tarball $turl/upload" +curl -v -X POST -H"Content-Encoding: gzip" -H"content-type: application/octet-stream" --data-binary @$model_tarball $turl/upload +sleep 1 +echo + +echo "### Now we can view our models" +echo +echo "curl -s $turl/models | jq" +echo +curl -s $turl/models | jq +echo +sleep 2 + +echo "### And we can obtain our predictions using /json API" +echo +echo "curl -s -X POST $turl/json -H "Content-type: application/json" -d@$input_json" +echo +curl -s -X POST $turl/json -H "Content-type: application/json" -d@$input_json +sleep 1 +echo + +if [ -f $hey ]; then +echo "### Now let's perform some stress tests" +echo "### for that we'll use hey tool which will send number of concurrent requests to tfaas service" +echo +echo "$hey -m POST -H "Content-type: application/json" -D $input_json $turl/json" +$hey -m POST -H "Content-type: application/json" -D $input_json $turl/json +fi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MLaaS/ktrain.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,205 @@ +#!/usr/bin/env python +#-*- coding: utf-8 -*- +#pylint: disable= +""" +File : ktrain.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: Keras based ML network to train over MNIST dataset +""" + +# system modules +import os +import sys +import json +import gzip +import pickle +import argparse + +# third-party modules +import numpy as np +import tensorflow as tf +from tensorflow import keras +from tensorflow.keras import layers +from tensorflow.keras import backend as K +from tensorflow.python.tools import saved_model_utils + + +def modelGraph(model_dir): + """ + Provide input/output names used by TF Graph along with graph itself + The code is based on TF saved_model_cli.py script. + """ + input_names = [] + output_names = [] + tag_sets = saved_model_utils.get_saved_model_tag_sets(model_dir) + for tag_set in sorted(tag_sets): + print('%r' % ', '.join(sorted(tag_set))) + meta_graph_def = saved_model_utils.get_meta_graph_def(model_dir, tag_set[0]) + for key in meta_graph_def.signature_def.keys(): + meta = meta_graph_def.signature_def[key] + if hasattr(meta, 'inputs') and hasattr(meta, 'outputs'): + inputs = meta.inputs + outputs = meta.outputs + input_signatures = list(meta.inputs.values()) + input_names = [signature.name for signature in input_signatures] + if len(input_names) > 0: + output_signatures = list(meta.outputs.values()) + output_names = [signature.name for signature in output_signatures] + return input_names, output_names, meta_graph_def + +def readData(fin, num_classes): + """ + Helper function to read MNIST data and provide it to + upstream code, e.g. to the training layer + """ + # Load the data and split it between train and test sets +# (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() + f = gzip.open(fin, 'rb') + if sys.version_info < (3,): + mnist_data = pickle.load(f) + else: + mnist_data = pickle.load(f, encoding='bytes') + f.close() + (x_train, y_train), (x_test, y_test) = mnist_data + + # Scale images to the [0, 1] range + x_train = x_train.astype("float32") / 255 + x_test = x_test.astype("float32") / 255 + # Make sure images have shape (28, 28, 1) + x_train = np.expand_dims(x_train, -1) + x_test = np.expand_dims(x_test, -1) + print("x_train shape:", x_train.shape) + print(x_train.shape[0], "train samples") + print(x_test.shape[0], "test samples") + + + # convert class vectors to binary class matrices + y_train = keras.utils.to_categorical(y_train, num_classes) + y_test = keras.utils.to_categorical(y_test, num_classes) + return x_train, y_train, x_test, y_test + + +def train(fin, fout=None, model_name=None, epochs=1, batch_size=128, h5=False): + """ + train function for MNIST + """ + # Model / data parameters + num_classes = 10 + input_shape = (28, 28, 1) + + # create ML model + model = keras.Sequential( + [ + keras.Input(shape=input_shape), + layers.Conv2D(32, kernel_size=(3, 3), activation="relu"), + layers.MaxPooling2D(pool_size=(2, 2)), + layers.Conv2D(64, kernel_size=(3, 3), activation="relu"), + layers.MaxPooling2D(pool_size=(2, 2)), + layers.Flatten(), + layers.Dropout(0.5), + layers.Dense(num_classes, activation="softmax"), + ] + ) + + model.summary() + print("model input", model.input, type(model.input), model.input.__dict__) + print("model output", model.output, type(model.output), model.output.__dict__) + model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]) + + # train 
model + x_train, y_train, x_test, y_test = readData(fin, num_classes) + model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1) + + # evaluate trained model + score = model.evaluate(x_test, y_test, verbose=0) + print("Test loss:", score[0]) + print("Test accuracy:", score[1]) + print("save model to", fout) + writer(fout, model_name, model, input_shape, h5) + +def writer(fout, model_name, model, input_shape, h5=False): + """ + Writer provide write function for given model + """ + if not fout: + return + model.save(fout) + if h5: + model.save('{}/{}'.format(fout, h5), save_format='h5') + pbModel = '{}/saved_model.pb'.format(fout) + pbtxtModel = '{}/saved_model.pbtxt'.format(fout) + convert(pbModel, pbtxtModel) + + # get meta-data information about our ML model + input_names, output_names, model_graph = modelGraph(model_name) + print("### input", input_names) + print("### output", output_names) + # ML uses (28,28,1) shape, i.e. 28x28 black-white images + # if we'll use color images we'll use shape (28, 28, 3) + img_channels = input_shape[2] # last item represent number of colors + meta = {'name': model_name, + 'model': 'saved_model.pb', + 'labels': 'labels.txt', + 'img_channels': img_channels, + 'input_name': input_names[0].split(':')[0], + 'output_name': output_names[0].split(':')[0], + 'input_node': model.input.name, + 'output_node': model.output.name + } + with open(fout+'/params.json', 'w') as ostream: + ostream.write(json.dumps(meta)) + with open(fout+'/labels.txt', 'w') as ostream: + for i in range(0, 10): + ostream.write(str(i)+'\n') + with open(fout + '/model.graph', 'wb') as ostream: + ostream.write(model_graph.SerializeToString()) + +def convert(fin, fout): + """ + convert input model.pb into output model.pbtxt + Based on internet search: + - https://www.tensorflow.org/guide/saved_model + - https://www.programcreek.com/python/example/123317/tensorflow.core.protobuf.saved_model_pb2.SavedModel + """ + import google.protobuf + from tensorflow.core.protobuf import saved_model_pb2 + import tensorflow as tf + + saved_model = saved_model_pb2.SavedModel() + + with open(fin, 'rb') as f: + saved_model.ParseFromString(f.read()) + + with open(fout, 'w') as f: + f.write(google.protobuf.text_format.MessageToString(saved_model)) + + +class OptionParser(): + def __init__(self): + "User based option parser" + self.parser = argparse.ArgumentParser(prog='PROG') + self.parser.add_argument("--fin", action="store", + dest="fin", default="", help="Input MNIST file") + self.parser.add_argument("--fout", action="store", + dest="fout", default="", help="Output models area") + self.parser.add_argument("--model", action="store", + dest="model", default="mnist", help="model name") + self.parser.add_argument("--epochs", action="store", + dest="epochs", default=1, help="number of epochs to use in ML training") + self.parser.add_argument("--batch_size", action="store", + dest="batch_size", default=128, help="batch size to use in training") + self.parser.add_argument("--h5", action="store", + dest="h5", default="mnist", help="h5 model file name") + +def main(): + "Main function" + optmgr = OptionParser() + opts = optmgr.parser.parse_args() + train(opts.fin, opts.fout, + model_name=opts.model, + epochs=opts.epochs, + batch_size=opts.batch_size, + h5=opts.h5) + +if __name__ == '__main__': + main()
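+
+# Example usage (a sketch; the file and directory names below are the ones
+# used in MLaaS/README.md and are otherwise arbitrary):
+#
+#   ./ktrain.py --fin=./mnist.pkl.gz --fout=mnist --model=mnist --epochs=1
+#
+# After training, the SavedModel written to the --fout directory can be
+# reloaded locally (without TFaaS) for a quick sanity check:
+#
+#   import numpy as np
+#   from tensorflow import keras
+#   model = keras.models.load_model('mnist')          # directory written above
+#   batch = np.zeros((1, 28, 28, 1), dtype='float32')  # one blank 28x28 image
+#   print(model.predict(batch))                        # 10 softmax probabilities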
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MLaaS/mnist_img.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,83 @@ +#!/usr/bin/env python +#-*- coding: utf-8 -*- +#pylint: disable= +""" +File : mnist_img.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: +""" + +import json +import gzip +import argparse +# from itertools import chain + +import numpy as np +import matplotlib.pyplot as plt + + +def readImage(fname, fout, num_images=5, imgId=2): + """ + Helper function to read MNIST image + """ + image_size = 28 + with gzip.open(fname, 'r') as fstream: + fstream.read(16) + buf = fstream.read(image_size * image_size * num_images) + data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32) + data = data.reshape(num_images, image_size, image_size, 1) + image = np.asarray(data[imgId]).squeeze() + plt.imsave(fout, image) + print("read:", fname, "wrote:", fout, "image:", type(image), "shape:", image.shape) + +def img2json(image): + """ + Convert given image to JSON data format used by TFaaS + """ + # values = [int(i) for i in list(chain.from_iterable(image))] + # values = image.tolist() + values = [] + for row in image.tolist(): + row = [int(i) for i in row] + vals = [[i] for i in row] + values.append(vals) + # final values should be an array of elements, e.g. single image representation + values = [values] + keys = [str(i) for i in range(0, 10)] + meta = { + 'keys': keys, + 'values': values, + 'model': 'mnist' + } + with open('img.json', 'w') as ostream: + ostream.write(json.dumps(meta)) + + +class OptionParser(): + def __init__(self): + "User based option parser" + fname = "train-images-idx3-ubyte.gz" + self.parser = argparse.ArgumentParser(prog='PROG') + self.parser.add_argument("--fin", action="store", + dest="fin", default=fname, help=f"Input MNIST file, default {fname}") + self.parser.add_argument("--fout", action="store", + dest="fout", default="img.png", help="Output image fila name, default img.png") + self.parser.add_argument("--nimages", action="store", + dest="nimages", default=5, help="number of images to read, default 5") + self.parser.add_argument("--imgid", action="store", + dest="imgid", default=2, help="image index to use from nimages, default 2 (number 4)") + +def main(): + """ + main function to produce image file from mnist dataset. + MNIST dataset can be downloaded from + curl -O http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz + """ + optmgr = OptionParser() + opts = optmgr.parser.parse_args() + num_images = int(opts.nimages) + imgId = int(opts.imgid) + img = readImage(opts.fin, opts.fout, num_images, imgId) + +if __name__ == '__main__': + main()
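+
+# Example (a sketch): img2json is not called from main(), but it can be used
+# to produce the JSON payload accepted by the TFaaS /json endpoint, e.g.
+#
+#   import gzip
+#   import numpy as np
+#   with gzip.open('train-images-idx3-ubyte.gz', 'r') as fstream:
+#       fstream.read(16)                      # skip the IDX header
+#       buf = fstream.read(28 * 28 * 5)       # first five images
+#   data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
+#   image = data.reshape(5, 28, 28, 1)[2].squeeze()
+#   img2json(image)                           # writes img.json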
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MLaaS/tfaas_client.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,371 @@ +#!/usr/bin/env python +#-*- coding: utf-8 -*- +#pylint: disable= +""" +File : tfaas_client.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: simple python client to communicate with TFaaS server +""" + +# system modules +import os +import sys +import pwd +import ssl +import json +import binascii +import argparse +import itertools +import mimetypes +if sys.version_info < (2, 7): + raise Exception("TFaaS client requires python 2.7 or greater") +# python 3 +if sys.version.startswith('3.'): + import urllib.request as urllib2 + import urllib.parse as urllib + import http.client as httplib + import http.cookiejar as cookielib +else: + import mimetools + import urllib + import urllib2 + import httplib + import cookielib + +TFAAS_CLIENT = 'tfaas-client/1.1::python/%s.%s' % sys.version_info[:2] + +class OptionParser(): + def __init__(self): + "User based option parser" + self.parser = argparse.ArgumentParser(prog='PROG') + self.parser.add_argument("--url", action="store", + dest="url", default="", help="TFaaS URL") + self.parser.add_argument("--upload", action="store", + dest="upload", default="", help="upload model to TFaaS") + self.parser.add_argument("--bundle", action="store", + dest="bundle", default="", help="upload bundle ML files to TFaaS") + self.parser.add_argument("--predict", action="store", + dest="predict", default="", help="fetch prediction from TFaaS") + self.parser.add_argument("--image", action="store", + dest="image", default="", help="fetch prediction for given image") + self.parser.add_argument("--model", action="store", + dest="model", default="", help="TF model to use") + self.parser.add_argument("--delete", action="store", + dest="delete", default="", help="delete model in TFaaS") + self.parser.add_argument("--models", action="store_true", + dest="models", default=False, help="show existing models in TFaaS") + self.parser.add_argument("--verbose", action="store_true", + dest="verbose", default=False, help="verbose output") + msg = 'specify private key file name, default $X509_USER_PROXY' + self.parser.add_argument("--key", action="store", + default=x509(), dest="ckey", help=msg) + msg = 'specify private certificate file name, default $X509_USER_PROXY' + self.parser.add_argument("--cert", action="store", + default=x509(), dest="cert", help=msg) + default_ca = os.environ.get("X509_CERT_DIR") + if not default_ca or not os.path.exists(default_ca): + default_ca = "/etc/grid-security/certificates" + if not os.path.exists(default_ca): + default_ca = "" + if default_ca: + msg = 'specify CA path, default currently is %s' % default_ca + else: + msg = 'specify CA path; defaults to system CAs.' 
+ self.parser.add_argument("--capath", action="store", + default=default_ca, dest="capath", help=msg) + msg = 'specify number of retries upon busy DAS server message' + +class HTTPSClientAuthHandler(urllib2.HTTPSHandler): + """ + Simple HTTPS client authentication class based on provided + key/ca information + """ + def __init__(self, key=None, cert=None, capath=None, level=0): + if level > 0: + urllib2.HTTPSHandler.__init__(self, debuglevel=1) + else: + urllib2.HTTPSHandler.__init__(self) + self.key = key + self.cert = cert + self.capath = capath + + def https_open(self, req): + """Open request method""" + #Rather than pass in a reference to a connection class, we pass in + # a reference to a function which, for all intents and purposes, + # will behave as a constructor + return self.do_open(self.get_connection, req) + + def get_connection(self, host, timeout=300): + """Connection method""" + if self.key and self.cert and not self.capath: + return httplib.HTTPSConnection(host, key_file=self.key, + cert_file=self.cert) + elif self.cert and self.capath: + context = ssl.SSLContext(ssl.PROTOCOL_TLSv1) + context.load_verify_locations(capath=self.capath) + context.load_cert_chain(self.cert) + return httplib.HTTPSConnection(host, context=context) + return httplib.HTTPSConnection(host) + +def x509(): + "Helper function to get x509 either from env or tmp file" + proxy = os.environ.get('X509_USER_PROXY', '') + if not proxy: + proxy = '/tmp/x509up_u%s' % pwd.getpwuid( os.getuid() ).pw_uid + if not os.path.isfile(proxy): + return '' + return proxy + +def check_auth(key): + "Check if user runs das_client with key/cert and warn users to switch" + if not key: + msg = "WARNING: tfaas_client is running without user credentials/X509 proxy, create proxy via 'voms-proxy-init -voms cms -rfc'" + print(msg) + +def fullpath(path): + "Expand path to full path" + if path and path[0] == '~': + path = path.replace('~', '') + path = path[1:] if path[0] == '/' else path + path = os.path.join(os.environ['HOME'], path) + return path + +def choose_boundary(): + """ + Helper function to replace deprecated mimetools.choose_boundary + https://stackoverflow.com/questions/27099290/where-is-mimetools-choose-boundary-function-in-python3 + https://docs.python.org/2.7/library/mimetools.html?highlight=choose_boundary#mimetools.choose_boundary + >>> mimetools.choose_boundary() + '192.168.1.191.502.42035.1678979116.376.1' + """ + # we will return any random string + import uuid + return str(uuid.uuid4()) + +# credit: https://pymotw.com/2/urllib2/#uploading-files +class MultiPartForm(object): + """Accumulate the data to be used when posting a form.""" + + def __init__(self): + self.form_fields = [] + self.files = [] + if sys.version.startswith('3.'): + self.boundary = choose_boundary() + else: + self.boundary = mimetools.choose_boundary() + return + + def get_content_type(self): + return 'multipart/form-data; boundary=%s' % self.boundary + + def add_field(self, name, value): + """Add a simple field to the form data.""" + self.form_fields.append((name, value)) + return + + def add_file(self, fieldname, filename, fileHandle, mimetype=None): + """Add a file to be uploaded.""" + body = fileHandle.read() + if mimetype is None: + mimetype = mimetypes.guess_type(filename)[0] or 'application/octet-stream' + if mimetype == 'application/octet-stream': + body = binascii.b2a_base64(body) +# if isinstance(body, bytes): +# body = body.decode("utf-8") + self.files.append((fieldname, filename, mimetype, body)) + return + + def __str__(self): + 
"""Return a string representing the form data, including attached files.""" + # Build a list of lists, each containing "lines" of the + # request. Each part is separated by a boundary string. + # Once the list is built, return a string where each + # line is separated by '\r\n'. + parts = [] + part_boundary = '--' + self.boundary + + # Add the form fields + parts.extend( + [ part_boundary, + 'Content-Disposition: form-data; name="%s"' % name, + '', + value, + ] + for name, value in self.form_fields + ) + + # Add the files to upload + # here we use form-data content disposition instead of file one + # since this is how we define handlers in our Go server + # for more info see: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition + parts.extend( + [ part_boundary, + 'Content-Disposition: form-data; name="%s"; filename="%s"' % \ + (field_name, filename), + 'Content-Type: %s' % content_type, + '', + body, + ] + for field_name, filename, content_type, body in self.files + ) + + # Flatten the list and add closing boundary marker, + # then return CR+LF separated data + flattened = list(itertools.chain(*parts)) + flattened.append('--' + self.boundary + '--') + flattened.append('') + return '\r\n'.join(flattened) + +def models(host, verbose=None, ckey=None, cert=None, capath=None): + "models API shows models from TFaaS server" + url = host + '/models' + client = '%s (%s)' % (TFAAS_CLIENT, os.environ.get('USER', '')) + headers = {"Accept": "application/json", "User-Agent": client} + if verbose: + print("URL : %s" % url) + encoded_data = json.dumps({}) + return getdata(url, headers, encoded_data, ckey, cert, capath, verbose, 'GET') + +def delete(host, model, verbose=None, ckey=None, cert=None, capath=None): + "delete API deletes given model in TFaaS server" + url = host + '/delete' + client = '%s (%s)' % (TFAAS_CLIENT, os.environ.get('USER', '')) + headers = {"User-Agent": client} + if verbose: + print("URL : %s" % url) + print("model : %s" % model) + form = MultiPartForm() + form.add_field('model', model) + edata = str(form) + headers['Content-length'] = len(edata) + headers['Content-Type'] = form.get_content_type() + return getdata(url, headers, edata, ckey, cert, capath, verbose, method='DELETE') + +def bundle(host, ifile, verbose=None, ckey=None, cert=None, capath=None): + "bundle API uploads given bundle model files to TFaaS server" + url = host + '/upload' + client = '%s (%s)' % (TFAAS_CLIENT, os.environ.get('USER', '')) + headers = {"User-Agent": client, "Content-Encoding": "gzip", "Content-Type": "application/octet-stream"} + data = open(ifile, 'rb').read() + return getdata(url, headers, data, ckey, cert, capath, verbose) + +def upload(host, ifile, verbose=None, ckey=None, cert=None, capath=None): + "upload API uploads given model to TFaaS server" + url = host + '/upload' + client = '%s (%s)' % (TFAAS_CLIENT, os.environ.get('USER', '')) + headers = {"User-Agent": client} + params = json.load(open(ifile)) + if verbose: + print("URL : %s" % url) + print("ifile : %s" % ifile) + print("params: %s" % json.dumps(params)) + + form = MultiPartForm() + for key in params.keys(): + if key in ['model', 'labels', 'params']: + flag = 'r' + if key == 'model': + flag = 'rb' + name = params[key] + form.add_file(key, name, fileHandle=open(name, flag)) + else: + form.add_field(key, params[key]) + edata = str(form) + headers['Content-length'] = len(edata) + headers['Content-Type'] = form.get_content_type() + headers['Content-Encoding'] = 'base64' + return getdata(url, headers, edata, 
ckey, cert, capath, verbose) + +def predict(host, ifile, model, verbose=None, ckey=None, cert=None, capath=None): + "predict API get predictions from TFaaS server" + url = host + '/json' + client = '%s (%s)' % (TFAAS_CLIENT, os.environ.get('USER', '')) + headers = {"Accept": "application/json", "User-Agent": client} + params = json.load(open(ifile)) + if model: # overwrite model name in given input file + params['model'] = model + if verbose: + print("URL : %s" % url) + print("ifile : %s" % ifile) + print("params: %s" % json.dumps(params)) + encoded_data = json.dumps(params) + return getdata(url, headers, encoded_data, ckey, cert, capath, verbose) + +def predictImage(host, ifile, model, verbose=None, ckey=None, cert=None, capath=None): + "predict API get predictions from TFaaS server" + url = host + '/image' + client = '%s (%s)' % (TFAAS_CLIENT, os.environ.get('USER', '')) + headers = {"Accept": "application/json", "User-Agent": client} + if verbose: + print("URL : %s" % url) + print("ifile : %s" % ifile) + print("model : %s" % model) + form = MultiPartForm() +# form.add_file('image', ifile, fileHandle=open(ifile, 'r')) + form.add_file('image', ifile, fileHandle=open(ifile, 'rb')) + form.add_field('model', model) + edata = str(form) + headers['Content-length'] = len(edata) + headers['Content-Type'] = form.get_content_type() + return getdata(url, headers, edata, ckey, cert, capath, verbose) + +def getdata(url, headers, encoded_data, ckey, cert, capath, verbose=None, method='POST'): + "helper function to use in predict/upload APIs, it place given URL call to the server" + debug = 1 if verbose else 0 + req = urllib2.Request(url=url, headers=headers, data=encoded_data) + if method == 'DELETE': + req.get_method = lambda: 'DELETE' + elif method == 'GET': + req = urllib2.Request(url=url, headers=headers) + if ckey and cert: + ckey = fullpath(ckey) + cert = fullpath(cert) + http_hdlr = HTTPSClientAuthHandler(ckey, cert, capath, debug) + elif cert and capath: + cert = fullpath(cert) + http_hdlr = HTTPSClientAuthHandler(ckey, cert, capath, debug) + else: + http_hdlr = urllib2.HTTPHandler(debuglevel=debug) + proxy_handler = urllib2.ProxyHandler({}) + cookie_jar = cookielib.CookieJar() + cookie_handler = urllib2.HTTPCookieProcessor(cookie_jar) + data = {} + try: + opener = urllib2.build_opener(http_hdlr, proxy_handler, cookie_handler) + fdesc = opener.open(req) + if url.endswith('json'): + data = json.load(fdesc) + else: + data = fdesc.read() + fdesc.close() + except urllib2.HTTPError as error: + print(error.read()) + sys.exit(1) + if url.endswith('json'): + return json.dumps(data) + return data + +def main(): + "Main function" + optmgr = OptionParser() + opts = optmgr.parser.parse_args() + check_auth(opts.ckey) + res = '' + if opts.upload: + res = upload(opts.url, opts.upload, opts.verbose, opts.ckey, opts.cert, opts.capath) + if opts.bundle: + res = bundle(opts.url, opts.bundle, opts.verbose, opts.ckey, opts.cert, opts.capath) + elif opts.delete: + res = delete(opts.url, opts.delete, opts.verbose, opts.ckey, opts.cert, opts.capath) + elif opts.models: + res = models(opts.url, opts.verbose, opts.ckey, opts.cert, opts.capath) + elif opts.predict: + res = predict(opts.url, opts.predict, opts.model, opts.verbose, opts.ckey, opts.cert, opts.capath) + elif opts.image: + res = predictImage(opts.url, opts.image, opts.model, opts.verbose, opts.ckey, opts.cert, opts.capath) + if res: + print(res) + +if __name__ == '__main__': + main()
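+
+# Example usage (a sketch; the URL and file names mirror MLaaS/demo.sh and
+# MLaaS/README.md and are otherwise arbitrary):
+#
+#   ./tfaas_client.py --url=http://localhost:8083 --models
+#   ./tfaas_client.py --url=http://localhost:8083 --upload=upload.json
+#   ./tfaas_client.py --url=http://localhost:8083 --predict=input.json
+#   ./tfaas_client.py --url=http://localhost:8083 --image=./img1.png --model=mnist
+#   ./tfaas_client.py --url=http://localhost:8083 --delete=mnist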
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.txt Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,30 @@
+### Pipeline
+This package contains a proof-of-concept pipeline framework for workflow
+execution. It requires a proper pipeline configuration in terms of classes, e.g.
+```
+# pipeline definition as a sequence of objects
+pipeline:
+  - reader.Reader
+  - processor.Processor
+  - fitter.Fitter
+  - processor.Processor
+  - writer.Writer
+  - fitter.Fitter
+  - writer.Writer
+
+# specific object parameters, e.g. our reader accepts fileName=data.csv
+reader.Reader:
+  fileName: data.csv
+
+
+# specific object parameters, e.g. our writer accepts fileName=data.out
+writer.Writer:
+  fileName: data.out
+```
+
+Then, you may execute this pipeline as follows:
+```
+./runner.py --config config.yaml
+```
+and check the output in the `data.out` file.
+
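+A new pipeline element is just a Python class that `runner.py` can
+instantiate from the configuration above. The sketch below shows the general
+shape of such a class; the `process` method name and the way data is passed
+between elements are assumptions here, so please check the classes in the
+`CHAP` package for the actual interface:
+```
+# sketch of a hypothetical pipeline element (names are illustrative only)
+class Writer():
+    def __init__(self, fileName=None):
+        # e.g. populated from the writer.Writer section of config.yaml
+        self.fileName = fileName
+
+    def process(self, data):
+        # assumption: each element receives the previous element's output
+        with open(self.fileName, 'w') as ostream:
+            ostream.write(str(data))
+        return data
+```
+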
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/bdist.linux-x86_64/egg/CHAP/async.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,56 @@ +#!/usr/bin/env python +#-*- coding: utf-8 -*- +#pylint: disable= +""" +File : async.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: AsyncProcessor module +""" + +# system modules +import asyncio + +# local modules +from CHAP.processor import Processor, PrintProcessor + + +async def task(mgr, doc): + """ + Process given data using provided task manager + """ + return mgr.process(doc) + + +async def executeTasks(mgr, docs): + """ + Process given set of documents using provided task manager + """ + coRoutines = [task(mgr, d) for d in docs] + await asyncio.gather(*coRoutines) + + +class AsyncProcessor(Processor): + """ + AsyncProcesor process given data via asyncio module + """ + def __init__(self, mgr): + super().__init__() + self.mgr = mgr + + def _process(self, docs): + """ + Internal method to process given data documents + """ + asyncio.run(executeTasks(self.mgr, docs)) + +def example(): + """ + Helper function to demonstrate usage of AsyncProcessor + """ + docs = [1,2,3] + mgr = PrintProcessor() + processor = AsyncProcessor(mgr) + processor.process(docs) + +if __name__ == '__main__': + example()
Binary file build/bdist.linux-x86_64/egg/CHAP/models/__pycache__/basemodel.cpython-39.pyc has changed
Binary file build/bdist.linux-x86_64/egg/CHAP/models/__pycache__/integration.cpython-39.pyc has changed
Binary file build/bdist.linux-x86_64/egg/CHAP/models/__pycache__/workflow.cpython-39.pyc has changed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/bdist.linux-x86_64/egg/CHAP/models/basemodel.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,84 @@ +#!/usr/bin/env python +#-*- coding: utf-8 -*- +#pylint: disable= +""" +File : basemodel.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: BaseModel module +""" + +# system modules +import logging + + +class BaseModel(): + """ + BaseModel docstring + """ + def __init__(self, filename=None, **kwds): + self.logger = logging.getLogger(__name__) + self.construct(filename, **kwds) + self.map = dict(name=__name__) + + def construct(self, filename=None, **kwds): + """ + construct from CLI object + + :param filename: input file name + :param **kwds: named arguments + :return: Basemodel object + """ + print('construct API calls: ', end='') + if filename and filename.endswith('yaml'): + self.construct_from_yaml(filename) + elif filename and filename != '': + self.construct_from_file(filename) + else: + self.construct_from_config(**kwds) + + @classmethod + def construct_from_config(cls, **config): + """ + construct from config object + + :param **config: named arguments + :return: Basemodel object + """ + print(f'construct_from_config: {config}') + + @classmethod + def construct_from_yaml(cls, filename): + """ + construct from CLI object + + :param filename: input file name + :return: Basemodel object + """ + print(f'construct_from_yaml: {filename}') + + @classmethod + def construct_from_file(cls, filename): + """ + construct from filename + + :param filename: input file name + :return: Basemodel object + """ + print(f'construct_from_file: {filename}') + + def getMap(self): + """ + return model map + + :return: map object + """ + return self.map + + +if __name__ == '__main__': + print('### should construct from file.yaml') + base = BaseModel('file.yaml') + print('### should construct from file.txt') + base = BaseModel('file.txt') + print('### should construct from config') + base = BaseModel(param='file.txt', arg='bla')
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/bdist.linux-x86_64/egg/CHAP/models/edd.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,218 @@ +from msnctools.general import create_mask +from msnctools.material import Material +from msnctools.scanparsers import SMBMCAScanParser as ScanParser +import numpy as np +from pathlib import PosixPath +from pydantic import (BaseModel, + confloat, + conint, + conlist, + constr, + FilePath, + validator) +from scipy.interpolate import interp1d +from typing import Optional + + +class MCACeriaCalibrationConfig(BaseModel): + '''Class representing metadata required to perform a Ceria calibration for an + MCA detector. + + :ivar spec_file: Path to the SPEC file containing the CeO2 scan + :ivar scan_number: Number of the CeO2 scan in `spec_file` + :ivar scan_step_index: Index of the scan step to use for calibration, + optional. If not specified, the calibration routine will be performed on + the average of all MCA spectra for the scan. + + :ivar flux_file: csv file containing station beam energy in eV (column 0) + and flux (column 1) + + :ivar detector_name: name of the MCA to calibrate + :ivar num_bins: number of channels on the MCA to calibrate + :ivar max_energy_kev: maximum channel energy of the MCA in keV + + :ivar hexrd_h5_material_file: path to a HEXRD materials.h5 file containing an + entry for the material properties. + :ivar hexrd_h5_material_name: Name of the material entry in + `hexrd_h5_material_file`, defaults to `'CeO2'`. + :ivar lattice_parameter_angstrom: lattice spacing in angstrom to use for + the cubic CeO2 crystal, defaults to `5.41153`. + + :ivar tth_max: detector rotation about hutch x axis, defaults to `90`. + :ivar hkl_tth_tol: minimum resolvable difference in 2&theta between two + unique HKL peaks, defaults to `0.15`. + + :ivar fit_include_bin_ranges: list of MCA channel index ranges whose data + will be included in the calibration routine + :ivar fit_hkls: list of unique HKL indices to fit peaks for in the + calibration routine + + :ivar tth_initial_guess: initial guess for 2&theta + :ivar slope_initial_guess: initial guess for detector channel energy + correction linear slope, defaults to `1.0`. + :ivar intercept_initial_guess: initial guess for detector channel energy + correction y-intercept, defaults to `0.0`. + + :ivar tth_calibrated: calibrated value for 2&theta, defaults to None + :ivar slope_calibrated: calibrated value for detector channel energy + correction linear slope, defaults to `None` + :ivar intercept_calibrated: calibrated value for detector channel energy + correction y-intercept, defaluts to None + + :ivar max_iter: maximum number of iterations of the calibration routine, + defaults to `10`. + :ivar tune_tth_tol: stop iteratively tuning 2&theta when an iteration + produces a change in the tuned value of 2&theta that is smaller than this + value, defaults to `1e-8`. 
+ ''' + + spec_file: FilePath + scan_number: conint(gt=0) + scan_step_index: Optional[conint(ge=0)] + + flux_file: FilePath + + detector_name: constr(strip_whitespace=True, min_length=1) + num_bins: conint(gt=0) + max_energy_kev: confloat(gt=0) + + hexrd_h5_material_file: FilePath + hexrd_h5_material_name: constr(strip_whitespace=True, min_length=1) = 'CeO2' + lattice_parameter_angstrom: confloat(gt=0) = 5.41153 + + tth_max: confloat(gt=0, allow_inf_nan=False) = 90.0 + hkl_tth_tol: confloat(gt=0, allow_inf_nan=False) = 0.15 + + fit_include_bin_ranges: conlist(min_items=1, + item_type=conlist(item_type=conint(ge=0), + min_items=2, + max_items=2)) + fit_hkls: conlist(item_type=conint(ge=0), min_items=1) + + tth_initial_guess: confloat(gt=0, le=tth_max, allow_inf_nan=False) + slope_initial_guess: float = 1.0 + intercept_initial_guess: float = 0.0 + tth_calibrated: Optional[confloat(gt=0, allow_inf_nan=False)] + slope_calibrated: Optional[confloat(allow_inf_nan=False)] + intercept_calibrated: Optional[confloat(allow_inf_nan=False)] + + max_iter: conint(gt=0) = 10 + tune_tth_tol: confloat(ge=0) = 1e-8 + + @validator('fit_include_bin_ranges', each_item=True) + def validate_include_bin_range(cls, value, values): + '''Ensure no bin ranges are outside the boundary of the detector''' + + num_bins = values.get('num_bins') + value[1] = min(value[1], num_bins) + return(value) + + def mca_data(self): + '''Get the 1D array of MCA data to use for calibration. + + :return: MCA data + :rtype: np.ndarray + ''' + + scanparser = ScanParser(self.spec_file, self.scan_number) + if self.scan_step_index is None: + data = scanparser.get_all_detector_data(self.detector_name) + if scanparser.spec_scan_npts > 1: + data = np.average(data, axis=1) + else: + data = data[0] + else: + data = scanparser.get_detector_data(self.detector_name, self.scan_step_index) + + return(np.array(data)) + + def mca_mask(self): + '''Get a boolean mask array to use on MCA data before fitting. + + :return: boolean mask array + :rtype: numpy.ndarray + ''' + + mask = None + bin_indices = np.arange(self.num_bins) + for bin_range in self.fit_include_bin_ranges: + mask = create_mask(bin_indices, + bounds=bin_range, + exclude_bounds=False, + current_mask=mask) + + return(mask) + + def flux_correction_interpolation_function(self): + '''Get an interpolation function to correct MCA data for relative energy + flux of the incident beam. + + :return: energy flux correction interpolation function + :rtype: scipy.interpolate._polyint._Interpolator1D + ''' + + flux = np.loadtxt(self.flux_file) + energies = flux[:,0]/1.e3 + relative_intensities = flux[:,1]/np.max(flux[:,1]) + interpolation_function = interp1d(energies, relative_intensities) + return(interpolation_function) + + def material(self): + '''Get CeO2 as a `msnctools.materials.Material` object. + + :return: CeO2 material + :rtype: msnctools.material.Material + ''' + + material = Material(material_name=self.hexrd_h5_material_name, + material_file=self.hexrd_h5_material_file, + lattice_parameters_angstroms=self.lattice_parameter_angstrom) + # The following kwargs will be needed if we allow the material to be + # built using xrayutilities (for now, we only allow hexrd to make the + # material): + # sgnum=225, + # atoms=['Ce4p', 'O2mdot'], + # pos=[(0.,0.,0.), (0.25,0.75,0.75)], + # enrgy=50000.) # Why do we need to specify an energy to get HKLs when using xrayutilities? 
+ return(material) + + def unique_ds(self): + '''Get a list of unique HKLs and their lattice spacings + + :return: unique HKLs and their lattice spacings in angstroms + :rtype: np.ndarray, np.ndarray + ''' + + unique_hkls, unique_ds = self.material().get_unique_ds(tth_tol=self.hkl_tth_tol, tth_max=self.tth_max) + + return(unique_hkls, unique_ds) + + def fit_ds(self): + '''Get a list of HKLs and their lattice spacings that will be fit in the + calibration routine + + :return: HKLs to fit and their lattice spacings in angstroms + :rtype: np.ndarray, np.ndarray + ''' + + unique_hkls, unique_ds = self.unique_ds() + + fit_hkls = np.array([unique_hkls[i] for i in self.fit_hkls]) + fit_ds = np.array([unique_ds[i] for i in self.fit_hkls]) + + return(fit_hkls, fit_ds) + + def dict(self): + '''Return a representation of this configuration in a dictionary that is + suitable for dumping to a YAML file (one that converts all instances of + fields with type `PosixPath` to `str`). + + :return: dictionary representation of the configuration. + :rtype: dict + ''' + + d = super().dict() + for k,v in d.items(): + if isinstance(v, PosixPath): + d[k] = str(v) + return(d)
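+
+# Example (a sketch): a calibration configuration is typically constructed
+# from a mapping whose keys match the fields documented above; the YAML file
+# name here is hypothetical:
+#
+#   import yaml
+#   with open('ceria_calibration.yaml') as istream:
+#       config = MCACeriaCalibrationConfig(**yaml.safe_load(istream))
+#   mask = config.mca_mask()                # boolean mask over MCA channels
+#   spectrum = config.mca_data()[mask]      # masked MCA spectrum for fitting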
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/bdist.linux-x86_64/egg/CHAP/models/integration.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,511 @@ +import copy +from functools import cache, lru_cache +import json +import logging +import os +from time import time +from typing import Literal, Optional + +from msnctools.general import input_menu +from multiprocessing.pool import ThreadPool +from nexusformat.nexus import (NXdata, + NXdetector, + NXfield, + NXprocess, + NXroot) +import numpy as np +from pydantic import (BaseModel, + validator, + constr, + conlist, + conint, + confloat, + FilePath) +import pyFAI, pyFAI.multi_geometry, pyFAI.units +from pyspec.file.tiff import TiffFile + +from .map import MapConfig, SpecScans + + +class Detector(BaseModel): + """ + Detector class to represent a single detector used in the experiment. + + :param prefix: Prefix of the detector in the SPEC file. + :type prefix: str + :param poni_file: Path to the poni file. + :type poni_file: str + :param mask_file: Optional path to the mask file. + :type mask_file: str, optional + """ + prefix: constr(strip_whitespace=True, min_length=1) + poni_file: FilePath + mask_file: Optional[FilePath] + @validator('poni_file', allow_reuse=True) + def validate_poni_file(cls, poni_file): + """ + Validate the poni file by checking if it's a valid PONI file. + + :param poni_file: Path to the poni file. + :type poni_file: str + :raises ValueError: If poni_file is not a valid PONI file. + :returns: Absolute path to the poni file. + :rtype: str + """ + poni_file = os.path.abspath(poni_file) + try: + ai = azimuthal_integrator(poni_file) + except: + raise(ValueError(f'{poni_file} is not a valid PONI file')) + else: + return(poni_file) + @validator('mask_file', allow_reuse=True) + def validate_mask_file(cls, mask_file, values): + """ + Validate the mask file. If a mask file is provided, it checks if it's a valid TIFF file. + + :param mask_file: Path to the mask file. + :type mask_file: str or None + :param values: A dictionary of the Detector fields. + :type values: dict + :raises ValueError: If mask_file is provided and it's not a valid TIFF file. + :raises ValueError: If `'poni_file'` is not provided in `values`. + :returns: Absolute path to the mask file or None. + :rtype: str or None + """ + if mask_file is None: + return(mask_file) + else: + mask_file = os.path.abspath(mask_file) + poni_file = values.get('poni_file') + if poni_file is None: + raise(ValueError('Cannot validate mask file without a PONI file.')) + else: + try: + mask_array = get_mask_array(mask_file, poni_file) + except BaseException as e: + raise(ValueError(f'Unable to open {mask_file} as a TIFF file')) + else: + return(mask_file) + @property + def azimuthal_integrator(self): + return(azimuthal_integrator(self.poni_file)) + @property + def mask_array(self): + return(get_mask_array(self.mask_file, self.poni_file)) + +@cache +def azimuthal_integrator(poni_file:str): + if not isinstance(poni_file, str): + poni_file = str(poni_file) + return(pyFAI.load(poni_file)) +@cache +def get_mask_array(mask_file:str, poni_file:str): + if mask_file is not None: + if not isinstance(mask_file, str): + mask_file = str(mask_file) + with TiffFile(mask_file) as tiff: + mask_array = tiff.asarray() + else: + mask_array = np.zeros(azimuthal_integrator(poni_file).detector.shape) + return(mask_array) + +class IntegrationConfig(BaseModel): + """ + Class representing the configuration for a raw detector data integration. 
+ + :ivar tool_type: type of integration tool; always set to "integration" + :type tool_type: str, optional + :ivar title: title of the integration + :type title: str + :ivar integration_type: type of integration, one of "azimuthal", "radial", or "cake" + :type integration_type: str + :ivar detectors: list of detectors used in the integration + :type detectors: List[Detector] + :ivar radial_units: radial units for the integration, defaults to `'q_A^-1'` + :type radial_units: str, optional + :ivar radial_min: minimum radial value for the integration range + :type radial_min: float, optional + :ivar radial_max: maximum radial value for the integration range + :type radial_max: float, optional + :ivar radial_npt: number of points in the radial range for the integration + :type radial_npt: int, optional + :ivar azimuthal_units: azimuthal units for the integration + :type azimuthal_units: str, optional + :ivar azimuthal_min: minimum azimuthal value for the integration range + :type azimuthal_min: float, optional + :ivar azimuthal_max: maximum azimuthal value for the integration range + :type azimuthal_max: float, optional + :ivar azimuthal_npt: number of points in the azimuthal range for the integration + :type azimuthal_npt: int, optional + :ivar error_model: error model for the integration, one of "poisson" or "azimuthal" + :type error_model: str, optional + """ + tool_type: Literal['integration'] = 'integration' + title: constr(strip_whitespace=True, min_length=1) + integration_type: Literal['azimuthal', 'radial', 'cake'] + detectors: conlist(item_type=Detector, min_items=1) + radial_units: str = 'q_A^-1' + radial_min: confloat(ge=0) + radial_max: confloat(gt=0) + radial_npt: conint(gt=0) = 1800 + azimuthal_units: str = 'chi_deg' + azimuthal_min: confloat(ge=-180) = -180 + azimuthal_max: confloat(le=360) = 180 + azimuthal_npt: conint(gt=0) = 3600 + error_model: Optional[Literal['poisson', 'azimuthal']] + sequence_index: Optional[conint(gt=0)] + @validator('radial_units', allow_reuse=True) + def validate_radial_units(cls, radial_units): + """ + Validate the radial units for the integration. + + :param radial_units: unvalidated radial units for the integration + :type radial_units: str + :raises ValueError: if radial units are not one of the recognized radial units + :return: validated radial units + :rtype: str + """ + if radial_units in pyFAI.units.RADIAL_UNITS.keys(): + return(radial_units) + else: + raise(ValueError(f'Invalid radial units: {radial_units}. Must be one of {", ".join(pyFAI.units.RADIAL_UNITS.keys())}')) + @validator('azimuthal_units', allow_reuse=True) + def validate_azimuthal_units(cls, azimuthal_units): + """ + Validate that `azimuthal_units` is one of the keys in the + `pyFAI.units.AZIMUTHAL_UNITS` dictionary. + + :param azimuthal_units: The string representing the unit to be validated. + :type azimuthal_units: str + :raises ValueError: If `azimuthal_units` is not one of the keys in `pyFAI.units.AZIMUTHAL_UNITS` + :return: The original supplied value, if is one of the keys in `pyFAI.units.AZIMUTHAL_UNITS`. + :rtype: str + """ + if azimuthal_units in pyFAI.units.AZIMUTHAL_UNITS.keys(): + return(azimuthal_units) + else: + raise(ValueError(f'Invalid azimuthal units: {azimuthal_units}. Must be one of {", ".join(pyFAI.units.AZIMUTHAL_UNITS.keys())}')) + def validate_range_max(range_name:str): + """Validate the maximum value of an integration range. + + :param range_name: The name of the integration range (e.g. radial, azimuthal). 
+ :type range_name: str + :return: The callable that performs the validation. + :rtype: callable + """ + def _validate_range_max(cls, range_max, values): + """Check if the maximum value of the integration range is greater than its minimum value. + + :param range_max: The maximum value of the integration range. + :type range_max: float + :param values: The values of the other fields being validated. + :type values: dict + :raises ValueError: If the maximum value of the integration range is not greater than its minimum value. + :return: The validated maximum range value + :rtype: float + """ + range_min = values.get(f'{range_name}_min') + if range_min < range_max: + return(range_max) + else: + raise(ValueError(f'Maximum value of integration range must be greater than minimum value of integration range ({range_name}_min={range_min}).')) + return(_validate_range_max) + _validate_radial_max = validator('radial_max', allow_reuse=True)(validate_range_max('radial')) + _validate_azimuthal_max = validator('azimuthal_max', allow_reuse=True)(validate_range_max('azimuthal')) + def validate_for_map_config(self, map_config:MapConfig): + """ + Validate the existence of the detector data file for all scan points in `map_config`. + + :param map_config: The `MapConfig` instance to validate against. + :type map_config: MapConfig + :raises RuntimeError: If a detector data file could not be found for a scan point occurring in `map_config`. + :return: None + :rtype: None + """ + for detector in self.detectors: + for scans in map_config.spec_scans: + for scan_number in scans.scan_numbers: + scanparser = scans.get_scanparser(scan_number) + for scan_step_index in range(scanparser.spec_scan_npts): + # Make sure the detector data file exists for all scan points + try: + detector_data_file = scanparser.get_detector_data_file(detector.prefix, scan_step_index) + except: + raise(RuntimeError(f'Could not find data file for detector prefix {detector.prefix} on scan number {scan_number} in spec file {scans.spec_file}')) + def get_azimuthal_adjustments(self): + """To enable a continuous range of integration in the azimuthal direction + for radial and cake integration, obtain adjusted values for this + `IntegrationConfig`'s `azimuthal_min` and `azimuthal_max` values, the + angle amount by which those values were adjusted, and the proper location + of the discontinuity in the azimuthal direction. + + :return: Adjusted chi_min, adjusted chi_max, chi_offset, chi_discontinuity + :rtype: tuple[float,float,float,float] + """ + return(get_azimuthal_adjustments(self.azimuthal_min, self.azimuthal_max)) + def get_azimuthal_integrators(self): + """Get a list of `AzimuthalIntegrator`s that correspond to the detector + configurations in this instance of `IntegrationConfig`. + + The returned `AzimuthalIntegrator`s are (if need be) artificially rotated + in the azimuthal direction to achieve a continuous range of integration + in the azimuthal direction. + + :returns: A list of `AzimuthalIntegrator`s appropriate for use by this + `IntegrationConfig` tool + :rtype: list[pyFAI.azimuthalIntegrator.AzimuthalIntegrator] + """ + chi_min, chi_max, chi_offset, chi_disc = self.get_azimuthal_adjustments() + return(get_azimuthal_integrators(tuple([detector.poni_file for detector in self.detectors]), chi_offset=chi_offset)) + def get_multi_geometry_integrator(self): + """Get a `MultiGeometry` integrator suitable for use by this instance of + `IntegrationConfig`. 
+ + :return: A `MultiGeometry` integrator + :rtype: pyFAI.multi_geometry.MultiGeometry + """ + poni_files = tuple([detector.poni_file for detector in self.detectors]) + radial_range = (self.radial_min, self.radial_max) + azimuthal_range = (self.azimuthal_min, self.azimuthal_max) + return(get_multi_geometry_integrator(poni_files, self.radial_units, radial_range, azimuthal_range)) + def get_azimuthally_integrated_data(self, spec_scans:SpecScans, scan_number:int, scan_step_index:int): + """Return azimuthally-integrated data for the scan step specified. + + :param spec_scans: An instance of `SpecScans` containing the scan step requested. + :type spec_scans: SpecScans + :param scan_number: The number of the scan containing the scan step requested. + :type scan_number: int + :param scan_step_index: The index of the scan step requested. + :type scan_step_index: int + :return: A 1D array of azimuthally-integrated raw detector intensities. + :rtype: np.ndarray + """ + detector_data = spec_scans.get_detector_data(self.detectors, scan_number, scan_step_index) + integrator = self.get_multi_geometry_integrator() + lst_mask = [detector.mask_array for detector in self.detectors] + result = integrator.integrate1d(detector_data, lst_mask=lst_mask, npt=self.radial_npt, error_model=self.error_model) + if result.sigma is None: + return(result.intensity) + else: + return(result.intensity, result.sigma) + def get_radially_integrated_data(self, spec_scans:SpecScans, scan_number:int, scan_step_index:int): + """Return radially-integrated data for the scan step specified. + + :param spec_scans: An instance of `SpecScans` containing the scan step requested. + :type spec_scans: SpecScans + :param scan_number: The number of the scan containing the scan step requested. + :type scan_number: int + :param scan_step_index: The index of the scan step requested. + :type scan_step_index: int + :return: A 1D array of radially-integrated raw detector intensities. + :rtype: np.ndarray + """ + # Handle idiosyncracies of azimuthal ranges in pyFAI + # Adjust chi ranges to get a continuous range of iintegrated data + chi_min, chi_max, chi_offset, chi_disc = self.get_azimuthal_adjustments() + # Perform radial integration on a detector-by-detector basis. + I_each_detector = [] + variance_each_detector = [] + integrators = self.get_azimuthal_integrators() + for i,(integrator,detector) in enumerate(zip(integrators,self.detectors)): + detector_data = spec_scans.get_detector_data([detector], scan_number, scan_step_index)[0] + result = integrator.integrate_radial(detector_data, self.azimuthal_npt, + unit=self.azimuthal_units, azimuth_range=(chi_min,chi_max), + radial_unit=self.radial_units, radial_range=(self.radial_min,self.radial_max), + mask=detector.mask_array) #, error_model=self.error_model) + I_each_detector.append(result.intensity) + if result.sigma is not None: + variance_each_detector.append(result.sigma**2) + # Add the individual detectors' integrated intensities together + I = np.nansum(I_each_detector, axis=0) + # Ignore data at values of chi for which there was no data + I = np.where(I==0, np.nan, I) + if len(I_each_detector) != len(variance_each_detector): + return(I) + else: + # Get the standard deviation of the summed detectors' intensities + sigma = np.sqrt(np.nansum(variance_each_detector, axis=0)) + return(I, sigma) + def get_cake_integrated_data(self, spec_scans:SpecScans, scan_number:int, scan_step_index:int): + """Return cake-integrated data for the scan step specified. 
+ + :param spec_scans: An instance of `SpecScans` containing the scan step requested. + :type spec_scans: SpecScans + :param scan_number: The number of the scan containing the scan step requested. + :type scan_number: int + :param scan_step_index: The index of the scan step requested. + :type scan_step_index: int + :return: A 2D array of cake-integrated raw detector intensities. + :rtype: np.ndarray + """ + detector_data = spec_scans.get_detector_data(self.detectors, scan_number, scan_step_index) + integrator = self.get_multi_geometry_integrator() + lst_mask = [detector.mask_array for detector in self.detectors] + result = integrator.integrate2d(detector_data, lst_mask=lst_mask, + npt_rad=self.radial_npt, npt_azim=self.azimuthal_npt, + method='bbox', + error_model=self.error_model) + if result.sigma is None: + return(result.intensity) + else: + return(result.intensity, result.sigma) + def get_integrated_data(self, spec_scans:SpecScans, scan_number:int, scan_step_index:int): + """Return integrated data for the scan step specified. + + :param spec_scans: An instance of `SpecScans` containing the scan step requested. + :type spec_scans: SpecScans + :param scan_number: The number of the scan containing the scan step requested. + :type scan_number: int + :param scan_step_index: The index of the scan step requested. + :type scan_step_index: int + :return: An array of integrated raw detector intensities. + :rtype: np.ndarray + """ + if self.integration_type == 'azimuthal': + return(self.get_azimuthally_integrated_data(spec_scans, scan_number, scan_step_index)) + elif self.integration_type == 'radial': + return(self.get_radially_integrated_data(spec_scans, scan_number, scan_step_index)) + elif self.integration_type == 'cake': + return(self.get_cake_integrated_data(spec_scans, scan_number, scan_step_index)) + + @property + def integrated_data_coordinates(self): + """ + Return a dictionary of coordinate arrays for navigating the dimension(s) + of the integrated data produced by this instance of `IntegrationConfig`. + + :return: A dictionary with either one or two keys: 'azimuthal' and/or + 'radial', each of which points to a 1-D `numpy` array of coordinate + values. + :rtype: dict[str,np.ndarray] + """ + if self.integration_type == 'azimuthal': + return(get_integrated_data_coordinates(radial_range=(self.radial_min,self.radial_max), + radial_npt=self.radial_npt)) + elif self.integration_type == 'radial': + return(get_integrated_data_coordinates(azimuthal_range=(self.azimuthal_min,self.azimuthal_max), + azimuthal_npt=self.azimuthal_npt)) + elif self.integration_type == 'cake': + return(get_integrated_data_coordinates(radial_range=(self.radial_min,self.radial_max), + radial_npt=self.radial_npt, + azimuthal_range=(self.azimuthal_min,self.azimuthal_max), + azimuthal_npt=self.azimuthal_npt)) + @property + def integrated_data_dims(self): + """Return a tuple of the coordinate labels for the integrated data + produced by this instance of `IntegrationConfig`. + """ + directions = list(self.integrated_data_coordinates.keys()) + dim_names = [getattr(self, f'{direction}_units') for direction in directions] + return(dim_names) + @property + def integrated_data_shape(self): + """Return a tuple representing the shape of the integrated data + produced by this instance of `IntegrationConfig` for a single scan step. 
+ """ + return(tuple([len(coordinate_values) for coordinate_name,coordinate_values in self.integrated_data_coordinates.items()])) + +@cache +def get_azimuthal_adjustments(chi_min:float, chi_max:float): + """ + Fix chi discontinuity at 180 degrees and return the adjusted chi range, + offset, and discontinuty. + + If the discontinuity is crossed, obtain the offset to artificially rotate + detectors to achieve a continuous azimuthal integration range. + + :param chi_min: The minimum value of the azimuthal range. + :type chi_min: float + :param chi_max: The maximum value of the azimuthal range. + :type chi_max: float + :return: The following four values: the adjusted minimum value of the + azimuthal range, the adjusted maximum value of the azimuthal range, the + value by which the chi angle was adjusted, the position of the chi + discontinuity. + """ + # Fix chi discontinuity at 180 degrees for now. + chi_disc = 180 + # If the discontinuity is crossed, artificially rotate the detectors to + # achieve a continuous azimuthal integration range + if chi_min < chi_disc and chi_max > chi_disc: + chi_offset = chi_max - chi_disc + else: + chi_offset = 0 + return(chi_min-chi_offset, chi_max-chi_offset, chi_offset, chi_disc) +@cache +def get_azimuthal_integrators(poni_files:tuple, chi_offset=0): + """ + Return a list of `AzimuthalIntegrator` objects generated from PONI files. + + :param poni_files: Tuple of strings, each string being a path to a PONI file. : tuple + :type poni_files: tuple + :param chi_offset: The angle in degrees by which the `AzimuthalIntegrator` objects will be rotated, defaults to 0. + :type chi_offset: float, optional + :return: List of `AzimuthalIntegrator` objects + :rtype: list[pyFAI.azimuthalIntegrator.AzimuthalIntegrator] + """ + ais = [] + for poni_file in poni_files: + ai = copy.deepcopy(azimuthal_integrator(poni_file)) + ai.rot3 += chi_offset * np.pi/180 + ais.append(ai) + return(ais) +@cache +def get_multi_geometry_integrator(poni_files:tuple, radial_unit:str, radial_range:tuple, azimuthal_range:tuple): + """Return a `MultiGeometry` instance that can be used for azimuthal or cake + integration. + + :param poni_files: Tuple of PONI files that describe the detectors to be + integrated. + :type poni_files: tuple + :param radial_unit: Unit to use for radial integration range. + :type radial_unit: str + :param radial_range: Tuple describing the range for radial integration. + :type radial_range: tuple[float,float] + :param azimuthal_range:Tuple describing the range for azimuthal integration. + :type azimuthal_range: tuple[float,float] + :return: `MultiGeometry` instance that can be used for azimuthal or cake + integration. + :rtype: pyFAI.multi_geometry.MultiGeometry + """ + chi_min, chi_max, chi_offset, chi_disc = get_azimuthal_adjustments(*azimuthal_range) + ais = copy.deepcopy(get_azimuthal_integrators(poni_files, chi_offset=chi_offset)) + multi_geometry = pyFAI.multi_geometry.MultiGeometry(ais, + unit=radial_unit, + radial_range=radial_range, + azimuth_range=(chi_min,chi_max), + wavelength=sum([ai.wavelength for ai in ais])/len(ais), + chi_disc=chi_disc) + return(multi_geometry) +@cache +def get_integrated_data_coordinates(azimuthal_range:tuple=None, azimuthal_npt:int=None, radial_range:tuple=None, radial_npt:int=None): + """ + Return a dictionary of coordinate arrays for the specified radial and/or + azimuthal integration ranges. 
+ + :param azimuthal_range: Tuple specifying the range of azimuthal angles over + which to generate coordinates, in the format (min, max), defaults to + None. + :type azimuthal_range: tuple[float,float], optional + :param azimuthal_npt: Number of azimuthal coordinate points to generate, + defaults to None. + :type azimuthal_npt: int, optional + :param radial_range: Tuple specifying the range of radial distances over + which to generate coordinates, in the format (min, max), defaults to + None. + :type radial_range: tuple[float,float], optional + :param radial_npt: Number of radial coordinate points to generate, defaults + to None. + :type radial_npt: int, optional + :return: A dictionary with either one or two keys: 'azimuthal' and/or + 'radial', each of which points to a 1-D `numpy` array of coordinate + values. + :rtype: dict[str,np.ndarray] + """ + integrated_data_coordinates = {} + if azimuthal_range is not None and azimuthal_npt is not None: + integrated_data_coordinates['azimuthal'] = np.linspace(*azimuthal_range, azimuthal_npt) + if radial_range is not None and radial_npt is not None: + integrated_data_coordinates['radial'] = np.linspace(*radial_range, radial_npt) + return(integrated_data_coordinates)
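For reference, the chi-discontinuity handling implemented by `get_azimuthal_adjustments` above can be exercised on its own. The short sketch below mirrors that logic as standalone Python (an illustration only, not part of the changeset), showing how a range that crosses the 180-degree discontinuity is rotated back onto a continuous interval:

# Standalone sketch of the chi-discontinuity adjustment made by
# get_azimuthal_adjustments above (illustration only, not package code).
def azimuthal_adjustments(chi_min, chi_max, chi_disc=180):
    # Rotate the azimuthal range downward when it crosses the discontinuity
    # so that integration is performed over a continuous interval.
    chi_offset = chi_max - chi_disc if chi_min < chi_disc < chi_max else 0
    return chi_min - chi_offset, chi_max - chi_offset, chi_offset, chi_disc

print(azimuthal_adjustments(-180, 180))  # (-180, 180, 0, 180): no rotation needed
print(azimuthal_adjustments(0, 360))     # (-180, 180, 180, 180): rotated by 180 degrees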
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/bdist.linux-x86_64/egg/CHAP/models/map.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,519 @@ +from functools import cache, lru_cache +import os +from typing import Literal, Optional, Union + +import numpy as np +from pydantic import (BaseModel, + conint, + conlist, + confloat, + constr, + FilePath, + PrivateAttr, + ValidationError, + validator) +from pyspec.file.spec import FileSpec + +class Sample(BaseModel): + """ + Class representing a sample metadata configuration. + + :ivar name: The name of the sample. + :type name: str + :ivar description: A description of the sample. + :type description: Optional[str] + """ + name: constr(min_length=1) + description: Optional[str] + +class SpecScans(BaseModel): + """ + Class representing a set of scans from a single SPEC file. + + :ivar spec_file: Path to the SPEC file. + :type spec_file: str + :ivar scan_numbers: List of scan numbers to use. + :type scan_numbers: list[int] + """ + spec_file: FilePath + scan_numbers: conlist(item_type=conint(gt=0), min_items=1) + @validator('spec_file', allow_reuse=True) + def validate_spec_file(cls, spec_file): + """ + Validate the specified SPEC file. + + :param spec_file: Path to the SPEC file. + :type spec_file: str + :raises ValueError: If the SPEC file is invalid. + :return: Absolute path to the SPEC file, if it is valid. + :rtype: str + """ + try: + spec_file = os.path.abspath(spec_file) + sspec_file = FileSpec(spec_file) + except: + raise(ValueError(f'Invalid SPEC file {spec_file}')) + else: + return(spec_file) + @validator('scan_numbers', allow_reuse=True) + def validate_scan_numbers(cls, scan_numbers, values): + """ + Validate the specified list of scan numbers. + + :param scan_numbers: List of scan numbers. + :type scan_numbers: list of int + :param values: Dictionary of values for all fields of the model. + :type values: dict + :raises ValueError: If a specified scan number is not found in the SPEC file. + :return: List of scan numbers. + :rtype: list of int + """ + spec_file = values.get('spec_file') + if spec_file is not None: + spec_scans = FileSpec(spec_file) + for scan_number in scan_numbers: + scan = spec_scans.get_scan_by_number(scan_number) + if scan is None: + raise(ValueError(f'There is no scan number {scan_number} in {spec_file}')) + return(scan_numbers) + + @property + def scanparsers(self): + '''A list of `ScanParser`s for each of the scans specified by the SPEC + file and scan numbers belonging to this instance of `SpecScans` + ''' + return([self.get_scanparser(scan_no) for scan_no in self.scan_numbers]) + + def get_scanparser(self, scan_number): + """This method returns a `ScanParser` for the specified scan number in + the specified SPEC file. + + :param scan_number: Scan number to get a `ScanParser` for + :type scan_number: int + :return: `ScanParser` for the specified scan number + :rtype: ScanParser + """ + return(get_scanparser(self.spec_file, scan_number)) + def get_index(self, scan_number:int, scan_step_index:int, map_config): + """This method returns a tuple representing the index of a specific step + in a specific spec scan within a map. 
+ + :param scan_number: Scan number to get index for + :type scan_number: int + :param scan_step_index: Scan step index to get index for + :type scan_step_index: int + :param map_config: Map configuration to get index for + :type map_config: MapConfig + :return: Index for the specified scan number and scan step index within + the specified map configuration + :rtype: tuple + """ + index = () + for independent_dimension in map_config.independent_dimensions: + coordinate_index = list(map_config.coords[independent_dimension.label]).index(independent_dimension.get_value(self, scan_number, scan_step_index)) + index = (coordinate_index, *index) + return(index) + def get_detector_data(self, detectors:list, scan_number:int, scan_step_index:int): + """ + Return the raw data from the specified detectors at the specified scan + number and scan step index. + + :param detectors: List of detector prefixes to get raw data for + :type detectors: list[str] + :param scan_number: Scan number to get data for + :type scan_number: int + :param scan_step_index: Scan step index to get data for + :type scan_step_index: int + :return: Data from the specified detectors for the specified scan number + and scan step index + :rtype: list[np.ndarray] + """ + return(get_detector_data(tuple([detector.prefix for detector in detectors]), self.spec_file, scan_number, scan_step_index)) +@cache +def get_available_scan_numbers(spec_file:str): + scans = FileSpec(spec_file).scans + scan_numbers = list(scans.keys()) + return(scan_numbers) +@cache +def get_scanparser(spec_file:str, scan_number:int): + if scan_number not in get_available_scan_numbers(spec_file): + return(None) + else: + return(ScanParser(spec_file, scan_number)) +@lru_cache(maxsize=10) +def get_detector_data(detector_prefixes:tuple, spec_file:str, scan_number:int, scan_step_index:int): + detector_data = [] + scanparser = get_scanparser(spec_file, scan_number) + for prefix in detector_prefixes: + image_data = scanparser.get_detector_data(prefix, scan_step_index) + detector_data.append(image_data) + return(detector_data) + +class PointByPointScanData(BaseModel): + """Class representing a source of raw scalar-valued data for which a value + was recorded at every point in a `MapConfig`. + + :ivar label: A user-defined label for referring to this data in the NeXus + file and in other tools. + :type label: str + :ivar units: The units in which the data were recorded. + :type units: str + :ivar data_type: Represents how these data were recorded at time of data + collection. + :type data_type: Literal['spec_motor', 'scan_column', 'smb_par'] + :ivar name: Represents the name with which these raw data were recorded at + time of data collection. + :type name: str + """ + label: constr(min_length=1) + units: constr(strip_whitespace=True, min_length=1) + data_type: Literal['spec_motor', 'scan_column', 'smb_par'] + name: constr(strip_whitespace=True, min_length=1) + @validator('label') + def validate_label(cls, label): + """Validate that the supplied `label` does not conflict with any of the + values for `label` reserved for certain data needed to perform + corrections. + + :param label: The value of `label` to validate + :type label: str + :raises ValueError: If `label` is one of the reserved values. + :return: The original supplied value `label`, if it is allowed. 
+ :rtype: str + """ + #if (not issubclass(cls,CorrectionsData)) and label in CorrectionsData.__fields__['label'].type_.__args__: + if (not issubclass(cls,CorrectionsData)) and label in CorrectionsData.reserved_labels(): + raise(ValueError(f'{cls.__name__}.label may not be any of the following reserved values: {CorrectionsData.reserved_labels()}')) + return(label) + def validate_for_station(self, station:str): + """Validate this instance of `PointByPointScanData` for a certain choice + of station (beamline). + + :param station: The name of the station (in 'idxx' format). + :type station: str + :raises TypeError: If the station is not compatible with the value of the + `data_type` attribute for this instance of PointByPointScanData. + :return: None + :rtype: None + """ + if station.lower() not in ('id1a3', 'id3a') and self.data_type == 'smb_par': + raise(TypeError(f'{self.__class__.__name__}.data_type may not be "smb_par" when station is "{station}"')) + def validate_for_spec_scans(self, spec_scans:list[SpecScans], scan_step_index:Union[Literal['all'],int]='all'): + """Validate this instance of `PointByPointScanData` for a list of + `SpecScans`. + + :param spec_scans: A list of `SpecScans` whose raw data will be checked + for the presence of the data represented by this instance of + `PointByPointScanData` + :type spec_scans: list[SpecScans] + :param scan_step_index: A specific scan step index to validate, defaults + to `'all'`. + :type scan_step_index: Union[Literal['all'],int], optional + :raises RuntimeError: If the data represented by this instance of + `PointByPointScanData` is missing for the specified scan steps. + :return: None + :rtype: None + """ + for scans in spec_scans: + for scan_number in scans.scan_numbers: + scanparser = scans.get_scanparser(scan_number) + if scan_step_index == 'all': + scan_step_index_range = range(scanparser.spec_scan_npts) + else: + scan_step_index_range = range(scan_step_index,scan_step_index+1) + for scan_step_index in scan_step_index_range: + try: + self.get_value(scans, scan_number, scan_step_index) + except: + raise(RuntimeError(f'Could not find data for {self.name} (data_type "{self.data_type}") on scan number {scan_number} in spec file {scans.spec_file}')) + def get_value(self, spec_scans:SpecScans, scan_number:int, scan_step_index:int): + """Return the value recorded for this instance of `PointByPointScanData` + at a specific scan step. + + :param spec_scans: An instance of `SpecScans` in which the requested scan step occurs. + :type spec_scans: SpecScans + :param scan_number: The number of the scan in which the requested scan step occurs. + :type scan_number: int + :param scan_step_index: The index of the requested scan step. + :type scan_step_index: int + :return: The value recorded of the data represented by this instance of + `PointByPointScanData` at the scan step requested + :rtype: float + """ + if self.data_type == 'spec_motor': + return(get_spec_motor_value(spec_scans.spec_file, scan_number, scan_step_index, self.name)) + elif self.data_type == 'scan_column': + return(get_spec_counter_value(spec_scans.spec_file, scan_number, scan_step_index, self.name)) + elif self.data_type == 'smb_par': + return(get_smb_par_value(spec_scans.spec_file, scan_number, self.name)) +@cache +def get_spec_motor_value(spec_file:str, scan_number:int, scan_step_index:int, spec_mnemonic:str): + """Return the value recorded for a SPEC motor at a specific scan step. + + :param spec_file: Location of a SPEC file in which the requested scan step occurs. 
+ :type spec_file: str
+ :param scan_number: The number of the scan in which the requested scan step occurs.
+ :type scan_number: int
+ :param scan_step_index: The index of the requested scan step.
+ :type scan_step_index: int
+ :param spec_mnemonic: The mnemonic of a SPEC motor.
+ :type spec_mnemonic: str
+ :return: The value of the motor at the scan step requested
+ :rtype: float
+ """
+ scanparser = get_scanparser(spec_file, scan_number)
+ if spec_mnemonic in scanparser.spec_scan_motor_mnes:
+ motor_i = scanparser.spec_scan_motor_mnes.index(spec_mnemonic)
+ if scan_step_index >= 0:
+ scan_step = np.unravel_index(scan_step_index, scanparser.spec_scan_shape, order='F')
+ motor_value = scanparser.spec_scan_motor_vals[motor_i][scan_step[motor_i]]
+ else:
+ motor_value = scanparser.spec_scan_motor_vals[motor_i]
+ else:
+ motor_value = scanparser.get_spec_positioner_value(spec_mnemonic)
+ return(motor_value)
+@cache
+def get_spec_counter_value(spec_file:str, scan_number:int, scan_step_index:int, spec_column_label:str):
+ """Return the value recorded for a SPEC counter at a specific scan step.
+
+ :param spec_file: Location of a SPEC file in which the requested scan step occurs.
+ :type spec_file: str
+ :param scan_number: The number of the scan in which the requested scan step occurs.
+ :type scan_number: int
+ :param scan_step_index: The index of the requested scan step.
+ :type scan_step_index: int
+ :param spec_column_label: The label of a SPEC data column.
+ :type spec_column_label: str
+ :return: The value of the counter at the scan step requested
+ :rtype: float
+ """
+ scanparser = get_scanparser(spec_file, scan_number)
+ if scan_step_index >= 0:
+ return(scanparser.spec_scan_data[spec_column_label][scan_step_index])
+ else:
+ return(scanparser.spec_scan_data[spec_column_label])
+@cache
+def get_smb_par_value(spec_file:str, scan_number:int, par_name:str):
+ """Return the value recorded for a specific scan in an SMB-style .par file.
+
+ :param spec_file: Location of a SPEC file in which the requested scan step occurs.
+ :type spec_file: str
+ :param scan_number: The number of the scan in which the requested scan step occurs.
+ :type scan_number: int
+ :param par_name: The name of the column in the .par file
+ :type par_name: str
+ :return: The value of the .par file entry for the scan requested.
+ :rtype: float
+ """
+ scanparser = get_scanparser(spec_file, scan_number)
+ return(scanparser.pars[par_name])
+def validate_data_source_for_map_config(data_source, values):
+ import_scanparser(values.get('station'), values.get('experiment_type'))
+ data_source.validate_for_station(values.get('station'))
+ data_source.validate_for_spec_scans(values.get('spec_scans'))
+ return(data_source)
+
+class CorrectionsData(PointByPointScanData):
+ """Class representing the special instances of `PointByPointScanData` that
+ are used by certain kinds of `CorrectionConfig` tools.
+
+ :ivar label: One of the reserved values required by `CorrectionConfig`,
+ `'presample_intensity'`, `'postsample_intensity'`, or
+ `'dwell_time_actual'`.
+ :type label: Literal['presample_intensity','postsample_intensity','dwell_time_actual']
+ :ivar units: The units in which the data were recorded.
+ :type units: str
+ :ivar data_type: Represents how these data were recorded at time of data
+ collection.
+ :type data_type: Literal['scan_column', 'smb_par']
+ :ivar name: Represents the name with which these raw data were recorded at
+ time of data collection. 
+ :type name: str
+ """
+ label: Literal['presample_intensity','postsample_intensity','dwell_time_actual']
+ data_type: Literal['scan_column','smb_par']
+ @classmethod
+ def reserved_labels(cls):
+ """Return a list of all the labels reserved for corrections-related
+ scalar data.
+
+ :return: A list of reserved labels
+ :rtype: list[str]
+ """
+ return(list(cls.__fields__['label'].type_.__args__))
+class PresampleIntensity(CorrectionsData):
+ """Class representing a source of raw data for the intensity of the beam that
+ is incident on the sample.
+
+ :ivar label: Must be `"presample_intensity"`
+ :type label: Literal["presample_intensity"]
+ :ivar units: Must be `"counts"`
+ :type units: Literal["counts"]
+ :ivar data_type: Represents how these data were recorded at time of data
+ collection.
+ :type data_type: Literal['scan_column', 'smb_par']
+ :ivar name: Represents the name with which these raw data were recorded at
+ time of data collection.
+ :type name: str
+ """
+ label: Literal['presample_intensity'] = 'presample_intensity'
+ units: Literal['counts'] = 'counts'
+class PostsampleIntensity(CorrectionsData):
+ """Class representing a source of raw data for the intensity of the beam that
+ has passed through the sample.
+
+ :ivar label: Must be `"postsample_intensity"`
+ :type label: Literal["postsample_intensity"]
+ :ivar units: Must be `"counts"`
+ :type units: Literal["counts"]
+ :ivar data_type: Represents how these data were recorded at time of data
+ collection.
+ :type data_type: Literal['scan_column', 'smb_par']
+ :ivar name: Represents the name with which these raw data were recorded at
+ time of data collection.
+ :type name: str
+ """
+ label: Literal['postsample_intensity'] = 'postsample_intensity'
+ units: Literal['counts'] = 'counts'
+class DwellTimeActual(CorrectionsData):
+ """Class representing a source of raw data for the actual dwell time at each
+ scan point in SPEC (with some scan types, this value can vary slightly
+ point-to-point from the dwell time specified in the command).
+
+ :ivar label: Must be `"dwell_time_actual"`
+ :type label: Literal["dwell_time_actual"]
+ :ivar units: Must be `"s"`
+ :type units: Literal["s"]
+ :ivar data_type: Represents how these data were recorded at time of data
+ collection.
+ :type data_type: Literal['scan_column', 'smb_par']
+ :ivar name: Represents the name with which these raw data were recorded at
+ time of data collection.
+ :type name: str
+ """
+ label: Literal['dwell_time_actual'] = 'dwell_time_actual'
+ units: Literal['s'] = 's'
+
+class MapConfig(BaseModel):
+ """Class representing an experiment consisting of one or more SPEC scans.
+
+ :ivar title: The title for the map configuration.
+ :type title: str
+ :ivar station: The name of the station at which the map was collected.
+ :type station: Literal['id1a3','id3a','id3b']
+ :ivar spec_scans: A list of the spec scans that compose the map.
+ :type spec_scans: list[SpecScans]
+ :ivar independent_dimensions: A list of the sources of data representing the
+ raw values of each independent dimension of the map.
+ :type independent_dimensions: list[PointByPointScanData]
+ :ivar presample_intensity: A source of point-by-point presample beam
+ intensity data. Required when applying a CorrectionConfig tool.
+ :type presample_intensity: Optional[PresampleIntensity]
+ :ivar dwell_time_actual: A source of point-by-point actual dwell times for
+ spec scans. Required when applying a CorrectionConfig tool. 
+ :type dwell_time_actual: Optional[DwellTimeActual]
+ :ivar postsample_intensity: A source of point-by-point postsample beam
+ intensity data. Required when applying a CorrectionConfig tool with
+ `correction_type="flux_absorption"` or
+ `correction_type="flux_absorption_background"`.
+ :type postsample_intensity: Optional[PostsampleIntensity]
+ :ivar scalar_data: A list of the sources of data representing other scalar
+ raw data values collected at each point in the map. In the NeXus file
+ representation of the map, datasets for these values will be included.
+ :type scalar_data: Optional[list[PointByPointScanData]]
+ """
+ title: constr(strip_whitespace=True, min_length=1)
+ station: Literal['id1a3','id3a','id3b']
+ experiment_type: Literal['SAXSWAXS', 'EDD', 'XRF']
+ sample: Sample
+ spec_scans: conlist(item_type=SpecScans, min_items=1)
+ independent_dimensions: conlist(item_type=PointByPointScanData, min_items=1)
+ presample_intensity: Optional[PresampleIntensity]
+ dwell_time_actual: Optional[DwellTimeActual]
+ postsample_intensity: Optional[PostsampleIntensity]
+ scalar_data: Optional[list[PointByPointScanData]] = []
+ _coords: dict = PrivateAttr()
+ _validate_independent_dimensions = validator('independent_dimensions', each_item=True, allow_reuse=True)(validate_data_source_for_map_config)
+ _validate_presample_intensity = validator('presample_intensity', allow_reuse=True)(validate_data_source_for_map_config)
+ _validate_dwell_time_actual = validator('dwell_time_actual', allow_reuse=True)(validate_data_source_for_map_config)
+ _validate_postsample_intensity = validator('postsample_intensity', allow_reuse=True)(validate_data_source_for_map_config)
+ _validate_scalar_data = validator('scalar_data', each_item=True, allow_reuse=True)(validate_data_source_for_map_config)
+ @validator('experiment_type')
+ def validate_experiment_type(cls, value, values):
+ '''Ensure values for the station and experiment_type fields are compatible'''
+ station = values.get('station')
+ if station == 'id1a3':
+ allowed_experiment_types = ['SAXSWAXS', 'EDD']
+ elif station == 'id3a':
+ allowed_experiment_types = ['EDD']
+ elif station == 'id3b':
+ allowed_experiment_types = ['SAXSWAXS', 'XRF']
+ else:
+ allowed_experiment_types = []
+ if value not in allowed_experiment_types:
+ raise(ValueError(f'For station {station}, allowed experiment types are {allowed_experiment_types} (supplied experiment type {value} is not allowed)'))
+ return(value)
+ @property
+ def coords(self):
+ """Return a dictionary of the values of each independent dimension across
+ the map.
+
+ :returns: A dictionary of the map's coordinate values. 
+ :rtype: dict[str,list[float]] + """ + try: + return(self._coords) + except: + coords = {} + for independent_dimension in self.independent_dimensions: + coords[independent_dimension.label] = [] + for scans in self.spec_scans: + for scan_number in scans.scan_numbers: + scanparser = scans.get_scanparser(scan_number) + for scan_step_index in range(scanparser.spec_scan_npts): + coords[independent_dimension.label].append(independent_dimension.get_value(scans, scan_number, scan_step_index)) + coords[independent_dimension.label] = np.unique(coords[independent_dimension.label]) + self._coords = coords + return(self._coords) + @property + def dims(self): + """Return a tuple of the independent dimension labels for the map.""" + return([point_by_point_scan_data.label for point_by_point_scan_data in self.independent_dimensions[::-1]]) + @property + def shape(self): + """Return the shape of the map -- a tuple representing the number of + unique values of each dimension across the map. + """ + return(tuple([len(values) for key,values in self.coords.items()][::-1])) + @property + def all_scalar_data(self): + """Return a list of all instances of `PointByPointScanData` for which + this map configuration will collect dataset-like data (as opposed to + axes-like data). + + This will be any and all of the items in the corrections-data-related + fields, as well as any additional items in the optional `scalar_data` + field.""" + return([getattr(self,l,None) for l in CorrectionsData.reserved_labels() if getattr(self,l,None) is not None] + self.scalar_data) + +def import_scanparser(station, experiment_type): + if station.lower() in ('id1a3', 'id3a'): + if experiment_type == 'SAXSWAXS': + from msnctools.scanparsers import SMBLinearScanParser + globals()['ScanParser'] = SMBLinearScanParser + elif experiment_type == 'EDD': + from msnctools.scanparsers import SMBMCAScanParser + globals()['ScanParser'] = SMBMCAScanParser + else: + raise(ValueError(f'Invalid experiment_type: {experiment_type}')) + elif station.lower() == 'id3b': + if experiment_type == 'SAXSWAXS': + from msnctools.scanparsers import FMBSAXSWAXSScanParser + globals()['ScanParser'] = FMBSAXSWAXSScanParser + elif experiment_type == 'XRF': + from msnctools.scanparsers import FMBXRFScanParser + globals()['ScanParser'] = FMBXRFScanParser + else: + raise(ValueError(f'Invalid experiment_type: {experiment_type}')) + else: + raise(ValueError(f'Invalid station: {station}'))
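The `MapConfig` model above is constructed from a plain dictionary (the processors later in this changeset call `MapConfig(**map_config)` on data delivered by a `Reader` with schema `'MapConfig'`). The sketch below shows the kind of mapping it expects; the SPEC file path and the motor mnemonic are placeholders, so validation will only succeed when they point at a real, parsable SPEC file and the msnctools scan parsers are importable:

# Hypothetical input for MapConfig; '/path/to/spec_file' and the 'samx' motor
# name are placeholders and must be replaced with real values to validate.
map_config_data = {
    'title': 'example_map',
    'station': 'id3b',
    'experiment_type': 'SAXSWAXS',
    'sample': {'name': 'sample_1', 'description': 'example sample'},
    'spec_scans': [
        {'spec_file': '/path/to/spec_file', 'scan_numbers': [1, 2, 3]},
    ],
    'independent_dimensions': [
        {'label': 'x', 'units': 'mm', 'data_type': 'spec_motor', 'name': 'samx'},
    ],
}
# from CHAP.models.map import MapConfig
# map_config = MapConfig(**map_config_data)
# print(map_config.dims, map_config.shape)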
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/bdist.linux-x86_64/egg/CHAP/models/workflow.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,48 @@ +#!/usr/bin/env python +#-*- coding: utf-8 -*- +#pylint: disable= +""" +File : workflow.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: Workflow module +""" + +# system modules +from basemodel import BaseModel + + +class Workflow(BaseModel): + """ + Workflow docstring + """ + def __init__(self, filename=None, **kwds): + super().__init__(filename, **kwds) + self.map['workflow'] = __name__ + print('create Workflow calls: ', end='') + + +class EDDWorkflow(Workflow): + """ + EDDWorkflow + """ + def __init__(self, filename=None, **kwds): + super().__init__(filename, **kwds) + self.map['workflow'] = 'edd' + print('create EDDWorkflow') + +class SAXWWorkflow(Workflow): + """ + SAXWWorkflow + """ + def __init__(self, filename=None, **kwds): + super().__init__(filename, **kwds) + self.map['workflow'] = 'saxw' + print('create SAXWWorkflow') + +if __name__ == '__main__': + print('--- create EDDWorkflow from config') + wflow = EDDWorkflow() + print('map', wflow.map) + print('--- create SAXWWorkflow from file.txt') + wflow = SAXWWorkflow('file.txt') + print('map', wflow.map)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/bdist.linux-x86_64/egg/CHAP/pipeline.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,84 @@ +#!/usr/bin/env python +#-*- coding: utf-8 -*- +#pylint: disable= +"""
+File : pipeline.py
+Author : Valentin Kuznetsov <vkuznet AT gmail dot com>
+Description:
+"""
+
+# system modules
+import logging
+from time import time
+
+class Pipeline():
+ """
+ Pipeline represents a generic pipeline class
+ """
+ def __init__(self, items=None, kwds=None):
+ """
+ Pipeline class constructor
+
+ :param items: list of objects
+ :param kwds: list of method args for individual objects
+ """
+ self.__name__ = self.__class__.__name__
+
+ self.items = items
+ self.kwds = kwds
+
+ self.logger = logging.getLogger(self.__name__)
+ self.logger.propagate = False
+
+ def execute(self):
+ """
+ execute API
+ """
+
+ t0 = time()
+ self.logger.info(f'Executing "execute"\n')
+
+ data = None
+ for item, kwargs in zip(self.items, self.kwds):
+ if hasattr(item, 'read'):
+ self.logger.info(f'Calling "read" on {item}')
+ data = item.read(**kwargs)
+ if hasattr(item, 'process'):
+ self.logger.info(f'Calling "process" on {item}')
+ data = item.process(data, **kwargs)
+ if hasattr(item, 'write'):
+ self.logger.info(f'Calling "write" on {item}')
+ data = item.write(data, **kwargs)
+
+ self.logger.info(f'Executed "execute" in {time()-t0:.3f} seconds')
+
+class PipelineObject():
+ """
+ PipelineObject represents a generic pipeline object
+ """
+ def __init__(self, reader, writer, processor, fitter):
+ """
+ PipelineObject class constructor
+ """
+ self.reader = reader
+ self.writer = writer
+ self.processor = processor
+
+ def read(self, filename):
+ """
+ read object API
+ """
+ return self.reader.read(filename)
+
+ def write(self, data, filename):
+ """
+ write object API
+ """
+ return self.writer.write(data, filename)
+
+ def process(self, data):
+ """
+ process object API
+ """
+ return self.processor.process(data)
+
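The `execute` loop above only relies on each item exposing `read`, `process`, or `write`, so a pipeline can be wired by hand for a quick check. A minimal sketch, assuming this module is importable as `CHAP.pipeline` and reusing `PrintProcessor` from `CHAP.processor`; `StringReader` is a toy reader defined only for this example:

# Minimal hand-wired pipeline (illustration only).
from CHAP.pipeline import Pipeline
from CHAP.processor import PrintProcessor

class StringReader:
    def read(self, text=''):
        # Return the supplied text so the next stage has data to process.
        return text

pipeline = Pipeline(
    items=[StringReader(), PrintProcessor()],
    kwds=[{'text': 'hello pipeline'}, {}],
)
pipeline.execute()  # StringReader.read feeds PrintProcessor.process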
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/bdist.linux-x86_64/egg/CHAP/processor.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,948 @@ +#!/usr/bin/env python +#-*- coding: utf-8 -*- +#pylint: disable= +""" +File : processor.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: Processor module +""" + +# system modules +import argparse +import json +import logging +import sys +from time import time + +# local modules +# from pipeline import PipelineObject + +class Processor(): + """ + Processor represent generic processor + """ + def __init__(self): + """ + Processor constructor + """ + self.__name__ = self.__class__.__name__ + self.logger = logging.getLogger(self.__name__) + self.logger.propagate = False + + def process(self, data): + """ + process data API + """ + + t0 = time() + self.logger.info(f'Executing "process" with type(data)={type(data)}') + + data = self._process(data) + + self.logger.info(f'Finished "process" in {time()-t0:.3f} seconds\n') + + return(data) + + def _process(self, data): + # If needed, extract data from a returned value of Reader.read + if isinstance(data, list): + if all([isinstance(d,dict) for d in data]): + data = data[0]['data'] + # process operation is a simple print function + data += "process part\n" + # and we return data back to pipeline + return data + + +class TFaaSImageProcessor(Processor): + ''' + A Processor to get predictions from TFaaS inference server. + ''' + def process(self, data, url, model, verbose=False): + """ + process data API + """ + + t0 = time() + self.logger.info(f'Executing "process" with url {url} model {model}') + + data = self._process(data, url, model, verbose) + + self.logger.info(f'Finished "process" in {time()-t0:.3f} seconds\n') + + return(data) + + def _process(self, data, url, model, verbose): + '''Print and return the input data. + + :param data: Input image data, either file name or actual image data + :type data: object + :return: `data` + :rtype: object + ''' + from MLaaS.tfaas_client import predictImage + from pathlib import Path + self.logger.info(f"input data {type(data)}") + if isinstance(data, str) and Path(data).is_file(): + imgFile = data + data = predictImage(url, imgFile, model, verbose) + else: + rdict = data[0] + import requests + img = rdict['data'] + session = requests.Session() + rurl = url + '/predict/image' + payload = dict(model=model) + files = dict(image=img) + self.logger.info(f"HTTP request {rurl} with image file and {payload} payload") + req = session.post(rurl, files=files, data=payload ) + data = req.content + data = data.decode("utf-8").replace('\n', '') + self.logger.info(f"HTTP response {data}") + + return(data) + +class URLResponseProcessor(Processor): + def _process(self, data): + '''Take data returned from URLReader.read and return a decoded version of + the content. + + :param data: input data (output of URLReader.read) + :type data: list[dict] + :return: decoded data contents + :rtype: object + ''' + + data = data[0] + + content = data['data'] + encoding = data['encoding'] + + self.logger.debug(f'Decoding content of type {type(content)} with {encoding}') + + try: + content = content.decode(encoding) + except: + self.logger.warning(f'Failed to decode content of type {type(content)} with {encoding}') + + return(content) + +class PrintProcessor(Processor): + '''A Processor to simply print the input data to stdout and return the + original input data, unchanged in any way. + ''' + + def _process(self, data): + '''Print and return the input data. 
+ + :param data: Input data + :type data: object + :return: `data` + :rtype: object + ''' + + print(f'{self.__name__} data :') + + if callable(getattr(data, '_str_tree', None)): + # If data is likely an NXobject, print its tree representation + # (since NXobjects' str representations are just their nxname -- not + # very helpful). + print(data._str_tree(attrs=True, recursive=True)) + else: + print(str(data)) + + return(data) + +class NexusToNumpyProcessor(Processor): + '''A class to convert the default plottable data in an `NXobject` into an + `numpy.ndarray`. + ''' + + def _process(self, data): + '''Return the default plottable data signal in `data` as an + `numpy.ndarray`. + + :param data: input NeXus structure + :type data: nexusformat.nexus.tree.NXobject + :raises ValueError: if `data` has no default plottable data signal + :return: default plottable data signal in `data` + :rtype: numpy.ndarray + ''' + + default_data = data.plottable_data + + if default_data is None: + default_data_path = data.attrs['default'] + default_data = data.get(default_data_path) + if default_data is None: + raise(ValueError(f'The structure of {data} contains no default data')) + + default_signal = default_data.attrs.get('signal') + if default_signal is None: + raise(ValueError(f'The signal of {default_data} is unknown')) + default_signal = default_signal.nxdata + + np_data = default_data[default_signal].nxdata + + return(np_data) + +class NexusToXarrayProcessor(Processor): + '''A class to convert the default plottable data in an `NXobject` into an + `xarray.DataArray`.''' + + def _process(self, data): + '''Return the default plottable data signal in `data` as an + `xarray.DataArray`. + + :param data: input NeXus structure + :type data: nexusformat.nexus.tree.NXobject + :raises ValueError: if metadata for `xarray` is absen from `data` + :return: default plottable data signal in `data` + :rtype: xarray.DataArray + ''' + + from xarray import DataArray + + default_data = data.plottable_data + + if default_data is None: + default_data_path = data.attrs['default'] + default_data = data.get(default_data_path) + if default_data is None: + raise(ValueError(f'The structure of {data} contains no default data')) + + default_signal = default_data.attrs.get('signal') + if default_signal is None: + raise(ValueError(f'The signal of {default_data} is unknown')) + default_signal = default_signal.nxdata + + signal_data = default_data[default_signal].nxdata + + axes = default_data.attrs['axes'] + coords = {} + for axis_name in axes: + axis = default_data[axis_name] + coords[axis_name] = (axis_name, + axis.nxdata, + axis.attrs) + + dims = tuple(axes) + + name = default_signal + + attrs = default_data[default_signal].attrs + + return(DataArray(data=signal_data, + coords=coords, + dims=dims, + name=name, + attrs=attrs)) + +class XarrayToNexusProcessor(Processor): + '''A class to convert the data in an `xarray` structure to an + `nexusformat.nexus.NXdata`. + ''' + + def _process(self, data): + '''Return `data` represented as an `nexusformat.nexus.NXdata`. 
+ + :param data: The input `xarray` structure + :type data: typing.Union[xarray.DataArray, xarray.Dataset] + :return: The data and metadata in `data` + :rtype: nexusformat.nexus.NXdata + ''' + + from nexusformat.nexus import NXdata, NXfield + + signal = NXfield(value=data.data, name=data.name, attrs=data.attrs) + + axes = [] + for name, coord in data.coords.items(): + axes.append(NXfield(value=coord.data, name=name, attrs=coord.attrs)) + axes = tuple(axes) + + return(NXdata(signal=signal, axes=axes)) + +class XarrayToNumpyProcessor(Processor): + '''A class to convert the data in an `xarray.DataArray` structure to an + `numpy.ndarray`. + ''' + + def _process(self, data): + '''Return just the signal values contained in `data`. + + :param data: The input `xarray.DataArray` + :type data: xarray.DataArray + :return: The data in `data` + :rtype: numpy.ndarray + ''' + + return(data.data) + +class MapProcessor(Processor): + '''Class representing a process that takes a map configuration and returns a + `nexusformat.nexus.NXentry` representing that map's metadata and any + scalar-valued raw data requseted by the supplied map configuration. + ''' + + def _process(self, data): + '''Process the output of a `Reader` that contains a map configuration and + return a `nexusformat.nexus.NXentry` representing the map. + + :param data: Result of `Reader.read` where at least one item has the + value `'MapConfig'` for the `'schema'` key. + :type data: list[dict[str,object]] + :return: Map data & metadata (SPEC only, no detector) + :rtype: nexusformat.nexus.NXentry + ''' + + map_config = self.get_map_config(data) + nxentry = self.__class__.get_nxentry(map_config) + + return(nxentry) + + def get_map_config(self, data): + '''Get an instance of `MapConfig` from a returned value of `Reader.read` + + :param data: Result of `Reader.read` where at least one item has the + value `'MapConfig'` for the `'schema'` key. + :type data: list[dict[str,object]] + :raises Exception: If a valid `MapConfig` cannot be constructed from `data`. + :return: a valid instance of `MapConfig` with field values taken from `data`. 
+ :rtype: MapConfig + ''' + + from CHAP.models.map import MapConfig + + map_config = False + if isinstance(data, list): + for item in data: + if isinstance(item, dict): + if item.get('schema') == 'MapConfig': + map_config = item.get('data') + break + + if not map_config: + raise(ValueError('No map configuration found')) + + return(MapConfig(**map_config)) + + @staticmethod + def get_nxentry(map_config): + '''Use a `MapConfig` to construct a `nexusformat.nexus.NXentry` + + :param map_config: a valid map configuration + :type map_config: MapConfig + :return: the map's data and metadata contained in a NeXus structure + :rtype: nexusformat.nexus.NXentry + ''' + + from nexusformat.nexus import (NXcollection, + NXdata, + NXentry, + NXfield, + NXsample) + import numpy as np + + nxentry = NXentry(name=map_config.title) + + nxentry.map_config = json.dumps(map_config.dict()) + + nxentry[map_config.sample.name] = NXsample(**map_config.sample.dict()) + + nxentry.attrs['station'] = map_config.station + + nxentry.spec_scans = NXcollection() + for scans in map_config.spec_scans: + nxentry.spec_scans[scans.scanparsers[0].scan_name] = \ + NXfield(value=scans.scan_numbers, + dtype='int8', + attrs={'spec_file':str(scans.spec_file)}) + + nxentry.data = NXdata() + nxentry.data.attrs['axes'] = map_config.dims + for i,dim in enumerate(map_config.independent_dimensions[::-1]): + nxentry.data[dim.label] = NXfield(value=map_config.coords[dim.label], + units=dim.units, + attrs={'long_name': f'{dim.label} ({dim.units})', + 'data_type': dim.data_type, + 'local_name': dim.name}) + nxentry.data.attrs[f'{dim.label}_indices'] = i + + signal = False + auxilliary_signals = [] + for data in map_config.all_scalar_data: + nxentry.data[data.label] = NXfield(value=np.empty(map_config.shape), + units=data.units, + attrs={'long_name': f'{data.label} ({data.units})', + 'data_type': data.data_type, + 'local_name': data.name}) + if not signal: + signal = data.label + else: + auxilliary_signals.append(data.label) + + if signal: + nxentry.data.attrs['signal'] = signal + nxentry.data.attrs['auxilliary_signals'] = auxilliary_signals + + for scans in map_config.spec_scans: + for scan_number in scans.scan_numbers: + scanparser = scans.get_scanparser(scan_number) + for scan_step_index in range(scanparser.spec_scan_npts): + map_index = scans.get_index(scan_number, scan_step_index, map_config) + for data in map_config.all_scalar_data: + nxentry.data[data.label][map_index] = data.get_value(scans, scan_number, scan_step_index) + + return(nxentry) + +class IntegrationProcessor(Processor): + '''Class for integrating 2D detector data + ''' + + def _process(self, data): + '''Integrate the input data with the integration method and keyword + arguments supplied and return the results. + + :param data: input data, including raw data, integration method, and + keyword args for the integration method. + :type data: tuple[typing.Union[numpy.ndarray, list[numpy.ndarray]], + callable, + dict] + :param integration_method: the method of a + `pyFAI.azimuthalIntegrator.AzimuthalIntegrator` or + `pyFAI.multi_geometry.MultiGeometry` that returns the desired + integration results. 
+ :return: integrated raw data + :rtype: pyFAI.containers.IntegrateResult + ''' + + detector_data, integration_method, integration_kwargs = data + + return(integration_method(detector_data, **integration_kwargs)) + +class IntegrateMapProcessor(Processor): + '''Class representing a process that takes a map and integration + configuration and returns a `nexusformat.nexus.NXprocess` containing a map of + the integrated detector data requested. + ''' + + def _process(self, data): + '''Process the output of a `Reader` that contains a map and integration + configuration and return a `nexusformat.nexus.NXprocess` containing a map + of the integrated detector data requested + + :param data: Result of `Reader.read` where at least one item has the + value `'MapConfig'` for the `'schema'` key, and at least one item has + the value `'IntegrationConfig'` for the `'schema'` key. + :type data: list[dict[str,object]] + :return: integrated data and process metadata + :rtype: nexusformat.nexus.NXprocess + ''' + + map_config, integration_config = self.get_configs(data) + nxprocess = self.get_nxprocess(map_config, integration_config) + + return(nxprocess) + + def get_configs(self, data): + '''Return valid instances of `MapConfig` and `IntegrationConfig` from the + input supplied by `MultipleReader`. + + :param data: Result of `Reader.read` where at least one item has the + value `'MapConfig'` for the `'schema'` key, and at least one item has + the value `'IntegrationConfig'` for the `'schema'` key. + :type data: list[dict[str,object]] + :raises ValueError: if `data` cannot be parsed into map and integration configurations. + :return: valid map and integration configuration objects. + :rtype: tuple[MapConfig, IntegrationConfig] + ''' + + self.logger.debug('Getting configuration objects') + t0 = time() + + from CHAP.models.map import MapConfig + from CHAP.models.integration import IntegrationConfig + + map_config = False + integration_config = False + if isinstance(data, list): + for item in data: + if isinstance(item, dict): + schema = item.get('schema') + if schema == 'MapConfig': + map_config = item.get('data') + elif schema == 'IntegrationConfig': + integration_config = item.get('data') + + if not map_config: + raise(ValueError('No map configuration found')) + if not integration_config: + raise(ValueError('No integration configuration found')) + + map_config = MapConfig(**map_config) + integration_config = IntegrationConfig(**integration_config) + + self.logger.debug(f'Got configuration objects in {time()-t0:.3f} seconds') + + return(map_config, integration_config) + + def get_nxprocess(self, map_config, integration_config): + '''Use a `MapConfig` and `IntegrationConfig` to construct a + `nexusformat.nexus.NXprocess` + + :param map_config: a valid map configuration + :type map_config: MapConfig + :param integration_config: a valid integration configuration + :type integration_config" IntegrationConfig + :return: the integrated detector data and metadata contained in a NeXus + structure + :rtype: nexusformat.nexus.NXprocess + ''' + + self.logger.debug('Constructing NXprocess') + t0 = time() + + from nexusformat.nexus import (NXdata, + NXdetector, + NXfield, + NXprocess) + import numpy as np + import pyFAI + + nxprocess = NXprocess(name=integration_config.title) + + nxprocess.map_config = json.dumps(map_config.dict()) + nxprocess.integration_config = json.dumps(integration_config.dict()) + + nxprocess.program = 'pyFAI' + nxprocess.version = pyFAI.version + + for k,v in integration_config.dict().items(): + if 
k == 'detectors': + continue + nxprocess.attrs[k] = v + + for detector in integration_config.detectors: + nxprocess[detector.prefix] = NXdetector() + nxprocess[detector.prefix].local_name = detector.prefix + nxprocess[detector.prefix].distance = detector.azimuthal_integrator.dist + nxprocess[detector.prefix].distance.attrs['units'] = 'm' + nxprocess[detector.prefix].calibration_wavelength = detector.azimuthal_integrator.wavelength + nxprocess[detector.prefix].calibration_wavelength.attrs['units'] = 'm' + nxprocess[detector.prefix].attrs['poni_file'] = str(detector.poni_file) + nxprocess[detector.prefix].attrs['mask_file'] = str(detector.mask_file) + nxprocess[detector.prefix].raw_data_files = np.full(map_config.shape, '', dtype='|S256') + + nxprocess.data = NXdata() + + nxprocess.data.attrs['axes'] = (*map_config.dims, *integration_config.integrated_data_dims) + for i,dim in enumerate(map_config.independent_dimensions[::-1]): + nxprocess.data[dim.label] = NXfield(value=map_config.coords[dim.label], + units=dim.units, + attrs={'long_name': f'{dim.label} ({dim.units})', + 'data_type': dim.data_type, + 'local_name': dim.name}) + nxprocess.data.attrs[f'{dim.label}_indices'] = i + + for i,(coord_name,coord_values) in enumerate(integration_config.integrated_data_coordinates.items()): + if coord_name == 'radial': + type_ = pyFAI.units.RADIAL_UNITS + elif coord_name == 'azimuthal': + type_ = pyFAI.units.AZIMUTHAL_UNITS + coord_units = pyFAI.units.to_unit(getattr(integration_config, f'{coord_name}_units'), type_=type_) + nxprocess.data[coord_units.name] = coord_values + nxprocess.data.attrs[f'{coord_units.name}_indices'] = i+len(map_config.coords) + nxprocess.data[coord_units.name].units = coord_units.unit_symbol + nxprocess.data[coord_units.name].attrs['long_name'] = coord_units.label + + nxprocess.data.attrs['signal'] = 'I' + nxprocess.data.I = NXfield(value=np.empty((*tuple([len(coord_values) for coord_name,coord_values in map_config.coords.items()][::-1]), *integration_config.integrated_data_shape)), + units='a.u', + attrs={'long_name':'Intensity (a.u)'}) + + integrator = integration_config.get_multi_geometry_integrator() + if integration_config.integration_type == 'azimuthal': + integration_method = integrator.integrate1d + integration_kwargs = { + 'lst_mask': [detector.mask_array for detector in integration_config.detectors], + 'npt': integration_config.radial_npt + } + elif integration_config.integration_type == 'cake': + integration_method = integrator.integrate2d + integration_kwargs = { + 'lst_mask': [detector.mask_array for detector in integration_config.detectors], + 'npt_rad': integration_config.radial_npt, + 'npt_azim': integration_config.azimuthal_npt, + 'method': 'bbox' + } + + integration_processor = IntegrationProcessor() + integration_processor.logger.setLevel(self.logger.getEffectiveLevel()) + integration_processor.logger.addHandler(self.logger.handlers[0]) + lst_args = [] + for scans in map_config.spec_scans: + for scan_number in scans.scan_numbers: + scanparser = scans.get_scanparser(scan_number) + for scan_step_index in range(scanparser.spec_scan_npts): + map_index = scans.get_index(scan_number, scan_step_index, map_config) + detector_data = scans.get_detector_data(integration_config.detectors, scan_number, scan_step_index) + result = integration_processor.process((detector_data, integration_method, integration_kwargs)) + nxprocess.data.I[map_index] = result.intensity + for detector in integration_config.detectors: + nxprocess[detector.prefix].raw_data_files[map_index] = 
scanparser.get_detector_data_file(detector.prefix, scan_step_index) + + self.logger.debug(f'Constructed NXprocess in {time()-t0:.3f} seconds') + + return(nxprocess) + +class MCACeriaCalibrationProcessor(Processor): + '''Class representing the procedure to use a CeO2 scan to obtain tuned values + for the bragg diffraction angle and linear correction parameters for MCA + channel energies for an EDD experimental setup. + ''' + + def _process(self, data): + '''Return tuned values for 2&theta and linear correction parameters for + the MCA channel energies. + + :param data: input configuration for the raw data & tuning procedure + :type data: list[dict[str,object]] + :return: original configuration dictionary with tuned values added + :rtype: dict[str,float] + ''' + + calibration_config = self.get_config(data) + + tth, slope, intercept = self.calibrate(calibration_config) + + calibration_config.tth_calibrated = tth + calibration_config.slope_calibrated = slope + calibration_config.intercept_calibrated = intercept + + return(calibration_config.dict()) + + def get_config(self, data): + '''Get an instance of the configuration object needed by this + `Processor` from a returned value of `Reader.read` + + :param data: Result of `Reader.read` where at least one item has the + value `'MCACeriaCalibrationConfig'` for the `'schema'` key. + :type data: list[dict[str,object]] + :raises Exception: If a valid config object cannot be constructed from `data`. + :return: a valid instance of a configuration object with field values + taken from `data`. + :rtype: MCACeriaCalibrationConfig + ''' + + from CHAP.models.edd import MCACeriaCalibrationConfig + + calibration_config = False + if isinstance(data, list): + for item in data: + if isinstance(item, dict): + if item.get('schema') == 'MCACeriaCalibrationConfig': + calibration_config = item.get('data') + break + + if not calibration_config: + raise(ValueError('No MCA ceria calibration configuration found in input data')) + + return(MCACeriaCalibrationConfig(**calibration_config)) + + def calibrate(self, calibration_config): + '''Iteratively calibrate 2&theta by fitting selected peaks of an MCA + spectrum until the computed strain is sufficiently small. Use the fitted + peak locations to determine linear correction parameters for the MCA's + channel energies. + + :param calibration_config: object configuring the CeO2 calibration procedure + :type calibration_config: MCACeriaCalibrationConfig + :return: calibrated values of 2&theta and linear correction parameters + for MCA channel energies : tth, slope, intercept + :rtype: float, float, float + ''' + + from msnctools.fit import Fit, FitMultipeak + import numpy as np + from scipy.constants import physical_constants + + hc = physical_constants['Planck constant in eV/Hz'][0] * \ + physical_constants['speed of light in vacuum'][0] * \ + 1e7 # We'll work in keV and A, not eV and m. 
+ + # Collect raw MCA data of interest + mca_data = calibration_config.mca_data() + mca_bin_energies = np.arange(0, calibration_config.num_bins) * \ + (calibration_config.max_energy_kev / calibration_config.num_bins) + + # Mask out the corrected MCA data for fitting + mca_mask = calibration_config.mca_mask() + fit_mca_energies = mca_bin_energies[mca_mask] + fit_mca_intensities = mca_data[mca_mask] + + # Correct raw MCA data for variable flux at different energies + flux_correct = calibration_config.flux_correction_interpolation_function() + mca_intensity_weights = flux_correct(fit_mca_energies) + fit_mca_intensities = fit_mca_intensities / mca_intensity_weights + + # Get the HKLs and lattice spacings that will be used for fitting + tth = calibration_config.tth_initial_guess + fit_hkls, fit_ds = calibration_config.fit_ds() + c_1 = fit_hkls[:,0]**2 + fit_hkls[:,1]**2 + fit_hkls[:,2]**2 + + for iter_i in range(calibration_config.max_iter): + + ### Perform the uniform fit first ### + + # Get expected peak energy locations for this iteration's starting + # value of tth + fit_lambda = 2.0 * fit_ds * np.sin(0.5*np.radians(tth)) + fit_E0 = hc / fit_lambda + + # Run the uniform fit + best_fit, residual, best_values, best_errors, redchi, success = \ + FitMultipeak.fit_multipeak(fit_mca_intensities, + fit_E0, + x=fit_mca_energies, + fit_type='uniform') + + # Extract values of interest from the best values for the uniform fit + # parameters + uniform_fit_centers = [best_values[f'peak{i+1}_center'] for i in range(len(calibration_config.fit_hkls))] + # uniform_a = best_values['scale_factor'] + # uniform_strain = np.log(uniform_a / calibration_config.lattice_parameter_angstrom) + # uniform_tth = tth * (1.0 + uniform_strain) + # uniform_rel_rms_error = np.linalg.norm(residual) / np.linalg.norm(fit_mca_intensities) + + ### Next, perform the unconstrained fit ### + + # Use the peak locations found in the uniform fit as the initial + # guesses for peak locations in the unconstrained fit + best_fit, residual, best_values, best_errors, redchi, success = \ + FitMultipeak.fit_multipeak(fit_mca_intensities, + uniform_fit_centers, + x=fit_mca_energies, + fit_type='unconstrained') + + # Extract values of interest from the best values for the + # unconstrained fit parameters + unconstrained_fit_centers = np.array([best_values[f'peak{i+1}_center'] for i in range(len(calibration_config.fit_hkls))]) + unconstrained_a = 0.5 * hc * np.sqrt(c_1) / (unconstrained_fit_centers * abs(np.sin(0.5*np.radians(tth)))) + unconstrained_strains = np.log(unconstrained_a / calibration_config.lattice_parameter_angstrom) + unconstrained_strain = np.mean(unconstrained_strains) + unconstrained_tth = tth * (1.0 + unconstrained_strain) + # unconstrained_rel_rms_error = np.linalg.norm(residual) / np.linalg.norm(fit_mca_intensities) + + + # Update tth for the next iteration of tuning + prev_tth = tth + tth = unconstrained_tth + + # Stop tuning tth at this iteration if differences are small enough + if abs(tth - prev_tth) < calibration_config.tune_tth_tol: + break + + # Fit line to expected / computed peak locations from the last + # unconstrained fit. 
+ fit = Fit.fit_data(fit_E0,'linear', x=unconstrained_fit_centers, nan_policy='omit') + slope = fit.best_values['slope'] + intercept = fit.best_values['intercept'] + + return(float(tth), float(slope), float(intercept)) + +class MCADataProcessor(Processor): + '''Class representing a process to return data from a MCA, restuctured to + incorporate the shape & metadata associated with a map configuration to + which the MCA data belongs, and linearly transformed according to the + results of a ceria calibration. + ''' + + def _process(self, data): + '''Process configurations for a map and MCA detector(s), and return the + raw MCA data collected over the map. + + :param data: input map configuration and results of ceria calibration + :type data: list[dict[str,object]] + :return: calibrated and flux-corrected MCA data + :rtype: nexusformat.nexus.NXentry + ''' + + map_config, calibration_config = self.get_configs(data) + nxroot = self.get_nxroot(map_config, calibration_config) + + return(nxroot) + + def get_configs(self, data): + '''Get instances of the configuration objects needed by this + `Processor` from a returned value of `Reader.read` + + :param data: Result of `Reader.read` where at least one item has the + value `'MapConfig'` for the `'schema'` key, and at least one item has + the value `'MCACeriaCalibrationConfig'` for the `'schema'` key. + :type data: list[dict[str,object]] + :raises Exception: If valid config objects cannot be constructed from `data`. + :return: valid instances of the configuration objects with field values + taken from `data`. + :rtype: tuple[MapConfig, MCACeriaCalibrationConfig] + ''' + + from CHAP.models.map import MapConfig + from CHAP.models.edd import MCACeriaCalibrationConfig + + map_config = False + calibration_config = False + if isinstance(data, list): + for item in data: + if isinstance(item, dict): + schema = item.get('schema') + if schema == 'MapConfig': + map_config = item.get('data') + elif schema == 'MCACeriaCalibrationConfig': + calibration_config = item.get('data') + + if not map_config: + raise(ValueError('No map configuration found in input data')) + if not calibration_config: + raise(ValueError('No MCA ceria calibration configuration found in input data')) + + return(MapConfig(**map_config), MCACeriaCalibrationConfig(**calibration_config)) + + def get_nxroot(self, map_config, calibration_config): + '''Get a map of the MCA data collected by the scans in `map_config`. The + MCA data will be calibrated and flux-corrected according to the + parameters included in `calibration_config`. The data will be returned + along with relevant metadata in the form of a NeXus structure. 
+ + :param map_config: the map configuration + :type map_config: MapConfig + :param calibration_config: the calibration configuration + :type calibration_config: MCACeriaCalibrationConfig + :return: a map of the calibrated and flux-corrected MCA data + :rtype: nexusformat.nexus.NXroot + ''' + + from nexusformat.nexus import (NXdata, + NXdetector, + NXentry, + NXinstrument, + NXroot) + import numpy as np + + nxroot = NXroot() + + nxroot[map_config.title] = MapProcessor.get_nxentry(map_config) + nxentry = nxroot[map_config.title] + + nxentry.instrument = NXinstrument() + nxentry.instrument.detector = NXdetector() + nxentry.instrument.detector.calibration_configuration = json.dumps(calibration_config.dict()) + + nxentry.instrument.detector.data = NXdata() + nxdata = nxentry.instrument.detector.data + nxdata.raw = np.empty((*map_config.shape, calibration_config.num_bins)) + nxdata.raw.attrs['units'] = 'counts' + nxdata.channel_energy = calibration_config.slope_calibrated * \ + np.arange(0, calibration_config.num_bins) * \ + (calibration_config.max_energy_kev / calibration_config.num_bins) + \ + calibration_config.intercept_calibrated + nxdata.channel_energy.attrs['units'] = 'keV' + + for scans in map_config.spec_scans: + for scan_number in scans.scan_numbers: + scanparser = scans.get_scanparser(scan_number) + for scan_step_index in range(scanparser.spec_scan_npts): + map_index = scans.get_index(scan_number, scan_step_index, map_config) + nxdata.raw[map_index] = scanparser.get_detector_data(calibration_config.detector_name, scan_step_index) + + nxentry.data.makelink(nxdata.raw, name=calibration_config.detector_name) + nxentry.data.makelink(nxdata.channel_energy, name=f'{calibration_config.detector_name}_channel_energy') + if isinstance(nxentry.data.attrs['axes'], str): + nxentry.data.attrs['axes'] = [nxentry.data.attrs['axes'], f'{calibration_config.detector_name}_channel_energy'] + else: + nxentry.data.attrs['axes'] += [f'{calibration_config.detector_name}_channel_energy'] + nxentry.data.attrs['signal'] = calibration_config.detector_name + + return(nxroot) + +class StrainAnalysisProcessor(Processor): + '''Class representing a process to compute a map of sample strains by fitting + bragg peaks in 1D detector data and analyzing the difference between measured + peak locations and expected peak locations for the sample measured. + ''' + + def _process(self, data): + '''Process the input map detector data & configuration for the strain + analysis procedure, and return a map of sample strains. + + :param data: results of `MutlipleReader.read` containing input map + detector data and strain analysis configuration + :type data: dict[list[str,object]] + :return: map of sample strains + :rtype: xarray.Dataset + ''' + + strain_analysis_config = self.get_config(data) + + return(data) + + def get_config(self, data): + '''Get instances of the configuration objects needed by this + `Processor` from a returned value of `Reader.read` + + :param data: Result of `Reader.read` where at least one item has the + value `'StrainAnalysisConfig'` for the `'schema'` key. + :type data: list[dict[str,object]] + :raises Exception: If valid config objects cannot be constructed from `data`. + :return: valid instances of the configuration objects with field values + taken from `data`. 
+ :rtype: StrainAnalysisConfig + ''' + + strain_analysis_config = False + if isinstance(data, list): + for item in data: + if isinstance(item, dict): + schema = item.get('schema') + if item.get('schema') == 'StrainAnalysisConfig': + strain_analysis_config = item.get('data') + + if not strain_analysis_config: + raise(ValueError('No strain analysis configuration found in input data')) + + return(strain_analysis_config) + + +class OptionParser(): + '''User based option parser''' + def __init__(self): + self.parser = argparse.ArgumentParser(prog='PROG') + self.parser.add_argument("--data", action="store", + dest="data", default="", help="Input data") + self.parser.add_argument("--processor", action="store", + dest="processor", default="Processor", help="Processor class name") + self.parser.add_argument('--log-level', choices=logging._nameToLevel.keys(), + dest='log_level', default='INFO', help='logging level') + +def main(): + '''Main function''' + optmgr = OptionParser() + opts = optmgr.parser.parse_args() + clsName = opts.processor + try: + processorCls = getattr(sys.modules[__name__],clsName) + except: + print(f'Unsupported processor {clsName}') + sys.exit(1) + + processor = processorCls() + processor.logger.setLevel(getattr(logging, opts.log_level)) + log_handler = logging.StreamHandler() + log_handler.setFormatter(logging.Formatter('{name:20}: {message}', style='{')) + processor.logger.addHandler(log_handler) + data = processor.process(opts.data) + + print(f"Processor {processor} operates on data {data}") + +if __name__ == '__main__': + main()
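The processors above all consume the list-of-dicts records produced by `Reader.read`: `get_config` scans that list for an item whose `'schema'` value names the expected configuration class. A minimal sketch of driving `MCACeriaCalibrationProcessor` from Python rather than the CLI; the YAML file name is hypothetical, its contents must satisfy the `MCACeriaCalibrationConfig` model, and the `CHAP.processor` import path is assumed:

    import yaml
    from CHAP.processor import MCACeriaCalibrationProcessor   # assumed import path

    # hypothetical input file; its keys must match the MCACeriaCalibrationConfig model
    with open('ceria_calibration.yaml') as f:
        calibration = yaml.safe_load(f)

    records = [{'name': 'YAMLReader',
                'data': calibration,
                'schema': 'MCACeriaCalibrationConfig',   # key that get_config looks for
                'type': None,
                'encoding': None}]

    # returns the original configuration dict with tth_calibrated,
    # slope_calibrated and intercept_calibrated filled in
    result = MCACeriaCalibrationProcessor().process(records)
    print(result['tth_calibrated'], result['slope_calibrated'], result['intercept_calibrated'])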
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/bdist.linux-x86_64/egg/CHAP/reader.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,209 @@ +#!/usr/bin/env python +""" +File : reader.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: generic Reader module +""" + +# system modules +import argparse +import json +import logging +import sys +from time import time + +# local modules +# from pipeline import PipelineObject + +class Reader(): + """ + Reader represents a generic file reader + """ + + def __init__(self): + """ + Constructor of Reader class + """ + self.__name__ = self.__class__.__name__ + self.logger = logging.getLogger(self.__name__) + self.logger.propagate = False + + def read(self, type_=None, schema=None, encoding=None, **_read_kwargs): + '''Read API + + Wrapper to read, format, and return the data requested. + + :param type_: the expected type of data read from `filename`, defaults + to `None` + :type type_: type, optional + :param schema: the expected schema of the data read from `filename`, + defaults to `None` + :type schema: str, optional + :param _read_kwargs: keyword arguments to pass to `self._read`, defaults + to `{}` + :type _read_kwargs: dict, optional + :return: list with one item: a dictionary containing the data read from + `filename`, the name of this `Reader`, and the values of `type_` and + `schema`. + :rtype: list[dict[str,object]] + ''' + + t0 = time() + self.logger.info(f'Executing "read" with type={type_}, schema={schema}, kwargs={_read_kwargs}') + + data = [{'name': self.__name__, + 'data': self._read(**_read_kwargs), + 'type': type_, + 'schema': schema, + 'encoding': encoding}] + + self.logger.info(f'Finished "read" in {time()-t0:.3f} seconds\n') + return(data) + + def _read(self, filename): + '''Read and return the data requested from `filename` + + :param filename: Name of file to read from + :return: the contents of `filename` + ''' + + if not filename: + self.logger.warning('No file name is given, will skip read operation') + return None + + with open(filename) as file: + data = file.read() + return(data) + +class MultipleReader(Reader): + def read(self, readers): + '''Return results from multiple `Reader`s. + + :param readers: a list of dictionaries, each mapping the name of a `Reader` + class defined in this module to the keyword arguments for that + `Reader`'s `read` method. + :type readers: list[dict] + :return: The results of calling `Reader.read(**kwargs)` for each item + configured in `readers`. + :rtype: list[dict[str,object]] + ''' + + t0 = time() + self.logger.info(f'Executing "read" with {len(readers)} Readers') + + data = [] + for reader_config in readers: + reader_name = list(reader_config.keys())[0] + reader_class = getattr(sys.modules[__name__], reader_name) + reader = reader_class() + reader_kwargs = reader_config[reader_name] + + data.extend(reader.read(**reader_kwargs)) + + self.logger.info(f'Finished "read" in {time()-t0:.3f} seconds\n') + + return(data) + +class YAMLReader(Reader): + def _read(self, filename): + '''Return a dictionary from the contents of a YAML file.
+ + :param filename: name of the YAML file to read from + :return: the contents of `filename` + :rtype: dict + ''' + + import yaml + + with open(filename) as file: + data = yaml.safe_load(file) + return(data) + +class BinaryFileReader(Reader): + def _read(self, filename): + '''Return the content of a given file + + :param filename: name of the binary file to read from + :return: the content of `filename` + :rtype: bytes + ''' + with open(filename, 'rb') as file: + data = file.read() + return(data) + +class NexusReader(Reader): + def _read(self, filename, nxpath='/'): + '''Return the NeXus object stored at `nxpath` in the NeXus file + `filename`. + + :param filename: name of the NeXus file to read from + :type filename: str + :param nxpath: path to a specific location in the NeXus file to read + from, defaults to `'/'` + :type nxpath: str, optional + :raises nexusformat.nexus.NeXusError: if `filename` is not a NeXus + file or `nxpath` is not in `filename`. + :return: the NeXus structure indicated by `filename` and `nxpath`. + :rtype: nexusformat.nexus.NXobject + ''' + + from nexusformat.nexus import nxload + + nxobject = nxload(filename)[nxpath] + return(nxobject) + +class URLReader(Reader): + def _read(self, url, headers={}): + '''Make an HTTP(S) GET request to the provided URL and return the results. + Headers for the request are optional. + + :param url: the URL to read + :type url: str + :param headers: headers to attach to the request, defaults to `{}` + :type headers: dict, optional + :return: the content of the response + :rtype: object + ''' + + import requests + + resp = requests.get(url, headers=headers) + data = resp.content + + self.logger.debug(f'Response content: {data}') + + return(data) + +class OptionParser(): + '''User based option parser''' + def __init__(self): + self.parser = argparse.ArgumentParser(prog='PROG') + self.parser.add_argument("--filename", action="store", + dest="filename", default="", help="Input file") + self.parser.add_argument("--reader", action="store", + dest="reader", default="Reader", help="Reader class name") + self.parser.add_argument('--log-level', choices=logging._nameToLevel.keys(), + dest='log_level', default='INFO', help='logging level') + +def main(): + '''Main function''' + optmgr = OptionParser() + opts = optmgr.parser.parse_args() + clsName = opts.reader + try: + readerCls = getattr(sys.modules[__name__],clsName) + except: + print(f'Unsupported reader {clsName}') + sys.exit(1) + + reader = readerCls() + reader.logger.setLevel(getattr(logging, opts.log_level)) + log_handler = logging.StreamHandler() + log_handler.setFormatter(logging.Formatter('{name:20}: {message}', style='{')) + reader.logger.addHandler(log_handler) + data = reader.read(filename=opts.filename) + + print(f"Reader {reader} reads from {opts.filename}, data {data}") + +if __name__ == '__main__': + main()
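For illustration, a short sketch of how these readers compose; the file names are hypothetical and the `CHAP.reader` import path is assumed. `MultipleReader.read` looks each key up as a class name in this module and concatenates the individual `read` results:

    from CHAP.reader import BinaryFileReader, MultipleReader, YAMLReader   # assumed import path

    # single reader: returns a one-item list of record dictionaries
    records = YAMLReader().read(filename='map.yaml', schema='MapConfig')

    # several readers in one call: one record per configured reader
    records = MultipleReader().read(readers=[
        {'YAMLReader': {'filename': 'map.yaml', 'schema': 'MapConfig'}},
        {'BinaryFileReader': {'filename': 'image.tiff'}},
    ])
    print([record['schema'] for record in records])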
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/bdist.linux-x86_64/egg/CHAP/runner.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,82 @@ +#!/usr/bin/env python +#-*- coding: utf-8 -*- +#pylint: disable= +""" +File : runner.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: +""" + +# system modules +import argparse +import logging +import os +import sys +import yaml + +# local modules +from CHAP.pipeline import Pipeline + + +class OptionParser(): + def __init__(self): + "User based option parser" + self.parser = argparse.ArgumentParser(prog='PROG') + self.parser.add_argument("--config", action="store", + dest="config", default="", help="Input configuration file") + self.parser.add_argument('--log-level', choices=logging._nameToLevel.keys(), + dest='log_level', default='INFO', help='logging level') + +def main(): + "Main function" + optmgr = OptionParser() + opts = optmgr.parser.parse_args() + runner(opts) + +def runner(opts): + """ + Main runner function + + :param opts: opts is an instance of argparse.Namespace which contains all input parameters + """ + + logger = logging.getLogger(__name__) + log_level = getattr(logging, opts.log_level.upper()) + logger.setLevel(log_level) + log_handler = logging.StreamHandler() + log_handler.setFormatter(logging.Formatter('{name:20}: {message}', style='{')) + logger.addHandler(log_handler) + + config = {} + with open(opts.config) as file: + config = yaml.safe_load(file) + logger.info(f'Input configuration: {config}\n') + pipeline_config = config.get('pipeline', []) + objects = [] + kwds = [] + for item in pipeline_config: + # load individual object with given name from its module + if isinstance(item, dict): + name = list(item.keys())[0] + kwargs = item[name] + else: + name = item + kwargs = {} + modName, clsName = name.split('.') + module = __import__(f'CHAP.{modName}') + obj = getattr(module, clsName)() + obj.logger.setLevel(log_level) + obj.logger.addHandler(log_handler) + logger.info(f'Loaded {obj}') + objects.append(obj) + kwds.append(kwargs) + pipeline = Pipeline(objects, kwds) + pipeline.logger.setLevel(log_level) + pipeline.logger.addHandler(log_handler) + logger.info(f'Loaded {pipeline} with {len(objects)} items\n') + logger.info(f'Calling "execute" on {pipeline}') + pipeline.execute() + + +if __name__ == '__main__': + main()
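`runner` only needs an object carrying `config` and `log_level` attributes, so the pipeline can also be launched without the command line. A minimal sketch; the configuration path is hypothetical and must contain a top-level `pipeline:` list such as the one in configs/pipeline.yaml shown below, and the `CHAP.runner` import path is assumed:

    from argparse import Namespace

    from CHAP.runner import runner   # assumed import path

    opts = Namespace(config='configs/pipeline.yaml',   # hypothetical pipeline configuration
                     log_level='INFO')
    runner(opts)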
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/bdist.linux-x86_64/egg/CHAP/writer.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,243 @@ +#!/usr/bin/env python +""" +File : writer.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: generic Writer module +""" + +# system modules +import argparse +import json +import logging +import os +import sys +from time import time + +# local modules +# from pipeline import PipelineObject + +class Writer(): + """ + Writer represent generic file writer + """ + + def __init__(self): + """ + Constructor of Writer class + """ + self.__name__ = self.__class__.__name__ + self.logger = logging.getLogger(self.__name__) + self.logger.propagate = False + + def write(self, data, filename, **_write_kwargs): + """ + write API + + :param filename: Name of file to write to + :param data: data to write to file + :return: data written to file + """ + + t0 = time() + self.logger.info(f'Executing "write" with filename={filename}, type(data)={type(data)}, kwargs={_write_kwargs}') + + data = self._write(data, filename, **_write_kwargs) + + self.logger.info(f'Finished "write" in {time()-t0:.3f} seconds\n') + + return(data) + + def _write(self, data, filename): + with open(filename, 'a') as file: + file.write(data) + return(data) + +class YAMLWriter(Writer): + def _write(self, data, filename, force_overwrite=False): + '''If `data` is a `dict`, write it to `filename`. + + :param data: the dictionary to write to `filename`. + :type data: dict + :param filename: name of the file to write to. + :type filename: str + :param force_overwrite: flag to allow data in `filename` to be + overwritten if it already exists. + :type force_overwrite: bool + :raises TypeError: if `data` is not a `dict` + :raises RuntimeError: if `filename` already exists and + `force_overwrite` is `False`. + :return: the original input data + :rtype: dict + ''' + + import yaml + + if not isinstance(data, (dict, list)): + raise(TypeError(f'{self.__name__}.write: input data must be a dict or list.')) + + if not force_overwrite: + if os.path.isfile(filename): + raise(RuntimeError(f'{self.__name__}: {filename} already exists.')) + + with open(filename, 'w') as outf: + yaml.dump(data, outf, sort_keys=False) + + return(data) + +class ExtractArchiveWriter(Writer): + def _write(self, data, filename): + '''Take a .tar archive represented as bytes in `data` and write the + extracted archive to files. + + :param data: the archive data + :type data: bytes + :param filename: the name of a directory to which the archive files will + be written + :type filename: str + :return: the original `data` + :rtype: bytes + ''' + + from io import BytesIO + import tarfile + + tar = tarfile.open(fileobj=BytesIO(data)) + tar.extractall(path=filename) + + return(data) + + +class NexusWriter(Writer): + def _write(self, data, filename, force_overwrite=False): + '''Write `data` to a NeXus file + + :param data: the data to write to `filename`. + :param filename: name of the file to write to. + :param force_overwrite: flag to allow data in `filename` to be + overwritten, if it already exists. 
+ :return: the original input data + ''' + + from nexusformat.nexus import NXobject + import xarray as xr + + if isinstance(data, NXobject): + nxstructure = data + + elif isinstance(data, xr.Dataset): + nxstructure = self.get_nxdata_from_dataset(data) + + elif isinstance(data, xr.DataArray): + nxstructure = self.get_nxdata_from_dataarray(data) + + else: + raise(TypeError(f'{self.__name__}.write: unknown data format: {type(data).__name__}')) + + mode = 'w' if force_overwrite else 'w-' + nxstructure.save(filename, mode=mode) + + return(data) + + + def get_nxdata_from_dataset(self, dset): + '''Return an instance of `nexusformat.nexus.NXdata` that represents the + data and metadata attributes contained in `dset`. + + :param dset: the input dataset to represent + :type data: xarray.Dataset + :return: `dset` represented as an instance of `nexusformat.nexus.NXdata` + :rtype: nexusformat.nexus.NXdata + ''' + + from nexusformat.nexus import NXdata, NXfield + + nxdata_args = {'signal':None, 'axes':()} + + for var in dset.data_vars: + data_var = dset[var] + nxfield = NXfield(data_var.data, + name=data_var.name, + attrs=data_var.attrs) + if nxdata_args['signal'] is None: + nxdata_args['signal'] = nxfield + else: + nxdata_args[var] = nxfield + + for coord in dset.coords: + coord_var = dset[coord] + nxfield = NXfield(coord_var.data, + name=coord_var.name, + attrs=coord_var.attrs) + nxdata_args['axes'] = (*nxdata_args['axes'], nxfield) + + nxdata = NXdata(**nxdata_args) + nxdata.attrs['xarray_attrs'] = json.dumps(dset.attrs) + + return(nxdata) + + def get_nxdata_from_dataarray(self, darr): + '''Return an instance of `nexusformat.nexus.NXdata` that represents the + data and metadata attributes contained in `darr`. + + :param darr: the input dataset to represent + :type darr: xarray.DataArray + :return: `darr` represented as an instance of `nexusformat.nexus.NXdata` + :rtype: nexusformat.nexus.NXdata + ''' + + from nexusformat.nexus import NXdata, NXfield + + nxdata_args = {'signal':None, 'axes':()} + + nxdata_args['signal'] = NXfield(darr.data, + name=darr.name, + attrs=darr.attrs) + + + for coord in darr.coords: + coord_var = darr[coord] + nxfield = NXfield(coord_var.data, + name=coord_var.name, + attrs=coord_var.attrs) + nxdata_args['axes'] = (*nxdata_args['axes'], nxfield) + + nxdata = NXdata(**nxdata_args) + nxdata.attrs['xarray_attrs'] = json.dumps(darr.attrs) + + return(nxdata) + + +class OptionParser(): + '''User based option parser''' + def __init__(self): + self.parser = argparse.ArgumentParser(prog='PROG') + self.parser.add_argument("--data", action="store", + dest="data", default="", help="Input data") + self.parser.add_argument("--filename", action="store", + dest="filename", default="", help="Output file") + self.parser.add_argument("--writer", action="store", + dest="writer", default="Writer", help="Writer class name") + self.parser.add_argument('--log-level', choices=logging._nameToLevel.keys(), + dest='log_level', default='INFO', help='logging level') + +def main(): + '''Main function''' + optmgr = OptionParser() + opts = optmgr.parser.parse_args() + clsName = opts.writer + try: + writerCls = getattr(sys.modules[__name__],clsName) + except: + print(f'Unsupported writer {clsName}') + sys.exit(1) + + writer = writerCls() + writer.logger.setLevel(getattr(logging, opts.log_level)) + log_handler = logging.StreamHandler() + log_handler.setFormatter(logging.Formatter('{name:20}: {message}', style='{')) + writer.logger.addHandler(log_handler) + data = writer.write(opts.data, opts.filename) + 
print(f"Writer {writer} writes to {opts.filename}, data {data}") + +if __name__ == '__main__': + main()
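A short usage sketch for the writers defined above; the output file name is hypothetical and the `CHAP.writer` import path is assumed. `YAMLWriter` raises a `RuntimeError` instead of silently overwriting an existing file unless `force_overwrite=True` is passed:

    from CHAP.writer import YAMLWriter   # assumed import path

    data = {'status': 'ok', 'values': [1, 2, 3]}
    # writes the dictionary to out.yaml and returns it unchanged
    YAMLWriter().write(data, 'out.yaml', force_overwrite=True)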
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/bdist.linux-x86_64/egg/MLaaS/ktrain.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,205 @@ +#!/usr/bin/env python +#-*- coding: utf-8 -*- +#pylint: disable= +""" +File : ktrain.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: Keras based ML network to train over MNIST dataset +""" + +# system modules +import os +import sys +import json +import gzip +import pickle +import argparse + +# third-party modules +import numpy as np +import tensorflow as tf +from tensorflow import keras +from tensorflow.keras import layers +from tensorflow.keras import backend as K +from tensorflow.python.tools import saved_model_utils + + +def modelGraph(model_dir): + """ + Provide input/output names used by TF Graph along with graph itself + The code is based on TF saved_model_cli.py script. + """ + input_names = [] + output_names = [] + tag_sets = saved_model_utils.get_saved_model_tag_sets(model_dir) + for tag_set in sorted(tag_sets): + print('%r' % ', '.join(sorted(tag_set))) + meta_graph_def = saved_model_utils.get_meta_graph_def(model_dir, tag_set[0]) + for key in meta_graph_def.signature_def.keys(): + meta = meta_graph_def.signature_def[key] + if hasattr(meta, 'inputs') and hasattr(meta, 'outputs'): + inputs = meta.inputs + outputs = meta.outputs + input_signatures = list(meta.inputs.values()) + input_names = [signature.name for signature in input_signatures] + if len(input_names) > 0: + output_signatures = list(meta.outputs.values()) + output_names = [signature.name for signature in output_signatures] + return input_names, output_names, meta_graph_def + +def readData(fin, num_classes): + """ + Helper function to read MNIST data and provide it to + upstream code, e.g. to the training layer + """ + # Load the data and split it between train and test sets +# (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() + f = gzip.open(fin, 'rb') + if sys.version_info < (3,): + mnist_data = pickle.load(f) + else: + mnist_data = pickle.load(f, encoding='bytes') + f.close() + (x_train, y_train), (x_test, y_test) = mnist_data + + # Scale images to the [0, 1] range + x_train = x_train.astype("float32") / 255 + x_test = x_test.astype("float32") / 255 + # Make sure images have shape (28, 28, 1) + x_train = np.expand_dims(x_train, -1) + x_test = np.expand_dims(x_test, -1) + print("x_train shape:", x_train.shape) + print(x_train.shape[0], "train samples") + print(x_test.shape[0], "test samples") + + + # convert class vectors to binary class matrices + y_train = keras.utils.to_categorical(y_train, num_classes) + y_test = keras.utils.to_categorical(y_test, num_classes) + return x_train, y_train, x_test, y_test + + +def train(fin, fout=None, model_name=None, epochs=1, batch_size=128, h5=False): + """ + train function for MNIST + """ + # Model / data parameters + num_classes = 10 + input_shape = (28, 28, 1) + + # create ML model + model = keras.Sequential( + [ + keras.Input(shape=input_shape), + layers.Conv2D(32, kernel_size=(3, 3), activation="relu"), + layers.MaxPooling2D(pool_size=(2, 2)), + layers.Conv2D(64, kernel_size=(3, 3), activation="relu"), + layers.MaxPooling2D(pool_size=(2, 2)), + layers.Flatten(), + layers.Dropout(0.5), + layers.Dense(num_classes, activation="softmax"), + ] + ) + + model.summary() + print("model input", model.input, type(model.input), model.input.__dict__) + print("model output", model.output, type(model.output), model.output.__dict__) + model.compile(loss="categorical_crossentropy", optimizer="adam", 
metrics=["accuracy"]) + + # train model + x_train, y_train, x_test, y_test = readData(fin, num_classes) + model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1) + + # evaluate trained model + score = model.evaluate(x_test, y_test, verbose=0) + print("Test loss:", score[0]) + print("Test accuracy:", score[1]) + print("save model to", fout) + writer(fout, model_name, model, input_shape, h5) + +def writer(fout, model_name, model, input_shape, h5=False): + """ + Writer provide write function for given model + """ + if not fout: + return + model.save(fout) + if h5: + model.save('{}/{}'.format(fout, h5), save_format='h5') + pbModel = '{}/saved_model.pb'.format(fout) + pbtxtModel = '{}/saved_model.pbtxt'.format(fout) + convert(pbModel, pbtxtModel) + + # get meta-data information about our ML model + input_names, output_names, model_graph = modelGraph(model_name) + print("### input", input_names) + print("### output", output_names) + # ML uses (28,28,1) shape, i.e. 28x28 black-white images + # if we'll use color images we'll use shape (28, 28, 3) + img_channels = input_shape[2] # last item represent number of colors + meta = {'name': model_name, + 'model': 'saved_model.pb', + 'labels': 'labels.txt', + 'img_channels': img_channels, + 'input_name': input_names[0].split(':')[0], + 'output_name': output_names[0].split(':')[0], + 'input_node': model.input.name, + 'output_node': model.output.name + } + with open(fout+'/params.json', 'w') as ostream: + ostream.write(json.dumps(meta)) + with open(fout+'/labels.txt', 'w') as ostream: + for i in range(0, 10): + ostream.write(str(i)+'\n') + with open(fout + '/model.graph', 'wb') as ostream: + ostream.write(model_graph.SerializeToString()) + +def convert(fin, fout): + """ + convert input model.pb into output model.pbtxt + Based on internet search: + - https://www.tensorflow.org/guide/saved_model + - https://www.programcreek.com/python/example/123317/tensorflow.core.protobuf.saved_model_pb2.SavedModel + """ + import google.protobuf + from tensorflow.core.protobuf import saved_model_pb2 + import tensorflow as tf + + saved_model = saved_model_pb2.SavedModel() + + with open(fin, 'rb') as f: + saved_model.ParseFromString(f.read()) + + with open(fout, 'w') as f: + f.write(google.protobuf.text_format.MessageToString(saved_model)) + + +class OptionParser(): + def __init__(self): + "User based option parser" + self.parser = argparse.ArgumentParser(prog='PROG') + self.parser.add_argument("--fin", action="store", + dest="fin", default="", help="Input MNIST file") + self.parser.add_argument("--fout", action="store", + dest="fout", default="", help="Output models area") + self.parser.add_argument("--model", action="store", + dest="model", default="mnist", help="model name") + self.parser.add_argument("--epochs", action="store", + dest="epochs", default=1, help="number of epochs to use in ML training") + self.parser.add_argument("--batch_size", action="store", + dest="batch_size", default=128, help="batch size to use in training") + self.parser.add_argument("--h5", action="store", + dest="h5", default="mnist", help="h5 model file name") + +def main(): + "Main function" + optmgr = OptionParser() + opts = optmgr.parser.parse_args() + train(opts.fin, opts.fout, + model_name=opts.model, + epochs=opts.epochs, + batch_size=opts.batch_size, + h5=opts.h5) + +if __name__ == '__main__': + main()
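The training entry point can also be called directly instead of through the CLI. A sketch, assuming ktrain.py is importable as `ktrain` and that the gzipped pickle has the ((x_train, y_train), (x_test, y_test)) layout that `readData` expects; the file and output paths are hypothetical:

    from ktrain import train   # assumed module name

    train('mnist.pkl.gz',            # hypothetical gzipped MNIST pickle
          fout='models/mnist',       # SavedModel output directory
          model_name='models/mnist', # same directory, read back by modelGraph for metadata
          epochs=1,
          batch_size=128)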
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/bdist.linux-x86_64/egg/MLaaS/mnist_img.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,83 @@ +#!/usr/bin/env python +#-*- coding: utf-8 -*- +#pylint: disable= +""" +File : mnist_img.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: +""" + +import json +import gzip +import argparse +# from itertools import chain + +import numpy as np +import matplotlib.pyplot as plt + + +def readImage(fname, fout, num_images=5, imgId=2): + """ + Helper function to read MNIST image + """ + image_size = 28 + with gzip.open(fname, 'r') as fstream: + fstream.read(16) + buf = fstream.read(image_size * image_size * num_images) + data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32) + data = data.reshape(num_images, image_size, image_size, 1) + image = np.asarray(data[imgId]).squeeze() + plt.imsave(fout, image) + print("read:", fname, "wrote:", fout, "image:", type(image), "shape:", image.shape) + +def img2json(image): + """ + Convert given image to JSON data format used by TFaaS + """ + # values = [int(i) for i in list(chain.from_iterable(image))] + # values = image.tolist() + values = [] + for row in image.tolist(): + row = [int(i) for i in row] + vals = [[i] for i in row] + values.append(vals) + # final values should be an array of elements, e.g. single image representation + values = [values] + keys = [str(i) for i in range(0, 10)] + meta = { + 'keys': keys, + 'values': values, + 'model': 'mnist' + } + with open('img.json', 'w') as ostream: + ostream.write(json.dumps(meta)) + + +class OptionParser(): + def __init__(self): + "User based option parser" + fname = "train-images-idx3-ubyte.gz" + self.parser = argparse.ArgumentParser(prog='PROG') + self.parser.add_argument("--fin", action="store", + dest="fin", default=fname, help=f"Input MNIST file, default {fname}") + self.parser.add_argument("--fout", action="store", + dest="fout", default="img.png", help="Output image fila name, default img.png") + self.parser.add_argument("--nimages", action="store", + dest="nimages", default=5, help="number of images to read, default 5") + self.parser.add_argument("--imgid", action="store", + dest="imgid", default=2, help="image index to use from nimages, default 2 (number 4)") + +def main(): + """ + main function to produce image file from mnist dataset. + MNIST dataset can be downloaded from + curl -O http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz + """ + optmgr = OptionParser() + opts = optmgr.parser.parse_args() + num_images = int(opts.nimages) + imgId = int(opts.imgid) + img = readImage(opts.fin, opts.fout, num_images, imgId) + +if __name__ == '__main__': + main()
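The same helpers can be used from Python directly. A minimal sketch, assuming mnist_img.py is importable as `mnist_img`; the archive name matches the default mentioned in OptionParser and the output name is arbitrary:

    from mnist_img import readImage   # assumed module name

    # reads the first 5 images, extracts image index 2 and saves it as a PNG
    readImage('train-images-idx3-ubyte.gz', 'img.png', num_images=5, imgId=2)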
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/bdist.linux-x86_64/egg/MLaaS/tfaas_client.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,371 @@ +#!/usr/bin/env python +#-*- coding: utf-8 -*- +#pylint: disable= +""" +File : tfaas_client.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: simple python client to communicate with TFaaS server +""" + +# system modules +import os +import sys +import pwd +import ssl +import json +import binascii +import argparse +import itertools +import mimetypes +if sys.version_info < (2, 7): + raise Exception("TFaaS client requires python 2.7 or greater") +# python 3 +if sys.version.startswith('3.'): + import urllib.request as urllib2 + import urllib.parse as urllib + import http.client as httplib + import http.cookiejar as cookielib +else: + import mimetools + import urllib + import urllib2 + import httplib + import cookielib + +TFAAS_CLIENT = 'tfaas-client/1.1::python/%s.%s' % sys.version_info[:2] + +class OptionParser(): + def __init__(self): + "User based option parser" + self.parser = argparse.ArgumentParser(prog='PROG') + self.parser.add_argument("--url", action="store", + dest="url", default="", help="TFaaS URL") + self.parser.add_argument("--upload", action="store", + dest="upload", default="", help="upload model to TFaaS") + self.parser.add_argument("--bundle", action="store", + dest="bundle", default="", help="upload bundle ML files to TFaaS") + self.parser.add_argument("--predict", action="store", + dest="predict", default="", help="fetch prediction from TFaaS") + self.parser.add_argument("--image", action="store", + dest="image", default="", help="fetch prediction for given image") + self.parser.add_argument("--model", action="store", + dest="model", default="", help="TF model to use") + self.parser.add_argument("--delete", action="store", + dest="delete", default="", help="delete model in TFaaS") + self.parser.add_argument("--models", action="store_true", + dest="models", default=False, help="show existing models in TFaaS") + self.parser.add_argument("--verbose", action="store_true", + dest="verbose", default=False, help="verbose output") + msg = 'specify private key file name, default $X509_USER_PROXY' + self.parser.add_argument("--key", action="store", + default=x509(), dest="ckey", help=msg) + msg = 'specify private certificate file name, default $X509_USER_PROXY' + self.parser.add_argument("--cert", action="store", + default=x509(), dest="cert", help=msg) + default_ca = os.environ.get("X509_CERT_DIR") + if not default_ca or not os.path.exists(default_ca): + default_ca = "/etc/grid-security/certificates" + if not os.path.exists(default_ca): + default_ca = "" + if default_ca: + msg = 'specify CA path, default currently is %s' % default_ca + else: + msg = 'specify CA path; defaults to system CAs.' 
+ self.parser.add_argument("--capath", action="store", + default=default_ca, dest="capath", help=msg) + msg = 'specify number of retries upon busy DAS server message' + +class HTTPSClientAuthHandler(urllib2.HTTPSHandler): + """ + Simple HTTPS client authentication class based on provided + key/ca information + """ + def __init__(self, key=None, cert=None, capath=None, level=0): + if level > 0: + urllib2.HTTPSHandler.__init__(self, debuglevel=1) + else: + urllib2.HTTPSHandler.__init__(self) + self.key = key + self.cert = cert + self.capath = capath + + def https_open(self, req): + """Open request method""" + #Rather than pass in a reference to a connection class, we pass in + # a reference to a function which, for all intents and purposes, + # will behave as a constructor + return self.do_open(self.get_connection, req) + + def get_connection(self, host, timeout=300): + """Connection method""" + if self.key and self.cert and not self.capath: + return httplib.HTTPSConnection(host, key_file=self.key, + cert_file=self.cert) + elif self.cert and self.capath: + context = ssl.SSLContext(ssl.PROTOCOL_TLSv1) + context.load_verify_locations(capath=self.capath) + context.load_cert_chain(self.cert) + return httplib.HTTPSConnection(host, context=context) + return httplib.HTTPSConnection(host) + +def x509(): + "Helper function to get x509 either from env or tmp file" + proxy = os.environ.get('X509_USER_PROXY', '') + if not proxy: + proxy = '/tmp/x509up_u%s' % pwd.getpwuid( os.getuid() ).pw_uid + if not os.path.isfile(proxy): + return '' + return proxy + +def check_auth(key): + "Check if user runs das_client with key/cert and warn users to switch" + if not key: + msg = "WARNING: tfaas_client is running without user credentials/X509 proxy, create proxy via 'voms-proxy-init -voms cms -rfc'" + print(msg) + +def fullpath(path): + "Expand path to full path" + if path and path[0] == '~': + path = path.replace('~', '') + path = path[1:] if path[0] == '/' else path + path = os.path.join(os.environ['HOME'], path) + return path + +def choose_boundary(): + """ + Helper function to replace deprecated mimetools.choose_boundary + https://stackoverflow.com/questions/27099290/where-is-mimetools-choose-boundary-function-in-python3 + https://docs.python.org/2.7/library/mimetools.html?highlight=choose_boundary#mimetools.choose_boundary + >>> mimetools.choose_boundary() + '192.168.1.191.502.42035.1678979116.376.1' + """ + # we will return any random string + import uuid + return str(uuid.uuid4()) + +# credit: https://pymotw.com/2/urllib2/#uploading-files +class MultiPartForm(object): + """Accumulate the data to be used when posting a form.""" + + def __init__(self): + self.form_fields = [] + self.files = [] + if sys.version.startswith('3.'): + self.boundary = choose_boundary() + else: + self.boundary = mimetools.choose_boundary() + return + + def get_content_type(self): + return 'multipart/form-data; boundary=%s' % self.boundary + + def add_field(self, name, value): + """Add a simple field to the form data.""" + self.form_fields.append((name, value)) + return + + def add_file(self, fieldname, filename, fileHandle, mimetype=None): + """Add a file to be uploaded.""" + body = fileHandle.read() + if mimetype is None: + mimetype = mimetypes.guess_type(filename)[0] or 'application/octet-stream' + if mimetype == 'application/octet-stream': + body = binascii.b2a_base64(body) +# if isinstance(body, bytes): +# body = body.decode("utf-8") + self.files.append((fieldname, filename, mimetype, body)) + return + + def __str__(self): + 
"""Return a string representing the form data, including attached files.""" + # Build a list of lists, each containing "lines" of the + # request. Each part is separated by a boundary string. + # Once the list is built, return a string where each + # line is separated by '\r\n'. + parts = [] + part_boundary = '--' + self.boundary + + # Add the form fields + parts.extend( + [ part_boundary, + 'Content-Disposition: form-data; name="%s"' % name, + '', + value, + ] + for name, value in self.form_fields + ) + + # Add the files to upload + # here we use form-data content disposition instead of file one + # since this is how we define handlers in our Go server + # for more info see: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition + parts.extend( + [ part_boundary, + 'Content-Disposition: form-data; name="%s"; filename="%s"' % \ + (field_name, filename), + 'Content-Type: %s' % content_type, + '', + body, + ] + for field_name, filename, content_type, body in self.files + ) + + # Flatten the list and add closing boundary marker, + # then return CR+LF separated data + flattened = list(itertools.chain(*parts)) + flattened.append('--' + self.boundary + '--') + flattened.append('') + return '\r\n'.join(flattened) + +def models(host, verbose=None, ckey=None, cert=None, capath=None): + "models API shows models from TFaaS server" + url = host + '/models' + client = '%s (%s)' % (TFAAS_CLIENT, os.environ.get('USER', '')) + headers = {"Accept": "application/json", "User-Agent": client} + if verbose: + print("URL : %s" % url) + encoded_data = json.dumps({}) + return getdata(url, headers, encoded_data, ckey, cert, capath, verbose, 'GET') + +def delete(host, model, verbose=None, ckey=None, cert=None, capath=None): + "delete API deletes given model in TFaaS server" + url = host + '/delete' + client = '%s (%s)' % (TFAAS_CLIENT, os.environ.get('USER', '')) + headers = {"User-Agent": client} + if verbose: + print("URL : %s" % url) + print("model : %s" % model) + form = MultiPartForm() + form.add_field('model', model) + edata = str(form) + headers['Content-length'] = len(edata) + headers['Content-Type'] = form.get_content_type() + return getdata(url, headers, edata, ckey, cert, capath, verbose, method='DELETE') + +def bundle(host, ifile, verbose=None, ckey=None, cert=None, capath=None): + "bundle API uploads given bundle model files to TFaaS server" + url = host + '/upload' + client = '%s (%s)' % (TFAAS_CLIENT, os.environ.get('USER', '')) + headers = {"User-Agent": client, "Content-Encoding": "gzip", "Content-Type": "application/octet-stream"} + data = open(ifile, 'rb').read() + return getdata(url, headers, data, ckey, cert, capath, verbose) + +def upload(host, ifile, verbose=None, ckey=None, cert=None, capath=None): + "upload API uploads given model to TFaaS server" + url = host + '/upload' + client = '%s (%s)' % (TFAAS_CLIENT, os.environ.get('USER', '')) + headers = {"User-Agent": client} + params = json.load(open(ifile)) + if verbose: + print("URL : %s" % url) + print("ifile : %s" % ifile) + print("params: %s" % json.dumps(params)) + + form = MultiPartForm() + for key in params.keys(): + if key in ['model', 'labels', 'params']: + flag = 'r' + if key == 'model': + flag = 'rb' + name = params[key] + form.add_file(key, name, fileHandle=open(name, flag)) + else: + form.add_field(key, params[key]) + edata = str(form) + headers['Content-length'] = len(edata) + headers['Content-Type'] = form.get_content_type() + headers['Content-Encoding'] = 'base64' + return getdata(url, headers, edata, 
ckey, cert, capath, verbose) + +def predict(host, ifile, model, verbose=None, ckey=None, cert=None, capath=None): + "predict API get predictions from TFaaS server" + url = host + '/json' + client = '%s (%s)' % (TFAAS_CLIENT, os.environ.get('USER', '')) + headers = {"Accept": "application/json", "User-Agent": client} + params = json.load(open(ifile)) + if model: # overwrite model name in given input file + params['model'] = model + if verbose: + print("URL : %s" % url) + print("ifile : %s" % ifile) + print("params: %s" % json.dumps(params)) + encoded_data = json.dumps(params) + return getdata(url, headers, encoded_data, ckey, cert, capath, verbose) + +def predictImage(host, ifile, model, verbose=None, ckey=None, cert=None, capath=None): + "predict API get predictions from TFaaS server" + url = host + '/image' + client = '%s (%s)' % (TFAAS_CLIENT, os.environ.get('USER', '')) + headers = {"Accept": "application/json", "User-Agent": client} + if verbose: + print("URL : %s" % url) + print("ifile : %s" % ifile) + print("model : %s" % model) + form = MultiPartForm() +# form.add_file('image', ifile, fileHandle=open(ifile, 'r')) + form.add_file('image', ifile, fileHandle=open(ifile, 'rb')) + form.add_field('model', model) + edata = str(form) + headers['Content-length'] = len(edata) + headers['Content-Type'] = form.get_content_type() + return getdata(url, headers, edata, ckey, cert, capath, verbose) + +def getdata(url, headers, encoded_data, ckey, cert, capath, verbose=None, method='POST'): + "helper function to use in predict/upload APIs, it place given URL call to the server" + debug = 1 if verbose else 0 + req = urllib2.Request(url=url, headers=headers, data=encoded_data) + if method == 'DELETE': + req.get_method = lambda: 'DELETE' + elif method == 'GET': + req = urllib2.Request(url=url, headers=headers) + if ckey and cert: + ckey = fullpath(ckey) + cert = fullpath(cert) + http_hdlr = HTTPSClientAuthHandler(ckey, cert, capath, debug) + elif cert and capath: + cert = fullpath(cert) + http_hdlr = HTTPSClientAuthHandler(ckey, cert, capath, debug) + else: + http_hdlr = urllib2.HTTPHandler(debuglevel=debug) + proxy_handler = urllib2.ProxyHandler({}) + cookie_jar = cookielib.CookieJar() + cookie_handler = urllib2.HTTPCookieProcessor(cookie_jar) + data = {} + try: + opener = urllib2.build_opener(http_hdlr, proxy_handler, cookie_handler) + fdesc = opener.open(req) + if url.endswith('json'): + data = json.load(fdesc) + else: + data = fdesc.read() + fdesc.close() + except urllib2.HTTPError as error: + print(error.read()) + sys.exit(1) + if url.endswith('json'): + return json.dumps(data) + return data + +def main(): + "Main function" + optmgr = OptionParser() + opts = optmgr.parser.parse_args() + check_auth(opts.ckey) + res = '' + if opts.upload: + res = upload(opts.url, opts.upload, opts.verbose, opts.ckey, opts.cert, opts.capath) + if opts.bundle: + res = bundle(opts.url, opts.bundle, opts.verbose, opts.ckey, opts.cert, opts.capath) + elif opts.delete: + res = delete(opts.url, opts.delete, opts.verbose, opts.ckey, opts.cert, opts.capath) + elif opts.models: + res = models(opts.url, opts.verbose, opts.ckey, opts.cert, opts.capath) + elif opts.predict: + res = predict(opts.url, opts.predict, opts.model, opts.verbose, opts.ckey, opts.cert, opts.capath) + elif opts.image: + res = predictImage(opts.url, opts.image, opts.model, opts.verbose, opts.ckey, opts.cert, opts.capath) + if res: + print(res) + +if __name__ == '__main__': + main()
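The individual API helpers can also be used without the argument parser, mirroring what `main` does for the `--models` and `--predict` options. A sketch, assuming tfaas_client.py is importable as `tfaas_client`, with a hypothetical TFaaS endpoint and an input file in the JSON layout produced by img2json in mnist_img.py:

    from tfaas_client import models, predict   # assumed module name

    host = 'http://localhost:8083'              # hypothetical TFaaS server
    print(models(host, verbose=True))           # list the models registered on the server
    print(predict(host, 'img.json', 'mnist'))   # prediction for the JSON payload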
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/bdist.linux-x86_64/egg/configs/pipeline.yaml Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,9 @@ +pipeline: + - reader.BinaryFileReader: + filename: /Users/vk/Work/CHESS/MLPipeline/MNIST/img4.png + - processor.TFaaSImageProcessor: + url: "http://localhost:8083" + model: mnist + verbose: true + - writer.Writer: + filename: predictions.json
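The same three-stage pipeline can be assembled by hand, which is roughly what runner.py does with this file. The import paths below simply follow the module.Class names used in the YAML and are assumptions, as are the local file names:

    from CHAP.pipeline import Pipeline
    from CHAP.processor import TFaaSImageProcessor   # class named in the YAML above
    from CHAP.reader import BinaryFileReader
    from CHAP.writer import Writer

    objects = [BinaryFileReader(), TFaaSImageProcessor(), Writer()]
    kwds = [{'filename': 'img4.png'},                                            # hypothetical local image
            {'url': 'http://localhost:8083', 'model': 'mnist', 'verbose': True},
            {'filename': 'predictions.json'}]
    Pipeline(objects, kwds).execute()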
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/lib/CHAP/models/edd.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,218 @@ +from msnctools.general import create_mask +from msnctools.material import Material +from msnctools.scanparsers import SMBMCAScanParser as ScanParser +import numpy as np +from pathlib import PosixPath +from pydantic import (BaseModel, + confloat, + conint, + conlist, + constr, + FilePath, + validator) +from scipy.interpolate import interp1d +from typing import Optional + + +class MCACeriaCalibrationConfig(BaseModel): + '''Class representing metadata required to perform a Ceria calibration for an + MCA detector. + + :ivar spec_file: Path to the SPEC file containing the CeO2 scan + :ivar scan_number: Number of the CeO2 scan in `spec_file` + :ivar scan_step_index: Index of the scan step to use for calibration, + optional. If not specified, the calibration routine will be performed on + the average of all MCA spectra for the scan. + + :ivar flux_file: csv file containing station beam energy in eV (column 0) + and flux (column 1) + + :ivar detector_name: name of the MCA to calibrate + :ivar num_bins: number of channels on the MCA to calibrate + :ivar max_energy_kev: maximum channel energy of the MCA in keV + + :ivar hexrd_h5_material_file: path to a HEXRD materials.h5 file containing an + entry for the material properties. + :ivar hexrd_h5_material_name: Name of the material entry in + `hexrd_h5_material_file`, defaults to `'CeO2'`. + :ivar lattice_parameter_angstrom: lattice spacing in angstrom to use for + the cubic CeO2 crystal, defaults to `5.41153`. + + :ivar tth_max: detector rotation about hutch x axis, defaults to `90`. + :ivar hkl_tth_tol: minimum resolvable difference in 2&theta between two + unique HKL peaks, defaults to `0.15`. + + :ivar fit_include_bin_ranges: list of MCA channel index ranges whose data + will be included in the calibration routine + :ivar fit_hkls: list of unique HKL indices to fit peaks for in the + calibration routine + + :ivar tth_initial_guess: initial guess for 2&theta + :ivar slope_initial_guess: initial guess for detector channel energy + correction linear slope, defaults to `1.0`. + :ivar intercept_initial_guess: initial guess for detector channel energy + correction y-intercept, defaults to `0.0`. + + :ivar tth_calibrated: calibrated value for 2&theta, defaults to None + :ivar slope_calibrated: calibrated value for detector channel energy + correction linear slope, defaults to `None` + :ivar intercept_calibrated: calibrated value for detector channel energy + correction y-intercept, defaluts to None + + :ivar max_iter: maximum number of iterations of the calibration routine, + defaults to `10`. + :ivar tune_tth_tol: stop iteratively tuning 2&theta when an iteration + produces a change in the tuned value of 2&theta that is smaller than this + value, defaults to `1e-8`. 
+ ''' + + spec_file: FilePath + scan_number: conint(gt=0) + scan_step_index: Optional[conint(ge=0)] + + flux_file: FilePath + + detector_name: constr(strip_whitespace=True, min_length=1) + num_bins: conint(gt=0) + max_energy_kev: confloat(gt=0) + + hexrd_h5_material_file: FilePath + hexrd_h5_material_name: constr(strip_whitespace=True, min_length=1) = 'CeO2' + lattice_parameter_angstrom: confloat(gt=0) = 5.41153 + + tth_max: confloat(gt=0, allow_inf_nan=False) = 90.0 + hkl_tth_tol: confloat(gt=0, allow_inf_nan=False) = 0.15 + + fit_include_bin_ranges: conlist(min_items=1, + item_type=conlist(item_type=conint(ge=0), + min_items=2, + max_items=2)) + fit_hkls: conlist(item_type=conint(ge=0), min_items=1) + + tth_initial_guess: confloat(gt=0, le=tth_max, allow_inf_nan=False) + slope_initial_guess: float = 1.0 + intercept_initial_guess: float = 0.0 + tth_calibrated: Optional[confloat(gt=0, allow_inf_nan=False)] + slope_calibrated: Optional[confloat(allow_inf_nan=False)] + intercept_calibrated: Optional[confloat(allow_inf_nan=False)] + + max_iter: conint(gt=0) = 10 + tune_tth_tol: confloat(ge=0) = 1e-8 + + @validator('fit_include_bin_ranges', each_item=True) + def validate_include_bin_range(cls, value, values): + '''Ensure no bin ranges are outside the boundary of the detector''' + + num_bins = values.get('num_bins') + value[1] = min(value[1], num_bins) + return(value) + + def mca_data(self): + '''Get the 1D array of MCA data to use for calibration. + + :return: MCA data + :rtype: np.ndarray + ''' + + scanparser = ScanParser(self.spec_file, self.scan_number) + if self.scan_step_index is None: + data = scanparser.get_all_detector_data(self.detector_name) + if scanparser.spec_scan_npts > 1: + data = np.average(data, axis=1) + else: + data = data[0] + else: + data = scanparser.get_detector_data(self.detector_name, self.scan_step_index) + + return(np.array(data)) + + def mca_mask(self): + '''Get a boolean mask array to use on MCA data before fitting. + + :return: boolean mask array + :rtype: numpy.ndarray + ''' + + mask = None + bin_indices = np.arange(self.num_bins) + for bin_range in self.fit_include_bin_ranges: + mask = create_mask(bin_indices, + bounds=bin_range, + exclude_bounds=False, + current_mask=mask) + + return(mask) + + def flux_correction_interpolation_function(self): + '''Get an interpolation function to correct MCA data for relative energy + flux of the incident beam. + + :return: energy flux correction interpolation function + :rtype: scipy.interpolate._polyint._Interpolator1D + ''' + + flux = np.loadtxt(self.flux_file) + energies = flux[:,0]/1.e3 + relative_intensities = flux[:,1]/np.max(flux[:,1]) + interpolation_function = interp1d(energies, relative_intensities) + return(interpolation_function) + + def material(self): + '''Get CeO2 as a `msnctools.materials.Material` object. + + :return: CeO2 material + :rtype: msnctools.material.Material + ''' + + material = Material(material_name=self.hexrd_h5_material_name, + material_file=self.hexrd_h5_material_file, + lattice_parameters_angstroms=self.lattice_parameter_angstrom) + # The following kwargs will be needed if we allow the material to be + # built using xrayutilities (for now, we only allow hexrd to make the + # material): + # sgnum=225, + # atoms=['Ce4p', 'O2mdot'], + # pos=[(0.,0.,0.), (0.25,0.75,0.75)], + # enrgy=50000.) # Why do we need to specify an energy to get HKLs when using xrayutilities? 
+ return(material) + + def unique_ds(self): + '''Get a list of unique HKLs and their lattice spacings + + :return: unique HKLs and their lattice spacings in angstroms + :rtype: np.ndarray, np.ndarray + ''' + + unique_hkls, unique_ds = self.material().get_unique_ds(tth_tol=self.hkl_tth_tol, tth_max=self.tth_max) + + return(unique_hkls, unique_ds) + + def fit_ds(self): + '''Get a list of HKLs and their lattice spacings that will be fit in the + calibration routine + + :return: HKLs to fit and their lattice spacings in angstroms + :rtype: np.ndarray, np.ndarray + ''' + + unique_hkls, unique_ds = self.unique_ds() + + fit_hkls = np.array([unique_hkls[i] for i in self.fit_hkls]) + fit_ds = np.array([unique_ds[i] for i in self.fit_hkls]) + + return(fit_hkls, fit_ds) + + def dict(self): + '''Return a representation of this configuration in a dictionary that is + suitable for dumping to a YAML file (one that converts all instances of + fields with type `PosixPath` to `str`). + + :return: dictionary representation of the configuration. + :rtype: dict + ''' + + d = super().dict() + for k,v in d.items(): + if isinstance(v, PosixPath): + d[k] = str(v) + return(d)
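The MCACeriaCalibrationConfig model above is more than a container of metadata: its helper methods assemble the inputs the calibration routine needs (the averaged MCA spectrum, the channel mask, the flux-correction interpolator, and the HKLs selected for fitting). A minimal sketch of how it might be driven follows; the YAML file name and its contents are placeholders, not files provided by this changeset, and every FilePath field must point to a real file for pydantic validation to pass.

import yaml

from CHAP.models.edd import MCACeriaCalibrationConfig

# 'ceria_calibration.yaml' is a hypothetical input file whose keys mirror the
# fields of MCACeriaCalibrationConfig (spec_file, scan_number, flux_file, ...).
with open('ceria_calibration.yaml') as f:
    calib = MCACeriaCalibrationConfig(**yaml.safe_load(f))

spectrum = calib.mca_data()        # 1D MCA spectrum, averaged over scan steps if none is given
mask = calib.mca_mask()            # boolean mask built from fit_include_bin_ranges
flux = calib.flux_correction_interpolation_function()
hkls, d_spacings = calib.fit_ds()  # HKLs and lattice spacings selected for the fit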
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/lib/CHAP/models/integration.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,511 @@ +import copy +from functools import cache, lru_cache +import json +import logging +import os +from time import time +from typing import Literal, Optional + +from msnctools.general import input_menu +from multiprocessing.pool import ThreadPool +from nexusformat.nexus import (NXdata, + NXdetector, + NXfield, + NXprocess, + NXroot) +import numpy as np +from pydantic import (BaseModel, + validator, + constr, + conlist, + conint, + confloat, + FilePath) +import pyFAI, pyFAI.multi_geometry, pyFAI.units +from pyspec.file.tiff import TiffFile + +from .map import MapConfig, SpecScans + + +class Detector(BaseModel): + """ + Detector class to represent a single detector used in the experiment. + + :param prefix: Prefix of the detector in the SPEC file. + :type prefix: str + :param poni_file: Path to the poni file. + :type poni_file: str + :param mask_file: Optional path to the mask file. + :type mask_file: str, optional + """ + prefix: constr(strip_whitespace=True, min_length=1) + poni_file: FilePath + mask_file: Optional[FilePath] + @validator('poni_file', allow_reuse=True) + def validate_poni_file(cls, poni_file): + """ + Validate the poni file by checking if it's a valid PONI file. + + :param poni_file: Path to the poni file. + :type poni_file: str + :raises ValueError: If poni_file is not a valid PONI file. + :returns: Absolute path to the poni file. + :rtype: str + """ + poni_file = os.path.abspath(poni_file) + try: + ai = azimuthal_integrator(poni_file) + except: + raise(ValueError(f'{poni_file} is not a valid PONI file')) + else: + return(poni_file) + @validator('mask_file', allow_reuse=True) + def validate_mask_file(cls, mask_file, values): + """ + Validate the mask file. If a mask file is provided, it checks if it's a valid TIFF file. + + :param mask_file: Path to the mask file. + :type mask_file: str or None + :param values: A dictionary of the Detector fields. + :type values: dict + :raises ValueError: If mask_file is provided and it's not a valid TIFF file. + :raises ValueError: If `'poni_file'` is not provided in `values`. + :returns: Absolute path to the mask file or None. + :rtype: str or None + """ + if mask_file is None: + return(mask_file) + else: + mask_file = os.path.abspath(mask_file) + poni_file = values.get('poni_file') + if poni_file is None: + raise(ValueError('Cannot validate mask file without a PONI file.')) + else: + try: + mask_array = get_mask_array(mask_file, poni_file) + except BaseException as e: + raise(ValueError(f'Unable to open {mask_file} as a TIFF file')) + else: + return(mask_file) + @property + def azimuthal_integrator(self): + return(azimuthal_integrator(self.poni_file)) + @property + def mask_array(self): + return(get_mask_array(self.mask_file, self.poni_file)) + +@cache +def azimuthal_integrator(poni_file:str): + if not isinstance(poni_file, str): + poni_file = str(poni_file) + return(pyFAI.load(poni_file)) +@cache +def get_mask_array(mask_file:str, poni_file:str): + if mask_file is not None: + if not isinstance(mask_file, str): + mask_file = str(mask_file) + with TiffFile(mask_file) as tiff: + mask_array = tiff.asarray() + else: + mask_array = np.zeros(azimuthal_integrator(poni_file).detector.shape) + return(mask_array) + +class IntegrationConfig(BaseModel): + """ + Class representing the configuration for a raw detector data integration. 
+ + :ivar tool_type: type of integration tool; always set to "integration" + :type tool_type: str, optional + :ivar title: title of the integration + :type title: str + :ivar integration_type: type of integration, one of "azimuthal", "radial", or "cake" + :type integration_type: str + :ivar detectors: list of detectors used in the integration + :type detectors: List[Detector] + :ivar radial_units: radial units for the integration, defaults to `'q_A^-1'` + :type radial_units: str, optional + :ivar radial_min: minimum radial value for the integration range + :type radial_min: float, optional + :ivar radial_max: maximum radial value for the integration range + :type radial_max: float, optional + :ivar radial_npt: number of points in the radial range for the integration + :type radial_npt: int, optional + :ivar azimuthal_units: azimuthal units for the integration + :type azimuthal_units: str, optional + :ivar azimuthal_min: minimum azimuthal value for the integration range + :type azimuthal_min: float, optional + :ivar azimuthal_max: maximum azimuthal value for the integration range + :type azimuthal_max: float, optional + :ivar azimuthal_npt: number of points in the azimuthal range for the integration + :type azimuthal_npt: int, optional + :ivar error_model: error model for the integration, one of "poisson" or "azimuthal" + :type error_model: str, optional + """ + tool_type: Literal['integration'] = 'integration' + title: constr(strip_whitespace=True, min_length=1) + integration_type: Literal['azimuthal', 'radial', 'cake'] + detectors: conlist(item_type=Detector, min_items=1) + radial_units: str = 'q_A^-1' + radial_min: confloat(ge=0) + radial_max: confloat(gt=0) + radial_npt: conint(gt=0) = 1800 + azimuthal_units: str = 'chi_deg' + azimuthal_min: confloat(ge=-180) = -180 + azimuthal_max: confloat(le=360) = 180 + azimuthal_npt: conint(gt=0) = 3600 + error_model: Optional[Literal['poisson', 'azimuthal']] + sequence_index: Optional[conint(gt=0)] + @validator('radial_units', allow_reuse=True) + def validate_radial_units(cls, radial_units): + """ + Validate the radial units for the integration. + + :param radial_units: unvalidated radial units for the integration + :type radial_units: str + :raises ValueError: if radial units are not one of the recognized radial units + :return: validated radial units + :rtype: str + """ + if radial_units in pyFAI.units.RADIAL_UNITS.keys(): + return(radial_units) + else: + raise(ValueError(f'Invalid radial units: {radial_units}. Must be one of {", ".join(pyFAI.units.RADIAL_UNITS.keys())}')) + @validator('azimuthal_units', allow_reuse=True) + def validate_azimuthal_units(cls, azimuthal_units): + """ + Validate that `azimuthal_units` is one of the keys in the + `pyFAI.units.AZIMUTHAL_UNITS` dictionary. + + :param azimuthal_units: The string representing the unit to be validated. + :type azimuthal_units: str + :raises ValueError: If `azimuthal_units` is not one of the keys in `pyFAI.units.AZIMUTHAL_UNITS` + :return: The original supplied value, if is one of the keys in `pyFAI.units.AZIMUTHAL_UNITS`. + :rtype: str + """ + if azimuthal_units in pyFAI.units.AZIMUTHAL_UNITS.keys(): + return(azimuthal_units) + else: + raise(ValueError(f'Invalid azimuthal units: {azimuthal_units}. Must be one of {", ".join(pyFAI.units.AZIMUTHAL_UNITS.keys())}')) + def validate_range_max(range_name:str): + """Validate the maximum value of an integration range. + + :param range_name: The name of the integration range (e.g. radial, azimuthal). 
+ :type range_name: str + :return: The callable that performs the validation. + :rtype: callable + """ + def _validate_range_max(cls, range_max, values): + """Check if the maximum value of the integration range is greater than its minimum value. + + :param range_max: The maximum value of the integration range. + :type range_max: float + :param values: The values of the other fields being validated. + :type values: dict + :raises ValueError: If the maximum value of the integration range is not greater than its minimum value. + :return: The validated maximum range value + :rtype: float + """ + range_min = values.get(f'{range_name}_min') + if range_min < range_max: + return(range_max) + else: + raise(ValueError(f'Maximum value of integration range must be greater than minimum value of integration range ({range_name}_min={range_min}).')) + return(_validate_range_max) + _validate_radial_max = validator('radial_max', allow_reuse=True)(validate_range_max('radial')) + _validate_azimuthal_max = validator('azimuthal_max', allow_reuse=True)(validate_range_max('azimuthal')) + def validate_for_map_config(self, map_config:MapConfig): + """ + Validate the existence of the detector data file for all scan points in `map_config`. + + :param map_config: The `MapConfig` instance to validate against. + :type map_config: MapConfig + :raises RuntimeError: If a detector data file could not be found for a scan point occurring in `map_config`. + :return: None + :rtype: None + """ + for detector in self.detectors: + for scans in map_config.spec_scans: + for scan_number in scans.scan_numbers: + scanparser = scans.get_scanparser(scan_number) + for scan_step_index in range(scanparser.spec_scan_npts): + # Make sure the detector data file exists for all scan points + try: + detector_data_file = scanparser.get_detector_data_file(detector.prefix, scan_step_index) + except: + raise(RuntimeError(f'Could not find data file for detector prefix {detector.prefix} on scan number {scan_number} in spec file {scans.spec_file}')) + def get_azimuthal_adjustments(self): + """To enable a continuous range of integration in the azimuthal direction + for radial and cake integration, obtain adjusted values for this + `IntegrationConfig`'s `azimuthal_min` and `azimuthal_max` values, the + angle amount by which those values were adjusted, and the proper location + of the discontinuity in the azimuthal direction. + + :return: Adjusted chi_min, adjusted chi_max, chi_offset, chi_discontinuity + :rtype: tuple[float,float,float,float] + """ + return(get_azimuthal_adjustments(self.azimuthal_min, self.azimuthal_max)) + def get_azimuthal_integrators(self): + """Get a list of `AzimuthalIntegrator`s that correspond to the detector + configurations in this instance of `IntegrationConfig`. + + The returned `AzimuthalIntegrator`s are (if need be) artificially rotated + in the azimuthal direction to achieve a continuous range of integration + in the azimuthal direction. + + :returns: A list of `AzimuthalIntegrator`s appropriate for use by this + `IntegrationConfig` tool + :rtype: list[pyFAI.azimuthalIntegrator.AzimuthalIntegrator] + """ + chi_min, chi_max, chi_offset, chi_disc = self.get_azimuthal_adjustments() + return(get_azimuthal_integrators(tuple([detector.poni_file for detector in self.detectors]), chi_offset=chi_offset)) + def get_multi_geometry_integrator(self): + """Get a `MultiGeometry` integrator suitable for use by this instance of + `IntegrationConfig`. 
+ + :return: A `MultiGeometry` integrator + :rtype: pyFAI.multi_geometry.MultiGeometry + """ + poni_files = tuple([detector.poni_file for detector in self.detectors]) + radial_range = (self.radial_min, self.radial_max) + azimuthal_range = (self.azimuthal_min, self.azimuthal_max) + return(get_multi_geometry_integrator(poni_files, self.radial_units, radial_range, azimuthal_range)) + def get_azimuthally_integrated_data(self, spec_scans:SpecScans, scan_number:int, scan_step_index:int): + """Return azimuthally-integrated data for the scan step specified. + + :param spec_scans: An instance of `SpecScans` containing the scan step requested. + :type spec_scans: SpecScans + :param scan_number: The number of the scan containing the scan step requested. + :type scan_number: int + :param scan_step_index: The index of the scan step requested. + :type scan_step_index: int + :return: A 1D array of azimuthally-integrated raw detector intensities. + :rtype: np.ndarray + """ + detector_data = spec_scans.get_detector_data(self.detectors, scan_number, scan_step_index) + integrator = self.get_multi_geometry_integrator() + lst_mask = [detector.mask_array for detector in self.detectors] + result = integrator.integrate1d(detector_data, lst_mask=lst_mask, npt=self.radial_npt, error_model=self.error_model) + if result.sigma is None: + return(result.intensity) + else: + return(result.intensity, result.sigma) + def get_radially_integrated_data(self, spec_scans:SpecScans, scan_number:int, scan_step_index:int): + """Return radially-integrated data for the scan step specified. + + :param spec_scans: An instance of `SpecScans` containing the scan step requested. + :type spec_scans: SpecScans + :param scan_number: The number of the scan containing the scan step requested. + :type scan_number: int + :param scan_step_index: The index of the scan step requested. + :type scan_step_index: int + :return: A 1D array of radially-integrated raw detector intensities. + :rtype: np.ndarray + """ + # Handle idiosyncrasies of azimuthal ranges in pyFAI + # Adjust chi ranges to get a continuous range of integrated data + chi_min, chi_max, chi_offset, chi_disc = self.get_azimuthal_adjustments() + # Perform radial integration on a detector-by-detector basis. + I_each_detector = [] + variance_each_detector = [] + integrators = self.get_azimuthal_integrators() + for i,(integrator,detector) in enumerate(zip(integrators,self.detectors)): + detector_data = spec_scans.get_detector_data([detector], scan_number, scan_step_index)[0] + result = integrator.integrate_radial(detector_data, self.azimuthal_npt, + unit=self.azimuthal_units, azimuth_range=(chi_min,chi_max), + radial_unit=self.radial_units, radial_range=(self.radial_min,self.radial_max), + mask=detector.mask_array) #, error_model=self.error_model) + I_each_detector.append(result.intensity) + if result.sigma is not None: + variance_each_detector.append(result.sigma**2) + # Add the individual detectors' integrated intensities together + I = np.nansum(I_each_detector, axis=0) + # Ignore data at values of chi for which there was no data + I = np.where(I==0, np.nan, I) + if len(I_each_detector) != len(variance_each_detector): + return(I) + else: + # Get the standard deviation of the summed detectors' intensities + sigma = np.sqrt(np.nansum(variance_each_detector, axis=0)) + return(I, sigma) + def get_cake_integrated_data(self, spec_scans:SpecScans, scan_number:int, scan_step_index:int): + """Return cake-integrated data for the scan step specified. 
+ + :param spec_scans: An instance of `SpecScans` containing the scan step requested. + :type spec_scans: SpecScans + :param scan_number: The number of the scan containing the scan step requested. + :type scan_number: int + :param scan_step_index: The index of the scan step requested. + :type scan_step_index: int + :return: A 2D array of cake-integrated raw detector intensities. + :rtype: np.ndarray + """ + detector_data = spec_scans.get_detector_data(self.detectors, scan_number, scan_step_index) + integrator = self.get_multi_geometry_integrator() + lst_mask = [detector.mask_array for detector in self.detectors] + result = integrator.integrate2d(detector_data, lst_mask=lst_mask, + npt_rad=self.radial_npt, npt_azim=self.azimuthal_npt, + method='bbox', + error_model=self.error_model) + if result.sigma is None: + return(result.intensity) + else: + return(result.intensity, result.sigma) + def get_integrated_data(self, spec_scans:SpecScans, scan_number:int, scan_step_index:int): + """Return integrated data for the scan step specified. + + :param spec_scans: An instance of `SpecScans` containing the scan step requested. + :type spec_scans: SpecScans + :param scan_number: The number of the scan containing the scan step requested. + :type scan_number: int + :param scan_step_index: The index of the scan step requested. + :type scan_step_index: int + :return: An array of integrated raw detector intensities. + :rtype: np.ndarray + """ + if self.integration_type == 'azimuthal': + return(self.get_azimuthally_integrated_data(spec_scans, scan_number, scan_step_index)) + elif self.integration_type == 'radial': + return(self.get_radially_integrated_data(spec_scans, scan_number, scan_step_index)) + elif self.integration_type == 'cake': + return(self.get_cake_integrated_data(spec_scans, scan_number, scan_step_index)) + + @property + def integrated_data_coordinates(self): + """ + Return a dictionary of coordinate arrays for navigating the dimension(s) + of the integrated data produced by this instance of `IntegrationConfig`. + + :return: A dictionary with either one or two keys: 'azimuthal' and/or + 'radial', each of which points to a 1-D `numpy` array of coordinate + values. + :rtype: dict[str,np.ndarray] + """ + if self.integration_type == 'azimuthal': + return(get_integrated_data_coordinates(radial_range=(self.radial_min,self.radial_max), + radial_npt=self.radial_npt)) + elif self.integration_type == 'radial': + return(get_integrated_data_coordinates(azimuthal_range=(self.azimuthal_min,self.azimuthal_max), + azimuthal_npt=self.azimuthal_npt)) + elif self.integration_type == 'cake': + return(get_integrated_data_coordinates(radial_range=(self.radial_min,self.radial_max), + radial_npt=self.radial_npt, + azimuthal_range=(self.azimuthal_min,self.azimuthal_max), + azimuthal_npt=self.azimuthal_npt)) + @property + def integrated_data_dims(self): + """Return a tuple of the coordinate labels for the integrated data + produced by this instance of `IntegrationConfig`. + """ + directions = list(self.integrated_data_coordinates.keys()) + dim_names = [getattr(self, f'{direction}_units') for direction in directions] + return(dim_names) + @property + def integrated_data_shape(self): + """Return a tuple representing the shape of the integrated data + produced by this instance of `IntegrationConfig` for a single scan step. 
+ """ + return(tuple([len(coordinate_values) for coordinate_name,coordinate_values in self.integrated_data_coordinates.items()])) + +@cache +def get_azimuthal_adjustments(chi_min:float, chi_max:float): + """ + Fix chi discontinuity at 180 degrees and return the adjusted chi range, + offset, and discontinuty. + + If the discontinuity is crossed, obtain the offset to artificially rotate + detectors to achieve a continuous azimuthal integration range. + + :param chi_min: The minimum value of the azimuthal range. + :type chi_min: float + :param chi_max: The maximum value of the azimuthal range. + :type chi_max: float + :return: The following four values: the adjusted minimum value of the + azimuthal range, the adjusted maximum value of the azimuthal range, the + value by which the chi angle was adjusted, the position of the chi + discontinuity. + """ + # Fix chi discontinuity at 180 degrees for now. + chi_disc = 180 + # If the discontinuity is crossed, artificially rotate the detectors to + # achieve a continuous azimuthal integration range + if chi_min < chi_disc and chi_max > chi_disc: + chi_offset = chi_max - chi_disc + else: + chi_offset = 0 + return(chi_min-chi_offset, chi_max-chi_offset, chi_offset, chi_disc) +@cache +def get_azimuthal_integrators(poni_files:tuple, chi_offset=0): + """ + Return a list of `AzimuthalIntegrator` objects generated from PONI files. + + :param poni_files: Tuple of strings, each string being a path to a PONI file. : tuple + :type poni_files: tuple + :param chi_offset: The angle in degrees by which the `AzimuthalIntegrator` objects will be rotated, defaults to 0. + :type chi_offset: float, optional + :return: List of `AzimuthalIntegrator` objects + :rtype: list[pyFAI.azimuthalIntegrator.AzimuthalIntegrator] + """ + ais = [] + for poni_file in poni_files: + ai = copy.deepcopy(azimuthal_integrator(poni_file)) + ai.rot3 += chi_offset * np.pi/180 + ais.append(ai) + return(ais) +@cache +def get_multi_geometry_integrator(poni_files:tuple, radial_unit:str, radial_range:tuple, azimuthal_range:tuple): + """Return a `MultiGeometry` instance that can be used for azimuthal or cake + integration. + + :param poni_files: Tuple of PONI files that describe the detectors to be + integrated. + :type poni_files: tuple + :param radial_unit: Unit to use for radial integration range. + :type radial_unit: str + :param radial_range: Tuple describing the range for radial integration. + :type radial_range: tuple[float,float] + :param azimuthal_range:Tuple describing the range for azimuthal integration. + :type azimuthal_range: tuple[float,float] + :return: `MultiGeometry` instance that can be used for azimuthal or cake + integration. + :rtype: pyFAI.multi_geometry.MultiGeometry + """ + chi_min, chi_max, chi_offset, chi_disc = get_azimuthal_adjustments(*azimuthal_range) + ais = copy.deepcopy(get_azimuthal_integrators(poni_files, chi_offset=chi_offset)) + multi_geometry = pyFAI.multi_geometry.MultiGeometry(ais, + unit=radial_unit, + radial_range=radial_range, + azimuth_range=(chi_min,chi_max), + wavelength=sum([ai.wavelength for ai in ais])/len(ais), + chi_disc=chi_disc) + return(multi_geometry) +@cache +def get_integrated_data_coordinates(azimuthal_range:tuple=None, azimuthal_npt:int=None, radial_range:tuple=None, radial_npt:int=None): + """ + Return a dictionary of coordinate arrays for the specified radial and/or + azimuthal integration ranges. 
+ + :param azimuthal_range: Tuple specifying the range of azimuthal angles over + which to generate coordinates, in the format (min, max), defaults to + None. + :type azimuthal_range: tuple[float,float], optional + :param azimuthal_npt: Number of azimuthal coordinate points to generate, + defaults to None. + :type azimuthal_npt: int, optional + :param radial_range: Tuple specifying the range of radial distances over + which to generate coordinates, in the format (min, max), defaults to + None. + :type radial_range: tuple[float,float], optional + :param radial_npt: Number of radial coordinate points to generate, defaults + to None. + :type radial_npt: int, optional + :return: A dictionary with either one or two keys: 'azimuthal' and/or + 'radial', each of which points to a 1-D `numpy` array of coordinate + values. + :rtype: dict[str,np.ndarray] + """ + integrated_data_coordinates = {} + if azimuthal_range is not None and azimuthal_npt is not None: + integrated_data_coordinates['azimuthal'] = np.linspace(*azimuthal_range, azimuthal_npt) + if radial_range is not None and radial_npt is not None: + integrated_data_coordinates['radial'] = np.linspace(*radial_range, radial_npt) + return(integrated_data_coordinates)
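Of the module-level helpers above, `get_integrated_data_coordinates` is the only one that needs no detector or PONI files, so it is easy to exercise in isolation; the ranges and point counts below are arbitrary illustration values, not defaults used anywhere in this changeset.

from CHAP.models.integration import get_integrated_data_coordinates

# Request both axes, as IntegrationConfig.integrated_data_coordinates does for
# 'cake' integration; each entry is a 1-D np.linspace over the given range.
coords = get_integrated_data_coordinates(
    azimuthal_range=(-180.0, 180.0), azimuthal_npt=3600,
    radial_range=(0.5, 10.0), radial_npt=1800)

print(sorted(coords))               # ['azimuthal', 'radial']
print(coords['radial'].shape)       # (1800,)
print(coords['azimuthal'][[0, -1]]) # [-180.  180.]

`integrated_data_dims` and `integrated_data_shape` are thin wrappers over this same dictionary, which is why the shape of a single integrated frame follows directly from the npt settings.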
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/lib/CHAP/models/map.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,519 @@ +from functools import cache, lru_cache +import os +from typing import Literal, Optional, Union + +import numpy as np +from pydantic import (BaseModel, + conint, + conlist, + confloat, + constr, + FilePath, + PrivateAttr, + ValidationError, + validator) +from pyspec.file.spec import FileSpec + +class Sample(BaseModel): + """ + Class representing a sample metadata configuration. + + :ivar name: The name of the sample. + :type name: str + :ivar description: A description of the sample. + :type description: Optional[str] + """ + name: constr(min_length=1) + description: Optional[str] + +class SpecScans(BaseModel): + """ + Class representing a set of scans from a single SPEC file. + + :ivar spec_file: Path to the SPEC file. + :type spec_file: str + :ivar scan_numbers: List of scan numbers to use. + :type scan_numbers: list[int] + """ + spec_file: FilePath + scan_numbers: conlist(item_type=conint(gt=0), min_items=1) + @validator('spec_file', allow_reuse=True) + def validate_spec_file(cls, spec_file): + """ + Validate the specified SPEC file. + + :param spec_file: Path to the SPEC file. + :type spec_file: str + :raises ValueError: If the SPEC file is invalid. + :return: Absolute path to the SPEC file, if it is valid. + :rtype: str + """ + try: + spec_file = os.path.abspath(spec_file) + sspec_file = FileSpec(spec_file) + except: + raise(ValueError(f'Invalid SPEC file {spec_file}')) + else: + return(spec_file) + @validator('scan_numbers', allow_reuse=True) + def validate_scan_numbers(cls, scan_numbers, values): + """ + Validate the specified list of scan numbers. + + :param scan_numbers: List of scan numbers. + :type scan_numbers: list of int + :param values: Dictionary of values for all fields of the model. + :type values: dict + :raises ValueError: If a specified scan number is not found in the SPEC file. + :return: List of scan numbers. + :rtype: list of int + """ + spec_file = values.get('spec_file') + if spec_file is not None: + spec_scans = FileSpec(spec_file) + for scan_number in scan_numbers: + scan = spec_scans.get_scan_by_number(scan_number) + if scan is None: + raise(ValueError(f'There is no scan number {scan_number} in {spec_file}')) + return(scan_numbers) + + @property + def scanparsers(self): + '''A list of `ScanParser`s for each of the scans specified by the SPEC + file and scan numbers belonging to this instance of `SpecScans` + ''' + return([self.get_scanparser(scan_no) for scan_no in self.scan_numbers]) + + def get_scanparser(self, scan_number): + """This method returns a `ScanParser` for the specified scan number in + the specified SPEC file. + + :param scan_number: Scan number to get a `ScanParser` for + :type scan_number: int + :return: `ScanParser` for the specified scan number + :rtype: ScanParser + """ + return(get_scanparser(self.spec_file, scan_number)) + def get_index(self, scan_number:int, scan_step_index:int, map_config): + """This method returns a tuple representing the index of a specific step + in a specific spec scan within a map. 
+ + :param scan_number: Scan number to get index for + :type scan_number: int + :param scan_step_index: Scan step index to get index for + :type scan_step_index: int + :param map_config: Map configuration to get index for + :type map_config: MapConfig + :return: Index for the specified scan number and scan step index within + the specified map configuration + :rtype: tuple + """ + index = () + for independent_dimension in map_config.independent_dimensions: + coordinate_index = list(map_config.coords[independent_dimension.label]).index(independent_dimension.get_value(self, scan_number, scan_step_index)) + index = (coordinate_index, *index) + return(index) + def get_detector_data(self, detectors:list, scan_number:int, scan_step_index:int): + """ + Return the raw data from the specified detectors at the specified scan + number and scan step index. + + :param detectors: List of detector prefixes to get raw data for + :type detectors: list[str] + :param scan_number: Scan number to get data for + :type scan_number: int + :param scan_step_index: Scan step index to get data for + :type scan_step_index: int + :return: Data from the specified detectors for the specified scan number + and scan step index + :rtype: list[np.ndarray] + """ + return(get_detector_data(tuple([detector.prefix for detector in detectors]), self.spec_file, scan_number, scan_step_index)) +@cache +def get_available_scan_numbers(spec_file:str): + scans = FileSpec(spec_file).scans + scan_numbers = list(scans.keys()) + return(scan_numbers) +@cache +def get_scanparser(spec_file:str, scan_number:int): + if scan_number not in get_available_scan_numbers(spec_file): + return(None) + else: + return(ScanParser(spec_file, scan_number)) +@lru_cache(maxsize=10) +def get_detector_data(detector_prefixes:tuple, spec_file:str, scan_number:int, scan_step_index:int): + detector_data = [] + scanparser = get_scanparser(spec_file, scan_number) + for prefix in detector_prefixes: + image_data = scanparser.get_detector_data(prefix, scan_step_index) + detector_data.append(image_data) + return(detector_data) + +class PointByPointScanData(BaseModel): + """Class representing a source of raw scalar-valued data for which a value + was recorded at every point in a `MapConfig`. + + :ivar label: A user-defined label for referring to this data in the NeXus + file and in other tools. + :type label: str + :ivar units: The units in which the data were recorded. + :type units: str + :ivar data_type: Represents how these data were recorded at time of data + collection. + :type data_type: Literal['spec_motor', 'scan_column', 'smb_par'] + :ivar name: Represents the name with which these raw data were recorded at + time of data collection. + :type name: str + """ + label: constr(min_length=1) + units: constr(strip_whitespace=True, min_length=1) + data_type: Literal['spec_motor', 'scan_column', 'smb_par'] + name: constr(strip_whitespace=True, min_length=1) + @validator('label') + def validate_label(cls, label): + """Validate that the supplied `label` does not conflict with any of the + values for `label` reserved for certain data needed to perform + corrections. + + :param label: The value of `label` to validate + :type label: str + :raises ValueError: If `label` is one of the reserved values. + :return: The original supplied value `label`, if it is allowed. 
+ :rtype: str + """ + #if (not issubclass(cls,CorrectionsData)) and label in CorrectionsData.__fields__['label'].type_.__args__: + if (not issubclass(cls,CorrectionsData)) and label in CorrectionsData.reserved_labels(): + raise(ValueError(f'{cls.__name__}.label may not be any of the following reserved values: {CorrectionsData.reserved_labels()}')) + return(label) + def validate_for_station(self, station:str): + """Validate this instance of `PointByPointScanData` for a certain choice + of station (beamline). + + :param station: The name of the station (in 'idxx' format). + :type station: str + :raises TypeError: If the station is not compatible with the value of the + `data_type` attribute for this instance of PointByPointScanData. + :return: None + :rtype: None + """ + if station.lower() not in ('id1a3', 'id3a') and self.data_type == 'smb_par': + raise(TypeError(f'{self.__class__.__name__}.data_type may not be "smb_par" when station is "{station}"')) + def validate_for_spec_scans(self, spec_scans:list[SpecScans], scan_step_index:Union[Literal['all'],int]='all'): + """Validate this instance of `PointByPointScanData` for a list of + `SpecScans`. + + :param spec_scans: A list of `SpecScans` whose raw data will be checked + for the presence of the data represented by this instance of + `PointByPointScanData` + :type spec_scans: list[SpecScans] + :param scan_step_index: A specific scan step index to validate, defaults + to `'all'`. + :type scan_step_index: Union[Literal['all'],int], optional + :raises RuntimeError: If the data represented by this instance of + `PointByPointScanData` is missing for the specified scan steps. + :return: None + :rtype: None + """ + for scans in spec_scans: + for scan_number in scans.scan_numbers: + scanparser = scans.get_scanparser(scan_number) + if scan_step_index == 'all': + scan_step_index_range = range(scanparser.spec_scan_npts) + else: + scan_step_index_range = range(scan_step_index,scan_step_index+1) + for scan_step_index in scan_step_index_range: + try: + self.get_value(scans, scan_number, scan_step_index) + except: + raise(RuntimeError(f'Could not find data for {self.name} (data_type "{self.data_type}") on scan number {scan_number} in spec file {scans.spec_file}')) + def get_value(self, spec_scans:SpecScans, scan_number:int, scan_step_index:int): + """Return the value recorded for this instance of `PointByPointScanData` + at a specific scan step. + + :param spec_scans: An instance of `SpecScans` in which the requested scan step occurs. + :type spec_scans: SpecScans + :param scan_number: The number of the scan in which the requested scan step occurs. + :type scan_number: int + :param scan_step_index: The index of the requested scan step. + :type scan_step_index: int + :return: The value recorded of the data represented by this instance of + `PointByPointScanData` at the scan step requested + :rtype: float + """ + if self.data_type == 'spec_motor': + return(get_spec_motor_value(spec_scans.spec_file, scan_number, scan_step_index, self.name)) + elif self.data_type == 'scan_column': + return(get_spec_counter_value(spec_scans.spec_file, scan_number, scan_step_index, self.name)) + elif self.data_type == 'smb_par': + return(get_smb_par_value(spec_scans.spec_file, scan_number, self.name)) +@cache +def get_spec_motor_value(spec_file:str, scan_number:int, scan_step_index:int, spec_mnemonic:str): + """Return the value recorded for a SPEC motor at a specific scan step. + + :param spec_file: Location of a SPEC file in which the requested scan step occurs. 
+ :type spec_file: str + :param scan_number: The number of the scan in which the requested scan step occurs. + :type scan_number: int + :param scan_step_index: The index of the requested scan step. + :type scan_step_index: int + :param spec_mnemonic: The mnemonic of a SPEC motor. + :type spec_mnemonic: str + :return: The value of the motor at the scan step requested + :rtype: float + """ + scanparser = get_scanparser(spec_file, scan_number) + if spec_mnemonic in scanparser.spec_scan_motor_mnes: + motor_i = scanparser.spec_scan_motor_mnes.index(spec_mnemonic) + if scan_step_index >= 0: + scan_step = np.unravel_index(scan_step_index, scanparser.spec_scan_shape, order='F') + motor_value = scanparser.spec_scan_motor_vals[motor_i][scan_step[motor_i]] + else: + motor_value = scanparser.spec_scan_motor_vals[motor_i] + else: + motor_value = scanparser.get_spec_positioner_value(spec_mnemonic) + return(motor_value) +@cache +def get_spec_counter_value(spec_file:str, scan_number:int, scan_step_index:int, spec_column_label:str): + """Return the value recorded for a SPEC counter at a specific scan step. + + :param spec_file: Location of a SPEC file in which the requested scan step occurs. + :type spec_file: str + :param scan_number: The number of the scan in which the requested scan step occurs. + :type scan_number: int + :param scan_step_index: The index of the requested scan step. + :type scan_step_index: int + :param spec_column_label: The label of a SPEC data column. + :type spec_column_label: str + :return: The value of the counter at the scan step requested + :rtype: float + """ + scanparser = get_scanparser(spec_file, scan_number) + if scan_step_index >= 0: + return(scanparser.spec_scan_data[spec_column_label][scan_step_index]) + else: + return(scanparser.spec_scan_data[spec_column_label]) +@cache +def get_smb_par_value(spec_file:str, scan_number:int, par_name:str): + """Return the value recorded for a specific scan in an SMB-style .par file. + + :param spec_file: Location of a SPEC file in which the requested scan step occurs. + :type spec_file: str + :param scan_number: The number of the scan in which the requested scan step occurs. + :type scan_number: int + :param par_name: The name of the column in the .par file + :type par_name: str + :return: The .par file value for the scan requested. + :rtype: float + """ + scanparser = get_scanparser(spec_file, scan_number) + return(scanparser.pars[par_name]) +def validate_data_source_for_map_config(data_source, values): + import_scanparser(values.get('station'), values.get('experiment_type')) + data_source.validate_for_station(values.get('station')) + data_source.validate_for_spec_scans(values.get('spec_scans')) + return(data_source) + +class CorrectionsData(PointByPointScanData): + """Class representing the special instances of `PointByPointScanData` that + are used by certain kinds of `CorrectionConfig` tools. + + :ivar label: One of the reserved values required by `CorrectionConfig`, + `'presample_intensity'`, `'postsample_intensity'`, or + `'dwell_time_actual'`. + :type label: Literal['presample_intensity','postsample_intensity','dwell_time_actual'] + :ivar units: The units in which the data were recorded. + :type units: str + :ivar data_type: Represents how these data were recorded at time of data + collection. + :type data_type: Literal['scan_column', 'smb_par'] + :ivar name: Represents the name with which these raw data were recorded at + time of data collection. 
+ :type name: str + """ + label: Literal['presample_intensity','postsample_intensity','dwell_time_actual'] + data_type: Literal['scan_column','smb_par'] + @classmethod + def reserved_labels(cls): + """Return a list of all the labels reserved for corrections-related + scalar data. + + :return: A list of reserved labels + :rtype: list[str] + """ + return(list(cls.__fields__['label'].type_.__args__)) +class PresampleIntensity(CorrectionsData): + """Class representing a source of raw data for the intensity of the beam that + is incident on the sample. + + :ivar label: Must be `"presample_intensity"` + :type label: Literal["presample_intensity"] + :ivar units: Must be `"counts"` + :type units: Literal["counts"] + :ivar data_type: Represents how these data were recorded at time of data + collection. + :type data_type: Literal['scan_column', 'smb_par'] + :ivar name: Represents the name with which these raw data were recorded at + time of data collection. + :type name: str + """ + label: Literal['presample_intensity'] = 'presample_intensity' + units: Literal['counts'] = 'counts' +class PostsampleIntensity(CorrectionsData): + """Class representing a source of raw data for the intensity of the beam that + has passed through the sample. + + :ivar label: Must be `"postsample_intensity"` + :type label: Literal["postsample_intensity"] + :ivar units: Must be `"counts"` + :type units: Literal["counts"] + :ivar data_type: Represents how these data were recorded at time of data + collection. + :type data_type: Literal['scan_column', 'smb_par'] + :ivar name: Represents the name with which these raw data were recorded at + time of data collection. + :type name: str + """ + label: Literal['postsample_intensity'] = 'postsample_intensity' + units: Literal['counts'] = 'counts' +class DwellTimeActual(CorrectionsData): + """Class representing a source of raw data for the actual dwell time at each + scan point in SPEC (with some scan types, this value can vary slightly + point-to-point from the dwell time specified in the command). + + :ivar label: Must be `"dwell_time_actual"` + :type label: Literal["dwell_time_actual"] + :ivar units: Must be `"s"` + :type units: Literal["s"] + :ivar data_type: Represents how these data were recorded at time of data + collection. + :type data_type: Literal['scan_column', 'smb_par'] + :ivar name: Represents the name with which these raw data were recorded at + time of data collection. + :type name: str + """ + label: Literal['dwell_time_actual'] = 'dwell_time_actual' + units: Literal['s'] = 's' + +class MapConfig(BaseModel): + """Class representing an experiment consisting of one or more SPEC scans. + + :ivar title: The title for the map configuration. + :type title: str + :ivar station: The name of the station at which the map was collected. + :type station: Literal['id1a3','id3a','id3b'] + :ivar spec_scans: A list of the spec scans that compose the map. + :type spec_scans: list[SpecScans] + :ivar independent_dimensions: A list of the sources of data representing the + raw values of each independent dimension of the map. + :type independent_dimensions: list[PointByPointScanData] + :ivar presample_intensity: A source of point-by-point presample beam + intensity data. Required when applying a CorrectionConfig tool. + :type presample_intensity: Optional[PresampleIntensity] + :ivar dwell_time_actual: A source of point-by-point actual dwell times for + spec scans. Required when applying a CorrectionConfig tool. 
+ :type dwell_time_actual: Optional[DwellTimeActual] + :ivar postsample_intensity: A source of point-by-point postsample beam + intensity data. Required when applying a CorrectionConfig tool with + `correction_type="flux_absorption"` or + `correction_type="flux_absorption_background"`. + :type postsample_intensity: Optional[PostsampleIntensity] + :ivar scalar_data: A list of the sources of data representing other scalar + raw data values collected at each point in the map. In the NeXus file + representation of the map, datasets for these values will be included. + :type scalar_data: Optional[list[PointByPointScanData]] + """ + title: constr(strip_whitespace=True, min_length=1) + station: Literal['id1a3','id3a','id3b'] + experiment_type: Literal['SAXSWAXS', 'EDD', 'XRF'] + sample: Sample + spec_scans: conlist(item_type=SpecScans, min_items=1) + independent_dimensions: conlist(item_type=PointByPointScanData, min_items=1) + presample_intensity: Optional[PresampleIntensity] + dwell_time_actual: Optional[DwellTimeActual] + postsample_intensity: Optional[PostsampleIntensity] + scalar_data: Optional[list[PointByPointScanData]] = [] + _coords: dict = PrivateAttr() + _validate_independent_dimensions = validator('independent_dimensions', each_item=True, allow_reuse=True)(validate_data_source_for_map_config) + _validate_presample_intensity = validator('presample_intensity', allow_reuse=True)(validate_data_source_for_map_config) + _validate_dwell_time_actual = validator('dwell_time_actual', allow_reuse=True)(validate_data_source_for_map_config) + _validate_postsample_intensity = validator('postsample_intensity', allow_reuse=True)(validate_data_source_for_map_config) + _validate_scalar_data = validator('scalar_data', each_item=True, allow_reuse=True)(validate_data_source_for_map_config) + @validator('experiment_type') + def validate_experiment_type(cls, value, values): + '''Ensure values for the station and experiment_type fields are compatible''' + station = values.get('station') + if station == 'id1a3': + allowed_experiment_types = ['SAXSWAXS', 'EDD'] + elif station == 'id3a': + allowed_experiment_types = ['EDD'] + elif station == 'id3b': + allowed_experiment_types = ['SAXSWAXS', 'XRF'] + else: + allowed_experiment_types = [] + if value not in allowed_experiment_types: + raise(ValueError(f'For station {station}, allowed experiment types are {allowed_experiment_types} (supplied experiment type {value} is not allowed)')) + return(value) + @property + def coords(self): + """Return a dictionary of the values of each independent dimension across + the map. + + :returns: A dictionary of the map's coordinate values. 
+ :rtype: dict[str,list[float]] + """ + try: + return(self._coords) + except: + coords = {} + for independent_dimension in self.independent_dimensions: + coords[independent_dimension.label] = [] + for scans in self.spec_scans: + for scan_number in scans.scan_numbers: + scanparser = scans.get_scanparser(scan_number) + for scan_step_index in range(scanparser.spec_scan_npts): + coords[independent_dimension.label].append(independent_dimension.get_value(scans, scan_number, scan_step_index)) + coords[independent_dimension.label] = np.unique(coords[independent_dimension.label]) + self._coords = coords + return(self._coords) + @property + def dims(self): + """Return a tuple of the independent dimension labels for the map.""" + return([point_by_point_scan_data.label for point_by_point_scan_data in self.independent_dimensions[::-1]]) + @property + def shape(self): + """Return the shape of the map -- a tuple representing the number of + unique values of each dimension across the map. + """ + return(tuple([len(values) for key,values in self.coords.items()][::-1])) + @property + def all_scalar_data(self): + """Return a list of all instances of `PointByPointScanData` for which + this map configuration will collect dataset-like data (as opposed to + axes-like data). + + This will be any and all of the items in the corrections-data-related + fields, as well as any additional items in the optional `scalar_data` + field.""" + return([getattr(self,l,None) for l in CorrectionsData.reserved_labels() if getattr(self,l,None) is not None] + self.scalar_data) + +def import_scanparser(station, experiment_type): + if station.lower() in ('id1a3', 'id3a'): + if experiment_type == 'SAXSWAXS': + from msnctools.scanparsers import SMBLinearScanParser + globals()['ScanParser'] = SMBLinearScanParser + elif experiment_type == 'EDD': + from msnctools.scanparsers import SMBMCAScanParser + globals()['ScanParser'] = SMBMCAScanParser + else: + raise(ValueError(f'Invalid experiment_type: {experiment_type}')) + elif station.lower() == 'id3b': + if experiment_type == 'SAXSWAXS': + from msnctools.scanparsers import FMBSAXSWAXSScanParser + globals()['ScanParser'] = FMBSAXSWAXSScanParser + elif experiment_type == 'XRF': + from msnctools.scanparsers import FMBXRFScanParser + globals()['ScanParser'] = FMBXRFScanParser + else: + raise(ValueError(f'Invalid experiment_type: {experiment_type}')) + else: + raise(ValueError(f'Invalid station: {station}'))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/lib/CHAP/models/workflow.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,48 @@ +#!/usr/bin/env python +#-*- coding: utf-8 -*- +#pylint: disable= +""" +File : workflow.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: Workflow module +""" + +# system modules +from basemodel import BaseModel + + +class Workflow(BaseModel): + """ + Workflow docstring + """ + def __init__(self, filename=None, **kwds): + super().__init__(filename, **kwds) + self.map['workflow'] = __name__ + print('create Workflow calls: ', end='') + + +class EDDWorkflow(Workflow): + """ + EDDWorkflow + """ + def __init__(self, filename=None, **kwds): + super().__init__(filename, **kwds) + self.map['workflow'] = 'edd' + print('create EDDWorkflow') + +class SAXWWorkflow(Workflow): + """ + SAXWWorkflow + """ + def __init__(self, filename=None, **kwds): + super().__init__(filename, **kwds) + self.map['workflow'] = 'saxw' + print('create SAXWWorkflow') + +if __name__ == '__main__': + print('--- create EDDWorkflow from config') + wflow = EDDWorkflow() + print('map', wflow.map) + print('--- create SAXWWorkflow from file.txt') + wflow = SAXWWorkflow('file.txt') + print('map', wflow.map)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/lib/CHAP/pipeline.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,84 @@ +#!/usr/bin/env python +#-*- coding: utf-8 -*- +#pylint: disable= +""" +File : pipeline.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: Pipeline module +""" + +# system modules +import logging +from time import time + +class Pipeline(): + """ + Pipeline represents a generic data-processing pipeline + """ + def __init__(self, items=None, kwds=None): + """ + Pipeline class constructor + + :param items: list of objects + :param kwds: list of method args for individual objects + """ + self.__name__ = self.__class__.__name__ + + self.items = items + self.kwds = kwds + + self.logger = logging.getLogger(self.__name__) + self.logger.propagate = False + + def execute(self): + """ + execute API + """ + + t0 = time() + self.logger.info(f'Executing "execute"\n') + + data = None + for item, kwargs in zip(self.items, self.kwds): + if hasattr(item, 'read'): + self.logger.info(f'Calling "read" on {item}') + data = item.read(**kwargs) + if hasattr(item, 'process'): + self.logger.info(f'Calling "process" on {item}') + data = item.process(data, **kwargs) + if hasattr(item, 'write'): + self.logger.info(f'Calling "write" on {item}') + data = item.write(data, **kwargs) + + self.logger.info(f'Executed "execute" in {time()-t0:.3f} seconds') + +class PipelineObject(): + """ + PipelineObject represents a generic Pipeline object wrapping a reader, a processor, and a writer + """ + def __init__(self, reader, writer, processor, fitter): + """ + PipelineObject class constructor + """ + self.reader = reader + self.writer = writer + self.processor = processor + + def read(self, filename): + """ + read object API + """ + return self.reader.read(filename) + + def write(self, data, filename): + """ + write object API + """ + return self.writer.write(data, filename) + + def process(self, data): + """ + process object API + """ + return self.processor.process(data) +
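`Pipeline.execute` chains its items purely by duck typing: anything with a `read`, `process`, or `write` method can participate, and the per-item keyword arguments come from `kwds`. A minimal sketch, assuming the package is importable as `CHAP`; the `StringReader` class is a toy stand-in written only for this illustration, not part of the changeset.

from CHAP.pipeline import Pipeline
from CHAP.processor import PrintProcessor


class StringReader():
    """Toy reader used only for this sketch."""
    def read(self, text=''):
        return(text)


# execute() calls read() on the first item, then hands its output to
# process() on the second item, logging the timing of each call.
items = [StringReader(), PrintProcessor()]
kwds = [{'text': 'hello CHAP\n'}, {}]
Pipeline(items=items, kwds=kwds).execute()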
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/lib/CHAP/processor.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,948 @@ +#!/usr/bin/env python +#-*- coding: utf-8 -*- +#pylint: disable= +""" +File : processor.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: Processor module +""" + +# system modules +import argparse +import json +import logging +import sys +from time import time + +# local modules +# from pipeline import PipelineObject + +class Processor(): + """ + Processor represent generic processor + """ + def __init__(self): + """ + Processor constructor + """ + self.__name__ = self.__class__.__name__ + self.logger = logging.getLogger(self.__name__) + self.logger.propagate = False + + def process(self, data): + """ + process data API + """ + + t0 = time() + self.logger.info(f'Executing "process" with type(data)={type(data)}') + + data = self._process(data) + + self.logger.info(f'Finished "process" in {time()-t0:.3f} seconds\n') + + return(data) + + def _process(self, data): + # If needed, extract data from a returned value of Reader.read + if isinstance(data, list): + if all([isinstance(d,dict) for d in data]): + data = data[0]['data'] + # process operation is a simple print function + data += "process part\n" + # and we return data back to pipeline + return data + + +class TFaaSImageProcessor(Processor): + ''' + A Processor to get predictions from TFaaS inference server. + ''' + def process(self, data, url, model, verbose=False): + """ + process data API + """ + + t0 = time() + self.logger.info(f'Executing "process" with url {url} model {model}') + + data = self._process(data, url, model, verbose) + + self.logger.info(f'Finished "process" in {time()-t0:.3f} seconds\n') + + return(data) + + def _process(self, data, url, model, verbose): + '''Print and return the input data. + + :param data: Input image data, either file name or actual image data + :type data: object + :return: `data` + :rtype: object + ''' + from MLaaS.tfaas_client import predictImage + from pathlib import Path + self.logger.info(f"input data {type(data)}") + if isinstance(data, str) and Path(data).is_file(): + imgFile = data + data = predictImage(url, imgFile, model, verbose) + else: + rdict = data[0] + import requests + img = rdict['data'] + session = requests.Session() + rurl = url + '/predict/image' + payload = dict(model=model) + files = dict(image=img) + self.logger.info(f"HTTP request {rurl} with image file and {payload} payload") + req = session.post(rurl, files=files, data=payload ) + data = req.content + data = data.decode("utf-8").replace('\n', '') + self.logger.info(f"HTTP response {data}") + + return(data) + +class URLResponseProcessor(Processor): + def _process(self, data): + '''Take data returned from URLReader.read and return a decoded version of + the content. + + :param data: input data (output of URLReader.read) + :type data: list[dict] + :return: decoded data contents + :rtype: object + ''' + + data = data[0] + + content = data['data'] + encoding = data['encoding'] + + self.logger.debug(f'Decoding content of type {type(content)} with {encoding}') + + try: + content = content.decode(encoding) + except: + self.logger.warning(f'Failed to decode content of type {type(content)} with {encoding}') + + return(content) + +class PrintProcessor(Processor): + '''A Processor to simply print the input data to stdout and return the + original input data, unchanged in any way. + ''' + + def _process(self, data): + '''Print and return the input data. 
+ + :param data: Input data + :type data: object + :return: `data` + :rtype: object + ''' + + print(f'{self.__name__} data :') + + if callable(getattr(data, '_str_tree', None)): + # If data is likely an NXobject, print its tree representation + # (since NXobjects' str representations are just their nxname -- not + # very helpful). + print(data._str_tree(attrs=True, recursive=True)) + else: + print(str(data)) + + return(data) + +class NexusToNumpyProcessor(Processor): + '''A class to convert the default plottable data in an `NXobject` into an + `numpy.ndarray`. + ''' + + def _process(self, data): + '''Return the default plottable data signal in `data` as an + `numpy.ndarray`. + + :param data: input NeXus structure + :type data: nexusformat.nexus.tree.NXobject + :raises ValueError: if `data` has no default plottable data signal + :return: default plottable data signal in `data` + :rtype: numpy.ndarray + ''' + + default_data = data.plottable_data + + if default_data is None: + default_data_path = data.attrs['default'] + default_data = data.get(default_data_path) + if default_data is None: + raise(ValueError(f'The structure of {data} contains no default data')) + + default_signal = default_data.attrs.get('signal') + if default_signal is None: + raise(ValueError(f'The signal of {default_data} is unknown')) + default_signal = default_signal.nxdata + + np_data = default_data[default_signal].nxdata + + return(np_data) + +class NexusToXarrayProcessor(Processor): + '''A class to convert the default plottable data in an `NXobject` into an + `xarray.DataArray`.''' + + def _process(self, data): + '''Return the default plottable data signal in `data` as an + `xarray.DataArray`. + + :param data: input NeXus structure + :type data: nexusformat.nexus.tree.NXobject + :raises ValueError: if metadata for `xarray` is absen from `data` + :return: default plottable data signal in `data` + :rtype: xarray.DataArray + ''' + + from xarray import DataArray + + default_data = data.plottable_data + + if default_data is None: + default_data_path = data.attrs['default'] + default_data = data.get(default_data_path) + if default_data is None: + raise(ValueError(f'The structure of {data} contains no default data')) + + default_signal = default_data.attrs.get('signal') + if default_signal is None: + raise(ValueError(f'The signal of {default_data} is unknown')) + default_signal = default_signal.nxdata + + signal_data = default_data[default_signal].nxdata + + axes = default_data.attrs['axes'] + coords = {} + for axis_name in axes: + axis = default_data[axis_name] + coords[axis_name] = (axis_name, + axis.nxdata, + axis.attrs) + + dims = tuple(axes) + + name = default_signal + + attrs = default_data[default_signal].attrs + + return(DataArray(data=signal_data, + coords=coords, + dims=dims, + name=name, + attrs=attrs)) + +class XarrayToNexusProcessor(Processor): + '''A class to convert the data in an `xarray` structure to an + `nexusformat.nexus.NXdata`. + ''' + + def _process(self, data): + '''Return `data` represented as an `nexusformat.nexus.NXdata`. 
+ + :param data: The input `xarray` structure + :type data: typing.Union[xarray.DataArray, xarray.Dataset] + :return: The data and metadata in `data` + :rtype: nexusformat.nexus.NXdata + ''' + + from nexusformat.nexus import NXdata, NXfield + + signal = NXfield(value=data.data, name=data.name, attrs=data.attrs) + + axes = [] + for name, coord in data.coords.items(): + axes.append(NXfield(value=coord.data, name=name, attrs=coord.attrs)) + axes = tuple(axes) + + return(NXdata(signal=signal, axes=axes)) + +class XarrayToNumpyProcessor(Processor): + '''A class to convert the data in an `xarray.DataArray` structure to an + `numpy.ndarray`. + ''' + + def _process(self, data): + '''Return just the signal values contained in `data`. + + :param data: The input `xarray.DataArray` + :type data: xarray.DataArray + :return: The data in `data` + :rtype: numpy.ndarray + ''' + + return(data.data) + +class MapProcessor(Processor): + '''Class representing a process that takes a map configuration and returns a + `nexusformat.nexus.NXentry` representing that map's metadata and any + scalar-valued raw data requseted by the supplied map configuration. + ''' + + def _process(self, data): + '''Process the output of a `Reader` that contains a map configuration and + return a `nexusformat.nexus.NXentry` representing the map. + + :param data: Result of `Reader.read` where at least one item has the + value `'MapConfig'` for the `'schema'` key. + :type data: list[dict[str,object]] + :return: Map data & metadata (SPEC only, no detector) + :rtype: nexusformat.nexus.NXentry + ''' + + map_config = self.get_map_config(data) + nxentry = self.__class__.get_nxentry(map_config) + + return(nxentry) + + def get_map_config(self, data): + '''Get an instance of `MapConfig` from a returned value of `Reader.read` + + :param data: Result of `Reader.read` where at least one item has the + value `'MapConfig'` for the `'schema'` key. + :type data: list[dict[str,object]] + :raises Exception: If a valid `MapConfig` cannot be constructed from `data`. + :return: a valid instance of `MapConfig` with field values taken from `data`. 
+ :rtype: MapConfig + ''' + + from CHAP.models.map import MapConfig + + map_config = False + if isinstance(data, list): + for item in data: + if isinstance(item, dict): + if item.get('schema') == 'MapConfig': + map_config = item.get('data') + break + + if not map_config: + raise(ValueError('No map configuration found')) + + return(MapConfig(**map_config)) + + @staticmethod + def get_nxentry(map_config): + '''Use a `MapConfig` to construct a `nexusformat.nexus.NXentry` + + :param map_config: a valid map configuration + :type map_config: MapConfig + :return: the map's data and metadata contained in a NeXus structure + :rtype: nexusformat.nexus.NXentry + ''' + + from nexusformat.nexus import (NXcollection, + NXdata, + NXentry, + NXfield, + NXsample) + import numpy as np + + nxentry = NXentry(name=map_config.title) + + nxentry.map_config = json.dumps(map_config.dict()) + + nxentry[map_config.sample.name] = NXsample(**map_config.sample.dict()) + + nxentry.attrs['station'] = map_config.station + + nxentry.spec_scans = NXcollection() + for scans in map_config.spec_scans: + nxentry.spec_scans[scans.scanparsers[0].scan_name] = \ + NXfield(value=scans.scan_numbers, + dtype='int8', + attrs={'spec_file':str(scans.spec_file)}) + + nxentry.data = NXdata() + nxentry.data.attrs['axes'] = map_config.dims + for i,dim in enumerate(map_config.independent_dimensions[::-1]): + nxentry.data[dim.label] = NXfield(value=map_config.coords[dim.label], + units=dim.units, + attrs={'long_name': f'{dim.label} ({dim.units})', + 'data_type': dim.data_type, + 'local_name': dim.name}) + nxentry.data.attrs[f'{dim.label}_indices'] = i + + signal = False + auxilliary_signals = [] + for data in map_config.all_scalar_data: + nxentry.data[data.label] = NXfield(value=np.empty(map_config.shape), + units=data.units, + attrs={'long_name': f'{data.label} ({data.units})', + 'data_type': data.data_type, + 'local_name': data.name}) + if not signal: + signal = data.label + else: + auxilliary_signals.append(data.label) + + if signal: + nxentry.data.attrs['signal'] = signal + nxentry.data.attrs['auxilliary_signals'] = auxilliary_signals + + for scans in map_config.spec_scans: + for scan_number in scans.scan_numbers: + scanparser = scans.get_scanparser(scan_number) + for scan_step_index in range(scanparser.spec_scan_npts): + map_index = scans.get_index(scan_number, scan_step_index, map_config) + for data in map_config.all_scalar_data: + nxentry.data[data.label][map_index] = data.get_value(scans, scan_number, scan_step_index) + + return(nxentry) + +class IntegrationProcessor(Processor): + '''Class for integrating 2D detector data + ''' + + def _process(self, data): + '''Integrate the input data with the integration method and keyword + arguments supplied and return the results. + + :param data: input data, including raw data, integration method, and + keyword args for the integration method. + :type data: tuple[typing.Union[numpy.ndarray, list[numpy.ndarray]], + callable, + dict] + :param integration_method: the method of a + `pyFAI.azimuthalIntegrator.AzimuthalIntegrator` or + `pyFAI.multi_geometry.MultiGeometry` that returns the desired + integration results. 
+ :return: integrated raw data + :rtype: pyFAI.containers.IntegrateResult + ''' + + detector_data, integration_method, integration_kwargs = data + + return(integration_method(detector_data, **integration_kwargs)) + +class IntegrateMapProcessor(Processor): + '''Class representing a process that takes a map and integration + configuration and returns a `nexusformat.nexus.NXprocess` containing a map of + the integrated detector data requested. + ''' + + def _process(self, data): + '''Process the output of a `Reader` that contains a map and integration + configuration and return a `nexusformat.nexus.NXprocess` containing a map + of the integrated detector data requested + + :param data: Result of `Reader.read` where at least one item has the + value `'MapConfig'` for the `'schema'` key, and at least one item has + the value `'IntegrationConfig'` for the `'schema'` key. + :type data: list[dict[str,object]] + :return: integrated data and process metadata + :rtype: nexusformat.nexus.NXprocess + ''' + + map_config, integration_config = self.get_configs(data) + nxprocess = self.get_nxprocess(map_config, integration_config) + + return(nxprocess) + + def get_configs(self, data): + '''Return valid instances of `MapConfig` and `IntegrationConfig` from the + input supplied by `MultipleReader`. + + :param data: Result of `Reader.read` where at least one item has the + value `'MapConfig'` for the `'schema'` key, and at least one item has + the value `'IntegrationConfig'` for the `'schema'` key. + :type data: list[dict[str,object]] + :raises ValueError: if `data` cannot be parsed into map and integration configurations. + :return: valid map and integration configuration objects. + :rtype: tuple[MapConfig, IntegrationConfig] + ''' + + self.logger.debug('Getting configuration objects') + t0 = time() + + from CHAP.models.map import MapConfig + from CHAP.models.integration import IntegrationConfig + + map_config = False + integration_config = False + if isinstance(data, list): + for item in data: + if isinstance(item, dict): + schema = item.get('schema') + if schema == 'MapConfig': + map_config = item.get('data') + elif schema == 'IntegrationConfig': + integration_config = item.get('data') + + if not map_config: + raise(ValueError('No map configuration found')) + if not integration_config: + raise(ValueError('No integration configuration found')) + + map_config = MapConfig(**map_config) + integration_config = IntegrationConfig(**integration_config) + + self.logger.debug(f'Got configuration objects in {time()-t0:.3f} seconds') + + return(map_config, integration_config) + + def get_nxprocess(self, map_config, integration_config): + '''Use a `MapConfig` and `IntegrationConfig` to construct a + `nexusformat.nexus.NXprocess` + + :param map_config: a valid map configuration + :type map_config: MapConfig + :param integration_config: a valid integration configuration + :type integration_config" IntegrationConfig + :return: the integrated detector data and metadata contained in a NeXus + structure + :rtype: nexusformat.nexus.NXprocess + ''' + + self.logger.debug('Constructing NXprocess') + t0 = time() + + from nexusformat.nexus import (NXdata, + NXdetector, + NXfield, + NXprocess) + import numpy as np + import pyFAI + + nxprocess = NXprocess(name=integration_config.title) + + nxprocess.map_config = json.dumps(map_config.dict()) + nxprocess.integration_config = json.dumps(integration_config.dict()) + + nxprocess.program = 'pyFAI' + nxprocess.version = pyFAI.version + + for k,v in integration_config.dict().items(): + if 
k == 'detectors': + continue + nxprocess.attrs[k] = v + + for detector in integration_config.detectors: + nxprocess[detector.prefix] = NXdetector() + nxprocess[detector.prefix].local_name = detector.prefix + nxprocess[detector.prefix].distance = detector.azimuthal_integrator.dist + nxprocess[detector.prefix].distance.attrs['units'] = 'm' + nxprocess[detector.prefix].calibration_wavelength = detector.azimuthal_integrator.wavelength + nxprocess[detector.prefix].calibration_wavelength.attrs['units'] = 'm' + nxprocess[detector.prefix].attrs['poni_file'] = str(detector.poni_file) + nxprocess[detector.prefix].attrs['mask_file'] = str(detector.mask_file) + nxprocess[detector.prefix].raw_data_files = np.full(map_config.shape, '', dtype='|S256') + + nxprocess.data = NXdata() + + nxprocess.data.attrs['axes'] = (*map_config.dims, *integration_config.integrated_data_dims) + for i,dim in enumerate(map_config.independent_dimensions[::-1]): + nxprocess.data[dim.label] = NXfield(value=map_config.coords[dim.label], + units=dim.units, + attrs={'long_name': f'{dim.label} ({dim.units})', + 'data_type': dim.data_type, + 'local_name': dim.name}) + nxprocess.data.attrs[f'{dim.label}_indices'] = i + + for i,(coord_name,coord_values) in enumerate(integration_config.integrated_data_coordinates.items()): + if coord_name == 'radial': + type_ = pyFAI.units.RADIAL_UNITS + elif coord_name == 'azimuthal': + type_ = pyFAI.units.AZIMUTHAL_UNITS + coord_units = pyFAI.units.to_unit(getattr(integration_config, f'{coord_name}_units'), type_=type_) + nxprocess.data[coord_units.name] = coord_values + nxprocess.data.attrs[f'{coord_units.name}_indices'] = i+len(map_config.coords) + nxprocess.data[coord_units.name].units = coord_units.unit_symbol + nxprocess.data[coord_units.name].attrs['long_name'] = coord_units.label + + nxprocess.data.attrs['signal'] = 'I' + nxprocess.data.I = NXfield(value=np.empty((*tuple([len(coord_values) for coord_name,coord_values in map_config.coords.items()][::-1]), *integration_config.integrated_data_shape)), + units='a.u', + attrs={'long_name':'Intensity (a.u)'}) + + integrator = integration_config.get_multi_geometry_integrator() + if integration_config.integration_type == 'azimuthal': + integration_method = integrator.integrate1d + integration_kwargs = { + 'lst_mask': [detector.mask_array for detector in integration_config.detectors], + 'npt': integration_config.radial_npt + } + elif integration_config.integration_type == 'cake': + integration_method = integrator.integrate2d + integration_kwargs = { + 'lst_mask': [detector.mask_array for detector in integration_config.detectors], + 'npt_rad': integration_config.radial_npt, + 'npt_azim': integration_config.azimuthal_npt, + 'method': 'bbox' + } + + integration_processor = IntegrationProcessor() + integration_processor.logger.setLevel(self.logger.getEffectiveLevel()) + integration_processor.logger.addHandler(self.logger.handlers[0]) + lst_args = [] + for scans in map_config.spec_scans: + for scan_number in scans.scan_numbers: + scanparser = scans.get_scanparser(scan_number) + for scan_step_index in range(scanparser.spec_scan_npts): + map_index = scans.get_index(scan_number, scan_step_index, map_config) + detector_data = scans.get_detector_data(integration_config.detectors, scan_number, scan_step_index) + result = integration_processor.process((detector_data, integration_method, integration_kwargs)) + nxprocess.data.I[map_index] = result.intensity + for detector in integration_config.detectors: + nxprocess[detector.prefix].raw_data_files[map_index] = 
scanparser.get_detector_data_file(detector.prefix, scan_step_index) + + self.logger.debug(f'Constructed NXprocess in {time()-t0:.3f} seconds') + + return(nxprocess) + +class MCACeriaCalibrationProcessor(Processor): + '''Class representing the procedure to use a CeO2 scan to obtain tuned values + for the bragg diffraction angle and linear correction parameters for MCA + channel energies for an EDD experimental setup. + ''' + + def _process(self, data): + '''Return tuned values for 2&theta and linear correction parameters for + the MCA channel energies. + + :param data: input configuration for the raw data & tuning procedure + :type data: list[dict[str,object]] + :return: original configuration dictionary with tuned values added + :rtype: dict[str,float] + ''' + + calibration_config = self.get_config(data) + + tth, slope, intercept = self.calibrate(calibration_config) + + calibration_config.tth_calibrated = tth + calibration_config.slope_calibrated = slope + calibration_config.intercept_calibrated = intercept + + return(calibration_config.dict()) + + def get_config(self, data): + '''Get an instance of the configuration object needed by this + `Processor` from a returned value of `Reader.read` + + :param data: Result of `Reader.read` where at least one item has the + value `'MCACeriaCalibrationConfig'` for the `'schema'` key. + :type data: list[dict[str,object]] + :raises Exception: If a valid config object cannot be constructed from `data`. + :return: a valid instance of a configuration object with field values + taken from `data`. + :rtype: MCACeriaCalibrationConfig + ''' + + from CHAP.models.edd import MCACeriaCalibrationConfig + + calibration_config = False + if isinstance(data, list): + for item in data: + if isinstance(item, dict): + if item.get('schema') == 'MCACeriaCalibrationConfig': + calibration_config = item.get('data') + break + + if not calibration_config: + raise(ValueError('No MCA ceria calibration configuration found in input data')) + + return(MCACeriaCalibrationConfig(**calibration_config)) + + def calibrate(self, calibration_config): + '''Iteratively calibrate 2&theta by fitting selected peaks of an MCA + spectrum until the computed strain is sufficiently small. Use the fitted + peak locations to determine linear correction parameters for the MCA's + channel energies. + + :param calibration_config: object configuring the CeO2 calibration procedure + :type calibration_config: MCACeriaCalibrationConfig + :return: calibrated values of 2&theta and linear correction parameters + for MCA channel energies : tth, slope, intercept + :rtype: float, float, float + ''' + + from msnctools.fit import Fit, FitMultipeak + import numpy as np + from scipy.constants import physical_constants + + hc = physical_constants['Planck constant in eV/Hz'][0] * \ + physical_constants['speed of light in vacuum'][0] * \ + 1e7 # We'll work in keV and A, not eV and m. 
+ + # Collect raw MCA data of interest + mca_data = calibration_config.mca_data() + mca_bin_energies = np.arange(0, calibration_config.num_bins) * \ + (calibration_config.max_energy_kev / calibration_config.num_bins) + + # Mask out the corrected MCA data for fitting + mca_mask = calibration_config.mca_mask() + fit_mca_energies = mca_bin_energies[mca_mask] + fit_mca_intensities = mca_data[mca_mask] + + # Correct raw MCA data for variable flux at different energies + flux_correct = calibration_config.flux_correction_interpolation_function() + mca_intensity_weights = flux_correct(fit_mca_energies) + fit_mca_intensities = fit_mca_intensities / mca_intensity_weights + + # Get the HKLs and lattice spacings that will be used for fitting + tth = calibration_config.tth_initial_guess + fit_hkls, fit_ds = calibration_config.fit_ds() + c_1 = fit_hkls[:,0]**2 + fit_hkls[:,1]**2 + fit_hkls[:,2]**2 + + for iter_i in range(calibration_config.max_iter): + + ### Perform the uniform fit first ### + + # Get expected peak energy locations for this iteration's starting + # value of tth + fit_lambda = 2.0 * fit_ds * np.sin(0.5*np.radians(tth)) + fit_E0 = hc / fit_lambda + + # Run the uniform fit + best_fit, residual, best_values, best_errors, redchi, success = \ + FitMultipeak.fit_multipeak(fit_mca_intensities, + fit_E0, + x=fit_mca_energies, + fit_type='uniform') + + # Extract values of interest from the best values for the uniform fit + # parameters + uniform_fit_centers = [best_values[f'peak{i+1}_center'] for i in range(len(calibration_config.fit_hkls))] + # uniform_a = best_values['scale_factor'] + # uniform_strain = np.log(uniform_a / calibration_config.lattice_parameter_angstrom) + # uniform_tth = tth * (1.0 + uniform_strain) + # uniform_rel_rms_error = np.linalg.norm(residual) / np.linalg.norm(fit_mca_intensities) + + ### Next, perform the unconstrained fit ### + + # Use the peak locations found in the uniform fit as the initial + # guesses for peak locations in the unconstrained fit + best_fit, residual, best_values, best_errors, redchi, success = \ + FitMultipeak.fit_multipeak(fit_mca_intensities, + uniform_fit_centers, + x=fit_mca_energies, + fit_type='unconstrained') + + # Extract values of interest from the best values for the + # unconstrained fit parameters + unconstrained_fit_centers = np.array([best_values[f'peak{i+1}_center'] for i in range(len(calibration_config.fit_hkls))]) + unconstrained_a = 0.5 * hc * np.sqrt(c_1) / (unconstrained_fit_centers * abs(np.sin(0.5*np.radians(tth)))) + unconstrained_strains = np.log(unconstrained_a / calibration_config.lattice_parameter_angstrom) + unconstrained_strain = np.mean(unconstrained_strains) + unconstrained_tth = tth * (1.0 + unconstrained_strain) + # unconstrained_rel_rms_error = np.linalg.norm(residual) / np.linalg.norm(fit_mca_intensities) + + + # Update tth for the next iteration of tuning + prev_tth = tth + tth = unconstrained_tth + + # Stop tuning tth at this iteration if differences are small enough + if abs(tth - prev_tth) < calibration_config.tune_tth_tol: + break + + # Fit line to expected / computed peak locations from the last + # unconstrained fit. 
+ fit = Fit.fit_data(fit_E0, 'linear', x=unconstrained_fit_centers, nan_policy='omit') + slope = fit.best_values['slope'] + intercept = fit.best_values['intercept'] + + return(float(tth), float(slope), float(intercept)) + +class MCADataProcessor(Processor): + '''Class representing a process to return data from an MCA, restructured to + incorporate the shape & metadata associated with a map configuration to + which the MCA data belongs, and linearly transformed according to the + results of a ceria calibration. + ''' + + def _process(self, data): + '''Process configurations for a map and MCA detector(s), and return the + raw MCA data collected over the map. + + :param data: input map configuration and results of ceria calibration + :type data: list[dict[str,object]] + :return: calibrated and flux-corrected MCA data + :rtype: nexusformat.nexus.NXroot + ''' + + map_config, calibration_config = self.get_configs(data) + nxroot = self.get_nxroot(map_config, calibration_config) + + return(nxroot) + + def get_configs(self, data): + '''Get instances of the configuration objects needed by this + `Processor` from a returned value of `Reader.read` + + :param data: Result of `Reader.read` where at least one item has the + value `'MapConfig'` for the `'schema'` key, and at least one item has + the value `'MCACeriaCalibrationConfig'` for the `'schema'` key. + :type data: list[dict[str,object]] + :raises Exception: If valid config objects cannot be constructed from `data`. + :return: valid instances of the configuration objects with field values + taken from `data`. + :rtype: tuple[MapConfig, MCACeriaCalibrationConfig] + ''' + + from CHAP.models.map import MapConfig + from CHAP.models.edd import MCACeriaCalibrationConfig + + map_config = False + calibration_config = False + if isinstance(data, list): + for item in data: + if isinstance(item, dict): + schema = item.get('schema') + if schema == 'MapConfig': + map_config = item.get('data') + elif schema == 'MCACeriaCalibrationConfig': + calibration_config = item.get('data') + + if not map_config: + raise(ValueError('No map configuration found in input data')) + if not calibration_config: + raise(ValueError('No MCA ceria calibration configuration found in input data')) + + return(MapConfig(**map_config), MCACeriaCalibrationConfig(**calibration_config)) + + def get_nxroot(self, map_config, calibration_config): + '''Get a map of the MCA data collected by the scans in `map_config`. The + MCA data will be calibrated and flux-corrected according to the + parameters included in `calibration_config`. The data will be returned + along with relevant metadata in the form of a NeXus structure.
+ + :param map_config: the map configuration + :type map_config: MapConfig + :param calibration_config: the calibration configuration + :type calibration_config: MCACeriaCalibrationConfig + :return: a map of the calibrated and flux-corrected MCA data + :rtype: nexusformat.nexus.NXroot + ''' + + from nexusformat.nexus import (NXdata, + NXdetector, + NXentry, + NXinstrument, + NXroot) + import numpy as np + + nxroot = NXroot() + + nxroot[map_config.title] = MapProcessor.get_nxentry(map_config) + nxentry = nxroot[map_config.title] + + nxentry.instrument = NXinstrument() + nxentry.instrument.detector = NXdetector() + nxentry.instrument.detector.calibration_configuration = json.dumps(calibration_config.dict()) + + nxentry.instrument.detector.data = NXdata() + nxdata = nxentry.instrument.detector.data + nxdata.raw = np.empty((*map_config.shape, calibration_config.num_bins)) + nxdata.raw.attrs['units'] = 'counts' + nxdata.channel_energy = calibration_config.slope_calibrated * \ + np.arange(0, calibration_config.num_bins) * \ + (calibration_config.max_energy_kev / calibration_config.num_bins) + \ + calibration_config.intercept_calibrated + nxdata.channel_energy.attrs['units'] = 'keV' + + for scans in map_config.spec_scans: + for scan_number in scans.scan_numbers: + scanparser = scans.get_scanparser(scan_number) + for scan_step_index in range(scanparser.spec_scan_npts): + map_index = scans.get_index(scan_number, scan_step_index, map_config) + nxdata.raw[map_index] = scanparser.get_detector_data(calibration_config.detector_name, scan_step_index) + + nxentry.data.makelink(nxdata.raw, name=calibration_config.detector_name) + nxentry.data.makelink(nxdata.channel_energy, name=f'{calibration_config.detector_name}_channel_energy') + if isinstance(nxentry.data.attrs['axes'], str): + nxentry.data.attrs['axes'] = [nxentry.data.attrs['axes'], f'{calibration_config.detector_name}_channel_energy'] + else: + nxentry.data.attrs['axes'] += [f'{calibration_config.detector_name}_channel_energy'] + nxentry.data.attrs['signal'] = calibration_config.detector_name + + return(nxroot) + +class StrainAnalysisProcessor(Processor): + '''Class representing a process to compute a map of sample strains by fitting + Bragg peaks in 1D detector data and analyzing the difference between measured + peak locations and expected peak locations for the sample measured. + ''' + + def _process(self, data): + '''Process the input map detector data & configuration for the strain + analysis procedure, and return a map of sample strains. + + :param data: results of `MultipleReader.read` containing input map + detector data and strain analysis configuration + :type data: list[dict[str,object]] + :return: map of sample strains + :rtype: xarray.Dataset + ''' + + strain_analysis_config = self.get_config(data) + + return(data) + + def get_config(self, data): + '''Get an instance of the configuration object needed by this + `Processor` from a returned value of `Reader.read` + + :param data: Result of `Reader.read` where at least one item has the + value `'StrainAnalysisConfig'` for the `'schema'` key. + :type data: list[dict[str,object]] + :raises Exception: If a valid config object cannot be constructed from `data`. + :return: a valid instance of the configuration object with field values + taken from `data`.
+ :rtype: StrainAnalysisConfig + ''' + + strain_analysis_config = False + if isinstance(data, list): + for item in data: + if isinstance(item, dict): + schema = item.get('schema') + if schema == 'StrainAnalysisConfig': + strain_analysis_config = item.get('data') + + if not strain_analysis_config: + raise(ValueError('No strain analysis configuration found in input data')) + + return(strain_analysis_config) + + +class OptionParser(): + '''User based option parser''' + def __init__(self): + self.parser = argparse.ArgumentParser(prog='PROG') + self.parser.add_argument("--data", action="store", + dest="data", default="", help="Input data") + self.parser.add_argument("--processor", action="store", + dest="processor", default="Processor", help="Processor class name") + self.parser.add_argument('--log-level', choices=logging._nameToLevel.keys(), + dest='log_level', default='INFO', help='logging level') + +def main(): + '''Main function''' + optmgr = OptionParser() + opts = optmgr.parser.parse_args() + clsName = opts.processor + try: + processorCls = getattr(sys.modules[__name__],clsName) + except: + print(f'Unsupported processor {clsName}') + sys.exit(1) + + processor = processorCls() + processor.logger.setLevel(getattr(logging, opts.log_level)) + log_handler = logging.StreamHandler() + log_handler.setFormatter(logging.Formatter('{name:20}: {message}', style='{')) + processor.logger.addHandler(log_handler) + data = processor.process(opts.data) + + print(f"Processor {processor} operates on data {data}") + +if __name__ == '__main__': + main()
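Usage sketch (not part of this changeset): the map, integration, calibration, and strain processors above all consume the output convention of `Reader.read`, a list of dictionaries carrying 'schema' and 'data' keys, and pick out the item whose schema they expect. The toy processor below only illustrates that contract; the class name and the payload contents are assumptions for illustration, and the import path assumes the package installs as `CHAP`.

from CHAP.processor import Processor

class SchemaPickProcessor(Processor):
    '''Toy processor mirroring the get_config()/get_map_config() pattern above.'''

    def _process(self, data):
        # Scan the Reader.read-style list for the item tagged with the expected schema
        for item in data:
            if isinstance(item, dict) and item.get('schema') == 'MapConfig':
                return item.get('data')
        raise ValueError('No MapConfig item found in input data')

if __name__ == '__main__':
    # A Reader.read-style payload: list[dict[str, object]]
    payload = [{'name': 'YAMLReader', 'data': {'title': 'my_map'},
                'type': None, 'schema': 'MapConfig', 'encoding': None}]
    print(SchemaPickProcessor().process(payload))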
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/lib/CHAP/reader.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,209 @@ +#!/usr/bin/env python +""" +File : reader.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: generic Reader module +""" + +# system modules +import argparse +import json +import logging +import sys +from time import time + +# local modules +# from pipeline import PipelineObject + +class Reader(): + """ + Reader represents a generic file reader + """ + + def __init__(self): + """ + Constructor of Reader class + """ + self.__name__ = self.__class__.__name__ + self.logger = logging.getLogger(self.__name__) + self.logger.propagate = False + + def read(self, type_=None, schema=None, encoding=None, **_read_kwargs): + '''Read API + + Wrapper to read, format, and return the data requested. + + :param type_: the expected type of data read from `filename`, defaults + to `None` + :type type_: type, optional + :param schema: the expected schema of the data read from `filename`, + defaults to `None` + :type schema: str, optional + :param _read_kwargs: keyword arguments to pass to `self._read`, defaults + to `{}` + :type _read_kwargs: dict, optional + :return: list with one item: a dictionary containing the data read from + `filename`, the name of this `Reader`, and the values of `type_` and + `schema`. + :rtype: list[dict[str,object]] + ''' + + t0 = time() + self.logger.info(f'Executing "read" with type={type_}, schema={schema}, kwargs={_read_kwargs}') + + data = [{'name': self.__name__, + 'data': self._read(**_read_kwargs), + 'type': type_, + 'schema': schema, + 'encoding': encoding}] + + self.logger.info(f'Finished "read" in {time()-t0:.3f} seconds\n') + return(data) + + def _read(self, filename): + '''Read and return the data requested from `filename` + + :param filename: Name of file to read from + :return: the contents of `filename` + ''' + + if not filename: + self.logger.warning('No file name is given, will skip read operation') + return None + + with open(filename) as file: + data = file.read() + return(data) + +class MultipleReader(Reader): + def read(self, readers): + '''Return results from multiple `Reader`s. + + :param readers: a list of dictionaries, each mapping the name of a + `Reader` class to the keyword arguments for its `read` method. + :type readers: list[dict] + :return: The results of calling `Reader.read(**kwargs)` for each item + configured in `readers`. + :rtype: list[dict[str,object]] + ''' + + t0 = time() + self.logger.info(f'Executing "read" with {len(readers)} Readers') + + data = [] + for reader_config in readers: + reader_name = list(reader_config.keys())[0] + reader_class = getattr(sys.modules[__name__], reader_name) + reader = reader_class() + reader_kwargs = reader_config[reader_name] + + data.extend(reader.read(**reader_kwargs)) + + self.logger.info(f'Finished "read" in {time()-t0:.3f} seconds\n') + + return(data) + +class YAMLReader(Reader): + def _read(self, filename): + '''Return a dictionary from the contents of a YAML file.
+ + :param filename: name of the YAML file to read from + :return: the contents of `filename` + :rtype: dict + ''' + + import yaml + + with open(filename) as file: + data = yaml.safe_load(file) + return(data) + +class BinaryFileReader(Reader): + def _read(self, filename): + '''Return a content of a given file name + + :param filename: name of the binart file to read from + :return: the content of `filename` + :rtype: binary + ''' + with open(filename, 'rb') as file: + data = file.read() + return(data) + +class NexusReader(Reader): + def _read(self, filename, nxpath='/'): + '''Return the NeXus object stored at `nxpath` in the nexus file + `filename`. + + :param filename: name of the NeXus file to read from + :type filename: str + :param nxpath: path to a specific loaction in the NeXus file to read + from, defaults to `'/'` + :type nxpath: str, optional + :raises nexusformat.nexus.NeXusError: if `filename` is not a NeXus + file or `nxpath` is not in `filename`. + :return: the NeXus structure indicated by `filename` and `nxpath`. + :rtype: nexusformat.nexus.NXobject + ''' + + from nexusformat.nexus import nxload + + nxobject = nxload(filename)[nxpath] + return(nxobject) + +class URLReader(Reader): + def _read(self, url, headers={}): + '''Make an HTTPS request to the provided URL and return the results. + Headers for the request are optional. + + :param url: the URL to read + :type url: str + :param headers: headers to attach to the request, defaults to `{}` + :type headers: dict, optional + :return: the content of the response + :rtype: object + ''' + + import requests + + resp = requests.get(url, headers=headers) + data = resp.content + + self.logger.debug(f'Response content: {data}') + + return(data) + +class OptionParser(): + '''User based option parser''' + def __init__(self): + self.parser = argparse.ArgumentParser(prog='PROG') + self.parser.add_argument("--filename", action="store", + dest="filename", default="", help="Input file") + self.parser.add_argument("--reader", action="store", + dest="reader", default="Reader", help="Reader class name") + self.parser.add_argument('--log-level', choices=logging._nameToLevel.keys(), + dest='log_level', default='INFO', help='logging level') + +def main(): + '''Main function''' + optmgr = OptionParser() + opts = optmgr.parser.parse_args() + clsName = opts.reader + try: + readerCls = getattr(sys.modules[__name__],clsName) + except: + print(f'Unsupported reader {clsName}') + sys.exit(1) + + reader = readerCls() + reader.logger.setLevel(getattr(logging, opts.log_level)) + log_handler = logging.StreamHandler() + log_handler.setFormatter(logging.Formatter('{name:20}: {message}', style='{')) + reader.logger.addHandler(log_handler) + data = reader.read(filename=opts.filename) + + print(f"Reader {reader} reads from {opts.filename}, data {data}") + +if __name__ == '__main__': + main()
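A brief usage sketch for the readers above (not part of this changeset); the file names are placeholders and the import path assumes the `CHAP` package layout shown in this diff. `MultipleReader.read` takes a list of single-key dictionaries mapping a reader class name to its keyword arguments and concatenates the per-reader results.

from CHAP.reader import YAMLReader, MultipleReader

# Single reader: the schema tag lets downstream Processors find this payload
data = YAMLReader().read(schema='MapConfig', filename='map.yaml')

# Several readers in one call, as a Pipeline item would configure them
combined = MultipleReader().read([
    {'YAMLReader': {'filename': 'map.yaml', 'schema': 'MapConfig'}},
    {'YAMLReader': {'filename': 'integration.yaml', 'schema': 'IntegrationConfig'}},
])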
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/lib/CHAP/runner.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,82 @@ +#!/usr/bin/env python +#-*- coding: utf-8 -*- +#pylint: disable= +""" +File : runner.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: +""" + +# system modules +import argparse +import logging +import os +import sys +import yaml + +# local modules +from CHAP.pipeline import Pipeline + + +class OptionParser(): + def __init__(self): + "User based option parser" + self.parser = argparse.ArgumentParser(prog='PROG') + self.parser.add_argument("--config", action="store", + dest="config", default="", help="Input configuration file") + self.parser.add_argument('--log-level', choices=logging._nameToLevel.keys(), + dest='log_level', default='INFO', help='logging level') + +def main(): + "Main function" + optmgr = OptionParser() + opts = optmgr.parser.parse_args() + runner(opts) + +def runner(opts): + """ + Main runner function + + :param opts: opts is an instance of argparse.Namespace which contains all input parameters + """ + + logger = logging.getLogger(__name__) + log_level = getattr(logging, opts.log_level.upper()) + logger.setLevel(log_level) + log_handler = logging.StreamHandler() + log_handler.setFormatter(logging.Formatter('{name:20}: {message}', style='{')) + logger.addHandler(log_handler) + + config = {} + with open(opts.config) as file: + config = yaml.safe_load(file) + logger.info(f'Input configuration: {config}\n') + pipeline_config = config.get('pipeline', []) + objects = [] + kwds = [] + for item in pipeline_config: + # load individual object with given name from its module + if isinstance(item, dict): + name = list(item.keys())[0] + kwargs = item[name] + else: + name = item + kwargs = {} + modName, clsName = name.split('.') + module = __import__(f'CHAP.{modName}') + obj = getattr(module, clsName)() + obj.logger.setLevel(log_level) + obj.logger.addHandler(log_handler) + logger.info(f'Loaded {obj}') + objects.append(obj) + kwds.append(kwargs) + pipeline = Pipeline(objects, kwds) + pipeline.logger.setLevel(log_level) + pipeline.logger.addHandler(log_handler) + logger.info(f'Loaded {pipeline} with {len(objects)} items\n') + logger.info(f'Calling "execute" on {pipeline}') + pipeline.execute() + + +if __name__ == '__main__': + main()
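A sketch of driving `runner()` programmatically (not part of this changeset). The configuration mirrors what the loop above expects: a top-level `pipeline` list whose items are either `module.Class` strings or single-key mappings from `module.Class` to keyword arguments. The file names are placeholders, and the import assumes the module is installed as `CHAP.runner`.

import yaml
from argparse import Namespace
from CHAP.runner import runner

config = {'pipeline': [
    {'reader.YAMLReader': {'filename': 'map.yaml', 'schema': 'MapConfig'}},
    'processor.PrintProcessor',
    {'writer.YAMLWriter': {'filename': 'out.yaml', 'force_overwrite': True}},
]}
with open('config.yaml', 'w') as file:
    yaml.dump(config, file, sort_keys=False)

# equivalent to: python -m CHAP --config config.yaml --log-level INFO
runner(Namespace(config='config.yaml', log_level='INFO'))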
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/lib/CHAP/writer.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,243 @@ +#!/usr/bin/env python +""" +File : writer.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: generic Writer module +""" + +# system modules +import argparse +import json +import logging +import os +import sys +from time import time + +# local modules +# from pipeline import PipelineObject + +class Writer(): + """ + Writer represent generic file writer + """ + + def __init__(self): + """ + Constructor of Writer class + """ + self.__name__ = self.__class__.__name__ + self.logger = logging.getLogger(self.__name__) + self.logger.propagate = False + + def write(self, data, filename, **_write_kwargs): + """ + write API + + :param filename: Name of file to write to + :param data: data to write to file + :return: data written to file + """ + + t0 = time() + self.logger.info(f'Executing "write" with filename={filename}, type(data)={type(data)}, kwargs={_write_kwargs}') + + data = self._write(data, filename, **_write_kwargs) + + self.logger.info(f'Finished "write" in {time()-t0:.3f} seconds\n') + + return(data) + + def _write(self, data, filename): + with open(filename, 'a') as file: + file.write(data) + return(data) + +class YAMLWriter(Writer): + def _write(self, data, filename, force_overwrite=False): + '''If `data` is a `dict`, write it to `filename`. + + :param data: the dictionary to write to `filename`. + :type data: dict + :param filename: name of the file to write to. + :type filename: str + :param force_overwrite: flag to allow data in `filename` to be + overwritten if it already exists. + :type force_overwrite: bool + :raises TypeError: if `data` is not a `dict` + :raises RuntimeError: if `filename` already exists and + `force_overwrite` is `False`. + :return: the original input data + :rtype: dict + ''' + + import yaml + + if not isinstance(data, (dict, list)): + raise(TypeError(f'{self.__name__}.write: input data must be a dict or list.')) + + if not force_overwrite: + if os.path.isfile(filename): + raise(RuntimeError(f'{self.__name__}: {filename} already exists.')) + + with open(filename, 'w') as outf: + yaml.dump(data, outf, sort_keys=False) + + return(data) + +class ExtractArchiveWriter(Writer): + def _write(self, data, filename): + '''Take a .tar archive represented as bytes in `data` and write the + extracted archive to files. + + :param data: the archive data + :type data: bytes + :param filename: the name of a directory to which the archive files will + be written + :type filename: str + :return: the original `data` + :rtype: bytes + ''' + + from io import BytesIO + import tarfile + + tar = tarfile.open(fileobj=BytesIO(data)) + tar.extractall(path=filename) + + return(data) + + +class NexusWriter(Writer): + def _write(self, data, filename, force_overwrite=False): + '''Write `data` to a NeXus file + + :param data: the data to write to `filename`. + :param filename: name of the file to write to. + :param force_overwrite: flag to allow data in `filename` to be + overwritten, if it already exists. 
+ :return: the original input data + ''' + + from nexusformat.nexus import NXobject + import xarray as xr + + if isinstance(data, NXobject): + nxstructure = data + + elif isinstance(data, xr.Dataset): + nxstructure = self.get_nxdata_from_dataset(data) + + elif isinstance(data, xr.DataArray): + nxstructure = self.get_nxdata_from_dataarray(data) + + else: + raise(TypeError(f'{self.__name__}.write: unknown data format: {type(data).__name__}')) + + mode = 'w' if force_overwrite else 'w-' + nxstructure.save(filename, mode=mode) + + return(data) + + + def get_nxdata_from_dataset(self, dset): + '''Return an instance of `nexusformat.nexus.NXdata` that represents the + data and metadata attributes contained in `dset`. + + :param dset: the input dataset to represent + :type data: xarray.Dataset + :return: `dset` represented as an instance of `nexusformat.nexus.NXdata` + :rtype: nexusformat.nexus.NXdata + ''' + + from nexusformat.nexus import NXdata, NXfield + + nxdata_args = {'signal':None, 'axes':()} + + for var in dset.data_vars: + data_var = dset[var] + nxfield = NXfield(data_var.data, + name=data_var.name, + attrs=data_var.attrs) + if nxdata_args['signal'] is None: + nxdata_args['signal'] = nxfield + else: + nxdata_args[var] = nxfield + + for coord in dset.coords: + coord_var = dset[coord] + nxfield = NXfield(coord_var.data, + name=coord_var.name, + attrs=coord_var.attrs) + nxdata_args['axes'] = (*nxdata_args['axes'], nxfield) + + nxdata = NXdata(**nxdata_args) + nxdata.attrs['xarray_attrs'] = json.dumps(dset.attrs) + + return(nxdata) + + def get_nxdata_from_dataarray(self, darr): + '''Return an instance of `nexusformat.nexus.NXdata` that represents the + data and metadata attributes contained in `darr`. + + :param darr: the input dataset to represent + :type darr: xarray.DataArray + :return: `darr` represented as an instance of `nexusformat.nexus.NXdata` + :rtype: nexusformat.nexus.NXdata + ''' + + from nexusformat.nexus import NXdata, NXfield + + nxdata_args = {'signal':None, 'axes':()} + + nxdata_args['signal'] = NXfield(darr.data, + name=darr.name, + attrs=darr.attrs) + + + for coord in darr.coords: + coord_var = darr[coord] + nxfield = NXfield(coord_var.data, + name=coord_var.name, + attrs=coord_var.attrs) + nxdata_args['axes'] = (*nxdata_args['axes'], nxfield) + + nxdata = NXdata(**nxdata_args) + nxdata.attrs['xarray_attrs'] = json.dumps(darr.attrs) + + return(nxdata) + + +class OptionParser(): + '''User based option parser''' + def __init__(self): + self.parser = argparse.ArgumentParser(prog='PROG') + self.parser.add_argument("--data", action="store", + dest="data", default="", help="Input data") + self.parser.add_argument("--filename", action="store", + dest="filename", default="", help="Output file") + self.parser.add_argument("--writer", action="store", + dest="writer", default="Writer", help="Writer class name") + self.parser.add_argument('--log-level', choices=logging._nameToLevel.keys(), + dest='log_level', default='INFO', help='logging level') + +def main(): + '''Main function''' + optmgr = OptionParser() + opts = optmgr.parser.parse_args() + clsName = opts.writer + try: + writerCls = getattr(sys.modules[__name__],clsName) + except: + print(f'Unsupported writer {clsName}') + sys.exit(1) + + writer = writerCls() + writer.logger.setLevel(getattr(logging, opts.log_level)) + log_handler = logging.StreamHandler() + log_handler.setFormatter(logging.Formatter('{name:20}: {message}', style='{')) + writer.logger.addHandler(log_handler) + data = writer.write(opts.data, opts.filename) + 
print(f"Writer {writer} writes to {opts.filename}, data {data}") + +if __name__ == '__main__': + main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/lib/MLaaS/ktrain.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,205 @@ +#!/usr/bin/env python +#-*- coding: utf-8 -*- +#pylint: disable= +""" +File : ktrain.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: Keras based ML network to train over MNIST dataset +""" + +# system modules +import os +import sys +import json +import gzip +import pickle +import argparse + +# third-party modules +import numpy as np +import tensorflow as tf +from tensorflow import keras +from tensorflow.keras import layers +from tensorflow.keras import backend as K +from tensorflow.python.tools import saved_model_utils + + +def modelGraph(model_dir): + """ + Provide input/output names used by TF Graph along with graph itself + The code is based on TF saved_model_cli.py script. + """ + input_names = [] + output_names = [] + tag_sets = saved_model_utils.get_saved_model_tag_sets(model_dir) + for tag_set in sorted(tag_sets): + print('%r' % ', '.join(sorted(tag_set))) + meta_graph_def = saved_model_utils.get_meta_graph_def(model_dir, tag_set[0]) + for key in meta_graph_def.signature_def.keys(): + meta = meta_graph_def.signature_def[key] + if hasattr(meta, 'inputs') and hasattr(meta, 'outputs'): + inputs = meta.inputs + outputs = meta.outputs + input_signatures = list(meta.inputs.values()) + input_names = [signature.name for signature in input_signatures] + if len(input_names) > 0: + output_signatures = list(meta.outputs.values()) + output_names = [signature.name for signature in output_signatures] + return input_names, output_names, meta_graph_def + +def readData(fin, num_classes): + """ + Helper function to read MNIST data and provide it to + upstream code, e.g. to the training layer + """ + # Load the data and split it between train and test sets +# (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() + f = gzip.open(fin, 'rb') + if sys.version_info < (3,): + mnist_data = pickle.load(f) + else: + mnist_data = pickle.load(f, encoding='bytes') + f.close() + (x_train, y_train), (x_test, y_test) = mnist_data + + # Scale images to the [0, 1] range + x_train = x_train.astype("float32") / 255 + x_test = x_test.astype("float32") / 255 + # Make sure images have shape (28, 28, 1) + x_train = np.expand_dims(x_train, -1) + x_test = np.expand_dims(x_test, -1) + print("x_train shape:", x_train.shape) + print(x_train.shape[0], "train samples") + print(x_test.shape[0], "test samples") + + + # convert class vectors to binary class matrices + y_train = keras.utils.to_categorical(y_train, num_classes) + y_test = keras.utils.to_categorical(y_test, num_classes) + return x_train, y_train, x_test, y_test + + +def train(fin, fout=None, model_name=None, epochs=1, batch_size=128, h5=False): + """ + train function for MNIST + """ + # Model / data parameters + num_classes = 10 + input_shape = (28, 28, 1) + + # create ML model + model = keras.Sequential( + [ + keras.Input(shape=input_shape), + layers.Conv2D(32, kernel_size=(3, 3), activation="relu"), + layers.MaxPooling2D(pool_size=(2, 2)), + layers.Conv2D(64, kernel_size=(3, 3), activation="relu"), + layers.MaxPooling2D(pool_size=(2, 2)), + layers.Flatten(), + layers.Dropout(0.5), + layers.Dense(num_classes, activation="softmax"), + ] + ) + + model.summary() + print("model input", model.input, type(model.input), model.input.__dict__) + print("model output", model.output, type(model.output), model.output.__dict__) + model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]) + 
+ # train model + x_train, y_train, x_test, y_test = readData(fin, num_classes) + model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1) + + # evaluate trained model + score = model.evaluate(x_test, y_test, verbose=0) + print("Test loss:", score[0]) + print("Test accuracy:", score[1]) + print("save model to", fout) + writer(fout, model_name, model, input_shape, h5) + +def writer(fout, model_name, model, input_shape, h5=False): + """ + Writer provide write function for given model + """ + if not fout: + return + model.save(fout) + if h5: + model.save('{}/{}'.format(fout, h5), save_format='h5') + pbModel = '{}/saved_model.pb'.format(fout) + pbtxtModel = '{}/saved_model.pbtxt'.format(fout) + convert(pbModel, pbtxtModel) + + # get meta-data information about our ML model + input_names, output_names, model_graph = modelGraph(model_name) + print("### input", input_names) + print("### output", output_names) + # ML uses (28,28,1) shape, i.e. 28x28 black-white images + # if we'll use color images we'll use shape (28, 28, 3) + img_channels = input_shape[2] # last item represent number of colors + meta = {'name': model_name, + 'model': 'saved_model.pb', + 'labels': 'labels.txt', + 'img_channels': img_channels, + 'input_name': input_names[0].split(':')[0], + 'output_name': output_names[0].split(':')[0], + 'input_node': model.input.name, + 'output_node': model.output.name + } + with open(fout+'/params.json', 'w') as ostream: + ostream.write(json.dumps(meta)) + with open(fout+'/labels.txt', 'w') as ostream: + for i in range(0, 10): + ostream.write(str(i)+'\n') + with open(fout + '/model.graph', 'wb') as ostream: + ostream.write(model_graph.SerializeToString()) + +def convert(fin, fout): + """ + convert input model.pb into output model.pbtxt + Based on internet search: + - https://www.tensorflow.org/guide/saved_model + - https://www.programcreek.com/python/example/123317/tensorflow.core.protobuf.saved_model_pb2.SavedModel + """ + import google.protobuf + from tensorflow.core.protobuf import saved_model_pb2 + import tensorflow as tf + + saved_model = saved_model_pb2.SavedModel() + + with open(fin, 'rb') as f: + saved_model.ParseFromString(f.read()) + + with open(fout, 'w') as f: + f.write(google.protobuf.text_format.MessageToString(saved_model)) + + +class OptionParser(): + def __init__(self): + "User based option parser" + self.parser = argparse.ArgumentParser(prog='PROG') + self.parser.add_argument("--fin", action="store", + dest="fin", default="", help="Input MNIST file") + self.parser.add_argument("--fout", action="store", + dest="fout", default="", help="Output models area") + self.parser.add_argument("--model", action="store", + dest="model", default="mnist", help="model name") + self.parser.add_argument("--epochs", action="store", + dest="epochs", default=1, help="number of epochs to use in ML training") + self.parser.add_argument("--batch_size", action="store", + dest="batch_size", default=128, help="batch size to use in training") + self.parser.add_argument("--h5", action="store", + dest="h5", default="mnist", help="h5 model file name") + +def main(): + "Main function" + optmgr = OptionParser() + opts = optmgr.parser.parse_args() + train(opts.fin, opts.fout, + model_name=opts.model, + epochs=opts.epochs, + batch_size=opts.batch_size, + h5=opts.h5) + +if __name__ == '__main__': + main()
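A minimal sketch of invoking the training entry point above outside its CLI (not part of this changeset). The input path is a placeholder for a gzipped pickle holding the ((x_train, y_train), (x_test, y_test)) tuples that readData() expects; passing the output directory as model_name lets modelGraph() inspect the model that was just saved.

from MLaaS.ktrain import train  # assumes the MLaaS package layout in this diff

# equivalent to: --fin mnist.pkl.gz --fout mnist_model --model mnist_model --epochs 1 --batch_size 128
train('mnist.pkl.gz', fout='mnist_model', model_name='mnist_model',
      epochs=1, batch_size=128)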
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/lib/MLaaS/mnist_img.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,83 @@ +#!/usr/bin/env python +#-*- coding: utf-8 -*- +#pylint: disable= +""" +File : mnist_img.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: +""" + +import json +import gzip +import argparse +# from itertools import chain + +import numpy as np +import matplotlib.pyplot as plt + + +def readImage(fname, fout, num_images=5, imgId=2): + """ + Helper function to read MNIST image + """ + image_size = 28 + with gzip.open(fname, 'r') as fstream: + fstream.read(16) + buf = fstream.read(image_size * image_size * num_images) + data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32) + data = data.reshape(num_images, image_size, image_size, 1) + image = np.asarray(data[imgId]).squeeze() + plt.imsave(fout, image) + print("read:", fname, "wrote:", fout, "image:", type(image), "shape:", image.shape) + +def img2json(image): + """ + Convert given image to JSON data format used by TFaaS + """ + # values = [int(i) for i in list(chain.from_iterable(image))] + # values = image.tolist() + values = [] + for row in image.tolist(): + row = [int(i) for i in row] + vals = [[i] for i in row] + values.append(vals) + # final values should be an array of elements, e.g. single image representation + values = [values] + keys = [str(i) for i in range(0, 10)] + meta = { + 'keys': keys, + 'values': values, + 'model': 'mnist' + } + with open('img.json', 'w') as ostream: + ostream.write(json.dumps(meta)) + + +class OptionParser(): + def __init__(self): + "User based option parser" + fname = "train-images-idx3-ubyte.gz" + self.parser = argparse.ArgumentParser(prog='PROG') + self.parser.add_argument("--fin", action="store", + dest="fin", default=fname, help=f"Input MNIST file, default {fname}") + self.parser.add_argument("--fout", action="store", + dest="fout", default="img.png", help="Output image fila name, default img.png") + self.parser.add_argument("--nimages", action="store", + dest="nimages", default=5, help="number of images to read, default 5") + self.parser.add_argument("--imgid", action="store", + dest="imgid", default=2, help="image index to use from nimages, default 2 (number 4)") + +def main(): + """ + main function to produce image file from mnist dataset. + MNIST dataset can be downloaded from + curl -O http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz + """ + optmgr = OptionParser() + opts = optmgr.parser.parse_args() + num_images = int(opts.nimages) + imgId = int(opts.imgid) + img = readImage(opts.fin, opts.fout, num_images, imgId) + +if __name__ == '__main__': + main()
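A brief sketch of the two helpers above (not part of this changeset). The gzip path follows the download hint in main(), and the zero array stands in for a decoded digit, since readImage() writes an image file rather than returning the array.

import numpy as np
from MLaaS.mnist_img import readImage, img2json  # assumes the MLaaS package layout in this diff

# Write the third image in the archive to img.png
readImage('train-images-idx3-ubyte.gz', 'img.png', num_images=5, imgId=2)

# Build the img.json payload in the JSON format used by TFaaS predictions
img2json(np.zeros((28, 28), dtype=np.float32))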
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/lib/MLaaS/tfaas_client.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,371 @@ +#!/usr/bin/env python +#-*- coding: utf-8 -*- +#pylint: disable= +""" +File : tfaas_client.py +Author : Valentin Kuznetsov <vkuznet AT gmail dot com> +Description: simple python client to communicate with TFaaS server +""" + +# system modules +import os +import sys +import pwd +import ssl +import json +import binascii +import argparse +import itertools +import mimetypes +if sys.version_info < (2, 7): + raise Exception("TFaaS client requires python 2.7 or greater") +# python 3 +if sys.version.startswith('3.'): + import urllib.request as urllib2 + import urllib.parse as urllib + import http.client as httplib + import http.cookiejar as cookielib +else: + import mimetools + import urllib + import urllib2 + import httplib + import cookielib + +TFAAS_CLIENT = 'tfaas-client/1.1::python/%s.%s' % sys.version_info[:2] + +class OptionParser(): + def __init__(self): + "User based option parser" + self.parser = argparse.ArgumentParser(prog='PROG') + self.parser.add_argument("--url", action="store", + dest="url", default="", help="TFaaS URL") + self.parser.add_argument("--upload", action="store", + dest="upload", default="", help="upload model to TFaaS") + self.parser.add_argument("--bundle", action="store", + dest="bundle", default="", help="upload bundle ML files to TFaaS") + self.parser.add_argument("--predict", action="store", + dest="predict", default="", help="fetch prediction from TFaaS") + self.parser.add_argument("--image", action="store", + dest="image", default="", help="fetch prediction for given image") + self.parser.add_argument("--model", action="store", + dest="model", default="", help="TF model to use") + self.parser.add_argument("--delete", action="store", + dest="delete", default="", help="delete model in TFaaS") + self.parser.add_argument("--models", action="store_true", + dest="models", default=False, help="show existing models in TFaaS") + self.parser.add_argument("--verbose", action="store_true", + dest="verbose", default=False, help="verbose output") + msg = 'specify private key file name, default $X509_USER_PROXY' + self.parser.add_argument("--key", action="store", + default=x509(), dest="ckey", help=msg) + msg = 'specify private certificate file name, default $X509_USER_PROXY' + self.parser.add_argument("--cert", action="store", + default=x509(), dest="cert", help=msg) + default_ca = os.environ.get("X509_CERT_DIR") + if not default_ca or not os.path.exists(default_ca): + default_ca = "/etc/grid-security/certificates" + if not os.path.exists(default_ca): + default_ca = "" + if default_ca: + msg = 'specify CA path, default currently is %s' % default_ca + else: + msg = 'specify CA path; defaults to system CAs.' 
+ self.parser.add_argument("--capath", action="store", + default=default_ca, dest="capath", help=msg) + msg = 'specify number of retries upon busy DAS server message' + +class HTTPSClientAuthHandler(urllib2.HTTPSHandler): + """ + Simple HTTPS client authentication class based on provided + key/ca information + """ + def __init__(self, key=None, cert=None, capath=None, level=0): + if level > 0: + urllib2.HTTPSHandler.__init__(self, debuglevel=1) + else: + urllib2.HTTPSHandler.__init__(self) + self.key = key + self.cert = cert + self.capath = capath + + def https_open(self, req): + """Open request method""" + #Rather than pass in a reference to a connection class, we pass in + # a reference to a function which, for all intents and purposes, + # will behave as a constructor + return self.do_open(self.get_connection, req) + + def get_connection(self, host, timeout=300): + """Connection method""" + if self.key and self.cert and not self.capath: + return httplib.HTTPSConnection(host, key_file=self.key, + cert_file=self.cert) + elif self.cert and self.capath: + context = ssl.SSLContext(ssl.PROTOCOL_TLSv1) + context.load_verify_locations(capath=self.capath) + context.load_cert_chain(self.cert) + return httplib.HTTPSConnection(host, context=context) + return httplib.HTTPSConnection(host) + +def x509(): + "Helper function to get x509 either from env or tmp file" + proxy = os.environ.get('X509_USER_PROXY', '') + if not proxy: + proxy = '/tmp/x509up_u%s' % pwd.getpwuid( os.getuid() ).pw_uid + if not os.path.isfile(proxy): + return '' + return proxy + +def check_auth(key): + "Check if user runs das_client with key/cert and warn users to switch" + if not key: + msg = "WARNING: tfaas_client is running without user credentials/X509 proxy, create proxy via 'voms-proxy-init -voms cms -rfc'" + print(msg) + +def fullpath(path): + "Expand path to full path" + if path and path[0] == '~': + path = path.replace('~', '') + path = path[1:] if path[0] == '/' else path + path = os.path.join(os.environ['HOME'], path) + return path + +def choose_boundary(): + """ + Helper function to replace deprecated mimetools.choose_boundary + https://stackoverflow.com/questions/27099290/where-is-mimetools-choose-boundary-function-in-python3 + https://docs.python.org/2.7/library/mimetools.html?highlight=choose_boundary#mimetools.choose_boundary + >>> mimetools.choose_boundary() + '192.168.1.191.502.42035.1678979116.376.1' + """ + # we will return any random string + import uuid + return str(uuid.uuid4()) + +# credit: https://pymotw.com/2/urllib2/#uploading-files +class MultiPartForm(object): + """Accumulate the data to be used when posting a form.""" + + def __init__(self): + self.form_fields = [] + self.files = [] + if sys.version.startswith('3.'): + self.boundary = choose_boundary() + else: + self.boundary = mimetools.choose_boundary() + return + + def get_content_type(self): + return 'multipart/form-data; boundary=%s' % self.boundary + + def add_field(self, name, value): + """Add a simple field to the form data.""" + self.form_fields.append((name, value)) + return + + def add_file(self, fieldname, filename, fileHandle, mimetype=None): + """Add a file to be uploaded.""" + body = fileHandle.read() + if mimetype is None: + mimetype = mimetypes.guess_type(filename)[0] or 'application/octet-stream' + if mimetype == 'application/octet-stream': + body = binascii.b2a_base64(body) +# if isinstance(body, bytes): +# body = body.decode("utf-8") + self.files.append((fieldname, filename, mimetype, body)) + return + + def __str__(self): + 
"""Return a string representing the form data, including attached files.""" + # Build a list of lists, each containing "lines" of the + # request. Each part is separated by a boundary string. + # Once the list is built, return a string where each + # line is separated by '\r\n'. + parts = [] + part_boundary = '--' + self.boundary + + # Add the form fields + parts.extend( + [ part_boundary, + 'Content-Disposition: form-data; name="%s"' % name, + '', + value, + ] + for name, value in self.form_fields + ) + + # Add the files to upload + # here we use form-data content disposition instead of file one + # since this is how we define handlers in our Go server + # for more info see: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition + parts.extend( + [ part_boundary, + 'Content-Disposition: form-data; name="%s"; filename="%s"' % \ + (field_name, filename), + 'Content-Type: %s' % content_type, + '', + body, + ] + for field_name, filename, content_type, body in self.files + ) + + # Flatten the list and add closing boundary marker, + # then return CR+LF separated data + flattened = list(itertools.chain(*parts)) + flattened.append('--' + self.boundary + '--') + flattened.append('') + return '\r\n'.join(flattened) + +def models(host, verbose=None, ckey=None, cert=None, capath=None): + "models API shows models from TFaaS server" + url = host + '/models' + client = '%s (%s)' % (TFAAS_CLIENT, os.environ.get('USER', '')) + headers = {"Accept": "application/json", "User-Agent": client} + if verbose: + print("URL : %s" % url) + encoded_data = json.dumps({}) + return getdata(url, headers, encoded_data, ckey, cert, capath, verbose, 'GET') + +def delete(host, model, verbose=None, ckey=None, cert=None, capath=None): + "delete API deletes given model in TFaaS server" + url = host + '/delete' + client = '%s (%s)' % (TFAAS_CLIENT, os.environ.get('USER', '')) + headers = {"User-Agent": client} + if verbose: + print("URL : %s" % url) + print("model : %s" % model) + form = MultiPartForm() + form.add_field('model', model) + edata = str(form) + headers['Content-length'] = len(edata) + headers['Content-Type'] = form.get_content_type() + return getdata(url, headers, edata, ckey, cert, capath, verbose, method='DELETE') + +def bundle(host, ifile, verbose=None, ckey=None, cert=None, capath=None): + "bundle API uploads given bundle model files to TFaaS server" + url = host + '/upload' + client = '%s (%s)' % (TFAAS_CLIENT, os.environ.get('USER', '')) + headers = {"User-Agent": client, "Content-Encoding": "gzip", "Content-Type": "application/octet-stream"} + data = open(ifile, 'rb').read() + return getdata(url, headers, data, ckey, cert, capath, verbose) + +def upload(host, ifile, verbose=None, ckey=None, cert=None, capath=None): + "upload API uploads given model to TFaaS server" + url = host + '/upload' + client = '%s (%s)' % (TFAAS_CLIENT, os.environ.get('USER', '')) + headers = {"User-Agent": client} + params = json.load(open(ifile)) + if verbose: + print("URL : %s" % url) + print("ifile : %s" % ifile) + print("params: %s" % json.dumps(params)) + + form = MultiPartForm() + for key in params.keys(): + if key in ['model', 'labels', 'params']: + flag = 'r' + if key == 'model': + flag = 'rb' + name = params[key] + form.add_file(key, name, fileHandle=open(name, flag)) + else: + form.add_field(key, params[key]) + edata = str(form) + headers['Content-length'] = len(edata) + headers['Content-Type'] = form.get_content_type() + headers['Content-Encoding'] = 'base64' + return getdata(url, headers, edata, 
ckey, cert, capath, verbose) + +def predict(host, ifile, model, verbose=None, ckey=None, cert=None, capath=None): + "predict API get predictions from TFaaS server" + url = host + '/json' + client = '%s (%s)' % (TFAAS_CLIENT, os.environ.get('USER', '')) + headers = {"Accept": "application/json", "User-Agent": client} + params = json.load(open(ifile)) + if model: # overwrite model name in given input file + params['model'] = model + if verbose: + print("URL : %s" % url) + print("ifile : %s" % ifile) + print("params: %s" % json.dumps(params)) + encoded_data = json.dumps(params) + return getdata(url, headers, encoded_data, ckey, cert, capath, verbose) + +def predictImage(host, ifile, model, verbose=None, ckey=None, cert=None, capath=None): + "predict API get predictions from TFaaS server" + url = host + '/image' + client = '%s (%s)' % (TFAAS_CLIENT, os.environ.get('USER', '')) + headers = {"Accept": "application/json", "User-Agent": client} + if verbose: + print("URL : %s" % url) + print("ifile : %s" % ifile) + print("model : %s" % model) + form = MultiPartForm() +# form.add_file('image', ifile, fileHandle=open(ifile, 'r')) + form.add_file('image', ifile, fileHandle=open(ifile, 'rb')) + form.add_field('model', model) + edata = str(form) + headers['Content-length'] = len(edata) + headers['Content-Type'] = form.get_content_type() + return getdata(url, headers, edata, ckey, cert, capath, verbose) + +def getdata(url, headers, encoded_data, ckey, cert, capath, verbose=None, method='POST'): + "helper function to use in predict/upload APIs, it place given URL call to the server" + debug = 1 if verbose else 0 + req = urllib2.Request(url=url, headers=headers, data=encoded_data) + if method == 'DELETE': + req.get_method = lambda: 'DELETE' + elif method == 'GET': + req = urllib2.Request(url=url, headers=headers) + if ckey and cert: + ckey = fullpath(ckey) + cert = fullpath(cert) + http_hdlr = HTTPSClientAuthHandler(ckey, cert, capath, debug) + elif cert and capath: + cert = fullpath(cert) + http_hdlr = HTTPSClientAuthHandler(ckey, cert, capath, debug) + else: + http_hdlr = urllib2.HTTPHandler(debuglevel=debug) + proxy_handler = urllib2.ProxyHandler({}) + cookie_jar = cookielib.CookieJar() + cookie_handler = urllib2.HTTPCookieProcessor(cookie_jar) + data = {} + try: + opener = urllib2.build_opener(http_hdlr, proxy_handler, cookie_handler) + fdesc = opener.open(req) + if url.endswith('json'): + data = json.load(fdesc) + else: + data = fdesc.read() + fdesc.close() + except urllib2.HTTPError as error: + print(error.read()) + sys.exit(1) + if url.endswith('json'): + return json.dumps(data) + return data + +def main(): + "Main function" + optmgr = OptionParser() + opts = optmgr.parser.parse_args() + check_auth(opts.ckey) + res = '' + if opts.upload: + res = upload(opts.url, opts.upload, opts.verbose, opts.ckey, opts.cert, opts.capath) + if opts.bundle: + res = bundle(opts.url, opts.bundle, opts.verbose, opts.ckey, opts.cert, opts.capath) + elif opts.delete: + res = delete(opts.url, opts.delete, opts.verbose, opts.ckey, opts.cert, opts.capath) + elif opts.models: + res = models(opts.url, opts.verbose, opts.ckey, opts.cert, opts.capath) + elif opts.predict: + res = predict(opts.url, opts.predict, opts.model, opts.verbose, opts.ckey, opts.cert, opts.capath) + elif opts.image: + res = predictImage(opts.url, opts.image, opts.model, opts.verbose, opts.ckey, opts.cert, opts.capath) + if res: + print(res) + +if __name__ == '__main__': + main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/scripts-3.6/CHAP Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,3 @@ +#!/bin/bash + +python -m CHAP "$@"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chap.xml Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,41 @@ +<tool id="chap" name="CHESS Analysis Pipeline" version="0.1.0+galaxy0" python_template_version="3.5" profile="21.05"> + <requirements> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + cp '$input' data.csv && runner.py --config '$config' + ]]></command> + <inputs> + <param type="data" name="config" format="yaml" /> + <param type="data" name="input" format="csv" /> + </inputs> + <outputs> + <data name="output" format="csv" from_work_dir="test-data" /> + </outputs> + <tests> + <test> + <param name="config" value="config.yaml"/> + <param name="input" value="data.csv"/> + <output name="output" value="data.out"/> + </test> + </tests> + <help><![CDATA[ + usage: PROG [-h] [--config CONFIG] [--verbose] + +options: + -h, --help show this help message and exit + --config CONFIG Input configuration file + --verbose verbose output + + ]]></help> + <citations> + <citation type="bibtex"> +@misc{githubChessAnalysisPipeline, + author = {LastTODO, FirstTODO}, + year = {TODO}, + title = {ChessAnalysisPipeline}, + publisher = {GitHub}, + journal = {GitHub repository}, + url = {https://github.com/CHESSComputing/ChessAnalysisPipeline}, +}</citation> + </citations> +</tool>
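Note on the tool wrapper above: the <command> block stages the selected input as data.csv and runs the pipeline runner on the selected config. A minimal sketch of the same two steps outside Galaxy, assuming the CHAP package is importable (e.g. run from the repository root or after installation) so that python -m CHAP, which the scripts/CHAP wrapper below forwards to, stands in for the runner.py call; it uses the test-data files added further down in this changeset:

    # minimal local equivalent of the Galaxy <command> block; assumes CHAP is importable
    import shutil
    import subprocess

    shutil.copy('test-data/data.csv', 'data.csv')        # stage the input under the name the config expects
    subprocess.run(
        ['python', '-m', 'CHAP', '--config', 'test-data/config.yaml'],
        check=True,
    )                                                    # writes data.out per test-data/config.yaml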
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/environment.yml Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,6 @@ +name: CHAP +channels: + - defaults +dependencies: + - python>=3.8 + - pyyaml \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/examples/edd/ceria_calibration_config.yaml Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,18 @@ +spec_file: examples/edd/ceria_2222-2/spec.log +scan_number: 1 + +flux_file: examples/edd/flux.dft + +detector_name: mca1 +num_bins: 2048 +max_energy_kev: 150 + +hexrd_h5_material_file: examples/edd/materials.h5 + +tth_max: 90.0 +hkl_tth_tol: 0.15 + +fit_include_bin_ranges: [[736,1529]] +fit_hkls: [3,4,5,6,7,8,9,10,11,12,13] + +tth_initial_guess: 7.5 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/examples/edd/map.yaml Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,13 @@ +title: set2_c1-1 +station: id1a3 +experiment_type: EDD +sample: + name: set2_c1-1 +spec_scans: + - spec_file: examples/edd/set2_c1-1/spec.log + scan_numbers: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] +independent_dimensions: + - label: sample_y + data_type: smb_par + units: mm + name: sampYcp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/examples/edd/pipeline.yaml Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,47 @@ +pipeline: + + # Download example data + - reader.URLReader: + url: https://gitlab01.classe.cornell.edu/api/v4/projects/308/repository/files/edd%2fdata.tar/raw?ref=main + headers: + PRIVATE-TOKEN: # your token here + - processor.URLResponseProcessor + - writer.ExtractArchiveWriter: + filename: examples/edd + + # Calibrate detector + - reader.YAMLReader: + filename: examples/edd/ceria_calibration_config.yaml + schema: MCACeriaCalibrationConfig + - processor.MCACeriaCalibrationProcessor + - writer.YAMLWriter: + filename: examples/edd/ceria_calibrated.yaml + force_overwrite: true + + # Gather calibrated detector data + - reader.MultipleReader: + readers: + - YAMLReader: + filename: examples/edd/map.yaml + schema: MapConfig + - YAMLReader: + filename: examples/edd/ceria_calibrated.yaml + schema: MCACeriaCalibrationConfig + - processor.MCADataProcessor + - writer.NexusWriter: + filename: examples/edd/map_detector_data.nxs + force_overwrite: true + + # Compute sample strain map + - reader.MultipleReader: + readers: + - NexusReader: + filename: examples/edd/map_detector_data.nxs + - YAMLReader: + filename: examples/edd/strain_analysis_config.yaml + schema: StrainAnalysisConfig + - processor.StrainAnalysisProcessor + - writer.YAMLWriter: + filename: examples/edd/map_strain_data.yaml + force_overwrite: true +
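The PRIVATE-TOKEN value above is intentionally left blank. A minimal sketch for injecting it at run time from an environment variable instead of editing the file by hand, using PyYAML (already a declared dependency); GITLAB_TOKEN is a hypothetical variable name:

    # fill in the PRIVATE-TOKEN placeholder from the environment before running the pipeline;
    # GITLAB_TOKEN is a hypothetical variable name
    import os
    import yaml

    path = 'examples/edd/pipeline.yaml'
    with open(path) as fh:
        cfg = yaml.safe_load(fh)

    for item in cfg['pipeline']:
        if isinstance(item, dict) and 'reader.URLReader' in item:
            item['reader.URLReader']['headers']['PRIVATE-TOKEN'] = os.environ['GITLAB_TOKEN']

    # note: round-tripping through safe_dump drops the YAML comments
    with open(path, 'w') as fh:
        yaml.safe_dump(cfg, fh, sort_keys=False)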
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/examples/edd/strain_analysis_config.yaml Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,21 @@ +energy_range: +- 54 +- 112 +lattice_parameters: +- 5.41153 +material_file: examples/edd/materials.h5 +material_name: CeO2 +selected_peaks: +- 3 +- 4 +- 5 +- 6 +- 7 +- 8 +- 9 +- 10 +- 11 +- 12 +- 13 +tth_max: 90.0 +tth_tol: 0.15
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/examples/inference/pipeline.yaml Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,9 @@ +pipeline: + - reader.BinaryFileReader: + filename: /Users/vk/Work/CHESS/MLPipeline/MNIST/img4.png + - processor.TFaaSImageProcessor: + url: "http://localhost:8083" + model: mnist + verbose: true + - writer.Writer: + filename: examples/inference/predictions.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/examples/saxswaxs/integration_saxs_azimuthal.yaml Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,15 @@ +tool_type: integration +title: saxs_azimuthal +integration_type: azimuthal +detectors: +- prefix: PIL5 + poni_file: examples/saxswaxs/PIL5.poni + mask_file: examples/saxswaxs/PIL5.tif +radial_units: q_A^-1 +radial_min: 0.0 +radial_max: 0.21821 +radial_npt: 200 +azimuthal_units: chi_deg +azimuthal_min: -180.0 +azimuthal_max: 180.0 +azimuthal_npt: 180
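The fields above read like pyFAI azimuthal-integration parameters (the .poni geometry file and the q_A^-1 / chi_deg unit strings are pyFAI conventions); that is an assumption, since the processor that consumes this config is not part of this changeset. An illustrative sketch with a placeholder detector frame:

    # illustrative only: pyFAI/fabio are assumed libraries and detector_frame.tif is a placeholder
    import fabio
    import pyFAI

    ai = pyFAI.load('examples/saxswaxs/PIL5.poni')         # detector geometry (poni_file)
    mask = fabio.open('examples/saxswaxs/PIL5.tif').data   # mask_file
    frame = fabio.open('detector_frame.tif').data          # placeholder data frame
    res = ai.integrate2d(
        frame, 200, 180,                    # radial_npt, azimuthal_npt
        unit='q_A^-1',                      # radial_units
        radial_range=(0.0, 0.21821),        # radial_min, radial_max
        azimuth_range=(-180.0, 180.0),      # azimuthal_min, azimuthal_max
        mask=mask,
    )
    # res.intensity has shape (azimuthal_npt, radial_npt); res.radial and res.azimuthal hold bin centers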
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/examples/saxswaxs/integration_waxs_azimuthal.yaml Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,18 @@ +tool_type: integration +title: waxs_azimuthal +integration_type: azimuthal +detectors: +- prefix: PIL9 + poni_file: examples/saxswaxs/PIL9.poni + mask_file: examples/saxswaxs/PIL9.tif +- prefix: PIL11 + poni_file: examples/saxswaxs/PIL11.poni + mask_file: examples/saxswaxs/PIL11.tif +radial_units: q_A^-1 +radial_min: 0.0 +radial_max: 3.33209 +radial_npt: 200 +azimuthal_units: chi_deg +azimuthal_min: 100.0 +azimuthal_max: 360.0 +azimuthal_npt: 180
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/examples/saxswaxs/map_1d.yaml Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,23 @@ +title: test_1d +station: id3b +experiment_type: SAXSWAXS +sample: + name: sample_14_align +spec_scans: +- spec_file: examples/saxswaxs/test_1d + scan_numbers: + - 1 +independent_dimensions: +- label: samx + units: mm + data_type: spec_motor + name: samx +presample_intensity: + data_type: scan_column + name: ic3 +dwell_time_actual: + data_type: scan_column + name: sec_2 +postsample_intensity: + data_type: scan_column + name: diode
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/examples/saxswaxs/map_2d.yaml Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,27 @@ +title: test_2d +station: id3b +experiment_type: SAXSWAXS +sample: + name: sample_14 +spec_scans: +- spec_file: examples/saxswaxs/test_2d + scan_numbers: + - 1 +independent_dimensions: +- label: samz + units: mm + data_type: spec_motor + name: samz +- label: samx + units: mm + data_type: spec_motor + name: samx +presample_intensity: + data_type: scan_column + name: ic3 +dwell_time_actual: + data_type: scan_column + name: sec_2 +postsample_intensity: + data_type: scan_column + name: diode
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/examples/saxswaxs/pipeline.yaml Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,33 @@ +pipeline: + + # Download example data + - reader.URLReader: + url: https://gitlab01.classe.cornell.edu/api/v4/projects/308/repository/files/saxswaxs%2fdata.tar/raw?ref=main + headers: + PRIVATE-TOKEN: # your token here + - processor.URLResponseProcessor + - writer.ExtractArchiveWriter: + filename: examples/saxswaxs + + # Collect map data + - reader.YAMLReader: + filename: examples/saxswaxs/map_1d.yaml + schema: MapConfig + - processor.MapProcessor + - writer.NexusWriter: + filename: examples/saxswaxs/saxswaxs_map.nxs + force_overwrite: true + + # Integrate map detector data + - reader.MultipleReader: + readers: + - YAMLReader: + filename: examples/saxswaxs/map_1d.yaml + schema: MapConfig + - YAMLReader: + filename: examples/saxswaxs/integration_saxs_azimuthal.yaml + schema: IntegrationConfig + - processor.IntegrateMapProcessor + - writer.NexusWriter: + filename: examples/saxswaxs/saxs_azimuthal_integrated.nxs + force_overwrite: true
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/examples/sin2psi/integration.yaml Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,15 @@ +tool_type: integration +title: saxs_azimuthal +integration_type: azimuthal +detectors: +- prefix: EIG500 + poni_file: examples/sin2psi/EIG500.poni + mask_file: examples/sin2psi/EIG500.tif +radial_units: q_A^-1 +radial_min: 11.5 +radial_max: 17.0 +radial_npt: 800 +azimuthal_units: chi_deg +azimuthal_min: 173.0 +azimuthal_max: 192.0 +azimuthal_npt: 180
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/examples/sin2psi/map.yaml Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,31 @@ +title: test_2d +station: id1a3 +experiment_type: SAXSWAXS +sample: + name: Ti_1_A-2 +spec_scans: +- spec_file: examples/sin2psi/Ti_1_A-2/spec.log + scan_numbers: [1,2,16,17] +independent_dimensions: +- label: samx + units: mm + data_type: spec_motor + name: samx +- label: samy + units: mm + data_type: spec_motor + name: samy +- label: chi + data_type: smb_par + name: chi + units: degrees +- label: phi + data_type: smb_par + name: phi + units: degrees +presample_intensity: + data_type: scan_column + name: u3ic5 +dwell_time_actual: + data_type: scan_column + name: usbsec
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/examples/sin2psi/pipeline.yaml Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,38 @@ +pipeline: + + # Collect map data + - reader.YAMLReader: + filename: examples/sin2psi/map.yaml + schema: MapConfig + - processor.MapProcessor + - writer.NexusWriter: + filename: examples/sin2psi/map.nxs + force_overwrite: true + + # Integrate map detector data + - reader.MultipleReader: + readers: + - YAMLReader: + filename: examples/sin2psi/map.yaml + schema: MapConfig + - YAMLReader: + filename: examples/sin2psi/integration.yaml + schema: IntegrationConfig + - processor.IntegrateMapProcessor + - writer.NexusWriter: + filename: examples/sin2psi/integrated_detector_data.nxs + force_overwrite: true + + # Compute sample strain map + - reader.MultipleReader: + readers: + - NexusReader: + filename: examples/sin2psi/integrated_detector_data.nxs + - YAMLReader: + filename: examples/sin2psi/strain_analysis_config.yaml + schema: StrainAnalysisConfig + - processor.StrainAnalysisProcessor + - writer.YAMLWriter: + filename: examples/sin2psi/sample_strain_data.yaml + force_overwrite: true +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/examples/sin2psi/strain_analysis_config.yaml Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,6 @@ +gutter: [[0,40],[350,1000]] +material_name: Ti +lattice_parameters: [2.9216, 4.67] +energy_kev: 22.7 +half_width: 0.01 +max_chi: 0.003
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/requirements.txt Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,2 @@ +PyYAML==6.0 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/CHAP Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,3 @@ +#!/bin/bash + +python -m CHAP "$@"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/setup.py Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,56 @@ +""" +Standard python setup.py file +to build : python setup.py build +to install : python setup.py install --prefix=<some dir> +to clean : python setup.py clean +to build doc : python setup.py doc +to run tests : python setup.py test +""" + +import os +import fnmatch +import setuptools + +def datafiles(idir, pattern=None): + """Return list of data files in provided relative dir""" + files = [] + for dirname, dirnames, filenames in os.walk(idir): + for subdirname in dirnames: + files.append(os.path.join(dirname, subdirname)) + for filename in filenames: + if filename[-1] == '~': + continue + # match file name pattern (e.g. *.css) if one given + if pattern and not fnmatch.fnmatch(filename, pattern): + continue + files.append(os.path.join(dirname, filename)) + return files + +data_files = datafiles('examples') + +with open("README.md", "r") as fh: + long_description = fh.read() + +setuptools.setup( + name="ChessAnalysisPipeline", + version="0.0.1", + author="Keara Soloway, Rolf Verberg, Valentin Kuznetsov", + author_email="", + description="CHESS analysis pipeline framework", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/CHESSComputing/ChessAnalysisPipeline", + packages=['CHAP', 'MLaaS'], + package_dir={'CHAP': 'CHAP', 'MLaaS': 'MLaaS'}, + package_data={'examples': data_files}, + scripts=['scripts/CHAP'], + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + ], + python_requires='>=3.8', + install_requires=[ + 'PyYAML' + ], +)
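Usage note for the datafiles helper above: its optional pattern argument filters file names with fnmatch; '*.yaml' below is just an illustrative pattern:

    # illustrative calls to the datafiles helper defined in setup.py
    all_files = datafiles('examples')             # every file and subdirectory under examples/
    yaml_only = datafiles('examples', '*.yaml')   # files filtered by pattern (subdirectories still included)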
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/config.yaml Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,6 @@ +pipeline: + - reader.Reader: + filename: data.csv + - processor.Processor: {} + - writer.Writer: + filename: data.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/data.csv Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,3 @@ +col1,col2 +1,2 +2,3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/data.out Tue Mar 28 15:07:30 2023 +0000 @@ -0,0 +1,26 @@ +col1,col2 +1,2 +2,3 +process part +fitted part +process part +col1,col2 +1,2 +2,3 +process part +fitted part +process part +fitted part +col1,col2 +1,2 +2,3 +process part +fitted part +process part +col1,col2 +1,2 +2,3 +process part +fitted part +process part +fitted part