Mercurial > repos > imgteam > 2d_feature_extraction
comparison 2d_feature_extraction.py @ 5:5530132d500e draft
planemo upload for repository https://github.com/BMCV/galaxy-image-analysis/tree/master/tools/2d_feature_extraction/ commit bb2d58ed37d8eb09583b86e3cdd9f5d1b56c42a0
| author | imgteam |
|---|---|
| date | Sun, 04 Jan 2026 20:56:17 +0000 |
| parents | a4bc9dfde846 |
| children | 048545339ced |
comparison
equal
deleted
inserted
replaced
| 4:a4bc9dfde846 | 5:5530132d500e |
|---|---|
| 1 import argparse | 1 import giatools |
| 2 | |
| 3 import giatools.io | |
| 4 import numpy as np | 2 import numpy as np |
| 5 import pandas as pd | 3 import pandas as pd |
| 6 import skimage.feature | 4 import scipy.ndimage as ndi |
| 7 import skimage.measure | 5 import skimage.measure |
| 8 import skimage.morphology | 6 |
| 9 import skimage.segmentation | 7 # Fail early if an optional backend is not available |
| 8 giatools.require_backend('omezarr') | |
| 9 | |
| 10 | |
| 11 def surface(labels: np.ndarray, label: int) -> int: | |
| 12 """ | |
| 13 Ad-hoc implementation for computation of the "perimeter" of an object in 3D (that is a surface). | |
| 14 """ | |
| 15 assert labels.ndim == 3 # sanity check | |
| 16 | |
| 17 # Create 3-D structuring element with 6-connectivity (center voxel plus its 6 face neighbors; the 3-D analogue of 2-D 4-connectivity) | |
| 18 selem = np.zeros((3, 3, 3), bool) | |
| 19 for ijk in np.ndindex(*selem.shape): | |
| 20 if (np.array(ijk) == 1).sum() >= 2: | |
| 21 selem[*ijk] = True # noqa: E999 | |
| 22 assert selem.sum() == 7 # sanity check | |
| 23 | |
| 24 # Compute the area of the surface | |
| 25 cc = (labels == label) | |
| 26 cc_interior = ndi.binary_erosion(cc, selem) | |
| 27 surface = np.logical_xor(cc, cc_interior) | |
| 28 return surface.sum() # number of voxels on the surface of the object | |
| 29 | |
| 30 | |
| 31 def compute_if_dask(obj): | |
| 32 """ | |
| 33 Return the computed object or array if it is a Dask array or deferred computable Dask object. | |
| 34 """ | |
| 35 return obj.compute() if hasattr(obj, 'compute') else obj | |
| 10 | 36 |
| 11 | 37 |
| 12 if __name__ == '__main__': | 38 if __name__ == '__main__': |
| 39 tool = giatools.ToolBaseplate() | |
| 40 tool.add_input_image('labels') | |
| 41 tool.add_input_image('intensities', required=False) | |
| 42 tool.parser.add_argument('--output', type=str) | |
| 43 tool.parse_args() | |
| 13 | 44 |
| 14 parser = argparse.ArgumentParser(description='Extract image features') | 45 # Validate the input image |
| 46 try: | |
| 47 label_image = tool.args.input_images['labels'] | |
| 48 if any(label_image.shape[label_image.axes.index(axis)] > 1 for axis in label_image.axes if axis not in 'ZYX'): | |
| 49 raise ValueError(f'This tool is not applicable to images with {label_image.original_axes} axes.') | |
| 15 | 50 |
| 16 # TODO create factory for boilerplate code | 51 # Extract the image features |
| 17 features = parser.add_argument_group('compute features') | 52 for section in tool.run('ZYX'): # the validation code above guarantees that we will have only a single iteration |
| 18 features.add_argument('--all', dest='all_features', action='store_true') | 53 df = pd.DataFrame() |
| 19 features.add_argument('--label', dest='add_label', action='store_true') | |
| 20 features.add_argument('--patches', dest='add_roi_patches', action='store_true') | |
| 21 features.add_argument('--max_intensity', dest='max_intensity', action='store_true') | |
| 22 features.add_argument('--mean_intensity', dest='mean_intensity', action='store_true') | |
| 23 features.add_argument('--min_intensity', dest='min_intensity', action='store_true') | |
| 24 features.add_argument('--moments_hu', dest='moments_hu', action='store_true') | |
| 25 features.add_argument('--centroid', dest='centroid', action='store_true') | |
| 26 features.add_argument('--bbox', dest='bbox', action='store_true') | |
| 27 features.add_argument('--area', dest='area', action='store_true') | |
| 28 features.add_argument('--filled_area', dest='filled_area', action='store_true') | |
| 29 features.add_argument('--convex_area', dest='convex_area', action='store_true') | |
| 30 features.add_argument('--perimeter', dest='perimeter', action='store_true') | |
| 31 features.add_argument('--extent', dest='extent', action='store_true') | |
| 32 features.add_argument('--eccentricity', dest='eccentricity', action='store_true') | |
| 33 features.add_argument('--equivalent_diameter', dest='equivalent_diameter', action='store_true') | |
| 34 features.add_argument('--euler_number', dest='euler_number', action='store_true') | |
| 35 features.add_argument('--inertia_tensor_eigvals', dest='inertia_tensor_eigvals', action='store_true') | |
| 36 features.add_argument('--major_axis_length', dest='major_axis_length', action='store_true') | |
| 37 features.add_argument('--minor_axis_length', dest='minor_axis_length', action='store_true') | |
| 38 features.add_argument('--orientation', dest='orientation', action='store_true') | |
| 39 features.add_argument('--solidity', dest='solidity', action='store_true') | |
| 40 features.add_argument('--moments', dest='moments', action='store_true') | |
| 41 features.add_argument('--convexity', dest='convexity', action='store_true') | |
| 42 | 54 |
| 43 parser.add_argument('--label_file_binary', dest='label_file_binary', action='store_true') | 55 # Get the labels array and cast to `uint8` if it is `bool` (`skimage.measure.regionprops` refuses `bool` typed arrays) |
| 56 labels_section_data = section['labels'].data.squeeze() | |
| 57 if np.issubdtype(labels_section_data.dtype, bool): | |
| 58 print('Convert labels from bool to uint8') | |
| 59 labels_section_data = labels_section_data.astype(np.uint8) | |
| 44 | 60 |
| 45 parser.add_argument('--raw', dest='raw_file', type=argparse.FileType('r'), | 61 # Some features currently cannot be computed from Dask arrays |
| 46 help='Original input file', required=False) | 62 if any( |
| 47 parser.add_argument('label_file', type=argparse.FileType('r'), | 63 feature_name in tool.args.params['features'] for feature_name in ( |
| 48 help='Label input file') | 64 'inertia_tensor_eigvals', |
| 49 parser.add_argument('output_file', type=argparse.FileType('w'), | 65 'axis_major_length', |
| 50 help='Tabular output file') | 66 'axis_minor_length', |
| 51 args = parser.parse_args() | 67 'eccentricity', |
| 68 'orientation', | |
| 69 'moments_hu', | |
| 70 ) | |
| 71 ): | |
| 72 labels_section_data = compute_if_dask(labels_section_data) | |
| 52 | 73 |
| 53 label_file_binary = args.label_file_binary | 74 # Compute the image features |
| 54 label_file = args.label_file.name | 75 if 'intensities' in tool.args.input_images: |
| 55 out_file = args.output_file.name | 76 regions = skimage.measure.regionprops(labels_section_data, intensity_image=section['intensities'].data.squeeze()) |
| 56 add_patch = args.add_roi_patches | 77 else: |
| 78 regions = skimage.measure.regionprops(labels_section_data, intensity_image=None) | |
| 79 df['it'] = np.arange(len(regions)) | |
| 80 for feature_name in tool.args.params['features']: | |
| 57 | 81 |
| 58 raw_image = None | 82 # Add the object label |
| 59 if args.raw_file is not None: | 83 if feature_name == 'label': |
| 60 raw_image = giatools.io.imread(args.raw_file.name) | 84 df['label'] = df['it'].map(lambda ait: regions[ait].label) |
| 61 | 85 |
| 62 raw_label_image = giatools.io.imread(label_file) | 86 # Add the object perimeter/surface |
| 87 elif feature_name == 'perimeter' and labels_section_data.ndim == 3: | |
| 88 df['perimeter'] = df['it'].map( | |
| 89 lambda ait: surface(labels_section_data, regions[ait].label), # `skimage.measure.regionprops` cannot compute perimeters for 3-D data | |
| 90 ) | |
| 63 | 91 |
| 64 df = pd.DataFrame() | 92 # Skip features that are not available when processing 3-D images |
| 65 if label_file_binary: | 93 elif feature_name in ('eccentricity', 'moments_hu', 'orientation') and labels_section_data.ndim == 3: |
| 66 raw_label_image = skimage.measure.label(raw_label_image) | 94 print(f'Skip feature that is not available for 3-D images: "{feature_name}"') |
| 67 regions = skimage.measure.regionprops(raw_label_image, intensity_image=raw_image) | |
| 68 | 95 |
| 69 df['it'] = np.arange(len(regions)) | 96 # Add another feature from `regions` that was computed via `skimage.measure.regionprops` |
| 97 else: | |
| 98 try: | |
| 99 df[feature_name] = df['it'].map(lambda ait: getattr(regions[ait], feature_name)) | |
| 100 except TypeError: | |
| 101 raise ValueError(f'Unknown feature: "{feature_name}"') | |
| 70 | 102 |
| 71 if add_patch: | 103 # Resolve any remaining Dask objects to the actual values (e.g., when processing Zarrs) |
| 72 df['image'] = df['it'].map(lambda ait: regions[ait].image.astype(np.float).tolist()) | 104 df = df.map(compute_if_dask) |
| 73 df['intensity_image'] = df['it'].map(lambda ait: regions[ait].intensity_image.astype(np.float).tolist()) | |
| 74 | 105 |
| 75 # TODO no matrix features, but split in own rows? | 106 # Convert lists/tuples/arrays to lists of plain Python numbers (e.g., float instead of np.float64) |
| 76 if args.add_label or args.all_features: | 107 df = df.map( |
| 77 df['label'] = df['it'].map(lambda ait: regions[ait].label) | 108 lambda obj: np.asarray(obj).tolist() if type(obj) in (list, tuple, np.ndarray) else obj, |
| 109 ) | |
| 78 | 110 |
| 79 if raw_image is not None: | 111 del df['it'] |
| 80 if args.max_intensity or args.all_features: | 112 df.to_csv(tool.args.raw_args.output, sep='\t', lineterminator='\n', index=False) |
| 81 df['max_intensity'] = df['it'].map(lambda ait: regions[ait].max_intensity) | |
| 82 if args.mean_intensity or args.all_features: | |
| 83 df['mean_intensity'] = df['it'].map(lambda ait: regions[ait].mean_intensity) | |
| 84 if args.min_intensity or args.all_features: | |
| 85 df['min_intensity'] = df['it'].map(lambda ait: regions[ait].min_intensity) | |
| 86 if args.moments_hu or args.all_features: | |
| 87 df['moments_hu'] = df['it'].map(lambda ait: regions[ait].moments_hu) | |
| 88 | 113 |
| 89 if args.centroid or args.all_features: | 114 except ValueError as err: |
| 90 df['centroid'] = df['it'].map(lambda ait: regions[ait].centroid) | 115 exit(err.args[0]) |
| 91 if args.bbox or args.all_features: | |
| 92 df['bbox'] = df['it'].map(lambda ait: regions[ait].bbox) | |
| 93 if args.area or args.all_features: | |
| 94 df['area'] = df['it'].map(lambda ait: regions[ait].area) | |
| 95 if args.filled_area or args.all_features: | |
| 96 df['filled_area'] = df['it'].map(lambda ait: regions[ait].filled_area) | |
| 97 if args.convex_area or args.all_features: | |
| 98 df['convex_area'] = df['it'].map(lambda ait: regions[ait].convex_area) | |
| 99 if args.perimeter or args.all_features: | |
| 100 df['perimeter'] = df['it'].map(lambda ait: regions[ait].perimeter) | |
| 101 if args.extent or args.all_features: | |
| 102 df['extent'] = df['it'].map(lambda ait: regions[ait].extent) | |
| 103 if args.eccentricity or args.all_features: | |
| 104 df['eccentricity'] = df['it'].map(lambda ait: regions[ait].eccentricity) | |
| 105 if args.equivalent_diameter or args.all_features: | |
| 106 df['equivalent_diameter'] = df['it'].map(lambda ait: regions[ait].equivalent_diameter) | |
| 107 if args.euler_number or args.all_features: | |
| 108 df['euler_number'] = df['it'].map(lambda ait: regions[ait].euler_number) | |
| 109 if args.inertia_tensor_eigvals or args.all_features: | |
| 110 df['inertia_tensor_eigvals'] = df['it'].map(lambda ait: regions[ait].inertia_tensor_eigvals) | |
| 111 if args.major_axis_length or args.all_features: | |
| 112 df['major_axis_length'] = df['it'].map(lambda ait: regions[ait].major_axis_length) | |
| 113 if args.minor_axis_length or args.all_features: | |
| 114 df['minor_axis_length'] = df['it'].map(lambda ait: regions[ait].minor_axis_length) | |
| 115 if args.orientation or args.all_features: | |
| 116 df['orientation'] = df['it'].map(lambda ait: regions[ait].orientation) | |
| 117 if args.solidity or args.all_features: | |
| 118 df['solidity'] = df['it'].map(lambda ait: regions[ait].solidity) | |
| 119 if args.moments or args.all_features: | |
| 120 df['moments'] = df['it'].map(lambda ait: regions[ait].moments) | |
| 121 if args.convexity or args.all_features: | |
| 122 perimeter = df['it'].map(lambda ait: regions[ait].perimeter) | |
| 123 area = df['it'].map(lambda ait: regions[ait].area) | |
| 124 df['convexity'] = area / (perimeter * perimeter) | |
| 125 | |
| 126 del df['it'] | |
| 127 df.to_csv(out_file, sep='\t', lineterminator='\n', index=False) |
