comparison 2d_feature_extraction.py @ 5:5530132d500e draft

planemo upload for repository https://github.com/BMCV/galaxy-image-analysis/tree/master/tools/2d_feature_extraction/ commit bb2d58ed37d8eb09583b86e3cdd9f5d1b56c42a0
author imgteam
date Sun, 04 Jan 2026 20:56:17 +0000
parents a4bc9dfde846
children 048545339ced
comparison
equal deleted inserted replaced
4:a4bc9dfde846 5:5530132d500e
1 import argparse 1 import giatools
2
3 import giatools.io
4 import numpy as np 2 import numpy as np
5 import pandas as pd 3 import pandas as pd
6 import skimage.feature 4 import scipy.ndimage as ndi
7 import skimage.measure 5 import skimage.measure
8 import skimage.morphology 6
9 import skimage.segmentation 7 # Fail early if an optional backend is not available
8 giatools.require_backend('omezarr')
9
10
def surface(labels: np.ndarray, label: int) -> int:
    """
    Ad-hoc implementation for computation of the "perimeter" of an object in 3-D (that is, its surface).

    The surface is measured as the number of voxels of the object that are removed by a single binary
    erosion with the face-connected structuring element (the center voxel plus its 6 direct neighbors),
    i.e. the object voxels that have at least one face neighbor which does not belong to the object.
    The erosion is run with the default border handling of `scipy.ndimage.binary_erosion`, which treats
    everything beyond the array as background, so object voxels on the array border count as surface.

    :param labels: 3-D label array.
    :param label: Label of the object to be measured.
    :return: Number of voxels on the surface of the object (0 if the label does not occur).
    :raises ValueError: If `labels` is not a 3-D array.
    """
    # Explicit check instead of `assert`, so that the validation is not stripped when running with `-O`
    if labels.ndim != 3:
        raise ValueError(f'Expected a 3-D label array, got {labels.ndim} dimension(s).')

    # Face-connected 3-D structuring element (7 voxels: the center and its 6 direct neighbors)
    selem = ndi.generate_binary_structure(3, 1)

    # Voxels of the object that do not survive the erosion constitute the surface
    cc = (labels == label)
    cc_interior = ndi.binary_erosion(cc, selem)
    boundary = np.logical_xor(cc, cc_interior)
    return int(boundary.sum())  # number of voxels on the surface of the object, as a plain Python int
29
30
def compute_if_dask(obj):
    """
    Resolve `obj` to its computed value if it exposes a `compute` attribute (as Dask arrays and deferred
    computable Dask objects do). Any other object is handed back unchanged.
    """
    if hasattr(obj, 'compute'):
        return obj.compute()
    return obj
10 36
11 37
12 if __name__ == '__main__': 38 if __name__ == '__main__':
39 tool = giatools.ToolBaseplate()
40 tool.add_input_image('labels')
41 tool.add_input_image('intensities', required=False)
42 tool.parser.add_argument('--output', type=str)
43 tool.parse_args()
13 44
14 parser = argparse.ArgumentParser(description='Extract image features') 45 # Validate the input image
46 try:
47 label_image = tool.args.input_images['labels']
48 if any(label_image.shape[label_image.axes.index(axis)] > 1 for axis in label_image.axes if axis not in 'ZYX'):
49 raise ValueError(f'This tool is not applicable to images with {label_image.original_axes} axes.')
15 50
16 # TODO create factory for boilerplate code 51 # Extract the image features
17 features = parser.add_argument_group('compute features') 52 for section in tool.run('ZYX'): # the validation code above guarantees that we will have only a single iteration
18 features.add_argument('--all', dest='all_features', action='store_true') 53 df = pd.DataFrame()
19 features.add_argument('--label', dest='add_label', action='store_true')
20 features.add_argument('--patches', dest='add_roi_patches', action='store_true')
21 features.add_argument('--max_intensity', dest='max_intensity', action='store_true')
22 features.add_argument('--mean_intensity', dest='mean_intensity', action='store_true')
23 features.add_argument('--min_intensity', dest='min_intensity', action='store_true')
24 features.add_argument('--moments_hu', dest='moments_hu', action='store_true')
25 features.add_argument('--centroid', dest='centroid', action='store_true')
26 features.add_argument('--bbox', dest='bbox', action='store_true')
27 features.add_argument('--area', dest='area', action='store_true')
28 features.add_argument('--filled_area', dest='filled_area', action='store_true')
29 features.add_argument('--convex_area', dest='convex_area', action='store_true')
30 features.add_argument('--perimeter', dest='perimeter', action='store_true')
31 features.add_argument('--extent', dest='extent', action='store_true')
32 features.add_argument('--eccentricity', dest='eccentricity', action='store_true')
33 features.add_argument('--equivalent_diameter', dest='equivalent_diameter', action='store_true')
34 features.add_argument('--euler_number', dest='euler_number', action='store_true')
35 features.add_argument('--inertia_tensor_eigvals', dest='inertia_tensor_eigvals', action='store_true')
36 features.add_argument('--major_axis_length', dest='major_axis_length', action='store_true')
37 features.add_argument('--minor_axis_length', dest='minor_axis_length', action='store_true')
38 features.add_argument('--orientation', dest='orientation', action='store_true')
39 features.add_argument('--solidity', dest='solidity', action='store_true')
40 features.add_argument('--moments', dest='moments', action='store_true')
41 features.add_argument('--convexity', dest='convexity', action='store_true')
42 54
43 parser.add_argument('--label_file_binary', dest='label_file_binary', action='store_true') 55 # Get the labels array and cast to `uint8` if it is `bool` (`skimage.measure.regionprops` refuses `bool` typed arrays)
56 labels_section_data = section['labels'].data.squeeze()
57 if np.issubdtype(labels_section_data.dtype, bool):
58 print('Convert labels from bool to uint8')
59 labels_section_data = labels_section_data.astype(np.uint8)
44 60
45 parser.add_argument('--raw', dest='raw_file', type=argparse.FileType('r'), 61 # Some features currently cannot be computed from Dask arrays
46 help='Original input file', required=False) 62 if any(
47 parser.add_argument('label_file', type=argparse.FileType('r'), 63 feature_name in tool.args.params['features'] for feature_name in (
48 help='Label input file') 64 'inertia_tensor_eigvals',
49 parser.add_argument('output_file', type=argparse.FileType('w'), 65 'axis_major_length',
50 help='Tabular output file') 66 'axis_minor_length',
51 args = parser.parse_args() 67 'eccentricity',
68 'orientation',
69 'moments_hu',
70 )
71 ):
72 labels_section_data = compute_if_dask(labels_section_data)
52 73
53 label_file_binary = args.label_file_binary 74 # Compute the image features
54 label_file = args.label_file.name 75 if 'intensities' in tool.args.input_images:
55 out_file = args.output_file.name 76 regions = skimage.measure.regionprops(labels_section_data, intensity_image=section['intensities'].data.squeeze())
56 add_patch = args.add_roi_patches 77 else:
78 regions = skimage.measure.regionprops(labels_section_data, intensity_image=None)
79 df['it'] = np.arange(len(regions))
80 for feature_name in tool.args.params['features']:
57 81
58 raw_image = None 82 # Add the object label
59 if args.raw_file is not None: 83 if feature_name == 'label':
60 raw_image = giatools.io.imread(args.raw_file.name) 84 df['label'] = df['it'].map(lambda ait: regions[ait].label)
61 85
62 raw_label_image = giatools.io.imread(label_file) 86 # Add the object perimeter/surface
87 elif feature_name == 'perimeter' and labels_section_data.ndim == 3:
88 df['perimeter'] = df['it'].map(
89 lambda ait: surface(labels_section_data, regions[ait].label), # `skimage.measure.regionprops` cannot compute perimeters for 3-D data
90 )
63 91
64 df = pd.DataFrame() 92 # Skip features that are not available when processing 3-D images
65 if label_file_binary: 93 elif feature_name in ('eccentricity', 'moments_hu', 'orientation') and labels_section_data.ndim == 3:
66 raw_label_image = skimage.measure.label(raw_label_image) 94 print(f'Skip feature that is not available for 3-D images: "{feature_name}"')
67 regions = skimage.measure.regionprops(raw_label_image, intensity_image=raw_image)
68 95
69 df['it'] = np.arange(len(regions)) 96 # Add another feature from `regions` that was computed via `skimage.measure.regionprops`
97 else:
98 try:
99 df[feature_name] = df['it'].map(lambda ait: getattr(regions[ait], feature_name))
100 except TypeError:
101 raise ValueError(f'Unknown feature: "{feature_name}"')
70 102
71 if add_patch: 103 # Resolve any remaining Dask objects to the actual values (e.g., when processing Zarrs)
72 df['image'] = df['it'].map(lambda ait: regions[ait].image.astype(np.float).tolist()) 104 df = df.map(compute_if_dask)
73 df['intensity_image'] = df['it'].map(lambda ait: regions[ait].intensity_image.astype(np.float).tolist())
74 105
75 # TODO no matrix features, but split in own rows? 106 # Convert lists/tuples/arrays to lists of plain Python numbers (e.g., float instead of np.float64)
76 if args.add_label or args.all_features: 107 df = df.map(
77 df['label'] = df['it'].map(lambda ait: regions[ait].label) 108 lambda obj: np.asarray(obj).tolist() if type(obj) in (list, tuple, np.ndarray) else obj,
109 )
78 110
79 if raw_image is not None: 111 del df['it']
80 if args.max_intensity or args.all_features: 112 df.to_csv(tool.args.raw_args.output, sep='\t', lineterminator='\n', index=False)
81 df['max_intensity'] = df['it'].map(lambda ait: regions[ait].max_intensity)
82 if args.mean_intensity or args.all_features:
83 df['mean_intensity'] = df['it'].map(lambda ait: regions[ait].mean_intensity)
84 if args.min_intensity or args.all_features:
85 df['min_intensity'] = df['it'].map(lambda ait: regions[ait].min_intensity)
86 if args.moments_hu or args.all_features:
87 df['moments_hu'] = df['it'].map(lambda ait: regions[ait].moments_hu)
88 113
89 if args.centroid or args.all_features: 114 except ValueError as err:
90 df['centroid'] = df['it'].map(lambda ait: regions[ait].centroid) 115 exit(err.args[0])
91 if args.bbox or args.all_features:
92 df['bbox'] = df['it'].map(lambda ait: regions[ait].bbox)
93 if args.area or args.all_features:
94 df['area'] = df['it'].map(lambda ait: regions[ait].area)
95 if args.filled_area or args.all_features:
96 df['filled_area'] = df['it'].map(lambda ait: regions[ait].filled_area)
97 if args.convex_area or args.all_features:
98 df['convex_area'] = df['it'].map(lambda ait: regions[ait].convex_area)
99 if args.perimeter or args.all_features:
100 df['perimeter'] = df['it'].map(lambda ait: regions[ait].perimeter)
101 if args.extent or args.all_features:
102 df['extent'] = df['it'].map(lambda ait: regions[ait].extent)
103 if args.eccentricity or args.all_features:
104 df['eccentricity'] = df['it'].map(lambda ait: regions[ait].eccentricity)
105 if args.equivalent_diameter or args.all_features:
106 df['equivalent_diameter'] = df['it'].map(lambda ait: regions[ait].equivalent_diameter)
107 if args.euler_number or args.all_features:
108 df['euler_number'] = df['it'].map(lambda ait: regions[ait].euler_number)
109 if args.inertia_tensor_eigvals or args.all_features:
110 df['inertia_tensor_eigvals'] = df['it'].map(lambda ait: regions[ait].inertia_tensor_eigvals)
111 if args.major_axis_length or args.all_features:
112 df['major_axis_length'] = df['it'].map(lambda ait: regions[ait].major_axis_length)
113 if args.minor_axis_length or args.all_features:
114 df['minor_axis_length'] = df['it'].map(lambda ait: regions[ait].minor_axis_length)
115 if args.orientation or args.all_features:
116 df['orientation'] = df['it'].map(lambda ait: regions[ait].orientation)
117 if args.solidity or args.all_features:
118 df['solidity'] = df['it'].map(lambda ait: regions[ait].solidity)
119 if args.moments or args.all_features:
120 df['moments'] = df['it'].map(lambda ait: regions[ait].moments)
121 if args.convexity or args.all_features:
122 perimeter = df['it'].map(lambda ait: regions[ait].perimeter)
123 area = df['it'].map(lambda ait: regions[ait].area)
124 df['convexity'] = area / (perimeter * perimeter)
125
126 del df['it']
127 df.to_csv(out_file, sep='\t', lineterminator='\n', index=False)