Coverage for src/recon3d/utility.py: 77%

262 statements  


1"""  

2This module holds utilties that can be reused across other modules 

3""" 

4 

5# Standard library imports 

6from datetime import datetime 

7from itertools import cycle, repeat, tee 

8from pathlib import Path 

9from typing import Iterable, Tuple 

10import glob 

11import argparse 

12 

13# Third-party library imports 

14import h5py 

15import numpy as np 

16import yaml 

17from PIL import Image 

18from scipy import ndimage 

19import skimage 

20import skimage.io as skio 

21 

22# Local imports 

23from recon3d.types import * 


# import recon3d.feature_analysis as fa
# from recon3d.feature_analysis import SemanticImageStack


# def instance_to_ndarray(data: InstanceImageStack) -> np.ndarray:
#     """Extract the array data within the InstanceImageStack object"""
#     return data.data  # Trivial, do we need a function?


# def hdf_dataset_to_npy(hdf_path: Path, hdf_dataset_location: str, save_path: Path):
#     # read
#     with h5py.File(hdf_path, "r") as f:
#         data = np.squeeze(f[hdf_dataset_location][:])
#
#     instance_to_ndarray
#     np.save(save_path, data)


def binary_with_pores_to_semantic(input_path: Path, output_path: Path) -> dict:
    """
    Convert a folder of segmented/binarized TIFF images to a semantic image stack.

    This function is designed for a specific use case (NOMAD, pores in AM tensile bars)
    and converts binary images with metal as 1 and all else as 0 into a semantic image stack.

    Parameters
    ----------
    input_path : Path
        Folder containing binary images with metal as 1 and all else as 0.

        Figure 1:
            *---*---*---*---*
            | 1 | 1 | 1 | 1 |
            *---*---*---*---*
            | 1 | 0 | 1 | 0 |
            *---*---*---*---*
            | 1 | 1 | 1 | 0 |
            *---*---*---*---*
    output_path : Path
        Directory where the semantic images will be saved.

        Figure 2:
            *---*---*---*---*
            | 1 | 1 | 1 | 1 |
            *---*---*---*---*
            | 1 | 2 | 1 | 0 |
            *---*---*---*---*
            | 1 | 1 | 1 | 0 |
            *---*---*---*---*

    Returns
    -------
    dict
        Dictionary of class labels:
            class_labels:
                air:
                    value: 0
                metal:
                    value: 1
                pore:
                    value: 2

    Raises
    ------
    ValueError
        If the input images contain more than two phases.

    Examples
    --------
    >>> input_path = Path("path/to/binary_images")
    >>> output_path = Path("path/to/save/semantic_images")
    >>> binary_with_pores_to_semantic(input_path, output_path)
    {'class_labels': {'air': {'value': 0}, 'metal': {'value': 1}, 'pore': {'value': 2}}}

    """

    # read in binary image stack
    bw_data = read_images(input_path)

    # check data is binary, only 0 or 1
    phase_ids = np.unique(bw_data)
    n_phases = len(phase_ids)

    if n_phases != 2:
        raise ValueError(
            f"Only two phases expected in segmented image, {n_phases} phases found"
        )

    # elif n_phases == 2:
    # TODO remove hardcodes and impart flexibility from here to ut.ndarray_to_img call
    else:  # images are binary

        # preallocate the output data
        output_data = np.zeros(shape=bw_data.shape, dtype=np.int8)

        # fill holes and assign as 'metal', 1
        print("\tIsolating Sample...")
        sample = ndimage.binary_fill_holes(bw_data.astype(np.bool_))
        np.place(output_data, sample, 1)

        # isolate holes within 'metal', assign as 'pore', 2
        print("\tIsolating Voids...")
        voids = np.logical_xor(sample, bw_data)
        np.place(output_data, voids, 2)

        # thus, everything else is 'air', 0

        class_labels = {
            "class_labels": {
                "air": {"value": 0},
                "metal": {"value": 1},
                "pore": {"value": 2},
            }
        }

        ndarray_to_img(
            data=output_data,
            slice_axis=CartesianAxis3D.Z,
            parent_dir=output_path,
            folder_name="",
        )

    return class_labels
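
# A minimal check of the fill/XOR step above, run on the docstring's Figure 1
# array (commented out so that importing this module stays side-effect free):
#
#   bw = np.array([[1, 1, 1, 1],
#                  [1, 0, 1, 0],
#                  [1, 1, 1, 0]], dtype=np.int8)
#   sample = ndimage.binary_fill_holes(bw.astype(np.bool_))  # fills the enclosed 0
#   out = np.zeros_like(bw)
#   np.place(out, sample, 1)                      # metal
#   np.place(out, np.logical_xor(sample, bw), 2)  # pore; remainder stays 0 (air)
#   # out now matches Figure 2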


def validate_yml(yml_input_file: Path, cli_entry_point: str) -> tuple[Path, Path, dict]:
    """
    Verify that the YAML file contains the required arguments.

    This function checks if the specified YAML file contains the necessary arguments
    and validates the CLI entry point. It also verifies the existence of input and output directories.

    Parameters
    ----------
    yml_input_file : Path
        The path to the YAML input file.
    cli_entry_point : str
        The CLI entry point to validate against the YAML file.

    Returns
    -------
    tuple[Path, Path, dict]
        A tuple containing:
        - Path to the input directory.
        - Path to the output directory.
        - Dictionary of YAML values.

    Raises
    ------
    ValueError
        If the CLI entry point is not found in the YAML file or if the input directory does not exist.

    Examples
    --------
    >>> yml_input_file = Path("path/to/input.yml")
    >>> cli_entry_point = "process_images"
    >>> validate_yml(yml_input_file, cli_entry_point)
    (PosixPath('path/to/images'), PosixPath('path/to/output'), {'cli_entry_points': ['process_images'], 'image_dir': 'path/to/images', 'out_dir': 'path/to/output'})
    """

    print(f"Processing specification file: {yml_input_file}")
    yml_vals = yaml_to_dict(yml_input_file)

    # check cli_entry_points is valid
    if cli_entry_point not in yml_vals["cli_entry_points"]:
        raise ValueError(
            f"""Error. Incorrect yml format.
            This function requires the "cli_entry_points" key to contain {cli_entry_point},
            but currently contains the following options: {yml_vals["cli_entry_points"]}"""
        )

    path_input = Path(yml_vals["image_dir"]).expanduser()
    if not path_input.is_dir():
        raise ValueError(f"Error, 'image_dir', {path_input} not found.")
    print(f"Input path: {path_input}")

    path_output = Path(yml_vals["out_dir"]).expanduser()
    path_output.mkdir(parents=True, exist_ok=True)
    print(f"Output path: {path_output}")

    return (path_input, path_output, yml_vals)
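
# A minimal specification file that satisfies validate_yml for a given entry
# point (keys are the ones read above; paths are placeholders):
#
#   cli_entry_points:
#     - binary_to_semantic
#   image_dir: ~/data/binary_images
#   out_dir: ~/data/semantic_images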


def binarize(data: np.ndarray, val: int) -> np.ndarray:
    """
    Binarize the data based on a specified value.

    This function converts the values within the data matching the specified value to 1,
    and all other values to 0.

    Parameters
    ----------
    data : np.ndarray
        The input array to be binarized.
    val : int
        The value in the data to be binarized (converted to 1).

    Returns
    -------
    np.ndarray
        The binarized array with the same shape as the input data.

    Examples
    --------
    >>> data = np.array([[1, 2, 3], [4, 1, 6], [7, 8, 1]])
    >>> binarize(data, 1)
    array([[1, 0, 0],
           [0, 1, 0],
           [0, 0, 1]], dtype=int8)
    """

    bw_data = np.zeros(shape=data.shape, dtype=np.int8)
    np.place(bw_data, data == val, 1)

    return bw_data
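
# For reference, (data == val).astype(np.int8) is an equivalent one-liner; the
# preallocate-then-np.place pattern is kept to match the rest of this module.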


def semantic_to_binary(yml_input_file: Path) -> bool:
    """
    Convert a semantic image stack to a binary image stack using a YAML configuration file.

    This function reads a YAML file to prepare a binary image stack from a semantic image stack.
    The selected class from the semantic stack is binarized based on the specified value.

    Parameters
    ----------
    yml_input_file : Path
        The path to the YAML input file containing configuration settings.

    Returns
    -------
    bool
        True if the binary image stack was successfully created.

    Examples
    --------
    >>> yml_input_file = Path("path/to/config.yml")
    >>> semantic_to_binary(yml_input_file)
    True
    """

    input_path, output_path, params = validate_yml(
        yml_input_file, "semantic_to_binary"
    )

    # params = ut.yaml_to_dict(yml_input_file)

    sel_class = params["selected_class"]
    sel_class_value = params["class_labels"][sel_class]["value"]

    input_data = read_images(input_path)

    bw_data = binarize(input_data, sel_class_value)

    ndarray_to_img(
        data=bw_data,
        slice_axis=CartesianAxis3D.Z,
        parent_dir=output_path,
        folder_name="",
    )

    return True
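
# The specification file for this entry point also needs the class map and the
# class to extract, e.g. (label values mirror binary_with_pores_to_semantic;
# paths are placeholders):
#
#   cli_entry_points:
#     - semantic_to_binary
#   image_dir: ~/data/semantic_images
#   out_dir: ~/data/pore_masks
#   selected_class: pore
#   class_labels:
#     air: {value: 0}
#     metal: {value: 1}
#     pore: {value: 2}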


def binary_to_semantic(yml_input_file: Path) -> bool:
    """
    Convert a binary image stack to a semantic image stack using a YAML configuration file.

    This function reads a YAML file to prepare a semantic image stack from a binary image stack.
    It is designed for a specific use case (NOMAD, AM tensile bars) as of 31 May 2024.

    Parameters
    ----------
    yml_input_file : Path
        The path to the YAML input file containing configuration settings.

    Returns
    -------
    bool
        True if the semantic image stack was successfully created.

    Examples
    --------
    >>> yml_input_file = Path("path/to/config.yml")
    >>> binary_to_semantic(yml_input_file)
    True
    """

    input_path, output_path, params = validate_yml(
        yml_input_file, "binary_to_semantic"
    )

    class_labels = binary_with_pores_to_semantic(
        input_path=input_path, output_path=output_path
    )

    print(f"class labels for semantic stack:\n{class_labels}")

    return True


def main_binary_to_semantic():
    """
    Run the binary to semantic conversion module from the command line.

    This function serves as the entry point for terminal-based access to the binary to semantic conversion module.
    It is invoked from the command line using the 'binary_to_semantic' command specified in pyproject.toml.
    The function processes a YAML input file to convert a binary image stack to a semantic image stack.

    Parameters
    ----------
    None

    Returns
    -------
    None

    Examples
    --------
    To run the binary to semantic conversion, use the following command in the terminal:
    $ binary_to_semantic path/to/input.yml
    """

    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="the .yml user input file")
    args = parser.parse_args()
    yml_input_file = Path(args.input_file)  # argparse yields a str; convert to Path

    binary_to_semantic(yml_input_file=yml_input_file)

    print(f"{yml_input_file} processed!")


def main_semantic_to_binary():
    """
    Run the semantic to binary conversion module from the command line.

    This function serves as the entry point for terminal-based access to the semantic to binary conversion module.
    It is invoked from the command line using the 'semantic_to_binary' command specified in pyproject.toml.
    The function processes a YAML input file to convert a semantic image stack to a binary image stack.

    Parameters
    ----------
    None

    Returns
    -------
    None

    Examples
    --------
    To run the semantic to binary conversion, use the following command in the terminal:
    $ semantic_to_binary path/to/input.yml
    """

    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="the .yml user input file")
    args = parser.parse_args()
    yml_input_file = Path(args.input_file)  # argparse yields a str; convert to Path

    semantic_to_binary(yml_input_file=yml_input_file)

    print(f"{yml_input_file} processed!")


def hdf_to_instance_properties(hdf_path: Path, group_path: str) -> InstanceProperties:
    """
    Read instance analysis data from an HDF5 file and create an InstanceProperties object.

    This function reads data from an HDF5 file that has had instance analysis performed
    with a pre-defined internal structure to create an InstanceProperties object.
    The group name should be the internal 'folder' in the HDF5 file containing the instance
    analysis property data.

    Parameters
    ----------
    hdf_path : Path
        The path to the HDF5 file containing the instance analysis data.
    group_path : str
        The internal 'folder' in the HDF5 file containing the instance analysis property data.

    Returns
    -------
    InstanceProperties
        An InstanceProperties object containing the instance analysis data.

    Examples
    --------
    >>> hdf_path = Path("path/to/instance_analysis.h5")
    >>> group_path = "instance_properties"
    >>> instance_properties = hdf_to_instance_properties(hdf_path, group_path)
    >>> print(instance_properties)
    InstanceProperties(source_name='instance_properties', labels=InstanceLabels(data=array([0, 1, 2, ...])), n_voxels=[NVoxel(value=...), ...], equivalent_sphere_diameters=[Length(value=..., unit=<Units.MICRON: 'micron'>), ...], centroids=Centroids(data=[Centroid(cx=Length(value=..., unit=<Units.MICRON: 'micron'>), cy=Length(value=..., unit=<Units.MICRON: 'micron'>), cz=Length(value=..., unit=<Units.MICRON: 'micron'>)), ...]), ellipsoids=BestFitEllipsoids(data=[BestFitEllipsoid(a=EllipsoidAxis(length=Length(value=..., unit=<Units.MICRON: 'micron'>), orientation=UnitVector(u=..., v=..., w=...)), b=EllipsoidAxis(length=Length(value=..., unit=<Units.MICRON: 'micron'>), orientation=UnitVector(u=..., v=..., w=...)), c=EllipsoidAxis(length=Length(value=..., unit=<Units.MICRON: 'micron'>), orientation=UnitVector(u=..., v=..., w=...))), ...]), surface_areas=EllipsoidSurfaceAreas(data=[Area(value=..., unit_squared=<Units.MICRON: 'micron'>), ...]), volumes=EllipsoidVolumes(data=[Volume(value=..., unit_cubed=<Units.MICRON: 'micron'>), ...]))
    """

    with h5py.File(hdf_path, "r") as f:
        instance_data = f[group_path]
        hdf_n_voxels = np.squeeze(instance_data["num_voxels"][:])
        hdf_equiv_diam = np.squeeze(instance_data["equivalent_sphere_diameters"][:])
        equiv_diam_unit = Units(
            instance_data["equivalent_sphere_diameters"].attrs["units"]
        )
        hdf_centroids = np.squeeze(instance_data["centroids"][:])
        centroid_unit = Units(instance_data["centroids"].attrs["units"])
        hdf_semi_axes = np.squeeze(instance_data["semi-axis_lengths"][:])
        semi_axes_unit = Units(instance_data["semi-axis_lengths"].attrs["units"])
        hdf_vectors = np.squeeze(instance_data["axis_vectors"][:])
        hdf_surface_areas = np.squeeze(instance_data["ellipsoid_surface_areas"][:])
        surface_area_unit = Units(
            instance_data["ellipsoid_surface_areas"].attrs["units_squared"]
        )
        hdf_volumes = np.squeeze(instance_data["ellipsoid_volumes"][:])
        volume_unit = Units(instance_data["ellipsoid_volumes"].attrs["units_cubed"])

    labels = np.arange(0, len(hdf_equiv_diam), dtype=int)
    n_voxels = [NVoxel(value=i) for i in hdf_n_voxels]
    equivalent_sphere_diameters = [
        Length(value=i, unit=equiv_diam_unit) for i in hdf_equiv_diam
    ]
    centroids = Centroids(
        data=[
            Centroid(
                cx=Length(value=i[0], unit=centroid_unit),
                cy=Length(value=i[1], unit=centroid_unit),
                cz=Length(value=i[2], unit=centroid_unit),
            )
            for i in hdf_centroids
        ]
    )

    ellipsoids = BestFitEllipsoids(
        [
            BestFitEllipsoid(
                a=EllipsoidAxis(
                    length=Length(value=i[0], unit=semi_axes_unit),
                    orientation=UnitVector(u=j[0], v=j[1], w=j[2]),
                ),
                b=EllipsoidAxis(
                    length=Length(value=i[1], unit=semi_axes_unit),
                    orientation=UnitVector(u=j[3], v=j[4], w=j[5]),
                ),
                c=EllipsoidAxis(
                    length=Length(value=i[2], unit=semi_axes_unit),
                    orientation=UnitVector(u=j[6], v=j[7], w=j[8]),
                ),
            )
            for i, j in zip(hdf_semi_axes, hdf_vectors)
        ]
    )

    surface_areas = EllipsoidSurfaceAreas(
        [Area(value=i, unit_squared=surface_area_unit) for i in hdf_surface_areas]
    )

    volumes = EllipsoidVolumes(
        [Volume(value=i, unit_cubed=volume_unit) for i in hdf_volumes]
    )

    instance_props = InstanceProperties(
        source_name=group_path,
        labels=InstanceLabels(data=labels),
        n_voxels=n_voxels,
        equivalent_sphere_diameters=equivalent_sphere_diameters,
        centroids=centroids,
        ellipsoids=ellipsoids,
        surface_areas=surface_areas,
        volumes=volumes,
    )

    # n_voxels: list[NVoxel]  # could have a InstanceImageStack
    # equivalent_sphere_diameters: list[Length]
    # centroids: Centroids  # could have a InstanceImageStack
    # ellipsoids: BestFitEllipsoids
    # surface_areas: SurfaceAreas

    return instance_props
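
# Expected internal layout of the HDF5 group, inferred from the dataset and
# attribute names read above:
#
#   <group_path>/
#       num_voxels
#       equivalent_sphere_diameters    (attrs: "units")
#       centroids                      (attrs: "units")
#       semi-axis_lengths              (attrs: "units")
#       axis_vectors
#       ellipsoid_surface_areas        (attrs: "units_squared")
#       ellipsoid_volumes              (attrs: "units_cubed")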


def hdf_to_metadata(hdf_path: Path, dataset_path: str) -> MetaData:
    """
    Extract metadata from an HDF5 dataset containing string metadata attributes.

    This function reads metadata attributes from a specified dataset within an HDF5 file
    and returns a MetaData object.

    Parameters
    ----------
    hdf_path : Path
        The path to the HDF5 file containing the dataset.
    dataset_path : str
        The internal path to the dataset within the HDF5 file.

    Returns
    -------
    MetaData
        A MetaData object containing the extracted metadata.

    Examples
    --------
    >>> hdf_path = Path("path/to/data.h5")
    >>> dataset_path = "dataset"
    >>> metadata = hdf_to_metadata(hdf_path, dataset_path)
    >>> print(metadata)
    MetaData(data_volume=DataVolume(x_width=..., y_height=..., z_image_count=...), resolution=Resolution(dx=Length(value=..., unit=<Units.MICRON: 'micron'>), dy=Length(value=..., unit=<Units.MICRON: 'micron'>), dz=Length(value=..., unit=<Units.MICRON: 'micron'>)), pixel_units=<Units.MICRON: 'micron'>, origin=Origin(x0=Length(value=..., unit=<Units.MICRON: 'micron'>), y0=Length(value=..., unit=<Units.MICRON: 'micron'>), z0=Length(value=..., unit=<Units.MICRON: 'micron'>)))
    """

    with h5py.File(hdf_path, "r") as f:
        dataset = f[dataset_path]

        x_width = int(dataset.attrs["x_width (pixels)"])
        y_height = int(dataset.attrs["y_height (pixels)"])
        z_image_count = int(dataset.attrs["z_image_count (pixels)"])
        data_volume = DataVolume(
            x_width=x_width, y_height=y_height, z_image_count=z_image_count
        )

        pixel_units = Units(dataset.attrs["Resolution, units"])
        dx = Length(value=float(dataset.attrs["Resolution, dx"]), unit=pixel_units)
        dy = Length(value=float(dataset.attrs["Resolution, dy"]), unit=pixel_units)
        dz = Length(value=float(dataset.attrs["Resolution, dz"]), unit=pixel_units)
        resolution = Resolution(
            dx=dx,
            dy=dy,
            dz=dz,
        )

        origin_units = Units(dataset.attrs["Origin, units"])
        x0 = Length(value=float(dataset.attrs["Origin, x0"]), unit=origin_units)
        y0 = Length(value=float(dataset.attrs["Origin, y0"]), unit=origin_units)
        z0 = Length(value=float(dataset.attrs["Origin, z0"]), unit=origin_units)
        origin = Origin(
            x0=x0,
            y0=y0,
            z0=z0,
        )

    metadata = MetaData(
        data_volume=data_volume,
        resolution=resolution,
        pixel_units=pixel_units,
        origin=origin,
    )

    return metadata
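
# Attribute names read above (useful when writing compatible files; see also
# metadata_to_dict, which emits the same keys):
#   "x_width (pixels)", "y_height (pixels)", "z_image_count (pixels)",
#   "Resolution, dx", "Resolution, dy", "Resolution, dz", "Resolution, units",
#   "Origin, x0", "Origin, y0", "Origin, z0", "Origin, units"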


def centroids_to_ndarray(centroids: Centroids) -> np.ndarray:
    """
    Convert centroid data type into a NumPy array.

    This function converts a Centroids object into a NumPy array with each row representing
    the (cx, cy, cz) coordinates of a centroid.

    Parameters
    ----------
    centroids : Centroids
        The Centroids object containing the centroid data.

    Returns
    -------
    np.ndarray
        A NumPy array with shape (n, 3), where n is the number of centroids, and each row
        contains the (cx, cy, cz) coordinates of a centroid.

    Examples
    --------
    >>> centroids = Centroids(data=[
    ...     Centroid(cx=Length(0.0, Units.MICRON), cy=Length(0.0, Units.MICRON), cz=Length(0.0, Units.MICRON)),
    ...     Centroid(cx=Length(1.0, Units.MICRON), cy=Length(1.0, Units.MICRON), cz=Length(1.0, Units.MICRON)),
    ...     Centroid(cx=Length(2.0, Units.MICRON), cy=Length(2.0, Units.MICRON), cz=Length(2.0, Units.MICRON))
    ... ])
    >>> centroids_to_ndarray(centroids)
    array([[0., 0., 0.],
           [1., 1., 1.],
           [2., 2., 2.]])
    """

    data = centroids.data
    cx = [i.cx.value for i in data]
    cy = [i.cy.value for i in data]
    cz = [i.cz.value for i in data]

    ndarray = np.array((cx, cy, cz), dtype=float).T

    return ndarray


def ellipsoids_to_ndarray(
    ellipsoids: BestFitEllipsoids,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Convert ellipsoid data type into NumPy arrays.

    This function converts a BestFitEllipsoids object into two NumPy arrays:
    one for the axis lengths and one for the axis vectors.

    Parameters
    ----------
    ellipsoids : BestFitEllipsoids
        The BestFitEllipsoids object containing the ellipsoid data.

    Returns
    -------
    Tuple[np.ndarray, np.ndarray]
        A tuple containing:
        - A NumPy array with shape (n, 3) for the axis lengths, where n is the number of ellipsoids.
        - A NumPy array with shape (n, 9) for the axis vectors, where n is the number of ellipsoids.

    Examples
    --------
    >>> ellipsoids = BestFitEllipsoids(data=[
    ...     BestFitEllipsoid(
    ...         a=EllipsoidAxis(length=Length(5.0, Units.MICRON), orientation=UnitVector(u=1.0, v=0.0, w=0.0)),
    ...         b=EllipsoidAxis(length=Length(3.0, Units.MICRON), orientation=UnitVector(u=0.0, v=1.0, w=0.0)),
    ...         c=EllipsoidAxis(length=Length(2.0, Units.MICRON), orientation=UnitVector(u=0.0, v=0.0, w=1.0))
    ...     ),
    ...     BestFitEllipsoid(
    ...         a=EllipsoidAxis(length=Length(6.0, Units.MICRON), orientation=UnitVector(u=0.707, v=0.707, w=0.0)),
    ...         b=EllipsoidAxis(length=Length(4.0, Units.MICRON), orientation=UnitVector(u=-0.707, v=0.707, w=0.0)),
    ...         c=EllipsoidAxis(length=Length(3.0, Units.MICRON), orientation=UnitVector(u=0.0, v=0.0, w=1.0))
    ...     )
    ... ])
    >>> axis_lengths, axis_vectors = ellipsoids_to_ndarray(ellipsoids)
    >>> axis_lengths
    array([[5., 3., 2.],
           [6., 4., 3.]])
    >>> axis_vectors
    array([[ 1.   ,  0.   ,  0.   ,  0.   ,  1.   ,  0.   ,  0.   ,  0.   ,  1.   ],
           [ 0.707,  0.707,  0.   , -0.707,  0.707,  0.   ,  0.   ,  0.   ,  1.   ]])
    """

    data = ellipsoids.data
    # axis lengths:
    a = [i.a.length.value for i in data]
    b = [i.b.length.value for i in data]
    c = [i.c.length.value for i in data]
    axis_lengths = np.array((a, b, c), dtype=float).T

    # axes vectors
    a_u = [i.a.orientation.u for i in data]
    a_v = [i.a.orientation.v for i in data]
    a_w = [i.a.orientation.w for i in data]

    b_u = [i.b.orientation.u for i in data]
    b_v = [i.b.orientation.v for i in data]
    b_w = [i.b.orientation.w for i in data]

    c_u = [i.c.orientation.u for i in data]
    c_v = [i.c.orientation.v for i in data]
    c_w = [i.c.orientation.w for i in data]

    axis_vectors = np.array(
        (a_u, a_v, a_w, b_u, b_v, b_w, c_u, c_v, c_w), dtype=float
    ).T

    return axis_lengths, axis_vectors
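
# Column order of axis_vectors, matching the j[0:9] unpacking in
# hdf_to_instance_properties:
#   [a_u, a_v, a_w, b_u, b_v, b_w, c_u, c_v, c_w]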


def surface_areas_to_ndarray(surface_areas: EllipsoidSurfaceAreas) -> np.ndarray:
    """
    Convert surface area data type into a NumPy array.

    This function converts an EllipsoidSurfaceAreas object into a NumPy array
    containing the surface area values.

    Parameters
    ----------
    surface_areas : EllipsoidSurfaceAreas
        The EllipsoidSurfaceAreas object containing the surface area data.

    Returns
    -------
    np.ndarray
        A NumPy array containing the surface area values.

    Examples
    --------
    >>> surface_areas = EllipsoidSurfaceAreas(data=[
    ...     Area(value=100.0, unit_squared=Units.MICRON),
    ...     Area(value=200.0, unit_squared=Units.MICRON),
    ...     Area(value=300.0, unit_squared=Units.MICRON)
    ... ])
    >>> surface_areas_to_ndarray(surface_areas)
    array([100., 200., 300.])
    """

    data = surface_areas.data
    areas = [i.value for i in data]

    ndarray = np.array(areas, dtype=float)

    return ndarray


def volumes_to_ndarray(ellipsoid_volumes: EllipsoidVolumes) -> np.ndarray:
    """
    Convert ellipsoid volume data type into a NumPy array.

    This function converts an EllipsoidVolumes object into a NumPy array
    containing the volume values.

    Parameters
    ----------
    ellipsoid_volumes : EllipsoidVolumes
        The EllipsoidVolumes object containing the volume data.

    Returns
    -------
    np.ndarray
        A NumPy array containing the volume values.

    Examples
    --------
    >>> ellipsoid_volumes = EllipsoidVolumes(data=[
    ...     Volume(value=500.0, unit_cubed=Units.MICRON),
    ...     Volume(value=1000.0, unit_cubed=Units.MICRON),
    ...     Volume(value=1500.0, unit_cubed=Units.MICRON)
    ... ])
    >>> volumes_to_ndarray(ellipsoid_volumes)
    array([ 500., 1000., 1500.])
    """

    data = ellipsoid_volumes.data
    volumes = [i.value for i in data]

    ndarray = np.array(volumes, dtype=float)

    return ndarray


def rmdir(directory: Path) -> None:
    """
    Recursively delete a directory and all its contents.
    (credit to: https://stackoverflow.com/questions/13118029/deleting-folders-in-python-recursively/49782093#49782093)

    This function deletes the specified directory and all its contents, including
    subdirectories and files. If the directory does not exist, the function does nothing.

    Parameters
    ----------
    directory : Path
        The path to the directory to be deleted.

    Returns
    -------
    None

    Examples
    --------
    >>> from pathlib import Path
    >>> directory = Path("path/to/directory")
    >>> rmdir(directory)
    """

    if not directory.exists():
        return

    for item in directory.iterdir():
        if item.is_dir():
            rmdir(item)
        else:
            item.unlink()
    directory.rmdir()
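
# shutil.rmtree(directory, ignore_errors=True) from the standard library is a
# near-equivalent; the explicit recursion above is kept for transparency.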


def compare_files(file1: Path, file2: Path, ignore_words: list[str]) -> bool:
    """
    Compare two files line-by-line, ignoring lines that contain specified words.

    This function compares two files line-by-line and ignores lines that contain
    any of the words specified in the `ignore_words` list. If the files are identical
    up to the allowed differences, the function returns True. Otherwise, it returns False.

    Parameters
    ----------
    file1 : Path
        Path to the first file for comparison.
    file2 : Path
        Path to the second file for comparison.
    ignore_words : list of str
        List of words that cause a particular line to be ignored during comparison.
        Use an empty list for strict word-by-word comparison. Use a non-empty list,
        e.g., ["date", "time"], to ignore lines with these words, as they may differ
        from file to file.

    Returns
    -------
    bool
        True if the two files are the same (up to any allowed differences in the
        ignore_words list), False if the two files are different.

    Examples
    --------
    >>> file1 = Path("path/to/file1.txt")
    >>> file2 = Path("path/to/file2.txt")
    >>> ignore_words = ["date", "time"]
    >>> compare_files(file1, file2, ignore_words)
    True
    """

    with open(file=file1, mode="r", encoding="utf-8") as fa:
        with open(file=file2, mode="r", encoding="utf-8") as fb:
            dba = fa.readlines()
            dbb = fb.readlines()

    if len(dba) == len(dbb):
        for k, line in enumerate(dba):
            if line != dbb[k]:
                # if "Autogenerated" in line:
                iw_in_fa_line = [iw in line for iw in ignore_words]
                iw_in_fb_line = [iw in dbb[k] for iw in ignore_words]
                in_either_line = iw_in_fa_line + iw_in_fb_line
                # if any(iw in [line + dbb[k]] for iw in ignore_words):
                if any(in_either_line):
                    # skip comparison of the ignore_words case(s)
                    print("Skip comparison of word in ignore_word.")
                else:
                    print(f"Files differ at line {k}:")
                    print(f"\t file1: {line}")
                    print(f"\t file2: {dbb[k]}")
                    return False

    else:
        print("Files have an unequal number of lines.")
        print("Line-by-line comparison not performed.")
        return False

    return True  # files are the same


def date_time_utc() -> str:
    """
    Returns the current date and time in ISO format in the UTC time zone.

    This function returns the current date and time in ISO 8601 format, with the time zone
    specified as UTC. The colons (:) and periods (.) in the time string are replaced with
    underscores (_) for compatibility with file naming conventions.

    Returns
    -------
    str
        The current date and time in ISO format in the UTC time zone.

    Examples
    --------
    >>> date_time_utc()
    '2024-08-13_UTC_12_34_56_789012'
    """

    # datetime.utcnow() is deprecated; take an aware UTC timestamp, then drop
    # the tzinfo so the string keeps the documented naive-UTC format
    ts = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    ts = ts.replace(":", "_").replace(".", "_")  # Overwrite ":", "." with "_"
    ts = ts.replace("T", "_UTC_")  # Overwrite T with UTC time zone indication
    return ts


def underline() -> str:
    """
    Return a commented underline composed of 78 dashes ('-') and a newline character ('\\n').

    This function generates a string that represents a commented underline
    consisting of 78 dashes prefixed with a comment character ('* ') and
    followed by a newline character.

    Returns
    -------
    str
        A string representing a commented underline.

    Examples
    --------
    >>> underline()
    '* ------------------------------------------------------------------------------\\n'
    """

    return "* " + "".join(repeat("-", 78)) + "\n"


def in_a_but_not_in_b(*, a: Iterable, b: Iterable) -> Iterable:
    """
    Return items in Iterable `a` that are not in Iterable `b`.

    This function takes two iterables `a` and `b`, and returns a tuple containing
    all the items that are present in `a` but not in `b`.

    Parameters
    ----------
    a : Iterable
        The first iterable to compare.
    b : Iterable
        The second iterable to compare.

    Returns
    -------
    Tuple
        A tuple containing items that are in `a` but not in `b`.

    Examples
    --------
    >>> a = [1, 2, 3, 4]
    >>> b = [3, 4, 5, 6]
    >>> in_a_but_not_in_b(a=a, b=b)
    (1, 2)
    """

    result = ()  # empty tuple
    for item in a:
        if item not in b:
            result = result + (item,)

    return result
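
# Equivalent comprehension, for reference:
#   tuple(item for item in a if item not in b)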


def pairwise(x: Iterable) -> Iterable:
    """
    Return successive overlapping pairs taken from the input iterable.

    itertools.pairwise provides this in Python 3.10+
    (https://docs.python.org/3/library/itertools.html#itertools.pairwise),
    but this package currently supports Python 3.9, so `pairwise` is
    implemented here.

    The number of 2-tuples in the output iterator will be one fewer than the
    number of inputs. It will be empty if the input iterable has fewer than
    two values.

    Parameters
    ----------
    x : Iterable
        The input iterable from which to generate pairs.

    Returns
    -------
    Iterable[Tuple]
        An iterable of 2-tuples containing successive overlapping pairs from the input iterable.

    Examples
    --------
    >>> list(pairwise('ABCDEFG'))
    [('A', 'B'), ('B', 'C'), ('C', 'D'), ('D', 'E'), ('E', 'F'), ('F', 'G')]
    >>> list(pairwise([1, 2, 3, 4]))
    [(1, 2), (2, 3), (3, 4)]
    """

    a, b = tee(x)
    next(b, None)  # advance the second iterator by one
    return zip(a, b)


def pairwise_circular(x: Iterable) -> Iterable:
    """
    Return successive overlapping pairs taken from the input iterable,
    wrapping around so that the last item is paired with the first.

    The number of 2-tuples in the output iterator equals the number of
    inputs; a single-item input yields one pair of the item with itself, and
    an empty input yields an empty iterator. The input must be re-iterable
    (e.g., a sequence, not a one-shot iterator), because it is traversed
    twice.

    Parameters
    ----------
    x : Iterable
        The input iterable from which to generate pairs.

    Returns
    -------
    Iterable[Tuple]
        An iterable of 2-tuples containing successive overlapping pairs,
        including the wrap-around pair.

    Examples
    --------
    >>> list(pairwise_circular('ABCD'))
    [('A', 'B'), ('B', 'C'), ('C', 'D'), ('D', 'A')]
    >>> list(pairwise_circular([1, 2, 3, 4]))
    [(1, 2), (2, 3), (3, 4), (4, 1)]
    """

    a = cycle(x)
    next(a, None)  # advance the cycle by one; default guards the empty case
    return zip(x, a)


def metadata_to_dict(metadata: MetaData) -> dict:
    """
    Convert MetaData to a dictionary.

    This function converts a MetaData object to a dictionary representation.

    Parameters
    ----------
    metadata : MetaData
        The metadata of the Data Volume.

    Returns
    -------
    dict
        A dictionary containing the metadata.

    Examples
    --------
    >>> metadata = MetaData(
    ...     data_volume=DataVolume(x_width=256, y_height=256, z_image_count=10),
    ...     resolution=Resolution(
    ...         dx=Length(1.0, Units.MICRON),
    ...         dy=Length(1.0, Units.MICRON),
    ...         dz=Length(1.0, Units.MICRON)
    ...     ),
    ...     pixel_units=Units.MICRON,
    ...     origin=Origin(
    ...         x0=Length(0.0, Units.MICRON),
    ...         y0=Length(0.0, Units.MICRON),
    ...         z0=Length(0.0, Units.MICRON)
    ...     )
    ... )
    >>> metadata_to_dict(metadata)
    {
        'x_width (pixels)': '256',
        'y_height (pixels)': '256',
        'z_image_count (pixels)': '10',
        'Resolution, dx': '1.0',
        'Resolution, dy': '1.0',
        'Resolution, dz': '1.0',
        'Resolution, units': 'micron',
        'Origin, x0': '0.0',
        'Origin, y0': '0.0',
        'Origin, z0': '0.0',
        'Origin, units': 'micron'
    }
    """

    data_volume = metadata.data_volume
    resolution = metadata.resolution
    # pixel_units = metadata.pixel_units
    origin = metadata.origin

    x_width = data_volume.x_width
    y_height = data_volume.y_height
    z_image_count = data_volume.z_image_count

    dx = resolution.dx
    dy = resolution.dy
    dz = resolution.dz

    meta_dict = {
        "x_width (pixels)": str(x_width),
        "y_height (pixels)": str(y_height),
        "z_image_count (pixels)": str(z_image_count),
        "Resolution, dx": str(dx.value),
        "Resolution, dy": str(dy.value),
        "Resolution, dz": str(dz.value),
        "Resolution, units": str(dx.unit.value),
        "Origin, x0": str(origin.x0.value),
        "Origin, y0": str(origin.y0.value),
        "Origin, z0": str(origin.z0.value),
        "Origin, units": str(origin.x0.unit.value),
    }
    # print(meta_dict)

    return meta_dict
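
# The keys above match the HDF5 attribute names read by hdf_to_metadata, so
# metadata written from this dict can round-trip through an HDF5 file.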


def yaml_to_dict(path_file_input: Path) -> dict:
    """
    Convert a YAML file to a dictionary.

    This function reads a YAML file and converts its contents to a dictionary.

    Parameters
    ----------
    path_file_input : Path
        The full path to the input YAML file.

    Returns
    -------
    dict
        A dictionary containing the contents of the YAML file.

    Raises
    ------
    TypeError
        If the file type is not supported.
    OSError
        If there is an error opening or decoding the YAML file.

    Examples
    --------
    >>> path_file_input = Path("path/to/config.yml")
    >>> yaml_to_dict(path_file_input)
    {'key1': 'value1', 'key2': 'value2', ...}
    """

    file_type = Path(path_file_input).suffix.casefold()
    supported_types = (".yaml", ".yml")

    if file_type not in supported_types:
        raise TypeError("Only file types .yaml and .yml are supported.")

    try:
        with open(file=path_file_input, mode="r", encoding="utf-8") as stream:
            db = yaml.load(stream, Loader=yaml.SafeLoader)  # Load YAML file
    except yaml.YAMLError as error:
        print(f"Error with YAML file: {error}")
        print(f"Could not open or decode: {path_file_input}")
        raise OSError from error

    print(f"Success: database created from file: {path_file_input}")
    print("key, value, type")
    print("---, -----, ----")
    for key, value in db.items():
        print(f"{key}, {value}, {type(value)}")

    return db


def dict_to_yaml(db: dict, file: str) -> Path:
    """
    Convert a dictionary to a YAML file.

    This function writes the contents of a dictionary to a YAML file.

    Parameters
    ----------
    db : dict
        The dictionary to be converted to YAML.
    file : str
        The path to the output YAML file.

    Returns
    -------
    Path
        The path to the created YAML file.

    Examples
    --------
    >>> db = {'key1': 'value1', 'key2': 'value2'}
    >>> file = "path/to/output.yml"
    >>> dict_to_yaml(db, file)
    PosixPath('path/to/output.yml')
    """

    with open(file, "w", encoding="utf-8") as out_file:
        yaml.dump(db, out_file, default_flow_style=False)  # Write dictionary to YAML

    return Path(file)
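
# Round-trip sketch: dict_to_yaml({"a": 1}, "out.yml") writes the file, and
# yaml_to_dict(Path("out.yml")) reads it back as {"a": 1}.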


def ndarray_to_img(
    *,
    data: np.ndarray,
    slice_axis: CartesianAxis3D,
    parent_dir: Path,
    folder_name: str,
    pad_length: int = 4,
    file_type: str = ".tif",
) -> bool:
    """
    Convert an ndarray to an image stack and save it to a specified directory.

    This function takes a NumPy ndarray and creates an image stack, saving the images
    into a user-specified directory.

    Parameters
    ----------
    data : np.ndarray
        The semantic labels.
    slice_axis : CartesianAxis3D
        The axis along which to slice the ndarray:
        - 0 for Z axis
        - 1 for Y axis
        - 2 for X axis
    parent_dir : Path
        The parent directory to save the image folder.
    folder_name : str
        The folder name to save images.
    pad_length : int, optional
        The number of digits to pad the file names with (default is 4).
    file_type : str, optional
        The image file type (default is ".tif").

    Returns
    -------
    bool
        True if the images were successfully created.

    Examples
    --------
    >>> data = np.random.randint(0, 255, (10, 256, 256), dtype=np.uint8)
    >>> slice_axis = CartesianAxis3D.Z
    >>> parent_dir = Path("path/to/save")
    >>> folder_name = "image_stack"
    >>> ndarray_to_img(data=data, slice_axis=slice_axis, parent_dir=parent_dir, folder_name=folder_name)
    True
    """

    img_dir = parent_dir.joinpath(folder_name).expanduser()
    img_dir.mkdir(parents=True, exist_ok=True)
    n_slices = data.shape[slice_axis.value]

    for i in range(n_slices):
        fname = f"{img_dir}/{i:0{pad_length}}{file_type}"
        mode = "L" if data.dtype == np.int8 else None

        match slice_axis:
            case CartesianAxis3D.Z:
                img = Image.fromarray(data[i, :, :], mode=mode)
            case CartesianAxis3D.Y:
                img = Image.fromarray(data[:, i, :], mode=mode)
            case CartesianAxis3D.X:
                img = Image.fromarray(data[:, :, i], mode=mode)
            case _:
                raise ValueError(
                    f"Unknown slice_axis value {slice_axis}, value must be 0, 1, or 2."
                )

        img.save(fname)
    return True
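
# With the defaults, a (10, 256, 256) stack sliced along Z is written as
# 0000.tif ... 0009.tif directly inside parent_dir (folder_name="").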


def read_images(
    file_dir: Path,
    file_type: str = ".tif",
) -> np.ndarray:
    """
    Read images from a directory and return a NumPy array representation of
    the images.

    Parameters
    ----------
    file_dir : Path
        The fully pathed location of the images.
    file_type : str, optional
        The image type (default is ".tif").

    Returns
    -------
    np.ndarray
        A NumPy array representation of the images.

    Raises
    ------
    FileNotFoundError
        If no images of the specified type are found in the directory.

    Examples
    --------
    >>> file_dir = Path("path/to/images")
    >>> read_images(file_dir, file_type=".tif")
    array([[[...], [...], ...], [[...], [...], ...], ...])
    """

    image_list = list(glob.glob(f"{str(file_dir.as_posix())}/*{file_type}"))

    if len(image_list) == 0:
        raise FileNotFoundError(
            f"File type of {file_type} not found in directory: {str(file_dir)}"
        )

    image_list.sort()  # Sort images in ascending order

    image_stack = np.array([np.array(Image.open(f)) for f in image_list])

    # Handle the case where only a single image is read
    if image_stack.ndim < 3:
        image_stack = np.expand_dims(image_stack, axis=-1)
        print(f"Only single image read, new image array size: {image_stack.shape}")
    else:
        print(f"Images read, image array size: {image_stack.shape}")

    return image_stack
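
# Note: the lexicographic sort above orders zero-padded names (0000.tif,
# 0001.tif, ...) correctly, which is what ndarray_to_img writes; unpadded
# names such as 1.tif, 10.tif, 2.tif would sort out of numeric order.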