Coverage for src/recon3d/utility.py: 77%

262 statements  


1"""  

2This module holds utilties that can be reused across other modules 

3""" 

4 

5# Standard library imports 

6from datetime import datetime 

7from itertools import cycle, repeat, tee 

8from pathlib import Path 

9from typing import Iterable, Tuple 

10import glob 

11import argparse 

12 

13# Third-party library imports 

14import h5py 

15import numpy as np 

16import yaml 

17from PIL import Image 

18from scipy import ndimage 

19import skimage 

20import skimage.io as skio 

21 

22# Local imports 

23from recon3d.types import * 


# import recon3d.feature_analysis as fa
# from recon3d.feature_analysis import SemanticImageStack


# def instance_to_ndarray(data: InstanceImageStack) -> np.ndarray:
#     """Extract the array data within the InstanceImageStack object"""
#     return data.data  # Trivial, do we need a function?


# def hdf_dataset_to_npy(hdf_path: Path, hdf_dataset_location: str, save_path: Path):
#     # read
#     with h5py.File(hdf_path, "r") as f:
#         data = np.squeeze(f[hdf_dataset_location][:])
#
#     instance_to_ndarray
#     np.save(save_path, data)


def binary_with_pores_to_semantic(input_path: Path, output_path: Path) -> dict:
    """
    Convert a folder of segmented/binarized TIFF images to a semantic image stack.

    This function is designed for a specific use case (NOMAD, pores in AM tensile bars)
    and converts binary images with metal as 1 and all else as 0 into a semantic image stack.

    Parameters
    ----------
    input_path : Path
        Folder containing binary images with metal as 1 and all else as 0.

        Figure 1:
            *---*---*---*---*
            | 1 | 1 | 1 | 1 |
            *---*---*---*---*
            | 1 | 0 | 1 | 0 |
            *---*---*---*---*
            | 1 | 1 | 1 | 0 |
            *---*---*---*---*
    output_path : Path
        Directory where the semantic images will be saved.

        Figure 2:
            *---*---*---*---*
            | 1 | 1 | 1 | 1 |
            *---*---*---*---*
            | 1 | 2 | 1 | 0 |
            *---*---*---*---*
            | 1 | 1 | 1 | 0 |
            *---*---*---*---*

    Returns
    -------
    dict
        Dictionary of class labels:
            class_labels:
                air:
                    value: 0
                metal:
                    value: 1
                pore:
                    value: 2

    Raises
    ------
    ValueError
        If the input images contain more than two phases.

    Examples
    --------
    >>> input_path = Path("path/to/binary_images")
    >>> output_path = Path("path/to/save/semantic_images")
    >>> binary_with_pores_to_semantic(input_path, output_path)
    {'class_labels': {'air': {'value': 0}, 'metal': {'value': 1}, 'pore': {'value': 2}}}

    """

    # read in binary image stack
    bw_data = read_images(input_path)

    # check data is binary, only 0 or 1
    phase_ids = np.unique(bw_data)
    n_phases = len(phase_ids)

    if n_phases != 2:
        raise ValueError(
            f"Only two phases expected in segmented image, {n_phases} phases found"
        )

    # elif n_phases == 2:
    # TODO remove hardcodes and impart flexibility from here to ut.ndarray_to_img call
    else:  # images are binary

        # preallocate the output data
        output_data = np.zeros(shape=bw_data.shape, dtype=np.int8)

        # fill holes and assign as 'metal', 1
        print("\tIsolating Sample...")
        sample = ndimage.binary_fill_holes(bw_data.astype(np.bool_))
        np.place(output_data, sample, 1)

        # isolate holes within 'metal', assign as 'pore', 2
        print("\tIsolating Voids...")
        voids = np.logical_xor(sample, bw_data)
        np.place(output_data, voids, 2)

        # thus, everything else is 'air', 0

        class_labels = {
            "class_labels": {
                "air": {"value": 0},
                "metal": {"value": 1},
                "pore": {"value": 2},
            }
        }

        ndarray_to_img(
            data=output_data,
            slice_axis=CartesianAxis3D.Z,
            parent_dir=output_path,
            folder_name="",
        )

    return class_labels
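
# A minimal check of the fill/XOR step above, run on the docstring's Figure 1
# array (commented out so that importing this module stays side-effect free):
#
#   bw = np.array([[1, 1, 1, 1],
#                  [1, 0, 1, 0],
#                  [1, 1, 1, 0]], dtype=np.int8)
#   sample = ndimage.binary_fill_holes(bw.astype(np.bool_))  # fills the enclosed 0
#   out = np.zeros_like(bw)
#   np.place(out, sample, 1)                      # metal
#   np.place(out, np.logical_xor(sample, bw), 2)  # pore; remainder stays 0 (air)
#   # out now matches Figure 2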


def validate_yml(yml_input_file: Path, cli_entry_point: str) -> tuple[Path, Path, dict]:
    """
    Verify that the YAML file contains the required arguments.

    This function checks if the specified YAML file contains the necessary arguments
    and validates the CLI entry point. It also verifies the existence of input and output directories.

    Parameters
    ----------
    yml_input_file : Path
        The path to the YAML input file.
    cli_entry_point : str
        The CLI entry point to validate against the YAML file.

    Returns
    -------
    tuple[Path, Path, dict]
        A tuple containing:
        - Path to the input directory.
        - Path to the output directory.
        - Dictionary of YAML values.

    Raises
    ------
    ValueError
        If the CLI entry point is not found in the YAML file or if the input directory does not exist.

    Examples
    --------
    >>> yml_input_file = Path("path/to/input.yml")
    >>> cli_entry_point = "process_images"
    >>> validate_yml(yml_input_file, cli_entry_point)
    (PosixPath('path/to/images'), PosixPath('path/to/output'), {'cli_entry_points': ['process_images'], 'image_dir': 'path/to/images', 'out_dir': 'path/to/output'})
    """

    print(f"Processing specification file: {yml_input_file}")
    yml_vals = yaml_to_dict(yml_input_file)

    # check cli_entry_points is valid
    if cli_entry_point not in yml_vals["cli_entry_points"]:
        raise ValueError(
            f"""Error. Incorrect yml format.
            This function requires the "cli_entry_points" key to contain {cli_entry_point},
            but currently contains the following options: {yml_vals["cli_entry_points"]}"""
        )

    path_input = Path(yml_vals["image_dir"]).expanduser()
    if not path_input.is_dir():
        raise ValueError(f"Error, 'image_dir', {path_input} not found.")
    print(f"Input path: {path_input}")

    path_output = Path(yml_vals["out_dir"]).expanduser()
    path_output.mkdir(parents=True, exist_ok=True)
    print(f"Output path: {path_output}")

    return (path_input, path_output, yml_vals)
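
# A minimal specification file that satisfies validate_yml for a given entry
# point (keys are the ones read above; paths are placeholders):
#
#   cli_entry_points:
#     - binary_to_semantic
#   image_dir: ~/data/binary_images
#   out_dir: ~/data/semantic_images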


def binarize(data: np.ndarray, val: int) -> np.ndarray:
    """
    Binarize the data based on a specified value.

    This function converts the values within the data matching the specified value to 1,
    and all other values to 0.

    Parameters
    ----------
    data : np.ndarray
        The input array to be binarized.
    val : int
        The value in the data to be binarized (converted to 1).

    Returns
    -------
    np.ndarray
        The binarized array with the same shape as the input data.

    Examples
    --------
    >>> data = np.array([[1, 2, 3], [4, 1, 6], [7, 8, 1]])
    >>> binarize(data, 1)
    array([[1, 0, 0],
           [0, 1, 0],
           [0, 0, 1]], dtype=int8)
    """

    bw_data = np.zeros(shape=data.shape, dtype=np.int8)
    np.place(bw_data, data == val, 1)

    return bw_data
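
# For reference, (data == val).astype(np.int8) is an equivalent one-liner; the
# preallocate-then-np.place pattern is kept to match the rest of this module.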


def semantic_to_binary(yml_input_file: Path) -> bool:
    """
    Convert a semantic image stack to a binary image stack using a YAML configuration file.

    This function reads a YAML file to prepare a binary image stack from a semantic image stack.
    The selected class from the semantic stack is binarized based on the specified value.

    Parameters
    ----------
    yml_input_file : Path
        The path to the YAML input file containing configuration settings.

    Returns
    -------
    bool
        True if the binary image stack was successfully created.

    Examples
    --------
    >>> yml_input_file = Path("path/to/config.yml")
    >>> semantic_to_binary(yml_input_file)
    True
    """

    input_path, output_path, params = validate_yml(
        yml_input_file, "semantic_to_binary"
    )

    # params = ut.yaml_to_dict(yml_input_file)

    sel_class = params["selected_class"]
    sel_class_value = params["class_labels"][sel_class]["value"]

    input_data = read_images(input_path)

    bw_data = binarize(input_data, sel_class_value)

    ndarray_to_img(
        data=bw_data,
        slice_axis=CartesianAxis3D.Z,
        parent_dir=output_path,
        folder_name="",
    )

    return True
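
# The specification file for this entry point also needs the class map and the
# class to extract, e.g. (label values mirror binary_with_pores_to_semantic;
# paths are placeholders):
#
#   cli_entry_points:
#     - semantic_to_binary
#   image_dir: ~/data/semantic_images
#   out_dir: ~/data/pore_masks
#   selected_class: pore
#   class_labels:
#     air: {value: 0}
#     metal: {value: 1}
#     pore: {value: 2}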


def binary_to_semantic(yml_input_file: Path) -> bool:
    """
    Convert a binary image stack to a semantic image stack using a YAML configuration file.

    This function reads a YAML file to prepare a semantic image stack from a binary image stack.
    It is designed for a specific use case (NOMAD, AM tensile bars) as of 31 May 2024.

    Parameters
    ----------
    yml_input_file : Path
        The path to the YAML input file containing configuration settings.

    Returns
    -------
    bool
        True if the semantic image stack was successfully created.

    Examples
    --------
    >>> yml_input_file = Path("path/to/config.yml")
    >>> binary_to_semantic(yml_input_file)
    True
    """

    input_path, output_path, params = validate_yml(
        yml_input_file, "binary_to_semantic"
    )

    class_labels = binary_with_pores_to_semantic(
        input_path=input_path, output_path=output_path
    )

    print(f"class labels for semantic stack:\n{class_labels}")

    return True


def main_binary_to_semantic():
    """
    Run the binary to semantic conversion module from the command line.

    This function serves as the entry point for terminal-based access to the binary to semantic conversion module.
    It is invoked from the command line using the 'binary_to_semantic' command specified in pyproject.toml.
    The function processes a YAML input file to convert a binary image stack to a semantic image stack.

    Parameters
    ----------
    None

    Returns
    -------
    None

    Examples
    --------
    To run the binary to semantic conversion, use the following command in the terminal:
    $ binary_to_semantic path/to/input.yml
    """

    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="the .yml user input file")
    args = parser.parse_args()
    yml_input_file = Path(args.input_file)  # argparse yields a str; convert to Path

    binary_to_semantic(yml_input_file=yml_input_file)

    print(f"{yml_input_file} processed!")


def main_semantic_to_binary():
    """
    Run the semantic to binary conversion module from the command line.

    This function serves as the entry point for terminal-based access to the semantic to binary conversion module.
    It is invoked from the command line using the 'semantic_to_binary' command specified in pyproject.toml.
    The function processes a YAML input file to convert a semantic image stack to a binary image stack.

    Parameters
    ----------
    None

    Returns
    -------
    None

    Examples
    --------
    To run the semantic to binary conversion, use the following command in the terminal:
    $ semantic_to_binary path/to/input.yml
    """

    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="the .yml user input file")
    args = parser.parse_args()
    yml_input_file = Path(args.input_file)  # argparse yields a str; convert to Path

    semantic_to_binary(yml_input_file=yml_input_file)

    print(f"{yml_input_file} processed!")


def hdf_to_instance_properties(hdf_path: Path, group_path: str) -> InstanceProperties:
    """
    Read instance analysis data from an HDF5 file and create an InstanceProperties object.

    This function reads data from an HDF5 file that has had instance analysis performed
    with a pre-defined internal structure to create an InstanceProperties object.
    The group name should be the internal 'folder' in the HDF5 file containing the instance
    analysis property data.

    Parameters
    ----------
    hdf_path : Path
        The path to the HDF5 file containing the instance analysis data.
    group_path : str
        The internal 'folder' in the HDF5 file containing the instance analysis property data.

    Returns
    -------
    InstanceProperties
        An InstanceProperties object containing the instance analysis data.

    Examples
    --------
    >>> hdf_path = Path("path/to/instance_analysis.h5")
    >>> group_path = "instance_properties"
    >>> instance_properties = hdf_to_instance_properties(hdf_path, group_path)
    >>> print(instance_properties)
    InstanceProperties(source_name='instance_properties', labels=InstanceLabels(data=array([0, 1, 2, ...])), n_voxels=[NVoxel(value=...), ...], equivalent_sphere_diameters=[Length(value=..., unit=<Units.MICRON: 'micron'>), ...], centroids=Centroids(data=[Centroid(cx=Length(value=..., unit=<Units.MICRON: 'micron'>), cy=Length(value=..., unit=<Units.MICRON: 'micron'>), cz=Length(value=..., unit=<Units.MICRON: 'micron'>)), ...]), ellipsoids=BestFitEllipsoids(data=[BestFitEllipsoid(a=EllipsoidAxis(length=Length(value=..., unit=<Units.MICRON: 'micron'>), orientation=UnitVector(u=..., v=..., w=...)), b=EllipsoidAxis(length=Length(value=..., unit=<Units.MICRON: 'micron'>), orientation=UnitVector(u=..., v=..., w=...)), c=EllipsoidAxis(length=Length(value=..., unit=<Units.MICRON: 'micron'>), orientation=UnitVector(u=..., v=..., w=...))), ...]), surface_areas=EllipsoidSurfaceAreas(data=[Area(value=..., unit_squared=<Units.MICRON: 'micron'>), ...]), volumes=EllipsoidVolumes(data=[Volume(value=..., unit_cubed=<Units.MICRON: 'micron'>), ...]))
    """

    with h5py.File(hdf_path, "r") as f:
        instance_data = f[group_path]
        hdf_n_voxels = np.squeeze(instance_data["num_voxels"][:])
        hdf_equiv_diam = np.squeeze(instance_data["equivalent_sphere_diameters"][:])
        equiv_diam_unit = Units(
            instance_data["equivalent_sphere_diameters"].attrs["units"]
        )
        hdf_centroids = np.squeeze(instance_data["centroids"][:])
        centroid_unit = Units(instance_data["centroids"].attrs["units"])
        hdf_semi_axes = np.squeeze(instance_data["semi-axis_lengths"][:])
        semi_axes_unit = Units(instance_data["semi-axis_lengths"].attrs["units"])
        hdf_vectors = np.squeeze(instance_data["axis_vectors"][:])
        hdf_surface_areas = np.squeeze(instance_data["ellipsoid_surface_areas"][:])
        surface_area_unit = Units(
            instance_data["ellipsoid_surface_areas"].attrs["units_squared"]
        )
        hdf_volumes = np.squeeze(instance_data["ellipsoid_volumes"][:])
        volume_unit = Units(instance_data["ellipsoid_volumes"].attrs["units_cubed"])

    labels = np.arange(0, len(hdf_equiv_diam), dtype=int)
    n_voxels = [NVoxel(value=i) for i in hdf_n_voxels]
    equivalent_sphere_diameters = [
        Length(value=i, unit=equiv_diam_unit) for i in hdf_equiv_diam
    ]
    centroids = Centroids(
        data=[
            Centroid(
                cx=Length(value=i[0], unit=centroid_unit),
                cy=Length(value=i[1], unit=centroid_unit),
                cz=Length(value=i[2], unit=centroid_unit),
            )
            for i in hdf_centroids
        ]
    )

    ellipsoids = BestFitEllipsoids(
        [
            BestFitEllipsoid(
                a=EllipsoidAxis(
                    length=Length(value=i[0], unit=semi_axes_unit),
                    orientation=UnitVector(u=j[0], v=j[1], w=j[2]),
                ),
                b=EllipsoidAxis(
                    length=Length(value=i[1], unit=semi_axes_unit),
                    orientation=UnitVector(u=j[3], v=j[4], w=j[5]),
                ),
                c=EllipsoidAxis(
                    length=Length(value=i[2], unit=semi_axes_unit),
                    orientation=UnitVector(u=j[6], v=j[7], w=j[8]),
                ),
            )
            for i, j in zip(hdf_semi_axes, hdf_vectors)
        ]
    )

    surface_areas = EllipsoidSurfaceAreas(
        [Area(value=i, unit_squared=surface_area_unit) for i in hdf_surface_areas]
    )

    volumes = EllipsoidVolumes(
        [Volume(value=i, unit_cubed=volume_unit) for i in hdf_volumes]
    )

    instance_props = InstanceProperties(
        source_name=group_path,
        labels=InstanceLabels(data=labels),
        n_voxels=n_voxels,
        equivalent_sphere_diameters=equivalent_sphere_diameters,
        centroids=centroids,
        ellipsoids=ellipsoids,
        surface_areas=surface_areas,
        volumes=volumes,
    )

    # n_voxels: list[NVoxel]  # could have a InstanceImageStack
    # equivalent_sphere_diameters: list[Length]
    # centroids: Centroids  # could have a InstanceImageStack
    # ellipsoids: BestFitEllipsoids
    # surface_areas: SurfaceAreas

    return instance_props
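
# Expected internal layout of the HDF5 group, inferred from the dataset and
# attribute names read above:
#
#   <group_path>/
#       num_voxels
#       equivalent_sphere_diameters    (attrs: "units")
#       centroids                      (attrs: "units")
#       semi-axis_lengths              (attrs: "units")
#       axis_vectors
#       ellipsoid_surface_areas        (attrs: "units_squared")
#       ellipsoid_volumes              (attrs: "units_cubed")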


def hdf_to_metadata(hdf_path: Path, dataset_path: str) -> MetaData:
    """
    Extract metadata from an HDF5 dataset containing string metadata attributes.

    This function reads metadata attributes from a specified dataset within an HDF5 file
    and returns a MetaData object.

    Parameters
    ----------
    hdf_path : Path
        The path to the HDF5 file containing the dataset.
    dataset_path : str
        The internal path to the dataset within the HDF5 file.

    Returns
    -------
    MetaData
        A MetaData object containing the extracted metadata.

    Examples
    --------
    >>> hdf_path = Path("path/to/data.h5")
    >>> dataset_path = "dataset"
    >>> metadata = hdf_to_metadata(hdf_path, dataset_path)
    >>> print(metadata)
    MetaData(data_volume=DataVolume(x_width=..., y_height=..., z_image_count=...), resolution=Resolution(dx=Length(value=..., unit=<Units.MICRON: 'micron'>), dy=Length(value=..., unit=<Units.MICRON: 'micron'>), dz=Length(value=..., unit=<Units.MICRON: 'micron'>)), pixel_units=<Units.MICRON: 'micron'>, origin=Origin(x0=Length(value=..., unit=<Units.MICRON: 'micron'>), y0=Length(value=..., unit=<Units.MICRON: 'micron'>), z0=Length(value=..., unit=<Units.MICRON: 'micron'>)))
    """

    with h5py.File(hdf_path, "r") as f:
        dataset = f[dataset_path]

        x_width = int(dataset.attrs["x_width (pixels)"])
        y_height = int(dataset.attrs["y_height (pixels)"])
        z_image_count = int(dataset.attrs["z_image_count (pixels)"])
        data_volume = DataVolume(
            x_width=x_width, y_height=y_height, z_image_count=z_image_count
        )

        pixel_units = Units(dataset.attrs["Resolution, units"])
        dx = Length(value=float(dataset.attrs["Resolution, dx"]), unit=pixel_units)
        dy = Length(value=float(dataset.attrs["Resolution, dy"]), unit=pixel_units)
        dz = Length(value=float(dataset.attrs["Resolution, dz"]), unit=pixel_units)
        resolution = Resolution(
            dx=dx,
            dy=dy,
            dz=dz,
        )

        origin_units = Units(dataset.attrs["Origin, units"])
        x0 = Length(value=float(dataset.attrs["Origin, x0"]), unit=origin_units)
        y0 = Length(value=float(dataset.attrs["Origin, y0"]), unit=origin_units)
        z0 = Length(value=float(dataset.attrs["Origin, z0"]), unit=origin_units)
        origin = Origin(
            x0=x0,
            y0=y0,
            z0=z0,
        )

    metadata = MetaData(
        data_volume=data_volume,
        resolution=resolution,
        pixel_units=pixel_units,
        origin=origin,
    )

    return metadata
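
# Attribute names read above (useful when writing compatible files; see also
# metadata_to_dict, which emits the same keys):
#   "x_width (pixels)", "y_height (pixels)", "z_image_count (pixels)",
#   "Resolution, dx", "Resolution, dy", "Resolution, dz", "Resolution, units",
#   "Origin, x0", "Origin, y0", "Origin, z0", "Origin, units"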


def centroids_to_ndarray(centroids: Centroids) -> np.ndarray:
    """
    Convert centroid data type into a NumPy array.

    This function converts a Centroids object into a NumPy array with each row representing
    the (cx, cy, cz) coordinates of a centroid.

    Parameters
    ----------
    centroids : Centroids
        The Centroids object containing the centroid data.

    Returns
    -------
    np.ndarray
        A NumPy array with shape (n, 3), where n is the number of centroids, and each row
        contains the (cx, cy, cz) coordinates of a centroid.

    Examples
    --------
    >>> centroids = Centroids(data=[
    ...     Centroid(cx=Length(0.0, Units.MICRON), cy=Length(0.0, Units.MICRON), cz=Length(0.0, Units.MICRON)),
    ...     Centroid(cx=Length(1.0, Units.MICRON), cy=Length(1.0, Units.MICRON), cz=Length(1.0, Units.MICRON)),
    ...     Centroid(cx=Length(2.0, Units.MICRON), cy=Length(2.0, Units.MICRON), cz=Length(2.0, Units.MICRON))
    ... ])
    >>> centroids_to_ndarray(centroids)
    array([[0., 0., 0.],
           [1., 1., 1.],
           [2., 2., 2.]])
    """

    data = centroids.data
    cx = [i.cx.value for i in data]
    cy = [i.cy.value for i in data]
    cz = [i.cz.value for i in data]

    ndarray = np.array((cx, cy, cz), dtype=float).T

    return ndarray


def ellipsoids_to_ndarray(
    ellipsoids: BestFitEllipsoids,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Convert ellipsoid data type into NumPy arrays.

    This function converts a BestFitEllipsoids object into two NumPy arrays:
    one for the axis lengths and one for the axis vectors.

    Parameters
    ----------
    ellipsoids : BestFitEllipsoids
        The BestFitEllipsoids object containing the ellipsoid data.

    Returns
    -------
    Tuple[np.ndarray, np.ndarray]
        A tuple containing:
        - A NumPy array with shape (n, 3) for the axis lengths, where n is the number of ellipsoids.
        - A NumPy array with shape (n, 9) for the axis vectors, where n is the number of ellipsoids.

    Examples
    --------
    >>> ellipsoids = BestFitEllipsoids(data=[
    ...     BestFitEllipsoid(
    ...         a=EllipsoidAxis(length=Length(5.0, Units.MICRON), orientation=UnitVector(u=1.0, v=0.0, w=0.0)),
    ...         b=EllipsoidAxis(length=Length(3.0, Units.MICRON), orientation=UnitVector(u=0.0, v=1.0, w=0.0)),
    ...         c=EllipsoidAxis(length=Length(2.0, Units.MICRON), orientation=UnitVector(u=0.0, v=0.0, w=1.0))
    ...     ),
    ...     BestFitEllipsoid(
    ...         a=EllipsoidAxis(length=Length(6.0, Units.MICRON), orientation=UnitVector(u=0.707, v=0.707, w=0.0)),
    ...         b=EllipsoidAxis(length=Length(4.0, Units.MICRON), orientation=UnitVector(u=-0.707, v=0.707, w=0.0)),
    ...         c=EllipsoidAxis(length=Length(3.0, Units.MICRON), orientation=UnitVector(u=0.0, v=0.0, w=1.0))
    ...     )
    ... ])
    >>> axis_lengths, axis_vectors = ellipsoids_to_ndarray(ellipsoids)
    >>> axis_lengths
    array([[5., 3., 2.],
           [6., 4., 3.]])
    >>> axis_vectors
    array([[ 1.   ,  0.   ,  0.   ,  0.   ,  1.   ,  0.   ,  0.   ,  0.   ,  1.   ],
           [ 0.707,  0.707,  0.   , -0.707,  0.707,  0.   ,  0.   ,  0.   ,  1.   ]])
    """

    data = ellipsoids.data
    # axis lengths:
    a = [i.a.length.value for i in data]
    b = [i.b.length.value for i in data]
    c = [i.c.length.value for i in data]
    axis_lengths = np.array((a, b, c), dtype=float).T

    # axes vectors
    a_u = [i.a.orientation.u for i in data]
    a_v = [i.a.orientation.v for i in data]
    a_w = [i.a.orientation.w for i in data]

    b_u = [i.b.orientation.u for i in data]
    b_v = [i.b.orientation.v for i in data]
    b_w = [i.b.orientation.w for i in data]

    c_u = [i.c.orientation.u for i in data]
    c_v = [i.c.orientation.v for i in data]
    c_w = [i.c.orientation.w for i in data]

    axis_vectors = np.array(
        (a_u, a_v, a_w, b_u, b_v, b_w, c_u, c_v, c_w), dtype=float
    ).T

    return axis_lengths, axis_vectors
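
# Column order of axis_vectors, matching the j[0:9] unpacking in
# hdf_to_instance_properties:
#   [a_u, a_v, a_w, b_u, b_v, b_w, c_u, c_v, c_w]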


def surface_areas_to_ndarray(surface_areas: EllipsoidSurfaceAreas) -> np.ndarray:
    """
    Convert surface area data type into a NumPy array.

    This function converts an EllipsoidSurfaceAreas object into a NumPy array
    containing the surface area values.

    Parameters
    ----------
    surface_areas : EllipsoidSurfaceAreas
        The EllipsoidSurfaceAreas object containing the surface area data.

    Returns
    -------
    np.ndarray
        A NumPy array containing the surface area values.

    Examples
    --------
    >>> surface_areas = EllipsoidSurfaceAreas(data=[
    ...     Area(value=100.0, unit_squared=Units.MICRON),
    ...     Area(value=200.0, unit_squared=Units.MICRON),
    ...     Area(value=300.0, unit_squared=Units.MICRON)
    ... ])
    >>> surface_areas_to_ndarray(surface_areas)
    array([100., 200., 300.])
    """

    data = surface_areas.data
    areas = [i.value for i in data]

    ndarray = np.array(areas, dtype=float)

    return ndarray


def volumes_to_ndarray(ellipsoid_volumes: EllipsoidVolumes) -> np.ndarray:
    """
    Convert ellipsoid volume data type into a NumPy array.

    This function converts an EllipsoidVolumes object into a NumPy array
    containing the volume values.

    Parameters
    ----------
    ellipsoid_volumes : EllipsoidVolumes
        The EllipsoidVolumes object containing the volume data.

    Returns
    -------
    np.ndarray
        A NumPy array containing the volume values.

    Examples
    --------
    >>> ellipsoid_volumes = EllipsoidVolumes(data=[
    ...     Volume(value=500.0, unit_cubed=Units.MICRON),
    ...     Volume(value=1000.0, unit_cubed=Units.MICRON),
    ...     Volume(value=1500.0, unit_cubed=Units.MICRON)
    ... ])
    >>> volumes_to_ndarray(ellipsoid_volumes)
    array([ 500., 1000., 1500.])
    """

    data = ellipsoid_volumes.data
    volumes = [i.value for i in data]

    ndarray = np.array(volumes, dtype=float)

    return ndarray


def rmdir(directory: Path) -> None:
    """
    Recursively delete a directory and all its contents.
    (credit to: https://stackoverflow.com/questions/13118029/deleting-folders-in-python-recursively/49782093#49782093)

    This function deletes the specified directory and all its contents, including
    subdirectories and files. If the directory does not exist, the function does nothing.

    Parameters
    ----------
    directory : Path
        The path to the directory to be deleted.

    Returns
    -------
    None

    Examples
    --------
    >>> from pathlib import Path
    >>> directory = Path("path/to/directory")
    >>> rmdir(directory)
    """

    if not directory.exists():
        return

    for item in directory.iterdir():
        if item.is_dir():
            rmdir(item)
        else:
            item.unlink()
    directory.rmdir()
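
# shutil.rmtree(directory, ignore_errors=True) from the standard library is a
# near-equivalent; the explicit recursion above is kept for transparency.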


def compare_files(file1: Path, file2: Path, ignore_words: list[str]) -> bool:
    """
    Compare two files line-by-line, ignoring lines that contain specified words.

    This function compares two files line-by-line and ignores lines that contain
    any of the words specified in the `ignore_words` list. If the files are identical
    up to the allowed differences, the function returns True. Otherwise, it returns False.

    Parameters
    ----------
    file1 : Path
        Path to the first file for comparison.
    file2 : Path
        Path to the second file for comparison.
    ignore_words : list of str
        List of words that cause a particular line to be ignored during comparison.
        Use an empty list for strict word-by-word comparison. Use a non-empty list,
        e.g., ["date", "time"], to ignore lines with these words, as they may differ
        from file to file.

    Returns
    -------
    bool
        True if the two files are the same (up to any allowed differences in the
        ignore_words list), False if the two files are different.

    Examples
    --------
    >>> file1 = Path("path/to/file1.txt")
    >>> file2 = Path("path/to/file2.txt")
    >>> ignore_words = ["date", "time"]
    >>> compare_files(file1, file2, ignore_words)
    True
    """

    with open(file=file1, mode="r", encoding="utf-8") as fa:
        with open(file=file2, mode="r", encoding="utf-8") as fb:
            dba = fa.readlines()
            dbb = fb.readlines()

    if len(dba) == len(dbb):
        for k, line in enumerate(dba):
            if line != dbb[k]:
                # if "Autogenerated" in line:
                iw_in_fa_line = [iw in line for iw in ignore_words]
                iw_in_fb_line = [iw in dbb[k] for iw in ignore_words]
                in_either_line = iw_in_fa_line + iw_in_fb_line
                # if any(iw in [line + dbb[k]] for iw in ignore_words):
                if any(in_either_line):
                    # skip comparison of the ignore_words case(s)
                    print("Skip comparison of word in ignore_word.")
                else:
                    print(f"Files differ at line {k}:")
                    print(f"\t file1: {line}")
                    print(f"\t file2: {dbb[k]}")
                    return False

    else:
        print("Files have an unequal number of lines.")
        print("Line-by-line comparison not performed.")
        return False

    return True  # files are the same


def date_time_utc() -> str:
    """
    Returns the current date and time in ISO format in the UTC time zone.

    This function returns the current date and time in ISO 8601 format, with the time zone
    specified as UTC. The colons (:) and periods (.) in the time string are replaced with
    underscores (_) for compatibility with file naming conventions.

    Returns
    -------
    str
        The current date and time in ISO format in the UTC time zone.

    Examples
    --------
    >>> date_time_utc()
    '2024-08-13_UTC_12_34_56_789012'
    """

    # datetime.utcnow() is deprecated; take an aware UTC timestamp, then drop
    # the tzinfo so the string keeps the documented naive-UTC format
    ts = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    ts = ts.replace(":", "_").replace(".", "_")  # Overwrite ":", "." with "_"
    ts = ts.replace("T", "_UTC_")  # Overwrite T with UTC time zone indication
    return ts


def underline() -> str:
    """
    Return a commented underline composed of 78 dashes ('-') and a newline character ('\\n').

    This function generates a string that represents a commented underline
    consisting of 78 dashes prefixed with a comment character ('* ') and
    followed by a newline character.

    Returns
    -------
    str
        A string representing a commented underline.

    Examples
    --------
    >>> underline()
    '* ------------------------------------------------------------------------------\\n'
    """

    return "* " + "".join(repeat("-", 78)) + "\n"


def in_a_but_not_in_b(*, a: Iterable, b: Iterable) -> Iterable:
    """
    Return items in Iterable `a` that are not in Iterable `b`.

    This function takes two iterables `a` and `b`, and returns a tuple containing
    all the items that are present in `a` but not in `b`.

    Parameters
    ----------
    a : Iterable
        The first iterable to compare.
    b : Iterable
        The second iterable to compare.

    Returns
    -------
    Tuple
        A tuple containing items that are in `a` but not in `b`.

    Examples
    --------
    >>> a = [1, 2, 3, 4]
    >>> b = [3, 4, 5, 6]
    >>> in_a_but_not_in_b(a=a, b=b)
    (1, 2)
    """

    result = ()  # empty tuple
    for item in a:
        if item not in b:
            result = result + (item,)

    return result
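
# Equivalent comprehension, for reference:
#   tuple(item for item in a if item not in b)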


def pairwise(x: Iterable) -> Iterable:
    """
    Return successive overlapping pairs taken from the input iterable.

    itertools.pairwise provides this in Python 3.10+
    (https://docs.python.org/3/library/itertools.html#itertools.pairwise),
    but this package currently supports Python 3.9, so `pairwise` is
    implemented here.

    The number of 2-tuples in the output iterator will be one fewer than the
    number of inputs. It will be empty if the input iterable has fewer than
    two values.

    Parameters
    ----------
    x : Iterable
        The input iterable from which to generate pairs.

    Returns
    -------
    Iterable[Tuple]
        An iterable of 2-tuples containing successive overlapping pairs from the input iterable.

    Examples
    --------
    >>> list(pairwise('ABCDEFG'))
    [('A', 'B'), ('B', 'C'), ('C', 'D'), ('D', 'E'), ('E', 'F'), ('F', 'G')]
    >>> list(pairwise([1, 2, 3, 4]))
    [(1, 2), (2, 3), (3, 4)]
    """

    a, b = tee(x)
    next(b, None)  # advance the second iterator by one
    return zip(a, b)


def pairwise_circular(x: Iterable) -> Iterable:
    """
    Return successive overlapping pairs taken from the input iterable,
    wrapping around so that the last item is paired with the first.

    The number of 2-tuples in the output iterator equals the number of
    inputs; a single-item input yields one pair of the item with itself, and
    an empty input yields an empty iterator. The input must be re-iterable
    (e.g., a sequence, not a one-shot iterator), because it is traversed
    twice.

    Parameters
    ----------
    x : Iterable
        The input iterable from which to generate pairs.

    Returns
    -------
    Iterable[Tuple]
        An iterable of 2-tuples containing successive overlapping pairs,
        including the wrap-around pair.

    Examples
    --------
    >>> list(pairwise_circular('ABCD'))
    [('A', 'B'), ('B', 'C'), ('C', 'D'), ('D', 'A')]
    >>> list(pairwise_circular([1, 2, 3, 4]))
    [(1, 2), (2, 3), (3, 4), (4, 1)]
    """

    a = cycle(x)
    next(a, None)  # advance the cycle by one; default guards the empty case
    return zip(x, a)


def metadata_to_dict(metadata: MetaData) -> dict:
    """
    Convert MetaData to a dictionary.

    This function converts a MetaData object to a dictionary representation.

    Parameters
    ----------
    metadata : MetaData
        The metadata of the Data Volume.

    Returns
    -------
    dict
        A dictionary containing the metadata.

    Examples
    --------
    >>> metadata = MetaData(
    ...     data_volume=DataVolume(x_width=256, y_height=256, z_image_count=10),
    ...     resolution=Resolution(
    ...         dx=Length(1.0, Units.MICRON),
    ...         dy=Length(1.0, Units.MICRON),
    ...         dz=Length(1.0, Units.MICRON)
    ...     ),
    ...     pixel_units=Units.MICRON,
    ...     origin=Origin(
    ...         x0=Length(0.0, Units.MICRON),
    ...         y0=Length(0.0, Units.MICRON),
    ...         z0=Length(0.0, Units.MICRON)
    ...     )
    ... )
    >>> metadata_to_dict(metadata)
    {
        'x_width (pixels)': '256',
        'y_height (pixels)': '256',
        'z_image_count (pixels)': '10',
        'Resolution, dx': '1.0',
        'Resolution, dy': '1.0',
        'Resolution, dz': '1.0',
        'Resolution, units': 'micron',
        'Origin, x0': '0.0',
        'Origin, y0': '0.0',
        'Origin, z0': '0.0',
        'Origin, units': 'micron'
    }
    """

    data_volume = metadata.data_volume
    resolution = metadata.resolution
    # pixel_units = metadata.pixel_units
    origin = metadata.origin

    x_width = data_volume.x_width
    y_height = data_volume.y_height
    z_image_count = data_volume.z_image_count

    dx = resolution.dx
    dy = resolution.dy
    dz = resolution.dz

    meta_dict = {
        "x_width (pixels)": str(x_width),
        "y_height (pixels)": str(y_height),
        "z_image_count (pixels)": str(z_image_count),
        "Resolution, dx": str(dx.value),
        "Resolution, dy": str(dy.value),
        "Resolution, dz": str(dz.value),
        "Resolution, units": str(dx.unit.value),
        "Origin, x0": str(origin.x0.value),
        "Origin, y0": str(origin.y0.value),
        "Origin, z0": str(origin.z0.value),
        "Origin, units": str(origin.x0.unit.value),
    }
    # print(meta_dict)

    return meta_dict
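
# The keys above match the HDF5 attribute names read by hdf_to_metadata, so
# metadata written from this dict can round-trip through an HDF5 file.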


def yaml_to_dict(path_file_input: Path) -> dict:
    """
    Convert a YAML file to a dictionary.

    This function reads a YAML file and converts its contents to a dictionary.

    Parameters
    ----------
    path_file_input : Path
        The full path to the input YAML file.

    Returns
    -------
    dict
        A dictionary containing the contents of the YAML file.

    Raises
    ------
    TypeError
        If the file type is not supported.
    OSError
        If there is an error opening or decoding the YAML file.

    Examples
    --------
    >>> path_file_input = Path("path/to/config.yml")
    >>> yaml_to_dict(path_file_input)
    {'key1': 'value1', 'key2': 'value2', ...}
    """

    file_type = Path(path_file_input).suffix.casefold()
    supported_types = (".yaml", ".yml")

    if file_type not in supported_types:
        raise TypeError("Only file types .yaml and .yml are supported.")

    try:
        with open(file=path_file_input, mode="r", encoding="utf-8") as stream:
            db = yaml.load(stream, Loader=yaml.SafeLoader)  # Load YAML file
    except yaml.YAMLError as error:
        print(f"Error with YAML file: {error}")
        print(f"Could not open or decode: {path_file_input}")
        raise OSError from error

    print(f"Success: database created from file: {path_file_input}")
    print("key, value, type")
    print("---, -----, ----")
    for key, value in db.items():
        print(f"{key}, {value}, {type(value)}")

    return db


def dict_to_yaml(db: dict, file: str) -> Path:
    """
    Convert a dictionary to a YAML file.

    This function writes the contents of a dictionary to a YAML file.

    Parameters
    ----------
    db : dict
        The dictionary to be converted to YAML.
    file : str
        The path to the output YAML file.

    Returns
    -------
    Path
        The path to the created YAML file.

    Examples
    --------
    >>> db = {'key1': 'value1', 'key2': 'value2'}
    >>> file = "path/to/output.yml"
    >>> dict_to_yaml(db, file)
    PosixPath('path/to/output.yml')
    """

    with open(file, "w", encoding="utf-8") as out_file:
        yaml.dump(db, out_file, default_flow_style=False)  # Write dictionary to YAML

    return Path(file)
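
# Round-trip sketch: dict_to_yaml({"a": 1}, "out.yml") writes the file, and
# yaml_to_dict(Path("out.yml")) reads it back as {"a": 1}.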


def ndarray_to_img(
    *,
    data: np.ndarray,
    slice_axis: CartesianAxis3D,
    parent_dir: Path,
    folder_name: str,
    pad_length: int = 4,
    file_type: str = ".tif",
) -> bool:
    """
    Convert an ndarray to an image stack and save it to a specified directory.

    This function takes a NumPy ndarray and creates an image stack, saving the images
    into a user-specified directory.

    Parameters
    ----------
    data : np.ndarray
        The semantic labels.
    slice_axis : CartesianAxis3D
        The axis along which to slice the ndarray:
        - 0 for Z axis
        - 1 for Y axis
        - 2 for X axis
    parent_dir : Path
        The parent directory to save the image folder.
    folder_name : str
        The folder name to save images.
    pad_length : int, optional
        The number of digits to pad the file names with (default is 4).
    file_type : str, optional
        The image file type (default is ".tif").

    Returns
    -------
    bool
        True if the images were successfully created.

    Examples
    --------
    >>> data = np.random.randint(0, 255, (10, 256, 256), dtype=np.uint8)
    >>> slice_axis = CartesianAxis3D.Z
    >>> parent_dir = Path("path/to/save")
    >>> folder_name = "image_stack"
    >>> ndarray_to_img(data=data, slice_axis=slice_axis, parent_dir=parent_dir, folder_name=folder_name)
    True
    """

    img_dir = parent_dir.joinpath(folder_name).expanduser()
    img_dir.mkdir(parents=True, exist_ok=True)
    n_slices = data.shape[slice_axis.value]

    for i in range(n_slices):
        fname = f"{img_dir}/{i:0{pad_length}}{file_type}"
        mode = "L" if data.dtype == np.int8 else None

        match slice_axis:
            case CartesianAxis3D.Z:
                img = Image.fromarray(data[i, :, :], mode=mode)
            case CartesianAxis3D.Y:
                img = Image.fromarray(data[:, i, :], mode=mode)
            case CartesianAxis3D.X:
                img = Image.fromarray(data[:, :, i], mode=mode)
            case _:
                raise ValueError(
                    f"Unknown slice_axis value {slice_axis}, value must be 0, 1, or 2."
                )

        img.save(fname)
    return True
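
# With the defaults, a (10, 256, 256) stack sliced along Z is written as
# 0000.tif ... 0009.tif directly inside parent_dir (folder_name="").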


def read_images(
    file_dir: Path,
    file_type: str = ".tif",
) -> np.ndarray:
    """
    Read images from a directory and return a NumPy array representation of
    the images.

    Parameters
    ----------
    file_dir : Path
        The fully pathed location of the images.
    file_type : str, optional
        The image type (default is ".tif").

    Returns
    -------
    np.ndarray
        A NumPy array representation of the images.

    Raises
    ------
    FileNotFoundError
        If no images of the specified type are found in the directory.

    Examples
    --------
    >>> file_dir = Path("path/to/images")
    >>> read_images(file_dir, file_type=".tif")
    array([[[...], [...], ...], [[...], [...], ...], ...])
    """

    image_list = list(glob.glob(f"{str(file_dir.as_posix())}/*{file_type}"))

    if len(image_list) == 0:
        raise FileNotFoundError(
            f"File type of {file_type} not found in directory: {str(file_dir)}"
        )

    image_list.sort()  # Sort images in ascending order

    image_stack = np.array([np.array(Image.open(f)) for f in image_list])

    # Handle the case where only a single image is read
    if image_stack.ndim < 3:
        image_stack = np.expand_dims(image_stack, axis=-1)
        print(f"Only single image read, new image array size: {image_stack.shape}")
    else:
        print(f"Images read, image array size: {image_stack.shape}")

    return image_stack
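
# Note: the lexicographic sort above orders zero-padded names (0000.tif,
# 0001.tif, ...) correctly, which is what ndarray_to_img writes; unpadded
# names such as 1.tif, 10.tif, 2.tif would sort out of numeric order.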