Coverage for src/recon3d/utility.py: 77% (262 statements)
1"""
2This module holds utilties that can be reused across other modules
3"""
5# Standard library imports
6from datetime import datetime
7from itertools import cycle, repeat, tee
8from pathlib import Path
9from typing import Iterable, Tuple
10import glob
11import argparse
13# Third-party library imports
14import h5py
15import numpy as np
16import yaml
17from PIL import Image
18from scipy import ndimage
19import skimage
20import skimage.io as skio
22# Local imports
23from recon3d.types import *
# import recon3d.feature_analysis as fa
# from recon3d.feature_analysis import SemanticImageStack


# def instance_to_ndarray(data: InstanceImageStack) -> np.ndarray:
#     """Extract the array data within the InstanceImageStack object"""
#     return data.data  # Trivial, do we need a function?


# def hdf_dataset_to_npy(hdf_path: Path, hdf_dataset_location: str, save_path: Path):
#     # read
#     with h5py.File(hdf_path, "r") as f:
#         data = np.squeeze(f[hdf_dataset_location][:])
#     # instance_to_ndarray
#     np.save(save_path, data)


def binary_with_pores_to_semantic(input_path: Path, output_path: Path) -> dict:
    """
    Convert a folder of segmented/binarized TIFF images to a semantic image stack.

    This function is designed for a specific use case (NOMAD, pores in AM tensile bars)
    and converts binary images with metal as 1 and all else as 0 into a semantic image stack.

    Parameters
    ----------
    input_path : Path
        Folder containing binary images with metal as 1 and all else as 0.
        Figure 1:
            *---*---*---*---*
            | 1 | 1 | 1 | 1 |
            *---*---*---*---*
            | 1 | 0 | 1 | 0 |
            *---*---*---*---*
            | 1 | 1 | 1 | 0 |
            *---*---*---*---*
    output_path : Path
        Directory where the semantic images will be saved.
        Figure 2:
            *---*---*---*---*
            | 1 | 1 | 1 | 1 |
            *---*---*---*---*
            | 1 | 2 | 1 | 0 |
            *---*---*---*---*
            | 1 | 1 | 1 | 0 |
            *---*---*---*---*

    Returns
    -------
    dict
        Dictionary of class labels:
            class_labels:
                air:
                    value: 0
                metal:
                    value: 1
                pore:
                    value: 2

    Raises
    ------
    ValueError
        If the input images contain more than two phases.

    Examples
    --------
    >>> input_path = Path("path/to/binary_images")
    >>> output_path = Path("path/to/save/semantic_images")
    >>> binary_with_pores_to_semantic(input_path, output_path)
    {'class_labels': {'air': {'value': 0}, 'metal': {'value': 1}, 'pore': {'value': 2}}}
    """

    # read in binary image stack
    bw_data = read_images(input_path)

    # check that the data contains exactly two phases (0 and 1)
    phase_ids = np.unique(bw_data)
    n_phases = len(phase_ids)

    if n_phases != 2:
        raise ValueError(
            f"Only two phases expected in segmented image, {n_phases} phases found"
        )
    # elif n_phases == 2:
    # TODO remove hardcodes and impart flexibility from here to ut.ndarray_to_img call
    else:  # images are binary
        # preallocate the output data
        output_data = np.zeros(shape=bw_data.shape, dtype=np.int8)

        # fill holes and assign as 'metal', 1
        print("\tIsolating Sample...")
        sample = ndimage.binary_fill_holes(bw_data.astype(np.bool_))
        np.place(output_data, sample, 1)

        # isolate holes within 'metal', assign as 'pore', 2
        print("\tIsolating Voids...")
        voids = np.logical_xor(sample, bw_data)
        np.place(output_data, voids, 2)

        # thus, everything else is 'air', 0

    class_labels = {
        "class_labels": {
            "air": {"value": 0},
            "metal": {"value": 1},
            "pore": {"value": 2},
        }
    }

    ndarray_to_img(
        data=output_data,
        slice_axis=CartesianAxis3D.Z,
        parent_dir=output_path,
        folder_name="",
    )

    return class_labels

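
# A minimal sketch of the pore-isolation step above (illustrative only):
# on a 3x3 slice with one enclosed hole, binary_fill_holes fills the hole
# and logical_xor against the original recovers it as the 'pore' mask.
# >>> bw = np.array([[1, 1, 1], [1, 0, 1], [1, 1, 1]], dtype=np.int8)
# >>> sample = ndimage.binary_fill_holes(bw.astype(np.bool_))
# >>> np.logical_xor(sample, bw).astype(np.int8)
# array([[0, 0, 0],
#        [0, 1, 0],
#        [0, 0, 0]], dtype=int8)

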
def validate_yml(yml_input_file: Path, cli_entry_point: str) -> tuple[Path, Path, dict]:
    """
    Verify that the YAML file contains the required arguments.

    This function checks if the specified YAML file contains the necessary arguments
    and validates the CLI entry point. It also verifies that the input directory
    exists and creates the output directory if needed.

    Parameters
    ----------
    yml_input_file : Path
        The path to the YAML input file.
    cli_entry_point : str
        The CLI entry point to validate against the YAML file.

    Returns
    -------
    tuple[Path, Path, dict]
        A tuple containing:
        - Path to the input directory.
        - Path to the output directory.
        - Dictionary of YAML values.

    Raises
    ------
    ValueError
        If the CLI entry point is not found in the YAML file or if the input directory does not exist.

    Examples
    --------
    >>> yml_input_file = Path("path/to/input.yml")
    >>> cli_entry_point = "process_images"
    >>> validate_yml(yml_input_file, cli_entry_point)
    (PosixPath('path/to/images'), PosixPath('path/to/output'), {'cli_entry_points': ['process_images'], 'image_dir': 'path/to/images', 'out_dir': 'path/to/output'})
    """

    print(f"Processing specification file: {yml_input_file}")
    yml_vals = yaml_to_dict(yml_input_file)

    # check that cli_entry_point is valid
    if cli_entry_point not in yml_vals["cli_entry_points"]:
        raise ValueError(
            f"""Error. Incorrect yml format.
            This function requires the "cli_entry_points" key to contain {cli_entry_point},
            but currently contains the following options: {yml_vals["cli_entry_points"]} """
        )

    path_input = Path(yml_vals["image_dir"]).expanduser()
    if not path_input.is_dir():
        raise ValueError(f"Error, 'image_dir', {path_input} not found.")
    print(f"Input path: {path_input}")

    path_output = Path(yml_vals["out_dir"]).expanduser()
    path_output.mkdir(parents=True, exist_ok=True)
    print(f"Output path: {path_output}")

    return (path_input, path_output, yml_vals)

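
# A minimal specification file accepted by validate_yml (key names are read in
# the function above; the values shown are hypothetical):
# cli_entry_points:
#   - binary_to_semantic
# image_dir: path/to/images
# out_dir: path/to/output

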
def binarize(data: np.ndarray, val: int) -> np.ndarray:
    """
    Binarize the data based on a specified value.

    This function converts the values within the data matching the specified value to 1,
    and all other values to 0.

    Parameters
    ----------
    data : np.ndarray
        The input array to be binarized.
    val : int
        The value in the data to be binarized (converted to 1).

    Returns
    -------
    np.ndarray
        The binarized array with the same shape as the input data.

    Examples
    --------
    >>> data = np.array([[1, 2, 3], [4, 1, 6], [7, 8, 1]])
    >>> binarize(data, 1)
    array([[1, 0, 0],
           [0, 1, 0],
           [0, 0, 1]], dtype=int8)
    """

    bw_data = np.zeros(shape=data.shape, dtype=np.int8)
    np.place(bw_data, data == val, 1)

    return bw_data


def semantic_to_binary(yml_input_file: Path) -> bool:
    """
    Convert a semantic image stack to a binary image stack using a YAML configuration file.

    This function reads a YAML file to prepare a binary image stack from a semantic image stack.
    The selected class from the semantic stack is binarized based on the specified value.

    Parameters
    ----------
    yml_input_file : Path
        The path to the YAML input file containing configuration settings.

    Returns
    -------
    bool
        True if the binary image stack was successfully created.

    Examples
    --------
    >>> yml_input_file = Path("path/to/config.yml")
    >>> semantic_to_binary(yml_input_file)
    True
    """

    input_path, output_path, params = validate_yml(
        yml_input_file, "semantic_to_binary"
    )

    sel_class = params["selected_class"]
    sel_class_value = params["class_labels"][sel_class]["value"]

    input_data = read_images(input_path)

    bw_data = binarize(input_data, sel_class_value)

    return ndarray_to_img(
        data=bw_data,
        slice_axis=CartesianAxis3D.Z,
        parent_dir=output_path,
        folder_name="",
    )


def binary_to_semantic(yml_input_file: Path) -> bool:
    """
    Convert a binary image stack to a semantic image stack using a YAML configuration file.

    This function reads a YAML file to prepare a semantic image stack from a binary image stack.
    It is designed for a specific use case (NOMAD, AM tensile bars) as of 31 May 2024.

    Parameters
    ----------
    yml_input_file : Path
        The path to the YAML input file containing configuration settings.

    Returns
    -------
    bool
        True if the semantic image stack was successfully created.

    Examples
    --------
    >>> yml_input_file = Path("path/to/config.yml")
    >>> binary_to_semantic(yml_input_file)
    True
    """

    input_path, output_path, params = validate_yml(
        yml_input_file, "binary_to_semantic"
    )

    class_labels = binary_with_pores_to_semantic(
        input_path=input_path, output_path=output_path
    )

    print(f"class labels for semantic stack:\n{class_labels}")

    return True


def main_binary_to_semantic():
    """
    Run the binary to semantic conversion module from the command line.

    This function serves as the entry point for terminal-based access to the binary to semantic conversion module.
    It is invoked from the command line using the 'binary_to_semantic' command specified in pyproject.toml.
    The function processes a YAML input file to convert a binary image stack to a semantic image stack.

    Parameters
    ----------
    None

    Returns
    -------
    None

    Examples
    --------
    To run the binary to semantic conversion, use the following command in the terminal:
    $ binary_to_semantic path/to/input.yml
    """

    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="the .yml user input file")
    args = parser.parse_args()
    yml_input_file = args.input_file

    binary_to_semantic(yml_input_file=yml_input_file)

    print(f"{yml_input_file} processed!")


def main_semantic_to_binary():
    """
    Run the semantic to binary conversion module from the command line.

    This function serves as the entry point for terminal-based access to the semantic to binary conversion module.
    It is invoked from the command line using the 'semantic_to_binary' command specified in pyproject.toml.
    The function processes a YAML input file to convert a semantic image stack to a binary image stack.

    Parameters
    ----------
    None

    Returns
    -------
    None

    Examples
    --------
    To run the semantic to binary conversion, use the following command in the terminal:
    $ semantic_to_binary path/to/input.yml
    """

    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="the .yml user input file")
    args = parser.parse_args()
    yml_input_file = args.input_file

    semantic_to_binary(yml_input_file=yml_input_file)

    print(f"{yml_input_file} processed!")


def hdf_to_instance_properties(hdf_path: Path, group_path: str) -> InstanceProperties:
    """
    Read instance analysis data from an HDF5 file and create an InstanceProperties object.

    This function reads data from an HDF5 file that has had instance analysis performed
    with a pre-defined internal structure to create an InstanceProperties object.
    The group name should be the internal 'folder' in the HDF5 file containing the instance
    analysis property data.

    Parameters
    ----------
    hdf_path : Path
        The path to the HDF5 file containing the instance analysis data.
    group_path : str
        The internal 'folder' in the HDF5 file containing the instance analysis property data.

    Returns
    -------
    InstanceProperties
        An InstanceProperties object containing the instance analysis data.

    Examples
    --------
    >>> hdf_path = Path("path/to/instance_analysis.h5")
    >>> group_path = "instance_properties"
    >>> instance_properties = hdf_to_instance_properties(hdf_path, group_path)
    >>> print(instance_properties)
    InstanceProperties(source_name='instance_properties', labels=InstanceLabels(data=array([0, 1, 2, ...])), n_voxels=[NVoxel(value=...), ...], equivalent_sphere_diameters=[Length(value=..., unit=<Units.MICRON: 'micron'>), ...], centroids=Centroids(data=[Centroid(cx=Length(value=..., unit=<Units.MICRON: 'micron'>), cy=Length(value=..., unit=<Units.MICRON: 'micron'>), cz=Length(value=..., unit=<Units.MICRON: 'micron'>)), ...]), ellipsoids=BestFitEllipsoids(data=[BestFitEllipsoid(a=EllipsoidAxis(length=Length(value=..., unit=<Units.MICRON: 'micron'>), orientation=UnitVector(u=..., v=..., w=...)), b=EllipsoidAxis(length=Length(value=..., unit=<Units.MICRON: 'micron'>), orientation=UnitVector(u=..., v=..., w=...)), c=EllipsoidAxis(length=Length(value=..., unit=<Units.MICRON: 'micron'>), orientation=UnitVector(u=..., v=..., w=...))), ...]), surface_areas=EllipsoidSurfaceAreas(data=[Area(value=..., unit_squared=<Units.MICRON: 'micron'>), ...]), volumes=EllipsoidVolumes(data=[Volume(value=..., unit_cubed=<Units.MICRON: 'micron'>), ...]))
    """

    with h5py.File(hdf_path, "r") as f:
        instance_data = f[group_path]
        hdf_n_voxels = np.squeeze(instance_data["num_voxels"][:])
        hdf_equiv_diam = np.squeeze(instance_data["equivalent_sphere_diameters"][:])
        equiv_diam_unit = Units(
            instance_data["equivalent_sphere_diameters"].attrs["units"]
        )
        hdf_centroids = np.squeeze(instance_data["centroids"][:])
        centroid_unit = Units(instance_data["centroids"].attrs["units"])
        hdf_semi_axes = np.squeeze(instance_data["semi-axis_lengths"][:])
        semi_axes_unit = Units(instance_data["semi-axis_lengths"].attrs["units"])
        hdf_vectors = np.squeeze(instance_data["axis_vectors"][:])
        hdf_surface_areas = np.squeeze(instance_data["ellipsoid_surface_areas"][:])
        surface_area_unit = Units(
            instance_data["ellipsoid_surface_areas"].attrs["units_squared"]
        )
        hdf_volumes = np.squeeze(instance_data["ellipsoid_volumes"][:])
        volume_unit = Units(instance_data["ellipsoid_volumes"].attrs["units_cubed"])

    labels = np.arange(0, len(hdf_equiv_diam), dtype=int)
    n_voxels = [NVoxel(value=i) for i in hdf_n_voxels]
    equivalent_sphere_diameters = [
        Length(value=i, unit=equiv_diam_unit) for i in hdf_equiv_diam
    ]
    centroids = Centroids(
        data=[
            Centroid(
                cx=Length(value=i[0], unit=centroid_unit),
                cy=Length(value=i[1], unit=centroid_unit),
                cz=Length(value=i[2], unit=centroid_unit),
            )
            for i in hdf_centroids
        ]
    )

    ellipsoids = BestFitEllipsoids(
        [
            BestFitEllipsoid(
                a=EllipsoidAxis(
                    length=Length(value=i[0], unit=semi_axes_unit),
                    orientation=UnitVector(u=j[0], v=j[1], w=j[2]),
                ),
                b=EllipsoidAxis(
                    length=Length(value=i[1], unit=semi_axes_unit),
                    orientation=UnitVector(u=j[3], v=j[4], w=j[5]),
                ),
                c=EllipsoidAxis(
                    length=Length(value=i[2], unit=semi_axes_unit),
                    orientation=UnitVector(u=j[6], v=j[7], w=j[8]),
                ),
            )
            for i, j in zip(hdf_semi_axes, hdf_vectors)
        ]
    )

    surface_areas = EllipsoidSurfaceAreas(
        [Area(value=i, unit_squared=surface_area_unit) for i in hdf_surface_areas]
    )

    volumes = EllipsoidVolumes(
        [Volume(value=i, unit_cubed=volume_unit) for i in hdf_volumes]
    )

    instance_props = InstanceProperties(
        source_name=group_path,
        labels=InstanceLabels(data=labels),
        n_voxels=n_voxels,
        equivalent_sphere_diameters=equivalent_sphere_diameters,
        centroids=centroids,
        ellipsoids=ellipsoids,
        surface_areas=surface_areas,
        volumes=volumes,
    )

    # n_voxels: list[NVoxel]  # could have a InstanceImageStack
    # equivalent_sphere_diameters: list[Length]
    # centroids: Centroids  # could have a InstanceImageStack
    # ellipsoids: BestFitEllipsoids
    # surface_areas: SurfaceAreas

    return instance_props

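
# Expected HDF5 layout under `group_path`, as read above:
#   num_voxels
#   equivalent_sphere_diameters   (attrs: "units")
#   centroids                     (attrs: "units")
#   semi-axis_lengths             (attrs: "units")
#   axis_vectors
#   ellipsoid_surface_areas       (attrs: "units_squared")
#   ellipsoid_volumes             (attrs: "units_cubed")

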
def hdf_to_metadata(hdf_path: Path, dataset_path: str) -> MetaData:
    """
    Extract metadata from an HDF5 dataset containing string metadata attributes.

    This function reads metadata attributes from a specified dataset within an HDF5 file
    and returns a MetaData object.

    Parameters
    ----------
    hdf_path : Path
        The path to the HDF5 file containing the dataset.
    dataset_path : str
        The internal path to the dataset within the HDF5 file.

    Returns
    -------
    MetaData
        A MetaData object containing the extracted metadata.

    Examples
    --------
    >>> hdf_path = Path("path/to/data.h5")
    >>> dataset_path = "dataset"
    >>> metadata = hdf_to_metadata(hdf_path, dataset_path)
    >>> print(metadata)
    MetaData(data_volume=DataVolume(x_width=..., y_height=..., z_image_count=...), resolution=Resolution(dx=Length(value=..., unit=<Units.MICRON: 'micron'>), dy=Length(value=..., unit=<Units.MICRON: 'micron'>), dz=Length(value=..., unit=<Units.MICRON: 'micron'>)), pixel_units=<Units.MICRON: 'micron'>, origin=Origin(x0=Length(value=..., unit=<Units.MICRON: 'micron'>), y0=Length(value=..., unit=<Units.MICRON: 'micron'>), z0=Length(value=..., unit=<Units.MICRON: 'micron'>)))
    """

    with h5py.File(hdf_path, "r") as f:
        dataset = f[dataset_path]

        x_width = int(dataset.attrs["x_width (pixels)"])
        y_height = int(dataset.attrs["y_height (pixels)"])
        z_image_count = int(dataset.attrs["z_image_count (pixels)"])
        data_volume = DataVolume(
            x_width=x_width, y_height=y_height, z_image_count=z_image_count
        )

        pixel_units = Units(dataset.attrs["Resolution, units"])
        dx = Length(value=float(dataset.attrs["Resolution, dx"]), unit=pixel_units)
        dy = Length(value=float(dataset.attrs["Resolution, dy"]), unit=pixel_units)
        dz = Length(value=float(dataset.attrs["Resolution, dz"]), unit=pixel_units)
        resolution = Resolution(
            dx=dx,
            dy=dy,
            dz=dz,
        )

        origin_units = Units(dataset.attrs["Origin, units"])
        x0 = Length(value=float(dataset.attrs["Origin, x0"]), unit=origin_units)
        y0 = Length(value=float(dataset.attrs["Origin, y0"]), unit=origin_units)
        z0 = Length(value=float(dataset.attrs["Origin, z0"]), unit=origin_units)
        origin = Origin(
            x0=x0,
            y0=y0,
            z0=z0,
        )

        metadata = MetaData(
            data_volume=data_volume,
            resolution=resolution,
            pixel_units=pixel_units,
            origin=origin,
        )

    return metadata

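
# Sketch of writing the attributes this reader expects, on a hypothetical file
# ("data.h5" and the array shape are illustrative, not part of the module):
# >>> with h5py.File("data.h5", "w") as f:
# ...     d = f.create_dataset("dataset", data=np.zeros((10, 256, 256)))
# ...     d.attrs["x_width (pixels)"] = 256
# ...     d.attrs["y_height (pixels)"] = 256
# ...     d.attrs["z_image_count (pixels)"] = 10
# ...     for k in ("dx", "dy", "dz"):
# ...         d.attrs[f"Resolution, {k}"] = 1.0
# ...     d.attrs["Resolution, units"] = "micron"
# ...     for k in ("x0", "y0", "z0"):
# ...         d.attrs[f"Origin, {k}"] = 0.0
# ...     d.attrs["Origin, units"] = "micron"

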
def centroids_to_ndarray(centroids: Centroids) -> np.ndarray:
    """
    Convert centroid data type into a NumPy array.

    This function converts a Centroids object into a NumPy array with each row representing
    the (cx, cy, cz) coordinates of a centroid.

    Parameters
    ----------
    centroids : Centroids
        The Centroids object containing the centroid data.

    Returns
    -------
    np.ndarray
        A NumPy array with shape (n, 3), where n is the number of centroids, and each row
        contains the (cx, cy, cz) coordinates of a centroid.

    Examples
    --------
    >>> centroids = Centroids(data=[
    ...     Centroid(cx=Length(0.0, Units.MICRON), cy=Length(0.0, Units.MICRON), cz=Length(0.0, Units.MICRON)),
    ...     Centroid(cx=Length(1.0, Units.MICRON), cy=Length(1.0, Units.MICRON), cz=Length(1.0, Units.MICRON)),
    ...     Centroid(cx=Length(2.0, Units.MICRON), cy=Length(2.0, Units.MICRON), cz=Length(2.0, Units.MICRON))
    ... ])
    >>> centroids_to_ndarray(centroids)
    array([[0., 0., 0.],
           [1., 1., 1.],
           [2., 2., 2.]])
    """

    data = centroids.data
    cx = [i.cx.value for i in data]
    cy = [i.cy.value for i in data]
    cz = [i.cz.value for i in data]

    ndarray = np.array((cx, cy, cz), dtype=float).T

    return ndarray


def ellipsoids_to_ndarray(
    ellipsoids: BestFitEllipsoids,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Convert ellipsoid data type into NumPy arrays.

    This function converts a BestFitEllipsoids object into two NumPy arrays:
    one for the axis lengths and one for the axis vectors.

    Parameters
    ----------
    ellipsoids : BestFitEllipsoids
        The BestFitEllipsoids object containing the ellipsoid data.

    Returns
    -------
    Tuple[np.ndarray, np.ndarray]
        A tuple containing:
        - A NumPy array with shape (n, 3) for the axis lengths, where n is the number of ellipsoids.
        - A NumPy array with shape (n, 9) for the axis vectors, where n is the number of ellipsoids.

    Examples
    --------
    >>> ellipsoids = BestFitEllipsoids(data=[
    ...     BestFitEllipsoid(
    ...         a=EllipsoidAxis(length=Length(5.0, Units.MICRON), orientation=UnitVector(u=1.0, v=0.0, w=0.0)),
    ...         b=EllipsoidAxis(length=Length(3.0, Units.MICRON), orientation=UnitVector(u=0.0, v=1.0, w=0.0)),
    ...         c=EllipsoidAxis(length=Length(2.0, Units.MICRON), orientation=UnitVector(u=0.0, v=0.0, w=1.0))
    ...     ),
    ...     BestFitEllipsoid(
    ...         a=EllipsoidAxis(length=Length(6.0, Units.MICRON), orientation=UnitVector(u=0.707, v=0.707, w=0.0)),
    ...         b=EllipsoidAxis(length=Length(4.0, Units.MICRON), orientation=UnitVector(u=-0.707, v=0.707, w=0.0)),
    ...         c=EllipsoidAxis(length=Length(3.0, Units.MICRON), orientation=UnitVector(u=0.0, v=0.0, w=1.0))
    ...     )
    ... ])
    >>> axis_lengths, axis_vectors = ellipsoids_to_ndarray(ellipsoids)
    >>> axis_lengths
    array([[5., 3., 2.],
           [6., 4., 3.]])
    >>> axis_vectors
    array([[ 1.   ,  0.   ,  0.   ,  0.   ,  1.   ,  0.   ,  0.   ,  0.   ,  1.   ],
           [ 0.707,  0.707,  0.   , -0.707,  0.707,  0.   ,  0.   ,  0.   ,  1.   ]])
    """

    data = ellipsoids.data
    # axis lengths:
    a = [i.a.length.value for i in data]
    b = [i.b.length.value for i in data]
    c = [i.c.length.value for i in data]
    axis_lengths = np.array((a, b, c), dtype=float).T

    # axis vectors:
    a_u = [i.a.orientation.u for i in data]
    a_v = [i.a.orientation.v for i in data]
    a_w = [i.a.orientation.w for i in data]

    b_u = [i.b.orientation.u for i in data]
    b_v = [i.b.orientation.v for i in data]
    b_w = [i.b.orientation.w for i in data]

    c_u = [i.c.orientation.u for i in data]
    c_v = [i.c.orientation.v for i in data]
    c_w = [i.c.orientation.w for i in data]

    axis_vectors = np.array(
        (a_u, a_v, a_w, b_u, b_v, b_w, c_u, c_v, c_w), dtype=float
    ).T

    return axis_lengths, axis_vectors


def surface_areas_to_ndarray(surface_areas: EllipsoidSurfaceAreas) -> np.ndarray:
    """
    Convert surface area data type into a NumPy array.

    This function converts an EllipsoidSurfaceAreas object into a NumPy array
    containing the surface area values.

    Parameters
    ----------
    surface_areas : EllipsoidSurfaceAreas
        The EllipsoidSurfaceAreas object containing the surface area data.

    Returns
    -------
    np.ndarray
        A NumPy array containing the surface area values.

    Examples
    --------
    >>> surface_areas = EllipsoidSurfaceAreas(data=[
    ...     Area(value=100.0, unit_squared=Units.MICRON),
    ...     Area(value=200.0, unit_squared=Units.MICRON),
    ...     Area(value=300.0, unit_squared=Units.MICRON)
    ... ])
    >>> surface_areas_to_ndarray(surface_areas)
    array([100., 200., 300.])
    """

    data = surface_areas.data
    areas = [i.value for i in data]

    ndarray = np.array(areas, dtype=float)

    return ndarray


def volumes_to_ndarray(ellipsoid_volumes: EllipsoidVolumes) -> np.ndarray:
    """
    Convert ellipsoid volume data type into a NumPy array.

    This function converts an EllipsoidVolumes object into a NumPy array
    containing the volume values.

    Parameters
    ----------
    ellipsoid_volumes : EllipsoidVolumes
        The EllipsoidVolumes object containing the volume data.

    Returns
    -------
    np.ndarray
        A NumPy array containing the volume values.

    Examples
    --------
    >>> ellipsoid_volumes = EllipsoidVolumes(data=[
    ...     Volume(value=500.0, unit_cubed=Units.MICRON),
    ...     Volume(value=1000.0, unit_cubed=Units.MICRON),
    ...     Volume(value=1500.0, unit_cubed=Units.MICRON)
    ... ])
    >>> volumes_to_ndarray(ellipsoid_volumes)
    array([ 500., 1000., 1500.])
    """

    data = ellipsoid_volumes.data
    volumes = [i.value for i in data]

    ndarray = np.array(volumes, dtype=float)

    return ndarray


def rmdir(directory: Path) -> None:
    """
    Recursively delete a directory and all its contents.
    (credit to: https://stackoverflow.com/questions/13118029/deleting-folders-in-python-recursively/49782093#49782093)

    This function deletes the specified directory and all its contents, including
    subdirectories and files. If the directory does not exist, the function does nothing.

    Parameters
    ----------
    directory : Path
        The path to the directory to be deleted.

    Returns
    -------
    None

    Examples
    --------
    >>> from pathlib import Path
    >>> directory = Path("path/to/directory")
    >>> rmdir(directory)
    """

    if not directory.exists():
        return

    for item in directory.iterdir():
        if item.is_dir():
            rmdir(item)
        else:
            item.unlink()
    directory.rmdir()

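
# Note: for an existing directory, shutil.rmtree(directory) from the standard
# library has the same effect; rmdir() above additionally returns silently
# when the directory does not exist.

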
def compare_files(file1: Path, file2: Path, ignore_words: list[str]) -> bool:
    """
    Compare two files line-by-line, ignoring lines that contain specified words.

    This function compares two files line-by-line and ignores lines that contain
    any of the words specified in the `ignore_words` list. If the files are identical
    up to the allowed differences, the function returns True. Otherwise, it returns False.

    Parameters
    ----------
    file1 : Path
        Path to the first file for comparison.
    file2 : Path
        Path to the second file for comparison.
    ignore_words : list of str
        List of words that cause a particular line to be ignored during comparison.
        Use an empty list for strict word-by-word comparison. Use a non-empty list,
        e.g., ["date", "time"], to ignore lines with these words, as they may differ
        from file to file.

    Returns
    -------
    bool
        True if the two files are the same (up to any allowed differences in the
        ignore_words list), False if the two files are different.

    Examples
    --------
    >>> file1 = Path("path/to/file1.txt")
    >>> file2 = Path("path/to/file2.txt")
    >>> ignore_words = ["date", "time"]
    >>> compare_files(file1, file2, ignore_words)
    True
    """

    with open(file=file1, mode="r", encoding="utf-8") as fa:
        with open(file=file2, mode="r", encoding="utf-8") as fb:
            dba = fa.readlines()
            dbb = fb.readlines()

    if len(dba) == len(dbb):
        for k, line in enumerate(dba):
            if line != dbb[k]:
                # if "Autogenerated" in line:
                iw_in_fa_line = [iw in line for iw in ignore_words]
                iw_in_fb_line = [iw in dbb[k] for iw in ignore_words]
                in_either_line = iw_in_fa_line + iw_in_fb_line
                # if any(iw in [line + dbb[k]] for iw in ignore_words):
                if any(in_either_line):
                    # skip comparison of the ignore_words case(s)
                    print("Skip comparison of word in ignore_word.")
                else:
                    print(f"Files differ at line {k}:")
                    print(f"\t file1: {line}")
                    print(f"\t file2: {dbb[k]}")
                    return False
    else:
        print("Files have non-equal line length.")
        print("Line-by-line comparison not performed.")
        return False

    return True  # files are the same


def date_time_utc() -> str:
    """
    Return the current date and time in ISO format in the UTC time zone.

    This function returns the current date and time in ISO 8601 format, with the time zone
    specified as UTC. The colons (:) and periods (.) in the time string are replaced with
    underscores (_) for compatibility with file naming conventions.

    Returns
    -------
    str
        The current date and time in ISO format in the UTC time zone.

    Examples
    --------
    >>> date_time_utc()
    '2024-08-13_UTC_12_34_56_789012'
    """

    # utcnow() is deprecated; take an aware datetime in UTC, then drop the
    # tzinfo so the string matches the documented format (no offset suffix)
    ts = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    ts = ts.replace(":", "_").replace(".", "_")  # overwrite ":", "." with "_"
    ts = ts.replace("T", "_UTC_")  # overwrite "T" with UTC time zone indication
    return ts


def underline() -> str:
    """
    Return a commented underline composed of 78 dashes ('-') and a newline character ('\\n').

    This function generates a string that represents a commented underline
    consisting of 78 dashes prefixed with a comment character ('* ') and
    followed by a newline character.

    Returns
    -------
    str
        A string representing a commented underline.

    Examples
    --------
    >>> underline()
    '* ------------------------------------------------------------------------------\\n'
    """

    return "* " + "".join(repeat("-", 78)) + "\n"


def in_a_but_not_in_b(*, a: Iterable, b: Iterable) -> Iterable:
    """
    Return items in Iterable `a` that are not in Iterable `b`.

    This function takes two iterables `a` and `b`, and returns a tuple containing
    all the items that are present in `a` but not in `b`.

    Parameters
    ----------
    a : Iterable
        The first iterable to compare.
    b : Iterable
        The second iterable to compare.

    Returns
    -------
    Tuple
        A tuple containing items that are in `a` but not in `b`.

    Examples
    --------
    >>> a = [1, 2, 3, 4]
    >>> b = [3, 4, 5, 6]
    >>> in_a_but_not_in_b(a=a, b=b)
    (1, 2)
    """

    result = ()  # empty tuple
    for item in a:
        if item not in b:
            result = result + (item,)

    return result

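
# Equivalent one-line sketch: tuple(item for item in a if item not in b)

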
def pairwise(x: Iterable) -> Iterable:
    """
    Return successive overlapping pairs taken from the input iterable.

    `itertools.pairwise` provides this in Python 3.10
    (https://docs.python.org/3/library/itertools.html#itertools.pairwise),
    but we currently use 3.9, so we implement `pairwise` here.

    The number of 2-tuples in the output iterator will be one fewer than the
    number of inputs. It will be empty if the input iterable has fewer than
    two values.

    Parameters
    ----------
    x : Iterable
        The input iterable from which to generate pairs.

    Returns
    -------
    Iterable[Tuple]
        An iterable of 2-tuples containing successive overlapping pairs from the input iterable.

    Examples
    --------
    >>> list(pairwise('ABCDEFG'))
    [('A', 'B'), ('B', 'C'), ('C', 'D'), ('D', 'E'), ('E', 'F'), ('F', 'G')]
    >>> list(pairwise([1, 2, 3, 4]))
    [(1, 2), (2, 3), (3, 4)]
    """

    a, b = tee(x)
    next(b, None)
    return zip(a, b)


def pairwise_circular(x: Iterable) -> Iterable:
    """
    Return successive overlapping pairs taken from the input iterable,
    wrapping around so the last item is paired with the first.

    The number of 2-tuples in the output iterator equals the number of
    inputs, since the final pair wraps from the last item back to the first.

    Parameters
    ----------
    x : Iterable
        The input iterable from which to generate pairs. It is iterated twice,
        so it must be a reusable iterable (e.g., a list), not a one-shot iterator.

    Returns
    -------
    Iterable[Tuple]
        An iterable of 2-tuples containing successive overlapping pairs from
        the input iterable, including the wrap-around pair.

    Examples
    --------
    >>> list(pairwise_circular('ABCD'))
    [('A', 'B'), ('B', 'C'), ('C', 'D'), ('D', 'A')]
    >>> list(pairwise_circular([1, 2, 3, 4]))
    [(1, 2), (2, 3), (3, 4), (4, 1)]
    """

    a = cycle(x)
    next(a)
    return zip(x, a)


def metadata_to_dict(metadata: MetaData) -> dict:
    """
    Convert MetaData to a dictionary.

    This function converts a MetaData object to a dictionary representation.

    Parameters
    ----------
    metadata : MetaData
        The metadata of the Data Volume.

    Returns
    -------
    dict
        A dictionary containing the metadata.

    Examples
    --------
    >>> metadata = MetaData(
    ...     data_volume=DataVolume(x_width=256, y_height=256, z_image_count=10),
    ...     resolution=Resolution(
    ...         dx=Length(1.0, Units.MICRON),
    ...         dy=Length(1.0, Units.MICRON),
    ...         dz=Length(1.0, Units.MICRON)
    ...     ),
    ...     pixel_units=Units.MICRON,
    ...     origin=Origin(
    ...         x0=Length(0.0, Units.MICRON),
    ...         y0=Length(0.0, Units.MICRON),
    ...         z0=Length(0.0, Units.MICRON)
    ...     )
    ... )
    >>> metadata_to_dict(metadata)
    {
        'x_width (pixels)': '256',
        'y_height (pixels)': '256',
        'z_image_count (pixels)': '10',
        'Resolution, dx': '1.0',
        'Resolution, dy': '1.0',
        'Resolution, dz': '1.0',
        'Resolution, units': 'micron',
        'Origin, x0': '0.0',
        'Origin, y0': '0.0',
        'Origin, z0': '0.0',
        'Origin, units': 'micron'
    }
    """

    data_volume = metadata.data_volume
    resolution = metadata.resolution
    # pixel_units = metadata.pixel_units
    origin = metadata.origin

    x_width = data_volume.x_width
    y_height = data_volume.y_height
    z_image_count = data_volume.z_image_count

    dx = resolution.dx
    dy = resolution.dy
    dz = resolution.dz

    meta_dict = {
        "x_width (pixels)": str(x_width),
        "y_height (pixels)": str(y_height),
        "z_image_count (pixels)": str(z_image_count),
        "Resolution, dx": str(dx.value),
        "Resolution, dy": str(dy.value),
        "Resolution, dz": str(dz.value),
        "Resolution, units": str(dx.unit.value),
        "Origin, x0": str(origin.x0.value),
        "Origin, y0": str(origin.y0.value),
        "Origin, z0": str(origin.z0.value),
        "Origin, units": str(origin.x0.unit.value),
    }
    # print(meta_dict)

    return meta_dict


def yaml_to_dict(path_file_input: Path) -> dict:
    """
    Convert a YAML file to a dictionary.

    This function reads a YAML file and converts its contents to a dictionary.

    Parameters
    ----------
    path_file_input : Path
        The full path to the input YAML file.

    Returns
    -------
    dict
        A dictionary containing the contents of the YAML file.

    Raises
    ------
    TypeError
        If the file type is not supported.
    OSError
        If there is an error opening or decoding the YAML file.

    Examples
    --------
    >>> path_file_input = Path("path/to/config.yml")
    >>> yaml_to_dict(path_file_input)
    {'key1': 'value1', 'key2': 'value2', ...}
    """

    file_type = Path(path_file_input).suffix.casefold()
    supported_types = (".yaml", ".yml")

    if file_type not in supported_types:
        raise TypeError("Only file types .yaml and .yml are supported.")

    try:
        with open(file=path_file_input, mode="r", encoding="utf-8") as stream:
            db = yaml.load(stream, Loader=yaml.SafeLoader)  # load YAML file
    except yaml.YAMLError as error:
        print(f"Error with YAML file: {error}")
        print(f"Could not open or decode: {path_file_input}")
        raise OSError from error

    print(f"Success: database created from file: {path_file_input}")
    print("key, value, type")
    print("---, -----, ----")
    for key, value in db.items():
        print(f"{key}, {value}, {type(value)}")

    return db


def dict_to_yaml(db: dict, file: str) -> Path:
    """
    Convert a dictionary to a YAML file.

    This function writes the contents of a dictionary to a YAML file.

    Parameters
    ----------
    db : dict
        The dictionary to be converted to YAML.
    file : str
        The path to the output YAML file.

    Returns
    -------
    Path
        The path to the created YAML file.

    Examples
    --------
    >>> db = {'key1': 'value1', 'key2': 'value2'}
    >>> file = "path/to/output.yml"
    >>> dict_to_yaml(db, file)
    PosixPath('path/to/output.yml')
    """

    with open(file, "w", encoding="utf-8") as out_file:
        yaml.dump(db, out_file, default_flow_style=False)  # write dictionary to YAML

    return Path(file)

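
# Round-trip sketch (hypothetical path "out.yml"): a dictionary written with
# dict_to_yaml can be read back with yaml_to_dict.
# >>> p = dict_to_yaml({"key1": "value1"}, "out.yml")
# >>> yaml_to_dict(p)["key1"]
# 'value1'

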
def ndarray_to_img(
    *,
    data: np.ndarray,
    slice_axis: CartesianAxis3D,
    parent_dir: Path,
    folder_name: str,
    pad_length: int = 4,
    file_type: str = ".tif",
) -> bool:
    """
    Convert an ndarray to an image stack and save it to a specified directory.

    This function takes a NumPy ndarray and creates an image stack, saving the images
    into a user-specified directory.

    Parameters
    ----------
    data : np.ndarray
        The semantic labels.
    slice_axis : CartesianAxis3D
        The axis along which to slice the ndarray:
        - 0 for Z axis
        - 1 for Y axis
        - 2 for X axis
    parent_dir : Path
        The parent directory to save the image folder.
    folder_name : str
        The folder name to save images.
    pad_length : int, optional
        The number of digits to pad the file names with (default is 4).
    file_type : str, optional
        The image file type (default is ".tif").

    Returns
    -------
    bool
        True if the images were successfully created.

    Examples
    --------
    >>> data = np.random.randint(0, 255, (10, 256, 256), dtype=np.uint8)
    >>> slice_axis = CartesianAxis3D.Z
    >>> parent_dir = Path("path/to/save")
    >>> folder_name = "image_stack"
    >>> ndarray_to_img(data=data, slice_axis=slice_axis, parent_dir=parent_dir, folder_name=folder_name)
    True
    """

    img_dir = parent_dir.joinpath(folder_name).expanduser()
    img_dir.mkdir(parents=True, exist_ok=True)
    n_slices = data.shape[slice_axis.value]

    for i in range(n_slices):
        fname = f"{img_dir}/{i:0{pad_length}}{file_type}"
        mode = "L" if data.dtype == np.int8 else None

        match slice_axis:
            case CartesianAxis3D.Z:
                img = Image.fromarray(data[i, :, :], mode=mode)
            case CartesianAxis3D.Y:
                img = Image.fromarray(data[:, i, :], mode=mode)
            case CartesianAxis3D.X:
                img = Image.fromarray(data[:, :, i], mode=mode)
            case _:
                raise ValueError(
                    f"Unknown slice_axis value {slice_axis}, value must be 0, 1, or 2."
                )

        img.save(fname)
    return True

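
# Round-trip sketch (hypothetical `vol` and `out`): a volume sliced along Z by
# ndarray_to_img can be re-read with read_images below, recovering its shape.
# >>> vol = np.zeros((10, 256, 256), dtype=np.uint8)
# >>> ndarray_to_img(data=vol, slice_axis=CartesianAxis3D.Z, parent_dir=out, folder_name="stack")
# >>> read_images(out.joinpath("stack")).shape
# (10, 256, 256)

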
def read_images(
    file_dir: Path,
    file_type: str = ".tif",
) -> np.ndarray:
    """
    Read images from a directory and return a NumPy array representation of
    the images.

    Parameters
    ----------
    file_dir : Path
        The fully pathed location of the images.
    file_type : str, optional
        The image type (default is ".tif").

    Returns
    -------
    np.ndarray
        A NumPy array representation of the images.

    Raises
    ------
    FileNotFoundError
        If no images of the specified type are found in the directory.

    Examples
    --------
    >>> file_dir = Path("path/to/images")
    >>> read_images(file_dir, file_type=".tif")
    array([[[...], [...], ...], [[...], [...], ...], ...])
    """

    image_list = list(glob.glob(f"{str(file_dir.as_posix())}/*{file_type}"))

    if len(image_list) == 0:
        raise FileNotFoundError(
            f"File type of {file_type} not found in directory: {str(file_dir)}"
        )

    image_list.sort()  # sort images in ascending order

    image_stack = np.array([np.array(Image.open(f)) for f in image_list])

    # Handle the case where only a single image is read
    if image_stack.ndim < 3:
        image_stack = np.expand_dims(image_stack, axis=-1)
        print(f"Only single image read, new image array size: {image_stack.shape}")
    else:
        print(f"Images read, image array size: {image_stack.shape}")

    return image_stack