Coverage for src/recon3d/hdf_io.py: 89%
186 statements
1"""
2HDF5 Processing Module
3=======================
5This module provides a set of functions for creating, modifying, and interacting with HDF5 files,
6as well as converting between image stacks and voxel data. It includes command line interfaces
7for some of the key functionalities.
9Functions
10---------
11create_hdf(hdf_path, base_container)
12 Prescribe foundational structure of the HDF5 file.
14modify_hdf_dataset(hdf_path, dataset_loc, data, dtype, operation, mutable)
15 Modify an existing HDF5 file to alter data within a container.
17write_attr_dict(hdf_path, d, dataset_loc)
18 Write attributes to a specified dataset in the HDF5 file.
20write_h5(hdf_path, data)
21 Write data to an HDF5 file.
23add_to_h5(data, hdf_path, hdf_group)
24 Add data to an HDF5 file based on its type.
26image_to_voxel(yml_path)
27 Populate the HDF5 file with the semantic segmentation image stack
28 specified in the YAML file, including metadata.
30image_to_voxel_command_line()
31 The command line wrapper for the `image_to_voxel` function.
33voxel_to_image(yml_path)
34 Save the image data within the HDF5 file as TIFFs in a new directory.
36voxel_to_image_command_line()
37 The command line wrapper for the `voxel_to_image` function.
39Examples
40--------
41To create an HDF5 file with a base container:
43 >>> create_hdf(Path("output.h5"), "base_group")
45To modify an HDF5 dataset:
47 >>> data = np.random.rand(10, 5)
48 >>> modify_hdf_dataset(Path("output.h5"), "group/dataset", data, np.float64, "create", True)
50To add data to an HDF5 file based on its type:
52 >>> centroids = Centroids(...)
53 >>> add_to_h5(centroids, Path("output.h5"), "group_name")
55To populate an HDF5 file with image stack data from a YAML file:
57 >>> yml_path = Path("config.yml")
58 >>> hdf5_path = image_to_voxel(yml_path)
60To convert voxel data in an HDF5 file to image files:
62 >>> yml_path = Path("config.yml")
63 >>> image_dir = voxel_to_image(yml_path)
65To run the `image_to_voxel` function from the command line:
67 $ python -m your_module_name input_file.yml
69To run the `voxel_to_image` function from the command line:
71 $ python -m your_module_name input_file.yml
72"""
import argparse
from functools import singledispatch
from pathlib import Path

import h5py
import numpy as np

# from recon3d.feature_analysis import SemanticImageStack
from recon3d.types import *
import recon3d.instance_analysis as ia
import recon3d.types as cs
import recon3d.utility as ut


### BASE/HELPER FUNCTIONS ###
def create_hdf(hdf_path: Path, base_container: str) -> bool:
    """
    Prescribe foundational structure of the HDF5 file.

    This function creates an HDF5 file at the specified path and initializes
    it with a base container group.

    Parameters
    ----------
    hdf_path : Path
        The path to the location of the HDF5 file.
    base_container : str
        The name of the base container group in the HDF5 file.

    Returns
    -------
    bool
        True if the HDF5 file is created successfully.

    Examples
    --------
    >>> create_hdf(Path("output.h5"), "base_group")
    True
    """

    with h5py.File(hdf_path, "w") as file:
        file.create_group(base_container)

    return True


def modify_hdf_dataset(
    hdf_path: Path,
    dataset_loc: str,
    data: np.ndarray,
    dtype: type,
    operation: str,
    mutable: bool,
) -> bool:
    """
    Modify an existing HDF5 file to alter data within a container.

    This function modifies an HDF5 file by creating, appending, or overwriting
    a dataset at the specified location.

    Parameters
    ----------
    hdf_path : Path
        The path to the location of the HDF5 file.
    dataset_loc : str
        The internal path to the dataset in the HDF5 file
        (e.g., "file/container/container2/dataset").
    data : np.ndarray
        The array to be written to the HDF5 file at the specified dataset
        location.
    dtype : type
        The data type of the dataset (e.g., np.float64, np.int32, np.uint16;
        for strings: h5py.special_dtype(vlen=str)).
    operation : str
        The operation to perform on the dataset:
        - "create": create a new dataset
        - "append": append to an existing dataset along dimension 0
        - "overwrite": overwrite an existing dataset along dimension 0
          (e.g., shrinking the dataset)
    mutable : bool
        If True, the dataset is resizable along the first dimension,
        with no size limit.

    Returns
    -------
    bool
        True if the operation is successful.

    Raises
    ------
    ValueError
        If the dataset already exists when trying to create a new one.
    KeyError
        If an unsupported operation is requested.

    Examples
    --------
    >>> data = np.random.rand(10, 5)
    >>> modify_hdf_dataset(Path("output.h5"), "group/dataset", data, np.float64, "create", True)
    True

    >>> new_data = np.random.rand(5, 5)
    >>> modify_hdf_dataset(Path("output.h5"), "group/dataset", new_data, np.float64, "append", True)
    True

    >>> overwrite_data = np.random.rand(8, 5)
    >>> modify_hdf_dataset(Path("output.h5"), "group/dataset", overwrite_data, np.float64, "overwrite", True)
    True
    """

    # if mutable, allow the dataset to grow without bound along the first
    # dimension; the initial shape matches the incoming data
    shape, maxshape = list(data.shape), list(data.shape)
    if mutable:
        maxshape[0] = None
    shape, maxshape = tuple(shape), tuple(maxshape)
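    # h5py permits resizing only for chunked datasets, which is why
    # chunks=True accompanies maxshape when the dataset is created below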

    with h5py.File(hdf_path, "r+") as hdf_file:
        # with h5py.File(hdf_path, "r+", locking=False) as hdf_file:
        if operation == "create":
            if dataset_loc not in hdf_file:
                dataset = hdf_file.create_dataset(
                    name=dataset_loc,
                    data=data,
                    shape=shape,
                    dtype=dtype,
                    # compression="gzip",  # TODO 4/2/24 compression not allowing read in HDFView
                    maxshape=maxshape,
                    chunks=True,
                )
                return True
            else:
                error_string = f"Dataset already exists, cannot {operation}"
                raise ValueError(error_string)

        dataset = hdf_file[dataset_loc]

        if operation == "append":
            # appending to dataset
            dataset.resize(dataset.shape[0] + data.shape[0], axis=0)
            dataset[-data.shape[0] :] = data
            return True

        elif operation == "overwrite":
            # shrinking dataset
            dataset.resize(data.shape[0], axis=0)
            dataset[:] = data
            return True

        else:
            raise KeyError(
                f'operation "{operation}" requested; only "create", "append", '
                'and "overwrite" are currently supported.'
            )


def write_attr_dict(hdf_path: Path, d: dict, dataset_loc: str) -> bool:
    """
    Write attributes to a group or dataset in the HDF5 file from the
    provided dictionary.

    Parameters
    ----------
    hdf_path : Path
        The path to the location of the HDF5 file.
    d : dict
        The attributes to add.
    dataset_loc : str
        The internal path to the dataset in the HDF5 file
        (e.g., "file/container/container2/dataset").

    Returns
    -------
    bool
        True if the attributes are written successfully.
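
    Examples
    --------
    A minimal sketch; assumes "group/dataset" was already created in
    "output.h5" (e.g., via `modify_hdf_dataset`):

    >>> attrs = {"units": "microns", "source": Path("images/")}
    >>> write_attr_dict(Path("output.h5"), attrs, "group/dataset")
    True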
236 """
238 with h5py.File(hdf_path, "r+") as hdf_file:
239 loc = hdf_file[dataset_loc]
240 # print(d, type(d))
241 for key, value in d.items():
242 # print(key, value)
243 if isinstance(value, Path):
244 value = str(value)
245 dt = h5py.special_dtype(vlen=str)
247 loc.attrs.create(name=key, data=value, dtype=dt)
249 return


### INITIAL DATA CREATION ###
def write_h5(hdf_path: Path, semantic_stack: SemanticImageStack) -> bool:
    """
    Write a new HDF5 file.

    The data will be written into a dataset; the metadata will be written
    as attributes of the dataset.

    Parameters
    ----------
    hdf_path : Path
        The path to the location of the HDF5 file.
    semantic_stack : SemanticImageStack
        The semantic image stack.

    Returns
    -------
    bool
        True if the file is written successfully.
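
    Examples
    --------
    A minimal sketch; assumes `stack` is an existing SemanticImageStack
    (construction details are defined in recon3d.types):

    >>> stack = SemanticImageStack(...)
    >>> write_h5(Path("output.h5"), stack)
    True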
261 """
263 base_container = "VoxelData"
264 create_hdf(hdf_path=hdf_path, base_container=base_container)
266 img_data = semantic_stack.data
267 dtype = type(
268 img_data.flat[0]
269 ) # data.item(0) will return python dtype, which may be better
270 dataset_loc = f"{base_container}/{semantic_stack.name}"
271 modify_hdf_dataset(
272 hdf_path=hdf_path,
273 dataset_loc=dataset_loc,
274 data=img_data,
275 dtype=dtype,
276 operation="create",
277 mutable=False,
278 )
280 # TODO 4/2/2024 write metadata as attributes
281 md = semantic_stack.metadata
282 # TODO 5/7/2024 make metadata strings add to h5
283 metadata_dict = ut.metadata_to_dict(md)
284 # d = {"height": }
285 write_attr_dict(hdf_path=hdf_path, d=metadata_dict, dataset_loc=dataset_loc)


### ALL OTHER FUNCTIONALITIES ###
@singledispatch
def add_to_h5(
    data,
    h5_path: Path,
    h5_group: str,  # internal "folder" in the hdf to save data
):
    """
    Add data to an HDF5 file.

    This is a generic function that adds data to an HDF5 file. Specific
    implementations are provided for different types of data.

    Parameters
    ----------
    data : object
        The data to be added to the HDF5 file.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Raises
    ------
    NotImplementedError
        If no handler is registered for the type of `data`.

    Examples
    --------
    >>> add_to_h5(some_data, Path("output.h5"), "group_name")
    NotImplementedError: No handler for type <class 'type'>
    """
    _ = data
    __ = h5_path
    ___ = h5_group
    raise NotImplementedError(f"No handler for type {type(data)}")
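

# Each handler below is registered on the generic `add_to_h5` via
# `@add_to_h5.register(<Type>)`. `functools.singledispatch` routes each call
# to the handler matching type(data), so callers always invoke the same
# `add_to_h5(data, h5_path, h5_group)` regardless of the payload type.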


@add_to_h5.register(Centroids)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add centroid data to an HDF5 file.

    This function processes and adds centroid data to an HDF5 file.

    Parameters
    ----------
    data : Centroids
        The centroid data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> centroids = Centroids(...)
    >>> add_to_h5(centroids, Path("output.h5"), "group_name")
    """
    units = data.data[0].cx.unit.value
    centroid_data = ut.centroids_to_ndarray(centroids=data)

    # add data
    dataset_loc = f"{h5_group}/centroids"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=centroid_data,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "nlabels": str(centroid_data.shape[0]),
        "units": str(units),
        "ordering": str("X, Y, Z"),
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )


@add_to_h5.register(BestFitEllipsoids)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add best fit ellipsoid data to an HDF5 file.

    This function processes and adds best fit ellipsoid data to an HDF5 file.

    Parameters
    ----------
    data : BestFitEllipsoids
        The best fit ellipsoid data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> ellipsoids = BestFitEllipsoids(...)
    >>> add_to_h5(ellipsoids, Path("output.h5"), "group_name")
    """

    axis_units = data.data[0].a.length.unit.value
    axis_lengths, axis_vectors = ut.ellipsoids_to_ndarray(data)

    # add data
    dataset_loc = f"{h5_group}/semi-axis_lengths"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=axis_lengths,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "notes": str("property of best fit ellipsoid"),
        "nlabels": str(axis_lengths.shape[0]),
        "units": str(axis_units),
        "ordering": str("a, b, c, with a > b > c"),
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )

    # add orientation data
    dataset_loc = f"{h5_group}/axis_vectors"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=axis_vectors,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "notes": str("property of best fit ellipsoid"),
        "nlabels": str(axis_lengths.shape[0]),
        "units": "unit vector",
        "ordering": str(
            "u, v, w for each axis a, b, c \n\t(a_u, a_v, a_w, b_u, b_v, b_w, c_u, c_v, c_w)"
        ),
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )


@add_to_h5.register(EllipsoidSurfaceAreas)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add ellipsoid surface area data to an HDF5 file.

    This function processes and adds ellipsoid surface area data to an
    HDF5 file.

    Parameters
    ----------
    data : EllipsoidSurfaceAreas
        The ellipsoid surface area data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> surface_areas = EllipsoidSurfaceAreas(...)
    >>> add_to_h5(surface_areas, Path("output.h5"), "group_name")
    """

    area_units = data.data[0].unit_squared.value
    surface_areas = ut.surface_areas_to_ndarray(data)
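
    # the "method" attribute below refers to the Knud Thomsen (2004)
    # approximation for scalene ellipsoids,
    #   S ~= 4*pi*(((a*b)**p + (a*c)**p + (b*c)**p) / 3)**(1/p), p ~= 1.6075;
    # the areas themselves arrive precomputed in the EllipsoidSurfaceAreas data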

    # add data
    dataset_loc = f"{h5_group}/ellipsoid_surface_areas"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=surface_areas,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "notes": str("property of best fit ellipsoid"),
        "nlabels": str(surface_areas.shape[0]),
        "units_squared": f"{area_units}",
        "method": "Knud Thomsen approximation for scalene ellipsoids (2004)",
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )


@add_to_h5.register(EllipsoidVolumes)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add ellipsoid volume data to an HDF5 file.

    This function processes and adds ellipsoid volume data to an HDF5 file.

    Parameters
    ----------
    data : EllipsoidVolumes
        The ellipsoid volume data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> volumes = EllipsoidVolumes(...)
    >>> add_to_h5(volumes, Path("output.h5"), "group_name")
    """

    volume_units = data.data[0].unit_cubed.value
    ellipsoid_volumes = ut.volumes_to_ndarray(data)

    # add data
    dataset_loc = f"{h5_group}/ellipsoid_volumes"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=ellipsoid_volumes,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "notes": str("property of best fit ellipsoid"),
        "nlabels": str(ellipsoid_volumes.shape[0]),
        "units_cubed": f"{volume_units}",
        "method": "4/3 * pi * a * b * c",
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )


@add_to_h5.register(InstanceImageStack)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add instance image stack data to an HDF5 file.

    This function processes and adds instance image stack data to an HDF5 file.

    Parameters
    ----------
    data : InstanceImageStack
        The instance image stack data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> image_stack = InstanceImageStack(...)
    >>> add_to_h5(image_stack, Path("output.h5"), "group_name")
    """

    instance_image_stack = data

    # add data
    dataset_loc = f"{h5_group}/{instance_image_stack.name}"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=instance_image_stack.data,
        dtype=type(instance_image_stack.data.flat[0]),
        operation="create",
        mutable=False,
    )

    # add metadata
    md = instance_image_stack.metadata
    metadata_dict = ut.metadata_to_dict(md)
    write_attr_dict(
        hdf_path=h5_path,
        d=metadata_dict,
        dataset_loc=dataset_loc,
    )

    extra_attrs = {
        "nlabels": str(instance_image_stack.nlabels),
        "min_feature_size": str(instance_image_stack.min_feature_size),
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=extra_attrs,
        dataset_loc=dataset_loc,
    )


@add_to_h5.register(InstanceIndices)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add instance indices data to an HDF5 file.

    This function processes and adds instance indices data to an HDF5 file.

    Parameters
    ----------
    data : InstanceIndices
        The instance indices data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> indices = InstanceIndices(...)
    >>> add_to_h5(indices, Path("output.h5"), "group_name")
    """
    raise NotImplementedError(
        "Ability to write out instance indices not yet implemented."
    )
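    # NOTE: everything below is an unreachable draft kept for future work;
    # it only runs once the NotImplementedError above is removed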
    # TODO: AP, write out as single variable length data instead of
    # individual datasets (currently)
    # https://docs.h5py.org/en/stable/special.html
    # dt = h5py.vlen_dtype(np.dtype('int32'))
    instance_indices = data

    base_container = f"{instance_indices.source_name}_indices"
    # NumPy doesn't support ragged arrays, and the 'arrays of arrays'
    # h5py uses as a workaround are not as convenient or efficient as
    # regular NumPy arrays. If you're deciding how to store data,
    # consider whether there's a sensible way to do it without a
    # variable-length type.
    ragged_label_indices_array = np.array((len(instance_indices.indices)))
    for each_label in instance_indices.labels.data:
        ragged_label_indices_array[each_label] = instance_indices.indices[each_label]

    dataset_loc = f"{base_container}/label_indices"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=ragged_label_indices_array,
        dtype=h5py.vlen_dtype(np.dtype("int32")),
        operation="create",
        mutable=False,
    )
    # # Tree the labels
    # for (
    #     each_label
    # ) in (
    #     instance_indices.labels.data
    # ):  # TODO: why does the instance_indices.labels not return the correct type, instead it provides the ndarray...
    #     dataset_loc = f"{base_container}/label_{each_label:06d}"
    #     modify_hdf_dataset(
    #         hdf_path=h5_path,
    #         dataset_loc=dataset_loc,
    #         data=instance_indices.indices[each_label],
    #         dtype=type(instance_indices.indices[each_label].flat[0]),
    #         operation="create",
    #         mutable=False,
    #     )


@add_to_h5.register(InstanceProperties)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add instance properties data to an HDF5 file.

    This function processes and adds instance properties data to an HDF5 file.

    Parameters
    ----------
    data : InstanceProperties
        The instance properties data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> properties = InstanceProperties(...)
    >>> add_to_h5(properties, Path("output.h5"), "group_name")
    """

    # centroids
    add_to_h5(data.centroids, h5_path=h5_path, h5_group=h5_group)

    # ellipsoids
    add_to_h5(data.ellipsoids, h5_path=h5_path, h5_group=h5_group)

    # surface areas
    add_to_h5(data.surface_areas, h5_path=h5_path, h5_group=h5_group)

    # volumes
    add_to_h5(data.volumes, h5_path=h5_path, h5_group=h5_group)

    # equivalent spherical diameter
    eq_diam = data.equivalent_sphere_diameters
    diam = [i.value for i in eq_diam]
    diam_data = np.array(diam, dtype=float).T
    diam_units = eq_diam[0].unit.value

    dataset_loc = f"{h5_group}/equivalent_sphere_diameters"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=diam_data,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "notes": str("from volume determined by voxel count and resolution"),
        "nlabels": str(diam_data.shape[0]),
        "units": str(diam_units),
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )

    # num voxels
    num_voxel = data.n_voxels
    n_voxels = [i.value for i in num_voxel]
    n_vox_data = np.array(n_voxels, dtype=int).T
    n_vox_units = num_voxel[0].unit.value

    dataset_loc = f"{h5_group}/num_voxels"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=n_vox_data,
        dtype=int,
        operation="create",
        mutable=False,
    )
    attrs = {
        "nlabels": str(n_vox_data.shape[0]),
        "units": str(n_vox_units),
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )


@add_to_h5.register(NthNearestNeighbors)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add nth nearest neighbor data to an HDF5 file.

    This function processes and adds nth nearest neighbor data to an HDF5 file.

    Parameters
    ----------
    data : NthNearestNeighbors
        The nth nearest neighbor data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> neighbors = NthNearestNeighbors(...)
    >>> add_to_h5(neighbors, Path("output.h5"), "group_name")
    """

    units = data.distances[0].unit.value
    distance_list = data.distances
    distances = [i.value for i in distance_list]
    distance_array = np.array(distances, dtype=float).T
    neighbor_id = data.instance_id

    dataset_loc = f"{h5_group}/nearest_neighbor_distances"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=distance_array,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "nlabels": str(distance_array.shape[0]),
        "units": str(units),
        "nth_nearest": f"{data.nth_nearest}: 1st nearest is the feature itself",
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )

    dataset_loc = f"{h5_group}/nearest_neighbor_IDs"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=neighbor_id,
        dtype=int,
        operation="create",
        mutable=False,
    )
    attrs = {
        "nlabels": str(neighbor_id.shape[0]),
        "nth_nearest": f"{data.nth_nearest}: 1st nearest is the feature itself",
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )


### CONVERT BETWEEN H5 <-> FOLDER OF IMAGES ###
def image_to_voxel(yml_path: Path) -> Path:
    """
    Populate the HDF5 file with the semantic segmentation image stack
    specified in the YAML file, including metadata.

    This function reads the YAML file to obtain the necessary parameters,
    processes the image stack, and writes the resulting data to an HDF5 file.

    Parameters
    ----------
    yml_path : Path
        The path to the YAML file containing the configuration and parameters.

    Returns
    -------
    Path
        The path to the created HDF5 file.

    Raises
    ------
    ValueError
        If the "cli_entry_points" key in the YAML file does not contain
        "image_to_voxel".

    Examples
    --------
    >>> yml_path = Path("config.yml")
    >>> hdf5_path = image_to_voxel(yml_path)
    Wrote output file: /path/to/output.h5
    >>> print(hdf5_path)
    /path/to/output.h5
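
    An illustrative YAML layout, showing only the keys this function itself
    reads (values are placeholders; additional keys may be required by
    `ia.process_image_stack`):

        cli_entry_points:
          - image_to_voxel
        h5_filename: output
        out_dir: ~/recon3d_output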
917 """
919 # Import images and metadata into sematic_stack
920 yml_vals = ut.yaml_to_dict(yml_path)
922 # check cli_entry_points is valid
923 if "image_to_voxel" not in yml_vals["cli_entry_points"]:
924 raise ValueError(
925 f"""Error. Incorrect yml format.
926 This function requires the "cli_entry_points" key to contain "image_to_voxel",
927 but currently contains the following options: {yml_vals["cli_entry_points"]} """
928 )
929 semantic_stack_save = ia.process_image_stack(yml_path)
931 # Write semantic_stack to hdf
932 h5_name = yml_vals["h5_filename"] + ".h5"
934 # path_file_output = Path(yml_vals["out_dir"]).joinpath(h5_name)
935 path_file_output = Path(yml_vals["out_dir"]).expanduser().joinpath(h5_name)
936 write_h5(path_file_output, semantic_stack_save)
938 print(f"Wrote output file: {path_file_output}")
940 return path_file_output


def image_to_voxel_command_line():
    """
    The command line wrapper for the `image_to_voxel` function.

    This function sets up the command line argument parser, parses the input
    arguments, and calls the `image_to_voxel` function with the provided YAML
    input file.

    Parameters
    ----------
    None

    Returns
    -------
    None

    Examples
    --------
    To run this function from the command line:

        $ python -m your_module_name input_file.yml
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="the .yml user input file")
    args = parser.parse_args()
    input_file = args.input_file

    image_to_voxel(yml_path=input_file)


def voxel_to_image(yml_path: Path) -> Path:
    """
    Save the image data within the HDF5 file as TIFFs in a new directory.

    This function reads the YAML file to obtain the necessary parameters,
    extracts the image data from the HDF5 file, and saves the images as
    TIFF files in a specified directory.

    Parameters
    ----------
    yml_path : Path
        The path to the YAML file containing the configuration and parameters.

    Returns
    -------
    Path
        The path to the directory containing the saved images.

    Raises
    ------
    ValueError
        If the "cli_entry_points" key in the YAML file does not contain
        "voxel_to_image", or if the specified slicing direction is not valid.

    Examples
    --------
    >>> yml_path = Path("config.yml")
    >>> image_dir = voxel_to_image(yml_path)
    >>> print(image_dir)
    /path/to/output/images
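
    An illustrative YAML layout, showing the keys this function reads (values
    are placeholders; the valid `image_slice_normal` names are the members of
    `recon3d.types.CartesianAxis3D`):

        cli_entry_points:
          - voxel_to_image
        voxel_data_path: ~/output.h5
        voxel_data_location: VoxelData/stack_name
        image_parent_dir: ~/recon3d_output
        image_output_type: .tif
        image_slice_normal: Z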
1002 """
1004 yml_vals = ut.yaml_to_dict(yml_path)
1006 # check cli_entry_points is valid
1007 if "voxel_to_image" not in yml_vals["cli_entry_points"]:
1008 raise ValueError(
1009 f"""Error. Incorrect yml format.
1010 This function requires the "cli_entry_points" key to contain "voxel_to_image",
1011 but currently contains the following options: {yml_vals["cli_entry_points"]} """
1012 )
1014 hdf_path = Path(yml_vals["voxel_data_path"]).expanduser()
1015 # TODO add alternative to ingest npy data dir
1017 hdf_dataset_location = yml_vals["voxel_data_location"]
1018 output_image_dir = Path(yml_vals["image_parent_dir"]).expanduser()
1019 output_image_type = yml_vals["image_output_type"]
1021 slice_normal = yml_vals["image_slice_normal"]
1022 valid_slice_normal = set(item.name for item in cs.CartesianAxis3D)
1023 if slice_normal not in valid_slice_normal:
1024 raise ValueError(
1025 f"Error, '{slice_normal}' is not a valid slicing direction, accepted units are: {valid_slice_normal}"
1026 )
1027 slice_axis = cs.CartesianAxis3D[slice_normal]
1029 with h5py.File(hdf_path, "r") as f:
1030 data = np.squeeze(f[hdf_dataset_location][:])
1031 ut.ndarray_to_img(
1032 data=data,
1033 parent_dir=output_image_dir,
1034 folder_name=Path(hdf_dataset_location).stem,
1035 file_type=output_image_type,
1036 slice_axis=slice_axis,
1037 )
1039 return output_image_dir.joinpath(hdf_path.stem)


def voxel_to_image_command_line():
    """
    The command line wrapper for the `voxel_to_image` function.

    This function sets up the command line argument parser, parses the input
    arguments, and calls the `voxel_to_image` function with the provided YAML
    input file.

    Parameters
    ----------
    None

    Returns
    -------
    None

    Examples
    --------
    To run this function from the command line:

        $ python -m your_module_name input_file.yml
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="the .yml user input file")
    args = parser.parse_args()
    input_file = args.input_file

    new_path = voxel_to_image(yml_path=input_file)

    print(f"\nVoxel data extracted to the following relative directory: '{new_path}'")