1"""
2HDF5 Processing Module
3=======================
5This module provides a set of functions for creating, modifying, and interacting with HDF5 files,
6as well as converting between image stacks and voxel data. It includes command line interfaces
7for some of the key functionalities.
9Functions
10---------
11create_hdf(hdf_path, base_container)
12 Prescribe foundational structure of the HDF5 file.
14modify_hdf_dataset(hdf_path, dataset_loc, data, dtype, operation, mutable)
15 Modify an existing HDF5 file to alter data within a container.
17write_attr_dict(hdf_path, d, dataset_loc)
18 Write attributes to a specified dataset in the HDF5 file.
20write_h5(hdf_path, data)
21 Write data to an HDF5 file.
23add_to_h5(data, hdf_path, hdf_group)
24 Add data to an HDF5 file based on its type.
26image_to_hdf(yml_path)
27 Populate the HDF5 file with the semantic segmentation image stack
28 specified in the YAML file, including metadata.
30image_to_hdf_command_line()
31 The command line wrapper for the `image_to_hdf` function.
33voxel_to_image(yml_path)
34 Save the image data within the HDF5 file as TIFFs in a new directory.
36voxel_to_image_command_line()
37 The command line wrapper for the `voxel_to_image` function.
39Examples
40--------
41To create an HDF5 file with a base container:
43 >>> create_hdf(Path("output.h5"), "base_group")
45To modify an HDF5 dataset:
47 >>> data = np.random.rand(10, 5)
48 >>> modify_hdf_dataset(Path("output.h5"), "group/dataset", data, np.float64, "create", True)
50To add data to an HDF5 file based on its type:
52 >>> centroids = Centroids(...)
53 >>> add_to_h5(centroids, Path("output.h5"), "group_name")
55To populate an HDF5 file with image stack data from a YAML file:
57 >>> yml_path = Path("config.yml")
58 >>> hdf5_path = image_to_hdf(yml_path)
60To convert voxel data in an HDF5 file to image files:
62 >>> yml_path = Path("config.yml")
63 >>> image_dir = voxel_to_image(yml_path)
65To run the `image_to_hdf` function from the command line:
67 $ python -m image_to_hdf input_file.yml
69To run the `hdf_to_image` function from the command line:
71 $ python -m hdf_to_image input_file.yml
72"""
import argparse
from pathlib import Path
import numpy as np
import h5py
from functools import singledispatch

# from recon3d.feature_analysis import SemanticImageStack
# from recon3d.types import *
from recon3d.types import (
    BestFitEllipsoids,
    Centroids,
    EllipsoidSurfaceAreas,
    EllipsoidVolumes,
    InstanceImageStack,
    InstanceIndices,
    InstanceProperties,
    NthNearestNeighbors,
    SemanticImageStack,
)
import recon3d.utility as ut
import recon3d.instance_analysis as ia
import recon3d.types as cs

### BASE/HELPER FUNCTIONS ###
def create_hdf(hdf_path: Path, base_container: str) -> bool:
    """
    Prescribe foundational structure of the HDF5 file.

    This function creates an HDF5 file at the specified path and initializes
    it with a base container group.

    Parameters
    ----------
    hdf_path : Path
        The path to the location of the HDF5 file.
    base_container : str
        The name of the base container group in the HDF5 file.

    Returns
    -------
    bool
        True if the HDF5 file is created successfully, False otherwise.

    Examples
    --------
    >>> create_hdf(Path("output.h5"), "base_group")
    True
    """

    with h5py.File(hdf_path, "w") as file:
        file.create_group(base_container)

    return True

def modify_hdf_dataset(
    hdf_path: Path,
    dataset_loc: str,
    data: np.ndarray,
    dtype: type,
    operation: str,
    mutable: bool,
) -> bool:
    """
    Modify an existing HDF5 file to alter data within a container.

    This function modifies an HDF5 file by creating, appending, or overwriting
    a dataset at the specified location.

    Parameters
    ----------
    hdf_path : Path
        The path to the location of the HDF5 file.
    dataset_loc : str
        The internal path to the dataset in the HDF5 file
        (e.g., "file/container/container2/dataset").
    data : np.ndarray
        The array to be written to the HDF5 file at the specified dataset
        location.
    dtype : type
        The data type of the dataset (e.g., np.float64, np.int32, np.uint16;
        for strings: h5py.special_dtype(vlen=str)).
    operation : str
        The operation to perform on the dataset:
        - "create": create a new dataset
        - "append": append to an existing dataset along dimension 0
        - "overwrite": overwrite an existing dataset along dimension 0
          (e.g., shrinking the dataset)
    mutable : bool
        If True, the dataset has no size limit along the first dimension,
        so it can later be resized by "append" or "overwrite".

    Returns
    -------
    bool
        True if the operation is successful, False otherwise.

    Raises
    ------
    ValueError
        If the dataset already exists when trying to create a new one.
    KeyError
        If an unsupported operation is requested.

    Examples
    --------
    >>> data = np.random.rand(10, 5)
    >>> modify_hdf_dataset(Path("output.h5"), "group/dataset", data, np.float64, "create", True)
    True

    >>> new_data = np.random.rand(5, 5)
    >>> modify_hdf_dataset(Path("output.h5"), "group/dataset", new_data, np.float64, "append", True)
    True

    >>> overwrite_data = np.random.rand(8, 5)
    >>> modify_hdf_dataset(Path("output.h5"), "group/dataset", overwrite_data, np.float64, "overwrite", True)
    True
    """

    # if mutable, impose no size limit along the first dimension so the
    # dataset can be resized later
    shape, maxshape = list(data.shape), list(data.shape)
    if mutable:
        maxshape[0] = None
    shape, maxshape = tuple(shape), tuple(maxshape)

    with h5py.File(hdf_path, "r+") as hdf_file:
        # with h5py.File(hdf_path, "r+", locking=False) as hdf_file:
        if operation == "create":
            if dataset_loc not in hdf_file:
                dataset = hdf_file.create_dataset(
                    name=dataset_loc,
                    data=data,
                    shape=shape,
                    dtype=dtype,
                    # compression="gzip",  # TODO 4/2/24: compression prevents read in HDFView
                    maxshape=maxshape,
                    chunks=True,
                )
                return True
            else:
                error_string = f"Dataset already exists, cannot {operation}"
                raise ValueError(error_string)

        dataset = hdf_file[dataset_loc]

        if operation == "append":
            # appending to dataset
            dataset.resize(dataset.shape[0] + data.shape[0], axis=0)
            dataset[-data.shape[0] :] = data
            return True

        elif operation == "overwrite":
            # shrinking dataset
            dataset.resize(data.shape[0], axis=0)
            dataset[:] = data
            return True

        else:
            raise KeyError(
                f'Operation "{operation}" requested; only "create", "append", and '
                '"overwrite" are currently supported.'
            )

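# A minimal usage sketch (hypothetical file path and array shapes) showing how
# "create" and "append" compose when mutable=True: the first call establishes a
# dataset with an unlimited first dimension, and the second grows it in place.
#
#     create_hdf(Path("example.h5"), "VoxelData")
#     first = np.zeros((10, 5))
#     modify_hdf_dataset(Path("example.h5"), "VoxelData/example", first, float, "create", True)
#     more = np.ones((5, 5))
#     modify_hdf_dataset(Path("example.h5"), "VoxelData/example", more, float, "append", True)
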
def write_attr_dict(hdf_path: Path, d: dict, dataset_loc: str) -> bool:
    """Write attributes to a group or dataset in the h5 file
    from the provided dictionary.

    hdf_path : path object to location of hdf file
    d : dict of attributes to add
    dataset_loc : internal path to the dataset in the hdf file
        (e.g. "file/container/container2/dataset")
    """

    with h5py.File(hdf_path, "r+") as hdf_file:
        loc = hdf_file[dataset_loc]
        for key, value in d.items():
            if isinstance(value, Path):
                value = str(value)
            dt = h5py.special_dtype(vlen=str)
            loc.attrs.create(name=key, data=value, dtype=dt)

    return

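# A small sketch (hypothetical file, dataset, and values) of attaching
# attributes to an existing dataset; Path values are converted to strings
# before being written as variable-length string attributes.
#
#     write_attr_dict(
#         hdf_path=Path("example.h5"),
#         d={"source": Path("images/"), "units": "micrometers"},
#         dataset_loc="VoxelData/example",
#     )
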
### INITIAL DATA CREATION ###
def write_h5(hdf_path: Path, semantic_stack: SemanticImageStack) -> bool:
    """Write a new h5 file.
    The data is written into a dataset,
    and the metadata is written as attributes of the dataset.

    hdf_path : path object to location of hdf file
    semantic_stack : the semantic image stack
    """

    base_container = "VoxelData"
    create_hdf(hdf_path=hdf_path, base_container=base_container)

    img_data = semantic_stack.data
    dtype = type(
        img_data.flat[0]
    )  # img_data.item(0) would return a native Python type, which may be better
    dataset_loc = f"{base_container}/{semantic_stack.name}"
    modify_hdf_dataset(
        hdf_path=hdf_path,
        dataset_loc=dataset_loc,
        data=img_data,
        dtype=dtype,
        operation="create",
        mutable=False,
    )

    # TODO 4/2/2024 write metadata as attributes
    md = semantic_stack.metadata
    # TODO 5/7/2024 make metadata strings add to h5
    metadata_dict = ut.metadata_to_dict(md)
    write_attr_dict(hdf_path=hdf_path, d=metadata_dict, dataset_loc=dataset_loc)

### ALL OTHER FUNCTIONALITIES ###
@singledispatch
def add_to_h5(
    data,
    h5_path: Path,
    h5_group: str,  # internal "folder" in the hdf to save data
):
    """
    Add data to an HDF5 file.

    This is a generic function that adds data to an HDF5 file. Specific
    implementations are provided for different types of data.

    Parameters
    ----------
    data : object
        The data to be added to the HDF5 file.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Raises
    ------
    NotImplementedError
        If no handler is registered for the type of `data`.

    Examples
    --------
    >>> add_to_h5(some_data, Path("output.h5"), "group_name")
    NotImplementedError: No handler for type <class 'type'>
    """
    _ = data
    __ = h5_path
    ___ = h5_group
    raise NotImplementedError(f"No handler for type {type(data)}")

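# `add_to_h5` uses functools.singledispatch: the concrete writer is selected by
# the runtime type of `data`, and the registered implementations below handle
# each analysis type. Supporting a new type only requires registering another
# implementation; a hedged sketch with a hypothetical `MyResult` type:
#
#     @add_to_h5.register(MyResult)
#     def _(data, h5_path: Path, h5_group: str):
#         modify_hdf_dataset(h5_path, f"{h5_group}/my_result", data.values,
#                            float, "create", False)
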
@add_to_h5.register(Centroids)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add centroid data to an HDF5 file.

    This function processes and adds centroid data to an HDF5 file.

    Parameters
    ----------
    data : Centroids
        The centroid data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> centroids = Centroids(...)
    >>> add_to_h5(centroids, Path("output.h5"), "group_name")
    """

    units = data.data[0].cx.unit.value
    centroid_data = ut.centroids_to_ndarray(centroids=data)

    # add data
    dataset_loc = f"{h5_group}/centroids"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=centroid_data,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "nlabels": str(centroid_data.shape[0]),
        "units": str(units),
        "ordering": str("X, Y, Z"),
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )

@add_to_h5.register(BestFitEllipsoids)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add best fit ellipsoid data to an HDF5 file.

    This function processes and adds best fit ellipsoid data to an HDF5 file.

    Parameters
    ----------
    data : BestFitEllipsoids
        The best fit ellipsoid data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> ellipsoids = BestFitEllipsoids(...)
    >>> add_to_h5(ellipsoids, Path("output.h5"), "group_name")
    """

    axis_units = data.data[0].a.length.unit.value
    axis_lengths, axis_vectors = ut.ellipsoids_to_ndarray(data)

    # add data
    dataset_loc = f"{h5_group}/semi-axis_lengths"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=axis_lengths,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "notes": str("property of best fit ellipsoid"),
        "nlabels": str(axis_lengths.shape[0]),
        "units": str(axis_units),
        "ordering": str("a, b, c, with a > b > c"),
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )

    # add orientation data
    dataset_loc = f"{h5_group}/axis_vectors"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=axis_vectors,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "notes": str("property of best fit ellipsoid"),
        "nlabels": str(axis_lengths.shape[0]),
        "units": "unit vector",
        "ordering": str(
            "u, v, w for each axis a, b, c \n\t(a_u, a_v, a_w, b_u, b_v, b_w, c_u, c_v, c_w)"
        ),
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )

@add_to_h5.register(EllipsoidSurfaceAreas)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add ellipsoid surface area data to an HDF5 file.

    This function processes and adds ellipsoid surface area data to an
    HDF5 file.

    Parameters
    ----------
    data : EllipsoidSurfaceAreas
        The ellipsoid surface area data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> surface_areas = EllipsoidSurfaceAreas(...)
    >>> add_to_h5(surface_areas, Path("output.h5"), "group_name")
    """

    area_units = data.data[0].unit_squared.value
    surface_areas = ut.surface_areas_to_ndarray(data)

    # add data
    dataset_loc = f"{h5_group}/ellipsoid_surface_areas"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=surface_areas,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "notes": str("property of best fit ellipsoid"),
        "nlabels": str(surface_areas.shape[0]),
        "units_squared": f"{area_units}",
        "method": "Knud Thomsen approximation for scalene ellipsoids (2004)",
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )

@add_to_h5.register(EllipsoidVolumes)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add ellipsoid volume data to an HDF5 file.

    This function processes and adds ellipsoid volume data to an HDF5 file.

    Parameters
    ----------
    data : EllipsoidVolumes
        The ellipsoid volume data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> volumes = EllipsoidVolumes(...)
    >>> add_to_h5(volumes, Path("output.h5"), "group_name")
    """
    volume_units = data.data[0].unit_cubed.value
    ellipsoid_volumes = ut.volumes_to_ndarray(data)

    # add data
    dataset_loc = f"{h5_group}/ellipsoid_volumes"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=ellipsoid_volumes,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "notes": str("property of best fit ellipsoid"),
        "nlabels": str(ellipsoid_volumes.shape[0]),
        "units_cubed": f"{volume_units}",
        "method": "4/3 * pi * a * b * c",
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )

@add_to_h5.register(InstanceImageStack)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add instance image stack data to an HDF5 file.

    This function processes and adds instance image stack data to an HDF5 file.

    Parameters
    ----------
    data : InstanceImageStack
        The instance image stack data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> image_stack = InstanceImageStack(...)
    >>> add_to_h5(image_stack, Path("output.h5"), "group_name")
    """

    instance_image_stack = data

    # add data
    dataset_loc = f"{h5_group}/{instance_image_stack.name}"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=instance_image_stack.data,
        dtype=type(instance_image_stack.data.flat[0]),
        operation="create",
        mutable=False,
    )

    # add metadata
    md = instance_image_stack.metadata
    metadata_dict = ut.metadata_to_dict(md)
    write_attr_dict(
        hdf_path=h5_path,
        d=metadata_dict,
        dataset_loc=dataset_loc,
    )

    extra_attrs = {
        "nlabels": str(instance_image_stack.nlabels),
        "min_feature_size": str(instance_image_stack.min_feature_size),
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=extra_attrs,
        dataset_loc=dataset_loc,
    )

@add_to_h5.register(InstanceIndices)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add instance indices data to an HDF5 file.

    This function processes and adds instance indices data to an HDF5 file.

    Parameters
    ----------
    data : InstanceIndices
        The instance indices data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> indices = InstanceIndices(...)
    >>> add_to_h5(indices, Path("output.h5"), "group_name")
    """
    raise NotImplementedError(
        "Ability to write out instance indices not yet implemented."
    )
    # TODO: AP, write out as single variable length data instead of
    # individual datasets (currently)
    # https://docs.h5py.org/en/stable/special.html
    # dt = h5py.vlen_dtype(np.dtype('int32'))
    instance_indices = data

    base_container = f"{instance_indices.source_name}_indices"
    # NumPy doesn’t support ragged arrays, and the ‘arrays of arrays’
    # h5py uses as a workaround are not as convenient or efficient as
    # regular NumPy arrays. If you’re deciding how to store data,
    # consider whether there’s a sensible way to do it without a
    # variable-length type.
    ragged_label_indices_array = np.array((len(instance_indices.indices)))
    for each_label in instance_indices.labels.data:
        ragged_label_indices_array[each_label] = instance_indices.indices[each_label]

    dataset_loc = f"{base_container}/label_indices"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=ragged_label_indices_array,
        dtype=h5py.vlen_dtype(np.dtype("int32")),
        operation="create",
        mutable=False,
    )
    # # Tree the labels
    # for (
    #     each_label
    # ) in (
    #     instance_indices.labels.data
    # ):  # TODO: why does the instance_indices.labels not return the correct type, instead it provides the ndarray...
    #     dataset_loc = f"{base_container}/label_{each_label:06d}"
    #     modify_hdf_dataset(
    #         hdf_path=h5_path,
    #         dataset_loc=dataset_loc,
    #         data=instance_indices.indices[each_label],
    #         dtype=type(instance_indices.indices[each_label].flat[0]),
    #         operation="create",
    #         mutable=False,
    #     )

@add_to_h5.register(InstanceProperties)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add instance properties data to an HDF5 file.

    This function processes and adds instance properties data to an HDF5 file.

    Parameters
    ----------
    data : InstanceProperties
        The instance properties data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> properties = InstanceProperties(...)
    >>> add_to_h5(properties, Path("output.h5"), "group_name")
    """

    # centroids
    add_to_h5(data.centroids, h5_path=h5_path, h5_group=h5_group)

    # ellipsoids
    add_to_h5(data.ellipsoids, h5_path=h5_path, h5_group=h5_group)

    # surface areas
    add_to_h5(data.surface_areas, h5_path=h5_path, h5_group=h5_group)

    # volumes
    add_to_h5(data.volumes, h5_path=h5_path, h5_group=h5_group)

    # equivalent spherical diameters
    eq_diam = data.equivalent_sphere_diameters
    diam = [i.value for i in eq_diam]
    diam_data = np.array(diam, dtype=float).T
    diam_units = eq_diam[0].unit.value

    dataset_loc = f"{h5_group}/equivalent_sphere_diameters"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=diam_data,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "notes": str("from volume determined by voxel count and resolution"),
        "nlabels": str(diam_data.shape[0]),
        "units": str(diam_units),
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )

    # number of voxels
    num_voxel = data.n_voxels
    n_voxels = [i.value for i in num_voxel]
    n_vox_data = np.array(n_voxels, dtype=int).T
    n_vox_units = num_voxel[0].unit.value

    dataset_loc = f"{h5_group}/num_voxels"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=n_vox_data,
        dtype=int,
        operation="create",
        mutable=False,
    )
    attrs = {
        "nlabels": str(diam_data.shape[0]),
        "units": str(n_vox_units),
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )

@add_to_h5.register(NthNearestNeighbors)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add nth nearest neighbor data to an HDF5 file.

    This function processes and adds nth nearest neighbor data to an HDF5 file.

    Parameters
    ----------
    data : NthNearestNeighbors
        The nth nearest neighbor data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> neighbors = NthNearestNeighbors(...)
    >>> add_to_h5(neighbors, Path("output.h5"), "group_name")
    """

    units = data.distances[0].unit.value

    distance_list = data.distances
    distances = [i.value for i in distance_list]
    distance_array = np.array(distances, dtype=float).T
    neighbor_id = data.instance_id

    dataset_loc = f"{h5_group}/nearest_neighbor_distances"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=distance_array,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "nlabels": str(distance_array.shape[0]),
        "units": str(units),
        "nth_nearest": f"{data.nth_nearest}: 1st nearest is the feature itself",
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )

    dataset_loc = f"{h5_group}/nearest_neighbor_IDs"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=neighbor_id,
        dtype=int,
        operation="create",
        mutable=False,
    )
    attrs = {
        "nlabels": str(neighbor_id.shape[0]),
        "nth_nearest": f"{data.nth_nearest}: 1st nearest is the feature itself",
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )

### CONVERT BETWEEN H5 <-> FOLDER OF IMAGES ###
def image_to_hdf(yml_path: Path) -> Path:
    """
    Populate the HDF5 file with the semantic segmentation image stack
    specified in the YAML file, including metadata.

    This function reads the YAML file to obtain the necessary parameters,
    processes the image stack, and writes the resulting data to an HDF5 file.

    Parameters
    ----------
    yml_path : Path
        The path to the YAML file containing the configuration and parameters.

    Returns
    -------
    Path
        The path to the created HDF5 file.

    Raises
    ------
    ValueError
        If the "cli_entry_points" key in the YAML file does not contain "image_to_hdf".

    Examples
    --------
    >>> yml_path = Path("config.yml")
    >>> hdf5_path = image_to_hdf(yml_path)
    Wrote output file: /path/to/output.h5
    >>> print(hdf5_path)
    /path/to/output.h5
    """

    # Import images and metadata into the semantic stack
    yml_vals = ut.yaml_to_dict(yml_path)

    # check cli_entry_points is valid
    if "image_to_hdf" not in yml_vals["cli_entry_points"]:
        raise ValueError(
            f"""Error. Incorrect yml format.
            This function requires the "cli_entry_points" key to contain "image_to_hdf",
            but currently contains the following options: {yml_vals["cli_entry_points"]} """
        )
    semantic_stack_save = ia.process_image_stack(yml_path)

    # Write semantic_stack to hdf
    h5_name = yml_vals["h5_filename"] + ".h5"

    path_file_output = Path(yml_vals["out_dir"]).expanduser().joinpath(h5_name)
    write_h5(path_file_output, semantic_stack_save)

    print(f"Wrote output file: {path_file_output}")

    return path_file_output

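# A hedged sketch of the YAML keys this function reads (key names taken from
# the code above; example values are hypothetical, and ia.process_image_stack
# may require additional keys):
#
#     cli_entry_points: [image_to_hdf]
#     h5_filename: my_volume
#     out_dir: ~/recon3d_output
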
def image_to_hdf_command_line():
    """
    The command line wrapper for the `image_to_hdf` function.

    This function sets up the command line argument parser, parses the input
    arguments, and calls the `image_to_hdf` function with the provided YAML
    input file.

    Parameters
    ----------
    None

    Returns
    -------
    None

    Examples
    --------
    To run this function from the command line:

        $ python -m image_to_hdf_command_line input_file.yml
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="the .yml user input file")
    args = parser.parse_args()
    input_file = args.input_file

    image_to_hdf(yml_path=input_file)

# def voxel_to_image(yml_path: Path) -> Path:  # deprecated
def hdf_to_image(yml_path: Path) -> Path:
    """
    Save the image data within the HDF5 file as TIFFs in a new directory.

    This function reads the YAML file to obtain the necessary parameters,
    extracts the image data from the HDF5 file,
    and saves the images as TIFF files in a specified directory.

    Parameters
    ----------
    yml_path : Path
        The path to the YAML file containing the configuration and parameters.

    Returns
    -------
    Path
        The path to the directory containing the saved images.

    Raises
    ------
    ValueError
        If the "cli_entry_points" key in the YAML file does not contain "hdf_to_image".
        If the specified slicing direction is not valid.

    Examples
    --------
    >>> yml_path = Path("config.yml")
    >>> image_dir = hdf_to_image(yml_path)
    >>> print(image_dir)
    /path/to/output/images
    """

    yml_vals = ut.yaml_to_dict(yml_path)

    # check cli_entry_points is valid
    if "hdf_to_image" not in yml_vals["cli_entry_points"]:
        raise ValueError(
            f"""Error. Incorrect yml format.
            This function requires the "cli_entry_points" key to contain "hdf_to_image",
            but currently contains the following options: {yml_vals["cli_entry_points"]} """
        )

    hdf_path = Path(yml_vals["hdf_data_path"]).expanduser()
    # TODO add alternative to ingest npy data dir

    hdf_dataset_location = yml_vals["voxel_data_location"]
    output_image_dir = Path(yml_vals["image_parent_dir"]).expanduser()
    output_image_type = yml_vals["image_output_type"]

    slice_normal = yml_vals["image_slice_normal"]
    valid_slice_normal = set(item.name for item in cs.CartesianAxis3D)
    if slice_normal not in valid_slice_normal:
        raise ValueError(
            f"Error, '{slice_normal}' is not a valid slicing direction, accepted values are: {valid_slice_normal}"
        )
    slice_axis = cs.CartesianAxis3D[slice_normal]

    with h5py.File(hdf_path, "r") as f:
        data = np.squeeze(f[hdf_dataset_location][:])
        ut.ndarray_to_img(
            data=data,
            parent_dir=output_image_dir,
            folder_name=Path(hdf_dataset_location).stem,
            file_type=output_image_type,
            slice_axis=slice_axis,
        )

    return output_image_dir.joinpath(Path(hdf_dataset_location).stem)

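# A hedged sketch of the YAML keys read by hdf_to_image (key names taken from
# the code above; example values, including the slice direction, are
# hypothetical):
#
#     cli_entry_points: [hdf_to_image]
#     hdf_data_path: ~/recon3d_output/my_volume.h5
#     voxel_data_location: VoxelData/my_volume
#     image_parent_dir: ~/recon3d_output
#     image_output_type: .tif
#     image_slice_normal: Z
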
# def voxel_to_image_command_line():  # deprecated
def hdf_to_image_command_line():
    """
    The command line wrapper for the `hdf_to_image` function.

    This function sets up the command line argument parser, parses the input
    arguments, and calls the `hdf_to_image` function with the provided YAML
    input file.

    Parameters
    ----------
    None

    Returns
    -------
    None

    Examples
    --------
    To run this function from the command line:

        $ python -m hdf_to_image_command_line input_file.yml
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="the .yml user input file")
    args = parser.parse_args()
    input_file = args.input_file

    new_path = hdf_to_image(yml_path=input_file)

    print(f"\nVoxel data extracted to the following directory: '{new_path}'")

def hdf_to_npy(yml_path: Path) -> Path:
    """
    Save the image data within the HDF5 file as a NumPy .npy file.

    This function reads the YAML file to obtain the necessary parameters,
    extracts the image data from the HDF5 file,
    and saves it as a .npy file in a specified directory.

    Parameters
    ----------
    yml_path : Path
        The path to the YAML file containing the configuration and parameters.

    Returns
    -------
    Path
        The path to the .npy file.

    Raises
    ------
    ValueError
        If the "cli_entry_points" key in the YAML file does not contain "hdf_to_npy".

    Examples
    --------
    >>> yml_path = Path("config.yml")
    >>> npy_file = hdf_to_npy(yml_path)
    >>> print(npy_file)
    /path/to/output/npy_file.npy
    """

    yml_vals = ut.yaml_to_dict(yml_path)

    # check cli_entry_points is valid
    if "hdf_to_npy" not in yml_vals["cli_entry_points"]:
        raise ValueError(
            f"""Error. Incorrect yml format.
            This function requires the "cli_entry_points" key to contain "hdf_to_npy",
            but currently contains the following options: {yml_vals["cli_entry_points"]} """
        )

    hdf_path = Path(yml_vals["hdf_data_path"]).expanduser()

    hdf_dataset_location = yml_vals["voxel_data_location"]
    output_dir = Path(yml_vals["output_dir"]).expanduser()
    output_type = yml_vals["output_type"]
    output_path = output_dir.joinpath(hdf_path.stem + output_type)

    # slice_normal = yml_vals["image_slice_normal"]
    # valid_slice_normal = set(item.name for item in cs.CartesianAxis3D)
    # if slice_normal not in valid_slice_normal:
    #     raise ValueError(
    #         f"Error, '{slice_normal}' is not a valid slicing direction, accepted units are: {valid_slice_normal}"
    #     )
    # slice_axis = cs.CartesianAxis3D[slice_normal]

    with h5py.File(hdf_path, "r") as f:
        data = np.squeeze(f[hdf_dataset_location][:])

    np.save(output_path, data)

    # ut.ndarray_to_img(
    #     data=data,
    #     parent_dir=output_image_dir,
    #     folder_name=Path(hdf_dataset_location).stem,
    #     file_type=output_image_type,
    #     slice_axis=slice_axis,
    # )

    return output_path

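# A hedged sketch of the YAML keys read by hdf_to_npy (key names taken from
# the code above; example values are hypothetical):
#
#     cli_entry_points: [hdf_to_npy]
#     hdf_data_path: ~/recon3d_output/my_volume.h5
#     voxel_data_location: VoxelData/my_volume
#     output_dir: ~/recon3d_output
#     output_type: .npy
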
def hdf_to_npy_command_line():
    """
    The command line wrapper for the `hdf_to_npy` function.

    This function sets up the command line argument parser, parses the input
    arguments, and calls the `hdf_to_npy` function with the provided YAML
    input file.

    Parameters
    ----------
    None

    Returns
    -------
    None

    Examples
    --------
    To run this function from the command line:

        $ python -m hdf_to_npy_command_line input_file.yml
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="the .yml user input file")
    args = parser.parse_args()
    input_file = args.input_file

    new_path = hdf_to_npy(yml_path=input_file)

    print(f"\nVoxel data extracted to: '{new_path}'")