Coverage for src/recon3d/hdf_io.py: 81%


1""" 

2HDF5 Processing Module 

3======================= 

4 

5This module provides a set of functions for creating, modifying, and interacting with HDF5 files, 

6as well as converting between image stacks and voxel data. It includes command line interfaces 

7for some of the key functionalities. 

8 

9Functions 

10--------- 

11create_hdf(hdf_path, base_container) 

12 Prescribe foundational structure of the HDF5 file. 

13 

14modify_hdf_dataset(hdf_path, dataset_loc, data, dtype, operation, mutable) 

15 Modify an existing HDF5 file to alter data within a container. 

16 

17write_attr_dict(hdf_path, d, dataset_loc) 

18 Write attributes to a specified dataset in the HDF5 file. 

19 

20write_h5(hdf_path, data) 

21 Write data to an HDF5 file. 

22 

23add_to_h5(data, hdf_path, hdf_group) 

24 Add data to an HDF5 file based on its type. 

25 

26image_to_hdf(yml_path) 

27 Populate the HDF5 file with the semantic segmentation image stack 

28 specified in the YAML file, including metadata. 

29 

30image_to_hdf_command_line() 

31 The command line wrapper for the `image_to_hdf` function. 

32 

33voxel_to_image(yml_path) 

34 Save the image data within the HDF5 file as TIFFs in a new directory. 

35 

36voxel_to_image_command_line() 

37 The command line wrapper for the `voxel_to_image` function. 

38 

39Examples 

40-------- 

41To create an HDF5 file with a base container: 

42 

43 >>> create_hdf(Path("output.h5"), "base_group") 

44 

45To modify an HDF5 dataset: 

46 

47 >>> data = np.random.rand(10, 5) 

48 >>> modify_hdf_dataset(Path("output.h5"), "group/dataset", data, np.float64, "create", True) 

49 

50To add data to an HDF5 file based on its type: 

51 

52 >>> centroids = Centroids(...) 

53 >>> add_to_h5(centroids, Path("output.h5"), "group_name") 

54 

55To populate an HDF5 file with image stack data from a YAML file: 

56 

57 >>> yml_path = Path("config.yml") 

58 >>> hdf5_path = image_to_hdf(yml_path) 

59 

60To convert voxel data in an HDF5 file to image files: 

61 

62 >>> yml_path = Path("config.yml") 

63 >>> image_dir = voxel_to_image(yml_path) 

64 

65To run the `image_to_hdf` function from the command line: 

66 

67 $ python -m image_to_hdf input_file.yml 

68 

69To run the `hdf_to_image` function from the command line: 

70 

71 $ python -m hdf_to_image input_file.yml 

72""" 

73 

import argparse
from functools import singledispatch
from pathlib import Path

import h5py
import numpy as np

# from recon3d.feature_analysis import SemanticImageStack
# from recon3d.types import *
from recon3d.types import (
    BestFitEllipsoids,
    Centroids,
    EllipsoidSurfaceAreas,
    EllipsoidVolumes,
    InstanceImageStack,
    InstanceIndices,
    InstanceProperties,
    NthNearestNeighbors,
    SemanticImageStack,
)
import recon3d.instance_analysis as ia
import recon3d.types as cs
import recon3d.utility as ut

### BASE/HELPER FUNCTIONS ###
def create_hdf(hdf_path: Path, base_container: str) -> bool:
    """
    Prescribe the foundational structure of the HDF5 file.

    This function creates an HDF5 file at the specified path and initializes
    it with a base container group.

    Parameters
    ----------
    hdf_path : Path
        The path to the location of the HDF5 file.
    base_container : str
        The name of the base container group in the HDF5 file.

    Returns
    -------
    bool
        True if the HDF5 file is created successfully, False otherwise.

    Examples
    --------
    >>> create_hdf(Path("output.h5"), "base_group")
    True
    """

    with h5py.File(hdf_path, "w") as file:
        file.create_group(base_container)

    return True

def modify_hdf_dataset(
    hdf_path: Path,
    dataset_loc: str,
    data: np.ndarray,
    dtype: type,
    operation: str,
    mutable: bool,
) -> bool:
    """
    Modify an existing HDF5 file to alter data within a container.

    This function modifies an HDF5 file by creating, appending, or overwriting
    a dataset at the specified location.

    Parameters
    ----------
    hdf_path : Path
        The path to the location of the HDF5 file.
    dataset_loc : str
        The internal path to the dataset in the HDF5 file
        (e.g., "file/container/container2/dataset").
    data : np.ndarray
        The array to be written to the HDF5 file at the specified dataset
        location.
    dtype : type
        The data type of the dataset (e.g., np.float64, np.int32, np.uint16;
        for strings, h5py.special_dtype(vlen=str)).
    operation : str
        The operation to perform on the dataset:
        - "create": create a new dataset
        - "append": append to an existing dataset along dimension 0
        - "overwrite": overwrite an existing dataset along dimension 0
          (e.g., shrinking the dataset)
    mutable : bool
        If True, the dataset is resizable along the first dimension
        (no size limit); otherwise its shape is fixed.

    Returns
    -------
    bool
        True if the operation is successful, False otherwise.

    Raises
    ------
    ValueError
        If the dataset already exists when trying to create a new one.
    KeyError
        If an unsupported operation is requested.

    Examples
    --------
    >>> data = np.random.rand(10, 5)
    >>> modify_hdf_dataset(Path("output.h5"), "group/dataset", data, np.float64, "create", True)
    True

    >>> new_data = np.random.rand(5, 5)
    >>> modify_hdf_dataset(Path("output.h5"), "group/dataset", new_data, np.float64, "append", True)
    True

    >>> overwrite_data = np.random.rand(8, 5)
    >>> modify_hdf_dataset(Path("output.h5"), "group/dataset", overwrite_data, np.float64, "overwrite", True)
    True
    """

    # if mutable, the dataset is resizable along the first dimension,
    # with no size limit
    shape, maxshape = list(data.shape), list(data.shape)
    if mutable:
        maxshape[0] = None
    shape, maxshape = tuple(shape), tuple(maxshape)

    with h5py.File(hdf_path, "r+") as hdf_file:
        if operation == "create":
            if dataset_loc not in hdf_file:
                dataset = hdf_file.create_dataset(
                    name=dataset_loc,
                    data=data,
                    shape=shape,
                    dtype=dtype,
                    # compression="gzip",  # TODO 4/2/24: compression not allowing read in HDFView
                    maxshape=maxshape,
                    chunks=True,
                )
                return True
            else:
                error_string = f"Dataset already exists, cannot {operation}"
                raise ValueError(error_string)

        dataset = hdf_file[dataset_loc]

        if operation == "append":
            # append to the dataset along the first dimension
            dataset.resize(dataset.shape[0] + data.shape[0], axis=0)
            dataset[-data.shape[0]:] = data
            return True

        elif operation == "overwrite":
            # resize (e.g., shrink) the dataset to match the new data
            dataset.resize(data.shape[0], axis=0)
            dataset[:] = data
            return True

        else:
            raise KeyError(
                f'operation "{operation}" requested; only "create", "append", '
                'and "overwrite" are currently supported.'
            )

def write_attr_dict(hdf_path: Path, d: dict, dataset_loc: str) -> bool:
    """Write attributes from the provided dictionary to a group or dataset
    in the h5 file.

    hdf_path : path object to location of hdf file
    d : dict of attributes to add
    dataset_loc : internal path to the dataset in the hdf file
        (e.g., "file/container/container2/dataset")
    """

    with h5py.File(hdf_path, "r+") as hdf_file:
        loc = hdf_file[dataset_loc]
        for key, value in d.items():
            if isinstance(value, Path):
                value = str(value)
            dt = h5py.special_dtype(vlen=str)
            loc.attrs.create(name=key, data=value, dtype=dt)

    return True

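# A minimal usage sketch for write_attr_dict (illustrative only; the file,
# group, and attribute values below are hypothetical):
#
#     create_hdf(Path("output.h5"), "base_group")
#     write_attr_dict(
#         hdf_path=Path("output.h5"),
#         d={"units": "micron", "source": Path("images/")},
#         dataset_loc="base_group",
#     )
#
# Path values are converted to strings, and every attribute is stored with a
# variable-length string dtype.
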
### INITIAL DATA CREATION ###
def write_h5(hdf_path: Path, semantic_stack: SemanticImageStack) -> bool:
    """Write a new h5 file.
    The data is written into a dataset, and
    the metadata is written as attributes of that dataset.

    hdf_path : path object to location of hdf file
    semantic_stack : the semantic image stack
    """

    base_container = "VoxelData"
    create_hdf(hdf_path=hdf_path, base_container=base_container)

    img_data = semantic_stack.data
    dtype = type(
        img_data.flat[0]
    )  # data.item(0) would return a Python dtype, which may be better
    dataset_loc = f"{base_container}/{semantic_stack.name}"
    modify_hdf_dataset(
        hdf_path=hdf_path,
        dataset_loc=dataset_loc,
        data=img_data,
        dtype=dtype,
        operation="create",
        mutable=False,
    )

    # write metadata as attributes of the dataset
    md = semantic_stack.metadata
    metadata_dict = ut.metadata_to_dict(md)
    write_attr_dict(hdf_path=hdf_path, d=metadata_dict, dataset_loc=dataset_loc)

    return True

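# A hedged end-to-end sketch: given a SemanticImageStack produced by the
# existing pipeline, write_h5 places the voxel data at
# "VoxelData/<stack name>" with the stack metadata attached as attributes.
# The paths below are illustrative:
#
#     semantic_stack = ia.process_image_stack(Path("config.yml"))
#     write_h5(Path("output.h5"), semantic_stack)
#     with h5py.File("output.h5", "r") as f:
#         voxels = f[f"VoxelData/{semantic_stack.name}"][:]
#         print(dict(f[f"VoxelData/{semantic_stack.name}"].attrs))
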
### ALL OTHER FUNCTIONALITIES ###
@singledispatch
def add_to_h5(
    data,
    h5_path: Path,
    h5_group: str,  # internal "folder" in the hdf to save data
):
    """
    Add data to an HDF5 file.

    This is a generic function that adds data to an HDF5 file. Specific
    implementations are registered for different types of data.

    Parameters
    ----------
    data : object
        The data to be added to the HDF5 file.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Raises
    ------
    NotImplementedError
        If no handler is registered for the type of `data`.

    Examples
    --------
    >>> add_to_h5(some_data, Path("output.h5"), "group_name")
    NotImplementedError: No handler for type <class 'type'>
    """
    _ = data
    __ = h5_path
    ___ = h5_group
    raise NotImplementedError(f"No handler for type {type(data)}")

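# add_to_h5 dispatches on the type of its first argument via
# functools.singledispatch. A hedged sketch of registering a handler for a new
# (hypothetical) result type:
#
#     @add_to_h5.register(MyResultType)  # MyResultType is hypothetical
#     def _(data, h5_path: Path, h5_group: str):
#         modify_hdf_dataset(
#             hdf_path=h5_path,
#             dataset_loc=f"{h5_group}/my_result",
#             data=data.as_ndarray(),  # hypothetical accessor
#             dtype=float,
#             operation="create",
#             mutable=False,
#         )
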
@add_to_h5.register(Centroids)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add centroid data to an HDF5 file.

    This function processes and adds centroid data to an HDF5 file.

    Parameters
    ----------
    data : Centroids
        The centroid data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> centroids = Centroids(...)
    >>> add_to_h5(centroids, Path("output.h5"), "group_name")
    """

    units = data.data[0].cx.unit.value
    centroid_data = ut.centroids_to_ndarray(centroids=data)

    # add data
    dataset_loc = f"{h5_group}/centroids"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=centroid_data,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "nlabels": str(centroid_data.shape[0]),
        "units": str(units),
        "ordering": str("X, Y, Z"),
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )

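# After add_to_h5(centroids, ...), the centroids live in a single float
# dataset with one row per label. A hedged read-back sketch (file and group
# names hypothetical):
#
#     with h5py.File("output.h5", "r") as f:
#         xyz = f["group_name/centroids"][:]  # columns ordered X, Y, Z
#         units = f["group_name/centroids"].attrs["units"]
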
@add_to_h5.register(BestFitEllipsoids)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add best fit ellipsoid data to an HDF5 file.

    This function processes and adds best fit ellipsoid data to an HDF5 file.

    Parameters
    ----------
    data : BestFitEllipsoids
        The best fit ellipsoid data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> ellipsoids = BestFitEllipsoids(...)
    >>> add_to_h5(ellipsoids, Path("output.h5"), "group_name")
    """

    axis_units = data.data[0].a.length.unit.value
    axis_lengths, axis_vectors = ut.ellipsoids_to_ndarray(data)

    # add data
    dataset_loc = f"{h5_group}/semi-axis_lengths"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=axis_lengths,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "notes": str("property of best fit ellipsoid"),
        "nlabels": str(axis_lengths.shape[0]),
        "units": str(axis_units),
        "ordering": str("a, b, c, with a > b > c"),
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )

    # add orientation data
    dataset_loc = f"{h5_group}/axis_vectors"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=axis_vectors,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "notes": str("property of best fit ellipsoid"),
        "nlabels": str(axis_lengths.shape[0]),
        "units": "unit vector",
        "ordering": str(
            "u, v, w for each axis a, b, c \n\t(a_u, a_v, a_w, b_u, b_v, b_w, c_u, c_v, c_w)"
        ),
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )

@add_to_h5.register(EllipsoidSurfaceAreas)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add ellipsoid surface area data to an HDF5 file.

    This function processes and adds ellipsoid surface area data to an
    HDF5 file.

    Parameters
    ----------
    data : EllipsoidSurfaceAreas
        The ellipsoid surface area data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> surface_areas = EllipsoidSurfaceAreas(...)
    >>> add_to_h5(surface_areas, Path("output.h5"), "group_name")
    """

    area_units = data.data[0].unit_squared.value
    surface_areas = ut.surface_areas_to_ndarray(data)

    # add data
    dataset_loc = f"{h5_group}/ellipsoid_surface_areas"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=surface_areas,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "notes": str("property of best fit ellipsoid"),
        "nlabels": str(surface_areas.shape[0]),
        "units_squared": f"{area_units}",
        "method": "Knud Thomsen approximation for scalene ellipsoids (2004)",
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )

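# The "method" attribute above refers to Knud Thomsen's 2004 approximation for
# the surface area of a scalene ellipsoid with semi-axes a > b > c, accurate to
# roughly 1%. An illustrative stand-alone version (the package computes the
# stored values elsewhere):
#
#     def thomsen_surface_area(a: float, b: float, c: float) -> float:
#         p = 1.6075
#         term = ((a * b) ** p + (a * c) ** p + (b * c) ** p) / 3.0
#         return 4.0 * np.pi * term ** (1.0 / p)
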
@add_to_h5.register(EllipsoidVolumes)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add ellipsoid volume data to an HDF5 file.

    This function processes and adds ellipsoid volume data to an HDF5 file.

    Parameters
    ----------
    data : EllipsoidVolumes
        The ellipsoid volume data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> volumes = EllipsoidVolumes(...)
    >>> add_to_h5(volumes, Path("output.h5"), "group_name")
    """
    volume_units = data.data[0].unit_cubed.value
    ellipsoid_volumes = ut.volumes_to_ndarray(data)

    # add data
    dataset_loc = f"{h5_group}/ellipsoid_volumes"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=ellipsoid_volumes,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "notes": str("property of best fit ellipsoid"),
        "nlabels": str(ellipsoid_volumes.shape[0]),
        "units_cubed": f"{volume_units}",
        "method": "4/3 * pi * a * b * c",
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )

@add_to_h5.register(InstanceImageStack)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add instance image stack data to an HDF5 file.

    This function processes and adds instance image stack data to an HDF5 file.

    Parameters
    ----------
    data : InstanceImageStack
        The instance image stack data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> image_stack = InstanceImageStack(...)
    >>> add_to_h5(image_stack, Path("output.h5"), "group_name")
    """

    instance_image_stack = data

    # add data
    dataset_loc = f"{h5_group}/{instance_image_stack.name}"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=instance_image_stack.data,
        dtype=type(instance_image_stack.data.flat[0]),
        operation="create",
        mutable=False,
    )

    # add metadata
    md = instance_image_stack.metadata
    metadata_dict = ut.metadata_to_dict(md)
    write_attr_dict(
        hdf_path=h5_path,
        d=metadata_dict,
        dataset_loc=dataset_loc,
    )

    extra_attrs = {
        "nlabels": str(instance_image_stack.nlabels),
        "min_feature_size": str(instance_image_stack.min_feature_size),
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=extra_attrs,
        dataset_loc=dataset_loc,
    )

@add_to_h5.register(InstanceIndices)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add instance indices data to an HDF5 file.

    This function processes and adds instance indices data to an HDF5 file.

    Parameters
    ----------
    data : InstanceIndices
        The instance indices data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> indices = InstanceIndices(...)
    >>> add_to_h5(indices, Path("output.h5"), "group_name")
    """
    raise NotImplementedError(
        "Ability to write out instance indices not yet implemented."
    )
    # TODO: AP, write out as a single variable-length dataset instead of
    # individual datasets (currently)
    # https://docs.h5py.org/en/stable/special.html
    # dt = h5py.vlen_dtype(np.dtype('int32'))
    # NOTE: the draft code below is unreachable until the TODO is resolved.
    instance_indices = data

    base_container = f"{instance_indices.source_name}_indices"
    # NumPy doesn't support ragged arrays, and the "arrays of arrays"
    # h5py uses as a workaround are not as convenient or efficient as
    # regular NumPy arrays. If you're deciding how to store data,
    # consider whether there's a sensible way to do it without a
    # variable-length type.
    ragged_label_indices_array = np.array((len(instance_indices.indices)))
    for each_label in instance_indices.labels.data:
        ragged_label_indices_array[each_label] = instance_indices.indices[each_label]

    dataset_loc = f"{base_container}/label_indices"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=ragged_label_indices_array,
        dtype=h5py.vlen_dtype(np.dtype("int32")),
        operation="create",
        mutable=False,
    )
    # # Tree the labels
    # for (
    #     each_label
    # ) in (
    #     instance_indices.labels.data
    # ):  # TODO: why does instance_indices.labels not return the correct type;
    #     # instead it provides the ndarray...
    #     dataset_loc = f"{base_container}/label_{each_label:06d}"
    #     modify_hdf_dataset(
    #         hdf_path=h5_path,
    #         dataset_loc=dataset_loc,
    #         data=instance_indices.indices[each_label],
    #         dtype=type(instance_indices.indices[each_label].flat[0]),
    #         operation="create",
    #         mutable=False,
    #     )

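# A hedged sketch of the variable-length route the TODO above points at
# (https://docs.h5py.org/en/stable/special.html); the names n_labels and
# per_label_indices are illustrative:
#
#     dt = h5py.vlen_dtype(np.dtype("int32"))
#     with h5py.File("output.h5", "a") as f:
#         dset = f.create_dataset("indices/label_indices", (n_labels,), dtype=dt)
#         for label, idx in enumerate(per_label_indices):  # ragged 1-D arrays
#             dset[label] = idx
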
@add_to_h5.register(InstanceProperties)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add instance properties data to an HDF5 file.

    This function processes and adds instance properties data to an HDF5 file.

    Parameters
    ----------
    data : InstanceProperties
        The instance properties data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> properties = InstanceProperties(...)
    >>> add_to_h5(properties, Path("output.h5"), "group_name")
    """

    # centroids
    add_to_h5(data.centroids, h5_path=h5_path, h5_group=h5_group)

    # ellipsoids
    add_to_h5(data.ellipsoids, h5_path=h5_path, h5_group=h5_group)

    # surface areas
    add_to_h5(data.surface_areas, h5_path=h5_path, h5_group=h5_group)

    # volumes
    add_to_h5(data.volumes, h5_path=h5_path, h5_group=h5_group)

    # equivalent spherical diameters
    eq_diam = data.equivalent_sphere_diameters
    diam = [i.value for i in eq_diam]
    diam_data = np.array(diam, dtype=float).T
    diam_units = eq_diam[0].unit.value

    dataset_loc = f"{h5_group}/equivalent_sphere_diameters"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=diam_data,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "notes": str("from volume determined by voxel count and resolution"),
        "nlabels": str(diam_data.shape[0]),
        "units": str(diam_units),
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )

    # number of voxels
    num_voxel = data.n_voxels
    n_voxels = [i.value for i in num_voxel]
    n_vox_data = np.array(n_voxels, dtype=int).T
    n_vox_units = num_voxel[0].unit.value

    dataset_loc = f"{h5_group}/num_voxels"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=n_vox_data,
        dtype=int,
        operation="create",
        mutable=False,
    )
    attrs = {
        "nlabels": str(n_vox_data.shape[0]),
        "units": str(n_vox_units),
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )

@add_to_h5.register(NthNearestNeighbors)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add nth nearest neighbor data to an HDF5 file.

    This function processes and adds nth nearest neighbor data to an HDF5 file.

    Parameters
    ----------
    data : NthNearestNeighbors
        The nth nearest neighbor data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> neighbors = NthNearestNeighbors(...)
    >>> add_to_h5(neighbors, Path("output.h5"), "group_name")
    """

    units = data.distances[0].unit.value

    distance_list = data.distances
    distances = [i.value for i in distance_list]
    distance_array = np.array(distances, dtype=float).T
    neighbor_id = data.instance_id

    dataset_loc = f"{h5_group}/nearest_neighbor_distances"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=distance_array,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "nlabels": str(distance_array.shape[0]),
        "units": str(units),
        "nth_nearest": f"{data.nth_nearest}: 1st nearest is the feature itself",
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )

    dataset_loc = f"{h5_group}/nearest_neighbor_IDs"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=neighbor_id,
        dtype=int,
        operation="create",
        mutable=False,
    )
    attrs = {
        "nlabels": str(neighbor_id.shape[0]),
        "nth_nearest": f"{data.nth_nearest}: 1st nearest is the feature itself",
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )

### CONVERT BETWEEN H5 <-> FOLDER OF IMAGES ###
def image_to_hdf(yml_path: Path) -> Path:
    """
    Populate the HDF5 file with the semantic segmentation image stack
    specified in the YAML file, including metadata.

    This function reads the YAML file to obtain the necessary parameters,
    processes the image stack, and writes the resulting data to an HDF5 file.

    Parameters
    ----------
    yml_path : Path
        The path to the YAML file containing the configuration and parameters.

    Returns
    -------
    Path
        The path to the created HDF5 file.

    Raises
    ------
    ValueError
        If the "cli_entry_points" key in the YAML file does not contain
        "image_to_hdf".

    Examples
    --------
    >>> yml_path = Path("config.yml")
    >>> hdf5_path = image_to_hdf(yml_path)
    Wrote output file: /path/to/output.h5
    >>> print(hdf5_path)
    /path/to/output.h5
    """

    # import images and metadata into the semantic_stack
    yml_vals = ut.yaml_to_dict(yml_path)

    # check that cli_entry_points is valid
    if "image_to_hdf" not in yml_vals["cli_entry_points"]:
        raise ValueError(
            f"""Error. Incorrect yml format.
            This function requires the "cli_entry_points" key to contain "image_to_hdf",
            but currently contains the following options: {yml_vals["cli_entry_points"]} """
        )
    semantic_stack_save = ia.process_image_stack(yml_path)

    # write semantic_stack to hdf
    h5_name = yml_vals["h5_filename"] + ".h5"

    path_file_output = Path(yml_vals["out_dir"]).expanduser().joinpath(h5_name)
    write_h5(path_file_output, semantic_stack_save)

    print(f"Wrote output file: {path_file_output}")

    return path_file_output

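# A hedged sketch of the YAML keys image_to_hdf reads directly; keys consumed
# downstream by ia.process_image_stack are omitted and the values are
# illustrative:
#
#     cli_entry_points:
#       - image_to_hdf
#     h5_filename: output    # ".h5" is appended
#     out_dir: ~/results     # "~" is expanded
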
def image_to_hdf_command_line():
    """
    The command line wrapper for the `image_to_hdf` function.

    This function sets up the command line argument parser, parses the input
    arguments, and calls the `image_to_hdf` function with the provided YAML
    input file.

    Parameters
    ----------
    None

    Returns
    -------
    None

    Examples
    --------
    To run this function from the command line:

        $ python -m image_to_hdf_command_line input_file.yml
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="the .yml user input file")
    args = parser.parse_args()
    input_file = args.input_file

    image_to_hdf(yml_path=input_file)

# def voxel_to_image(yml_path: Path) -> Path:  # deprecated name
def hdf_to_image(yml_path: Path) -> Path:
    """
    Save the image data within the HDF5 file as TIFFs in a new directory.

    This function reads the YAML file to obtain the necessary parameters,
    extracts the image data from the HDF5 file,
    and saves the images as TIFF files in a specified directory.

    Parameters
    ----------
    yml_path : Path
        The path to the YAML file containing the configuration and parameters.

    Returns
    -------
    Path
        The path to the directory containing the saved images.

    Raises
    ------
    ValueError
        If the "cli_entry_points" key in the YAML file does not contain
        "hdf_to_image", or if the specified slicing direction is not valid.

    Examples
    --------
    >>> yml_path = Path("config.yml")
    >>> image_dir = hdf_to_image(yml_path)
    >>> print(image_dir)
    /path/to/output/images
    """

    yml_vals = ut.yaml_to_dict(yml_path)

    # check that cli_entry_points is valid
    if "hdf_to_image" not in yml_vals["cli_entry_points"]:
        raise ValueError(
            f"""Error. Incorrect yml format.
            This function requires the "cli_entry_points" key to contain "hdf_to_image",
            but currently contains the following options: {yml_vals["cli_entry_points"]} """
        )

    hdf_path = Path(yml_vals["hdf_data_path"]).expanduser()
    # TODO add alternative to ingest npy data dir

    hdf_dataset_location = yml_vals["voxel_data_location"]
    output_image_dir = Path(yml_vals["image_parent_dir"]).expanduser()
    output_image_type = yml_vals["image_output_type"]

    slice_normal = yml_vals["image_slice_normal"]
    valid_slice_normal = set(item.name for item in cs.CartesianAxis3D)
    if slice_normal not in valid_slice_normal:
        raise ValueError(
            f"Error, '{slice_normal}' is not a valid slicing direction, accepted values are: {valid_slice_normal}"
        )
    slice_axis = cs.CartesianAxis3D[slice_normal]

    with h5py.File(hdf_path, "r") as f:
        data = np.squeeze(f[hdf_dataset_location][:])
    ut.ndarray_to_img(
        data=data,
        parent_dir=output_image_dir,
        folder_name=Path(hdf_dataset_location).stem,
        file_type=output_image_type,
        slice_axis=slice_axis,
    )

    return output_image_dir.joinpath(hdf_path.stem)

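# A hedged sketch of the YAML keys hdf_to_image reads (values illustrative):
#
#     cli_entry_points:
#       - hdf_to_image
#     hdf_data_path: ~/results/output.h5
#     voxel_data_location: VoxelData/segmentation
#     image_parent_dir: ~/results
#     image_output_type: .tif
#     image_slice_normal: Z    # must name a member of cs.CartesianAxis3D
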
# def voxel_to_image_command_line():  # deprecated name
def hdf_to_image_command_line():
    """
    The command line wrapper for the `hdf_to_image` function.

    This function sets up the command line argument parser, parses the input
    arguments, and calls the `hdf_to_image` function with the provided YAML
    input file.

    Parameters
    ----------
    None

    Returns
    -------
    None

    Examples
    --------
    To run this function from the command line:

        $ python -m hdf_to_image_command_line input_file.yml
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="the .yml user input file")
    args = parser.parse_args()
    input_file = args.input_file

    new_path = hdf_to_image(yml_path=input_file)

    print(f"\nVoxel data extracted to the following relative directory: '{new_path}'")

def hdf_to_npy(yml_path: Path) -> Path:
    """
    Save the image data within the HDF5 file as a NumPy .npy file.

    This function reads the YAML file to obtain the necessary parameters,
    extracts the image data from the HDF5 file,
    and saves it as a .npy file in a specified directory.

    Parameters
    ----------
    yml_path : Path
        The path to the YAML file containing the configuration and parameters.

    Returns
    -------
    Path
        The path to the .npy file.

    Raises
    ------
    ValueError
        If the "cli_entry_points" key in the YAML file does not contain
        "hdf_to_npy".

    Examples
    --------
    >>> yml_path = Path("config.yml")
    >>> npy_file = hdf_to_npy(yml_path)
    >>> print(npy_file)
    /path/to/output/npy_file.npy
    """

    yml_vals = ut.yaml_to_dict(yml_path)

    # check that cli_entry_points is valid
    if "hdf_to_npy" not in yml_vals["cli_entry_points"]:
        raise ValueError(
            f"""Error. Incorrect yml format.
            This function requires the "cli_entry_points" key to contain "hdf_to_npy",
            but currently contains the following options: {yml_vals["cli_entry_points"]} """
        )

    hdf_path = Path(yml_vals["hdf_data_path"]).expanduser()

    hdf_dataset_location = yml_vals["voxel_data_location"]
    output_dir = Path(yml_vals["output_dir"]).expanduser()
    output_type = yml_vals["output_type"]
    output_path = output_dir.joinpath(hdf_path.stem + output_type)

    with h5py.File(hdf_path, "r") as f:
        data = np.squeeze(f[hdf_dataset_location][:])

    np.save(output_path, data)

    return output_path

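# A hedged sketch of the YAML keys hdf_to_npy reads (values illustrative):
#
#     cli_entry_points:
#       - hdf_to_npy
#     hdf_data_path: ~/results/output.h5
#     voxel_data_location: VoxelData/segmentation
#     output_dir: ~/results
#     output_type: .npy
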
def hdf_to_npy_command_line():
    """
    The command line wrapper for the `hdf_to_npy` function.

    This function sets up the command line argument parser, parses the input
    arguments, and calls the `hdf_to_npy` function with the provided YAML
    input file.

    Parameters
    ----------
    None

    Returns
    -------
    None

    Examples
    --------
    To run this function from the command line:

        $ python -m hdf_to_npy_command_line input_file.yml
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="the .yml user input file")
    args = parser.parse_args()
    input_file = args.input_file

    new_path = hdf_to_npy(yml_path=input_file)

    print(f"\nVoxel data extracted to: '{new_path}'")