Coverage for src/recon3d/hdf_io.py: 89%

186 statements  

coverage.py v7.8.0, created at 2025-04-02 00:06 +0000

1""" 

2HDF5 Processing Module 

3======================= 

4 

5This module provides a set of functions for creating, modifying, and interacting with HDF5 files, 

6as well as converting between image stacks and voxel data. It includes command line interfaces 

7for some of the key functionalities. 

8 

9Functions 

10--------- 

11create_hdf(hdf_path, base_container) 

12 Prescribe foundational structure of the HDF5 file. 

13 

14modify_hdf_dataset(hdf_path, dataset_loc, data, dtype, operation, mutable) 

15 Modify an existing HDF5 file to alter data within a container. 

16 

17write_attr_dict(hdf_path, d, dataset_loc) 

18 Write attributes to a specified dataset in the HDF5 file. 

19 

20write_h5(hdf_path, data) 

21 Write data to an HDF5 file. 

22 

23add_to_h5(data, hdf_path, hdf_group) 

24 Add data to an HDF5 file based on its type. 

25 

26image_to_voxel(yml_path) 

27 Populate the HDF5 file with the semantic segmentation image stack 

28 specified in the YAML file, including metadata. 

29 

30image_to_voxel_command_line() 

31 The command line wrapper for the `image_to_voxel` function. 

32 

33voxel_to_image(yml_path) 

34 Save the image data within the HDF5 file as TIFFs in a new directory. 

35 

36voxel_to_image_command_line() 

37 The command line wrapper for the `voxel_to_image` function. 

38 

39Examples 

40-------- 

41To create an HDF5 file with a base container: 

42 

43 >>> create_hdf(Path("output.h5"), "base_group") 

44 

45To modify an HDF5 dataset: 

46 

47 >>> data = np.random.rand(10, 5) 

48 >>> modify_hdf_dataset(Path("output.h5"), "group/dataset", data, np.float64, "create", True) 

49 

50To add data to an HDF5 file based on its type: 

51 

52 >>> centroids = Centroids(...) 

53 >>> add_to_h5(centroids, Path("output.h5"), "group_name") 

54 

55To populate an HDF5 file with image stack data from a YAML file: 

56 

57 >>> yml_path = Path("config.yml") 

58 >>> hdf5_path = image_to_voxel(yml_path) 

59 

60To convert voxel data in an HDF5 file to image files: 

61 

62 >>> yml_path = Path("config.yml") 

63 >>> image_dir = voxel_to_image(yml_path) 

64 

65To run the `image_to_voxel` function from the command line: 

66 

67 $ python -m your_module_name input_file.yml 

68 

69To run the `voxel_to_image` function from the command line: 

70 

71 $ python -m your_module_name input_file.yml 

72""" 

import argparse
from functools import singledispatch
from pathlib import Path

import h5py
import numpy as np

# from recon3d.feature_analysis import SemanticImageStack
from recon3d.types import *
import recon3d.utility as ut
import recon3d.instance_analysis as ia
import recon3d.types as cs


### BASE/HELPER FUNCTIONS ###

def create_hdf(hdf_path: Path, base_container: str) -> bool:
    """
    Prescribe foundational structure of the HDF5 file.

    This function creates an HDF5 file at the specified path and initializes
    it with a base container group.

    Parameters
    ----------
    hdf_path : Path
        The path to the location of the HDF5 file.
    base_container : str
        The name of the base container group in the HDF5 file.

    Returns
    -------
    bool
        True if the HDF5 file is created successfully, False otherwise.

    Examples
    --------
    >>> create_hdf(Path("output.h5"), "base_group")
    True
    """

    with h5py.File(hdf_path, "w") as file:
        file.create_group(base_container)

    return True


def modify_hdf_dataset(
    hdf_path: Path,
    dataset_loc: str,
    data: np.ndarray,
    dtype: type,
    operation: str,
    mutable: bool,
) -> bool:
    """
    Modify an existing HDF5 file to alter data within a container.

    This function modifies an HDF5 file by creating, appending, or overwriting
    a dataset at the specified location.

    Parameters
    ----------
    hdf_path : Path
        The path to the location of the HDF5 file.
    dataset_loc : str
        The internal path to the dataset in the HDF5 file
        (e.g., "file/container/container2/dataset").
    data : np.ndarray
        The array to be written to the HDF5 file at the specified dataset
        location.
    dtype : type
        The data type of the dataset (e.g., np.float64, int, np.uint16;
        for strings: h5py.special_dtype(vlen=str)).
    operation : str
        The operation to perform on the dataset:
        - "create": create a new dataset
        - "append": append to an existing dataset along dimension 0
        - "overwrite": overwrite an existing dataset along dimension 0
          (e.g., shrinking the dataset)
    mutable : bool
        If True, the dataset is resizable along the first dimension, with
        no size limit.

    Returns
    -------
    bool
        True if the operation is successful, False otherwise.

    Raises
    ------
    ValueError
        If the dataset already exists when trying to create a new one.
    KeyError
        If an unsupported operation is requested.

    Examples
    --------
    >>> data = np.random.rand(10, 5)
    >>> modify_hdf_dataset(Path("output.h5"), "group/dataset", data, np.float64, "create", True)
    True

    >>> new_data = np.random.rand(5, 5)
    >>> modify_hdf_dataset(Path("output.h5"), "group/dataset", new_data, np.float64, "append", True)
    True

    >>> overwrite_data = np.random.rand(8, 5)
    >>> modify_hdf_dataset(Path("output.h5"), "group/dataset", overwrite_data, np.float64, "overwrite", True)
    True
    """

    # if mutable, the dataset may grow without limit along the first dimension
    shape, maxshape = list(data.shape), list(data.shape)
    if mutable:
        maxshape[0] = None
    shape, maxshape = tuple(shape), tuple(maxshape)

    with h5py.File(hdf_path, "r+") as hdf_file:
        if operation == "create":
            if dataset_loc not in hdf_file:
                hdf_file.create_dataset(
                    name=dataset_loc,
                    data=data,
                    shape=shape,
                    dtype=dtype,
                    # compression="gzip",  # TODO 4/2/24 compression not allowing read in HDF_View
                    maxshape=maxshape,
                    chunks=True,
                )
                return True
            else:
                error_string = f"Dataset already exists, cannot {operation}"
                raise ValueError(error_string)

        dataset = hdf_file[dataset_loc]

        if operation == "append":
            # grow the dataset along axis 0, then write the new rows at the end
            dataset.resize(dataset.shape[0] + data.shape[0], axis=0)
            dataset[-data.shape[0] :] = data
            return True

        elif operation == "overwrite":
            # resize (e.g., shrink) the dataset along axis 0, then replace contents
            dataset.resize(data.shape[0], axis=0)
            dataset[:] = data
            return True

        else:
            raise KeyError(
                f'operation "{operation}" requested; only "create", "append",'
                ' and "overwrite" are currently supported.'
            )

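# --- Usage sketch (illustrative addition, not part of the original module) ---
# A minimal round trip over the "create" and "append" operations; the file
# and dataset names here are hypothetical.
#
#   create_hdf(Path("demo.h5"), "VoxelData")
#   modify_hdf_dataset(Path("demo.h5"), "VoxelData/demo", np.zeros((2, 3)),
#                      np.float64, "create", mutable=True)
#   modify_hdf_dataset(Path("demo.h5"), "VoxelData/demo", np.ones((4, 3)),
#                      np.float64, "append", mutable=True)
#   # The dataset is now shape (6, 3): mutable=True set maxshape[0] = None at
#   # creation, so the resize along axis 0 in "append" succeeds.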

def write_attr_dict(hdf_path: Path, d: dict, dataset_loc: str) -> bool:
    """Write attributes to a group or dataset in the h5 file
    from the provided dictionary.

    hdf_path : path object to location of hdf file
    d : dict of attributes to add
    dataset_loc : internal path to the dataset in the hdf file
        (e.g. "file/container/container2/dataset")
    """

    with h5py.File(hdf_path, "r+") as hdf_file:
        loc = hdf_file[dataset_loc]
        for key, value in d.items():
            if isinstance(value, Path):
                value = str(value)
            # all attribute values are stored as variable-length strings
            dt = h5py.special_dtype(vlen=str)
            loc.attrs.create(name=key, data=value, dtype=dt)

    return True

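# --- Usage sketch (illustrative addition, not part of the original module) ---
# Writing a small attribute dict and reading it back with plain h5py; the
# file name and attribute values are hypothetical.
#
#   create_hdf(Path("demo.h5"), "VoxelData")
#   write_attr_dict(
#       hdf_path=Path("demo.h5"),
#       d={"units": "micron", "source": Path("raw/images")},
#       dataset_loc="VoxelData",
#   )
#   with h5py.File("demo.h5", "r") as f:
#       print(dict(f["VoxelData"].attrs))  # values stored as vlen strings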

### INITIAL DATA CREATION ###
def write_h5(hdf_path: Path, semantic_stack: SemanticImageStack) -> bool:
    """Write a new h5 file.
    The data will be written into a dataset and
    the metadata will be written as attributes of the dataset.

    hdf_path : path object to location of hdf file
    semantic_stack : the semantic image stack
    """

    base_container = "VoxelData"
    create_hdf(hdf_path=hdf_path, base_container=base_container)

    img_data = semantic_stack.data
    dtype = type(
        img_data.flat[0]
    )  # data.item(0) will return python dtype, which may be better
    dataset_loc = f"{base_container}/{semantic_stack.name}"
    modify_hdf_dataset(
        hdf_path=hdf_path,
        dataset_loc=dataset_loc,
        data=img_data,
        dtype=dtype,
        operation="create",
        mutable=False,
    )

    # write metadata as attributes of the dataset
    # TODO 5/7/2024 make metadata strings add to h5
    md = semantic_stack.metadata
    metadata_dict = ut.metadata_to_dict(md)
    write_attr_dict(hdf_path=hdf_path, d=metadata_dict, dataset_loc=dataset_loc)

    return True

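# --- Usage sketch (illustrative addition, not part of the original module) ---
# write_h5 relies only on the SemanticImageStack attributes used above:
# .name (str), .data (np.ndarray), and .metadata (accepted by
# ut.metadata_to_dict). In this module the stack comes from
# ia.process_image_stack, as in image_to_voxel below.
#
#   stack = ia.process_image_stack(Path("config.yml"))
#   write_h5(Path("demo.h5"), stack)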

### ALL OTHER FUNCTIONALITIES ###
@singledispatch
def add_to_h5(
    data,
    h5_path: Path,
    h5_group: str,  # internal "folder" in the hdf to save data
):
    """
    Add data to an HDF5 file.

    This is a generic function that adds data to an HDF5 file. Specific
    implementations are provided for different types of data.

    Parameters
    ----------
    data : object
        The data to be added to the HDF5 file.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Raises
    ------
    NotImplementedError
        If no handler is registered for the type of `data`.

    Examples
    --------
    >>> add_to_h5(some_data, Path("output.h5"), "group_name")
    NotImplementedError: No handler for type <class 'type'>
    """
    raise NotImplementedError(f"No handler for type {type(data)}")

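# --- Dispatch note (illustrative addition, not part of the original module) ---
# add_to_h5 is a functools.singledispatch generic: the handler is selected by
# the runtime type of `data`. Handlers registered below cover Centroids,
# BestFitEllipsoids, EllipsoidSurfaceAreas, EllipsoidVolumes,
# InstanceImageStack, InstanceIndices, InstanceProperties, and
# NthNearestNeighbors; any other type falls through to the base above.
#
#   add_to_h5(centroids, Path("demo.h5"), "analysis")  # -> Centroids handler
#   add_to_h5(42, Path("demo.h5"), "analysis")         # raises NotImplementedError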

@add_to_h5.register(Centroids)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add centroid data to an HDF5 file.

    This function processes and adds centroid data to an HDF5 file.

    Parameters
    ----------
    data : Centroids
        The centroid data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> centroids = Centroids(...)
    >>> add_to_h5(centroids, Path("output.h5"), "group_name")
    """

    units = data.data[0].cx.unit.value
    centroid_data = ut.centroids_to_ndarray(centroids=data)

    # add data
    dataset_loc = f"{h5_group}/centroids"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=centroid_data,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "nlabels": str(centroid_data.shape[0]),
        "units": str(units),
        "ordering": "X, Y, Z",
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )


@add_to_h5.register(BestFitEllipsoids)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add best fit ellipsoid data to an HDF5 file.

    This function processes and adds best fit ellipsoid data to an HDF5 file.

    Parameters
    ----------
    data : BestFitEllipsoids
        The best fit ellipsoid data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> ellipsoids = BestFitEllipsoids(...)
    >>> add_to_h5(ellipsoids, Path("output.h5"), "group_name")
    """

    axis_units = data.data[0].a.length.unit.value
    axis_lengths, axis_vectors = ut.ellipsoids_to_ndarray(data)

    # add data
    dataset_loc = f"{h5_group}/semi-axis_lengths"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=axis_lengths,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "notes": "property of best fit ellipsoid",
        "nlabels": str(axis_lengths.shape[0]),
        "units": str(axis_units),
        "ordering": "a, b, c, with a > b > c",
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )

    # add orientation data
    dataset_loc = f"{h5_group}/axis_vectors"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=axis_vectors,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "notes": "property of best fit ellipsoid",
        "nlabels": str(axis_lengths.shape[0]),
        "units": "unit vector",
        "ordering": "u, v, w for each axis a, b, c \n\t(a_u, a_v, a_w, b_u, b_v, b_w, c_u, c_v, c_w)",
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )


@add_to_h5.register(EllipsoidSurfaceAreas)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add ellipsoid surface area data to an HDF5 file.

    This function processes and adds ellipsoid surface area data to an
    HDF5 file.

    Parameters
    ----------
    data : EllipsoidSurfaceAreas
        The ellipsoid surface area data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> surface_areas = EllipsoidSurfaceAreas(...)
    >>> add_to_h5(surface_areas, Path("output.h5"), "group_name")
    """

    area_units = data.data[0].unit_squared.value
    surface_areas = ut.surface_areas_to_ndarray(data)

    # add data
    dataset_loc = f"{h5_group}/ellipsoid_surface_areas"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=surface_areas,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "notes": "property of best fit ellipsoid",
        "nlabels": str(surface_areas.shape[0]),
        "units_squared": f"{area_units}",
        "method": "Knud Thomsen approximation for scalene ellipsoids (2004)",
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )


@add_to_h5.register(EllipsoidVolumes)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add ellipsoid volume data to an HDF5 file.

    This function processes and adds ellipsoid volume data to an HDF5 file.

    Parameters
    ----------
    data : EllipsoidVolumes
        The ellipsoid volume data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> volumes = EllipsoidVolumes(...)
    >>> add_to_h5(volumes, Path("output.h5"), "group_name")
    """

    volume_units = data.data[0].unit_cubed.value
    ellipsoid_volumes = ut.volumes_to_ndarray(data)

    # add data
    dataset_loc = f"{h5_group}/ellipsoid_volumes"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=ellipsoid_volumes,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "notes": "property of best fit ellipsoid",
        "nlabels": str(ellipsoid_volumes.shape[0]),
        "units_cubed": f"{volume_units}",
        "method": "4/3 * pi * a * b * c",
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )


@add_to_h5.register(InstanceImageStack)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add instance image stack data to an HDF5 file.

    This function processes and adds instance image stack data to an HDF5 file.

    Parameters
    ----------
    data : InstanceImageStack
        The instance image stack data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> image_stack = InstanceImageStack(...)
    >>> add_to_h5(image_stack, Path("output.h5"), "group_name")
    """

    instance_image_stack = data

    # add data
    dataset_loc = f"{h5_group}/{instance_image_stack.name}"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=instance_image_stack.data,
        dtype=type(instance_image_stack.data.flat[0]),
        operation="create",
        mutable=False,
    )

    # add metadata
    md = instance_image_stack.metadata
    metadata_dict = ut.metadata_to_dict(md)
    write_attr_dict(
        hdf_path=h5_path,
        d=metadata_dict,
        dataset_loc=dataset_loc,
    )

    extra_attrs = {
        "nlabels": str(instance_image_stack.nlabels),
        "min_feature_size": str(instance_image_stack.min_feature_size),
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=extra_attrs,
        dataset_loc=dataset_loc,
    )


@add_to_h5.register(InstanceIndices)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add instance indices data to an HDF5 file.

    This function processes and adds instance indices data to an HDF5 file.

    Parameters
    ----------
    data : InstanceIndices
        The instance indices data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> indices = InstanceIndices(...)
    >>> add_to_h5(indices, Path("output.h5"), "group_name")
    """
    raise NotImplementedError(
        "Ability to write out instance indices not yet implemented."
    )

    # NOTE: the draft below is unreachable until the raise above is removed.
    # TODO: AP, write out as single variable-length dataset instead of
    # individual datasets (currently)
    # https://docs.h5py.org/en/stable/special.html
    # dt = h5py.vlen_dtype(np.dtype('int32'))
    instance_indices = data

    base_container = f"{instance_indices.source_name}_indices"
    # NumPy doesn't support ragged arrays, and the 'arrays of arrays'
    # h5py uses as a workaround are not as convenient or efficient as
    # regular NumPy arrays. If you're deciding how to store data,
    # consider whether there's a sensible way to do it without a
    # variable-length type.
    # object array, one (variable-length) row of indices per label
    ragged_label_indices_array = np.empty(len(instance_indices.indices), dtype=object)
    for each_label in instance_indices.labels.data:
        ragged_label_indices_array[each_label] = instance_indices.indices[each_label]

    dataset_loc = f"{base_container}/label_indices"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=ragged_label_indices_array,
        dtype=h5py.vlen_dtype(np.dtype("int32")),
        operation="create",
        mutable=False,
    )
    # # Tree the labels
    # for each_label in instance_indices.labels.data:
    #     # TODO: why does instance_indices.labels not return the correct
    #     # type, instead providing the ndarray?
    #     dataset_loc = f"{base_container}/label_{each_label:06d}"
    #     modify_hdf_dataset(
    #         hdf_path=h5_path,
    #         dataset_loc=dataset_loc,
    #         data=instance_indices.indices[each_label],
    #         dtype=type(instance_indices.indices[each_label].flat[0]),
    #         operation="create",
    #         mutable=False,
    #     )


@add_to_h5.register(InstanceProperties)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add instance properties data to an HDF5 file.

    This function processes and adds instance properties data to an HDF5 file.

    Parameters
    ----------
    data : InstanceProperties
        The instance properties data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> properties = InstanceProperties(...)
    >>> add_to_h5(properties, Path("output.h5"), "group_name")
    """

    # centroids
    add_to_h5(data.centroids, h5_path=h5_path, h5_group=h5_group)

    # ellipsoids
    add_to_h5(data.ellipsoids, h5_path=h5_path, h5_group=h5_group)

    # surface areas
    add_to_h5(data.surface_areas, h5_path=h5_path, h5_group=h5_group)

    # volumes
    add_to_h5(data.volumes, h5_path=h5_path, h5_group=h5_group)

    # equivalent spherical diameters
    eq_diam = data.equivalent_sphere_diameters
    diam = [i.value for i in eq_diam]
    diam_data = np.array(diam, dtype=float).T
    diam_units = eq_diam[0].unit.value

    dataset_loc = f"{h5_group}/equivalent_sphere_diameters"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=diam_data,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "notes": "from volume determined by voxel count and resolution",
        "nlabels": str(diam_data.shape[0]),
        "units": str(diam_units),
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )

    # number of voxels
    num_voxel = data.n_voxels
    n_voxels = [i.value for i in num_voxel]
    n_vox_data = np.array(n_voxels, dtype=int).T
    n_vox_units = num_voxel[0].unit.value

    dataset_loc = f"{h5_group}/num_voxels"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=n_vox_data,
        dtype=int,
        operation="create",
        mutable=False,
    )
    attrs = {
        "nlabels": str(n_vox_data.shape[0]),
        "units": str(n_vox_units),
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )


@add_to_h5.register(NthNearestNeighbors)
def _(
    data,
    h5_path: Path,
    h5_group: str,
):
    """
    Add nth nearest neighbor data to an HDF5 file.

    This function processes and adds nth nearest neighbor data to an HDF5 file.

    Parameters
    ----------
    data : NthNearestNeighbors
        The nth nearest neighbor data to be added.
    h5_path : Path
        The path to the HDF5 file.
    h5_group : str
        The internal "folder" in the HDF5 file to save data.

    Returns
    -------
    None

    Examples
    --------
    >>> neighbors = NthNearestNeighbors(...)
    >>> add_to_h5(neighbors, Path("output.h5"), "group_name")
    """

    units = data.distances[0].unit.value

    distance_list = data.distances
    distances = [i.value for i in distance_list]
    distance_array = np.array(distances, dtype=float).T
    neighbor_id = data.instance_id

    dataset_loc = f"{h5_group}/nearest_neighbor_distances"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=distance_array,
        dtype=float,
        operation="create",
        mutable=False,
    )
    attrs = {
        "nlabels": str(distance_array.shape[0]),
        "units": str(units),
        "nth_nearest": f"{data.nth_nearest}: 1st nearest is the feature itself",
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )

    dataset_loc = f"{h5_group}/nearest_neighbor_IDs"
    modify_hdf_dataset(
        hdf_path=h5_path,
        dataset_loc=dataset_loc,
        data=neighbor_id,
        dtype=int,
        operation="create",
        mutable=False,
    )
    attrs = {
        "nlabels": str(neighbor_id.shape[0]),
        "nth_nearest": f"{data.nth_nearest}: 1st nearest is the feature itself",
    }
    write_attr_dict(
        hdf_path=h5_path,
        d=attrs,
        dataset_loc=dataset_loc,
    )


### CONVERT BETWEEN H5 <-> FOLDER OF IMAGES ###
def image_to_voxel(yml_path: Path) -> Path:
    """
    Populate the HDF5 file with the semantic segmentation image stack
    specified in the YAML file, including metadata.

    This function reads the YAML file to obtain the necessary parameters,
    processes the image stack, and writes the resulting data to an HDF5 file.

    Parameters
    ----------
    yml_path : Path
        The path to the YAML file containing the configuration and parameters.

    Returns
    -------
    Path
        The path to the created HDF5 file.

    Raises
    ------
    ValueError
        If the "cli_entry_points" key in the YAML file does not contain
        "image_to_voxel".

    Examples
    --------
    >>> yml_path = Path("config.yml")
    >>> hdf5_path = image_to_voxel(yml_path)
    Wrote output file: /path/to/output.h5
    >>> print(hdf5_path)
    /path/to/output.h5
    """

    # Import images and metadata into semantic_stack
    yml_vals = ut.yaml_to_dict(yml_path)

    # check cli_entry_points is valid
    if "image_to_voxel" not in yml_vals["cli_entry_points"]:
        raise ValueError(
            f"""Error. Incorrect yml format.
            This function requires the "cli_entry_points" key to contain "image_to_voxel",
            but currently contains the following options: {yml_vals["cli_entry_points"]}"""
        )
    semantic_stack_save = ia.process_image_stack(yml_path)

    # Write semantic_stack to hdf
    h5_name = yml_vals["h5_filename"] + ".h5"
    path_file_output = Path(yml_vals["out_dir"]).expanduser().joinpath(h5_name)
    write_h5(path_file_output, semantic_stack_save)

    print(f"Wrote output file: {path_file_output}")

    return path_file_output

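# --- Input sketch (illustrative addition, not part of the original module) ---
# A minimal YAML for image_to_voxel, inferred from the keys read above; the
# values are hypothetical, and keys consumed by ia.process_image_stack are
# omitted.
#
#   cli_entry_points:
#     - image_to_voxel
#   h5_filename: my_volume
#   out_dir: ~/recon3d_output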

def image_to_voxel_command_line():
    """
    The command line wrapper for the `image_to_voxel` function.

    This function sets up the command line argument parser, parses the input
    arguments, and calls the `image_to_voxel` function with the provided YAML
    input file.

    Parameters
    ----------
    None

    Returns
    -------
    None

    Examples
    --------
    To run this function from the command line:

        $ python -m your_module_name input_file.yml
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="the .yml user input file")
    args = parser.parse_args()
    input_file = args.input_file

    image_to_voxel(yml_path=input_file)


def voxel_to_image(yml_path: Path) -> Path:
    """
    Save the image data within the HDF5 file as TIFFs in a new directory.

    This function reads the YAML file to obtain the necessary parameters,
    extracts the image data from the HDF5 file,
    and saves the images as TIFF files in a specified directory.

    Parameters
    ----------
    yml_path : Path
        The path to the YAML file containing the configuration and parameters.

    Returns
    -------
    Path
        The path to the directory containing the saved images.

    Raises
    ------
    ValueError
        If the "cli_entry_points" key in the YAML file does not contain
        "voxel_to_image", or if the specified slicing direction is not valid.

    Examples
    --------
    >>> yml_path = Path("config.yml")
    >>> image_dir = voxel_to_image(yml_path)
    >>> print(image_dir)
    /path/to/output/images
    """

    yml_vals = ut.yaml_to_dict(yml_path)

    # check cli_entry_points is valid
    if "voxel_to_image" not in yml_vals["cli_entry_points"]:
        raise ValueError(
            f"""Error. Incorrect yml format.
            This function requires the "cli_entry_points" key to contain "voxel_to_image",
            but currently contains the following options: {yml_vals["cli_entry_points"]}"""
        )

    hdf_path = Path(yml_vals["voxel_data_path"]).expanduser()
    # TODO add alternative to ingest npy data dir

    hdf_dataset_location = yml_vals["voxel_data_location"]
    output_image_dir = Path(yml_vals["image_parent_dir"]).expanduser()
    output_image_type = yml_vals["image_output_type"]

    slice_normal = yml_vals["image_slice_normal"]
    valid_slice_normal = set(item.name for item in cs.CartesianAxis3D)
    if slice_normal not in valid_slice_normal:
        raise ValueError(
            f"Error, '{slice_normal}' is not a valid slicing direction, "
            f"accepted values are: {valid_slice_normal}"
        )
    slice_axis = cs.CartesianAxis3D[slice_normal]

    with h5py.File(hdf_path, "r") as f:
        data = np.squeeze(f[hdf_dataset_location][:])
        ut.ndarray_to_img(
            data=data,
            parent_dir=output_image_dir,
            folder_name=Path(hdf_dataset_location).stem,
            file_type=output_image_type,
            slice_axis=slice_axis,
        )

    return output_image_dir.joinpath(hdf_path.stem)

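# --- Input sketch (illustrative addition, not part of the original module) ---
# A minimal YAML for voxel_to_image, inferred from the keys read above; the
# values are hypothetical.
#
#   cli_entry_points:
#     - voxel_to_image
#   voxel_data_path: ~/recon3d_output/my_volume.h5
#   voxel_data_location: VoxelData/my_volume
#   image_parent_dir: ~/recon3d_output
#   image_output_type: .tif
#   image_slice_normal: Z  # must be a cs.CartesianAxis3D member name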

def voxel_to_image_command_line():
    """
    The command line wrapper for the `voxel_to_image` function.

    This function sets up the command line argument parser, parses the input
    arguments, and calls the `voxel_to_image` function with the provided YAML
    input file.

    Parameters
    ----------
    None

    Returns
    -------
    None

    Examples
    --------
    To run this function from the command line:

        $ python -m your_module_name input_file.yml
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="the .yml user input file")
    args = parser.parse_args()
    input_file = args.input_file

    new_path = voxel_to_image(yml_path=input_file)

    print(f"\nVoxel data extracted to the following relative directory: '{new_path}'")