Module riid.data.converters.aipt
This module provides tools for handling data related to the multi-lab Algorithm Improvement Program Team (AIPT).
Expand source code Browse git
# Copyright 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
# Under the terms of Contract DE-NA0003525 with NTESS,
# the U.S. Government retains certain rights in this software.
"""This module provides tools for handling data related to the multi-lab Algorithm Improvement
Program Team (AIPT).
"""
import os
from pathlib import Path
from typing import List
import pandas as pd
from riid import SAMPLESET_HDF_FILE_EXTENSION, SampleSet
from riid.data.converters import _validate_and_create_output_dir
ELEMENT_IDS_PER_FILE = [0, 1, 2, 3]
DEFAULT_ECAL = [
-3.00000,
3010.00000,
150.00000,
0.00000,
0.00000,
]
def _element_to_ss(data_df: pd.DataFrame, eid: int, description) -> SampleSet:
if eid not in ELEMENT_IDS_PER_FILE:
msg = (
f"Element #{eid} is invalid. "
"The available options are: {ELEMENT_IDS_PER_FILE}"
)
raise ValueError(msg)
ss = SampleSet()
ss.spectra = data_df[f"spectrum-channels{eid}"]\
.str\
.split(",", expand=True)\
.astype(int)
ss.info.live_time = data_df[f"spectrum-lt{eid}"] / 1000
ss.info.real_time = data_df[f"spectrum-rt{eid}"] / 1000
ss.info.total_counts = data_df[f"gc{eid}"]
ss.info.neutron_counts = data_df["nc0"]
ss.info.ecal_order_0 = DEFAULT_ECAL[0]
ss.info.ecal_order_1 = DEFAULT_ECAL[1]
ss.info.ecal_order_2 = DEFAULT_ECAL[2]
ss.info.ecal_order_3 = DEFAULT_ECAL[3]
ss.info.ecal_low_e = DEFAULT_ECAL[4]
ss.info.timestamp = data_df["utc-time"]
ss.info.description = description
ss.info["latitude"] = data_df["latitude"]
ss.info["longitude"] = data_df["longitude"]
ss.info["is_in_zone"] = data_df["is-in-zone"]
ss.info["is_closest_approach"] = data_df["is-closest-approach"]
ss.info["is_source_present"] = data_df["is-source-present"]
detector_name = f"{data_df['detector'].unique()[0]}.{eid}"
ss.detector_info = {
"name": detector_name
}
return ss
def aipt_file_to_ss_list(file_path: str) -> List[SampleSet]:
    """Process an AIPT CSV file into a list of SampleSets.

    Each file holds simultaneous recordings from multiple detectors, so every
    `SampleSet` in the returned list corresponds to one detector, and row `i`
    of every `SampleSet` was measured at the same moment in time.
    A typical follow-up step is to sum the four per-detector spectra at each
    timestep into a single spectrum.

    Args:
        file_path: file path of the CSV file

    Returns:
        List of `SampleSet`s each containing a series of spectra for a single run
    """
    # Despite the .csv name, AIPT files are tab-delimited.
    frame = pd.read_csv(file_path, header=0, sep="\t")
    stem = Path(file_path).stem
    return [
        _element_to_ss(frame, eid, f"{stem}_{eid}")
        for eid in ELEMENT_IDS_PER_FILE
    ]
def convert_and_save(input_file_path: str, output_dir: str = None,
                     skip_existing: bool = True, **kwargs):
    """Convert AIPT file to SampleSet and save as HDF.

    Each output file keeps the input file's base name, appended with a
    detector identifier and given the SampleSet HDF extension.

    Args:
        input_file_path: file path of the CSV file
        output_dir: alternative directory in which to save HDF files
            (defaults to `input_file_path` parent if not provided)
        skip_existing: whether to skip conversion if the file already exists
        kwargs: keyword args passed to `aipt_file_to_ss_list()` (not currently used)
    """
    src = Path(input_file_path)
    target_dir = output_dir if output_dir else src.parent
    _validate_and_create_output_dir(target_dir)
    # One output path per detector element in the input file.
    destinations = [
        os.path.join(target_dir, src.stem + f"-{eid}{SAMPLESET_HDF_FILE_EXTENSION}")
        for eid in ELEMENT_IDS_PER_FILE
    ]
    # Only skip when every per-element output already exists.
    if skip_existing and all(os.path.exists(p) for p in destinations):
        return
    for destination, ss in zip(destinations, aipt_file_to_ss_list(input_file_path, **kwargs)):
        ss.to_hdf(destination)
Functions
def aipt_file_to_ss_list(file_path: str) -> List[SampleSet]
Process an AIPT CSV file into a list of SampleSets.
Each file contains a series of spectra for multiple detectors running simultaneously. As such, each
`SampleSet` in the list returned by this function represents the data collected by each detector. Each row of each `SampleSet` represents a measurement from each detector at the same moment in time. As such, after calling this function, you might consider summing all four spectra at each timestep into a single spectrum as another processing step.

Args

file_path: file path of the CSV file
Returns
List of `SampleSet`s, each containing a series of spectra for a single run

Expand source code Browse git
def aipt_file_to_ss_list(file_path: str) -> List[SampleSet]: """Process an AIPT CSV file into a list of SampleSets. Each file contains a series of spectra for multiple detectors running simultaneously. As such, each `SampleSet` in the list returned by this function represents the data collected by each detector. Each row of each `SampleSet` represents a measurement from each detector at the same moment in time. As such, after calling this function, you might consider summing all four spectra at each timestep into a single spectrum as another processing step. Args: file_path: file path of the CSV file Returns: List of `SampleSet`s each containing a series of spectra for a single run """ data_df = pd.read_csv(file_path, header=0, sep="\t") base_description = os.path.splitext(os.path.basename(file_path))[0] ss_list = [] for eid in ELEMENT_IDS_PER_FILE: description = f"{base_description}_{eid}" ss = _element_to_ss(data_df, eid, description) ss_list.append(ss) return ss_list def convert_and_save(input_file_path: str, output_dir: str = None, skip_existing: bool = True, **kwargs)-
Convert AIPT file to SampleSet and save as HDF.
Output file will have same name but appended with a detector identifier and having a different extension.
Args
input_file_path: file path of the CSV file
output_dir: alternative directory in which to save HDF files
    (defaults to `input_file_path` parent if not provided)
skip_existing: whether to skip conversion if the file already exists
kwargs: keyword args passed to `aipt_file_to_ss_list()` (not currently used)
Expand source code Browse git
def convert_and_save(input_file_path: str, output_dir: str = None, skip_existing: bool = True, **kwargs): """Convert AIPT file to SampleSet and save as HDF. Output file will have same name but appended with a detector identifier and having a different extension. Args: input_file_path: file path of the CSV file output_dir: alternative directory in which to save HDF files (defaults to `input_file_path` parent if not provided) skip_existing: whether to skip conversion if the file already exists kwargs: keyword args passed to `aipt_file_to_ss_list()` (not currently used) """ input_path = Path(input_file_path) if not output_dir: output_dir = input_path.parent _validate_and_create_output_dir(output_dir) output_file_paths = [ os.path.join(output_dir, input_path.stem + f"-{i}{SAMPLESET_HDF_FILE_EXTENSION}") for i in ELEMENT_IDS_PER_FILE ] all_output_files_exist = all([os.path.exists(p) for p in output_file_paths]) if skip_existing and all_output_files_exist: return ss_list = aipt_file_to_ss_list(input_file_path, **kwargs) for output_file_path, ss in zip(output_file_paths, ss_list): ss.to_hdf(output_file_path)