Module riid.data.synthetic
This module contains utilities for synthesizing gamma spectra.
Expand source code Browse git
# Copyright 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
# Under the terms of Contract DE-NA0003525 with NTESS,
# the U.S. Government retains certain rights in this software.
"""This module contains utilities for synthesizing gamma spectra."""
# The following imports are left to not break previous imports; remove in v3
from riid.data.synthetic.base import Synthesizer, get_distribution_values
from riid.data.synthetic.seed import get_dummy_seeds
__all__ = ["get_dummy_seeds", "Synthesizer", "get_distribution_values"]
Sub-modules
riid.data.synthetic.base-
This module contains utilities for synthesizing gamma spectra.
riid.data.synthetic.passby-
This module contains utilities for synthesizing gamma spectra based on a detector moving past a source.
riid.data.synthetic.seed-
This module contains utilities for generating synthetic gamma spectrum templates from GADRAS.
riid.data.synthetic.static-
This module contains utilities for synthesizing gamma spectra for static source-detector configurations.
Functions
def get_distribution_values(function: str, function_args: Any, n_values: int, rng: numpy.random._generator.Generator = Generator(PCG64))-
Randomly sample a list of values based on one of many distributions.
Args
function- name of the distribution function
function_args- argument or collection of arguments to be passed to the function, if any.
n_values- size of the distribution
rng- NumPy random number generator, useful for experiment repeatability
Returns
Value or collection of sampled values
Raises
ValueError - when an unsupported function type is provided
Expand source code Browse git
def get_distribution_values(function: str, function_args: Any, n_values: int, rng: Generator = np.random.default_rng()): """Randomly sample a list of values based one of many distributions. Args: function: name of the distribution function function_args: argument or collection of arguments to be passed to the function, if any. n_values: size of the distribution rng: NumPy random number generator, useful for experiment repeatability Returns: Value or collection of sampled values Raises: `ValueError` when an unsupported function type is provided """ values = None if function == "uniform": values = rng.uniform(*function_args, size=n_values) elif function == "log10": log10_args = tuple(map(np.log10, function_args)) values = np.power(10, rng.uniform(*log10_args, size=n_values)) elif function == "discrete": values = rng.choice(function_args, size=n_values) elif function == "list": values = np.array(function_args) else: raise ValueError(f"{function} function not supported for sampling.") return values def get_dummy_seeds(n_channels: int = 512, live_time: float = 600.0, count_rate: float = 1000.0, normalize: bool = True, rng: numpy.random._generator.Generator = Generator(PCG64)) ‑> SampleSet-
Get a random, dummy
SampleSet of ideal seeds. WARNING: the spectra returned by this function each contain one gaussian peak that does not overlap with the peaks of other spectra. Such data is about as ideal as one could hope to be working with and does not represent anything real. Therefore, do not use this data for any purpose other than testing, debugging, or examples where code, not results, is being demonstrated. Any use in scientific studies does not make sense.
Args
n_channels- number of channels in the spectra DataFrame
live_time- collection time on which to base seeds (higher creates a less noisy shape)
count_rate- count rate on which to base seeds (higher creates a less noisy shape)
normalize- whether to apply an L1-norm to the spectra
rng- NumPy random number generator, useful for experiment repeatability
Returns
SampleSet with randomly generated spectra
Expand source code Browse git
def get_dummy_seeds(n_channels: int = 512, live_time: float = 600.0,
                    count_rate: float = 1000.0, normalize: bool = True,
                    rng: Generator = np.random.default_rng()) -> SampleSet:
    """Get a random, dummy `SampleSet` of ideal seeds.

    WARNING: the spectra returned by this function each contain one gaussian
    peak that does not overlap with the peaks of other spectra.
    Such data is about as *ideal* as one could hope to be working with and
    does not represent anything real.
    Therefore, **do not** use this data for any purpose other than testing,
    debugging, or examples where code, not results, is being demonstrated.
    Any use in scientific studies does not make sense.

    Args:
        n_channels: number of channels in the spectra DataFrame
        live_time: collection time on which to base seeds
            (higher creates a less noisy shape)
        count_rate: count rate on which to base seeds
            (higher creates a less noisy shape)
        normalize: whether to apply an L1-norm to the spectra
        rng: NumPy random number generator, useful for experiment repeatability

    Returns:
        `SampleSet` with randomly generated spectra
    """
    ss = SampleSet()
    ss.measured_or_synthetic = "synthetic"
    ss.spectra_state = SpectraState.Counts
    ss.spectra_type = SpectraType.BackgroundForeground
    ss.synthesis_info = {
        "subtract_background": True,
    }
    # Each dummy source gets its own identity row in the sources DataFrame,
    # i.e., each spectrum is 100% attributed to exactly one source.
    sources = [
        ("Industrial", "Am241", "Unshielded Am241"),
        ("Industrial", "Ba133", "Unshielded Ba133"),
        ("NORM", "K40", "PotassiumInSoil"),
        ("NORM", "K40", "Moderately Shielded K40"),
        ("NORM", "Ra226", "UraniumInSoil"),
        ("NORM", "Th232", "ThoriumInSoil"),
        ("SNM", "U238", "Unshielded U238"),
        ("SNM", "Pu239", "Unshielded Pu239"),
        ("SNM", "Pu239", "Moderately Shielded Pu239"),
        ("SNM", "Pu239", "Heavily Shielded Pu239"),
    ]
    n_sources = len(sources)
    n_fg_sources = n_sources
    sources_cols = pd.MultiIndex.from_tuples(
        sources,
        names=SampleSet.SOURCES_MULTI_INDEX_NAMES
    )
    sources_data = np.identity(n_sources)
    ss.sources = pd.DataFrame(data=sources_data, columns=sources_cols)

    histograms = []
    N_FG_COUNTS = int(count_rate * live_time)
    fg_std = np.sqrt(n_channels / n_sources)
    # Evenly partition the channel axis so each source's gaussian peak sits in
    # the center of its own, non-overlapping span of channels.
    channels_per_sources = n_channels / n_fg_sources
    for i in range(n_fg_sources):
        mu = i * channels_per_sources + channels_per_sources / 2
        counts = rng.normal(mu, fg_std, size=N_FG_COUNTS)
        fg_histogram, _ = np.histogram(counts, bins=n_channels, range=(0, n_channels))
        histograms.append(fg_histogram)
    histograms = np.array(histograms)

    ss.spectra = pd.DataFrame(data=histograms)
    ss.info.total_counts = ss.spectra.sum(axis=1)
    ss.info.live_time = live_time
    ss.info.real_time = live_time
    ss.info.snr = None
    ss.info.ecal_order_0 = 0
    ss.info.ecal_order_1 = 3000
    ss.info.ecal_order_2 = 100
    ss.info.ecal_order_3 = 0
    ss.info.ecal_low_e = 0
    ss.info.description = ""
    ss.update_timestamp()

    if normalize:
        ss.normalize()

    return ss
Classes
class Synthesizer (bg_cps: float = 300.0, long_bg_live_time: float = 120.0, apply_poisson_noise: bool = True, normalize_sources: bool = True, return_fg: bool = True, return_gross: bool = False, rng: numpy.random._generator.Generator = Generator(PCG64))-
Base class for synthesizers.
Args
bg_cps- constant rate of gammas from background
long_bg_live_time- live time on which to base background subtractions
apply_poisson_noise- whether to apply Poisson noise to spectra
normalize_sources- whether to normalize ground truth proportions to sum to 1
return_fg- whether to compute and return background subtracted spectra
return_gross- whether to return gross spectra (always computed)
rng- NumPy random number generator, useful for experiment repeatability
Expand source code Browse git
class Synthesizer():
    """Base class for synthesizers."""

    SYNTHETIC_STR = "synthetic"
    SUPPORTED_SAMPLING_FUNCTIONS = ["uniform", "log10", "discrete", "list"]

    def __init__(self, bg_cps: float = 300.0, long_bg_live_time: float = 120.0,
                 apply_poisson_noise: bool = True,
                 normalize_sources: bool = True,
                 return_fg: bool = True,
                 return_gross: bool = False,
                 rng: Generator = np.random.default_rng()):
        """
        Args:
            bg_cps: constant rate of gammas from background
            long_bg_live_time: live time on which to base background subtractions
            apply_poisson_noise: whether to apply Poisson noise to spectra
            normalize_sources: whether to normalize ground truth proportions to sum to 1
            return_fg: whether to compute and return background subtracted spectra
            return_gross: whether to return gross spectra (always computed)
            rng: NumPy random number generator, useful for experiment repeatability
        """
        self.bg_cps = bg_cps
        self.long_bg_live_time = long_bg_live_time
        self.apply_poisson_noise = apply_poisson_noise
        self.normalize_sources = normalize_sources
        self.return_fg = return_fg
        self.return_gross = return_gross
        self._rng = rng
        # Progress-tracking state, reset at the start of each synthesis run.
        self._synthesis_start_dt = None
        self._n_samples_synthesized = 0

    def __str__(self):
        output = "SynthesizerConfig"
        for k, v in sorted(vars(self).items()):
            output += " {}: {}".format(k, str(v))
        return output

    def _reset_progress(self):
        # Zero the sample counter and stamp the start of a synthesis run.
        self._n_samples_synthesized = 0
        self._synthesis_start_dt = _get_utc_timestamp()

    def _report_progress(self, n_samples_expected, batch_name):
        # Prints an in-place (carriage-return) progress line; long batch names
        # are truncated so the line fits in a typical terminal.
        percent_complete = 100 * self._n_samples_synthesized / n_samples_expected
        msg = (
            f"Synthesizing ... {percent_complete:.0f}% "
            f"(currently on {batch_name}"
        )
        MAX_MSG_LEN = 80
        msg = (msg[:MAX_MSG_LEN] + "...") if len(msg) > MAX_MSG_LEN else msg
        msg += ")"
        print("\033[K" + msg, end="\r")

    def _report_completion(self, delay):
        # Final summary line; "\033[K" clears the progress line first.
        summary = (
            f"Synthesis complete!\n"
            f"Generated {self._n_samples_synthesized} samples in ~{delay:.2f}s "
            f"(~{(self._n_samples_synthesized / delay):.2f} samples/sec)."
        )
        print("\033[K" + summary)

    def _verify_n_samples_synthesized(self, actual: int, expected: int):
        assert expected == actual, (
            f"{actual} generated, but {expected} were expected. "
            "Be sure to remove any columns from your seeds' sources DataFrame that "
            "contain all zeroes.")

    def _get_batch(self, fg_seed, fg_sources, bg_seed, bg_sources,
                   lt_targets, snr_targets):
        if not (self.return_fg or self.return_gross):
            raise ValueError("Computing to return nothing.")

        # Expected counts from live-time and SNR targets; SNR is defined
        # relative to the square root of the background counts.
        bg_counts_expected = lt_targets * self.bg_cps
        fg_counts_expected = snr_targets * np.sqrt(bg_counts_expected)

        fg_spectra = get_expected_spectra(fg_seed.values, fg_counts_expected)
        bg_spectra = get_expected_spectra(bg_seed.values, bg_counts_expected)
        long_bg_counts_expected = self.long_bg_live_time * self.bg_cps
        long_bg_spectrum_expected = bg_seed.values * long_bg_counts_expected

        gross_spectra = None
        long_bg_spectra = None
        fg_counts = 0
        bg_counts = 0
        long_bg_counts = 0
        fg_ss = None
        gross_ss = None

        # Spectra
        if self.apply_poisson_noise:
            gross_spectra = self._rng.poisson(fg_spectra + bg_spectra)
            if self.return_fg:
                # Foreground is obtained by subtracting a (noisy) long
                # background estimate from the gross spectra.
                long_bg_spectrum = self._rng.poisson(long_bg_spectrum_expected)
                long_bg_seed = long_bg_spectrum / long_bg_spectrum.sum()
                long_bg_spectra = get_expected_spectra(long_bg_seed, bg_counts_expected)
                fg_spectra = gross_spectra - long_bg_spectra
        else:
            gross_spectra = fg_spectra + bg_spectra
            if self.return_fg:
                long_bg_spectra = bg_spectra
                fg_spectra = gross_spectra - long_bg_spectra

        # Counts
        fg_counts = fg_spectra.sum(axis=1, dtype=float)
        if self.return_fg:
            long_bg_counts = long_bg_spectra.sum(axis=1, dtype=float)
        if self.return_gross:
            bg_counts = bg_spectra.sum(axis=1, dtype=float)

        # Sample sets
        if self.return_fg:
            # .clip(1) guards against division by zero for empty backgrounds.
            snrs = fg_counts / np.sqrt(long_bg_counts.clip(1))
            fg_ss = get_fg_sample_set(fg_spectra, fg_sources, lt_targets,
                                      snrs=snrs, total_counts=fg_counts)
            self._n_samples_synthesized += fg_ss.n_samples
        if self.return_gross:
            tiled_fg_sources = _tile_sources_and_scale(
                fg_sources,
                gross_spectra.shape[0],
                fg_counts,
            )
            tiled_bg_sources = _tile_sources_and_scale(
                bg_sources,
                gross_spectra.shape[0],
                bg_counts,
            )
            gross_sources = get_merged_sources_samplewise(tiled_fg_sources, tiled_bg_sources)
            gross_counts = gross_spectra.sum(axis=1)
            snrs = fg_counts / np.sqrt(bg_counts.clip(1))
            gross_ss = get_gross_sample_set(gross_spectra, gross_sources, lt_targets,
                                            snrs, gross_counts)
            self._n_samples_synthesized += gross_ss.n_samples

        return fg_ss, gross_ss
Class variables
var SUPPORTED_SAMPLING_FUNCTIONS
var SYNTHETIC_STR