Module `riid.data.synthetic`

This module contains utilities for synthesizing gamma spectra.

Expand source code Browse git

# Copyright 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
# Under the terms of Contract DE-NA0003525 with NTESS,
# the U.S. Government retains certain rights in this software.
"""This module contains utilities for synthesizing gamma spectra."""
# The following imports are left to not break previous imports; remove in v3
from riid.data.synthetic.base import Synthesizer, get_distribution_values
from riid.data.synthetic.seed import get_dummy_seeds

__all__ = ["get_dummy_seeds", "Synthesizer", "get_distribution_values"]

Sub-modules

riid.data.synthetic.base: This module contains utilities for synthesizing gamma spectra.
riid.data.synthetic.passby: This module contains utilities for synthesizing gamma spectra based on a detector moving past a source.
riid.data.synthetic.seed: This module contains utilities for generating synthetic gamma spectrum templates from GADRAS.
riid.data.synthetic.static: This module contains utilities for synthesizing gamma spectra based on a source moving past a detector.

Functions

def get_distribution_values(function: str, function_args: Any, n_values: int, rng: numpy.random._generator.Generator = Generator(PCG64))

Randomly sample a list of values based one of many distributions.

Args

function: name of the distribution function
function_args: argument or collection of arguments to be passed to the function, if any.
n_values: size of the distribution
rng: NumPy random number generator, useful for experiment repeatability

Returns

Value or collection of sampled values

Raises

ValueError when an unsupported function type is provided

Expand source code Browse git

def get_distribution_values(function: str, function_args: Any, n_values: int,
                            rng: Generator = np.random.default_rng()):
    """Randomly sample a list of values based one of many distributions.

    Args:
        function: name of the distribution function
        function_args: argument or collection of arguments to be
            passed to the function, if any.
        n_values: size of the distribution
        rng: NumPy random number generator, useful for experiment repeatability

    Returns:
        Value or collection of sampled values

    Raises:
        `ValueError` when an unsupported function type is provided
    """
    values = None
    if function == "uniform":
        values = rng.uniform(*function_args, size=n_values)
    elif function == "log10":
        log10_args = tuple(map(np.log10, function_args))
        values = np.power(10, rng.uniform(*log10_args, size=n_values))
    elif function == "discrete":
        values = rng.choice(function_args, size=n_values)
    elif function == "list":
        values = np.array(function_args)
    else:
        raise ValueError(f"{function} function not supported for sampling.")

    return values

def get_dummy_seeds(n_channels: int = 512, live_time: float = 600.0, count_rate: float = 1000.0, normalize: bool = True, rng: numpy.random._generator.Generator = Generator(PCG64)) ‑> SampleSet

Get a random, dummy SampleSet of ideal seeds.

WARNING: the spectra returned by this function each contain one gaussian peak that does not overlap with the peaks of other spectra. Such data is about as ideal as one could hope to be working with and does not represent anything real. Therefore, do not use this data for any purpose other than testing, debugging, or examples where code, not results, is being demonstrated. Any use in scientific studies does not make sense.

Args

n_channels: number of channels in the spectra DataFrame
live_time: collection time on which to base seeds (higher creates a less noisy shape)
count_rate: count rate on which to base seeds (higher creates a less noisy shape)
normalize: whether to apply an L1-norm to the spectra
rng: NumPy random number generator, useful for experiment repeatability

Returns

SampleSet with randomly generated spectra

Expand source code Browse git

def get_dummy_seeds(n_channels: int = 512, live_time: float = 600.0,
                    count_rate: float = 1000.0, normalize: bool = True,
                    rng: Generator = np.random.default_rng()) -> SampleSet:
    """Get a random, dummy `SampleSet` of ideal seeds.

    WARNING: the spectra returned by this function each contain one gaussian peak that does
    not overlap with the peaks of other spectra.  Such data is about as *ideal* as one
    could hope to be working with and does not represent anything real.
    Therefore, **do not** use this data for any purpose other than testing, debugging, or
    examples where code, not results, is being demonstrated. Any use in scientific studies
    does not make sense.

    Args:
        n_channels: number of channels in the spectra DataFrame
        live_time: collection time on which to base seeds
            (higher creates a less noisy shape)
        count_rate: count rate on which to base seeds
            (higher creates a less noisy shape)
        normalize: whether to apply an L1-norm to the spectra
        rng: NumPy random number generator, useful for experiment repeatability

    Returns:
        `SampleSet` with randomly generated spectra
    """
    ss = SampleSet()
    ss.measured_or_synthetic = "synthetic"
    ss.spectra_state = SpectraState.Counts
    ss.spectra_type = SpectraType.BackgroundForeground
    ss.synthesis_info = {
        "subtract_background": True,
    }
    sources = [
        ("Industrial",  "Am241",    "Unshielded Am241"),
        ("Industrial",  "Ba133",    "Unshielded Ba133"),
        ("NORM",        "K40",      "PotassiumInSoil"),
        ("NORM",        "K40",      "Moderately Shielded K40"),
        ("NORM",        "Ra226",    "UraniumInSoil"),
        ("NORM",        "Th232",    "ThoriumInSoil"),
        ("SNM",         "U238",     "Unshielded U238"),
        ("SNM",         "Pu239",    "Unshielded Pu239"),
        ("SNM",         "Pu239",    "Moderately Shielded Pu239"),
        ("SNM",         "Pu239",    "Heavily Shielded Pu239"),
    ]
    n_sources = len(sources)
    n_fg_sources = n_sources
    sources_cols = pd.MultiIndex.from_tuples(
        sources,
        names=SampleSet.SOURCES_MULTI_INDEX_NAMES
    )
    sources_data = np.identity(n_sources)
    ss.sources = pd.DataFrame(data=sources_data, columns=sources_cols)

    histograms = []
    N_FG_COUNTS = int(count_rate * live_time)
    fg_std = np.sqrt(n_channels / n_sources)
    channels_per_sources = n_channels / n_fg_sources
    for i in range(n_fg_sources):
        mu = i * channels_per_sources + channels_per_sources / 2
        counts = rng.normal(mu, fg_std, size=N_FG_COUNTS)
        fg_histogram, _ = np.histogram(counts, bins=n_channels, range=(0, n_channels))
        histograms.append(fg_histogram)
    histograms = np.array(histograms)

    ss.spectra = pd.DataFrame(data=histograms)

    ss.info.total_counts = ss.spectra.sum(axis=1)
    ss.info.live_time = live_time
    ss.info.real_time = live_time
    ss.info.snr = None
    ss.info.ecal_order_0 = 0
    ss.info.ecal_order_1 = 3000
    ss.info.ecal_order_2 = 100
    ss.info.ecal_order_3 = 0
    ss.info.ecal_low_e = 0
    ss.info.description = ""
    ss.update_timestamp()

    if normalize:
        ss.normalize()

    return ss

Classes

class Synthesizer (bg_cps: float = 300.0, long_bg_live_time: float = 120.0, apply_poisson_noise: bool = True, normalize_sources: bool = True, return_fg: bool = True, return_gross: bool = False, rng: numpy.random._generator.Generator = Generator(PCG64))

Base class for synthesizers.

Args

bg_cps: constant rate of gammas from background
long_bg_live_time: live time on which to base background subtractions
apply_poisson_noise: whether to apply Poisson noise to spectra
normalize_sources: whether to normalize ground truth proportions to sum to 1
return_fg: whether to compute and return background subtracted spectra
return_gross: whether to return gross spectra (always computed)
rng: NumPy random number generator, useful for experiment repeatability

Expand source code Browse git

class Synthesizer():
    """Base class for synthesizers."""

    SYNTHETIC_STR = "synthetic"
    SUPPORTED_SAMPLING_FUNCTIONS = ["uniform", "log10", "discrete", "list"]

    def __init__(self, bg_cps: float = 300.0, long_bg_live_time: float = 120.0,
                 apply_poisson_noise: bool = True,
                 normalize_sources: bool = True,
                 return_fg: bool = True,
                 return_gross: bool = False,
                 rng: Generator = np.random.default_rng()):
        """
        Args:
            bg_cps: constant rate of gammas from background
            long_bg_live_time: live time on which to base background subtractions
            apply_poisson_noise: whether to apply Poisson noise to spectra
            normalize_sources: whether to normalize ground truth proportions to sum to 1
            return_fg: whether to compute and return background subtracted spectra
            return_gross: whether to return gross spectra (always computed)
            rng: NumPy random number generator, useful for experiment repeatability
        """
        self.bg_cps = bg_cps
        self.long_bg_live_time = long_bg_live_time
        self.apply_poisson_noise = apply_poisson_noise
        self.normalize_sources = normalize_sources
        self.return_fg = return_fg
        self.return_gross = return_gross
        self._rng = rng
        self._synthesis_start_dt = None
        self._n_samples_synthesized = 0

    def __str__(self):
        output = "SynthesizerConfig"
        for k, v in sorted(vars(self).items()):
            output += "  {}: {}".format(k, str(v))
        return output

    def _reset_progress(self):
        self._n_samples_synthesized = 0
        self._synthesis_start_dt = _get_utc_timestamp()

    def _report_progress(self, n_samples_expected, batch_name):
        percent_complete = 100 * self._n_samples_synthesized / n_samples_expected
        msg = (
            f"Synthesizing ... {percent_complete:.0f}% "
            f"(currently on {batch_name}"
        )
        MAX_MSG_LEN = 80
        msg = (msg[:MAX_MSG_LEN] + "...") if len(msg) > MAX_MSG_LEN else msg
        msg += ")"
        print("\033[K" + msg, end="\r")

    def _report_completion(self, delay):
        summary = (
            f"Synthesis complete!\n"
            f"Generated {self._n_samples_synthesized} samples in ~{delay:.2f}s "
            f"(~{(self._n_samples_synthesized / delay):.2f} samples/sec)."
        )
        print("\033[K" + summary)

    def _verify_n_samples_synthesized(self, actual: int, expected: int):
        assert expected == actual, (
            f"{actual} generated, but {expected} were expected. "
            "Be sure to remove any columns from your seeds' sources DataFrame that "
            "contain all zeroes.")

    def _get_batch(self, fg_seed, fg_sources, bg_seed, bg_sources, lt_targets, snr_targets):
        if not (self.return_fg or self.return_gross):
            raise ValueError("Computing to return nothing.")

        bg_counts_expected = lt_targets * self.bg_cps
        fg_counts_expected = snr_targets * np.sqrt(bg_counts_expected)

        fg_spectra = get_expected_spectra(fg_seed.values, fg_counts_expected)
        bg_spectra = get_expected_spectra(bg_seed.values, bg_counts_expected)

        long_bg_counts_expected = self.long_bg_live_time * self.bg_cps
        long_bg_spectrum_expected = bg_seed.values * long_bg_counts_expected

        gross_spectra = None
        long_bg_spectra = None
        fg_counts = 0
        bg_counts = 0
        long_bg_counts = 0
        fg_ss = None
        gross_ss = None

        # Spectra
        if self.apply_poisson_noise:
            gross_spectra = self._rng.poisson(fg_spectra + bg_spectra)
            if self.return_fg:
                long_bg_spectrum = self._rng.poisson(long_bg_spectrum_expected)
                long_bg_seed = long_bg_spectrum / long_bg_spectrum.sum()
                long_bg_spectra = get_expected_spectra(long_bg_seed, bg_counts_expected)
                fg_spectra = gross_spectra - long_bg_spectra
        else:
            gross_spectra = fg_spectra + bg_spectra
            if self.return_fg:
                long_bg_spectra = bg_spectra
                fg_spectra = gross_spectra - long_bg_spectra

        # Counts
        fg_counts = fg_spectra.sum(axis=1, dtype=float)
        if self.return_fg:
            long_bg_counts = long_bg_spectra.sum(axis=1, dtype=float)
        if self.return_gross:
            bg_counts = bg_spectra.sum(axis=1, dtype=float)

        # Sample sets
        if self.return_fg:
            snrs = fg_counts / np.sqrt(long_bg_counts.clip(1))
            fg_ss = get_fg_sample_set(fg_spectra, fg_sources, lt_targets,
                                      snrs=snrs, total_counts=fg_counts)
            self._n_samples_synthesized += fg_ss.n_samples
        if self.return_gross:
            tiled_fg_sources = _tile_sources_and_scale(
                fg_sources,
                gross_spectra.shape[0],
                fg_counts,
            )
            tiled_bg_sources = _tile_sources_and_scale(
                bg_sources,
                gross_spectra.shape[0],
                bg_counts,
            )
            gross_sources = get_merged_sources_samplewise(tiled_fg_sources, tiled_bg_sources)
            gross_counts = gross_spectra.sum(axis=1)
            snrs = fg_counts / np.sqrt(bg_counts.clip(1))
            gross_ss = get_gross_sample_set(gross_spectra, gross_sources,
                                            lt_targets, snrs, gross_counts)
            self._n_samples_synthesized += gross_ss.n_samples

        return fg_ss, gross_ss

Subclasses

Class variables

var SUPPORTED_SAMPLING_FUNCTIONS
var SYNTHETIC_STR