Module riid.models
This module contains PyRIID models.
Expand source code Browse git
# Copyright 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
# Under the terms of Contract DE-NA0003525 with NTESS,
# the U.S. Government retains certain rights in this software.
"""This module contains PyRIID models."""
from riid.models.bayes import PoissonBayesClassifier
from riid.models.neural_nets import LabelProportionEstimator, MLPClassifier
from riid.models.neural_nets.arad import ARADLatentPredictor, ARADv1, ARADv2
__all__ = ["PoissonBayesClassifier", "LabelProportionEstimator", "MLPClassifier",
"ARADLatentPredictor", "ARADv1", "ARADv2"]
Sub-modules
riid.models.base-
This module contains functionality shared across all PyRIID models.
riid.models.bayes-
This module contains the Poisson-Bayes classifier.
riid.models.layers-
This module contains custom Keras layers.
riid.models.neural_nets-
This module contains neural network-based classifiers and regressors.
Classes
class ARADLatentPredictor (hidden_layers: tuple = (8, 4), hidden_activation: str = 'relu', final_activation: str = 'linear', loss: str = 'mse', optimizer='adam', optimizer_kwargs=None, learning_rate: float = 0.001, metrics=None, kernel_l1_regularization: float = 0.0, kernel_l2_regularization: float = 0.0, bias_l1_regularization: float = 0.0, bias_l2_regularization: float = 0.0, activity_l1_regularization: float = 0.0, activity_l2_regularization: float = 0.0, dropout: float = 0.0, **base_kwargs)-
PyRIID-compatible model for branching from the latent space of a pre-trained ARAD model for a separate, arbitrary prediction task.
Args
hidden_layers- tuple defining the number and size of dense layers
hidden_activation- activation function to use for each dense layer
final_activation- activation function to use for final layer
loss- loss function to use for training
optimizer- tensorflow optimizer or optimizer name to use for training
optimizer_kwargs- kwargs for optimizer
learning_rate- optional learning rate for the optimizer
metrics- list of metrics to be evaluated during training
kernel_l1_regularization- l1 regularization value for the kernel regularizer
kernel_l2_regularization- l2 regularization value for the kernel regularizer
bias_l1_regularization- l1 regularization value for the bias regularizer
bias_l2_regularization- l2 regularization value for the bias regularizer
activity_l1_regularization- l1 regularization value for the activity regularizer
activity_l2_regularization- l2 regularization value for the activity regularizer
dropout- amount of dropout to apply to each dense layer
Expand source code Browse git
class ARADLatentPredictor(PyRIIDModel):
    """PyRIID-compatible model for branching from the latent space of a pre-trained
    ARAD model for a separate, arbitrary prediction task.
    """
    def __init__(self, hidden_layers: tuple = (8, 4,),
                 hidden_activation: str = "relu",
                 final_activation: str = "linear",
                 loss: str = "mse",
                 optimizer="adam",
                 optimizer_kwargs=None,
                 learning_rate: float = 1e-3,
                 metrics=None,
                 kernel_l1_regularization: float = 0.0,
                 kernel_l2_regularization: float = 0.0,
                 bias_l1_regularization: float = 0.0,
                 bias_l2_regularization: float = 0.0,
                 activity_l1_regularization: float = 0.0,
                 activity_l2_regularization: float = 0.0,
                 dropout: float = 0.0,
                 **base_kwargs):
        """
        Args:
            hidden_layers: tuple defining the number and size of dense layers
            hidden_activation: activation function to use for each dense layer
            final_activation: activation function to use for final layer
            loss: loss function to use for training
            optimizer: tensorflow optimizer or optimizer name to use for training
            optimizer_kwargs: kwargs for optimizer
            learning_rate: optional learning rate for the optimizer
            metrics: list of metrics to be evaluated during training
            kernel_l1_regularization: l1 regularization value for the kernel regularizer
            kernel_l2_regularization: l2 regularization value for the kernel regularizer
            bias_l1_regularization: l1 regularization value for the bias regularizer
            bias_l2_regularization: l2 regularization value for the bias regularizer
            activity_l1_regularization: l1 regularization value for the activity regularizer
            activity_l2_regularization: l2 regularization value for the activity regularizer
            dropout: amount of dropout to apply to each dense layer
        """
        super().__init__(**base_kwargs)

        self.hidden_layers = hidden_layers
        self.hidden_activation = hidden_activation
        self.final_activation = final_activation
        self.loss = loss
        # Accept either an optimizer instance or its registered string name.
        self.optimizer = optimizer
        if isinstance(optimizer, str):
            self.optimizer = keras.optimizers.get(optimizer)
        if optimizer_kwargs is not None:
            for key, value in optimizer_kwargs.items():
                setattr(self.optimizer, key, value)
        self.optimizer.learning_rate = learning_rate
        # None default avoids a shared mutable default argument.
        self.metrics = metrics
        if self.metrics is None:
            self.metrics = [MeanSquaredError()]
        self.kernel_l1_regularization = kernel_l1_regularization
        self.kernel_l2_regularization = kernel_l2_regularization
        self.bias_l1_regularization = bias_l1_regularization
        self.bias_l2_regularization = bias_l2_regularization
        self.activity_l1_regularization = activity_l1_regularization
        self.activity_l2_regularization = activity_l2_regularization
        self.dropout = dropout
        self.model = None
        self.encoder = None

    def _initialize_model(self, arad: Model, output_size: int):
        """Build the Keras MLP that maps the frozen ARAD latent space to predictions."""
        encoder: Model = arad.get_layer("encoder")
        encoder_input = encoder.input
        encoder_output = encoder.output
        encoder_output_shape = encoder_output.shape[-1]

        predictor_input = Input(
            shape=(encoder_output_shape,),
            name="inner_predictor_input"
        )
        x = predictor_input
        for layer, nodes in enumerate(self.hidden_layers):
            x = Dense(
                nodes,
                activation=self.hidden_activation,
                kernel_regularizer=L1L2(
                    l1=self.kernel_l1_regularization,
                    l2=self.kernel_l2_regularization
                ),
                bias_regularizer=L1L2(
                    l1=self.bias_l1_regularization,
                    l2=self.bias_l2_regularization
                ),
                activity_regularizer=L1L2(
                    l1=self.activity_l1_regularization,
                    l2=self.activity_l2_regularization
                ),
                name=f"inner_predictor_dense_{layer}"
            )(x)
            if self.dropout > 0:
                x = Dropout(self.dropout)(x)
        predictor_output = Dense(
            output_size,
            activation=self.final_activation,
            name="inner_predictor_output"
        )(x)
        inner_predictor = Model(predictor_input, predictor_output, name="inner_predictor")

        encoded_spectrum = encoder(encoder_input)
        predictions = inner_predictor(encoded_spectrum)
        self.model = Model(encoder_input, predictions, name="predictor")

        # Freeze the layers corresponding to the autoencoder
        # Note: setting trainable to False is recursive to sub-layers per TF docs:
        # https://www.tensorflow.org/guide/keras/transfer_learning#recursive_setting_of_the_trainable_attribute
        for layer in self.model.layers[:-1]:
            layer.trainable = False

    def _check_targets(self, target_info_columns, target_level):
        """Check that exactly one of the mutually exclusive target options is provided."""
        if target_info_columns and target_level:
            raise ValueError((
                "You have specified both target_info_columns (regression task) and "
                "a target_level (classification task), but only one can be set."
            ))
        if not target_info_columns and not target_level:
            raise ValueError((
                "You must specify either target_info_columns (regression task) or "
                "a target_level (classification task)."
            ))

    def fit(self, arad: Model, ss: SampleSet, target_info_columns: List[str] = None,
            target_level: str = None, batch_size: int = 10, epochs: int = 20,
            validation_split: float = 0.2, callbacks=None, patience: int = 15,
            es_monitor: str = "val_loss", es_mode: str = "min", es_verbose=0,
            es_min_delta: float = 0.0, verbose: bool = False):
        """Fit a model to the given SampleSet(s).

        Args:
            arad: a pretrained ARAD model (a TensorFlow Model object, not a
                PyRIIDModel wrapper)
            ss: `SampleSet` of `n` spectra where `n` >= 1
            target_info_columns: list of column names from SampleSet info dataframe
                which denote what values the model should target
            target_level: `SampleSet.sources` column level to target for classification
            batch_size: number of samples per gradient update
            epochs: maximum number of training iterations
            validation_split: proportion of training data to use as validation data
            callbacks: list of callbacks to be passed to TensorFlow Model.fit() method
            patience: number of epochs to wait for tf.keras.callbacks.EarlyStopping object
            es_monitor: quantity to be monitored for tf.keras.callbacks.EarlyStopping object
            es_mode: mode for tf.keras.callbacks.EarlyStopping object
            es_verbose: verbosity level for tf.keras.callbacks.EarlyStopping object
            es_min_delta: minimum change to count as an improvement for early stopping
            verbose: whether model training output is printed to the terminal

        Returns:
            Keras training History object
        """
        self._check_targets(target_info_columns, target_level)
        x_train = ss.get_samples().astype(float)
        if target_info_columns:
            y_train = ss.info[target_info_columns].values.astype(float)
        else:
            # Transpose-groupby-transpose instead of groupby(axis=1, ...):
            # the `axis` argument to groupby is deprecated (removed in pandas 3.x),
            # and this matches the form used by LabelProportionEstimator.fit.
            source_contributions_df = ss.sources.T.groupby(
                target_level,
                sort=False
            ).sum().T
            y_train = source_contributions_df.values.astype(float)

        if not self.model:
            self._initialize_model(arad=arad, output_size=y_train.shape[1])
        self.model.compile(
            loss=self.loss,
            optimizer=self.optimizer,
            metrics=self.metrics
        )

        es = EarlyStopping(
            monitor=es_monitor,
            patience=patience,
            verbose=es_verbose,
            restore_best_weights=True,
            mode=es_mode,
            min_delta=es_min_delta
        )
        if callbacks:
            callbacks.append(es)
        else:
            callbacks = [es]

        history = self.model.fit(
            x_train,
            y_train,
            epochs=epochs,
            verbose=verbose,
            validation_split=validation_split,
            callbacks=callbacks,
            shuffle=True,
            batch_size=batch_size
        )

        self._update_info(
            normalization=ss.spectra_state,
            target_level=target_level,
            model_outputs=target_info_columns,
        )
        if target_level:
            self._update_info(
                model_outputs=source_contributions_df.columns.values.tolist(),
            )

        return history

    def predict(self, ss: SampleSet, verbose=False):
        """Generate predictions for the given `SampleSet`, storing classification
        probabilities on `ss` when a classification target level was trained.
        """
        spectra = ss.get_samples().astype(float)
        predictions = self.model.predict(spectra, verbose=verbose)
        if self.target_level:
            col_level_idx = SampleSet.SOURCES_MULTI_INDEX_NAMES.index(self.target_level)
            col_level_subset = SampleSet.SOURCES_MULTI_INDEX_NAMES[:col_level_idx+1]
            ss.prediction_probas = pd.DataFrame(
                data=predictions,
                columns=pd.MultiIndex.from_tuples(
                    self.get_model_outputs_as_label_tuples(),
                    names=col_level_subset
                )
            )
        ss.classified_by = self.model_id
        return predictions
Methods
def fit(self, arad: keras.src.models.model.Model, ss: SampleSet, target_info_columns: List[str] = None, target_level: str = None, batch_size: int = 10, epochs: int = 20, validation_split: float = 0.2, callbacks=None, patience: int = 15, es_monitor: str = 'val_loss', es_mode: str = 'min', es_verbose=0, es_min_delta: float = 0.0, verbose: bool = False)-
Fit a model to the given SampleSet(s).
Args
arad- a pretrained ARAD model (a TensorFlow Model object, not a PyRIIDModel wrapper)
ss- SampleSet of n spectra where n >= 1
target_info_columns- list of column names from SampleSet info dataframe which denote what values the model should target
target_level- SampleSet.sources column level to target for classification
batch_size- number of samples per gradient update
epochs- maximum number of training iterations
validation_split- proportion of training data to use as validation data
callbacks- list of callbacks to be passed to TensorFlow Model.fit() method
patience- number of epochs to wait for tf.keras.callbacks.EarlyStopping object
es_monitor- quantity to be monitored for tf.keras.callbacks.EarlyStopping object
es_mode- mode for tf.keras.callbacks.EarlyStopping object
es_verbose- verbosity level for tf.keras.callbacks.EarlyStopping object
es_min_delta- minimum change to count as an improvement for early stopping
verbose- whether model training output is printed to the terminal
Expand source code Browse git
def fit(self, arad: Model, ss: SampleSet, target_info_columns: List[str] = None,
        target_level: str = None, batch_size: int = 10, epochs: int = 20,
        validation_split: float = 0.2, callbacks=None, patience: int = 15,
        es_monitor: str = "val_loss", es_mode: str = "min", es_verbose=0,
        es_min_delta: float = 0.0, verbose: bool = False):
    """Fit a model to the given SampleSet(s).

    Args:
        arad: a pretrained ARAD model (a TensorFlow Model object, not a
            PyRIIDModel wrapper)
        ss: `SampleSet` of `n` spectra where `n` >= 1
        target_info_columns: list of column names from SampleSet info dataframe
            which denote what values the model should target
        target_level: `SampleSet.sources` column level to target for classification
        batch_size: number of samples per gradient update
        epochs: maximum number of training iterations
        validation_split: proportion of training data to use as validation data
        callbacks: list of callbacks to be passed to TensorFlow Model.fit() method
        patience: number of epochs to wait for tf.keras.callbacks.EarlyStopping object
        es_monitor: quantity to be monitored for tf.keras.callbacks.EarlyStopping object
        es_mode: mode for tf.keras.callbacks.EarlyStopping object
        es_verbose: verbosity level for tf.keras.callbacks.EarlyStopping object
        es_min_delta: minimum change to count as an improvement for early stopping
        verbose: whether model training output is printed to the terminal

    Returns:
        Keras training History object
    """
    self._check_targets(target_info_columns, target_level)
    x_train = ss.get_samples().astype(float)
    if target_info_columns:
        y_train = ss.info[target_info_columns].values.astype(float)
    else:
        # Transpose-groupby-transpose instead of groupby(axis=1, ...):
        # the `axis` argument to groupby is deprecated (removed in pandas 3.x),
        # and this matches the form used by LabelProportionEstimator.fit.
        source_contributions_df = ss.sources.T.groupby(
            target_level,
            sort=False
        ).sum().T
        y_train = source_contributions_df.values.astype(float)

    if not self.model:
        self._initialize_model(arad=arad, output_size=y_train.shape[1])
    self.model.compile(
        loss=self.loss,
        optimizer=self.optimizer,
        metrics=self.metrics
    )

    es = EarlyStopping(
        monitor=es_monitor,
        patience=patience,
        verbose=es_verbose,
        restore_best_weights=True,
        mode=es_mode,
        min_delta=es_min_delta
    )
    if callbacks:
        callbacks.append(es)
    else:
        callbacks = [es]

    history = self.model.fit(
        x_train,
        y_train,
        epochs=epochs,
        verbose=verbose,
        validation_split=validation_split,
        callbacks=callbacks,
        shuffle=True,
        batch_size=batch_size
    )

    self._update_info(
        normalization=ss.spectra_state,
        target_level=target_level,
        model_outputs=target_info_columns,
    )
    if target_level:
        self._update_info(
            model_outputs=source_contributions_df.columns.values.tolist(),
        )

    return history
Expand source code Browse git
def predict(self, ss: SampleSet, verbose=False):
    """Run the trained predictor over the spectra in `ss`.

    When the model was trained against a classification target level, the
    per-class probabilities are also attached to `ss.prediction_probas`.
    """
    x = ss.get_samples().astype(float)
    results = self.model.predict(x, verbose=verbose)
    if self.target_level:
        level_idx = SampleSet.SOURCES_MULTI_INDEX_NAMES.index(self.target_level)
        level_names = SampleSet.SOURCES_MULTI_INDEX_NAMES[:level_idx + 1]
        proba_columns = pd.MultiIndex.from_tuples(
            self.get_model_outputs_as_label_tuples(),
            names=level_names
        )
        ss.prediction_probas = pd.DataFrame(data=results, columns=proba_columns)
    ss.classified_by = self.model_id
    return results
Inherited members
class ARADv1 (model: ARADv1TF = None)-
PyRIID-compatible ARAD v1 model supporting
SampleSets.
Args
model- a previously initialized TF implementation of ARADv1
Expand source code Browse git
class ARADv1(PyRIIDModel):
    """PyRIID-compatible ARAD v1 model supporting `SampleSet`s."""

    def __init__(self, model: ARADv1TF = None):
        """
        Args:
            model: a previously initialized TF implementation of ARADv1
        """
        super().__init__()
        self.model = model
        self._update_custom_objects("ARADv1TF", ARADv1TF)

    def fit(self, ss: SampleSet, epochs: int = 300, validation_split=0.2,
            es_verbose: int = 0, verbose: bool = False):
        """Fit a model to the given `SampleSet`.

        Args:
            ss: `SampleSet` of `n` spectra where `n` >= 1
            epochs: maximum number of training epochs
            validation_split: percentage of the training data to use as validation data
            es_verbose: verbosity level for `tf.keras.callbacks.EarlyStopping`
            verbose: whether to show detailed model training output

        Returns:
            Keras training History object
        """
        _check_spectra(ss)
        spectra = ss.get_samples().astype(float)

        optimizer = Nadam(learning_rate=1e-4)
        if not self.model:
            self.model = ARADv1TF()
        self.model.compile(optimizer=optimizer)

        early_stop = EarlyStopping(
            monitor="val_loss",
            patience=5,
            verbose=es_verbose,
            restore_best_weights=True,
            mode="min",
            min_delta=1e-7
        )
        lr_schedule = ReduceLROnPlateau(
            monitor="val_loss",
            factor=0.1,
            patience=3,
            min_delta=1e-8
        )

        history = self.model.fit(
            x=spectra,
            y=None,
            epochs=epochs,
            verbose=verbose,
            validation_split=validation_split,
            callbacks=[early_stop, lr_schedule],
            shuffle=True,
            batch_size=64
        )
        self._update_info(
            normalization=ss.spectra_state,
        )

        return history

    def predict(self, ss: SampleSet, verbose=False):
        """Generate reconstructions for given `SampleSet`.

        Args:
            ss: `SampleSet` of `n` spectra where `n` >= 1

        Returns:
            reconstructed_spectra: output of ARAD model
        """
        _check_spectra(ss)
        spectra = ss.get_samples().astype(float)

        reconstructions = self.model.predict(spectra, verbose=verbose)
        # Relative entropy between input and reconstruction is the anomaly score.
        ss.info["recon_error"] = entropy(spectra, reconstructions, axis=1)

        return reconstructions
Methods
def fit(self, ss: SampleSet, epochs: int = 300, validation_split=0.2, es_verbose: int = 0, verbose: bool = False)-
Fit a model to the given
SampleSet.Args
ss- SampleSet of n spectra where n >= 1
epochs- maximum number of training epochs
validation_split- percentage of the training data to use as validation data
es_verbose- verbosity level for
tf.keras.callbacks.EarlyStopping
verbose- whether to show detailed model training output
Returns
reconstructed_spectra- output of ARAD model
Expand source code Browse git
def fit(self, ss: SampleSet, epochs: int = 300, validation_split=0.2,
        es_verbose: int = 0, verbose: bool = False):
    """Fit a model to the given `SampleSet`.

    Args:
        ss: `SampleSet` of `n` spectra where `n` >= 1
        epochs: maximum number of training epochs
        validation_split: percentage of the training data to use as validation data
        es_verbose: verbosity level for `tf.keras.callbacks.EarlyStopping`
        verbose: whether to show detailed model training output

    Returns:
        Keras training History object
    """
    _check_spectra(ss)
    spectra = ss.get_samples().astype(float)

    optimizer = Nadam(learning_rate=1e-4)
    if not self.model:
        self.model = ARADv1TF()
    self.model.compile(optimizer=optimizer)

    early_stop = EarlyStopping(
        monitor="val_loss",
        patience=5,
        verbose=es_verbose,
        restore_best_weights=True,
        mode="min",
        min_delta=1e-7
    )
    lr_schedule = ReduceLROnPlateau(
        monitor="val_loss",
        factor=0.1,
        patience=3,
        min_delta=1e-8
    )

    history = self.model.fit(
        x=spectra,
        y=None,
        epochs=epochs,
        verbose=verbose,
        validation_split=validation_split,
        callbacks=[early_stop, lr_schedule],
        shuffle=True,
        batch_size=64
    )
    self._update_info(
        normalization=ss.spectra_state,
    )

    return history
Generate reconstructions for given
SampleSet.Args
ss- SampleSet of n spectra where n >= 1
Returns
reconstructed_spectra- output of ARAD model
Expand source code Browse git
def predict(self, ss: SampleSet, verbose=False):
    """Generate reconstructions for given `SampleSet`.

    Args:
        ss: `SampleSet` of `n` spectra where `n` >= 1

    Returns:
        reconstructed_spectra: output of ARAD model
    """
    _check_spectra(ss)
    spectra = ss.get_samples().astype(float)

    reconstructions = self.model.predict(spectra, verbose=verbose)
    # Relative entropy between input and reconstruction is the anomaly score.
    ss.info["recon_error"] = entropy(spectra, reconstructions, axis=1)

    return reconstructions
Inherited members
class ARADv2 (model: ARADv2TF = None)-
PyRIID-compatible ARAD v2 model supporting
SampleSets.
Args
model- a previously initialized TF implementation of ARADv2
Expand source code Browse git
class ARADv2(PyRIIDModel):
    """PyRIID-compatible ARAD v2 model supporting `SampleSet`s."""

    def __init__(self, model: ARADv2TF = None):
        """
        Args:
            model: a previously initialized TF implementation of ARADv2
        """
        super().__init__()
        self.model = model
        self._update_custom_objects("ARADv2TF", ARADv2TF)

    def fit(self, ss: SampleSet, epochs: int = 300, validation_split=0.2,
            es_verbose: int = 0, verbose: bool = False):
        """Fit a model to the given `SampleSet`.

        Args:
            ss: `SampleSet` of `n` spectra where `n` >= 1
            epochs: maximum number of training epochs
            validation_split: percentage of the training data to use as validation data
            es_verbose: verbosity level for `tf.keras.callbacks.EarlyStopping`
            verbose: whether to show detailed model training output

        Returns:
            Keras training History object
        """
        _check_spectra(ss)
        x = ss.get_samples().astype(float)

        optimizer = Adam(learning_rate=0.01, epsilon=0.05)
        if not self.model:
            self.model = ARADv2TF()
        self.model.compile(optimizer=optimizer)

        callbacks = [
            EarlyStopping(
                monitor="val_loss",
                patience=6,
                verbose=es_verbose,
                restore_best_weights=True,
                mode="min",
                min_delta=1e-4
            ),
            ReduceLROnPlateau(
                monitor="val_loss",
                factor=0.1,
                patience=3,
                min_delta=1e-4
            )
        ]
        # v2 trains as a conventional autoencoder (y=x), unlike v1 (y=None).
        history = self.model.fit(
            x=x,
            y=x,
            epochs=epochs,
            verbose=verbose,
            validation_split=validation_split,
            callbacks=callbacks,
            shuffle=True,
            batch_size=32
        )
        self._update_info(
            normalization=ss.spectra_state,
        )

        return history

    def predict(self, ss: SampleSet, verbose=False):
        """Generate reconstructions for given `SampleSet`.

        Args:
            ss: `SampleSet` of `n` spectra where `n` >= 1

        Returns:
            reconstructed_spectra: output of ARAD model
        """
        _check_spectra(ss)
        x = ss.get_samples().astype(float)

        reconstructed_spectra = self.model.predict(x, verbose=verbose)
        # Jensen-Shannon distance between input and reconstruction is the anomaly score.
        reconstruction_errors = jensenshannon(x, reconstructed_spectra, axis=1)
        ss.info["recon_error"] = reconstruction_errors

        return reconstructed_spectra
Methods
def fit(self, ss: SampleSet, epochs: int = 300, validation_split=0.2, es_verbose: int = 0, verbose: bool = False)-
Fit a model to the given
SampleSet.Args
ss- SampleSet of n spectra where n >= 1
epochs- maximum number of training epochs
validation_split- percentage of the training data to use as validation data
es_verbose- verbosity level for
tf.keras.callbacks.EarlyStopping
verbose- whether to show detailed model training output
Returns
reconstructed_spectra- output of ARAD model
Expand source code Browse git
def fit(self, ss: SampleSet, epochs: int = 300, validation_split=0.2,
        es_verbose: int = 0, verbose: bool = False):
    """Fit a model to the given `SampleSet`.

    Args:
        ss: `SampleSet` of `n` spectra where `n` >= 1
        epochs: maximum number of training epochs
        validation_split: percentage of the training data to use as validation data
        es_verbose: verbosity level for `tf.keras.callbacks.EarlyStopping`
        verbose: whether to show detailed model training output

    Returns:
        Keras training History object
    """
    _check_spectra(ss)
    spectra = ss.get_samples().astype(float)

    if not self.model:
        self.model = ARADv2TF()
    self.model.compile(optimizer=Adam(learning_rate=0.01, epsilon=0.05))

    early_stop = EarlyStopping(
        monitor="val_loss",
        patience=6,
        verbose=es_verbose,
        restore_best_weights=True,
        mode="min",
        min_delta=1e-4
    )
    lr_schedule = ReduceLROnPlateau(
        monitor="val_loss",
        factor=0.1,
        patience=3,
        min_delta=1e-4
    )

    history = self.model.fit(
        x=spectra,
        y=spectra,
        epochs=epochs,
        verbose=verbose,
        validation_split=validation_split,
        callbacks=[early_stop, lr_schedule],
        shuffle=True,
        batch_size=32
    )
    self._update_info(
        normalization=ss.spectra_state,
    )

    return history
Generate reconstructions for given
SampleSet.Args
ss- SampleSet of n spectra where n >= 1
Returns
reconstructed_spectra- output of ARAD model
Expand source code Browse git
def predict(self, ss: SampleSet, verbose=False):
    """Generate reconstructions for given `SampleSet`.

    Args:
        ss: `SampleSet` of `n` spectra where `n` >= 1

    Returns:
        reconstructed_spectra: output of ARAD model
    """
    _check_spectra(ss)
    spectra = ss.get_samples().astype(float)

    reconstructions = self.model.predict(spectra, verbose=verbose)
    # Jensen-Shannon distance between input and reconstruction is the anomaly score.
    ss.info["recon_error"] = jensenshannon(spectra, reconstructions, axis=1)

    return reconstructions
Inherited members
class LabelProportionEstimator (hidden_layers: tuple = (256,), sup_loss='sparsemax', unsup_loss='sse', metrics: list = ['mae', 'categorical_crossentropy'], beta=0.9, source_dict=None, optimizer='adam', optimizer_kwargs=None, learning_rate: float = 0.001, hidden_layer_activation: str = 'mish', kernel_l1_regularization: float = 0.0, kernel_l2_regularization: float = 0.0, bias_l1_regularization: float = 0.0, bias_l2_regularization: float = 0.0, activity_l1_regularization: float = 0.0, activity_l2_regularization: float = 0.0, dropout: float = 0.0, ood_fp_rate: float = 0.05, fit_spline: bool = True, spline_bins: int = 15, spline_k: int = 3, spline_s: int = 0, spline_snrs=None, spline_recon_errors=None)-
Regressor for predicting label proportions that uses a semi-supervised loss.
Optionally, a U-spline-based out-of-distribution detection model can be fit to target a desired false positive rate.
Args
hidden_layers- tuple defining the number and size of dense layers
sup_loss- supervised loss function to use for training
unsup_loss- unsupervised loss function to use for training the foreground branch of the network (options: "sse", "poisson_nll", "normal_nll", "weighted_sse", "jsd", or "chi_squared")
metrics- list of metrics to be evaluated during training
beta- tradeoff parameter between the supervised and unsupervised foreground loss
source_dict- 2D array of pure, long-collect foreground spectra
optimizer- tensorflow optimizer or optimizer name to use for training
optimizer_kwargs- kwargs for optimizer
learning_rate- learning rate for the optimizer
hidden_layer_activation- activation function to use for each dense layer
kernel_l1_regularization- l1 regularization value for the kernel regularizer
kernel_l2_regularization- l2 regularization value for the kernel regularizer
bias_l1_regularization- l1 regularization value for the bias regularizer
bias_l2_regularization- l2 regularization value for the bias regularizer
activity_l1_regularization- l1 regularization value for the activity regularizer
activity_l2_regularization- l2 regularization value for the activity regularizer
dropout- amount of dropout to apply to each dense layer
ood_fp_rate- false positive rate used to determine threshold for out-of-distribution (OOD) detection
fit_spline- whether or not to fit UnivariateSpline for OOD threshold function
spline_bins- number of bins used when fitting the UnivariateSpline threshold function for OOD detection
spline_k- degree of smoothing for the UnivariateSpline
spline_s- positive smoothing factor used to choose the number of knots in the UnivariateSpline (s=0 forces the spline through all the datapoints, equivalent to InterpolatedUnivariateSpline)
spline_snrs- SNRs from training used as the x-values to fit the UnivariateSpline
spline_recon_errors- reconstruction errors from training used as the y-values to fit the UnivariateSpline
Expand source code Browse git
class LabelProportionEstimator(PyRIIDModel): """Regressor for predicting label proportions that uses a semi-supervised loss. Optionally, a U-spline-based out-of-distribution detection model can be fit to target a desired false positive rate. """ UNSUPERVISED_LOSS_FUNCS = { "poisson_nll": poisson_nll_diff, "normal_nll": normal_nll_diff, "sse": sse_diff, "weighted_sse": weighted_sse_diff, "jsd": jensen_shannon_divergence, "chi_squared": chi_squared_diff } SUPERVISED_LOSS_FUNCS = { "sparsemax": ( SparsemaxLoss, { "from_logits": True, "reduction": tf.keras.losses.Reduction.NONE, }, sparsemax, ), "categorical_crossentropy": ( CategoricalCrossentropy, { "from_logits": True, "reduction": tf.keras.losses.Reduction.NONE, }, softmax, ), "mse": ( MeanSquaredError, { "reduction": tf.keras.losses.Reduction.NONE, }, sigmoid, ) } INFO_KEYS = ( # model architecture "hidden_layers", "learning_rate", "epsilon", "sup_loss", "unsup_loss", "metrics", "beta", "hidden_layer_activation", "kernel_l1_regularization", "kernel_l2_regularization", "bias_l1_regularization", "bias_l2_regularization", "activity_l1_regularization", "activity_l2_regularization", "dropout", "ood_fp_rate", "fit_spline", "spline_bins", "spline_k", "spline_s", # dictionaries "source_dict", # populated when loading model "spline_snrs", "spline_recon_errors", ) def __init__(self, hidden_layers: tuple = (256,), sup_loss="sparsemax", unsup_loss="sse", metrics: list = ["mae", "categorical_crossentropy"], beta=0.9, source_dict=None, optimizer="adam", optimizer_kwargs=None, learning_rate: float = 1e-3, hidden_layer_activation: str = "mish", kernel_l1_regularization: float = 0.0, kernel_l2_regularization: float = 0.0, bias_l1_regularization: float = 0.0, bias_l2_regularization: float = 0.0, activity_l1_regularization: float = 0.0, activity_l2_regularization: float = 0.0, dropout: float = 0.0, ood_fp_rate: float = 0.05, fit_spline: bool = True, spline_bins: int = 15, spline_k: int = 3, spline_s: int = 0, spline_snrs=None, 
spline_recon_errors=None): """ Args: hidden_layers: tuple defining the number and size of dense layers sup_loss: supervised loss function to use for training unsup_loss: unsupervised loss function to use for training the foreground branch of the network (options: "sse", "poisson_nll", "normal_nll", "weighted_sse", "jsd", or "chi_squared") metrics: list of metrics to be evaluating during training beta: tradeoff parameter between the supervised and unsupervised foreground loss source_dict: 2D array of pure, long-collect foreground spectra optimizer: tensorflow optimizer or optimizer name to use for training optimizer_kwargs: kwargs for optimizer learning_rate: learning rate for the optimizer hidden_layer_activation: activation function to use for each dense layer kernel_l1_regularization: l1 regularization value for the kernel regularizer kernel_l2_regularization: l2 regularization value for the kernel regularizer bias_l1_regularization: l1 regularization value for the bias regularizer bias_l2_regularization: l2 regularization value for the bias regularizer activity_l1_regularization: l1 regularization value for the activity regularizer activity_l2_regularization: l2 regularization value for the activity regularizer dropout: amount of dropout to apply to each dense layer ood_fp_rate: false positive rate used to determine threshold for out-of-distribution (OOD) detection fit_spline: whether or not to fit UnivariateSpline for OOD threshold function spline_bins: number of bins used when fitting the UnivariateSpline threshold function for OOD detection spline_k: degree of smoothing for the UnivariateSpline spline_s: positive smoothing factor used to choose the number of knots in the UnivariateSpline (s=0 forces the spline through all the datapoints, equivalent to InterpolatedUnivariateSpline) spline_snrs: SNRs from training used as the x-values to fit the UnivariateSpline spline_recon_errors: reconstruction errors from training used as the y-values to fit the 
UnivariateSpline """ super().__init__() self.hidden_layers = hidden_layers self.sup_loss = sup_loss self.unsup_loss = unsup_loss self.sup_loss_func, self.activation = self._get_sup_loss_func( sup_loss, prefix="sup" ) self.sup_loss_func_name = self.sup_loss_func.name self.optimizer = optimizer if isinstance(optimizer, str): self.optimizer = keras.optimizers.get(optimizer) if optimizer_kwargs is not None: for key, value in optimizer_kwargs.items(): setattr(self.optimizer, key, value) self.optimizer.learning_rate = learning_rate self.unsup_loss_func = self._get_unsup_loss_func(unsup_loss) self.unsup_loss_func_name = f"unsup_{unsup_loss}_loss" self.metrics = metrics self.beta = beta self.source_dict = source_dict self.semisup_loss_func_name = "semisup_loss" self.hidden_layer_activation = hidden_layer_activation self.kernel_l1_regularization = kernel_l1_regularization self.kernel_l2_regularization = kernel_l2_regularization self.bias_l1_regularization = bias_l1_regularization self.bias_l2_regularization = bias_l2_regularization self.activity_l1_regularization = activity_l1_regularization self.activity_l2_regularization = activity_l2_regularization self.dropout = dropout self.ood_fp_rate = ood_fp_rate self.fit_spline = fit_spline self.spline_bins = spline_bins self.spline_k = spline_k self.spline_s = spline_s self.spline_snrs = spline_snrs self.spline_recon_errors = spline_recon_errors self.model = None self._update_custom_objects("L1NormLayer", L1NormLayer) @property def source_dict(self) -> dict: return self.info["source_dict"] @source_dict.setter def source_dict(self, value: dict): self.info["source_dict"] = value def _get_sup_loss_func(self, loss_func_str, prefix): if loss_func_str not in self.SUPERVISED_LOSS_FUNCS: raise KeyError(f"'{loss_func_str}' is not a supported supervised loss function.") func, kwargs, activation = self.SUPERVISED_LOSS_FUNCS[loss_func_str] loss_func_name = f"{prefix}_{loss_func_str}_loss" return func(name=loss_func_name, **kwargs), activation 
def _get_unsup_loss_func(self, loss_func_str): if loss_func_str not in self.UNSUPERVISED_LOSS_FUNCS: raise KeyError(f"'{loss_func_str}' is not a supported unsupervised loss function.") return self.UNSUPERVISED_LOSS_FUNCS[loss_func_str] def _initialize_model(self, input_size, output_size): spectra_input = Input(input_size, name="input_spectrum") spectra_norm = L1NormLayer(name="normalized_input_spectrum")(spectra_input) x = spectra_norm for layer, nodes in enumerate(self.hidden_layers): x = Dense( nodes, activation=self.hidden_layer_activation, kernel_regularizer=L1L2( l1=self.kernel_l1_regularization, l2=self.kernel_l2_regularization ), bias_regularizer=L1L2( l1=self.bias_l1_regularization, l2=self.bias_l2_regularization ), activity_regularizer=L1L2( l1=self.activity_l1_regularization, l2=self.activity_l2_regularization ), name=f"dense_{layer}" )(x) if self.dropout > 0: x = Dropout(self.dropout)(x) output = Dense( output_size, activation="linear", name="output" )(x) self.model = Model(inputs=[spectra_input], outputs=[output]) def _get_info_as_dict(self): info_dict = {} for k, v in vars(self).items(): if k not in self.INFO_KEYS: continue if isinstance(v, np.ndarray): info_dict[k] = v.tolist() else: info_dict[k] = v return info_dict def _get_spline_threshold_func(self): return UnivariateSpline( self.info["avg_snrs"], self.info["thresholds"], k=self.spline_k, s=self.spline_s ) def _fit_spline_threshold_func(self): out = pd.qcut( np.array(self.spline_snrs), self.spline_bins, labels=False, ) thresholds = [ np.quantile(np.array(self.spline_recon_errors)[out == int(i)], 1-self.ood_fp_rate) for i in range(self.spline_bins) ] avg_snrs = [ np.mean(np.array(self.spline_snrs)[out == int(i)]) for i in range(self.spline_bins) ] self._update_info( avg_snrs=avg_snrs, thresholds=thresholds, spline_k=self.spline_k, spline_s=self.spline_s, ) def _get_snrs(self, ss: SampleSet, bg_cps: float, is_gross: bool) -> np.ndarray: fg_counts = ss.info.total_counts.values.astype("float64") 
bg_counts = ss.info.live_time.values * bg_cps if is_gross: fg_counts = fg_counts - bg_counts snrs = fg_counts / np.sqrt(bg_counts) return snrs def fit(self, seeds_ss: SampleSet, ss: SampleSet, bg_cps: int = 300, is_gross: bool = False, batch_size: int = 10, epochs: int = 20, validation_split: float = 0.2, callbacks=None, patience: int = 15, es_monitor: str = "val_loss", es_mode: str = "min", es_verbose=0, es_min_delta: float = 0.0, normalize_sup_loss: bool = True, normalize_func=tf.math.tanh, normalize_scaler: float = 1.0, target_level="Isotope", verbose: bool = False): """Fit a model to the given SampleSet(s). Args: seeds_ss: `SampleSet` of pure, long-collect spectra ss: `SampleSet` of `n` gross or foreground spectra where `n` >= 1 bg_cps: background rate assumption used for calculating SNR in spline function using in OOD detection is_gross: whether `ss` contains gross spectra batch_size: number of samples per gradient update epochs: maximum number of training iterations validation_split: proportion of training data to use as validation data callbacks: list of callbacks to be passed to TensorFlow Model.fit() method patience: number of epochs to wait for `EarlyStopping` object es_monitor: quantity to be monitored for `EarlyStopping` object es_mode: mode for `EarlyStopping` object es_verbose: verbosity level for `EarlyStopping` object es_min_delta: minimum change to count as an improvement for early stopping normalize_sup_loss: whether to normalize the supervised loss term normalize_func: normalization function used for supervised loss term normalize_scaler: scalar that sets the steepness of the normalization function target_level: source level to target for model output verbose: whether model training output is printed to the terminal """ spectra = ss.get_samples().astype(float) sources_df = ss.sources.T.groupby(target_level, sort=False).sum().T sources = sources_df.values.astype(float) self.sources_columns = sources_df.columns if verbose: print("Building 
dictionary...") if self.source_dict is None: self.source_dict = _get_reordered_spectra( seeds_ss.spectra, seeds_ss.sources, self.sources_columns, target_level=target_level ).values if not self.model: if verbose: print("Initializing model...") self._initialize_model( (ss.n_channels,), sources.shape[1], ) elif verbose: print("Model already initialized.") if verbose: print("Building loss functions...") self.semisup_loss_func = build_keras_semisupervised_loss_func( self.sup_loss_func, self.unsup_loss_func, self.source_dict, self.beta, self.activation, n_labels=sources.shape[1], normalize=normalize_sup_loss, normalize_func=normalize_func, normalize_scaler=normalize_scaler ) semisup_metrics = None if self.metrics: if verbose: print("Building metric functions...") semisup_metrics = [] for each in self.metrics: if isinstance(each, str): semisup_metrics.append( build_keras_semisupervised_metric_func( tf.keras.metrics.get(each), self.activation, sources.shape[1] ) ) else: semisup_metrics.append( build_keras_semisupervised_metric_func( each, self.activation, sources.shape[1] ) ) self.model.compile( loss=self.semisup_loss_func, optimizer=self.optimizer, metrics=semisup_metrics ) es = EarlyStopping( monitor=es_monitor, patience=patience, verbose=es_verbose, restore_best_weights=True, mode=es_mode, min_delta=es_min_delta, ) if callbacks: callbacks.append(es) else: callbacks = [es] history = self.model.fit( spectra, np.append(sources, spectra, axis=1), epochs=epochs, verbose=verbose, validation_split=validation_split, callbacks=callbacks, shuffle=True, batch_size=batch_size ) if self.fit_spline: if verbose: print("Finding OOD detection threshold function...") train_logits = self.model.predict(spectra, verbose=0) train_lpes = self.activation(tf.convert_to_tensor(train_logits, dtype=tf.float32)) self.spline_recon_errors = reconstruction_error( tf.convert_to_tensor(spectra, dtype=tf.float32), train_lpes, self.source_dict, self.unsup_loss_func ).numpy() self.spline_snrs = 
self._get_snrs(ss, bg_cps, is_gross) self._fit_spline_threshold_func() info = self._get_info_as_dict() self._update_info( target_level=target_level, model_outputs=sources_df.columns.values.tolist(), normalization=ss.spectra_state, **info, ) return history def predict(self, ss: SampleSet, bg_cps: int = 300, is_gross: bool = False, verbose=False): """Estimate the proportions of counts present in each sample of the provided SampleSet. Results are stored inside the SampleSet's prediction_probas property. Args: ss: `SampleSet` of `n` foreground or gross spectra where `n` >= 1 bg_cps: background rate used for estimating sample SNRs. If background rate varies to a significant degree, split up sampleset by SNR and make multiple calls to this method. is_gross: whether `ss` contains gross spectra """ test_spectra = ss.get_samples().astype(float) logits = self.model.predict(test_spectra, verbose=verbose) lpes = self.activation(tf.convert_to_tensor(logits, dtype=tf.float32)) col_level_idx = SampleSet.SOURCES_MULTI_INDEX_NAMES.index(self.target_level) col_level_subset = SampleSet.SOURCES_MULTI_INDEX_NAMES[:col_level_idx+1] ss.prediction_probas = pd.DataFrame( data=lpes, columns=pd.MultiIndex.from_tuples( self.get_model_outputs_as_label_tuples(), names=col_level_subset ) ) # Fill in unsupervised losses recon_errors = reconstruction_error( tf.convert_to_tensor(test_spectra, dtype=tf.float32), lpes, self.source_dict, self.unsup_loss_func ).numpy() if self.fit_spline: snrs = self._get_snrs(ss, bg_cps, is_gross) thresholds = self._get_spline_threshold_func()(snrs) is_ood = recon_errors > thresholds ss.info["ood"] = is_ood ss.info["recon_error"] = recon_errorsAncestors
Class variables
var INFO_KEYSvar SUPERVISED_LOSS_FUNCSvar UNSUPERVISED_LOSS_FUNCS
Instance variables
var source_dict : dict-
Expand source code Browse git
@property def source_dict(self) -> dict: return self.info["source_dict"]
Methods
def fit(self, seeds_ss: SampleSet, ss: SampleSet, bg_cps: int = 300, is_gross: bool = False, batch_size: int = 10, epochs: int = 20, validation_split: float = 0.2, callbacks=None, patience: int = 15, es_monitor: str = 'val_loss', es_mode: str = 'min', es_verbose=0, es_min_delta: float = 0.0, normalize_sup_loss: bool = True, normalize_func=<function tanh>, normalize_scaler: float = 1.0, target_level='Isotope', verbose: bool = False)-
Fit a model to the given SampleSet(s).
Args
seeds_ss- `SampleSet` of pure, long-collect spectra
ss- `SampleSet` of `n` gross or foreground spectra where `n` >= 1
bg_cps- background rate assumption used for calculating SNR in the spline function used in OOD detection
is_gross- whether
sscontains gross spectra batch_size- number of samples per gradient update
epochs- maximum number of training iterations
validation_split- proportion of training data to use as validation data
callbacks- list of callbacks to be passed to TensorFlow Model.fit() method
patience- number of epochs to wait for
EarlyStoppingobject es_monitor- quantity to be monitored for
EarlyStoppingobject es_mode- mode for
EarlyStoppingobject es_verbose- verbosity level for
EarlyStoppingobject es_min_delta- minimum change to count as an improvement for early stopping
normalize_sup_loss- whether to normalize the supervised loss term
normalize_func- normalization function used for supervised loss term
normalize_scaler- scalar that sets the steepness of the normalization function
target_level- source level to target for model output
verbose- whether model training output is printed to the terminal
Expand source code Browse git
def fit(self, seeds_ss: SampleSet, ss: SampleSet, bg_cps: int = 300, is_gross: bool = False,
        batch_size: int = 10, epochs: int = 20, validation_split: float = 0.2,
        callbacks=None, patience: int = 15, es_monitor: str = "val_loss",
        es_mode: str = "min", es_verbose=0, es_min_delta: float = 0.0,
        normalize_sup_loss: bool = True, normalize_func=tf.math.tanh,
        normalize_scaler: float = 1.0, target_level="Isotope", verbose: bool = False):
    """Fit a model to the given SampleSet(s).

    Args:
        seeds_ss: `SampleSet` of pure, long-collect spectra
        ss: `SampleSet` of `n` gross or foreground spectra where `n` >= 1
        bg_cps: background rate assumption used for calculating SNR in the spline
            function used in OOD detection
        is_gross: whether `ss` contains gross spectra
        batch_size: number of samples per gradient update
        epochs: maximum number of training iterations
        validation_split: proportion of training data to use as validation data
        callbacks: list of callbacks to be passed to TensorFlow Model.fit() method
        patience: number of epochs to wait for `EarlyStopping` object
        es_monitor: quantity to be monitored for `EarlyStopping` object
        es_mode: mode for `EarlyStopping` object
        es_verbose: verbosity level for `EarlyStopping` object
        es_min_delta: minimum change to count as an improvement for early stopping
        normalize_sup_loss: whether to normalize the supervised loss term
        normalize_func: normalization function used for supervised loss term
        normalize_scaler: scalar that sets the steepness of the normalization function
        target_level: source level to target for model output
        verbose: whether model training output is printed to the terminal
    """
    spectra = ss.get_samples().astype(float)
    # Collapse the sources MultiIndex to the requested level (e.g., Isotope).
    sources_df = ss.sources.T.groupby(target_level, sort=False).sum().T
    sources = sources_df.values.astype(float)
    self.sources_columns = sources_df.columns

    if verbose:
        print("Building dictionary...")

    if self.source_dict is None:
        # Seed spectra reordered to match the label column order.
        self.source_dict = _get_reordered_spectra(
            seeds_ss.spectra,
            seeds_ss.sources,
            self.sources_columns,
            target_level=target_level
        ).values

    if not self.model:
        if verbose:
            print("Initializing model...")
        self._initialize_model(
            (ss.n_channels,),
            sources.shape[1],
        )
    elif verbose:
        print("Model already initialized.")

    if verbose:
        print("Building loss functions...")

    self.semisup_loss_func = build_keras_semisupervised_loss_func(
        self.sup_loss_func,
        self.unsup_loss_func,
        self.source_dict,
        self.beta,
        self.activation,
        n_labels=sources.shape[1],
        normalize=normalize_sup_loss,
        normalize_func=normalize_func,
        normalize_scaler=normalize_scaler
    )

    semisup_metrics = None
    if self.metrics:
        if verbose:
            print("Building metric functions...")
        semisup_metrics = []
        for each in self.metrics:
            if isinstance(each, str):
                # Metric given by name: resolve through Keras first.
                semisup_metrics.append(
                    build_keras_semisupervised_metric_func(
                        tf.keras.metrics.get(each),
                        self.activation,
                        sources.shape[1]
                    )
                )
            else:
                semisup_metrics.append(
                    build_keras_semisupervised_metric_func(
                        each,
                        self.activation,
                        sources.shape[1]
                    )
                )

    self.model.compile(
        loss=self.semisup_loss_func,
        optimizer=self.optimizer,
        metrics=semisup_metrics
    )

    es = EarlyStopping(
        monitor=es_monitor,
        patience=patience,
        verbose=es_verbose,
        restore_best_weights=True,
        mode=es_mode,
        min_delta=es_min_delta,
    )
    if callbacks:
        callbacks.append(es)
    else:
        callbacks = [es]

    # Targets concatenate labels and the input spectra so the custom
    # semi-supervised loss can compute both terms.
    history = self.model.fit(
        spectra,
        np.append(sources, spectra, axis=1),
        epochs=epochs,
        verbose=verbose,
        validation_split=validation_split,
        callbacks=callbacks,
        shuffle=True,
        batch_size=batch_size
    )

    if self.fit_spline:
        if verbose:
            print("Finding OOD detection threshold function...")
        train_logits = self.model.predict(spectra, verbose=0)
        train_lpes = self.activation(tf.convert_to_tensor(train_logits, dtype=tf.float32))
        self.spline_recon_errors = reconstruction_error(
            tf.convert_to_tensor(spectra, dtype=tf.float32),
            train_lpes,
            self.source_dict,
            self.unsup_loss_func
        ).numpy()
        self.spline_snrs = self._get_snrs(ss, bg_cps, is_gross)
        self._fit_spline_threshold_func()

    info = self._get_info_as_dict()
    self._update_info(
        target_level=target_level,
        model_outputs=sources_df.columns.values.tolist(),
        normalization=ss.spectra_state,
        **info,
    )

    return history
Estimate the proportions of counts present in each sample of the provided SampleSet.
Results are stored inside the SampleSet's prediction_probas property.
Args
ss- `SampleSet` of `n` foreground or gross spectra where `n` >= 1
bg_cps- background rate used for estimating sample SNRs. If the background rate varies to a significant degree, split up the sample set by SNR and make multiple calls to this method.
is_gross- whether
sscontains gross spectra
Expand source code Browse git
def predict(self, ss: SampleSet, bg_cps: int = 300, is_gross: bool = False, verbose=False):
    """Estimate the proportions of counts present in each sample of the provided SampleSet.

    Results are stored inside the SampleSet's prediction_probas property.

    Args:
        ss: `SampleSet` of `n` foreground or gross spectra where `n` >= 1
        bg_cps: background rate used for estimating sample SNRs.
            If background rate varies to a significant degree, split up sampleset
            by SNR and make multiple calls to this method.
        is_gross: whether `ss` contains gross spectra
    """
    samples = ss.get_samples().astype(float)
    raw_outputs = self.model.predict(samples, verbose=verbose)
    proportions = self.activation(tf.convert_to_tensor(raw_outputs, dtype=tf.float32))

    # Label columns: the MultiIndex levels up to and including the target level.
    level_idx = SampleSet.SOURCES_MULTI_INDEX_NAMES.index(self.target_level)
    level_names = SampleSet.SOURCES_MULTI_INDEX_NAMES[:level_idx + 1]
    label_index = pd.MultiIndex.from_tuples(
        self.get_model_outputs_as_label_tuples(),
        names=level_names,
    )
    ss.prediction_probas = pd.DataFrame(data=proportions, columns=label_index)

    # Record the unsupervised (reconstruction) loss for every sample.
    sample_recon_errors = reconstruction_error(
        tf.convert_to_tensor(samples, dtype=tf.float32),
        proportions,
        self.source_dict,
        self.unsup_loss_func,
    ).numpy()

    if self.fit_spline:
        # Flag samples whose reconstruction error exceeds the
        # SNR-dependent threshold given by the fitted spline.
        sample_snrs = self._get_snrs(ss, bg_cps, is_gross)
        ood_thresholds = self._get_spline_threshold_func()(sample_snrs)
        ss.info["ood"] = sample_recon_errors > ood_thresholds

    ss.info["recon_error"] = sample_recon_errors
Inherited members
class MLPClassifier (activation=None, loss=None, optimizer=None, metrics=None, l2_alpha: float = 0.0001, activity_regularizer=None, final_activation=None, dense_layer_size=None, dropout=None)-
Multi-layer perceptron classifier.
Args
activation- activate function to use for each dense layer
loss- loss function to use for training
optimizer- tensorflow optimizer or optimizer name to use for training
metrics- list of metrics to be evaluating during training
l2_alpha- alpha value for the L2 regularization of each dense layer
activity_regularizer- regularizer function applied each dense layer output
final_activation- final activation function to apply to model output
Expand source code Browse git
class MLPClassifier(PyRIIDModel):
    """Multi-layer perceptron classifier."""
    def __init__(self, activation=None, loss=None, optimizer=None,
                 metrics=None, l2_alpha: float = 1e-4,
                 activity_regularizer=None, final_activation=None,
                 dense_layer_size=None, dropout=None):
        """
        Args:
            activation: activate function to use for each dense layer
            loss: loss function to use for training
            optimizer: tensorflow optimizer or optimizer name to use for training
            metrics: list of metrics to be evaluating during training
            l2_alpha: alpha value for the L2 regularization of each dense layer
            activity_regularizer: regularizer function applied each dense layer output
            final_activation: final activation function to apply to model output
        """
        super().__init__()

        self.activation = activation
        self.loss = loss
        self.optimizer = optimizer
        # Fix: `final_activation` was assigned twice in the original; once is enough.
        self.final_activation = final_activation
        self.metrics = metrics
        self.l2_alpha = l2_alpha
        self.activity_regularizer = activity_regularizer
        self.dense_layer_size = dense_layer_size
        self.dropout = dropout

        # Fill in defaults for anything the caller left unspecified.
        if self.activation is None:
            self.activation = "relu"
        if self.loss is None:
            self.loss = CategoricalCrossentropy()
        if optimizer is None:
            self.optimizer = Adam(learning_rate=0.01, clipnorm=0.001)
        if self.metrics is None:
            self.metrics = [F1Score(), Precision(), Recall()]
        if self.activity_regularizer is None:
            self.activity_regularizer = l1(0.0)
        if self.final_activation is None:
            self.final_activation = "softmax"

        self.model = None
        self._set_predict_fn()

    def fit(self, ss: SampleSet, batch_size: int = 200, epochs: int = 20,
            validation_split: float = 0.2, callbacks=None, patience: int = 15,
            es_monitor: str = "val_loss", es_mode: str = "min", es_verbose=0,
            target_level="Isotope", verbose: bool = False):
        """Fit a model to the given `SampleSet`(s).

        Args:
            ss: `SampleSet` of `n` spectra where `n` >= 1 and the spectra are either
                foreground (AKA, "net") or gross.
            batch_size: number of samples per gradient update
            epochs: maximum number of training iterations
            validation_split: percentage of the training data to use as validation data
            callbacks: list of callbacks to be passed to the TensorFlow `Model.fit()` method
            patience: number of epochs to wait for `EarlyStopping` object
            es_monitor: quantity to be monitored for `EarlyStopping` object
            es_mode: mode for `EarlyStopping` object
            es_verbose: verbosity level for `EarlyStopping` object
            target_level: `SampleSet.sources` column level to use
            verbose: whether to show detailed model training output

        Returns:
            `tf.History` object.

        Raises:
            `ValueError` when no spectra are provided as input
        """
        if ss.n_samples <= 0:
            raise ValueError("No spectr[a|um] provided!")

        # Record the spectrum type this model was trained on.
        if ss.spectra_type == SpectraType.Gross:
            self.model_inputs = (ModelInput.GrossSpectrum,)
        elif ss.spectra_type == SpectraType.Foreground:
            self.model_inputs = (ModelInput.ForegroundSpectrum,)
        elif ss.spectra_type == SpectraType.Background:
            self.model_inputs = (ModelInput.BackgroundSpectrum,)
        else:
            raise ValueError(f"{ss.spectra_type} is not supported in this model.")

        X = ss.get_samples()
        # Collapse the sources MultiIndex to the requested level (e.g., Isotope).
        source_contributions_df = ss.sources.T.groupby(target_level, sort=False).sum().T
        model_outputs = source_contributions_df.columns.values.tolist()
        Y = source_contributions_df.values

        spectra_tensor = tf.convert_to_tensor(X, dtype=tf.float32)
        labels_tensor = tf.convert_to_tensor(Y, dtype=tf.float32)
        training_dataset = tf.data.Dataset.from_tensor_slices((spectra_tensor, labels_tensor))
        training_dataset, validation_dataset = split_dataset(
            training_dataset,
            right_size=validation_split,
            shuffle=True
        )
        training_dataset = training_dataset.batch(batch_size=batch_size)
        validation_dataset = validation_dataset.batch(batch_size=batch_size)

        if not self.model:
            inputs = Input(shape=(X.shape[1],), name="Spectrum")
            # Default hidden width: half the number of input channels.
            if self.dense_layer_size is None:
                dense_layer_size = X.shape[1] // 2
            else:
                dense_layer_size = self.dense_layer_size
            dense_layer = Dense(
                dense_layer_size,
                activation=self.activation,
                activity_regularizer=self.activity_regularizer,
                kernel_regularizer=l2(self.l2_alpha),
            )(inputs)
            if self.dropout is not None:
                last_layer = Dropout(self.dropout)(dense_layer)
            else:
                last_layer = dense_layer
            outputs = Dense(Y.shape[1], activation=self.final_activation)(last_layer)
            self.model = Model(inputs, outputs)
            self.model.compile(loss=self.loss, optimizer=self.optimizer,
                               metrics=self.metrics)

        es = EarlyStopping(
            monitor=es_monitor,
            patience=patience,
            verbose=es_verbose,
            restore_best_weights=True,
            mode=es_mode,
        )
        if callbacks:
            callbacks.append(es)
        else:
            callbacks = [es]

        history = self.model.fit(
            training_dataset,
            epochs=epochs,
            verbose=verbose,
            validation_data=validation_dataset,
            callbacks=callbacks,
        )

        # Update model information
        self._update_info(
            target_level=target_level,
            model_outputs=model_outputs,
            normalization=ss.spectra_state,
        )

        # Define the predict function with tf.function and input_signature
        self._set_predict_fn()

        return history

    def _set_predict_fn(self):
        """Wrap `_predict` in a tf.function with relaxed shapes for fast repeat calls."""
        self._predict_fn = tf.function(
            self._predict,
            experimental_relax_shapes=True
        )

    def _predict(self, input_tensor):
        # Inference-mode forward pass (no dropout, etc.).
        return self.model(input_tensor, training=False)

    def predict(self, ss: SampleSet, bg_ss: SampleSet = None):
        """Classify the spectra in the provided `SampleSet`(s).

        Results are stored inside the first SampleSet's prediction-related properties.

        Args:
            ss: `SampleSet` of `n` spectra where `n` >= 1 and the spectra are either
                foreground (AKA, "net") or gross
            bg_ss: `SampleSet` of `n` spectra where `n` >= 1 and the spectra are background
        """
        x_test = ss.get_samples().astype(float)
        if bg_ss:
            X = [x_test, bg_ss.get_samples().astype(float)]
        else:
            X = x_test

        spectra_tensor = tf.convert_to_tensor(X, dtype=tf.float32)
        results = self._predict_fn(spectra_tensor)

        col_level_idx = SampleSet.SOURCES_MULTI_INDEX_NAMES.index(self.target_level)
        col_level_subset = SampleSet.SOURCES_MULTI_INDEX_NAMES[:col_level_idx+1]
        ss.prediction_probas = pd.DataFrame(
            data=results,
            columns=pd.MultiIndex.from_tuples(
                self.get_model_outputs_as_label_tuples(),
                names=col_level_subset
            )
        )

        ss.classified_by = self.model_id
Methods
def fit(self, ss: SampleSet, batch_size: int = 200, epochs: int = 20, validation_split: float = 0.2, callbacks=None, patience: int = 15, es_monitor: str = 'val_loss', es_mode: str = 'min', es_verbose=0, target_level='Isotope', verbose: bool = False)-
Fit a model to the given
SampleSet(s).Args
ss- `SampleSet` of `n` spectra where `n` >= 1 and the spectra are either foreground (AKA, "net") or gross.
batch_size- number of samples per gradient update
epochs- maximum number of training iterations
validation_split- percentage of the training data to use as validation data
callbacks- list of callbacks to be passed to the TensorFlow
Model.fit()method patience- number of epochs to wait for
EarlyStoppingobject es_monitor- quantity to be monitored for
EarlyStoppingobject es_mode- mode for
EarlyStoppingobject es_verbose- verbosity level for
EarlyStoppingobject target_levelSampleSet.sourcescolumn level to useverbose- whether to show detailed model training output
Returns
Returns
`tf.History` object.
Raises
`ValueError` when no spectra are provided as input
Expand source code Browse git
def fit(self, ss: SampleSet, batch_size: int = 200, epochs: int = 20,
        validation_split: float = 0.2, callbacks=None, patience: int = 15,
        es_monitor: str = "val_loss", es_mode: str = "min", es_verbose=0,
        target_level="Isotope", verbose: bool = False):
    """Fit a model to the given `SampleSet`(s).

    Args:
        ss: `SampleSet` of `n` spectra where `n` >= 1 and the spectra are either
            foreground (AKA, "net") or gross.
        batch_size: number of samples per gradient update
        epochs: maximum number of training iterations
        validation_split: percentage of the training data to use as validation data
        callbacks: list of callbacks to be passed to the TensorFlow `Model.fit()` method
        patience: number of epochs to wait for `EarlyStopping` object
        es_monitor: quantity to be monitored for `EarlyStopping` object
        es_mode: mode for `EarlyStopping` object
        es_verbose: verbosity level for `EarlyStopping` object
        target_level: `SampleSet.sources` column level to use
        verbose: whether to show detailed model training output

    Returns:
        `tf.History` object.

    Raises:
        `ValueError` when no spectra are provided as input
    """
    if ss.n_samples <= 0:
        raise ValueError("No spectr[a|um] provided!")

    # Record the spectrum type this model was trained on.
    if ss.spectra_type == SpectraType.Gross:
        self.model_inputs = (ModelInput.GrossSpectrum,)
    elif ss.spectra_type == SpectraType.Foreground:
        self.model_inputs = (ModelInput.ForegroundSpectrum,)
    elif ss.spectra_type == SpectraType.Background:
        self.model_inputs = (ModelInput.BackgroundSpectrum,)
    else:
        raise ValueError(f"{ss.spectra_type} is not supported in this model.")

    X = ss.get_samples()
    # Collapse the sources MultiIndex to the requested level (e.g., Isotope).
    source_contributions_df = ss.sources.T.groupby(target_level, sort=False).sum().T
    model_outputs = source_contributions_df.columns.values.tolist()
    Y = source_contributions_df.values

    spectra_tensor = tf.convert_to_tensor(X, dtype=tf.float32)
    labels_tensor = tf.convert_to_tensor(Y, dtype=tf.float32)
    training_dataset = tf.data.Dataset.from_tensor_slices((spectra_tensor, labels_tensor))
    training_dataset, validation_dataset = split_dataset(
        training_dataset,
        right_size=validation_split,
        shuffle=True
    )
    training_dataset = training_dataset.batch(batch_size=batch_size)
    validation_dataset = validation_dataset.batch(batch_size=batch_size)

    if not self.model:
        inputs = Input(shape=(X.shape[1],), name="Spectrum")
        # Default hidden width: half the number of input channels.
        if self.dense_layer_size is None:
            dense_layer_size = X.shape[1] // 2
        else:
            dense_layer_size = self.dense_layer_size
        dense_layer = Dense(
            dense_layer_size,
            activation=self.activation,
            activity_regularizer=self.activity_regularizer,
            kernel_regularizer=l2(self.l2_alpha),
        )(inputs)
        if self.dropout is not None:
            last_layer = Dropout(self.dropout)(dense_layer)
        else:
            last_layer = dense_layer
        outputs = Dense(Y.shape[1], activation=self.final_activation)(last_layer)
        self.model = Model(inputs, outputs)
        self.model.compile(loss=self.loss, optimizer=self.optimizer,
                           metrics=self.metrics)

    es = EarlyStopping(
        monitor=es_monitor,
        patience=patience,
        verbose=es_verbose,
        restore_best_weights=True,
        mode=es_mode,
    )
    if callbacks:
        callbacks.append(es)
    else:
        callbacks = [es]

    history = self.model.fit(
        training_dataset,
        epochs=epochs,
        verbose=verbose,
        validation_data=validation_dataset,
        callbacks=callbacks,
    )

    # Update model information
    self._update_info(
        target_level=target_level,
        model_outputs=model_outputs,
        normalization=ss.spectra_state,
    )

    # Define the predict function with tf.function and input_signature
    self._set_predict_fn()

    return history
Classify the spectra in the provided
SampleSet(s).Results are stored inside the first SampleSet's prediction-related properties.
Args
ssSampleSetofnspectra wheren>= 1 and the spectra are either foreground (AKA, "net") or grossbg_ssSampleSetofnspectra wheren>= 1 and the spectra are background
Expand source code Browse git
def predict(self, ss: SampleSet, bg_ss: SampleSet = None):
    """Classify the spectra in the provided `SampleSet`(s).

    Results are stored inside the first SampleSet's prediction-related properties.

    Args:
        ss: `SampleSet` of `n` spectra where `n` >= 1 and the spectra are either
            foreground (AKA, "net") or gross
        bg_ss: `SampleSet` of `n` spectra where `n` >= 1 and the spectra are background
    """
    fg_samples = ss.get_samples().astype(float)
    if bg_ss:
        model_input = [fg_samples, bg_ss.get_samples().astype(float)]
    else:
        model_input = fg_samples

    input_tensor = tf.convert_to_tensor(model_input, dtype=tf.float32)
    results = self._predict_fn(input_tensor)

    # Label columns: the MultiIndex levels up to and including the target level.
    level_idx = SampleSet.SOURCES_MULTI_INDEX_NAMES.index(self.target_level)
    level_names = SampleSet.SOURCES_MULTI_INDEX_NAMES[:level_idx + 1]
    ss.prediction_probas = pd.DataFrame(
        data=results,
        columns=pd.MultiIndex.from_tuples(
            self.get_model_outputs_as_label_tuples(),
            names=level_names,
        ),
    )

    ss.classified_by = self.model_id
Inherited members
class PoissonBayesClassifier-
Classifier calculating the conditional Poisson log probability of each seed spectrum given the measurement.
This implementation is an adaptation of a naive Bayes classifier, a formal description of which can be found in ESLII:
Hastie, Trevor, et al. The elements of statistical learning: data mining, inference, and prediction. Vol. 2. New York. Springer, 2009.
For this model, each spectrum channel is treated as a Poisson random variable and expectations are provided by the user in the form of seeds rather than learned. Like the model described in ESLII, all classes are considered equally likely and features are assumed to be conditionally independent.
Expand source code Browse git
class PoissonBayesClassifier(PyRIIDModel):
    """Classifier calculating the conditional Poisson log probability of each seed spectrum
    given the measurement.

    This implementation is an adaptation of a naive Bayes classifier, a formal description of
    which can be found in ESLII:

        Hastie, Trevor, et al. The elements of statistical learning: data mining, inference,
        and prediction. Vol. 2. New York. Springer, 2009.

    For this model, each spectrum channel is treated as a Poisson random variable and
    expectations are provided by the user in the form of seeds rather than learned.
    Like the model described in ESLII, all classes are considered equally likely and
    features are assumed to be conditionally independent.
    """
    def __init__(self):
        super().__init__()
        # Register the custom Keras layers used by the computational graph
        # (presumably required so saved models can be reloaded — handled by the base class).
        self._update_custom_objects("ReduceSumLayer", ReduceSumLayer)
        self._update_custom_objects("ReduceMaxLayer", ReduceMaxLayer)
        self._update_custom_objects("DivideLayer", DivideLayer)
        self._update_custom_objects("ExpandDimsLayer", ExpandDimsLayer)
        self._update_custom_objects("ClipByValueLayer", ClipByValueLayer)
        self._update_custom_objects("PoissonLogProbabilityLayer", PoissonLogProbabilityLayer)
        self._update_custom_objects("SeedLayer", SeedLayer)

    def fit(self, seeds_ss: SampleSet):
        """Construct a TF-based implementation of a poisson-bayes classifier in terms
        of the given seeds.

        Args:
            seeds_ss: `SampleSet` of `n` foreground seed spectra where `n` >= 1.

        Raises:
            - `ValueError` when no seeds are provided
            - `NegativeSpectrumError` when any seed spectrum has negative counts in
              any bin
            - `ZeroTotalCountsError` when any seed spectrum contains zero total counts
        """
        # Validate seeds up front: at least one seed, no negative channel counts,
        # and no all-zero spectra (a zero-count seed would yield a degenerate expectation).
        if seeds_ss.n_samples <= 0:
            raise ValueError("Argument 'seeds_ss' must contain at least one seed.")
        if (seeds_ss.spectra.values < 0).any():
            msg = "Argument 'seeds_ss' can't contain any spectra with negative values."
            raise NegativeSpectrumError(msg)
        if (seeds_ss.spectra.values.sum(axis=1) <= 0).any():
            msg = "Argument 'seeds_ss' can't contain any spectra with zero total counts."
            raise ZeroTotalCountsError(msg)
        self._seeds = tf.convert_to_tensor(
            seeds_ss.spectra.values,
            dtype=tf.float32
        )

        # Inputs: gross and background spectra plus their live times.
        gross_spectrum_input = Input(shape=(seeds_ss.n_channels,), name="gross_spectrum")
        gross_live_time_input = Input(shape=(), name="gross_live_time")
        bg_spectrum_input = Input(shape=(seeds_ss.n_channels,), name="bg_spectrum")
        bg_live_time_input = Input(shape=(), name="bg_live_time")
        model_inputs = (
            gross_spectrum_input,
            gross_live_time_input,
            bg_spectrum_input,
            bg_live_time_input,
        )

        # Input statistics
        gross_total_counts = ReduceSumLayer(name="gross_total_counts")(gross_spectrum_input, axis=1)
        bg_total_counts = ReduceSumLayer(name="bg_total_counts")(bg_spectrum_input, axis=1)
        bg_count_rate = DivideLayer(name="bg_count_rate")([bg_total_counts, bg_live_time_input])
        gross_spectrum_input_expanded = ExpandDimsLayer(
            name="gross_spectrum_input_expanded"
        )(gross_spectrum_input, axis=1)
        bg_total_counts_expanded = ExpandDimsLayer(
            name="bg_total_counts_expanded"
        )(bg_total_counts, axis=1)

        # Expectations
        seed_layer = SeedLayer(self._seeds)(model_inputs)
        seed_layer_expanded = ExpandDimsLayer()(seed_layer, axis=0)
        # Expected background counts in the gross measurement:
        # background count rate scaled by the gross live time.
        expected_bg_counts = Multiply(
            trainable=False,
            name="expected_bg_counts"
        )([bg_count_rate, gross_live_time_input])
        expected_bg_counts_expanded = ExpandDimsLayer(
            name="expected_bg_counts_expanded"
        )(expected_bg_counts, axis=1)
        normalized_bg_spectrum = DivideLayer(
            name="normalized_bg_spectrum"
        )([bg_spectrum_input, bg_total_counts_expanded])
        expected_bg_spectrum = Multiply(
            trainable=False,
            name="expected_bg_spectrum"
        )([normalized_bg_spectrum, expected_bg_counts_expanded])
        # Expected foreground counts: whatever gross counts are not attributed to background.
        expected_fg_counts = Subtract(
            trainable=False,
            name="expected_fg_counts"
        )([gross_total_counts, expected_bg_counts])
        expected_fg_counts_expanded = ExpandDimsLayer(
            name="expected_fg_counts_expanded"
        )(expected_fg_counts, axis=-1)
        expected_fg_counts_expanded2 = ExpandDimsLayer(
            name="expected_fg_counts_expanded2"
        )(expected_fg_counts_expanded, axis=-1)
        expected_fg_spectrum = Multiply(
            trainable=False,
            name="expected_fg_spectrum"
        )([seed_layer_expanded, expected_fg_counts_expanded2])
        max_fg_value = ReduceMaxLayer(
            name="max_fg_value"
        )(expected_fg_spectrum)
        # Clip expectations away from zero (min 1e-8) so the downstream
        # Poisson log-probability layer never receives a non-positive rate.
        expected_fg_spectrum = ClipByValueLayer(
            name="clip_expected_fg_spectrum"
        )(expected_fg_spectrum, clip_value_min=1e-8, clip_value_max=max_fg_value)
        expected_bg_spectrum_expanded = ExpandDimsLayer(
            name="expected_bg_spectrum_expanded"
        )(expected_bg_spectrum, axis=1)
        expected_gross_spectrum = Add(
            trainable=False,
            name="expected_gross_spectrum"
        )([expected_fg_spectrum, expected_bg_spectrum_expanded])

        # Compute probabilities: per-channel log probabilities summed to a
        # single score per (sample, seed) pair.
        log_probabilities = PoissonLogProbabilityLayer(
            name="log_probabilities"
        )([expected_gross_spectrum, gross_spectrum_input_expanded])
        summed_log_probabilities = ReduceSumLayer(
            name="summed_log_probabilities"
        )(log_probabilities, axis=2)

        # Assemble model
        self.model = Model(model_inputs, summed_log_probabilities)
        self.model.compile()

        # One model output column per seed (grouped at the "Seed" source level).
        self.target_level = "Seed"
        sources_df = seeds_ss.sources.T.groupby(self.target_level, sort=False).sum().T
        self.model_outputs = sources_df.columns.values.tolist()

    def predict(self, gross_ss: SampleSet, bg_ss: SampleSet,
                normalize_scores: bool = False, verbose: bool = False):
        """Compute the conditional Poisson log probability between spectra in a
        `SampleSet` and the seeds to which the model was fit.

        Args:
            gross_ss: `SampleSet` of `n` gross spectra where `n` >= 1
            bg_ss: `SampleSet` of `n` background spectra where `n` >= 1
            normalize_scores (bool): whether to normalize prediction probabilities.
                When True, this makes the probabilities positive and rescales them
                by the minimum value present in the given dataset. While this can
                be helpful in terms of visualizing probabilities in log scale, it
                can adversely affect one's ability to detect significantly
                anomalous signatures.
        """
        gross_spectra = tf.convert_to_tensor(gross_ss.spectra.values, dtype=tf.float32)
        gross_lts = tf.convert_to_tensor(gross_ss.info.live_time.values, dtype=tf.float32)
        bg_spectra = tf.convert_to_tensor(bg_ss.spectra.values, dtype=tf.float32)
        bg_lts = tf.convert_to_tensor(bg_ss.info.live_time.values, dtype=tf.float32)
        prediction_probas = self.model.predict((
            gross_spectra, gross_lts, bg_spectra, bg_lts
        ), batch_size=512, verbose=verbose)

        # Normalization: shift each row so its minimum becomes zero
        # (log probabilities are non-positive, so this makes scores positive).
        if normalize_scores:
            rows_min = np.min(prediction_probas, axis=1)
            prediction_probas = prediction_probas - rows_min[:, np.newaxis]

        # Store scores on the gross SampleSet, one column per fitted seed.
        gross_ss.prediction_probas = pd.DataFrame(
            prediction_probas,
            columns=pd.MultiIndex.from_tuples(
                self.get_model_outputs_as_label_tuples(),
                names=SampleSet.SOURCES_MULTI_INDEX_NAMES
            )
        )
Ancestors
Methods
def fit(self, seeds_ss: SampleSet)-
Construct a TF-based implementation of a poisson-bayes classifier in terms of the given seeds.
Args
seeds_ss: SampleSet of n foreground seed spectra where n >= 1.
Raises
ValueError: when no seeds are provided
NegativeSpectrumError: when any seed spectrum has negative counts in any bin
ZeroTotalCountsError: when any seed spectrum contains zero total counts
Expand source code Browse git
def fit(self, seeds_ss: SampleSet):
    """Construct a TF-based implementation of a poisson-bayes classifier in terms
    of the given seeds.

    Args:
        seeds_ss: `SampleSet` of `n` foreground seed spectra where `n` >= 1.

    Raises:
        - `ValueError` when no seeds are provided
        - `NegativeSpectrumError` when any seed spectrum has negative counts in
          any bin
        - `ZeroTotalCountsError` when any seed spectrum contains zero total counts
    """
    # Validate seeds up front: at least one seed, no negative channel counts,
    # and no all-zero spectra (a zero-count seed would yield a degenerate expectation).
    if seeds_ss.n_samples <= 0:
        raise ValueError("Argument 'seeds_ss' must contain at least one seed.")
    if (seeds_ss.spectra.values < 0).any():
        msg = "Argument 'seeds_ss' can't contain any spectra with negative values."
        raise NegativeSpectrumError(msg)
    if (seeds_ss.spectra.values.sum(axis=1) <= 0).any():
        msg = "Argument 'seeds_ss' can't contain any spectra with zero total counts."
        raise ZeroTotalCountsError(msg)
    self._seeds = tf.convert_to_tensor(
        seeds_ss.spectra.values,
        dtype=tf.float32
    )

    # Inputs: gross and background spectra plus their live times.
    gross_spectrum_input = Input(shape=(seeds_ss.n_channels,), name="gross_spectrum")
    gross_live_time_input = Input(shape=(), name="gross_live_time")
    bg_spectrum_input = Input(shape=(seeds_ss.n_channels,), name="bg_spectrum")
    bg_live_time_input = Input(shape=(), name="bg_live_time")
    model_inputs = (
        gross_spectrum_input,
        gross_live_time_input,
        bg_spectrum_input,
        bg_live_time_input,
    )

    # Input statistics
    gross_total_counts = ReduceSumLayer(name="gross_total_counts")(gross_spectrum_input, axis=1)
    bg_total_counts = ReduceSumLayer(name="bg_total_counts")(bg_spectrum_input, axis=1)
    bg_count_rate = DivideLayer(name="bg_count_rate")([bg_total_counts, bg_live_time_input])
    gross_spectrum_input_expanded = ExpandDimsLayer(
        name="gross_spectrum_input_expanded"
    )(gross_spectrum_input, axis=1)
    bg_total_counts_expanded = ExpandDimsLayer(
        name="bg_total_counts_expanded"
    )(bg_total_counts, axis=1)

    # Expectations
    seed_layer = SeedLayer(self._seeds)(model_inputs)
    seed_layer_expanded = ExpandDimsLayer()(seed_layer, axis=0)
    # Expected background counts in the gross measurement:
    # background count rate scaled by the gross live time.
    expected_bg_counts = Multiply(
        trainable=False,
        name="expected_bg_counts"
    )([bg_count_rate, gross_live_time_input])
    expected_bg_counts_expanded = ExpandDimsLayer(
        name="expected_bg_counts_expanded"
    )(expected_bg_counts, axis=1)
    normalized_bg_spectrum = DivideLayer(
        name="normalized_bg_spectrum"
    )([bg_spectrum_input, bg_total_counts_expanded])
    expected_bg_spectrum = Multiply(
        trainable=False,
        name="expected_bg_spectrum"
    )([normalized_bg_spectrum, expected_bg_counts_expanded])
    # Expected foreground counts: whatever gross counts are not attributed to background.
    expected_fg_counts = Subtract(
        trainable=False,
        name="expected_fg_counts"
    )([gross_total_counts, expected_bg_counts])
    expected_fg_counts_expanded = ExpandDimsLayer(
        name="expected_fg_counts_expanded"
    )(expected_fg_counts, axis=-1)
    expected_fg_counts_expanded2 = ExpandDimsLayer(
        name="expected_fg_counts_expanded2"
    )(expected_fg_counts_expanded, axis=-1)
    expected_fg_spectrum = Multiply(
        trainable=False,
        name="expected_fg_spectrum"
    )([seed_layer_expanded, expected_fg_counts_expanded2])
    max_fg_value = ReduceMaxLayer(
        name="max_fg_value"
    )(expected_fg_spectrum)
    # Clip expectations away from zero (min 1e-8) so the downstream
    # Poisson log-probability layer never receives a non-positive rate.
    expected_fg_spectrum = ClipByValueLayer(
        name="clip_expected_fg_spectrum"
    )(expected_fg_spectrum, clip_value_min=1e-8, clip_value_max=max_fg_value)
    expected_bg_spectrum_expanded = ExpandDimsLayer(
        name="expected_bg_spectrum_expanded"
    )(expected_bg_spectrum, axis=1)
    expected_gross_spectrum = Add(
        trainable=False,
        name="expected_gross_spectrum"
    )([expected_fg_spectrum, expected_bg_spectrum_expanded])

    # Compute probabilities: per-channel log probabilities summed to a
    # single score per (sample, seed) pair.
    log_probabilities = PoissonLogProbabilityLayer(
        name="log_probabilities"
    )([expected_gross_spectrum, gross_spectrum_input_expanded])
    summed_log_probabilities = ReduceSumLayer(
        name="summed_log_probabilities"
    )(log_probabilities, axis=2)

    # Assemble model
    self.model = Model(model_inputs, summed_log_probabilities)
    self.model.compile()

    # One model output column per seed (grouped at the "Seed" source level).
    self.target_level = "Seed"
    sources_df = seeds_ss.sources.T.groupby(self.target_level, sort=False).sum().T
    self.model_outputs = sources_df.columns.values.tolist()
def predict(self, gross_ss: SampleSet, bg_ss: SampleSet, normalize_scores: bool =
False, verbose: bool = False)-
Compute the conditional Poisson log probability between spectra in a
SampleSet and the seeds to which the model was fit.
Args
gross_ss: SampleSet of n gross spectra where n >= 1
bg_ss: SampleSet of n background spectra where n >= 1
normalize_scores (bool): whether to normalize prediction probabilities. When True, this makes the probabilities positive and rescales them by the minimum value present in the given dataset. While this can be helpful in terms of visualizing probabilities in log scale, it can adversely affect one's ability to detect significantly anomalous signatures.
Expand source code Browse git
def predict(self, gross_ss: SampleSet, bg_ss: SampleSet,
            normalize_scores: bool = False, verbose: bool = False):
    """Compute the conditional Poisson log probability between spectra in a
    `SampleSet` and the seeds to which the model was fit.

    Args:
        gross_ss: `SampleSet` of `n` gross spectra where `n` >= 1
        bg_ss: `SampleSet` of `n` background spectra where `n` >= 1
        normalize_scores (bool): whether to normalize prediction probabilities.
            When True, this makes the probabilities positive and rescales them
            by the minimum value present in the given dataset. While this can
            be helpful in terms of visualizing probabilities in log scale, it
            can adversely affect one's ability to detect significantly
            anomalous signatures.
    """
    gross_spectra = tf.convert_to_tensor(gross_ss.spectra.values, dtype=tf.float32)
    gross_lts = tf.convert_to_tensor(gross_ss.info.live_time.values, dtype=tf.float32)
    bg_spectra = tf.convert_to_tensor(bg_ss.spectra.values, dtype=tf.float32)
    bg_lts = tf.convert_to_tensor(bg_ss.info.live_time.values, dtype=tf.float32)
    prediction_probas = self.model.predict((
        gross_spectra, gross_lts, bg_spectra, bg_lts
    ), batch_size=512, verbose=verbose)

    # Normalization: shift each row so its minimum becomes zero
    # (log probabilities are non-positive, so this makes scores positive).
    if normalize_scores:
        rows_min = np.min(prediction_probas, axis=1)
        prediction_probas = prediction_probas - rows_min[:, np.newaxis]

    # Store scores on the gross SampleSet, one column per fitted seed.
    gross_ss.prediction_probas = pd.DataFrame(
        prediction_probas,
        columns=pd.MultiIndex.from_tuples(
            self.get_model_outputs_as_label_tuples(),
            names=SampleSet.SOURCES_MULTI_INDEX_NAMES
        )
    )
Inherited members