Tweak and refactor neural nets and losses.

alanjvano · tymorrow · tymorrow · commit 93a03f81487c · 2023-05-30T15:44:49.000-06:00
Detailed changes are as follows:

- add custom final activation to MLPClassifier
- update license information related to using sparsemax code from TFA
- refactor sparsemax functions
- refactor some loss functions and the folder structure
- update dependencies
- update README

Co-authored-by: Tyler Morrow <tmorro@sandia.gov>
diff --git a/NOTICE.md b/NOTICE.md
@@ -0,0 +1,22 @@
+This source code is part of the PyRIID project and is licensed under the BSD-style license.
+This project also contains code based on TensorFlow Addons functions, which is covered under the Apache-2.0 license and can be found in `riid/models/losses/sparsemax.py`.
+
+The following is a list of the relevant copyright and license information.
+
+---
+
+Copyright 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
+Under the terms of Contract DE-NA0003525 with NTESS, the U.S. Government retains certain rights in this software.
+This source code is licensed under the BSD-style license found [here](https://github.com/sandialabs/PyRIID/blob/main/LICENSE.md).
+
+---
+
+Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and limitations under the License.
diff --git a/README.md b/README.md
@@ -5,39 +5,56 @@
 [![Python](https://img.shields.io/pypi/pyversions/riid)](https://badge.fury.io/py/riid)
 [![PyPI](https://badge.fury.io/py/riid.svg)](https://badge.fury.io/py/riid)
 
-This repository contains the PyRIID package (as well as tests and examples) which is intended to provide utilities that support machine learning-based research and solutions to radioisotope identification.
+This repository contains the PyRIID package (as well as tests and examples) which provides utilities that support machine learning-based research and solutions to radioisotope identification.
 
 ## Installation
 
-These instructions assume you have an up-to-date and stable Python installation; a virtual environment is recommended.
+These instructions assume you meet the following requirements:
 
-To use the latest version on PyPI (note: changes are slower to appear here), run:
+- Python version: 3.7+
+- Operating systems: Windows, Mac, or Ubuntu
+
+A virtual environment is recommended.
+
+Tests and examples are run via Actions on many combinations of Python version and operating system.
+You can verify support for your platform by checking the workflow files.
+
+### For Use
+
+To use the latest version on PyPI (note: changes are currently slower to appear here), run:
 
 ```
 pip install riid
 ```
 
-For the latest features, run:
+**For the latest features, run:**
 
 ```
 pip install git+https://github.com/sandialabs/pyriid.git@main
 ```
 
+### For Development
 
 If you are developing PyRIID, clone this repository and run:
 
 ```
 pip install -e ".[dev]"
 ```
 
-If you have trouble with Pylance resolving imports for an editable install, try this:
+**If you have trouble with Pylance resolving imports for an editable install, try this:**
 
 ```
 pip install -e ".[dev]" --config-settings editable_mode=compat
 ```
 
+## Examples
+
+Examples for how to use this package can be found [here](https://github.com/sandialabs/PyRIID/blob/main/examples).
+
 ## Tests
 
+Unit tests for this package can be found [here](https://github.com/sandialabs/PyRIID/blob/main/tests).
+
 Run all unit tests with the following command:
 
 ```sh
@@ -59,10 +76,7 @@ Maintainers and authors can be found [here](https://github.com/sandialabs/PyRIID
 
 ## Copyright
 
-Copyright 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
-Under the terms of Contract DE-NA0003525 with NTESS, the U.S. Government retains certain rights in this software.
-
-This source code is licensed under the BSD-style license found [here](https://github.com/sandialabs/PyRIID/blob/main/LICENSE.md).
+Full copyright details are outlined [here](https://github.com/sandialabs/PyRIID/blob/main/NOTICE.md).
 
 ## Acknowlegements
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -61,6 +61,7 @@ dependencies = [
     "tensorflow-io                  ~=0.27",
     "tensorflow-model-optimization  ~=0.7",
     "tensorflow-probability         ==0.18.*",  # this package is currently limiting the Python version to 3.9
+    "typeguard                      >=2.7,<3.0.0",
     "scikit-learn                   >=1.1;      python_version >= '3.10'",
     "scikit-learn                   ~=1.0;      python_version < '3.10'",
     "seaborn                        ~=0.12",
diff --git a/riid/models/losses/__init__.py b/riid/models/losses/__init__.py
@@ -3,9 +3,8 @@
 # the U.S. Government retains certain rights in this software.
 """This module contains custom loss functions."""
 import numpy as np
-from tensorflow.keras import backend as K
 import tensorflow as tf
-from math import pi
+from tensorflow.keras import backend as K
 
 
 def negative_log_f1(y_true: np.ndarray, y_pred: np.ndarray):
@@ -127,7 +126,7 @@ def normal_nll_diff(spectra, reconstructed_spectra, eps=1e-8):
 
     var = tf.clip_by_value(spectra, clip_value_min=1, clip_value_max=np.inf)
 
-    sigma_term = tf.math.log(2 * pi * var)
+    sigma_term = tf.math.log(2 * np.pi * var)
     mu_term = tf.math.divide(tf.math.square(scaled_reconstructed_spectra - spectra), var)
     diff = sigma_term + mu_term
     diff = 0.5 * tf.reduce_sum(diff, axis=-1)
@@ -152,7 +151,7 @@ def weighted_sse_diff(spectra, reconstructed_spectra):
 
     sample_variance = tf.sqrt(tf.math.reduce_variance(spectra, axis=1))
 
-    sigma_term = tf.math.log(2 * pi * sample_variance)
+    sigma_term = tf.math.log(2 * np.pi * sample_variance)
 
     mu_term = tf.math.divide(
         tf.math.square(scaled_reconstructed_spectra - spectra),
@@ -167,153 +166,3 @@ def reconstruction_error(spectra, lpes, dictionary, diff_func):
     reconstructed_spectra = tf.matmul(lpes, dictionary)
     reconstruction_errors = diff_func(spectra, reconstructed_spectra)
     return reconstruction_errors
-
-
-# based off code from Tensorflow-Addons (https://www.tensorflow.org/addons)
-def sparsemax(logits, axis: int = -1) -> tf.Tensor:
-    """Sparsemax activation function.
-
-    Args:
-        logits: tensor of logits (should not be activated)
-        axis: axis along which activation is applied
-    """
-
-    logits = tf.convert_to_tensor(logits, name="logits")
-
-    shape = logits.get_shape()
-    rank = shape.rank
-    is_last_axis = (axis == -1) or (axis == rank - 1)
-
-    if not is_last_axis:
-        raise ValueError("Currently only last axis is supported.")
-
-    output = _compute_2d_sparsemax(logits)
-    output.set_shape(shape)
-    return output
-
-
-# based off code from Tensorflow-Addons (https://www.tensorflow.org/addons)
-@tf.function
-def sparsemax_loss_from_logits(y_true, logits_pred) -> tf.Tensor:
-    logits = tf.convert_to_tensor(logits_pred, name="logits")
-    sparsemax_values = tf.convert_to_tensor(sparsemax(logits_pred), name="sparsemax")
-    labels = tf.convert_to_tensor(y_true, name="labels")
-
-    z = logits
-    sum_s = tf.where(
-        tf.math.logical_or(sparsemax_values > 0, tf.math.is_nan(sparsemax_values)),
-        sparsemax_values * (z - 0.5 * sparsemax_values),
-        tf.zeros_like(sparsemax_values),
-    )
-    q_part = labels * (0.5 * labels - z)
-
-    q_part_safe = tf.where(
-        tf.math.logical_and(tf.math.equal(labels, 0), tf.math.is_inf(z)),
-        tf.zeros_like(z),
-        q_part,
-    )
-
-    loss = tf.math.reduce_sum(sum_s + q_part_safe, axis=1)
-
-    return loss
-
-
-# taken from Tensorflow-Addons (https://www.tensorflow.org/addons)
-def _compute_2d_sparsemax(logits):
-    """Performs the sparsemax operation when axis=-1."""
-    shape_op = tf.shape(logits)
-    obs = tf.math.reduce_prod(shape_op[:-1])
-    dims = shape_op[-1]
-
-    # In the paper, they call the logits z.
-    # The mean(logits) can be substracted from logits to make the algorithm
-    # more numerically stable. the instability in this algorithm comes mostly
-    # from the z_cumsum. Substacting the mean will cause z_cumsum to be close
-    # to zero. However, in practise the numerical instability issues are very
-    # minor and substacting the mean causes extra issues with inf and nan
-    # input.
-    # Reshape to [obs, dims] as it is almost free and means the remanining
-    # code doesn't need to worry about the rank.
-    z = tf.reshape(logits, [obs, dims])
-
-    # sort z
-    z_sorted, _ = tf.nn.top_k(z, k=dims)
-
-    # calculate k(z)
-    z_cumsum = tf.math.cumsum(z_sorted, axis=-1)
-    k = tf.range(1, tf.cast(dims, logits.dtype) + 1, dtype=logits.dtype)
-    z_check = 1 + k * z_sorted > z_cumsum
-    # because the z_check vector is always [1,1,...1,0,0,...0] finding the
-    # (index + 1) of the last `1` is the same as just summing the number of 1.
-    k_z = tf.math.reduce_sum(tf.cast(z_check, tf.int32), axis=-1)
-
-    # calculate tau(z)
-    # If there are inf values or all values are -inf, the k_z will be zero,
-    # this is mathematically invalid and will also cause the gather_nd to fail.
-    # Prevent this issue for now by setting k_z = 1 if k_z = 0, this is then
-    # fixed later (see p_safe) by returning p = nan. This results in the same
-    # behavior as softmax.
-    k_z_safe = tf.math.maximum(k_z, 1)
-    indices = tf.stack([tf.range(0, obs), tf.reshape(k_z_safe, [-1]) - 1], axis=1)
-    tau_sum = tf.gather_nd(z_cumsum, indices)
-    tau_z = (tau_sum - 1) / tf.cast(k_z, logits.dtype)
-
-    # calculate p
-    p = tf.math.maximum(tf.cast(0, logits.dtype), z - tf.expand_dims(tau_z, -1))
-    # If k_z = 0 or if z = nan, then the input is invalid
-    p_safe = tf.where(
-        tf.expand_dims(
-            tf.math.logical_or(tf.math.equal(k_z, 0), tf.math.is_nan(z_cumsum[:, -1])),
-            axis=-1,
-        ),
-        tf.fill([obs, dims], tf.cast(float("nan"), logits.dtype)),
-        p,
-    )
-
-    # Reshape back to original size
-    p_safe = tf.reshape(p_safe, shape_op)
-    return p_safe
-
-
-# taken from Tensorflow-Addons (https://www.tensorflow.org/addons)
-class SparsemaxLoss(tf.keras.losses.Loss):
-    """Sparsemax loss function.
-
-    Computes the generalized multi-label classification loss for the sparsemax
-    function.
-
-    Because the sparsemax loss function needs both the probability output and
-    the logits to compute the loss value, `from_logits` must be `True`.
-
-    Because it computes the generalized multi-label loss, the shape of both
-    `y_pred` and `y_true` must be `[batch_size, num_classes]`.
-
-    Args:
-      from_logits: Whether `y_pred` is expected to be a logits tensor. Default
-        is `True`, meaning `y_pred` is the logits.
-      reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to
-        loss. Default value is `SUM_OVER_BATCH_SIZE`.
-      name: Optional name for the op.
-    """
-
-    def __init__(
-        self,
-        from_logits: bool = True,
-        reduction: str = tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE,
-        name: str = "sparsemax_loss",
-    ):
-        if from_logits is not True:
-            raise ValueError("from_logits must be True")
-
-        super().__init__(name=name, reduction=reduction)
-        self.from_logits = from_logits
-
-    def call(self, y_true, y_pred):
-        return sparsemax_loss_from_logits(y_true, y_pred)
-
-    def get_config(self):
-        config = {
-            "from_logits": self.from_logits,
-        }
-        base_config = super().get_config()
-        return {**base_config, **config}
diff --git a/riid/models/losses/sparsemax.py b/riid/models/losses/sparsemax.py
diff --git a/riid/models/neural_nets.py b/riid/models/neural_nets.py