class SNMFOptimizer:
-    def __init__(self, MM, Y0=None, X0=None, A=None, rho=1e12, eta=610, max_iter=500, tol=5e-7, components=None):
-        print("Initializing SNMF Optimizer")
+    """An implementation of stretched NMF (sNMF), including sparse stretched NMF.
+
+    Instantiating the SNMFOptimizer class runs all the analysis immediately.
+    The results matrices can then be accessed as instance attributes
+    of the class (X, Y, and A).
+
+    For more information on sNMF, please reference:
+    Gu, R., Rakita, Y., Lan, L. et al. Stretched non-negative matrix factorization.
+    npj Comput Mater 10, 193 (2024). https://doi.org/10.1038/s41524-024-01377-5
+
+    Attributes
+    ----------
+    MM : ndarray
+        The original, unmodified data to be decomposed and, later, compared against.
+        Shape is (length_of_signal, number_of_conditions).
+    Y : ndarray
+        The best guess (or while running, the current guess) for the matrix of
+        component weights at each stretching condition.
+    X : ndarray
+        The best guess (or while running, the current guess) for the matrix of
+        component intensities.
+    A : ndarray
+        The best guess (or while running, the current guess) for the matrix of
+        stretching factors for each component at each condition.
+    rho : float
+        The stretching factor that influences the decomposition. Zero corresponds to no
+        stretching present. Relatively insensitive and typically adjusted in powers of 10.
+    eta : float
+        The sparsity factor that influences the decomposition. Should be set to zero for
+        non-sparse data such as PDF. Can be used to improve results for sparse data such
+        as XRD, but due to instability, should be used only after first selecting the
+        best value for rho. Suggested adjustment is by powers of 2.
+    max_iter : int
+        The maximum number of times to update each of A, X, and Y before stopping
+        the optimization.
+    tol : float
+        The convergence threshold. The optimization stops once the fractional improvement
+        in the objective function per update falls below this value. Note that a minimum
+        of 20 updates are run before this parameter is checked.
+    n_components : int
+        The number of components to extract from MM. Must be provided when and only when
+        Y0 is not provided.
+    random_state : int
+        The seed for the initial guesses at the matrices (A, X, and Y) created by
+        the decomposition.
+    num_updates : int
+        The total number of times that any of (A, X, and Y) have had their values changed.
+        If not terminated by other means, this value is used to stop when reaching max_iter.
+    objective_function : float
+        The value corresponding to the minimization of the difference between MM and the
+        products of A, X, and Y. For full details see the sNMF paper. Smaller corresponds to
+        better agreement and is desirable.
+    objective_difference : float
+        The change in the objective function value since the last update, computed as the
+        previous value minus the current value. A positive value means that the result improved.
+    """
+
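A minimal usage sketch (illustrative only, not part of this patch). It assumes a data
matrix `MM` with shape (length_of_signal, number_of_conditions) is already available and
that the class is importable from a module named `snmf_class` (an assumption):

    import numpy as np
    from snmf_class import SNMFOptimizer  # hypothetical import path

    MM = np.load("my_data.npy")  # hypothetical input file
    model = SNMFOptimizer(MM=MM, n_components=2, random_state=0)

    X = model.X  # (length_of_signal, n_components) component intensities
    Y = model.Y  # (n_components, number_of_conditions) component weights
    A = model.A  # (n_components, number_of_conditions) stretching factors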
+    def __init__(
+        self,
+        MM,
+        Y0=None,
+        X0=None,
+        A0=None,
+        rho=1e12,
+        eta=610,
+        max_iter=500,
+        tol=5e-7,
+        n_components=None,
+        random_state=None,
+    ):
+        """Initialize an instance of SNMFOptimizer and run the optimization.
+
+        Parameters
+        ----------
+        MM : ndarray
+            The data to be decomposed. Shape is (length_of_signal, number_of_conditions).
+        Y0 : ndarray
+            The initial guesses for the component weights at each stretching condition.
+            Shape is (number_of_components, number_of_conditions). Must provide exactly one
+            of this or n_components.
+        X0 : ndarray
+            The initial guesses for the intensities of each component per
+            row/sample/angle. Shape is (length_of_signal, number_of_components).
+        A0 : ndarray
+            The initial guesses for the stretching factor for each component, at each
+            condition. Shape is (number_of_components, number_of_conditions).
+        rho : float
+            The stretching factor that influences the decomposition. Zero corresponds to no
+            stretching present. Relatively insensitive and typically adjusted in powers of 10.
+        eta : float
+            The sparsity factor that influences the decomposition. Should be set to zero for
+            non-sparse data such as PDF. Can be used to improve results for sparse data such
+            as XRD, but due to instability, should be used only after first selecting the
+            best value for rho. Suggested adjustment is by powers of 2.
+        max_iter : int
+            The maximum number of times to update each of A, X, and Y before stopping
+            the optimization.
+        tol : float
+            The convergence threshold. The optimization stops once the fractional improvement
+            in the objective function per update falls below this value. Note that a minimum
+            of 20 updates are run before this parameter is checked.
+        n_components : int
+            The number of components to extract from MM. Must be provided when and only when
+            Y0 is not provided.
+        random_state : int
+            The seed for the initial guesses at the matrices (A, X, and Y) created by
+            the decomposition.
+        """
+
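As a hedged illustration of the shapes documented above, explicit initial guesses could be
supplied like this (values are arbitrary placeholders, not a recommended initialization):

    import numpy as np

    signal_len, n_conditions, n_components = 1000, 10, 2
    MM = np.random.rand(signal_len, n_conditions)    # stand-in data
    X0 = np.random.rand(signal_len, n_components)    # component intensities
    Y0 = np.random.rand(n_components, n_conditions)  # component weights
    A0 = np.ones((n_components, n_conditions))       # stretching factors (start unstretched)
    model = SNMFOptimizer(MM=MM, Y0=Y0, X0=X0, A0=A0)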
        self.MM = MM
-        self.X0 = X0
-        self.Y0 = Y0
-        self.A = A
        self.rho = rho
        self.eta = eta
        # Capture matrix dimensions
-        self.N, self.M = MM.shape
+        self._N, self._M = MM.shape
        self.num_updates = 0
+        self._rng = np.random.default_rng(random_state)

+        # Enforce exclusive specification of n_components or Y0
+        if (n_components is None) == (Y0 is None):
+            raise ValueError("Must provide exactly one of Y0 or n_components, but not both.")
+
+        # Initialize Y and determine the number of components
        if Y0 is None:
-            if components is None:
-                raise ValueError("Must provide either Y0 or a number of components.")
-            else:
-                self.K = components
-                self.Y0 = np.random.beta(a=2.5, b=1.5, size=(self.K, self.M))  # This is untested
+            self._K = n_components
+            self.Y = self._rng.beta(a=2.5, b=1.5, size=(self._K, self._M))
        else:
-            self.K = Y0.shape[0]
+            self._K = Y0.shape[0]
+            self.Y = Y0

-        # Initialize A, X0 if not provided
-        if self.A is None:
-            self.A = np.ones((self.K, self.M)) + np.random.randn(self.K, self.M) * 1e-3  # Small perturbation
-        if self.X0 is None:
-            self.X0 = np.random.rand(self.N, self.K)  # Ensures values in [0,1]
+        # Initialize A if not provided
+        if A0 is None:
+            self.A = np.ones((self._K, self._M)) + self._rng.normal(0, 1e-3, size=(self._K, self._M))
+        else:
+            self.A = A0
+
+        # Initialize X if not provided
+        if X0 is None:
+            self.X = self._rng.random((self._N, self._K))
+        else:
+            self.X = X0

-        # Initialize solution matrices to be iterated on
-        self.X = np.maximum(0, self.X0)
-        self.Y = np.maximum(0, self.Y0)
+        # Enforce non-negativity
+        self.X = np.maximum(0, self.X)
+        self.Y = np.maximum(0, self.Y)

        # Second-order spline: Tridiagonal (-2 on diagonal, 1 on sub/superdiagonals)
-        self.P = 0.25 * diags([1, -2, 1], offsets=[0, 1, 2], shape=(self.M - 2, self.M))
+        self.P = 0.25 * diags([1, -2, 1], offsets=[0, 1, 2], shape=(self._M - 2, self._M))
        self.PP = self.P.T @ self.P
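To make the operator concrete, a small hedged example of what `P` looks like for M = 6
conditions (each row applies a 0.25-scaled second difference across neighboring columns):

    from scipy.sparse import diags

    M = 6
    P = 0.25 * diags([1, -2, 1], offsets=[0, 1, 2], shape=(M - 2, M))
    print(P.toarray())
    # [[ 0.25 -0.5   0.25  0.    0.    0.  ]
    #  [ 0.    0.25 -0.5   0.25  0.    0.  ]
    #  [ 0.    0.    0.25 -0.5   0.25  0.  ]
    #  [ 0.    0.    0.    0.25 -0.5   0.25]]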
        # Set up residual matrix, objective function, and history
        self.R = self.get_residual_matrix()
        self.objective_function = self.get_objective_function()
        self.objective_difference = None
-        self.objective_history = [self.objective_function]
+        self._objective_history = [self.objective_function]

        # Set up tracking variables for updateX()
-        self.preX = None
-        self.GraX = np.zeros_like(self.X)  # Gradient of X (zeros for now)
-        self.preGraX = np.zeros_like(self.X)  # Previous gradient of X (zeros for now)
+        self._preX = None
+        self._GraX = np.zeros_like(self.X)  # Gradient of X (zeros for now)
+        self._preGraX = np.zeros_like(self.X)  # Previous gradient of X (zeros for now)

        regularization_term = 0.5 * rho * np.linalg.norm(self.P @ self.A.T, "fro") ** 2
        sparsity_term = eta * np.sum(np.sqrt(self.X))  # Square root penalty
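For orientation: combining the two terms above with the data misfit, the objective that the
updates below drive down appears to have the form (a sketch based on the referenced sNMF
paper; get_objective_function, not shown in this diff, holds the exact definition):

    F(X, Y, A) = misfit(MM, reconstruction from X stretched by A and weighted by Y)
                 + 0.5 * rho * ||P @ A.T||_F**2
                 + eta * sum(sqrt(X))

where the second and third terms are exactly the regularization_term and sparsity_term
computed above.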
@@ -83,53 +197,53 @@ def __init__(self, MM, Y0=None, X0=None, A=None, rho=1e12, eta=610, max_iter=500
        # loop to normalize X
        # effectively just re-running class with non-normalized X, normalized Y/A as inputs, then only update X
        # reset difference trackers and initialize
-        self.preX = None
-        self.GraX = np.zeros_like(self.X)  # Gradient of X (zeros for now)
-        self.preGraX = np.zeros_like(self.X)  # Previous gradient of X (zeros for now)
+        self._preX = None
+        self._GraX = np.zeros_like(self.X)  # Gradient of X (zeros for now)
+        self._preGraX = np.zeros_like(self.X)  # Previous gradient of X (zeros for now)
        self.R = self.get_residual_matrix()
        self.objective_function = self.get_objective_function()
        self.objective_difference = None
-        self.objective_history = [self.objective_function]
+        self._objective_history = [self.objective_function]
        for norm_iter in range(100):
            self.updateX()
            self.R = self.get_residual_matrix()
            self.objective_function = self.get_objective_function()
            print(f"Objective function after normX: {self.objective_function:.5e}")
-            self.objective_history.append(self.objective_function)
-            self.objective_difference = self.objective_history[-2] - self.objective_history[-1]
+            self._objective_history.append(self.objective_function)
+            self.objective_difference = self._objective_history[-2] - self._objective_history[-1]
            if self.objective_difference < self.objective_function * tol and norm_iter >= 20:
                break
        # end of normalization (and program)
        # note that objective function may not fully recover after normalization, this is okay
        print("Finished optimization.")

    def optimize_loop(self):
-        self.preGraX = self.GraX.copy()
+        self._preGraX = self._GraX.copy()
        self.updateX()
        self.num_updates += 1
        self.R = self.get_residual_matrix()
        self.objective_function = self.get_objective_function()
        print(f"Objective function after updateX: {self.objective_function:.5e}")
-        self.objective_history.append(self.objective_function)
+        self._objective_history.append(self.objective_function)
        if self.objective_difference is None:
-            self.objective_difference = self.objective_history[-1] - self.objective_function
+            self.objective_difference = self._objective_history[-1] - self.objective_function

        # Now we update Y
        self.updateY2()
        self.num_updates += 1
        self.R = self.get_residual_matrix()
        self.objective_function = self.get_objective_function()
        print(f"Objective function after updateY2: {self.objective_function:.5e}")
-        self.objective_history.append(self.objective_function)
+        self._objective_history.append(self.objective_function)

        self.updateA2()

        self.num_updates += 1
        self.R = self.get_residual_matrix()
        self.objective_function = self.get_objective_function()
        print(f"Objective function after updateA2: {self.objective_function:.5e}")
-        self.objective_history.append(self.objective_function)
-        self.objective_difference = self.objective_history[-2] - self.objective_history[-1]
+        self._objective_history.append(self.objective_function)
+        self.objective_difference = self._objective_history[-2] - self._objective_history[-1]

    def apply_interpolation(self, a, x, return_derivatives=False):
        """
@@ -401,36 +515,38 @@ def updateX(self):
        # Compute `AX` using the interpolation function
        AX, _, _ = self.apply_interpolation_matrix()  # Skip the other two outputs
        # Compute RA and RR
-        intermediate_RA = AX.flatten(order="F").reshape((self.N * self.M, self.K), order="F")
-        RA = intermediate_RA.sum(axis=1).reshape((self.N, self.M), order="F")
+        intermediate_RA = AX.flatten(order="F").reshape((self._N * self._M, self._K), order="F")
+        RA = intermediate_RA.sum(axis=1).reshape((self._N, self._M), order="F")
        RR = RA - self.MM
        # Compute gradient `GraX`
-        self.GraX = self.apply_transformation_matrix(R=RR).toarray()  # toarray equivalent of full, make non-sparse
+        self._GraX = self.apply_transformation_matrix(
+            R=RR
+        ).toarray()  # toarray equivalent of full, make non-sparse

        # Compute initial step size `L0`
        L0 = np.linalg.eigvalsh(self.Y.T @ self.Y).max() * np.max([self.A.max(), 1 / self.A.min()])
        # Compute adaptive step size `L`
-        if self.preX is None:
+        if self._preX is None:
            L = L0
        else:
-            num = np.sum((self.GraX - self.preGraX) * (self.X - self.preX))  # Element-wise multiplication
-            denom = np.linalg.norm(self.X - self.preX, "fro") ** 2  # Frobenius norm squared
+            num = np.sum((self._GraX - self._preGraX) * (self.X - self._preX))  # Element-wise multiplication
+            denom = np.linalg.norm(self.X - self._preX, "fro") ** 2  # Frobenius norm squared
            L = num / denom if denom > 0 else L0
            if L <= 0:
                L = L0

        # Store our old X before updating because it is used in step selection
-        self.preX = self.X.copy()
+        self._preX = self.X.copy()

        while True:  # iterate updating X
-            x_step = self.preX - self.GraX / L
+            x_step = self._preX - self._GraX / L
            # Solve x^3 + p*x + q = 0 for the largest real root
            self.X = np.square(cubic_largest_real_root(-x_step, self.eta / (2 * L)))
            # Mask values that should be set to zero
            mask = self.X**2 * L / 2 - L * self.X * x_step + self.eta * np.sqrt(self.X) < 0
            self.X = mask * self.X

-            objective_improvement = self.objective_history[-1] - self.get_objective_function(
+            objective_improvement = self._objective_history[-1] - self.get_objective_function(
                R=self.get_residual_matrix()
            )
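A hedged aside on the cubic-root step in the while-loop above: each element of X is updated
to a stationary point of g(x) = (L/2) * (x - x_step)**2 + eta * sqrt(x) over x >= 0.
Substituting u = sqrt(x) and setting dg/dx = 0 gives u**3 - x_step*u + eta/(2*L) = 0, which
matches the call cubic_largest_real_root(-x_step, eta / (2 * L)) followed by squaring,
assuming that helper returns the largest real root of u**3 + p*u + q. The mask computed
afterwards keeps an element only where that interior solution beats x = 0, i.e. where
g(x) < g(0). A quick numerical cross-check (illustrative only):

    import numpy as np

    L, s, eta = 2.0, 1.3, 0.4
    # closed form: largest real root of u^3 - s*u + eta/(2L), then square it
    roots = np.roots([1.0, 0.0, -s, eta / (2 * L)])
    u = max(r.real for r in roots if abs(r.imag) < 1e-12)
    x_closed = u**2

    # brute-force minimization of g on a dense grid
    xs = np.linspace(0.0, 4.0, 400_001)
    g = 0.5 * L * (xs - s) ** 2 + eta * np.sqrt(xs)
    x_grid = xs[np.argmin(g)]

    print(x_closed, x_grid)  # both should be close to 1.21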
@@ -447,9 +563,9 @@ def updateY2(self):
        Updates Y using matrix operations, solving a quadratic program via `solve_mkr_box`.
        """

-        K = self.K
-        N = self.N
-        M = self.M
+        K = self._K
+        N = self._N
+        M = self._M

        for m in range(M):
            T = np.zeros((N, K))  # Initialize T as an (N, K) zero matrix
@@ -476,9 +592,9 @@ def regularize_function(self, A=None):
        if A is None:
            A = self.A

-        K = self.K
-        M = self.M
-        N = self.N
+        K = self._K
+        M = self._M
+        N = self._N

        # Compute interpolated matrices
        AX, TX, HX = self.apply_interpolation_matrix(A=A, return_derivatives=True)