Commit bff9807

Cleanup and fix tests and examples

1 parent 599d2f3 · commit bff9807
3 files changed: +57 −31 lines changed


deep_implicit_attention/attention.py

Lines changed: 49 additions & 23 deletions

@@ -8,7 +8,7 @@
 from .utils import batched_eye, batched_eye_like
 
 
-class DeepImplicitAttention(_DEQModule):
+class DEQMeanFieldAttention(_DEQModule):
     """Deep implicit attention.
 
     Attention as a fixed-point mean-field response of an Ising-like vector
@@ -36,6 +36,10 @@ class DeepImplicitAttention(_DEQModule):
             norm of tensor |weight| ~ O(1).
         weight_training (bool):
             Allow coupling weights to be trained. (default: `True`).
+        weight_sym_internal (bool):
+            Symmetrize internal indices of weight tensor. (default: `False`).
+        weight_sym_sites (bool):
+            Symmetrize site indices of weight tensor. (default: `False`).
         lin_response (bool):
             Toggle linear response correction to mean-field (default: `True`).
     """
@@ -46,6 +50,8 @@ def __init__(
         dim,
         weight_init_std=None,
         weight_training=True,
+        weight_sym_internal=False,
+        weight_sym_sites=False,
         lin_response=True,
     ):
         super().__init__()
@@ -60,6 +66,9 @@ def __init__(
             ),
             training=weight_training,
         )
+        self.weight_sym_internal = weight_sym_internal
+        self.weight_sym_sites = weight_sym_sites
+
         if lin_response:
             self.correction = FeedForward(dim)  # no dropout
         self.lin_response = lin_response
@@ -73,28 +82,28 @@ def _init_weight(self, num_spins, dim, init_std, training):
         else:
             self.register_buffer('_weight', weight)
 
-    def weight(self, symmetrize_internal=True, symmetrize_sites=True):
-        """
-        Return symmetrized and traceless weight tensor.
-
-        Note:
-            This implementation is very inefficient since it stores N^2*d^2
-            parameters but only needs N*(N-1)*d*(d+1)/4. Also look into new
-            torch parametrization functionality:
-            https://pytorch.org/tutorials/intermediate/parametrizations.html
-        """
+    def weight(self):
+        """Return symmetrized and traceless weight tensor."""
         num_spins, dim = self._weight.size(0), self._weight.size(2)
         weight = self._weight
-        if symmetrize_internal:  # local dofs at every site
+        if self.weight_sym_internal:
             weight = 0.5 * (weight + weight.permute([0, 1, 3, 2]))
-        if symmetrize_sites:  # between sites
+        if self.weight_sym_sites:
             weight = 0.5 * (weight + weight.permute([1, 0, 2, 3]))
         mask = batched_eye(dim ** 2, num_spins,
                            device=weight.device, dtype=weight.dtype)
         mask = rearrange(mask, '(a b) i j -> i j a b', a=dim, b=dim)
-        weight = (1.0 - mask) * weight  # zeros on sites' block-diagonal
+        weight = (1.0 - mask) * weight
         return weight
 
+    def count_params(self):
+        num_spins, dim = self._weight.size(0), self._weight.size(2)
+        site_factor = 0.5*num_spins * \
+            (num_spins-1) if self.weight_sym_sites else num_spins*(num_spins-1)
+        internal_factor = 0.5*dim * \
+            (dim+1) if self.weight_sym_internal else dim**2
+        return site_factor*internal_factor
+
     def _initial_guess(self, x):
         """Return initial guess tensors."""
         bsz, N, d = x.shape
@@ -119,14 +128,13 @@ def forward(self, z, x, *args):
 
         spin_mean = torch.einsum(
             'i j c d, b j d -> b i c', self.weight(), spin_mean) + x
-
         if self.lin_response:
             spin_mean = spin_mean - self.correction(spin_mean)
 
         return self.pack_state([spin_mean])
 
 
-class ExplicitDeepImplicitAttention(_DEQModule):
+class DEQAdaTAPMeanFieldAttention(_DEQModule):
     """Ising-like vector model with multivariate Gaussian prior over spins.
 
     Generalization of the application of the adaptive TAP mean-field approach
@@ -162,6 +170,10 @@ class ExplicitDeepImplicitAttention(_DEQModule):
             norm of tensor |weight| ~ O(1).
         weight_training (bool):
             Allow coupling weights to be trained. (default: `True`).
+        weight_sym_internal (bool):
+            Symmetrize internal indices of weight tensor. (default: `True`).
+        weight_sym_sites (bool):
+            Symmetrize site indices of weight tensor. (default: `True`).
         lin_response (bool):
             Toggle linear response correction to mean-field (default: `True`).
     """
@@ -172,6 +184,8 @@ def __init__(
         dim,
         weight_init_std=None,
         weight_training=True,
+        weight_sym_internal=True,
+        weight_sym_sites=True,
         lin_response=True,
     ):
         super().__init__()
@@ -186,11 +200,15 @@ def __init__(
             ),
             training=weight_training,
         )
+        self.weight_sym_internal = weight_sym_internal
+        self.weight_sym_sites = weight_sym_sites
+
         self.register_buffer(
             'spin_prior_inv_var',
             batched_eye_like(
                 torch.zeros(num_spins, dim, dim))
         )
+
         self.lin_response = lin_response
 
     def _init_weight(self, num_spins, dim, init_std, training):
@@ -202,7 +220,7 @@ def _init_weight(self, num_spins, dim, init_std, training):
         else:
             self.register_buffer('_weight', weight)
 
-    def weight(self, symmetrize_internal=True, symmetrize_sites=True):
+    def weight(self):
         """
         Return symmetrized and traceless weight tensor.
 
@@ -214,16 +232,24 @@ def weight(self, symmetrize_internal=True, symmetrize_sites=True):
         """
         num_spins, dim = self._weight.size(0), self._weight.size(2)
         weight = self._weight
-        if symmetrize_internal:  # local dofs at every site
+        if self.weight_sym_internal:
             weight = 0.5 * (weight + weight.permute([0, 1, 3, 2]))
-        if symmetrize_sites:  # between sites
+        if self.weight_sym_sites:
             weight = 0.5 * (weight + weight.permute([1, 0, 2, 3]))
         mask = batched_eye(dim ** 2, num_spins,
                            device=weight.device, dtype=weight.dtype)
         mask = rearrange(mask, '(a b) i j -> i j a b', a=dim, b=dim)
-        weight = (1.0 - mask) * weight  # zeros on sites' block-diagonal
+        weight = (1.0 - mask) * weight
         return weight
 
+    def count_params(self):
+        num_spins, dim = self._weight.size(0), self._weight.size(2)
+        site_factor = 0.5*num_spins * \
+            (num_spins-1) if self.weight_sym_sites else num_spins*(num_spins-1)
+        internal_factor = 0.5*dim * \
+            (dim+1) if self.weight_sym_internal else dim**2
+        return site_factor*internal_factor
+
     def _initial_guess(self, x):
         """Return initial guess tensors."""
         bsz, N, d = x.shape
@@ -243,7 +269,7 @@ def _spin_mean_var(self, x, cav_mean, cav_var):
         inv_var = self.spin_prior_inv_var - cav_var
         prefactor = torch.solve(batched_eye_like(inv_var), inv_var).solution
         spin_mean = torch.einsum(
-            'n d e, b n d -> b n e', prefactor, (cav_mean + x)
+            'i d e, b i d -> b i e', prefactor, (cav_mean + x)
         )
         spin_var = prefactor
         return spin_mean, spin_var
@@ -275,8 +301,8 @@ def forward(self, z, x, *args):
         weight = self.weight()
 
         cav_mean = torch.einsum(
-            'n m d e, b m e -> b n d', weight, spin_mean
-        ) - torch.einsum('b n d e, b n d -> b n e', cav_var, spin_mean)
+            'i j d e, b j e -> b i d', weight, spin_mean
+        ) - torch.einsum('b i d e, b i d -> b i e', cav_var, spin_mean)
 
         spin_mean, spin_var = self._spin_mean_var(x, cav_mean, cav_var[0])
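Note: the `count_params` method added to both classes evaluates the closed-form count that the trimmed docstring note used to quote: N·(N−1)·d·(d+1)/4 independent couplings when both symmetrizations are on, versus the N²·d² entries actually stored. A standalone sketch of the same arithmetic, useful for sanity-checking the method (`effective_params` is an illustrative name, not part of the repo's API):

```python
# Standalone restatement of count_params() from the diff above.
def effective_params(num_spins, dim, sym_sites=True, sym_internal=True):
    # Off-diagonal site pairs: N*(N-1) blocks, halved when W_ij is tied to W_ji.
    site_factor = (
        0.5 * num_spins * (num_spins - 1) if sym_sites
        else num_spins * (num_spins - 1)
    )
    # Each d x d coupling block: d^2 entries, or d*(d+1)/2 when symmetric.
    internal_factor = 0.5 * dim * (dim + 1) if sym_internal else dim ** 2
    return site_factor * internal_factor


# For the sizes used in tests/test_gradients.py (N=11, d=3), both flags on:
# 55 * 6 = 330 independent couplings, versus 11**2 * 3**2 = 1089 stored entries,
# matching the old docstring note N*(N-1)*d*(d+1)/4.
assert effective_params(11, 3) == 330
```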
examples/single_layer.py

Lines changed: 2 additions & 2 deletions

@@ -1,7 +1,7 @@
 import numpy as np
 import torch
 
-from deep_implicit_attention.attention import DeepImplicitAttention
+from deep_implicit_attention.attention import DEQMeanFieldAttention
 from deep_implicit_attention.deq import DEQFixedPoint
 from deep_implicit_attention.solvers import anderson
 
@@ -10,7 +10,7 @@
 
 # Initialize fixed-point wrapper around model system.
 deq_attn = DEQFixedPoint(
-    DeepImplicitAttention(
+    DEQMeanFieldAttention(
         num_spins=num_spins,
         dim=dim,
         weight_init_std=1.0 / np.sqrt(num_spins * dim**2),
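For reference, the renamed example reads roughly as follows after this commit. This is a sketch: the lines of examples/single_layer.py outside the two hunks are assumed unchanged, and the input tensor and the `deq_attn(x)` call signature are assumptions, not shown in the diff.

```python
import numpy as np
import torch

from deep_implicit_attention.attention import DEQMeanFieldAttention
from deep_implicit_attention.deq import DEQFixedPoint
from deep_implicit_attention.solvers import anderson

num_spins, dim = 11, 3  # sizes as in the tests; the example's own values may differ

# Initialize fixed-point wrapper around model system.
deq_attn = DEQFixedPoint(
    DEQMeanFieldAttention(
        num_spins=num_spins,
        dim=dim,
        weight_init_std=1.0 / np.sqrt(num_spins * dim**2),
        weight_sym_internal=False,  # new flag in this commit (default False here)
        weight_sym_sites=False,     # new flag in this commit (default False here)
        lin_response=True,
    ),
    anderson,  # fixed-point solver
)

# Source injection x plays the role of token embeddings: (batch, sites, dim).
x = torch.randn(4, num_spins, dim)
out = deq_attn(x)
```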

tests/test_gradients.py

Lines changed: 6 additions & 6 deletions

@@ -5,23 +5,23 @@
 from torch.autograd import gradcheck
 
 from deep_implicit_attention.attention import (
-    DeepImplicitAttention,
-    ExplicitDeepImplicitAttention,
+    DEQMeanFieldAttention,
+    DEQAdaTAPMeanFieldAttention,
 )
 from deep_implicit_attention.deq import DEQFixedPoint
 from deep_implicit_attention.solvers import anderson
 
 
 class TestGradients(unittest.TestCase):
-    def test_explicit_deep_implicit_attention(self):
+    def test_adatap_mean_field_attention(self):
         """Run a small network with double precision."""
 
         num_spins, dim = 11, 3
 
         for lin_response in [False, True]:
             with self.subTest():
                 deq_attn = DEQFixedPoint(
-                    ExplicitDeepImplicitAttention(
+                    DEQAdaTAPMeanFieldAttention(
                         num_spins=num_spins,
                         dim=dim,
                         lin_response=lin_response,
@@ -41,15 +41,15 @@ def test_explicit_deep_implicit_attention(self):
                 )
             )
 
-    def test_deep_implicit_attention(self):
+    def test_mean_field_attention(self):
         """Run a small network with double precision."""
 
         num_spins, dim = 11, 3
 
         for lin_response in [False, True]:
             with self.subTest():
                 deq_attn = DEQFixedPoint(
-                    DeepImplicitAttention(
+                    DEQMeanFieldAttention(
                         num_spins=num_spins,
                         dim=dim,
                         lin_response=lin_response,
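The assertion body between the two hunks is not shown in the diff. A self-contained sketch of the double-precision gradcheck pattern the visible code implies; the `.double()` cast, the input shape, and the gradcheck lambda are assumptions consistent with the surrounding lines:

```python
import unittest

import torch
from torch.autograd import gradcheck

from deep_implicit_attention.attention import DEQMeanFieldAttention
from deep_implicit_attention.deq import DEQFixedPoint
from deep_implicit_attention.solvers import anderson


class TestGradientsSketch(unittest.TestCase):
    def test_mean_field_attention(self):
        """Run a small network with double precision."""
        num_spins, dim = 11, 3
        deq_attn = DEQFixedPoint(
            DEQMeanFieldAttention(num_spins=num_spins, dim=dim),
            anderson,
        ).double()  # float64 keeps gradcheck's finite differences reliable
        x = torch.randn(1, num_spins, dim, dtype=torch.double,
                        requires_grad=True)
        # Compare the implicit backward pass against numerical gradients of
        # a scalar function of the fixed-point output.
        self.assertTrue(gradcheck(lambda t: deq_attn(t).sum(), x))


if __name__ == '__main__':
    unittest.main()
```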
