ritheshkumar95
diff --git a/modules.py b/modules.py
+17-5
diff --git a/pixelcnn_prior.py b/pixelcnn_prior.py
+6-6
diff --git a/samples/reconstructions_CIFAR10.png b/samples/reconstructions_CIFAR10.png
-137 KB
diff --git a/samples/samples_CIFAR10.png b/samples/samples_CIFAR10.png
3.18 KB
diff --git a/samples/samples_FashionMNIST.png b/samples/samples_FashionMNIST.png
-1.45 KB
diff --git a/samples/samples_MNIST.png b/samples/samples_MNIST.png
-768 Bytes
diff --git a/samples/vae_reconstructions_CIFAR10.png b/samples/vae_reconstructions_CIFAR10.png
134 KB
diff --git a/samples/vae_reconstructions_FashionMNIST.png b/samples/vae_reconstructions_FashionMNIST.png
49.1 KB
diff --git a/samples/vae_reconstructions_MNIST.png b/samples/vae_reconstructions_MNIST.png
30.3 KB
diff --git a/samples/vae_samples_CIFAR10.png b/samples/vae_samples_CIFAR10.png
-3.16 KB
diff --git a/samples/vae_samples_FashionMNIST.png b/samples/vae_samples_FashionMNIST.png
51.1 KB
diff --git a/samples/vae_samples_MNIST.png b/samples/vae_samples_MNIST.png
2.2 KB
diff --git a/samples/vqvae_reconstructions_CIFAR10.png b/samples/vqvae_reconstructions_CIFAR10.png
4.12 KB
diff --git a/samples/vqvae_reconstructions_FashionMNIST.png b/samples/vqvae_reconstructions_FashionMNIST.png
158 Bytes
diff --git a/vae.py b/vae.py
+3-3
diff --git a/vqvae.py b/vqvae.py
+4-3
@@ -15,29 +15,39 @@ def to_scalar(arr):
 def weights_init(m):
     classname = m.__class__.__name__
     if classname.find('Conv') != -1:
-        nn.init.xavier_uniform_(m.weight.data)
-        m.bias.data.fill_(0)
+        try:
+            nn.init.xavier_uniform_(m.weight.data)
+            m.bias.data.fill_(0)
+        except AttributeError:
+            print("Skipping initialization of ", classname)
 
 
 class VAE(nn.Module):
     def __init__(self, input_dim, dim, z_dim):
         super().__init__()
         self.encoder = nn.Sequential(
             nn.Conv2d(input_dim, dim, 4, 2, 1),
+            nn.BatchNorm2d(dim),
             nn.ReLU(True),
             nn.Conv2d(dim, dim, 4, 2, 1),
+            nn.BatchNorm2d(dim),
             nn.ReLU(True),
             nn.Conv2d(dim, dim, 5, 1, 0),
+            nn.BatchNorm2d(dim),
             nn.ReLU(True),
             nn.Conv2d(dim, z_dim * 2, 3, 1, 0),
+            nn.BatchNorm2d(z_dim * 2)
         )
 
         self.decoder = nn.Sequential(
             nn.ConvTranspose2d(z_dim, dim, 3, 1, 0),
+            nn.BatchNorm2d(dim),
             nn.ReLU(True),
             nn.ConvTranspose2d(dim, dim, 5, 1, 0),
+            nn.BatchNorm2d(dim),
             nn.ReLU(True),
             nn.ConvTranspose2d(dim, dim, 4, 2, 1),
+            nn.BatchNorm2d(dim),
             nn.ReLU(True),
             nn.ConvTranspose2d(dim, input_dim, 4, 2, 1),
             nn.Tanh()
@@ -204,7 +214,7 @@ def forward(self, x_v, x_h, h):
 class GatedPixelCNN(nn.Module):
     def __init__(self, input_dim=256, dim=64, n_layers=15):
         super().__init__()
-        self.dim = 64
+        self.dim = dim
 
         # Create embedding layer to embed input
         self.embedding = nn.Embedding(input_dim, dim)
@@ -225,11 +235,13 @@ def __init__(self, input_dim=256, dim=64, n_layers=15):
 
         # Add the output layer
         self.output_conv = nn.Sequential(
-            nn.Conv2d(dim, dim, 1),
+            nn.Conv2d(dim, 512, 1),
             nn.ReLU(True),
-            nn.Conv2d(dim, input_dim, 1)
+            nn.Conv2d(512, input_dim, 1)
         )
 
+        self.apply(weights_init)
+
     def forward(self, x, label):
         shp = x.size() + (-1, )
         x = self.embedding(x.view(-1)).view(shp)  # (B, H, W, C)
 
@@ -11,16 +11,16 @@
 N_EPOCHS = 100
 PRINT_INTERVAL = 100
 ALWAYS_SAVE = True
-DATASET = 'MNIST'  # CIFAR10 | MNIST | FashionMNIST
+DATASET = 'CIFAR10'  # CIFAR10 | MNIST | FashionMNIST
 NUM_WORKERS = 4
 
-LATENT_SHAPE = (7, 7)  # (8, 8) -> 32x32 images, (7, 7) -> 28x28 images
-INPUT_DIM = 1  # 3 (RGB) | 1 (Grayscale)
-DIM = 64
+LATENT_SHAPE = (8, 8)  # (8, 8) -> 32x32 images, (7, 7) -> 28x28 images
+INPUT_DIM = 3  # 3 (RGB) | 1 (Grayscale)
+DIM = 256
 VAE_DIM = 256
-N_LAYERS = 15
+N_LAYERS = 12
 K = 512
-LR = 1e-3
+LR = 3e-4
 
 DEVICE = torch.device('cuda') # torch.device('cpu')
 
 
@@ -14,13 +14,13 @@
 BATCH_SIZE = 32
 N_EPOCHS = 100
 PRINT_INTERVAL = 500
-DATASET = 'CIFAR10'  # CIFAR10 | MNIST | FashionMNIST
+DATASET = 'FashionMNIST'  # CIFAR10 | MNIST | FashionMNIST
 NUM_WORKERS = 4
 
-INPUT_DIM = 3
+INPUT_DIM = 1
 DIM = 256
 Z_DIM = 128
-LR = 3e-4
+LR = 1e-3
 
 
 preproc_transform = transforms.Compose([
 
@@ -81,12 +81,13 @@ def train():
         loss_commit.backward()
         opt.step()
 
-        nll = -Normal(x_tilde, torch.ones_like(x_tilde)).log_prob(x)
-        log_px = nll.mean().item() - np.log(128) + np.log(K)
+        N = x.numel()
+        nll = Normal(x_tilde, torch.ones_like(x_tilde)).log_prob(x)
+        log_px = nll.sum() / N + np.log(128) - np.log(K * 2)
         log_px /= np.log(2)
 
         train_loss.append(
-            [log_px] + to_scalar([loss_recons, loss_vq])
+            [log_px.item()] + to_scalar([loss_recons, loss_vq])
         )
 
         if (batch_idx + 1) % PRINT_INTERVAL == 0: