import os
from collections import OrderedDict
+from typing import Dict

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
-from typing import Dict

try:
    from test_tube import HyperOptArgumentParser
@@ -174,9 +174,8 @@ def configure_optimizers(self):
            optimizer = optim.LBFGS(self.parameters(), lr=self.hparams.learning_rate)
        else:
            optimizer = optim.Adam(self.parameters(), lr=self.hparams.learning_rate)
-
-        # test returning only 1 list instead of 2
-        return optimizer
+        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
+        return [optimizer], [scheduler]

    def prepare_data(self):
        transform = transforms.Compose([transforms.ToTensor(),
@@ -201,36 +200,3 @@ def _dataloader(self, train):
        )

        return loader
-
-    @staticmethod
-    def add_model_specific_args(parent_parser, root_dir):  # pragma: no-cover
-        """
-        Parameters you define here will be available to your model through self.hparams
-        :param parent_parser:
-        :param root_dir:
-        :return:
-        """
-        parser = HyperOptArgumentParser(strategy=parent_parser.strategy, parents=[parent_parser])
-
-        # param overwrites
-        # parser.set_defaults(gradient_clip_val=5.0)
-
-        # network params
-        parser.opt_list('--drop_prob', default=0.2, options=[0.2, 0.5], type=float, tunable=False)
-        parser.add_argument('--in_features', default=28 * 28, type=int)
-        parser.add_argument('--out_features', default=10, type=int)
-        # use 500 for CPU, 50000 for GPU to see speed difference
-        parser.add_argument('--hidden_dim', default=50000, type=int)
-        # data
-        parser.add_argument('--data_root', default=os.path.join(root_dir, 'mnist'), type=str)
-        # training params (opt)
-        parser.opt_list('--learning_rate', default=0.001 * 8, type=float,
-                        options=[0.0001, 0.0005, 0.001, 0.005], tunable=False)
-        parser.opt_list('--optimizer_name', default='adam', type=str,
-                        options=['adam'], tunable=False)
-        # if using 2 nodes with 4 gpus each the batch size here
-        # (256) will be 256 / (2*8) = 16 per gpu
-        parser.opt_list('--batch_size', default=256 * 8, type=int,
-                        options=[32, 64, 128, 256], tunable=False,
-                        help='batch size will be divided over all GPUs being used across all nodes')
-        return parser
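For context, a minimal stand-alone sketch of the two-list return shape that configure_optimizers switches to in the hunk above (in PyTorch Lightning, a LightningModule may return a list of optimizers plus a list of schedulers and the trainer pairs and steps them). The module-level helper and the throwaway nn.Linear model below are hypothetical illustrations, not part of this commit.

import torch.nn as nn
from torch import optim

def configure_optimizers_sketch(model: nn.Module, learning_rate: float = 0.001):
    # Hypothetical stand-in for the method above: one Adam optimizer paired with
    # one CosineAnnealingLR scheduler, returned as two parallel lists.
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
    return [optimizer], [scheduler]

# Usage on a throwaway model, outside any trainer:
optimizers, schedulers = configure_optimizers_sketch(nn.Linear(28 * 28, 10))
print(len(optimizers), len(schedulers))  # 1 1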