test.py
import tensorflow as tf
import numpy as np
import time
import cPickle as pkl
import h5py
import scipy.sparse
from models.dmfd import DMFD
import configs.configs_dmfd as configs
FLAGS = tf.app.flags.FLAGS
tf.flags.DEFINE_string("data_dir", "./data/MovieLens1M/", "Data directory.")
tf.flags.DEFINE_string("snapshot_dir", "./outputs/snapshots/", "Directory for saving and loading model checkpoints.")
tf.flags.DEFINE_string("model_fname", "", "Name of the pretrained model checkpoints (to resume from)")
cfgs = configs.CONFIGS
DEFAULT_RATING = 3.0
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
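# Parse a comma-separated config string (e.g. "1,2,3") into a list of ints or floats.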
def string_to_array(s, dtype='int'):
    arr = s.strip().split(',')
    for i in xrange(len(arr)):
        if dtype == 'int':
            arr[i] = int(arr[i])
        elif dtype == 'float':
            arr[i] = float(arr[i])
    return arr
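# Embed the rows of X in mini-batches of size bs to keep memory bounded.
# Assumes model.embed_x maps a batch of rows to a (batch, dim) array of latent vectors.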
def embed_x(model, X, dim, bs=1000):
    n_samples = X.shape[0]
    fv = np.zeros((n_samples, dim))
    start = 0
    while True:
        end = start + bs
        if end > n_samples:
            end = n_samples
        fv[start:end, :] = model.embed_x(X[start:end, :])
        if end == n_samples:
            break
        start = end
    return fv
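# Same batched embedding for the columns: the caller passes X.T, so each row of Y is one
# column of the rating matrix, and model.embed_y is assumed to return (batch, dim) vectors.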
def embed_y(model, Y, dim, bs=1000):
    n_samples = Y.shape[0]
    fv = np.zeros((n_samples, dim))
    start = 0
    while True:
        end = start + bs
        if end > n_samples:
            end = n_samples
        fv[start:end, :] = model.embed_y(Y[start:end, :])
        if end == n_samples:
            break
        start = end
    return fv
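# Reconstruct the full rating matrix from the latent factors and snap it to discrete levels:
# the cosine similarity of the L2-normalized embeddings is rescaled to [min_val, max_val],
# then every interval [qs[i], qs[i+1]) is mapped through a very steep sigmoid step of height
# Delta offset by Is[i], with bs holding the learned step boundaries. The exact meaning of
# qs, Delta and Is is defined in configs_dmfd; the reading above is inferred from their use below.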
def reconstruct_quantize_custom(latent_x, latent_y, min_val, max_val, bs, qs, Delta, Is):
    l2_norm_lx = latent_x / np.linalg.norm(latent_x, axis=1, keepdims=True)
    l2_norm_ly = latent_y / np.linalg.norm(latent_y, axis=1, keepdims=True)
    mul = np.matmul(l2_norm_lx, l2_norm_ly.T)
    mid = (max_val + min_val) / 2
    # re-scale the cosine similarity to the original entry range
    mul = mul * (mid - min_val) + mid
    k = 1e30  # a very big value
    recons = np.zeros_like(mul)
    for i in xrange(len(bs)):
        vals = Delta * sigmoid(k * (mul - bs[i])) + Is[i]
        if i < len(bs) - 1:
            mask = (mul >= qs[i]) * (mul < qs[i+1])
        else:
            mask = (mul >= qs[i]) * (mul <= qs[i+1])
        recons += vals * mask
    return recons
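# The two metrics below are masked: only entries where mask is nonzero contribute, and the
# sums are normalized by np.sum(mask) (the number of observed entries for a 0/1 mask).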
def RMSE(A, B, mask):
    rmse = np.sqrt(np.sum(mask * (A - B)**2) / np.sum(mask))
    return rmse
def MAE(A, B, mask):
    mae = np.sum(mask * np.abs(A - B)) / np.sum(mask)
    return mae
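# Load the rating data and a pretrained DMFD checkpoint, reconstruct and quantize the full
# matrix, and report masked RMSE / MAE on the train and held-out test entries.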
def main(unused_argv):
    # load data
    R = scipy.sparse.load_npz(FLAGS.data_dir + 'rating.npz')
    val_set = np.unique(R.data)
    min_val = float(val_set[0])
    max_val = float(val_set[-1])
    tr_mask = scipy.sparse.load_npz(FLAGS.data_dir + 'train_mask.npz')
    val_mask = scipy.sparse.load_npz(FLAGS.data_dir + 'val_mask.npz')
    te_mask = scipy.sparse.load_npz(FLAGS.data_dir + 'test_mask.npz')
    print('Finished loading data')
    count = np.sum((tr_mask + val_mask).multiply(te_mask))
    assert count == 0, 'Train/val and test masks overlap!'
    tr_mask += val_mask
    X = R.multiply(tr_mask).todense()
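    # X holds the observed train + validation ratings; entries outside the mask stay zero.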
    # load model
    assert (FLAGS.snapshot_dir != "" or FLAGS.model_fname != ""), 'No pretrained model specified'
    model = DMFD(X.shape[1], X.shape[0], min_val, max_val, cfgs, phase='test', log_dir=None)
    snapshot_fname = FLAGS.model_fname if FLAGS.model_fname != "" \
        else tf.train.latest_checkpoint(FLAGS.snapshot_dir)
    model.restore(snapshot_fname)
    print('Restored from %s' % snapshot_fname)
    # complete matrix
    embed_dim = int(configs.ModelConfig.u_hidden_sizes[-1])
    lX = embed_x(model, X, embed_dim, bs=1000)
    lY = embed_y(model, X.T, embed_dim, bs=1000)
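    # lX has one latent vector per row of the rating matrix, lY one per column (rows of X.T).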
    bs = model.get_boundaries()
    print('Learned boundaries: ', bs)
    qs = string_to_array(cfgs.qs, dtype='float')
    Is = string_to_array(cfgs.Is, dtype='float')
    recons = reconstruct_quantize_custom(lX, lY, min_val, max_val, bs, qs, cfgs.Delta, Is)
    print('Reconstructed value set:')
    print(np.unique(recons))
    # evaluate
    R = np.array(R.todense())
    tr_mask = np.array(tr_mask.todense()).astype(np.float32)
    te_mask = np.array(te_mask.todense()).astype(np.float32)
    rmse_tr = RMSE(recons, R, tr_mask)
    rmse_te = RMSE(recons, R, te_mask)
    mae_tr = MAE(recons, R, tr_mask)
    mae_te = MAE(recons, R, te_mask)
    print('--------------------------------')
    print('RMSE (train - test): %f - %f' % (rmse_tr, rmse_te))
    print('MAE (train - test): %f - %f' % (mae_tr, mae_te))
    print('--------------------------------')
if __name__ == '__main__':
    tf.app.run()