-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpredict_cv.py
226 lines (197 loc) · 7.28 KB
/
predict_cv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
import os
import cv2
import glob
import shutil
import time
import logging as log
import torch
from torchvision import transforms as tf
from pprint import pformat
from PIL import Image, ImageOps
import numpy as np
import sys
sys.path.insert(0, '.')
from utils.envs import initEnv
import models
def py_cpu_nms(dets, thresh=0.5):
    """Greedy non-maximum suppression implemented with NumPy.

    Boxes are visited in descending score order. Each visited box is kept and
    every remaining box whose IoU with it is greater than ``thresh`` is
    dropped. Box areas use the inclusive-pixel convention (``x2 - x1 + 1``).

    Args:
        dets: Array-like whose rows start with ``x1, y1, x2, y2, score``.
            An empty input, or a single 1-D box, is accepted.
        thresh: IoU threshold; a box survives when its IoU with every kept
            box is ``<= thresh``.

    Returns:
        A list of ``int`` row indices into ``dets``, highest score first.
    """
    dets = np.asarray(dets)
    # Guard first: slicing columns out of an empty 1-D array raises IndexError.
    if dets.size == 0:
        return []
    dets = np.atleast_2d(dets)

    x1, y1, x2, y2, scores = (dets[:, col] for col in range(5))
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        best, rest = order[0], order[1:]
        # Plain ints so callers can use the result as ordinary list indices.
        keep.append(int(best))

        # Intersection rectangle between the best box and all remaining ones.
        inter_w = np.maximum(0.0, np.minimum(x2[best], x2[rest]) - np.maximum(x1[best], x1[rest]) + 1)
        inter_h = np.maximum(0.0, np.minimum(y2[best], y2[rest]) - np.maximum(y1[best], y1[rest]) + 1)
        inter = inter_w * inter_h
        iou = inter / (areas[best] + areas[rest] - inter)

        # Continue only with boxes that do not overlap the kept one too much.
        order = rest[iou <= thresh]
    return keep
def detect(net, img_path, use_cuda, network_size, nms_thresh):
    """Run the detector on one image file and return NMS-filtered boxes.

    The image is read with OpenCV, converted to an RGB PIL image, resized and
    padded to ``network_size`` by ``process_data``, and fed to ``net``.
    Predicted boxes are mapped back to original-image pixel coordinates,
    clipped to the image bounds and filtered with ``py_cpu_nms``. NMS is run
    over all boxes together, regardless of class label. Per-stage timings are
    printed to stdout.

    Args:
        net: Callable model. ``net(tensor)`` must return a pair whose first
            element, at index 0, is an iterable of detections exposing
            ``x_top_left``, ``y_top_left``, ``width``, ``height``,
            ``confidence`` and ``class_label``.
        img_path: Path of the image file to process.
        use_cuda: When true the input tensor is moved to the GPU.
        network_size: ``(width, height)`` of the network input.
        nms_thresh: IoU threshold forwarded to ``py_cpu_nms``.

    Returns:
        A list of ``(class_label, confidence, [x1, y1, x2, y2])`` tuples with
        integer coordinates, or an empty list when nothing was detected.

    Raises:
        OSError: If the image cannot be read from ``img_path``.
    """
    bgr = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than deep inside cvtColor.
    if bgr is None:
        raise OSError("cannot read image: {}".format(img_path))
    data = Image.fromarray(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
    orig_width, orig_height = data.size

    # Letterbox geometry, needed later to undo the resize + padding.
    netw, neth = network_size
    scale = min(float(netw) / orig_width, float(neth) / orig_height)
    pad_w = (netw - orig_width * scale) / 2.0
    pad_h = (neth - orig_height * scale) / 2.0

    st = time.time()
    data = process_data(data, network_size)
    print("process time:", time.time() - st)

    s22 = time.time()
    data = tf.ToTensor()(data)
    s33 = time.time()
    print("totensor time:", s33 - s22)
    data = data.unsqueeze(0)
    print("unsqueeze time:", time.time() - s33)
    if use_cuda:
        data = data.cuda()

    with torch.no_grad():
        s1 = time.time()
        output, _ = net(data)
        print("inference time:", time.time() - s1)

    boxes = []      # float [x1, y1, x2, y2, conf] rows for NMS
    labelled = []   # matching (class_label, conf, [int coords]) results
    for o in output[0]:
        # Convert x, y, w, h to corner form in network-input coordinates.
        xmin = o.x_top_left
        ymin = o.y_top_left
        xmax = xmin + o.width
        ymax = ymin + o.height
        conf = o.confidence

        # Remove padding, undo scaling, and clip to the original image.
        x1 = max(0, float(xmin - pad_w) / scale)
        x2 = min(orig_width - 1, float(xmax - pad_w) / scale)
        y1 = max(0, float(ymin - pad_h) / scale)
        y2 = min(orig_height - 1, float(ymax - pad_h) / scale)

        boxes.append([x1, y1, x2, y2, conf])
        labelled.append((o.class_label, conf, [int(x1), int(y1), int(x2), int(y2)]))

    if not boxes:
        return []

    t11 = time.time()
    nms_keep = py_cpu_nms(np.array(boxes), nms_thresh)
    print("nms time:", time.time() - t11)

    final_res = [labelled[index] for index in nms_keep]
    end = time.time()
    print("detect time(process time + inference + nms): ", end - st)
    return final_res
def process_data(img, dimension):
    """Letterbox a PIL image to the network input size.

    The image is resized with nearest-neighbour sampling so that it fits
    inside ``dimension`` while keeping its aspect ratio, then padded with
    grey (127) borders up to exactly ``dimension``.

    Args:
        img: PIL image (anything exposing ``size``/``resize`` that
            ``ImageOps.expand`` accepts).
        dimension: ``(width, height)`` of the target canvas.

    Returns:
        The input image itself when it already has the target size,
        otherwise a new resized and/or padded image.
    """
    grey = 127
    target_w, target_h = dimension

    width, height = img.size
    if (width, height) == (target_w, target_h):
        return img

    # The side with the larger relative size limits the scale factor.
    width_limited = width / target_w >= height / target_h
    ratio = target_w / width if width_limited else target_h / height
    if ratio != 1:
        img = img.resize((int(ratio * width), int(ratio * height)), Image.NEAREST)
        width, height = img.size
        if (width, height) == (target_w, target_h):
            return img

    # Pad symmetrically; an odd leftover pixel goes to the right/bottom edge.
    pixels = np.array(img)
    channels = pixels.shape[2] if pixels.ndim > 2 else 1
    half_w = (target_w - width) / 2
    half_h = (target_h - height) / 2
    border = (int(half_w), int(half_h), int(half_w + .5), int(half_h + .5))
    return ImageOps.expand(img, border=border, fill=(grey,) * channels)
class HyperParams(object):
    """Inference settings copied out of the configuration mapping.

    Attributes are read from ``config`` by key; a missing key raises
    ``KeyError``. ``cuda`` ends up true only when ``torch`` reports an
    available CUDA device.
    """

    def __init__(self, config):
        """Populate the settings from ``config`` and probe for CUDA.

        Args:
            config: Mapping providing ``labels``, ``data_root_dir``,
                ``model_name``, ``dataset``, ``nworkers``, ``pin_mem``,
                ``input_shape``, ``batch_size``, ``weights``,
                ``conf_thresh``, ``nms_thresh`` and ``results``.
        """
        # Dataset / model identity.
        self.labels = config['labels']
        self.classes = len(self.labels)
        self.data_root = config['data_root_dir']
        self.model_name = config['model_name']
        dataset = config['dataset']
        self.testfile = f'{self.data_root}/{dataset}.pkl'

        # Loader and network settings.
        self.nworkers = config['nworkers']
        self.pin_mem = config['pin_mem']
        self.network_size = config['input_shape']
        self.batch = config['batch_size']
        self.weights = config['weights']

        # Post-processing thresholds and output location.
        self.conf_thresh = config['conf_thresh']
        self.nms_thresh = config['nms_thresh']
        self.results = config['results']

        # CUDA is requested by default and used whenever it is available.
        self.cuda = True if torch.cuda.is_available() else False
        status = 'CUDA enabled' if self.cuda else 'CUDA not available'
        log.debug(status)
        print(status)
def voc_test(hyper_params, img_dir="test_img"):
    """Run detection on every ``*.jpg`` in a directory and save annotated copies.

    The results directory named by ``hyper_params.results`` is deleted if it
    exists and then recreated, so earlier output is lost. For each image the
    detections returned by ``detect`` are drawn (label, confidence and box)
    and the annotated image is written under the same file name in the
    results directory.

    Args:
        hyper_params: Settings object providing ``model_name``, ``cuda``,
            ``weights``, ``conf_thresh``, ``network_size``, ``labels``,
            ``nms_thresh``, ``results`` and ``classes``.
        img_dir: Directory scanned for ``*.jpg`` files. Defaults to
            ``"test_img"``, the location previously hard-coded here.
    """
    use_cuda = hyper_params.cuda
    network_size = hyper_params.network_size
    nms_thresh = hyper_params.nms_thresh
    save_dir = hyper_params.results

    # Start from an empty output directory. makedirs (rather than mkdir) so a
    # nested results path with missing parents does not fail.
    if os.path.exists(save_dir):
        shutil.rmtree(save_dir)
    os.makedirs(save_dir)

    test_args = {
        'conf_thresh': hyper_params.conf_thresh,
        'network_size': network_size,
        'labels': hyper_params.labels,
    }
    # The model constructor is looked up by name in the models package.
    net = models.__dict__[hyper_params.model_name](
        hyper_params.classes, hyper_params.weights, train_flag=2, test_args=test_args)
    net.eval()
    log.info('Net structure\n%s', net)
    if use_cuda:
        net.cuda()

    for img_file in glob.glob(os.path.join(img_dir, "*.jpg")):
        print("--------------------------------")
        result = detect(net, img_file, use_cuda, network_size, nms_thresh)

        # detect() works on its own copy, so reload the image for drawing.
        img = cv2.imread(img_file)
        for class_label, conf, (x1, y1, x2, y2) in result:
            # Keep the caption inside the image when the box touches the top edge.
            cv2.putText(img, class_label + ":" + str(conf)[:5], (max(0, x1), max(15, y1)),
                        cv2.FONT_ITALIC, 0.6, (0, 255, 0), 2)
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0))

        out_file = os.path.join(save_dir, os.path.basename(img_file))
        cv2.imwrite(out_file, img)
        if result:
            print("detect: {}".format(result))
            print("save: {} -> {}".format(img_file, out_file))
        else:
            print("detect nothing")
if __name__ == '__main__':
    # Script entry point: load the environment/config for the tiny model
    # ("Yolov3" is the alternative model name), log it, then run the test.
    # NOTE(review): train_flag=2 presumably selects test mode - confirm in utils.envs.
    config = initEnv(train_flag=2, model_name="TinyYolov3")
    log.info('Config\n\n%s\n' % pformat(config))

    voc_test(HyperParams(config))