# akida_yolo.py
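"""Real-time YOLO car/person detection on a webcam feed using a BrainChip Akida model."""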
import os
import threading
import time

import cv2
import numpy as np
from imutils.video import VideoStream
from tensorflow.keras import Model
from tensorflow.keras.layers import Reshape
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.utils import get_file

from akida import Model as AkidaModel, devices
from akida_models import yolo_voc_pretrained, yolo_base
from akida_models.detection.processing import (
    decode_output,
    parse_voc_annotations,
)
from cnn2snn import convert

CAMERA_SRC = 0
FRAME_WIDTH = 640
FRAME_HEIGHT = 480
INFERENCE_PER_SECOND = 2
PREDICTION_CONFIDENCE_MIN = 0.6
TARGET_WIDTH = 224
TARGET_HEIGHT = 224
MODEL_FBZ = "models/yolo.fbz"
NUM_ANCHORS = 5
GRID_SIZE = (7, 7)
NUM_CLASSES = 2
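
# Each cell of the 7x7 output grid predicts NUM_ANCHORS boxes, and every box
# is encoded as 4 coordinates + 1 objectness score + NUM_CLASSES class
# scores; this is where the "4 + 1 + NUM_CLASSES" reshape below comes from.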
CAR = "car"
PERSON = "person"
LABELS = [CAR, PERSON]
COLOURS = {
CAR: (255, 0, 0),
PERSON: (0, 255, 0),
}
DATA_PATH = get_file(
"voc_test_car_person.tar.gz",
"http://data.brainchip.com/dataset-mirror/voc/voc_test_car_person.tar.gz",
cache_subdir="datasets/voc",
extract=True,
)
DATA_DIR = os.path.dirname(DATA_PATH)
GT_FOLDER = os.path.join(DATA_DIR, "voc_test_car_person", "Annotations")
IMG_FOLDER = os.path.join(DATA_DIR, "voc_test_car_person", "JPEGImages")
FILE_PATH = os.path.join(DATA_DIR, "voc_test_car_person", "test_car_person.txt")
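
# The annotations are parsed only to sanity-check the downloaded dataset; no
# training happens here, as initialise() fetches pretrained weights.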
def initialise():
    """Build, convert and save the Akida YOLO model."""
    val_data = parse_voc_annotations(GT_FOLDER, IMG_FOLDER, FILE_PATH, LABELS)
    print(
        "Loaded VOC2007 test data for car and person classes: "
        f"{len(val_data)} images."
    )
    # Build a YOLO architecture for 2 classes, 5 anchors and a 7x7 grid
    # (shown for reference; the pretrained model below has the same shape)
    model = yolo_base(
        input_shape=(TARGET_WIDTH, TARGET_HEIGHT, 3),
        classes=NUM_CLASSES,
        nb_box=NUM_ANCHORS,
        alpha=0.5,
    )
    # Append a reshape so the output reads as
    # (grid_h, grid_w, anchors, 4 box coords + 1 objectness + classes)
    output = Reshape(
        (GRID_SIZE[1], GRID_SIZE[0], NUM_ANCHORS, 4 + 1 + NUM_CLASSES),
        name="YOLO_output",
    )(model.output)
    full_model = Model(model.input, output)
    # Load the pretrained model along with its anchors
    model_keras, anchors = yolo_voc_pretrained()
    # Define the final reshape and build the model
    output = Reshape(
        (GRID_SIZE[1], GRID_SIZE[0], NUM_ANCHORS, 4 + 1 + NUM_CLASSES),
        name="YOLO_output",
    )(model_keras.output)
    model_keras = Model(model_keras.input, output)
    # Rebuild a model without the reshape layer: the conversion targets the
    # convolutional backbone, and the reshape is reapplied after inference
    compatible_model = Model(model_keras.input, model_keras.layers[-2].output)
    # input_scaling=(127.5, 127.5) tells cnn2snn the Keras model was trained
    # on inputs normalised as (x - 127.5) / 127.5, so the Akida model can
    # consume raw uint8 pixels directly
    model_akida = convert(compatible_model, input_scaling=(127.5, 127.5))
    model_akida.summary()
    # Save the converted model to file
    os.makedirs(os.path.dirname(MODEL_FBZ), exist_ok=True)
    model_akida.save(MODEL_FBZ)


# Create and save the model if one doesn't exist yet
if not os.path.exists(MODEL_FBZ):
    print("Initialising Akida model")
    initialise()
"""
Class to capture video feed from webcam
"""
class Camera:
def __init__(self):
self.stream = VideoStream(
src=CAMERA_SRC, resolution=(FRAME_WIDTH, FRAME_HEIGHT)
).start()
self.label = 0
self.pred_boxes = []
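
    # stream.read() is non-blocking: imutils' VideoStream grabs frames on its
    # own thread and always hands back the most recent one.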
def get_frame(self):
frame = cv2.resize(self.stream.read(), (TARGET_WIDTH, TARGET_HEIGHT))
return frame
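
    # Akida models take batched, unscaled uint8 inputs, so the frame is only
    # resized and wrapped in a batch dimension, with no float normalisation.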
def get_input_array(self):
frame = cv2.resize(self.stream.read(), (TARGET_WIDTH, TARGET_HEIGHT))
input_array = img_to_array(frame)
input_array = np.array([input_array], dtype="uint8")
return input_array
    def show_frame(self):
        frame = self.render_boxes(self.stream.read())
        cv2.imshow("frame", frame)
        # Return the pressed key so the main loop can handle quitting
        return cv2.waitKey(1) & 0xFF
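
    # Each predicted box is [x1, y1, x2, y2, label_index, score] in pixel
    # coordinates of the raw camera frame.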
def render_boxes(self, frame):
for box in self.pred_boxes:
if box[5] > PREDICTION_CONFIDENCE_MIN:
x1, y1 = int(box[0]), int(box[1])
x2, y2 = int(box[2]), int(box[3])
label = LABELS[int(box[4])]
score = "{:.2%}".format(box[5])
colour = COLOURS[label]
frame = cv2.rectangle(frame, (x1, y1), (x2, y2), colour, 1)
cv2.putText(
frame,
"{} - {}".format(label, score),
(x1, y1 - 5),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
colour,
1,
cv2.LINE_AA,
)
return frame
    def set_pred_boxes(self, pred_boxes):
        self.pred_boxes = pred_boxes
"""
Class to run inference over frames from the webcam
"""
class Inference:
def __init__(self, camera):
# init the camera
self.camera = camera
_, self.anchors = yolo_voc_pretrained()
# run inference in separate thread
self.t1 = threading.Thread(target=self.infer)
self.t1.start()
# load the akida model
self.model_ak = AkidaModel(filename=MODEL_FBZ)
if len(devices()) > 0:
device = devices()[0]
self.model_ak.map(device)
def infer(self):
while True:
input_array = self.camera.get_input_array()
# Call evaluate on the image
pots = self.model_ak.predict(input_array)[0]
# Reshape the potentials to prepare for decoding
w, h, c = pots.shape
pots = pots.reshape((h, w, len(self.anchors), 4 + 1 + len(LABELS)))
# Decode potentials into bounding boxes
raw_boxes = decode_output(pots, self.anchors, len(LABELS))
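            # (decode_output returns boxes with coordinates normalised to
            # [0, 1], which is why they are rescaled to pixel units below)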
# Rescale boxes to the original image size
pred_boxes = np.array(
[
[
box.x1 * FRAME_WIDTH,
box.y1 * FRAME_HEIGHT,
box.x2 * FRAME_WIDTH,
box.y2 * FRAME_HEIGHT,
box.get_label(),
box.get_score(),
]
for box in raw_boxes
]
)
            self.camera.set_pred_boxes(pred_boxes)
time.sleep(1 / INFERENCE_PER_SECOND)
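

# Wire everything together: the camera supplies frames, the inference thread
# writes predicted boxes back, and the main loop draws them.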
camera = Camera()
inference = Inference(camera)

# Main display loop; press "q" to quit
while True:
    if camera.show_frame() == ord("q"):
        break

cv2.destroyAllWindows()
camera.stream.stop()