
Multi instance #1

Open: wants to merge 7 commits into master
1 change: 0 additions & 1 deletion TODO.md
@@ -18,7 +18,6 @@ https://community.st.com/s/topic/0TO0X0000003iUqWAI/stm32-machine-learning-ai
Experiment

- Test 16kHz with 30 mels
- Use multi-instance learning. Get bigger batches and improve GPU utilization
Review comment (Owner, Author): Did you forget XXX?

- Do hyperparameter optimization per model
- Double-check reproduction of SB-CNN results
- Improve Data Augmentation
4 changes: 2 additions & 2 deletions experiments/ldcnn20k60.yaml
@@ -7,11 +7,11 @@ hop_length: 512
augmentations: 12
augment: 1
frames: 31
batch: 400
batch: 30
epochs: 100
train_samples: 30000
val_samples: 5000
learning_rate: 0.005
learning_rate: 0.001
voting: 'mean'
voting_overlap: 0.0
nesterov_momentum: 0.9
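The batch size drops from 400 to 30, presumably because each clip now enters the model as a stack of windows rather than a single window: with the 6 windows per clip that train.py assumes, a batch of 30 clips still pushes 180 windows through the base network per step. A back-of-the-envelope sketch of that arithmetic (the window count of 6 comes from `build_multi_instance(..., windows=6)` in train.py below):

```python
batch = 30    # new value in this file
windows = 6   # fixed per-clip window count used in microesc/train.py below
print(batch * windows)  # 180 windows per gradient step, vs. 400 single windows before
```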
105 changes: 47 additions & 58 deletions microesc/features.py
@@ -59,18 +59,6 @@ def compute_mels(y, settings):
return mels


def sample_windows(length, frame_samples, window_frames, overlap=0.5, start=0):
"""Split @samples into a number of windows of samples
with length @frame_samples * @window_frames
"""

ws = frame_samples * window_frames
while start < length:
end = min(start + ws, length)
yield start, end
start += (ws * (1-overlap))


def features_url(settings, base=default_base_url):
id = settings_id(settings)
ext = '.zip'
Expand Down Expand Up @@ -106,36 +94,17 @@ def download_progress(count, blocksize, totalsize):
return feature_dir


def load_sample(sample, settings, feature_dir, window_frames,
start_time=None, augment=None, normalize='meanstd'):
def extract_window(inmels, settings, start_time, normalize):

n_mels = settings['n_mels']
sample_rate = settings['samplerate']
hop_length = settings['hop_length']
window_frames = settings['frames']

aug = None
if augment and settings['augmentations'] > 0:
aug = numpy.random.randint(-1, settings['augmentations'])
if aug == -1:
aug = None

# Load precomputed features
folder = os.path.join(feature_dir, settings_id(settings))
path = feature_path(sample, out_folder=folder, augmentation=aug)
mels = numpy.load(path)['arr_0']
assert mels.shape[0] == n_mels, mels.shape

if start_time is None:
# Sample a window in time randomly
min_start = max(0, mels.shape[1]-window_frames)
if min_start == 0:
start = 0
else:
start = numpy.random.randint(0, min_start)
else:
start = int(start_time * (sample_rate / hop_length))

start = int(start_time * (sample_rate / hop_length))
end = start + window_frames
mels = mels[:, start:end]
#print('s', start, end, inmels.shape[1], end-start/inmels.shape[1])
mels = inmels[:, start:end]

# Normalize the window
if mels.shape[1] > 0:
@@ -149,40 +118,60 @@ def load_sample(sample, settings, feature_dir, window_frames,
else:
mels = librosa.core.power_to_db(mels, top_db=80, ref=0.0)
else:
print('Warning: Sample {} with start {} has 0 length'.format(sample, start_time))
print('Warning: Sample {} with start {} has 0 length'.format(inmels.shape, start_time))

# Pad to standard size
if window_frames is None:
padded = mels
else:
padded = numpy.full((n_mels, window_frames), 0.0, dtype=float)
inp = mels[:, 0:min(window_frames, mels.shape[1])]
padded[:, 0:inp.shape[1]] = inp
padded = numpy.full((n_mels, window_frames), 0.0, dtype=float)
inp = mels[:, 0:min(window_frames, mels.shape[1])]
padded[:, 0:inp.shape[1]] = inp

# add channel dimension
data = numpy.expand_dims(padded, -1)
return data

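For reference, a minimal sketch of how the new `extract_window` might be called, with hypothetical settings values (60 mels, 22050 Hz, hop 512, 31 frames) and a random array standing in for a precomputed mel spectrogram:

```python
import numpy

settings = {            # hypothetical values; only the keys extract_window reads
    'n_mels': 60,
    'samplerate': 22050,
    'hop_length': 512,
    'frames': 31,
}
inmels = numpy.random.rand(60, 200)  # stand-in for a precomputed mel spectrogram

# One normalized, zero-padded window starting 0.5 s into the clip
window = extract_window(inmels, settings, start_time=0.5, normalize='meanstd')
print(window.shape)  # (60, 31, 1): mels x frames x channel
```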

Sample = collections.namedtuple('Sample', 'start end fold slice_file_name')
def load_sample(sample, exsettings, feature_dir,
overlap=0, start=0, augmentation=None, normalize='meanstd'):

def load_windows(sample, settings, loader, overlap, start=0):
sample_rate = settings['samplerate']
frame_samples = settings['hop_length']
window_frames = settings['frames']
n_mels = exsettings['n_mels']
f_settings = settings(exsettings)

windows = []
# Load precomputed features
folder = os.path.join(feature_dir, settings_id(f_settings))
path = feature_path(sample, out_folder=folder, augmentation=augmentation)
mels = numpy.load(path)['arr_0']
assert mels.shape[0] == n_mels, mels.shape

duration = sample.end - sample.start
length = int(sample_rate * duration)
sample_rate = exsettings['samplerate']
frame_samples = exsettings['hop_length']
window_frames = exsettings['frames']

for win in sample_windows(length, frame_samples, window_frames, overlap=overlap, start=start):
chunk = Sample(start=win[0]/sample_rate,
end=win[1]/sample_rate,
fold=sample.fold,
slice_file_name=sample.slice_file_name)
d = loader(chunk)
# augmentations may change the sample duration
duration = mels.shape[1] * frame_samples/sample_rate
duration -= start

# cut into windows, and normalize each one
window_length = ((frame_samples * window_frames) / sample_rate)
hop_length = (1-overlap) * window_length
n_windows = int(numpy.ceil(4.0 / hop_length)) # UrbanSound8k clips are at most 4.0 seconds

starts = [ start + (i*hop_length) for i in range(0, n_windows) ]
if duration < window_length:
# make sure short files have at least one window
starts = [ starts[0] ]
else:
starts = [ s for s in starts if s < (duration-(hop_length/2)) ]

windows = []
for s in starts:
d = extract_window(mels, exsettings, s, normalize=normalize)
windows.append(d)
d = numpy.stack(windows)

# single numpy array, zero-padded
s = (n_windows, n_mels, window_frames, 1)
windows = numpy.zeros(shape=s)
windows[:d.shape[0], :, :, :] = d

return windows

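A worked example of the start-time arithmetic above, under assumed settings (22050 Hz, hop 512, 31 frames per window, voting_overlap 0). Since the 4.0 constant matches the maximum clip length in UrbanSound8k, every clip yields the same number of candidate windows:

```python
import numpy

sample_rate, frame_samples, window_frames, overlap = 22050, 512, 31, 0.0

window_length = (frame_samples * window_frames) / sample_rate  # ~0.72 s
hop = (1 - overlap) * window_length                            # ~0.72 s
n_windows = int(numpy.ceil(4.0 / hop))                         # 6 windows

starts = [i * hop for i in range(n_windows)]
print(n_windows, ['%.2f' % s for s in starts])
# 6 ['0.00', '0.72', '1.44', '2.16', '2.88', '3.60']
```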
164 changes: 110 additions & 54 deletions microesc/train.py
@@ -19,25 +19,75 @@
from . import settings as Settings


def dataframe_generator(X, Y, loader, batchsize=10, n_classes=10):
"""
Keras generator for lazy-loading data based on a pandas.DataFrame

class Generator(keras.utils.Sequence):

def __init__(self, x_set, y_set, feature_dir, settings, n_classes=10, augment=False):
self.x, self.y = x_set, y_set
self.batch_size = settings['batch']
self.n_classes = n_classes
self.augment = augment
self.n_augmentations = settings['augmentations'] if self.augment else 1
self.feature_dir = feature_dir
self.feature_settings = features.settings(settings)
self.settings = settings

def _load(self, sample, augmentation=None):

# Time-shift augmentation, randomize starts
sample_rate = self.settings['samplerate']
frame_samples = self.settings['hop_length']
window_frames = self.settings['frames']
dur = sample.end - sample.start
window_length = ((frame_samples * window_frames) / sample_rate)

if self.augment:
start = numpy.random.random() * (min(window_length, dur)/2)
else:
start = 0

windows = features.load_sample(sample,
self.settings,
feature_dir=self.feature_dir,
augmentation=augmentation,
overlap=self.settings['voting_overlap'],
start=start)

#no = numpy.random.randint(0, 100)
#name = sample.slice_file_name.replace('.wav', '.npy')
#numpy.save(f'features/{name}', windows)

return windows

def __len__(self):
# FIXME: make sure to include all data, not using floor
sample_batches = int(numpy.floor(len(self.x) / float(self.batch_size)))
augmented = sample_batches * self.n_augmentations
return augmented

X: data column(s)
Y: target column
loader: function will be passed batches of X to load actual training data
"""

assert len(X) == len(Y), 'X and Y must be equal length'
def __getitem__(self, idx):
# take augmentation into account
aug_idx = idx % self.n_augmentations
sample_idx = idx // self.n_augmentations

# select data
from_idx = sample_idx * self.batch_size
to_idx = (sample_idx + 1) * self.batch_size
X = self.x.iloc[from_idx:to_idx]
y = self.y.iloc[from_idx:to_idx]

assert X.shape[0] == self.batch_size, (X.shape, self.batch_size, from_idx)
assert y.shape[0] == self.batch_size, (y.shape)

while True:
idx = numpy.random.choice(len(X), size=batchsize, replace=False)
rows = X.iloc[idx, :].iterrows()
data = [ loader(d) for _, d in rows ]
y = Y.iloc[idx]
y = keras.utils.to_categorical(y, num_classes=n_classes)
batch = (numpy.array(data), numpy.array(y))
yield batch
#print('xx', X.shape, y.shape)
if not self.augment:
aug_idx = None

data = [ self._load(d, augmentation=aug_idx) for _, d in X.iterrows() ]
y = keras.utils.to_categorical(y, num_classes=self.n_classes)
batch = (numpy.stack(data), numpy.array(y))
#print('x', batch[0].shape)
return batch

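The index arithmetic in `__getitem__` deserves a worked example: consecutive Keras batch indices cycle through augmentation variants before advancing to the next slice of samples. A sketch with hypothetical sizes (3 augmentations, batch 30, 90 samples):

```python
n_augmentations, batch_size, n_samples = 3, 30, 90

n_batches = (n_samples // batch_size) * n_augmentations  # what __len__ returns: 9
for idx in range(n_batches):
    aug_idx = idx % n_augmentations      # which augmented variant to load
    sample_idx = idx // n_augmentations  # which dataframe slice
    lo, hi = sample_idx * batch_size, (sample_idx + 1) * batch_size
    print(idx, aug_idx, (lo, hi))
# indices 0-2 all read rows 0:30 with augmentations 0, 1, 2; then rows 30:60, ...
```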

class LogCallback(keras.callbacks.Callback):
Expand Down Expand Up @@ -72,33 +122,46 @@ def write_entry(self, epoch, data):
def on_epoch_end(self, epoch, logs):
logs = logs.copy()

more = self.score() # uses current model
more = self.score(epoch, logs) # uses current model
for k, v in more.items():
logs[k] = v

self.write_entry(epoch, logs)


def build_multi_instance(base, windows=6, bands=32, frames=72, channels=1):
from keras import Model
from keras.layers import Input, TimeDistributed, GlobalAveragePooling1D

input_shape = (windows, bands, frames, channels)

input = Input(shape=input_shape)
x = input # BatchNormalization()(input)
x = TimeDistributed(base)(x)
x = GlobalAveragePooling1D()(x)
model = Model(input, x)
return model

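This wrapper is the core of the multi-instance setup: the base classifier runs independently on each window via TimeDistributed, and GlobalAveragePooling1D mean-votes the per-window predictions inside the model. A self-contained sketch with a toy stand-in for the base network (the real one comes from models.build(), not shown in this diff):

```python
from keras import Model
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense

# Toy per-window classifier, standing in for the project's real model
def tiny_base(bands=32, frames=72, channels=1, n_classes=10):
    inp = Input(shape=(bands, frames, channels))
    x = Conv2D(8, (3, 3), activation='relu')(inp)
    x = MaxPooling2D((4, 4))(x)
    x = Flatten()(x)
    out = Dense(n_classes, activation='softmax')(x)
    return Model(inp, out)

mi = build_multi_instance(tiny_base(), windows=6, bands=32, frames=72)
mi.summary()
# Input:  (batch, 6, 32, 72, 1)  - one clip as 6 analysis windows
# Output: (batch, 10)            - per-window softmax, averaged over windows
```

With mean voting folded into the model this way, the old external voting path becomes redundant, which is presumably why voted_score below now just mirrors val_acc.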

def train_model(out_dir, train, val, model,
loader, val_loader, settings, seed=1):
def train_model(out_dir, fold, builder,
feature_dir, settings, name):
"""Train a single model"""

frame_samples = settings['hop_length']
train_samples = settings['train_samples']
window_frames = settings['frames']
val_samples = settings['val_samples']
epochs = settings['epochs']
batch_size = settings['batch']
learning_rate = settings.get('learning_rate', 0.01)

assert len(train) > len(val) * 5, 'training data should be much larger than validation'
def generator(data, augment):
return Generator(data, data.classID, feature_dir=feature_dir, settings=settings, augment=augment)

def top3(y_true, y_pred):
return keras.metrics.top_k_categorical_accuracy(y_true, y_pred, k=3)
model = builder()
model = build_multi_instance(model, bands=settings['n_mels'], frames=window_frames, windows=6)
model.summary()

optimizer = keras.optimizers.SGD(lr=learning_rate, momentum=settings['nesterov_momentum'], nesterov=True)

model.compile(loss='categorical_crossentropy',
optimizer=optimizer,
metrics=['accuracy'])
@@ -107,30 +170,30 @@ def top3(y_true, y_pred):
checkpoint = keras.callbacks.ModelCheckpoint(model_path, monitor='val_acc', mode='max',
period=1, verbose=1, save_best_only=False)

def voted_score():
y_pred = features.predict_voted(settings, model, val,
loader=val_loader, method=settings['voting'], overlap=settings['voting_overlap'])
class_pred = numpy.argmax(y_pred, axis=1)
acc = sklearn.metrics.accuracy_score(val.classID, class_pred)
tensorboard = keras.callbacks.TensorBoard(log_dir=f'./logs/{name}',
histogram_freq=0, update_freq=1000,
write_graph=True, write_images=False)

def voted_score(epoch, logs):
d = {
'voted_val_acc': acc,
'voted_val_acc': logs['val_acc'], # XXX: legacy compat
}
for k, v in d.items():
print("{}: {:.4f}".format(k, v))
return d

log_path = os.path.join(out_dir, 'train.csv')
log = LogCallback(log_path, voted_score)

train_gen = generator(fold[0], augment=True)
val_gen = generator(fold[1], augment=False)

train_gen = dataframe_generator(train, train.classID, loader=loader, batchsize=batch_size)
val_gen = dataframe_generator(val, val.classID, loader=val_loader, batchsize=batch_size)

callbacks_list = [checkpoint, log]
hist = model.fit_generator(train_gen, validation_data=val_gen,
steps_per_epoch=math.ceil(train_samples/batch_size),
validation_steps=math.ceil(val_samples/batch_size),
callbacks_list = [checkpoint, log, tensorboard]
hist = model.fit_generator(train_gen,
validation_data=val_gen,
callbacks=callbacks_list,
epochs=epochs, verbose=1)
epochs=epochs,
shuffle=True,
verbose=1,
workers=1)

df = history_dataframe(hist)
history_path = os.path.join(out_dir, 'history.csv')
Expand Down Expand Up @@ -230,14 +293,7 @@ def main():
features.maybe_download(feature_settings, feature_dir)

data = urbansound8k.load_dataset()
train_data, val_data = load_training_data(data, fold)

def load(sample, validation):
augment = not validation and train_settings['augment'] != 0
d = features.load_sample(sample, feature_settings, feature_dir=feature_dir,
window_frames=model_settings['frames'],
augment=augment, normalize=exsettings['normalize'])
return d
fold_data = load_training_data(data, fold)

def build_model():
m = models.build(exsettings)
@@ -261,11 +317,11 @@ def build_model():
print('Training model', name)
print('Settings', json.dumps(exsettings))

h = train_model(output_dir, train_data, val_data,
model=m,
loader=functools.partial(load, validation=False),
val_loader=functools.partial(load, validation=True),
settings=exsettings)

h = train_model(output_dir, fold_data,
builder=build_model,
feature_dir=feature_dir,
settings=exsettings, name=name)


