diff --git a/TODO.md b/TODO.md
index c1b2ceb..f90f5c1 100644
--- a/TODO.md
+++ b/TODO.md
@@ -18,7 +18,6 @@ https://community.st.com/s/topic/0TO0X0000003iUqWAI/stm32-machine-learning-ai
 Experiment
 
 - Test 16kHz with 30 mels
-- Use multi-instance learning. Get bigger batches and improve GPU utilization
 - Do hyperparameter optimization per model
 - Double-check reproduction of SB-CNN results
 - Improve Data Augmentation
diff --git a/experiments/ldcnn20k60.yaml b/experiments/ldcnn20k60.yaml
index 149571b..72a8c44 100644
--- a/experiments/ldcnn20k60.yaml
+++ b/experiments/ldcnn20k60.yaml
@@ -7,11 +7,11 @@ hop_length: 512
 augmentations: 12
 augment: 1
 frames: 31
-batch: 400
+batch: 30
 epochs: 100
 train_samples: 30000
 val_samples: 5000
-learning_rate: 0.005
+learning_rate: 0.001
 voting: 'mean'
 voting_overlap: 0.0
 nesterov_momentum: 0.9
diff --git a/microesc/features.py b/microesc/features.py
index 89d3f36..8642f5c 100644
--- a/microesc/features.py
+++ b/microesc/features.py
@@ -59,18 +59,6 @@ def compute_mels(y, settings):
     return mels
 
 
-def sample_windows(length, frame_samples, window_frames, overlap=0.5, start=0):
-    """Split @samples into a number of windows of samples
-    with length @frame_samples * @window_frames
-    """
-
-    ws = frame_samples * window_frames
-    while start < length:
-        end = min(start + ws, length)
-        yield start, end
-        start += (ws * (1-overlap))
-
-
 def features_url(settings, base=default_base_url):
     id = settings_id(settings)
     ext = '.zip'
@@ -106,36 +94,17 @@ def download_progress(count, blocksize, totalsize):
     return feature_dir
 
 
-def load_sample(sample, settings, feature_dir, window_frames,
-                start_time=None, augment=None, normalize='meanstd'):
+def extract_window(inmels, settings, start_time, normalize):
+
     n_mels = settings['n_mels']
     sample_rate = settings['samplerate']
     hop_length = settings['hop_length']
+    window_frames = settings['frames']
 
-    aug = None
-    if augment and settings['augmentations'] > 0:
-        aug = numpy.random.randint(-1, settings['augmentations'])
-        if aug == -1:
-            aug = None
-
-    # Load precomputed features
-    folder = os.path.join(feature_dir, settings_id(settings))
-    path = feature_path(sample, out_folder=folder, augmentation=aug)
-    mels = numpy.load(path)['arr_0']
-    assert mels.shape[0] == n_mels, mels.shape
-
-    if start_time is None:
-        # Sample a window in time randomly
-        min_start = max(0, mels.shape[1]-window_frames)
-        if min_start == 0:
-            start = 0
-        else:
-            start = numpy.random.randint(0, min_start)
-    else:
-        start = int(start_time * (sample_rate / hop_length))
-
+    start = int(start_time * (sample_rate / hop_length))
     end = start + window_frames
-    mels = mels[:, start:end]
+    #print('s', start, end, inmels.shape[1], end-start/inmels.shape[1])
+    mels = inmels[:, start:end]
 
     # Normalize the window
     if mels.shape[1] > 0:
@@ -149,40 +118,60 @@
         else:
             mels = librosa.core.power_to_db(mels, top_db=80, ref=0.0)
     else:
-        print('Warning: Sample {} with start {} has 0 length'.format(sample, start_time))
+        print('Warning: Sample {} with start {} has 0 length'.format(inmels.shape, start_time))
 
     # Pad to standard size
-    if window_frames is None:
-        padded = mels
-    else:
-        padded = numpy.full((n_mels, window_frames), 0.0, dtype=float)
-        inp = mels[:, 0:min(window_frames, mels.shape[1])]
-        padded[:, 0:inp.shape[1]] = inp
+    padded = numpy.full((n_mels, window_frames), 0.0, dtype=float)
+    inp = mels[:, 0:min(window_frames, mels.shape[1])]
+    padded[:, 0:inp.shape[1]] = inp
 
     # add channel dimension
     data = numpy.expand_dims(padded, -1)
     return data
 
 
-Sample = collections.namedtuple('Sample', 'start end fold slice_file_name')
+def load_sample(sample, exsettings, feature_dir,
+                overlap=0, start=0, augmentation=None, normalize='meanstd'):
 
-def load_windows(sample, settings, loader, overlap, start=0):
-    sample_rate = settings['samplerate']
-    frame_samples = settings['hop_length']
-    window_frames = settings['frames']
+    n_mels = exsettings['n_mels']
+    f_settings = settings(exsettings)
 
-    windows = []
+    # Load precomputed features
+    folder = os.path.join(feature_dir, settings_id(f_settings))
+    path = feature_path(sample, out_folder=folder, augmentation=augmentation)
+    mels = numpy.load(path)['arr_0']
+    assert mels.shape[0] == n_mels, mels.shape
 
-    duration = sample.end - sample.start
-    length = int(sample_rate * duration)
+    sample_rate = exsettings['samplerate']
+    frame_samples = exsettings['hop_length']
+    window_frames = exsettings['frames']
 
-    for win in sample_windows(length, frame_samples, window_frames, overlap=overlap, start=start):
-        chunk = Sample(start=win[0]/sample_rate,
-                       end=win[1]/sample_rate,
-                       fold=sample.fold,
-                       slice_file_name=sample.slice_file_name)
-        d = loader(chunk)
+    # augmentations may change the sample duration
+    duration = mels.shape[1] * frame_samples/sample_rate
+    duration -= start
+
+    # cut into windows, and normalized
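+    # NOTE: the 4.0 below is presumably the maximum clip duration in seconds
+    # (UrbanSound8k excerpts are at most 4 s), which bounds windows per clip.
+    # Assuming the ~20 kHz samplerate implied by the experiment name, with
+    # hop_length=512 and frames=31 a window is ~0.79 s, so at overlap=0
+    # n_windows = ceil(4.0/0.79) = 6, matching the windows=6 used in train.py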
+    window_length = ((frame_samples * window_frames) / sample_rate)
+    hop_length = (1-overlap) * window_length
+    n_windows = int(numpy.ceil(4.0 / hop_length))
+
+    starts = [ start + (i*hop_length) for i in range(0, n_windows) ]
+    if duration < window_length:
+        # make sure short files have at least one window
+        starts = [ starts[0] ]
+    else:
+        starts = [ s for s in starts if s < (duration-(hop_length/2)) ]
+
+    windows = []
+    for s in starts:
+        d = extract_window(mels, exsettings, s, normalize=normalize)
         windows.append(d)
+    d = numpy.stack(windows)
+
+    # single numpy array, zero-padded
+    s = (n_windows, n_mels, window_frames, 1)
+    windows = numpy.zeros(shape=s)
+    windows[:d.shape[0], :, :, :] = d
 
     return windows
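Note: load_sample now returns all windows of a clip as one zero-padded array, instead of a single window. A rough usage sketch; the settings values and feature_dir path here are illustrative assumptions, not from this patch, and `sample` is a row of the UrbanSound8k index as elsewhere in the codebase:

    exsettings = dict(samplerate=20000, n_mels=60, hop_length=512,
                      frames=31, augmentations=12)   # assumed values
    windows = load_sample(sample, exsettings, feature_dir='./features',
                          overlap=0.0, start=0.0, augmentation=None)
    # windows.shape == (n_windows, n_mels, frames, 1)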
diff --git a/microesc/train.py b/microesc/train.py
index 6c74801..03d903a 100644
--- a/microesc/train.py
+++ b/microesc/train.py
@@ -19,25 +19,75 @@ from . import settings as Settings
 
-def dataframe_generator(X, Y, loader, batchsize=10, n_classes=10):
-    """
-    Keras generator for lazy-loading data based on a pandas.DataFrame
+
+class Generator(keras.utils.Sequence):
+
+    def __init__(self, x_set, y_set, feature_dir, settings, n_classes=10, augment=False):
+        self.x, self.y = x_set, y_set
+        self.batch_size = settings['batch']
+        self.n_classes = n_classes
+        self.augment = augment
+        self.n_augmentations = settings['augmentations'] if self.augment else 1
+        self.feature_dir = feature_dir
+        self.feature_settings = features.settings(settings)
+        self.settings = settings
+
+    def _load(self, sample, augmentation=None):
+
+        # Time-shift augmentation, randomize starts
+        sample_rate = self.settings['samplerate']
+        frame_samples = self.settings['hop_length']
+        window_frames = self.settings['frames']
+        dur = sample.end - sample.start
+        window_length = ((frame_samples * window_frames) / sample_rate)
+
+        if self.augment:
+            start = numpy.random.random() * (min(window_length, dur)/2)
+        else:
+            start = 0
+
+        windows = features.load_sample(sample,
+                        self.settings,
+                        feature_dir=self.feature_dir,
+                        augmentation=augmentation,
+                        overlap=self.settings['voting_overlap'],
+                        start=start)
+
+        #no = numpy.random.randint(0, 100)
+        #name = sample.slice_file_name.replace('.wav', '.npy')
+        #numpy.save(f'features/{name}', windows)
+
+        return windows
+
+    def __len__(self):
+        # FIXME: make sure to include all data, not using floor
+        sample_batches = int(numpy.floor(len(self.x) / float(self.batch_size)))
+        augmented = sample_batches * self.n_augmentations
+        return augmented
 
-    X: data column(s)
-    Y: target column
-    loader: function will be passed batches of X to load actual training data
-    """
-
-    assert len(X) == len(Y), 'X and Y must be equal length'
+    def __getitem__(self, idx):
+        # take augmentation into account
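+        # (e.g. with augmentations=12, idx 0..11 all select the first
+        # batch of samples, with aug_idx 0..11; idx 12..23 the second, etc.)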
+        aug_idx = idx % self.n_augmentations
+        sample_idx = idx // self.n_augmentations
+
+        # select data
+        from_idx = sample_idx * self.batch_size
+        to_idx = (sample_idx + 1) * self.batch_size
+        X = self.x.iloc[from_idx:to_idx]
+        y = self.y.iloc[from_idx:to_idx]
+
+        assert X.shape[0] == self.batch_size, (X.shape, self.batch_size, from_idx)
+        assert y.shape[0] == self.batch_size, (y.shape)
 
-    while True:
-        idx = numpy.random.choice(len(X), size=batchsize, replace=False)
-        rows = X.iloc[idx, :].iterrows()
-        data = [ loader(d) for _, d in rows ]
-        y = Y.iloc[idx]
-        y = keras.utils.to_categorical(y, num_classes=n_classes)
-        batch = (numpy.array(data), numpy.array(y))
-        yield batch
+        #print('xx', X.shape, y.shape)
+        if not self.augment:
+            aug_idx = None
+
+        data = [ self._load(d, augmentation=aug_idx) for _, d in X.iterrows() ]
+        y = keras.utils.to_categorical(y, num_classes=self.n_classes)
+        batch = (numpy.stack(data), numpy.array(y))
+        #print('x', batch[0].shape)
+        return batch
 
 
 class LogCallback(keras.callbacks.Callback):
@@ -72,33 +122,46 @@ def write_entry(self, epoch, data):
 
     def on_epoch_end(self, epoch, logs):
         logs = logs.copy()
-        more = self.score() # uses current model
+        more = self.score(epoch, logs) # uses current model
         for k, v in more.items():
             logs[k] = v
 
         self.write_entry(epoch, logs)
 
 
+def build_multi_instance(base, windows=6, bands=32, frames=72, channels=1):
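+    # Wraps the single-window model @base for multi-instance learning:
+    # TimeDistributed applies @base to each of the @windows instances,
+    # then GlobalAveragePooling1D averages the per-window predictions,
+    # so mean voting over windows happens inside the model itself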
+    from keras import Model
+    from keras.layers import Input, TimeDistributed, GlobalAveragePooling1D
+
+    input_shape = (windows, bands, frames, channels)
+
+    input = Input(shape=input_shape)
+    x = input # BatchNormalization()(input)
+    x = TimeDistributed(base)(x)
+    x = GlobalAveragePooling1D()(x)
+    model = Model(input, x)
+    return model
+
 
-def train_model(out_dir, train, val, model,
-                loader, val_loader, settings, seed=1):
+def train_model(out_dir, fold, builder,
+                feature_dir, settings, name):
     """Train a single model"""
 
     frame_samples = settings['hop_length']
-    train_samples = settings['train_samples']
     window_frames = settings['frames']
-    val_samples = settings['val_samples']
     epochs = settings['epochs']
     batch_size = settings['batch']
     learning_rate = settings.get('learning_rate', 0.01)
 
-    assert len(train) > len(val) * 5, 'training data should be much larger than validation'
+    def generator(data, augment):
+        return Generator(data, data.classID, feature_dir=feature_dir,
+                         settings=settings, augment=augment)
 
-    def top3(y_true, y_pred):
-        return keras.metrics.top_k_categorical_accuracy(y_true, y_pred, k=3)
+    model = builder()
+    model = build_multi_instance(model, bands=settings['n_mels'],
+                                 frames=window_frames, windows=6)
+    model.summary()
 
     optimizer = keras.optimizers.SGD(lr=learning_rate, momentum=settings['nesterov_momentum'], nesterov=True)
+
     model.compile(loss='categorical_crossentropy',
                   optimizer=optimizer,
                   metrics=['accuracy'])
@@ -107,30 +170,30 @@ def top3(y_true, y_pred):
 
     checkpoint = keras.callbacks.ModelCheckpoint(model_path, monitor='val_acc', mode='max',
                                                  period=1, verbose=1, save_best_only=False)
 
-    def voted_score():
-        y_pred = features.predict_voted(settings, model, val,
-                    loader=val_loader, method=settings['voting'], overlap=settings['voting_overlap'])
-        class_pred = numpy.argmax(y_pred, axis=1)
-        acc = sklearn.metrics.accuracy_score(val.classID, class_pred)
+    tensorboard = keras.callbacks.TensorBoard(log_dir=f'./logs/{name}',
+                    histogram_freq=0, update_freq=1000,
+                    write_graph=True, write_images=False)
+
+    def voted_score(epoch, logs):
         d = {
-            'voted_val_acc': acc,
+            'voted_val_acc': logs['val_acc'], # XXX: legacy compat
         }
-        for k, v in d.items():
-            print("{}: {:.4f}".format(k, v))
         return d
+
     log_path = os.path.join(out_dir, 'train.csv')
     log = LogCallback(log_path, voted_score)
+    train_gen = generator(fold[0], augment=True)
+    val_gen = generator(fold[1], augment=False)
 
-    train_gen = dataframe_generator(train, train.classID, loader=loader, batchsize=batch_size)
-    val_gen = dataframe_generator(val, val.classID, loader=val_loader, batchsize=batch_size)
-
-    callbacks_list = [checkpoint, log]
-    hist = model.fit_generator(train_gen, validation_data=val_gen,
-                               steps_per_epoch=math.ceil(train_samples/batch_size),
-                               validation_steps=math.ceil(val_samples/batch_size),
+    callbacks_list = [checkpoint, log, tensorboard]
+    hist = model.fit_generator(train_gen,
+                               validation_data=val_gen,
                                callbacks=callbacks_list,
-                               epochs=epochs, verbose=1)
+                               epochs=epochs,
+                               shuffle=True,
+                               verbose=1,
+                               workers=1)
 
     df = history_dataframe(hist)
     history_path = os.path.join(out_dir, 'history.csv')
@@ -230,14 +293,7 @@ def main():
     features.maybe_download(feature_settings, feature_dir)
 
     data = urbansound8k.load_dataset()
-    train_data, val_data = load_training_data(data, fold)
-
-    def load(sample, validation):
-        augment = not validation and train_settings['augment'] != 0
-        d = features.load_sample(sample, feature_settings, feature_dir=feature_dir,
-                                 window_frames=model_settings['frames'],
-                                 augment=augment, normalize=exsettings['normalize'])
-        return d
+    fold_data = load_training_data(data, fold)
 
     def build_model():
         m = models.build(exsettings)
@@ -261,11 +317,11 @@ def build_model():
 
     print('Training model', name)
    print('Settings', json.dumps(exsettings))
-    h = train_model(output_dir, train_data, val_data,
-                    model=m,
-                    loader=functools.partial(load, validation=False),
-                    val_loader=functools.partial(load, validation=True),
-                    settings=exsettings)
+
+    h = train_model(output_dir, fold_data,
+                    builder=build_model,
+                    feature_dir=feature_dir,
+                    settings=exsettings, name=name)
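Taken together, batches now carry an extra leading window axis. A minimal shape-check sketch for build_multi_instance, assuming n_mels=60 (from the experiment name), the batch size of 30 from the yaml, and a trivial stand-in for the base model normally produced by models.build:

    import numpy
    from keras.models import Sequential
    from keras.layers import Flatten, Dense

    base = Sequential([
        Flatten(input_shape=(60, 31, 1)),       # stand-in base model
        Dense(10, activation='softmax'),
    ])
    model = build_multi_instance(base, windows=6, bands=60, frames=31)
    X = numpy.zeros((30, 6, 60, 31, 1))         # (batch, windows, mels, frames, channels)
    assert model.predict(X).shape == (30, 10)   # mean-voted class probabilities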