
Commit ed04296

Prepare testing and experiments.
1 parent 6feccb7 commit ed04296

13 files changed: +56 −109 lines

.gitignore

Lines changed: 1 addition & 2 deletions
@@ -64,5 +64,4 @@ __pycache__
 .idea/*
 
 embedding/*.txt
-models/*
-results/*
+models/*

embedding/glove.py

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@
 GLOVE_DIR = 'embedding/glove.6B.100d.txt'
 
 
-def get_pretrained_glove(num_words, text_vocab):
+def pre_trained_glove(num_words, text_vocab):
     embeddings_index = {}
     try:
         f = open(GLOVE_DIR, 'r+', encoding="utf-8")
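
The loader's body is mostly outside this hunk. For context, a minimal sketch of what a function with this signature typically does: parse the GloVe text file into a word-to-vector dict, then fill a (num_words, dim) matrix indexed by the vocab. The itos attribute (index to string) is an assumption borrowed from vocabs.chars.itos in inputs.py below; dim=100 matches glove.6B.100d.txt.

import numpy as np

def pre_trained_glove_sketch(num_words, text_vocab, dim=100):
    # Parse the GloVe text file: one token followed by its vector per line.
    embeddings_index = {}
    with open('embedding/glove.6B.100d.txt', encoding='utf-8') as f:
        for line in f:
            values = line.split()
            embeddings_index[values[0]] = np.asarray(values[1:], dtype='float32')

    # Rows stay all-zero for vocab words missing from GloVe.
    embedding_matrix = np.zeros((num_words, dim))
    for i, word in enumerate(text_vocab.itos[:num_words]):
        vector = embeddings_index.get(word)
        if vector is not None:
            embedding_matrix[i] = vector
    return embedding_matrix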
evaluation.py

Lines changed: 21 additions & 18 deletions
@@ -1,17 +1,18 @@
 import itertools
+import sys
 
 import matplotlib.pyplot as plt
 import numpy as np
-from keras.models import load_model
 from sklearn.metrics import confusion_matrix, precision_recall_fscore_support
 
-from constants import MAX_LEN
 from utils.classification_report import classification_report
 from utils.plot_confusion_matrix_util import plot_confusion_matrix
 
 
-def test_model(model_path, test, test_input, labels_vocab):
-    model = load_model(model_path + 'ner_model')
+def evaluate(model, test, test_input, labels_vocab, save_path):
+    test_eval = model.evaluate(test_input, np.array(test.y))
+    print('Test loss:', test_eval[0])
+    print('Test accuracy:', test_eval[1])
 
     predicted_values = np.argmax(model.predict(test_input), axis=-1)
     true_values = np.argmax(test.y, -1)
@@ -20,33 +21,35 @@ def test_model(model_path, test, test_input, labels_vocab):
     true_values = list(itertools.chain(*true_values))
     predicted_values = list(itertools.chain(*predicted_values))
 
+    orig_stdout = sys.stdout
+    f = open(save_path + 'results.txt', 'w')
+    sys.stdout = f
+
     print("Macro Precision/Recall/F1 score:")
     print(precision_recall_fscore_support(true_values, predicted_values, average='macro'))
+    print(60 * "-")
 
     print("Micro Precision/Recall/F1 score:")
     print(precision_recall_fscore_support(true_values, predicted_values, average='micro'))
+    print(60 * "-")
 
-    print("Weighted Precision/Recall/F1 score:")
-    print(precision_recall_fscore_support(true_values, predicted_values, average='weighted'))
-
-    # Remove padding label
     keys = list(labels_vocab.stoi.keys())
     values = list(labels_vocab.stoi.values())
-    # values.remove(labels_vocab.stoi[NO_ENTITY_TOKEN])
-    # keys.remove(NO_ENTITY_TOKEN)
 
-    # Classification report
-    report = classification_report(true_values, predicted_values, labels=values, target_names=keys, digits=4, average='macro')
-    print(report)
+    # Classification reports
+    macro_report = classification_report(true_values, predicted_values, labels=values, target_names=keys, digits=4, average='macro')
+    print(macro_report)
+    print(60 * "-")
+
+    micro_report = classification_report(true_values, predicted_values, labels=values, target_names=keys, digits=4, average='micro')
+    print(micro_report)
 
-    # plot_classification_report(report)
-    # plt.savefig('results/classification_report.png', dpi=200, format='png', bbox_inches='tight')
-    # plt.close()
+    sys.stdout = orig_stdout
+    f.close()
 
     # Confusion Matrix
     cnf_matrix = confusion_matrix(true_values, predicted_values)
     np.set_printoptions(precision=2)
-    # TODO fix classes
     plot_confusion_matrix(cnf_matrix, classes=list(labels_vocab.stoi.keys()), normalize=True, title='Normalized confusion matrix')
-    plt.savefig('results/confusion_matrix.png', dpi=200, format='png', bbox_inches='tight')
+    plt.savefig(save_path + '/images/confusion_matrix.png', dpi=200, format='png', bbox_inches='tight')
     plt.close()
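
A note for readers adapting the sys.stdout swap above: contextlib.redirect_stdout achieves the same capture and restores stdout even if a metric call raises. A minimal sketch of the equivalent idiom, not what the commit itself uses (save_path value taken from model_args.txt below):

import os
from contextlib import redirect_stdout

save_path = 'models/test/'
os.makedirs(save_path, exist_ok=True)
with open(save_path + 'results.txt', 'w') as f, redirect_stdout(f):
    # Everything printed inside this block lands in results.txt, not the console.
    print("Macro Precision/Recall/F1 score:")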

experiments.py

Lines changed: 2 additions & 1 deletion
@@ -1,7 +1,7 @@
 import json
 import os
-from datetime import datetime
 from collections import namedtuple
+
 from train import train
 
 
@@ -18,6 +18,7 @@ def main():
         data = fd.read()
     args = json2obj(data)
     print(args.rnn_type)
+    os.makedirs(args.save_path + 'images', exist_ok=True)
 
     train(args)
 
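json2obj itself is defined outside this diff; a common implementation matching the namedtuple import above turns each JSON object into a namedtuple, so config keys become attributes like args.rnn_type and args.save_path. A sketch under that assumption:

import json
from collections import namedtuple

def json2obj(data):
    # Each JSON object becomes a namedtuple; keys turn into attributes.
    return json.loads(data, object_hook=lambda d: namedtuple('Args', d.keys())(*d.values()))

args = json2obj('{"max_epochs": 2, "save_path": "models/test/"}')
print(args.save_path)  # models/test/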
inputs.py

Lines changed: 8 additions & 8 deletions
@@ -1,18 +1,18 @@
 from keras import Input
 from keras.layers import Embedding, Dropout, LSTM, TimeDistributed, SpatialDropout1D, concatenate
-from embedding.glove import get_pretrained_glove
+from embedding.glove import pre_trained_glove
 from constants import MAX_LEN, MAX_LEN_CHAR
 
 
 def inputs_factory(args, vocabs):
     inputs = []
     input_layers = []
 
-    for key, func in inputs_map.items():
-        if key in args.inputs:
-            input, input_layer = func(args, vocabs)
-            inputs.append(input)
-            input_layers.append(input_layer)
+    # args.inputs elements must be separated by '-'; the order is important here
+    for i in args.inputs.split('-'):
+        input, input_layer = inputs_map.get(i)(args, vocabs)
+        inputs.append(input)
+        input_layers.append(input_layer)
 
     # Concatenate inputs (if there are multiple)
     if len(inputs) > 1:
@@ -31,7 +31,7 @@ def words_input(args, vocabs):
     if args.embeddings_type == 'glove':
         txt_embed = Embedding(input_dim=num_words, output_dim=MAX_LEN, input_length=None,
                               name='txt_embedding', trainable=args.embeddings_trainable,
-                              weights=([get_pretrained_glove(num_words, vocabs.words)]))(txt_input)
+                              weights=([pre_trained_glove(num_words, vocabs.words)]))(txt_input)
     else:
         txt_embed = Embedding(input_dim=num_words, output_dim=MAX_LEN, input_length=None,
                               name='txt_embedding', trainable=args.embeddings_trainable)(txt_input)
@@ -51,7 +51,7 @@ def pos_input(args, vocabs):
 
 def chars_input(args, vocabs):
     char_in = Input(shape=(None, MAX_LEN_CHAR,), name="char_input")
-    emb_char = TimeDistributed(Embedding(input_dim=len(vocabs.chars.itos), output_dim=MAX_LEN_CHAR, input_length=None))\
+    emb_char = TimeDistributed(Embedding(input_dim=len(vocabs.chars.itos), output_dim=MAX_LEN_CHAR, input_length=None)) \
         (char_in)
     char_enc = TimeDistributed(LSTM(units=20, return_sequences=False, recurrent_dropout=0.5))(emb_char)
    return char_in, char_enc
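
The rewritten loop replaces substring filtering with explicit dispatch: the spec string is split on '-', and that order now determines the order of the model's input tensors. Note that an unknown key makes inputs_map.get(i) return None, which raises a TypeError when called; there is no explicit validation. A self-contained sketch of the pattern, with stand-in builders in place of words_input, pos_input and chars_input:

def build_words():
    return 'word_tensor'

def build_pos():
    return 'pos_tensor'

def build_chars():
    return 'char_tensor'

inputs_map = {'words': build_words, 'pos': build_pos, 'chars': build_chars}

spec = 'words-chars'  # same format as the "inputs" field in model_args.txt
tensors = [inputs_map[key]() for key in spec.split('-')]
print(tensors)  # ['word_tensor', 'char_tensor'], order follows the spec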

model.py

Lines changed: 5 additions & 10 deletions
@@ -80,27 +80,22 @@ def train(self, epochs, embedding=None):
 
         model.compile(optimizer="rmsprop", loss='categorical_crossentropy', metrics=['accuracy'])
 
-        plot_model(model, to_file=self.save_path + 'ner_model_image.png')
+        plot_model(model, to_file=self.save_path + 'model_structure.png')
         print(model.summary())
 
-        dir = create_dir()
-
-        tensorboard_callback = TensorBoard(log_dir=dir, histogram_freq=0, write_graph=True, write_images=True)
-
         history = model.fit(
             [self.X_train, self.train_pos, self.train_characters],
             np.array(self.Y_train), batch_size=32, epochs=epochs,
-            validation_data=(
-                [self.X_validation, self.valid_pos, self.valid_characters],
-                np.array(self.Y_validation)),
-            callbacks=[tensorboard_callback], verbose=1)
+            validation_data=([self.X_validation, self.valid_pos, self.valid_characters], np.array(self.Y_validation)), verbose=1)
 
-        model.save(self.save_path + 'ner_model')
+        model.save(self.save_path + 'model_ner')
 
         test_eval = model.evaluate(
             [self.X_test, self.test_pos, self.test_characters],
             np.array(self.Y_test))
+
         print('Test loss:', test_eval[0])
         print('Test accuracy:', test_eval[1])
 
         return model, history
+
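The artifact rename from 'ner_model' to 'model_ner' has to stay in sync with predict.py below. A toy round-trip showing the contract, assuming old-style Keras with h5py available (save() writes HDF5 regardless of extension); the layer shapes here are placeholders:

import os
from keras.models import Sequential, load_model
from keras.layers import Dense

os.makedirs('models/test/', exist_ok=True)
model = Sequential([Dense(2, input_shape=(4,), activation='softmax')])
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.save('models/test/model_ner')             # as in model.py / train.py
restored = load_model('models/test/model_ner')  # as in predict.py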
model_args.txt

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 {
-  "max_epochs": 5,
+  "max_epochs": 2,
   "batch_size": 32,
-  "save_path": "models/",
+  "save_path": "models/test/",
   "inputs": "words",
   "embeddings_trainable": false,
   "embeddings_type": "glove",

predict.py

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ def main():
     args = parse_args()
 
     vocabs = load_object(args.path + 'vocabs')
-    model = load_model(args.path + 'ner_model')
+    model = load_model(args.path + 'model_ner')
     nlp = spacy.load('en')
 
     while True:

train.py

Lines changed: 9 additions & 34 deletions
@@ -1,11 +1,10 @@
 import numpy as np
 import matplotlib.pyplot as plt
 import argparse
-import re
 import os
 from keras.callbacks import TensorBoard
 from dataset.api import load_dataset
-from test_model import test_model
+from evaluation import evaluate
 from keras.utils.vis_utils import plot_model
 from datetime import datetime
 from inputs import inputs_factory
@@ -40,34 +39,15 @@ def parse_args():
     return args
 
 
-def create_dir():
-    runs = ([x[0] for x in os.walk("results/logs")])
-    runs = [x for x in runs if "run" in x]
-    runs = list(map(int, re.findall(r'\d+', "".join(runs))))
-    runs.sort()
-    if len(runs) == 0:
-        return "results/logs/run1"
-
-    dir_idx = runs[-1] + 1
-
-    dir = "results/logs/run" + str(dir_idx)
-
-    if not os.path.exists(dir):
-        os.makedirs(dir)
-        return dir
-    else:
-        raise FileExistsError('Clear logs dir.')
-
-
-def plot_train_and_save(history):
+def plot_train_and_save(history, path):
     # Plot accuracy
     plt.plot(history.history['acc'])
     plt.plot(history.history['val_acc'])
     plt.title('Model Accuracy')
     plt.ylabel('Accuracy')
     plt.xlabel('Epoch')
     plt.legend(['train', 'validation'], loc='lower right')
-    plt.savefig('results/model_accuracy.png', dpi=200, format='png', bbox_inches='tight')
+    plt.savefig(path + '/images/model_accuracy.png', dpi=200, format='png', bbox_inches='tight')
     plt.close()
 
     # Plot loss
@@ -77,7 +57,7 @@ def plot_train_and_save(history):
     plt.ylabel('Loss')
     plt.xlabel('Epoch')
     plt.legend(['train', 'validation'], loc='upper right')
-    plt.savefig('results/model_loss.png', dpi=200, format='png', bbox_inches='tight')
+    plt.savefig(path + '/images/model_loss.png', dpi=200, format='png', bbox_inches='tight')
     plt.close()
 
 
@@ -114,12 +94,10 @@ def train(args):
 
     # prepare model
     model.compile(optimizer="rmsprop", loss='categorical_crossentropy', metrics=['accuracy'])
-    plot_model(model, to_file=args.save_path + 'ner_model_image.png')
+    plot_model(model, to_file=args.save_path + 'images/model_structure.png')
     print(model.summary())
 
-    dir = create_dir()
-
-    tensorboard_callback = TensorBoard(log_dir=dir, histogram_freq=0, write_graph=True, write_images=True)
+    tensorboard_callback = TensorBoard(log_dir=args.save_path, histogram_freq=0, write_graph=True, write_images=True)
 
     # get inputs based on args.inputs argument
     train, val, test = filter_inputs(args, datasets)
@@ -130,14 +108,11 @@ def train(args):
                         validation_data=(val, np.array(datasets.val.y)),
                         callbacks=[tensorboard_callback], verbose=1)
 
-    model.save(args.save_path + 'ner_model')
+    model.save(args.save_path + 'model_ner')
 
-    test_eval = model.evaluate(test, np.array(datasets.test.y))
-    print('Test loss:', test_eval[0])
-    print('Test accuracy:', test_eval[1])
+    evaluate(model, datasets.test, test, vocabs.labels, args.save_path)
 
-    test_model(args.save_path, datasets.test, test, vocabs.labels)
-    plot_train_and_save(history)
+    plot_train_and_save(history, args.save_path)
 
 
 if __name__ == '__main__':
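
With create_dir gone, TensorBoard logs land directly in the run's save_path, so a single directory now holds every artifact of a run. The expected layout after training with the model_args.txt above (file names taken from this diff; the events file name is standard TensorBoard output):

models/test/
    model_ner                  saved Keras model (model.save)
    results.txt                metrics written by evaluate()
    events.out.tfevents.*      TensorBoard logs (log_dir = save_path)
    images/
        model_structure.png    plot_model output
        model_accuracy.png     plot_train_and_save
        model_loss.png         plot_train_and_save
        confusion_matrix.png   evaluate()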

train.pyc

5.81 KB
Binary file not shown.

utils/plot_classification_report_util.py

Lines changed: 6 additions & 4 deletions
@@ -34,7 +34,7 @@ def cm2inch(*tupl):
     return tuple(i / inch for i in tupl)
 
 
-def heatmap(AUC, title, xlabel, ylabel, xticklabels, yticklabels, figure_width=40, figure_height=20,
+def heatmap(path, AUC, title, xlabel, ylabel, xticklabels, yticklabels, figure_width=40, figure_height=20,
             correct_orientation=False, cmap='RdBu'):
     '''
     Inspired by:
@@ -89,9 +89,11 @@ def heatmap(AUC, title, xlabel, ylabel, xticklabels, yticklabels, figure_width=4
     # fig.set_size_inches(cm2inch(40, 20))
     # fig.set_size_inches(cm2inch(40*4, 20*4))
     fig.set_size_inches(cm2inch(figure_width, figure_height))
+    plt.savefig(path + 'model_loss.png', dpi=200, format='png', bbox_inches='tight')
+    plt.close()
 
 
-def plot_classification_report(classification_report, title='Classification report ', cmap='RdBu'):
+def plot_classification_report(classification_report, path, title='Classification report ', cmap='RdBu'):
     '''
     Plot scikit-learn classification report.
     Extension based on https://stackoverflow.com/a/31689645/395857
@@ -110,7 +112,7 @@ def plot_classification_report(classification_report, title='Classification repo
         if len(t) < 2:
             continue
 
-        if t[0] == 'avg':
+        if t[0] == 'avg' or t[0] == 'macro avg' or t[0] == 'micro avg':
             t[0:3] = [''.join(t[0:3]).upper()]
 
         classes.append(t[0])
@@ -130,5 +132,5 @@ def plot_classification_report(classification_report, title='Classification repo
     figure_width = 25
     figure_height = len(class_names) + 7
     correct_orientation = False
-    heatmap(np.array(plotMat), title, xlabel, ylabel, xticklabels, yticklabels, figure_width, figure_height,
+    heatmap(path, np.array(plotMat), title, xlabel, ylabel, xticklabels, yticklabels, figure_width, figure_height,
             correct_orientation, cmap=cmap)

utils/plot_confusion_matrix_util.py

Lines changed: 0 additions & 3 deletions
@@ -13,9 +13,6 @@ def plot_confusion_matrix(cm, classes,
     """
     if normalize:
         cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
-        print("Normalized confusion matrix")
-    else:
-        print('Confusion matrix, without normalization')
 
     # print(cm)
 
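For reference, the row-normalization kept above divides each row (true class) by its total, so cell (i, j) becomes the fraction of class-i samples predicted as class j. A small self-contained check:

import numpy as np

cm = np.array([[50, 10],
               [5, 35]])
# Row sums [60, 40] broadcast down each row via the new axis.
cm_norm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print(cm_norm)
# [[0.83 0.17]
#  [0.12 0.88]]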
utils/sentence_getter.py

Lines changed: 0 additions & 25 deletions
This file was deleted.
