Commit 1ac17a8

working with epoch
1 parent 08fc5b3 commit 1ac17a8

File tree: 1 file changed (+30, -8 lines)

model.py

Lines changed: 30 additions & 8 deletions
@@ -11,6 +11,7 @@
 from keras.preprocessing.sequence import pad_sequences
 from sklearn.preprocessing import LabelEncoder
 import pickle
+import matplotlib.pyplot as plt

 nltk.download('stopwords')
 nltk.download('punkt')
@@ -51,7 +52,7 @@ def preprocess_text(text):
     return ' '.join(tokens)

 # Example usage:
-text = "This is an example text with some numbers like 12345 and punctuation! But we'll remove them."
+text = "This is an example text with some numbers like 12345, email like [email protected] and punctuation! But we'll remove them."
 processed_text = preprocess_text(text)
 print(processed_text)

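The body of preprocess_text sits outside this hunk. For context, a minimal sketch consistent with the example output (lowercasing, stripping digits and punctuation, dropping NLTK stopwords; the exact implementation here is an assumption, not the commit's code) might be:

import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

def preprocess_text(text):
    # Assumed sketch: keep only letters and spaces, then lowercase
    text = re.sub(r'[^a-zA-Z\s]', ' ', text).lower()
    # Tokenize and drop English stopwords
    stop_words = set(stopwords.words('english'))
    tokens = [t for t in word_tokenize(text) if t not in stop_words]
    return ' '.join(tokens)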
@@ -112,8 +113,8 @@ def preprocess_text_list(text_list):
 labels = df['label'].tolist()

 # Create a Tokenizer with an out-of-vocabulary (OOV) token
-tokenizer = Tokenizer(oov_token='<OOV>')
-# print(tokenizer)
+# this will replace any unknown words with a token of our choosing
+tokenizer = Tokenizer(num_words=95000, oov_token='OOV', filters='!"#$%&()*+,-./:;<=>@[\]^_`{|}~ ')
 tokenizer.fit_on_texts(texts)

 # Save the tokenizer to a file
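As a quick illustration (not part of the commit) of what oov_token does: once the tokenizer is fitted, any word outside the learned vocabulary maps to the OOV index (1 in Keras) instead of being silently dropped.

from keras.preprocessing.text import Tokenizer

tok = Tokenizer(num_words=95000, oov_token='OOV')
tok.fit_on_texts(['the cat sat'])
# 'dog' was never seen during fitting, so it becomes index 1 (the OOV token)
print(tok.texts_to_sequences(['the dog sat']))  # [[2, 1, 4]]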
@@ -122,7 +123,7 @@ def preprocess_text_list(text_list):

 # Convert the text data to sequences of integers using the tokenizer
 sequences = tokenizer.texts_to_sequences(texts)
-# print(sequences)
+
 # Pad the sequences to ensure uniform length for neural network input
 padded_sequences = pad_sequences(sequences, padding='post')

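For reference, padding='post' appends zeros after the tokens (rather than before them), so every sequence is stretched to the length of the longest one. A small standalone example:

from keras.preprocessing.sequence import pad_sequences

seqs = [[5, 3], [7, 2, 9, 4]]
print(pad_sequences(seqs, padding='post'))
# [[5 3 0 0]
#  [7 2 9 4]]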
@@ -135,21 +136,22 @@ def preprocess_text_list(text_list):
     Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=32),

     # LSTM layer for processing sequential data
-    LSTM(100),
+    LSTM(50),

     # Dense output layer for classification
     Dense(num_classes, activation='softmax')
 ])

 # Assuming 'df' is your DataFrame containing the 'label' column
-label_encoder = LabelEncoder()
-df['encoded_label'] = label_encoder.fit_transform(df['label'])
+label_encoder = LabelEncoder()  # will be used to convert categorical labels into numerical labels
+df['encoded_label'] = label_encoder.fit_transform(df['label'])  # transform these labels into numerical format

 # Extract the encoded labels
 encoded_labels = df['encoded_label'].tolist()

 # Convert labels to NumPy array
 labels_np = np.array(encoded_labels)
+
 # Replace the lambda function with a named function
 def custom_sparse_softmax_cross_entropy(labels, logits):
     return tf.compat.v1.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
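A note on LabelEncoder, since the encoded values feed straight into the loss: it numbers the classes after sorting them, which is worth knowing when reading encoded labels back. A small example with hypothetical label values:

from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
# Classes are sorted before being numbered: ham=0, other=1, spam=2
print(le.fit_transform(['spam', 'ham', 'spam', 'other']))  # [2 0 2 1]
print(le.classes_)  # ['ham' 'other' 'spam']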
@@ -158,6 +160,26 @@ def custom_sparse_softmax_cross_entropy(labels, logits):
 model.compile(optimizer='adam', loss=custom_sparse_softmax_cross_entropy, metrics=['accuracy', 'precision', 'recall'])

 # Train the model
-model.fit(padded_sequences, labels_np, epochs=100)
+# model.fit(padded_sequences, labels_np, epochs=100)
+
+# Store the training history so the loss curves can be plotted below
+history = model.fit(padded_sequences, labels_np, epochs=100, validation_split=0.2)
+
+# Extract training and validation loss from the history
+training_loss = history.history['loss']
+validation_loss = history.history['val_loss']
+
+# Plot the training and validation loss
+epochs = range(1, len(training_loss) + 1)
+plt.figure(figsize=(10, 6))
+plt.plot(epochs, training_loss, 'bo-', label='Training Loss')
+plt.plot(epochs, validation_loss, 'ro-', label='Validation Loss')
+plt.title('Training and Validation Loss')
+plt.xlabel('Epochs')
+plt.ylabel('Loss')
+plt.legend()
+plt.grid(True)
+plt.show()
+
 # Save the model in the recommended Keras format
 model.save('trained_model.keras')
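A possible follow-up, not in this commit: since validation_split is now in place, an EarlyStopping callback could end the 100-epoch run once val_loss stops improving, and keep the best weights seen:

from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
history = model.fit(padded_sequences, labels_np, epochs=100,
                    validation_split=0.2, callbacks=[early_stop])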
