anuragjain-git
diff --git a/model.py
Lines changed: 1 addition & 36 deletions b/model.py
Lines changed: 1 addition & 36 deletions
diff --git a/ner.py
Lines changed: 0 additions & 53 deletions b/ner.py
Lines changed: 0 additions & 53 deletions
diff --git a/runmodel.py
Lines changed: 1 addition & 1 deletion b/runmodel.py
Lines changed: 1 addition & 1 deletion
diff --git a/testmodels/testing1.py
Lines changed: 0 additions & 101 deletions b/testmodels/testing1.py
Lines changed: 0 additions & 101 deletions
diff --git a/tokenizer.pkl
-5.14 KB b/tokenizer.pkl
-5.14 KB
diff --git a/tokenizer_config.json
Lines changed: 0 additions & 1 deletion b/tokenizer_config.json
Lines changed: 0 additions & 1 deletion
diff --git a/trained_model.keras
-721 KB b/trained_model.keras
-721 KB
diff --git a/trained_ner_model/config.cfg
Lines changed: 0 additions & 130 deletions b/trained_ner_model/config.cfg
Lines changed: 0 additions & 130 deletions
diff --git a/trained_ner_model/meta.json
Lines changed: 0 additions & 35 deletions b/trained_ner_model/meta.json
Lines changed: 0 additions & 35 deletions
@@ -10,31 +10,11 @@
 from keras.preprocessing.text import Tokenizer
 from keras.preprocessing.sequence import pad_sequences
 from sklearn.preprocessing import LabelEncoder
-import re
-
 import pickle
 
-
 nltk.download('stopwords')
 nltk.download('punkt')
 
-# def preprocess_text(text):
-#     # Remove punctuation, convert to lowercase
-#     # text = ''.join([char.lower() for char in text if char.isalnum() and not char.isdigit() or char.isspace()])
-#     text = ''.join([char.lower() for char in text if char.isalnum() or char.isspace()])
-    
-#     # Tokenization
-#     tokens = word_tokenize(text)
-    
-#     # Remove stopwords
-#     stop_words = set(stopwords.words('english'))
-#     tokens = [word for word in tokens if word not in stop_words]
-    
-#     # Stemming
-#     stemmer = PorterStemmer()
-#     tokens = [stemmer.stem(word) for word in tokens]
-    
-#     return ' '.join(tokens)
 
 def preprocess_text(text):
     # Convert to lowercase
@@ -180,19 +160,4 @@ def custom_sparse_softmax_cross_entropy(labels, logits):
 # Train the model
 model.fit(padded_sequences, labels_np, epochs=100)
 # Save the model in the recommended Keras format
-model.save('trained_model.keras')
-
-
-# One-hot encode labels (assuming labels are text strings)
-# label_encoder = LabelEncoder()
-# labels_encoded = label_encoder.fit_transform(labels)
-# labels_onehot = tf.keras.utils.to_categorical(labels_encoded, num_classes=len(set(labels)))  # Adjust num_classes if needed
-
-# # Compile the model with categorical crossentropy loss
-# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', 'precision', 'recall'])
-
-# # Train the model
-# model.fit(padded_sequences, labels_onehot, epochs=100)
-
-# # Save the model
-# model.save('trained_model.keras')
+model.save('trained_model.keras')
@@ -73,4 +73,4 @@ def custom_sparse_softmax_cross_entropy(labels, logits):
     # Check relevance and print the result
     is_relevant = any(score >= similarity_threshold for score in similarity_scores)
     relevance_status = "Relevant" if is_relevant else "Irrelevant"
-    print(f"Text: {text} | Predicted Label: {predicted_class_labels[0]} | Relevance: {relevance_status}")
+    print(f"Text: {text} | Predicted Label: {predicted_class_labels[0]} | Relevance: {relevance_status}")