from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
+ import re

import pickle


nltk.download('stopwords')
nltk.download('punkt')

+ # def preprocess_text(text):
+ #     # Remove punctuation, convert to lowercase
+ #     # text = ''.join([char.lower() for char in text if char.isalnum() and not char.isdigit() or char.isspace()])
+ #     text = ''.join([char.lower() for char in text if char.isalnum() or char.isspace()])
+
+ #     # Tokenization
+ #     tokens = word_tokenize(text)
+
+ #     # Remove stopwords
+ #     stop_words = set(stopwords.words('english'))
+ #     tokens = [word for word in tokens if word not in stop_words]
+
+ #     # Stemming
+ #     stemmer = PorterStemmer()
+ #     tokens = [stemmer.stem(word) for word in tokens]
+
+ #     return ' '.join(tokens)
+
def preprocess_text(text):
-     # Remove punctuation and convert to lowercase
-     text = ''.join([char.lower() for char in text if char.isalnum() or char.isspace()])
+     # Convert to lowercase
+     text = text.lower()
+
+     # Initialize an empty list to store processed characters
+     processed_chars = []
+
+     i = 0
+     while i < len(text):
+         # If character is a digit, skip all characters until the next space
+         if text[i].isdigit():
+             while i < len(text) and text[i] != ' ':
+                 i += 1
+         # If character is alphanumeric or space, add it to processed_chars
+         elif text[i].isalnum() and not text[i].isdigit() or text[i].isspace():
+             processed_chars.append(text[i])
+         i += 1
+
+     # Join the processed characters into a string
+     processed_text = ''.join(processed_chars)

    # Tokenization
-     tokens = word_tokenize(text)
+     tokens = word_tokenize(processed_text)

    # Remove stopwords
    stop_words = set(stopwords.words('english'))
@@ -34,6 +70,12 @@ def preprocess_text(text):

    return ' '.join(tokens)

+ # Example usage:
+ text = "This is an example text with some numbers like 12345 and punctuation! But we'll remove them."
+ processed_text = preprocess_text(text)
+ print(processed_text)
+
+
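A side note on the hunk above: it adds `import re` but never uses it. A regex can express a similar digit-and-punctuation cleanup more compactly; the sketch below is only one possible equivalent (it drops the whole digit-containing token rather than just the tail after the first digit, so its output may differ slightly from the loop above):

    import re

    def preprocess_text_regex(text):
        text = text.lower()
        # Drop any whitespace-delimited token that contains a digit
        text = re.sub(r'\S*\d\S*', ' ', text)
        # Keep only letters and whitespace
        return re.sub(r'[^a-z\s]', '', text)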
def preprocess_text_list(text_list):
    preprocessed_texts = [preprocess_text(text) for text in text_list]
    return preprocessed_texts
@@ -70,16 +112,15 @@ def preprocess_text_list(text_list):
    "You have received UPI mandate collect request from TATA TECHNOLOGIES LI for INR 15000.00. Log into Google Pay app to authorize - Axis Bank",
    "ANURAG JAIN has requested money from you on Google Pay. On approving the request, INR 31.00 will be debited from your A/c - Axis Bank",
    "Flipkart Refund Processed: Refund of Rs. 237.0 for favoru Household wrap ... is successfully transferred and will be credited to your account by Oct 04, 2023.",
-     "UPI mandate has been successfully created towards TATA TECHNOLOGIES LI for INR 15000.00. Funds blocked from A/c no. XX8926. 12e5d61d2ac145738241fbf117bb295c@okaxis - Axis Bank",
-     "hi"
+     "UPI mandate has been successfully created towards TATA TECHNOLOGIES LI for INR 15000.00. Funds blocked from A/c no. XX8926. 12e5d61d2ac145738241fbf117bb295c@okaxis - Axis Bank"
]

# Preprocess the texts
processed_texts = preprocess_text_list(texts)

# Example storage after cleaning
data = {'text': processed_texts,
-         'label': ['debited', 'debited', 'credited', 'credited', 'debited', 'credited', 'debited', 'credited', 'debited', 'credited', 'debited', 'debited', 'credited', 'debited', 'credited', 'credited', 'debited', 'requested', 'requested', 'requested', 'willcredit', 'blocked', 'x']}
+         'label': ['debited', 'debited', 'credited', 'credited', 'debited', 'credited', 'debited', 'credited', 'debited', 'credited', 'debited', 'debited', 'credited', 'debited', 'credited', 'credited', 'debited', 'requested', 'requested', 'requested', 'willcredit', 'blocked']}
df = pd.DataFrame(data)
df.to_csv('processed_dataset.csv', index=False)

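Dropping the "hi" message and its 'x' label keeps the texts and labels aligned one-to-one; a quick guard (not part of the diff) makes that invariant explicit:

    assert len(processed_texts) == len(data['label']), "each text needs exactly one label"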
@@ -92,7 +133,7 @@ def preprocess_text_list(text_list):

# Create a Tokenizer with an out-of-vocabulary (OOV) token
tokenizer = Tokenizer(oov_token='<OOV>')
- print(tokenizer)
+ # print(tokenizer)
tokenizer.fit_on_texts(texts)

# Save the tokenizer to a file
@@ -101,7 +142,7 @@ def preprocess_text_list(text_list):

# Convert the text data to sequences of integers using the tokenizer
sequences = tokenizer.texts_to_sequences(texts)
- print(sequences)
+ # print(sequences)
# Pad the sequences to ensure uniform length for neural network input
padded_sequences = pad_sequences(sequences, padding='post')

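Because the fitted Tokenizer is pickled and the sequences are post-padded, later inference code has to repeat both steps. A minimal sketch, assuming a hypothetical save path 'tokenizer.pickle' (the real filename is set in a hunk not shown here) and padding new inputs to the training length:

    import pickle
    from keras.preprocessing.sequence import pad_sequences

    # Hypothetical path; reuse whatever filename the tokenizer was pickled under
    with open('tokenizer.pickle', 'rb') as f:
        tokenizer = pickle.load(f)

    # Preprocess and vectorize new messages exactly as at training time
    new_texts = preprocess_text_list(["INR 500.00 debited from A/c XX1234"])
    new_sequences = tokenizer.texts_to_sequences(new_texts)
    # maxlen should match the padded length used during training
    new_padded = pad_sequences(new_sequences, padding='post', maxlen=padded_sequences.shape[1])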
@@ -131,12 +172,27 @@ def preprocess_text_list(text_list):
labels_np = np.array(encoded_labels)
# Replace the lambda function with a named function
def custom_sparse_softmax_cross_entropy(labels, logits):
-     return tf.compat.v1.losses.sparse_softmax_cross_entropy(labels, logits)
+     return tf.compat.v1.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

# Compile the model with the named function
- model.compile(optimizer='adam', loss=custom_sparse_softmax_cross_entropy, metrics=['accuracy'])
+ model.compile(optimizer='adam', loss=custom_sparse_softmax_cross_entropy, metrics=['accuracy', 'precision', 'recall'])

# Train the model
model.fit(padded_sequences, labels_np, epochs=100)
# Save the model in the recommended Keras format
model.save('trained_model.keras')
+
+
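One consequence of compiling with a named custom loss: reloading 'trained_model.keras' typically needs that function passed back via custom_objects, otherwise Keras cannot rebuild the compiled model. A minimal sketch (not part of this diff):

    import tensorflow as tf

    # Supply the named loss so the saved, compiled model can be deserialized
    reloaded = tf.keras.models.load_model(
        'trained_model.keras',
        custom_objects={'custom_sparse_softmax_cross_entropy': custom_sparse_softmax_cross_entropy},
    )
    predictions = reloaded.predict(padded_sequences)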
+ # One-hot encode labels (assuming labels are text strings)
+ # label_encoder = LabelEncoder()
+ # labels_encoded = label_encoder.fit_transform(labels)
+ # labels_onehot = tf.keras.utils.to_categorical(labels_encoded, num_classes=len(set(labels)))  # Adjust num_classes if needed
+
+ # # Compile the model with categorical crossentropy loss
+ # model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', 'precision', 'recall'])
+
+ # # Train the model
+ # model.fit(padded_sequences, labels_onehot, epochs=100)
+
+ # # Save the model
+ # model.save('trained_model.keras')