Skip to content

Commit 6aaf9f1

Browse files
author
Zoran Pandovski
committed
Add helper for train test dataset split
1 parent 1a3645b commit 6aaf9f1

File tree

1 file changed

+32
-0
lines changed

1 file changed

+32
-0
lines changed

helpers.py

+32
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,32 @@
1+
from sklearn.model_selection import train_test_split
2+
import pandas
3+
4+
5+
# CSV file name handed to train_test_split_dataset by the script entry point;
# it is also interpolated into the names of the generated output files.
DATASET = "creditcard-test.csv"
6+
7+
def read_file(dataset):
    """Parse the CSV source ``dataset`` with pandas and return the result.

    ``dataset`` is forwarded unchanged to ``pandas.read_csv``, so it may be
    anything that function accepts (for example a path or a file-like object).
    """
    return pandas.read_csv(dataset)
10+
11+
12+
def create_train_dataset(train_data):
    """Save ``train_data`` as the training CSV in the current directory.

    The data is wrapped in a ``pandas.DataFrame`` and written
    comma-separated, without the index column, to a file whose name is built
    from the module-level ``DATASET`` constant.
    """
    # NOTE(review): ".csv" is appended to DATASET here; if DATASET already
    # carries that extension the file name ends in ".csv.csv" - confirm
    # whether that is intended before changing it.
    output_path = "./train-{0}.csv".format(DATASET)
    pandas.DataFrame(data=train_data).to_csv(output_path, sep=',', index=False)
15+
16+
17+
def create_test_dataset(test_data):
    """Save ``test_data`` as the test CSV in the current directory.

    The data is wrapped in a ``pandas.DataFrame`` and written
    comma-separated, without the index column, to a file whose name is built
    from the module-level ``DATASET`` constant.
    """
    # NOTE(review): ".csv" is appended to DATASET here; if DATASET already
    # carries that extension the file name ends in ".csv.csv" - confirm
    # whether that is intended before changing it.
    output_path = "./test-{0}.csv".format(DATASET)
    pandas.DataFrame(data=test_data).to_csv(output_path, sep=',', index=False)
20+
21+
22+
def train_test_split_dataset(dataset=None):
    """Split a CSV dataset into train and test partitions and save both.

    The file is loaded with ``read_file``, split with scikit-learn's
    ``train_test_split`` (33% of the rows go to the test partition, using a
    fixed ``random_state`` of 42 so the split is reproducible), and each
    partition is written out by ``create_train_dataset`` /
    ``create_test_dataset``.

    Args:
        dataset: CSV source to split, forwarded to ``read_file``. Defaults to
            the module-level ``DATASET`` when omitted, so the function can
            still be called without arguments.

    Returns:
        None. The results are the two CSV files written as a side effect.
    """
    # Resolve the default lazily instead of in the signature so the module
    # constant is looked up at call time.
    if dataset is None:
        dataset = DATASET

    frame = read_file(dataset)

    # train_test_split returns the partitions in (train, test) order.
    train_rows, test_rows = train_test_split(
        frame, test_size=0.33, random_state=42)

    # Route each partition to its matching writer (the original had these
    # two swapped, so the "train" file received the test rows and vice versa).
    create_train_dataset(train_rows)
    create_test_dataset(test_rows)
29+
30+
31+
if __name__ == "__main__":
    # Script entry point: split the configured dataset only when this module
    # is executed directly, not when it is imported.
    train_test_split_dataset(DATASET)

0 commit comments

Comments
 (0)