Codacy is up and running + utils tests #176

dpuenteramirez · dpuenteramirez · commit 61f10dc0fe8b · 2022-04-16T17:08:49.000+02:00
diff --git a/is-ssl.yml b/is-ssl.yml
@@ -3,8 +3,9 @@ channels:
   - conda-forge
   - default
   - anaconda
+  - gwerbin
 dependencies:
-  - numpy=1.20.3
+  - numpy=1.22.3
   - scikit-learn=0.24.2
   - matplotlib=3.4.3
   - pandas=1.3.4
diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,4 @@
-numpy~=1.20.3
+numpy~=1.22.3
 scikit-learn~=0.24.2
 matplotlib~=3.4.3
 pandas~=1.3.4
diff --git a/tests/utils.py b/tests/utils.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# @Filename:    utils.py
+# @Author:      Daniel Puente Ramírez
+# @Time:        16/4/22 16:24
+
+from os.path import join
+
+import pytest
+from sklearn.utils import Bunch
+
+from utils import arff_data
+
+
+@pytest.fixture
+def arff_path_file():
+    return join('datasets', 'iris.arff')
+
+
+def test_arff_data(arff_path_file):
+    dataset = arff_data(arff_path_file)
+    assert isinstance(dataset, Bunch)
+    dataset1 = arff_data(arff_path_file, ['a', 'b', 'c', 'd'])
+    assert isinstance(dataset1, Bunch)
diff --git a/utils/__init__.py b/utils/__init__.py
@@ -4,3 +4,6 @@
 # @Author:      Daniel Puente Ramírez
 # @Time:        22/12/21 18:05
 
+from .arff2dataset import arff_data
+
+__all__ = ['arff_data']
diff --git a/utils/arff2dataset.py b/utils/arff2dataset.py
@@ -3,40 +3,13 @@
 # @Filename:    arff2dataset.py
 # @Author:      Daniel Puente Ramírez
 # @Time:        22/12/21 18:05
-# @Version:     2.0
+# @Version:     3.0
 
-import arff
 import numpy as np
 from sklearn.preprocessing import LabelEncoder
 from sklearn.utils import Bunch
 
 
-def arff2sk_dataset(dataset_path):
-    dataset = arff.load(open(dataset_path, 'r'))
-    dat = np.array(dataset['data'])
-    tt = np.array(dat[:, -1])
-    dat = np.delete(dat, -1, 1)
-    dat[dat == ''] = 0.0
-    dat = dat.astype(float)
-
-    try:
-        tar_names = np.array(dataset['attributes'][-1][1]).astype(int)
-        tar = tt.astype(int)
-    except ValueError:
-        tar_names = np.array([x for x in range(len(dataset['attributes'][-1][
-                                                       1]))])
-        relation = {}
-        for index, target in enumerate(dataset['attributes'][-1][1]):
-            relation[target] = index
-        tar = np.array([relation[t] for t in tt])
-
-    att_names = np.array([x[0] for x in dataset['attributes'][:-1]])
-    dataset = Bunch(data=dat, target=tar, feature_names=att_names,
-                    class_names=tar_names)
-
-    return dataset
-
-
 def arff_data(dataset_path, attr=False):
     file = open(dataset_path, 'r')
     data = []
diff --git a/utils/custom_plots.py b/utils/custom_plots.py
@@ -3,8 +3,8 @@
 # @Filename:    custom_plots.py
 # @Author:      Daniel Puente Ramírez
 # @Time:        27/1/22 17:27
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 
 
 def plot_bar_line(name, metric, precision, data_df, save_path):
diff --git a/utils/reading_tests.py b/utils/reading_tests.py
@@ -5,6 +5,7 @@
 # @Time:        25/1/22 16:01
 
 from collections.abc import Iterable
+
 from numpy import nanmean
 
 
@@ -127,8 +128,8 @@ def n_samples_values(self, n_samples):
 
     def __eq__(self, other):
         return isinstance(other, DatasetResult) and (
-            self.name() == other.name() and
-            self.precision() == other.precision()
+                self.name() == other.name() and
+                self.precision() == other.precision()
         )
 
     def __hash__(self):
diff --git a/utils/threads.py b/utils/threads.py
@@ -3,8 +3,8 @@
 # -python-thread/
 
 
-import threading
 import sys
+import threading
 
 
 class ReturnValueThread(threading.Thread):

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -1,4 +1,4 @@` |
| `1` |  | `-numpy~=1.20.3` |
|  | `1` | `+numpy~=1.22.3` |
| `2` | `2` | `scikit-learn~=0.24.2` |
| `3` | `3` | `matplotlib~=3.4.3` |
| `4` | `4` | `pandas~=1.3.4` |