
Commit 7489122

Merge pull request #1 from amirdianov/tasks/1
Tasks/1
2 parents 85a8dec + 0f82aca commit 7489122

12 files changed (+1287, -57 lines)

.gitignore

+173
@@ -0,0 +1,173 @@
+# Created by https://www.toptal.com/developers/gitignore/api/python
+# Edit at https://www.toptal.com/developers/gitignore?templates=python
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+### Python Patch ###
+# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+poetry.toml
+
+# ruff
+.ruff_cache/
+
+# End of https://www.toptal.com/developers/gitignore/api/python

.pre-commit-config.yaml

+6
@@ -0,0 +1,6 @@
+repos:
+  - repo: https://github.com/psf/black
+    rev: 23.1.0
+    hooks:
+      - id: black
+        language_version: python3.9
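
Note (context, not part of the diff): this configuration wires the Black formatter into Git through the pre-commit framework. Once the hook is installed in the clone with pre-commit install, staged Python files are reformatted on each commit, and pre-commit run --all-files applies the same formatting to the whole repository.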

configs/linear_regression_cfg.py

+6, -7
@@ -1,9 +1,8 @@
+import os
+
 from easydict import EasyDict
-import numpy as np
-cfg = EasyDict()
-cfg.dataframe_path = ''
 
-cfg.base_functions = [] # TODO list of basis functions
-cfg.regularization_coeff = 0
-cfg.train_set_percent = 0.8
-cfg.valid_set_percent = 0.1
+cfg = EasyDict()
+cfg.dataframe_path = os.path.basename("linear_regression_dataset.csv")
+# list of basis functions in execute file
+cfg.base_functions = []
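
Note: os.path.basename applied to a bare file name simply returns that name, so the dataset path above resolves relative to the current working directory. EasyDict exposes dictionary keys as attributes, which is why the config is written and later read with dot notation. A small illustration (not part of the commit):

import os
from easydict import EasyDict

# basename of a bare file name is the name itself
print(os.path.basename("linear_regression_dataset.csv"))  # linear_regression_dataset.csv

# EasyDict keys double as attributes
cfg = EasyDict()
cfg.dataframe_path = "linear_regression_dataset.csv"
print(cfg["dataframe_path"] == cfg.dataframe_path)  # True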

datasets/linear_regression_dataset.py

+8, -19
@@ -1,25 +1,14 @@
 import numpy as np
-from utils.common_functions import read_dataframe_file
-from easydict import EasyDict
-class LinRegDataset():
 
-    def __init__(self, cfg: EasyDict):
-        advertising_dataframe = read_dataframe_file(cfg.dataframe_path)
-        inputs, targets = np.asarray(advertising_dataframe['inputs']), np.asarray(advertising_dataframe['targets'])
-        self.__divide_into_sets(inputs, targets, cfg.train_set_percent, cfg.valid_set_percent)
+from utils.common_functions import read_dataframe_file
 
-    def __divide_into_sets(self, inputs: np.ndarray, targets: np.ndarray, train_set_percent: float = 0.8,
-                           valid_set_percent: float = 0.1) -> None:
-        # TODO define self.inputs_train, self.targets_train, self.inputs_valid, self.targets_valid, self.inputs_test, self.targets_test
-        pass
 
-    def __call__(self) -> dict:
-        return {'inputs': {'train': self.inputs_train,
-                           'valid': self.inputs_valid,
-                           'test': self.inputs_test},
-                'targets': {'train': self.targets_train,
-                            'valid': self.targets_valid,
-                            'test': self.targets_test}
-                }
 
+class LinRegDataset:
+    def __call__(self, dataframe_path: str) -> dict:
+        advertising_dataframe = read_dataframe_file(dataframe_path)
+        return {
+            "inputs": np.asarray(advertising_dataframe["inputs"]),
+            "targets": np.asarray(advertising_dataframe["targets"]),
+        }
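
Note: read_dataframe_file comes from utils.common_functions, which is not displayed in this view. Given that the config points at a .csv file, it is presumably a thin pandas wrapper along these lines (an assumption, not the committed code):

import pandas as pd


def read_dataframe_file(path: str) -> pd.DataFrame:
    # Assumed helper: load the advertising dataset from CSV;
    # the real implementation lives in utils/common_functions.py.
    return pd.read_csv(path)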

execute.py

+28, -3
@@ -1,8 +1,33 @@
-from models.linear_regression_model import LinearRegression
 from datasets.linear_regression_dataset import LinRegDataset
+from models.linear_regression_model import LinearRegression
 from utils.metrics import MSE
 from utils.visualisation import Visualisation
 
-def experiment(*args,**kwargs):
-    pass
+
+def experiment(lin_reg_cfg, visualise_prediction=True):
+    lin_reg_model = LinearRegression(lin_reg_cfg.base_functions)
+    linreg_dataset = LinRegDataset()(lin_reg_cfg.dataframe_path)
+
+    predictions = lin_reg_model(linreg_dataset["inputs"])
+    error = MSE(predictions, linreg_dataset["targets"])
+
+    if visualise_prediction:
+        Visualisation.visualise_predicted_trace(
+            predictions,
+            linreg_dataset["inputs"],
+            linreg_dataset["targets"],
+            plot_title=f"Polynomial of degree {len(lin_reg_cfg.base_functions)}; MSE = {round(error, 2)}",
+        )
+
+
+if __name__ == "__main__":
+    from configs.linear_regression_cfg import cfg as lin_reg_cfg
+
+    degrees = [1, 8, 100]
+    for elem in degrees:
+        lin_reg_cfg.update(
+            # basis functions f1, f2, ..., fn; the constant f0 term is handled in __plan_matrix
+            base_functions=[lambda x, degree=i: x**degree for i in range(1, 1 + elem)]
+        )
+        experiment(lin_reg_cfg, visualise_prediction=True)
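
Note: execute.py depends on LinearRegression, MSE and Visualisation from modules that are among the changed files but are not displayed in this view. A minimal sketch of the interfaces the script assumes is given below; the class name, the __call__ usage and the f0/__plan_matrix remark come from the code above, while the least-squares internals are illustrative guesses rather than the committed implementation.

import numpy as np


def MSE(predictions: np.ndarray, targets: np.ndarray) -> float:
    # Mean squared error, as used for the plot title in execute.py.
    return float(np.mean((predictions - targets) ** 2))


class LinearRegression:
    def __init__(self, base_functions: list):
        # base_functions are f1..fn; the constant f0 = 1 column is added
        # inside __plan_matrix, matching the comment in execute.py.
        self.base_functions = base_functions
        self.weights = np.zeros(len(base_functions) + 1)

    def __plan_matrix(self, inputs: np.ndarray) -> np.ndarray:
        # Design ("plan") matrix: one column per basis function plus the constant column.
        columns = [np.ones_like(inputs, dtype=float)]
        columns += [f(inputs) for f in self.base_functions]
        return np.stack(columns, axis=1)

    def fit(self, inputs: np.ndarray, targets: np.ndarray) -> None:
        # Illustrative least-squares fit via the pseudo-inverse of the plan matrix.
        self.weights = np.linalg.pinv(self.__plan_matrix(inputs)) @ targets

    def __call__(self, inputs: np.ndarray) -> np.ndarray:
        # execute.py calls the model directly on the dataset inputs to get predictions.
        return self.__plan_matrix(inputs) @ self.weights

Where the actual fitting happens (inside the model, in a separate training step, or elsewhere) is not visible in this diff; only the call signatures are taken from it.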
