Skip to content

Commit 5bd8f9a

Browse files
committed
feature: dataset from paths
1 parent 515b1e6 commit 5bd8f9a

File tree

3 files changed

+37
-21
lines changed

3 files changed

+37
-21
lines changed

.github/workflows/publish.yml

+9-14
Original file line numberDiff line numberDiff line change
@@ -19,35 +19,30 @@ jobs:
1919

2020
- name: Install dependencies
2121
run: |
22-
curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python
23-
source $HOME/.poetry/env
24-
poetry install
22+
curl -sSL https://install.python-poetry.org | python - --version 1.2.1
23+
$HOME/.local/bin/poetry install --no-root
2524
2625
- name: Run tests
2726
run: |
28-
source $HOME/.poetry/env
29-
poetry run pytest
27+
$HOME/.local/bin/poetry run pytest
3028
3129
- name: Build wheels
3230
run: |
33-
source $HOME/.poetry/env
34-
poetry version $(git tag --points-at HEAD)
35-
poetry build
31+
$HOME/.local/bin/poetry version $(git tag --points-at HEAD)
32+
$HOME/.local/bin/poetry build
3633
3734
- name: Test install package
3835
run: |
39-
source $HOME/.poetry/env
4036
mkdir test_install
4137
cd test_install
42-
poetry init
43-
poetry add ../dist/$(ls dist/*.whl)
38+
$HOME/.local/bin/poetry init
39+
$HOME/.local/bin/poetry add ../dist/$(ls dist/*.whl)
4440
45-
poetry run python -c "import datastream"
41+
$HOME/.local/bin/poetry run python -c "import datastream"
4642
4743
- name: Upload
4844
env:
4945
USERNAME: __token__
5046
PASSWORD: ${{ secrets.PYPI_TOKEN }}
5147
run: |
52-
source $HOME/.poetry/env
53-
poetry publish --username=$USERNAME --password=$PASSWORD
48+
$HOME/.local/bin/poetry publish --username=$USERNAME --password=$PASSWORD

.github/workflows/test.yml

+4-7
Original file line numberDiff line numberDiff line change
@@ -26,19 +26,16 @@ jobs:
2626
2727
- name: Install dependencies
2828
run: |
29-
curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python
30-
source $HOME/.poetry/env
31-
poetry install
29+
curl -sSL https://install.python-poetry.org | python - --version 1.2.1
30+
$HOME/.local/bin/poetry install
3231
3332
- name: Run tests
3433
run: |
35-
source $HOME/.poetry/env
36-
poetry run pytest
34+
$HOME/.local/bin/poetry run pytest
3735
3836
- name: Build wheels
3937
run: |
40-
source $HOME/.poetry/env
41-
poetry build
38+
$HOME/.local/bin/poetry build
4239
4340
build-docs:
4441
runs-on: ubuntu-latest

datastream/dataset.py

+24
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,30 @@ def from_dataframe(dataframe: pd.DataFrame) -> Dataset[pd.Series]:
8989
get_item=lambda df, index: df.iloc[index],
9090
)
9191

92+
@staticmethod
93+
def from_paths(paths: Iterable[Union[str, Path]], pattern: str) -> Dataset[pd.Series]:
94+
'''
95+
Create ``Dataset`` from paths using regex pattern that extracts information
96+
from the path itself.
97+
:func:`Dataset.__getitem__` will return a row from the dataframe and
98+
:func:`Dataset.map` should be given a function that takes a row from
99+
the dataframe as input.
100+
101+
>>> image_paths = ["dataset/damage/1.png"]
102+
>>> (
103+
... Dataset.from_paths(image_paths, pattern=r".*/(?P<class_name>\w+)/(?P<index>\d+).png")
104+
... .map(lambda row: row["class_name"])
105+
... )[-1]
106+
'damage'
107+
'''
108+
paths = list(paths)
109+
return Dataset.from_dataframe(
110+
pd.Series(paths)
111+
.astype(str)
112+
.str.extract(pattern)
113+
.assign(path=paths)
114+
)
115+
92116
def __getitem__(
93117
self: Dataset[T],
94118
select: Union[int, slice, Iterable, Callable[[pd.DataFrame], Iterable[int]]]

0 commit comments

Comments
 (0)