Commit c39f769
chore: reformat black and fix build
committed Oct 25, 2022 · 1 parent 5bd8f9a

21 files changed (+994, -881 lines)
 

.github/workflows/publish.yml

Lines changed: 16 additions & 12 deletions

@@ -9,40 +9,44 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.7, 3.8, 3.9]
+        python-version: [3.8]
     steps:
     - uses: actions/checkout@v2
     - name: Set up Python ${{ matrix.python-version }}
       uses: actions/setup-python@v2
       with:
         python-version: ${{ matrix.python-version }}
 
+    - name: Install poetry
+      run: |
+        python -m pip install --upgrade pip
+        curl -sSL https://install.python-poetry.org | python - --version 1.2.2
+        echo "${HOME}/.local/bin" >> $GITHUB_PATH
+
     - name: Install dependencies
       run: |
-        curl -sSL https://install.python-poetry.org | python - --version 1.2.1
-        $HOME/.local/bin/poetry install --no-root
+        poetry install --no-root
 
     - name: Run tests
       run: |
-        $HOME/.local/bin/poetry run pytest
+        poetry run pytest
 
     - name: Build wheels
       run: |
-        $HOME/.local/bin/poetry version $(git tag --points-at HEAD)
-        $HOME/.local/bin/poetry build
+        poetry version $(git tag --points-at HEAD)
+        poetry build
 
     - name: Test install package
       run: |
-        mkdir test_install
-        cd test_install
-        $HOME/.local/bin/poetry init
-        $HOME/.local/bin/poetry add ../dist/$(ls dist/*.whl)
+        poetry new test-install
+        cd test-install
+        poetry add ../dist/$(ls ../dist/*.whl)
 
-        $HOME/.local/bin/poetry run python -c "import datastream"
+        poetry run python -c "import datastream"
 
     - name: Upload
       env:
         USERNAME: __token__
         PASSWORD: ${{ secrets.PYPI_TOKEN }}
       run: |
-        $HOME/.local/bin/poetry publish --username=$USERNAME --password=$PASSWORD
+        poetry publish --username=$USERNAME --password=$PASSWORD

.github/workflows/test.yml

Lines changed: 9 additions & 4 deletions

@@ -24,18 +24,23 @@ jobs:
           ${{ runner.os }}-pip-
           ${{ runner.os }}-
 
+    - name: Install poetry
+      run: |
+        python -m pip install --upgrade pip
+        curl -sSL https://install.python-poetry.org | python - --version 1.2.2
+        echo "${HOME}/.local/bin" >> $GITHUB_PATH
+
     - name: Install dependencies
       run: |
-        curl -sSL https://install.python-poetry.org | python - --version 1.2.1
-        $HOME/.local/bin/poetry install install
+        poetry install
 
     - name: Run tests
       run: |
-        $HOME/.local/bin/poetry install run pytest
+        poetry run pytest
 
     - name: Build wheels
       run: |
-        $HOME/.local/bin/poetry install build
+        poetry build
 
   build-docs:
     runs-on: ubuntu-latest

README.rst

Lines changed: 0 additions & 6 deletions

@@ -110,9 +110,3 @@ Install from source
 ===================
 
 .. pip install -e .
-
-To patch the code locally for `Python 3.6` run `patch-python3.6.sh`.
-
-.. code-block:: bash
-
-    $ ./patch-python3.6.sh

datastream/__init__.py

Lines changed: 2 additions & 1 deletion

@@ -2,7 +2,8 @@
 from datastream.datastream import Datastream
 
 from pkg_resources import get_distribution, DistributionNotFound
+
 try:
-    __version__ = get_distribution('pytorch-datastream').version
+    __version__ = get_distribution("pytorch-datastream").version
 except DistributionNotFound:
     pass

datastream/dataset.py

Lines changed: 239 additions & 218 deletions
Large diffs are not rendered by default.

datastream/datastream.py

Lines changed: 167 additions & 174 deletions
Large diffs are not rendered by default.

datastream/samplers/merge_sampler.py

Lines changed: 18 additions & 25 deletions

@@ -28,9 +28,7 @@ def __init__(self, samplers, datasets, ns):
             ns=ns,
             length=MergeSampler.merged_samplers_length(samplers, ns),
             from_mapping=Dataset.create_from_concat_mapping(datasets),
-            merged_samplers=MergeSampler.merge_samplers(
-                samplers, datasets, ns
-            ),
+            merged_samplers=MergeSampler.merge_samplers(samplers, datasets, ns),
         )
 
     def __len__(self):

@@ -41,10 +39,7 @@ def __iter__(self):
 
     @staticmethod
     def merged_samplers_length(samplers, ns):
-        return (
-            min([len(sampler) / n for sampler, n in zip(samplers, ns)])
-            * sum(ns)
-        )
+        return min([len(sampler) / n for sampler, n in zip(samplers, ns)]) * sum(ns)
 
     @staticmethod
     def merge_samplers(samplers, datasets, ns):

@@ -54,13 +49,18 @@ def batch(iterable, n):
             while True:
                 yield [next(iterable) for _ in range(n)]
 
-        index_batch = zip(*[
-            batch(map(
-                partial(to_mapping, dataset_index),
-                repeat_map_chain(iter, sampler),
-            ), n)
-            for dataset_index, (sampler, n) in enumerate(zip(samplers, ns))
-        ])
+        index_batch = zip(
+            *[
+                batch(
+                    map(
+                        partial(to_mapping, dataset_index),
+                        repeat_map_chain(iter, sampler),
+                    ),
+                    n,
+                )
+                for dataset_index, (sampler, n) in enumerate(zip(samplers, ns))
+            ]
+        )
 
         return chain.from_iterable(chain.from_iterable(index_batch))
 

@@ -74,25 +74,18 @@ def update_weights_(self, function):
 
     def update_example_weight_(self, weight, index):
         dataset_index, inner_index = self.from_mapping(index)
-        self.samplers[dataset_index].update_example_weight_(
-            weight, inner_index
-        )
+        self.samplers[dataset_index].update_example_weight_(weight, inner_index)
 
     def sample_proportion(self, proportion):
         return MergeSampler(
-            [
-                sampler.sample_proportion(proportion)
-                for sampler in self.samplers
-            ],
+            [sampler.sample_proportion(proportion) for sampler in self.samplers],
             self.datasets,
             self.ns,
         )
 
     def state_dict(self):
-        return dict(
-            samplers=[sampler.state_dict() for sampler in self.samplers]
-        )
+        return dict(samplers=[sampler.state_dict() for sampler in self.samplers])
 
     def load_state_dict(self, state_dict):
-        for sampler, state_dict in zip(self.samplers, state_dict['samplers']):
+        for sampler, state_dict in zip(self.samplers, state_dict["samplers"]):
             sampler.load_state_dict(state_dict)
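
A minimal sketch of the round-robin pattern behind merge_samplers — batch, zip, and a double chain.from_iterable — using plain lists in place of samplers and omitting the concat-index mapping (the names below are illustrative, not library code):

    from itertools import chain, islice

    def batch(iterable, n):
        # group an infinite iterator into lists of size n
        while True:
            yield [next(iterable) for _ in range(n)]

    def repeat(values):
        # cycle forever, like repeat_map_chain(iter, sampler)
        while True:
            yield from values

    samplers = [["a1", "a2"], ["b1"]]
    ns = [2, 1]  # draw 2 from the first sampler for every 1 from the second

    index_batch = zip(*[batch(repeat(s), n) for s, n in zip(samplers, ns)])
    merged = chain.from_iterable(chain.from_iterable(index_batch))
    print(list(islice(merged, 6)))  # ['a1', 'a2', 'b1', 'a1', 'a2', 'b1']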

datastream/samplers/multi_sampler.py

Lines changed: 12 additions & 17 deletions

@@ -28,7 +28,7 @@ def __init__(self, samplers, dataset):
             merged_samplers=MultiSampler.merge_samplers(
                 samplers,
                 [1 for _ in samplers],
-            )
+            ),
         )
 
     @staticmethod

@@ -50,10 +50,12 @@ def batch(iterable, n):
             while True:
                 yield [next(iterable) for _ in range(n)]
 
-        index_batch = zip(*[
-            batch(repeat_map_chain(iter, sampler), n)
-            for sampler, n in zip(samplers, ns)
-        ])
+        index_batch = zip(
+            *[
+                batch(repeat_map_chain(iter, sampler), n)
+                for sampler, n in zip(samplers, ns)
+            ]
+        )
 
         return chain.from_iterable(chain.from_iterable(index_batch))
 

@@ -66,24 +68,17 @@ def update_weights_(self, function):
 
     def update_example_weight_(self, weights, index):
         for sampler, weight in zip(self.samplers, weights):
-            sampler.update_example_weight_(
-                weight, index
-            )
+            sampler.update_example_weight_(weight, index)
 
     def sample_proportion(self, proportion):
         return MultiSampler(
-            [
-                sampler.sample_proportion(proportion)
-                for sampler in self.samplers
-            ],
-            self.dataset
+            [sampler.sample_proportion(proportion) for sampler in self.samplers],
+            self.dataset,
         )
 
     def state_dict(self):
-        return dict(
-            samplers=[sampler.state_dict() for sampler in self.samplers]
-        )
+        return dict(samplers=[sampler.state_dict() for sampler in self.samplers])
 
     def load_state_dict(self, state_dict):
-        for sampler, state_dict in zip(self.samplers, state_dict['samplers']):
+        for sampler, state_dict in zip(self.samplers, state_dict["samplers"]):
             sampler.load_state_dict(state_dict)

datastream/samplers/repeat_sampler.py

Lines changed: 3 additions & 3 deletions

@@ -14,16 +14,16 @@ class Config:
         arbitrary_types_allowed = True
 
     def __init__(self, sampler, length, epoch_bound=False):
-        '''
+        """
         Wrapper that repeats and limits length of sampling based on
         epoch length and batch size
-        '''
+        """
         BaseModel.__init__(
             self,
             sampler=sampler,
             length=length,
             epoch_bound=epoch_bound,
-            queue=iter(sampler)
+            queue=iter(sampler),
         )
 
     def __iter__(self):
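
The __iter__ body is not part of this diff; the behaviour the docstring describes — drawing from a queue and restarting the wrapped sampler on exhaustion, up to a fixed length — might be sketched like this (an illustration of the idea, assuming a non-empty sampler, not the class's actual code):

    def repeat_limited(sampler, length):
        # yield `length` indices, restarting the wrapped sampler as needed
        queue = iter(sampler)
        for _ in range(length):
            try:
                yield next(queue)
            except StopIteration:
                queue = iter(sampler)
                yield next(queue)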

datastream/samplers/sequential_sampler.py

Lines changed: 2 additions & 6 deletions

@@ -12,8 +12,7 @@ class Config:
 
     def __init__(self, length):
         BaseModel.__init__(
-            self,
-            sampler=torch.utils.data.SequentialSampler(torch.ones(length))
+            self, sampler=torch.utils.data.SequentialSampler(torch.ones(length))
         )
 
     def __len__(self):

@@ -23,7 +22,4 @@ def __iter__(self):
         return iter(self.sampler)
 
     def sample_proportion(self, proportion):
-        return SequentialSampler(min(
-            len(self),
-            int(len(self) * proportion)
-        ))
+        return SequentialSampler(min(len(self), int(len(self) * proportion)))
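
For context, the wrapped torch sampler simply iterates indices in order, so sample_proportion only shortens the range; a quick standalone check against the torch API:

    import torch

    sampler = torch.utils.data.SequentialSampler(torch.ones(4))
    print(list(sampler))  # [0, 1, 2, 3]
    # sample_proportion(0.5) on a length-4 sampler keeps min(4, int(4 * 0.5)) = 2 indices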

datastream/samplers/standard_sampler.py

Lines changed: 3 additions & 3 deletions

@@ -21,7 +21,7 @@ def __init__(self, length, proportion=1.0, replacement=False):
                 torch.ones(length).double(),
                 num_samples=int(max(1, min(length, length * proportion))),
                 replacement=replacement,
-            )
+            ),
         )
 
     def __len__(self):

@@ -41,7 +41,7 @@ def update_weights_(self, function):
         self.sampler.weights[:] = function(self.sampler.weights)
 
     def update_example_weight_(self, weight, index):
-        if hasattr(weight, 'item'):
+        if hasattr(weight, "item"):
             weight = weight.item()
 
         self.sampler.weights[index] = weight

@@ -59,4 +59,4 @@ def state_dict(self):
         return dict(weights=self.sampler.weights)
 
     def load_state_dict(self, state_dict):
-        self.sampler.weights[:] = state_dict['weights']
+        self.sampler.weights[:] = state_dict["weights"]
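
The in-place update that update_example_weight_ performs can be reproduced directly on torch's WeightedRandomSampler, which stores its weights as a mutable double tensor (a standalone sketch using only the torch API):

    import torch

    sampler = torch.utils.data.WeightedRandomSampler(
        torch.ones(5).double(), num_samples=5, replacement=False
    )
    sampler.weights[2] = 10.0  # upweight example 2 in place
    print(sampler.weights)  # tensor([ 1.,  1., 10.,  1.,  1.], dtype=torch.float64)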

datastream/samplers/zip_sampler.py

Lines changed: 8 additions & 17 deletions

@@ -50,9 +50,7 @@ def zip_samplers(samplers, datasets):
     def weight(self, index):
         return [
             sampler.weight(inner_index)
-            for sampler, inner_index in zip(
-                self.samplers, self.from_mapping(index)
-            )
+            for sampler, inner_index in zip(self.samplers, self.from_mapping(index))
         ]
 
     def update_weights_(self, function):

@@ -61,24 +59,17 @@ def update_weights_(self, function):
 
     def update_example_weight_(self, weights, index):
         inner_indices = self.from_mapping(index)
-        for sampler, weight, inner_index in zip(
-            self.samplers, weights, inner_indices
-        ):
-            sampler.update_example_weight_(
-                weight, inner_index
-            )
+        for sampler, weight, inner_index in zip(self.samplers, weights, inner_indices):
+            sampler.update_example_weight_(weight, inner_index)
 
     def sample_proportion(self, proportion):
-        return ZipSampler([
-            sampler.sample_proportion(proportion)
-            for sampler in self.samplers
-        ])
+        return ZipSampler(
+            [sampler.sample_proportion(proportion) for sampler in self.samplers]
+        )
 
     def state_dict(self):
-        return dict(
-            samplers=[sampler.state_dict() for sampler in self.samplers]
-        )
+        return dict(samplers=[sampler.state_dict() for sampler in self.samplers])
 
     def load_state_dict(self, state_dict):
-        for sampler, state_dict in zip(self.samplers, state_dict['samplers']):
+        for sampler, state_dict in zip(self.samplers, state_dict["samplers"]):
             sampler.load_state_dict(state_dict)

datastream/tools/numpy_seed.py

Lines changed: 10 additions & 10 deletions

@@ -3,7 +3,8 @@
 
 
 def numpy_seed(seed):
-    '''Function decorator that sets a temporary numpy seed during execution'''
+    """Function decorator that sets a temporary numpy seed during execution"""
+
     def decorator(fn):
         @wraps(fn)
         def seeded_function(*args, **kwargs):

@@ -12,25 +13,24 @@ def seeded_function(*args, **kwargs):
             output = fn(*args, **kwargs)
             np.random.set_state(random_state)
             return output
+
         return seeded_function
+
     return decorator
 
 
 def test_numpy_seed():
-
     def get_random_uniform(min, max):
         return np.random.random() * (max - min) + min
 
     random_state = np.random.get_state()
     numpy_seed(1)(get_random_uniform)(-1, 1)
     assert np.all(random_state[1] == np.random.get_state()[1])
 
-    assert (
-        numpy_seed(1)(get_random_uniform)(-1, 1) ==
-        numpy_seed(1)(get_random_uniform)(-1, 1)
-    )
+    assert numpy_seed(1)(get_random_uniform)(-1, 1) == numpy_seed(1)(
+        get_random_uniform
+    )(-1, 1)
 
-    assert (
-        numpy_seed(1)(get_random_uniform)(-1, 1) !=
-        numpy_seed(None)(get_random_uniform)(-1, 1)
-    )
+    assert numpy_seed(1)(get_random_uniform)(-1, 1) != numpy_seed(None)(
+        get_random_uniform
+    )(-1, 1)
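
As the test shows, calls are deterministic for a fixed seed while the surrounding global random state is restored afterwards; typical decorator usage looks roughly like this (import path assumed from the file location):

    import numpy as np
    from datastream.tools.numpy_seed import numpy_seed  # path assumed

    @numpy_seed(1)
    def draw():
        return np.random.random()

    assert draw() == draw()  # each call runs under the same temporary seed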

datastream/tools/split_dataframes.py

Lines changed: 51 additions & 47 deletions

@@ -14,45 +14,50 @@ def split_dataframes(
     filepath: Optional[Path] = None,
     frozen: Optional[bool] = False,
 ):
-    '''
+    """
     Split and save result. Add new examples and continue from the old split.
 
     As new examples come in it can handle:
     - Changing test size
     - Adapt after removing examples from dataset
     - Adapt to new stratification
-    '''
+    """
     if abs(sum(proportions.values()) - 1.0) >= 1e-5:
-        raise ValueError(' '.join([
-            'Expected sum of proportions to be 1.',
-            f'Proportions were {tuple(proportions.values())}',
-        ]))
+        raise ValueError(
+            " ".join(
+                [
+                    "Expected sum of proportions to be 1.",
+                    f"Proportions were {tuple(proportions.values())}",
+                ]
+            )
+        )
 
     if filepath is not None and filepath.exists():
         split = json.loads(filepath.read_text())
 
         if set(proportions.keys()) != set(split.keys()):
-            raise ValueError(' '.join([
-                'Expected split names in split file to be the same as the',
-                'keys in proportions',
-            ]))
+            raise ValueError(
+                " ".join(
+                    [
+                        "Expected split names in split file to be the same as the",
+                        "keys in proportions",
+                    ]
+                )
+            )
     else:
-        split = {
-            split_name: list()
-            for split_name in proportions.keys()
-        }
+        split = {split_name: list() for split_name in proportions.keys()}
 
     key_dataframe = pd.DataFrame({key_column: np.sort(dataframe[key_column].unique())})
 
     if frozen:
         if sum(map(len, split.values())) == 0:
-            raise ValueError('Frozen split is empty')
+            raise ValueError("Frozen split is empty")
         n_unassigned = (~key_dataframe[key_column].isin(sum(split.values(), []))).sum()
         if n_unassigned > 0:
             warnings.warn(
                 (
-                    f'Found {n_unassigned} unassigned examples when splitting the dataset.'
-                    ' The split is frozen so they will will be discarded'
+                    f"Found {n_unassigned} unassigned examples when splitting the dataset."
+                    " The split is frozen so they will will be discarded"
                 ),
                 UserWarning,
             )

@@ -120,23 +125,23 @@ def n_target_split(keys, proportion):
 
 
 def selected(k, unassigned):
-    return np.random.choice(
-        unassigned, size=k, replace=False
-    ).tolist()
+    return np.random.choice(unassigned, size=k, replace=False).tolist()
 
 
 def mock_dataframe():
-    return pd.DataFrame(dict(
-        index=np.arange(100),
-        number=np.random.randn(100),
-    ))
+    return pd.DataFrame(
+        dict(
+            index=np.arange(100),
+            number=np.random.randn(100),
+        )
+    )
 
 
 def test_standard():
-    split_file = Path('test_standard.json')
+    split_file = Path("test_standard.json")
     split_dataframes_ = split_dataframes(
         mock_dataframe(),
-        key_column='index',
+        key_column="index",
         proportions=dict(
             gradient=0.8,
             early_stopping=0.1,

@@ -151,18 +156,17 @@ def test_standard():
 
 
 def test_group_split_dataframe():
-    dataframe = mock_dataframe().assign(group=lambda df: df['index'] // 4)
+    dataframe = mock_dataframe().assign(group=lambda df: df["index"] // 4)
     split_dataframes_ = split_dataframes(
         dataframe,
-        key_column='group',
+        key_column="group",
         proportions=dict(
             train=0.8,
             compare=0.2,
         ),
     )
-    group_overlap = (
-        set(split_dataframes_['train'].group)
-        .intersection(split_dataframes_['compare'].group)
+    group_overlap = set(split_dataframes_["train"].group).intersection(
+        split_dataframes_["compare"].group
     )
     assert len(group_overlap) == 0
     assert tuple(map(len, split_dataframes_.values())) == (80, 20)

@@ -171,11 +175,11 @@ def test_group_split_dataframe():
 def test_validate_proportions():
     from pytest import raises
 
-    split_file = Path('test_validate_proportions.json')
+    split_file = Path("test_validate_proportions.json")
     with raises(ValueError):
         split_dataframes(
             mock_dataframe(),
-            key_column='index',
+            key_column="index",
             proportions=dict(train=0.4, test=0.4),
             filepath=split_file,
         )

@@ -184,11 +188,11 @@ def test_missing_key_column():
 def test_missing_key_column():
     from pytest import raises
 
-    split_file = Path('test_missing_key_column.json')
+    split_file = Path("test_missing_key_column.json")
     with raises(KeyError):
         split_dataframes(
             mock_dataframe(),
-            key_column='should_fail',
+            key_column="should_fail",
             proportions=dict(train=0.8, test=0.2),
             filepath=split_file,
         )

@@ -198,18 +202,18 @@ def test_missing_key_column():
 
 
 def test_no_split():
-    '''we do not need to support this'''
+    """we do not need to support this"""
     split_dataframes(
         mock_dataframe(),
-        key_column='index',
+        key_column="index",
         proportions=dict(all=1.0),
     )
 
 
 def test_split_empty():
     split_dataframes_ = split_dataframes(
         mock_dataframe().iloc[:0],
-        key_column='index',
+        key_column="index",
         proportions=dict(train=0.8, test=0.2),
     )
     for df in split_dataframes_.values():

@@ -219,28 +223,28 @@ def test_split_empty():
 def test_split_single_row():
     split_dataframes_ = split_dataframes(
         mock_dataframe().iloc[:1],
-        key_column='index',
+        key_column="index",
         proportions=dict(train=0.9999, test=0.0001),
     )
-    assert len(split_dataframes_['train']) == 1
-    assert len(split_dataframes_['test']) == 0
+    assert len(split_dataframes_["train"]) == 1
+    assert len(split_dataframes_["test"]) == 0
 
 
 def test_changed_split_names():
     from pytest import raises
 
-    split_file = Path('test_changed_split_names.json')
+    split_file = Path("test_changed_split_names.json")
     split_dataframes(
         mock_dataframe(),
-        key_column='index',
+        key_column="index",
         proportions=dict(train=0.8, test=0.2),
         filepath=split_file,
     )
 
     with raises(ValueError):
         split_dataframes(
             mock_dataframe(),
-            key_column='index',
+            key_column="index",
             proportions=dict(should_fail=0.8, test=0.2),
             filepath=split_file,
         )

@@ -255,15 +259,15 @@ def test_frozen():
     with raises(ValueError):
         split_dataframes(
             dataframe,
-            key_column='index',
+            key_column="index",
             proportions=dict(train=0.8, test=0.2),
             frozen=True,
         )
 
-    split_file = Path('test_frozen.json')
+    split_file = Path("test_frozen.json")
     split_dataframes(
         dataframe,
-        key_column='index',
+        key_column="index",
         proportions=dict(train=0.8, test=0.2),
         filepath=split_file,
     )

datastream/tools/star.py

Lines changed: 3 additions & 1 deletion

@@ -2,8 +2,10 @@
 
 
 def star(fn):
-    '''Wrap function to expand input to arguments'''
+    """Wrap function to expand input to arguments"""
+
     @wraps(fn)
     def wrapper(args):
         return fn(*args)
+
     return wrapper
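
For reference, star turns a function of several positional arguments into one that accepts a single tuple (import path assumed from the file location):

    from datastream.tools.star import star  # path assumed

    add = star(lambda a, b: a + b)
    assert add((1, 2)) == 3  # the tuple is expanded to positional arguments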

datastream/tools/starcompose.py

Lines changed: 13 additions & 11 deletions

@@ -1,11 +1,9 @@
-
-
 def starcompose(*transforms):
-    '''
+    """
     left compose functions together and expand tuples to args
 
     Use starcompose.debug for verbose output when debugging
-    '''
+    """
 
     # TODO: consider doing starcompose with inner function calls rather than
     # a loop

@@ -16,24 +14,28 @@ def _compose(*x):
         else:
             x = t(x)
         return x
+
     return _compose
 
 
 def starcompose_debug(*transforms):
-    '''
+    """
     verbose starcompose for debugging
-    '''
-    print('starcompose debug')
+    """
+    print("starcompose debug")
+
     def _compose(*x):
         for index, t in enumerate(transforms):
-            print(f'{index}:, fn={t}, x={x}')
+            print(f"{index}:, fn={t}, x={x}")
             if type(x) is tuple:
                 x = t(*x)
             else:
                 x = t(x)
         return x
+
     return _compose
 
+
 starcompose.debug = starcompose_debug
 

@@ -42,16 +44,16 @@ def test_starcompose():
 
     test = starcompose(lambda x, y: x + y)
     if test(3, 5) != 8:
-        raise Exception('Two args inputs failed')
+        raise Exception("Two args inputs failed")
 
     test = starcompose(lambda x: sum(x))
     if test((3, 5)) != 8:
-        raise Exception('Tuple input failed')
+        raise Exception("Tuple input failed")
 
     test = starcompose(
         lambda x: (x, x),
         lambda x, y: x + y,
         lambda x: x * 2,
     )
     if test(10) != 40:
-        raise Exception('Expanded tuple for inner function failed')
+        raise Exception("Expanded tuple for inner function failed")

datastream/tools/stratified_split.py

Lines changed: 4 additions & 9 deletions

@@ -13,19 +13,14 @@ def stratified_split(
     seed: Optional[int] = None,
     frozen: Optional[bool] = False,
 ):
-    if (
-        stratify_column is not None
-        and any(dataset.dataframe[key_column].duplicated())
-    ):
+    if stratify_column is not None and any(dataset.dataframe[key_column].duplicated()):
         # mathematically impossible in the general case
         warnings.warn(
-            'Trying to do stratified split with non-unique key column'
-            ' - cannot guarantee correct splitting of key values.'
+            "Trying to do stratified split with non-unique key column"
+            " - cannot guarantee correct splitting of key values."
         )
     strata = {
-        stratum_value: dataset.subset(
-            lambda df: df[stratify_column] == stratum_value
-        )
+        stratum_value: dataset.subset(lambda df: df[stratify_column] == stratum_value)
         for stratum_value in dataset.dataframe[stratify_column].unique()
     }
     split_strata = [

datastream/tools/verify_split.py

Lines changed: 11 additions & 6 deletions

@@ -5,7 +5,7 @@
 
 @validate_arguments
 def verify_split(old_path: Path, new_path: Path):
-    '''
+    """
     Verify that no keys from an old split are present in a different new split.
 
     .. highlight:: python

@@ -16,7 +16,7 @@ def verify_split(old_path: Path, new_path: Path):
         "path/to/new/split.json",
     )
 
-    '''
+    """
     for old_split_name, old_split in json.loads(old_path.read_text()).items():
         for new_split_name, new_split in json.loads(new_path.read_text()).items():
             if (

@@ -26,8 +26,13 @@ def verify_split(old_path: Path, new_path: Path):
                 raise ValueError(
                     f'Some keys from old split "{old_split_name}"'
                     f' are present in new split "{new_split_name}":\n'
-                    + str("\n".join(
-                        [str(old_split[index]) for index in range(min(10, len(old_split)))]
-                        + (["..."] if len(old_split) > 10 else [])
-                    ))
+                    + str(
+                        "\n".join(
+                            [
+                                str(old_split[index])
+                                for index in range(min(10, len(old_split)))
+                            ]
+                            + (["..."] if len(old_split) > 10 else [])
+                        )
+                    )
                 )
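
A small end-to-end sketch, assuming split files map split names to lists of keys (the format produced by split_dataframes); the file names here are illustrative:

    import json
    from pathlib import Path

    from datastream.tools.verify_split import verify_split  # path assumed

    Path("old_split.json").write_text(json.dumps(dict(train=[1, 2], test=[3])))
    Path("new_split.json").write_text(json.dumps(dict(train=[1, 2, 4], test=[3, 5])))

    verify_split("old_split.json", "new_split.json")  # passes: no key changed split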

docs/source/requirements.txt

Lines changed: 5 additions & 5 deletions

@@ -23,18 +23,18 @@ lazy-object-proxy==1.4.3
 MarkupSafe==1.1.1
 mccabe==0.6.1
 more-itertools==8.3.0
-numpy==1.18.5
+numpy==1.23.4
 packaging==20.4
 pandas==1.1.5
 pkginfo==1.5.0.1
 pluggy==0.13.1
-py==1.10.0
+py==1.11.0
 pycparser==2.20
 pydantic==1.8.2
 Pygments==2.7.4
 pylint==2.5.3
 pyparsing==2.4.7
-pyspark==3.0.3
+pyspark==3.3.0
 pytest==5.4.3
 python-dateutil==2.8.1
 pytz==2020.1

@@ -58,12 +58,12 @@ sphinxcontrib-jsmath==1.0.1
 sphinxcontrib-qthelp==1.0.3
 sphinxcontrib-serializinghtml==1.1.4
 toml==0.10.1
-torch==1.8.1
+torch==1.12.1
 tqdm==4.46.1
 twine==3.1.1
 typing-extensions==3.10.0.0
 urllib3==1.26.5
-waitress==1.4.4
+waitress==2.1.1
 wcwidth==0.2.4
 webencodings==0.5.1
 WebOb==1.8.6

poetry.lock

Lines changed: 413 additions & 302 deletions
Some generated files are not rendered by default.

pyproject.toml

Lines changed: 5 additions & 4 deletions

@@ -2,10 +2,10 @@
 name = "pytorch-datastream"
 version = "0.0.0"
 description = "Simple dataset to dataloader library for pytorch"
-authors = ["Aiwizo"]
+authors = ["NextML"]
 license = "Apache-2.0"
 readme = "README.rst"
-repository = "https://github.com/Aiwizo/pytorch-datastream"
+repository = "https://github.com/nextml-code/pytorch-datastream"
 documentation = "https://pytorch-datastream.readthedocs.io"
 keywords = [
     "pytorch",

@@ -34,16 +34,17 @@ packages = [
 ]
 
 [tool.poetry.dependencies]
-python = "^3.7"
+python = "^3.8"
 torch = "^1.4.0"
 numpy = "^1.17.0"
 pandas = "^1.0.5"
 pydantic = "^1.5.0"
 
-[tool.poetry.dev-dependencies]
+[tool.poetry.group.dev.dependencies]
 pylint = "^2.6.0"
 flake8 = "^3.8.4"
 pytest = "^6.1.2"
+black = "^22.10.0"
 
 [build-system]
 requires = ["poetry-core>=1.0.0"]
