Skip to content

Commit 822d44e

Browse files
author
FelixAbrahamsson
committed
improve: warn when frozen split has unassigned data
1 parent eebbf00 commit 822d44e

File tree

1 file changed

+10
-0
lines changed

1 file changed

+10
-0
lines changed

datastream/tools/split_dataframes.py

+10
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import json
55
import numpy as np
66
import pandas as pd
7+
import warnings
78

89

910
def split_dataframes(
@@ -46,6 +47,15 @@ def split_dataframes(
4647
if frozen:
4748
if sum(map(len, split.values())) == 0:
4849
raise ValueError('Frozen split is empty')
50+
n_unassigned = (~key_dataframe[key_column].isin(sum(split.values(), []))).sum()
51+
if n_unassigned > 0:
52+
warnings.warn(
53+
(
54+
f'Found {n_unassigned} unassigned examples when splitting the dataset.'
55+
' The split is frozen so they will be discarded'
56+
),
57+
UserWarning,
58+
)
4959
else:
5060
split_proportions = tuple(proportions.items())
5161

0 commit comments

Comments
 (0)