Skip to content

Commit 15427e2

Browse files
MSD-739: introduce noise for KDE calculation (#63)
1 parent 1e996b4 commit 15427e2

File tree

2 files changed

+12
-11
lines changed

2 files changed

+12
-11
lines changed

mostlyai/qa/_accuracy.py

+9 -11
Original file line numberDiff line numberDiff line change
@@ -250,18 +250,16 @@ def calculate_numeric_uni_kdes(df: pd.DataFrame, trn_kdes: dict[str, pd.Series]
250250

251251
# estimate gaussian kernels
252252
series_vals = series.dropna().to_numpy("float")
253-
if len(series_vals) > 1:
254-
try:
255-
series_kde = scipy.stats.gaussian_kde(series_vals)
256-
val_y = series_kde(val_x.to_numpy("float"))
257-
val_y = (val_y / (val_y.sum() + 1e-30)).round(5)
258-
except np.linalg.LinAlgError:
259-
# handle `singular matrix` error that can occur for constants
260-
val_y = [1] * len(val_x)
261-
elif len(series_vals) == 1:
253+
# add a small amount of Gaussian noise so that constant values do not trigger a singular matrix error in gaussian_kde
254+
noise = np.abs(minimum * 1e-3 if (minimum := np.min(series_vals)) != 0 else 1e-18)
255+
series_vals += np.random.normal(loc=0, scale=noise, size=series_vals.shape)
256+
try:
257+
series_kde = scipy.stats.gaussian_kde(series_vals)
258+
val_y = series_kde(val_x.to_numpy("float"))
259+
val_y = (val_y / (val_y.sum() + 1e-30)).round(5)
260+
except Exception as e:
261+
_LOG.warning(f"gaussian_kde failed, using ones instead: {e}")
262262
val_y = [1] * len(val_x)
263-
else:
264-
val_y = [np.nan] * len(val_x)
265263
col_kdes[col] = pd.Series(val_y, index=val_x, name=col)
266264

267265
if trn_kdes is not None:

mostlyai/qa/_similarity.py

+3
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,9 @@ def make_contour_and_centroid_traces(
131131

132132
# estimate gaussian kernels
133133
data = data.T
134+
# add a small amount of Gaussian noise to avoid a singular matrix error in gaussian_kde
135+
noise = np.abs(minimum * 1e-3 if (minimum := np.min(data)) != 0 else 1e-18)
136+
data += np.random.normal(loc=0, scale=noise, size=data.shape)
134137
try:
135138
Z = scipy.stats.gaussian_kde(data)(np.vstack([X.ravel(), Y.ravel()])).reshape(X.shape)
136139
except Exception as e:

0 commit comments

Comments
 (0)