diff --git a/torcharrow/icolumn.py b/torcharrow/icolumn.py index 8bde374e6..dbcffe2ae 100644 --- a/torcharrow/icolumn.py +++ b/torcharrow/icolumn.py @@ -1457,14 +1457,17 @@ def _nunique(self, drop_null=True): else: return len(set(i for i in self if i is not None)) - # quantile @trace @expression def _quantile(self, q, interpolation="midpoint"): """ - Compute the q-th percentile of non-null data. + Compute the q-th quantile of non-null data. Inefficient prototype implementation. + + Args: + q: float or array-like quantiles to compute, value must be within [0, 1] + interpolation: interpolation method to use """ if interpolation != "midpoint": @@ -1475,8 +1478,8 @@ def _quantile(self, q, interpolation="midpoint"): return [] out = [] s = sorted(self) - for percent in q: - k = (len(self) - 1) * (percent / 100) + for p in q: + k = (len(self) - 1) * p f = math.floor(k) c = math.ceil(k) if f == c: diff --git a/torcharrow/inumerical_column.py b/torcharrow/inumerical_column.py index 131221b4b..0fc961a8f 100644 --- a/torcharrow/inumerical_column.py +++ b/torcharrow/inumerical_column.py @@ -189,7 +189,7 @@ def describe( res._append(("mean", self.mean())) res._append(("std", self.std())) res._append(("min", self.min())) - values = self._quantile(percentiles, "midpoint") + values = self._quantile([p / 100 for p in percentiles], "midpoint") for p, v in zip(percentiles, values): res._append((f"{p}%", v)) res._append(("max", self.max()))