
Commit 16f12b8

Author: cmgoold (committed)
docs: add comparison vignette
Add a comparison notebook using simulated data. Addresses #10
1 parent eefca59 · commit 16f12b8

File tree

6 files changed: +406 / -155 lines changed


bayesblend/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -1,8 +1,10 @@
 from .models import BayesStacking, HierarchicalBayesStacking, MleStacking, PseudoBma
+from .io import Draws

 __all__ = [
     "MleStacking",
     "PseudoBma",
     "BayesStacking",
     "HierarchicalBayesStacking",
+    "Draws",
 ]
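Since this change re-exports `Draws` at the package top level (it is now listed in `__all__`), it can be imported alongside the stacking classes. A minimal check:

# Draws is now importable from the package top level rather than from bayesblend.io
from bayesblend import Draws, MleStacking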

bayesblend/models.py

Lines changed: 9 additions & 13 deletions
@@ -215,6 +215,8 @@ def _blend(
     for model, draws in model_draws.items():
         blend_id = blend_idx[model]
         for par, samples in draws:
+            if samples is None:
+                continue
             blended_list = [
                 list(samples[draws_idx == blend_id, idx])
                 for idx, draws_idx in enumerate(draws_idx_list)
@@ -271,27 +273,23 @@ def __init__(
     def _obj_fun(self, w, *args):
         """Negative sum of the weighted log predictive densities"""
         Y = args[0]
-        w = np.concatenate([w, [1 - sum(w)]])
         log_scores = np.log(Y @ w)
         return -sum(log_scores)

     def _grad(self, w, *args):
         """Jacobian of the objective function.

         The gradient of log(Y @ w) wrt w is 1/(Y @ w) Y, using
-        the chain rule. Since we are only estimating K - 1
-        weights, we can correct the gradient by multiplying
-        by Y[:,:K - 1] - Y[:,-1].
+        the chain rule.
         """
         Y = args[0]
-        w = np.concatenate([w, [1 - sum(w)]])
         N, K = Y.shape
-        grad = np.diag(np.ones(N) / (Y @ w)) @ (Y[:,:K - 1] - Y[:,-1].reshape((N, 1)))
+        grad = np.diag(np.ones(N) / (Y @ w)) @ Y
         return -grad.sum(axis=0)

     def _constraint(self, w):
         # -sum(w) + 1 > 0
-        return -sum(w) + 1
+        return sum(w) - 1

     def fit(self) -> MleStacking:
         lpd_points = np.array([draws.lpd for draws in self.model_draws.values()]).T
@@ -302,18 +300,16 @@ def fit(self) -> MleStacking:
             fun=self._obj_fun,
             jac=self._grad,
             args=(exp_lpd),
-            x0=np.repeat(1 / K, K - 1),
+            x0=np.repeat(1 / K, K),
             method="SLSQP",
-            constraints=dict(type="ineq", fun=self._constraint),
-            bounds=[(0, 1) for _ in range(K - 1)],
+            constraints=dict(type="eq", fun=self._constraint),
+            bounds=[(0, 1) for _ in range(K)],
             options=self.optimizer_options,
         )
-        _weights = np.concatenate([res.x, [1 - sum(res.x)]])
-        breakpoint()

         self._weights = {
             model: np.atleast_2d(weight)
-            for model, weight in zip(self.model_draws, _weights)
+            for model, weight in zip(self.model_draws, res.x)
         }
         self._model_info = res
         return self
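For context, this change switches `MleStacking` from optimising K - 1 free weights (with the last weight recovered as 1 - sum(w)) to optimising all K weights under an equality constraint. The sketch below reproduces that setup in isolation; the variable names and toy data are illustrative only, not the library's internals.

import numpy as np
from scipy.optimize import minimize

# Standalone sketch of the revised optimisation: all K weights are estimated
# directly, with the simplex enforced by sum(w) = 1 and bounds 0 <= w_k <= 1.

def obj_fun(w, Y):
    # negative sum of the weighted log predictive densities
    return -np.sum(np.log(Y @ w))

def grad(w, Y):
    # d/dw_k of -sum_i log((Y @ w)_i) = -sum_i Y[i, k] / (Y @ w)_i, by the chain rule
    return -(Y.T @ (1.0 / (Y @ w)))

def constraint(w):
    # equality constraint: sum(w) - 1 == 0
    return np.sum(w) - 1

# Y: N x K matrix of exponentiated pointwise log predictive densities (toy data)
rng = np.random.default_rng(1)
Y = np.exp(rng.normal(-1.0, 0.1, size=(100, 3)))
K = Y.shape[1]

res = minimize(
    fun=obj_fun,
    jac=grad,
    args=(Y,),
    x0=np.repeat(1 / K, K),
    method="SLSQP",
    constraints=dict(type="eq", fun=constraint),
    bounds=[(0, 1) for _ in range(K)],
)
print(res.x)  # stacking weights, one per model, summing to 1

With all K weights free, the Jacobian reduces to -Y.T @ (1 / (Y @ w)), which is why the K - 1 correction term and the re-assembly of the final weight from 1 - sum(res.x) could be removed.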

docs/user-guide/blending.md

Lines changed: 6 additions & 3 deletions
@@ -25,6 +25,9 @@ marginal likelihood or evidence for each model in $\mathcal{M}$.
 Because this quantity is often difficult to calculate,
 *pseudo Bayesian model averaging* (pseudo-BMA) has been introduced
 as a method of approximating BMA using information criteria.
+In addition, *pseudo Bayesian model averaging plus* (pseudo-BMA+)
+accounts for uncertainty in the information criteria by applying
+the Bayesian bootstrap to log likelihood vectors.

 [Stacking](
 https://en.wikipedia.org/wiki/Ensemble_learning#Stacking
@@ -45,8 +48,6 @@ represents any scoring rule
 used to evaluate data point $y_{i}$ from
 the posterior distribution of model $k$, $p(\Theta_{k} \mid \mathbf{y})$,
 with parameters $\Theta_{k}$.
-In practice, we only need to estimate $K - 1$ weights as the final
-weight is known due to $\sum_{k=1} w = 1$.

 The appeal of stacking, apart from its reported improved predictive
 accuracy over other procedures (see e.g.
@@ -61,7 +62,9 @@ The weights can also be a function of covariates,
 and/or can be estimated hierarchically.
 These extensions have generally been referred to
 as hierarchical Bayesian stacking (see
-[Yao *et al.*, 2021](https://arxiv.org/abs/2101.08954)).
+[Yao *et al.*, 2021](https://arxiv.org/abs/2101.08954))
+because of their use of a fully Bayesian model
+to estimate the weights.

 ## Blending
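The pseudo-BMA+ passage added above (Bayesian bootstrap over the log likelihood vectors) can be made concrete with a short sketch. This is a generic illustration of the idea, not BayesBlend's `PseudoBma` implementation; the function name and toy data are hypothetical.

import numpy as np

def pseudo_bma_plus_weights(lpd_points, n_boot=1000, seed=1):
    """Illustrative pseudo-BMA+ weights from an N x K matrix of pointwise
    log predictive densities (one column per model)."""
    rng = np.random.default_rng(seed)
    n, k = lpd_points.shape
    boot_weights = np.zeros((n_boot, k))
    for b in range(n_boot):
        alpha = rng.dirichlet(np.ones(n))      # Bayesian bootstrap draw over observations
        elpd = n * (alpha @ lpd_points)        # re-weighted total elpd for each model
        z = elpd - elpd.max()                  # stabilise the exponentiation
        boot_weights[b] = np.exp(z) / np.exp(z).sum()
    return boot_weights.mean(axis=0)           # average the weights over bootstrap draws

# toy example: pointwise log densities for three models
rng = np.random.default_rng(0)
lpd = rng.normal(-1.0, 0.5, size=(200, 3))
print(pseudo_bma_plus_weights(lpd))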

docs/user-guide/index.md

Lines changed: 1 addition & 1 deletion
@@ -5,5 +5,5 @@ that illustrate the use of BayesBlend.

 * [Getting started](getting-started.md)
 * [Model averaging, stacking and blending](blending.md)
-* [Recovering blending weights using simulated data](simulation.md)
+* [Comparing mixture modelling to pseudo-BMA+ and stacking](simulation.md)
 * [Integration with Arviz](arviz.md)
