
Commit ecaec64

Merge pull request #232 from pabloitu/brier_evaluations
Implementation of the Brier Score and its consistency test.
2 parents 7f151aa + a50d1e3
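
A rough usage sketch of the new public entry point, for reviewers. The forecast and catalog loading calls below are illustrative assumptions (any gridded forecast and observed catalog should do); only brier_score_test itself is added by this PR.

import csep
from csep.core.brier_evaluations import brier_score_test

# Placeholder inputs: these loader calls and file paths are assumptions for
# illustration only; they are not part of this PR.
forecast = csep.load_gridded_forecast('path/to/forecast.dat')
catalog = csep.load_catalog('path/to/catalog.csv')

result = brier_score_test(forecast, catalog, num_simulations=1000, seed=1)
print(result.observed_statistic)  # observed Brier score (0 is a perfect score)
print(result.quantile)            # fraction of simulated scores <= observed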

2 files changed: +238 -1 lines

csep/core/brier_evaluations.py (+178)
@@ -0,0 +1,178 @@
import numpy
import numpy as np
from scipy.stats import poisson

from csep.models import EvaluationResult
from csep.core.exceptions import CSEPCatalogException


def _brier_score_ndarray(forecast, observations):
    """ Computes the Brier (binary) score for spatial-magnitude cells
    using the formula:

    Q(Lambda, Sigma) = -2/N * sum_{i=1}^N (p_i - Ind(Sigma_i > 0))^2

    where p_i = 1 - exp(-Lambda_i) is the Poisson probability of at least one
    event in cell i, Lambda is the forecast rate array, Sigma is the observed
    catalog, N the number of spatial-magnitude cells and Ind is the indicator
    function, which is 1 if Sigma_i > 0 and 0 otherwise.

    Args:
        forecast: 2d array of forecasted rates
        observations: 2d array of observed counts
    Returns:
        brier: float, Brier score
    """

    prob_success = 1 - poisson.cdf(0, forecast)
    brier_cell = np.square(prob_success.ravel() - (observations.ravel() > 0))
    brier = -2 * brier_cell.sum()

    for n_dim in observations.shape:
        brier /= n_dim
    return brier


def _simulate_catalog(sim_cells, sampling_weights, random_numbers=None):
    """
    Simulates a catalog by sampling from the sampling_weights array.
    Identical to binomial_evaluations._simulate_catalog

    Args:
        sim_cells: number of active (non-zero) cells to simulate
        sampling_weights: cumulative, normalized forecast rates used to place
            simulated events into cells
        random_numbers: optional array of pre-computed uniform random numbers

    Returns:
        sim_fore: array with the simulated number of events per cell
    """
    sim_fore = numpy.zeros(sampling_weights.shape)

    if random_numbers is None:
        # Reset simulation array to zero, but don't reallocate
        sim_fore.fill(0)
        num_active_cells = 0
        while num_active_cells < sim_cells:
            random_num = numpy.random.uniform(0, 1)
            loc = numpy.searchsorted(sampling_weights, random_num,
                                     side='right')
            if sim_fore[loc] == 0:
                sim_fore[loc] = 1
                num_active_cells += 1
    else:
        # Find insertion points using binary search inserting
        # to satisfy a[i-1] <= v < a[i]
        pnts = numpy.searchsorted(sampling_weights, random_numbers,
                                  side='right')
        # Create simulated catalog by adding to the original locations
        numpy.add.at(sim_fore, pnts, 1)

    assert sim_fore.sum() == sim_cells, "simulated the wrong number of events!"
    return sim_fore


def _brier_score_test(forecast_data, observed_data, num_simulations=1000,
                      random_numbers=None, seed=None, verbose=True):
    """ Computes the Brier consistency test conditional on the observed number
    of active (non-zero) spatial-magnitude cells.

    Args:
        forecast_data: 2d array of forecasted rates for spatial_magnitude cells
        observed_data: 2d array of a catalog resampled to spatial_magnitude
            cells
        num_simulations: number of synthetic catalog simulations
        random_numbers: numpy array of random numbers to use for simulation
        seed: seed for random number generator
        verbose: print status updates

    Returns:
        qs: float, quantile score
        obs_brier: float, observed Brier score
        simulated_brier: list of Brier scores of the simulated catalogs
    """
    # Mask non-positive rates so they receive effectively zero sampling weight
    forecast_data = numpy.ma.masked_where(forecast_data <= 0.0, forecast_data)

    # set seed for the simulations
    if seed is not None:
        numpy.random.seed(seed)

    # used to determine where simulated earthquake should
    # be placed, by definition of cumsum these are sorted
    sampling_weights = (numpy.cumsum(forecast_data.ravel()) /
                        numpy.sum(forecast_data))

    # data structures to store results
    simulated_brier = []
    n_active_cells = len(numpy.unique(numpy.nonzero(observed_data.ravel())))
    num_cells_to_simulate = int(n_active_cells)

    # main simulation step in this loop
    for idx in range(num_simulations):
        if random_numbers is None:
            sim_fore = _simulate_catalog(num_cells_to_simulate,
                                         sampling_weights)
        else:
            sim_fore = _simulate_catalog(num_cells_to_simulate,
                                         sampling_weights,
                                         random_numbers=random_numbers[idx, :])

        # compute Brier score
        current_brier = _brier_score_ndarray(forecast_data.data, sim_fore)

        # append to list of simulated Brier scores
        simulated_brier.append(current_brier)

        # just be verbose
        if verbose:
            if (idx + 1) % 100 == 0:
                print(f'... {idx + 1} catalogs simulated.')

    obs_brier = _brier_score_ndarray(forecast_data.data, observed_data)
    # quantile score
    qs = numpy.sum(simulated_brier <= obs_brier) / num_simulations

    # float, float, list
    return qs, obs_brier, simulated_brier


def brier_score_test(gridded_forecast,
                     observed_catalog,
                     num_simulations=1000,
                     seed=None,
                     random_numbers=None,
                     verbose=False):
    """
    Performs the Brier conditional test on a Gridded Forecast using an
    Observed Catalog. The test is conditional on the observed number of active
    spatial-magnitude cells: every synthetic catalog activates the same number
    of cells as the observation, which removes the strong impact that
    differences in the number distribution would otherwise have on the test.
    """

    # grid catalog onto spatial grid
    try:
        _ = observed_catalog.region.magnitudes
    except CSEPCatalogException:
        observed_catalog.region = gridded_forecast.region
    gridded_catalog_data = observed_catalog.spatial_magnitude_counts()

    # call the Brier test on the gridded catalog data and forecast
    qs, obs_brier, simulated_brier = _brier_score_test(
        gridded_forecast.data,
        gridded_catalog_data,
        num_simulations=num_simulations,
        seed=seed,
        random_numbers=random_numbers,
        verbose=verbose
    )

    # populate result data structure
    result = EvaluationResult()
    result.test_distribution = simulated_brier
    result.name = 'Brier score-Test'
    result.observed_statistic = obs_brier
    result.quantile = qs
    result.sim_name = gridded_forecast.name
    result.obs_name = observed_catalog.name
    result.status = 'normal'
    result.min_mw = numpy.min(gridded_forecast.magnitudes)

    return result
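
A quick sanity check on the sign and scale of the score, mirroring the single-bin case exercised in the unit tests below (this snippet is an illustration only, not part of the diff): for a rate of 1 in a single cell, the probability of at least one event is p = 1 - exp(-1) ≈ 0.632, so a hit scores -2*(p - 1)^2 ≈ -0.271, a miss scores -2*p^2 ≈ -0.799, and 0 is the best attainable value.

import numpy as np
from scipy.stats import poisson
from csep.core.brier_evaluations import _brier_score_ndarray

rate = 1.0
p = 1 - poisson.pmf(0, rate)  # probability of at least one event, ~0.632

# single cell, at least one event observed ("hit"): ~ -0.271
print(_brier_score_ndarray(np.array([[rate]]), np.array([[1]])))
# single cell, no events observed ("miss"): ~ -0.799
print(_brier_score_ndarray(np.array([[rate]]), np.array([[0]])))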

tests/test_evaluations.py (+60 -1)
@@ -1,10 +1,12 @@
import os
import numpy
import numpy as np
import scipy.stats
import unittest

import csep.core.poisson_evaluations as poisson
import csep.core.binomial_evaluations as binary
import csep.core.brier_evaluations as brier
def get_datadir():
    root_dir = os.path.dirname(os.path.abspath(__file__))
    data_dir = os.path.join(root_dir, 'artifacts', 'Comcat')
@@ -115,5 +117,62 @@ def test_binomial_likelihood(self):
        numpy.testing.assert_allclose(simulated_ll[0], -7.921741654647629)


class TestPoissonBrier(unittest.TestCase):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.seed = 1
        self.forecast_data = numpy.array([[0.3, 0.2, 0.1], [0.2, 0.1, 0.1]])
        self.observed_data = numpy.array([[0, 1, 2], [1, 1, 0]])

    def test_brier_score_calculation(self):

        # 1 bin
        rate = 1
        prob = 1 - scipy.stats.poisson.pmf(0, rate)
        brier_score_hit = -2 * (prob - 1)**2
        brier_score = brier._brier_score_ndarray(numpy.array([[rate]]),
                                                 numpy.array([[1]]))
        numpy.testing.assert_allclose(brier_score_hit, brier_score)

        brier_score_nohit = -2 * prob**2
        brier_score = brier._brier_score_ndarray(numpy.array([[rate]]),
                                                 numpy.array([[0]]))
        numpy.testing.assert_allclose(brier_score_nohit, brier_score)
        # 2 bins
        rate = np.array([1, 0.5])
        hits = np.array([0, 1])
        prob = 1 - scipy.stats.poisson.pmf(0, rate)
        brier_score_2bins = (-2 * (prob - hits)**2).sum() / len(rate)
        brier_score = brier._brier_score_ndarray(np.array(rate),
                                                 np.array([hits]))
        numpy.testing.assert_allclose(brier_score_2bins, brier_score)

        hits = np.array([0, 0])
        brier_score_2bins = (-2 * (prob - hits)**2).sum() / len(rate)
        brier_score = brier._brier_score_ndarray(np.array(rate),
                                                 np.array([hits]))
        numpy.testing.assert_allclose(brier_score_2bins, brier_score)

    def test_brier_test(self):

        expected_sim = numpy.array([1, 1, 1, 1, 0, 0])
        qs, obs_brier, simulated_brier = brier._brier_score_test(
            self.forecast_data,
            self.observed_data,
            num_simulations=1,
            seed=self.seed,
            verbose=True)

        probs = 1 - scipy.stats.poisson.pmf(0, self.forecast_data.ravel())
        sim_brier_analytic = ((-2 * (probs - expected_sim)**2).sum() /
                              len(probs))
        numpy.testing.assert_allclose(simulated_brier[0], sim_brier_analytic)
        obs_brier_analytic = (
            (-2 * (probs - self.observed_data.ravel().astype(bool))**2).sum() /
            len(probs))
        numpy.testing.assert_allclose(obs_brier, obs_brier_analytic)


if __name__ == '__main__':
    unittest.main()
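
Finally, a small runnable sketch of the consistency test on the same toy arrays used in the tests above. Low quantiles mean the observed Brier score sits in the poor (lower) tail of the scores obtained from catalogs simulated under the forecast; the 0.05 threshold here is purely illustrative and not part of this PR.

import numpy as np
from csep.core.brier_evaluations import _brier_score_test

forecast = np.array([[0.3, 0.2, 0.1], [0.2, 0.1, 0.1]])
observed = np.array([[0, 1, 2], [1, 1, 0]])

qs, obs_brier, sim_brier = _brier_score_test(forecast, observed,
                                             num_simulations=1000,
                                             seed=1, verbose=False)
print('observed Brier score:', obs_brier)
# one-sided check at an illustrative 0.05 level
print('consistent' if qs >= 0.05 else 'inconsistent at the 5% level')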
