Skip to content

Commit 5fb55ca

Browse files
committed
ft: significantly increased brier test speed
1 parent e36bc42 commit 5fb55ca

File tree

1 file changed

+36
-44
lines changed

1 file changed

+36
-44
lines changed

csep/core/brier_evaluations.py

+36-44
Original file line numberDiff line numberDiff line change
@@ -1,96 +1,88 @@
11
import numpy
2-
import scipy.stats
3-
import scipy.spatial
2+
import numpy as np
3+
from scipy.stats import poisson
44

55
from csep.models import EvaluationResult
66
from csep.core.exceptions import CSEPCatalogException
77

88

9-
def bier_score_ndarray(forecast, observations):
9+
def _brier_score_ndarray(forecast, observations):
1010
""" Computes the brier score
1111
"""
12-
13-
observations = (observations >= 1).astype(int)
14-
prob_success = 1 - scipy.stats.poisson.cdf(0, forecast)
15-
brier = []
16-
17-
for p, o in zip(prob_success.ravel(), observations.ravel()):
18-
19-
if o == 0:
20-
brier.append(-2 * p ** 2)
21-
else:
22-
brier.append(-2 * (p - 1) ** 2)
23-
brier = numpy.sum(brier)
12+
prob_success = 1 - poisson.cdf(0, forecast)
13+
brier_cell = np.square(prob_success.ravel() - (observations.ravel() > 0))
14+
brier = -2 * brier_cell.sum()
2415

2516
for n_dim in observations.shape:
2617
brier /= n_dim
27-
2818
return brier
2919

3020

31-
def _simulate_catalog(sim_cells, sampling_weights, sim_fore, random_numbers=None):
32-
# Modified this code to generate simulations in a way that every cell gets one earthquake
33-
# Generate uniformly distributed random numbers in [0,1), this
21+
def _simulate_catalog(sim_cells, sampling_weights, random_numbers=None):
22+
sim_fore = numpy.zeros(sampling_weights.shape)
23+
3424
if random_numbers is None:
3525
# Reset simulation array to zero, but don't reallocate
3626
sim_fore.fill(0)
3727
num_active_cells = 0
3828
while num_active_cells < sim_cells:
3929
random_num = numpy.random.uniform(0,1)
40-
loc = numpy.searchsorted(sampling_weights, random_num, side='right')
30+
loc = numpy.searchsorted(sampling_weights, random_num,
31+
side='right')
4132
if sim_fore[loc] == 0:
4233
sim_fore[loc] = 1
43-
num_active_cells = num_active_cells + 1
34+
num_active_cells += 1
4435
else:
45-
# Find insertion points using binary search inserting to satisfy a[i-1] <= v < a[i]
46-
pnts = numpy.searchsorted(sampling_weights, random_numbers, side='right')
36+
# Find insertion points using binary search inserting
37+
# to satisfy a[i-1] <= v < a[i]
38+
pnts = numpy.searchsorted(sampling_weights, random_numbers,
39+
side='right')
4740
# Create simulated catalog by adding to the original locations
4841
numpy.add.at(sim_fore, pnts, 1)
4942

5043
assert sim_fore.sum() == sim_cells, "simulated the wrong number of events!"
5144
return sim_fore
5245

5346

54-
def _brier_score_test(forecast_data, observed_data, num_simulations=1000, random_numbers=None,
55-
seed=None, use_observed_counts=True, verbose=True):
56-
""" Computes binary conditional-likelihood test from CSEP using an efficient simulation based approach.
47+
def _brier_score_test(forecast_data, observed_data, num_simulations=1000,
48+
random_numbers=None, seed=None, verbose=True):
49+
""" Computes binary conditional-likelihood test from CSEP using an
50+
efficient simulation based approach.
5751
5852
Args:
5953
6054
"""
61-
6255
# Array-masking that avoids log singularities:
6356
forecast_data = numpy.ma.masked_where(forecast_data <= 0.0, forecast_data)
6457

6558
# set seed for the likelihood test
6659
if seed is not None:
6760
numpy.random.seed(seed)
61+
import time
62+
start = time.process_time()
6863

69-
# used to determine where simulated earthquake should be placed, by definition of cumsum these are sorted
70-
sampling_weights = numpy.cumsum(forecast_data.ravel()) / numpy.sum(forecast_data)
64+
# used to determine where simulated earthquake should
65+
# be placed, by definition of cumsum these are sorted
66+
sampling_weights = (numpy.cumsum(forecast_data.ravel()) /
67+
numpy.sum(forecast_data))
7168

7269
# data structures to store results
73-
sim_fore = numpy.zeros(sampling_weights.shape)
7470
simulated_brier = []
7571
n_active_cells = len(numpy.unique(numpy.nonzero(observed_data.ravel())))
72+
num_cells_to_simulate = int(n_active_cells)
7673

7774
# main simulation step in this loop
7875
for idx in range(num_simulations):
79-
if use_observed_counts:
80-
num_cells_to_simulate = int(n_active_cells)
81-
8276
if random_numbers is None:
8377
sim_fore = _simulate_catalog(num_cells_to_simulate,
84-
sampling_weights,
85-
sim_fore)
78+
sampling_weights)
8679
else:
8780
sim_fore = _simulate_catalog(num_cells_to_simulate,
8881
sampling_weights,
89-
sim_fore,
9082
random_numbers=random_numbers[idx, :])
9183

9284
# compute Brier score
93-
current_brier = bier_score_ndarray(forecast_data.data, sim_fore)
85+
current_brier = _brier_score_ndarray(forecast_data.data, sim_fore)
9486

9587
# append to list of simulated Brier score
9688
simulated_brier.append(current_brier)
@@ -100,8 +92,7 @@ def _brier_score_test(forecast_data, observed_data, num_simulations=1000, random
10092
if (idx + 1) % 100 == 0:
10193
print(f'... {idx + 1} catalogs simulated.')
10294

103-
# observed Brier score
104-
obs_brier = bier_score_ndarray(forecast_data.data, observed_data)
95+
obs_brier = _brier_score_ndarray(forecast_data.data, observed_data)
10596

10697
# quantile score
10798
qs = numpy.sum(simulated_brier <= obs_brier) / num_simulations
@@ -116,10 +107,12 @@ def brier_score_test(gridded_forecast,
116107
seed=None,
117108
random_numbers=None,
118109
verbose=False):
119-
""" Performs the Brier conditional test on Gridded Forecast using an Observed Catalog.
120-
121-
Normalizes the forecast so the forecasted rate are consistent with the observations. This modification
122-
eliminates the strong impact differences in the number distribution have on the forecasted rates.
110+
""" Performs the Brier conditional test on Gridded Forecast using an
111+
Observed Catalog.
112+
Normalizes the forecast so the forecasted rate are consistent with the
113+
observations. This modification
114+
eliminates the strong impact differences in the number distribution
115+
have on the forecasted rates.
123116
124117
"""
125118

@@ -137,7 +130,6 @@ def brier_score_test(gridded_forecast,
137130
num_simulations=num_simulations,
138131
seed=seed,
139132
random_numbers=random_numbers,
140-
use_observed_counts=True,
141133
verbose=verbose
142134
)
143135

0 commit comments

Comments
 (0)