diff --git a/src/optimagic/optimization/internal_optimization_problem.py b/src/optimagic/optimization/internal_optimization_problem.py index 6e0e58e46..5205fd3af 100644 --- a/src/optimagic/optimization/internal_optimization_problem.py +++ b/src/optimagic/optimization/internal_optimization_problem.py @@ -11,7 +11,10 @@ from optimagic.batch_evaluators import process_batch_evaluator from optimagic.differentiation.derivatives import first_derivative from optimagic.differentiation.numdiff_options import NumdiffOptions -from optimagic.exceptions import UserFunctionRuntimeError, get_traceback +from optimagic.exceptions import ( + UserFunctionRuntimeError, + get_traceback, +) from optimagic.logging.logger import LogStore from optimagic.logging.types import IterationState from optimagic.optimization.fun_value import ( @@ -471,6 +474,7 @@ def _pure_evaluate_jac( out_jac = _process_jac_value( value=jac_value, direction=self._direction, converter=self._converter, x=x ) + self._assert_finite_jac(out_jac, jac_value, params) stop_time = time.perf_counter() @@ -508,6 +512,7 @@ def func(x: NDArray[np.float64]) -> SpecificFunctionValue: p = self._converter.params_from_internal(x) return self._fun(p) + params = self._converter.params_from_internal(x) try: numdiff_res = first_derivative( func, @@ -543,6 +548,8 @@ def func(x: NDArray[np.float64]) -> SpecificFunctionValue: warnings.warn(msg) fun_value, jac_value = self._error_penalty_func(x) + self._assert_finite_jac(jac_value, jac_value, params) + algo_fun_value, hist_fun_value = _process_fun_value( value=fun_value, # type: ignore solver_type=self._solver_type, @@ -682,6 +689,8 @@ def _pure_evaluate_fun_and_jac( if self._direction == Direction.MAXIMIZE: out_jac = -out_jac + self._assert_finite_jac(out_jac, jac_value, params) + stop_time = time.perf_counter() hist_entry = HistoryEntry( @@ -704,6 +713,32 @@ def _pure_evaluate_fun_and_jac( return (algo_fun_value, out_jac), hist_entry, log_entry + def _assert_finite_jac( + self, out_jac: 
NDArray[np.float64], jac_value: PyTree, params: PyTree + ) -> None: + """Check for infinite and NaN values in the jacobian and raise an error if + found. + + Args: + out_jac: internal processed jacobian to check for infinities. + jac_value: original jacobian value as returned by the user function, + included in error messages for debugging. + params: user-facing parameter representation at evaluation point. + + Raises: + UserFunctionRuntimeError: If any infinite or NaN values are found in the jacobian. + + """ + if not np.all(np.isfinite(out_jac)): + msg = ( + "The optimization received a Jacobian containing infinite " + "or NaN values.\nCheck your objective function or its " + "jacobian, or try a different optimizer.\n" + f"Parameters at evaluation point: {params}\n" + f"Jacobian values: {jac_value}" + ) + raise UserFunctionRuntimeError(msg) + + def _process_fun_value( value: SpecificFunctionValue, diff --git a/src/optimagic/parameters/space_conversion.py b/src/optimagic/parameters/space_conversion.py index 73ed5edd3..f0710cf28 100644 --- a/src/optimagic/parameters/space_conversion.py +++ b/src/optimagic/parameters/space_conversion.py @@ -145,7 +145,6 @@ def get_space_converter( soft_lower_bounds=_soft_lower, soft_upper_bounds=_soft_upper, ) - return converter, params diff --git a/tests/optimagic/optimization/test_invalid_jacobian_value.py b/tests/optimagic/optimization/test_invalid_jacobian_value.py new file mode 100644 index 000000000..906579b24 --- /dev/null +++ b/tests/optimagic/optimization/test_invalid_jacobian_value.py @@ -0,0 +1,342 @@ +import numpy as np +import pandas as pd +import pytest + +from optimagic.exceptions import UserFunctionRuntimeError +from optimagic.optimization.optimize import minimize + + +def test_with_infinite_jac_value_unconditional_in_lists(): + def sphere(params): + return params @ params + + def sphere_gradient(params): + return np.full_like(params, np.inf) + + with pytest.raises(UserFunctionRuntimeError): + minimize( + fun=sphere, + 
params=np.arange(10) + 400, + algorithm="scipy_lbfgsb", + jac=sphere_gradient, + ) + + +def test_with_infinite_jac_value_conditional_in_lists(): + def sphere(params): + return params @ params + + def true_gradient(params): + return 2 * params + + def param_norm(params): + return np.linalg.norm(params) + + def sphere_gradient(params): + if param_norm(params) >= 1: + return true_gradient(params) + else: + return np.full_like(params, np.inf) + + with pytest.raises(UserFunctionRuntimeError): + minimize( + fun=sphere, + params=np.arange(10) + 400, + algorithm="scipy_lbfgsb", + jac=sphere_gradient, + ) + + +def test_with_infinite_fun_and_jac_value_unconditional_in_lists(): + def sphere_and_gradient(params): + function_value = params @ params + grad = np.full_like(params, np.inf) + return function_value, grad + + with pytest.raises(UserFunctionRuntimeError): + minimize( + fun_and_jac=sphere_and_gradient, + params=np.arange(10) + 400, + algorithm="scipy_lbfgsb", + ) + + +def test_with_infinite_fun_and_jac_value_conditional_in_lists(): + def true_gradient(params): + return 2 * params + + def param_norm(params): + return np.linalg.norm(params) + + def sphere_gradient(params): + if param_norm(params) >= 1: + return true_gradient(params) + else: + return np.full_like(params, np.inf) + + def sphere_and_gradient(params): + function_value = params @ params + grad = sphere_gradient(params) + return function_value, grad + + with pytest.raises(UserFunctionRuntimeError): + minimize( + fun_and_jac=sphere_and_gradient, + params=np.arange(10) + 400, + algorithm="scipy_lbfgsb", + ) + + +def test_with_infinite_jac_value_unconditional_in_dicts(): + def sphere(params): + return params["a"] ** 2 + params["b"] ** 2 + (params["c"] ** 2).sum() + + def sphere_gradient(params): + return {"a": np.inf, "b": np.inf, "c": np.full_like(params["c"], np.inf)} + + with pytest.raises(UserFunctionRuntimeError): + minimize( + fun=sphere, + params={"a": 400, "b": 400, "c": pd.Series([200, 300, 400])}, + 
algorithm="scipy_lbfgsb", + jac=sphere_gradient, + ) + + +def test_with_infinite_jac_value_conditional_in_dicts(): + def sphere(params): + return params["a"] ** 2 + params["b"] ** 2 + (params["c"] ** 2).sum() + + def true_gradient(params): + return {"a": 2 * params["a"], "b": 2 * params["b"], "c": 2 * params["c"]} + + def param_norm(params): + squared_norm = ( + params["a"] ** 2 + params["b"] ** 2 + np.linalg.norm(params["c"]) ** 2 + ) + return np.sqrt(squared_norm) + + def sphere_gradient(params): + if param_norm(params) >= 1: + return true_gradient(params) + else: + return {"a": np.inf, "b": np.inf, "c": np.full_like(params["c"], np.inf)} + + with pytest.raises(UserFunctionRuntimeError): + minimize( + fun=sphere, + params={"a": 400, "b": 400, "c": pd.Series([200, 300, 400])}, + algorithm="scipy_lbfgsb", + jac=sphere_gradient, + ) + + +def test_with_infinite_fun_and_jac_value_unconditional_in_dicts(): + def sphere_and_gradient(params): + function_value = params["a"] ** 2 + params["b"] ** 2 + (params["c"] ** 2).sum() + grad = {"a": np.inf, "b": np.inf, "c": np.full_like(params["c"], np.inf)} + return function_value, grad + + with pytest.raises(UserFunctionRuntimeError): + minimize( + fun_and_jac=sphere_and_gradient, + params={"a": 400, "b": 400, "c": pd.Series([200, 300, 400])}, + algorithm="scipy_lbfgsb", + ) + + +def test_with_infinite_fun_and_jac_value_conditional_in_dicts(): + def true_gradient(params): + return {"a": 2 * params["a"], "b": 2 * params["b"], "c": 2 * params["c"]} + + def param_norm(params): + squared_norm = ( + params["a"] ** 2 + params["b"] ** 2 + np.linalg.norm(params["c"]) ** 2 + ) + return np.sqrt(squared_norm) + + def sphere_gradient(params): + if param_norm(params) >= 1: + return true_gradient(params) + else: + return {"a": np.inf, "b": np.inf, "c": np.full_like(params["c"], np.inf)} + + def sphere_and_gradient(params): + function_value = params["a"] ** 2 + params["b"] ** 2 + (params["c"] ** 2).sum() + grad = sphere_gradient(params) + 
return function_value, grad + + with pytest.raises(UserFunctionRuntimeError): + minimize( + fun_and_jac=sphere_and_gradient, + params={"a": 400, "b": 400, "c": pd.Series([200, 300, 400])}, + algorithm="scipy_lbfgsb", + ) + + +def test_with_nan_jac_value_unconditional_in_lists(): + def sphere(params): + return params @ params + + def sphere_gradient(params): + return np.full_like(params, np.nan) + + with pytest.raises(UserFunctionRuntimeError): + minimize( + fun=sphere, + params=np.arange(10) + 400, + algorithm="scipy_lbfgsb", + jac=sphere_gradient, + ) + + +def test_with_nan_jac_value_conditional_in_lists(): + def sphere(params): + return params @ params + + def true_gradient(params): + return 2 * params + + def param_norm(params): + return np.linalg.norm(params) + + def sphere_gradient(params): + if param_norm(params) >= 1: + return true_gradient(params) + else: + return np.full_like(params, np.nan) + + with pytest.raises(UserFunctionRuntimeError): + minimize( + fun=sphere, + params=np.arange(10) + 400, + algorithm="scipy_lbfgsb", + jac=sphere_gradient, + ) + + +def test_with_nan_fun_and_jac_value_unconditional_in_lists(): + def sphere_and_gradient(params): + function_value = params @ params + grad = np.full_like(params, np.nan) + return function_value, grad + + with pytest.raises(UserFunctionRuntimeError): + minimize( + fun_and_jac=sphere_and_gradient, + params=np.arange(10) + 400, + algorithm="scipy_lbfgsb", + ) + + +def test_with_nan_fun_and_jac_value_conditional_in_lists(): + def true_gradient(params): + return 2 * params + + def param_norm(params): + return np.linalg.norm(params) + + def sphere_gradient(params): + if param_norm(params) >= 1: + return true_gradient(params) + else: + return np.full_like(params, np.nan) + + def sphere_and_gradient(params): + function_value = params @ params + grad = sphere_gradient(params) + return function_value, grad + + with pytest.raises(UserFunctionRuntimeError): + minimize( + fun_and_jac=sphere_and_gradient, + 
params=np.arange(10) + 400, + algorithm="scipy_lbfgsb", + ) + + +def test_with_nan_jac_value_unconditional_in_dicts(): + def sphere(params): + return params["a"] ** 2 + params["b"] ** 2 + (params["c"] ** 2).sum() + + def sphere_gradient(params): + return {"a": np.nan, "b": np.nan, "c": np.full_like(params["c"], np.nan)} + + with pytest.raises(UserFunctionRuntimeError): + minimize( + fun=sphere, + params={"a": 400, "b": 400, "c": pd.Series([200, 300, 400])}, + algorithm="scipy_lbfgsb", + jac=sphere_gradient, + ) + + +def test_with_nan_jac_value_conditional_in_dicts(): + def sphere(params): + return params["a"] ** 2 + params["b"] ** 2 + (params["c"] ** 2).sum() + + def true_gradient(params): + return {"a": 2 * params["a"], "b": 2 * params["b"], "c": 2 * params["c"]} + + def param_norm(params): + squared_norm = ( + params["a"] ** 2 + params["b"] ** 2 + np.linalg.norm(params["c"]) ** 2 + ) + return np.sqrt(squared_norm) + + def sphere_gradient(params): + if param_norm(params) >= 1: + return true_gradient(params) + else: + return {"a": np.nan, "b": np.nan, "c": np.full_like(params["c"], np.nan)} + + with pytest.raises(UserFunctionRuntimeError): + minimize( + fun=sphere, + params={"a": 400, "b": 400, "c": pd.Series([200, 300, 400])}, + algorithm="scipy_lbfgsb", + jac=sphere_gradient, + ) + + +def test_with_nan_fun_and_jac_value_unconditional_in_dicts(): + def sphere_and_gradient(params): + function_value = params["a"] ** 2 + params["b"] ** 2 + (params["c"] ** 2).sum() + grad = {"a": np.nan, "b": np.nan, "c": np.full_like(params["c"], np.nan)} + return function_value, grad + + with pytest.raises(UserFunctionRuntimeError): + minimize( + fun_and_jac=sphere_and_gradient, + params={"a": 400, "b": 400, "c": pd.Series([200, 300, 400])}, + algorithm="scipy_lbfgsb", + ) + + +def test_with_nan_fun_and_jac_value_conditional_in_dicts(): + def true_gradient(params): + return {"a": 2 * params["a"], "b": 2 * params["b"], "c": 2 * params["c"]} + + def param_norm(params): + squared_norm 
= ( + params["a"] ** 2 + params["b"] ** 2 + np.linalg.norm(params["c"]) ** 2 + ) + return np.sqrt(squared_norm) + + def sphere_gradient(params): + if param_norm(params) >= 1: + return true_gradient(params) + else: + return {"a": np.nan, "b": np.nan, "c": np.full_like(params["c"], np.nan)} + + def sphere_and_gradient(params): + function_value = params["a"] ** 2 + params["b"] ** 2 + (params["c"] ** 2).sum() + grad = sphere_gradient(params) + return function_value, grad + + with pytest.raises(UserFunctionRuntimeError): + minimize( + fun_and_jac=sphere_and_gradient, + params={"a": 400, "b": 400, "c": pd.Series([200, 300, 400])}, + algorithm="scipy_lbfgsb", + )