Skip to content

Commit 7fdd92f

Browse files
Warn if concurrent_batches config is set to True, but the available adapter doesn't support it (#11145) (#11154)
* Begin producing warning when attempting to force concurrent batches without adapter support Batches of microbatch models can be executed sequentially or concurrently. We try to figure out which to do intelligently. As part of that, we implemented an override, the model config `concurrent_batches`, to allow the user to bypass _some_ of our automatic detection. However, a user _cannot_ for batches to run concurrently if the adapter doesn't support concurrent batches (declaring support is opt in). Thus, if an adapter _doesn't_ support running batches concurrently, and a user tries to force concurrent execution via `concurrent_batches`, then we need to warn the user that that isn't happening. * Add custom event type for warning about invalid `concurrent_batches` config * Fire `InvalidConcurrentBatchesConfig` warning via `warn_or_error` so it can be silenced (cherry picked from commit 6c61cb7) Co-authored-by: Quigley Malcolm <[email protected]>
1 parent dbd8ef3 commit 7fdd92f

File tree

8 files changed

+608
-448
lines changed

8 files changed

+608
-448
lines changed

Diff for: .changes/unreleased/Fixes-20241212-113611.yaml

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
kind: Fixes
2+
body: Warn about invalid usages of `concurrent_batches` config
3+
time: 2024-12-12T11:36:11.451962-06:00
4+
custom:
5+
Author: QMalcolm
6+
Issue: "11122"

Diff for: core/dbt/events/core_types.proto

+12
Original file line numberDiff line numberDiff line change
@@ -928,6 +928,18 @@ message MicrobatchModelNoEventTimeInputsMsg {
928928
}
929929

930930

931+
// I075
932+
message InvalidConcurrentBatchesConfig {
933+
int32 num_models = 1;
934+
string adapter_type = 2;
935+
}
936+
937+
message InvalidConcurrentBatchesConfigMsg {
938+
CoreEventInfo info = 1;
939+
InvalidConcurrentBatchesConfig data = 2;
940+
}
941+
942+
931943
// M - Deps generation
932944

933945

Diff for: core/dbt/events/core_types_pb2.py

+451-447
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: core/dbt/events/types.py

+10
Original file line numberDiff line numberDiff line change
@@ -967,6 +967,16 @@ def message(self) -> str:
967967
return warning_tag(msg)
968968

969969

970+
class InvalidConcurrentBatchesConfig(WarnLevel):
971+
def code(self) -> str:
972+
return "I075"
973+
974+
def message(self) -> str:
975+
maybe_plural_count_of_models = pluralize(self.num_models, "microbatch model")
976+
description = f"Found {maybe_plural_count_of_models} with the `concurrent_batches` config set to true, but the {self.adapter_type} adapter does not support running batches concurrently. Batches will be run sequentially."
977+
return line_wrap_message(warning_tag(description))
978+
979+
970980
# =======================================================
971981
# M - Deps generation
972982
# =======================================================

Diff for: core/dbt/parser/manifest.py

+24
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import dbt.utils
1818
import dbt_common.utils
1919
from dbt import plugins
20+
from dbt.adapters.capability import Capability
2021
from dbt.adapters.factory import (
2122
get_adapter,
2223
get_adapter_package_names,
@@ -66,6 +67,7 @@
6667
ArtifactWritten,
6768
DeprecatedModel,
6869
DeprecatedReference,
70+
InvalidConcurrentBatchesConfig,
6971
InvalidDisabledTargetInTestNode,
7072
MicrobatchModelNoEventTimeInputs,
7173
NodeNotFoundOrDisabled,
@@ -510,6 +512,7 @@ def load(self) -> Manifest:
510512
self.check_for_model_deprecations()
511513
self.check_for_spaces_in_resource_names()
512514
self.check_for_microbatch_deprecations()
515+
self.check_forcing_batch_concurrency()
513516

514517
return self.manifest
515518

@@ -1484,6 +1487,27 @@ def check_valid_microbatch_config(self):
14841487
if not has_input_with_event_time_config:
14851488
fire_event(MicrobatchModelNoEventTimeInputs(model_name=node.name))
14861489

1490+
def check_forcing_batch_concurrency(self) -> None:
1491+
if self.manifest.use_microbatch_batches(project_name=self.root_project.project_name):
1492+
adapter = get_adapter(self.root_project)
1493+
1494+
if not adapter.supports(Capability.MicrobatchConcurrency):
1495+
models_forcing_concurrent_batches = 0
1496+
for node in self.manifest.nodes.values():
1497+
if (
1498+
hasattr(node.config, "concurrent_batches")
1499+
and node.config.concurrent_batches is True
1500+
):
1501+
models_forcing_concurrent_batches += 1
1502+
1503+
if models_forcing_concurrent_batches > 0:
1504+
warn_or_error(
1505+
InvalidConcurrentBatchesConfig(
1506+
num_models=models_forcing_concurrent_batches,
1507+
adapter_type=adapter.type(),
1508+
)
1509+
)
1510+
14871511
def write_perf_info(self, target_path: str):
14881512
path = os.path.join(target_path, PERF_INFO_FILE_NAME)
14891513
write_file(path, json.dumps(self._perf_info, cls=dbt.utils.JSONEncoder, indent=4))

Diff for: tests/functional/microbatch/test_microbatch.py

+45
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
ArtifactWritten,
88
EndOfRunSummary,
99
GenericExceptionOnRun,
10+
InvalidConcurrentBatchesConfig,
1011
JinjaLogDebug,
1112
LogBatchResult,
1213
LogModelResult,
@@ -71,6 +72,11 @@
7172
select * from {{ ref('input_model') }}
7273
"""
7374

75+
microbatch_model_force_concurrent_batches_sql = """
76+
{{ config(materialized='incremental', incremental_strategy='microbatch', unique_key='id', event_time='event_time', batch_size='day', begin=modules.datetime.datetime(2020, 1, 1, 0, 0, 0), concurrent_batches=true) }}
77+
select * from {{ ref('input_model') }}
78+
"""
79+
7480
microbatch_yearly_model_sql = """
7581
{{ config(materialized='incremental', incremental_strategy='microbatch', unique_key='id', event_time='event_time', batch_size='year', begin=modules.datetime.datetime(2020, 1, 1, 0, 0, 0)) }}
7682
select * from {{ ref('input_model') }}
@@ -1083,3 +1089,42 @@ def test_microbatch(
10831089

10841090
# we had a bug where having only one batch caused a generic exception
10851091
assert len(generic_exception_catcher.caught_events) == 0
1092+
1093+
1094+
class TestCanSilenceInvalidConcurrentBatchesConfigWarning(BaseMicrobatchTest):
1095+
@pytest.fixture(scope="class")
1096+
def models(self):
1097+
return {
1098+
"input_model.sql": input_model_sql,
1099+
"microbatch_model.sql": microbatch_model_force_concurrent_batches_sql,
1100+
}
1101+
1102+
@pytest.fixture
1103+
def event_catcher(self) -> EventCatcher:
1104+
return EventCatcher(event_to_catch=InvalidConcurrentBatchesConfig) # type: ignore
1105+
1106+
def test_microbatch(
1107+
self,
1108+
project,
1109+
event_catcher: EventCatcher,
1110+
) -> None:
1111+
# This test works because postgres doesn't support concurrent batch execution
1112+
# If the postgres adapter starts supporting concurrent batch execution we'll
1113+
# need to start mocking the return value of `adapter.supports()`
1114+
1115+
with patch_microbatch_end_time("2020-01-01 13:57:00"):
1116+
_ = run_dbt(["run"], callbacks=[event_catcher.catch])
1117+
# We didn't silence the warning, so we get it
1118+
assert len(event_catcher.caught_events) == 1
1119+
1120+
# Clear caught events
1121+
event_catcher.caught_events = []
1122+
1123+
# Run again with silencing
1124+
with patch_microbatch_end_time("2020-01-01 13:57:00"):
1125+
_ = run_dbt(
1126+
["run", "--warn-error-options", "{'silence': ['InvalidConcurrentBatchesConfig']}"],
1127+
callbacks=[event_catcher.catch],
1128+
)
1129+
# Because we silenced the warning, it shouldn't get fired
1130+
assert len(event_catcher.caught_events) == 0

Diff for: tests/unit/parser/test_manifest.py

+59-1
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,21 @@
11
from argparse import Namespace
2+
from typing import Optional
23
from unittest.mock import MagicMock, patch
34

45
import pytest
56
from pytest_mock import MockerFixture
67

8+
from dbt.adapters.postgres import PostgresAdapter
79
from dbt.artifacts.resources.base import FileHash
810
from dbt.config import RuntimeConfig
911
from dbt.contracts.graph.manifest import Manifest, ManifestStateCheck
10-
from dbt.events.types import UnusedResourceConfigPath
12+
from dbt.events.types import InvalidConcurrentBatchesConfig, UnusedResourceConfigPath
1113
from dbt.flags import set_from_args
1214
from dbt.parser.manifest import ManifestLoader, _warn_for_unused_resource_config_paths
1315
from dbt.parser.read_files import FileDiff
1416
from dbt.tracking import User
1517
from dbt_common.events.event_manager_client import add_callback_to_manager
18+
from tests.unit.fixtures import model_node
1619
from tests.utils import EventCatcher
1720

1821

@@ -238,3 +241,58 @@ def test_warn_for_unused_resource_config_paths(
238241
else:
239242
assert len(catcher.caught_events) == 1
240243
assert f"{resource_type}.{path}" in str(catcher.caught_events[0].data)
244+
245+
246+
class TestCheckForcingConcurrentBatches:
247+
@pytest.fixture
248+
@patch("dbt.parser.manifest.ManifestLoader.build_manifest_state_check")
249+
@patch("dbt.parser.manifest.os.path.exists")
250+
@patch("dbt.parser.manifest.open")
251+
def manifest_loader(
252+
self, patched_open, patched_os_exist, patched_state_check
253+
) -> ManifestLoader:
254+
mock_project = MagicMock(RuntimeConfig)
255+
mock_project.project_target_path = "mock_target_path"
256+
mock_project.project_name = "mock_project_name"
257+
return ManifestLoader(mock_project, {})
258+
259+
@pytest.fixture
260+
def event_catcher(self) -> EventCatcher:
261+
return EventCatcher(InvalidConcurrentBatchesConfig) # type: ignore
262+
263+
@pytest.mark.parametrize(
264+
"adapter_support,concurrent_batches_config,expect_warning",
265+
[
266+
(False, True, True),
267+
(False, False, False),
268+
(False, None, False),
269+
(True, True, False),
270+
(True, False, False),
271+
(True, None, False),
272+
],
273+
)
274+
def test_check_forcing_concurrent_batches(
275+
self,
276+
mocker: MockerFixture,
277+
manifest_loader: ManifestLoader,
278+
postgres_adapter: PostgresAdapter,
279+
event_catcher: EventCatcher,
280+
adapter_support: bool,
281+
concurrent_batches_config: Optional[bool],
282+
expect_warning: bool,
283+
):
284+
add_callback_to_manager(event_catcher.catch)
285+
model = model_node()
286+
model.config.concurrent_batches = concurrent_batches_config
287+
mocker.patch.object(postgres_adapter, "supports").return_value = adapter_support
288+
mocker.patch("dbt.parser.manifest.get_adapter").return_value = postgres_adapter
289+
mocker.patch.object(manifest_loader.manifest, "use_microbatch_batches").return_value = True
290+
291+
manifest_loader.manifest.add_node_nofile(model)
292+
manifest_loader.check_forcing_batch_concurrency()
293+
294+
if expect_warning:
295+
assert len(event_catcher.caught_events) == 1
296+
assert "Batches will be run sequentially" in event_catcher.caught_events[0].info.msg # type: ignore
297+
else:
298+
assert len(event_catcher.caught_events) == 0

Diff for: tests/unit/test_events.py

+1
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,7 @@ def test_event_codes(self):
289289
core_types.FreshnessConfigProblem(msg=""),
290290
core_types.SemanticValidationFailure(msg=""),
291291
core_types.MicrobatchModelNoEventTimeInputs(model_name=""),
292+
core_types.InvalidConcurrentBatchesConfig(num_models=1, adapter_type=""),
292293
# M - Deps generation ======================
293294
core_types.GitSparseCheckoutSubdirectory(subdir=""),
294295
core_types.GitProgressCheckoutRevision(revision=""),

0 commit comments

Comments
 (0)