Skip to content

Commit 8017178

Browse files
authored
feat: run containerdebug in the background (#667)
1 parent b1be42f commit 8017178

File tree

5 files changed

+29
-11
lines changed

5 files changed

+29
-11
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ All notable changes to this project will be documented in this file.
88

99
- The lifetime of auto generated TLS certificates is now configurable with the role and roleGroup
1010
config property `requestedSecretLifetime`. This helps reduce frequent Pod restarts ([#660]).
11+
- Run a `containerdebug` process in the background of each "druid" container to collect debugging information ([#667]).
1112

1213
### Fixed
1314

@@ -19,6 +20,7 @@ All notable changes to this project will be documented in this file.
1920
[#656]: https://github.com/stackabletech/druid-operator/pull/656
2021
[#657]: https://github.com/stackabletech/druid-operator/pull/657
2122
[#660]: https://github.com/stackabletech/druid-operator/pull/660
23+
[#667]: https://github.com/stackabletech/druid-operator/pull/667
2224

2325
## [24.11.0] - 2024-11-18
2426

rust/crd/src/lib.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ pub const JVM_SECURITY_PROPERTIES_FILE: &str = "security.properties";
7575
pub const STACKABLE_TRUST_STORE: &str = "/stackable/truststore.p12";
7676
pub const STACKABLE_TRUST_STORE_PASSWORD: &str = "changeit";
7777
pub const CERTS_DIR: &str = "/stackable/certificates";
78-
pub const LOG_DIR: &str = "/stackable/log";
78+
pub const STACKABLE_LOG_DIR: &str = "/stackable/log";
7979

8080
// store file names
8181
pub const DRUID_LOG_FILE: &str = "druid.log4j2.xml";
@@ -604,16 +604,17 @@ impl DruidRole {
604604
{COMMON_BASH_TRAP_FUNCTIONS}
605605
{remove_vector_shutdown_file_command}
606606
prepare_signal_handlers
607+
CONTAINERDEBUG_LOG_DIRECTORY={STACKABLE_LOG_DIR}/containerdebug containerdebug --output={STACKABLE_LOG_DIR}/containerdebug-state.json --loop &
607608
/stackable/druid/bin/run-druid {process_name} {RW_CONFIG_DIRECTORY} &
608609
echo \"$!\" >> /tmp/DRUID_PID
609610
wait_for_termination $(cat /tmp/DRUID_PID)
610611
{create_vector_shutdown_file_command}
611612
",
612613
process_name = self.get_process_name(),
613614
remove_vector_shutdown_file_command =
614-
remove_vector_shutdown_file_command(LOG_DIR),
615+
remove_vector_shutdown_file_command(STACKABLE_LOG_DIR),
615616
create_vector_shutdown_file_command =
616-
create_vector_shutdown_file_command(LOG_DIR),
617+
create_vector_shutdown_file_command(STACKABLE_LOG_DIR),
617618
}
618619
}
619620
}

rust/operator-binary/src/druid_controller.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@ use stackable_druid_crd::{
1717
Container, DeepStorageSpec, DruidCluster, DruidClusterStatus, DruidRole, APP_NAME,
1818
AUTH_AUTHORIZER_OPA_URI, CREDENTIALS_SECRET_PROPERTY, DB_PASSWORD_ENV, DB_USERNAME_ENV,
1919
DRUID_CONFIG_DIRECTORY, DS_BUCKET, EXTENSIONS_LOADLIST, HDFS_CONFIG_DIRECTORY, JVM_CONFIG,
20-
JVM_SECURITY_PROPERTIES_FILE, LOG_CONFIG_DIRECTORY, LOG_DIR, MAX_DRUID_LOG_FILES_SIZE,
21-
RUNTIME_PROPS, RW_CONFIG_DIRECTORY, S3_ACCESS_KEY, S3_ENDPOINT_URL, S3_PATH_STYLE_ACCESS,
22-
S3_SECRET_KEY, ZOOKEEPER_CONNECTION_STRING,
20+
JVM_SECURITY_PROPERTIES_FILE, LOG_CONFIG_DIRECTORY, MAX_DRUID_LOG_FILES_SIZE, RUNTIME_PROPS,
21+
RW_CONFIG_DIRECTORY, S3_ACCESS_KEY, S3_ENDPOINT_URL, S3_PATH_STYLE_ACCESS, S3_SECRET_KEY,
22+
STACKABLE_LOG_DIR, ZOOKEEPER_CONNECTION_STRING,
2323
};
2424
use stackable_operator::{
2525
builder::{
@@ -964,7 +964,7 @@ fn build_rolegroup_statefulset(
964964
// This command needs to be added at the beginning of the shell commands,
965965
// otherwise the output of the following commands will not be captured!
966966
prepare_container_commands.push(product_logging::framework::capture_shell_output(
967-
LOG_DIR,
967+
STACKABLE_LOG_DIR,
968968
&prepare_container_name,
969969
log_config,
970970
));
@@ -1292,10 +1292,10 @@ fn add_log_volume_and_volume_mounts(
12921292
pb: &mut PodBuilder,
12931293
) -> Result<()> {
12941294
cb_druid
1295-
.add_volume_mount(LOG_VOLUME_NAME, LOG_DIR)
1295+
.add_volume_mount(LOG_VOLUME_NAME, STACKABLE_LOG_DIR)
12961296
.context(AddVolumeMountSnafu)?;
12971297
cb_prepare
1298-
.add_volume_mount(LOG_VOLUME_NAME, LOG_DIR)
1298+
.add_volume_mount(LOG_VOLUME_NAME, STACKABLE_LOG_DIR)
12991299
.context(AddVolumeMountSnafu)?;
13001300
pb.add_volume(
13011301
VolumeBuilder::new(LOG_VOLUME_NAME)

rust/operator-binary/src/product_logging.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use snafu::{OptionExt, ResultExt, Snafu};
22
use stackable_druid_crd::{
3-
Container, DruidCluster, DRUID_LOG_FILE, LOG4J2_CONFIG, LOG_DIR, MAX_DRUID_LOG_FILES_SIZE,
3+
Container, DruidCluster, DRUID_LOG_FILE, LOG4J2_CONFIG, MAX_DRUID_LOG_FILES_SIZE,
4+
STACKABLE_LOG_DIR,
45
};
56
use stackable_operator::{
67
builder::configmap::ConfigMapBuilder,
@@ -90,7 +91,10 @@ pub fn extend_role_group_config_map(
9091
cm_builder.add_data(
9192
LOG4J2_CONFIG,
9293
product_logging::framework::create_log4j2_config(
93-
&format!("{LOG_DIR}/{container}", container = Container::Druid),
94+
&format!(
95+
"{STACKABLE_LOG_DIR}/{container}",
96+
container = Container::Druid
97+
),
9498
DRUID_LOG_FILE,
9599
MAX_DRUID_LOG_FILES_SIZE
96100
.scale_to(BinaryMultiple::Mebi)

tests/templates/kuttl/smoke/50-assert.yaml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,3 +135,14 @@ status:
135135
expectedPods: 1
136136
currentHealthy: 1
137137
disruptionsAllowed: 1
138+
---
139+
# This test checks if the containerdebug-state.json file is present and valid
140+
apiVersion: kuttl.dev/v1beta1
141+
kind: TestAssert
142+
timeout: 600
143+
commands:
144+
- script: kubectl exec -n $NAMESPACE --container druid druid-coordinator-default-0 -- cat /stackable/log/containerdebug-state.json | jq --exit-status
145+
- script: kubectl exec -n $NAMESPACE --container druid druid-router-default-0 -- cat /stackable/log/containerdebug-state.json | jq --exit-status
146+
- script: kubectl exec -n $NAMESPACE --container druid druid-middlemanager-default-0 -- cat /stackable/log/containerdebug-state.json | jq --exit-status
147+
- script: kubectl exec -n $NAMESPACE --container druid druid-broker-default-0 -- cat /stackable/log/containerdebug-state.json | jq --exit-status
148+
- script: kubectl exec -n $NAMESPACE --container druid druid-historical-default-0 -- cat /stackable/log/containerdebug-state.json | jq --exit-status

0 commit comments

Comments
 (0)