Skip to content

Commit d337333

Browse files
authored
[clickhouse] Clickana monitoring dashboard tool (#7207)
### Overview

As part of Stage 1 of [RFD468](https://rfd.shared.oxide.computer/rfd/0468) we'll be observing how a ClickHouse cluster behaves in comparison with a single-node server. This commit introduces a basic tool that lets us visualize internal ClickHouse metric information.

As a starting point, Clickana only has 4 charts, and the user may not choose what these are. Additionally, it is only capable of rendering data by making API calls. I'd like to make the tool more flexible; other capabilities will be added in follow-up PRs.

### Usage

```console
clickana --help
Usage: clickana [OPTIONS] --clickhouse-addr <CLICKHOUSE_ADDR>

Options:
  -l, --log-path <LOG_PATH>
          Path to the log file [env: CLICKANA_LOG_PATH=] [default: /tmp/clickana.log]
  -a, --clickhouse-addr <CLICKHOUSE_ADDR>
          Address where a clickhouse admin server is listening on
  -s, --sampling-interval <SAMPLING_INTERVAL>
          The interval to collect monitoring data in seconds [default: 60]
  -t, --time-range <TIME_RANGE>
          Range of time to collect monitoring data in seconds [default: 3600]
  -r, --refresh-interval <REFRESH_INTERVAL>
          The interval at which the dashboards will refresh [default: 60]
  -h, --help
          Print help
```

### Manual Testing

```
root@oxz_clickhouse_015f9c34:~# /opt/oxide/clickana/bin/clickana -a [fd00:1122:3344:101::e]:8888
```

<img width="1208" alt="Screenshot 2024-12-12 at 4 11 15 PM" src="https://github.com/user-attachments/assets/53658b02-3729-4b29-ac28-0a387c3143ac" />

### Next Steps

- Let the user set which metrics they would like to visualize in each chart. This may be nice to do through a TOML file or something. We could let them choose which unit to represent them in as well perhaps.
- Have more metrics available.
- It'd be nice to have the ability to take the timeseries as JSON instead of calling the API as well. This could be useful in the future to have some insight into our customers' racks for debugging purposes. We could include ClickHouse internal metric timeseries as part of the support bundles and they could be visualized via Clickana. WDYT @smklein?

Related: #6953
1 parent 72ac078 commit d337333

File tree

12 files changed

+1198
-42
lines changed

12 files changed

+1198
-42
lines changed

Cargo.lock

Lines changed: 26 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ members = [
2727
"cockroach-admin/types",
2828
"common",
2929
"dev-tools/cert-dev",
30+
"dev-tools/clickana",
3031
"dev-tools/clickhouse-cluster-dev",
3132
"dev-tools/ch-dev",
3233
"dev-tools/crdb-seed",
@@ -158,6 +159,7 @@ default-members = [
158159
"cockroach-admin/types",
159160
"common",
160161
"dev-tools/cert-dev",
162+
"dev-tools/clickana",
161163
"dev-tools/clickhouse-cluster-dev",
162164
"dev-tools/ch-dev",
163165
"dev-tools/crdb-seed",
@@ -332,6 +334,7 @@ chrono = { version = "0.4", features = [ "serde" ] }
332334
chrono-tz = "0.10.0"
333335
ciborium = "0.2.2"
334336
clap = { version = "4.5", features = ["cargo", "derive", "env", "wrap_help"] }
337+
clickana = { path = "dev-tools/clickana" }
335338
clickhouse-admin-api = { path = "clickhouse-admin/api" }
336339
clickhouse-admin-keeper-client = { path = "clients/clickhouse-admin-keeper-client" }
337340
clickhouse-admin-server-client = { path = "clients/clickhouse-admin-server-client" }

clickhouse-admin/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@ slog.workspace = true
2222
slog-async.workspace = true
2323
slog-dtrace.workspace = true
2424
slog-error-chain.workspace = true
25+
slog-term.workspace = true
2526
serde.workspace = true
27+
serde_json.workspace = true
2628
thiserror.workspace = true
2729
tokio.workspace = true
2830
tokio-postgres.workspace = true

clickhouse-admin/types/src/lib.rs

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1200,7 +1200,7 @@ pub enum Timestamp {
12001200
#[derive(Debug, Serialize, Deserialize, JsonSchema, PartialEq)]
12011201
#[serde(rename_all = "snake_case")]
12021202
pub struct SystemTimeSeries {
1203-
pub time: Timestamp,
1203+
pub time: String,
12041204
pub value: f64,
12051205
// TODO: Would be really nice to have an enum with possible units (s, ms, bytes)
12061206
// Not sure if I can even add this, the system tables don't mention units at all.
@@ -2099,15 +2099,15 @@ snapshot_storage_disk=LocalSnapshotDisk
20992099

21002100
let expected = vec![
21012101
SystemTimeSeries {
2102-
time: crate::Timestamp::Unix("1732494720".to_string()),
2102+
time: "1732494720".to_string(),
21032103
value: 110220450825.75238,
21042104
},
21052105
SystemTimeSeries {
2106-
time: crate::Timestamp::Unix("1732494840".to_string()),
2106+
time: "1732494840".to_string(),
21072107
value: 110339992917.33331,
21082108
},
21092109
SystemTimeSeries {
2110-
time: crate::Timestamp::Unix("1732494960".to_string()),
2110+
time: "1732494960".to_string(),
21112111
value: 110421854037.33331,
21122112
},
21132113
];
@@ -2127,21 +2127,15 @@ snapshot_storage_disk=LocalSnapshotDisk
21272127

21282128
let expected = vec![
21292129
SystemTimeSeries {
2130-
time: crate::Timestamp::Utc(
2131-
"2024-11-25T00:34:00Z".parse::<DateTime<Utc>>().unwrap(),
2132-
),
2130+
time: "2024-11-25T00:34:00Z".to_string(),
21332131
value: 110220450825.75238,
21342132
},
21352133
SystemTimeSeries {
2136-
time: crate::Timestamp::Utc(
2137-
"2024-11-25T00:35:00Z".parse::<DateTime<Utc>>().unwrap(),
2138-
),
2134+
time: "2024-11-25T00:35:00Z".to_string(),
21392135
value: 110339992917.33331,
21402136
},
21412137
SystemTimeSeries {
2142-
time: crate::Timestamp::Utc(
2143-
"2024-11-25T00:36:00Z".parse::<DateTime<Utc>>().unwrap(),
2144-
),
2138+
time: "2024-11-25T00:36:00Z".to_string(),
21452139
value: 110421854037.33331,
21462140
},
21472141
];
@@ -2176,7 +2170,7 @@ snapshot_storage_disk=LocalSnapshotDisk
21762170

21772171
assert_eq!(
21782172
format!("{}", root_cause),
2179-
"data did not match any variant of untagged enum Timestamp at line 1 column 12",
2173+
"invalid type: integer `2024`, expected a string at line 1 column 12",
21802174
);
21812175
}
21822176
}

dev-tools/clickana/Cargo.toml

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
[package]
2+
name = "clickana"
3+
version = "0.1.0"
4+
edition = "2021"
5+
license = "MPL-2.0"
6+
7+
[dependencies]
8+
anyhow.workspace = true
9+
camino.workspace = true
10+
chrono.workspace = true
11+
clap.workspace = true
12+
clickhouse-admin-types.workspace = true
13+
clickhouse-admin-server-client.workspace = true
14+
dropshot.workspace = true
15+
futures.workspace = true
16+
omicron-common.workspace = true
17+
ratatui.workspace = true
18+
schemars.workspace = true
19+
slog.workspace = true
20+
slog-async.workspace = true
21+
slog-dtrace.workspace = true
22+
slog-error-chain.workspace = true
23+
slog-term.workspace = true
24+
serde_json.workspace = true
25+
tokio.workspace = true
26+
tokio-postgres.workspace = true
27+
28+
omicron-workspace-hack.workspace = true
29+
30+
[lints]
31+
workspace = true
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// This Source Code Form is subject to the terms of the Mozilla Public
2+
// License, v. 2.0. If a copy of the MPL was not distributed with this
3+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4+
5+
use anyhow::Result;
6+
use camino::Utf8PathBuf;
7+
use clap::Parser;
8+
use clickana::Clickana;
9+
use std::net::SocketAddr;
10+
11+
// Default log file location; used as the `default_value` of `--log-path`.
const CLICKANA_LOG_FILE: &str = "/tmp/clickana.log";
12+
13+
#[tokio::main]
14+
async fn main() -> Result<()> {
15+
let args = Cli::parse();
16+
17+
let terminal = ratatui::init();
18+
let result = Clickana::new(
19+
args.clickhouse_addr,
20+
args.log_path,
21+
args.sampling_interval,
22+
args.time_range,
23+
args.refresh_interval,
24+
)
25+
.run(terminal)
26+
.await;
27+
ratatui::restore();
28+
result
29+
}
30+
31+
#[derive(Debug, Parser)]
32+
struct Cli {
33+
/// Path to the log file
34+
#[arg(
35+
long,
36+
short,
37+
env = "CLICKANA_LOG_PATH",
38+
default_value = CLICKANA_LOG_FILE,
39+
)]
40+
log_path: Utf8PathBuf,
41+
42+
/// Address where a clickhouse admin server is listening on
43+
#[arg(long, short = 'a')]
44+
clickhouse_addr: SocketAddr,
45+
46+
/// The interval to collect monitoring data in seconds
47+
#[arg(long, short, default_value_t = 60)]
48+
sampling_interval: u64,
49+
50+
/// Range of time to collect monitoring data in seconds
51+
#[arg(long, short, default_value_t = 3600)]
52+
time_range: u64,
53+
54+
/// The interval at which the dashboards will refresh
55+
#[arg(long, short, default_value_t = 60)]
56+
refresh_interval: u64,
57+
}

0 commit comments

Comments
 (0)