Skip to content

Commit c1dfdab

Browse files
feat(deployment): add sequencer liveness check stage to system test workflow
1 parent 57d2097 commit c1dfdab

File tree

2 files changed

+158
-0
lines changed

2 files changed

+158
-0
lines changed

.github/workflows/consolidated_system_test.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,18 @@ jobs:
123123
- name: Run readiness check
124124
run: pipenv run python ./scripts/system_tests/readiness_check.py --deployment_config_path ${{ env.deployment_config_path }} --namespace ${{ env.namespace }}
125125

126+
- name: Test sequencer is alive
127+
env:
128+
initial_delay_sec: 10
129+
check_interval_sec: 5
130+
check_timeout_sec: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.liveness_test_duration_sec || 10 }}
131+
run: |
132+
pipenv run python ./scripts/system_tests/liveness_check.py \
133+
"${{ env.deployment_config_path }}" \
134+
"${{ env.config_dir }}" \
135+
"${{ env.check_timeout_sec }}" \
136+
"${{ env.check_interval_sec }}"
137+
126138
- name: Get container logs
127139
if: always()
128140
run: |
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
import argparse
2+
import json
3+
import os
4+
import subprocess
5+
import sys
6+
import time
7+
from typing import List
8+
9+
10+
def run(
11+
cmd: List[str], capture_output=False, check=True, text=True
12+
) -> subprocess.CompletedProcess:
13+
return subprocess.run(cmd, capture_output=capture_output, check=check, text=text)
14+
15+
16+
def get_services(deployment_config_path: str) -> List[str]:
17+
with open(deployment_config_path, "r", encoding="utf-8") as f:
18+
config = json.load(f)
19+
return [s["name"] for s in config.get("services", [])]
20+
21+
22+
def get_config_path(deployment_config_path: str, service_name: str) -> str:
23+
with open(deployment_config_path, "r", encoding="utf-8") as f:
24+
config = json.load(f)
25+
for service in config["services"]:
26+
if service["name"] == service_name:
27+
paths = service.get("config_paths", [])
28+
if not paths:
29+
raise ValueError(f"No config_paths found for service {service_name}")
30+
return paths[0]
31+
raise ValueError(f"Service {service_name} not found in deployment config")
32+
33+
34+
def get_monitoring_port(config_file_path: str) -> int:
35+
with open(config_file_path, "r", encoding="utf-8") as f:
36+
config = json.load(f)
37+
return config["monitoring_endpoint_config.port"]
38+
39+
40+
def main(
41+
deployment_config_path: str,
42+
config_dir: str,
43+
timeout: int,
44+
interval: int,
45+
initial_delay: int,
46+
):
47+
print(
48+
f"Running liveness checks on config_dir: {config_dir} and deployment_config_path: {deployment_config_path} "
49+
)
50+
services = get_services(deployment_config_path)
51+
print("📡 Finding pods for services...")
52+
for i, service_name in enumerate(services):
53+
service_label = f"sequencer-{service_name.lower()}"
54+
55+
print(f"📡 Finding {service_name} pod...")
56+
try:
57+
pod_name = run(
58+
[
59+
"kubectl",
60+
"get",
61+
"pods",
62+
"-l",
63+
f"service={service_label}",
64+
"-o",
65+
"jsonpath={.items[0].metadata.name}",
66+
],
67+
capture_output=True,
68+
).stdout.strip()
69+
except subprocess.CalledProcessError:
70+
print(f"❌ Missing pod for {service_name}. Aborting!")
71+
sys.exit(1)
72+
73+
if not pod_name:
74+
print(f"❌ No pod found for {service_name}. Aborting!")
75+
sys.exit(1)
76+
77+
print(f"{service_name} pod found - {pod_name}")
78+
79+
config_path = get_config_path(deployment_config_path, service_name)
80+
full_config_path = os.path.join(config_dir, config_path)
81+
monitoring_port = get_monitoring_port(full_config_path)
82+
83+
local_port = monitoring_port + i
84+
print(
85+
f"🚀 Starting port-forwarding for {service_name} on local port {local_port}..."
86+
)
87+
pf_process = subprocess.Popen(
88+
["kubectl", "port-forward", pod_name, f"{local_port}:{monitoring_port}"],
89+
stdout=subprocess.DEVNULL,
90+
stderr=subprocess.DEVNULL,
91+
)
92+
93+
time.sleep(3) # Allow port-forward to establish
94+
95+
try:
96+
print(f"✅ Running health check for {service_name}...")
97+
result = subprocess.run(
98+
[
99+
"./devops/scripts/check_alive.sh",
100+
"--address",
101+
f"http://localhost:{local_port}/monitoring/alive",
102+
"--timeout",
103+
str(timeout),
104+
"--interval",
105+
str(interval),
106+
"--initial-delay",
107+
str(initial_delay),
108+
],
109+
check=False,
110+
)
111+
if result.returncode == 0:
112+
print(f"✅ Test passed: {service_name} ran for {timeout} seconds!")
113+
else:
114+
print(f"❌ Test failed: {service_name} did not run successfully.")
115+
pf_process.terminate()
116+
pf_process.wait()
117+
sys.exit(result.returncode)
118+
finally:
119+
pf_process.terminate()
120+
pf_process.wait()
121+
122+
123+
if __name__ == "__main__":
124+
parser = argparse.ArgumentParser(
125+
description="Run liveness checks on Kubernetes services."
126+
)
127+
parser.add_argument("deployment_config_path", help="Path to the deployment config JSON file")
128+
parser.add_argument("config_dir", help="Base directory for service config files")
129+
parser.add_argument("timeout", type=int, help="Timeout duration in seconds for each service check")
130+
parser.add_argument("interval", type=int, help="Interval between health checks in seconds")
131+
parser.add_argument(
132+
"--initial-delay",
133+
type=int,
134+
default=int(os.getenv("INITIAL_DELAY_SEC", "10")),
135+
help="Initial delay before starting health checks (default: value from INITIAL_DELAY_SEC env var or 10)",
136+
)
137+
138+
args = parser.parse_args()
139+
140+
main(
141+
deployment_config_path=args.deployment_config_path,
142+
config_dir=args.config_dir,
143+
timeout=args.timeout,
144+
interval=args.interval,
145+
initial_delay=args.initial_delay,
146+
)

0 commit comments

Comments
 (0)