
Commit 3be5b26

[CI/Build] Add shell script linting using shellcheck (vllm-project#7925)

Signed-off-by: Russell Bryant <[email protected]>

Parent: de0e61a
28 files changed: +204 −129 lines
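
Most of the edits below address shellcheck's SC2086 warning: an unquoted variable expansion undergoes word splitting and glob expansion before the command ever sees it. A minimal sketch of the failure mode (the path is a hypothetical example):

    #!/bin/bash
    MODEL="/models/my model"   # a value containing a space
    ls $MODEL                  # SC2086: splits into two arguments, /models/my and model
    ls "$MODEL"                # quoted: passed through as a single argument

Braces as in ${var} only mark where the name ends; only quotes prevent splitting and globbing.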

.buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh (+3 −3)

@@ -41,6 +41,6 @@ while getopts "m:b:l:f:" OPT; do
 done
 
 lm_eval --model hf \
-  --model_args pretrained=$MODEL,parallelize=True \
-  --tasks gsm8k --num_fewshot $FEWSHOT --limit $LIMIT \
-  --batch_size $BATCH_SIZE
+  --model_args "pretrained=$MODEL,parallelize=True" \
+  --tasks gsm8k --num_fewshot "$FEWSHOT" --limit "$LIMIT" \
+  --batch_size "$BATCH_SIZE"

.buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh (+3 −3)

@@ -46,6 +46,6 @@ while getopts "m:b:l:f:t:" OPT; do
 done
 
 lm_eval --model vllm \
-  --model_args pretrained=$MODEL,tensor_parallel_size=$TP_SIZE,distributed_executor_backend="ray",trust_remote_code=true,max_model_len=4096 \
-  --tasks gsm8k --num_fewshot $FEWSHOT --limit $LIMIT \
-  --batch_size $BATCH_SIZE
+  --model_args "pretrained=$MODEL,tensor_parallel_size=$TP_SIZE,distributed_executor_backend=ray,trust_remote_code=true,max_model_len=4096" \
+  --tasks gsm8k --num_fewshot "$FEWSHOT" --limit "$LIMIT" \
+  --batch_size "$BATCH_SIZE"

.buildkite/lm-eval-harness/run-tests.sh (+1 −1)

@@ -30,7 +30,7 @@ while getopts "c:t:" OPT; do
 done
 
 # Parse list of configs.
-IFS=$'\n' read -d '' -r -a MODEL_CONFIGS < $CONFIG
+IFS=$'\n' read -d '' -r -a MODEL_CONFIGS < "$CONFIG"
 
 for MODEL_CONFIG in "${MODEL_CONFIGS[@]}"
 do
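
The IFS=$'\n' read -d '' -r -a idiom slurps a whole file into an array, one line per element; an unquoted redirection target makes bash fail with "ambiguous redirect" if the filename contains whitespace. A self-contained sketch of the pattern (file path hypothetical):

    #!/bin/bash
    printf 'alpha\nbeta\ngamma\n' > /tmp/model-configs.txt
    # read exits non-zero at EOF because the '' (NUL) delimiter never appears;
    # harmless here, but worth knowing under `set -e`
    IFS=$'\n' read -d '' -r -a MODEL_CONFIGS < "/tmp/model-configs.txt"
    echo "${#MODEL_CONFIGS[@]} configs"   # prints: 3 configs
    echo "${MODEL_CONFIGS[1]}"            # prints: beta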

.buildkite/nightly-benchmarks/scripts/launch-server.sh (+25 −38)

@@ -50,58 +50,54 @@ launch_trt_server() {
     git clone https://github.com/triton-inference-server/tensorrtllm_backend.git
     git lfs install
     cd tensorrtllm_backend
-    git checkout $trt_llm_version
-    tensorrtllm_backend_dir=$(pwd)
+    git checkout "$trt_llm_version"
     git submodule update --init --recursive
 
     # build trtllm engine
     cd /tensorrtllm_backend
-    cd ./tensorrt_llm/examples/${model_type}
+    cd "./tensorrt_llm/examples/${model_type}"
     python3 convert_checkpoint.py \
-        --model_dir ${model_path} \
-        --dtype ${model_dtype} \
-        --tp_size ${model_tp_size} \
-        --output_dir ${trt_model_path}
+        --model_dir "${model_path}" \
+        --dtype "${model_dtype}" \
+        --tp_size "${model_tp_size}" \
+        --output_dir "${trt_model_path}"
     trtllm-build \
-        --checkpoint_dir ${trt_model_path} \
+        --checkpoint_dir "${trt_model_path}" \
         --use_fused_mlp \
         --reduce_fusion disable \
         --workers 8 \
-        --gpt_attention_plugin ${model_dtype} \
-        --gemm_plugin ${model_dtype} \
-        --tp_size ${model_tp_size} \
-        --max_batch_size ${max_batch_size} \
-        --max_input_len ${max_input_len} \
-        --max_seq_len ${max_seq_len} \
-        --max_num_tokens ${max_num_tokens} \
-        --output_dir ${trt_engine_path}
+        --gpt_attention_plugin "${model_dtype}" \
+        --gemm_plugin "${model_dtype}" \
+        --tp_size "${model_tp_size}" \
+        --max_batch_size "${max_batch_size}" \
+        --max_input_len "${max_input_len}" \
+        --max_seq_len "${max_seq_len}" \
+        --max_num_tokens "${max_num_tokens}" \
+        --output_dir "${trt_engine_path}"
 
     # handle triton protobuf files and launch triton server
     cd /tensorrtllm_backend
     mkdir triton_model_repo
     cp -r all_models/inflight_batcher_llm/* triton_model_repo/
     cd triton_model_repo
     rm -rf ./tensorrt_llm/1/*
-    cp -r ${trt_engine_path}/* ./tensorrt_llm/1
+    cp -r "${trt_engine_path}"/* ./tensorrt_llm/1
     python3 ../tools/fill_template.py -i tensorrt_llm/config.pbtxt triton_backend:tensorrtllm,engine_dir:/tensorrtllm_backend/triton_model_repo/tensorrt_llm/1,decoupled_mode:true,batching_strategy:inflight_fused_batching,batch_scheduler_policy:guaranteed_no_evict,exclude_input_in_output:true,triton_max_batch_size:2048,max_queue_delay_microseconds:0,max_beam_width:1,max_queue_size:2048,enable_kv_cache_reuse:false
-    python3 ../tools/fill_template.py -i preprocessing/config.pbtxt triton_max_batch_size:2048,tokenizer_dir:$model_path,preprocessing_instance_count:5
-    python3 ../tools/fill_template.py -i postprocessing/config.pbtxt triton_max_batch_size:2048,tokenizer_dir:$model_path,postprocessing_instance_count:5,skip_special_tokens:false
-    python3 ../tools/fill_template.py -i ensemble/config.pbtxt triton_max_batch_size:$max_batch_size
-    python3 ../tools/fill_template.py -i tensorrt_llm_bls/config.pbtxt triton_max_batch_size:$max_batch_size,decoupled_mode:true,accumulate_tokens:"False",bls_instance_count:1
+    python3 ../tools/fill_template.py -i preprocessing/config.pbtxt "triton_max_batch_size:2048,tokenizer_dir:$model_path,preprocessing_instance_count:5"
+    python3 ../tools/fill_template.py -i postprocessing/config.pbtxt "triton_max_batch_size:2048,tokenizer_dir:$model_path,postprocessing_instance_count:5,skip_special_tokens:false"
+    python3 ../tools/fill_template.py -i ensemble/config.pbtxt triton_max_batch_size:"$max_batch_size"
+    python3 ../tools/fill_template.py -i tensorrt_llm_bls/config.pbtxt "triton_max_batch_size:$max_batch_size,decoupled_mode:true,accumulate_tokens:False,bls_instance_count:1"
     cd /tensorrtllm_backend
     python3 scripts/launch_triton_server.py \
-        --world_size=${model_tp_size} \
+        --world_size="${model_tp_size}" \
         --model_repo=/tensorrtllm_backend/triton_model_repo &
 
 }
 
 launch_tgi_server() {
     model=$(echo "$common_params" | jq -r '.model')
     tp=$(echo "$common_params" | jq -r '.tp')
-    dataset_name=$(echo "$common_params" | jq -r '.dataset_name')
-    dataset_path=$(echo "$common_params" | jq -r '.dataset_path')
     port=$(echo "$common_params" | jq -r '.port')
-    num_prompts=$(echo "$common_params" | jq -r '.num_prompts')
     server_args=$(json2args "$server_params")
 
     if echo "$common_params" | jq -e 'has("fp8")' >/dev/null; then

@@ -129,10 +125,7 @@ launch_tgi_server() {
 launch_lmdeploy_server() {
     model=$(echo "$common_params" | jq -r '.model')
     tp=$(echo "$common_params" | jq -r '.tp')
-    dataset_name=$(echo "$common_params" | jq -r '.dataset_name')
-    dataset_path=$(echo "$common_params" | jq -r '.dataset_path')
     port=$(echo "$common_params" | jq -r '.port')
-    num_prompts=$(echo "$common_params" | jq -r '.num_prompts')
     server_args=$(json2args "$server_params")
 
     server_command="lmdeploy serve api_server $model \

@@ -149,10 +142,7 @@ launch_sglang_server() {
 
     model=$(echo "$common_params" | jq -r '.model')
     tp=$(echo "$common_params" | jq -r '.tp')
-    dataset_name=$(echo "$common_params" | jq -r '.dataset_name')
-    dataset_path=$(echo "$common_params" | jq -r '.dataset_path')
     port=$(echo "$common_params" | jq -r '.port')
-    num_prompts=$(echo "$common_params" | jq -r '.num_prompts')
     server_args=$(json2args "$server_params")
 
     if echo "$common_params" | jq -e 'has("fp8")' >/dev/null; then

@@ -185,10 +175,7 @@ launch_vllm_server() {
 
     model=$(echo "$common_params" | jq -r '.model')
     tp=$(echo "$common_params" | jq -r '.tp')
-    dataset_name=$(echo "$common_params" | jq -r '.dataset_name')
-    dataset_path=$(echo "$common_params" | jq -r '.dataset_path')
     port=$(echo "$common_params" | jq -r '.port')
-    num_prompts=$(echo "$common_params" | jq -r '.num_prompts')
     server_args=$(json2args "$server_params")
 
     if echo "$common_params" | jq -e 'has("fp8")' >/dev/null; then

@@ -217,19 +204,19 @@ launch_vllm_server() {
 
 main() {
 
-    if [[ $CURRENT_LLM_SERVING_ENGINE == "trt" ]]; then
+    if [[ "$CURRENT_LLM_SERVING_ENGINE" == "trt" ]]; then
         launch_trt_server
     fi
 
-    if [[ $CURRENT_LLM_SERVING_ENGINE == "tgi" ]]; then
+    if [[ "$CURRENT_LLM_SERVING_ENGINE" == "tgi" ]]; then
         launch_tgi_server
     fi
 
-    if [[ $CURRENT_LLM_SERVING_ENGINE == "lmdeploy" ]]; then
+    if [[ "$CURRENT_LLM_SERVING_ENGINE" == "lmdeploy" ]]; then
         launch_lmdeploy_server
     fi
 
-    if [[ $CURRENT_LLM_SERVING_ENGINE == "sglang" ]]; then
+    if [[ "$CURRENT_LLM_SERVING_ENGINE" == "sglang" ]]; then
         launch_sglang_server
     fi
 
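Beyond quoting, this file deletes assignments (tensorrtllm_backend_dir, dataset_name, dataset_path, num_prompts) whose values were never read, the condition shellcheck reports as SC2034. If an intentionally unused assignment ever needs to stay, a directive comment is the usual escape hatch; a sketch (the kept variable is hypothetical):

    # shellcheck disable=SC2034  # kept deliberately for future use
    dataset_name=$(echo "$common_params" | jq -r '.dataset_name')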

.buildkite/nightly-benchmarks/scripts/nightly-annotate.sh (+6 −6)

@@ -16,10 +16,10 @@ main() {
     fi
 
     # initial annotation
-    description="$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/nightly-descriptions.md"
+    #description="$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/nightly-descriptions.md"
 
     # download results
-    cd $VLLM_SOURCE_CODE_LOC/benchmarks
+    cd "$VLLM_SOURCE_CODE_LOC/benchmarks"
     mkdir -p results/
     /workspace/buildkite-agent artifact download 'results/*nightly_results.json' results/
     ls
@@ -30,15 +30,15 @@ main() {
     /workspace/buildkite-agent artifact upload "results.zip"
 
     # upload benchmarking scripts
-    cd $VLLM_SOURCE_CODE_LOC/
+    cd "$VLLM_SOURCE_CODE_LOC/"
     zip -r nightly-benchmarks.zip .buildkite/ benchmarks/
     /workspace/buildkite-agent artifact upload "nightly-benchmarks.zip"
 
-    cd $VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/
+    cd "$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/"
     # upload benchmarking pipeline
     /workspace/buildkite-agent artifact upload "nightly-pipeline.yaml"
 
-    cd $VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/
+    cd "$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/"
     /workspace/buildkite-agent annotate --style "success" --context "nightly-benchmarks-results" --append < nightly-annotation.md
 
 
@@ -75,4 +75,4 @@ main() {
     # /workspace/buildkite-agent annotate --style "success" --context "nightly-benchmarks-results" --append < nightly_results.md
 }
 
-main "$@"
\ No newline at end of file
+main "$@"

.buildkite/nightly-benchmarks/scripts/run-nightly-benchmarks.sh (+14 −16)

@@ -12,7 +12,7 @@ check_gpus() {
         echo "Need at least 1 GPU to run benchmarking."
         exit 1
     fi
-    declare -g gpu_type=$(echo $(nvidia-smi --query-gpu=name --format=csv,noheader) | awk '{print $2}')
+    declare -g gpu_type="$(nvidia-smi --query-gpu=name --format=csv,noheader | awk '{print $2}')"
     echo "GPU type is $gpu_type"
 }
 
@@ -102,7 +102,7 @@ kill_gpu_processes() {
     pkill -f text-generation
     pkill -f lmdeploy
 
-    while [ $(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits | head -n 1) -ge 1000 ]; do
+    while [ "$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits | head -n 1)" -ge 1000 ]; do
         sleep 1
     done
 }
@@ -119,8 +119,8 @@ wait_for_server() {
 ensure_installed() {
     # Ensure that the given command is installed by apt-get
     local cmd=$1
-    if ! which $cmd >/dev/null; then
-        apt-get update && apt-get install -y $cmd
+    if ! which "$cmd" >/dev/null; then
+        apt-get update && apt-get install -y "$cmd"
     fi
 }
 
@@ -173,13 +173,11 @@ run_serving_tests() {
             echo "Reuse previous server for test case $test_name"
         else
            kill_gpu_processes
-            bash $VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/scripts/launch-server.sh \
+            bash "$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/scripts/launch-server.sh" \
                 "$server_params" "$common_params"
         fi
 
-        wait_for_server
-
-        if [ $? -eq 0 ]; then
+        if wait_for_server; then
             echo ""
             echo "$CURRENT_LLM_SERVING_ENGINE server is up and running."
         else
@@ -190,13 +188,13 @@ run_serving_tests() {
 
         # prepare tokenizer
         # this is required for lmdeploy.
-        cd $VLLM_SOURCE_CODE_LOC/benchmarks
+        cd "$VLLM_SOURCE_CODE_LOC/benchmarks"
         rm -rf /tokenizer_cache
         mkdir /tokenizer_cache
         python3 ../.buildkite/nightly-benchmarks/scripts/download-tokenizer.py \
             --model "$model" \
             --cachedir /tokenizer_cache
-        cd $VLLM_SOURCE_CODE_LOC/benchmarks
+        cd "$VLLM_SOURCE_CODE_LOC/benchmarks"
 
 
         # change model name for lmdeploy (it will not follow standard hf name)
@@ -307,11 +305,11 @@ run_serving_tests() {
 prepare_dataset() {
 
     # download sharegpt dataset
-    cd $VLLM_SOURCE_CODE_LOC/benchmarks
+    cd "$VLLM_SOURCE_CODE_LOC/benchmarks"
     wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
 
     # duplicate sonnet by 4x, to allow benchmarking with input length 2048
-    cd $VLLM_SOURCE_CODE_LOC/benchmarks
+    cd "$VLLM_SOURCE_CODE_LOC/benchmarks"
     echo "" > sonnet_4x.txt
     for _ in {1..4}
     do
@@ -339,17 +337,17 @@ main() {
 
     prepare_dataset
 
-    cd $VLLM_SOURCE_CODE_LOC/benchmarks
+    cd "$VLLM_SOURCE_CODE_LOC/benchmarks"
     declare -g RESULTS_FOLDER=results/
     mkdir -p $RESULTS_FOLDER
-    BENCHMARK_ROOT=$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/
+    BENCHMARK_ROOT="$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/"
 
     # run the test
-    run_serving_tests $BENCHMARK_ROOT/tests/nightly-tests.json
+    run_serving_tests "$BENCHMARK_ROOT/tests/nightly-tests.json"
 
     # upload benchmark results to buildkite
     python3 -m pip install tabulate pandas
-    python3 $BENCHMARK_ROOT/scripts/summary-nightly-results.py
+    python3 "$BENCHMARK_ROOT/scripts/summary-nightly-results.py"
     upload_to_buildkite
 
 }
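
The wait_for_server change addresses SC2181: testing $? on a separate line is fragile, because any statement inserted between the command and the test silently overwrites the status, whereas putting the command directly in the if condition cannot drift. Side by side:

    # fragile: the intervening blank line is harmless today,
    # but any future command inserted there resets $?
    wait_for_server
    if [ $? -eq 0 ]; then echo "up"; fi

    # robust: the if tests wait_for_server's exit status directly
    if wait_for_server; then echo "up"; fi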

.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh (+9 −10)

@@ -17,7 +17,7 @@ check_gpus() {
         echo "Need at least 1 GPU to run benchmarking."
         exit 1
     fi
-    declare -g gpu_type=$(echo $(nvidia-smi --query-gpu=name --format=csv,noheader) | awk '{print $2}')
+    declare -g gpu_type=$(nvidia-smi --query-gpu=name --format=csv,noheader | awk '{print $2}')
     echo "GPU type is $gpu_type"
 }
 
@@ -93,7 +93,7 @@ kill_gpu_processes() {
 
 
     # wait until GPU memory usage smaller than 1GB
-    while [ $(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits | head -n 1) -ge 1000 ]; do
+    while [ "$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits | head -n 1)" -ge 1000 ]; do
         sleep 1
     done
 
@@ -117,7 +117,7 @@ upload_to_buildkite() {
     fi
 
     # Use the determined command to annotate and upload artifacts
-    $BUILDKITE_AGENT_COMMAND annotate --style "info" --context "$BUILDKITE_LABEL-benchmark-results" <$RESULTS_FOLDER/benchmark_results.md
+    $BUILDKITE_AGENT_COMMAND annotate --style "info" --context "$BUILDKITE_LABEL-benchmark-results" < "$RESULTS_FOLDER/benchmark_results.md"
     $BUILDKITE_AGENT_COMMAND artifact upload "$RESULTS_FOLDER/*"
 }
 
@@ -150,7 +150,7 @@ run_latency_tests() {
         # check if there is enough GPU to run the test
         tp=$(echo "$latency_params" | jq -r '.tensor_parallel_size')
         if [[ $gpu_count -lt $tp ]]; then
-            echo "Required tensor-parallel-size $tp but only $gpu_count GPU found. Skip testcase $testname."
+            echo "Required tensor-parallel-size $tp but only $gpu_count GPU found. Skip testcase $test_name."
             continue
         fi
 
@@ -206,9 +206,9 @@ run_throughput_tests() {
         throughput_args=$(json2args "$throughput_params")
 
         # check if there is enough GPU to run the test
-        tp=$(echo $throughput_params | jq -r '.tensor_parallel_size')
+        tp=$(echo "$throughput_params" | jq -r '.tensor_parallel_size')
         if [[ $gpu_count -lt $tp ]]; then
-            echo "Required tensor-parallel-size $tp but only $gpu_count GPU found. Skip testcase $testname."
+            echo "Required tensor-parallel-size $tp but only $gpu_count GPU found. Skip testcase $test_name."
             continue
         fi
 
@@ -270,15 +270,15 @@ run_serving_tests() {
         # check if there is enough GPU to run the test
         tp=$(echo "$server_params" | jq -r '.tensor_parallel_size')
         if [[ $gpu_count -lt $tp ]]; then
-            echo "Required tensor-parallel-size $tp but only $gpu_count GPU found. Skip testcase $testname."
+            echo "Required tensor-parallel-size $tp but only $gpu_count GPU found. Skip testcase $test_name."
             continue
         fi
 
         # check if server model and client model is aligned
         server_model=$(echo "$server_params" | jq -r '.model')
         client_model=$(echo "$client_params" | jq -r '.model')
         if [[ $server_model != "$client_model" ]]; then
-            echo "Server model and client model must be the same. Skip testcase $testname."
+            echo "Server model and client model must be the same. Skip testcase $test_name."
             continue
         fi
 
@@ -293,8 +293,7 @@ run_serving_tests() {
         server_pid=$!
 
         # wait until the server is alive
-        wait_for_server
-        if [ $? -eq 0 ]; then
+        if wait_for_server; then
             echo ""
             echo "vllm server is up and running."
         else
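
Two fixes ride along with the quoting here: the skip messages referenced $testname while the loop variable is $test_name (the kind of referenced-but-never-assigned variable shellcheck surfaces as SC2154), and the $(nvidia-smi ...) substitution inside [ ] gains quotes. The quotes matter because an empty substitution makes the unquoted test word vanish entirely; a sketch:

    used=""                # simulate nvidia-smi printing nothing
    [ $used -ge 1000 ]     # word disappears: '[: -ge: unary operator expected'
    [ "$used" -ge 1000 ]   # still fails, but predictably: integer expression expected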

.buildkite/nightly-benchmarks/scripts/wait-for-image.sh (+2 −2)

@@ -6,7 +6,7 @@ TIMEOUT_SECONDS=10
 
 retries=0
 while [ $retries -lt 1000 ]; do
-    if [ $(curl -s --max-time $TIMEOUT_SECONDS -L -H "Authorization: Bearer $TOKEN" -o /dev/null -w "%{http_code}" $URL) -eq 200 ]; then
+    if [ "$(curl -s --max-time "$TIMEOUT_SECONDS" -L -H "Authorization: Bearer $TOKEN" -o /dev/null -w "%{http_code}" "$URL")" -eq 200 ]; then
         exit 0
     fi
 
@@ -16,4 +16,4 @@ while [ $retries -lt 1000 ]; do
     sleep 5
 done
 
-exit 1
\ No newline at end of file
+exit 1
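
Quoting the whole $(curl ...) substitution keeps the [ ... -eq 200 ] test well formed even when curl times out and -w '%{http_code}' prints 000. Capturing the status code in a variable first is an equally valid style that sidesteps the nested quoting; a sketch against a hypothetical health endpoint:

    # poll until the endpoint returns HTTP 200, at most 10 attempts
    for _ in $(seq 10); do
        code=$(curl -s --max-time 10 -o /dev/null -w '%{http_code}' "https://example.com/health")
        [ "$code" -eq 200 ] && exit 0
        sleep 5
    done
    exit 1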
