From 7927e1b9cd5703bbc3d8ef956f402c2c3ad71b90 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 11 Apr 2025 22:08:56 -0700
Subject: [PATCH] Add warmup client option

---
 README.md                       | 5 +++++
 docs/openapi.json               | 4 ++++
 docs/source/en/cli_arguments.md | 5 +++++
 router/src/lib.rs               | 5 ++++-
 router/src/main.rs              | 6 ++++++
 5 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 520ad8d0..6f6472e7 100644
--- a/README.md
+++ b/README.md
@@ -215,6 +215,11 @@ Options:
           Unused for gRPC servers
 
           [env: AUTO_TRUNCATE=]
+
+      --warmup-model
+          Send a dummy request to the model before server start-up
+
+          [env: WARMUP_MODEL=]
 
       --default-prompt-name <DEFAULT_PROMPT_NAME>
           The name of the prompt that should be used by default for encoding. If not set, no prompt will be applied.
diff --git a/docs/openapi.json b/docs/openapi.json
index 35000a1c..7fccdd50 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -1058,6 +1058,7 @@
           "max_batch_tokens",
           "max_client_batch_size",
           "auto_truncate",
+          "warmup_model",
           "tokenization_workers",
           "version"
         ],
@@ -1129,6 +1130,9 @@
             "type": "string",
             "description": "Router Info",
             "example": "0.5.0"
+          },
+          "warmup_model": {
+            "type": "boolean"
           }
         }
       },
diff --git a/docs/source/en/cli_arguments.md b/docs/source/en/cli_arguments.md
index 0882893c..7ed38fa1 100644
--- a/docs/source/en/cli_arguments.md
+++ b/docs/source/en/cli_arguments.md
@@ -106,6 +106,11 @@ Options:
           Unused for gRPC servers
 
           [env: AUTO_TRUNCATE=]
+
+      --warmup-model
+          Send a dummy request to the model before server start-up
+
+          [env: WARMUP_MODEL=]
 
       --default-prompt-name <DEFAULT_PROMPT_NAME>
           The name of the prompt that should be used by default for encoding. If not set, no prompt will be applied.
diff --git a/router/src/lib.rs b/router/src/lib.rs
index 49e0581d..d116b587 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -52,6 +52,7 @@ pub async fn run(
     max_batch_requests: Option<usize>,
     max_client_batch_size: usize,
     auto_truncate: bool,
+    warmup_model: bool,
     default_prompt: Option<String>,
     default_prompt_name: Option<String>,
     hf_token: Option<String>,
@@ -248,7 +249,7 @@ pub async fn run(
         .await
         .context("Model backend is not healthy")?;
 
-    if !backend.padded_model {
+    if !backend.padded_model || warmup_model {
         tracing::info!("Warming up model");
         backend
             .warmup(max_input_length, max_batch_tokens, max_batch_requests)
@@ -288,6 +289,7 @@ pub async fn run(
         max_batch_requests,
         max_client_batch_size,
         auto_truncate,
+        warmup_model,
         version: env!("CARGO_PKG_VERSION"),
         sha: option_env!("VERGEN_GIT_SHA"),
         docker_label: option_env!("DOCKER_LABEL"),
@@ -510,6 +512,7 @@ pub struct Info {
     #[cfg_attr(feature = "http", schema(example = "32"))]
     pub max_client_batch_size: usize,
     pub auto_truncate: bool,
+    pub warmup_model: bool,
     #[cfg_attr(feature = "http", schema(example = "4"))]
     pub tokenization_workers: usize,
     /// Router Info
diff --git a/router/src/main.rs b/router/src/main.rs
index e4a902d6..241189ea 100644
--- a/router/src/main.rs
+++ b/router/src/main.rs
@@ -79,6 +79,11 @@ struct Args {
     #[clap(long, env)]
     auto_truncate: bool,
 
+    /// Send a dummy request to the model before server start-up
+    ///
+    #[clap(long, env)]
+    warmup_model: bool,
+
     /// The name of the prompt that should be used by default for encoding. If not set, no prompt
     /// will be applied.
     ///
@@ -216,6 +221,7 @@ async fn main() -> Result<()> {
         args.max_batch_requests,
         args.max_client_batch_size,
         args.auto_truncate,
+        args.warmup_model,
         args.default_prompt,
         args.default_prompt_name,
         token,
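
Usage note (not part of the patch): a minimal sketch of how the new option would be enabled, assuming the `text-embeddings-router` binary documented in the repository README; the model id and port below are illustrative, and only `--warmup-model` / `WARMUP_MODEL` come from this change. With the flag set, the `!backend.padded_model || warmup_model` condition above makes the router run its warmup pass even for padded models, which previously skipped it.

```shell
# Hypothetical invocation: enable the start-up warmup request via the new CLI flag
text-embeddings-router --model-id BAAI/bge-base-en-v1.5 --port 8080 --warmup-model

# Equivalent via the environment variable bound by #[clap(long, env)]
WARMUP_MODEL=true text-embeddings-router --model-id BAAI/bge-base-en-v1.5 --port 8080
```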