From f7d402f3b537a66f37ed4ee569f73fb276b04681 Mon Sep 17 00:00:00 2001 From: miaochangyu Date: Tue, 24 Jun 2025 11:03:18 +0800 Subject: [PATCH 1/9] add growing object graph util to analyse memory leak and OOM Signed-off-by: miaochangyu --- requirements/common.txt | 1 + vllm/engine/async_llm_engine.py | 6 ++ vllm/engine/llm_engine.py | 6 ++ vllm/engine/multiprocessing/__init__.py | 3 + vllm/engine/multiprocessing/client.py | 14 +++- vllm/engine/multiprocessing/engine.py | 13 +++- vllm/engine/protocol.py | 10 +++ vllm/entrypoints/openai/api_server.py | 20 ++++++ vllm/envs.py | 7 ++ vllm/executor/executor_base.py | 6 ++ vllm/utils.py | 86 +++++++++++++++++++++++++ vllm/worker/worker_base.py | 21 +++++- 12 files changed, 190 insertions(+), 3 deletions(-) diff --git a/requirements/common.txt b/requirements/common.txt index 639abe51101..ceac64d8a48 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -44,3 +44,4 @@ watchfiles # required for http server to monitor the updates of TLS files python-json-logger # Used by logging as per examples/others/logging_configuration.md scipy # Required for phi-4-multimodal-instruct ninja # Required for xgrammar, rocm, tpu, xpu +objgraph # Required for memory debugging and object graph analysis diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py index 3d7d28055dd..d9ed9a7b4b2 100644 --- a/vllm/engine/async_llm_engine.py +++ b/vllm/engine/async_llm_engine.py @@ -1162,6 +1162,12 @@ async def start_profile(self) -> None: async def stop_profile(self) -> None: self.engine.stop_profile() + async def start_object_graph(self) -> None: + self.engine.start_object_graph() + + async def stop_object_graph(self) -> None: + self.engine.stop_object_graph() + async def reset_mm_cache(self) -> None: self.engine.reset_mm_cache() diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 8fccf9bd2aa..3738e830321 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -1854,6 +1854,12 @@ def start_profile(self) -> None: def stop_profile(self) -> None: self.model_executor.stop_profile() + def start_object_graph(self) -> None: + self.model_executor.start_object_graph() + + def stop_object_graph(self) -> None: + self.model_executor.stop_object_graph() + def sleep(self, level: int = 1) -> None: assert self.vllm_config.model_config.enable_sleep_mode, ( "Sleep mode is not enabled in the model config") diff --git a/vllm/engine/multiprocessing/__init__.py b/vllm/engine/multiprocessing/__init__.py index db968cd6b5d..5f2578e20f3 100644 --- a/vllm/engine/multiprocessing/__init__.py +++ b/vllm/engine/multiprocessing/__init__.py @@ -82,6 +82,9 @@ class RPCUProfileRequest(Enum): START_PROFILE = 1 STOP_PROFILE = 2 +class RPCUObjectGraphRequest(Enum): + START_OBJECT_GRAPH = 1 + STOP_OBJECT_GRAPH = 2 class RPCResetMultiModalCacheRequest(Enum): RESET = 1 diff --git a/vllm/engine/multiprocessing/client.py b/vllm/engine/multiprocessing/client.py index 9e018ec7f34..e6bdd87b5f1 100644 --- a/vllm/engine/multiprocessing/client.py +++ b/vllm/engine/multiprocessing/client.py @@ -34,7 +34,7 @@ RPCResetMultiModalCacheRequest, RPCResetPrefixCacheRequest, RPCSleepRequest, RPCStartupRequest, - RPCStartupResponse, + RPCStartupResponse, RPCUObjectGraphRequest, RPCUProfileRequest, RPCWakeUpRequest) from vllm.engine.protocol import EngineClient # yapf: enable @@ -615,6 +615,18 @@ async def stop_profile(self) -> None: await self._send_one_way_rpc_request( request=RPCUProfileRequest.STOP_PROFILE, socket=self.input_socket) + + async def 
start_object_graph(self) -> None: + """Start object graph the engine""" + + await self._send_one_way_rpc_request( + request=RPCUObjectGraphRequest.START_OBJECT_GRAPH, socket=self.input_socket) + + async def stop_object_graph(self) -> None: + """Stop object graph the engine""" + + await self._send_one_way_rpc_request( + request=RPCUObjectGraphRequest.STOP_OBJECT_GRAPH, socket=self.input_socket) async def reset_mm_cache(self) -> None: """Reset the multi-modal cache""" diff --git a/vllm/engine/multiprocessing/engine.py b/vllm/engine/multiprocessing/engine.py index ef088bd3933..ba97c03a364 100644 --- a/vllm/engine/multiprocessing/engine.py +++ b/vllm/engine/multiprocessing/engine.py @@ -26,7 +26,7 @@ RPCResetMultiModalCacheRequest, RPCResetPrefixCacheRequest, RPCSleepRequest, RPCStartupRequest, - RPCStartupResponse, + RPCStartupResponse, RPCUObjectGraphRequest, RPCUProfileRequest, RPCWakeUpRequest) # yapf: enable from vllm.logger import init_logger @@ -284,6 +284,11 @@ def handle_new_input(self): self.wake_up(request.tags) elif isinstance(request, RPCIsSleepingRequest): self._handle_is_sleeping_request(request) + elif isinstance(request, RPCUObjectGraphRequest): + if request == RPCUObjectGraphRequest.START_OBJECT_GRAPH: + self.start_object_graph() + else: + self.stop_object_graph() else: raise ValueError("Unknown RPCRequest Type: " f"{type(request)}") @@ -416,6 +421,12 @@ def start_profile(self) -> None: def stop_profile(self) -> None: self.engine.stop_profile() + def start_object_graph(self) -> None: + self.engine.start_object_graph() + + def stop_object_graph(self) -> None: + self.engine.stop_object_graph() + def reset_mm_cache(self) -> bool: return self.engine.reset_mm_cache() diff --git a/vllm/engine/protocol.py b/vllm/engine/protocol.py index 8688fcc82cd..96ef1816031 100644 --- a/vllm/engine/protocol.py +++ b/vllm/engine/protocol.py @@ -294,6 +294,16 @@ async def stop_profile(self) -> None: """Start profiling the engine""" ... + @abstractmethod + async def start_object_graph(self) -> None: + """Start object graph the engine""" + ... + + @abstractmethod + async def stop_object_graph(self) -> None: + """Stop object graph the engine""" + ... + @abstractmethod async def reset_mm_cache(self) -> None: """Reset the multi-modal cache""" diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 62f1c6a7c12..e6c8e8fe6af 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -972,6 +972,26 @@ async def stop_profile(raw_request: Request): return Response(status_code=200) +if envs.VLLM_OBJ_GRAPH_DIR: + logger.warning( + "Object Graph is enabled in the API server. This should ONLY be " + "used for local development!") + + @router.post("/start_object_graph") + async def start_object_graph(raw_request: Request): + logger.info("Starting object graph...") + await engine_client(raw_request).start_object_graph() + logger.info("Object graph started.") + return Response(status_code=200) + + @router.post("/stop_object_graph") + async def stop_object_graph(raw_request: Request): + logger.info("Stopping object graph...") + await engine_client(raw_request).stop_object_graph() + logger.info("Object graph stopped.") + return Response(status_code=200) + + if envs.VLLM_ALLOW_RUNTIME_LORA_UPDATING: logger.warning( "LoRA dynamic loading & unloading is enabled in the API server. 
" diff --git a/vllm/envs.py b/vllm/envs.py index 01d8d8a2d2e..30d0714e5e7 100644 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -77,6 +77,7 @@ VLLM_PLUGINS: Optional[list[str]] = None VLLM_LORA_RESOLVER_CACHE_DIR: Optional[str] = None VLLM_TORCH_PROFILER_DIR: Optional[str] = None + VLLM_OBJ_GRAPH_DIR: Optional[str] = None VLLM_USE_TRITON_AWQ: bool = False VLLM_ALLOW_RUNTIME_LORA_UPDATING: bool = False VLLM_SKIP_P2P_CHECK: bool = False @@ -599,6 +600,12 @@ def get_vllm_port() -> Optional[int]: lambda: (None if os.getenv("VLLM_TORCH_PROFILER_DIR", None) is None else os .path.expanduser(os.getenv("VLLM_TORCH_PROFILER_DIR", "."))), + # Enables memory object graph tracking if set. Path to the directory where + # object graph files are saved. Note that it must be an absolute path. + "VLLM_OBJ_GRAPH_DIR": + lambda: (None if os.getenv("VLLM_OBJ_GRAPH_DIR", None) is None else os + .path.expanduser(os.getenv("VLLM_OBJ_GRAPH_DIR", "."))), + # If set, vLLM will use Triton implementations of AWQ. "VLLM_USE_TRITON_AWQ": lambda: bool(int(os.getenv("VLLM_USE_TRITON_AWQ", "0"))), diff --git a/vllm/executor/executor_base.py b/vllm/executor/executor_base.py index 99e12201c96..1221ff115ff 100644 --- a/vllm/executor/executor_base.py +++ b/vllm/executor/executor_base.py @@ -199,6 +199,12 @@ def start_profile(self) -> None: def stop_profile(self) -> None: self.collective_rpc("stop_profile") + def start_object_graph(self) -> None: + self.collective_rpc("start_object_graph") + + def stop_object_graph(self) -> None: + self.collective_rpc("stop_object_graph") + def sleep(self, level: int = 1): if self.is_sleeping: logger.warning("Executor is already sleeping.") diff --git a/vllm/utils.py b/vllm/utils.py index 34be4d52c48..502c1f9a5ab 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -2924,3 +2924,89 @@ def is_torch_equal_or_newer(target: str) -> bool: except Exception: # Fallback to PKG-INFO to load the package info, needed by the doc gen. 
return Version(importlib.metadata.version('torch')) >= Version(target) + +class GrowingMemoryObjGraph: + def __init__(self): + from vllm import envs + if not envs.VLLM_OBJ_GRAPH_DIR: + raise RuntimeError("VLLM_OBJ_GRAPH_DIR is not set.") + self._obj_graph_dir = envs.VLLM_OBJ_GRAPH_DIR + os.makedirs(self._obj_graph_dir, exist_ok=True) + + self._start_state = False + + + def start(self) -> str: + import objgraph + + gc.collect() + objgraph.growth() + self._start_state = True + self.start_time = time.time() + return "start growing obj graph statistics" + + def stop(self) -> str: + import objgraph + import gc + + if not self._start_state: + msg = "obj graph statistics is not started" + logger.warning(msg) + return msg + + # Generate output filename with date + current_date = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + + # Create subdirectory for this analysis + analysis_dir = os.path.join(self._obj_graph_dir, f"analysis_{current_date}") + os.makedirs(analysis_dir, exist_ok=True) + + output_lines = [] + current_time = time.time() + statistics_time = current_time - self.start_time + output_lines.append(f"{'='*50}\n start time {self.start_time}, Statistics time: {statistics_time} seconds\n{'='*50}\n") + + gc.collect() + growth_info = objgraph.growth() + + for gt in growth_info: + output_lines.append(f"Growth type: {gt[0]}, Count: {gt[1]}, Growth amount: {gt[2]}") + + for gt in growth_info: + # Get the first object of this type + try: + obj = objgraph.by_type(gt[0])[0] + except IndexError: + logger.warning(f"Type {gt[0]} has no available objects") + continue + + # Generate back reference graph + objgraph.show_backrefs( + obj, + max_depth=10, + too_many=5, + filename=os.path.join(analysis_dir, f"{gt[0]}_backrefs.dot") + ) + + # Generate reference graph + objgraph.show_refs( + obj, + max_depth=10, + too_many=5, + filename=os.path.join(analysis_dir, f"{gt[0]}_refs.dot") + ) + + # Generate reference chain to module + objgraph.show_chain( + objgraph.find_backref_chain(obj, objgraph.is_proper_module), + filename=os.path.join(analysis_dir, f"{gt[0]}_chain.dot") + ) + + output_file_path = os.path.join(analysis_dir, "growing_memory_stats.log") + with open(output_file_path, 'w', encoding='utf-8') as f: + for line in output_lines: + f.write(line + '\n') + + logger.info(f"obj graph statistics completed, output_lines: {output_lines}") + + return "obj graph statistics completed" \ No newline at end of file diff --git a/vllm/worker/worker_base.py b/vllm/worker/worker_base.py index c382b29ad19..4002f54f789 100644 --- a/vllm/worker/worker_base.py +++ b/vllm/worker/worker_base.py @@ -11,6 +11,7 @@ import torch import torch.nn as nn +from vllm import envs from vllm.config import (ObservabilityConfig, VllmConfig, set_current_vllm_config) from vllm.distributed import broadcast_tensor_dict, get_pp_group, get_tp_group @@ -18,7 +19,7 @@ from vllm.lora.request import LoRARequest from vllm.model_executor.layers.sampler import SamplerOutput from vllm.sequence import ExecuteModelRequest, IntermediateTensors -from vllm.utils import (enable_trace_function_call_for_thread, +from vllm.utils import (GrowingMemoryObjGraph, enable_trace_function_call_for_thread, resolve_obj_by_qualname, run_method, update_environment_variables, warn_for_unimplemented_methods) @@ -56,6 +57,14 @@ def __init__( from vllm.platforms import current_platform self.current_platform = current_platform + if envs.VLLM_OBJ_GRAPH_DIR: + object_graph_dir = envs.VLLM_OBJ_GRAPH_DIR + logger.info("Object graph enabled. 
Traces will be saved to: %s", + object_graph_dir) + self.obj_graph = GrowingMemoryObjGraph() + else: + self.obj_graph = None + def init_device(self) -> None: """Initialize device state, such as loading the model or other on-device memory allocations. @@ -129,6 +138,16 @@ def list_loras(self) -> Set[int]: def vocab_size(self) -> int: """Get vocabulary size from model configuration.""" return self.model_config.get_vocab_size() + + def start_object_graph(self): + if self.obj_graph is None: + raise RuntimeError("Object graph is not enabled.") + return self.obj_graph.start() + + def stop_object_graph(self): + if self.obj_graph is None: + raise RuntimeError("Object graph is not enabled.") + return self.obj_graph.stop() class DelegateWorkerBase(WorkerBase): From 4c47615a395007cece72d7069b37f3705512fc06 Mon Sep 17 00:00:00 2001 From: miaochangyu Date: Tue, 24 Jun 2025 11:54:33 +0800 Subject: [PATCH 2/9] add offline example Signed-off-by: miaochangyu --- .../simple_growthing_obj_graph.py | 54 +++++++++++++++++++ vllm/entrypoints/llm.py | 6 +++ 2 files changed, 60 insertions(+) create mode 100644 examples/offline_inference/simple_growthing_obj_graph.py diff --git a/examples/offline_inference/simple_growthing_obj_graph.py b/examples/offline_inference/simple_growthing_obj_graph.py new file mode 100644 index 00000000000..2c82a546491 --- /dev/null +++ b/examples/offline_inference/simple_growthing_obj_graph.py @@ -0,0 +1,54 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import os +import time + +from vllm import LLM, SamplingParams + +# Enable object graph analysis by setting environment variable +os.environ["VLLM_OBJ_GRAPH_DIR"] = "./vllm_obj_graph" + +# Sample prompts +prompts = [ + "Hello, my name is", + "The president of the United States is", + "The capital of France is", + "The future of artificial intelligence is", +] +# Create sampling parameters object +sampling_params = SamplingParams(temperature=0.8, top_p=0.95) + + +def main(): + # Create LLM instance + llm = LLM(model="facebook/opt-125m", tensor_parallel_size=1) + + # Start object graph analysis + llm.start_object_graph() + + # Generate text from prompts. The output is a list of RequestOutput objects + # containing the prompt, generated text, and other information. + outputs = llm.generate(prompts, sampling_params) + + # Stop object graph analysis + llm.stop_object_graph() + + # Print output results + print("-" * 50) + for output in outputs: + prompt = output.prompt + generated_text = output.outputs[0].text + print(f"Prompt: {prompt!r}\nGenerated text: {generated_text!r}") + print("-" * 50) + + # Add buffer time to wait for background processes (if multiprocessing is enabled) + # to complete writing object graph analysis output. + time.sleep(10) + + print(f"Object graph analysis completed! 
Results saved to: {os.environ['VLLM_OBJ_GRAPH_DIR']}") + print("You can check the generated .dot files and .log files to analyze memory object growth") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index 05e0be61ada..5b5bb599485 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -1329,6 +1329,12 @@ def start_profile(self) -> None: def stop_profile(self) -> None: self.llm_engine.stop_profile() + def start_object_graph(self) -> None: + self.llm_engine.start_object_graph() + + def stop_object_graph(self) -> None: + self.llm_engine.stop_object_graph() + def reset_prefix_cache(self, device: Optional[Device] = None) -> bool: return self.llm_engine.reset_prefix_cache(device) From ff0be92b55fce300537a1480937fc5cab64328b5 Mon Sep 17 00:00:00 2001 From: miaochangyu Date: Tue, 24 Jun 2025 23:13:14 +0800 Subject: [PATCH 3/9] fix Code Review comment Signed-off-by: miaochangyu --- vllm/utils.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/vllm/utils.py b/vllm/utils.py index 502c1f9a5ab..a7812b0d847 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -2959,12 +2959,17 @@ def stop(self) -> str: # Create subdirectory for this analysis analysis_dir = os.path.join(self._obj_graph_dir, f"analysis_{current_date}") - os.makedirs(analysis_dir, exist_ok=True) - + try: + os.makedirs(analysis_dir, exist_ok=True) + except OSError as e: + logger.error("Failed to create directory %s: %s", analysis_dir, e) + return f"Failed to create directory: {e}" + output_lines = [] current_time = time.time() statistics_time = current_time - self.start_time - output_lines.append(f"{'='*50}\n start time {self.start_time}, Statistics time: {statistics_time} seconds\n{'='*50}\n") + start_time_formatted = datetime.datetime.fromtimestamp(self.start_time).strftime("%Y-%m-%d %H:%M:%S") + output_lines.append(f"{'='*50}\n start time {start_time_formatted}, Statistics time: {statistics_time} seconds\n{'='*50}\n") gc.collect() growth_info = objgraph.growth() @@ -2977,7 +2982,7 @@ def stop(self) -> str: try: obj = objgraph.by_type(gt[0])[0] except IndexError: - logger.warning(f"Type {gt[0]} has no available objects") + logger.warning("Type %s has no available objects", gt[0]) continue # Generate back reference graph @@ -3003,10 +3008,14 @@ def stop(self) -> str: ) output_file_path = os.path.join(analysis_dir, "growing_memory_stats.log") - with open(output_file_path, 'w', encoding='utf-8') as f: - for line in output_lines: - f.write(line + '\n') + try: + with open(output_file_path, 'w', encoding='utf-8') as f: + for line in output_lines: + f.write(line + '\n') + except OSError as e: + logger.error("Failed to write to file %s: %s", output_file_path, e) + return f"Failed to write to file: {e}" - logger.info(f"obj graph statistics completed, output_lines: {output_lines}") + logger.info("obj graph statistics completed, output_lines: %s", output_lines) return "obj graph statistics completed" \ No newline at end of file From d0b65b9a3f7b03d7bb8a94b0b4e9f559b13c5811 Mon Sep 17 00:00:00 2001 From: miaochangyu Date: Wed, 25 Jun 2025 10:41:08 +0800 Subject: [PATCH 4/9] fix pre-commit pipeline Signed-off-by: miaochangyu --- .../simple_growthing_obj_graph.py | 4 +-- vllm/engine/multiprocessing/__init__.py | 3 +- vllm/engine/multiprocessing/client.py | 11 ++++--- vllm/engine/multiprocessing/engine.py | 5 +-- vllm/envs.py | 4 +-- vllm/utils.py | 33 ++++++++++++------- vllm/worker/worker_base.py | 3 +- 7 files 
changed, 40 insertions(+), 23 deletions(-) diff --git a/examples/offline_inference/simple_growthing_obj_graph.py b/examples/offline_inference/simple_growthing_obj_graph.py index 2c82a546491..e152e8941e6 100644 --- a/examples/offline_inference/simple_growthing_obj_graph.py +++ b/examples/offline_inference/simple_growthing_obj_graph.py @@ -46,8 +46,8 @@ def main(): # to complete writing object graph analysis output. time.sleep(10) - print(f"Object graph analysis completed! Results saved to: {os.environ['VLLM_OBJ_GRAPH_DIR']}") - print("You can check the generated .dot files and .log files to analyze memory object growth") + print(f"Completed! Results saved to: {os.environ['VLLM_OBJ_GRAPH_DIR']}") + print("You can check the generated files to analyze memory growth") if __name__ == "__main__": diff --git a/vllm/engine/multiprocessing/__init__.py b/vllm/engine/multiprocessing/__init__.py index 5f2578e20f3..0c288376bc4 100644 --- a/vllm/engine/multiprocessing/__init__.py +++ b/vllm/engine/multiprocessing/__init__.py @@ -133,7 +133,8 @@ class RPCAdapterLoadedResponse: RPCUProfileRequest, RPCLoadAdapterRequest, RPCResetMultiModalCacheRequest, RPCResetPrefixCacheRequest, RPCSleepRequest, - RPCWakeUpRequest, RPCIsSleepingRequest] + RPCWakeUpRequest, RPCIsSleepingRequest, + RPCUObjectGraphRequest] REQUEST_OUTPUTS_T = Union[List[RequestOutput], RPCAdapterLoadedResponse, RPCIsSleepingResponse, RPCError] diff --git a/vllm/engine/multiprocessing/client.py b/vllm/engine/multiprocessing/client.py index e6bdd87b5f1..842cc2f6187 100644 --- a/vllm/engine/multiprocessing/client.py +++ b/vllm/engine/multiprocessing/client.py @@ -34,8 +34,9 @@ RPCResetMultiModalCacheRequest, RPCResetPrefixCacheRequest, RPCSleepRequest, RPCStartupRequest, - RPCStartupResponse, RPCUObjectGraphRequest, - RPCUProfileRequest, RPCWakeUpRequest) + RPCStartupResponse, + RPCUProfileRequest, RPCWakeUpRequest, + RPCUObjectGraphRequest) from vllm.engine.protocol import EngineClient # yapf: enable from vllm.envs import VLLM_RPC_TIMEOUT @@ -620,13 +621,15 @@ async def start_object_graph(self) -> None: """Start object graph the engine""" await self._send_one_way_rpc_request( - request=RPCUObjectGraphRequest.START_OBJECT_GRAPH, socket=self.input_socket) + request=RPCUObjectGraphRequest.START_OBJECT_GRAPH, + socket=self.input_socket) async def stop_object_graph(self) -> None: """Stop object graph the engine""" await self._send_one_way_rpc_request( - request=RPCUObjectGraphRequest.STOP_OBJECT_GRAPH, socket=self.input_socket) + request=RPCUObjectGraphRequest.STOP_OBJECT_GRAPH, + socket=self.input_socket) async def reset_mm_cache(self) -> None: """Reset the multi-modal cache""" diff --git a/vllm/engine/multiprocessing/engine.py b/vllm/engine/multiprocessing/engine.py index ba97c03a364..e15b6b72a28 100644 --- a/vllm/engine/multiprocessing/engine.py +++ b/vllm/engine/multiprocessing/engine.py @@ -26,8 +26,9 @@ RPCResetMultiModalCacheRequest, RPCResetPrefixCacheRequest, RPCSleepRequest, RPCStartupRequest, - RPCStartupResponse, RPCUObjectGraphRequest, - RPCUProfileRequest, RPCWakeUpRequest) + RPCStartupResponse, + RPCUProfileRequest, RPCWakeUpRequest, + RPCUObjectGraphRequest) # yapf: enable from vllm.logger import init_logger from vllm.outputs import RequestOutput diff --git a/vllm/envs.py b/vllm/envs.py index 30d0714e5e7..94740ba6eff 100644 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -603,8 +603,8 @@ def get_vllm_port() -> Optional[int]: # Enables memory object graph tracking if set. Path to the directory where # object graph files are saved. 
Note that it must be an absolute path. "VLLM_OBJ_GRAPH_DIR": - lambda: (None if os.getenv("VLLM_OBJ_GRAPH_DIR", None) is None else os - .path.expanduser(os.getenv("VLLM_OBJ_GRAPH_DIR", "."))), + lambda: (None if os.getenv("VLLM_OBJ_GRAPH_DIR", None) is None else os.path + .expanduser(os.getenv("VLLM_OBJ_GRAPH_DIR", "."))), # If set, vLLM will use Triton implementations of AWQ. "VLLM_USE_TRITON_AWQ": diff --git a/vllm/utils.py b/vllm/utils.py index a7812b0d847..2797705ec86 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -2947,7 +2947,6 @@ def start(self) -> str: def stop(self) -> str: import objgraph - import gc if not self._start_state: msg = "obj graph statistics is not started" @@ -2958,7 +2957,8 @@ def stop(self) -> str: current_date = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") # Create subdirectory for this analysis - analysis_dir = os.path.join(self._obj_graph_dir, f"analysis_{current_date}") + analysis_dir = os.path.join(self._obj_graph_dir, + f"analysis_{current_date}") try: os.makedirs(analysis_dir, exist_ok=True) except OSError as e: @@ -2968,14 +2968,19 @@ def stop(self) -> str: output_lines = [] current_time = time.time() statistics_time = current_time - self.start_time - start_time_formatted = datetime.datetime.fromtimestamp(self.start_time).strftime("%Y-%m-%d %H:%M:%S") - output_lines.append(f"{'='*50}\n start time {start_time_formatted}, Statistics time: {statistics_time} seconds\n{'='*50}\n") + start_time_formatted = datetime.datetime.fromtimestamp( + self.start_time).strftime("%Y-%m-%d %H:%M:%S") + output_lines.append( + f"{'='*50}\n start time {start_time_formatted}, Statistics time: {statistics_time} seconds\n{'='*50}\n" + ) gc.collect() growth_info = objgraph.growth() for gt in growth_info: - output_lines.append(f"Growth type: {gt[0]}, Count: {gt[1]}, Growth amount: {gt[2]}") + output_lines.append( + f"Growth type: {gt[0]}, Count: {gt[1]}, Growth amount: {gt[2]}" + ) for gt in growth_info: # Get the first object of this type @@ -2990,7 +2995,8 @@ def stop(self) -> str: obj, max_depth=10, too_many=5, - filename=os.path.join(analysis_dir, f"{gt[0]}_backrefs.dot") + filename=os.path.join( + analysis_dir, f"{gt[0]}_backrefs.dot") ) # Generate reference graph @@ -2998,16 +3004,20 @@ def stop(self) -> str: obj, max_depth=10, too_many=5, - filename=os.path.join(analysis_dir, f"{gt[0]}_refs.dot") + filename=os.path.join( + analysis_dir, f"{gt[0]}_refs.dot") ) # Generate reference chain to module objgraph.show_chain( - objgraph.find_backref_chain(obj, objgraph.is_proper_module), - filename=os.path.join(analysis_dir, f"{gt[0]}_chain.dot") + objgraph.find_backref_chain( + obj, objgraph.is_proper_module), + filename=os.path.join( + analysis_dir, f"{gt[0]}_chain.dot") ) - output_file_path = os.path.join(analysis_dir, "growing_memory_stats.log") + output_file_path = os.path.join( + analysis_dir, "growing_memory_stats.log") try: with open(output_file_path, 'w', encoding='utf-8') as f: for line in output_lines: @@ -3016,6 +3026,7 @@ def stop(self) -> str: logger.error("Failed to write to file %s: %s", output_file_path, e) return f"Failed to write to file: {e}" - logger.info("obj graph statistics completed, output_lines: %s", output_lines) + logger.info("obj graph statistics completed, output_lines: %s", + output_lines) return "obj graph statistics completed" \ No newline at end of file diff --git a/vllm/worker/worker_base.py b/vllm/worker/worker_base.py index 4002f54f789..6eb39a7b9c2 100644 --- a/vllm/worker/worker_base.py +++ b/vllm/worker/worker_base.py @@ -19,7 +19,8 @@ from 
vllm.lora.request import LoRARequest from vllm.model_executor.layers.sampler import SamplerOutput from vllm.sequence import ExecuteModelRequest, IntermediateTensors -from vllm.utils import (GrowingMemoryObjGraph, enable_trace_function_call_for_thread, +from vllm.utils import (GrowingMemoryObjGraph, + enable_trace_function_call_for_thread, resolve_obj_by_qualname, run_method, update_environment_variables, warn_for_unimplemented_methods) From ff621b3ed06d1ff8902742a0c05a4f414e2155c3 Mon Sep 17 00:00:00 2001 From: miaochangyu Date: Wed, 25 Jun 2025 11:08:24 +0800 Subject: [PATCH 5/9] fix pre-commit pipeline Signed-off-by: miaochangyu --- vllm/utils.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/vllm/utils.py b/vllm/utils.py index fcc55722be2..88039a12d7d 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -2971,12 +2971,18 @@ def stop(self) -> str: return f"Failed to create directory: {e}" output_lines = [] - current_time = time.time() - statistics_time = current_time - self.start_time start_time_formatted = datetime.datetime.fromtimestamp( self.start_time).strftime("%Y-%m-%d %H:%M:%S") + current_time_formatted = datetime.datetime.now().strftime( + "%Y-%m-%d %H:%M:%S") output_lines.append( - f"{'='*50}\n start time {start_time_formatted}, Statistics time: {statistics_time} seconds\n{'='*50}\n" + f"{'='*50}" + ) + output_lines.append( + f"start time {start_time_formatted}, current time: {current_time_formatted}" + ) + output_lines.append( + f"{'='*50}" ) gc.collect() From dd497d8296fc83da1622960fa409e85b3f4f238b Mon Sep 17 00:00:00 2001 From: miaochangyu Date: Wed, 25 Jun 2025 11:30:28 +0800 Subject: [PATCH 6/9] fix pre-commit pipeline Signed-off-by: miaochangyu --- vllm/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/utils.py b/vllm/utils.py index 88039a12d7d..a547466e601 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -2979,7 +2979,7 @@ def stop(self) -> str: f"{'='*50}" ) output_lines.append( - f"start time {start_time_formatted}, current time: {current_time_formatted}" + f"start {start_time_formatted}, current: {current_time_formatted}" ) output_lines.append( f"{'='*50}" From 3d747dc72b00d7e83368389e6c235c9732c1eec8 Mon Sep 17 00:00:00 2001 From: miaochangyu Date: Wed, 25 Jun 2025 21:01:36 +0800 Subject: [PATCH 7/9] fix pre-commit pipeline Signed-off-by: miaochangyu --- .../simple_growthing_obj_graph.py | 2 - vllm/engine/multiprocessing/client.py | 8 ++-- vllm/engine/multiprocessing/engine.py | 4 +- vllm/utils.py | 48 +++++++------------ vllm/worker/worker_base.py | 2 +- 5 files changed, 25 insertions(+), 39 deletions(-) diff --git a/examples/offline_inference/simple_growthing_obj_graph.py b/examples/offline_inference/simple_growthing_obj_graph.py index e152e8941e6..d95159ef006 100644 --- a/examples/offline_inference/simple_growthing_obj_graph.py +++ b/examples/offline_inference/simple_growthing_obj_graph.py @@ -45,10 +45,8 @@ def main(): # Add buffer time to wait for background processes (if multiprocessing is enabled) # to complete writing object graph analysis output. time.sleep(10) - print(f"Completed! 
Results saved to: {os.environ['VLLM_OBJ_GRAPH_DIR']}") print("You can check the generated files to analyze memory growth") - if __name__ == "__main__": main() \ No newline at end of file diff --git a/vllm/engine/multiprocessing/client.py b/vllm/engine/multiprocessing/client.py index 842cc2f6187..a276bc0d24f 100644 --- a/vllm/engine/multiprocessing/client.py +++ b/vllm/engine/multiprocessing/client.py @@ -35,8 +35,8 @@ RPCResetPrefixCacheRequest, RPCSleepRequest, RPCStartupRequest, RPCStartupResponse, - RPCUProfileRequest, RPCWakeUpRequest, - RPCUObjectGraphRequest) + RPCUObjectGraphRequest, + RPCUProfileRequest, RPCWakeUpRequest) from vllm.engine.protocol import EngineClient # yapf: enable from vllm.envs import VLLM_RPC_TIMEOUT @@ -621,14 +621,14 @@ async def start_object_graph(self) -> None: """Start object graph the engine""" await self._send_one_way_rpc_request( - request=RPCUObjectGraphRequest.START_OBJECT_GRAPH, + request=RPCUObjectGraphRequest.START_OBJECT_GRAPH, socket=self.input_socket) async def stop_object_graph(self) -> None: """Stop object graph the engine""" await self._send_one_way_rpc_request( - request=RPCUObjectGraphRequest.STOP_OBJECT_GRAPH, + request=RPCUObjectGraphRequest.STOP_OBJECT_GRAPH, socket=self.input_socket) async def reset_mm_cache(self) -> None: diff --git a/vllm/engine/multiprocessing/engine.py b/vllm/engine/multiprocessing/engine.py index e15b6b72a28..3e35a0d5ab2 100644 --- a/vllm/engine/multiprocessing/engine.py +++ b/vllm/engine/multiprocessing/engine.py @@ -27,8 +27,8 @@ RPCResetPrefixCacheRequest, RPCSleepRequest, RPCStartupRequest, RPCStartupResponse, - RPCUProfileRequest, RPCWakeUpRequest, - RPCUObjectGraphRequest) + RPCUObjectGraphRequest, + RPCUProfileRequest, RPCWakeUpRequest) # yapf: enable from vllm.logger import init_logger from vllm.outputs import RequestOutput diff --git a/vllm/utils.py b/vllm/utils.py index a547466e601..4ab79a9093f 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -2975,15 +2975,10 @@ def stop(self) -> str: self.start_time).strftime("%Y-%m-%d %H:%M:%S") current_time_formatted = datetime.datetime.now().strftime( "%Y-%m-%d %H:%M:%S") + output_lines.append(f"{'='*50}") output_lines.append( - f"{'='*50}" - ) - output_lines.append( - f"start {start_time_formatted}, current: {current_time_formatted}" - ) - output_lines.append( - f"{'='*50}" - ) + f"start {start_time_formatted}, current: {current_time_formatted}") + output_lines.append(f"{'='*50}") gc.collect() growth_info = objgraph.growth() @@ -3002,33 +2997,27 @@ def stop(self) -> str: continue # Generate back reference graph - objgraph.show_backrefs( - obj, - max_depth=10, - too_many=5, - filename=os.path.join( - analysis_dir, f"{gt[0]}_backrefs.dot") - ) + objgraph.show_backrefs(obj, + max_depth=10, + too_many=5, + filename=os.path.join( + analysis_dir, f"{gt[0]}_backrefs.dot")) # Generate reference graph - objgraph.show_refs( - obj, - max_depth=10, - too_many=5, - filename=os.path.join( - analysis_dir, f"{gt[0]}_refs.dot") - ) + objgraph.show_refs(obj, + max_depth=10, + too_many=5, + filename=os.path.join( + analysis_dir, f"{gt[0]}_refs.dot")) # Generate reference chain to module - objgraph.show_chain( - objgraph.find_backref_chain( - obj, objgraph.is_proper_module), + objgraph.show_chain(objgraph.find_backref_chain( + obj, objgraph.is_proper_module), filename=os.path.join( - analysis_dir, f"{gt[0]}_chain.dot") - ) + analysis_dir, f"{gt[0]}_chain.dot")) - output_file_path = os.path.join( - analysis_dir, "growing_memory_stats.log") + output_file_path = os.path.join(analysis_dir, 
+ "growing_memory_stats.log") try: with open(output_file_path, 'w', encoding='utf-8') as f: for line in output_lines: @@ -3039,5 +3028,4 @@ def stop(self) -> str: logger.info("obj graph statistics completed, output_lines: %s", output_lines) - return "obj graph statistics completed" \ No newline at end of file diff --git a/vllm/worker/worker_base.py b/vllm/worker/worker_base.py index 6eb39a7b9c2..f074847bc52 100644 --- a/vllm/worker/worker_base.py +++ b/vllm/worker/worker_base.py @@ -19,7 +19,7 @@ from vllm.lora.request import LoRARequest from vllm.model_executor.layers.sampler import SamplerOutput from vllm.sequence import ExecuteModelRequest, IntermediateTensors -from vllm.utils import (GrowingMemoryObjGraph, +from vllm.utils import (GrowingMemoryObjGraph, enable_trace_function_call_for_thread, resolve_obj_by_qualname, run_method, update_environment_variables, From cd1acd2bf78dec1b0526d28d6ea2526cbe4ec7f7 Mon Sep 17 00:00:00 2001 From: miaochangyu Date: Thu, 26 Jun 2025 00:13:03 +0800 Subject: [PATCH 8/9] fix pre-commit pipeline Signed-off-by: miaochangyu --- .../offline_inference/simple_growthing_obj_graph.py | 2 +- vllm/utils.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/offline_inference/simple_growthing_obj_graph.py b/examples/offline_inference/simple_growthing_obj_graph.py index d95159ef006..965cabe9481 100644 --- a/examples/offline_inference/simple_growthing_obj_graph.py +++ b/examples/offline_inference/simple_growthing_obj_graph.py @@ -49,4 +49,4 @@ def main(): print("You can check the generated files to analyze memory growth") if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/vllm/utils.py b/vllm/utils.py index 4ab79a9093f..316c7bb8346 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -3000,8 +3000,8 @@ def stop(self) -> str: objgraph.show_backrefs(obj, max_depth=10, too_many=5, - filename=os.path.join( - analysis_dir, f"{gt[0]}_backrefs.dot")) + filename=os.path.join(analysis_dir, + f"{gt[0]}_backrefs.dot")) # Generate reference graph objgraph.show_refs(obj, @@ -3013,10 +3013,10 @@ def stop(self) -> str: # Generate reference chain to module objgraph.show_chain(objgraph.find_backref_chain( obj, objgraph.is_proper_module), - filename=os.path.join( - analysis_dir, f"{gt[0]}_chain.dot")) + filename=os.path.join(analysis_dir, + f"{gt[0]}_chain.dot")) - output_file_path = os.path.join(analysis_dir, + output_file_path = os.path.join(analysis_dir, "growing_memory_stats.log") try: with open(output_file_path, 'w', encoding='utf-8') as f: @@ -3028,4 +3028,4 @@ def stop(self) -> str: logger.info("obj graph statistics completed, output_lines: %s", output_lines) - return "obj graph statistics completed" \ No newline at end of file + return "obj graph statistics completed" From 964081471a83d68511a1917ab32483775bf38312 Mon Sep 17 00:00:00 2001 From: miaochangyu Date: Thu, 26 Jun 2025 00:31:33 +0800 Subject: [PATCH 9/9] fix pre-commit pipeline Signed-off-by: miaochangyu --- vllm/utils.py | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/vllm/utils.py b/vllm/utils.py index 316c7bb8346..6940e381ad2 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -2995,26 +2995,28 @@ def stop(self) -> str: except IndexError: logger.warning("Type %s has no available objects", gt[0]) continue + + # Generate back reference graph + objgraph.show_backrefs( + obj, + max_depth=10, + too_many=5, + filename=os.path.join(analysis_dir, f"{gt[0]}_backrefs.dot"), + ) + + # Generate 
reference graph + objgraph.show_refs( + obj, + max_depth=10, + too_many=5, + filename=os.path.join(analysis_dir, f"{gt[0]}_refs.dot"), + ) - # Generate back reference graph - objgraph.show_backrefs(obj, - max_depth=10, - too_many=5, - filename=os.path.join(analysis_dir, - f"{gt[0]}_backrefs.dot")) - - # Generate reference graph - objgraph.show_refs(obj, - max_depth=10, - too_many=5, - filename=os.path.join( - analysis_dir, f"{gt[0]}_refs.dot")) - - # Generate reference chain to module - objgraph.show_chain(objgraph.find_backref_chain( - obj, objgraph.is_proper_module), - filename=os.path.join(analysis_dir, - f"{gt[0]}_chain.dot")) + # Generate reference chain to module + objgraph.show_chain( + objgraph.find_backref_chain(obj, objgraph.is_proper_module), + filename=os.path.join(analysis_dir, f"{gt[0]}_chain.dot"), + ) output_file_path = os.path.join(analysis_dir, "growing_memory_stats.log")
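

---

Usage note (online serving): a minimal sketch of driving the two endpoints this series adds to the OpenAI API server. It assumes the server was launched with `VLLM_OBJ_GRAPH_DIR` set to an absolute path (the routes are only registered when the variable is present) and that it listens on vLLM's default `localhost:8000`; adjust host and port to your deployment. Only the `/start_object_graph` and `/stop_object_graph` endpoints introduced above are used.

```python
# Sketch: drive the new object-graph endpoints against a running server.
# Assumes: server started with VLLM_OBJ_GRAPH_DIR=/abs/path and listening
# on the default localhost:8000 -- both are deployment-specific.
import time
import urllib.request


def post(path: str, base: str = "http://localhost:8000") -> int:
    req = urllib.request.Request(base + path, method="POST")
    with urllib.request.urlopen(req) as resp:
        return resp.status


# Snapshot a baseline of per-type object counts in the engine process.
print(post("/start_object_graph"))  # 200

# ... send normal traffic while the suspected leak accumulates ...
time.sleep(60)

# Diff against the baseline and write the .dot/.log files under
# $VLLM_OBJ_GRAPH_DIR/analysis_<timestamp>/.
print(post("/stop_object_graph"))  # 200
```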
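Each `stop` call produces an `analysis_<timestamp>/` directory containing Graphviz sources (`<type>_backrefs.dot`, `<type>_refs.dot`, `<type>_chain.dot`) plus `growing_memory_stats.log`. A sketch for rendering them to PNG, assuming the Graphviz `dot` binary is installed and on `PATH` (e.g. `apt install graphviz`); the directory name below is only an example:

```python
# Sketch: render every .dot file in one analysis directory to PNG.
# Assumes the Graphviz `dot` executable is installed and on PATH.
import pathlib
import subprocess

# Hypothetical path -- substitute the analysis directory stop() created.
analysis_dir = pathlib.Path("./vllm_obj_graph/analysis_20250626_001303")

for dot_file in analysis_dir.glob("*.dot"):
    png_file = dot_file.with_suffix(".png")
    subprocess.run(["dot", "-Tpng", str(dot_file), "-o", str(png_file)],
                   check=True)
    print(f"rendered {png_file}")
```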
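For readers unfamiliar with `objgraph`, a standalone sketch of the mechanism `GrowingMemoryObjGraph` wraps: the first `objgraph.growth()` call records per-type baseline counts, later calls return `(type_name, count, delta)` tuples for types that grew, and `show_backrefs()` explains what keeps the growing objects alive. The `Leaky` class and `_cache` list are purely illustrative stand-ins for an accidental leak.

```python
# Minimal sketch of the objgraph workflow the utility automates.
import gc

import objgraph


class Leaky:  # illustrative type that we deliberately keep alive
    pass


_cache = []  # stands in for an accidental global that pins objects

gc.collect()
objgraph.growth()  # first call: records the baseline counts

_cache.extend(Leaky() for _ in range(1000))  # simulate a leak

gc.collect()
for type_name, count, delta in objgraph.growth():
    print(f"{type_name}: {count} objects (+{delta})")
    # expected to include something like "Leaky: 1000 objects (+1000)"

# Why is it still alive? Walk referrers back toward a module global.
obj = objgraph.by_type("Leaky")[0]
objgraph.show_backrefs(obj, max_depth=10, too_many=5,
                       filename="leaky_backrefs.dot")
```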