From f7d402f3b537a66f37ed4ee569f73fb276b04681 Mon Sep 17 00:00:00 2001 From: miaochangyu Date: Tue, 24 Jun 2025 11:03:18 +0800 Subject: [PATCH 1/9] add growing object graph util to analyse memory leak and OOM Signed-off-by: miaochangyu --- requirements/common.txt | 1 + vllm/engine/async_llm_engine.py | 6 ++ vllm/engine/llm_engine.py | 6 ++ vllm/engine/multiprocessing/__init__.py | 3 + vllm/engine/multiprocessing/client.py | 14 +++- vllm/engine/multiprocessing/engine.py | 13 +++- vllm/engine/protocol.py | 10 +++ vllm/entrypoints/openai/api_server.py | 20 ++++++ vllm/envs.py | 7 ++ vllm/executor/executor_base.py | 6 ++ vllm/utils.py | 86 +++++++++++++++++++++++++ vllm/worker/worker_base.py | 21 +++++- 12 files changed, 190 insertions(+), 3 deletions(-) diff --git a/requirements/common.txt b/requirements/common.txt index 639abe51101..ceac64d8a48 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -44,3 +44,4 @@ watchfiles # required for http server to monitor the updates of TLS files python-json-logger # Used by logging as per examples/others/logging_configuration.md scipy # Required for phi-4-multimodal-instruct ninja # Required for xgrammar, rocm, tpu, xpu +objgraph # Required for memory debugging and object graph analysis diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py index 3d7d28055dd..d9ed9a7b4b2 100644 --- a/vllm/engine/async_llm_engine.py +++ b/vllm/engine/async_llm_engine.py @@ -1162,6 +1162,12 @@ async def start_profile(self) -> None: async def stop_profile(self) -> None: self.engine.stop_profile() + async def start_object_graph(self) -> None: + self.engine.start_object_graph() + + async def stop_object_graph(self) -> None: + self.engine.stop_object_graph() + async def reset_mm_cache(self) -> None: self.engine.reset_mm_cache() diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 8fccf9bd2aa..3738e830321 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -1854,6 +1854,12 @@ def start_profile(self) -> None: def stop_profile(self) -> None: self.model_executor.stop_profile() + def start_object_graph(self) -> None: + self.model_executor.start_object_graph() + + def stop_object_graph(self) -> None: + self.model_executor.stop_object_graph() + def sleep(self, level: int = 1) -> None: assert self.vllm_config.model_config.enable_sleep_mode, ( "Sleep mode is not enabled in the model config") diff --git a/vllm/engine/multiprocessing/__init__.py b/vllm/engine/multiprocessing/__init__.py index db968cd6b5d..5f2578e20f3 100644 --- a/vllm/engine/multiprocessing/__init__.py +++ b/vllm/engine/multiprocessing/__init__.py @@ -82,6 +82,9 @@ class RPCUProfileRequest(Enum): START_PROFILE = 1 STOP_PROFILE = 2 +class RPCUObjectGraphRequest(Enum): + START_OBJECT_GRAPH = 1 + STOP_OBJECT_GRAPH = 2 class RPCResetMultiModalCacheRequest(Enum): RESET = 1 diff --git a/vllm/engine/multiprocessing/client.py b/vllm/engine/multiprocessing/client.py index 9e018ec7f34..e6bdd87b5f1 100644 --- a/vllm/engine/multiprocessing/client.py +++ b/vllm/engine/multiprocessing/client.py @@ -34,7 +34,7 @@ RPCResetMultiModalCacheRequest, RPCResetPrefixCacheRequest, RPCSleepRequest, RPCStartupRequest, - RPCStartupResponse, + RPCStartupResponse, RPCUObjectGraphRequest, RPCUProfileRequest, RPCWakeUpRequest) from vllm.engine.protocol import EngineClient # yapf: enable @@ -615,6 +615,18 @@ async def stop_profile(self) -> None: await self._send_one_way_rpc_request( request=RPCUProfileRequest.STOP_PROFILE, socket=self.input_socket) + + async def 
start_object_graph(self) -> None: + """Start object graph the engine""" + + await self._send_one_way_rpc_request( + request=RPCUObjectGraphRequest.START_OBJECT_GRAPH, socket=self.input_socket) + + async def stop_object_graph(self) -> None: + """Stop object graph the engine""" + + await self._send_one_way_rpc_request( + request=RPCUObjectGraphRequest.STOP_OBJECT_GRAPH, socket=self.input_socket) async def reset_mm_cache(self) -> None: """Reset the multi-modal cache""" diff --git a/vllm/engine/multiprocessing/engine.py b/vllm/engine/multiprocessing/engine.py index ef088bd3933..ba97c03a364 100644 --- a/vllm/engine/multiprocessing/engine.py +++ b/vllm/engine/multiprocessing/engine.py @@ -26,7 +26,7 @@ RPCResetMultiModalCacheRequest, RPCResetPrefixCacheRequest, RPCSleepRequest, RPCStartupRequest, - RPCStartupResponse, + RPCStartupResponse, RPCUObjectGraphRequest, RPCUProfileRequest, RPCWakeUpRequest) # yapf: enable from vllm.logger import init_logger @@ -284,6 +284,11 @@ def handle_new_input(self): self.wake_up(request.tags) elif isinstance(request, RPCIsSleepingRequest): self._handle_is_sleeping_request(request) + elif isinstance(request, RPCUObjectGraphRequest): + if request == RPCUObjectGraphRequest.START_OBJECT_GRAPH: + self.start_object_graph() + else: + self.stop_object_graph() else: raise ValueError("Unknown RPCRequest Type: " f"{type(request)}") @@ -416,6 +421,12 @@ def start_profile(self) -> None: def stop_profile(self) -> None: self.engine.stop_profile() + def start_object_graph(self) -> None: + self.engine.start_object_graph() + + def stop_object_graph(self) -> None: + self.engine.stop_object_graph() + def reset_mm_cache(self) -> bool: return self.engine.reset_mm_cache() diff --git a/vllm/engine/protocol.py b/vllm/engine/protocol.py index 8688fcc82cd..96ef1816031 100644 --- a/vllm/engine/protocol.py +++ b/vllm/engine/protocol.py @@ -294,6 +294,16 @@ async def stop_profile(self) -> None: """Start profiling the engine""" ... + @abstractmethod + async def start_object_graph(self) -> None: + """Start object graph the engine""" + ... + + @abstractmethod + async def stop_object_graph(self) -> None: + """Stop object graph the engine""" + ... + @abstractmethod async def reset_mm_cache(self) -> None: """Reset the multi-modal cache""" diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 62f1c6a7c12..e6c8e8fe6af 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -972,6 +972,26 @@ async def stop_profile(raw_request: Request): return Response(status_code=200) +if envs.VLLM_OBJ_GRAPH_DIR: + logger.warning( + "Object Graph is enabled in the API server. This should ONLY be " + "used for local development!") + + @router.post("/start_object_graph") + async def start_object_graph(raw_request: Request): + logger.info("Starting object graph...") + await engine_client(raw_request).start_object_graph() + logger.info("Object graph started.") + return Response(status_code=200) + + @router.post("/stop_object_graph") + async def stop_object_graph(raw_request: Request): + logger.info("Stopping object graph...") + await engine_client(raw_request).stop_object_graph() + logger.info("Object graph stopped.") + return Response(status_code=200) + + if envs.VLLM_ALLOW_RUNTIME_LORA_UPDATING: logger.warning( "LoRA dynamic loading & unloading is enabled in the API server. 
" diff --git a/vllm/envs.py b/vllm/envs.py index 01d8d8a2d2e..30d0714e5e7 100644 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -77,6 +77,7 @@ VLLM_PLUGINS: Optional[list[str]] = None VLLM_LORA_RESOLVER_CACHE_DIR: Optional[str] = None VLLM_TORCH_PROFILER_DIR: Optional[str] = None + VLLM_OBJ_GRAPH_DIR: Optional[str] = None VLLM_USE_TRITON_AWQ: bool = False VLLM_ALLOW_RUNTIME_LORA_UPDATING: bool = False VLLM_SKIP_P2P_CHECK: bool = False @@ -599,6 +600,12 @@ def get_vllm_port() -> Optional[int]: lambda: (None if os.getenv("VLLM_TORCH_PROFILER_DIR", None) is None else os .path.expanduser(os.getenv("VLLM_TORCH_PROFILER_DIR", "."))), + # Enables memory object graph tracking if set. Path to the directory where + # object graph files are saved. Note that it must be an absolute path. + "VLLM_OBJ_GRAPH_DIR": + lambda: (None if os.getenv("VLLM_OBJ_GRAPH_DIR", None) is None else os + .path.expanduser(os.getenv("VLLM_OBJ_GRAPH_DIR", "."))), + # If set, vLLM will use Triton implementations of AWQ. "VLLM_USE_TRITON_AWQ": lambda: bool(int(os.getenv("VLLM_USE_TRITON_AWQ", "0"))), diff --git a/vllm/executor/executor_base.py b/vllm/executor/executor_base.py index 99e12201c96..1221ff115ff 100644 --- a/vllm/executor/executor_base.py +++ b/vllm/executor/executor_base.py @@ -199,6 +199,12 @@ def start_profile(self) -> None: def stop_profile(self) -> None: self.collective_rpc("stop_profile") + def start_object_graph(self) -> None: + self.collective_rpc("start_object_graph") + + def stop_object_graph(self) -> None: + self.collective_rpc("stop_object_graph") + def sleep(self, level: int = 1): if self.is_sleeping: logger.warning("Executor is already sleeping.") diff --git a/vllm/utils.py b/vllm/utils.py index 34be4d52c48..502c1f9a5ab 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -2924,3 +2924,89 @@ def is_torch_equal_or_newer(target: str) -> bool: except Exception: # Fallback to PKG-INFO to load the package info, needed by the doc gen. 
return Version(importlib.metadata.version('torch')) >= Version(target) + +class GrowingMemoryObjGraph: + def __init__(self): + from vllm import envs + if not envs.VLLM_OBJ_GRAPH_DIR: + raise RuntimeError("VLLM_OBJ_GRAPH_DIR is not set.") + self._obj_graph_dir = envs.VLLM_OBJ_GRAPH_DIR + os.makedirs(self._obj_graph_dir, exist_ok=True) + + self._start_state = False + + + def start(self) -> str: + import objgraph + + gc.collect() + objgraph.growth() + self._start_state = True + self.start_time = time.time() + return "start growing obj graph statistics" + + def stop(self) -> str: + import objgraph + import gc + + if not self._start_state: + msg = "obj graph statistics is not started" + logger.warning(msg) + return msg + + # Generate output filename with date + current_date = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + + # Create subdirectory for this analysis + analysis_dir = os.path.join(self._obj_graph_dir, f"analysis_{current_date}") + os.makedirs(analysis_dir, exist_ok=True) + + output_lines = [] + current_time = time.time() + statistics_time = current_time - self.start_time + output_lines.append(f"{'='*50}\n start time {self.start_time}, Statistics time: {statistics_time} seconds\n{'='*50}\n") + + gc.collect() + growth_info = objgraph.growth() + + for gt in growth_info: + output_lines.append(f"Growth type: {gt[0]}, Count: {gt[1]}, Growth amount: {gt[2]}") + + for gt in growth_info: + # Get the first object of this type + try: + obj = objgraph.by_type(gt[0])[0] + except IndexError: + logger.warning(f"Type {gt[0]} has no available objects") + continue + + # Generate back reference graph + objgraph.show_backrefs( + obj, + max_depth=10, + too_many=5, + filename=os.path.join(analysis_dir, f"{gt[0]}_backrefs.dot") + ) + + # Generate reference graph + objgraph.show_refs( + obj, + max_depth=10, + too_many=5, + filename=os.path.join(analysis_dir, f"{gt[0]}_refs.dot") + ) + + # Generate reference chain to module + objgraph.show_chain( + objgraph.find_backref_chain(obj, objgraph.is_proper_module), + filename=os.path.join(analysis_dir, f"{gt[0]}_chain.dot") + ) + + output_file_path = os.path.join(analysis_dir, "growing_memory_stats.log") + with open(output_file_path, 'w', encoding='utf-8') as f: + for line in output_lines: + f.write(line + '\n') + + logger.info(f"obj graph statistics completed, output_lines: {output_lines}") + + return "obj graph statistics completed" \ No newline at end of file diff --git a/vllm/worker/worker_base.py b/vllm/worker/worker_base.py index c382b29ad19..4002f54f789 100644 --- a/vllm/worker/worker_base.py +++ b/vllm/worker/worker_base.py @@ -11,6 +11,7 @@ import torch import torch.nn as nn +from vllm import envs from vllm.config import (ObservabilityConfig, VllmConfig, set_current_vllm_config) from vllm.distributed import broadcast_tensor_dict, get_pp_group, get_tp_group @@ -18,7 +19,7 @@ from vllm.lora.request import LoRARequest from vllm.model_executor.layers.sampler import SamplerOutput from vllm.sequence import ExecuteModelRequest, IntermediateTensors -from vllm.utils import (enable_trace_function_call_for_thread, +from vllm.utils import (GrowingMemoryObjGraph, enable_trace_function_call_for_thread, resolve_obj_by_qualname, run_method, update_environment_variables, warn_for_unimplemented_methods) @@ -56,6 +57,14 @@ def __init__( from vllm.platforms import current_platform self.current_platform = current_platform + if envs.VLLM_OBJ_GRAPH_DIR: + object_graph_dir = envs.VLLM_OBJ_GRAPH_DIR + logger.info("Object graph enabled. 
Traces will be saved to: %s", + object_graph_dir) + self.obj_graph = GrowingMemoryObjGraph() + else: + self.obj_graph = None + def init_device(self) -> None: """Initialize device state, such as loading the model or other on-device memory allocations. @@ -129,6 +138,16 @@ def list_loras(self) -> Set[int]: def vocab_size(self) -> int: """Get vocabulary size from model configuration.""" return self.model_config.get_vocab_size() + + def start_object_graph(self): + if self.obj_graph is None: + raise RuntimeError("Object graph is not enabled.") + return self.obj_graph.start() + + def stop_object_graph(self): + if self.obj_graph is None: + raise RuntimeError("Object graph is not enabled.") + return self.obj_graph.stop() class DelegateWorkerBase(WorkerBase): From 4c47615a395007cece72d7069b37f3705512fc06 Mon Sep 17 00:00:00 2001 From: miaochangyu Date: Tue, 24 Jun 2025 11:54:33 +0800 Subject: [PATCH 2/9] add offline example Signed-off-by: miaochangyu --- .../simple_growthing_obj_graph.py | 54 +++++++++++++++++++ vllm/entrypoints/llm.py | 6 +++ 2 files changed, 60 insertions(+) create mode 100644 examples/offline_inference/simple_growthing_obj_graph.py diff --git a/examples/offline_inference/simple_growthing_obj_graph.py b/examples/offline_inference/simple_growthing_obj_graph.py new file mode 100644 index 00000000000..2c82a546491 --- /dev/null +++ b/examples/offline_inference/simple_growthing_obj_graph.py @@ -0,0 +1,54 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import os +import time + +from vllm import LLM, SamplingParams + +# Enable object graph analysis by setting environment variable +os.environ["VLLM_OBJ_GRAPH_DIR"] = "./vllm_obj_graph" + +# Sample prompts +prompts = [ + "Hello, my name is", + "The president of the United States is", + "The capital of France is", + "The future of artificial intelligence is", +] +# Create sampling parameters object +sampling_params = SamplingParams(temperature=0.8, top_p=0.95) + + +def main(): + # Create LLM instance + llm = LLM(model="facebook/opt-125m", tensor_parallel_size=1) + + # Start object graph analysis + llm.start_object_graph() + + # Generate text from prompts. The output is a list of RequestOutput objects + # containing the prompt, generated text, and other information. + outputs = llm.generate(prompts, sampling_params) + + # Stop object graph analysis + llm.stop_object_graph() + + # Print output results + print("-" * 50) + for output in outputs: + prompt = output.prompt + generated_text = output.outputs[0].text + print(f"Prompt: {prompt!r}\nGenerated text: {generated_text!r}") + print("-" * 50) + + # Add buffer time to wait for background processes (if multiprocessing is enabled) + # to complete writing object graph analysis output. + time.sleep(10) + + print(f"Object graph analysis completed! 
Results saved to: {os.environ['VLLM_OBJ_GRAPH_DIR']}") + print("You can check the generated .dot files and .log files to analyze memory object growth") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index 05e0be61ada..5b5bb599485 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -1329,6 +1329,12 @@ def start_profile(self) -> None: def stop_profile(self) -> None: self.llm_engine.stop_profile() + def start_object_graph(self) -> None: + self.llm_engine.start_object_graph() + + def stop_object_graph(self) -> None: + self.llm_engine.stop_object_graph() + def reset_prefix_cache(self, device: Optional[Device] = None) -> bool: return self.llm_engine.reset_prefix_cache(device) From ff0be92b55fce300537a1480937fc5cab64328b5 Mon Sep 17 00:00:00 2001 From: miaochangyu Date: Tue, 24 Jun 2025 23:13:14 +0800 Subject: [PATCH 3/9] fix Code Review comment Signed-off-by: miaochangyu --- vllm/utils.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/vllm/utils.py b/vllm/utils.py index 502c1f9a5ab..a7812b0d847 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -2959,12 +2959,17 @@ def stop(self) -> str: # Create subdirectory for this analysis analysis_dir = os.path.join(self._obj_graph_dir, f"analysis_{current_date}") - os.makedirs(analysis_dir, exist_ok=True) - + try: + os.makedirs(analysis_dir, exist_ok=True) + except OSError as e: + logger.error("Failed to create directory %s: %s", analysis_dir, e) + return f"Failed to create directory: {e}" + output_lines = [] current_time = time.time() statistics_time = current_time - self.start_time - output_lines.append(f"{'='*50}\n start time {self.start_time}, Statistics time: {statistics_time} seconds\n{'='*50}\n") + start_time_formatted = datetime.datetime.fromtimestamp(self.start_time).strftime("%Y-%m-%d %H:%M:%S") + output_lines.append(f"{'='*50}\n start time {start_time_formatted}, Statistics time: {statistics_time} seconds\n{'='*50}\n") gc.collect() growth_info = objgraph.growth() @@ -2977,7 +2982,7 @@ def stop(self) -> str: try: obj = objgraph.by_type(gt[0])[0] except IndexError: - logger.warning(f"Type {gt[0]} has no available objects") + logger.warning("Type %s has no available objects", gt[0]) continue # Generate back reference graph @@ -3003,10 +3008,14 @@ def stop(self) -> str: ) output_file_path = os.path.join(analysis_dir, "growing_memory_stats.log") - with open(output_file_path, 'w', encoding='utf-8') as f: - for line in output_lines: - f.write(line + '\n') + try: + with open(output_file_path, 'w', encoding='utf-8') as f: + for line in output_lines: + f.write(line + '\n') + except OSError as e: + logger.error("Failed to write to file %s: %s", output_file_path, e) + return f"Failed to write to file: {e}" - logger.info(f"obj graph statistics completed, output_lines: {output_lines}") + logger.info("obj graph statistics completed, output_lines: %s", output_lines) return "obj graph statistics completed" \ No newline at end of file From d0b65b9a3f7b03d7bb8a94b0b4e9f559b13c5811 Mon Sep 17 00:00:00 2001 From: miaochangyu Date: Wed, 25 Jun 2025 10:41:08 +0800 Subject: [PATCH 4/9] fix pre-commit pipeline Signed-off-by: miaochangyu --- .../simple_growthing_obj_graph.py | 4 +-- vllm/engine/multiprocessing/__init__.py | 3 +- vllm/engine/multiprocessing/client.py | 11 ++++--- vllm/engine/multiprocessing/engine.py | 5 +-- vllm/envs.py | 4 +-- vllm/utils.py | 33 ++++++++++++------- vllm/worker/worker_base.py | 3 +- 7 files 
changed, 40 insertions(+), 23 deletions(-) diff --git a/examples/offline_inference/simple_growthing_obj_graph.py b/examples/offline_inference/simple_growthing_obj_graph.py index 2c82a546491..e152e8941e6 100644 --- a/examples/offline_inference/simple_growthing_obj_graph.py +++ b/examples/offline_inference/simple_growthing_obj_graph.py @@ -46,8 +46,8 @@ def main(): # to complete writing object graph analysis output. time.sleep(10) - print(f"Object graph analysis completed! Results saved to: {os.environ['VLLM_OBJ_GRAPH_DIR']}") - print("You can check the generated .dot files and .log files to analyze memory object growth") + print(f"Completed! Results saved to: {os.environ['VLLM_OBJ_GRAPH_DIR']}") + print("You can check the generated files to analyze memory growth") if __name__ == "__main__": diff --git a/vllm/engine/multiprocessing/__init__.py b/vllm/engine/multiprocessing/__init__.py index 5f2578e20f3..0c288376bc4 100644 --- a/vllm/engine/multiprocessing/__init__.py +++ b/vllm/engine/multiprocessing/__init__.py @@ -133,7 +133,8 @@ class RPCAdapterLoadedResponse: RPCUProfileRequest, RPCLoadAdapterRequest, RPCResetMultiModalCacheRequest, RPCResetPrefixCacheRequest, RPCSleepRequest, - RPCWakeUpRequest, RPCIsSleepingRequest] + RPCWakeUpRequest, RPCIsSleepingRequest, + RPCUObjectGraphRequest] REQUEST_OUTPUTS_T = Union[List[RequestOutput], RPCAdapterLoadedResponse, RPCIsSleepingResponse, RPCError] diff --git a/vllm/engine/multiprocessing/client.py b/vllm/engine/multiprocessing/client.py index e6bdd87b5f1..842cc2f6187 100644 --- a/vllm/engine/multiprocessing/client.py +++ b/vllm/engine/multiprocessing/client.py @@ -34,8 +34,9 @@ RPCResetMultiModalCacheRequest, RPCResetPrefixCacheRequest, RPCSleepRequest, RPCStartupRequest, - RPCStartupResponse, RPCUObjectGraphRequest, - RPCUProfileRequest, RPCWakeUpRequest) + RPCStartupResponse, + RPCUProfileRequest, RPCWakeUpRequest, + RPCUObjectGraphRequest) from vllm.engine.protocol import EngineClient # yapf: enable from vllm.envs import VLLM_RPC_TIMEOUT @@ -620,13 +621,15 @@ async def start_object_graph(self) -> None: """Start object graph the engine""" await self._send_one_way_rpc_request( - request=RPCUObjectGraphRequest.START_OBJECT_GRAPH, socket=self.input_socket) + request=RPCUObjectGraphRequest.START_OBJECT_GRAPH, + socket=self.input_socket) async def stop_object_graph(self) -> None: """Stop object graph the engine""" await self._send_one_way_rpc_request( - request=RPCUObjectGraphRequest.STOP_OBJECT_GRAPH, socket=self.input_socket) + request=RPCUObjectGraphRequest.STOP_OBJECT_GRAPH, + socket=self.input_socket) async def reset_mm_cache(self) -> None: """Reset the multi-modal cache""" diff --git a/vllm/engine/multiprocessing/engine.py b/vllm/engine/multiprocessing/engine.py index ba97c03a364..e15b6b72a28 100644 --- a/vllm/engine/multiprocessing/engine.py +++ b/vllm/engine/multiprocessing/engine.py @@ -26,8 +26,9 @@ RPCResetMultiModalCacheRequest, RPCResetPrefixCacheRequest, RPCSleepRequest, RPCStartupRequest, - RPCStartupResponse, RPCUObjectGraphRequest, - RPCUProfileRequest, RPCWakeUpRequest) + RPCStartupResponse, + RPCUProfileRequest, RPCWakeUpRequest, + RPCUObjectGraphRequest) # yapf: enable from vllm.logger import init_logger from vllm.outputs import RequestOutput diff --git a/vllm/envs.py b/vllm/envs.py index 30d0714e5e7..94740ba6eff 100644 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -603,8 +603,8 @@ def get_vllm_port() -> Optional[int]: # Enables memory object graph tracking if set. Path to the directory where # object graph files are saved. 
Note that it must be an absolute path. "VLLM_OBJ_GRAPH_DIR": - lambda: (None if os.getenv("VLLM_OBJ_GRAPH_DIR", None) is None else os - .path.expanduser(os.getenv("VLLM_OBJ_GRAPH_DIR", "."))), + lambda: (None if os.getenv("VLLM_OBJ_GRAPH_DIR", None) is None else os.path + .expanduser(os.getenv("VLLM_OBJ_GRAPH_DIR", "."))), # If set, vLLM will use Triton implementations of AWQ. "VLLM_USE_TRITON_AWQ": diff --git a/vllm/utils.py b/vllm/utils.py index a7812b0d847..2797705ec86 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -2947,7 +2947,6 @@ def start(self) -> str: def stop(self) -> str: import objgraph - import gc if not self._start_state: msg = "obj graph statistics is not started" @@ -2958,7 +2957,8 @@ def stop(self) -> str: current_date = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") # Create subdirectory for this analysis - analysis_dir = os.path.join(self._obj_graph_dir, f"analysis_{current_date}") + analysis_dir = os.path.join(self._obj_graph_dir, + f"analysis_{current_date}") try: os.makedirs(analysis_dir, exist_ok=True) except OSError as e: @@ -2968,14 +2968,19 @@ def stop(self) -> str: output_lines = [] current_time = time.time() statistics_time = current_time - self.start_time - start_time_formatted = datetime.datetime.fromtimestamp(self.start_time).strftime("%Y-%m-%d %H:%M:%S") - output_lines.append(f"{'='*50}\n start time {start_time_formatted}, Statistics time: {statistics_time} seconds\n{'='*50}\n") + start_time_formatted = datetime.datetime.fromtimestamp( + self.start_time).strftime("%Y-%m-%d %H:%M:%S") + output_lines.append( + f"{'='*50}\n start time {start_time_formatted}, Statistics time: {statistics_time} seconds\n{'='*50}\n" + ) gc.collect() growth_info = objgraph.growth() for gt in growth_info: - output_lines.append(f"Growth type: {gt[0]}, Count: {gt[1]}, Growth amount: {gt[2]}") + output_lines.append( + f"Growth type: {gt[0]}, Count: {gt[1]}, Growth amount: {gt[2]}" + ) for gt in growth_info: # Get the first object of this type @@ -2990,7 +2995,8 @@ def stop(self) -> str: obj, max_depth=10, too_many=5, - filename=os.path.join(analysis_dir, f"{gt[0]}_backrefs.dot") + filename=os.path.join( + analysis_dir, f"{gt[0]}_backrefs.dot") ) # Generate reference graph @@ -2998,16 +3004,20 @@ def stop(self) -> str: obj, max_depth=10, too_many=5, - filename=os.path.join(analysis_dir, f"{gt[0]}_refs.dot") + filename=os.path.join( + analysis_dir, f"{gt[0]}_refs.dot") ) # Generate reference chain to module objgraph.show_chain( - objgraph.find_backref_chain(obj, objgraph.is_proper_module), - filename=os.path.join(analysis_dir, f"{gt[0]}_chain.dot") + objgraph.find_backref_chain( + obj, objgraph.is_proper_module), + filename=os.path.join( + analysis_dir, f"{gt[0]}_chain.dot") ) - output_file_path = os.path.join(analysis_dir, "growing_memory_stats.log") + output_file_path = os.path.join( + analysis_dir, "growing_memory_stats.log") try: with open(output_file_path, 'w', encoding='utf-8') as f: for line in output_lines: @@ -3016,6 +3026,7 @@ def stop(self) -> str: logger.error("Failed to write to file %s: %s", output_file_path, e) return f"Failed to write to file: {e}" - logger.info("obj graph statistics completed, output_lines: %s", output_lines) + logger.info("obj graph statistics completed, output_lines: %s", + output_lines) return "obj graph statistics completed" \ No newline at end of file diff --git a/vllm/worker/worker_base.py b/vllm/worker/worker_base.py index 4002f54f789..6eb39a7b9c2 100644 --- a/vllm/worker/worker_base.py +++ b/vllm/worker/worker_base.py @@ -19,7 +19,8 @@ from 
vllm.lora.request import LoRARequest from vllm.model_executor.layers.sampler import SamplerOutput from vllm.sequence import ExecuteModelRequest, IntermediateTensors -from vllm.utils import (GrowingMemoryObjGraph, enable_trace_function_call_for_thread, +from vllm.utils import (GrowingMemoryObjGraph, + enable_trace_function_call_for_thread, resolve_obj_by_qualname, run_method, update_environment_variables, warn_for_unimplemented_methods) From ff621b3ed06d1ff8902742a0c05a4f414e2155c3 Mon Sep 17 00:00:00 2001 From: miaochangyu Date: Wed, 25 Jun 2025 11:08:24 +0800 Subject: [PATCH 5/9] fix pre-commit pipeline Signed-off-by: miaochangyu --- vllm/utils.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/vllm/utils.py b/vllm/utils.py index fcc55722be2..88039a12d7d 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -2971,12 +2971,18 @@ def stop(self) -> str: return f"Failed to create directory: {e}" output_lines = [] - current_time = time.time() - statistics_time = current_time - self.start_time start_time_formatted = datetime.datetime.fromtimestamp( self.start_time).strftime("%Y-%m-%d %H:%M:%S") + current_time_formatted = datetime.datetime.now().strftime( + "%Y-%m-%d %H:%M:%S") output_lines.append( - f"{'='*50}\n start time {start_time_formatted}, Statistics time: {statistics_time} seconds\n{'='*50}\n" + f"{'='*50}" + ) + output_lines.append( + f"start time {start_time_formatted}, current time: {current_time_formatted}" + ) + output_lines.append( + f"{'='*50}" ) gc.collect() From dd497d8296fc83da1622960fa409e85b3f4f238b Mon Sep 17 00:00:00 2001 From: miaochangyu Date: Wed, 25 Jun 2025 11:30:28 +0800 Subject: [PATCH 6/9] fix pre-commit pipeline Signed-off-by: miaochangyu --- vllm/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/utils.py b/vllm/utils.py index 88039a12d7d..a547466e601 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -2979,7 +2979,7 @@ def stop(self) -> str: f"{'='*50}" ) output_lines.append( - f"start time {start_time_formatted}, current time: {current_time_formatted}" + f"start {start_time_formatted}, current: {current_time_formatted}" ) output_lines.append( f"{'='*50}" From 3d747dc72b00d7e83368389e6c235c9732c1eec8 Mon Sep 17 00:00:00 2001 From: miaochangyu Date: Wed, 25 Jun 2025 21:01:36 +0800 Subject: [PATCH 7/9] fix pre-commit pipeline Signed-off-by: miaochangyu --- .../simple_growthing_obj_graph.py | 2 - vllm/engine/multiprocessing/client.py | 8 ++-- vllm/engine/multiprocessing/engine.py | 4 +- vllm/utils.py | 48 +++++++------------ vllm/worker/worker_base.py | 2 +- 5 files changed, 25 insertions(+), 39 deletions(-) diff --git a/examples/offline_inference/simple_growthing_obj_graph.py b/examples/offline_inference/simple_growthing_obj_graph.py index e152e8941e6..d95159ef006 100644 --- a/examples/offline_inference/simple_growthing_obj_graph.py +++ b/examples/offline_inference/simple_growthing_obj_graph.py @@ -45,10 +45,8 @@ def main(): # Add buffer time to wait for background processes (if multiprocessing is enabled) # to complete writing object graph analysis output. time.sleep(10) - print(f"Completed! 
Results saved to: {os.environ['VLLM_OBJ_GRAPH_DIR']}") print("You can check the generated files to analyze memory growth") - if __name__ == "__main__": main() \ No newline at end of file diff --git a/vllm/engine/multiprocessing/client.py b/vllm/engine/multiprocessing/client.py index 842cc2f6187..a276bc0d24f 100644 --- a/vllm/engine/multiprocessing/client.py +++ b/vllm/engine/multiprocessing/client.py @@ -35,8 +35,8 @@ RPCResetPrefixCacheRequest, RPCSleepRequest, RPCStartupRequest, RPCStartupResponse, - RPCUProfileRequest, RPCWakeUpRequest, - RPCUObjectGraphRequest) + RPCUObjectGraphRequest, + RPCUProfileRequest, RPCWakeUpRequest) from vllm.engine.protocol import EngineClient # yapf: enable from vllm.envs import VLLM_RPC_TIMEOUT @@ -621,14 +621,14 @@ async def start_object_graph(self) -> None: """Start object graph the engine""" await self._send_one_way_rpc_request( - request=RPCUObjectGraphRequest.START_OBJECT_GRAPH, + request=RPCUObjectGraphRequest.START_OBJECT_GRAPH, socket=self.input_socket) async def stop_object_graph(self) -> None: """Stop object graph the engine""" await self._send_one_way_rpc_request( - request=RPCUObjectGraphRequest.STOP_OBJECT_GRAPH, + request=RPCUObjectGraphRequest.STOP_OBJECT_GRAPH, socket=self.input_socket) async def reset_mm_cache(self) -> None: diff --git a/vllm/engine/multiprocessing/engine.py b/vllm/engine/multiprocessing/engine.py index e15b6b72a28..3e35a0d5ab2 100644 --- a/vllm/engine/multiprocessing/engine.py +++ b/vllm/engine/multiprocessing/engine.py @@ -27,8 +27,8 @@ RPCResetPrefixCacheRequest, RPCSleepRequest, RPCStartupRequest, RPCStartupResponse, - RPCUProfileRequest, RPCWakeUpRequest, - RPCUObjectGraphRequest) + RPCUObjectGraphRequest, + RPCUProfileRequest, RPCWakeUpRequest) # yapf: enable from vllm.logger import init_logger from vllm.outputs import RequestOutput diff --git a/vllm/utils.py b/vllm/utils.py index a547466e601..4ab79a9093f 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -2975,15 +2975,10 @@ def stop(self) -> str: self.start_time).strftime("%Y-%m-%d %H:%M:%S") current_time_formatted = datetime.datetime.now().strftime( "%Y-%m-%d %H:%M:%S") + output_lines.append(f"{'='*50}") output_lines.append( - f"{'='*50}" - ) - output_lines.append( - f"start {start_time_formatted}, current: {current_time_formatted}" - ) - output_lines.append( - f"{'='*50}" - ) + f"start {start_time_formatted}, current: {current_time_formatted}") + output_lines.append(f"{'='*50}") gc.collect() growth_info = objgraph.growth() @@ -3002,33 +2997,27 @@ def stop(self) -> str: continue # Generate back reference graph - objgraph.show_backrefs( - obj, - max_depth=10, - too_many=5, - filename=os.path.join( - analysis_dir, f"{gt[0]}_backrefs.dot") - ) + objgraph.show_backrefs(obj, + max_depth=10, + too_many=5, + filename=os.path.join( + analysis_dir, f"{gt[0]}_backrefs.dot")) # Generate reference graph - objgraph.show_refs( - obj, - max_depth=10, - too_many=5, - filename=os.path.join( - analysis_dir, f"{gt[0]}_refs.dot") - ) + objgraph.show_refs(obj, + max_depth=10, + too_many=5, + filename=os.path.join( + analysis_dir, f"{gt[0]}_refs.dot")) # Generate reference chain to module - objgraph.show_chain( - objgraph.find_backref_chain( - obj, objgraph.is_proper_module), + objgraph.show_chain(objgraph.find_backref_chain( + obj, objgraph.is_proper_module), filename=os.path.join( - analysis_dir, f"{gt[0]}_chain.dot") - ) + analysis_dir, f"{gt[0]}_chain.dot")) - output_file_path = os.path.join( - analysis_dir, "growing_memory_stats.log") + output_file_path = os.path.join(analysis_dir, 
+ "growing_memory_stats.log") try: with open(output_file_path, 'w', encoding='utf-8') as f: for line in output_lines: @@ -3039,5 +3028,4 @@ def stop(self) -> str: logger.info("obj graph statistics completed, output_lines: %s", output_lines) - return "obj graph statistics completed" \ No newline at end of file diff --git a/vllm/worker/worker_base.py b/vllm/worker/worker_base.py index 6eb39a7b9c2..f074847bc52 100644 --- a/vllm/worker/worker_base.py +++ b/vllm/worker/worker_base.py @@ -19,7 +19,7 @@ from vllm.lora.request import LoRARequest from vllm.model_executor.layers.sampler import SamplerOutput from vllm.sequence import ExecuteModelRequest, IntermediateTensors -from vllm.utils import (GrowingMemoryObjGraph, +from vllm.utils import (GrowingMemoryObjGraph, enable_trace_function_call_for_thread, resolve_obj_by_qualname, run_method, update_environment_variables, From cd1acd2bf78dec1b0526d28d6ea2526cbe4ec7f7 Mon Sep 17 00:00:00 2001 From: miaochangyu Date: Thu, 26 Jun 2025 00:13:03 +0800 Subject: [PATCH 8/9] fix pre-commit pipeline Signed-off-by: miaochangyu --- .../offline_inference/simple_growthing_obj_graph.py | 2 +- vllm/utils.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/offline_inference/simple_growthing_obj_graph.py b/examples/offline_inference/simple_growthing_obj_graph.py index d95159ef006..965cabe9481 100644 --- a/examples/offline_inference/simple_growthing_obj_graph.py +++ b/examples/offline_inference/simple_growthing_obj_graph.py @@ -49,4 +49,4 @@ def main(): print("You can check the generated files to analyze memory growth") if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/vllm/utils.py b/vllm/utils.py index 4ab79a9093f..316c7bb8346 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -3000,8 +3000,8 @@ def stop(self) -> str: objgraph.show_backrefs(obj, max_depth=10, too_many=5, - filename=os.path.join( - analysis_dir, f"{gt[0]}_backrefs.dot")) + filename=os.path.join(analysis_dir, + f"{gt[0]}_backrefs.dot")) # Generate reference graph objgraph.show_refs(obj, @@ -3013,10 +3013,10 @@ def stop(self) -> str: # Generate reference chain to module objgraph.show_chain(objgraph.find_backref_chain( obj, objgraph.is_proper_module), - filename=os.path.join( - analysis_dir, f"{gt[0]}_chain.dot")) + filename=os.path.join(analysis_dir, + f"{gt[0]}_chain.dot")) - output_file_path = os.path.join(analysis_dir, + output_file_path = os.path.join(analysis_dir, "growing_memory_stats.log") try: with open(output_file_path, 'w', encoding='utf-8') as f: @@ -3028,4 +3028,4 @@ def stop(self) -> str: logger.info("obj graph statistics completed, output_lines: %s", output_lines) - return "obj graph statistics completed" \ No newline at end of file + return "obj graph statistics completed" From 964081471a83d68511a1917ab32483775bf38312 Mon Sep 17 00:00:00 2001 From: miaochangyu Date: Thu, 26 Jun 2025 00:31:33 +0800 Subject: [PATCH 9/9] fix pre-commit pipeline Signed-off-by: miaochangyu --- vllm/utils.py | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/vllm/utils.py b/vllm/utils.py index 316c7bb8346..6940e381ad2 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -2995,26 +2995,28 @@ def stop(self) -> str: except IndexError: logger.warning("Type %s has no available objects", gt[0]) continue + + # Generate back reference graph + objgraph.show_backrefs( + obj, + max_depth=10, + too_many=5, + filename=os.path.join(analysis_dir, f"{gt[0]}_backrefs.dot"), + ) + + # Generate 
reference graph + objgraph.show_refs( + obj, + max_depth=10, + too_many=5, + filename=os.path.join(analysis_dir, f"{gt[0]}_refs.dot"), + ) - # Generate back reference graph - objgraph.show_backrefs(obj, - max_depth=10, - too_many=5, - filename=os.path.join(analysis_dir, - f"{gt[0]}_backrefs.dot")) - - # Generate reference graph - objgraph.show_refs(obj, - max_depth=10, - too_many=5, - filename=os.path.join( - analysis_dir, f"{gt[0]}_refs.dot")) - - # Generate reference chain to module - objgraph.show_chain(objgraph.find_backref_chain( - obj, objgraph.is_proper_module), - filename=os.path.join(analysis_dir, - f"{gt[0]}_chain.dot")) + # Generate reference chain to module + objgraph.show_chain( + objgraph.find_backref_chain(obj, objgraph.is_proper_module), + filename=os.path.join(analysis_dir, f"{gt[0]}_chain.dot"), + ) output_file_path = os.path.join(analysis_dir, "growing_memory_stats.log")
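

---

Usage note (online serving): a minimal sketch of driving the two endpoints this series adds to the OpenAI API server. It assumes the server was launched with `VLLM_OBJ_GRAPH_DIR` set to an absolute path (the routes are only registered when the variable is present) and that it listens on vLLM's default `localhost:8000`; adjust host and port to your deployment. Only the `/start_object_graph` and `/stop_object_graph` endpoints introduced above are used.

```python
# Sketch: drive the new object-graph endpoints against a running server.
# Assumes: server started with VLLM_OBJ_GRAPH_DIR=/abs/path and listening
# on the default localhost:8000 -- both are deployment-specific.
import time
import urllib.request


def post(path: str, base: str = "http://localhost:8000") -> int:
    req = urllib.request.Request(base + path, method="POST")
    with urllib.request.urlopen(req) as resp:
        return resp.status


# Snapshot a baseline of per-type object counts in the engine process.
print(post("/start_object_graph"))  # 200

# ... send normal traffic while the suspected leak accumulates ...
time.sleep(60)

# Diff against the baseline and write the .dot/.log files under
# $VLLM_OBJ_GRAPH_DIR/analysis_<timestamp>/.
print(post("/stop_object_graph"))  # 200
```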
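Each `stop` call produces an `analysis_<timestamp>/` directory containing Graphviz sources (`<type>_backrefs.dot`, `<type>_refs.dot`, `<type>_chain.dot`) plus `growing_memory_stats.log`. A sketch for rendering them to PNG, assuming the Graphviz `dot` binary is installed and on `PATH` (e.g. `apt install graphviz`); the directory name below is only an example:

```python
# Sketch: render every .dot file in one analysis directory to PNG.
# Assumes the Graphviz `dot` executable is installed and on PATH.
import pathlib
import subprocess

# Hypothetical path -- substitute the analysis directory stop() created.
analysis_dir = pathlib.Path("./vllm_obj_graph/analysis_20250626_001303")

for dot_file in analysis_dir.glob("*.dot"):
    png_file = dot_file.with_suffix(".png")
    subprocess.run(["dot", "-Tpng", str(dot_file), "-o", str(png_file)],
                   check=True)
    print(f"rendered {png_file}")
```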
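For readers unfamiliar with `objgraph`, a standalone sketch of the mechanism `GrowingMemoryObjGraph` wraps: the first `objgraph.growth()` call records per-type baseline counts, later calls return `(type_name, count, delta)` tuples for types that grew, and `show_backrefs()` explains what keeps the growing objects alive. The `Leaky` class and `_cache` list are purely illustrative stand-ins for an accidental leak.

```python
# Minimal sketch of the objgraph workflow the utility automates.
import gc

import objgraph


class Leaky:  # illustrative type that we deliberately keep alive
    pass


_cache = []  # stands in for an accidental global that pins objects

gc.collect()
objgraph.growth()  # first call: records the baseline counts

_cache.extend(Leaky() for _ in range(1000))  # simulate a leak

gc.collect()
for type_name, count, delta in objgraph.growth():
    print(f"{type_name}: {count} objects (+{delta})")
    # expected to include something like "Leaky: 1000 objects (+1000)"

# Why is it still alive? Walk referrers back toward a module global.
obj = objgraph.by_type("Leaky")[0]
objgraph.show_backrefs(obj, max_depth=10, too_many=5,
                       filename="leaky_backrefs.dot")
```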