vllm-project · alexm-redhat · Jun 17, 2025 · Jun 17, 2025 · Jun 18, 2025 · Jun 18, 2025
diff --git a/examples/offline_inference/basic/basic.py b/examples/offline_inference/basic/basic.py
@@ -16,7 +16,7 @@
 
 def main():
     # Create an LLM.
-    llm = LLM(model="facebook/opt-125m")
+    llm = LLM(model="deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct", trust_remote_code=True)
     # Generate texts from the prompts.
     # The output is a list of RequestOutput objects
     # that contain the prompt, generated text, and other information.

@@ -564,6 +564,8 @@ def forward(
         """
         assert output is not None, "Output tensor must be provided."
 
+        print("kv_cache.shape = {}".format(kv_cache.shape))
+
         if output_scale is not None:
             raise NotImplementedError(
                 "fused output quantization is not yet supported"