vllm-project · arunmadhusud · Jun 20, 2025 · Jun 20, 2025 · Jun 20, 2025 · gemini-code-assist
diff --git a/benchmarks/README.md b/benchmarks/README.md
@@ -58,6 +58,12 @@ become available.
       <td style="text-align: center;">✅</td>
       <td><code>AI-MO/aimo-validation-aime</code> , <code>AI-MO/NuminaMath-1.5</code>, <code>AI-MO/NuminaMath-CoT</code></td>
     </tr>
+    <tr>
+      <td><strong>HuggingFace-Unsloth</strong></td>
+      <td style="text-align: center;">✅</td>
+      <td style="text-align: center;">✅</td>
+      <td><code>unsloth/LaTeX_OCR</code>, <code>unsloth/Radiology_mini</code></td>
+    </tr>
     <tr>
       <td><strong>HuggingFace-Other</strong></td>
       <td style="text-align: center;">✅</td>
@@ -251,6 +257,49 @@ python3 vllm/benchmarks/benchmark_serving.py \
     --num-prompts 80
 ```
 
+**`unsloth/LaTeX_OCR`**
+
+``` bash
+# Serve the model
+vllm serve unsloth/Qwen2-VL-2B-Instruct \
+    --dtype bfloat16 \
+    --max-model-len 4096 \
+    --max-num-seqs 5 \
+    --limit-mm-per-prompt "image=1,video=0" \
+    --max-seq-len-to-capture 4096 \
+    --mm-processor-kwargs '{"min_pixels": 784, "max_pixels": 1003520}'
+```
+
+``` bash
+python3 vllm/benchmarks/benchmark_serving.py \
+  --backend openai-chat \
+  --request-rate 5 \
+  --max-concurrency 5 \
+  --model unsloth/Qwen2-VL-2B-Instruct \
+  --endpoint /v1/chat/completions \
+  --dataset-name hf \
+  --dataset-path unsloth/LaTeX_OCR \
+  --hf-split train \
+  --hf-output-len 256 \
+  --num-prompts 1000 
+```
+
+**`unsloth/Radiology_mini`**
+
+``` bash
+python3 vllm/benchmarks/benchmark_serving.py \
+  --backend openai-chat \
+  --request-rate 5 \
+  --max-concurrency 5 \
+  --model unsloth/Qwen2-VL-2B-Instruct \
+  --endpoint /v1/chat/completions \
+  --dataset-name hf \
+  --dataset-path unsloth/Radiology_mini \
+  --hf-split train \
+  --hf-output-len 256 \
+  --num-prompts 1000
+```
+
 ### Running With Sampling Parameters
 
 When using OpenAI-compatible backends such as `vllm`, optional sampling
@@ -371,6 +420,30 @@ python3 benchmarks/benchmark_throughput.py \
   --num-prompts 10
 ```
 
+**`unsloth/LaTeX_OCR`**
+
+```bash
+python3 vllm/benchmarks/benchmark_throughput.py \
+  --model unsloth/Qwen2-VL-2B-Instruct \
+  --backend vllm-chat \
+  --dataset-name hf \
+  --dataset-path unsloth/LaTeX_OCR \
+  --hf-split train \
+  --num-prompts 1000 
+```
+
+**`unsloth/Radiology_mini`**
+
+```bash
+python3 vllm/benchmarks/benchmark_throughput.py \
+  --model unsloth/Qwen2-VL-2B-Instruct \
+  --backend vllm-chat \
+  --dataset-name hf \
+  --dataset-path unsloth/Radiology_mini \
+  --hf-split train \
+  --num-prompts 1000
+```
+
 ### Benchmark with LoRA Adapters
 
 ``` bash

diff --git a/benchmarks/benchmark_dataset.py b/benchmarks/benchmark_dataset.py
@@ -832,6 +832,66 @@
         return sampled_requests
 
 
+# -----------------------------------------------------------------------------
+# Unsloth Vision Dataset Implementation
+# -----------------------------------------------------------------------------
+
+class UnslothVisionDataset(HuggingFaceDataset):
+    """
+    Unsloth Vision Dataset.
+    """
+
+    DEFAULT_OUTPUT_LEN = 256
+    SUPPORTED_DATASET_PATHS = {
+        "unsloth/LaTeX_OCR": lambda x: (
+            "Write the LaTeX representation for this image.",
+            x["image"]
+        ),
+        "unsloth/Radiology_mini": lambda x: (
+            "You are an expert radiographer. Describe accurately what you see in this image.",
+            x["image"]
+        ),
+    }
+    IS_MULTIMODAL = True
+
+    def sample(
+        self,
+        tokenizer: PreTrainedTokenizerBase,
+        num_requests: int,
+        output_len: Optional[int] = None,
+        enable_multimodal_chat: bool = False,
+        **kwargs,
+    ) -> list:
+        output_len = output_len if output_len is not None else self.DEFAULT_OUTPUT_LEN
+        sampled_requests = []
+
+        parser_fn = self.SUPPORTED_DATASET_PATHS.get(self.dataset_path)
+        if parser_fn is None:
+            raise ValueError(f"Unsupported dataset path: {self.dataset_path}")
+
+        for item in self.data:
+            if len(sampled_requests) >= num_requests:
+                break            
-            if len(sampled_requests) >= num_requests:
-                break            
+        for item in self.data:
+            if not self.data:
+                break
+            if len(sampled_requests) >= num_requests:
-                break            
+        if len(sampled_requests) >= num_requests:
+            break
+        for item in self.data:
-            if len(sampled_requests) >= num_requests:
-                break            
+        for item in self.data:
+            if not self.data:
+                break
+            if len(sampled_requests) >= num_requests:
-                break            
+        if len(sampled_requests) >= num_requests:
+            break
+        for item in self.data:
+            prompt, mm_content = parser_fn(item)
+            mm_content = process_image(mm_content)
+            prompt_len = len(tokenizer(prompt).input_ids)
+            if enable_multimodal_chat:
+                # Note: when chat is enabled the request prompt_len is no longer
+                # accurate and we will be using request output to count the
+                # actual prompt len
+                prompt = self.apply_multimodal_chat_transformation(
+                    prompt, mm_content)
+            sampled_requests.append(
+                SampleRequest(
+                    prompt=prompt,
+                    prompt_len=prompt_len,
+                    expected_output_len=output_len,
+                    multi_modal_data=mm_content,
+                ))
+        self.maybe_oversample_requests(sampled_requests, num_requests)
+        return sampled_requests
+
+
 # -----------------------------------------------------------------------------
 # Instruct Coder Dataset Implementation
 # -----------------------------------------------------------------------------

diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
@@ -70,6 +70,7 @@
     SampleRequest,
     ShareGPTDataset,
     SonnetDataset,
+    UnslothVisionDataset,
     VisionArenaDataset,
 )
 from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
@@ -687,6 +688,10 @@ def main(args: argparse.Namespace):
         elif args.dataset_path in ASRDataset.SUPPORTED_DATASET_PATHS:
             dataset_class = ASRDataset
             args.hf_split = "train"
+        elif args.dataset_path in UnslothVisionDataset.SUPPORTED_DATASET_PATHS:
+            dataset_class = UnslothVisionDataset
+            args.hf_split = "train"
+            args.hf_subset = None
         else:
             supported_datasets = set(
                 [

diff --git a/benchmarks/benchmark_throughput.py b/benchmarks/benchmark_throughput.py
@@ -25,6 +25,7 @@
     SampleRequest,
     ShareGPTDataset,
     SonnetDataset,
+    UnslothVisionDataset,
     VisionArenaDataset,
 )
 from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
@@ -373,6 +374,12 @@ def get_requests(args, tokenizer):
             dataset_cls = AIMODataset
             common_kwargs["dataset_subset"] = None
             common_kwargs["dataset_split"] = "train"
+        elif args.dataset_path in UnslothVisionDataset.SUPPORTED_DATASET_PATHS:
+            dataset_cls = UnslothVisionDataset
+            common_kwargs['dataset_subset'] = None
+            common_kwargs['dataset_split'] = args.hf_split
+            sample_kwargs["enable_multimodal_chat"] = True
-        elif args.dataset_path in UnslothVisionDataset.SUPPORTED_DATASET_PATHS:
-            dataset_cls = UnslothVisionDataset
-            common_kwargs['dataset_subset'] = None
-            common_kwargs['dataset_split'] = args.hf_split
-            sample_kwargs["enable_multimodal_chat"] = True
+        elif args.dataset_path and args.dataset_path in UnslothVisionDataset.SUPPORTED_DATASET_PATHS:
+            dataset_cls = UnslothVisionDataset
+            common_kwargs['dataset_subset'] = None
+            common_kwargs['dataset_split'] = args.hf_split
+            sample_kwargs["enable_multimodal_chat"] = True
-        elif args.dataset_path in UnslothVisionDataset.SUPPORTED_DATASET_PATHS:
-            dataset_cls = UnslothVisionDataset
-            common_kwargs['dataset_subset'] = None
-            common_kwargs['dataset_split'] = args.hf_split
-            sample_kwargs["enable_multimodal_chat"] = True
+        elif args.dataset_path and args.dataset_path in UnslothVisionDataset.SUPPORTED_DATASET_PATHS:
+            dataset_cls = UnslothVisionDataset
+            common_kwargs['dataset_subset'] = None
+            common_kwargs['dataset_split'] = args.hf_split
+            sample_kwargs["enable_multimodal_chat"] = True
+
     else:
         raise ValueError(f"Unknown dataset name: {args.dataset_name}")
     # Remove None values
@@ -527,6 +534,7 @@ def validate_args(args):
         if args.dataset_path in (
             VisionArenaDataset.SUPPORTED_DATASET_PATHS.keys()
             | ConversationDataset.SUPPORTED_DATASET_PATHS
+            | UnslothVisionDataset.SUPPORTED_DATASET_PATHS.keys()
         ):
             assert args.backend == "vllm-chat", (
                 f"{args.dataset_path} needs to use vllm-chat as the backend."