Skip to content

Commit 4e990e2

Browse files
committed
Merge branch 'main' of https://github.com/allenai/olmocr
2 parents a13a501 + c7ddad0 commit 4e990e2

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

Diff for: olmocr/bench/prompts.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
 1   def build_basic_prompt() -> str:
-2       return "Please provide a natural, plain text representation of the document, formatted in Markdown. For mathematical expressions, use LaTeX notation with \( and \) for inline equations and \[ and \] for display equations. Convert any tables into Markdown format."
+2       return "Please provide a natural, plain text representation of the document, formatted in Markdown. Skip any headers and footers. For ALL mathematical expressions, use LaTeX notation with \( and \) for inline equations and \[ and \] for display equations. Convert any tables into Markdown format."
 3
 4
 5   def claude_response_format_schema() -> dict:

Diff for: olmocr/bench/scripts/convert_all.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ pip install --upgrade vllm==0.8.3
 260
 261
 262  start_server vllm "Qwen/Qwen2.5-VL-7B-Instruct" --max-model-len 8192
-263  python -m olmocr.bench.convert --dir "$BENCH_DIR" server:name=qwen25vl_prompt6:model=Qwen/Qwen2.5-VL-7B-Instruct:temperature=0.1:prompt_template=basic:response_template=plain --parallel 50
+263  python -m olmocr.bench.convert --dir "$BENCH_DIR" server:name=qwen25vl_prompt7:model=Qwen/Qwen2.5-VL-7B-Instruct:temperature=0.1:prompt_template=basic:response_template=plain --parallel 50
 264  stop_server
 265
 266  start_server vllm "reducto/RolmOCR" --max-model-len 8192

0 commit comments

Comments (0)