Skip to content

Commit ca6e142

Browse files
committed
Adding some extra unit tests on some math cases I wasn't sure of
1 parent 7a638c7 commit ca6e142

File tree

3 files changed

+61
-4
lines changed

3 files changed

+61
-4
lines changed

Diff for: olmocr/bench/benchmark.py

+42-3
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ def main():
172172
help="Run benchmark even if some files are missing",
173173
)
174174
parser.add_argument("--candidate", type=str, default=None, help="Run test only for a single candidate")
175+
parser.add_argument("--skip_baseline", action="store_true", help="Skip running baseline tests (ex. that check that basic content is present on each page)")
175176
parser.add_argument(
176177
"--bootstrap_samples",
177178
type=int,
@@ -226,8 +227,12 @@ def main():
226227
sys.exit(1)
227228

228229
all_tests = []
230+
test_to_jsonl = {} # Map test IDs to their source jsonl files
229231
for jsonl_path in jsonl_files:
232+
jsonl_basename = os.path.basename(jsonl_path)
230233
tests = load_tests(jsonl_path)
234+
for test in tests:
235+
test_to_jsonl[test.id] = jsonl_basename
231236
all_tests.extend(tests)
232237

233238
if not all_tests:
@@ -237,6 +242,7 @@ def main():
237242
for pdf in pdf_basenames:
238243
if not any(t.type == "baseline" for t in all_tests if t.pdf == pdf):
239244
all_tests.append(BaselineTest(id=f"{pdf}_baseline", pdf=pdf, page=1, type="baseline"))
245+
test_to_jsonl[all_tests[-1].id] = "baseline"
240246

241247
for pdf in pdf_basenames:
242248
pdf_doc = PdfReader(os.path.join(pdf_folder, pdf))
@@ -245,6 +251,9 @@ def main():
245251
print(f"No dataset entry found for pdf {pdf} page {page}")
246252
sys.exit(1)
247253

254+
if args.skip_baseline:
255+
all_tests = [test for test in all_tests if test.type != "baseline"]
256+
248257
# Sample tests if requested
249258
if args.sample is not None and args.sample > 0:
250259
if args.sample >= len(all_tests):
@@ -280,9 +289,8 @@ def main():
280289
candidate, all_tests, pdf_basenames, args.force
281290
)
282291

283-
# Store test results for the report if needed
284-
if args.test_report:
285-
test_results_by_candidate[candidate_name] = test_results
292+
# Always store test results for displaying jsonl file groupings
293+
test_results_by_candidate[candidate_name] = test_results
286294

287295
if all_test_scores:
288296
ci = calculate_bootstrap_ci(all_test_scores, n_bootstrap=n_bootstrap, ci_level=ci_level)
@@ -316,6 +324,37 @@ def main():
316324
scores = test_type_breakdown[ttype]
317325
avg = sum(scores) / len(scores) * 100 if scores else 0.0
318326
print(f" {ttype:8s}: {avg:0.1f}% average pass rate over {len(scores)} tests")
327+
328+
# Group results by jsonl file
329+
jsonl_results = {}
330+
for test in all_tests:
331+
# Get the jsonl file this test came from
332+
jsonl_file = test_to_jsonl.get(test.id, "unknown")
333+
334+
if jsonl_file not in jsonl_results:
335+
jsonl_results[jsonl_file] = {"total": 0, "passed": 0}
336+
337+
jsonl_results[jsonl_file]["total"] += 1
338+
339+
# Get the test result for this candidate if it exists
340+
test_result = None
341+
if not candidate_errors and hasattr(test, "pdf") and hasattr(test, "page"):
342+
pdf_name = test.pdf
343+
page = test.page
344+
if pdf_name in test_results_by_candidate.get(candidate_name, {}) and page in test_results_by_candidate[candidate_name].get(pdf_name, {}):
345+
for t, passed, _ in test_results_by_candidate[candidate_name][pdf_name][page]:
346+
if t.id == test.id:
347+
test_result = passed
348+
break
349+
350+
if test_result:
351+
jsonl_results[jsonl_file]["passed"] += 1
352+
353+
print("\n Results by JSONL file:")
354+
for jsonl_file, results in sorted(jsonl_results.items()):
355+
if results["total"] > 0:
356+
pass_rate = (results["passed"] / results["total"]) * 100
357+
print(f" {jsonl_file:30s}: {pass_rate:0.1f}% ({results['passed']}/{results['total']} tests)")
319358
print("")
320359

321360
if args.permutation_tests is not None:

Diff for: olmocr/bench/katex/render.py

+14
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,20 @@ def test_mathbf_vs_boldsymbol(self):
606606
eq2 = render_equation("\\boldsymbol{x}", use_cache=False)
607607
self.assertFalse(compare_rendered_equations(eq1, eq2))
608608

609+
def test_assert_subtle_square_root(self):
    """Two near-identical equations must NOT compare as equal.

    The second equation differs from the first in small ways that are easy
    to overlook: the trailing ``a`` in the denominator is moved inside the
    square root, and the integrand uses ``a`` where the first uses alpha.
    Both sides are rendered with ``use_cache=False`` so the comparison
    always runs against freshly rendered output, matching the neighbouring
    tests.
    """
    reference = render_equation(
        "A N'P' = \\int \\beta d\\alpha = \\frac{2}{3\\sqrt{3} a}\\int (\\alpha - 2a)^{\\frac{3}{2}} d\\alpha",
        use_cache=False,
    )
    altered = render_equation(
        "AN'P' = \\int \\beta \\, d\\alpha = \\frac{2}{3 \\sqrt{3a}} \\int (a - 2a)^{\\frac{3}{2}} d\\alpha",
        use_cache=False,
    )
    self.assertFalse(compare_rendered_equations(reference, altered))
613+
614+
def test_text_added(self):
    """Cosmetic variations of one equation must still compare as equal.

    Two variants are checked against the same reference equation:

    * a version that differs only in source spacing, and
    * a version with a leading ``\\text{...}`` label in front of the same
      mathematics.

    Each variant runs in its own ``subTest`` so a failure in one is
    reported without hiding the result of the other. Every render uses
    ``use_cache=False`` so the comparison never depends on a previously
    cached render.
    """
    reference_latex = "A N'P' = \\int \\beta d\\alpha = \\frac{2}{3\\sqrt{3} a}\\int (\\alpha - 2a)^{\\frac{3}{2}} d\\alpha"
    variants = {
        "spacing_only": "AN'P' = \\int \\beta d\\alpha = \\frac{2}{3 \\sqrt{3} a} \\int (\\alpha - 2a)^{\\frac{3}{2}} d\\alpha",
        "leading_text": "\\text{area evolute } AN'P' = \\int \\beta d\\alpha = \\frac{2}{3 \\sqrt{3} a} \\int (\\alpha - 2a)^{\\frac{3}{2}} d\\alpha",
    }

    for label, variant_latex in variants.items():
        with self.subTest(variant=label):
            reference = render_equation(reference_latex, use_cache=False)
            variant = render_equation(variant_latex, use_cache=False)
            self.assertTrue(compare_rendered_equations(reference, variant))
622+
609623
def test_tensor_notation_equivalent(self):
610624
eq1 = render_equation("T_{ij}^{kl}", use_cache=False)
611625
eq2 = render_equation("T^{kl}_{ij}", use_cache=False)

Diff for: olmocr/bench/report.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
import os
33
from typing import Dict, List, Tuple
44

5+
from tqdm import tqdm
6+
57
from olmocr.data.renderpdf import render_pdf_to_base64webp
68

79
from .tests import BasePDFTest
@@ -357,6 +359,7 @@ def generate_html_report(
357359
"""
358360

359361
# Calculate summary statistics for each candidate
362+
print("Calculating summary statistics...")
360363
for candidate in candidates:
361364
total_tests = 0
362365
passed_tests = 0
@@ -396,6 +399,7 @@ def generate_html_report(
396399
"""
397400

398401
# Create content for each candidate
402+
print("Generating candidate content...")
399403
for i, candidate in enumerate(candidates):
400404
html += f""" <div id="{candidate}" class="candidate-content{' active' if i == 0 else ''}">
401405
"""
@@ -404,7 +408,7 @@ def generate_html_report(
404408
for c in test_results_by_candidate.values():
405409
all_pdfs.update(c.keys())
406410

407-
for pdf_name in sorted(all_pdfs):
411+
for pdf_name in tqdm(sorted(all_pdfs), desc="Rendering report pages"):
408412
if pdf_name not in test_results_by_candidate[candidate]:
409413
continue
410414

0 commit comments

Comments
 (0)