% arXiv preprint entry. The benchmark name in the title is brace-protected so that sentence-casing styles keep its capitalisation.
@misc{Austin-pre,
  author        = {Pandit, Shrey and Xu, Austin and Nguyen, Xuan-Phi and Ming, Yifei and Xiong, Caiming and Joty, Shafiq},
  title         = {{Hard2Verify}: A Step-Level Verification Benchmark for Open-Ended Frontier Math},
  year          = {2025},
  eprint        = {2510.13744},
  archiveprefix = {arXiv},
  primaryclass  = {cs.AI},
  url           = {https://arxiv.org/abs/2510.13744},
}