% Hard2Verify (ACL 2026). "Hard2Verify" is braced in the title so that
% sentence-casing bibliography styles keep its internal capital letter.
% The eprint/archiveprefix fields repeat the arXiv identifier from the url.
@inproceedings{Austin-pre,
  author        = {Pandit, Shrey and Xu, Austin and Nguyen, Xuan-Phi and Ming, Yifei and Xiong, Caiming and Joty, Shafiq},
  title         = {{Hard2Verify}: A Step-Level Verification Benchmark for Open-Ended Frontier Math},
  booktitle     = {Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics},
  series        = {ACL-26},
  year          = {2026},
  eprint        = {2510.13744},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2510.13744},
}