% ICML 2025 conference paper on the JETTS benchmark; the url field points to the arXiv abstract page.
% Authors use the "Last, First" form so BibTeX identifies the surnames (Zhou, Xu, Wang, Xiong, Joty) correctly.
% Acronyms in the title are braced so sentence-casing styles do not lowercase them.
@inproceedings{zhou2025evaluating,
  author    = {Zhou, Yilun and Xu, Austin and Wang, Peifeng and Xiong, Caiming and Joty, Shafiq},
  title     = {Evaluating Judges as Evaluators: The {JETTS} Benchmark of {LLM}-as-Judges as Test-Time Scaling Evaluators},
  booktitle = {International Conference on Machine Learning},
  series    = {ICML-25},
  year      = {2025},
  url       = {https://arxiv.org/abs/2504.15253},
}