% J4R conference paper (ACL proceedings entry); the arXiv identifier from the
% url is also recorded in eprint/archiveprefix for eprint-aware styles.
% The acronym in the title is brace-protected so sentence-casing styles keep it.
@inproceedings{xu2025j4rlearningjudgeequivalent,
  author        = {Xu, Austin and Zhou, Yilun and Nguyen, Xuan-Phi and Xiong, Caiming and Joty, Shafiq},
  title         = {{J4R}: Learning to Judge with Equivalent Initial State Group Relative Policy Optimization},
  booktitle     = {Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics},
  series        = {ACL-26},
  year          = {2026},
  eprint        = {2505.13346},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2505.13346},
}