% arXiv preprint 2505.13346 (cs.CL). {J4R} is braced so that styles which
% sentence-case titles keep the acronym's capitals instead of printing "J4r".
@misc{xu2025j4rlearningjudgeequivalent,
  author        = {Xu, Austin and Zhou, Yilun and Nguyen, Xuan-Phi and Xiong, Caiming and Joty, Shafiq},
  title         = {{J4R}: Learning to Judge with Equivalent Initial State Group Relative Policy Optimization},
  year          = {2025},
  eprint        = {2505.13346},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2505.13346},
}