% Conference paper entry; the eprint/archiveprefix fields repeat the arXiv
% identifier already present in the url field so arXiv-aware styles can use it.
@inproceedings{Phi-MoE,
  author        = {Nguyen, Xuan-Phi and Pandit, Shrey and Xu, Austin and Xiong, Caiming and Joty, Shafiq},
  title         = {Least-Loaded Expert Parallelism: Load Balancing An Imbalanced Mixture-of-Experts},
  booktitle     = {International Conference on Machine Learning},
  series        = {ICML-26},
  year          = {2026},
  eprint        = {2601.17111},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2601.17111},
}