% Conference paper entry for UXLA (ACL 2021 proceedings).
% NOTE(review): the abstract below calls the method "MultiMix" while the title
% says "UXLA"; it presumably comes from an earlier preprint. Confirm against the
% published version before relying on it.
% NOTE(review): the url field was empty and has been dropped; add a doi or url
% once the canonical identifier is confirmed.
@inproceedings{bari-et-al-arxiv-20,
  author    = {Bari, M Saiful and Mohiuddin, Tasnim and Joty, Shafiq},
  title     = {{UXLA}: A Robust Unsupervised Data Augmentation Framework for Cross-Lingual {NLP}},
  booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics},
  series    = {ACL'21},
  publisher = {Association for Computational Linguistics},
  address   = {Bangkok, Thailand},
  year      = {2021},
  pages     = {1978--1992},
  numpages  = {15},
  abstract  = {Transfer learning has yielded state-of-the-art results in many supervised natural language processing tasks. However, annotated data for every target task in every target language is rare, especially for low-resource languages. In this work, we propose MultiMix, a novel data augmentation method for semi-supervised learning in zero-shot transfer learning scenarios. In particular, MultiMix targets to solve cross-lingual adaptation problems from a source (language) distribution to an unknown target (language) distribution assuming it has no training labels in the target language task. In its heart, MultiMix performs simultaneous self-training with data augmentation and unsupervised sample selection. To show its effectiveness, we have performed extensive experiments on zero-shot transfers for cross-lingual named entity recognition (XNER) and natural language inference (XNLI). Our experiments show sizeable improvements in both tasks outperforming the baselines by a good margin.},
}