@inproceedings{Ding-et-al-acl-24,
abstract = {In the rapidly evolving field of machine learning (ML), data augmentation (DA) has emerged as a pivotal technique for enhancing model performance by diversifying training examples without the need for additional data collection. This survey explores the transformative impact of Large Language Models (LLMs) on DA, particularly addressing the unique challenges and opportunities they present in the context of natural language processing (NLP) and beyond. We provide a comprehensive overview of methods leveraging LLMs for DA, including a novel exploration of learning paradigms where LLM-generated data is used for further training, thus enhancing model robustness and performance. Additionally, this paper delineates the primary challenges faced in this domain, ranging from controllable data augmentation to multimodal data augmentation. This survey highlights the paradigm shift introduced by LLMs in DA and aims to serve as a foundational guide for researchers and practitioners in this field.},
address = {Bangkok, Thailand},
author = {Bosheng Ding and Chengwei Qin and Ruochen Zhao and Tianze Luo and Xinze Li and Guizhen Chen and Wenhan Xia and Junjie Hu and Anh-Tuan Luu and Shafiq Joty},
booktitle = {Findings of the Association for Computational Linguistics: ACL 2024},
publisher = {Association for Computational Linguistics},
series = {ACL'24 Findings},
title = {Data Augmentation using LLMs: Data Perspectives, Learning Paradigms and Challenges},
url = {https://arxiv.org/abs/2403.02990},
year = {2024}
}