@inproceedings{bosheng-et-al-emnlp-20,
abstract = {Data augmentation techniques have been widely used to improve machine learning performance. In this work, we propose a novel method to generate high-quality synthetic data for low-resource tagging tasks with language models, where the language model is trained on the linearized labeled sentences. Our method is applicable to both supervised and semi-supervised settings. For the supervised setting, we conduct extensive experiments on named entity recognition (NER), part-of-speech (POS) tagging and end-to-end target-based sentiment analysis (E2E-TBSA) tasks. For the semi-supervised setting, we evaluate our method on the NER task under the conditions of given unlabeled data only and unlabeled data plus a knowledge base. The results show that our method can consistently outperform the baselines, particularly when the given gold training data are scarce.},
address = {Online},
author = {Bosheng Ding and Linlin Liu and Lidong Bing and Canasai Kruengkrai and Thien Hai Nguyen and Shafiq Joty and Luo Si and Chunyan Miao},
booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing},
numpages = {9},
pages = {6045--6057},
publisher = {Association for Computational Linguistics},
series = {EMNLP'20},
title = {DAGA: Data Augmentation with a Generation Approach for Low-resource Tagging Tasks},
url = {https://www.aclweb.org/anthology/2020.emnlp-main.488/},
year = {2020}
}