@inproceedings{alex-naacl-21,
abstract = {Models pretrained with self-supervised objectives on large text corpora achieve state-of-the-art performance on text summarization tasks. However, these models are typically fine-tuned on hundreds of thousands of data points, an infeasible requirement when applying summarization to new, niche domains. In this work, we introduce a general method, called WikiTransfer, for fine-tuning pretrained models for summarization in an unsupervised, dataset-specific manner which makes use of characteristics of the target dataset such as the length and abstractiveness of the desired summaries. We achieve state-of-the-art, zero-shot abstractive summarization performance on the CNN-DailyMail dataset and demonstrate the effectiveness of our approach on three additional, diverse datasets. The models fine-tuned in this unsupervised manner are more robust to noisy data and also achieve better few-shot performance using 10 and 100 training examples. We perform ablation studies on the effect of the components of our unsupervised fine-tuning data and analyze the performance of these models in few-shot scenarios along with data augmentation techniques using both automatic and human evaluation.},
address = {Online},
author = {Alexander Fabbri and Simeng Han and Haoyuan Li and Haoran Li and Marjan Ghazvininejad and Shafiq Joty and Dragomir Radev and Yashar Mehdad},
booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
numpages = {9},
pages = {xx--xx},
publisher = {Association for Computational Linguistics},
series = {NAACL'21},
title = {Improving Zero and Few-Shot Abstractive Summarization with Intermediate Fine-tuning and Data Augmentation},
url = {https://arxiv.org/abs/2010.12836},
year = {2021}
}