% Conference paper entry: "RST Parsing from Scratch" (series NAACL'21, year 2021).
% TODO: `pages` is still a placeholder (xx--xx); replace it with the real page
%       range from the published proceedings.
% NOTE(review): `address` appears to hold the conference location rather than
%       the publisher's city -- confirm which one the target style expects.
@inproceedings{nguyen-naacl-21,
  author        = {Nguyen, Thanh-Tung and Nguyen, Xuan-Phi and Joty, Shafiq and Li, Xiaoli},
  title         = {{RST} Parsing from Scratch},
  booktitle     = {Proceedings of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  series        = {NAACL'21},
  publisher     = {ACL},
  address       = {Mexico City, Mexico},
  year          = {2021},
  pages         = {xx--xx},
  numpages      = {9},
  eprint        = {2105.10861},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2105.10861},
  abstract      = {We introduce a novel top-down end-to-end formulation of document level discourse parsing in the Rhetorical Structure Theory (RST) framework. In this formulation, we consider discourse parsing as a sequence of splitting decisions at token boundaries and use a seq2seq network to model the splitting decisions. Our framework facilitates discourse parsing from scratch without requiring discourse segmentation as a prerequisite; rather, it yields segmentation as part of the parsing process. Our unified parsing model adopts a beam search to decode the best tree structure by searching through a space of high scoring trees. With extensive experiments on the standard RST discourse treebank, we demonstrate that our parser outperforms existing methods by a good margin in both end-to-end parsing and parsing with gold segmentation. More importantly, it does so without using any handcrafted features, making it faster and easily adaptable to new languages and domains.},
}