@inproceedings{mascarell-etal-2023-entropy,
title = "Entropy-based Sampling for Abstractive Multi-document Summarization in Low-resource Settings",
author = "Mascarell, Laura and
Chalumattu, Ribin and
Heitmann, Julien",
editor = "Keet, C. Maria and
Lee, Hung-Yi and
Zarrie{\ss}, Sina",
booktitle = "Proceedings of the 16th International Natural Language Generation Conference",
month = sep,
year = "2023",
address = "Prague, Czechia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.inlg-main.9",
doi = "10.18653/v1/2023.inlg-main.9",
pages = "123--133",
abstract = "Research in Multi-document Summarization (MDS) mostly focuses on the English language and depends on large MDS datasets that are not available for other languages. Some of these approaches concatenate the source documents, resulting in overlong model inputs. Existing transformer architectures are unable to process such long inputs entirely, omitting documents in the summarization process. Other solutions address this issue by implementing multi-stage approaches that also require changes in the model architecture. In this paper, we introduce various sampling approaches based on information entropy that allow us to perform MDS in a single stage. These approaches also consider all source documents without using MDS training data nor changing the model{'}s architecture. Besides, we build a MDS test set of German news articles to assess the performance of our methods on abstractive multi-document summaries. Experimental results show that our entropy-based approaches outperform previous state-of-the-art on German MDS, while still remaining primarily abstractive. We release our code and MDS test set to encourage further research in German abstractive MDS.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mascarell-etal-2023-entropy">
<titleInfo>
<title>Entropy-based Sampling for Abstractive Multi-document Summarization in Low-resource Settings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Laura</namePart>
<namePart type="family">Mascarell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ribin</namePart>
<namePart type="family">Chalumattu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julien</namePart>
<namePart type="family">Heitmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 16th International Natural Language Generation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">C.</namePart>
<namePart type="given">Maria</namePart>
<namePart type="family">Keet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hung-Yi</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sina</namePart>
<namePart type="family">Zarrieß</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Prague, Czechia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Research in Multi-document Summarization (MDS) mostly focuses on the English language and depends on large MDS datasets that are not available for other languages. Some of these approaches concatenate the source documents, resulting in overlong model inputs. Existing transformer architectures are unable to process such long inputs entirely, omitting documents in the summarization process. Other solutions address this issue by implementing multi-stage approaches that also require changes in the model architecture. In this paper, we introduce various sampling approaches based on information entropy that allow us to perform MDS in a single stage. These approaches also consider all source documents without using MDS training data nor changing the model’s architecture. Besides, we build a MDS test set of German news articles to assess the performance of our methods on abstractive multi-document summaries. Experimental results show that our entropy-based approaches outperform previous state-of-the-art on German MDS, while still remaining primarily abstractive. We release our code and MDS test set to encourage further research in German abstractive MDS.</abstract>
<identifier type="citekey">mascarell-etal-2023-entropy</identifier>
<identifier type="doi">10.18653/v1/2023.inlg-main.9</identifier>
<location>
<url>https://aclanthology.org/2023.inlg-main.9</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>123</start>
<end>133</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Entropy-based Sampling for Abstractive Multi-document Summarization in Low-resource Settings
%A Mascarell, Laura
%A Chalumattu, Ribin
%A Heitmann, Julien
%Y Keet, C. Maria
%Y Lee, Hung-Yi
%Y Zarrieß, Sina
%S Proceedings of the 16th International Natural Language Generation Conference
%D 2023
%8 September
%I Association for Computational Linguistics
%C Prague, Czechia
%F mascarell-etal-2023-entropy
%X Research in Multi-document Summarization (MDS) mostly focuses on the English language and depends on large MDS datasets that are not available for other languages. Some of these approaches concatenate the source documents, resulting in overlong model inputs. Existing transformer architectures are unable to process such long inputs entirely, omitting documents in the summarization process. Other solutions address this issue by implementing multi-stage approaches that also require changes in the model architecture. In this paper, we introduce various sampling approaches based on information entropy that allow us to perform MDS in a single stage. These approaches also consider all source documents without using MDS training data nor changing the model’s architecture. Besides, we build a MDS test set of German news articles to assess the performance of our methods on abstractive multi-document summaries. Experimental results show that our entropy-based approaches outperform previous state-of-the-art on German MDS, while still remaining primarily abstractive. We release our code and MDS test set to encourage further research in German abstractive MDS.
%R 10.18653/v1/2023.inlg-main.9
%U https://aclanthology.org/2023.inlg-main.9
%U https://doi.org/10.18653/v1/2023.inlg-main.9
%P 123-133
Markdown (Informal)
[Entropy-based Sampling for Abstractive Multi-document Summarization in Low-resource Settings](https://aclanthology.org/2023.inlg-main.9) (Mascarell et al., INLG-SIGDIAL 2023)
ACL