@inproceedings{saxena-el-haj-2023-exploring,
title = "Exploring Abstractive Text Summarisation for Podcasts: A Comparative Study of {BART} and T5 Models",
author = "Saxena, Parth and
El-Haj, Mo",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing",
month = sep,
year = "2023",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2023.ranlp-1.110",
pages = "1023--1033",
abstract = "Podcasts have become increasingly popular in recent years, resulting in a massive amount of audio content being produced every day. Efficient summarisation of podcast episodes can enable better content management and discovery for users. In this paper, we explore the use of abstractive text summarisation methods to generate high-quality summaries of podcast episodes. We use pre-trained models, BART and T5, to fine-tune on a dataset of Spotify{'}s 100K podcast. We evaluate our models using automated metrics and human evaluation, and find that the BART model fine-tuned on the podcast dataset achieved a higher ROUGE-1 and ROUGE-L score compared to other models, while the T5 model performed better in terms of semantic meaning. The human evaluation indicates that both models produced high-quality summaries that were well received by participants. Our study demonstrates the effectiveness of abstractive summarisation methods for podcast episodes and offers insights for improving the summarisation of audio content.",
}
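The pipeline the abstract describes, fine-tuning a pre-trained sequence-to-sequence model on transcript/summary pairs, can be sketched with the Hugging Face `transformers` and `datasets` libraries. This is a minimal illustration under stated assumptions: the `facebook/bart-base` checkpoint, the `transcript` and `summary` column names, the toy data, and the hyperparameters are all placeholders rather than the paper's configuration, and the Spotify 100K podcast corpus itself is not reproduced here.

```python
# Minimal sketch of fine-tuning BART for abstractive summarisation.
# Checkpoint, column names, toy data, and hyperparameters are assumptions.
from datasets import Dataset
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    DataCollatorForSeq2Seq,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
)

checkpoint = "facebook/bart-base"  # placeholder; the paper fine-tunes BART and T5
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

# Toy stand-in for (episode transcript, reference summary) pairs.
toy = Dataset.from_dict({
    "transcript": ["Welcome back to the show. Today we talk about ocean warming ..."],
    "summary": ["The hosts discuss ocean warming with a climate scientist."],
})

def preprocess(batch):
    # Truncate long transcripts to the encoder's context window.
    model_inputs = tokenizer(batch["transcript"], max_length=1024, truncation=True)
    labels = tokenizer(text_target=batch["summary"], max_length=128, truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized = toy.map(preprocess, batched=True, remove_columns=toy.column_names)

args = Seq2SeqTrainingArguments(
    output_dir="bart-podcast-summariser",
    per_device_train_batch_size=2,
    num_train_epochs=1,
    predict_with_generate=True,
)

trainer = Seq2SeqTrainer(
    model=model,
    args=args,
    train_dataset=tokenized,
    data_collator=DataCollatorForSeq2Seq(tokenizer, model=model),
)
trainer.train()
```

The same sketch works for T5 by swapping in a T5 checkpoint (e.g. `t5-base`) and prefixing each input with `"summarize: "`, since T5 was pre-trained with task prefixes.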
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="saxena-el-haj-2023-exploring">
<titleInfo>
<title>Exploring Abstractive Text Summarisation for Podcasts: A Comparative Study of BART and T5 Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Parth</namePart>
<namePart type="family">Saxena</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mo</namePart>
<namePart type="family">El-Haj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Podcasts have become increasingly popular in recent years, resulting in a massive amount of audio content being produced every day. Efficient summarisation of podcast episodes can enable better content management and discovery for users. In this paper, we explore the use of abstractive text summarisation methods to generate high-quality summaries of podcast episodes. We use pre-trained models, BART and T5, to fine-tune on a dataset of Spotify’s 100K podcast. We evaluate our models using automated metrics and human evaluation, and find that the BART model fine-tuned on the podcast dataset achieved a higher ROUGE-1 and ROUGE-L score compared to other models, while the T5 model performed better in terms of semantic meaning. The human evaluation indicates that both models produced high-quality summaries that were well received by participants. Our study demonstrates the effectiveness of abstractive summarisation methods for podcast episodes and offers insights for improving the summarisation of audio content.</abstract>
<identifier type="citekey">saxena-el-haj-2023-exploring</identifier>
<location>
<url>https://aclanthology.org/2023.ranlp-1.110</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>1023</start>
<end>1033</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Exploring Abstractive Text Summarisation for Podcasts: A Comparative Study of BART and T5 Models
%A Saxena, Parth
%A El-Haj, Mo
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing
%D 2023
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F saxena-el-haj-2023-exploring
%X Podcasts have become increasingly popular in recent years, resulting in a massive amount of audio content being produced every day. Efficient summarisation of podcast episodes can enable better content management and discovery for users. In this paper, we explore the use of abstractive text summarisation methods to generate high-quality summaries of podcast episodes. We fine-tune two pre-trained models, BART and T5, on Spotify’s 100K podcast dataset. We evaluate our models using automated metrics and human evaluation, and find that the BART model fine-tuned on the podcast dataset achieved higher ROUGE-1 and ROUGE-L scores than the other models, while the T5 model performed better in terms of semantic meaning. The human evaluation indicates that both models produced high-quality summaries that were well received by participants. Our study demonstrates the effectiveness of abstractive summarisation methods for podcast episodes and offers insights for improving the summarisation of audio content.
%U https://aclanthology.org/2023.ranlp-1.110
%P 1023-1033
Markdown (Informal)
[Exploring Abstractive Text Summarisation for Podcasts: A Comparative Study of BART and T5 Models](https://aclanthology.org/2023.ranlp-1.110) (Saxena & El-Haj, RANLP 2023)
ACL
Parth Saxena and Mo El-Haj. 2023. Exploring Abstractive Text Summarisation for Podcasts: A Comparative Study of BART and T5 Models. In Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing, pages 1023–1033, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.
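
For the automatic evaluation the abstract mentions, ROUGE-1 and ROUGE-L can be computed with Google's `rouge-score` package. The abstract does not name the metric behind "semantic meaning", so the BERTScore call below is an illustrative stand-in for a semantic-similarity metric, not necessarily the paper's choice; the example sentences are invented.

```python
# Hedged sketch of summary evaluation: lexical overlap (ROUGE) plus an
# embedding-based semantic score (BERTScore, an assumed stand-in metric).
from rouge_score import rouge_scorer
from bert_score import score as bert_score

reference = "The hosts interview a climate scientist about ocean warming."
candidate = "A climate scientist joins the hosts to talk about warming oceans."

# ROUGE-1 counts unigram overlap; ROUGE-L scores the longest common subsequence.
scorer = rouge_scorer.RougeScorer(["rouge1", "rougeL"], use_stemmer=True)
rouge = scorer.score(reference, candidate)
print(rouge["rouge1"].fmeasure, rouge["rougeL"].fmeasure)

# BERTScore compares the texts with contextual embeddings, so paraphrases
# that ROUGE penalises can still score highly.
P, R, F1 = bert_score([candidate], [reference], lang="en")
print(F1.mean().item())
```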