@inproceedings{bamfo-odoom-etal-2024-synthetic,
title = "Can Synthetic Speech Improve End-to-End Conversational Speech Translation?",
author = "Bamfo Odoom, Bismarck and
Robinson, Nathaniel and
Rippeth, Elijah and
Tavarez-Arce, Luis and
Murray, Kenton and
Wiesner, Matthew and
McNamee, Paul and
Koehn, Philipp and
Duh, Kevin",
editor = "Knowles, Rebecca and
Eriguchi, Akiko and
Goel, Shivali",
booktitle = "Proceedings of the 16th Conference of the Association for Machine Translation in the Americas (Volume 1: Research Track)",
month = sep,
year = "2024",
address = "Chicago, USA",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2024.amta-research.15",
pages = "167--177",
abstract = "Conversational speech translation is an important technology that fosters communication among people of different language backgrounds. Three-way parallel data in the form of source speech, source transcript, and target translation is usually required to train end-to-end systems. However, such datasets are not readily available and are expensive to create as this involves multiple annotation stages. In this paper, we investigate the use of synthetic data from generative models, namely machine translation and text-to-speech synthesis, for training conversational speech translation systems. We show that adding synthetic data to the training recipe increasingly improves end-to-end training performance, especially when limited real data is available. However, when no real data is available, no amount of synthetic data helps.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bamfo-odoom-etal-2024-synthetic">
<titleInfo>
<title>Can Synthetic Speech Improve End-to-End Conversational Speech Translation?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bismarck</namePart>
<namePart type="family">Bamfo Odoom</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nathaniel</namePart>
<namePart type="family">Robinson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elijah</namePart>
<namePart type="family">Rippeth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="family">Tavarez-Arce</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kenton</namePart>
<namePart type="family">Murray</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Wiesner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">McNamee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Koehn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Duh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 16th Conference of the Association for Machine Translation in the Americas (Volume 1: Research Track)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rebecca</namePart>
<namePart type="family">Knowles</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Akiko</namePart>
<namePart type="family">Eriguchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shivali</namePart>
<namePart type="family">Goel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Machine Translation in the Americas</publisher>
<place>
<placeTerm type="text">Chicago, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Conversational speech translation is an important technology that fosters communication among people of different language backgrounds. Three-way parallel data in the form of source speech, source transcript, and target translation is usually required to train end-to-end systems. However, such datasets are not readily available and are expensive to create as this involves multiple annotation stages. In this paper, we investigate the use of synthetic data from generative models, namely machine translation and text-to-speech synthesis, for training conversational speech translation systems. We show that adding synthetic data to the training recipe increasingly improves end-to-end training performance, especially when limited real data is available. However, when no real data is available, no amount of synthetic data helps.</abstract>
<identifier type="citekey">bamfo-odoom-etal-2024-synthetic</identifier>
<location>
<url>https://aclanthology.org/2024.amta-research.15</url>
</location>
<part>
<date>2024-09</date>
<extent unit="page">
<start>167</start>
<end>177</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Can Synthetic Speech Improve End-to-End Conversational Speech Translation?
%A Bamfo Odoom, Bismarck
%A Robinson, Nathaniel
%A Rippeth, Elijah
%A Tavarez-Arce, Luis
%A Murray, Kenton
%A Wiesner, Matthew
%A McNamee, Paul
%A Koehn, Philipp
%A Duh, Kevin
%Y Knowles, Rebecca
%Y Eriguchi, Akiko
%Y Goel, Shivali
%S Proceedings of the 16th Conference of the Association for Machine Translation in the Americas (Volume 1: Research Track)
%D 2024
%8 September
%I Association for Machine Translation in the Americas
%C Chicago, USA
%F bamfo-odoom-etal-2024-synthetic
%X Conversational speech translation is an important technology that fosters communication among people of different language backgrounds. Three-way parallel data in the form of source speech, source transcript, and target translation is usually required to train end-to-end systems. However, such datasets are not readily available and are expensive to create as this involves multiple annotation stages. In this paper, we investigate the use of synthetic data from generative models, namely machine translation and text-to-speech synthesis, for training conversational speech translation systems. We show that adding synthetic data to the training recipe increasingly improves end-to-end training performance, especially when limited real data is available. However, when no real data is available, no amount of synthetic data helps.
%U https://aclanthology.org/2024.amta-research.15
%P 167-177
Markdown (Informal)
[Can Synthetic Speech Improve End-to-End Conversational Speech Translation?](https://aclanthology.org/2024.amta-research.15) (Bamfo Odoom et al., AMTA 2024)
ACL
- Bismarck Bamfo Odoom, Nathaniel Robinson, Elijah Rippeth, Luis Tavarez-Arce, Kenton Murray, Matthew Wiesner, Paul McNamee, Philipp Koehn, and Kevin Duh. 2024. Can Synthetic Speech Improve End-to-End Conversational Speech Translation? In Proceedings of the 16th Conference of the Association for Machine Translation in the Americas (Volume 1: Research Track), pages 167–177, Chicago, USA. Association for Machine Translation in the Americas.