@inproceedings{ganesh-etal-2023-findings,
title = "Findings of the {C}o{C}o4{MT} 2023 Shared Task on Corpus Construction for Machine Translation",
author = "Ganesh, Ananya and
Carpuat, Marine and
Chen, William and
Kann, Katharina and
Lignos, Constantine and
Ortega, John E. and
Saleva, Jonne and
Tafreshi, Shabnam and
Zevallos, Rodolfo",
booktitle = "Proceedings of the Second Workshop on Corpus Generation and Corpus Augmentation for Machine Translation",
month = sep,
year = "2023",
address = "Macau SAR, China",
publisher = "Asia-Pacific Association for Machine Translation",
url = "https://aclanthology.org/2023.mtsummit-coco4mt.3",
pages = "22--27",
abstract = "This paper provides an overview of the first shared task on choosing beneficial instances for machine translation, conducted as part of the CoCo4MT 2023 Workshop at MTSummit. This shared task was motivated by the need to make the data annotation process for machine translation more efficient, particularly for low-resource languages for which collecting human translations may be difficult or expensive. The task involved developing methods for selecting the most beneficial instances for training a machine translation system without access to an existing parallel dataset in the target language, such that the best selected instances can then be manually translated. Two teams participated in the shared task, namely the Williams team and the AST team. Submissions were evaluated by training a machine translation model on each submission{'}s chosen instances, and comparing their performance with the chrF++ score. The system that ranked first is by the Williams team, which finds representative instances by clustering the training data.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ganesh-etal-2023-findings">
<titleInfo>
<title>Findings of the CoCo4MT 2023 Shared Task on Corpus Construction for Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ananya</namePart>
<namePart type="family">Ganesh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marine</namePart>
<namePart type="family">Carpuat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">William</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katharina</namePart>
<namePart type="family">Kann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Constantine</namePart>
<namePart type="family">Lignos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="given">E</namePart>
<namePart type="family">Ortega</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonne</namePart>
<namePart type="family">Saleva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shabnam</namePart>
<namePart type="family">Tafreshi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rodolfo</namePart>
<namePart type="family">Zevallos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Corpus Generation and Corpus Augmentation for Machine Translation</title>
</titleInfo>
<originInfo>
<publisher>Asia-Pacific Association for Machine Translation</publisher>
<place>
<placeTerm type="text">Macau SAR, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper provides an overview of the first shared task on choosing beneficial instances for machine translation, conducted as part of the CoCo4MT 2023 Workshop at MTSummit. This shared task was motivated by the need to make the data annotation process for machine translation more efficient, particularly for low-resource languages for which collecting human translations may be difficult or expensive. The task involved developing methods for selecting the most beneficial instances for training a machine translation system without access to an existing parallel dataset in the target language, such that the best selected instances can then be manually translated. Two teams participated in the shared task, namely the Williams team and the AST team. Submissions were evaluated by training a machine translation model on each submission’s chosen instances, and comparing their performance with the chrF++ score. The system that ranked first is by the Williams team, which finds representative instances by clustering the training data.</abstract>
<identifier type="citekey">ganesh-etal-2023-findings</identifier>
<location>
<url>https://aclanthology.org/2023.mtsummit-coco4mt.3</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>22</start>
<end>27</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Findings of the CoCo4MT 2023 Shared Task on Corpus Construction for Machine Translation
%A Ganesh, Ananya
%A Carpuat, Marine
%A Chen, William
%A Kann, Katharina
%A Lignos, Constantine
%A Ortega, John E.
%A Saleva, Jonne
%A Tafreshi, Shabnam
%A Zevallos, Rodolfo
%S Proceedings of the Second Workshop on Corpus Generation and Corpus Augmentation for Machine Translation
%D 2023
%8 September
%I Asia-Pacific Association for Machine Translation
%C Macau SAR, China
%F ganesh-etal-2023-findings
%X This paper provides an overview of the first shared task on choosing beneficial instances for machine translation, conducted as part of the CoCo4MT 2023 Workshop at MTSummit. This shared task was motivated by the need to make the data annotation process for machine translation more efficient, particularly for low-resource languages for which collecting human translations may be difficult or expensive. The task involved developing methods for selecting the most beneficial instances for training a machine translation system without access to an existing parallel dataset in the target language, such that the best selected instances can then be manually translated. Two teams participated in the shared task, namely the Williams team and the AST team. Submissions were evaluated by training a machine translation model on each submission’s chosen instances, and comparing their performance with the chrF++ score. The system that ranked first is by the Williams team, which finds representative instances by clustering the training data.
%U https://aclanthology.org/2023.mtsummit-coco4mt.3
%P 22-27
Markdown (Informal)
[Findings of the CoCo4MT 2023 Shared Task on Corpus Construction for Machine Translation](https://aclanthology.org/2023.mtsummit-coco4mt.3) (Ganesh et al., MTSummit 2023)
ACL
- Ananya Ganesh, Marine Carpuat, William Chen, Katharina Kann, Constantine Lignos, John E. Ortega, Jonne Saleva, Shabnam Tafreshi, and Rodolfo Zevallos. 2023. Findings of the CoCo4MT 2023 Shared Task on Corpus Construction for Machine Translation. In Proceedings of the Second Workshop on Corpus Generation and Corpus Augmentation for Machine Translation, pages 22–27, Macau SAR, China. Asia-Pacific Association for Machine Translation.