@inproceedings{zhu-etal-2023-investigating,
title = "Investigating Techniques for a Deeper Understanding of Neural Machine Translation ({NMT}) Systems through Data Filtering and Fine-tuning Strategies",
author = "Zhu, Lichao and
Zimina, Maria and
B{\'e}nard, Maud and
Namdar, Behnoosh and
Ballier, Nicolas and
Wisniewski, Guillaume and
Yun{\`e}s, Jean-Baptiste",
editor = "Koehn, Philipp and
Haddow, Barry and
Kocmi, Tom and
Monz, Christof",
booktitle = "Proceedings of the Eighth Conference on Machine Translation",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.wmt-1.28",
doi = "10.18653/v1/2023.wmt-1.28",
pages = "275--281",
abstract = "In the context of this biomedical shared task, we have implemented data filters to enhance the selection of relevant training data for fine- tuning from the available training data sources. Specifically, we have employed textometric analysis to detect repetitive segments within the test set, which we have then used for re- fining the training data used to fine-tune the mBart-50 baseline model. Through this approach, we aim to achieve several objectives: developing a practical fine-tuning strategy for training biomedical in-domain fr{\textless}{\textgreater}en models, defining criteria for filtering in-domain training data, and comparing model predictions, fine-tuning data in accordance with the test set to gain a deeper insight into the functioning of Neural Machine Translation (NMT) systems.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhu-etal-2023-investigating">
<titleInfo>
<title>Investigating Techniques for a Deeper Understanding of Neural Machine Translation (NMT) Systems through Data Filtering and Fine-tuning Strategies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lichao</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Zimina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maud</namePart>
<namePart type="family">Bénard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Behnoosh</namePart>
<namePart type="family">Namdar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nicolas</namePart>
<namePart type="family">Ballier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guillaume</namePart>
<namePart type="family">Wisniewski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jean-Baptiste</namePart>
<namePart type="family">Yunès</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth Conference on Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Koehn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barry</namePart>
<namePart type="family">Haddow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Kocmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christof</namePart>
<namePart type="family">Monz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In the context of this biomedical shared task, we have implemented data filters to enhance the selection of relevant training data for fine- tuning from the available training data sources. Specifically, we have employed textometric analysis to detect repetitive segments within the test set, which we have then used for re- fining the training data used to fine-tune the mBart-50 baseline model. Through this approach, we aim to achieve several objectives: developing a practical fine-tuning strategy for training biomedical in-domain fr\textless\textgreateren models, defining criteria for filtering in-domain training data, and comparing model predictions, fine-tuning data in accordance with the test set to gain a deeper insight into the functioning of Neural Machine Translation (NMT) systems.</abstract>
<identifier type="citekey">zhu-etal-2023-investigating</identifier>
<identifier type="doi">10.18653/v1/2023.wmt-1.28</identifier>
<location>
<url>https://aclanthology.org/2023.wmt-1.28</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>275</start>
<end>281</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Investigating Techniques for a Deeper Understanding of Neural Machine Translation (NMT) Systems through Data Filtering and Fine-tuning Strategies
%A Zhu, Lichao
%A Zimina, Maria
%A Bénard, Maud
%A Namdar, Behnoosh
%A Ballier, Nicolas
%A Wisniewski, Guillaume
%A Yunès, Jean-Baptiste
%Y Koehn, Philipp
%Y Haddow, Barry
%Y Kocmi, Tom
%Y Monz, Christof
%S Proceedings of the Eighth Conference on Machine Translation
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F zhu-etal-2023-investigating
%X In the context of this biomedical shared task, we have implemented data filters to enhance the selection of relevant training data for fine- tuning from the available training data sources. Specifically, we have employed textometric analysis to detect repetitive segments within the test set, which we have then used for re- fining the training data used to fine-tune the mBart-50 baseline model. Through this approach, we aim to achieve several objectives: developing a practical fine-tuning strategy for training biomedical in-domain fr\textless\textgreateren models, defining criteria for filtering in-domain training data, and comparing model predictions, fine-tuning data in accordance with the test set to gain a deeper insight into the functioning of Neural Machine Translation (NMT) systems.
%R 10.18653/v1/2023.wmt-1.28
%U https://aclanthology.org/2023.wmt-1.28
%U https://doi.org/10.18653/v1/2023.wmt-1.28
%P 275-281
Markdown (Informal)
[Investigating Techniques for a Deeper Understanding of Neural Machine Translation (NMT) Systems through Data Filtering and Fine-tuning Strategies](https://aclanthology.org/2023.wmt-1.28) (Zhu et al., WMT 2023)
ACL