@inproceedings{jacqmin-etal-2021-spanalign,
title = "{S}pan{A}lign: Efficient Sequence Tagging Annotation Projection into Translated Data applied to Cross-Lingual Opinion Mining",
author = "Jacqmin, L{\'e}o and
Marzinotto, Gabriel and
Gromada, Justyna and
Szczekocka, Ewelina and
Ko{\l}ody{\'n}ski, Robert and
Damnati, G{\'e}raldine",
editor = "Xu, Wei and
Ritter, Alan and
Baldwin, Tim and
Rahimi, Afshin",
booktitle = "Proceedings of the Seventh Workshop on Noisy User-generated Text (W-NUT 2021)",
month = nov,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.wnut-1.27/",
doi = "10.18653/v1/2021.wnut-1.27",
pages = "238--248",
abstract = "Following the increasing performance of neural machine translation systems, the paradigm of using automatically translated data for cross-lingual adaptation is now studied in several applicative domains. The capacity to accurately project annotations remains however an issue for sequence tagging tasks where annotation must be projected with correct spans. Additionally, when the task implies noisy user-generated text, the quality of translation and annotation projection can be affected. In this paper we propose to tackle multilingual sequence tagging with a new span alignment method and apply it to opinion target extraction from customer reviews. We show that provided suitable heuristics, translated data with automatic span-level annotation projection can yield improvements both for cross-lingual adaptation compared to zero-shot transfer, and data augmentation compared to a multilingual baseline."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jacqmin-etal-2021-spanalign">
<titleInfo>
<title>SpanAlign: Efficient Sequence Tagging Annotation Projection into Translated Data applied to Cross-Lingual Opinion Mining</title>
</titleInfo>
<name type="personal">
<namePart type="given">Léo</namePart>
<namePart type="family">Jacqmin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriel</namePart>
<namePart type="family">Marzinotto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Justyna</namePart>
<namePart type="family">Gromada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ewelina</namePart>
<namePart type="family">Szczekocka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="family">Kołodyński</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Géraldine</namePart>
<namePart type="family">Damnati</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh Workshop on Noisy User-generated Text (W-NUT 2021)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tim</namePart>
<namePart type="family">Baldwin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Afshin</namePart>
<namePart type="family">Rahimi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Following the increasing performance of neural machine translation systems, the paradigm of using automatically translated data for cross-lingual adaptation is now studied in several applicative domains. The capacity to accurately project annotations remains however an issue for sequence tagging tasks where annotation must be projected with correct spans. Additionally, when the task implies noisy user-generated text, the quality of translation and annotation projection can be affected. In this paper we propose to tackle multilingual sequence tagging with a new span alignment method and apply it to opinion target extraction from customer reviews. We show that provided suitable heuristics, translated data with automatic span-level annotation projection can yield improvements both for cross-lingual adaptation compared to zero-shot transfer, and data augmentation compared to a multilingual baseline.</abstract>
<identifier type="citekey">jacqmin-etal-2021-spanalign</identifier>
<identifier type="doi">10.18653/v1/2021.wnut-1.27</identifier>
<location>
<url>https://aclanthology.org/2021.wnut-1.27/</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>238</start>
<end>248</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SpanAlign: Efficient Sequence Tagging Annotation Projection into Translated Data applied to Cross-Lingual Opinion Mining
%A Jacqmin, Léo
%A Marzinotto, Gabriel
%A Gromada, Justyna
%A Szczekocka, Ewelina
%A Kołodyński, Robert
%A Damnati, Géraldine
%Y Xu, Wei
%Y Ritter, Alan
%Y Baldwin, Tim
%Y Rahimi, Afshin
%S Proceedings of the Seventh Workshop on Noisy User-generated Text (W-NUT 2021)
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online
%F jacqmin-etal-2021-spanalign
%X Following the increasing performance of neural machine translation systems, the paradigm of using automatically translated data for cross-lingual adaptation is now studied in several applicative domains. The capacity to accurately project annotations remains however an issue for sequence tagging tasks where annotation must be projected with correct spans. Additionally, when the task implies noisy user-generated text, the quality of translation and annotation projection can be affected. In this paper we propose to tackle multilingual sequence tagging with a new span alignment method and apply it to opinion target extraction from customer reviews. We show that provided suitable heuristics, translated data with automatic span-level annotation projection can yield improvements both for cross-lingual adaptation compared to zero-shot transfer, and data augmentation compared to a multilingual baseline.
%R 10.18653/v1/2021.wnut-1.27
%U https://aclanthology.org/2021.wnut-1.27/
%U https://doi.org/10.18653/v1/2021.wnut-1.27
%P 238-248
Markdown (Informal)
[SpanAlign: Efficient Sequence Tagging Annotation Projection into Translated Data applied to Cross-Lingual Opinion Mining](https://aclanthology.org/2021.wnut-1.27/) (Jacqmin et al., WNUT 2021)
ACL