@inproceedings{buschbeck-exel-2020-parallel,
title = "A Parallel Evaluation Data Set of Software Documentation with Document Structure Annotation",
author = "Buschbeck, Bianka and
Exel, Miriam",
editor = "Nakazawa, Toshiaki and
Nakayama, Hideki and
Ding, Chenchen and
Dabre, Raj and
Kunchukuttan, Anoop and
Pa, Win Pa and
Bojar, Ond{\v{r}}ej and
Parida, Shantipriya and
Goto, Isao and
Mino, Hidaya and
Manabe, Hiroshi and
Sudoh, Katsuhito and
Kurohashi, Sadao and
Bhattacharyya, Pushpak",
booktitle = "Proceedings of the 7th Workshop on Asian Translation",
month = dec,
year = "2020",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.wat-1.20",
pages = "160--169",
abstract = "This paper accompanies the software documentation data set for machine translation, a parallel evaluation data set of data originating from the SAP Help Portal, that we released to the machine translation community for research purposes. It offers the possibility to tune and evaluate machine translation systems in the domain of corporate software documentation and contributes to the availability of a wider range of evaluation scenarios. The data set comprises the language pairs English to Hindi, Indonesian, Malay and Thai, and thus also increases the test coverage for the many low-resource language pairs. Unlike most evaluation data sets that consist of plain parallel text, the segments in this data set come with additional metadata that describes structural information of the document context. We provide insights into the origin and creation, the particularities and characteristics of the data set as well as machine translation results.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="buschbeck-exel-2020-parallel">
<titleInfo>
<title>A Parallel Evaluation Data Set of Software Documentation with Document Structure Annotation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bianka</namePart>
<namePart type="family">Buschbeck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Miriam</namePart>
<namePart type="family">Exel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 7th Workshop on Asian Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Toshiaki</namePart>
<namePart type="family">Nakazawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hideki</namePart>
<namePart type="family">Nakayama</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chenchen</namePart>
<namePart type="family">Ding</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Raj</namePart>
<namePart type="family">Dabre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anoop</namePart>
<namePart type="family">Kunchukuttan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Win</namePart>
<namePart type="given">Pa</namePart>
<namePart type="family">Pa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ondřej</namePart>
<namePart type="family">Bojar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shantipriya</namePart>
<namePart type="family">Parida</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isao</namePart>
<namePart type="family">Goto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hidaya</namePart>
<namePart type="family">Mino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hiroshi</namePart>
<namePart type="family">Manabe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katsuhito</namePart>
<namePart type="family">Sudoh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sadao</namePart>
<namePart type="family">Kurohashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper accompanies the software documentation data set for machine translation, a parallel evaluation data set of data originating from the SAP Help Portal, that we released to the machine translation community for research purposes. It offers the possibility to tune and evaluate machine translation systems in the domain of corporate software documentation and contributes to the availability of a wider range of evaluation scenarios. The data set comprises the language pairs English to Hindi, Indonesian, Malay and Thai, and thus also increases the test coverage for the many low-resource language pairs. Unlike most evaluation data sets that consist of plain parallel text, the segments in this data set come with additional metadata that describes structural information of the document context. We provide insights into the origin and creation, the particularities and characteristics of the data set as well as machine translation results.</abstract>
<identifier type="citekey">buschbeck-exel-2020-parallel</identifier>
<location>
<url>https://aclanthology.org/2020.wat-1.20</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>160</start>
<end>169</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Parallel Evaluation Data Set of Software Documentation with Document Structure Annotation
%A Buschbeck, Bianka
%A Exel, Miriam
%Y Nakazawa, Toshiaki
%Y Nakayama, Hideki
%Y Ding, Chenchen
%Y Dabre, Raj
%Y Kunchukuttan, Anoop
%Y Pa, Win Pa
%Y Bojar, Ondřej
%Y Parida, Shantipriya
%Y Goto, Isao
%Y Mino, Hidaya
%Y Manabe, Hiroshi
%Y Sudoh, Katsuhito
%Y Kurohashi, Sadao
%Y Bhattacharyya, Pushpak
%S Proceedings of the 7th Workshop on Asian Translation
%D 2020
%8 December
%I Association for Computational Linguistics
%C Suzhou, China
%F buschbeck-exel-2020-parallel
%X This paper accompanies the software documentation data set for machine translation, a parallel evaluation data set of data originating from the SAP Help Portal, that we released to the machine translation community for research purposes. It offers the possibility to tune and evaluate machine translation systems in the domain of corporate software documentation and contributes to the availability of a wider range of evaluation scenarios. The data set comprises the language pairs English to Hindi, Indonesian, Malay and Thai, and thus also increases the test coverage for the many low-resource language pairs. Unlike most evaluation data sets that consist of plain parallel text, the segments in this data set come with additional metadata that describes structural information of the document context. We provide insights into the origin and creation, the particularities and characteristics of the data set as well as machine translation results.
%U https://aclanthology.org/2020.wat-1.20
%P 160-169
Markdown (Informal)
[A Parallel Evaluation Data Set of Software Documentation with Document Structure Annotation](https://aclanthology.org/2020.wat-1.20) (Buschbeck & Exel, WAT 2020)
ACL