@inproceedings{das-etal-2016-development,
title = "Development of a {B}engali parser by cross-lingual transfer from {H}indi",
author = "Das, Ayan and
Saha, Agnivo and
Sarkar, Sudeshna",
editor = "Wu, Dekai and
Bhattacharyya, Pushpak",
booktitle = "Proceedings of the 6th Workshop on South and Southeast {A}sian Natural Language Processing ({WSSANLP}2016)",
month = dec,
year = "2016",
address = "Osaka, Japan",
publisher = "The COLING 2016 Organizing Committee",
url = "https://aclanthology.org/W16-3704",
pages = "33--43",
abstract = "In recent years there has been a lot of interest in cross-lingual parsing for developing treebanks for languages with small or no annotated treebanks. In this paper, we explore the development of a cross-lingual transfer parser from Hindi to Bengali using a Hindi parser and a Hindi-Bengali parallel corpus. A parser is trained and applied to the Hindi sentences of the parallel corpus and the parse trees are projected to construct probable parse trees of the corresponding Bengali sentences. Only about 14{\%} of these trees are complete (transferred trees contain all the target sentence words) and they are used to construct a Bengali parser. We relax the criteria of completeness to consider well-formed trees (43{\%} of the trees) leading to an improvement. We note that the words often do not have a one-to-one mapping in the two languages but considering sentences at the chunk-level results in better correspondence between the two languages. Based on this we present a method to use chunking as a preprocessing step and do the transfer on the chunk trees. We find that about 72{\%} of the projected parse trees of Bengali are now well-formed. The resultant parser achieves significant improvement in both Unlabeled Attachment Score (UAS) as well as Labeled Attachment Score (LAS) over the baseline word-level transferred parser.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="das-etal-2016-development">
<titleInfo>
<title>Development of a Bengali parser by cross-lingual transfer from Hindi</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ayan</namePart>
<namePart type="family">Das</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Agnivo</namePart>
<namePart type="family">Saha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sudeshna</namePart>
<namePart type="family">Sarkar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 6th Workshop on South and Southeast Asian Natural Language Processing (WSSANLP2016)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dekai</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The COLING 2016 Organizing Committee</publisher>
<place>
<placeTerm type="text">Osaka, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In recent years there has been a lot of interest in cross-lingual parsing for developing treebanks for languages with small or no annotated treebanks. In this paper, we explore the development of a cross-lingual transfer parser from Hindi to Bengali using a Hindi parser and a Hindi-Bengali parallel corpus. A parser is trained and applied to the Hindi sentences of the parallel corpus and the parse trees are projected to construct probable parse trees of the corresponding Bengali sentences. Only about 14% of these trees are complete (transferred trees contain all the target sentence words) and they are used to construct a Bengali parser. We relax the criteria of completeness to consider well-formed trees (43% of the trees) leading to an improvement. We note that the words often do not have a one-to-one mapping in the two languages but considering sentences at the chunk-level results in better correspondence between the two languages. Based on this we present a method to use chunking as a preprocessing step and do the transfer on the chunk trees. We find that about 72% of the projected parse trees of Bengali are now well-formed. The resultant parser achieves significant improvement in both Unlabeled Attachment Score (UAS) as well as Labeled Attachment Score (LAS) over the baseline word-level transferred parser.</abstract>
<identifier type="citekey">das-etal-2016-development</identifier>
<location>
<url>https://aclanthology.org/W16-3704</url>
</location>
<part>
<date>2016-12</date>
<extent unit="page">
<start>33</start>
<end>43</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Development of a Bengali parser by cross-lingual transfer from Hindi
%A Das, Ayan
%A Saha, Agnivo
%A Sarkar, Sudeshna
%Y Wu, Dekai
%Y Bhattacharyya, Pushpak
%S Proceedings of the 6th Workshop on South and Southeast Asian Natural Language Processing (WSSANLP2016)
%D 2016
%8 December
%I The COLING 2016 Organizing Committee
%C Osaka, Japan
%F das-etal-2016-development
%X In recent years there has been a lot of interest in cross-lingual parsing for developing treebanks for languages with small or no annotated treebanks. In this paper, we explore the development of a cross-lingual transfer parser from Hindi to Bengali using a Hindi parser and a Hindi-Bengali parallel corpus. A parser is trained and applied to the Hindi sentences of the parallel corpus and the parse trees are projected to construct probable parse trees of the corresponding Bengali sentences. Only about 14% of these trees are complete (transferred trees contain all the target sentence words) and they are used to construct a Bengali parser. We relax the criteria of completeness to consider well-formed trees (43% of the trees) leading to an improvement. We note that the words often do not have a one-to-one mapping in the two languages but considering sentences at the chunk-level results in better correspondence between the two languages. Based on this we present a method to use chunking as a preprocessing step and do the transfer on the chunk trees. We find that about 72% of the projected parse trees of Bengali are now well-formed. The resultant parser achieves significant improvement in both Unlabeled Attachment Score (UAS) as well as Labeled Attachment Score (LAS) over the baseline word-level transferred parser.
%U https://aclanthology.org/W16-3704
%P 33-43
Markdown (Informal)
[Development of a Bengali parser by cross-lingual transfer from Hindi](https://aclanthology.org/W16-3704) (Das et al., WSSANLP 2016)
ACL