@inproceedings{bansal-etal-2021-low,
title = "How Low is Too Low? A Computational Perspective on Extremely Low-Resource Languages",
author = "Bansal, Rachit and
Choudhary, Himanshu and
Punia, Ravneet and
Schenk, Niko and
Pag{\'e}-Perron, {\'E}milie and
Dahl, Jacob",
editor = "Kabbara, Jad and
Lin, Haitao and
Paullada, Amandalynne and
Vamvas, Jannis",
booktitle = "Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing: Student Research Workshop",
month = aug,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.acl-srw.5",
doi = "10.18653/v1/2021.acl-srw.5",
pages = "44--59",
abstract = "Despite the recent advancements of attention-based deep learning architectures across a majority of Natural Language Processing tasks, their application remains limited in a low-resource setting because of a lack of pre-trained models for such languages. In this study, we make the first attempt to investigate the challenges of adapting these techniques to an extremely low-resource language {--} Sumerian cuneiform {--} one of the world{'}s oldest written languages attested from at least the beginning of the 3rd millennium BC. Specifically, we introduce the first cross-lingual information extraction pipeline for Sumerian, which includes part-of-speech tagging, named entity recognition, and machine translation. We introduce InterpretLR, an interpretability toolkit for low-resource NLP and use it alongside human evaluations to gauge the trained models. Notably, all our techniques and most components of our pipeline can be generalised to any low-resource language. We publicly release all our implementations including a novel data set with domain-specific pre-processing to promote further research in this domain.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bansal-etal-2021-low">
<titleInfo>
<title>How Low is Too Low? A Computational Perspective on Extremely Low-Resource Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rachit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Himanshu</namePart>
<namePart type="family">Choudhary</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ravneet</namePart>
<namePart type="family">Punia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Niko</namePart>
<namePart type="family">Schenk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Émilie</namePart>
<namePart type="family">Pagé-Perron</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jacob</namePart>
<namePart type="family">Dahl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing: Student Research Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jad</namePart>
<namePart type="family">Kabbara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haitao</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amandalynne</namePart>
<namePart type="family">Paullada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jannis</namePart>
<namePart type="family">Vamvas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Despite the recent advancements of attention-based deep learning architectures across a majority of Natural Language Processing tasks, their application remains limited in a low-resource setting because of a lack of pre-trained models for such languages. In this study, we make the first attempt to investigate the challenges of adapting these techniques to an extremely low-resource language – Sumerian cuneiform – one of the world’s oldest written languages attested from at least the beginning of the 3rd millennium BC. Specifically, we introduce the first cross-lingual information extraction pipeline for Sumerian, which includes part-of-speech tagging, named entity recognition, and machine translation. We introduce InterpretLR, an interpretability toolkit for low-resource NLP and use it alongside human evaluations to gauge the trained models. Notably, all our techniques and most components of our pipeline can be generalised to any low-resource language. We publicly release all our implementations including a novel data set with domain-specific pre-processing to promote further research in this domain.</abstract>
<identifier type="citekey">bansal-etal-2021-low</identifier>
<identifier type="doi">10.18653/v1/2021.acl-srw.5</identifier>
<location>
<url>https://aclanthology.org/2021.acl-srw.5</url>
</location>
<part>
<date>2021-08</date>
<extent unit="page">
<start>44</start>
<end>59</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T How Low is Too Low? A Computational Perspective on Extremely Low-Resource Languages
%A Bansal, Rachit
%A Choudhary, Himanshu
%A Punia, Ravneet
%A Schenk, Niko
%A Pagé-Perron, Émilie
%A Dahl, Jacob
%Y Kabbara, Jad
%Y Lin, Haitao
%Y Paullada, Amandalynne
%Y Vamvas, Jannis
%S Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing: Student Research Workshop
%D 2021
%8 August
%I Association for Computational Linguistics
%C Online
%F bansal-etal-2021-low
%X Despite the recent advancements of attention-based deep learning architectures across a majority of Natural Language Processing tasks, their application remains limited in a low-resource setting because of a lack of pre-trained models for such languages. In this study, we make the first attempt to investigate the challenges of adapting these techniques to an extremely low-resource language – Sumerian cuneiform – one of the world’s oldest written languages attested from at least the beginning of the 3rd millennium BC. Specifically, we introduce the first cross-lingual information extraction pipeline for Sumerian, which includes part-of-speech tagging, named entity recognition, and machine translation. We introduce InterpretLR, an interpretability toolkit for low-resource NLP and use it alongside human evaluations to gauge the trained models. Notably, all our techniques and most components of our pipeline can be generalised to any low-resource language. We publicly release all our implementations including a novel data set with domain-specific pre-processing to promote further research in this domain.
%R 10.18653/v1/2021.acl-srw.5
%U https://aclanthology.org/2021.acl-srw.5
%U https://doi.org/10.18653/v1/2021.acl-srw.5
%P 44-59
Markdown (Informal)
[How Low is Too Low? A Computational Perspective on Extremely Low-Resource Languages](https://aclanthology.org/2021.acl-srw.5) (Bansal et al., ACL-IJCNLP 2021)
ACL
- Rachit Bansal, Himanshu Choudhary, Ravneet Punia, Niko Schenk, Émilie Pagé-Perron, and Jacob Dahl. 2021. How Low is Too Low? A Computational Perspective on Extremely Low-Resource Languages. In Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing: Student Research Workshop, pages 44–59, Online. Association for Computational Linguistics.