@inproceedings{arora-etal-2022-computational,
title = "Computational Historical Linguistics and Language Diversity in {S}outh {A}sia",
author = "Arora, Aryaman and
Farris, Adam and
Basu, Samopriya and
Kolichala, Suresh",
editor = "Muresan, Smaranda and
Nakov, Preslav and
Villavicencio, Aline",
booktitle = "Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.acl-long.99",
doi = "10.18653/v1/2022.acl-long.99",
pages = "1396--1409",
abstract = "South Asia is home to a plethora of languages, many of which severely lack access to new language technologies. This linguistic diversity also results in a research environment conducive to the study of comparative, contact, and historical linguistics{---}fields which necessitate the gathering of extensive data from many languages. We claim that data scatteredness (rather than scarcity) is the primary obstacle in the development of South Asian language technology, and suggest that the study of language history is uniquely aligned with surmounting this obstacle. We review recent developments in and at the intersection of South Asian NLP and historical-comparative linguistics, describing our and others{'} current efforts in this area. We also offer new strategies towards breaking the data barrier.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="arora-etal-2022-computational">
<titleInfo>
<title>Computational Historical Linguistics and Language Diversity in South Asia</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aryaman</namePart>
<namePart type="family">Arora</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adam</namePart>
<namePart type="family">Farris</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samopriya</namePart>
<namePart type="family">Basu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Suresh</namePart>
<namePart type="family">Kolichala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Smaranda</namePart>
<namePart type="family">Muresan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aline</namePart>
<namePart type="family">Villavicencio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>South Asia is home to a plethora of languages, many of which severely lack access to new language technologies. This linguistic diversity also results in a research environment conducive to the study of comparative, contact, and historical linguistics—fields which necessitate the gathering of extensive data from many languages. We claim that data scatteredness (rather than scarcity) is the primary obstacle in the development of South Asian language technology, and suggest that the study of language history is uniquely aligned with surmounting this obstacle. We review recent developments in and at the intersection of South Asian NLP and historical-comparative linguistics, describing our and others’ current efforts in this area. We also offer new strategies towards breaking the data barrier.</abstract>
<identifier type="citekey">arora-etal-2022-computational</identifier>
<identifier type="doi">10.18653/v1/2022.acl-long.99</identifier>
<location>
<url>https://aclanthology.org/2022.acl-long.99</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>1396</start>
<end>1409</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Computational Historical Linguistics and Language Diversity in South Asia
%A Arora, Aryaman
%A Farris, Adam
%A Basu, Samopriya
%A Kolichala, Suresh
%Y Muresan, Smaranda
%Y Nakov, Preslav
%Y Villavicencio, Aline
%S Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F arora-etal-2022-computational
%X South Asia is home to a plethora of languages, many of which severely lack access to new language technologies. This linguistic diversity also results in a research environment conducive to the study of comparative, contact, and historical linguistics—fields which necessitate the gathering of extensive data from many languages. We claim that data scatteredness (rather than scarcity) is the primary obstacle in the development of South Asian language technology, and suggest that the study of language history is uniquely aligned with surmounting this obstacle. We review recent developments in and at the intersection of South Asian NLP and historical-comparative linguistics, describing our and others’ current efforts in this area. We also offer new strategies towards breaking the data barrier.
%R 10.18653/v1/2022.acl-long.99
%U https://aclanthology.org/2022.acl-long.99
%U https://doi.org/10.18653/v1/2022.acl-long.99
%P 1396-1409
Markdown (Informal)
[Computational Historical Linguistics and Language Diversity in South Asia](https://aclanthology.org/2022.acl-long.99) (Arora et al., ACL 2022)
ACL