@inproceedings{laparra-bethard-2020-dataset,
title = "A Dataset and Evaluation Framework for Complex Geographical Description Parsing",
author = "Laparra, Egoitz and
Bethard, Steven",
editor = "Scott, Donia and
Bel, Nuria and
Zong, Chengqing",
booktitle = "Proceedings of the 28th International Conference on Computational Linguistics",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2020.coling-main.81",
doi = "10.18653/v1/2020.coling-main.81",
pages = "936--948",
abstract = "Much previous work on geoparsing has focused on identifying and resolving individual toponyms in text like Adrano, S.Maria di Licodia or Catania. However, geographical locations occur not only as individual toponyms, but also as compositions of reference geolocations joined and modified by connectives, e.g., {``}. . . between the towns of Adrano and S.Maria di Licodia, 32 kilometres northwest of Catania{''}. Ideally, a geoparser should be able to take such text, and the geographical shapes of the toponyms referenced within it, and parse these into a geographical shape, formed by a set of coordinates, that represents the location described. But creating a dataset for this complex geoparsing task is difficult and, if done manually, would require a huge amount of effort to annotate the geographical shapes of not only the geolocation described but also the reference toponyms. We present an approach that automates most of the process by combining Wikipedia and OpenStreetMap. As a result, we have gathered a collection of 360,187 uncurated complex geolocation descriptions, from which we have manually curated 1,000 examples intended to be used as a test set. To accompany the data, we define a new geoparsing evaluation framework along with a scoring methodology and a set of baselines.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="laparra-bethard-2020-dataset">
<titleInfo>
<title>A Dataset and Evaluation Framework for Complex Geographical Description Parsing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Egoitz</namePart>
<namePart type="family">Laparra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 28th International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Donia</namePart>
<namePart type="family">Scott</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nuria</namePart>
<namePart type="family">Bel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chengqing</namePart>
<namePart type="family">Zong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Much previous work on geoparsing has focused on identifying and resolving individual toponyms in text like Adrano, S.Maria di Licodia or Catania. However, geographical locations occur not only as individual toponyms, but also as compositions of reference geolocations joined and modified by connectives, e.g., “. . . between the towns of Adrano and S.Maria di Licodia, 32 kilometres northwest of Catania”. Ideally, a geoparser should be able to take such text, and the geographical shapes of the toponyms referenced within it, and parse these into a geographical shape, formed by a set of coordinates, that represents the location described. But creating a dataset for this complex geoparsing task is difficult and, if done manually, would require a huge amount of effort to annotate the geographical shapes of not only the geolocation described but also the reference toponyms. We present an approach that automates most of the process by combining Wikipedia and OpenStreetMap. As a result, we have gathered a collection of 360,187 uncurated complex geolocation descriptions, from which we have manually curated 1,000 examples intended to be used as a test set. To accompany the data, we define a new geoparsing evaluation framework along with a scoring methodology and a set of baselines.</abstract>
<identifier type="citekey">laparra-bethard-2020-dataset</identifier>
<identifier type="doi">10.18653/v1/2020.coling-main.81</identifier>
<location>
<url>https://aclanthology.org/2020.coling-main.81</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>936</start>
<end>948</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Dataset and Evaluation Framework for Complex Geographical Description Parsing
%A Laparra, Egoitz
%A Bethard, Steven
%Y Scott, Donia
%Y Bel, Nuria
%Y Zong, Chengqing
%S Proceedings of the 28th International Conference on Computational Linguistics
%D 2020
%8 December
%I International Committee on Computational Linguistics
%C Barcelona, Spain (Online)
%F laparra-bethard-2020-dataset
%X Much previous work on geoparsing has focused on identifying and resolving individual toponyms in text like Adrano, S.Maria di Licodia or Catania. However, geographical locations occur not only as individual toponyms, but also as compositions of reference geolocations joined and modified by connectives, e.g., “. . . between the towns of Adrano and S.Maria di Licodia, 32 kilometres northwest of Catania”. Ideally, a geoparser should be able to take such text, and the geographical shapes of the toponyms referenced within it, and parse these into a geographical shape, formed by a set of coordinates, that represents the location described. But creating a dataset for this complex geoparsing task is difficult and, if done manually, would require a huge amount of effort to annotate the geographical shapes of not only the geolocation described but also the reference toponyms. We present an approach that automates most of the process by combining Wikipedia and OpenStreetMap. As a result, we have gathered a collection of 360,187 uncurated complex geolocation descriptions, from which we have manually curated 1,000 examples intended to be used as a test set. To accompany the data, we define a new geoparsing evaluation framework along with a scoring methodology and a set of baselines.
%R 10.18653/v1/2020.coling-main.81
%U https://aclanthology.org/2020.coling-main.81
%U https://doi.org/10.18653/v1/2020.coling-main.81
%P 936-948
Markdown (Informal)
[A Dataset and Evaluation Framework for Complex Geographical Description Parsing](https://aclanthology.org/2020.coling-main.81) (Laparra & Bethard, COLING 2020)
ACL