@inproceedings{rojas-etal-2022-assessing,
title = "Assessing the Limits of Straightforward Models for Nested Named Entity Recognition in {S}panish Clinical Narratives",
author = "Rojas, Matias and
Carrino, Casimiro Pio and
Gonzalez-Agirre, Aitor and
Dunstan, Jocelyn and
Villegas, Marta",
editor = "Lavelli, Alberto and
Holderness, Eben and
Jimeno Yepes, Antonio and
Minard, Anne-Lyse and
Pustejovsky, James and
Rinaldi, Fabio",
booktitle = "Proceedings of the 13th International Workshop on Health Text Mining and Information Analysis (LOUHI)",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.louhi-1.2/",
doi = "10.18653/v1/2022.louhi-1.2",
pages = "14--25",
abstract = "Nested Named Entity Recognition (NER) is an information extraction task that aims to identify entities that may be nested within other entity mentions. Despite the availability of several corpora with nested entities in the Spanish clinical domain, most previous work has overlooked them due to the lack of models and a clear annotation scheme for dealing with the task. To fill this gap, this paper provides an empirical study of straightforward methods for tackling the nested NER task on two Spanish clinical datasets, Clinical Trials, and the Chilean Waiting List. We assess the advantages and limitations of two sequence labeling approaches; one based on Multiple LSTM-CRF architectures and another on Joint labeling models. To better understand the differences between these models, we compute task-specific metrics that adequately measure the ability of models to detect nested entities and perform a fine-grained comparison across models. Our experimental results show that employing domain-specific language models trained from scratch significantly improves the performance obtained with strong domain-specific and general-domain baselines, achieving state-of-the-art results in both datasets. Specifically, we obtained F1 scores of 89.21 and 83.16 in Clinical Trials and the Chilean Waiting List, respectively. Interestingly enough, we observe that the task-specific metrics and analysis properly reflect the limitations of the models when recognizing nested entities. Finally, we perform a case study on an aggregated NER dataset created from several clinical corpora in Spanish. We highlight how entity length and the simultaneous recognition of inner and outer entities are the most critical variables for the nested NER task."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rojas-etal-2022-assessing">
<titleInfo>
<title>Assessing the Limits of Straightforward Models for Nested Named Entity Recognition in Spanish Clinical Narratives</title>
</titleInfo>
<name type="personal">
<namePart type="given">Matias</namePart>
<namePart type="family">Rojas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Casimiro</namePart>
<namePart type="given">Pio</namePart>
<namePart type="family">Carrino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aitor</namePart>
<namePart type="family">Gonzalez-Agirre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jocelyn</namePart>
<namePart type="family">Dunstan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marta</namePart>
<namePart type="family">Villegas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th International Workshop on Health Text Mining and Information Analysis (LOUHI)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Lavelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eben</namePart>
<namePart type="family">Holderness</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonio</namePart>
<namePart type="family">Jimeno Yepes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anne-Lyse</namePart>
<namePart type="family">Minard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Pustejovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fabio</namePart>
<namePart type="family">Rinaldi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Nested Named Entity Recognition (NER) is an information extraction task that aims to identify entities that may be nested within other entity mentions. Despite the availability of several corpora with nested entities in the Spanish clinical domain, most previous work has overlooked them due to the lack of models and a clear annotation scheme for dealing with the task. To fill this gap, this paper provides an empirical study of straightforward methods for tackling the nested NER task on two Spanish clinical datasets, Clinical Trials, and the Chilean Waiting List. We assess the advantages and limitations of two sequence labeling approaches; one based on Multiple LSTM-CRF architectures and another on Joint labeling models. To better understand the differences between these models, we compute task-specific metrics that adequately measure the ability of models to detect nested entities and perform a fine-grained comparison across models. Our experimental results show that employing domain-specific language models trained from scratch significantly improves the performance obtained with strong domain-specific and general-domain baselines, achieving state-of-the-art results in both datasets. Specifically, we obtained F1 scores of 89.21 and 83.16 in Clinical Trials and the Chilean Waiting List, respectively. Interestingly enough, we observe that the task-specific metrics and analysis properly reflect the limitations of the models when recognizing nested entities. Finally, we perform a case study on an aggregated NER dataset created from several clinical corpora in Spanish. We highlight how entity length and the simultaneous recognition of inner and outer entities are the most critical variables for the nested NER task.</abstract>
<identifier type="citekey">rojas-etal-2022-assessing</identifier>
<identifier type="doi">10.18653/v1/2022.louhi-1.2</identifier>
<location>
<url>https://aclanthology.org/2022.louhi-1.2/</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>14</start>
<end>25</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Assessing the Limits of Straightforward Models for Nested Named Entity Recognition in Spanish Clinical Narratives
%A Rojas, Matias
%A Carrino, Casimiro Pio
%A Gonzalez-Agirre, Aitor
%A Dunstan, Jocelyn
%A Villegas, Marta
%Y Lavelli, Alberto
%Y Holderness, Eben
%Y Jimeno Yepes, Antonio
%Y Minard, Anne-Lyse
%Y Pustejovsky, James
%Y Rinaldi, Fabio
%S Proceedings of the 13th International Workshop on Health Text Mining and Information Analysis (LOUHI)
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F rojas-etal-2022-assessing
%X Nested Named Entity Recognition (NER) is an information extraction task that aims to identify entities that may be nested within other entity mentions. Despite the availability of several corpora with nested entities in the Spanish clinical domain, most previous work has overlooked them due to the lack of models and a clear annotation scheme for dealing with the task. To fill this gap, this paper provides an empirical study of straightforward methods for tackling the nested NER task on two Spanish clinical datasets, Clinical Trials, and the Chilean Waiting List. We assess the advantages and limitations of two sequence labeling approaches; one based on Multiple LSTM-CRF architectures and another on Joint labeling models. To better understand the differences between these models, we compute task-specific metrics that adequately measure the ability of models to detect nested entities and perform a fine-grained comparison across models. Our experimental results show that employing domain-specific language models trained from scratch significantly improves the performance obtained with strong domain-specific and general-domain baselines, achieving state-of-the-art results in both datasets. Specifically, we obtained F1 scores of 89.21 and 83.16 in Clinical Trials and the Chilean Waiting List, respectively. Interestingly enough, we observe that the task-specific metrics and analysis properly reflect the limitations of the models when recognizing nested entities. Finally, we perform a case study on an aggregated NER dataset created from several clinical corpora in Spanish. We highlight how entity length and the simultaneous recognition of inner and outer entities are the most critical variables for the nested NER task.
%R 10.18653/v1/2022.louhi-1.2
%U https://aclanthology.org/2022.louhi-1.2/
%U https://doi.org/10.18653/v1/2022.louhi-1.2
%P 14-25
Markdown (Informal)
[Assessing the Limits of Straightforward Models for Nested Named Entity Recognition in Spanish Clinical Narratives](https://aclanthology.org/2022.louhi-1.2/) (Rojas et al., Louhi 2022)
ACL