@inproceedings{hristov-etal-2023-clinical,
title = "Clinical Text Classification to {SNOMED} {CT} Codes Using Transformers Trained on Linked Open Medical Ontologies",
author = "Hristov, Anton and
Ivanov, Petar and
Aksenova, Anna and
Asamov, Tsvetan and
Gyurov, Pavlin and
Primov, Todor and
Boytcheva, Svetla",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing",
month = sep,
year = "2023",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2023.ranlp-1.57",
pages = "519--526",
abstract = "We present an approach for medical text coding with SNOMED CT. Our approach uses publicly available linked open data from terminologies and ontologies as training data for the algorithms. We claim that even small training corpora made of short text snippets can be used to train models for the given task. We propose a method based on transformers enhanced with clustering and filtering of the candidates. Further, we adopt a classical machine learning approach - support vector classification (SVC) using transformer embeddings. The resulting approach proves to be more accurate than the predictions given by Large Language Models. We evaluate on a dataset generated from linked open data for SNOMED codes related to morphology and topography for four use cases. Our transformers-based approach achieves an F1-score of 0.82 for morphology and 0.99 for topography codes. Further, we validate the applicability of our approach in a clinical context using labelled real clinical data that are not used for model training.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hristov-etal-2023-clinical">
<titleInfo>
<title>Clinical Text Classification to SNOMED CT Codes Using Transformers Trained on Linked Open Medical Ontologies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anton</namePart>
<namePart type="family">Hristov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Petar</namePart>
<namePart type="family">Ivanov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Aksenova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tsvetan</namePart>
<namePart type="family">Asamov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pavlin</namePart>
<namePart type="family">Gyurov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Todor</namePart>
<namePart type="family">Primov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Svetla</namePart>
<namePart type="family">Boytcheva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present an approach for medical text coding with SNOMED CT. Our approach uses publicly available linked open data from terminologies and ontologies as training data for the algorithms. We claim that even small training corpora made of short text snippets can be used to train models for the given task. We propose a method based on transformers enhanced with clustering and filtering of the candidates. Further, we adopt a classical machine learning approach - support vector classification (SVC) using transformer embeddings. The resulting approach proves to be more accurate than the predictions given by Large Language Models. We evaluate on a dataset generated from linked open data for SNOMED codes related to morphology and topography for four use cases. Our transformers-based approach achieves an F1-score of 0.82 for morphology and 0.99 for topography codes. Further, we validate the applicability of our approach in a clinical context using labelled real clinical data that are not used for model training.</abstract>
<identifier type="citekey">hristov-etal-2023-clinical</identifier>
<location>
<url>https://aclanthology.org/2023.ranlp-1.57</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>519</start>
<end>526</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Clinical Text Classification to SNOMED CT Codes Using Transformers Trained on Linked Open Medical Ontologies
%A Hristov, Anton
%A Ivanov, Petar
%A Aksenova, Anna
%A Asamov, Tsvetan
%A Gyurov, Pavlin
%A Primov, Todor
%A Boytcheva, Svetla
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing
%D 2023
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F hristov-etal-2023-clinical
%X We present an approach for medical text coding with SNOMED CT. Our approach uses publicly available linked open data from terminologies and ontologies as training data for the algorithms. We claim that even small training corpora made of short text snippets can be used to train models for the given task. We propose a method based on transformers enhanced with clustering and filtering of the candidates. Further, we adopt a classical machine learning approach - support vector classification (SVC) using transformer embeddings. The resulting approach proves to be more accurate than the predictions given by Large Language Models. We evaluate on a dataset generated from linked open data for SNOMED codes related to morphology and topography for four use cases. Our transformers-based approach achieves an F1-score of 0.82 for morphology and 0.99 for topography codes. Further, we validate the applicability of our approach in a clinical context using labelled real clinical data that are not used for model training.
%U https://aclanthology.org/2023.ranlp-1.57
%P 519-526
Markdown (Informal)
[Clinical Text Classification to SNOMED CT Codes Using Transformers Trained on Linked Open Medical Ontologies](https://aclanthology.org/2023.ranlp-1.57) (Hristov et al., RANLP 2023)
ACL
- Anton Hristov, Petar Ivanov, Anna Aksenova, Tsvetan Asamov, Pavlin Gyurov, Todor Primov, and Svetla Boytcheva. 2023. Clinical Text Classification to SNOMED CT Codes Using Transformers Trained on Linked Open Medical Ontologies. In Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing, pages 519–526, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.