@inproceedings{jiang-etal-2023-combo,
title = "{COMBO}: A Complete Benchmark for Open {KG} Canonicalization",
author = "Jiang, Chengyue and
Jiang, Yong and
Wu, Weiqi and
Zheng, Yuting and
Xie, Pengjun and
Tu, Kewei",
editor = "Vlachos, Andreas and
Augenstein, Isabelle",
booktitle = "Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.eacl-main.26",
doi = "10.18653/v1/2023.eacl-main.26",
pages = "340--357",
abstract = "Open knowledge graph (KG) consists of (subject, relation, object) triples extracted from millions of raw text. The subject and object noun phrases and the relation in open KG have severe redundancy and ambiguity and need to be canonicalized. Existing datasets for open KG canonicalization only provide gold entity-level canonicalization for noun phrases. In this paper, we present COMBO, a Complete Benchmark for Open KG canonicalization. Compared with existing datasets, we additionally provide gold canonicalization for relation phrases, gold ontology-level canonicalization for noun phrases, as well as source sentences from which triples are extracted. We also propose metrics for evaluating each type of canonicalization. On the COMBO dataset, we empirically compare previously proposed canonicalization methods as well as a few simple baseline methods based on pretrained language models. We find that properly encoding the phrases in a triple using pretrained language models results in better relation canonicalization and ontology-level canonicalization of the noun phrase. We release our dataset, baselines, and evaluation scripts at path/to/url.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jiang-etal-2023-combo">
<titleInfo>
<title>COMBO: A Complete Benchmark for Open KG Canonicalization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chengyue</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yong</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weiqi</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuting</namePart>
<namePart type="family">Zheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pengjun</namePart>
<namePart type="family">Xie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kewei</namePart>
<namePart type="family">Tu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Vlachos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isabelle</namePart>
<namePart type="family">Augenstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Open knowledge graph (KG) consists of (subject, relation, object) triples extracted from millions of raw text. The subject and object noun phrases and the relation in open KG have severe redundancy and ambiguity and need to be canonicalized. Existing datasets for open KG canonicalization only provide gold entity-level canonicalization for noun phrases. In this paper, we present COMBO, a Complete Benchmark for Open KG canonicalization. Compared with existing datasets, we additionally provide gold canonicalization for relation phrases, gold ontology-level canonicalization for noun phrases, as well as source sentences from which triples are extracted. We also propose metrics for evaluating each type of canonicalization. On the COMBO dataset, we empirically compare previously proposed canonicalization methods as well as a few simple baseline methods based on pretrained language models. We find that properly encoding the phrases in a triple using pretrained language models results in better relation canonicalization and ontology-level canonicalization of the noun phrase. We release our dataset, baselines, and evaluation scripts at path/to/url.</abstract>
<identifier type="citekey">jiang-etal-2023-combo</identifier>
<identifier type="doi">10.18653/v1/2023.eacl-main.26</identifier>
<location>
<url>https://aclanthology.org/2023.eacl-main.26</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>340</start>
<end>357</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T COMBO: A Complete Benchmark for Open KG Canonicalization
%A Jiang, Chengyue
%A Jiang, Yong
%A Wu, Weiqi
%A Zheng, Yuting
%A Xie, Pengjun
%A Tu, Kewei
%Y Vlachos, Andreas
%Y Augenstein, Isabelle
%S Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F jiang-etal-2023-combo
%X Open knowledge graph (KG) consists of (subject, relation, object) triples extracted from millions of raw text. The subject and object noun phrases and the relation in open KG have severe redundancy and ambiguity and need to be canonicalized. Existing datasets for open KG canonicalization only provide gold entity-level canonicalization for noun phrases. In this paper, we present COMBO, a Complete Benchmark for Open KG canonicalization. Compared with existing datasets, we additionally provide gold canonicalization for relation phrases, gold ontology-level canonicalization for noun phrases, as well as source sentences from which triples are extracted. We also propose metrics for evaluating each type of canonicalization. On the COMBO dataset, we empirically compare previously proposed canonicalization methods as well as a few simple baseline methods based on pretrained language models. We find that properly encoding the phrases in a triple using pretrained language models results in better relation canonicalization and ontology-level canonicalization of the noun phrase. We release our dataset, baselines, and evaluation scripts at path/to/url.
%R 10.18653/v1/2023.eacl-main.26
%U https://aclanthology.org/2023.eacl-main.26
%U https://doi.org/10.18653/v1/2023.eacl-main.26
%P 340-357
Markdown (Informal)
[COMBO: A Complete Benchmark for Open KG Canonicalization](https://aclanthology.org/2023.eacl-main.26) (Jiang et al., EACL 2023)
ACL
- Chengyue Jiang, Yong Jiang, Weiqi Wu, Yuting Zheng, Pengjun Xie, and Kewei Tu. 2023. COMBO: A Complete Benchmark for Open KG Canonicalization. In Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics, pages 340–357, Dubrovnik, Croatia. Association for Computational Linguistics.