@inproceedings{ricci-etal-2022-unsupervised,
title = "Unsupervised Partial Sentence Matching for Cited Text Identification",
author = "Ricci, Kathryn and
Chang, Haw-Shiuan and
Goyal, Purujit and
McCallum, Andrew",
editor = "Cohan, Arman and
Feigenblat, Guy and
Freitag, Dayne and
Ghosal, Tirthankar and
Herrmannova, Drahomira and
Knoth, Petr and
Lo, Kyle and
Mayr, Philipp and
Shmueli-Scheuer, Michal and
de Waard, Anita and
Wang, Lucy Lu",
booktitle = "Proceedings of the Third Workshop on Scholarly Document Processing",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.sdp-1.11",
pages = "95--104",
abstract = "Given a citation in the body of a research paper, cited text identification aims to find the sentences in the cited paper that are most relevant to the citing sentence. The task is fundamentally one of sentence matching, where affinity is often assessed by a cosine similarity between sentence embeddings. However, (a) sentences may not be well-represented by a single embedding because they contain multiple distinct semantic aspects, and (b) good matches may not require a strong match in all aspects. To overcome these limitations, we propose a simple and efficient unsupervised method for cited text identification that adapts an asymmetric similarity measure to allow partial matches of multiple aspects in both sentences. On the CL-SciSumm dataset we find that our method outperforms a baseline symmetric approach, and, surprisingly, also outperforms all supervised and unsupervised systems submitted to past editions of CL-SciSumm Shared Task 1a.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ricci-etal-2022-unsupervised">
<titleInfo>
<title>Unsupervised Partial Sentence Matching for Cited Text Identification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kathryn</namePart>
<namePart type="family">Ricci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haw-Shiuan</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Purujit</namePart>
<namePart type="family">Goyal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">McCallum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Scholarly Document Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Arman</namePart>
<namePart type="family">Cohan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guy</namePart>
<namePart type="family">Feigenblat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dayne</namePart>
<namePart type="family">Freitag</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tirthankar</namePart>
<namePart type="family">Ghosal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Drahomira</namePart>
<namePart type="family">Herrmannova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Petr</namePart>
<namePart type="family">Knoth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyle</namePart>
<namePart type="family">Lo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Mayr</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michal</namePart>
<namePart type="family">Shmueli-Scheuer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anita</namePart>
<namePart type="family">de Waard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucy</namePart>
<namePart type="given">Lu</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Given a citation in the body of a research paper, cited text identification aims to find the sentences in the cited paper that are most relevant to the citing sentence. The task is fundamentally one of sentence matching, where affinity is often assessed by a cosine similarity between sentence embeddings. However, (a) sentences may not be well-represented by a single embedding because they contain multiple distinct semantic aspects, and (b) good matches may not require a strong match in all aspects. To overcome these limitations, we propose a simple and efficient unsupervised method for cited text identification that adapts an asymmetric similarity measure to allow partial matches of multiple aspects in both sentences. On the CL-SciSumm dataset we find that our method outperforms a baseline symmetric approach, and, surprisingly, also outperforms all supervised and unsupervised systems submitted to past editions of CL-SciSumm Shared Task 1a.</abstract>
<identifier type="citekey">ricci-etal-2022-unsupervised</identifier>
<location>
<url>https://aclanthology.org/2022.sdp-1.11</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>95</start>
<end>104</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Unsupervised Partial Sentence Matching for Cited Text Identification
%A Ricci, Kathryn
%A Chang, Haw-Shiuan
%A Goyal, Purujit
%A McCallum, Andrew
%Y Cohan, Arman
%Y Feigenblat, Guy
%Y Freitag, Dayne
%Y Ghosal, Tirthankar
%Y Herrmannova, Drahomira
%Y Knoth, Petr
%Y Lo, Kyle
%Y Mayr, Philipp
%Y Shmueli-Scheuer, Michal
%Y de Waard, Anita
%Y Wang, Lucy Lu
%S Proceedings of the Third Workshop on Scholarly Document Processing
%D 2022
%8 October
%I Association for Computational Linguistics
%C Gyeongju, Republic of Korea
%F ricci-etal-2022-unsupervised
%X Given a citation in the body of a research paper, cited text identification aims to find the sentences in the cited paper that are most relevant to the citing sentence. The task is fundamentally one of sentence matching, where affinity is often assessed by a cosine similarity between sentence embeddings. However, (a) sentences may not be well-represented by a single embedding because they contain multiple distinct semantic aspects, and (b) good matches may not require a strong match in all aspects. To overcome these limitations, we propose a simple and efficient unsupervised method for cited text identification that adapts an asymmetric similarity measure to allow partial matches of multiple aspects in both sentences. On the CL-SciSumm dataset we find that our method outperforms a baseline symmetric approach, and, surprisingly, also outperforms all supervised and unsupervised systems submitted to past editions of CL-SciSumm Shared Task 1a.
%U https://aclanthology.org/2022.sdp-1.11
%P 95-104
Markdown (Informal)
[Unsupervised Partial Sentence Matching for Cited Text Identification](https://aclanthology.org/2022.sdp-1.11) (Ricci et al., sdp 2022)
ACL