@inproceedings{jansen-2018-multi,
title = "Multi-hop Inference for Sentence-level {T}ext{G}raphs: How Challenging is Meaningfully Combining Information for Science Question Answering?",
author = "Jansen, Peter",
editor = "Glava{\v{s}}, Goran and
Somasundaran, Swapna and
Riedl, Martin and
Hovy, Eduard",
booktitle = "Proceedings of the Twelfth Workshop on Graph-Based Methods for Natural Language Processing ({T}ext{G}raphs-12)",
month = jun,
year = "2018",
address = "New Orleans, Louisiana, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-1703",
doi = "10.18653/v1/W18-1703",
pages = "12--17",
abstract = "Question Answering for complex questions is often modelled as a graph construction or traversal task, where a solver must build or traverse a graph of facts that answer and explain a given question. This {``}multi-hop{''} inference has been shown to be extremely challenging, with few models able to aggregate more than two facts before being overwhelmed by {``}semantic drift{''}, or the tendency for long chains of facts to quickly drift off topic. This is a major barrier to current inference models, as even elementary science questions require an average of 4 to 6 facts to answer and explain. In this work we empirically characterize the difficulty of building or traversing a graph of sentences connected by lexical overlap, by evaluating chance sentence aggregation quality through 9,784 manually-annotated judgements across knowledge graphs built from three free-text corpora (including study guides and Simple Wikipedia). We demonstrate semantic drift tends to be high and aggregation quality low, at between 0.04 and 3, and highlight scenarios that maximize the likelihood of meaningfully combining information.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jansen-2018-multi">
<titleInfo>
<title>Multi-hop Inference for Sentence-level TextGraphs: How Challenging is Meaningfully Combining Information for Science Question Answering?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Jansen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Twelfth Workshop on Graph-Based Methods for Natural Language Processing (TextGraphs-12)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Goran</namePart>
<namePart type="family">Glavaš</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Swapna</namePart>
<namePart type="family">Somasundaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Martin</namePart>
<namePart type="family">Riedl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eduard</namePart>
<namePart type="family">Hovy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">New Orleans, Louisiana, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Question Answering for complex questions is often modelled as a graph construction or traversal task, where a solver must build or traverse a graph of facts that answer and explain a given question. This “multi-hop” inference has been shown to be extremely challenging, with few models able to aggregate more than two facts before being overwhelmed by “semantic drift”, or the tendency for long chains of facts to quickly drift off topic. This is a major barrier to current inference models, as even elementary science questions require an average of 4 to 6 facts to answer and explain. In this work we empirically characterize the difficulty of building or traversing a graph of sentences connected by lexical overlap, by evaluating chance sentence aggregation quality through 9,784 manually-annotated judgements across knowledge graphs built from three free-text corpora (including study guides and Simple Wikipedia). We demonstrate semantic drift tends to be high and aggregation quality low, at between 0.04 and 3, and highlight scenarios that maximize the likelihood of meaningfully combining information.</abstract>
<identifier type="citekey">jansen-2018-multi</identifier>
<identifier type="doi">10.18653/v1/W18-1703</identifier>
<location>
<url>https://aclanthology.org/W18-1703</url>
</location>
<part>
<date>2018-06</date>
<extent unit="page">
<start>12</start>
<end>17</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multi-hop Inference for Sentence-level TextGraphs: How Challenging is Meaningfully Combining Information for Science Question Answering?
%A Jansen, Peter
%Y Glavaš, Goran
%Y Somasundaran, Swapna
%Y Riedl, Martin
%Y Hovy, Eduard
%S Proceedings of the Twelfth Workshop on Graph-Based Methods for Natural Language Processing (TextGraphs-12)
%D 2018
%8 June
%I Association for Computational Linguistics
%C New Orleans, Louisiana, USA
%F jansen-2018-multi
%X Question Answering for complex questions is often modelled as a graph construction or traversal task, where a solver must build or traverse a graph of facts that answer and explain a given question. This “multi-hop” inference has been shown to be extremely challenging, with few models able to aggregate more than two facts before being overwhelmed by “semantic drift”, or the tendency for long chains of facts to quickly drift off topic. This is a major barrier to current inference models, as even elementary science questions require an average of 4 to 6 facts to answer and explain. In this work we empirically characterize the difficulty of building or traversing a graph of sentences connected by lexical overlap, by evaluating chance sentence aggregation quality through 9,784 manually-annotated judgements across knowledge graphs built from three free-text corpora (including study guides and Simple Wikipedia). We demonstrate semantic drift tends to be high and aggregation quality low, at between 0.04 and 3, and highlight scenarios that maximize the likelihood of meaningfully combining information.
%R 10.18653/v1/W18-1703
%U https://aclanthology.org/W18-1703
%U https://doi.org/10.18653/v1/W18-1703
%P 12-17
Markdown (Informal)
[Multi-hop Inference for Sentence-level TextGraphs: How Challenging is Meaningfully Combining Information for Science Question Answering?](https://aclanthology.org/W18-1703) (Jansen, TextGraphs 2018)
ACL