@inproceedings{glenn-etal-2024-blendsql,
title = "{B}lend{SQL}: A Scalable Dialect for Unifying Hybrid Question Answering in Relational Algebra",
author = "Glenn, Parker and
Dakle, Parag and
Wang, Liang and
Raghavan, Preethi",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2024",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-acl.25",
doi = "10.18653/v1/2024.findings-acl.25",
pages = "453--466",
abstract = "Many existing end-to-end systems for hybrid question answering tasks can often be boiled down to a {``}prompt-and-pray{''} paradigm, where the user has limited control and insight into the intermediate reasoning steps used to achieve the final result. Additionally, due to the context size limitation of many transformer-based LLMs, it is often not reasonable to expect that the full structured and unstructured context will fit into a given prompt in a zero-shot setting, let alone a few-shot setting. We introduce BlendSQL, a superset of SQLite to act as a unified dialect for orchestrating reasoning across both unstructured and structured data. For hybrid question answering tasks involving multi-hop reasoning, we encode the full decomposed reasoning roadmap into a single interpretable BlendSQL query. Notably, we show that BlendSQL can scale to massive datasets and improve the performance of end-to-end systems while using 35{\%} fewer tokens. Our code is available and installable as a package at https://github.com/parkervg/blendsql.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="glenn-etal-2024-blendsql">
<titleInfo>
<title>BlendSQL: A Scalable Dialect for Unifying Hybrid Question Answering in Relational Algebra</title>
</titleInfo>
<name type="personal">
<namePart type="given">Parker</namePart>
<namePart type="family">Glenn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Parag</namePart>
<namePart type="family">Dakle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liang</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preethi</namePart>
<namePart type="family">Raghavan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Many existing end-to-end systems for hybrid question answering tasks can often be boiled down to a “prompt-and-pray” paradigm, where the user has limited control and insight into the intermediate reasoning steps used to achieve the final result. Additionally, due to the context size limitation of many transformer-based LLMs, it is often not reasonable to expect that the full structured and unstructured context will fit into a given prompt in a zero-shot setting, let alone a few-shot setting. We introduce BlendSQL, a superset of SQLite to act as a unified dialect for orchestrating reasoning across both unstructured and structured data. For hybrid question answering tasks involving multi-hop reasoning, we encode the full decomposed reasoning roadmap into a single interpretable BlendSQL query. Notably, we show that BlendSQL can scale to massive datasets and improve the performance of end-to-end systems while using 35% fewer tokens. Our code is available and installable as a package at https://github.com/parkervg/blendsql.</abstract>
<identifier type="citekey">glenn-etal-2024-blendsql</identifier>
<identifier type="doi">10.18653/v1/2024.findings-acl.25</identifier>
<location>
<url>https://aclanthology.org/2024.findings-acl.25</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>453</start>
<end>466</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BlendSQL: A Scalable Dialect for Unifying Hybrid Question Answering in Relational Algebra
%A Glenn, Parker
%A Dakle, Parag
%A Wang, Liang
%A Raghavan, Preethi
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Findings of the Association for Computational Linguistics: ACL 2024
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F glenn-etal-2024-blendsql
%X Many existing end-to-end systems for hybrid question answering tasks can often be boiled down to a “prompt-and-pray” paradigm, where the user has limited control and insight into the intermediate reasoning steps used to achieve the final result. Additionally, due to the context size limitation of many transformer-based LLMs, it is often not reasonable to expect that the full structured and unstructured context will fit into a given prompt in a zero-shot setting, let alone a few-shot setting. We introduce BlendSQL, a superset of SQLite to act as a unified dialect for orchestrating reasoning across both unstructured and structured data. For hybrid question answering tasks involving multi-hop reasoning, we encode the full decomposed reasoning roadmap into a single interpretable BlendSQL query. Notably, we show that BlendSQL can scale to massive datasets and improve the performance of end-to-end systems while using 35% fewer tokens. Our code is available and installable as a package at https://github.com/parkervg/blendsql.
%R 10.18653/v1/2024.findings-acl.25
%U https://aclanthology.org/2024.findings-acl.25
%U https://doi.org/10.18653/v1/2024.findings-acl.25
%P 453-466
Markdown (Informal)
[BlendSQL: A Scalable Dialect for Unifying Hybrid Question Answering in Relational Algebra](https://aclanthology.org/2024.findings-acl.25) (Glenn et al., Findings 2024)
ACL