@inproceedings{howcroft-etal-2023-building,
title = "Building a dual dataset of text- and image-grounded conversations and summarisation in G{\`a}idhlig ({S}cottish {G}aelic)",
author = "Howcroft, David M. and
Lamb, William and
Groundwater, Anna and
Gkatzia, Dimitra",
editor = "Keet, C. Maria and
Lee, Hung-Yi and
Zarrie{\ss}, Sina",
booktitle = "Proceedings of the 16th International Natural Language Generation Conference",
month = sep,
year = "2023",
address = "Prague, Czechia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.inlg-main.34",
doi = "10.18653/v1/2023.inlg-main.34",
pages = "443--448",
abstract = "G{\`a}idhlig (Scottish Gaelic; gd) is spoken by about 57k people in Scotland, but remains an under-resourced language with respect to natural language processing in general and natural language generation (NLG) in particular. To address this gap, we developed the first datasets for Scottish Gaelic NLG, collecting both conversational and summarisation data in a single setting. Our task setup involves dialogues between a pair of speakers discussing museum exhibits, grounding the conversation in images and texts. Then, both interlocutors summarise the dialogue resulting in a secondary dialogue summarisation dataset. This paper presents the dialogue and summarisation corpora, as well as the software used for data collection. The corpus consists of 43 conversations (13.7k words) and 61 summaries (2.0k words), and will be released along with the data collection interface.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="howcroft-etal-2023-building">
<titleInfo>
<title>Building a dual dataset of text- and image-grounded conversations and summarisation in Gàidhlig (Scottish Gaelic)</title>
</titleInfo>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Howcroft</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">William</namePart>
<namePart type="family">Lamb</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Groundwater</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dimitra</namePart>
<namePart type="family">Gkatzia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 16th International Natural Language Generation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">C</namePart>
<namePart type="given">Maria</namePart>
<namePart type="family">Keet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hung-Yi</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sina</namePart>
<namePart type="family">Zarrieß</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Prague, Czechia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Gàidhlig (Scottish Gaelic; gd) is spoken by about 57k people in Scotland, but remains an under-resourced language with respect to natural language processing in general and natural language generation (NLG) in particular. To address this gap, we developed the first datasets for Scottish Gaelic NLG, collecting both conversational and summarisation data in a single setting. Our task setup involves dialogues between a pair of speakers discussing museum exhibits, grounding the conversation in images and texts. Then, both interlocutors summarise the dialogue resulting in a secondary dialogue summarisation dataset. This paper presents the dialogue and summarisation corpora, as well as the software used for data collection. The corpus consists of 43 conversations (13.7k words) and 61 summaries (2.0k words), and will be released along with the data collection interface.</abstract>
<identifier type="citekey">howcroft-etal-2023-building</identifier>
<identifier type="doi">10.18653/v1/2023.inlg-main.34</identifier>
<location>
<url>https://aclanthology.org/2023.inlg-main.34</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>443</start>
<end>448</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Building a dual dataset of text- and image-grounded conversations and summarisation in Gàidhlig (Scottish Gaelic)
%A Howcroft, David M.
%A Lamb, William
%A Groundwater, Anna
%A Gkatzia, Dimitra
%Y Keet, C. Maria
%Y Lee, Hung-Yi
%Y Zarrieß, Sina
%S Proceedings of the 16th International Natural Language Generation Conference
%D 2023
%8 September
%I Association for Computational Linguistics
%C Prague, Czechia
%F howcroft-etal-2023-building
%X Gàidhlig (Scottish Gaelic; gd) is spoken by about 57k people in Scotland, but remains an under-resourced language with respect to natural language processing in general and natural language generation (NLG) in particular. To address this gap, we developed the first datasets for Scottish Gaelic NLG, collecting both conversational and summarisation data in a single setting. Our task setup involves dialogues between a pair of speakers discussing museum exhibits, grounding the conversation in images and texts. Then, both interlocutors summarise the dialogue resulting in a secondary dialogue summarisation dataset. This paper presents the dialogue and summarisation corpora, as well as the software used for data collection. The corpus consists of 43 conversations (13.7k words) and 61 summaries (2.0k words), and will be released along with the data collection interface.
%R 10.18653/v1/2023.inlg-main.34
%U https://aclanthology.org/2023.inlg-main.34
%U https://doi.org/10.18653/v1/2023.inlg-main.34
%P 443-448
Markdown (Informal)
[Building a dual dataset of text- and image-grounded conversations and summarisation in Gàidhlig (Scottish Gaelic)](https://aclanthology.org/2023.inlg-main.34) (Howcroft et al., INLG-SIGDIAL 2023)
ACL