@inproceedings{zheng-etal-2023-makes,
title = "What Makes Good Counterspeech? A Comparison of Generation Approaches and Evaluation Metrics",
author = {Zheng, Yi and
Ross, Bj{\"o}rn and
Magdy, Walid},
editor = "Chung, Yi-Ling and
Bonaldi, Helena and
Abercrombie, Gavin and
Guerini, Marco",
booktitle = "Proceedings of the 1st Workshop on CounterSpeech for Online Abuse (CS4OA)",
month = sep,
year = "2023",
address = "Prague, Czechia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.cs4oa-1.5",
pages = "62--71",
abstract = "Counterspeech has been proposed as a solution to the proliferation of online hate. Research has shown that natural language processing (NLP) approaches could generate such counterspeech automatically, but there are competing ideas for how NLP models might be used for this task and a variety of evaluation metrics whose relationship to one another is unclear. We test three different approaches and collect ratings of the generated counterspeech for 1,740 tweet-participant pairs to systematically compare the counterspeech on three aspects: quality, effectiveness and user preferences. We examine which model performs best at which metric and which aspects of counterspeech predict user preferences. A free-form text generation approach using ChatGPT performs the most consistently well, though its generations are occasionally unspecific and repetitive. In our experiment, participants{'} preferences for counterspeech are predicted by the quality of the counterspeech, not its perceived effectiveness. The results can help future research approach counterspeech evaluation more systematically.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zheng-etal-2023-makes">
<titleInfo>
<title>What Makes Good Counterspeech? A Comparison of Generation Approaches and Evaluation Metrics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yi</namePart>
<namePart type="family">Zheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Björn</namePart>
<namePart type="family">Ross</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Walid</namePart>
<namePart type="family">Magdy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on CounterSpeech for Online Abuse (CS4OA)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yi-Ling</namePart>
<namePart type="family">Chung</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Bonaldi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gavin</namePart>
<namePart type="family">Abercrombie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Guerini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Prague, Czechia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Counterspeech has been proposed as a solution to the proliferation of online hate. Research has shown that natural language processing (NLP) approaches could generate such counterspeech automatically, but there are competing ideas for how NLP models might be used for this task and a variety of evaluation metrics whose relationship to one another is unclear. We test three different approaches and collect ratings of the generated counterspeech for 1,740 tweet-participant pairs to systematically compare the counterspeech on three aspects: quality, effectiveness and user preferences. We examine which model performs best at which metric and which aspects of counterspeech predict user preferences. A free-form text generation approach using ChatGPT performs the most consistently well, though its generations are occasionally unspecific and repetitive. In our experiment, participants’ preferences for counterspeech are predicted by the quality of the counterspeech, not its perceived effectiveness. The results can help future research approach counterspeech evaluation more systematically.</abstract>
<identifier type="citekey">zheng-etal-2023-makes</identifier>
<location>
<url>https://aclanthology.org/2023.cs4oa-1.5</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>62</start>
<end>71</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T What Makes Good Counterspeech? A Comparison of Generation Approaches and Evaluation Metrics
%A Zheng, Yi
%A Ross, Björn
%A Magdy, Walid
%Y Chung, Yi-Ling
%Y Bonaldi, Helena
%Y Abercrombie, Gavin
%Y Guerini, Marco
%S Proceedings of the 1st Workshop on CounterSpeech for Online Abuse (CS4OA)
%D 2023
%8 September
%I Association for Computational Linguistics
%C Prague, Czechia
%F zheng-etal-2023-makes
%X Counterspeech has been proposed as a solution to the proliferation of online hate. Research has shown that natural language processing (NLP) approaches could generate such counterspeech automatically, but there are competing ideas for how NLP models might be used for this task and a variety of evaluation metrics whose relationship to one another is unclear. We test three different approaches and collect ratings of the generated counterspeech for 1,740 tweet-participant pairs to systematically compare the counterspeech on three aspects: quality, effectiveness and user preferences. We examine which model performs best at which metric and which aspects of counterspeech predict user preferences. A free-form text generation approach using ChatGPT performs the most consistently well, though its generations are occasionally unspecific and repetitive. In our experiment, participants’ preferences for counterspeech are predicted by the quality of the counterspeech, not its perceived effectiveness. The results can help future research approach counterspeech evaluation more systematically.
%U https://aclanthology.org/2023.cs4oa-1.5
%P 62-71
Markdown (Informal)
[What Makes Good Counterspeech? A Comparison of Generation Approaches and Evaluation Metrics](https://aclanthology.org/2023.cs4oa-1.5) (Zheng et al., CS4OA-WS 2023)
ACL