@inproceedings{tripto-etal-2024-ship,
title = "A Ship of Theseus: Curious Cases of Paraphrasing in {LLM}-Generated Texts",
author = "Tripto, Nafis Irtiza and
Venkatraman, Saranya and
Macko, Dominik and
Moro, Robert and
Srba, Ivan and
Uchendu, Adaku and
Le, Thai and
Lee, Dongwon",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.acl-long.357",
doi = "10.18653/v1/2024.acl-long.357",
pages = "6608--6625",
abstract = "In the realm of text manipulation and linguistic transformation, the question of authorship has been a subject of fascination and philosophical inquiry. Much like the Ship of Theseus paradox, which ponders whether a ship remains the same when each of its original planks is replaced, our research delves into an intriguing question: Does a text retain its original authorship when it undergoes numerous paraphrasing iterations? Specifically, since Large Language Models (LLMs) have demonstrated remarkable proficiency in both the generation of original content and the modification of human-authored texts, a pivotal question emerges concerning the determination of authorship in instances where LLMs or similar paraphrasing tools are employed to rephrase the text{--}i.e., whether authorship should be attributed to the original human author or the AI-powered tool. Therefore, we embark on a philosophical voyage through the seas of language and authorship to unravel this intricate puzzle. Using a computational approach, we discover that the diminishing performance in text classification models, with each successive paraphrasing iteration, is closely associated with the extent of deviation from the original author{'}s style, thus provoking a reconsideration of the current notion of authorship.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tripto-etal-2024-ship">
<titleInfo>
<title>A Ship of Theseus: Curious Cases of Paraphrasing in LLM-Generated Texts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nafis</namePart>
<namePart type="given">Irtiza</namePart>
<namePart type="family">Tripto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saranya</namePart>
<namePart type="family">Venkatraman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dominik</namePart>
<namePart type="family">Macko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="family">Moro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Srba</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adaku</namePart>
<namePart type="family">Uchendu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thai</namePart>
<namePart type="family">Le</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dongwon</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In the realm of text manipulation and linguistic transformation, the question of authorship has been a subject of fascination and philosophical inquiry. Much like the Ship of Theseus paradox, which ponders whether a ship remains the same when each of its original planks is replaced, our research delves into an intriguing question: Does a text retain its original authorship when it undergoes numerous paraphrasing iterations? Specifically, since Large Language Models (LLMs) have demonstrated remarkable proficiency in both the generation of original content and the modification of human-authored texts, a pivotal question emerges concerning the determination of authorship in instances where LLMs or similar paraphrasing tools are employed to rephrase the text–i.e., whether authorship should be attributed to the original human author or the AI-powered tool. Therefore, we embark on a philosophical voyage through the seas of language and authorship to unravel this intricate puzzle. Using a computational approach, we discover that the diminishing performance in text classification models, with each successive paraphrasing iteration, is closely associated with the extent of deviation from the original author’s style, thus provoking a reconsideration of the current notion of authorship.</abstract>
<identifier type="citekey">tripto-etal-2024-ship</identifier>
<identifier type="doi">10.18653/v1/2024.acl-long.357</identifier>
<location>
<url>https://aclanthology.org/2024.acl-long.357</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>6608</start>
<end>6625</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Ship of Theseus: Curious Cases of Paraphrasing in LLM-Generated Texts
%A Tripto, Nafis Irtiza
%A Venkatraman, Saranya
%A Macko, Dominik
%A Moro, Robert
%A Srba, Ivan
%A Uchendu, Adaku
%A Le, Thai
%A Lee, Dongwon
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F tripto-etal-2024-ship
%X In the realm of text manipulation and linguistic transformation, the question of authorship has been a subject of fascination and philosophical inquiry. Much like the Ship of Theseus paradox, which ponders whether a ship remains the same when each of its original planks is replaced, our research delves into an intriguing question: Does a text retain its original authorship when it undergoes numerous paraphrasing iterations? Specifically, since Large Language Models (LLMs) have demonstrated remarkable proficiency in both the generation of original content and the modification of human-authored texts, a pivotal question emerges concerning the determination of authorship in instances where LLMs or similar paraphrasing tools are employed to rephrase the text–i.e., whether authorship should be attributed to the original human author or the AI-powered tool. Therefore, we embark on a philosophical voyage through the seas of language and authorship to unravel this intricate puzzle. Using a computational approach, we discover that the diminishing performance in text classification models, with each successive paraphrasing iteration, is closely associated with the extent of deviation from the original author’s style, thus provoking a reconsideration of the current notion of authorship.
%R 10.18653/v1/2024.acl-long.357
%U https://aclanthology.org/2024.acl-long.357
%U https://doi.org/10.18653/v1/2024.acl-long.357
%P 6608-6625
Markdown (Informal)
[A Ship of Theseus: Curious Cases of Paraphrasing in LLM-Generated Texts](https://aclanthology.org/2024.acl-long.357) (Tripto et al., ACL 2024)
ACL
- Nafis Irtiza Tripto, Saranya Venkatraman, Dominik Macko, Robert Moro, Ivan Srba, Adaku Uchendu, Thai Le, and Dongwon Lee. 2024. A Ship of Theseus: Curious Cases of Paraphrasing in LLM-Generated Texts. In Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 6608–6625, Bangkok, Thailand. Association for Computational Linguistics.