@inproceedings{wang-etal-2022-multimodal,
title = "Multimodal Sarcasm Target Identification in Tweets",
author = "Wang, Jiquan and
Sun, Lin and
Liu, Yi and
Shao, Meizhi and
Zheng, Zengwei",
editor = "Muresan, Smaranda and
Nakov, Preslav and
Villavicencio, Aline",
booktitle = "Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.acl-long.562",
doi = "10.18653/v1/2022.acl-long.562",
pages = "8164--8175",
abstract = "Sarcasm is important to sentiment analysis on social media. Sarcasm Target Identification (STI) deserves further study to understand sarcasm in depth. However, text lacking context or missing sarcasm target makes target identification very difficult. In this paper, we introduce multimodality to STI and present Multimodal Sarcasm Target Identification (MSTI) task. We propose a novel multi-scale cross-modality model that can simultaneously perform textual target labeling and visual target detection. In the model, we extract multi-scale visual features to enrich spatial information for different sized visual sarcasm targets. We design a set of convolution networks to unify multi-scale visual features with textual features for cross-modal attention learning, and correspondingly a set of transposed convolution networks to restore multi-scale visual information. The results show that visual clues can improve the performance of TSTI by a large margin, and VSTI achieves good accuracy.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-etal-2022-multimodal">
<titleInfo>
<title>Multimodal Sarcasm Target Identification in Tweets</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jiquan</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lin</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yi</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Meizhi</namePart>
<namePart type="family">Shao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zengwei</namePart>
<namePart type="family">Zheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Smaranda</namePart>
<namePart type="family">Muresan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aline</namePart>
<namePart type="family">Villavicencio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Sarcasm is important to sentiment analysis on social media. Sarcasm Target Identification (STI) deserves further study to understand sarcasm in depth. However, text lacking context or missing sarcasm target makes target identification very difficult. In this paper, we introduce multimodality to STI and present Multimodal Sarcasm Target Identification (MSTI) task. We propose a novel multi-scale cross-modality model that can simultaneously perform textual target labeling and visual target detection. In the model, we extract multi-scale visual features to enrich spatial information for different sized visual sarcasm targets. We design a set of convolution networks to unify multi-scale visual features with textual features for cross-modal attention learning, and correspondingly a set of transposed convolution networks to restore multi-scale visual information. The results show that visual clues can improve the performance of TSTI by a large margin, and VSTI achieves good accuracy.</abstract>
<identifier type="citekey">wang-etal-2022-multimodal</identifier>
<identifier type="doi">10.18653/v1/2022.acl-long.562</identifier>
<location>
<url>https://aclanthology.org/2022.acl-long.562</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>8164</start>
<end>8175</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multimodal Sarcasm Target Identification in Tweets
%A Wang, Jiquan
%A Sun, Lin
%A Liu, Yi
%A Shao, Meizhi
%A Zheng, Zengwei
%Y Muresan, Smaranda
%Y Nakov, Preslav
%Y Villavicencio, Aline
%S Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F wang-etal-2022-multimodal
%X Sarcasm is important to sentiment analysis on social media. Sarcasm Target Identification (STI) deserves further study to understand sarcasm in depth. However, text lacking context or missing sarcasm target makes target identification very difficult. In this paper, we introduce multimodality to STI and present Multimodal Sarcasm Target Identification (MSTI) task. We propose a novel multi-scale cross-modality model that can simultaneously perform textual target labeling and visual target detection. In the model, we extract multi-scale visual features to enrich spatial information for different sized visual sarcasm targets. We design a set of convolution networks to unify multi-scale visual features with textual features for cross-modal attention learning, and correspondingly a set of transposed convolution networks to restore multi-scale visual information. The results show that visual clues can improve the performance of TSTI by a large margin, and VSTI achieves good accuracy.
%R 10.18653/v1/2022.acl-long.562
%U https://aclanthology.org/2022.acl-long.562
%U https://doi.org/10.18653/v1/2022.acl-long.562
%P 8164-8175
Markdown (Informal)
[Multimodal Sarcasm Target Identification in Tweets](https://aclanthology.org/2022.acl-long.562) (Wang et al., ACL 2022)
ACL
- Jiquan Wang, Lin Sun, Yi Liu, Meizhi Shao, and Zengwei Zheng. 2022. Multimodal Sarcasm Target Identification in Tweets. In Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 8164–8175, Dublin, Ireland. Association for Computational Linguistics.