@inproceedings{sah-abulaish-2022-deepada,
title = "{D}eep{ADA}:An Attention-Based Deep Learning Framework for Augmenting Imbalanced Textual Datasets",
author = "Sah, Amit and
Abulaish, Muhammad",
editor = "Akhtar, Md. Shad and
Chakraborty, Tanmoy",
booktitle = "Proceedings of the 19th International Conference on Natural Language Processing (ICON)",
month = dec,
year = "2022",
address = "New Delhi, India",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.icon-main.38",
pages = "318--327",
abstract = "In this paper, we present an attention-based deep learning framework, DeepADA, which uses data augmentation to address the class imbalance problem in textual datasets. The proposed framework carries out the following functions:(i) using MPNET-based embeddings to extract keywords out of documents from the minority class, (ii) making use of a CNN-BiLSTM architecture with parallel attention to learn the important contextual words associated with the minority class documents{'} keywords and provide them with word-level characteristics derived from their statistical and semantic features, (iii) using MPNET, replacing the key contextual terms derived from the oversampled documents that match to a keyword with the contextual term that best fits the context, and finally (iv) oversampling the minority class dataset to produce a balanced dataset. Using a 2-layer stacked BiLSTM classifier, we assess the efficacy of the proposed framework using the original and oversampled versions of three Amazon{'}s reviews datasets. We contrast the proposed data augmentation approach with two state-of-the-art text data augmentation methods. The experimental results reveal that our method produces an oversampled dataset that is more useful and helps the classifier perform better than the other two state-of-the-art methods. Nevertheless, we discover that the oversampled datasets outperformed their original ones by a wide margin.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sah-abulaish-2022-deepada">
<titleInfo>
    <title>DeepADA: An Attention-Based Deep Learning Framework for Augmenting Imbalanced Textual Datasets</title>
</titleInfo>
<name type="personal">
<namePart type="given">Amit</namePart>
<namePart type="family">Sah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Muhammad</namePart>
<namePart type="family">Abulaish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th International Conference on Natural Language Processing (ICON)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Md.</namePart>
<namePart type="given">Shad</namePart>
<namePart type="family">Akhtar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">New Delhi, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
    <abstract>In this paper, we present an attention-based deep learning framework, DeepADA, which uses data augmentation to address the class imbalance problem in textual datasets. The proposed framework carries out the following functions: (i) using MPNet-based embeddings to extract keywords from minority-class documents, (ii) employing a CNN-BiLSTM architecture with parallel attention to learn the important contextual words associated with the minority-class documents’ keywords and enrich them with word-level characteristics derived from their statistical and semantic features, (iii) using MPNet to replace the key contextual terms in the oversampled documents that match a keyword with the contextual term that best fits the context, and finally (iv) oversampling the minority-class dataset to produce a balanced dataset. Using a 2-layer stacked BiLSTM classifier, we assess the efficacy of the proposed framework on the original and oversampled versions of three Amazon reviews datasets. We compare the proposed data augmentation approach with two state-of-the-art text data augmentation methods. The experimental results reveal that our method produces an oversampled dataset that is more useful and helps the classifier perform better than the other two state-of-the-art methods. Moreover, we find that the oversampled datasets outperform their original versions by a wide margin.</abstract>
<identifier type="citekey">sah-abulaish-2022-deepada</identifier>
<location>
<url>https://aclanthology.org/2022.icon-main.38</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>318</start>
<end>327</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DeepADA: An Attention-Based Deep Learning Framework for Augmenting Imbalanced Textual Datasets
%A Sah, Amit
%A Abulaish, Muhammad
%Y Akhtar, Md. Shad
%Y Chakraborty, Tanmoy
%S Proceedings of the 19th International Conference on Natural Language Processing (ICON)
%D 2022
%8 December
%I Association for Computational Linguistics
%C New Delhi, India
%F sah-abulaish-2022-deepada
%X In this paper, we present an attention-based deep learning framework, DeepADA, which uses data augmentation to address the class imbalance problem in textual datasets. The proposed framework carries out the following functions: (i) using MPNet-based embeddings to extract keywords from minority-class documents, (ii) employing a CNN-BiLSTM architecture with parallel attention to learn the important contextual words associated with the minority-class documents’ keywords and enrich them with word-level characteristics derived from their statistical and semantic features, (iii) using MPNet to replace the key contextual terms in the oversampled documents that match a keyword with the contextual term that best fits the context, and finally (iv) oversampling the minority-class dataset to produce a balanced dataset. Using a 2-layer stacked BiLSTM classifier, we assess the efficacy of the proposed framework on the original and oversampled versions of three Amazon reviews datasets. We compare the proposed data augmentation approach with two state-of-the-art text data augmentation methods. The experimental results reveal that our method produces an oversampled dataset that is more useful and helps the classifier perform better than the other two state-of-the-art methods. Moreover, we find that the oversampled datasets outperform their original versions by a wide margin.
%U https://aclanthology.org/2022.icon-main.38
%P 318-327
Markdown (Informal)
[DeepADA: An Attention-Based Deep Learning Framework for Augmenting Imbalanced Textual Datasets](https://aclanthology.org/2022.icon-main.38) (Sah & Abulaish, ICON 2022)
ACL
Amit Sah and Muhammad Abulaish. 2022. DeepADA: An Attention-Based Deep Learning Framework for Augmenting Imbalanced Textual Datasets. In Proceedings of the 19th International Conference on Natural Language Processing (ICON), pages 318–327, New Delhi, India. Association for Computational Linguistics.
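
For orientation, the abstract above describes a four-step augmentation pipeline. The following is a minimal, hypothetical sketch of step (i), keyword extraction with MPNet-based embeddings, written against the publicly available sentence-transformers package. The model name, candidate-generation heuristic, and cosine-similarity ranking are assumptions made for illustration; this is not the authors' DeepADA implementation.

# Hypothetical sketch of step (i): rank candidate words from a minority-class
# document by cosine similarity between their MPNet embeddings and the
# embedding of the whole document. Illustration only, not the DeepADA code.
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# all-mpnet-base-v2 is a publicly available MPNet-based sentence encoder.
model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

def extract_keywords(document: str, top_k: int = 5) -> list[str]:
    """Return the top_k candidate words closest to the document embedding."""
    # Candidate keywords: unique unigrams with English stop words removed.
    vectorizer = CountVectorizer(stop_words="english").fit([document])
    candidates = vectorizer.get_feature_names_out()

    doc_emb = model.encode([document])            # shape (1, 768)
    cand_embs = model.encode(list(candidates))    # shape (n_candidates, 768)

    # Rank candidates by cosine similarity to the document embedding.
    scores = cosine_similarity(doc_emb, cand_embs)[0]
    top_indices = scores.argsort()[::-1][:top_k]
    return [str(candidates[i]) for i in top_indices]

if __name__ == "__main__":
    review = "The battery drains quickly and the charger stopped working after a week."
    print(extract_keywords(review))

The later steps described in the abstract (contextual-word scoring with a CNN-BiLSTM and parallel attention, and MPNet-based substitution of contextual terms) would build on keyword lists like the one produced here; they are not sketched.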