@inproceedings{kwan-etal-2024-jotr,
title = "{J}o{TR}: A Joint Transformer and Reinforcement Learning Framework for Dialogue Policy Learning",
author = "Kwan, Wai-Chung and
Wang, Huimin and
Wang, Hongru and
Wang, Zezhong and
Liang, Bin and
Wu, Xian and
Zheng, Yefeng and
Wong, Kam-Fai",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.837",
pages = "9578--9588",
abstract = "Dialogue policy learning (DPL) aims to determine an abstract representation (also known as action) to guide what the response should be. Typically, DPL is cast as a sequential decision problem across a series of predefined action candidates. However, such static and narrow actions can limit response diversity and impede the dialogue agent{'}s adaptability to new scenarios and edge cases. To overcome these challenges, we introduce a novel \textbf{Jo}int \textbf{T}ransformer \textbf{R}einforcement Learning framework, coined as \textbf{JoTR}, where a text-to-text Transformer-based model is employed to directly generate dialogue actions. More concretely, JoTR formulates a token-grained policy, facilitating more dynamic and adaptable dialogue action generation without the need for predefined action candidates. This method not only enhances the diversity of responses but also significantly improves the system{'}s capability to manage unfamiliar scenarios. Furthermore, JoTR utilizes Reinforcement Learning with a reward-shaping mechanism to efficiently fine-tune the token-grained policy. This allows the model to evolve through interactions, thereby enhancing its performance over time. Our extensive evaluation demonstrates that JoTR surpasses previous state-of-the-art models, showing improvements of 9{\%} and 13{\%} in success rate, and 34{\%} and 37{\%} in the diversity of dialogue actions across two benchmark dialogue modeling tasks respectively. These results have been validated by both user simulators and human evaluators. Code and data are available at ://github.com/KwanWaiChung/JoTR.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kwan-etal-2024-jotr">
<titleInfo>
<title>JoTR: A Joint Transformer and Reinforcement Learning Framework for Dialogue Policy Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wai-Chung</namePart>
<namePart type="family">Kwan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Huimin</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hongru</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zezhong</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bin</namePart>
<namePart type="family">Liang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xian</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yefeng</namePart>
<namePart type="family">Zheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kam-Fai</namePart>
<namePart type="family">Wong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Dialogue policy learning (DPL) aims to determine an abstract representation (also known as action) to guide what the response should be. Typically, DPL is cast as a sequential decision problem across a series of predefined action candidates. However, such static and narrow actions can limit response diversity and impede the dialogue agent’s adaptability to new scenarios and edge cases. To overcome these challenges, we introduce a novel Joint Transformer Reinforcement Learning framework, coined as JoTR, where a text-to-text Transformer-based model is employed to directly generate dialogue actions. More concretely, JoTR formulates a token-grained policy, facilitating more dynamic and adaptable dialogue action generation without the need for predefined action candidates. This method not only enhances the diversity of responses but also significantly improves the system’s capability to manage unfamiliar scenarios. Furthermore, JoTR utilizes Reinforcement Learning with a reward-shaping mechanism to efficiently fine-tune the token-grained policy. This allows the model to evolve through interactions, thereby enhancing its performance over time. Our extensive evaluation demonstrates that JoTR surpasses previous state-of-the-art models, showing improvements of 9% and 13% in success rate, and 34% and 37% in the diversity of dialogue actions across two benchmark dialogue modeling tasks respectively. These results have been validated by both user simulators and human evaluators. Code and data are available at ://github.com/KwanWaiChung/JoTR.</abstract>
<identifier type="citekey">kwan-etal-2024-jotr</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.837</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>9578</start>
<end>9588</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T JoTR: A Joint Transformer and Reinforcement Learning Framework for Dialogue Policy Learning
%A Kwan, Wai-Chung
%A Wang, Huimin
%A Wang, Hongru
%A Wang, Zezhong
%A Liang, Bin
%A Wu, Xian
%A Zheng, Yefeng
%A Wong, Kam-Fai
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F kwan-etal-2024-jotr
%X Dialogue policy learning (DPL) aims to determine an abstract representation (also known as action) to guide what the response should be. Typically, DPL is cast as a sequential decision problem across a series of predefined action candidates. However, such static and narrow actions can limit response diversity and impede the dialogue agent’s adaptability to new scenarios and edge cases. To overcome these challenges, we introduce a novel Joint Transformer Reinforcement Learning framework, coined as JoTR, where a text-to-text Transformer-based model is employed to directly generate dialogue actions. More concretely, JoTR formulates a token-grained policy, facilitating more dynamic and adaptable dialogue action generation without the need for predefined action candidates. This method not only enhances the diversity of responses but also significantly improves the system’s capability to manage unfamiliar scenarios. Furthermore, JoTR utilizes Reinforcement Learning with a reward-shaping mechanism to efficiently fine-tune the token-grained policy. This allows the model to evolve through interactions, thereby enhancing its performance over time. Our extensive evaluation demonstrates that JoTR surpasses previous state-of-the-art models, showing improvements of 9% and 13% in success rate, and 34% and 37% in the diversity of dialogue actions across two benchmark dialogue modeling tasks respectively. These results have been validated by both user simulators and human evaluators. Code and data are available at ://github.com/KwanWaiChung/JoTR.
%U https://aclanthology.org/2024.lrec-main.837
%P 9578-9588
Markdown (Informal)
[JoTR: A Joint Transformer and Reinforcement Learning Framework for Dialogue Policy Learning](https://aclanthology.org/2024.lrec-main.837) (Kwan et al., LREC-COLING 2024)
ACL
- Wai-Chung Kwan, Huimin Wang, Hongru Wang, Zezhong Wang, Bin Liang, Xian Wu, Yefeng Zheng, and Kam-Fai Wong. 2024. JoTR: A Joint Transformer and Reinforcement Learning Framework for Dialogue Policy Learning. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 9578–9588, Torino, Italia. ELRA and ICCL.