@inproceedings{gulcehre-etal-2017-plan,
title = "Plan, Attend, Generate: Character-Level Neural Machine Translation with Planning",
author = "Gulcehre, Caglar and
Dutil, Francis and
Trischler, Adam and
Bengio, Yoshua",
editor = "Blunsom, Phil and
Bordes, Antoine and
Cho, Kyunghyun and
Cohen, Shay and
Dyer, Chris and
Grefenstette, Edward and
Hermann, Karl Moritz and
Rimell, Laura and
Weston, Jason and
Yih, Scott",
booktitle = "Proceedings of the 2nd Workshop on Representation Learning for {NLP}",
month = aug,
year = "2017",
address = "Vancouver, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-2627",
doi = "10.18653/v1/W17-2627",
pages = "228--234",
abstract = "We investigate the integration of a planning mechanism into an encoder-decoder architecture with attention. We develop a model that can plan ahead when it computes alignments between the source and target sequences not only for a single time-step but for the next k time-steps as well by constructing a matrix of proposed future alignments and a commitment vector that governs whether to follow or recompute the plan. This mechanism is inspired by strategic attentive reader and writer (STRAW) model, a recent neural architecture for planning with hierarchical reinforcement learning that can also learn higher level temporal abstractions. Our proposed model is end-to-end trainable with differentiable operations. We show that our model outperforms strong baselines on character-level translation task from WMT{'}15 with fewer parameters and computes alignments that are qualitatively intuitive.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gulcehre-etal-2017-plan">
<titleInfo>
<title>Plan, Attend, Generate: Character-Level Neural Machine Translation with Planning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Caglar</namePart>
<namePart type="family">Gulcehre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francis</namePart>
<namePart type="family">Dutil</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adam</namePart>
<namePart type="family">Trischler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yoshua</namePart>
<namePart type="family">Bengio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on Representation Learning for NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Phil</namePart>
<namePart type="family">Blunsom</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antoine</namePart>
<namePart type="family">Bordes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyunghyun</namePart>
<namePart type="family">Cho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shay</namePart>
<namePart type="family">Cohen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Dyer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Edward</namePart>
<namePart type="family">Grefenstette</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Karl</namePart>
<namePart type="given">Moritz</namePart>
<namePart type="family">Hermann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laura</namePart>
<namePart type="family">Rimell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jason</namePart>
<namePart type="family">Weston</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Scott</namePart>
<namePart type="family">Yih</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vancouver, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We investigate the integration of a planning mechanism into an encoder-decoder architecture with attention. We develop a model that can plan ahead when it computes alignments between the source and target sequences not only for a single time-step but for the next k time-steps as well by constructing a matrix of proposed future alignments and a commitment vector that governs whether to follow or recompute the plan. This mechanism is inspired by strategic attentive reader and writer (STRAW) model, a recent neural architecture for planning with hierarchical reinforcement learning that can also learn higher level temporal abstractions. Our proposed model is end-to-end trainable with differentiable operations. We show that our model outperforms strong baselines on character-level translation task from WMT’15 with fewer parameters and computes alignments that are qualitatively intuitive.</abstract>
<identifier type="citekey">gulcehre-etal-2017-plan</identifier>
<identifier type="doi">10.18653/v1/W17-2627</identifier>
<location>
<url>https://aclanthology.org/W17-2627</url>
</location>
<part>
<date>2017-08</date>
<extent unit="page">
<start>228</start>
<end>234</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Plan, Attend, Generate: Character-Level Neural Machine Translation with Planning
%A Gulcehre, Caglar
%A Dutil, Francis
%A Trischler, Adam
%A Bengio, Yoshua
%Y Blunsom, Phil
%Y Bordes, Antoine
%Y Cho, Kyunghyun
%Y Cohen, Shay
%Y Dyer, Chris
%Y Grefenstette, Edward
%Y Hermann, Karl Moritz
%Y Rimell, Laura
%Y Weston, Jason
%Y Yih, Scott
%S Proceedings of the 2nd Workshop on Representation Learning for NLP
%D 2017
%8 August
%I Association for Computational Linguistics
%C Vancouver, Canada
%F gulcehre-etal-2017-plan
%X We investigate the integration of a planning mechanism into an encoder-decoder architecture with attention. We develop a model that can plan ahead when it computes alignments between the source and target sequences not only for a single time-step but for the next k time-steps as well by constructing a matrix of proposed future alignments and a commitment vector that governs whether to follow or recompute the plan. This mechanism is inspired by strategic attentive reader and writer (STRAW) model, a recent neural architecture for planning with hierarchical reinforcement learning that can also learn higher level temporal abstractions. Our proposed model is end-to-end trainable with differentiable operations. We show that our model outperforms strong baselines on character-level translation task from WMT’15 with fewer parameters and computes alignments that are qualitatively intuitive.
%R 10.18653/v1/W17-2627
%U https://aclanthology.org/W17-2627
%U https://doi.org/10.18653/v1/W17-2627
%P 228-234
Markdown (Informal)
[Plan, Attend, Generate: Character-Level Neural Machine Translation with Planning](https://aclanthology.org/W17-2627) (Gulcehre et al., RepL4NLP 2017)
ACL