@inproceedings{tu-etal-2018-pay,
    title     = {Pay-Per-Request Deployment of Neural Network Models Using Serverless Architectures},
    author    = {Tu, Zhucheng and
                 Li, Mengping and
                 Lin, Jimmy},
    editor    = {Liu, Yang and
                 Paek, Tim and
                 Patwardhan, Manasi},
    booktitle = {Proceedings of the 2018 Conference of the North {American} Chapter of the Association for Computational Linguistics: Demonstrations},
    month     = jun,
    year      = {2018},
    address   = {New Orleans, Louisiana},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/N18-5002},
    doi       = {10.18653/v1/N18-5002},
    pages     = {6--10},
    abstract  = {We demonstrate the serverless deployment of neural networks for model inferencing in NLP applications using Amazon{'}s Lambda service for feedforward evaluation and DynamoDB for storing word embeddings. Our architecture realizes a pay-per-request pricing model, requiring zero ongoing costs for maintaining server instances. All virtual machine management is handled behind the scenes by the cloud provider without any direct developer intervention. We describe a number of techniques that allow efficient use of serverless resources, and evaluations confirm that our design is both scalable and inexpensive.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tu-etal-2018-pay">
<titleInfo>
<title>Pay-Per-Request Deployment of Neural Network Models Using Serverless Architectures</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhucheng</namePart>
<namePart type="family">Tu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mengping</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jimmy</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Demonstrations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tim</namePart>
<namePart type="family">Paek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manasi</namePart>
<namePart type="family">Patwardhan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">New Orleans, Louisiana</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We demonstrate the serverless deployment of neural networks for model inferencing in NLP applications using Amazon’s Lambda service for feedforward evaluation and DynamoDB for storing word embeddings. Our architecture realizes a pay-per-request pricing model, requiring zero ongoing costs for maintaining server instances. All virtual machine management is handled behind the scenes by the cloud provider without any direct developer intervention. We describe a number of techniques that allow efficient use of serverless resources, and evaluations confirm that our design is both scalable and inexpensive.</abstract>
<identifier type="citekey">tu-etal-2018-pay</identifier>
<identifier type="doi">10.18653/v1/N18-5002</identifier>
<location>
<url>https://aclanthology.org/N18-5002</url>
</location>
<part>
<date>2018-06</date>
<extent unit="page">
<start>6</start>
<end>10</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Pay-Per-Request Deployment of Neural Network Models Using Serverless Architectures
%A Tu, Zhucheng
%A Li, Mengping
%A Lin, Jimmy
%Y Liu, Yang
%Y Paek, Tim
%Y Patwardhan, Manasi
%S Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Demonstrations
%D 2018
%8 June
%I Association for Computational Linguistics
%C New Orleans, Louisiana
%F tu-etal-2018-pay
%X We demonstrate the serverless deployment of neural networks for model inferencing in NLP applications using Amazon’s Lambda service for feedforward evaluation and DynamoDB for storing word embeddings. Our architecture realizes a pay-per-request pricing model, requiring zero ongoing costs for maintaining server instances. All virtual machine management is handled behind the scenes by the cloud provider without any direct developer intervention. We describe a number of techniques that allow efficient use of serverless resources, and evaluations confirm that our design is both scalable and inexpensive.
%R 10.18653/v1/N18-5002
%U https://aclanthology.org/N18-5002
%U https://doi.org/10.18653/v1/N18-5002
%P 6-10
Markdown (Informal)
[Pay-Per-Request Deployment of Neural Network Models Using Serverless Architectures](https://aclanthology.org/N18-5002) (Tu et al., NAACL 2018)
ACL