@inproceedings{madhyastha-jain-2019-model,
title = "On Model Stability as a Function of Random Seed",
author = "Madhyastha, Pranava and
Jain, Rishabh",
editor = "Bansal, Mohit and
Villavicencio, Aline",
booktitle = "Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/K19-1087",
doi = "10.18653/v1/K19-1087",
pages = "929--939",
abstract = "In this paper, we focus on quantifying model stability as a function of random seed by investigating the effects of the induced randomness on model performance and the robustness of the model in general. We specifically perform a controlled study on the effect of random seeds on the behaviour of attention, gradient-based and surrogate model based (LIME) interpretations. Our analysis suggests that random seeds can adversely affect the consistency of models resulting in counterfactual interpretations. We propose a technique called Aggressive Stochastic Weight Averaging (ASWA) and an extension called Norm-filtered Aggressive Stochastic Weight Averaging (NASWA) which improves the stability of models over random seeds. With our ASWA and NASWA based optimization, we are able to improve the robustness of the original model, on average reducing the standard deviation of the model{'}s performance by 72{\%}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="madhyastha-jain-2019-model">
<titleInfo>
<title>On Model Stability as a Function of Random Seed</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pranava</namePart>
<namePart type="family">Madhyastha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rishabh</namePart>
<namePart type="family">Jain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aline</namePart>
<namePart type="family">Villavicencio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we focus on quantifying model stability as a function of random seed by investigating the effects of the induced randomness on model performance and the robustness of the model in general. We specifically perform a controlled study on the effect of random seeds on the behaviour of attention, gradient-based and surrogate model based (LIME) interpretations. Our analysis suggests that random seeds can adversely affect the consistency of models resulting in counterfactual interpretations. We propose a technique called Aggressive Stochastic Weight Averaging (ASWA) and an extension called Norm-filtered Aggressive Stochastic Weight Averaging (NASWA) which improves the stability of models over random seeds. With our ASWA and NASWA based optimization, we are able to improve the robustness of the original model, on average reducing the standard deviation of the model’s performance by 72%.</abstract>
<identifier type="citekey">madhyastha-jain-2019-model</identifier>
<identifier type="doi">10.18653/v1/K19-1087</identifier>
<location>
<url>https://aclanthology.org/K19-1087</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>929</start>
<end>939</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T On Model Stability as a Function of Random Seed
%A Madhyastha, Pranava
%A Jain, Rishabh
%Y Bansal, Mohit
%Y Villavicencio, Aline
%S Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong, China
%F madhyastha-jain-2019-model
%X In this paper, we focus on quantifying model stability as a function of random seed by investigating the effects of the induced randomness on model performance and the robustness of the model in general. We specifically perform a controlled study on the effect of random seeds on the behaviour of attention, gradient-based and surrogate model based (LIME) interpretations. Our analysis suggests that random seeds can adversely affect the consistency of models resulting in counterfactual interpretations. We propose a technique called Aggressive Stochastic Weight Averaging (ASWA) and an extension called Norm-filtered Aggressive Stochastic Weight Averaging (NASWA) which improves the stability of models over random seeds. With our ASWA and NASWA based optimization, we are able to improve the robustness of the original model, on average reducing the standard deviation of the model’s performance by 72%.
%R 10.18653/v1/K19-1087
%U https://aclanthology.org/K19-1087
%U https://doi.org/10.18653/v1/K19-1087
%P 929-939
Markdown (Informal)
[On Model Stability as a Function of Random Seed](https://aclanthology.org/K19-1087) (Madhyastha & Jain, CoNLL 2019)
ACL
- Pranava Madhyastha and Rishabh Jain. 2019. On Model Stability as a Function of Random Seed. In Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL), pages 929–939, Hong Kong, China. Association for Computational Linguistics.