@inproceedings{drissi-etal-2019-harvey,
title = "Harvey Mudd College at {S}em{E}val-2019 Task 4: The Clint Buchanan Hyperpartisan News Detector",
author = "Drissi, Mehdi and
Sandoval Segura, Pedro and
Ojha, Vivaswat and
Medero, Julie",
editor = "May, Jonathan and
Shutova, Ekaterina and
Herbelot, Aurelie and
Zhu, Xiaodan and
Apidianaki, Marianna and
Mohammad, Saif M.",
booktitle = "Proceedings of the 13th International Workshop on Semantic Evaluation",
month = jun,
year = "2019",
address = "Minneapolis, Minnesota, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/S19-2165",
doi = "10.18653/v1/S19-2165",
pages = "962--966",
abstract = "We investigate the recently developed Bidirectional Encoder Representations from Transformers (BERT) model (Devlin et al. 2018) for the hyperpartisan news detection task. Using a subset of hand-labeled articles from SemEval as a validation set, we test the performance of different parameters for BERT models. We find that accuracy from two different BERT models using different proportions of the articles is consistently high, with our best-performing model on the validation set achieving 85{\%} accuracy and the best-performing model on the test set achieving 77{\%}. We further determined that our model exhibits strong consistency, labeling independent slices of the same article identically. Finally, we find that randomizing the order of word pieces dramatically reduces validation accuracy (to approximately 60{\%}), but that shuffling groups of four or more word pieces maintains an accuracy of about 80{\%}, indicating the model mainly gains value from local context.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="drissi-etal-2019-harvey">
<titleInfo>
<title>Harvey Mudd College at SemEval-2019 Task 4: The Clint Buchanan Hyperpartisan News Detector</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mehdi</namePart>
<namePart type="family">Drissi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pedro</namePart>
<namePart type="family">Sandoval Segura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivaswat</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julie</namePart>
<namePart type="family">Medero</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th International Workshop on Semantic Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="family">May</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aurelie</namePart>
<namePart type="family">Herbelot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaodan</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saif</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Mohammad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Minneapolis, Minnesota, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We investigate the recently developed Bidirectional Encoder Representations from Transformers (BERT) model (Devlin et al. 2018) for the hyperpartisan news detection task. Using a subset of hand-labeled articles from SemEval as a validation set, we test the performance of different parameters for BERT models. We find that accuracy from two different BERT models using different proportions of the articles is consistently high, with our best-performing model on the validation set achieving 85% accuracy and the best-performing model on the test set achieving 77%. We further determined that our model exhibits strong consistency, labeling independent slices of the same article identically. Finally, we find that randomizing the order of word pieces dramatically reduces validation accuracy (to approximately 60%), but that shuffling groups of four or more word pieces maintains an accuracy of about 80%, indicating the model mainly gains value from local context.</abstract>
<identifier type="citekey">drissi-etal-2019-harvey</identifier>
<identifier type="doi">10.18653/v1/S19-2165</identifier>
<location>
<url>https://aclanthology.org/S19-2165</url>
</location>
<part>
<date>2019-06</date>
<extent unit="page">
<start>962</start>
<end>966</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Harvey Mudd College at SemEval-2019 Task 4: The Clint Buchanan Hyperpartisan News Detector
%A Drissi, Mehdi
%A Sandoval Segura, Pedro
%A Ojha, Vivaswat
%A Medero, Julie
%Y May, Jonathan
%Y Shutova, Ekaterina
%Y Herbelot, Aurelie
%Y Zhu, Xiaodan
%Y Apidianaki, Marianna
%Y Mohammad, Saif M.
%S Proceedings of the 13th International Workshop on Semantic Evaluation
%D 2019
%8 June
%I Association for Computational Linguistics
%C Minneapolis, Minnesota, USA
%F drissi-etal-2019-harvey
%X We investigate the recently developed Bidirectional Encoder Representations from Transformers (BERT) model (Devlin et al. 2018) for the hyperpartisan news detection task. Using a subset of hand-labeled articles from SemEval as a validation set, we test the performance of different parameters for BERT models. We find that accuracy from two different BERT models using different proportions of the articles is consistently high, with our best-performing model on the validation set achieving 85% accuracy and the best-performing model on the test set achieving 77%. We further determined that our model exhibits strong consistency, labeling independent slices of the same article identically. Finally, we find that randomizing the order of word pieces dramatically reduces validation accuracy (to approximately 60%), but that shuffling groups of four or more word pieces maintains an accuracy of about 80%, indicating the model mainly gains value from local context.
%R 10.18653/v1/S19-2165
%U https://aclanthology.org/S19-2165
%U https://doi.org/10.18653/v1/S19-2165
%P 962-966
Markdown (Informal)
[Harvey Mudd College at SemEval-2019 Task 4: The Clint Buchanan Hyperpartisan News Detector](https://aclanthology.org/S19-2165) (Drissi et al., SemEval 2019)
ACL