@comment{ACL Anthology record W19-3633 (WiNLP 2019 workshop paper). Acronyms in title and booktitle are brace-protected so that sentence-casing styles keep them upper-case.}
@inproceedings{mieskes-schmunk-2019-ocr,
    title     = {{OCR} Quality and {NLP} Preprocessing},
    author    = {Mieskes, Margot and
                 Schmunk, Stefan},
    editor    = {Axelrod, Amittai and
                 Yang, Diyi and
                 Cunha, Rossana and
                 Shaikh, Samira and
                 Waseem, Zeerak},
    booktitle = {Proceedings of the 2019 Workshop on Widening {NLP}},
    month     = aug,
    year      = {2019},
    address   = {Florence, Italy},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/W19-3633},
    pages     = {102--105},
    abstract  = {We present initial experiments to evaluate the performance of tasks such as Part of Speech Tagging on data corrupted by Optical Character Recognition (OCR). Our results, based on English and German data, using artificial experiments as well as initial real OCRed data indicate that already a small drop in OCR quality considerably increases the error rates, which would have a significant impact on subsequent processing steps.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey mieskes-schmunk-2019-ocr: the top-level <mods> element
     describes the paper and its two authors; the nested <relatedItem type="host">
     describes the proceedings volume (editors, publisher, place). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="mieskes-schmunk-2019-ocr">
    <titleInfo>
      <title>OCR Quality and NLP Preprocessing</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Margot</namePart>
      <namePart type="family">Mieskes</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Stefan</namePart>
      <namePart type="family">Schmunk</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2019 Workshop on Widening NLP</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Amittai</namePart>
        <namePart type="family">Axelrod</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Diyi</namePart>
        <namePart type="family">Yang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Rossana</namePart>
        <namePart type="family">Cunha</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Samira</namePart>
        <namePart type="family">Shaikh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Zeerak</namePart>
        <namePart type="family">Waseem</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Florence, Italy</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We present initial experiments to evaluate the performance of tasks such as Part of Speech Tagging on data corrupted by Optical Character Recognition (OCR). Our results, based on English and German data, using artificial experiments as well as initial real OCRed data indicate that already a small drop in OCR quality considerably increases the error rates, which would have a significant impact on subsequent processing steps.</abstract>
    <identifier type="citekey">mieskes-schmunk-2019-ocr</identifier>
    <location>
      <url>https://aclanthology.org/W19-3633</url>
    </location>
    <part>
      <date>2019-08</date>
      <extent unit="page">
        <start>102</start>
        <end>105</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T OCR Quality and NLP Preprocessing
%A Mieskes, Margot
%A Schmunk, Stefan
%Y Axelrod, Amittai
%Y Yang, Diyi
%Y Cunha, Rossana
%Y Shaikh, Samira
%Y Waseem, Zeerak
%S Proceedings of the 2019 Workshop on Widening NLP
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence, Italy
%F mieskes-schmunk-2019-ocr
%X We present initial experiments to evaluate the performance of tasks such as Part of Speech Tagging on data corrupted by Optical Character Recognition (OCR). Our results, based on English and German data, using artificial experiments as well as initial real OCRed data indicate that already a small drop in OCR quality considerably increases the error rates, which would have a significant impact on subsequent processing steps.
%U https://aclanthology.org/W19-3633
%P 102-105
Markdown (Informal)
[OCR Quality and NLP Preprocessing](https://aclanthology.org/W19-3633) (Mieskes & Schmunk, WiNLP 2019)
ACL
- Margot Mieskes and Stefan Schmunk. 2019. OCR Quality and NLP Preprocessing. In Proceedings of the 2019 Workshop on Widening NLP, pages 102–105, Florence, Italy. Association for Computational Linguistics.