Source code for nltk.stem.wordnet

# Natural Language Toolkit: WordNet stemmer interface
#
# Copyright (C) 2001-2024 NLTK Project
# Author: Steven Bird <stevenbird1@gmail.com>
#         Edward Loper <edloper@gmail.com>
#         Eric Kafe <kafe.eric@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT


class WordNetLemmatizer:
    """
    WordNet Lemmatizer

    Exposes three ways of lemmatizing a word: ``_morphy()``, ``morphy()``
    and ``lemmatize()``.

    ``lemmatize()`` is the permissive one: it wraps ``_morphy()`` and hands
    back the shortest lemma found in WordNet, falling back to the input
    string itself when nothing is found.

    >>> from nltk.stem import WordNetLemmatizer as wnl
    >>> print(wnl().lemmatize('us', 'n'))
    u

    >>> print(wnl().lemmatize('Anythinggoeszxcv'))
    Anythinggoeszxcv

    """

    def _morphy(self, form, pos, check_exceptions=True):
        """
        Delegate to the WordNet corpus reader's ``_morphy`` lemmatizer and
        return the list of all lemmas it finds in WordNet.

        >>> from nltk.stem import WordNetLemmatizer as wnl
        >>> print(wnl()._morphy('us', 'n'))
        ['us', 'u']

        :param form: The word form to look up.
        :param pos: The Part Of Speech tag, forwarded unchanged.
        :param check_exceptions: Forwarded unchanged to the corpus
            reader's ``_morphy``.
        :return: All lemmas found for ``form``.
        """
        # The corpus reader is imported at call time, not at module import.
        from nltk.corpus import wordnet

        return wordnet._morphy(form, pos, check_exceptions)

    def morphy(self, form, pos=None, check_exceptions=True):
        """
        Delegate to the WordNet corpus reader's ``morphy``, the restrictive
        wrapper around ``_morphy()``.

        The result is the first lemma found in WordNet, or None when no
        lemma is found.

        >>> from nltk.stem import WordNetLemmatizer as wnl
        >>> print(wnl().morphy('us', 'n'))
        us

        >>> print(wnl().morphy('catss'))
        None

        :param form: The word form to look up.
        :param pos: The Part Of Speech tag, forwarded unchanged
            (defaults to None).
        :param check_exceptions: Forwarded unchanged to the corpus
            reader's ``morphy``.
        :return: The first lemma found, or None.
        """
        # The corpus reader is imported at call time, not at module import.
        from nltk.corpus import wordnet

        return wordnet.morphy(form, pos, check_exceptions)

    def lemmatize(self, word: str, pos: str = "n") -> str:
        """Return the shortest lemma that ``_morphy()`` proposes for `word`.

        The candidates come from the wordnet corpus reader's built-in
        _morphy function. When it yields no candidate at all, `word` is
        returned unchanged.

        >>> from nltk.stem import WordNetLemmatizer as wnl
        >>> print(wnl().lemmatize('dogs'))
        dog

        >>> print(wnl().lemmatize('churches'))
        church

        >>> print(wnl().lemmatize('aardwolves'))
        aardwolf

        >>> print(wnl().lemmatize('abaci'))
        abacus

        >>> print(wnl().lemmatize('hardrock'))
        hardrock

        :param word: The input word to lemmatize.
        :type word: str
        :param pos: The Part Of Speech tag. Valid options are `"n"` for nouns,
            `"v"` for verbs, `"a"` for adjectives, `"r"` for adverbs and `"s"`
            for satellite adjectives.
        :type pos: str
        :return: The shortest lemma of `word`, for the given `pos`.
        """
        candidates = self._morphy(word, pos)
        if not candidates:
            # Nothing found in WordNet: hand the input back untouched.
            return word
        # min() keeps the earliest candidate when several share the
        # minimum length.
        return min(candidates, key=len)

    def __repr__(self):
        """Return the fixed display string for this lemmatizer."""
        return "<WordNetLemmatizer>"