“Lemmatisierung” Code-Antworten

Lemmatisierung

# Load the spaCy "en_core_web_sm" pipeline once at module level so that
# text_cleaning() reuses the same loaded pipeline on every call.
nlp = spacy.load("en_core_web_sm")
def text_cleaning(
    text,
    stop_words=stopwords.words('english'),
    allow_postags=set(['NOUN', 'VERB', 'ADJ', 'ADV', 'PROPN']),
):
    """Normalize *text* and return its filtered lemmas as one string.

    The text is reduced to lowercase ASCII letters separated by single
    spaces, run through the module-level spaCy pipeline ``nlp``, and every
    token that is not a stop word and whose ``pos_`` tag is in
    *allow_postags* contributes its lemma to the result.

    Args:
        text: Raw input string.
        stop_words: Collection of lowercase words to drop. The default is
            evaluated once, when the function is defined.
        allow_postags: ``token.pos_`` values to keep.

    Returns:
        The kept lemmas joined by single spaces ("" when nothing is kept).
    """
    # Every run of characters other than A-Z / a-z becomes one space. This
    # already removes digits, so no separate digit-stripping pass is needed.
    text = re.sub(r"[^A-Za-z]+", " ", text).lower()

    # Hash the stop words once per call instead of scanning a list for each
    # token. Other container types are left untouched so their own
    # membership semantics are preserved.
    if isinstance(stop_words, (list, tuple)):
        stop_words = frozenset(stop_words)

    return ' '.join(
        token.lemma_
        for token in nlp(text)
        if token.text not in stop_words and token.pos_ in allow_postags
    )

Lazy Leopard