From be65623de6076f63198569af8e8c73198565e0e3 Mon Sep 17 00:00:00 2001
From: Ajda
Date: Thu, 12 Dec 2024 12:10:05 +0100
Subject: [PATCH] Topic Modeling: POS tag tests

---
Note: the line breaks and indentation in this patch were restored from a
whitespace-collapsed copy. The hunk line counts are consistent with the
headers, but the exact leading whitespace of the context lines is a
reconstruction -- check it against the target file before applying.

 orangecontrib/text/tests/test_topic_modeling.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/orangecontrib/text/tests/test_topic_modeling.py b/orangecontrib/text/tests/test_topic_modeling.py
index 94cd906d2..16b06cff5 100644
--- a/orangecontrib/text/tests/test_topic_modeling.py
+++ b/orangecontrib/text/tests/test_topic_modeling.py
@@ -6,6 +6,7 @@
 from orangecontrib.text.topics import LdaWrapper, HdpWrapper, LsiWrapper, NmfWrapper
 from orangecontrib.text.corpus import Corpus
 from orangecontrib.text import preprocess
+from orangecontrib.text.tag import AveragedPerceptronTagger
 
 
 class BaseTests:
@@ -82,6 +83,17 @@ def test_existing_attributes(self):
         self.assertEqual(self.model.doc_topic.shape[1],
                          self.model.actual_topics)
 
+    def test_pos_tags(self):
+        corpus = Corpus.from_file('deerwester')
+        pp_list = [preprocess.WordPunctTokenizer(),
+                   AveragedPerceptronTagger(),
+                   preprocess.PosTagFilter("NN")]
+        for pp in pp_list:
+            corpus = pp(corpus)
+        self.model.fit_transform(corpus)
+        self.assertTrue(all("_NN" in word for word in
+                            self.model.get_top_words_by_id(0, 10)[0]))
+
 
 class LDATests(unittest.TestCase, BaseTests):
     def setUp(self):