Skip to content

Commit

Permalink
Topic Modeling: POS tag tests
Browse files Browse the repository at this point in the history
  • Loading branch information
ajdapretnar committed Dec 12, 2024
1 parent 13f71c6 commit be65623
Showing 1 changed file with 12 additions and 0 deletions.
12 changes: 12 additions & 0 deletions orangecontrib/text/tests/test_topic_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from orangecontrib.text.topics import LdaWrapper, HdpWrapper, LsiWrapper, NmfWrapper
from orangecontrib.text.corpus import Corpus
from orangecontrib.text import preprocess
from orangecontrib.text.tag import AveragedPerceptronTagger


class BaseTests:
Expand Down Expand Up @@ -82,6 +83,17 @@ def test_existing_attributes(self):
self.assertEqual(self.model.doc_topic.shape[1],
self.model.actual_topics)

def test_pos_tags(self):
    """Fit the model on a POS-tagged, tag-filtered corpus and check the top words.

    The 'deerwester' corpus is tokenized, tagged with the averaged
    perceptron tagger and passed through ``PosTagFilter("NN")``.  After
    fitting, every entry in the first element returned by
    ``get_top_words_by_id(0, 10)`` must contain the ``"_NN"`` marker.
    """
    # Preprocessors are applied in this exact order; each one takes the
    # corpus produced by the previous step and returns a new corpus.
    pipeline = (
        preprocess.WordPunctTokenizer(),
        AveragedPerceptronTagger(),
        preprocess.PosTagFilter("NN"),
    )
    tagged = Corpus.from_file('deerwester')
    for step in pipeline:
        tagged = step(tagged)

    self.model.fit_transform(tagged)

    # NOTE(review): presumably element 0 holds the words of topic 0 and 10
    # is the number of words requested -- confirm against the wrapper API.
    top_words = self.model.get_top_words_by_id(0, 10)[0]
    self.assertTrue(all("_NN" in word for word in top_words))


class LDATests(unittest.TestCase, BaseTests):
def setUp(self):
Expand Down

0 comments on commit be65623

Please sign in to comment.