Skip to content

Commit

Permalink
Merge pull request #1020 from PrimozGodec/topic-change-default
Browse files Browse the repository at this point in the history
[ENH] Topic modelling - Reorder methods by relevance
  • Loading branch information
ajdapretnar authored Nov 10, 2023
2 parents 985fe05 + 12e7a24 commit 64c4e25
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 8 deletions.
14 changes: 13 additions & 1 deletion orangecontrib/text/widgets/owtopicmodeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,13 +143,14 @@ class Outputs:
want_main_area = True

methods = [
(LsiWidget, 'lsi'),
(LdaWidget, 'lda'),
(LsiWidget, 'lsi'),
(HdpWidget, 'hdp'),
(NmfWidget, 'nmf')
]

# Settings
settings_version = 2
autocommit = settings.Setting(True)
method_index = settings.Setting(0)

Expand Down Expand Up @@ -266,6 +267,8 @@ def on_done(self, corpus):
if self.model.name == "Latent Dirichlet Allocation":
bound = self.model.model.log_perplexity(infer_ngrams_corpus(corpus))
self.perplexity = "{:.5f}".format(np.exp2(-bound))
else:
self.perplexity = "n/a"
# for small corpora it is slower to use more processes
# there is no good estimate of when multiprocessing helps, but it
# definitely does not help for corpora smaller than 100
Expand Down Expand Up @@ -299,6 +302,15 @@ def send_topic_by_id(self, topic_id=None):
self.Outputs.selected_topic.send(
self.model.get_topics_table_by_id(topic_id))

@classmethod
def migrate_settings(cls, settings, version=0):
    """Upgrade stored settings to the current settings version, in place.

    Settings version 2 swapped the positions of the first two methods
    (LSI and LDA), so a ``method_index`` of 0 or 1 stored by an older
    version is remapped to keep pointing at the same method. Any other
    index is left untouched.

    Parameters
    ----------
    settings : dict
        Stored settings; ``settings["method_index"]`` is rewritten when a
        migration applies.
    version : int or None
        Version the settings were stored with. ``None`` is handled like 0.
    """
    # A missing version cannot be compared with an int; treat it as older
    # than every numbered version so the migration still applies.
    if version is None:
        version = 0
    if version < 2 and "method_index" in settings:
        # Version 2 swapped the first and second method (lsi, lda); remap
        # the stored index so the workflow loads the method it was saved
        # with.
        swapped = {0: 1, 1: 0}
        stored_index = settings["method_index"]
        settings["method_index"] = swapped.get(stored_index, stored_index)


class TopicViewerTreeWidgetItem(QTreeWidgetItem):
def __init__(self, topic_id, words, weights, parent,
Expand Down
37 changes: 30 additions & 7 deletions orangecontrib/text/widgets/tests/test_owtopicmodeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ def test_data(self):
self.assertIsNone(output)

def test_saved_selection(self):
self.widget.method_index = 1
self.send_signal(self.widget.Inputs.corpus, self.corpus)
self.wait_until_finished()

Expand Down Expand Up @@ -58,17 +57,41 @@ def test_topic_evaluation(self):
self.send_signal(self.widget.Inputs.corpus, self.corpus)
self.wait_until_finished()

# test LSI
self.assertEqual(self.widget.perplexity, "n/a")
self.assertNotEqual(self.widget.coherence, "n/a")

# test LDA, which is the only one with log perplexity
self.assertNotEqual(self.widget.perplexity, "n/a")
self.assertTrue(self.widget.coherence)

# test LSI
self.widget.method_index = 1
self.widget.commit.now()
self.wait_until_finished()
self.assertEqual(self.widget.perplexity, "n/a")
self.assertNotEqual(self.widget.coherence, "n/a")

self.assertNotEqual(self.widget.perplexity, "n/a")
self.assertTrue(self.widget.coherence)
def test_migrate_settings_transform(self):
    """Settings stored with version 1 must load the method they were saved with."""
    # (stored method_index, method_index after migration, model name)
    expectations = (
        # 0 used to be LSI in version <2 - it is on index 1 now
        (0, 1, "Latent Semantic Indexing"),
        # 1 used to be LDA in version <2 - it is on index 0 now
        (1, 0, "Latent Dirichlet Allocation"),
        # 2 is unchanged - still HDP
        (2, 2, "Hierarchical Dirichlet Process"),
        # 3 is unchanged - still NMF
        (3, 3, "Negative Matrix Factorization"),
    )
    for stored_index, migrated_index, model_name in expectations:
        # Build a fresh dict per widget, exactly one per stored index.
        stored = {"__version__": 1, "method_index": stored_index}
        widget = self.create_widget(OWTopicModeling, stored_settings=stored)
        self.assertEqual(migrated_index, widget.method_index)
        self.assertEqual(model_name, widget.model.name)


if __name__ == "__main__":
Expand Down

0 comments on commit 64c4e25

Please sign in to comment.