From c6a70828cdc87d7e7c567252f627235381bf7966 Mon Sep 17 00:00:00 2001 From: PrimozGodec Date: Fri, 26 Jan 2024 14:42:02 +0100 Subject: [PATCH] Sentiment - Store ISO language in settings --- .../text/widgets/owsentimentanalysis.py | 56 +++++++++---------- .../widgets/tests/test_owsentimentanalysis.py | 38 +++++++++---- 2 files changed, 53 insertions(+), 41 deletions(-) diff --git a/orangecontrib/text/widgets/owsentimentanalysis.py b/orangecontrib/text/widgets/owsentimentanalysis.py index 16efb9a49..de06f898d 100644 --- a/orangecontrib/text/widgets/owsentimentanalysis.py +++ b/orangecontrib/text/widgets/owsentimentanalysis.py @@ -3,12 +3,14 @@ from AnyQt.QtCore import Qt from AnyQt.QtWidgets import QGridLayout, QLabel -from Orange.widgets import gui, settings +from Orange.widgets import gui from Orange.widgets.utils.concurrent import ConcurrentWidgetMixin, TaskState from Orange.widgets.utils.signals import Input, Output from Orange.widgets.widget import OWWidget, Msg +from orangewidget.settings import Setting + from orangecontrib.text import Corpus, preprocess -from orangecontrib.text.language import ISO2LANG, LANG2ISO +from orangecontrib.text.language import LanguageModel, LANG2ISO from orangecontrib.text.sentiment import ( VaderSentiment, LiuHuSentiment, @@ -37,24 +39,16 @@ class Inputs: class Outputs: corpus = Output("Corpus", Corpus) - settings_version = 1 + settings_version = 2 want_main_area = False resizing_enabled = False - method_idx: int = settings.Setting(1) - autocommit: bool = settings.Setting(True) - liu_language: str = settings.Setting( - ISO2LANG[LiuHuSentiment.DEFAULT_LANG], schema_only=True - ) - multi_language: str = settings.Setting( - ISO2LANG[MultiSentiment.DEFAULT_LANG], schema_only=True - ) - senti_language: str = settings.Setting( - ISO2LANG[SentiArt.DEFAULT_LANG], schema_only=True - ) - lilah_language: str = settings.Setting( - ISO2LANG[LilahSentiment.DEFAULT_LANG], schema_only=True - ) + method_idx: int = Setting(1) + autocommit: bool = Setting(True) + liu_language: str = Setting(LiuHuSentiment.DEFAULT_LANG, schema_only=True) + multi_language: str = Setting(MultiSentiment.DEFAULT_LANG, schema_only=True) + senti_language: str = Setting(SentiArt.DEFAULT_LANG, schema_only=True) + lilah_language: str = Setting(LilahSentiment.DEFAULT_LANG, schema_only=True) METHODS = [ LiuHuSentiment, @@ -99,9 +93,8 @@ def __init__(self): None, self, "liu_language", - sendSelectedValue=True, contentsLength=10, - items=[ISO2LANG[lg] for lg in LiuHuSentiment.LANGUAGES], + model=LanguageModel(languages=LiuHuSentiment.LANGUAGES), callback=self._method_changed, ) self.vader = gui.appendRadioButton(box, "Vader", addToLayout=False) @@ -112,9 +105,8 @@ def __init__(self): None, self, "multi_language", - sendSelectedValue=True, contentsLength=10, - items=[ISO2LANG[lg] for lg in MultiSentiment.LANGUAGES], + model=LanguageModel(languages=MultiSentiment.LANGUAGES), callback=self._method_changed, ) self.senti_art = gui.appendRadioButton(box, "SentiArt", addToLayout=False) @@ -124,7 +116,7 @@ def __init__(self): "senti_language", sendSelectedValue=True, contentsLength=10, - items=[ISO2LANG[lg] for lg in SentiArt.LANGUAGES], + model=LanguageModel(languages=SentiArt.LANGUAGES), callback=self._method_changed, ) self.lilah_sent = gui.appendRadioButton( @@ -134,9 +126,8 @@ def __init__(self): None, self, "lilah_language", - sendSelectedValue=True, contentsLength=10, - items=[ISO2LANG[lg] for lg in LilahSentiment.LANGUAGES], + model=LanguageModel(languages=LilahSentiment.LANGUAGES), callback=self._method_changed, ) self.custom_list = gui.appendRadioButton( @@ -228,10 +219,10 @@ def __set_language_settings(self): for l_pending, l_setting, model in settings_: if self.pp_corpus and self.pp_corpus.language in model.LANGUAGES: - setattr(self, l_setting, ISO2LANG[self.pp_corpus.language]) + setattr(self, l_setting, self.pp_corpus.language) else: # if Corpus's language not supported use default language - setattr(self, l_setting, ISO2LANG[model.DEFAULT_LANG]) + setattr(self, l_setting, model.DEFAULT_LANG) # when workflow loaded use language saved in workflow if l_pending is not None: @@ -249,13 +240,13 @@ def _compute_sentiment(self): method = self.METHODS[self.method_idx] kwargs = {} if method.name == "Liu Hu": - kwargs = dict(language=LANG2ISO[self.liu_language]) + kwargs = dict(language=self.liu_language) elif method.name == "Multilingual Sentiment": - kwargs = dict(language=LANG2ISO[self.multi_language]) + kwargs = dict(language=self.multi_language) elif method.name == "SentiArt": - kwargs = dict(language=LANG2ISO[self.senti_language]) + kwargs = dict(language=self.senti_language) elif method.name == "LiLaH Sentiment": - kwargs = dict(language=LANG2ISO[self.lilah_language]) + kwargs = dict(language=self.lilah_language) elif method.name == "Custom Dictionaries": kwargs = dict(pos=self.pos_file, neg=self.neg_file) if bool(self.pos_file) != bool(self.neg_file): # xor: one of them None @@ -313,6 +304,11 @@ def migrate_settings(cls, settings, version): method_idx = settings["method_idx"] if method_idx == 4: settings["metric_idx"] = 5 + if version is None or version < 2: + s = ("liu_language", "lilah_language", "multi_language", "senti_language") + for lang_set in s: + if lang_set in settings: + settings[lang_set] = LANG2ISO[settings[lang_set]] if __name__ == '__main__': diff --git a/orangecontrib/text/widgets/tests/test_owsentimentanalysis.py b/orangecontrib/text/widgets/tests/test_owsentimentanalysis.py index 1ff2dddc0..8d51f237b 100644 --- a/orangecontrib/text/widgets/tests/test_owsentimentanalysis.py +++ b/orangecontrib/text/widgets/tests/test_owsentimentanalysis.py @@ -12,7 +12,9 @@ from orangecontrib.text import preprocess from orangecontrib.text.corpus import Corpus from orangecontrib.text.language import ISO2LANG -from orangecontrib.text.sentiment import DictionaryNotFound +from orangecontrib.text.sentiment import ( + DictionaryNotFound, LiuHuSentiment, MultiSentiment, SentiArt, LilahSentiment +) from orangecontrib.text.widgets.owsentimentanalysis import OWSentimentAnalysis MS_FILES = [ @@ -164,6 +166,20 @@ def test_migrates_settings(self): OWSentimentAnalysis.migrate_settings(settings, version=None) self.assertTrue(settings.get("method_idx", 5)) + def test_migrate_language_settings(self): + methods = ( + ("liu_language", LiuHuSentiment), + ("multi_language", MultiSentiment), + ("senti_language", SentiArt), + ("lilah_language", LilahSentiment), + ) + for setting, method in methods: + if hasattr(method, "LANGUAGES"): + for lang in getattr(method, "LANGUAGES"): + se = {setting: ISO2LANG[lang], "__version__": 1} + widget = self.create_widget(OWSentimentAnalysis, stored_settings=se) + self.assertEqual(lang, getattr(widget, setting)) + def test_preprocessed(self): widget = self.create_widget(OWSentimentAnalysis) corpus = self.corpus.copy() @@ -184,7 +200,7 @@ def test_language_from_corpus(self): w = self.widget settings = [ w.liu_language, - "English", + "en", w.multi_language, w.senti_language, w.lilah_language, @@ -198,7 +214,7 @@ def test_language_from_corpus(self): self.send_signal(self.widget.Inputs.corpus, self.corpus) self.widget.findChildren(QRadioButton)[i].click() self.assertIsNotNone(self.get_output(self.widget.Outputs.corpus)) - self.assertEqual(ISO2LANG[s], sett) + self.assertEqual(s, sett) # try with unsupported language - use default language istead self.corpus.attributes["language"] = ns @@ -213,18 +229,18 @@ def test_language_from_settings(self): simulate.combobox_activate_item(self.widget.senti_box, "German") simulate.combobox_activate_item(self.widget.lilah_box, "Croatian") - self.assertEqual("Slovenian", self.widget.liu_language) - self.assertEqual("Spanish", self.widget.multi_language) - self.assertEqual("German", self.widget.senti_language) - self.assertEqual("Croatian", self.widget.lilah_language) + self.assertEqual("sl", self.widget.liu_language) + self.assertEqual("es", self.widget.multi_language) + self.assertEqual("de", self.widget.senti_language) + self.assertEqual("hr", self.widget.lilah_language) settings = self.widget.settingsHandler.pack_data(self.widget) widget = self.create_widget(OWSentimentAnalysis, stored_settings=settings) self.send_signal(widget.Inputs.corpus, self.corpus, widget=widget) - self.assertEqual("Slovenian", widget.liu_language) - self.assertEqual("Spanish", widget.multi_language) - self.assertEqual("German", widget.senti_language) - self.assertEqual("Croatian", widget.lilah_language) + self.assertEqual("sl", widget.liu_language) + self.assertEqual("es", widget.multi_language) + self.assertEqual("de", widget.senti_language) + self.assertEqual("hr", widget.lilah_language) def test_dictionary_offline(self): """Test case when offline and dictionary not found locally"""