Skip to content

Commit

Permalink
Merge pull request #23 from navigating-stories/refactor
Browse files Browse the repository at this point in the history
creates working tagging widget to generate data table to be used down…
  • Loading branch information
kodymoodley committed Jan 12, 2024
2 parents 21a0b5b + 02ee75a commit 56616e7
Show file tree
Hide file tree
Showing 10 changed files with 1,937 additions and 624 deletions.
87 changes: 86 additions & 1 deletion orangecontrib/storynavigation/modules/constants.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,121 @@
import sys

if sys.version_info < (3, 9):
# importlib.resources either doesn't exist or lacks the files()
# function, so use the PyPI version:
import importlib_resources
else:
import importlib.resources as importlib_resources

# metrics for measuring importance of characters or actors in the story
AGENT_PROMINENCE_METRICS = ['Subject frequency', 'Subject frequency (normalized)']
SFREQ_METRIC = 'Subject frequency'
SFREQ_NORM_METRIC = 'Subject frequency (normalized)'

# list of punctuation characters
PUNC = '''!()-[]{};:'"\,<>./?@#$%^&*_~0123456789'''

# name of storynavigator package directory
MAIN_PACKAGE = "storynavigation"
# directory name for resource files for usage by storynavigator add-on
RESOURCES_SUBPACKAGE = "resources"

# Supported languages
NL = 'NL'
EN = 'EN'
SUPPORTED_LANGUAGES = [EN, NL]

# filename from which to retrieve a list of dutch stopwords
NL_STOPWORDS_FILENAME = "dutchstopwords.txt"
# filename from which to retrieve a list of dutch past tense verbs
NL_PAST_TENSE_FILENAME = "past_tense_verbs_dutch.txt"
# filename from which to retrieve a list of dutch present tense verbs
NL_PRESENT_TENSE_FILENAME = "present_tense_verbs_dutch.txt"
# filename from which to retrieve a list of dutch false positive verbs
NL_FALSE_POSITIVE_VERB_FILENAME = "false_positive_verbs.txt"
NL_FALSE_POSITIVE_VERB_FILENAME = "false_positive_verbs_dutch.txt"
# filename from which to retrieve a list of dutch stopwords
NL_PRONOUNS_FILENAME = "dutchpronouns.txt"

# filename from which to retrieve a list of english stopwords
EN_STOPWORDS_FILENAME = "englishstopwords.txt"
# filename from which to retrieve a list of dutch past tense verbs
EN_PAST_TENSE_FILENAME = "past_tense_verbs_english.txt"
# filename from which to retrieve a list of dutch present tense verbs
EN_PRESENT_TENSE_FILENAME = "present_tense_verbs_english.txt"
# filename from which to retrieve a list of dutch false positive verbs
EN_FALSE_POSITIVE_VERB_FILENAME = "false_positive_verbs_english.txt"
# filename from which to retrieve a list of dutch stopwords
EN_PRONOUNS_FILENAME = "englishpronouns.txt"


# package paths
PKG = importlib_resources.files(MAIN_PACKAGE)

NL_STOPWORDS_FILE = (
PKG / RESOURCES_SUBPACKAGE / NL_STOPWORDS_FILENAME
)

NL_PRONOUNS_FILE = (
PKG / RESOURCES_SUBPACKAGE / NL_PRONOUNS_FILENAME
)

NL_PAST_TENSE_FILE = (
PKG / RESOURCES_SUBPACKAGE / NL_PAST_TENSE_FILENAME
)

NL_PRESENT_TENSE_FILE = (
PKG / RESOURCES_SUBPACKAGE / NL_PRESENT_TENSE_FILENAME
)

NL_FALSE_POSITIVE_VERB_FILE = (
PKG / RESOURCES_SUBPACKAGE / NL_FALSE_POSITIVE_VERB_FILENAME
)

EN_STOPWORDS_FILE = (
PKG / RESOURCES_SUBPACKAGE / EN_STOPWORDS_FILENAME
)

EN_PRONOUNS_FILE = (
PKG / RESOURCES_SUBPACKAGE / EN_PRONOUNS_FILENAME
)


EN_PAST_TENSE_FILE = (
PKG / RESOURCES_SUBPACKAGE / EN_PAST_TENSE_FILENAME
)

EN_PRESENT_TENSE_FILE = (
PKG / RESOURCES_SUBPACKAGE / EN_PRESENT_TENSE_FILENAME
)

EN_FALSE_POSITIVE_VERB_FILE = (
PKG / RESOURCES_SUBPACKAGE / EN_FALSE_POSITIVE_VERB_FILENAME
)

# currently selected agent prominence metric
SELECTED_PROMINENCE_METRIC = 'Subject frequency'

# column names for agency table
FREQ_TABLE_HEADER = ['actor', 'raw_frequency']
ACTION_FREQ_TABLE_HEADER = ['action', 'raw_frequency']
ACTION_TENSEFREQ_TABLE_HEADER = ['tense', 'frequency']
SUBFREQ_TABLE_HEADER = ['actor', 'subject_frequency']
CUSTOMFREQ_TABLE_HEADER = ['category', 'frequency', 'category-level']
AGENCY_TABLE_HEADER = ['actor', 'agency']

# Halliday dimensions file
HALLIDAY_FILENAME = "halliday_dimensions_{}.json"

# dutch spacy model (small)
NL_SPACY_MODEL = "nl_core_news_sm"
# dutch spacy model (large)
NL_SPACY_MODEL_LG = "nl_core_news_sm"

# english spacy model (small)
EN_SPACY_MODEL = "en_core_news_sm"
# dutch spacy model (large)
EN_SPACY_MODEL_LG = "en_core_news_lg"

# colors for highlighting words in text
SUBJECT_PRONOUN_HIGHLIGHT_COLOR = "#87CEFA"
SUBJECT_NONPRONOUN_HIGHLIGHT_COLOR = "#ADD8E6"
Expand Down
Loading

0 comments on commit 56616e7

Please sign in to comment.