Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add parameter from_topn in evaluate_word_analogies #3400

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions gensim/models/keyedvectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1284,7 +1284,7 @@ def _log_evaluate_word_analogies(section):

def evaluate_word_analogies(
self, analogies, restrict_vocab=300000, case_insensitive=True,
dummy4unknown=False, similarity_function='most_similar'):
dummy4unknown=False, similarity_function='most_similar', from_topn=1):
"""Compute performance of the model on an analogy test set.

The accuracy is reported (printed to log and returned as a score) for each section separately,
Expand Down Expand Up @@ -1312,6 +1312,8 @@ def evaluate_word_analogies(
Otherwise, these tuples are skipped entirely and not used in the evaluation.
similarity_function : str, optional
Function name used for similarity calculation.
from_topn : int, optional
If `similarity_function` is `most_similar`, the number of top-ranked most similar words considered as candidate predictions for each analogy.

Returns
-------
Expand Down Expand Up @@ -1368,14 +1370,19 @@ def evaluate_word_analogies(
# find the most likely prediction using 3CosAdd (vector offset) method
# TODO: implement 3CosMul and set-based methods for solving analogies

sims = self.most_similar(positive=[b, c], negative=[a], topn=5, restrict_vocab=restrict_vocab)
sims = self.most_similar(positive=[b, c], negative=[a], topn=5+from_topn-1, restrict_vocab=restrict_vocab)
self.key_to_index = original_key_to_index
attempts = 0
for element in sims:
predicted = element[0].upper() if case_insensitive else element[0]
if predicted in ok_vocab and predicted not in ignore:
if predicted != expected:
logger.debug("%s: expected %s, predicted %s", line.strip(), expected, predicted)
break
attempts += 1
else:
break
if attempts == from_topn - 1:
break
if predicted == expected:
section['correct'].append((a, b, c, expected))
else:
Expand Down