Skip to content

Commit

Permalink
Browse files: browse the repository at this point in the history
  • Loading branch information
KennethEnevoldsen committed Apr 24, 2024
2 parents 6faae86 + 7b595d7 commit f3044b1
Show file tree
Hide file tree
Showing 50 changed files with 381 additions and 329 deletions.
2 changes: 1 addition & 1 deletion docs/mmteb/points/480.jsonl
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
{"GitHub": "dokato", "New dataset": 38}
{"GitHub": "dokato", "New dataset": 2}
{"GitHub": "isaac-chung", "Review PR": 2}
3 changes: 3 additions & 0 deletions docs/mmteb/points/527.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"GitHub": "isaac-chung", "Review PR": 2}
{"GitHub": "KennethEnevoldsen", "Review PR": 2}
{"GitHub": "imenelydiaker", "Bug fixes": 3}
1 change: 1 addition & 0 deletions mteb/tasks/Classification/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from .eng.EmotionClassification import *
from .eng.ImdbClassification import *
from .eng.NewsClassification import *
from .eng.ToxicChatClassification import *
from .eng.ToxicConversationsClassification import *
from .eng.TweetSentimentExtractionClassification import *
from .eng.YelpReviewFullClassification import *
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,6 @@ class RestaurantReviewSentimentClassification(AbsTaskClassification):
def dataset_transform(self):
# labels: 0 negative, 1 positive
self.dataset = self.dataset.rename_column("polarity", "label")
self.dataset["train"] = (
self.dataset["train"].shuffle(seed=self.seed).select(range(N_SAMPLES))
self.dataset = self.stratified_subsampling(
self.dataset, seed=self.seed, splits=["train"]
)
4 changes: 2 additions & 2 deletions mteb/tasks/Classification/ara/TweetEmotionClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,6 @@ class TweetEmotionClassification(AbsTaskClassification):

def dataset_transform(self):
self.dataset = self.dataset.rename_column("tweet", "text")
self.dataset["train"] = (
self.dataset["train"].shuffle(seed=self.seed).select(range(N_SAMPLES))
self.dataset = self.stratified_subsampling(
self.dataset, seed=self.seed, splits=["train"]
)
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,6 @@ class BengaliHateSpeechClassification(AbsTaskClassification):
)

def dataset_transform(self):
self.dataset["train"] = self.dataset["train"].select(range(2048))
self.dataset = self.stratified_subsampling(
self.dataset, seed=self.seed, splits=["train"]
)
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,7 @@
from mteb.abstasks import AbsTaskClassification
from mteb.abstasks.TaskMetadata import TaskMetadata

_LANGS = [
"eng-Latn",
"pol-Latn",
"ron-Latn",
"glg-Latn",
"lit-Latn",
"bre-Latn",
"mlt-Latn",
"spa-Latn",
"swa-Latn",
"slv-Latn",
"msa-Latn",
"tgl-Latn",
"ita-Latn",
"est-Latn",
"mlg-Latn",
"xho-Latn",
"que-Latn",
"eus-Latn",
"nor-Latn",
"cym-Latn",
"cat-Latn",
"fra-Latn",
"nno-Latn",
"nld-Latn",
"ido-Latn",
"por-Latn",
"fin-Latn",
"deu-Latn",
"dan-Latn",
]
_EVAL_SPLITS = ["test"]


class ToxicChatClassification(AbsTaskClassification):
Expand All @@ -56,8 +26,8 @@ class ToxicChatClassification(AbsTaskClassification):
},
type="Classification",
category="s2s",
eval_splits=["test"],
eval_langs=_LANGS,
eval_splits=_EVAL_SPLITS,
eval_langs=["eng-Latn"],
main_score="accuracy",
date=("2023-10-26", "2024-01-31"),
form=["written"],
Expand All @@ -81,14 +51,17 @@ class ToxicChatClassification(AbsTaskClassification):
)

def dataset_transform(self):
# only use human-annotated data
self.dataset = self.dataset["test"].filter(lambda x: x["human_annotation"])
keep_cols = ["user_input", "toxicity"]
rename_dict = dict(zip(keep_cols, ["text", "label"]))
remove_cols = [col for col in self.dataset.column_names if col not in keep_cols]
remove_cols = [
col
for col in self.dataset[_EVAL_SPLITS[0]].column_names
if col not in keep_cols
]
self.dataset = self.dataset.rename_columns(rename_dict)
self.dataset = self.stratified_subsampling(
self.dataset, seed=self.seed, splits=["test"]
)
# only use human-annotated data
self.dataset = self.dataset.filter(lambda x: x["human_annotation"])
self.dataset = self.dataset.remove_columns(remove_cols)
self.dataset = self.dataset.class_encode_column("label")
self.dataset = self.dataset.train_test_split(
test_size=0.5, seed=self.seed, stratify_by_column="label"
) # balanced sampling across types of hate speech
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,8 @@ def metadata_dict(self) -> dict[str, str]:
metadata_dict["n_experiments"] = 10
metadata_dict["samples_per_label"] = 16
return metadata_dict

def dataset_transform(self):
self.dataset = self.stratified_subsampling(
self.dataset, seed=self.seed, splits=["test"]
)
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,6 @@ class FilipinoHateSpeechClassification(AbsTaskClassification):
)

def dataset_transform(self):
self.dataset["validation"] = (
self.dataset["validation"]
.shuffle(seed=self.seed)
.select(range(TEST_SAMPLES))
)
self.dataset["test"] = (
self.dataset["test"].shuffle(seed=self.seed).select(range(TEST_SAMPLES))
self.dataset = self.stratified_subsampling(
self.dataset, seed=self.seed, splits=["validation", "test"]
)
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,6 @@ class MovieReviewSentimentClassification(AbsTaskClassification):

def dataset_transform(self):
self.dataset = self.dataset.rename_column("review", "text")
self.dataset["validation"] = (
self.dataset["validation"].shuffle(seed=self.seed).select(range(N_SAMPLES))
)
self.dataset["test"] = (
self.dataset["test"].shuffle(seed=self.seed).select(range(N_SAMPLES))
self.dataset = self.stratified_subsampling(
self.dataset, seed=self.seed, splits=["validation", "test"]
)
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,6 @@ def dataset_transform(self):
self.dataset = self.dataset.rename_columns(
{"Sentence": "text", "Discourse Mode": "label"}
).remove_columns(["Story_no"])
self.dataset["train"] = self.dataset["train"].select(range(2048))
self.dataset = self.stratified_subsampling(
self.dataset, seed=self.seed, splits=["train"]
)
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,6 @@ def dataset_transform(self):
self.dataset = self.dataset.remove_columns(["label"]).rename_columns(
{"title": "text", "label_score": "label"}
)
self.dataset["train"] = self.dataset["train"].select(range(2048))
self.dataset = self.stratified_subsampling(
self.dataset, seed=self.seed, splits=["train"]
)
12 changes: 6 additions & 6 deletions mteb/tasks/Classification/jpn/WRIMEClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,9 @@ def dataset_transform(self):
self.dataset = self.dataset.flatten().select_columns(
["sentence", "avg_readers.sentiment"]
)
self.dataset = self.dataset.rename_column("sentence", "text")
self.dataset = self.dataset.rename_column("avg_readers.sentiment", "label")
# random downsample to 2048
self.dataset["test"] = self.dataset["test"].shuffle(seed=42)
max_samples = min(2048, len(self.dataset["test"]))
self.dataset["test"] = self.dataset["test"].select(range(max_samples))
self.dataset = self.dataset.rename_columns(
{"sentence": "text", "avg_readers.sentiment": "label"}
)
self.dataset = self.stratified_subsampling(
self.dataset, seed=self.seed, splits=["test"]
)
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,7 @@ class MultiHateClassification(MultilingualTask, AbsTaskClassification):
)

def load_data(self, **kwargs):
"""
Load dataset from HuggingFace hub
"""
"""Load dataset from HuggingFace hub"""
if self.data_loaded:
return
self.dataset = {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,5 +48,6 @@ def metadata_dict(self) -> dict[str, str]:
return metadata_dict

def dataset_transform(self):
self.dataset = self.dataset.rename_column("sentence", "text")
self.dataset = self.dataset.rename_column("language", "label")
self.dataset = self.dataset.rename_columns(
{"sentence": "text", "language": "label"}
)
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,6 @@ class RomanianSentimentClassification(AbsTaskClassification):

def dataset_transform(self):
self.dataset = self.dataset.rename_column("sentence", "text")
self.dataset["test"] = (
self.dataset["test"].shuffle(seed=self.seed).select(range(TEST_SAMPLES))
self.dataset = self.stratified_subsampling(
self.dataset, seed=self.seed, splits=["test"]
)
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ class TurkishMovieSentimentClassification(AbsTaskClassification):
url={https://api.semanticscholar.org/CorpusID:3912960}
}
""",
n_samples={"train": 7972, "test": 2644},
avg_character_length={"train": 141.03, "test": 141.50},
n_samples={"test": 2644},
avg_character_length={"test": 141.50},
)

def dataset_transform(self):
self.dataset = self.stratified_subsampling(
self.dataset, seed=self.seed, splits=["test"]
)
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,6 @@ class TurkishProductSentimentClassification(AbsTaskClassification):
url={https://api.semanticscholar.org/CorpusID:3912960}
}
""",
n_samples={"train": 4800, "test": 800},
avg_character_length={"train": 247.75, "test": 246.85},
n_samples={"test": 800},
avg_character_length={"test": 246.85},
)
46 changes: 6 additions & 40 deletions mteb/tasks/Classification/vie/VieStudentFeedbackClassification.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
from __future__ import annotations

import random
from collections import Counter

from mteb.abstasks import AbsTaskClassification
from mteb.abstasks.TaskMetadata import TaskMetadata

Expand Down Expand Up @@ -47,40 +44,9 @@ class VieStudentFeedbackClassification(AbsTaskClassification):
)

def dataset_transform(self):
seed = 42
random.seed(seed)
self.dataset = self.dataset.rename_column("sentence", "text")
self.dataset = self.dataset.rename_column("sentiment", "label")

for split in ["test"]:
ds = self.dataset[split]
# Determine number of classes and samples per class
class_count = Counter([sample["label"] for sample in ds])
num_classes = len(class_count)
total_samples = min(TEST_SAMPLES, len(ds))
samples_per_class = total_samples // num_classes

# Try to maintain class balance
balanced_samples = []
for label, count in class_count.items():
indices = [i for i, sample in enumerate(ds) if sample["label"] == label]
if count <= samples_per_class:
balanced_samples.extend(indices)
else:
balanced_samples.extend(random.sample(indices, samples_per_class))

# Add missing quantity since minority classes might have too few
if len(balanced_samples) < total_samples:
extra_samples_needed = total_samples - len(balanced_samples)
remaining_indices = [
i for i in range(len(ds)) if i not in balanced_samples
]
balanced_samples.extend(
random.sample(remaining_indices, extra_samples_needed)
)

test_data = ds.select(balanced_samples)
self.dataset["test"] = test_data
assert (
len(test_data) == TEST_SAMPLES
), f"Exceeded {TEST_SAMPLES} samples for 'test' split."
self.dataset = self.dataset.rename_columns(
{"sentence": "text", "sentiment": "label"}
)
self.dataset = self.stratified_subsampling(
self.dataset, seed=self.seed, splits=["test"]
)
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,8 @@ def metadata_dict(self) -> dict[str, str]:
metadata_dict["n_experiments"] = 10
metadata_dict["samples_per_label"] = 32
return metadata_dict

def dataset_transform(self):
self.dataset = self.stratified_subsampling(
self.dataset, seed=self.seed, splits=["test"]
)
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
{
"dataset_revision": "99612296bc093f0720cac7d7cbfcb67eecf1ca2f",
"mteb_dataset_name": "BengaliHateSpeechClassification",
"mteb_version": "1.6.12",
"mteb_version": "1.7.11",
"train": {
"accuracy": 0.578271484375,
"accuracy_stderr": 0.036551398523713245,
"evaluation_time": 10.4,
"f1": 0.4925466352014416,
"f1_stderr": 0.029539127589504242,
"main_score": 0.4925466352014416
"accuracy": 0.579931640625,
"accuracy_stderr": 0.0310817384762391,
"evaluation_time": 9.58,
"f1": 0.492375715944452,
"f1_stderr": 0.032669816416586527,
"main_score": 0.492375715944452
}
}
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
{
"dataset_revision": "1994e9bb7f3ec07518e3f0d9e870cb293e234686",
"mteb_dataset_name": "FilipinoHateSpeechClassification",
"mteb_version": "1.7.0",
"mteb_version": "1.7.14",
"test": {
"accuracy": 0.588623046875,
"accuracy_stderr": 0.04424121838518151,
"ap": 0.5235485680653846,
"ap_stderr": 0.0292128856610349,
"evaluation_time": 3.28,
"f1": 0.5815646745354887,
"f1_stderr": 0.04485825288601201,
"main_score": 0.588623046875
"accuracy": 0.58359375,
"accuracy_stderr": 0.04045951590625083,
"ap": 0.5183748795941082,
"ap_stderr": 0.026155224372107547,
"evaluation_time": 7.17,
"f1": 0.576893865333713,
"f1_stderr": 0.04030538689824078,
"main_score": 0.58359375
},
"validation": {
"accuracy": 0.57958984375,
"accuracy_stderr": 0.042122005910901344,
"ap": 0.49819582657111905,
"ap_stderr": 0.02577058528891724,
"evaluation_time": 4.19,
"f1": 0.5716219492625749,
"f1_stderr": 0.042080406267862754,
"main_score": 0.57958984375
"accuracy": 0.57998046875,
"accuracy_stderr": 0.04052722609169668,
"ap": 0.5067390857072789,
"ap_stderr": 0.02591176836681942,
"evaluation_time": 8.38,
"f1": 0.5733334721155943,
"f1_stderr": 0.03945981336363871,
"main_score": 0.57998046875
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"dataset_revision": "218ce687943a0da435d6d62751a4ab216be6cd40",
"mteb_dataset_name": "HindiDiscourseClassification",
"mteb_version": "1.7.14",
"train": {
"accuracy": 0.394580078125,
"accuracy_stderr": 0.03347234746154177,
"evaluation_time": 6.03,
"f1": 0.32526446425415323,
"f1_stderr": 0.019650476318047193,
"main_score": 0.394580078125
}
}
Loading

0 comments on commit f3044b1

Please sign in to comment.