Skip to content

Commit

Permalink
add points for mmteb and fix comments and bibtex
Browse files (browse the repository at this point in the history)
  • Loading branch information
ManuelFay committed Apr 22, 2024
1 parent 530d77a commit 911c364
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 9 deletions.
6 changes: 6 additions & 0 deletions docs/mmteb/points/460.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{"GitHub": "ManuelFay", "New dataset": 2}
{"GitHub": "isaac-chung", "Review PR": 2}
{"GitHub": "KennethEnevoldsen", "Review PR": 2}
{"GitHub": "Muennighoff", "Review PR": 2}
{"GitHub": "imenelydiaker", "Review PR": 2}

28 changes: 19 additions & 9 deletions mteb/tasks/Retrieval/fra/FQuADRetrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,24 @@ class FQuADRetrieval(AbsTaskRetrieval):
annotations_creators="human-annotated",
dialect=[],
text_creation="created",
bibtex_citation="""@misc{dhoffschmidt2020fquad,
title={FQuAD: French Question Answering Dataset},
author={Martin d'Hoffschmidt and Wacim Belblidia and Tom Brendlé and Quentin Heinrich and Maxime Vidal},
year={2020},
eprint={2002.06071},
archivePrefix={arXiv},
primaryClass={cs.CL}
bibtex_citation="""@inproceedings{dhoffschmidt-etal-2020-fquad,
title = "{FQ}u{AD}: {F}rench Question Answering Dataset",
author = "d{'}Hoffschmidt, Martin and
Belblidia, Wacim and
Heinrich, Quentin and
Brendl{\'e}, Tom and
Vidal, Maxime",
editor = "Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2020",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.findings-emnlp.107",
doi = "10.18653/v1/2020.findings-emnlp.107",
pages = "1193--1208",
}""",
n_samples={"test": 400, "validation": 100},
avg_character_length={"test": 937, "validation": 930},
Expand All @@ -47,12 +58,11 @@ class FQuADRetrieval(AbsTaskRetrieval):
def load_data(self, **kwargs):
if self.data_loaded:
return
# fetch both subsets of the dataset
dataset_raw = datasets.load_dataset(
**self.metadata_dict["dataset"],
)

# set valid_hasAns as the validation split
# use the valid_hasAns and test_hasAns subsets as the validation and test splits (they contain only the queries that have an answer)
dataset_raw["validation"] = dataset_raw["valid_hasAns"]
del dataset_raw["valid_hasAns"]

Expand Down

0 comments on commit 911c364

Please sign in to comment.