Skip to content

Commit

Permalink
docs: Added create task table (#525)
Browse files Browse the repository at this point in the history
* Added get tasks getter
* docs: added points
* added create task table
* docs: added points
* Update .github/workflows/docs.yml
Co-authored-by: Isaac Chung <[email protected]>
* Update docs/create_tasks_table.py
Co-authored-by: Isaac Chung <[email protected]>
---------
Co-authored-by: Isaac Chung <[email protected]>
  • Loading branch information
KennethEnevoldsen authored Apr 24, 2024
1 parent dd3f0a5 commit 5974e05
Show file tree
Hide file tree
Showing 6 changed files with 407 additions and 133 deletions.
41 changes: 41 additions & 0 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# GitHub action for the task table generation.
#
# On every push to main, regenerates the tasks table in docs/tasks.md by running
# docs/create_tasks_table.py and pushes the result back if the table changed.

name: documentation

on:
  push:
    branches: [main]

jobs:
  create-table:
    runs-on: ubuntu-latest
    steps:
      # Check out with the RELEASE secret so the later `git push` is authenticated
      # with that token.
      - uses: actions/checkout@v3
        with:
          token: ${{ secrets.RELEASE }}

      - uses: actions/setup-python@v4
        with:
          python-version: "3.10"
          cache: "pip"

      - name: Install dependencies
        run: |
          make install

      - name: Create table
        run: python docs/create_tasks_table.py

      - name: Push table
        run: |
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          git config --global user.name "github-actions[bot]"
          # Only look at the file that is actually staged below. Checking the whole
          # work tree would enter the commit branch when some other tracked file was
          # modified, and `git commit` would then fail with nothing staged.
          if git diff --quiet -- docs/tasks.md; then
            echo "No changes detected"
          else
            git add docs/tasks.md
            git commit -m "Update tasks table"
            git push
          fi
85 changes: 85 additions & 0 deletions docs/create_tasks_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
from __future__ import annotations

import re
from pathlib import Path

import mteb


def author_from_bibtex(bibtex: str | None) -> str:
    """Create a " (Author et al., Year)" suffix from a BibTeX entry.

    Args:
        bibtex: Raw BibTeX entry expected to contain ``author = {...}`` and
            ``year = {...}`` fields, or None.

    Returns:
        A string such as " (Smith et al., 2020)", with a leading space so it can
        be appended directly to a name. Returns "" when ``bibtex`` is None or
        when no author or no year can be extracted.
    """
    if bibtex is None:
        return ""
    # get authors from bibtex (author = {Authors} or author={Authors})
    author_match = re.search(r"author\s*=\s*{([^}]*)}", bibtex)
    if author_match is None:
        return ""
    year_match = re.search(r"year\s*=\s*{([^}]*)}", bibtex)
    if year_match is None:
        return ""

    # Collapse newlines and indentation first: long author lists are usually
    # wrapped, and a wrapped " and\n" separator would not be split otherwise.
    authors = " ".join(author_match.group(1).split()).split(" and ")
    # Keep only the part before the first comma ("Last, First" -> "Last").
    first_author = authors[0].split(",")[0].strip()
    if not first_author:
        return ""
    # "et al." depends on how many authors there are, not on whether the first
    # author's name happens to be written with a comma.
    et_al = " et al." if len(authors) > 1 else ""
    return f" ({first_author}{et_al}, {year_match.group(1)})"


def task_to_markdown_row(task):
    """Render one task as a single row of the markdown tasks table.

    The cells are, in order: the task name (as a markdown link when the
    metadata has a reference, followed by the citation suffix returned by
    ``author_from_bibtex``), languages, type, category, domains, number of
    samples and average character length. Falsy domains, sample counts and
    lengths are rendered as empty cells.
    """
    meta = task.metadata

    label = meta.name
    if meta.reference:
        label = f"[{label}]({meta.reference})"
    label += author_from_bibtex(meta.bibtex_citation)

    if meta.domains:
        domain_cell = "[" + ", ".join(meta.domains) + "]"
    else:
        domain_cell = ""
    sample_cell = meta.n_samples or ""
    length_cell = meta.avg_character_length or ""

    cells = (
        label,
        meta.languages,
        meta.type,
        meta.category,
        domain_cell,
        sample_cell,
        length_cell,
    )
    return "| " + " | ".join(f"{cell}" for cell in cells) + " |"


def create_tasks_table(tasks):
    """Build the markdown tasks table: header, separator, then one row per task.

    Rows come from ``task_to_markdown_row`` in the order ``tasks`` is iterated,
    each terminated by a newline. The returned string starts with a blank line.
    """
    header = """
| Name | Languages | Type | Category | Domains | # Samples | Avg. Length (Char.) |
|------|-----------|------|----------|---------|-----------|---------------------|
"""
    rows = [task_to_markdown_row(task) + "\n" for task in tasks]
    return header + "".join(rows)


def insert_table(file_path, table):
    """Replace the content between the table markers of a markdown file.

    Everything between ``<!-- TABLE START -->`` and ``<!-- TABLE END -->`` is
    replaced by ``table``. The markers themselves and all text outside them are
    left untouched. The file is rewritten in place.

    Args:
        file_path: Path (``str`` or ``os.PathLike``) of the file to update.
        table: Text to place between the two markers.

    Raises:
        ValueError: If the start marker is missing, or no end marker follows it.
    """
    start = "<!-- TABLE START -->"
    end = "<!-- TABLE END -->"

    with open(file_path, "r", encoding="utf-8") as file:
        md = file.read()

    # Splice by position rather than ``str.replace``. Replacing the old content
    # by value would also rewrite identical text elsewhere in the file, and
    # would insert the table between every character when the markers are
    # adjacent (the old content is then the empty string).
    content_start = md.index(start) + len(start)
    content_end = md.index(end, content_start)
    md = md[:content_start] + table + md[content_end:]

    with open(file_path, "w", encoding="utf-8") as file:
        file.write(md)


def main():
    """Regenerate the tasks table in the ``tasks.md`` next to this script.

    Fetches the tasks from ``mteb.get_tasks()``, orders them by metadata name,
    renders them as a markdown table and writes that table between the table
    markers of ``tasks.md``.
    """
    ordered_tasks = sorted(mteb.get_tasks(), key=lambda task: task.metadata.name)
    target = Path(__file__).parent / "tasks.md"
    insert_table(target, create_tasks_table(ordered_tasks))

# Regenerate the tasks table only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()
2 changes: 2 additions & 0 deletions docs/mmteb/points/525.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{"GitHub": "isaac-chung", "Review PR": 2}
{"GitHub": "KennethEnevoldsen", "Bug fixes": 3}
Loading

0 comments on commit 5974e05

Please sign in to comment.