-
Notifications
You must be signed in to change notification settings - Fork 3
/
update_labels.py
191 lines (159 loc) · 6.15 KB
/
update_labels.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
import os
from urllib.request import urlopen
from pathlib import Path
from bs4 import BeautifulSoup
# This file is used to sync labels from kdb/issues/labels with their
# mentions as shields in md documents, such as in domains/Readme.md
def reset(shields):
    """Mark every shield as not visited.

    Args:
        shields (dict): maps a key to a dict; the "visited" entry of each
            inner dict is set to False in place.
    """
    for key in shields:
        shields[key]["visited"] = False
def update_shields(labels_path, shields_path):
    """Write shields as md image references to be copied at the end of a file.

    For instance:
        [bug]: https://img.shields.io/badge/bug-d73a4a.svg

    As parsing is fairly simple, it is done line by line in order to
    use as few dependencies as possible. PyYAML would do fine otherwise.

    A label starts on a line containing " - name"; its color is read from
    the line right after it. The first line of the file is skipped. If
    `labels_path` does not exist, nothing is read and nothing is written.

    Args:
        labels_path (pathlib.Path): where to read the labels as yml
        shields_path (pathlib.Path): where to write the shields as txt
    """
    if not labels_path.exists():
        return

    # read file
    with labels_path.open() as f:
        lines = f.readlines()

    references = []
    n_lines = len(lines)
    # skip first line
    i = 1
    while i < n_lines:
        line = lines[i]
        if " - name" not in line:
            # not the beginning of a label: skip line
            i += 1
            continue

        if i + 1 >= n_lines:
            # a name on the very last line has no color line after it:
            # the entry is incomplete, so ignore it instead of crashing
            break

        name = line.split("name:")[-1].strip().replace('"', "")
        color = lines[i + 1].split("color:")[-1].strip().replace('"', "")
        url = f"https://img.shields.io/badge/{name}-{color}.svg".replace(" ", "%20")
        references.append(f"[{name}]: {url}")

        # find next label: jump over the color line unless the line after
        # it already starts a new label; the bounds check keeps a file
        # ending on a color line from raising IndexError
        next_starts_label = i + 2 < n_lines and " - name" in lines[i + 2]
        i += 1 if next_starts_label else 2

    # write the sorted references to file, one per line, no trailing newline
    with open(shields_path, "w") as f:
        f.write("\n".join(sorted(references)))
def parse_item(item):
    """Build a label dict from a parsed HTML item from BeautifulSoup.

    Example output:
        {
            "color": "ffffff",
            "description": "",
            "name": "bug"
        }

    The description is the item's "title" attribute (empty string when
    absent), the name is the text of the first <span> found in the item and
    the color is taken from the first "background-color: " declaration of
    the item's inline style, without the "#".

    Args:
        item (BeautifulSoup.element): .label-link element from the github
            labels page

    Returns:
        dict: label dict formatted to be then used in yml
    """
    description = item.get("title", "")
    name = item.find("span").get_text()

    inline_style = item.get_attribute_list("style")[0]
    background_colors = [
        declaration.split("background-color: ")[1].replace("#", "")
        for declaration in inline_style.split(";")
        if "background-color" in declaration
    ]

    return {
        "description": description,
        "name": name,
        "color": background_colors[0],
    }
def get_label_yml(label):
    """Format a label as a yml list item.

    Args:
        label (dict): dictionary of a label with keys name, color and
            description as strings, possibly empty

    Returns:
        str: three lines (name, color, description), each value wrapped in
            double quotes and each line ending with a newline
    """
    rows = []
    for prefix, key in ((" - ", "name"), (" ", "color"), (" ", "description")):
        rows.append(f'{prefix}{key}: "{label[key]}"\n')
    return "".join(rows)
if __name__ == "__main__":
    # Running this file from the root of the repository will:
    # 1 - fetch all labels from the `labels_url` page
    # 2 - write them into a yml file in workflow/
    # 3 - update shields markdowns in workflow/shields.txt
    # 4 - update all shields links in all markdown files which use them

    # Constants
    labels_url = "https://github.com/cc-ai/kdb/issues/labels"
    yml_path = Path() / "workflow" / "labels.yml"
    shield_path = Path() / "workflow" / "shields.txt"
    labels_class = ".label-link"
    # text between a shield's name and the rest of its badge url; also used
    # to recognise shield reference lines in markdown files
    split_marker = "]: https://img.shields.io/badge/"

    # Fetching kdb's issue labels; the response is closed once read
    with urlopen(labels_url) as response:
        html = response.read()

    # Parsing HTML
    bs = BeautifulSoup(html, "lxml")
    print("Fetched and parsed", labels_url)

    # extracting labels from .label-link elements
    items = bs.select(labels_class)
    labels = map(parse_item, items)

    # formatting to yml
    yml_text = "labels:\n"
    yml_text += "\n".join(map(get_label_yml, labels))

    # writing to target file
    with yml_path.open("w") as f:
        f.write(yml_text)
    print("Wrote new labels to", str(yml_path))

    # write shields according to labels fetched
    update_shields(yml_path, shield_path)
    with shield_path.open("r") as f:
        shields = f.readlines()

    # parse updated shields: key is the label name (text between the leading
    # "[" and the marker), value holds the full reference line and a flag
    # telling whether the file being processed already mentions it
    shields = {
        s.split(split_marker)[0][1:]: {"text": s.strip(), "visited": False}
        for s in shields
    }

    # Updating shields in all md files
    candidates = Path().glob("**/*.md")
    for cand in candidates:
        # read through a context manager so the handle is closed before the
        # same file is reopened for writing
        with cand.open("r") as f:
            text = f.read()

        # only files using at least one shield image are rewritten
        if split_marker not in text:
            continue

        print("Updating", str(cand))
        lines = text.split("\n")
        # lines of new file with updated shields
        new_lines = []
        for i, line in enumerate(lines):
            if split_marker not in line:
                # no shield, use unaltered line
                new_lines.append(line)
                continue

            # this line is a shield reference
            name = line.split(split_marker)[0][1:]
            if name in shields:
                # shield still exists but may have an outdated color
                new_lines.append(shields[name]["text"])
                shields[name]["visited"] = True
                if shields[name]["text"] != line:
                    # color indeed has changed
                    print("Updating line", i, ":", line)
            # a reference whose name is not a known shield is not copied over

        for shield in shields.values():
            # add not listed shields to the end of file,
            # if someone wants to use it at some point
            if not shield["visited"]:
                new_lines.append(shield["text"])
                print("Adding", shield["text"])

        # write updated file; a single write() of the joined text replaces
        # writelines() on a str, which emitted it one character at a time
        with cand.open("w") as f:
            f.write("\n".join(new_lines))
        print()

        # put shields back to visited: False
        reset(shields)