forked from mozilla-mobile/focus-ios
-
Notifications
You must be signed in to change notification settings - Fork 0
/
build-disconnect.py
executable file
·122 lines (95 loc) · 4.45 KB
/
build-disconnect.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#!/usr/bin/env python
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import print_function

import json

try:
    import urlparse
except ImportError:  # Python 3 moved this module to urllib.parse
    import urllib.parse as urlparse
def url_filter(resource):
    """Build the ``url-filter`` regular expression for *resource*.

    The pattern is anchored at the start of the URL and matches an ``http``
    or ``https`` scheme, an optional subdomain prefix, and then *resource*
    with each of its dots escaped. No other characters are escaped.
    """
    scheme_and_subdomains = "^https?://([^/]+\\.)?"
    escaped_resource = "\\.".join(resource.split("."))
    return scheme_and_subdomains + escaped_resource
def unless_domain(properties):
    """Return the ``unless-domain`` values for *properties*.

    Each property is prefixed with ``*``; the input order is preserved and
    a new list is returned.
    """
    wildcarded = []
    for host in properties:
        wildcarded.append("*" + host)
    return wildcarded
def create_blocklist_entry(resource, properties):
    """Return one blocking rule for *resource*.

    The rule's trigger combines the URL filter built from *resource*, a
    ``third-party`` load type, and the ``unless-domain`` list built from
    *properties*; its action is always ``block``.
    """
    trigger = {
        "url-filter": url_filter(resource),
        "load-type": ["third-party"],
        "unless-domain": unless_domain(properties),
    }
    action = {"type": "block"}
    return {"trigger": trigger, "action": action}
def generate_entity_list(path="shavar-prod-lists/disconnect-entitylist.json"):
    """Write ``Lists/disconnect.json`` from the entity list at *path*.

    One blocklist entry is created for every resource of every entity,
    using that entity's ``properties`` for the ``unless-domain`` list.

    Args:
        path: JSON file holding a mapping whose values each provide
            ``resources`` and ``properties``.
    """
    with open(path) as fp:
        entitylist = json.load(fp)

    blocklist = []
    # The mapping's keys (entity names) are not used here, only the values.
    for entity in entitylist.values():
        for resource in entity['resources']:
            entry = create_blocklist_entry(resource, entity['properties'])
            blocklist.append(entry)

    # indent=0 makes json.dumps emit a newline after every item; stripping
    # them again leaves the output on a single compact line.
    out = json.dumps(blocklist, indent=0,
                     separators=(',', ':')).replace('\n', '')

    # A context manager guarantees the file is flushed and closed, even if
    # the write raises.
    with open('Lists/disconnect.json', 'w') as f:
        f.write(out)

    # For human-readable output while debugging:
    # print(json.dumps(blocklist, indent=2))
def generate_blacklists(
        blacklist="shavar-prod-lists/disconnect-blacklist.json",
        entitylist="shavar-prod-lists/disconnect-entitylist.json"):
    """Write one ``Lists/disconnect-<category>.json`` file per category.

    The categorized blacklist is loaded and adjusted before the rules are
    generated: the "Legacy Disconnect" and "Legacy Content" categories are
    dropped, the Facebook and Twitter entries are moved from "Disconnect"
    into "Social", and the Google data from
    ``shavar-prod-lists/google_mapping.json`` is merged into the
    "Advertising", "Analytics" and "Social" categories. A rule file is then
    written for each of "Advertising", "Analytics", "Social" and "Content",
    and the number of entries per category is printed.

    Args:
        blacklist: JSON file with a top-level ``categories`` mapping.
        entitylist: JSON file mapping entity names to dicts that provide a
            ``properties`` list.

    Raises:
        KeyError: if an expected category is missing from the input data.
        ValueError: if the "Disconnect" category has no Facebook or Twitter
            entry to move.
    """
    # Generating the categorical lists requires some manual tweaking to the
    # data at the moment.
    def find_entry(entry, list_):
        """Return the dict in *list_* whose only key is *entry*, or None."""
        for d in list_:
            # list(d) yields the keys on Python 2 and 3 alike, whereas
            # ``d.keys() == [entry]`` is always False on Python 3 because
            # keys() returns a view rather than a list.
            if list(d) == [entry]:
                return d
        return None

    def entity_sort_key(entity):
        """Order entity dicts by size, then by their sorted key names."""
        # Python 3 cannot compare dicts directly. For single-key entity
        # dicts with distinct names this reproduces the Python 2 ordering.
        return (len(entity), sorted(entity))

    # First, massage the existing categorical data slightly
    with open(blacklist) as fp:
        categories = json.load(fp)["categories"]

    # Remove what we know we don't care about
    del categories["Legacy Disconnect"]
    del categories["Legacy Content"]

    # Move the Twitter and Facebook entries into the Social category from
    # the Disconnect category
    disconnect = categories.pop("Disconnect")
    for moved_name in ("Facebook", "Twitter"):
        moved = find_entry(moved_name, disconnect)
        if moved is None:
            # Fail here with a clear message; appending None would only
            # crash later, when the category entries are iterated.
            raise ValueError(
                '"{0}" entry not found in the "Disconnect" category of {1}'
                .format(moved_name, blacklist))
        categories["Social"].append(moved)

    # Load the entitylist to map the whitelist entries.
    with open(entitylist) as fp:
        entities = json.load(fp)

    # Change the Google entries for the respective categories
    with open("shavar-prod-lists/google_mapping.json") as fp:
        tweaks = json.load(fp)["categories"]

    for category in ("Advertising", "Analytics", "Social"):
        cat = categories[category]
        goog = find_entry("Google", cat)

        if goog is None:
            # No data exist for this category, just append
            cat.append(tweaks[category][0])
        else:
            # NOTE(review): this branch indexes tweaks[category] by
            # "Google" while the branch above indexes it by position 0,
            # and it merges into the top level of the found entry. The
            # logic is kept as-is; confirm it against the real shape of
            # google_mapping.json.
            for prop, resources in tweaks[category]["Google"].items():
                if prop not in goog:
                    goog[prop] = resources
                    continue

                for resource in resources:
                    if resource not in goog[prop]:
                        goog[prop].append(resource)
                goog[prop].sort()
        cat.sort(key=entity_sort_key)

    for category in ("Advertising", "Analytics", "Social", "Content"):
        blocklist = []

        for entity in categories[category]:
            for name, domains in entity.items():
                for property_, resources in domains.items():
                    if name in entities:
                        props = entities[name]["properties"]
                    else:
                        # Unknown entity: fall back to the host of the
                        # property URL, without a leading "www" label.
                        prop = urlparse.urlparse(property_).netloc.split(".")
                        if prop[0] == "www":
                            prop.pop(0)
                        props = [".".join(prop)]

                    for res in resources:
                        blocklist.append(create_blocklist_entry(res, props))

        print("{cat} blacklist has {count} entries."
              .format(cat=category, count=len(blocklist)))

        # indent=0 plus newline stripping yields one compact line.
        out = json.dumps(blocklist, indent=0,
                         separators=(',', ':')).replace('\n', '')
        with open("Lists/disconnect-{0}.json".format(category.lower()),
                  "w") as fp:
            fp.write(out)
if __name__ == "__main__":
    # When run as a script only the per-category blacklists are generated;
    # the entity-list generation below is left disabled.
    # generate_entity_list()
    generate_blacklists()