-
Notifications
You must be signed in to change notification settings - Fork 0
/
challenge.py
executable file
·53 lines (47 loc) · 1.87 KB
/
challenge.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
"""Solution to Sortable's Coding Challenge (http://sortable.com/challenge/).
"""
import re
from codecs import open
import simplejson
from fuzzywuzzy import fuzz
__author__ = "Najam Ahmed Ansari"
__email__ = "[email protected]"

# Input files; the script parses each line of them as one JSON document.
PRODUCTS = "products.txt"
LISTINGS = "listings.txt"
# Output file: one JSON line per product that received at least one listing.
RESULTS = "results-python.txt"
# NOTE(review): not referenced anywhere in the visible part of the script.
UNMATCHED = "unmatched.txt"

# Matches runs of non-word characters and underscores. Substituting them with
# '' reduces a string to its bare letters and digits. Written as a raw string
# so that ``\W`` reaches the regex engine without relying on Python's
# deprecated pass-through of unknown string escapes.
pattern = re.compile(r'[\W_]+')

# Pool of parsed listings that have not been assigned to a product yet.
listings_tmp = []
# NOTE(review): never filled or read in the visible part of the script.
results = []
# Parse the listings file up front (each stripped line is decoded as JSON) so
# the matching pass can work on an in-memory pool instead of re-reading it.
with open(LISTINGS, "r", encoding='utf-8') as listings_file:
    listings_tmp.extend(
        simplejson.loads(raw_line.strip()) for raw_line in listings_file
    )
# Matching pass: stream the products file and, for each product, claim every
# still-unassigned listing that looks like it describes that product. A
# listing is claimed by at most one product (the first one that matches it).
# Products that attract no listing are not written to the results file.
#
# NOTE(review): assumes every product has "manufacturer" and "model" and every
# listing has "manufacturer" and "title"; a missing field raises here.
with open(PRODUCTS, "r", encoding='utf-8') as products:
    with open(RESULTS, "w", encoding='utf-8') as results_file:
        for product in products:
            product = simplejson.loads(product.strip())
            temp = {
                "product_name": product.get("product_name"),
                "listings": []
            }

            # Per-product invariants, computed once rather than per listing.
            manufacturer = product.get("manufacturer")
            manufacturer_lower = manufacturer.lower()
            # After stripping, the model holds only letters and digits, so it
            # can be interpolated into the regex without escaping.
            model = pattern.sub('', product.get("model"))
            regex = re.compile(r"\b({})\b".format(model))

            # Listings that this product does not claim. They are collected
            # in a fresh list instead of popping from ``listings_tmp`` during
            # iteration: removing an element mid-loop shifts the next listing
            # into the vacated slot and the iterator then skips it.
            unclaimed = []
            for listing in listings_tmp:
                listing_manufacturer = listing.get("manufacturer")
                # Manufacturer gate: case-insensitive equality, otherwise a
                # fuzzy similarity of at least 50.
                same_maker = (
                    manufacturer_lower == listing_manufacturer.lower()
                    or fuzz.ratio(manufacturer, listing_manufacturer) >= 50
                )
                if not same_maker:
                    unclaimed.append(listing)
                    continue

                raw_title = listing.get("title")
                title = pattern.sub('', raw_title)
                # Model gate: the model appears as a whole word in the raw
                # title, or in the stripped title, or fuzzily matches the
                # stripped title with a partial ratio of at least 90.
                same_model = (
                    regex.search(raw_title)
                    or regex.search(title)
                    or fuzz.partial_ratio(model, title) >= 90
                )
                if same_model:
                    temp["listings"].append(listing)
                else:
                    unclaimed.append(listing)

            # Shrink the pool in place so later products only see listings
            # that are still unassigned.
            listings_tmp[:] = unclaimed

            if temp["listings"]:
                results_file.write(
                    "%s\n" % simplejson.dumps(temp, ensure_ascii=False)
                )