-
Notifications
You must be signed in to change notification settings - Fork 1
/
preference_filter.py
83 lines (70 loc) · 2.93 KB
/
preference_filter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import json
from dotenv import load_dotenv
from os.path import join, dirname
import os
import requests
import pandas as pd
import numpy as np
from collections import Counter
import watson_developer_cloud
import watson_developer_cloud.natural_language_understanding.features.v1 as features
load_dotenv('.env')
# Get credentials
username=os.environ.get('NLU_USERNAME')
password = os.environ.get('NLU_PASSWORD')
foursquare_client_id=os.environ.get('FOURSQUARE_CLIENT_ID')
foursquare_client_secret=os.environ.get('FOURSQUARE_CLIENT_SECRET')
# VERSION = "20170511"
# LIMIT = 30
# radius = 1000
def pull_foursquare_json(venue_id):
VERSION = "20170511"
foursquare_client_id=os.environ.get('FOURSQUARE_CLIENT_ID')
foursquare_client_secret=os.environ.get('FOURSQUARE_CLIENT_SECRET')
url="https://api.foursquare.com/v2/venues/{}/tips?client_id={}&client_secret={}&v={}&limit=150".format(venue_id, foursquare_client_id, foursquare_client_secret, VERSION)
json_f = requests.get(url).json()
return json_f
# Helper function to return list of tips from restaurant's JSON file
def tips_list(json_file):
try:
num_tips = json_file['response']['tips']['count']
return [json_file['response']['tips']['items'][k]['text'] for k in range(num_tips)]
except:
return [""]
def combine_u_prefs(u1_dict, u2_dict, k):
u1_counter = Counter(u1_dict)
u2_counter = Counter(u2_dict)
u_prefs = u1_counter + u2_counter
# Pull k highest values
u_prefs_top = dict(u_prefs.most_common(k))
vallist = [val for val in u_prefs_top.values()]
factor = np.median(vallist)
normed_val = [val/factor for val in vallist]
topic_idx = [key for key in u_prefs_top.keys()]
pref_vec = pd.Series(data=normed_val, index=topic_idx)
return pref_vec, topic_idx
def construct_matrix(venue_ids, topic_idx):
empty_matrix = pd.DataFrame(index=venue_ids, columns=topic_idx)
return empty_matrix
def sentiment(tips):
# Helper function to return text sentiment analysis
# Load Watson credentials
username=os.environ.get('NLU_USERNAME')
password = os.environ.get('NLU_PASSWORD')
nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(version='2017-02-27',
username=username, password=password)
output = nlu.analyze(text=tips, features=[features.Sentiment()])
return output['sentiment']['document']['score']
def fill_sentiment_matrix(mat):
for j in range(mat.shape[0]):
venue_id = mat.index[j]
json_f = pull_foursquare_json(venue_id)
tips = tips_list(json_f)
for k in range(mat.shape[1]):
topic = mat.columns[k]
score = np.median([sentiment(tip) for tip in tips if topic in tip])
mat.loc[venue_id, topic] = score
return mat.fillna(0)
def recommend(score_mat, user_vec, venues_ids, top_n):
score_vec = pd.Series(np.dot(score_mat, user_vec), index=venues_ids)
return score_vec.sort_values(ascending=False)[:top_n].index.values