-
Notifications
You must be signed in to change notification settings - Fork 0
/
helper.py
119 lines (92 loc) · 4.56 KB
/
helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import numpy as np
def medal_tally(df):
medal_tally = df.drop_duplicates(
subset=["Team", "NOC", "Games", "Year", "City", "Sport", "Medal", "Event"])
medal_tally = medal_tally.groupby("region").sum(
)[["Gold", "Silver", "Bronze"]].sort_values("Gold", ascending=False).reset_index()
medal_tally["total"] = medal_tally["Gold"] + \
medal_tally["Silver"] + medal_tally["Bronze"]
medal_tally["Gold"] = medal_tally["Gold"].astype("int")
medal_tally["Silver"] = medal_tally["Silver"].astype("int")
medal_tally["Bronze"] = medal_tally["Bronze"].astype("int")
medal_tally["total"] = medal_tally["total"].astype("int")
return medal_tally
def country_year_list(df):
years = df["Year"].unique().tolist()
years.sort()
years.insert(0, "Overall")
country = np.unique(df["region"].dropna().values).tolist()
country.sort()
country.insert(0, "Overall")
return years, country
def fetch_medal_tally(df, year, country):
medal_df = df.drop_duplicates(
subset=["Team", "NOC", "Games", "Year", "City", "Sport", "Medal", "Event"])
flag = 0
if year == "Overall" and country == "Overall":
temp_df = medal_df
if year == "Overall" and country != "Overall":
flag = 1
temp_df = medal_df[medal_df["region"] == country]
if year != "Overall" and country == "Overall":
temp_df = medal_df[medal_df["Year"] == int(year)]
if year != "Overall" and country != "Overall":
temp_df = medal_df[(medal_df["Year"] == int(year))
& (medal_df["region"] == country)]
if flag == 1:
x = temp_df.groupby("Year").sum()[
["Gold", "Silver", "Bronze"]].sort_values("Year").reset_index()
else:
x = temp_df.groupby("region").sum()[["Gold", "Silver", "Bronze"]].sort_values(
"Gold", ascending=False).reset_index()
x["total"] = x["Gold"] + x["Silver"] + x["Bronze"]
x["Gold"] = x["Gold"].astype("int")
x["Silver"] = x["Silver"].astype("int")
x["Bronze"] = x["Bronze"].astype("int")
x["total"] = x["total"].astype("int")
return x
def data_over_time(df,col):
nations_over_time = df.drop_duplicates(["Year",col])["Year"].value_counts().reset_index().sort_values("index")
nations_over_time.rename(columns={"index": "Edition","Year": col},inplace=True)
return nations_over_time
def most_successful_athletes(df,Sport):
temp_df = df.dropna(subset=["Medal"])
if Sport != "Overall":
temp_df = temp_df[temp_df["Sport"] == Sport]
x = temp_df["Name"].value_counts().reset_index().head(15).merge(df,left_on="index", right_on = "Name",how="left")[["index","Name_x","Sport","region"]].drop_duplicates("index")
x.rename(columns={"index": "Name","Name_x": "Medals"}, inplace=True)
return x
def yearwise_medal_tally(df,country):
temp_df = df.dropna(subset=["Medal"])
temp_df.drop_duplicates(subset=["Team","NOC","Games","Year","City","Sport","Medal","Event"],inplace=True)
new_df = temp_df[temp_df["region"]==country]
final_df = new_df.groupby("Year").count()["Medal"].reset_index()
return final_df
def country_event_heatmap(df,country):
temp_df = df.dropna(subset=["Medal"])
temp_df.drop_duplicates(subset=["Team","NOC","Games","Year","City","Sport","Medal","Event"],inplace=True)
new_df = temp_df[temp_df["region"]==country]
pt = new_df.pivot_table(index="Sport", columns="Year", values="Medal",aggfunc="count").fillna(0)
return pt
def most_successful_country(df,country):
temp_df = df.dropna(subset=["Medal"])
temp_df = temp_df[temp_df["region"] == country]
x = temp_df["Name"].value_counts().reset_index().head(10).merge(df,left_on="index", right_on = "Name",how="left")[["index","Name_x","Sport",]].drop_duplicates("index")
x.rename(columns={"index": "Name","Name_x": "Medals"}, inplace=True)
return x
def weight_v_height(df,sport):
athlete_df = df.drop_duplicates(subset=["Name","region"])
athlete_df["Medal"].fillna("No Medal",inplace=True)
if sport != "Overall":
temp_df = athlete_df[athlete_df["Sport"]==sport]
return temp_df
else:
return athlete_df
def men_vs_women(df):
athlete_df = df.drop_duplicates(subset=["Name","region"])
men = athlete_df[athlete_df["Sex"]=="M"].groupby("Year").count()["Name"].reset_index()
women = athlete_df[athlete_df["Sex"]=="F"].groupby("Year").count()["Name"].reset_index()
final = men.merge(women, on="Year",how="left")
final.rename(columns={"Name_x":"Male","Name_y":"Female"},inplace=True)
final.fillna(0,inplace=True)
return final