From 4ff9a5f28d5fc7308692241afc3508aee422e215 Mon Sep 17 00:00:00 2001 From: Mohammed Saleh <34711999+MDSH14@users.noreply.github.com> Date: Fri, 5 Nov 2021 23:22:35 +0200 Subject: [PATCH] Update Engezny.py --- Engezny/Engezny.py | 114 ++++++++++++++++++++++++++++++--------------- 1 file changed, 77 insertions(+), 37 deletions(-) diff --git a/Engezny/Engezny.py b/Engezny/Engezny.py index 65b8526..a70c055 100644 --- a/Engezny/Engezny.py +++ b/Engezny/Engezny.py @@ -8,26 +8,61 @@ class Engezny: def __init__(self, DataFrame): self.DataFrame = DataFrame - def visualize(self, start= 0, end = -1, location = 'Charts/', extention = 'jpg'): - if end == -1: - end = len(list(self.DataFrame)) + def labelcolor(self, colors): + rgb = [tuple(int(item.lstrip('#')[i:i+2], 16) for i in (0, 2, 4)) for item in color] + return ["white" if (0.2126*item[0] + 0.7152*item[1] + 0.0722*item[2]) < 128 else "black" for item in rgb] + + def value_counts(self, Data, multi_sep=None, single_sep=None): + if multi_sep == None and single_sep == None: + return dict(Data.value_counts()) + Dict = dict() + try: + for i in Data.dropna().index: + Words = str(Data).split(multi_sep) + for Word in Words: + Word = Word.split(single_sep)[0].strip() + if Word in Dict.keys(): + Dict[Word] += 1 + else: + Dict[Word] = 1 + except: + return "Error" + return dict(sorted(Dict.items(), key=lambda item: item[1], reverse=True)) + + + def save(self, location, Count, extention, item): + Title = '{}'.format(item).title() + Title = ''.join(e for e in Title if e.isalnum()) + try: + os.mkdir(location) + except: + pass + plt.savefig('{}{}. {}.{}'.format(location, Count, Title, extention), bbox_inches='tight', transparent=True); + + + def visualize(self, + start= 0, + end = None, + location = 'Charts/', + extention = 'jpg', + colors = None, + save=True, + multi_sep=None, + single_sep=None, + figsize=(15, 15), + base = 'total_values', + other= False): Count = 1 for item in list(self.DataFrame)[start:end]: - Dict = dict() - try: - for i in self.DataFrame[item].dropna().index: - Words = str(self.DataFrame[item][i]).split(",") - for Word in Words: - Word = Word.split("[")[0].strip() - if Word in Dict.keys(): - Dict[Word] += 1 - else: - Dict[Word] = 1 - except: - print(item) + Dict = self.value_counts(self.DataFrame[item], multi_sep, single_sep) + if Dict == "Error": continue - Dict = dict(sorted(Dict.items(), key=lambda item: item[1], reverse=True)) - + + if other: + if len(Dict) > 5: + Tot = sum(Dict.values()) + Dict = dict(list(Dict.items())[:4]) + Dict['Other'] = Tot - sum(Dict.values()) Keys = list() for Key in Dict.keys(): @@ -35,25 +70,36 @@ def visualize(self, start= 0, end = -1, location = 'Charts/', extention = 'jpg') Values = list(Dict.values()) labels = [] - Tot = sum(Values) + if not other: + if base == 'total_values': + Tot = sum(Values) + elif base == "data_base": + Tot = len(self.DataFrame) + elif base == "column_base": + Tot = len(self.DataFrame[item].dropna()) + else: + raise ValueError('Unknown base !') + for i in Values: labels.append("{}/{} ({}%)".format(i, Tot, int((i/Tot)*100))) - matplotlib.rcParams.update({'font.size': 50}) - f, ax = plt.subplots(figsize=(32, 32)) + matplotlib.rcParams.update({'font.size': 25}) + f, ax = plt.subplots(figsize=figsize) - if len(Keys) <= 5: + if len(Keys) <= 5 and base == 'total_values': for i in range(len(Keys)): Keys[i] += " (" + labels[i].split()[0] + ")" - plt.pie(Values, labels=["" for k in Keys], autopct="%.1f%%") + _, _, autotexts = plt.pie(Values, labels=["" for k in Keys], autopct="%.1f%%", colors=colors) + for color, autotext in zip(self.labelcolor(colors), autotexts): + autotext.set_color(color) plt.legend(loc = 'lower center', bbox_to_anchor=(0.25, -0.1, 0.5, 0.5), labels = Keys) elif len(Keys) < 8: for i, v in enumerate(labels): - ax.text(i-0.5, Values[i], str(v), fontsize=50) + ax.text(i-0.5, Values[i], str(v), fontsize=25) - plt.bar(Keys, Values) + plt.bar(Keys, Values, color=colors[0]) plt.xticks(rotation = 90) else: @@ -62,21 +108,15 @@ def visualize(self, start= 0, end = -1, location = 'Charts/', extention = 'jpg') labels.reverse() if len(Keys) > 20: for i, v in enumerate(labels[-20:]): - ax.text(Values[-20:][i], i, str(v), fontsize=50) - plt.barh(Keys[-20:], Values[-20:]) + ax.text(Values[-20:][i], i, str(v), fontsize=25) + plt.barh(Keys[-20:], Values[-20:], color=colors[0]) else: for i, v in enumerate(labels): - ax.text(Values[i], i, str(v), fontsize=50) - plt.barh(Keys, Values) - - plt.title(item.title(), fontsize=70) - Title = '{}'.format(item).title() - Title = ''.join(e for e in Title if e.isalnum()) + ax.text(Values[i], i, str(v), fontsize=25) + plt.barh(Keys, Values, color=colors[0]) - try: - os.mkdir(location) - except: - pass - plt.savefig('{}{}. {}.{}'.format(location, Count, Title, extention), bbox_inches='tight'); + plt.title(get_display(arabic_reshaper.reshape(item.title())), fontsize=35) + if save: + self.save(location, Count, extention, item) plt.show() Count += 1