From 4ff9a5f28d5fc7308692241afc3508aee422e215 Mon Sep 17 00:00:00 2001
From: Mohammed Saleh <34711999+MDSH14@users.noreply.github.com>
Date: Fri, 5 Nov 2021 23:22:35 +0200
Subject: [PATCH] Update Engezny.py

---
 Engezny/Engezny.py | 114 ++++++++++++++++++++++++++++++---------------
 1 file changed, 77 insertions(+), 37 deletions(-)

diff --git a/Engezny/Engezny.py b/Engezny/Engezny.py
index 65b8526..a70c055 100644
--- a/Engezny/Engezny.py
+++ b/Engezny/Engezny.py
@@ -8,26 +8,61 @@ class Engezny:
     def __init__(self, DataFrame):
         self.DataFrame = DataFrame
     
-    def visualize(self, start= 0, end = -1, location = 'Charts/', extention = 'jpg'):
-        if end == -1:
-            end = len(list(self.DataFrame))
+    def labelcolor(self, colors):  # Map '#RRGGBB' strings to a "white"/"black" text color each; None or empty input yields [].
+        rgb = [tuple(int(item.lstrip('#')[i:i+2], 16) for i in (0, 2, 4)) for item in (colors or [])]  # fix: was the undefined name `color`
+        return ["white" if (0.2126*item[0] + 0.7152*item[1] + 0.0722*item[2]) < 128 else "black" for item in rgb]  # weighted R/G/B sum below 128 -> white text
+    
+    def value_counts(self, Data, multi_sep=None, single_sep=None):  # Count tokens in a Series-like column; returns {token: count} sorted by descending count, or "Error" on failure.
+        if multi_sep is None and single_sep is None:  # no separators given: defer to the column's own value_counts()
+            return dict(Data.value_counts())
+        Dict = dict()
+        try:
+            for Value in Data.dropna():  # iterate the cell values directly; missing cells are skipped
+                Words = str(Value).split(multi_sep)  # fix: split the current cell, not str() of the whole column
+                for Word in Words:
+                    Word = Word.split(single_sep)[0].strip()  # keep only the text before single_sep
+                    if Word in Dict:
+                        Dict[Word] += 1
+                    else:
+                        Dict[Word] = 1
+        except Exception:  # keep the "Error" sentinel that visualize() tests for, without trapping KeyboardInterrupt/SystemExit
+            return "Error"
+        return dict(sorted(Dict.items(), key=lambda item: item[1], reverse=True))
+    
+    
+    def save(self, location, Count, extention, item):  # Save the current pyplot figure as '<Count>. <Title>.<extention>' inside `location`.
+        Title = '{}'.format(item).title()
+        Title = ''.join(e for e in Title if e.isalnum())  # keep only alphanumeric characters for the file name
+        try:
+            os.makedirs(location, exist_ok=True)  # also creates missing parent directories; an existing directory is fine
+        except OSError:
+            pass  # best-effort: if the directory cannot be created, savefig below reports the real failure
+        plt.savefig(os.path.join(location, '{}. {}.{}'.format(Count, Title, extention)), bbox_inches='tight', transparent=True)  # join so `location` works with or without a trailing separator
+        
+    
+    def visualize(self,
+                  start= 0,
+                  end = None,
+                  location = 'Charts/',
+                  extention = 'jpg',
+                  colors = None,
+                  save=True,
+                  multi_sep=None,
+                  single_sep=None,
+                  figsize=(15, 15),
+                  base = 'total_values',
+                  other= False):
         Count = 1
         for item in list(self.DataFrame)[start:end]:
-            Dict = dict()
-            try:
-                for i in self.DataFrame[item].dropna().index:
-                    Words = str(self.DataFrame[item][i]).split(",")
-                    for Word in Words:
-                        Word = Word.split("[")[0].strip()
-                        if Word in Dict.keys():
-                            Dict[Word] += 1
-                        else:
-                            Dict[Word] = 1
-            except:
-                print(item)
+            Dict = self.value_counts(self.DataFrame[item], multi_sep, single_sep)
+            if Dict == "Error":
                 continue
-            Dict = dict(sorted(Dict.items(), key=lambda item: item[1], reverse=True))
-
+            
+            if other:
+                if len(Dict) > 5:
+                    Tot = sum(Dict.values())
+                    Dict = dict(list(Dict.items())[:4])
+                    Dict['Other'] = Tot - sum(Dict.values())
 
             Keys = list()
             for Key in Dict.keys():
@@ -35,25 +70,36 @@ def visualize(self, start= 0, end = -1, location = 'Charts/', extention = 'jpg')
             Values = list(Dict.values())
 
             labels = []
-            Tot = sum(Values)
+            if not other:
+                if base == 'total_values':
+                    Tot = sum(Values)
+                elif base == "data_base":
+                    Tot = len(self.DataFrame)
+                elif base == "column_base":
+                    Tot = len(self.DataFrame[item].dropna())
+                else:
+                    raise ValueError('Unknown base !')
+                
             for i in Values:
                 labels.append("{}/{} ({}%)".format(i, Tot, int((i/Tot)*100)))
 
-            matplotlib.rcParams.update({'font.size': 50})
-            f, ax = plt.subplots(figsize=(32, 32))
+            matplotlib.rcParams.update({'font.size': 25})
+            f, ax = plt.subplots(figsize=figsize)
 
-            if len(Keys) <= 5:
+            if len(Keys) <= 5 and base == 'total_values':
                 for i in range(len(Keys)):
                     Keys[i] += " (" + labels[i].split()[0] + ")"
 
-                plt.pie(Values, labels=["" for k in Keys], autopct="%.1f%%")
+                _, _, autotexts = plt.pie(Values, labels=["" for k in Keys], autopct="%.1f%%", colors=colors)
+                for color, autotext in zip(self.labelcolor(colors), autotexts):
+                    autotext.set_color(color)
                 plt.legend(loc = 'lower center', bbox_to_anchor=(0.25, -0.1, 0.5, 0.5), labels = Keys)
 
             elif len(Keys) < 8:
                 for i, v in enumerate(labels):
-                    ax.text(i-0.5, Values[i], str(v), fontsize=50)
+                    ax.text(i-0.5, Values[i], str(v), fontsize=25)
 
-                plt.bar(Keys, Values)
+                plt.bar(Keys, Values, color=colors[0])
                 plt.xticks(rotation = 90)
 
             else:
@@ -62,21 +108,15 @@ def visualize(self, start= 0, end = -1, location = 'Charts/', extention = 'jpg')
                 labels.reverse()
                 if len(Keys) > 20:
                     for i, v in enumerate(labels[-20:]):
-                        ax.text(Values[-20:][i], i, str(v), fontsize=50)
-                    plt.barh(Keys[-20:], Values[-20:])
+                        ax.text(Values[-20:][i], i, str(v), fontsize=25)
+                    plt.barh(Keys[-20:], Values[-20:], color=colors[0])
                 else:
                     for i, v in enumerate(labels):
-                        ax.text(Values[i], i, str(v), fontsize=50)
-                    plt.barh(Keys, Values)
-
-            plt.title(item.title(), fontsize=70)
-            Title = '{}'.format(item).title()
-            Title = ''.join(e for e in Title if e.isalnum())
+                        ax.text(Values[i], i, str(v), fontsize=25)
+                    plt.barh(Keys, Values, color=colors[0])
 
-            try:
-                os.mkdir(location)
-            except:
-                pass
-            plt.savefig('{}{}. {}.{}'.format(location, Count, Title, extention), bbox_inches='tight');
+            plt.title(get_display(arabic_reshaper.reshape(item.title())), fontsize=35)
+            if save:
+                self.save(location, Count, extention, item)
             plt.show()
             Count += 1