-
Notifications
You must be signed in to change notification settings - Fork 0
/
text_detector.py
111 lines (103 loc) · 3.85 KB
/
text_detector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import cv2
from middleware_text import *
from pytesseract import Output
import pytesseract
import numpy as np
from collections import Counter
from ranges import min_range, max_range
def find_most_common_color(img):
    """Return the most frequent pixel colour of ``img`` as an RGB list.

    ``img`` is treated as a BGR image: it is converted to RGB, flattened to
    one row per pixel, and every distinct colour is tallied. When several
    colours share the highest count, the one encountered first wins.
    """
    rgb_pixels = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).reshape(-1, 3)
    tally = Counter(map(tuple, rgb_pixels))
    dominant, _ = max(tally.items(), key=lambda entry: entry[1])
    return list(dominant)
def remove_other_regions():
    """Return the hard-coded image regions that are scanned for text.

    Each region is a four-element list. ``text_detector`` slices the image
    with it as ``img[r[0]:r[1], r[2]:r[3]]``, i.e. the order is
    ``[row_start, row_end, col_start, col_end]``. A new list is built on
    every call.
    """
    first_region = [330, 520, 120, 650]
    second_region = [0, 1000, 900, 1350]
    return [first_region, second_region]
def remove_other_colours(img, array_min, array_max):
    """Paint every pixel outside two HSV ranges a flat grey, in place.

    ``array_min[i]`` / ``array_max[i]`` (for ``i`` in 0 and 1) are the lower
    and upper HSV bounds of the two colour ranges to keep. ``img`` is
    converted from BGR to HSV for the range test; pixels matching neither
    range are overwritten with ``[170, 170, 170]``.

    ``img`` itself is modified and also returned.
    """
    first_lo = np.array(array_min[0], np.uint8)
    first_hi = np.array(array_max[0], np.uint8)
    second_lo = np.array(array_min[1], np.uint8)
    second_hi = np.array(array_max[1], np.uint8)

    hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    keep = np.ma.mask_or(
        cv2.inRange(hsv_img, first_lo, first_hi),
        cv2.inRange(hsv_img, second_lo, second_hi),
    )
    # Everything that is NOT inside one of the two ranges gets greyed out.
    img[np.logical_not(keep)] = [170, 170, 170]
    return img
def text_erasor(img, array_min=min_range, array_max=max_range):
    """Overwrite pixels inside two HSV ranges with the image's dominant colour.

    ``array_min[i]`` / ``array_max[i]`` (for ``i`` in 0 and 1) are the lower
    and upper HSV bounds of the two colour ranges to erase. Each range mask
    is dilated with a 15x15 kernel before the two are combined, so a margin
    around every matching pixel is erased as well.

    The fill value is ``rgb_to_hsv(find_most_common_color(img))``, computed
    on the image before anything is overwritten. ``img`` itself is modified
    and also returned.
    """
    first_lo = np.array(array_min[0], np.uint8)
    first_hi = np.array(array_max[0], np.uint8)
    second_lo = np.array(array_min[1], np.uint8)
    second_hi = np.array(array_max[1], np.uint8)

    hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    grow = np.ones((15, 15), np.uint8)
    grown_masks = [
        cv2.dilate(cv2.inRange(hsv_img, lo, hi), grow, iterations=1)
        for lo, hi in ((first_lo, first_hi), (second_lo, second_hi))
    ]
    erase = np.ma.mask_or(*grown_masks)

    fill_colour = rgb_to_hsv(find_most_common_color(img))
    img[erase > 0] = fill_colour
    return img
def dict_join(index, position, dict1, dict2):
    """Append the column lists of ``dict2`` onto ``dict1`` and return ``dict1``.

    Before appending, ``dict2`` is adjusted in place:

    * a ``"colour"`` column is added, holding ``str(index)`` once per entry
      of ``dict2["left"]``;
    * every ``"left"`` value is shifted by ``position[2]`` and every
      ``"top"`` value by ``position[0]``.

    The columns ``left``, ``top``, ``width``, ``height``, ``text`` and
    ``colour`` of ``dict2`` are then appended to the matching lists in
    ``dict1``. Both dictionaries are mutated.
    """
    offsets = {"left": position[2], "top": position[0]}
    dict2["colour"] = [str(index)] * len(dict2["left"])

    # Shift the coordinates inside the existing list objects.
    for field, shift in offsets.items():
        column = dict2[field]
        column[:] = [value + shift for value in column]

    for field in ("left", "top", "width", "height", "text", "colour"):
        dict1[field].extend(dict2[field])
    return dict1
def rgb_to_hsv(rgb_color):
    """Return ``rgb_color`` converted with ``cv2.COLOR_RGB2BGR``.

    NOTE: despite the function name, no HSV conversion takes place; the
    colour is wrapped in a 1x1 ``uint8`` image, run through OpenCV's
    RGB-to-BGR conversion, and the single resulting pixel is returned as a
    NumPy array.
    """
    single_pixel = np.array([[rgb_color]], np.uint8)
    return cv2.cvtColor(single_pixel, cv2.COLOR_RGB2BGR)[0][0]
def _empty_ocr_data():
    """Return a fresh accumulator with one empty list per OCR column."""
    return {
        "left": [],
        "top": [],
        "width": [],
        "height": [],
        "text": [],
        "colour": [],
    }


def text_detector(img_path):
    """Run Tesseract over the fixed regions of an image, three ways.

    For every region returned by ``remove_other_regions()`` the crop is
    filtered with ``remove_other_colours`` and passed through the
    ``remove_noise`` / ``erode`` / ``get_grayscale`` helpers. OCR is then
    run on three variants of the grayscale crop: ``opening``, ``canny`` and
    ``thresholding``. Results are merged across regions with ``dict_join``,
    which shifts ``left``/``top`` by the region origin and tags each entry's
    ``colour`` column with the region index as a string.

    Args:
        img_path: Path of the image file, handed to ``cv2.imread``.

    Returns:
        ``[data_opening, data_canny, data_threshold]``: three dicts with the
        list-valued keys ``left``, ``top``, ``width``, ``height``, ``text``
        and ``colour``.

    Raises:
        ValueError: If ``cv2.imread`` cannot load ``img_path``.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than on the first slice.
        raise ValueError("could not read image: %r" % (img_path,))

    tesseract_config = '--psm 12 -c tessedit_char_whitelist=ROHIT'
    data_opening = _empty_ocr_data()
    data_canny = _empty_ocr_data()
    data_threshold = _empty_ocr_data()

    # enumerate() yields the region index directly; list.index() would be a
    # linear search and would return the wrong index for repeated regions.
    for region_index, region in enumerate(remove_other_regions()):
        crop = img[region[0]:region[1], region[2]:region[3]]
        only_banner_text = remove_other_colours(crop, min_range, max_range)
        noise_removed = remove_noise(only_banner_text)
        eroded = erode(noise_removed)
        # NOTE(review): the dilated image was never used by the original
        # code. The call is kept in case the helper has side effects;
        # confirm and remove, or feed its result into get_grayscale.
        dilate(eroded)
        gray = get_grayscale(eroded)
        opening_img = opening(gray)
        threshold = thresholding(gray)
        canny_img = canny(gray)

        for accumulator, variant in (
            (data_opening, opening_img),
            (data_canny, canny_img),
            (data_threshold, threshold),
        ):
            ocr_result = pytesseract.image_to_data(
                variant,
                lang='eng',
                config=tesseract_config,
                output_type=Output.DICT,
            )
            # dict_join extends the accumulator in place.
            dict_join(region_index, region, accumulator, ocr_result)

    return [data_opening, data_canny, data_threshold]