From 7bd25a395c167549117f5711288ba947442cb0a9 Mon Sep 17 00:00:00 2001 From: Clayton Mork Date: Wed, 22 Feb 2023 11:47:43 +0900 Subject: [PATCH 1/9] exposed detector boxes, and added readme --- README.md | 28 ++++++++++++++++++++++++++++ easyocr/easyocr.py | 16 +++++++++++++++- easyocr/utils.py | 42 +++++++++++++++++++++++++++++++++--------- 3 files changed, 76 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 4ccd7f100..a2a09e658 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,34 @@ Integrated into [Huggingface Spaces 🤗](https://huggingface.co/spaces) using [ ![example3](examples/example3.png) +## Getting character level detector bounding box results: +The number of character detection might be different with result, but it should be close or the same. +```python +# increasing the link_threshold can break bbox detection into character level bboxes +result = ocr.readtext(image, link_threshold=1-1e-100) + +# increasing the link_threshold can break bbox detection into character level bboxes +textBoxList = ocr.detector_text_box_list +batchTextBoxIndices = ocr.detector_text_box_indices + + +im = Image.open(image, formats=['png']) +draw = ImageDraw.Draw(im) +for batch, textBoxIndices in enumerate(batchTextBoxIndices): + for i, bboxCharacterIndices in enumerate(textBoxIndices): + for bboxCharacterIndex in indices: + + box = textBoxList[batch][bboxCharacterIndex] + x_min = np.min(box[::2]) + x_max = np.max(box[::2]) + y_min = np.min(box[1::2]) + y_max = np.max(box[1::2]) + + draw.rectangle([x_min, y_min, x_max, y_max], width=2, outline=(255,0,0)) +im.save('characterBox.png') +``` + + ## Installation Install using `pip` diff --git a/easyocr/easyocr.py b/easyocr/easyocr.py index 4ef943401..b7f272507 100644 --- a/easyocr/easyocr.py +++ b/easyocr/easyocr.py @@ -228,6 +228,8 @@ def __init__(self, lang_list, gpu=True, model_storage_directory=None, self.recognizer, self.converter = get_recognizer(recog_network, network_params,\ self.character, separator_list,\ dict_list, model_path, device = self.device, quantize=quantize) + self.detector_text_box_indices = None + self.detector_text_box_list = None def getDetectorPath(self, detect_network): if detect_network in self.support_detection_network: @@ -332,8 +334,10 @@ def detect(self, img, min_size = 20, text_threshold = 0.7, low_text = 0.4,\ ) horizontal_list_agg, free_list_agg = [], [] + horizontal_list_agg_idx, free_list_agg_idx = [], [] + for text_box in text_box_list: - horizontal_list, free_list = group_text_box(text_box, slope_ths, + horizontal_list, free_list, craft_list_idx, free_idx = group_text_box(text_box, slope_ths, ycenter_ths, height_ths, width_ths, add_margin, (optimal_num_chars is None)) @@ -342,8 +346,18 @@ def detect(self, img, min_size = 20, text_threshold = 0.7, low_text = 0.4,\ i[1] - i[0], i[3] - i[2]) > min_size] free_list = [i for i in free_list if max( diff([c[0] for c in i]), diff([c[1] for c in i])) > min_size] + + craft_list_idx = [idx for i, idx in zip(horizontal_list, craft_list_idx) if max( + i[1] - i[0], i[3] - i[2]) > min_size] + free_idx = [idx for i, idx in zip(free_list, free_idx) if max( + diff([c[0] for c in i]), diff([c[1] for c in i])) > min_size] horizontal_list_agg.append(horizontal_list) free_list_agg.append(free_list) + horizontal_list_agg_idx.append(craft_list_idx) + free_list_agg_idx.append(free_idx) + + self.detector_text_box_list = text_box_list + self.detector_text_box_indices = horizontal_list_agg_idx + free_list_agg_idx return horizontal_list_agg, free_list_agg diff --git a/easyocr/utils.py b/easyocr/utils.py index 64435cfdb..ec912de99 100644 --- a/easyocr/utils.py +++ b/easyocr/utils.py @@ -407,9 +407,11 @@ def four_point_transform(image, rect): def group_text_box(polys, slope_ths = 0.1, ycenter_ths = 0.5, height_ths = 0.5, width_ths = 1.0, add_margin = 0.05, sort_output = True): # poly top-left, top-right, low-right, low-left - horizontal_list, free_list,combined_list, merged_list = [],[],[],[] + horizontal_list, free_list, combined_list, merged_list = [],[],[],[] + horizontal_idx, free_idx, combined_idx, merged_idx = [],[],[],[] - for poly in polys: + # this part just differentiate between boxes with high slope (free), or just normal horizontal texts (horizontal_list) + for i, poly in enumerate(polys): slope_up = (poly[3]-poly[1])/np.maximum(10, (poly[2]-poly[0])) slope_down = (poly[5]-poly[7])/np.maximum(10, (poly[4]-poly[6])) if max(abs(slope_up), abs(slope_down)) < slope_ths: @@ -418,6 +420,7 @@ def group_text_box(polys, slope_ths = 0.1, ycenter_ths = 0.5, height_ths = 0.5, y_max = max([poly[1],poly[3],poly[5],poly[7]]) y_min = min([poly[1],poly[3],poly[5],poly[7]]) horizontal_list.append([x_min, x_max, y_min, y_max, 0.5*(y_min+y_max), y_max-y_min]) + horizontal_idx.append(i) else: height = np.linalg.norm([poly[6]-poly[0],poly[7]-poly[1]]) width = np.linalg.norm([poly[2]-poly[0],poly[3]-poly[1]]) @@ -437,58 +440,76 @@ def group_text_box(polys, slope_ths = 0.1, ycenter_ths = 0.5, height_ths = 0.5, y4 = poly[7] + np.sin(theta24)*margin free_list.append([[x1,y1],[x2,y2],[x3,y3],[x4,y4]]) + free_idx.append(i) if sort_output: horizontal_list = sorted(horizontal_list, key=lambda item: item[4]) + horizontal_idx = [x for _,x in sorted(zip(horizontal_list,horizontal_idx), key=lambda pair: pair[0][4])] # combine box + # this part combine boxes based on horizontal lines new_box = [] - for poly in horizontal_list: + new_box_idx = [] + for poly, idx in zip(horizontal_list, horizontal_idx): if len(new_box) == 0: b_height = [poly[5]] b_ycenter = [poly[4]] new_box.append(poly) + new_box_idx.append(idx) else: # comparable height and comparable y_center level up to ths*height if abs(np.mean(b_ycenter) - poly[4]) < ycenter_ths*np.mean(b_height): b_height.append(poly[5]) b_ycenter.append(poly[4]) new_box.append(poly) + new_box_idx.append(idx) else: b_height = [poly[5]] b_ycenter = [poly[4]] combined_list.append(new_box) + combined_idx.append(new_box_idx) new_box = [poly] + new_box_idx = [idx] combined_list.append(new_box) + combined_idx.append(new_box_idx) # merge list use sort again - for boxes in combined_list: + for boxes, indices in zip(combined_list, combined_idx): if len(boxes) == 1: # one box per line box = boxes[0] margin = int(add_margin*min(box[1]-box[0],box[5])) merged_list.append([box[0]-margin,box[1]+margin,box[2]-margin,box[3]+margin]) + merged_idx.append(indices) else: # multiple boxes per line boxes = sorted(boxes, key=lambda item: item[0]) + indices = [x for _,x in sorted(zip(boxes,indices), key=lambda pair: pair[0][0])] merged_box, new_box = [],[] - for box in boxes: + merged_box_idx, new_box_idx = [],[] + assert len(boxes) == len(indices) + for box, idx in zip(boxes, indices): if len(new_box) == 0: b_height = [box[5]] x_max = box[1] new_box.append(box) + new_box_idx.append(idx) else: if (abs(np.mean(b_height) - box[5]) < height_ths*np.mean(b_height)) and ((box[0]-x_max) < width_ths *(box[3]-box[2])): # merge boxes b_height.append(box[5]) x_max = box[1] new_box.append(box) + new_box_idx.append(idx) else: b_height = [box[5]] x_max = box[1] merged_box.append(new_box) + merged_box_idx.append(new_box_idx) new_box = [box] - if len(new_box) >0: merged_box.append(new_box) - - for mbox in merged_box: + new_box_idx = [idx] + if len(new_box) >0: + merged_box.append(new_box) + merged_box_idx.append(new_box_idx) + for mbox, mbox_idx in zip(merged_box, merged_box_idx): if len(mbox) != 1: # adjacent box in same line # do I need to add margin here? x_min = min(mbox, key=lambda x: x[0])[0] @@ -501,6 +522,7 @@ def group_text_box(polys, slope_ths = 0.1, ycenter_ths = 0.5, height_ths = 0.5, margin = int(add_margin * (min(box_width, box_height))) merged_list.append([x_min-margin, x_max+margin, y_min-margin, y_max+margin]) + merged_idx.append(mbox_idx) else: # non adjacent box in same line box = mbox[0] @@ -509,8 +531,10 @@ def group_text_box(polys, slope_ths = 0.1, ycenter_ths = 0.5, height_ths = 0.5, margin = int(add_margin * (min(box_width, box_height))) merged_list.append([box[0]-margin,box[1]+margin,box[2]-margin,box[3]+margin]) + merged_idx.append(mbox_idx) + # may need to check if box is really in image - return merged_list, free_list + return merged_list, free_list, merged_idx, free_idx def calculate_ratio(width,height): ''' From 5d7b6584c2f13ce546a86052b74a98188169b228 Mon Sep 17 00:00:00 2001 From: darwinharianto Date: Tue, 28 Feb 2023 09:25:07 +0900 Subject: [PATCH 2/9] updated free list and gotizontal list to be at the batch --- easyocr/easyocr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/easyocr/easyocr.py b/easyocr/easyocr.py index b7f272507..e05845ae3 100644 --- a/easyocr/easyocr.py +++ b/easyocr/easyocr.py @@ -357,8 +357,8 @@ def detect(self, img, min_size = 20, text_threshold = 0.7, low_text = 0.4,\ free_list_agg_idx.append(free_idx) self.detector_text_box_list = text_box_list - self.detector_text_box_indices = horizontal_list_agg_idx + free_list_agg_idx - + self.detector_text_box_indices = [hori + free for hori, free in zip(horizontal_list_agg_idx, free_list_agg_idx)] + return horizontal_list_agg, free_list_agg def recognize(self, img_cv_grey, horizontal_list=None, free_list=None,\ From ae78546268754e54d537f1b01bcbda2095ae96cf Mon Sep 17 00:00:00 2001 From: darwinharianto Date: Tue, 28 Feb 2023 09:27:13 +0900 Subject: [PATCH 3/9] updated readme sample on freeidx and character idx --- README.md | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a2a09e658..6f1611ad5 100644 --- a/README.md +++ b/README.md @@ -80,9 +80,21 @@ im = Image.open(image, formats=['png']) draw = ImageDraw.Draw(im) for batch, textBoxIndices in enumerate(batchTextBoxIndices): for i, bboxCharacterIndices in enumerate(textBoxIndices): - for bboxCharacterIndex in indices: - - box = textBoxList[batch][bboxCharacterIndex] + if type(indices) == list: + for bboxCharacterIndex in indices: + + if type(indices) == list: + # this is horizontal list + box = textBoxList[batch][bboxCharacterIndex] + x_min = np.min(box[::2]) + x_max = np.max(box[::2]) + y_min = np.min(box[1::2]) + y_max = np.max(box[1::2]) + + draw.rectangle([x_min, y_min, x_max, y_max], width=2, outline=(255,0,0)) + elif type(indices) == int: + # this is free idx + box = textBoxList[batch][indices] x_min = np.min(box[::2]) x_max = np.max(box[::2]) y_min = np.min(box[1::2]) From 4ac4c67b60898df308b67edeffd6fb4ff3657e91 Mon Sep 17 00:00:00 2001 From: darwinharianto Date: Wed, 1 Mar 2023 09:25:03 +0900 Subject: [PATCH 4/9] added rotation info in ocr results --- easyocr/easyocr.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/easyocr/easyocr.py b/easyocr/easyocr.py index e05845ae3..d2e027b44 100644 --- a/easyocr/easyocr.py +++ b/easyocr/easyocr.py @@ -432,13 +432,21 @@ def recognize(self, img_cv_grey, horizontal_list=None, free_list=None,\ if paragraph: result = get_paragraph(result, x_ths=x_ths, y_ths=y_ths, mode = direction_mode) + + if rotation_info is not None: + # added rotation info that gives the best result + result = [item[:3] + (rotation_info[item[3]], ) for item in result] if detail == 0: return [item[1] for item in result] elif output_format == 'dict': - return [ {'boxes':item[0],'text':item[1],'confident':item[2]} for item in result] + if rotation_info is not None: + return [ {'boxes':item[0],'text':item[1],'confident':item[2]} for item in result] + return [ {'boxes':item[0],'text':item[1],'confident':item[2], 'rotation_idx': item[3]} for item in result] elif output_format == 'json': - return [json.dumps({'boxes':[list(map(int, lst)) for lst in item[0]],'text':item[1],'confident':item[2]}, ensure_ascii=False) for item in result] + if rotation_info is not None: + return [json.dumps({'boxes':[list(map(int, lst)) for lst in item[0]],'text':item[1],'confident':item[2]}, ensure_ascii=False) for item in result] + return [json.dumps({'boxes':[list(map(int, lst)) for lst in item[0]],'text':item[1],'confident':item[2], 'rotation_idx': item[3]}, ensure_ascii=False) for item in result] else: return result From 8c361b371aef921919caee5c650a01c6bebb4705 Mon Sep 17 00:00:00 2001 From: darwinharianto Date: Wed, 1 Mar 2023 09:33:55 +0900 Subject: [PATCH 5/9] make sort output default to false, not sure how to rearrange detector indices if it is true --- easyocr/utils.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/easyocr/utils.py b/easyocr/utils.py index ec912de99..8948c9c34 100644 --- a/easyocr/utils.py +++ b/easyocr/utils.py @@ -474,15 +474,15 @@ def group_text_box(polys, slope_ths = 0.1, ycenter_ths = 0.5, height_ths = 0.5, combined_idx.append(new_box_idx) # merge list use sort again - for boxes, indices in zip(combined_list, combined_idx): + for boxes, index in zip(combined_list, combined_idx): if len(boxes) == 1: # one box per line box = boxes[0] margin = int(add_margin*min(box[1]-box[0],box[5])) merged_list.append([box[0]-margin,box[1]+margin,box[2]-margin,box[3]+margin]) - merged_idx.append(indices) + merged_idx.append(index) else: # multiple boxes per line boxes = sorted(boxes, key=lambda item: item[0]) - indices = [x for _,x in sorted(zip(boxes,indices), key=lambda pair: pair[0][0])] + indices = [x for _,x in sorted(zip(boxes,index), key=lambda pair: pair[0][0])] merged_box, new_box = [],[] merged_box_idx, new_box_idx = [],[] @@ -559,7 +559,7 @@ def compute_ratio_and_resize(img,width,height,model_height): return img,ratio -def get_image_list(horizontal_list, free_list, img, model_height = 64, sort_output = True): +def get_image_list(horizontal_list, free_list, img, model_height = 64, sort_output = False): image_list = [] maximum_y,maximum_x = img.shape @@ -812,6 +812,7 @@ def set_result_with_confidence(results): best_row = max( [(row_ix, results[row_ix][col_ix][2]) for row_ix in range(len(results))], key=lambda x: x[1])[0] - final_result.append(results[best_row][col_ix]) + result_angle = results[best_row][col_ix] + (best_row, ) + final_result.append(result_angle) return final_result From 113f6600747a179a412e216193d6c05a8386ee61 Mon Sep 17 00:00:00 2001 From: darwinharianto Date: Wed, 1 Mar 2023 09:35:28 +0900 Subject: [PATCH 6/9] rearrage to make free list indices come first, since images in free list is processed before horizontal list --- easyocr/easyocr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easyocr/easyocr.py b/easyocr/easyocr.py index d2e027b44..042045fe1 100644 --- a/easyocr/easyocr.py +++ b/easyocr/easyocr.py @@ -357,7 +357,7 @@ def detect(self, img, min_size = 20, text_threshold = 0.7, low_text = 0.4,\ free_list_agg_idx.append(free_idx) self.detector_text_box_list = text_box_list - self.detector_text_box_indices = [hori + free for hori, free in zip(horizontal_list_agg_idx, free_list_agg_idx)] + self.detector_text_box_indices = [free + hori for hori, free in zip(horizontal_list_agg_idx, free_list_agg_idx)] return horizontal_list_agg, free_list_agg From b3df1375b5b6b868b8a9d6edcf1293fa2dedf46a Mon Sep 17 00:00:00 2001 From: darwinharianto Date: Thu, 2 Mar 2023 13:27:08 +0900 Subject: [PATCH 7/9] rearrange free and horizontal list, sort textbox indices along with sort_output --- easyocr/easyocr.py | 25 +++++++++++++------------ easyocr/utils.py | 39 ++++++++++++++++++++++----------------- 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/easyocr/easyocr.py b/easyocr/easyocr.py index 042045fe1..ac4137cd9 100644 --- a/easyocr/easyocr.py +++ b/easyocr/easyocr.py @@ -342,15 +342,15 @@ def detect(self, img, min_size = 20, text_threshold = 0.7, low_text = 0.4,\ width_ths, add_margin, (optimal_num_chars is None)) if min_size: + craft_list_idx = [idx for i, idx in zip(horizontal_list, craft_list_idx) if max( + i[1] - i[0], i[3] - i[2]) > min_size] + free_idx = [idx for i, idx in zip(free_list, free_idx) if max( + diff([c[0] for c in i]), diff([c[1] for c in i])) > min_size] horizontal_list = [i for i in horizontal_list if max( i[1] - i[0], i[3] - i[2]) > min_size] free_list = [i for i in free_list if max( diff([c[0] for c in i]), diff([c[1] for c in i])) > min_size] - craft_list_idx = [idx for i, idx in zip(horizontal_list, craft_list_idx) if max( - i[1] - i[0], i[3] - i[2]) > min_size] - free_idx = [idx for i, idx in zip(free_list, free_idx) if max( - diff([c[0] for c in i]), diff([c[1] for c in i])) > min_size] horizontal_list_agg.append(horizontal_list) free_list_agg.append(free_list) horizontal_list_agg_idx.append(craft_list_idx) @@ -366,7 +366,8 @@ def recognize(self, img_cv_grey, horizontal_list=None, free_list=None,\ workers = 0, allowlist = None, blocklist = None, detail = 1,\ rotation_info = None,paragraph = False,\ contrast_ths = 0.1,adjust_contrast = 0.5, filter_ths = 0.003,\ - y_ths = 0.5, x_ths = 1.0, reformat=True, output_format='standard'): + y_ths = 0.5, x_ths = 1.0, reformat=True, output_format='standard',\ + textbox_indices=None ): if reformat: img, img_cv_grey = reformat_input(img_cv_grey) @@ -388,25 +389,25 @@ def recognize(self, img_cv_grey, horizontal_list=None, free_list=None,\ # without gpu/parallelization, it is faster to process image one by one if ((batch_size == 1) or (self.device == 'cpu')) and not rotation_info: result = [] - for bbox in horizontal_list: + for i, bbox in enumerate(horizontal_list): h_list = [bbox] f_list = [] - image_list, max_width = get_image_list(h_list, f_list, img_cv_grey, model_height = imgH) + image_list, max_width, = get_image_list(h_list, f_list, img_cv_grey, model_height = imgH, textbox_indices=textbox_indices[len(free_list)+i: len(free_list)+i+1]) result0 = get_text(self.character, imgH, int(max_width), self.recognizer, self.converter, image_list,\ ignore_char, decoder, beamWidth, batch_size, contrast_ths, adjust_contrast, filter_ths,\ workers, self.device) result += result0 - for bbox in free_list: + for i, bbox in enumerate(free_list): h_list = [] f_list = [bbox] - image_list, max_width = get_image_list(h_list, f_list, img_cv_grey, model_height = imgH) + image_list, max_width = get_image_list(h_list, f_list, img_cv_grey, model_height = imgH, textbox_indices=textbox_indices[i:i+1]) result0 = get_text(self.character, imgH, int(max_width), self.recognizer, self.converter, image_list,\ ignore_char, decoder, beamWidth, batch_size, contrast_ths, adjust_contrast, filter_ths,\ workers, self.device) result += result0 # default mode will try to process multiple boxes at the same time else: - image_list, max_width = get_image_list(horizontal_list, free_list, img_cv_grey, model_height = imgH) + image_list, max_width = get_image_list(horizontal_list, free_list, img_cv_grey, model_height = imgH, textbox_indices=textbox_indices) image_len = len(image_list) if rotation_info and image_list: image_list = make_rotated_img_list(rotation_info, image_list) @@ -477,12 +478,12 @@ def readtext(self, image, decoder = 'greedy', beamWidth= 5, batch_size = 1,\ bbox_min_size = bbox_min_size, max_candidates = max_candidates ) # get the 1st result from hor & free list as self.detect returns a list of depth 3 - horizontal_list, free_list = horizontal_list[0], free_list[0] + horizontal_list, free_list, textbox_indices = horizontal_list[0], free_list[0], self.detector_text_box_indices[0] result = self.recognize(img_cv_grey, horizontal_list, free_list,\ decoder, beamWidth, batch_size,\ workers, allowlist, blocklist, detail, rotation_info,\ paragraph, contrast_ths, adjust_contrast,\ - filter_ths, y_ths, x_ths, False, output_format) + filter_ths, y_ths, x_ths, False, output_format, textbox_indices) return result diff --git a/easyocr/utils.py b/easyocr/utils.py index 8948c9c34..239ef4f81 100644 --- a/easyocr/utils.py +++ b/easyocr/utils.py @@ -439,11 +439,11 @@ def group_text_box(polys, slope_ths = 0.1, ycenter_ths = 0.5, height_ths = 0.5, x4 = poly[6] - np.cos(theta24)*margin y4 = poly[7] + np.sin(theta24)*margin - free_list.append([[x1,y1],[x2,y2],[x3,y3],[x4,y4]]) free_idx.append(i) + free_list.append([[x1,y1],[x2,y2],[x3,y3],[x4,y4]]) if sort_output: - horizontal_list = sorted(horizontal_list, key=lambda item: item[4]) horizontal_idx = [x for _,x in sorted(zip(horizontal_list,horizontal_idx), key=lambda pair: pair[0][4])] + horizontal_list = sorted(horizontal_list, key=lambda item: item[4]) # combine box # this part combine boxes based on horizontal lines @@ -461,54 +461,54 @@ def group_text_box(polys, slope_ths = 0.1, ycenter_ths = 0.5, height_ths = 0.5, if abs(np.mean(b_ycenter) - poly[4]) < ycenter_ths*np.mean(b_height): b_height.append(poly[5]) b_ycenter.append(poly[4]) - new_box.append(poly) new_box_idx.append(idx) + new_box.append(poly) else: b_height = [poly[5]] b_ycenter = [poly[4]] combined_list.append(new_box) combined_idx.append(new_box_idx) - new_box = [poly] new_box_idx = [idx] - combined_list.append(new_box) + new_box = [poly] combined_idx.append(new_box_idx) + combined_list.append(new_box) # merge list use sort again for boxes, index in zip(combined_list, combined_idx): if len(boxes) == 1: # one box per line box = boxes[0] margin = int(add_margin*min(box[1]-box[0],box[5])) - merged_list.append([box[0]-margin,box[1]+margin,box[2]-margin,box[3]+margin]) merged_idx.append(index) + merged_list.append([box[0]-margin,box[1]+margin,box[2]-margin,box[3]+margin]) else: # multiple boxes per line + index = [x for _,x in sorted(zip(boxes,index), key=lambda pair: pair[0][0])] boxes = sorted(boxes, key=lambda item: item[0]) - indices = [x for _,x in sorted(zip(boxes,index), key=lambda pair: pair[0][0])] merged_box, new_box = [],[] merged_box_idx, new_box_idx = [],[] - assert len(boxes) == len(indices) - for box, idx in zip(boxes, indices): + assert len(boxes) == len(index) + for box, idx in zip(boxes, index): if len(new_box) == 0: b_height = [box[5]] x_max = box[1] - new_box.append(box) new_box_idx.append(idx) + new_box.append(box) else: if (abs(np.mean(b_height) - box[5]) < height_ths*np.mean(b_height)) and ((box[0]-x_max) < width_ths *(box[3]-box[2])): # merge boxes b_height.append(box[5]) x_max = box[1] - new_box.append(box) new_box_idx.append(idx) + new_box.append(box) else: b_height = [box[5]] x_max = box[1] - merged_box.append(new_box) merged_box_idx.append(new_box_idx) - new_box = [box] + merged_box.append(new_box) new_box_idx = [idx] + new_box = [box] if len(new_box) >0: - merged_box.append(new_box) merged_box_idx.append(new_box_idx) + merged_box.append(new_box) for mbox, mbox_idx in zip(merged_box, merged_box_idx): if len(mbox) != 1: # adjacent box in same line # do I need to add margin here? @@ -521,8 +521,8 @@ def group_text_box(polys, slope_ths = 0.1, ycenter_ths = 0.5, height_ths = 0.5, box_height = y_max - y_min margin = int(add_margin * (min(box_width, box_height))) - merged_list.append([x_min-margin, x_max+margin, y_min-margin, y_max+margin]) merged_idx.append(mbox_idx) + merged_list.append([x_min-margin, x_max+margin, y_min-margin, y_max+margin]) else: # non adjacent box in same line box = mbox[0] @@ -530,8 +530,8 @@ def group_text_box(polys, slope_ths = 0.1, ycenter_ths = 0.5, height_ths = 0.5, box_height = box[3] - box[2] margin = int(add_margin * (min(box_width, box_height))) - merged_list.append([box[0]-margin,box[1]+margin,box[2]-margin,box[3]+margin]) merged_idx.append(mbox_idx) + merged_list.append([box[0]-margin,box[1]+margin,box[2]-margin,box[3]+margin]) # may need to check if box is really in image return merged_list, free_list, merged_idx, free_idx @@ -559,10 +559,12 @@ def compute_ratio_and_resize(img,width,height,model_height): return img,ratio -def get_image_list(horizontal_list, free_list, img, model_height = 64, sort_output = False): +def get_image_list(horizontal_list, free_list, img, model_height = 64, sort_output = True, textbox_indices=None): image_list = [] maximum_y,maximum_x = img.shape + print(f"{len(free_list)=} {len(horizontal_list)=} {len(textbox_indices)=}") + max_ratio_hori, max_ratio_free = 1,1 for box in free_list: rect = np.array(box, dtype = "float32") @@ -601,7 +603,10 @@ def get_image_list(horizontal_list, free_list, img, model_height = 64, sort_outp max_width = math.ceil(max_ratio)*model_height if sort_output: + if textbox_indices is not None: + textbox_indices[:] = [x for _,x in sorted(zip(image_list, textbox_indices), key= lambda pair: pair[0][0][0][1])] image_list = sorted(image_list, key=lambda item: item[0][0][1]) # sort by vertical position + return image_list, max_width def download_and_unzip(url, filename, model_storage_directory, verbose=True): From e40036c16091a7e739c93b38bc10212b3e1bde0f Mon Sep 17 00:00:00 2001 From: darwinharianto Date: Thu, 2 Mar 2023 13:47:18 +0900 Subject: [PATCH 8/9] removed unnecessary print statement --- easyocr/utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/easyocr/utils.py b/easyocr/utils.py index 239ef4f81..b5ce136ec 100644 --- a/easyocr/utils.py +++ b/easyocr/utils.py @@ -563,8 +563,6 @@ def get_image_list(horizontal_list, free_list, img, model_height = 64, sort_outp image_list = [] maximum_y,maximum_x = img.shape - print(f"{len(free_list)=} {len(horizontal_list)=} {len(textbox_indices)=}") - max_ratio_hori, max_ratio_free = 1,1 for box in free_list: rect = np.array(box, dtype = "float32") From 9fe3cfa7dba3407d17e35ae3c4550b6c8c6d18b8 Mon Sep 17 00:00:00 2001 From: darwinharianto Date: Tue, 7 Mar 2023 14:58:26 +0900 Subject: [PATCH 9/9] rotation 0 always applied, fix the rotation array indexing --- easyocr/easyocr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easyocr/easyocr.py b/easyocr/easyocr.py index ac4137cd9..811b576aa 100644 --- a/easyocr/easyocr.py +++ b/easyocr/easyocr.py @@ -436,7 +436,7 @@ def recognize(self, img_cv_grey, horizontal_list=None, free_list=None,\ if rotation_info is not None: # added rotation info that gives the best result - result = [item[:3] + (rotation_info[item[3]], ) for item in result] + result = [item[:3] + (([0]+rotation_info)[item[3]], ) for item in result] if detail == 0: return [item[1] for item in result]