From 0b0b570e35b68ee49775c00f0ea03e2752dfadce Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Sun, 5 Jun 2022 19:07:51 +0000 Subject: [PATCH] Format code with autopep8 This commit fixes the style issues introduced in 41d5a02 according to the output from autopep8. Details: https://deepsource.io/gh/gulldan/EasyOCR/transform/ba29a81b-b879-45c6-beb9-42269dfa324a/ --- setup.py | 2 + trainer/craft/config/load_config.py | 6 +- trainer/craft/data/boxEnlarge.py | 7 +- trainer/craft/data/dataset.py | 23 ++- trainer/craft/data/gaussian.py | 15 +- trainer/craft/data/imgaug.py | 21 ++- .../craft/data/pseudo_label/make_charbox.py | 24 ++- trainer/craft/eval.py | 39 +++-- trainer/craft/loss/mseloss.py | 15 +- trainer/craft/metrics/eval_det_iou.py | 14 +- trainer/craft/model/craft.py | 26 +-- trainer/craft/model/vgg16_bn.py | 22 ++- trainer/craft/train.py | 67 ++++---- trainer/craft/trainSynth.py | 28 ++-- trainer/craft/train_distributed.py | 41 +++-- trainer/craft/utils/craft_utils.py | 150 +++++++++++------- trainer/craft/utils/inference_boxes.py | 25 ++- trainer/craft/utils/util.py | 19 ++- 18 files changed, 342 insertions(+), 202 deletions(-) diff --git a/setup.py b/setup.py index 291e62662..6eee643b7 100644 --- a/setup.py +++ b/setup.py @@ -8,11 +8,13 @@ with open('requirements.txt', encoding="utf-8-sig") as f: requirements = f.readlines() + def readme(): with open('README.md', encoding="utf-8-sig") as f: README = f.read() return README + setup( name='easyocr', packages=['easyocr'], diff --git a/trainer/craft/config/load_config.py b/trainer/craft/config/load_config.py index abe3551f2..93a838ca2 100644 --- a/trainer/craft/config/load_config.py +++ b/trainer/craft/config/load_config.py @@ -4,13 +4,15 @@ CONFIG_PATH = os.path.dirname(__file__) + def load_yaml(config_name): - with open(os.path.join(CONFIG_PATH, config_name)+ '.yaml') as file: + with open(os.path.join(CONFIG_PATH, config_name) + '.yaml') as file: config = yaml.safe_load(file) return config + class DotDict(dict): def __getattr__(self, k): try: @@ -34,4 +36,4 @@ def get(self, k, default=None): return self[k] except KeyError: return default - return super().get(k, default=default) \ No newline at end of file + return super().get(k, default=default) diff --git a/trainer/craft/data/boxEnlarge.py b/trainer/craft/data/boxEnlarge.py index 73d5bc5c2..f5b83c9ed 100644 --- a/trainer/craft/data/boxEnlarge.py +++ b/trainer/craft/data/boxEnlarge.py @@ -6,18 +6,22 @@ def pointAngle(Apoint, Bpoint): angle = (Bpoint[1] - Apoint[1]) / ((Bpoint[0] - Apoint[0]) + 10e-8) return angle + def pointDistance(Apoint, Bpoint): return math.sqrt((Bpoint[1] - Apoint[1])**2 + (Bpoint[0] - Apoint[0])**2) + def lineBiasAndK(Apoint, Bpoint): K = pointAngle(Apoint, Bpoint) B = Apoint[1] - K*Apoint[0] return K, B + def getX(K, B, Ypoint): return int((Ypoint-B)/K) + def sidePoint(Apoint, Bpoint, h, w, placehold, enlarge_size): K, B = lineBiasAndK(Apoint, Bpoint) @@ -43,6 +47,7 @@ def sidePoint(Apoint, Bpoint, h, w, placehold, enlarge_size): y1 = min(h, Apoint[1] + YaxisIncreaseDistance) return int(x1), int(y1) + def enlargebox(box, h, w, enlarge_size, horizontal_text_bool): if not horizontal_text_bool: @@ -62,4 +67,4 @@ def enlargebox(box, h, w, enlarge_size, horizontal_text_bool): x3, y3 = sidePoint(center, Cpoint, h, w, 'rightBottom', enlarge_size) x4, y4 = sidePoint(Dpoint, center, h, w, 'leftBottom', enlarge_size) newcharbox = np.array([[x1, y1], [x2, y2], [x3, y3], [x4, y4]]) - return newcharbox \ 
No newline at end of file + return newcharbox diff --git a/trainer/craft/data/dataset.py b/trainer/craft/data/dataset.py index e7e6943e4..72fd188d8 100644 --- a/trainer/craft/data/dataset.py +++ b/trainer/craft/data/dataset.py @@ -55,14 +55,16 @@ def __init__( self.sample = sample if self.sample != -1: random.seed(0) - self.idx = random.sample(range(0, len(self.img_names)), self.sample) + self.idx = random.sample( + range(0, len(self.img_names)), self.sample) self.pre_crop_area = [] def augment_image( self, image, region_score, affinity_score, confidence_mask, word_level_char_bbox ): - augment_targets = [image, region_score, affinity_score, confidence_mask] + augment_targets = [image, region_score, + affinity_score, confidence_mask] if self.aug.random_scale.option: augment_targets, word_level_char_bbox = random_scale( @@ -100,7 +102,8 @@ def augment_image( ) elif self.aug.random_crop.version == "random_crop": - augment_targets = random_crop(augment_targets, self.output_size,) + augment_targets = random_crop( + augment_targets, self.output_size,) else: assert "Undefined RandomCrop version" @@ -185,7 +188,8 @@ def __getitem__(self, index): confidence_mask, ) - region_score = self.resize_to_half(region_score, interpolation=cv2.INTER_CUBIC) + region_score = self.resize_to_half( + region_score, interpolation=cv2.INTER_CUBIC) affinity_score = self.resize_to_half( affinity_score, interpolation=cv2.INTER_CUBIC ) @@ -284,7 +288,7 @@ def make_gt_score(self, index): for i in range(len(words)): length_of_word = len(words[i]) - word_bbox = all_char_bbox[char_idx : char_idx + length_of_word] + word_bbox = all_char_bbox[char_idx: char_idx + length_of_word] assert len(word_bbox) == length_of_word char_idx += length_of_word word_bbox = np.array(word_bbox) @@ -425,7 +429,8 @@ def load_data(self, index): self.net, self.gpu, image, word_bboxes[i], words[i], img_name=img_name ) - cv2.fillPoly(confidence_mask, [np.int32(_word_bboxes[i])], confidence) + cv2.fillPoly(confidence_mask, [ + np.int32(_word_bboxes[i])], confidence) do_care_words.append(words[i]) word_level_char_bbox.append(pseudo_char_bbox) horizontal_text_bools.append(horizontal_text_bool) @@ -512,8 +517,10 @@ def load_saved_gt_score(self, index): saved_cf_mask_path = os.path.join( self.saved_gt_dir, f"res_img_{query_idx}_cf_mask_thresh_0.6.jpg" ) - region_score = cv2.imread(saved_region_scores_path, cv2.IMREAD_GRAYSCALE) - affinity_score = cv2.imread(saved_affi_scores_path, cv2.IMREAD_GRAYSCALE) + region_score = cv2.imread( + saved_region_scores_path, cv2.IMREAD_GRAYSCALE) + affinity_score = cv2.imread( + saved_affi_scores_path, cv2.IMREAD_GRAYSCALE) confidence_mask = cv2.imread(saved_cf_mask_path, cv2.IMREAD_GRAYSCALE) region_score = cv2.resize(region_score, (img_w, img_h)) diff --git a/trainer/craft/data/gaussian.py b/trainer/craft/data/gaussian.py index 2d0b76e0a..e4e1accc0 100644 --- a/trainer/craft/data/gaussian.py +++ b/trainer/craft/data/gaussian.py @@ -38,7 +38,8 @@ def generate_gaussian_map(self): gaussian_map = (gaussian_map / np.max(gaussian_map)).astype(np.float32) gaussian_map_color = (gaussian_map * 255).astype(np.uint8) - gaussian_map_color = cv2.applyColorMap(gaussian_map_color, cv2.COLORMAP_JET) + gaussian_map_color = cv2.applyColorMap( + gaussian_map_color, cv2.COLORMAP_JET) return gaussian_map, gaussian_map_color def generate_circle_mask(self): @@ -73,7 +74,8 @@ def four_point_transform(self, bbox): ) M = cv2.getPerspectiveTransform(init_points, bbox) - warped_gaussian_map = cv2.warpPerspective(self.gaussian_map, M, (width, 
height)) + warped_gaussian_map = cv2.warpPerspective( + self.gaussian_map, M, (width, height)) return warped_gaussian_map, width, height def add_gaussian_map_to_score_map( @@ -97,7 +99,8 @@ def add_gaussian_map_to_score_map( """ map_h, map_w = score_map.shape - bbox = enlargebox(bbox, map_h, map_w, enlarge_size, horizontal_text_bool) + bbox = enlargebox(bbox, map_h, map_w, enlarge_size, + horizontal_text_bool) # If any one point of character bbox is out of range, don't put in on map if np.any(bbox < 0) or np.any(bbox[:, 0] > map_w) or np.any(bbox[:, 1] > map_h): @@ -113,7 +116,7 @@ def add_gaussian_map_to_score_map( try: bbox_area_of_image = score_map[ - bbox_top : bbox_top + height, bbox_left : bbox_left + width, + bbox_top: bbox_top + height, bbox_left: bbox_left + width, ] high_value_score = np.where( warped_gaussian_map > bbox_area_of_image, @@ -121,7 +124,7 @@ def add_gaussian_map_to_score_map( bbox_area_of_image, ) score_map[ - bbox_top : bbox_top + height, bbox_left : bbox_left + width, + bbox_top: bbox_top + height, bbox_left: bbox_left + width, ] = high_value_score except Exception as e: @@ -189,4 +192,4 @@ def generate_affinity( if len(all_affinity_bbox) > 0: all_affinity_bbox = np.concatenate(all_affinity_bbox, axis=0) - return affinity_map, all_affinity_bbox \ No newline at end of file + return affinity_map, all_affinity_bbox diff --git a/trainer/craft/data/imgaug.py b/trainer/craft/data/imgaug.py index d24a456d2..c615bff0c 100644 --- a/trainer/craft/data/imgaug.py +++ b/trainer/craft/data/imgaug.py @@ -11,7 +11,8 @@ def rescale(img, bboxes, target_size=2240): h, w = img.shape[0:2] scale = target_size / max(h, w) - img = cv2.resize(img, dsize=None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) + img = cv2.resize(img, dsize=None, fx=scale, fy=scale, + interpolation=cv2.INTER_CUBIC) bboxes = bboxes * scale return img, bboxes @@ -25,13 +26,16 @@ def random_resize_crop_synth(augment_targets, size): confidence_mask = Image.fromarray(confidence_mask) short_side = min(image.size) - i, j, h, w = RandomCrop.get_params(image, output_size=(short_side, short_side)) + i, j, h, w = RandomCrop.get_params( + image, output_size=(short_side, short_side)) image = resized_crop( - image, i, j, h, w, size=(size, size), interpolation=InterpolationMode.BICUBIC + image, i, j, h, w, size=( + size, size), interpolation=InterpolationMode.BICUBIC ) region_score = resized_crop( - region_score, i, j, h, w, (size, size), interpolation=InterpolationMode.BICUBIC + region_score, i, j, h, w, (size, + size), interpolation=InterpolationMode.BICUBIC ) affinity_score = resized_crop( affinity_score, @@ -76,17 +80,20 @@ def random_resize_crop( else: if random.random() < threshold: - i, j, h, w = RandomResizedCrop.get_params(image, scale=scale, ratio=ratio) + i, j, h, w = RandomResizedCrop.get_params( + image, scale=scale, ratio=ratio) else: i, j, h, w = RandomResizedCrop.get_params( image, scale=(1.0, 1.0), ratio=(1.0, 1.0) ) image = resized_crop( - image, i, j, h, w, size=(size, size), interpolation=InterpolationMode.BICUBIC + image, i, j, h, w, size=( + size, size), interpolation=InterpolationMode.BICUBIC ) region_score = resized_crop( - region_score, i, j, h, w, (size, size), interpolation=InterpolationMode.BICUBIC + region_score, i, j, h, w, (size, + size), interpolation=InterpolationMode.BICUBIC ) affinity_score = resized_crop( affinity_score, diff --git a/trainer/craft/data/pseudo_label/make_charbox.py b/trainer/craft/data/pseudo_label/make_charbox.py index 09c5219b3..6114e98c8 100644 --- 
a/trainer/craft/data/pseudo_label/make_charbox.py +++ b/trainer/craft/data/pseudo_label/make_charbox.py @@ -21,10 +21,12 @@ def __init__(self, watershed_param, vis_test_dir, pseudo_vis_opt, gaussian_build def crop_image_by_bbox(self, image, box, word): w = max( - int(np.linalg.norm(box[0] - box[1])), int(np.linalg.norm(box[2] - box[3])) + int(np.linalg.norm(box[0] - box[1]) + ), int(np.linalg.norm(box[2] - box[3])) ) h = max( - int(np.linalg.norm(box[0] - box[3])), int(np.linalg.norm(box[1] - box[2])) + int(np.linalg.norm(box[0] - box[3]) + ), int(np.linalg.norm(box[1] - box[2])) ) try: word_ratio = h / w @@ -105,8 +107,10 @@ def visualize_pseudo_label( _watershed_box = np.int32(watershed_box) _pseudo_char_bbox = np.int32(pseudo_char_bbox) - region_score_color = cv2.applyColorMap(np.uint8(region_score), cv2.COLORMAP_JET) - region_score_color = cv2.resize(region_score_color, (word_img_w, word_img_h)) + region_score_color = cv2.applyColorMap( + np.uint8(region_score), cv2.COLORMAP_JET) + region_score_color = cv2.resize( + region_score_color, (word_img_w, word_img_h)) for box in _watershed_box: cv2.polylines( @@ -118,7 +122,8 @@ def visualize_pseudo_label( for box in _pseudo_char_bbox: cv2.polylines( - np.uint8(word_img_cp2), [np.reshape(box, (-1, 1, 2))], True, (255, 0, 0) + np.uint8(word_img_cp2), [np.reshape( + box, (-1, 1, 2))], True, (255, 0, 0) ) # NOTE: Just for visualize, put gaussian map on char box @@ -180,7 +185,8 @@ def split_word_equal_gap(self, word_img_w, word_img_h, word): continue left = j * width_per_char right = (j + 1) * width_per_char - bbox = np.array([[left, 0], [right, 0], [right, height], [left, height]]) + bbox = np.array( + [[left, 0], [right, 0], [right, height], [left, height]]) bboxes.append(bbox) bboxes = np.array(bboxes, np.float32) @@ -238,7 +244,8 @@ def build_char_box(self, net, gpu, image, word_bbox, word, img_name=""): confidence = self.get_confidence(real_char_len, len(pseudo_char_bbox)) if confidence <= 0.5: - pseudo_char_bbox = self.split_word_equal_gap(word_img_w, word_img_h, word) + pseudo_char_bbox = self.split_word_equal_gap( + word_img_w, word_img_h, word) confidence = 0.5 if self.pseudo_vis_opt and self.flag: @@ -258,6 +265,7 @@ def build_char_box(self, net, gpu, image, word_bbox, word, img_name=""): pseudo_char_bbox[i][None, :, :], M_inv ) - pseudo_char_bbox = self.clip_into_boundary(pseudo_char_bbox, image.shape) + pseudo_char_bbox = self.clip_into_boundary( + pseudo_char_bbox, image.shape) return pseudo_char_bbox, confidence, horizontal_text_bool diff --git a/trainer/craft/eval.py b/trainer/craft/eval.py index fceea4735..e8926d2ca 100644 --- a/trainer/craft/eval.py +++ b/trainer/craft/eval.py @@ -22,7 +22,6 @@ from utils.util import copyStateDict - def save_result_synth(img_file, img, pre_output, pre_box, gt_box=None, result_dir=""): img = np.array(img) @@ -49,7 +48,8 @@ def save_result_synth(img_file, img, pre_output, pre_box, gt_box=None, result_di for j in range(len(gt_box)): cv2.polylines( img, - [np.array(gt_box[j]["points"]).astype(np.int32).reshape((-1, 1, 2))], + [np.array(gt_box[j]["points"]).astype( + np.int32).reshape((-1, 1, 2))], True, color=(0, 0, 255), thickness=2, @@ -88,11 +88,14 @@ def save_result_2015(img_file, img, pre_output, pre_box, gt_box, result_dir): if gt_box is not None: for j in range(len(gt_box)): - _gt_box = np.array(gt_box[j]["points"]).reshape(-1, 2).astype(np.int32) + _gt_box = np.array(gt_box[j]["points"] + ).reshape(-1, 2).astype(np.int32) if gt_box[j]["text"] == "###": - cv2.polylines(img, [_gt_box], True, 
color=(128, 128, 128), thickness=2) + cv2.polylines(img, [_gt_box], True, color=( + 128, 128, 128), thickness=2) else: - cv2.polylines(img, [_gt_box], True, color=(0, 0, 255), thickness=2) + cv2.polylines(img, [_gt_box], True, + color=(0, 0, 255), thickness=2) # draw overlay image overlay_img = overlay(img_copy, region, affinity, pre_box) @@ -178,7 +181,8 @@ def overlay(image, region, affinity, single_img_bbox): def load_test_dataset_iou(test_folder_name, config): if test_folder_name == "synthtext": - total_bboxes_gt, total_img_path = load_synthtext_gt(config.test_data_dir) + total_bboxes_gt, total_img_path = load_synthtext_gt( + config.test_data_dir) elif test_folder_name == "icdar2013": total_bboxes_gt, total_img_path = load_icdar2013_gt( @@ -206,19 +210,23 @@ def viz_test(img, pre_output, pre_box, gt_box, img_name, result_dir, test_folder if test_folder_name == "synthtext": save_result_synth( - img_name, img[:, :, ::-1].copy(), pre_output, pre_box, gt_box, result_dir + img_name, img[:, :, ::- + 1].copy(), pre_output, pre_box, gt_box, result_dir ) elif test_folder_name == "icdar2013": save_result_2013( - img_name, img[:, :, ::-1].copy(), pre_output, pre_box, gt_box, result_dir + img_name, img[:, :, ::- + 1].copy(), pre_output, pre_box, gt_box, result_dir ) elif test_folder_name == "icdar2015": save_result_2015( - img_name, img[:, :, ::-1].copy(), pre_output, pre_box, gt_box, result_dir + img_name, img[:, :, ::- + 1].copy(), pre_output, pre_box, gt_box, result_dir ) elif test_folder_name == "custom_data": save_result_2015( - img_name, img[:, :, ::-1].copy(), pre_output, pre_box, gt_box, result_dir + img_name, img[:, :, ::- + 1].copy(), pre_output, pre_box, gt_box, result_dir ) else: print("not found test dataset") @@ -229,7 +237,8 @@ def main_eval(model_path, backbone, config, evaluator, result_dir, buffer, model if not os.path.exists(result_dir): os.makedirs(result_dir, exist_ok=True) - total_imgs_bboxes_gt, total_imgs_path = load_test_dataset_iou("custom_data", config) + total_imgs_bboxes_gt, total_imgs_path = load_test_dataset_iou( + "custom_data", config) if mode == "weak_supervision" and torch.cuda.device_count() != 1: gpu_count = torch.cuda.device_count() // 2 @@ -266,11 +275,11 @@ def main_eval(model_path, backbone, config, evaluator, result_dir, buffer, model # last gpu if gpu_idx == gpu_count - 1: - piece_imgs_path = total_imgs_path[gpu_idx * slice_idx :] + piece_imgs_path = total_imgs_path[gpu_idx * slice_idx:] # piece_imgs_bboxes_gt = total_imgs_bboxes_gt[gpu_idx * slice_idx:] else: piece_imgs_path = total_imgs_path[ - gpu_idx * slice_idx : (gpu_idx + 1) * slice_idx + gpu_idx * slice_idx: (gpu_idx + 1) * slice_idx ] # piece_imgs_bboxes_gt = total_imgs_bboxes_gt[gpu_idx * slice_idx: (gpu_idx + 1) * slice_idx] @@ -331,10 +340,12 @@ def main_eval(model_path, backbone, config, evaluator, result_dir, buffer, model print(metrics) return metrics + def cal_eval(config, data, res_dir_name, opt, mode): evaluator = DetectionIoUEvaluator() test_config = DotDict(config.test[data]) - res_dir = os.path.join(os.path.join("exp", args.yaml), "{}".format(res_dir_name)) + res_dir = os.path.join(os.path.join( + "exp", args.yaml), "{}".format(res_dir_name)) if opt == "iou_eval": main_eval( diff --git a/trainer/craft/loss/mseloss.py b/trainer/craft/loss/mseloss.py index dc24d5ab4..1eea69926 100644 --- a/trainer/craft/loss/mseloss.py +++ b/trainer/craft/loss/mseloss.py @@ -8,7 +8,8 @@ def __init__(self): def forward(self, gt_region, gt_affinity, pred_region, pred_affinity, conf_map): loss = 
torch.mean( - ((gt_region - pred_region).pow(2) + (gt_affinity - pred_affinity).pow(2)) + ((gt_region - pred_region).pow(2) + + (gt_affinity - pred_affinity).pow(2)) * conf_map ) return loss @@ -50,7 +51,8 @@ def batch_image_loss(self, pred_score, label_score, neg_rto, n_min_neg): sorted=False, )[0] ) / (positive_pixel_number * neg_rto) - positive_loss = torch.sum(positive_loss_region) / positive_pixel_number + positive_loss = torch.sum( + positive_loss_region) / positive_pixel_number else: # only negative pixel negative_loss = ( @@ -111,7 +113,8 @@ def single_image_loss(self, pre_loss, loss_label, neg_rto, n_min_neg): pos_pixel = (single_label >= 0.1).float() n_pos_pixel = torch.sum(pos_pixel) pos_loss_region = single_loss * pos_pixel - positive_loss += torch.sum(pos_loss_region) / max(n_pos_pixel, 1e-12) + positive_loss += torch.sum(pos_loss_region) / \ + max(n_pos_pixel, 1e-12) # negative_loss neg_pixel = (single_label < 0.1).float() @@ -126,14 +129,16 @@ def single_image_loss(self, pre_loss, loss_label, neg_rto, n_min_neg): # n_hard_neg = neg_rto*n_pos_pixel negative_loss += ( torch.sum( - torch.topk(neg_loss_region.view(-1), int(n_hard_neg))[0] + torch.topk(neg_loss_region.view(-1), + int(n_hard_neg))[0] ) / n_hard_neg ) else: # only negative pixel negative_loss += ( - torch.sum(torch.topk(neg_loss_region.view(-1), n_min_neg)[0]) + torch.sum(torch.topk( + neg_loss_region.view(-1), n_min_neg)[0]) / n_min_neg ) diff --git a/trainer/craft/metrics/eval_det_iou.py b/trainer/craft/metrics/eval_det_iou.py index f89004fd8..d6d758c35 100644 --- a/trainer/craft/metrics/eval_det_iou.py +++ b/trainer/craft/metrics/eval_det_iou.py @@ -97,7 +97,7 @@ def compute_ap(confList, matchList, numGtCare): if not Polygon(points).is_valid or not Polygon(points).is_simple: continue except: - import ipdb; + import ipdb ipdb.set_trace() #import ipdb;ipdb.set_trace() @@ -158,7 +158,8 @@ def compute_ap(confList, matchList, numGtCare): pairs.append({'gt': gtNum, 'det': detNum}) detMatchedNums.append(detNum) evaluationLog += "Match GT #" + \ - str(gtNum) + " with Det #" + str(detNum) + "\n" + str(gtNum) + " with Det #" + \ + str(detNum) + "\n" numGtCare = (len(gtPols) - len(gtDontCarePolsNum)) numDetCare = (len(detPols) - len(detDontCarePolsNum)) @@ -167,10 +168,11 @@ def compute_ap(confList, matchList, numGtCare): precision = float(0) if numDetCare > 0 else float(1) else: recall = float(detMatched) / numGtCare - precision = 0 if numDetCare == 0 else float(detMatched) / numDetCare + precision = 0 if numDetCare == 0 else float( + detMatched) / numDetCare hmean = 0 if (precision + recall) == 0 else 2.0 * \ - precision * recall / (precision + recall) + precision * recall / (precision + recall) matchedSum += detMatched numGlobalCareGt += numGtCare @@ -208,8 +210,8 @@ def combine_results(self, results): methodPrecision = 0 if numGlobalCareDet == 0 else float( matchedSum) / numGlobalCareDet methodHmean = 0 if methodRecall + methodPrecision == 0 else 2 * \ - methodRecall * methodPrecision / ( - methodRecall + methodPrecision) + methodRecall * methodPrecision / ( + methodRecall + methodPrecision) # print(methodRecall, methodPrecision, methodHmean) # sys.exit(-1) methodMetrics = { diff --git a/trainer/craft/model/craft.py b/trainer/craft/model/craft.py index f0da362a5..c7bde8677 100644 --- a/trainer/craft/model/craft.py +++ b/trainer/craft/model/craft.py @@ -10,6 +10,7 @@ from model.vgg16_bn import vgg16_bn, init_weights + class double_conv(nn.Module): def __init__(self, in_ch, mid_ch, out_ch): super(double_conv, 
self).__init__() @@ -56,7 +57,7 @@ def __init__(self, pretrained=True, freeze=False, amp=False): init_weights(self.upconv3.modules()) init_weights(self.upconv4.modules()) init_weights(self.conv_cls.modules()) - + def forward(self, x): """ Base network """ if self.amp: @@ -67,21 +68,24 @@ def forward(self, x): y = torch.cat([sources[0], sources[1]], dim=1) y = self.upconv1(y) - y = F.interpolate(y, size=sources[2].size()[2:], mode='bilinear', align_corners=False) + y = F.interpolate(y, size=sources[2].size()[ + 2:], mode='bilinear', align_corners=False) y = torch.cat([y, sources[2]], dim=1) y = self.upconv2(y) - y = F.interpolate(y, size=sources[3].size()[2:], mode='bilinear', align_corners=False) + y = F.interpolate(y, size=sources[3].size()[ + 2:], mode='bilinear', align_corners=False) y = torch.cat([y, sources[3]], dim=1) y = self.upconv3(y) - y = F.interpolate(y, size=sources[4].size()[2:], mode='bilinear', align_corners=False) + y = F.interpolate(y, size=sources[4].size()[ + 2:], mode='bilinear', align_corners=False) y = torch.cat([y, sources[4]], dim=1) feature = self.upconv4(y) y = self.conv_cls(feature) - return y.permute(0,2,3,1), feature + return y.permute(0, 2, 3, 1), feature else: sources = self.basenet(x) @@ -90,15 +94,18 @@ def forward(self, x): y = torch.cat([sources[0], sources[1]], dim=1) y = self.upconv1(y) - y = F.interpolate(y, size=sources[2].size()[2:], mode='bilinear', align_corners=False) + y = F.interpolate(y, size=sources[2].size()[ + 2:], mode='bilinear', align_corners=False) y = torch.cat([y, sources[2]], dim=1) y = self.upconv2(y) - y = F.interpolate(y, size=sources[3].size()[2:], mode='bilinear', align_corners=False) + y = F.interpolate(y, size=sources[3].size()[ + 2:], mode='bilinear', align_corners=False) y = torch.cat([y, sources[3]], dim=1) y = self.upconv3(y) - y = F.interpolate(y, size=sources[4].size()[2:], mode='bilinear', align_corners=False) + y = F.interpolate(y, size=sources[4].size()[ + 2:], mode='bilinear', align_corners=False) y = torch.cat([y, sources[4]], dim=1) feature = self.upconv4(y) @@ -106,7 +113,8 @@ def forward(self, x): return y.permute(0, 2, 3, 1), feature + if __name__ == '__main__': model = CRAFT(pretrained=True).cuda() output, _ = model(torch.randn(1, 3, 768, 768).cuda()) - print(output.shape) \ No newline at end of file + print(output.shape) diff --git a/trainer/craft/model/vgg16_bn.py b/trainer/craft/model/vgg16_bn.py index f3f21a79e..50f204c20 100644 --- a/trainer/craft/model/vgg16_bn.py +++ b/trainer/craft/model/vgg16_bn.py @@ -6,6 +6,7 @@ from torchvision import models from torchvision.models.vgg import model_urls + def init_weights(modules): for m in modules: if isinstance(m, nn.Conv2d): @@ -19,11 +20,14 @@ def init_weights(modules): m.weight.data.normal_(0, 0.01) m.bias.data.zero_() + class vgg16_bn(torch.nn.Module): def __init__(self, pretrained=True, freeze=True): super(vgg16_bn, self).__init__() - model_urls['vgg16_bn'] = model_urls['vgg16_bn'].replace('https://', 'http://') - vgg_pretrained_features = models.vgg16_bn(pretrained=pretrained).features + model_urls['vgg16_bn'] = model_urls['vgg16_bn'].replace( + 'https://', 'http://') + vgg_pretrained_features = models.vgg16_bn( + pretrained=pretrained).features self.slice1 = torch.nn.Sequential() self.slice2 = torch.nn.Sequential() self.slice3 = torch.nn.Sequential() @@ -40,9 +44,9 @@ def __init__(self, pretrained=True, freeze=True): # fc6, fc7 without atrous conv self.slice5 = torch.nn.Sequential( - nn.MaxPool2d(kernel_size=3, stride=1, padding=1), - nn.Conv2d(512, 1024, 
kernel_size=3, padding=6, dilation=6), - nn.Conv2d(1024, 1024, kernel_size=1) + nn.MaxPool2d(kernel_size=3, stride=1, padding=1), + nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6), + nn.Conv2d(1024, 1024, kernel_size=1) ) if not pretrained: @@ -51,11 +55,12 @@ def __init__(self, pretrained=True, freeze=True): init_weights(self.slice3.modules()) init_weights(self.slice4.modules()) - init_weights(self.slice5.modules()) # no pretrained model for fc6 and fc7 + # no pretrained model for fc6 and fc7 + init_weights(self.slice5.modules()) if freeze: for param in self.slice1.parameters(): # only first conv - param.requires_grad= False + param.requires_grad = False def forward(self, X): h = self.slice1(X) @@ -68,6 +73,7 @@ def forward(self, X): h_relu5_3 = h h = self.slice5(h) h_fc7 = h - vgg_outputs = namedtuple("VggOutputs", ['fc7', 'relu5_3', 'relu4_3', 'relu3_2', 'relu2_2']) + vgg_outputs = namedtuple( + "VggOutputs", ['fc7', 'relu5_3', 'relu4_3', 'relu3_2', 'relu2_2']) out = vgg_outputs(h_fc7, h_relu5_3, h_relu4_3, h_relu3_2, h_relu2_2) return out diff --git a/trainer/craft/train.py b/trainer/craft/train.py index de441cf2e..23f29086d 100644 --- a/trainer/craft/train.py +++ b/trainer/craft/train.py @@ -84,7 +84,8 @@ def get_load_param(self, gpu): if self.config.train.ckpt_path is not None: map_location = "cuda:%d" % gpu - param = torch.load(self.config.train.ckpt_path, map_location=map_location) + param = torch.load(self.config.train.ckpt_path, + map_location=map_location) else: param = None @@ -109,7 +110,8 @@ def iou_eval(self, dataset, train_step, buffer, model): test_config = DotDict(self.config.test[dataset]) val_result_dir = os.path.join( - self.config.results_dir, "{}/{}".format(dataset + "_iou", str(train_step)) + self.config.results_dir, "{}/{}".format( + dataset + "_iou", str(train_step)) ) evaluator = DetectionIoUEvaluator() @@ -143,7 +145,8 @@ def train(self, buffer_dict): # SUPERVISION model if self.config.mode == "weak_supervision": if self.config.train.backbone == "vgg": - supervision_model = CRAFT(pretrained=False, amp=self.config.train.amp) + supervision_model = CRAFT( + pretrained=False, amp=self.config.train.amp) else: raise Exception("Undefined architecture") @@ -153,7 +156,8 @@ def train(self, buffer_dict): supervision_model.load_state_dict( copyStateDict(supervision_param["craft"]) ) - supervision_model = supervision_model.to(f"cuda:{supervision_device}") + supervision_model = supervision_model.to( + f"cuda:{supervision_device}") print(f"Supervision model loading on : gpu {supervision_device}") else: supervision_model, supervision_device = None, None @@ -204,7 +208,8 @@ def train(self, buffer_dict): ) if self.config.train.ckpt_path is not None and self.config.train.st_iter != 0: - optimizer.load_state_dict(copyStateDict(self.net_param["optimizer"])) + optimizer.load_state_dict( + copyStateDict(self.net_param["optimizer"])) self.config.train.st_iter = self.net_param["optimizer"]["state"][0]["step"] self.config.train.lr = self.net_param["optimizer"]["param_groups"][0]["lr"] @@ -239,10 +244,10 @@ def train(self, buffer_dict): for ( index, ( - images, - region_scores, - affinity_scores, - confidence_masks, + images, + region_scores, + affinity_scores, + confidence_masks, ), ) in enumerate(trn_real_loader): craft.train() @@ -268,14 +273,16 @@ def train(self, buffer_dict): syn_image = syn_image.cuda(non_blocking=True) syn_region_label = syn_region_label.cuda(non_blocking=True) syn_affi_label = syn_affi_label.cuda(non_blocking=True) - syn_confidence_mask = 
syn_confidence_mask.cuda(non_blocking=True) + syn_confidence_mask = syn_confidence_mask.cuda( + non_blocking=True) # concat syn & custom image images = torch.cat((syn_image, images), 0) region_image_label = torch.cat( (syn_region_label, region_scores), 0 ) - affinity_image_label = torch.cat((syn_affi_label, affinity_scores), 0) + affinity_image_label = torch.cat( + (syn_affi_label, affinity_scores), 0) confidence_mask_label = torch.cat( (syn_confidence_mask, confidence_masks), 0 ) @@ -337,7 +344,8 @@ def train(self, buffer_dict): "{}, training_step: {}|{}, learning rate: {:.8f}, " "training_loss: {:.5f}, avg_batch_time: {:.5f}".format( time.strftime( - "%Y-%m-%d:%H:%M:%S", time.localtime(time.time()) + "%Y-%m-%d:%H:%M:%S", time.localtime( + time.time()) ), train_step, whole_training_step, @@ -348,7 +356,8 @@ def train(self, buffer_dict): ) if self.config.wandb_opt: - wandb.log({"train_step": train_step, "mean_loss": mean_loss}) + wandb.log({"train_step": train_step, + "mean_loss": mean_loss}) if ( train_step % self.config.train.eval_interval == 0 @@ -364,19 +373,19 @@ def train(self, buffer_dict): "optimizer": optimizer.state_dict(), } save_param_path = ( - self.config.results_dir - + "/CRAFT_clr_" - + repr(train_step) - + ".pth" + self.config.results_dir + + "/CRAFT_clr_" + + repr(train_step) + + ".pth" ) if self.config.train.amp: save_param_dic["scaler"] = scaler.state_dict() save_param_path = ( - self.config.results_dir - + "/CRAFT_clr_amp_" - + repr(train_step) - + ".pth" + self.config.results_dir + + "/CRAFT_clr_amp_" + + repr(train_step) + + ".pth" ) torch.save(save_param_dic, save_param_path) @@ -405,16 +414,16 @@ def train(self, buffer_dict): "optimizer": optimizer.state_dict(), } save_param_path = ( - self.config.results_dir + "/CRAFT_clr_" + repr(train_step) + ".pth" + self.config.results_dir + "/CRAFT_clr_" + repr(train_step) + ".pth" ) if self.config.train.amp: save_param_dic["scaler"] = scaler.state_dict() save_param_path = ( - self.config.results_dir - + "/CRAFT_clr_amp_" - + repr(train_step) - + ".pth" + self.config.results_dir + + "/CRAFT_clr_amp_" + + repr(train_step) + + ".pth" ) torch.save(save_param_dic, save_param_path) @@ -450,7 +459,8 @@ def main(): # Duplicate yaml file to result_dir shutil.copy( - "config/" + args.yaml + ".yaml", os.path.join(res_dir, args.yaml) + ".yaml" + "config/" + args.yaml + + ".yaml", os.path.join(res_dir, args.yaml) + ".yaml" ) if config["mode"] == "weak_supervision": @@ -458,7 +468,6 @@ def main(): else: mode = None - # Apply config to wandb if config["wandb_opt"]: wandb.init(project="craft-stage2", entity="user_name", name=exp_name) @@ -467,7 +476,7 @@ def main(): config = DotDict(config) # Start train - buffer_dict = {"custom_data":None} + buffer_dict = {"custom_data": None} trainer = Trainer(config, 0, mode) trainer.train(buffer_dict) diff --git a/trainer/craft/trainSynth.py b/trainer/craft/trainSynth.py index 4d1d0dc72..4465c98ef 100644 --- a/trainer/craft/trainSynth.py +++ b/trainer/craft/trainSynth.py @@ -64,7 +64,8 @@ def get_trn_loader(self): def get_load_param(self, gpu): if self.config.train.ckpt_path is not None: map_location = {"cuda:%d" % 0: "cuda:%d" % gpu} - param = torch.load(self.config.train.ckpt_path, map_location=map_location) + param = torch.load(self.config.train.ckpt_path, + map_location=map_location) else: param = None return param @@ -88,7 +89,8 @@ def iou_eval(self, dataset, train_step, save_param_path, buffer, model): test_config = DotDict(self.config.test[dataset]) val_result_dir = os.path.join( - 
self.config.results_dir, "{}/{}".format(dataset + "_iou", str(train_step)) + self.config.results_dir, "{}/{}".format( + dataset + "_iou", str(train_step)) ) evaluator = DetectionIoUEvaluator() @@ -130,7 +132,8 @@ def train(self, buffer_dict): craft.load_state_dict(copyStateDict(self.net_param["craft"])) craft = nn.SyncBatchNorm.convert_sync_batchnorm(craft) craft = craft.cuda() - craft = torch.nn.parallel.DistributedDataParallel(craft, device_ids=[self.gpu]) + craft = torch.nn.parallel.DistributedDataParallel( + craft, device_ids=[self.gpu]) torch.backends.cudnn.benchmark = True @@ -143,7 +146,8 @@ def train(self, buffer_dict): ) if self.config.train.ckpt_path is not None and self.config.train.st_iter != 0: - optimizer.load_state_dict(copyStateDict(self.net_param["optimizer"])) + optimizer.load_state_dict( + copyStateDict(self.net_param["optimizer"])) self.config.train.st_iter = self.net_param["optimizer"]["state"][0]["step"] self.config.train.lr = self.net_param["optimizer"]["param_groups"][0]["lr"] @@ -247,7 +251,8 @@ def train(self, buffer_dict): "{}, training_step: {}|{}, learning rate: {:.8f}, " "training_loss: {:.5f}, avg_batch_time: {:.5f}".format( time.strftime( - "%Y-%m-%d:%H:%M:%S", time.localtime(time.time()) + "%Y-%m-%d:%H:%M:%S", time.localtime( + time.time()) ), train_step, whole_training_step, @@ -257,7 +262,8 @@ def train(self, buffer_dict): ) ) if self.gpu == 0 and self.config.wandb_opt: - wandb.log({"train_step": train_step, "mean_loss": mean_loss}) + wandb.log({"train_step": train_step, + "mean_loss": mean_loss}) if ( train_step % self.config.train.eval_interval == 0 @@ -316,7 +322,8 @@ def train(self, buffer_dict): "optimizer": optimizer.state_dict(), } save_param_path = ( - self.config.results_dir + "/CRAFT_clr_" + repr(train_step) + ".pth" + self.config.results_dir + "/CRAFT_clr_" + + repr(train_step) + ".pth" ) if self.config.train.amp: @@ -361,14 +368,16 @@ def main(): # Duplicate yaml file to result_dir shutil.copy( - "config/" + args.yaml + ".yaml", os.path.join(res_dir, args.yaml) + ".yaml" + "config/" + args.yaml + + ".yaml", os.path.join(res_dir, args.yaml) + ".yaml" ) ngpus_per_node = torch.cuda.device_count() print(f"Total device num : {ngpus_per_node}") manager = mp.Manager() - buffer1 = manager.list([None] * config["test"]["icdar2013"]["test_set_size"]) + buffer1 = manager.list([None] * config["test"] + ["icdar2013"]["test_set_size"]) buffer_dict = {"icdar2013": buffer1} torch.multiprocessing.spawn( main_worker, @@ -404,5 +413,6 @@ def main_worker(gpu, port, ngpus_per_node, config, buffer_dict, exp_name): wandb.finish() torch.distributed.destroy_process_group() + if __name__ == "__main__": main() diff --git a/trainer/craft/train_distributed.py b/trainer/craft/train_distributed.py index 8ab320c19..017c2116a 100644 --- a/trainer/craft/train_distributed.py +++ b/trainer/craft/train_distributed.py @@ -87,7 +87,8 @@ def get_load_param(self, gpu): if self.config.train.ckpt_path is not None: map_location = "cuda:%d" % gpu - param = torch.load(self.config.train.ckpt_path, map_location=map_location) + param = torch.load(self.config.train.ckpt_path, + map_location=map_location) else: param = None @@ -112,7 +113,8 @@ def iou_eval(self, dataset, train_step, buffer, model): test_config = DotDict(self.config.test[dataset]) val_result_dir = os.path.join( - self.config.results_dir, "{}/{}".format(dataset + "_iou", str(train_step)) + self.config.results_dir, "{}/{}".format( + dataset + "_iou", str(train_step)) ) evaluator = DetectionIoUEvaluator() @@ -147,7 +149,8 @@ def 
train(self, buffer_dict): # SUPERVISION model if self.config.mode == "weak_supervision": if self.config.train.backbone == "vgg": - supervision_model = CRAFT(pretrained=False, amp=self.config.train.amp) + supervision_model = CRAFT( + pretrained=False, amp=self.config.train.amp) else: raise Exception("Undefined architecture") @@ -158,7 +161,8 @@ def train(self, buffer_dict): supervision_model.load_state_dict( copyStateDict(supervision_param["craft"]) ) - supervision_model = supervision_model.to(f"cuda:{supervision_device}") + supervision_model = supervision_model.to( + f"cuda:{supervision_device}") print(f"Supervision model loading on : gpu {supervision_device}") else: supervision_model, supervision_device = None, None @@ -174,7 +178,8 @@ def train(self, buffer_dict): craft = nn.SyncBatchNorm.convert_sync_batchnorm(craft) craft = craft.cuda() - craft = torch.nn.parallel.DistributedDataParallel(craft, device_ids=[self.gpu]) + craft = torch.nn.parallel.DistributedDataParallel( + craft, device_ids=[self.gpu]) torch.backends.cudnn.benchmark = True @@ -214,7 +219,8 @@ def train(self, buffer_dict): ) if self.config.train.ckpt_path is not None and self.config.train.st_iter != 0: - optimizer.load_state_dict(copyStateDict(self.net_param["optimizer"])) + optimizer.load_state_dict( + copyStateDict(self.net_param["optimizer"])) self.config.train.st_iter = self.net_param["optimizer"]["state"][0]["step"] self.config.train.lr = self.net_param["optimizer"]["param_groups"][0]["lr"] @@ -279,14 +285,16 @@ def train(self, buffer_dict): syn_image = syn_image.cuda(non_blocking=True) syn_region_label = syn_region_label.cuda(non_blocking=True) syn_affi_label = syn_affi_label.cuda(non_blocking=True) - syn_confidence_mask = syn_confidence_mask.cuda(non_blocking=True) + syn_confidence_mask = syn_confidence_mask.cuda( + non_blocking=True) # concat syn & custom image images = torch.cat((syn_image, images), 0) region_image_label = torch.cat( (syn_region_label, region_scores), 0 ) - affinity_image_label = torch.cat((syn_affi_label, affinity_scores), 0) + affinity_image_label = torch.cat( + (syn_affi_label, affinity_scores), 0) confidence_mask_label = torch.cat( (syn_confidence_mask, confidence_masks), 0 ) @@ -348,7 +356,8 @@ def train(self, buffer_dict): "{}, training_step: {}|{}, learning rate: {:.8f}, " "training_loss: {:.5f}, avg_batch_time: {:.5f}".format( time.strftime( - "%Y-%m-%d:%H:%M:%S", time.localtime(time.time()) + "%Y-%m-%d:%H:%M:%S", time.localtime( + time.time()) ), train_step, whole_training_step, @@ -359,7 +368,8 @@ def train(self, buffer_dict): ) if self.gpu == 0 and self.config.wandb_opt: - wandb.log({"train_step": train_step, "mean_loss": mean_loss}) + wandb.log({"train_step": train_step, + "mean_loss": mean_loss}) if ( train_step % self.config.train.eval_interval == 0 @@ -422,7 +432,8 @@ def train(self, buffer_dict): "optimizer": optimizer.state_dict(), } save_param_path = ( - self.config.results_dir + "/CRAFT_clr_" + repr(train_step) + ".pth" + self.config.results_dir + "/CRAFT_clr_" + + repr(train_step) + ".pth" ) if self.config.train.amp: @@ -435,6 +446,7 @@ def train(self, buffer_dict): ) torch.save(save_param_dic, save_param_path) + def main(): parser = argparse.ArgumentParser(description="CRAFT custom data train") parser.add_argument( @@ -466,7 +478,8 @@ def main(): # Duplicate yaml file to result_dir shutil.copy( - "config/" + args.yaml + ".yaml", os.path.join(res_dir, args.yaml) + ".yaml" + "config/" + args.yaml + + ".yaml", os.path.join(res_dir, args.yaml) + ".yaml" ) if config["mode"] == 
"weak_supervision": @@ -480,7 +493,8 @@ def main(): print(f"Total process num : {ngpus_per_node}") manager = mp.Manager() - buffer1 = manager.list([None] * config["test"]["custom_data"]["test_set_size"]) + buffer1 = manager.list([None] * config["test"] + ["custom_data"]["test_set_size"]) buffer_dict = {"custom_data": buffer1} torch.multiprocessing.spawn( @@ -519,5 +533,6 @@ def main_worker(gpu, port, ngpus_per_node, config, buffer_dict, exp_name, mode): torch.distributed.barrier() torch.distributed.destroy_process_group() + if __name__ == "__main__": main() diff --git a/trainer/craft/utils/craft_utils.py b/trainer/craft/utils/craft_utils.py index f5d39df4e..fd1ad19ee 100644 --- a/trainer/craft/utils/craft_utils.py +++ b/trainer/craft/utils/craft_utils.py @@ -11,16 +11,17 @@ # unwarp corodinates - - def warpCoord(Minv, pt): out = np.matmul(Minv, (pt[0], pt[1], 1)) return np.array([out[0]/out[2], out[1]/out[2]]) + + """ end of auxilary functions """ + def test(): print('pass') - + def getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text): # prepare data @@ -34,39 +35,49 @@ def getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text) text_score_comb = np.clip(text_score + link_score, 0, 1) nLabels, labels, stats, centroids = \ - cv2.connectedComponentsWithStats(text_score_comb.astype(np.uint8), connectivity=4) + cv2.connectedComponentsWithStats( + text_score_comb.astype(np.uint8), connectivity=4) det = [] mapper = [] - for k in range(1,nLabels): + for k in range(1, nLabels): # size filtering size = stats[k, cv2.CC_STAT_AREA] - if size < 10: continue + if size < 10: + continue # thresholding - if np.max(textmap[labels==k]) < text_threshold: continue + if np.max(textmap[labels == k]) < text_threshold: + continue # make segmentation map segmap = np.zeros(textmap.shape, dtype=np.uint8) - segmap[labels==k] = 255 - segmap[np.logical_and(link_score==1, text_score==0)] = 0 # remove link area + segmap[labels == k] = 255 + # remove link area + segmap[np.logical_and(link_score == 1, text_score == 0)] = 0 x, y = stats[k, cv2.CC_STAT_LEFT], stats[k, cv2.CC_STAT_TOP] w, h = stats[k, cv2.CC_STAT_WIDTH], stats[k, cv2.CC_STAT_HEIGHT] niter = int(math.sqrt(size * min(w, h) / (w * h)) * 2) sx, ex, sy, ey = x - niter, x + w + niter + 1, y - niter, y + h + niter + 1 # boundary check - if sx < 0 : sx = 0 - if sy < 0 : sy = 0 - if ex >= img_w: ex = img_w - if ey >= img_h: ey = img_h - kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(1 + niter, 1 + niter)) - segmap[sy:ey, sx:ex] = cv2.dilate(segmap[sy:ey, sx:ex], kernel, iterations=1) + if sx < 0: + sx = 0 + if sy < 0: + sy = 0 + if ex >= img_w: + ex = img_w + if ey >= img_h: + ey = img_h + kernel = cv2.getStructuringElement( + cv2.MORPH_RECT, (1 + niter, 1 + niter)) + segmap[sy:ey, sx:ex] = cv2.dilate( + segmap[sy:ey, sx:ex], kernel, iterations=1) #kernel1 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 5)) #segmap[sy:ey, sx:ex] = cv2.dilate(segmap[sy:ey, sx:ex], kernel1, iterations=1) - # make box - np_contours = np.roll(np.array(np.where(segmap!=0)),1,axis=0).transpose().reshape(-1,2) + np_contours = np.roll(np.array(np.where(segmap != 0)), + 1, axis=0).transpose().reshape(-1, 2) rectangle = cv2.minAreaRect(np_contours) box = cv2.boxPoints(rectangle) @@ -74,8 +85,8 @@ def getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text) w, h = np.linalg.norm(box[0] - box[1]), np.linalg.norm(box[1] - box[2]) box_ratio = max(w, h) / (min(w, h) + 1e-5) if abs(1 - box_ratio) <= 0.1: - l, r = 
min(np_contours[:,0]), max(np_contours[:,0]) - t, b = min(np_contours[:,1]), max(np_contours[:,1]) + l, r = min(np_contours[:, 0]), max(np_contours[:, 0]) + t, b = min(np_contours[:, 1]), max(np_contours[:, 1]) box = np.array([[l, t], [r, t], [r, b], [l, b]], dtype=np.float32) # make clock-wise order @@ -88,6 +99,7 @@ def getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text) return det, labels, mapper + def getPoly_core(boxes, labels, mapper, linkmap): # configs num_cp = 5 @@ -96,21 +108,25 @@ def getPoly_core(boxes, labels, mapper, linkmap): max_r = 2.0 step_r = 0.2 - polys = [] + polys = [] for k, box in enumerate(boxes): # size filter for small instance - w, h = int(np.linalg.norm(box[0] - box[1]) + 1), int(np.linalg.norm(box[1] - box[2]) + 1) + w, h = int(np.linalg.norm(box[0] - box[1]) + + 1), int(np.linalg.norm(box[1] - box[2]) + 1) if w < 30 or h < 30: - polys.append(None); continue + polys.append(None) + continue # warp image - tar = np.float32([[0,0],[w,0],[w,h],[0,h]]) + tar = np.float32([[0, 0], [w, 0], [w, h], [0, h]]) M = cv2.getPerspectiveTransform(box, tar) - word_label = cv2.warpPerspective(labels, M, (w, h), flags=cv2.INTER_NEAREST) + word_label = cv2.warpPerspective( + labels, M, (w, h), flags=cv2.INTER_NEAREST) try: Minv = np.linalg.inv(M) except: - polys.append(None); continue + polys.append(None) + continue # binarization for selected label cur_label = mapper[k] @@ -122,15 +138,18 @@ def getPoly_core(boxes, labels, mapper, linkmap): cp = [] max_len = -1 for i in range(w): - region = np.where(word_label[:,i] != 0)[0] - if len(region) < 2 : continue + region = np.where(word_label[:, i] != 0)[0] + if len(region) < 2: + continue cp.append((i, region[0], region[-1])) length = region[-1] - region[0] + 1 - if length > max_len: max_len = length + if length > max_len: + max_len = length # pass if max_len is similar to h if h * max_len_ratio < max_len: - polys.append(None); continue + polys.append(None) + continue # get pivot points with fixed length tot_seg = num_cp * 2 + 1 @@ -141,12 +160,14 @@ def getPoly_core(boxes, labels, mapper, linkmap): seg_num = 0 num_sec = 0 prev_h = -1 - for i in range(0,len(cp)): + for i in range(0, len(cp)): (x, sy, ey) = cp[i] if (seg_num + 1) * seg_w <= x and seg_num <= tot_seg: # average previous segment - if num_sec == 0: break - cp_section[seg_num] = [cp_section[seg_num][0] / num_sec, cp_section[seg_num][1] / num_sec] + if num_sec == 0: + break + cp_section[seg_num] = [cp_section[seg_num][0] / + num_sec, cp_section[seg_num][1] / num_sec] num_sec = 0 # reset variables @@ -156,10 +177,12 @@ def getPoly_core(boxes, labels, mapper, linkmap): # accumulate center points cy = (sy + ey) * 0.5 cur_h = ey - sy + 1 - cp_section[seg_num] = [cp_section[seg_num][0] + x, cp_section[seg_num][1] + cy] + cp_section[seg_num] = [cp_section[seg_num] + [0] + x, cp_section[seg_num][1] + cy] num_sec += 1 - if seg_num % 2 == 0: continue # No polygon area + if seg_num % 2 == 0: + continue # No polygon area if prev_h < cur_h: pp[int((seg_num - 1)/2)] = (x, cy) @@ -168,11 +191,13 @@ def getPoly_core(boxes, labels, mapper, linkmap): # processing last segment if num_sec != 0: - cp_section[-1] = [cp_section[-1][0] / num_sec, cp_section[-1][1] / num_sec] + cp_section[-1] = [cp_section[-1][0] / + num_sec, cp_section[-1][1] / num_sec] - # pass if num of pivots is not sufficient or segment widh is smaller than character height + # pass if num of pivots is not sufficient or segment widh is smaller than character height if None in pp or seg_w < 
np.max(seg_height) * 0.25: - polys.append(None); continue + polys.append(None) + continue # calc median maximum of pivot points half_char_h = np.median(seg_height) * expand_ratio / 2 @@ -191,15 +216,18 @@ def getPoly_core(boxes, labels, mapper, linkmap): # get edge points to cover character heatmaps isSppFound, isEppFound = False, False - grad_s = (pp[1][1] - pp[0][1]) / (pp[1][0] - pp[0][0]) + (pp[2][1] - pp[1][1]) / (pp[2][0] - pp[1][0]) - grad_e = (pp[-2][1] - pp[-1][1]) / (pp[-2][0] - pp[-1][0]) + (pp[-3][1] - pp[-2][1]) / (pp[-3][0] - pp[-2][0]) + grad_s = (pp[1][1] - pp[0][1]) / (pp[1][0] - pp[0][0]) + \ + (pp[2][1] - pp[1][1]) / (pp[2][0] - pp[1][0]) + grad_e = (pp[-2][1] - pp[-1][1]) / (pp[-2][0] - pp[-1][0]) + \ + (pp[-3][1] - pp[-2][1]) / (pp[-3][0] - pp[-2][0]) for r in np.arange(0.5, max_r, step_r): dx = 2 * half_char_h * r if not isSppFound: line_img = np.zeros(word_label.shape, dtype=np.uint8) dy = grad_s * dx p = np.array(new_pp[0]) - np.array([dx, dy, dx, dy]) - cv2.line(line_img, (int(p[0]), int(p[1])), (int(p[2]), int(p[3])), 1, thickness=1) + cv2.line(line_img, (int(p[0]), int(p[1])), + (int(p[2]), int(p[3])), 1, thickness=1) if np.sum(np.logical_and(word_label, line_img)) == 0 or r + 2 * step_r >= max_r: spp = p isSppFound = True @@ -207,7 +235,8 @@ def getPoly_core(boxes, labels, mapper, linkmap): line_img = np.zeros(word_label.shape, dtype=np.uint8) dy = grad_e * dx p = np.array(new_pp[-1]) + np.array([dx, dy, dx, dy]) - cv2.line(line_img, (int(p[0]), int(p[1])), (int(p[2]), int(p[3])), 1, thickness=1) + cv2.line(line_img, (int(p[0]), int(p[1])), + (int(p[2]), int(p[3])), 1, thickness=1) if np.sum(np.logical_and(word_label, line_img)) == 0 or r + 2 * step_r >= max_r: epp = p isEppFound = True @@ -216,7 +245,8 @@ def getPoly_core(boxes, labels, mapper, linkmap): # pass if boundary of polygon is not found if not (isSppFound and isEppFound): - polys.append(None); continue + polys.append(None) + continue # make final polygon poly = [] @@ -234,8 +264,10 @@ def getPoly_core(boxes, labels, mapper, linkmap): return polys + def getDetBoxes(textmap, linkmap, text_threshold, link_threshold, low_text, poly=False): - boxes, labels, mapper = getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text) + boxes, labels, mapper = getDetBoxes_core( + textmap, linkmap, text_threshold, link_threshold, low_text) if poly: polys = getPoly_core(boxes, labels, mapper, linkmap) @@ -244,7 +276,8 @@ def getDetBoxes(textmap, linkmap, text_threshold, link_threshold, low_text, poly return boxes, polys -def adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net = 2): + +def adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net=2): if len(polys) > 0: polys = np.array(polys) for k in range(len(polys)): @@ -252,8 +285,9 @@ def adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net = 2): polys[k] *= (ratio_w * ratio_net, ratio_h * ratio_net) return polys + def save_outputs(image, region_scores, affinity_scores, text_threshold, link_threshold, - low_text, outoput_path, confidence_mask = None): + low_text, outoput_path, confidence_mask=None): """save image, region_scores, and affinity_scores in a single image. region_scores and affinity_scores must be cpu numpy arrays. 
You can convert GPU Tensors to CPU numpy arrays like this: >>> array = tensor.cpu().data.numpy() @@ -274,26 +308,31 @@ def save_outputs(image, region_scores, affinity_scores, text_threshold, link_thr assert len(image.shape) - 1 == len(region_scores.shape) boxes, polys = getDetBoxes(region_scores, affinity_scores, text_threshold, link_threshold, - low_text, False) + low_text, False) boxes = np.array(boxes, np.int32) * 2 if len(boxes) > 0: np.clip(boxes[:, :, 0], 0, image.shape[1]) np.clip(boxes[:, :, 1], 0, image.shape[0]) for box in boxes: - cv2.polylines(image, [np.reshape(box, (-1, 1, 2))], True, (0, 0, 255)) + cv2.polylines( + image, [np.reshape(box, (-1, 1, 2))], True, (0, 0, 255)) target_gaussian_heatmap_color = imgproc.cvt2HeatmapImg(region_scores) - target_gaussian_affinity_heatmap_color = imgproc.cvt2HeatmapImg(affinity_scores) + target_gaussian_affinity_heatmap_color = imgproc.cvt2HeatmapImg( + affinity_scores) if confidence_mask is not None: confidence_mask_gray = imgproc.cvt2HeatmapImg(confidence_mask) - gt_scores = np.hstack([target_gaussian_heatmap_color, target_gaussian_affinity_heatmap_color]) - confidence_mask_gray = np.hstack([np.zeros_like(confidence_mask_gray), confidence_mask_gray]) + gt_scores = np.hstack( + [target_gaussian_heatmap_color, target_gaussian_affinity_heatmap_color]) + confidence_mask_gray = np.hstack( + [np.zeros_like(confidence_mask_gray), confidence_mask_gray]) output = np.concatenate([gt_scores, confidence_mask_gray], axis=0) output = np.hstack([image, output]) else: - gt_scores = np.concatenate([target_gaussian_heatmap_color, target_gaussian_affinity_heatmap_color], axis=0) + gt_scores = np.concatenate( + [target_gaussian_heatmap_color, target_gaussian_affinity_heatmap_color], axis=0) output = np.hstack([image, gt_scores]) cv2.imwrite(outoput_path, output) @@ -301,8 +340,7 @@ def save_outputs(image, region_scores, affinity_scores, text_threshold, link_thr def save_outputs_from_tensors(images, region_scores, affinity_scores, text_threshold, link_threshold, - low_text, output_dir, image_names, confidence_mask = None): - + low_text, output_dir, image_names, confidence_mask=None): """takes images, region_scores, and affinity_scores as tensors (cab be GPU). 
:param images: 4D tensor :param region_scores: 3D tensor with values between 0 ~ 1 @@ -335,11 +373,11 @@ def save_outputs_from_tensors(images, region_scores, affinity_scores, text_thres affinity_score = affinity_scores[i] image_name = os.path.basename(image_names[i]) - outoput_path = os.path.join(output_dir,image_name) + outoput_path = os.path.join(output_dir, image_name) output_image = save_outputs(image, region_score, affinity_score, text_threshold, link_threshold, - low_text, outoput_path, confidence_mask=confidence_mask) + low_text, outoput_path, confidence_mask=confidence_mask) output_images.append(output_image) - return output_images \ No newline at end of file + return output_images diff --git a/trainer/craft/utils/inference_boxes.py b/trainer/craft/utils/inference_boxes.py index e334395bb..4c65ded8c 100644 --- a/trainer/craft/utils/inference_boxes.py +++ b/trainer/craft/utils/inference_boxes.py @@ -27,6 +27,7 @@ def rotatePoint(xc, yc, xp, yp, theta): # pRes = (xc + pResx, yc + pResy) return int(xc + pResx), int(yc + pResy) + def addRotatedShape(cx, cy, w, h, angle): p0x, p0y = rotatePoint(cx, cy, cx - w / 2, cy - h / 2, -angle) p1x, p1y = rotatePoint(cx, cy, cx + w / 2, cy - h / 2, -angle) @@ -37,6 +38,7 @@ def addRotatedShape(cx, cy, w, h, angle): return points + def xml_parsing(xml): tree = elemTree.parse(xml) @@ -46,7 +48,8 @@ def xml_parsing(xml): for element in iter_element: annotation = {} # Initialize the dict to store labels - annotation['name'] = element.find("name").text # Save the name tag value + annotation['name'] = element.find( + "name").text # Save the name tag value box_coords = element.iter(tag="robndbox") @@ -75,9 +78,6 @@ def xml_parsing(xml): [xmin, ymax]] annotations.append(annotation) - - - bounds = [] for i in range(len(annotations)): box_info_dict = {"points": None, "text": None, "ignore": None} @@ -92,14 +92,12 @@ def xml_parsing(xml): bounds.append(box_info_dict) - - return bounds #-------------------------------------------------------------------------------------------------------------------# -def load_prescription_gt(dataFolder): +def load_prescription_gt(dataFolder): total_img_path = [] total_imgs_bboxes = [] @@ -112,7 +110,6 @@ def load_prescription_gt(dataFolder): gt_path = os.path.join(root, file) total_imgs_bboxes.append(gt_path) - total_imgs_parsing_bboxes = [] for img_path, bbox in zip(sorted(total_img_path), sorted(total_imgs_bboxes)): # check file @@ -122,14 +119,12 @@ def load_prescription_gt(dataFolder): result_label = xml_parsing(bbox) total_imgs_parsing_bboxes.append(result_label) - return total_imgs_parsing_bboxes, sorted(total_img_path) # NOTE def load_prescription_cleval_gt(dataFolder): - total_img_path = [] total_gt_path = [] for (root, directories, files) in os.walk(dataFolder): @@ -141,7 +136,6 @@ def load_prescription_cleval_gt(dataFolder): gt_path = os.path.join(root, file) total_gt_path.append(gt_path) - total_imgs_parsing_bboxes = [] for img_path, gt_path in zip(sorted(total_img_path), sorted(total_gt_path)): # check file @@ -216,7 +210,8 @@ def load_icdar2015_gt(dataFolder, isTraing=False): total_imgs_bboxes = [] total_img_path = [] for gt_path in gt_folder_path: - gt_path = os.path.join(os.path.join(dataFolder, gt_folderName), gt_path) + gt_path = os.path.join(os.path.join( + dataFolder, gt_folderName), gt_path) img_path = ( gt_path.replace(gt_folderName, img_folderName) .replace(".txt", ".jpg") @@ -234,7 +229,8 @@ def load_icdar2015_gt(dataFolder, isTraing=False): word = ",".join(word) box_points = np.array(box_points, 
np.int32).reshape(4, 2) cv2.polylines( - image, [np.array(box_points).astype(np.int)], True, (0, 0, 255), 1 + image, [np.array(box_points).astype( + np.int)], True, (0, 0, 255), 1 ) box_info_dict["points"] = box_points box_info_dict["text"] = word @@ -264,7 +260,8 @@ def load_icdar2013_gt(dataFolder, isTraing=False): total_imgs_bboxes = [] total_img_path = [] for gt_path in gt_folder_path: - gt_path = os.path.join(os.path.join(dataFolder, gt_folderName), gt_path) + gt_path = os.path.join(os.path.join( + dataFolder, gt_folderName), gt_path) img_path = ( gt_path.replace(gt_folderName, img_folderName) .replace(".txt", ".jpg") diff --git a/trainer/craft/utils/util.py b/trainer/craft/utils/util.py index f6c862220..31708c018 100644 --- a/trainer/craft/utils/util.py +++ b/trainer/craft/utils/util.py @@ -39,15 +39,18 @@ def saveInput( np.clip(boxes[:, :, 0], 0, image.shape[1]) np.clip(boxes[:, :, 1], 0, image.shape[0]) for box in boxes: - cv2.polylines(image, [np.reshape(box, (-1, 1, 2))], True, (0, 0, 255)) + cv2.polylines( + image, [np.reshape(box, (-1, 1, 2))], True, (0, 0, 255)) target_gaussian_heatmap_color = imgproc.cvt2HeatmapImg(region_scores) - target_gaussian_affinity_heatmap_color = imgproc.cvt2HeatmapImg(affinity_scores) + target_gaussian_affinity_heatmap_color = imgproc.cvt2HeatmapImg( + affinity_scores) confidence_mask_gray = imgproc.cvt2HeatmapImg(confidence_mask) # overlay height, width, channel = image.shape overlay_region = cv2.resize(target_gaussian_heatmap_color, (width, height)) - overlay_aff = cv2.resize(target_gaussian_affinity_heatmap_color, (width, height)) + overlay_aff = cv2.resize( + target_gaussian_affinity_heatmap_color, (width, height)) confidence_mask_gray = cv2.resize( confidence_mask_gray, (width, height), interpolation=cv2.INTER_NEAREST ) @@ -103,13 +106,15 @@ def saveImage( ) target_gaussian_heatmap_color = imgproc.cvt2HeatmapImg(region_scores) - target_gaussian_affinity_heatmap_color = imgproc.cvt2HeatmapImg(affinity_scores) + target_gaussian_affinity_heatmap_color = imgproc.cvt2HeatmapImg( + affinity_scores) confidence_mask_gray = imgproc.cvt2HeatmapImg(confidence_mask) # overlay height, width, channel = image.shape overlay_region = cv2.resize(target_gaussian_heatmap_color, (width, height)) - overlay_aff = cv2.resize(target_gaussian_affinity_heatmap_color, (width, height)) + overlay_aff = cv2.resize( + target_gaussian_affinity_heatmap_color, (width, height)) overlay_region = cv2.addWeighted(image.copy(), 0.4, overlay_region, 0.6, 5) overlay_aff = cv2.addWeighted(image.copy(), 0.4, overlay_aff, 0.6, 5) @@ -120,7 +125,8 @@ def saveImage( if type(imagename) is not str: imagename = imagename[0].split("/")[-1][:-4] - output = np.concatenate([output_image, heat_map, confidence_mask_gray], axis=1) + output = np.concatenate( + [output_image, heat_map, confidence_mask_gray], axis=1) outpath = vis_dir + f"/{imagename}.jpg" if not os.path.exists(os.path.dirname(outpath)): os.makedirs(os.path.dirname(outpath), exist_ok=True) @@ -130,7 +136,6 @@ def saveImage( def save_parser(args): - """ final options """ with open(f"{args.results_dir}/opt.txt", "a", encoding="utf-8") as opt_file: opt_log = "------------ Options -------------\n"
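
Note on reproducing this pass: the commit message records only that autopep8
generated these changes, not the exact invocation. The sketch below is
therefore an assumption (default fix set, 79-column limit, run over setup.py
and the trainer/craft tree), not the recorded DeepSource transform.

    # Sketch: reproducing an autopep8 pass like the one above.
    # CLI form (standard autopep8 flags; the exact options DeepSource
    # used are not recorded in this commit):
    #
    #   pip install autopep8
    #   autopep8 --in-place --recursive setup.py trainer/craft/
    #
    # The same fixer is available as a library:
    import autopep8

    messy = "import os,sys\ndef f( x ):\n    return x\n"
    clean = autopep8.fix_code(messy, options={"max_line_length": 79})
    print(clean)  # imports split onto separate lines, spacing normalized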
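
Since every hunk above only reflows whitespace, wraps long lines, adds blank
lines, or splits semicolon-joined statements, the patch should be
behavior-preserving. A quick way to verify that (a suggested check, not part
of this PR's tooling) is to compare each touched file's AST before and after
the patch, since comments and layout never reach the parse tree:

    import ast

    def is_format_only(before_src: str, after_src: str) -> bool:
        # ast.dump() omits line and column attributes by default, so two
        # sources that differ only in layout produce identical dumps.
        return ast.dump(ast.parse(before_src)) == ast.dump(ast.parse(after_src))

Applied to the 18 files in the diffstat, this should return True for every
before/after pair.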