diff --git a/python/rapidocr_onnxruntime/config.yaml b/python/rapidocr_onnxruntime/config.yaml index 3bd206126..1ebc0f75f 100644 --- a/python/rapidocr_onnxruntime/config.yaml +++ b/python/rapidocr_onnxruntime/config.yaml @@ -6,7 +6,8 @@ Global: print_verbose: false min_height: 30 width_height_ratio: 8 - + use_letterbox_like: true + Det: use_cuda: false diff --git a/python/rapidocr_onnxruntime/main.py b/python/rapidocr_onnxruntime/main.py index 269b95202..40790eac1 100644 --- a/python/rapidocr_onnxruntime/main.py +++ b/python/rapidocr_onnxruntime/main.py @@ -44,6 +44,7 @@ def __init__(self, config_path: Optional[str] = None, **kwargs): self.text_score = global_config["text_score"] self.min_height = global_config["min_height"] self.width_height_ratio = global_config["width_height_ratio"] + self.use_letterbox_like = global_config["use_letterbox_like"] self.use_det = global_config["use_det"] self.text_det = TextDetector(config["Det"]) @@ -62,11 +63,13 @@ def __call__( use_det: Optional[bool] = None, use_cls: Optional[bool] = None, use_rec: Optional[bool] = None, + use_letterbox_like: Optional[bool] = None, **kwargs, ): use_det = self.use_det if use_det is None else use_det use_cls = self.use_cls if use_cls is None else use_cls use_rec = self.use_rec if use_rec is None else use_rec + self.use_letterbox_like = self.use_letterbox_like if use_letterbox_like is None else use_letterbox_like if kwargs: box_thresh = kwargs.get("box_thresh", 0.5) @@ -83,10 +86,14 @@ def __call__( det_elapse, cls_elapse, rec_elapse = 0.0, 0.0, 0.0 if use_det: + zero_h = 0 + if self.use_letterbox_like: + img, zero_h = self.letterbox_like(img) + dt_boxes, det_elapse = self.auto_text_det(img) if dt_boxes is None: return None, None - + img = self.get_crop_img_list(img, dt_boxes) if use_cls: @@ -95,27 +102,50 @@ def __call__( if use_rec: rec_res, rec_elapse = self.text_rec(img) + if zero_h > 0 and dt_boxes is not None: + for box in dt_boxes: + box[:, 1] -= zero_h + ocr_res = self.get_final_res( dt_boxes, cls_res, rec_res, det_elapse, cls_elapse, rec_elapse ) return ocr_res + def letterbox_like(self, img) : + img_w, img_h= img.shape[1], img.shape[0] + + if self.width_height_ratio == -1: + use_limit_ratio = False + else: + use_limit_ratio = img_w / img_h > self.width_height_ratio + + if img_h <= self.min_height or use_limit_ratio: + # 居中放置 + new_h = max(int(img_w / self.width_height_ratio), self.min_height) + 1 + zero_h = int((new_h - img_h) / 2) + block_img = np.zeros((new_h, img_w, 3), dtype=np.uint8) + block_img[zero_h: zero_h + img_h, : , :] = img + return block_img, zero_h + return img, 0 + def auto_text_det( self, img: np.ndarray, ) -> Tuple[Optional[np.ndarray], float, Optional[List[np.ndarray]]]: h, w = img.shape[:2] - if self.width_height_ratio == -1: - use_limit_ratio = False - else: - use_limit_ratio = w / h > self.width_height_ratio - - if h <= self.min_height or use_limit_ratio: - logging.warning( - "Because the aspect ratio of the current image exceeds the limit (min_height or width_height_ratio), the program will skip the detection step." - ) - dt_boxes = self.get_boxes_img_without_det(h, w) - return dt_boxes, 0.0 + + if not self.use_letterbox_like: + if self.width_height_ratio == -1: + use_limit_ratio = False + else: + use_limit_ratio = w / h > self.width_height_ratio + + if h <= self.min_height or use_limit_ratio: + logging.warning( + "Because the aspect ratio of the current image exceeds the limit (min_height or width_height_ratio), the program will skip the detection step." + ) + dt_boxes = self.get_boxes_img_without_det(h, w) + return dt_boxes, 0.0 dt_boxes, det_elapse = self.text_det(img) if dt_boxes is None or len(dt_boxes) < 1: @@ -240,8 +270,9 @@ def main(): use_det = not args.no_det use_cls = not args.no_cls use_rec = not args.no_rec + use_letterbox_like = not args.no_letterbox_like result, elapse_list = ocr_engine( - args.img_path, use_det=use_det, use_cls=use_cls, use_rec=use_rec + args.img_path, use_det=use_det, use_cls=use_cls, use_rec=use_rec, use_letterbox_like = use_letterbox_like ) print(result) diff --git a/python/rapidocr_onnxruntime/utils.py b/python/rapidocr_onnxruntime/utils.py index 1eb01e618..d78dad715 100644 --- a/python/rapidocr_onnxruntime/utils.py +++ b/python/rapidocr_onnxruntime/utils.py @@ -234,7 +234,8 @@ def init_args(): global_group.add_argument("--print_verbose", action="store_true", default=False) global_group.add_argument("--min_height", type=int, default=30) global_group.add_argument("--width_height_ratio", type=int, default=8) - + global_group.add_argument("--no_letterbox_like", action="store_true", default=False) + det_group = parser.add_argument_group(title="Det") det_group.add_argument("--det_use_cuda", action="store_true", default=False) det_group.add_argument("--det_model_path", type=str, default=None)