From a7bb7852f2088294602f3e35dab5bd38a94a88fa Mon Sep 17 00:00:00 2001 From: tisfeng Date: Thu, 25 Jan 2024 10:17:36 +0800 Subject: [PATCH] perf(ocr): do not check if new paragraph by big line spacing --- .../Feature/Service/Apple/EZAppleService.m | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/Easydict/Feature/Service/Apple/EZAppleService.m b/Easydict/Feature/Service/Apple/EZAppleService.m index be76679f2..e2a681b15 100644 --- a/Easydict/Feature/Service/Apple/EZAppleService.m +++ b/Easydict/Feature/Service/Apple/EZAppleService.m @@ -24,7 +24,7 @@ static NSArray *const kAllowedCharactersInPoetryList = @[ @"《", @"》", @"〔", @"〕" ]; -static CGFloat const kParagraphLineHeightRatio = 1.2; +static CGFloat const kParagraphLineHeightRatio = 1.5; static NSInteger const kShortPoetryCharacterCountOfLine = 12; @@ -1211,18 +1211,6 @@ - (void)setupOCRResult:(EZOCRResult *)ocrResult CGFloat deltaY = prevBoundingBox.origin.y - (boundingBox.origin.y + boundingBox.size.height); CGFloat deltaX = boundingBox.origin.x - (prevBoundingBox.origin.x + prevBoundingBox.size.width); - // Note that line spacing is inaccurate, sometimes it's too small 😢 - BOOL isNewParagraph = NO; - if (deltaY > 0) { - // averageLineSpacing may too small, so deltaY should be much larger than averageLineSpacing - BOOL isBigLineSpacing = [self isBigSpacingLineOfTextObservation:textObservation - prevTextObservation:prevTextObservation - greaterThanLineHeightRatio:kParagraphLineHeightRatio]; - if (isBigLineSpacing) { - isNewParagraph = YES; - } - } - // Note that sometimes the line frames will overlap a little, then deltaY will less then 0 BOOL isNewLine = NO; if (deltaY > 0) { @@ -1251,10 +1239,9 @@ - (void)setupOCRResult:(EZOCRResult *)ocrResult if (isNeedRemoveLastDashOfText) { mergedText = [mergedText substringToIndex:mergedText.length - 1].mutableCopy; } - } else if (isNewParagraph || isNewLine) { + } else if (isNewLine) { joinedString = [self joinedStringOfTextObservation:textObservation - prevTextObservation:prevTextObservation - isNewParagraph:isNewParagraph]; + prevTextObservation:prevTextObservation]; } else { joinedString = @" "; // if the same line, just join two texts } @@ -1489,9 +1476,10 @@ - (BOOL)isPoetryOftextObservations:(NSArray *)tex /// Get joined string of text, according to its last char. - (NSString *)joinedStringOfTextObservation:(VNRecognizedTextObservation *)textObservation prevTextObservation:(VNRecognizedTextObservation *)prevTextObservation - isNewParagraph:(BOOL)isNewParagraph { +{ NSString *joinedString = @""; BOOL needLineBreak = NO; + BOOL isNewParagraph = NO; CGRect prevBoundingBox = prevTextObservation.boundingBox; CGFloat prevLineLength = prevBoundingBox.size.width; @@ -1509,7 +1497,7 @@ - (NSString *)joinedStringOfTextObservation:(VNRecognizedTextObservation *)textO BOOL hasPrevIndentation = [self hasIndentationOfTextObservation:prevTextObservation]; BOOL hasIndentation = [self hasIndentationOfTextObservation:textObservation]; - + BOOL isPrevLongText = [self isLongTextObservation:prevTextObservation isStrict:NO]; BOOL isEqualChineseText = [self isEqualChineseTextObservation:textObservation prevTextObservation:prevTextObservation]; @@ -1895,6 +1883,20 @@ - (BOOL)hasIndentationOfTextObservation:(VNRecognizedTextObservation *)textObser return hasIndentation; } +- (BOOL)hasIndentationOfTextObservation:(VNRecognizedTextObservation *)textObservation + prevTextObservation:(VNRecognizedTextObservation *)prevTextObservation { + BOOL isEqualX = [self isEqualXOfTextObservation:textObservation prevTextObservation:prevTextObservation]; + + CGFloat lineX = CGRectGetMinX(textObservation.boundingBox); + CGFloat prevLineX = CGRectGetMinX(prevTextObservation.boundingBox); + CGFloat dx = lineX - prevLineX; + + if (!isEqualX && dx < 0) { + return YES; + } + return NO; +} + - (BOOL)isEqualTextObservation:(VNRecognizedTextObservation *)textObservation prevTextObservation:(VNRecognizedTextObservation *)prevTextObservation { // 0.06 - 0.025