Skip to content

Commit

Permalink
perf: improve removing comment symbol prefix and join texts
Browse files Browse the repository at this point in the history
  • Loading branch information
tisfeng committed Nov 22, 2023
1 parent f6d94d7 commit 7e0f942
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 132 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,8 @@ NS_ASSUME_NONNULL_BEGIN
/// Remove comment block symbols, /* */
- (NSString *)removeCommentBlockSymbols;

/// Remove adjacent comment symbol prefix, // and #, and try to join texts.
- (NSString *)removeCommentSymbolPrefixAndJoinTexts;

/// Remove comment symbols, # and //
- (NSString *)removeCommentSymbols;

/// Is start with comment symbol prefix, // and #
- (BOOL)hasCommentSymbolPrefix;

/// Filter Private Use Area characters
- (NSString *)filterPrivateUseCharacters;
/// Check whether every line starts with a comment symbol: #, // or *
- (BOOL)allLineStartsWithCommentSymbol;

@end

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#import "NSString+EZSplit.h"
#import "EZAppleService.h"

static NSString *const kCommentSymbolPrefixPattern = @"^\\s*(//|#)";
static NSString *const kCommentSymbolPrefixPattern = @"^\\s*(//|#|\\*)";

@implementation NSString (EZHandleInputText)

Expand Down Expand Up @@ -51,157 +51,106 @@ - (NSString *)splitCodeText {

/// Remove comment symbols from the receiver and try to join the comment text.
///
/// If every line of the receiver starts with a comment symbol (as reported by
/// -allLineStartsWithCommentSymbol), the whole string is handed to
/// -removeCommentSymbolPrefixAndJoinTexts:. Otherwise each block comment
/// (/* ... */) found in the receiver is replaced in place by the processed
/// version of its content; text outside block comments is left untouched.
///
/// @return A new string with the comment symbols removed.
- (NSString *)removeCommentBlockSymbols {
    if ([self allLineStartsWithCommentSymbol]) {
        return [self removeCommentSymbolPrefixAndJoinTexts:self];
    }

    NSMutableString *mutableSelf = [self mutableCopy];

    // Capture group 1 is the text between the opening "/*" and the closing "*/"
    // (runs of '*' are allowed on both sides). The dot also matches line
    // separators, so multi-line block comments are matched as a whole.
    NSRegularExpression *regex = [NSRegularExpression regularExpressionWithPattern:@"/\\*+(.*?)\\*+/"
                                                                           options:NSRegularExpressionDotMatchesLineSeparators
                                                                             error:nil];
    NSArray<NSTextCheckingResult *> *results = [regex matchesInString:self
                                                              options:0
                                                                range:NSMakeRange(0, self.length)];

    // Walk the matches from last to first: replacing a later block never
    // shifts the ranges of the earlier matches, which were computed on `self`.
    for (NSTextCheckingResult *result in [results reverseObjectEnumerator]) {
        NSRange range = [result rangeAtIndex:1];
        NSString *content = [self substringWithRange:range].trim;

        // Prefix stripping and line joining live in one shared helper instead
        // of being duplicated inline here.
        NSString *modifiedBlockText = [self removeCommentSymbolPrefixAndJoinTexts:content];
        [mutableSelf replaceCharactersInRange:result.range withString:modifiedBlockText];
    }

    // Hand back an immutable snapshot rather than the working buffer.
    return [mutableSelf copy];
}

/// Measure every string in `texts` via -mm_widthWithFont:, using the system
/// font at the default system font size.
///
/// @param texts The strings to measure.
/// @return The measured widths boxed as NSNumber, in the same order as `texts`.
- (NSArray<NSNumber *> *)widthsOfTexts:(NSArray *)texts {
    NSFont *systemFont = [NSFont systemFontOfSize:NSFont.systemFontSize];
    NSMutableArray *measured = [NSMutableArray array];
    [texts enumerateObjectsUsingBlock:^(NSString *text, NSUInteger idx, BOOL *stop) {
        CGFloat width = [text mm_widthWithFont:systemFont];
        [measured addObject:@(width)];
    }];
    return measured;
}

/// Strip every occurrence of "//" and "#" from the receiver, wherever it
/// appears in the text.
///
/// @return A new string with all "//" and "#" substrings removed.
- (NSString *)removeCommentSymbols2 {
    NSRange wholeString = NSMakeRange(0, self.length);
    // NSRegularExpressionSearch makes the search string a regular expression,
    // so "//|#" means: either "//" or "#".
    return [self stringByReplacingOccurrencesOfString:@"//|#"
                                           withString:@""
                                              options:NSRegularExpressionSearch
                                                range:wholeString];
}

/// Remove standalone "//" and "#" tokens from the receiver.
///
/// A token is removed only when it sits at the start of the string or after a
/// whitespace character, and is followed by a whitespace character or the end
/// of the string. For example "# abc" becomes "abc", while "a#b" is unchanged.
///
/// @return A new string with the matching comment symbols removed.
- (NSString *)removeCommentSymbols {
    // Group 1: start of string or the whitespace character before the symbol.
    // Group 2: the comment symbol itself, "//" or "#".
    // Group 3: the whitespace character after the symbol, or end of string.
    NSString *symbolPattern = @"(^|\\s)(\\/\\/|\\#)(\\s|$)";
    NSRegularExpression *symbolRegex = [NSRegularExpression regularExpressionWithPattern:symbolPattern
                                                                                 options:0
                                                                                   error:nil];
    NSRange wholeString = NSMakeRange(0, self.length);
    // The "$1" template keeps only what preceded the symbol; the symbol and the
    // whitespace that follows it are dropped.
    return [symbolRegex stringByReplacingMatchesInString:self
                                                 options:0
                                                   range:wholeString
                                            withTemplate:@"$1"];
}
#pragma mark -


/**
// These values will persist after the process is killed by the system
// and remain available via the same object.
hi
Remove comment block prefix symbols and join texts.
// good girl.
// good boy.
hello
FIX: https://github.com/tisfeng/Easydict/issues/135#issuecomment-1818136014
*/

/// Remove the leading comment symbol from each line and try to join lines that
/// look like one wrapped sentence.
///
/// NOTE(review): two revisions of this method appear to be interleaved below:
/// a no-argument form that builds `resultText` from `self`, and a form taking
/// `content` that builds `modifiedBlockText`. There are two signature lines and
/// two `return` statements; confirm which revision is intended before editing.
- (NSString *)removeCommentSymbolPrefixAndJoinTexts {
NSArray *lines = [self componentsSeparatedByString:@"\n"];
- (NSString *)removeCommentSymbolPrefixAndJoinTexts:(NSString *)content {
NSArray<NSString *> *lines = [content componentsSeparatedByCharactersInSet:[NSCharacterSet newlineCharacterSet]];

NSArray<NSNumber *> *widths = [self widthsOfTexts:lines];
// Find the widest line; its width divided by its character count gives an
// average per-character width used as the unit for the threshold below.
NSNumber *maxWidthValue = [widths valueForKeyPath:@"@max.self"];
NSInteger maxWidthIndex = [widths indexOfObject:maxWidthValue];
CGFloat singleAlphabetWidth = maxWidthValue.floatValue / ([lines[maxWidthIndex] length]);

NSMutableString *resultText = [NSMutableString string];
BOOL previousLineIsComment = NO;
EZLanguage language = [EZAppleService.shared detectText:content];
BOOL isEnglishTypeLanguage = [EZLanguageManager.shared isLanguageWordsNeedSpace:language];
// Slack, in average character widths, allowed between a line and the widest
// line: 15 when the detected language needs spaces between words, else 1.5.
CGFloat alphabetCount = isEnglishTypeLanguage ? 15 : 1.5;

NSMutableString *modifiedBlockText = [NSMutableString string];

for (int i = 0; i < lines.count; i++) {
NSString *line = lines[i];
NSString *trimmedLine = [line stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];

if ([trimmedLine hasCommentSymbolPrefix]) {
// The current line is a comment.
trimmedLine = [trimmedLine removeCommentSymbolPrefix].trim;

if (i > 0) {
NSString *prevLine = lines[i - 1];
if (previousLineIsComment && ![prevLine hasEndPunctuationSuffix]) {
// If the previous line is a comment, join the current line onto it.
[resultText appendString:@" "];
} else {
[resultText appendString:@"\n"];
}
// Strip the leading comment symbol matched by kCommentSymbolPrefixPattern.
NSString *newText = [line stringByReplacingOccurrencesOfString:kCommentSymbolPrefixPattern
withString:@""
options:NSRegularExpressionSearch
range:NSMakeRange(0, line.length)];
if (i > 0) {
// The previous line counts as "long" when it is within `threshold` of
// the widest line's width.
BOOL isPrevLineLongText = NO;
CGFloat threshold = alphabetCount * singleAlphabetWidth;
if (maxWidthValue.floatValue - widths[i-1].floatValue <= threshold) {
isPrevLineLongText = YES;
}
BOOL isPrevLineEnd = [lines[i-1] hasEndPunctuationSuffix];
NSString *newTrimmedText = newText.trim;
// Join onto the previous line only when this line has text, the previous
// line was long, and the previous line did not end with end punctuation.
// The joined branch appends the trimmed text; the new-line branch appends
// the untrimmed text.
if (newTrimmedText.length > 0 && isPrevLineLongText && !isPrevLineEnd) {
NSString *wordConnector = isEnglishTypeLanguage ? @" " : @"";
[modifiedBlockText appendFormat:@"%@%@", wordConnector, newTrimmedText];
} else {
[modifiedBlockText appendFormat:@"\n%@", newText];
}

previousLineIsComment = YES;

} else {
[resultText appendString:@"\n"];
previousLineIsComment = NO;
[modifiedBlockText appendString:newText];
}

[resultText appendString:trimmedLine];
}

return resultText;
return modifiedBlockText;
}

// Remove comment symbol prefix, // and #
- (NSString *)removeCommentSymbolPrefix {
NSString *cleanedText = [self stringByReplacingOccurrencesOfString:kCommentSymbolPrefixPattern
withString:@""
options:NSRegularExpressionSearch
range:NSMakeRange(0, self.length)];
return cleanedText;
/// Tell whether every line of the receiver starts with a comment symbol.
///
/// The receiver is split on newline characters and every resulting piece is
/// checked with -startsWithCommentSymbol, so blank lines are checked too.
///
/// @return YES only if no piece fails the check.
- (BOOL)allLineStartsWithCommentSymbol {
    NSArray<NSString *> *pieces = [self componentsSeparatedByCharactersInSet:[NSCharacterSet newlineCharacterSet]];
    NSUInteger firstNonComment = [pieces indexOfObjectPassingTest:^BOOL(NSString *piece, NSUInteger idx, BOOL *stop) {
        return ![piece startsWithCommentSymbol];
    }];
    return firstNonComment == NSNotFound;
}

// Is start with comment symbol prefix, // and #
- (BOOL)hasCommentSymbolPrefix {
/// Whether the receiver begins with a comment symbol, i.e. whether it matches
/// the regular expression kCommentSymbolPrefixPattern.
- (BOOL)startsWithCommentSymbol {
    return [self rangeOfString:kCommentSymbolPrefixPattern
                       options:NSRegularExpressionSearch].location != NSNotFound;
}

/// Measure every string in `texts` via -mm_widthWithFont:, using the system
/// font at the default system font size.
///
/// @param texts The strings to measure.
/// @return The measured widths boxed as NSNumber, in the same order as `texts`.
- (NSArray<NSNumber *> *)widthsOfTexts:(NSArray *)texts {
    NSFont *systemFont = [NSFont systemFontOfSize:NSFont.systemFontSize];
    NSMutableArray *measured = [NSMutableArray array];
    [texts enumerateObjectsUsingBlock:^(NSString *text, NSUInteger idx, BOOL *stop) {
        CGFloat width = [text mm_widthWithFont:systemFont];
        [measured addObject:@(width)];
    }];
    return measured;
}


#pragma mark - Unused

/// Filter Private Use Area characters
- (NSString *)filterPrivateUseCharacters {
/**
Expand Down Expand Up @@ -241,4 +190,22 @@ - (NSString *)unicode {
return unicodeString;
}

/// Remove the leading comment symbol (with any whitespace before it) that
/// matches kCommentSymbolPrefixPattern.
///
/// @return A new string without the matched prefix; the receiver's text is
///         returned unchanged when nothing matches.
- (NSString *)removeCommentSymbolPrefix {
    NSRange wholeString = NSMakeRange(0, self.length);
    return [self stringByReplacingOccurrencesOfString:kCommentSymbolPrefixPattern
                                           withString:@""
                                              options:NSRegularExpressionSearch
                                                range:wholeString];
}

/// Remove standalone "//" and "#" tokens from the receiver.
///
/// A token is removed only when it sits at the start of the string or after a
/// whitespace character, and is followed by a whitespace character or the end
/// of the string. For example "# abc" becomes "abc", while "a#b" is unchanged.
///
/// @return A new string with the matching comment symbols removed.
- (NSString *)removeCommentSymbols {
    // Group 1: start of string or the whitespace character before the symbol.
    // Group 2: the comment symbol itself, "//" or "#".
    // Group 3: the whitespace character after the symbol, or end of string.
    NSString *symbolPattern = @"(^|\\s)(\\/\\/|\\#)(\\s|$)";
    NSRegularExpression *symbolRegex = [NSRegularExpression regularExpressionWithPattern:symbolPattern
                                                                                 options:0
                                                                                   error:nil];
    NSRange wholeString = NSMakeRange(0, self.length);
    // The "$1" template keeps only what preceded the symbol; the symbol and the
    // whitespace that follows it are dropped.
    return [symbolRegex stringByReplacingMatchesInString:self
                                                 options:0
                                                   range:wholeString
                                            withTemplate:@"$1"];
}

@end
4 changes: 1 addition & 3 deletions Easydict/Feature/ViewController/Model/EZQueryModel.m
Original file line number Diff line number Diff line change
Expand Up @@ -199,9 +199,7 @@ - (NSString *)handleInputText:(NSString *)inputText {
}

if (EZConfiguration.shared.isBeta) {
// Remove prefix // && #, and join texts.
queryText = [queryText removeCommentSymbolPrefixAndJoinTexts];
// Remove /* */
// Remove prefix [//,#,*,] and join texts.
queryText = [queryText removeCommentBlockSymbols];
}

Expand Down

0 comments on commit 7e0f942

Please sign in to comment.