Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: #20 : comma spacing and #498 : Asian commas #891

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
Open
Prev Previous commit
Next Next commit
fix: foreign language stripper; ignore Asian commas in Asian text
  • Loading branch information
hippietrail committed Mar 22, 2025
commit cef2bc6fa99eeea24febb995ad84431d2fd00a05
14 changes: 12 additions & 2 deletions harper-core/src/linting/comma_fixes.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use super::{Lint, LintKind, Linter, Suggestion};
use crate::{Span, TokenKind, TokenStringExt};
use crate::{Span, Token, TokenKind, TokenStringExt};

const MSG_SPACE_BEFORE: &str = "Don't use a space before a comma.";
const MSG_AVOID_ASIAN: &str = "Avoid East Asian commas in English contexts.";
Expand Down Expand Up @@ -118,8 +118,13 @@ impl Linter for CommaFixes {
add_space_after = true;
}

// Handles Asian commas in all other contexts
// TokenKind::Unlintable is used for non-English tokens
// to prevent changing commas within CJK text
(None | Some(_), None | Some(_), _, None | Some(_), None | Some(_))
if comma_kind != ',' =>
if comma_kind != ','
&& !matches!(toks.1, Some(Token { kind: TokenKind::Unlintable, .. }))
&& !matches!(toks.3, Some(Token { kind: TokenKind::Unlintable, .. })) =>
{
span = toks.2.span;
suggestion = Suggestion::ReplaceWith(vec![',']);
Expand Down Expand Up @@ -242,4 +247,9 @@ mod tests {
fn corrects_asian_comma_between_words_with_space_on_both_sides() {
// An ideographic comma (、) surrounded by spaces between two English words
// is expected to become a plain ASCII comma with no space before it and a
// single space after it: "foo 、 bar" -> "foo, bar".
// NOTE(review): `assert_suggestion_result` is defined outside this excerpt;
// presumably it applies the linter's suggestions and compares the resulting
// text with the expected string — confirm against the helper.
assert_suggestion_result("foo 、 bar", CommaFixes, "foo, bar")
}

#[test]
fn doesnt_correct_comma_between_non_english_tokens() {
// The input is a Chinese sentence containing ideographic commas (、), one of
// them followed by a space, and a fullwidth comma (,). `CommaFixes` is
// expected to report zero lints for it, i.e. East Asian commas inside CJK
// text must be left alone.
// NOTE(review): this presumably relies on the surrounding CJK words being
// tokenized as `TokenKind::Unlintable` — confirm against the parser used by
// `assert_lint_count`.
assert_lint_count("严禁采摘花、 果、叶,挖掘树根、草药!", CommaFixes, 0);
}
}
2 changes: 1 addition & 1 deletion harper-core/src/parsers/isolate_english.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ impl<D: Dictionary> Parser for IsolateEnglish<D> {
let mut english_tokens: Vec<Token> = Vec::with_capacity(tokens.len());

for chunk in tokens.iter_chunks() {
if chunk.len() < 5 || is_likely_english(chunk, source, &self.dict) {
if chunk.len() < 4 || is_likely_english(chunk, source, &self.dict) {
english_tokens.extend_from_slice(chunk);
}
}
Expand Down
Loading