Skip to content

Commit

Permalink
Fix the spellchecker for identifiers with apostrophes
Browse files Browse the repository at this point in the history
  • Loading branch information
carymrobbins committed Aug 14, 2020
1 parent 4285d3d commit c0771e0
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 6 deletions.
109 changes: 109 additions & 0 deletions src/com/haskforce/spellchecker/HaskellSpellcheckingSplitter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
package com.haskforce.spellchecker;

import com.intellij.openapi.progress.ProcessCanceledException;
import com.intellij.openapi.util.TextRange;
import com.intellij.openapi.util.text.StringUtil;
import com.intellij.spellchecker.inspections.BaseSplitter;
import com.intellij.spellchecker.inspections.PlainTextSplitter;
import com.intellij.spellchecker.inspections.Splitter;
import com.intellij.spellchecker.inspections.TextSplitter;
import com.intellij.util.Consumer;
import org.jdom.Verifier;
import org.jetbrains.annotations.NonNls;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static com.intellij.util.io.URLUtil.URL_PATTERN;

/**
* Shameless copy-pasta of {@link PlainTextSplitter} that also splits on
* apostrophes since they are ubiquitous in Haskell identifiers.
*/
public class HaskellSpellcheckingSplitter extends BaseSplitter {

  /**
   * Separators between candidate words: any whitespace, a literal backspace
   * character, or an apostrophe.
   *
   * <p>Note that {@code \b} is written with a single backslash, so the Java
   * compiler turns it into the backspace character (U+0008) before the regex
   * engine sees it; it is NOT the regex word-boundary assertion.
   */
  @NonNls
  private static final Pattern SPLIT_PATTERN = Pattern.compile("(\\s|\b|')");

  /** E-mail addresses; applied to words containing {@code @}. */
  @NonNls
  private static final Pattern MAIL =
    Pattern.compile("([\\p{L}0-9\\.\\-\\_\\+]+@([\\p{L}0-9\\-\\_]+(\\.)?)+(com|net|[a-z]{2})?)");

  /** Canonical 8-4-4-4-12 hexadecimal UUIDs; applied to words containing {@code -}. */
  @NonNls
  private static final Pattern UUID_PATTERN = Pattern.compile("[a-fA-F0-9]{8}(-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}");

  /**
   * Shared instance of this splitter. It must be an instance of this class
   * (not of {@link PlainTextSplitter}), otherwise the apostrophe-aware
   * {@link #split} below is never used. Declared after the patterns so all
   * static state is initialised before the instance exists.
   */
  private static final HaskellSpellcheckingSplitter INSTANCE = new HaskellSpellcheckingSplitter();

  /**
   * Returns the shared splitter that additionally splits on apostrophes.
   *
   * @return the singleton {@code HaskellSpellcheckingSplitter}
   */
  public static HaskellSpellcheckingSplitter getInstance() {
    return INSTANCE;
  }

  /**
   * Cuts {@code range} of {@code text} into words at {@link #SPLIT_PATTERN}
   * matches and forwards each word to the splitter returned by
   * {@link #getTextSplitter()}. Parts of a word matching an e-mail, URL or
   * UUID pattern are handed to {@code excludeByPattern} first.
   *
   * @param text     the full text; nothing is reported when it is null or empty
   * @param range    the part of {@code text} to examine
   * @param consumer receives the ranges produced by the delegate splitter
   */
  @Override
  public void split(@Nullable String text, @NotNull TextRange range, Consumer<TextRange> consumer) {
    if (StringUtil.isEmpty(text)) {
      return;
    }
    final Splitter ws = getTextSplitter();
    int from = range.getStartOffset();

    try {
      // Backspace and form feed are mapped to newlines before the character-data check below.
      final String substring = range.substring(text).replace('\b', '\n').replace('\f', '\n');
      // Give up on the whole range when, with separators removed, the text is rejected by the JDOM verifier.
      if (Verifier.checkCharacterData(SPLIT_PATTERN.matcher(newBombedCharSequence(substring)).replaceAll("")) != null) {
        return;
      }
      final Matcher matcher = SPLIT_PATTERN.matcher(newBombedCharSequence(text, range));

      while (true) {
        checkCancelled();
        final TextRange wRange;
        if (matcher.find()) {
          final TextRange found = matcherRange(range, matcher);
          final int till = found.getStartOffset();
          if (badSize(from, till)) {
            // Skip this candidate and resume right after the separator.
            from = found.getEndOffset();
            continue;
          }
          wRange = new TextRange(from, till);
          from = found.getEndOffset();
        }
        else { // end hit or zero matches: the remainder of the range is the last word
          wRange = new TextRange(from, range.getEndOffset());
        }
        for (TextRange r : rangesToCheck(text, wRange)) {
          ws.split(text, r, consumer);
        }
        if (matcher.hitEnd()) {
          break;
        }
      }
    }
    catch (ProcessCanceledException ignored) {
      // Deliberately swallowed: cancellation just ends this split early.
      // Ranges already passed to the consumer are left as reported.
    }
  }

  /**
   * Picks the ranges of a single word that should be passed on for checking.
   * The first matching rule wins: {@code @} selects the e-mail pattern,
   * {@code ://} the URL pattern and {@code -} the UUID pattern; a word with
   * none of these markers is returned unchanged.
   */
  private List<TextRange> rangesToCheck(@NotNull String text, @NotNull TextRange wRange) {
    final String word = wRange.substring(text);
    if (word.contains("@")) {
      return excludeByPattern(text, wRange, MAIL, 0);
    }
    if (word.contains("://")) {
      return excludeByPattern(text, wRange, URL_PATTERN, 0);
    }
    if (word.contains("-")) {
      return excludeByPattern(text, wRange, UUID_PATTERN, 0);
    }
    return Collections.singletonList(wRange);
  }

  /**
   * Supplies the splitter that each word range is delegated to.
   *
   * @return the shared {@link TextSplitter} instance
   */
  @NotNull
  protected Splitter getTextSplitter() {
    return TextSplitter.getInstance();
  }
}
38 changes: 32 additions & 6 deletions src/com/haskforce/spellchecker/HaskellSpellcheckingStrategy.scala
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,37 @@ import com.haskforce.psi._
import com.haskforce.utils.CastUtil.Ops
import com.haskforce.utils.PQ
import com.intellij.psi.{PsiComment, PsiElement}
import com.intellij.spellchecker.tokenizer.SpellcheckingStrategy
import com.intellij.spellchecker.inspections.PlainTextSplitter
import com.intellij.spellchecker.tokenizer.{SpellcheckingStrategy, Tokenizer, TokenizerBase}

/**
* Provide spellchecker support for Haskell/Cabal sources.
*/
class HaskellSpellcheckingStrategy extends SpellcheckingStrategy {

override def getTokenizer(element: PsiElement): Tokenizer[_ <: PsiElement] = {
// We need to split on apostrophes when spellchecking Haskell identifiers.
if (isHaskellIdent(element)) {
HaskellSpellcheckingStrategy.HASKELL_IDENT_TOKENIZER
} else {
HaskellSpellcheckingStrategy.STANDARD_TOKENIZER
}
}

override def isMyContext(e: PsiElement): Boolean = {
isHaskell(e) && isDefinitionNode(e)
(isHaskell(e) || isCabal(e)) && isDefinitionNode(e)
}

private def isHaskell(e: PsiElement): Boolean = {
Seq(
HaskellLanguage.INSTANCE,
CabalLanguage.INSTANCE
).exists(_.is(e.getLanguage))
HaskellLanguage.INSTANCE.is(e.getLanguage)
}

private def isHaskellIdent(e: PsiElement): Boolean = {
e.isInstanceOf[HaskellNamedElement]
}

private def isCabal(e: PsiElement): Boolean = {
CabalLanguage.INSTANCE.is(e.getLanguage)
}

private def isDefinitionNode(e: PsiElement): Boolean = {
Expand Down Expand Up @@ -129,3 +144,14 @@ class HaskellSpellcheckingStrategy extends SpellcheckingStrategy {
.flatMap(_.getParent.cast[HaskellNewtypedecl])
}
}

/**
 * Tokenizer instances shared by every `HaskellSpellcheckingStrategy`; each is
 * built once when this object is initialised.
 */
object HaskellSpellcheckingStrategy {

  // Selected by getTokenizer for Haskell identifiers; wraps the splitter
  // supplied by HaskellSpellcheckingSplitter.getInstance().
  private val HASKELL_IDENT_TOKENIZER: TokenizerBase[PsiElement] = {
    val identSplitter = HaskellSpellcheckingSplitter.getInstance()
    new TokenizerBase[PsiElement](identSplitter)
  }

  // Selected by getTokenizer for every other element; wraps the plain-text splitter.
  private val STANDARD_TOKENIZER: TokenizerBase[PsiElement] = {
    val plainSplitter = PlainTextSplitter.getInstance()
    new TokenizerBase[PsiElement](plainSplitter)
  }
}
4 changes: 4 additions & 0 deletions tests/gold/spellchecker/Comments.hs
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,7 @@ module Comments where
-- | And doc <TYPO>coments</TYPO>

-- ^ And and these <TYPO>commens</TYPO> two

-- Also <TYPO>the'se</TYPO>

-- But doesn't catch this.

0 comments on commit c0771e0

Please sign in to comment.