Skip to content

Commit

Permalink
Update SymSpell.cs
Browse files Browse the repository at this point in the history
  • Loading branch information
wolfgarbe authored Apr 22, 2018
1 parent 355e2ca commit 28ceecd
Showing 1 changed file with 4 additions and 5 deletions.
9 changes: 4 additions & 5 deletions SymSpell/SymSpell.cs
Original file line number Diff line number Diff line change
Expand Up @@ -907,9 +907,8 @@ public List<SuggestItem> LookupCompound(string input, int editDistanceMax)
//misspelled words are corrected and do not affect segmentation
//existing spaces are allowed and considered for optimum segmentation

//SymSpell.WordSegmentation uses a novel approach *without* dynamic programming and recursion.
//While Dynamic Programming reduces the time to solve repeating patterns by memoization,
//the SymSpell word segmentation algorithm prevents the generation of repeating problems in the first place.
//SymSpell.WordSegmentation uses a novel approach *without* recursion.
//https://medium.com/@wolfgarbe/fast-word-segmentation-for-noisy-text-2c2c41f9e8da
//While each string of length n can be segmented in 2^(n−1) possible compositions https://en.wikipedia.org/wiki/Composition_(combinatorics)
//SymSpell.WordSegmentation has a linear runtime O(n) to find the optimum composition

Expand Down Expand Up @@ -1023,15 +1022,15 @@ public List<Composition> WordSegmentation(string input, int maxEditDistance, int
}

//set values in first loop
if ((j == 0) || (i == maxSegmentationWordLength))
if (j == 0)
{
compositions[destinationIndex].segmentedString = part;
compositions[destinationIndex].correctedString = topResult;
compositions[destinationIndex].distanceSum = topEd;
compositions[destinationIndex].probabilityLogSum = topProbabilityLog;
}
//replace values if better probabilityLogSum, if same edit distance OR one space difference
else if (((compositions[callingIndex].distanceSum + topEd == compositions[destinationIndex].distanceSum) || (compositions[callingIndex].distanceSum + separatorLength + topEd == compositions[destinationIndex].distanceSum)) && (compositions[destinationIndex].probabilityLogSum < compositions[callingIndex].probabilityLogSum + topProbabilityLog))
else if ((i == maxSegmentationWordLength) || (((compositions[callingIndex].distanceSum + topEd == compositions[destinationIndex].distanceSum) || (compositions[callingIndex].distanceSum + separatorLength + topEd == compositions[destinationIndex].distanceSum)) && (compositions[destinationIndex].probabilityLogSum < compositions[callingIndex].probabilityLogSum + topProbabilityLog)))
{
compositions[destinationIndex].segmentedString = compositions[callingIndex].segmentedString + " " + part;
compositions[destinationIndex].correctedString = compositions[callingIndex].correctedString + " " + topResult;
Expand Down

0 comments on commit 28ceecd

Please sign in to comment.