forked from MichaelWehar/Open-Source-Spell-Checker
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspellchecker.js
78 lines (63 loc) · 2.02 KB
/
spellchecker.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
// Created by Michael Wehar
var valid_word_list;
function set_valid_word_list(word_list){
valid_word_list = word_list;
}
function find_similar(word, score_thresh){
var max_size = 10;
var top_words = [];
var top_scores = [];
for(var i = 0; i < valid_word_list.length; i++){
// compute score
var element = valid_word_list[i];
var temp_score = score(word, element);
if(score_thresh < temp_score){
// check if it is a top score
var index = getListIndex(top_scores, temp_score);
if(index < max_size){
top_words.splice(index, 0, element);
top_scores.splice(index, 0, temp_score);
if(top_words.length > max_size){
top_words.pop();
top_scores.pop();
}
}
}
}
return [top_words, top_scores];
}
function getListIndex(scores, x){
for(var i = 0; i < scores.length; i++){
if(x > scores[i]) return i;
}
return scores.length;
}
function score(x, y){
var length_weight = 0.3;
var match_weight = 0.5;
var shift_weight = 0.2;
return length_weight * length_score(x,y) + match_weight * match_score(x,y)
+ shift_weight * shift_score(x,y);
}
function length_score(x, y){
var diff = Math.abs(x.length - y.length);
return Math.max(1.0 - diff / 4, 0);
}
function match_score(x, y){
var length = Math.min(x.length, y.length);
if(length <= 0) return 0.0;
var total = 0;
for(var i = 0; i < length; i++){
if(x.charAt(i) == y.charAt(i)) total++;
}
var diff = length - total;
return Math.max(1.0 - diff / 5, 0);
}
function shift_score(x, y){
var l2 = match_score(x.substring(2), y);
var l1 = match_score(x.substring(1), y);
var c = match_score(x, y);
var r1 = match_score(x, y.substring(1));
var r2 = match_score(x, y.substring(2));
return Math.max(l2, l1, c, r1, r2);
}