-
Notifications
You must be signed in to change notification settings - Fork 0
/
test.html
153 lines (150 loc) · 4.92 KB
/
test.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Rite</title>
<script async src="client/vendor/opencv.js" onload="console.log('loaded opencv');" type="text/javascript"></script>
<script src="https://unpkg.com/[email protected]/dist/tesseract.min.js" type="text/javascript"></script>
<style>
/* The source image and the output canvas are both rendered 850px tall. */
#imageSrc, #outputCanvas {
height: 850px;
}
/* Park the source <img> off-screen: the script reads it with cv.imread()
   and draws its results onto #outputCanvas instead. */
#imageSrc {
position: fixed;
left: -999px;
}
/* OCR progress bar: a 100px-wide outlined track... */
#progress {
display: inline-block;
width: 100px;
height: 20px;
border: 1px solid black;
}
/* ...whose inner <span> is the fill; the Tesseract logger callback sets its
   width to (progress * 100)px. */
#progress > span {
display: inline-block;
background-color: lightblue;
height: 20px;
}
</style>
</head>
<body>
<h2>Rite</h2>
<div>
<div class="caption">
<input type="file" id="fileInput" name="file" /><span id="progress"><span></span></span>
</div><pre id="result"></pre>
<canvas id="outputCanvas" height="850"></canvas><img id="imageSrc" alt="No Image" />
</div>
<script type="text/javascript">
const SPREAD = 69, SUBTRACT = 50,
TOTAL_RE = /(s[ou]b|ne[t71]\s+)?([\[jf17t\]]?\s*[oun0]\s*[tf7]\s*[4a]\s*[17li\)_]?).*?([0-9]+\s*[.,'][0-9\s]+)\W*$/gim,
TAX_RE = /(H[GS]T|Tax\W*H).*([0-9]+\s*[.,'][0-9\s]+)$/gim;
const CAT_RES = [
/(cookie|cand(y|ies)|snack|chip|dorito|ruffle)/gim,
/(restaurant|eater(y|ies)|food|mcdonald|kfc)/gim,
/(sock|walk|winner|wear|lace|glove|uniqlo|pant|shirt|shoe|heel|jacket|coat)/gim,
/(best buy|outlet|comp|tech|keyb|mouse|head)/gim,
/(laundry|screw|nail|hammer|drill|ax|shovel)/gim,
/(nintendo|epic|xbox|play|game|entertain)/gim,
], CATS = [
'Snacks',
'Restaurant/Eating',
'Clothes/Fashion',
'Gadgets/Tech',
'Tools/Hardware',
'Entertainment/Games',
], METH_RES = [
/(cash|cdn|change|tend)/gim,
/(debit|interac)/gim,
/(credit|card)/gim,
], METHODS = [
'Cash', 'Debit/Interac', 'Credit'
];
// Outer element of the progress bar; its first child is the fill.
let progress = document.getElementById('progress');

// Logger handed to Tesseract: messages carrying a numeric `progress` value
// resize the fill to (progress * 100)px; any other message is ignored.
function reportProgress(m) {
  if (typeof m.progress !== 'number') {
    return;
  }
  const fill = progress.children[0];
  fill.style.width = `${m.progress * 100}px`;
}

const worker = Tesseract.createWorker({ logger: reportProgress });
/**
 * Convert an OCR'd amount string into a number.
 *
 * All whitespace is removed, then the first ',' or "'" (if any) is treated as
 * the decimal point before the text is handed to parseFloat.
 *
 * @param {string} s - raw amount text, e.g. "12 ,50".
 * @returns {number} the parsed value, or NaN when no number can be read.
 */
function num(s) {
  const compact = s.replace(/\s/g, '');
  const normalised = compact.replace(/[,']/, '.');
  return Number.parseFloat(normalised);
}
// Start the OCR worker in the background: load it, then load and initialise
// the English language data. Failures are logged rather than left as an
// unhandled promise rejection.
// NOTE(review): nothing waits for this sequence to finish before doOCR()
// calls worker.recognize() -- confirm that picking an image very early is safe.
(async () => {
  try {
    await worker.load();
    await worker.loadLanguage('eng');
    await worker.initialize('eng');
  } catch (err) {
    console.error('Failed to initialise the Tesseract worker', err);
  }
})();
// DOM handles used by the event handlers and the OCR helpers in this script.
const imgElement = document.getElementById("imageSrc");
const inputElement = document.getElementById("fileInput");
const canvas = document.getElementById("outputCanvas");
const result = document.getElementById("result");
// Not referenced anywhere in the visible script; kept in case other code uses it.
let mat2;
// Object URL currently assigned to imgElement.src, tracked so it can be
// released when the user picks another file.
let currentImageUrl = null;

// When a file is chosen, point the hidden <img> at it; the image's own
// 'load' event then drives the processing.
inputElement.addEventListener("change", (e) => {
  const file = e.target.files[0];
  if (!file) {
    // The selection was cleared (e.g. the file dialog was cancelled):
    // URL.createObjectURL(undefined) would throw.
    return;
  }
  if (currentImageUrl) {
    // Release the previous blob URL so the old file can be garbage-collected.
    URL.revokeObjectURL(currentImageUrl);
  }
  currentImageUrl = URL.createObjectURL(file);
  imgElement.src = currentImageUrl;
}, false);
// Once the chosen image has loaded: clear the previous output, show the raw
// image, replace it on the canvas with the thresholded version, then OCR the
// canvas contents.
imgElement.addEventListener('load', () => {
  result.innerText = '';
  const img = cv.imread(imgElement);
  let thresholded = null;
  try {
    cv.imshow("outputCanvas", img);
    thresholded = doThreshold(img);
    // doOCR() reads pixels from the canvas, not from the Mats, so both Mats
    // can be released as soon as it has been called.
    doOCR();
  } finally {
    // OpenCV.js Mats live on the WASM heap and are not garbage-collected;
    // without delete() every processed image would leak.
    img.delete();
    if (thresholded) {
      thresholded.delete();
    }
  }
});
/**
 * Pick the label whose pattern matches `text` the most times.
 *
 * Each `res[i]` is applied with `String.prototype.match`; for global (`/g`)
 * patterns the length of the returned array is the number of occurrences.
 * The label `list[i]` with the strictly highest count wins, so ties keep the
 * earlier entry.
 *
 * @param {string} text - text to scan.
 * @param {RegExp[]} res - patterns, index-aligned with `list`.
 * @param {Array} list - labels corresponding to `res`.
 * @returns {*} the winning label, or null when no pattern matches.
 */
function maxFromMatch(text, res, list) {
  let bestCount = 0;
  let best = null;
  // `let` keeps the index local; the undeclared `i` used previously created
  // an implicit global (and throws in strict mode / ES modules).
  for (let i = 0; i < res.length; ++i) {
    const found = text.match(res[i]);
    if (!found) continue;
    if (found.length > bestCount) {
      bestCount = found.length;
      best = list[i];
    }
  }
  return best;
}
/**
 * Binarise an image with a Gaussian adaptive threshold and draw the result
 * on the "outputCanvas" element.
 *
 * `img` itself is left untouched; the work is done on a clone.
 *
 * @param {cv.Mat} img - source image as produced by cv.imread (RGBA order).
 * @param {number} [spread=SPREAD] - blockSize for cv.adaptiveThreshold.
 * @param {number} [subtract=SUBTRACT] - constant C for cv.adaptiveThreshold.
 * @returns {cv.Mat} the thresholded single-channel Mat. The caller owns it
 *   and must call delete() on it when finished.
 */
function doThreshold(img, spread = SPREAD, subtract = SUBTRACT) {
  const mat = img.clone();
  try {
    // cv.imread yields RGBA data, so use the RGBA conversion code; the BGR
    // code would apply the red and blue luminance weights the wrong way round.
    cv.cvtColor(mat, mat, cv.COLOR_RGBA2GRAY);
    cv.adaptiveThreshold(mat, mat, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C, cv.THRESH_BINARY, spread, subtract);
    cv.imshow("outputCanvas", mat);
  } catch (err) {
    // Nobody else holds this clone yet, so free it before propagating.
    mat.delete();
    throw err;
  }
  return mat;
}
/**
 * Work out the receipt total from OCR text.
 *
 * Uses the last TOTAL_RE match. If that match carries a "sub"/"net" prefix it
 * is only a subtotal, so the last TAX_RE amount is added to it.
 *
 * @param {string} text - recognised text.
 * @returns {number|null} the total, or null when no total line is found or a
 *   subtotal has no tax line to combine with.
 */
function extractReceiptTotal(text) {
  const totals = Array.from(text.matchAll(TOTAL_RE));
  const lastTotal = totals[totals.length - 1];
  if (!lastTotal) {
    return null;
  }
  if (!lastTotal[1]) {
    return num(lastTotal[3]);
  }
  // A prefixed ("Sub"/"Net") total matched last, so this isn't the real
  // total: fall back to subtotal + tax.
  const taxes = Array.from(text.matchAll(TAX_RE));
  const lastTax = taxes[taxes.length - 1];
  if (!lastTax) {
    return null;
  }
  return num(lastTotal[3]) + num(lastTax[2]);
}

/**
 * OCR the current contents of the output canvas and write the findings into
 * the result element: the raw text, the window size, and (when a total can be
 * derived) the total, guessed category and guessed payment method.
 *
 * Returns immediately; the recognition runs asynchronously inside the
 * canvas.toBlob callback.
 */
function doOCR() {
  result.innerText = '';
  canvas.toBlob(async (blob) => {
    if (!blob) {
      // toBlob passes null when the image could not be created.
      console.error('doOCR: canvas.toBlob produced no image data');
      return;
    }
    const url = URL.createObjectURL(blob);
    try {
      const { data: { text } } = await worker.recognize(url);
      result.innerText = text + '\n' + window.innerWidth + ', ' + window.innerHeight + '\n';
      const total = extractReceiptTotal(text);
      if (total === null) {
        return;
      }
      const cat = maxFromMatch(text, CAT_RES, CATS);
      const meth = maxFromMatch(text, METH_RES, METHODS) || METHODS[0];
      result.innerText += `Total: ${total}\nCategory: ${cat}\nMethod: ${meth}`;
    } catch (err) {
      // Log instead of leaving an unhandled rejection in the async callback.
      console.error('doOCR: recognition failed', err);
    } finally {
      // The blob URL is only needed while recognize() runs; release it so
      // the image data can be garbage-collected.
      URL.revokeObjectURL(url);
    }
  });
}
</script>
</body>
</html>