diff --git a/built/utils/encoding.js b/built/utils/encoding.js index 3ad1b192..5462c62a 100644 --- a/built/utils/encoding.js +++ b/built/utils/encoding.js @@ -4,6 +4,7 @@ exports.detectEncoding = detectEncoding; exports.toUtf8 = toUtf8; const iconv = require("iconv-lite"); const jschardet = require("jschardet"); +const Encoding = require('encoding-japanese'); const regCharset = new RegExp(/charset\s*=\s*["']?([\w-]+)/, 'i'); /** * Detect HTML encoding @@ -30,9 +31,14 @@ function detectEncoding(body) { return 'utf-8'; } function toUtf8(body, encoding) { + if (encoding === 'ISO-2022-JP') { + return Encoding.codeToString(Encoding.convert(body, 'UNICODE', encoding)); + } return iconv.decode(body, encoding); } function toEncoding(candicate) { + if (candicate.toUpperCase() === 'ISO-2022-JP') + return 'ISO-2022-JP'; if (iconv.encodingExists(candicate)) { if (['shift_jis', 'shift-jis', 'windows-31j', 'x-sjis'].includes(candicate.toLowerCase())) return 'cp932'; diff --git a/package.json b/package.json index 16276b42..921ec23d 100644 --- a/package.json +++ b/package.json @@ -24,6 +24,7 @@ }, "dependencies": { "cheerio": "1.0.0", + "encoding-japanese": "2.2.0", "escape-regexp": "0.0.1", "got": "11.8.6", "h3": "1.13.0", diff --git a/src/utils/encoding.ts b/src/utils/encoding.ts index d6a2cef4..5eae229a 100644 --- a/src/utils/encoding.ts +++ b/src/utils/encoding.ts @@ -1,5 +1,6 @@ import * as iconv from 'iconv-lite'; import * as jschardet from 'jschardet'; +const Encoding = require('encoding-japanese'); const regCharset = new RegExp(/charset\s*=\s*["']?([\w-]+)/, 'i'); @@ -29,10 +30,16 @@ export function detectEncoding(body: Buffer): string { } export function toUtf8(body: Buffer, encoding: string): string { + if (encoding === 'ISO-2022-JP') { + return Encoding.codeToString(Encoding.convert(body, 'UNICODE', encoding)) + } + return iconv.decode(body, encoding); } function toEncoding(candicate: string): string | null { + if (candicate.toUpperCase() === 'ISO-2022-JP') return 'ISO-2022-JP'; + if (iconv.encodingExists(candicate)) { if (['shift_jis', 'shift-jis', 'windows-31j', 'x-sjis'].includes(candicate.toLowerCase())) return 'cp932'; return candicate; diff --git a/yarn.lock b/yarn.lock index b7dac2da..b438ebab 100644 --- a/yarn.lock +++ b/yarn.lock @@ -331,6 +331,11 @@ domutils@^3.0.1, domutils@^3.1.0: domelementtype "^2.3.0" domhandler "^5.0.3" +encoding-japanese@2.2.0: + version "2.2.0" + resolved "https://registry.yarnpkg.com/encoding-japanese/-/encoding-japanese-2.2.0.tgz#0ef2d2351250547f432a2dd155453555c16deb59" + integrity sha512-EuJWwlHPZ1LbADuKTClvHtwbaFn4rOD+dRAbWysqEOXRc2Uui0hJInNJrsdH0c+OhJA4nrCBdSkW4DD5YxAo6A== + encoding-sniffer@^0.2.0: version "0.2.0" resolved "https://registry.yarnpkg.com/encoding-sniffer/-/encoding-sniffer-0.2.0.tgz#799569d66d443babe82af18c9f403498365ef1d5"