mirror of
https://github.com/advplyr/audiobookshelf.git
synced 2026-03-01 05:29:41 +00:00
Try replacing HTML encoding sniffing with chardet to fix ncc.html files that declare an encoding but contain strings that do not follow it
This commit is contained in:
parent
fac4415595
commit
3a1be51a83
3 changed files with 19 additions and 29 deletions
20
package-lock.json
generated
20
package-lock.json
generated
|
|
@ -10,12 +10,12 @@
|
|||
"license": "GPL-3.0",
|
||||
"dependencies": {
|
||||
"axios": "^0.27.2",
|
||||
"chardet": "^2.1.1",
|
||||
"cookie-parser": "^1.4.6",
|
||||
"express": "^4.17.1",
|
||||
"express-rate-limit": "^7.5.1",
|
||||
"express-session": "^1.17.3",
|
||||
"graceful-fs": "^4.2.10",
|
||||
"html-encoding-sniffer": "^4.0.0",
|
||||
"htmlparser2": "^8.0.1",
|
||||
"lru-cache": "^10.0.3",
|
||||
"node-unrar-js": "^2.0.2",
|
||||
|
|
@ -1255,6 +1255,12 @@
|
|||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/chardet": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/chardet/-/chardet-2.1.1.tgz",
|
||||
"integrity": "sha512-PsezH1rqdV9VvyNhxxOW32/d75r01NY7TQCmOqomRo15ZSOKbpTFVsfjghxo6JloQUCGnH4k1LGu0R4yCLlWQQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/check-error": {
|
||||
"version": "1.0.3",
|
||||
"resolved": "https://registry.npmjs.org/check-error/-/check-error-1.0.3.tgz",
|
||||
|
|
@ -2304,18 +2310,6 @@
|
|||
"he": "bin/he"
|
||||
}
|
||||
},
|
||||
"node_modules/html-encoding-sniffer": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-4.0.0.tgz",
|
||||
"integrity": "sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"whatwg-encoding": "^3.1.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/html-escaper": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/html-escaper/-/html-escaper-2.0.2.tgz",
|
||||
|
|
|
|||
|
|
@ -39,12 +39,12 @@
|
|||
"license": "GPL-3.0",
|
||||
"dependencies": {
|
||||
"axios": "^0.27.2",
|
||||
"chardet": "^2.1.1",
|
||||
"cookie-parser": "^1.4.6",
|
||||
"express": "^4.17.1",
|
||||
"express-rate-limit": "^7.5.1",
|
||||
"express-session": "^1.17.3",
|
||||
"graceful-fs": "^4.2.10",
|
||||
"html-encoding-sniffer": "^4.0.0",
|
||||
"htmlparser2": "^8.0.1",
|
||||
"lru-cache": "^10.0.3",
|
||||
"node-unrar-js": "^2.0.2",
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ const fs = require('../libs/fsExtra')
|
|||
const rra = require('../libs/recursiveReaddirAsync')
|
||||
const Logger = require('../Logger')
|
||||
const { AudioMimeType } = require('./constants')
|
||||
const sniffHTMLEncoding = require('html-encoding-sniffer')
|
||||
const chardet = require('chardet')
|
||||
const whatwgEncoding = require('whatwg-encoding')
|
||||
|
||||
/**
|
||||
|
|
@ -119,22 +119,18 @@ module.exports.getIno = getIno
|
|||
|
||||
/**
|
||||
* @typedef ReadTextFileOptions
|
||||
* @property {boolean} [detectEncoding] detect text encoding before decoding
|
||||
* @property {boolean} [isHtml] use HTML charset hints when detectEncoding is enabled
|
||||
*/
|
||||
|
||||
function detectTextEncoding(buffer, options = {}) {
|
||||
const { isHtml = false } = options
|
||||
if (!isHtml) {
|
||||
return 'UTF-8'
|
||||
}
|
||||
|
||||
function detectTextEncoding(buffer) {
|
||||
try {
|
||||
const sniffedEncoding = sniffHTMLEncoding(buffer, { defaultEncoding: 'windows-1252' }) || 'windows-1252'
|
||||
return whatwgEncoding.labelToName(sniffedEncoding) || 'UTF-8'
|
||||
} catch {
|
||||
return 'UTF-8'
|
||||
}
|
||||
const detectedEncoding = chardet.detect(buffer)
|
||||
const labeledEncoding = detectedEncoding ? whatwgEncoding.labelToName(detectedEncoding) : null
|
||||
if (labeledEncoding) {
|
||||
return labeledEncoding
|
||||
}
|
||||
} catch {}
|
||||
|
||||
return 'UTF-8'
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -152,7 +148,7 @@ function decodeTextBuffer(buffer, options = {}) {
|
|||
return String(buffer)
|
||||
}
|
||||
|
||||
const fallbackEncoding = detectTextEncoding(buffer, { isHtml })
|
||||
const fallbackEncoding = detectTextEncoding(buffer)
|
||||
try {
|
||||
// WHATWG decode handles BOM override and legacy encoding tables.
|
||||
return whatwgEncoding.decode(buffer, fallbackEncoding)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue