mirror of
https://github.com/advplyr/audiobookshelf.git
synced 2026-04-19 05:29:44 +00:00
Try to replace HTML encoding sniffing with chardet, to fix ncc.html files that declare an encoding but contain strings that do not actually use it
This commit is contained in:
parent
687e62e1fa
commit
b05acce22b
3 changed files with 19 additions and 29 deletions
20
package-lock.json
generated
20
package-lock.json
generated
|
|
@ -10,12 +10,12 @@
|
||||||
"license": "GPL-3.0",
|
"license": "GPL-3.0",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"axios": "^0.27.2",
|
"axios": "^0.27.2",
|
||||||
|
"chardet": "^2.1.1",
|
||||||
"cookie-parser": "^1.4.6",
|
"cookie-parser": "^1.4.6",
|
||||||
"express": "^4.17.1",
|
"express": "^4.17.1",
|
||||||
"express-rate-limit": "^7.5.1",
|
"express-rate-limit": "^7.5.1",
|
||||||
"express-session": "^1.17.3",
|
"express-session": "^1.17.3",
|
||||||
"graceful-fs": "^4.2.10",
|
"graceful-fs": "^4.2.10",
|
||||||
"html-encoding-sniffer": "^4.0.0",
|
|
||||||
"htmlparser2": "^8.0.1",
|
"htmlparser2": "^8.0.1",
|
||||||
"lru-cache": "^10.0.3",
|
"lru-cache": "^10.0.3",
|
||||||
"node-unrar-js": "^2.0.2",
|
"node-unrar-js": "^2.0.2",
|
||||||
|
|
@ -1255,6 +1255,12 @@
|
||||||
"node": ">=8"
|
"node": ">=8"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/chardet": {
|
||||||
|
"version": "2.1.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/chardet/-/chardet-2.1.1.tgz",
|
||||||
|
"integrity": "sha512-PsezH1rqdV9VvyNhxxOW32/d75r01NY7TQCmOqomRo15ZSOKbpTFVsfjghxo6JloQUCGnH4k1LGu0R4yCLlWQQ==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/check-error": {
|
"node_modules/check-error": {
|
||||||
"version": "1.0.3",
|
"version": "1.0.3",
|
||||||
"resolved": "https://registry.npmjs.org/check-error/-/check-error-1.0.3.tgz",
|
"resolved": "https://registry.npmjs.org/check-error/-/check-error-1.0.3.tgz",
|
||||||
|
|
@ -2304,18 +2310,6 @@
|
||||||
"he": "bin/he"
|
"he": "bin/he"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/html-encoding-sniffer": {
|
|
||||||
"version": "4.0.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-4.0.0.tgz",
|
|
||||||
"integrity": "sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ==",
|
|
||||||
"license": "MIT",
|
|
||||||
"dependencies": {
|
|
||||||
"whatwg-encoding": "^3.1.1"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">=18"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/html-escaper": {
|
"node_modules/html-escaper": {
|
||||||
"version": "2.0.2",
|
"version": "2.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/html-escaper/-/html-escaper-2.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/html-escaper/-/html-escaper-2.0.2.tgz",
|
||||||
|
|
|
||||||
|
|
@ -39,12 +39,12 @@
|
||||||
"license": "GPL-3.0",
|
"license": "GPL-3.0",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"axios": "^0.27.2",
|
"axios": "^0.27.2",
|
||||||
|
"chardet": "^2.1.1",
|
||||||
"cookie-parser": "^1.4.6",
|
"cookie-parser": "^1.4.6",
|
||||||
"express": "^4.17.1",
|
"express": "^4.17.1",
|
||||||
"express-rate-limit": "^7.5.1",
|
"express-rate-limit": "^7.5.1",
|
||||||
"express-session": "^1.17.3",
|
"express-session": "^1.17.3",
|
||||||
"graceful-fs": "^4.2.10",
|
"graceful-fs": "^4.2.10",
|
||||||
"html-encoding-sniffer": "^4.0.0",
|
|
||||||
"htmlparser2": "^8.0.1",
|
"htmlparser2": "^8.0.1",
|
||||||
"lru-cache": "^10.0.3",
|
"lru-cache": "^10.0.3",
|
||||||
"node-unrar-js": "^2.0.2",
|
"node-unrar-js": "^2.0.2",
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ const fs = require('../libs/fsExtra')
|
||||||
const rra = require('../libs/recursiveReaddirAsync')
|
const rra = require('../libs/recursiveReaddirAsync')
|
||||||
const Logger = require('../Logger')
|
const Logger = require('../Logger')
|
||||||
const { AudioMimeType } = require('./constants')
|
const { AudioMimeType } = require('./constants')
|
||||||
const sniffHTMLEncoding = require('html-encoding-sniffer')
|
const chardet = require('chardet')
|
||||||
const whatwgEncoding = require('whatwg-encoding')
|
const whatwgEncoding = require('whatwg-encoding')
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -119,22 +119,18 @@ module.exports.getIno = getIno
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @typedef ReadTextFileOptions
|
* @typedef ReadTextFileOptions
|
||||||
* @property {boolean} [detectEncoding] detect text encoding before decoding
|
|
||||||
* @property {boolean} [isHtml] use HTML charset hints when detectEncoding is enabled
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
function detectTextEncoding(buffer, options = {}) {
|
function detectTextEncoding(buffer) {
|
||||||
const { isHtml = false } = options
|
|
||||||
if (!isHtml) {
|
|
||||||
return 'UTF-8'
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const sniffedEncoding = sniffHTMLEncoding(buffer, { defaultEncoding: 'windows-1252' }) || 'windows-1252'
|
const detectedEncoding = chardet.detect(buffer)
|
||||||
return whatwgEncoding.labelToName(sniffedEncoding) || 'UTF-8'
|
const labeledEncoding = detectedEncoding ? whatwgEncoding.labelToName(detectedEncoding) : null
|
||||||
} catch {
|
if (labeledEncoding) {
|
||||||
return 'UTF-8'
|
return labeledEncoding
|
||||||
}
|
}
|
||||||
|
} catch {}
|
||||||
|
|
||||||
|
return 'UTF-8'
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -152,7 +148,7 @@ function decodeTextBuffer(buffer, options = {}) {
|
||||||
return String(buffer)
|
return String(buffer)
|
||||||
}
|
}
|
||||||
|
|
||||||
const fallbackEncoding = detectTextEncoding(buffer, { isHtml })
|
const fallbackEncoding = detectTextEncoding(buffer)
|
||||||
try {
|
try {
|
||||||
// WHATWG decode handles BOM override and legacy encoding tables.
|
// WHATWG decode handles BOM override and legacy encoding tables.
|
||||||
return whatwgEncoding.decode(buffer, fallbackEncoding)
|
return whatwgEncoding.decode(buffer, fallbackEncoding)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue