Revert "try to replace html sniffing with chardet to fix ncc.html files with set encoding but strings that ignore that"

This reverts commit 3a1be51a83.

Revert "try to properly interpret ncc.html encoding (seems to be a bit weird / incorrect sometimes)"

This reverts commit fac4415595.
This commit is contained in:
Toni Barth 2026-02-08 03:33:56 +01:00
parent b05acce22b
commit 52a0b61b97
5 changed files with 3 additions and 120 deletions

51
package-lock.json generated
View file

@ -10,7 +10,6 @@
"license": "GPL-3.0",
"dependencies": {
"axios": "^0.27.2",
"chardet": "^2.1.1",
"cookie-parser": "^1.4.6",
"express": "^4.17.1",
"express-rate-limit": "^7.5.1",
@ -29,7 +28,6 @@
"socket.io": "^4.5.4",
"sqlite3": "^5.1.7",
"ssrf-req-filter": "^1.1.0",
"whatwg-encoding": "^3.1.1",
"xml2js": "^0.5.0"
},
"bin": {
@ -124,7 +122,6 @@
"resolved": "https://registry.npmjs.org/@babel/core/-/core-7.23.3.tgz",
"integrity": "sha512-Jg+msLuNuCJDyBvFv5+OKOUjWMZgd85bKjbICd3zWrKAo+bJ49HJufi7CQE0q0uR8NGyO6xkCACScNqyjHSZew==",
"dev": true,
"peer": true,
"dependencies": {
"@ampproject/remapping": "^2.2.0",
"@babel/code-frame": "^7.22.13",
@ -1052,7 +1049,6 @@
"url": "https://github.com/sponsors/ai"
}
],
"peer": true,
"dependencies": {
"caniuse-lite": "^1.0.30001541",
"electron-to-chromium": "^1.4.535",
@ -1255,12 +1251,6 @@
"node": ">=8"
}
},
"node_modules/chardet": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/chardet/-/chardet-2.1.1.tgz",
"integrity": "sha512-PsezH1rqdV9VvyNhxxOW32/d75r01NY7TQCmOqomRo15ZSOKbpTFVsfjghxo6JloQUCGnH4k1LGu0R4yCLlWQQ==",
"license": "MIT"
},
"node_modules/check-error": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/check-error/-/check-error-1.0.3.tgz",
@ -1867,7 +1857,6 @@
"version": "4.18.2",
"resolved": "https://registry.npmjs.org/express/-/express-4.18.2.tgz",
"integrity": "sha512-5/PsL6iGPdfQ/lKM1UuielYgv3BUoJfz1aUwU9vHZ+J7gyvwdQXFEBIEIaxeGf0GIcreATNyBExtalisDbuMqQ==",
"peer": true,
"dependencies": {
"accepts": "~1.3.8",
"array-flatten": "1.1.1",
@ -2124,21 +2113,6 @@
"integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==",
"devOptional": true
},
"node_modules/fsevents": {
"version": "2.3.3",
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
"integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
"dev": true,
"hasInstallScript": true,
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
}
},
"node_modules/function-bind": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
@ -5384,31 +5358,6 @@
"node": ">= 0.8"
}
},
"node_modules/whatwg-encoding": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz",
"integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==",
"deprecated": "Use @exodus/bytes instead for a more spec-conformant and faster implementation",
"license": "MIT",
"dependencies": {
"iconv-lite": "0.6.3"
},
"engines": {
"node": ">=18"
}
},
"node_modules/whatwg-encoding/node_modules/iconv-lite": {
"version": "0.6.3",
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz",
"integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==",
"license": "MIT",
"dependencies": {
"safer-buffer": ">= 2.1.2 < 3.0.0"
},
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/which": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",

View file

@ -39,7 +39,6 @@
"license": "GPL-3.0",
"dependencies": {
"axios": "^0.27.2",
"chardet": "^2.1.1",
"cookie-parser": "^1.4.6",
"express": "^4.17.1",
"express-rate-limit": "^7.5.1",
@ -58,7 +57,6 @@
"socket.io": "^4.5.4",
"sqlite3": "^5.1.7",
"ssrf-req-filter": "^1.1.0",
"whatwg-encoding": "^3.1.1",
"xml2js": "^0.5.0"
},
"devDependencies": {

View file

@ -12,7 +12,7 @@ class DaisyFileScanner {
* @param {Object} bookMetadata
*/
async scanBookDaisyFile(daisyLibraryFileObj, bookMetadata, audioFiles = []) {
const htmlText = await readTextFile(daisyLibraryFileObj.metadata.path, { detectEncoding: true, isHtml: true })
const htmlText = await readTextFile(daisyLibraryFileObj.metadata.path)
const daisyMetadata = htmlText ? parseDaisyMetadata(htmlText) : null
if (daisyMetadata) {
for (const key in daisyMetadata) {

View file

@ -6,8 +6,6 @@ const fs = require('../libs/fsExtra')
const rra = require('../libs/recursiveReaddirAsync')
const Logger = require('../Logger')
const { AudioMimeType } = require('./constants')
const chardet = require('chardet')
const whatwgEncoding = require('whatwg-encoding')
/**
* Make sure folder separator is POSIX for Windows file paths. e.g. "C:\Users\Abs" becomes "C:/Users/Abs"
@ -117,57 +115,15 @@ function getIno(path) {
}
module.exports.getIno = getIno
/**
* @typedef ReadTextFileOptions
*/
/**
 * Guess the character encoding of a chunk of raw bytes.
 *
 * The buffer is handed to `chardet.detect`, and a truthy answer is passed through
 * `whatwgEncoding.labelToName`. When either step yields nothing usable, or
 * anything throws along the way, the result is 'UTF-8'.
 *
 * @param {Buffer} buffer
 * @returns {string} encoding name, 'UTF-8' when nothing better was found
 */
function detectTextEncoding(buffer) {
  let encodingName = null

  try {
    const guess = chardet.detect(buffer)
    if (guess) {
      encodingName = whatwgEncoding.labelToName(guess)
    }
  } catch {
    // Detection is best-effort: any failure simply leads to the default below.
  }

  return encodingName || 'UTF-8'
}
/**
 * Decode raw text bytes with optional encoding detection.
 *
 * Without `options.detectEncoding` the bytes are converted with `String(buffer)`.
 * With it, the encoding returned by `detectTextEncoding` is passed to
 * `whatwgEncoding.decode` as the fallback encoding; if that decode throws, the
 * plain `String(buffer)` conversion is returned instead.
 *
 * @param {Buffer} buffer - raw bytes; a falsy value yields ''
 * @param {ReadTextFileOptions} [options]
 * @param {boolean} [options.detectEncoding=false] - run encoding detection before decoding
 * @param {boolean} [options.isHtml=false] - accepted for caller compatibility; it has no effect on decoding here
 * @returns {string}
 */
function decodeTextBuffer(buffer, options = {}) {
  if (!buffer) return ''

  // The `= {}` default only covers undefined, so tolerate an explicit null too.
  const detectEncoding = Boolean(options && options.detectEncoding)
  if (!detectEncoding) {
    return String(buffer)
  }

  const fallbackEncoding = detectTextEncoding(buffer)
  try {
    // WHATWG decode handles BOM override and legacy encoding tables.
    return whatwgEncoding.decode(buffer, fallbackEncoding)
  } catch {
    // Decoding is best-effort: fall back to the plain conversion.
    return String(buffer)
  }
}
module.exports.decodeTextBuffer = decodeTextBuffer
/**
* Read contents of file
* @param {string} path
* @param {ReadTextFileOptions} [options]
* @returns {string}
*/
async function readTextFile(path, options = {}) {
async function readTextFile(path) {
try {
var data = await fs.readFile(path)
return decodeTextBuffer(data, options)
return String(data)
} catch (error) {
Logger.error(`[FileUtils] ReadTextFile error ${error}`)
return ''

View file

@ -6,26 +6,6 @@ const fs = require('fs')
const Logger = require('../../../server/Logger')
describe('fileUtils', () => {
// Exercises fileUtils.decodeTextBuffer with detectEncoding enabled on HTML
// fixtures that each contain a single raw 0xfc byte.
describe('decodeTextBuffer', () => {
// Fixture declares windows-1252 in a <meta charset> tag.
// NOTE(review): the test title credits the charset declaration, but the visible
// decodeTextBuffer does not read the `isHtml` option — confirm what actually drives this result.
it('decodes html using charset declaration (windows-1252)', () => {
const htmlPrefix = Buffer.from('<html><head><meta charset="windows-1252"></head><body>M')
const htmlSuffix = Buffer.from('ller</body></html>')
// 0xfc is 'ü' in windows-1252 and is not a valid standalone UTF-8 byte.
const input = Buffer.concat([htmlPrefix, Buffer.from([0xfc]), htmlSuffix])
const decoded = fileUtils.decodeTextBuffer(input, { detectEncoding: true, isHtml: true })
expect(decoded).to.include('Müller')
})
// Same raw 0xfc byte, but the fixture carries no charset declaration at all.
it('falls back to windows-1252 for html without charset when utf-8 decoding is invalid', () => {
const htmlPrefix = Buffer.from('<html><body>Gr')
const htmlSuffix = Buffer.from('n</body></html>')
const input = Buffer.concat([htmlPrefix, Buffer.from([0xfc]), htmlSuffix])
const decoded = fileUtils.decodeTextBuffer(input, { detectEncoding: true, isHtml: true })
expect(decoded).to.include('Grün')
})
})
it('shouldIgnoreFile', () => {
global.isWin = process.platform === 'win32'