From 52a0b61b976775e80f18d8eb3dff3621a9a915ea Mon Sep 17 00:00:00 2001 From: Toni Barth Date: Sun, 8 Feb 2026 03:33:56 +0100 Subject: [PATCH] Revert "try to replace html sniffing with chardet to fix ncc.html files with set encoding but strings that ignore that" This reverts commit 3a1be51a830a7a725a32c0ae04de3090786e8722. Revert "try to properly interpret ncc.html encoding (seems to be a bit weird / incorrect sometimes)" This reverts commit fac441559584cfc2f65baadf3475138e7a6017c8. --- package-lock.json | 51 ----------------------------- package.json | 2 -- server/scanner/DaisyFileScanner.js | 2 +- server/utils/fileUtils.js | 48 ++------------------------- test/server/utils/fileUtils.test.js | 20 ----------- 5 files changed, 3 insertions(+), 120 deletions(-) diff --git a/package-lock.json b/package-lock.json index 5deb7ce2d..08707893d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,7 +10,6 @@ "license": "GPL-3.0", "dependencies": { "axios": "^0.27.2", - "chardet": "^2.1.1", "cookie-parser": "^1.4.6", "express": "^4.17.1", "express-rate-limit": "^7.5.1", @@ -29,7 +28,6 @@ "socket.io": "^4.5.4", "sqlite3": "^5.1.7", "ssrf-req-filter": "^1.1.0", - "whatwg-encoding": "^3.1.1", "xml2js": "^0.5.0" }, "bin": { @@ -124,7 +122,6 @@ "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.23.3.tgz", "integrity": "sha512-Jg+msLuNuCJDyBvFv5+OKOUjWMZgd85bKjbICd3zWrKAo+bJ49HJufi7CQE0q0uR8NGyO6xkCACScNqyjHSZew==", "dev": true, - "peer": true, "dependencies": { "@ampproject/remapping": "^2.2.0", "@babel/code-frame": "^7.22.13", @@ -1052,7 +1049,6 @@ "url": "https://github.com/sponsors/ai" } ], - "peer": true, "dependencies": { "caniuse-lite": "^1.0.30001541", "electron-to-chromium": "^1.4.535", @@ -1255,12 +1251,6 @@ "node": ">=8" } }, - "node_modules/chardet": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/chardet/-/chardet-2.1.1.tgz", - "integrity": "sha512-PsezH1rqdV9VvyNhxxOW32/d75r01NY7TQCmOqomRo15ZSOKbpTFVsfjghxo6JloQUCGnH4k1LGu0R4yCLlWQQ==", - "license": "MIT" - }, "node_modules/check-error": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/check-error/-/check-error-1.0.3.tgz", @@ -1867,7 +1857,6 @@ "version": "4.18.2", "resolved": "https://registry.npmjs.org/express/-/express-4.18.2.tgz", "integrity": "sha512-5/PsL6iGPdfQ/lKM1UuielYgv3BUoJfz1aUwU9vHZ+J7gyvwdQXFEBIEIaxeGf0GIcreATNyBExtalisDbuMqQ==", - "peer": true, "dependencies": { "accepts": "~1.3.8", "array-flatten": "1.1.1", @@ -2124,21 +2113,6 @@ "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==", "devOptional": true }, - "node_modules/fsevents": { - "version": "2.3.3", - "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", - "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", - "dev": true, - "hasInstallScript": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^8.16.0 || ^10.6.0 || >=11.0.0" - } - }, "node_modules/function-bind": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", @@ -5384,31 +5358,6 @@ "node": ">= 0.8" } }, - "node_modules/whatwg-encoding": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", - "integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==", - "deprecated": "Use @exodus/bytes instead for a more spec-conformant and faster implementation", - "license": "MIT", - "dependencies": { - "iconv-lite": "0.6.3" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/whatwg-encoding/node_modules/iconv-lite": { - "version": "0.6.3", - "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", - "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", - "license": "MIT", - "dependencies": { - "safer-buffer": ">= 2.1.2 < 3.0.0" - }, - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", diff --git a/package.json b/package.json index 44be24be1..3ee3fb391 100644 --- a/package.json +++ b/package.json @@ -39,7 +39,6 @@ "license": "GPL-3.0", "dependencies": { "axios": "^0.27.2", - "chardet": "^2.1.1", "cookie-parser": "^1.4.6", "express": "^4.17.1", "express-rate-limit": "^7.5.1", @@ -58,7 +57,6 @@ "socket.io": "^4.5.4", "sqlite3": "^5.1.7", "ssrf-req-filter": "^1.1.0", - "whatwg-encoding": "^3.1.1", "xml2js": "^0.5.0" }, "devDependencies": { diff --git a/server/scanner/DaisyFileScanner.js b/server/scanner/DaisyFileScanner.js index e0b7cd84f..217709063 100644 --- a/server/scanner/DaisyFileScanner.js +++ b/server/scanner/DaisyFileScanner.js @@ -12,7 +12,7 @@ class DaisyFileScanner { * @param {Object} bookMetadata */ async scanBookDaisyFile(daisyLibraryFileObj, bookMetadata, audioFiles = []) { - const htmlText = await readTextFile(daisyLibraryFileObj.metadata.path, { detectEncoding: true, isHtml: true }) + const htmlText = await readTextFile(daisyLibraryFileObj.metadata.path) const daisyMetadata = htmlText ? parseDaisyMetadata(htmlText) : null if (daisyMetadata) { for (const key in daisyMetadata) { diff --git a/server/utils/fileUtils.js b/server/utils/fileUtils.js index c55261cd9..9a349bd54 100644 --- a/server/utils/fileUtils.js +++ b/server/utils/fileUtils.js @@ -6,8 +6,6 @@ const fs = require('../libs/fsExtra') const rra = require('../libs/recursiveReaddirAsync') const Logger = require('../Logger') const { AudioMimeType } = require('./constants') -const chardet = require('chardet') -const whatwgEncoding = require('whatwg-encoding') /** * Make sure folder separator is POSIX for Windows file paths. e.g. "C:\Users\Abs" becomes "C:/Users/Abs" @@ -117,57 +115,15 @@ function getIno(path) { } module.exports.getIno = getIno -/** - * @typedef ReadTextFileOptions - */ - -function detectTextEncoding(buffer) { - try { - const detectedEncoding = chardet.detect(buffer) - const labeledEncoding = detectedEncoding ? whatwgEncoding.labelToName(detectedEncoding) : null - if (labeledEncoding) { - return labeledEncoding - } - } catch {} - - return 'UTF-8' -} - -/** - * Decode raw text bytes with optional encoding detection. - * - * @param {Buffer} buffer - * @param {ReadTextFileOptions} [options] - * @returns {string} - */ -function decodeTextBuffer(buffer, options = {}) { - if (!buffer) return '' - const { detectEncoding = false, isHtml = false } = options - - if (!detectEncoding) { - return String(buffer) - } - - const fallbackEncoding = detectTextEncoding(buffer) - try { - // WHATWG decode handles BOM override and legacy encoding tables. - return whatwgEncoding.decode(buffer, fallbackEncoding) - } catch { - return String(buffer) - } -} -module.exports.decodeTextBuffer = decodeTextBuffer - /** * Read contents of file * @param {string} path - * @param {ReadTextFileOptions} [options] * @returns {string} */ -async function readTextFile(path, options = {}) { +async function readTextFile(path) { try { var data = await fs.readFile(path) - return decodeTextBuffer(data, options) + return String(data) } catch (error) { Logger.error(`[FileUtils] ReadTextFile error ${error}`) return '' diff --git a/test/server/utils/fileUtils.test.js b/test/server/utils/fileUtils.test.js index a0482f93b..b57a6fb86 100644 --- a/test/server/utils/fileUtils.test.js +++ b/test/server/utils/fileUtils.test.js @@ -6,26 +6,6 @@ const fs = require('fs') const Logger = require('../../../server/Logger') describe('fileUtils', () => { - describe('decodeTextBuffer', () => { - it('decodes html using charset declaration (windows-1252)', () => { - const htmlPrefix = Buffer.from('M') - const htmlSuffix = Buffer.from('ller') - const input = Buffer.concat([htmlPrefix, Buffer.from([0xfc]), htmlSuffix]) - - const decoded = fileUtils.decodeTextBuffer(input, { detectEncoding: true, isHtml: true }) - expect(decoded).to.include('Müller') - }) - - it('falls back to windows-1252 for html without charset when utf-8 decoding is invalid', () => { - const htmlPrefix = Buffer.from('Gr') - const htmlSuffix = Buffer.from('n') - const input = Buffer.concat([htmlPrefix, Buffer.from([0xfc]), htmlSuffix]) - - const decoded = fileUtils.decodeTextBuffer(input, { detectEncoding: true, isHtml: true }) - expect(decoded).to.include('Grün') - }) - }) - it('shouldIgnoreFile', () => { global.isWin = process.platform === 'win32'