diff --git a/package-lock.json b/package-lock.json index 5deb7ce2d..08707893d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,7 +10,6 @@ "license": "GPL-3.0", "dependencies": { "axios": "^0.27.2", - "chardet": "^2.1.1", "cookie-parser": "^1.4.6", "express": "^4.17.1", "express-rate-limit": "^7.5.1", @@ -29,7 +28,6 @@ "socket.io": "^4.5.4", "sqlite3": "^5.1.7", "ssrf-req-filter": "^1.1.0", - "whatwg-encoding": "^3.1.1", "xml2js": "^0.5.0" }, "bin": { @@ -124,7 +122,6 @@ "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.23.3.tgz", "integrity": "sha512-Jg+msLuNuCJDyBvFv5+OKOUjWMZgd85bKjbICd3zWrKAo+bJ49HJufi7CQE0q0uR8NGyO6xkCACScNqyjHSZew==", "dev": true, - "peer": true, "dependencies": { "@ampproject/remapping": "^2.2.0", "@babel/code-frame": "^7.22.13", @@ -1052,7 +1049,6 @@ "url": "https://github.com/sponsors/ai" } ], - "peer": true, "dependencies": { "caniuse-lite": "^1.0.30001541", "electron-to-chromium": "^1.4.535", @@ -1255,12 +1251,6 @@ "node": ">=8" } }, - "node_modules/chardet": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/chardet/-/chardet-2.1.1.tgz", - "integrity": "sha512-PsezH1rqdV9VvyNhxxOW32/d75r01NY7TQCmOqomRo15ZSOKbpTFVsfjghxo6JloQUCGnH4k1LGu0R4yCLlWQQ==", - "license": "MIT" - }, "node_modules/check-error": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/check-error/-/check-error-1.0.3.tgz", @@ -1867,7 +1857,6 @@ "version": "4.18.2", "resolved": "https://registry.npmjs.org/express/-/express-4.18.2.tgz", "integrity": "sha512-5/PsL6iGPdfQ/lKM1UuielYgv3BUoJfz1aUwU9vHZ+J7gyvwdQXFEBIEIaxeGf0GIcreATNyBExtalisDbuMqQ==", - "peer": true, "dependencies": { "accepts": "~1.3.8", "array-flatten": "1.1.1", @@ -2124,21 +2113,6 @@ "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==", "devOptional": true }, - "node_modules/fsevents": { - "version": "2.3.3", - "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", - "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", - "dev": true, - "hasInstallScript": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^8.16.0 || ^10.6.0 || >=11.0.0" - } - }, "node_modules/function-bind": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", @@ -5384,31 +5358,6 @@ "node": ">= 0.8" } }, - "node_modules/whatwg-encoding": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", - "integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==", - "deprecated": "Use @exodus/bytes instead for a more spec-conformant and faster implementation", - "license": "MIT", - "dependencies": { - "iconv-lite": "0.6.3" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/whatwg-encoding/node_modules/iconv-lite": { - "version": "0.6.3", - "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", - "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", - "license": "MIT", - "dependencies": { - "safer-buffer": ">= 2.1.2 < 3.0.0" - }, - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", diff --git a/package.json b/package.json index 44be24be1..3ee3fb391 100644 --- a/package.json +++ b/package.json @@ -39,7 +39,6 @@ "license": "GPL-3.0", "dependencies": { "axios": "^0.27.2", - "chardet": "^2.1.1", "cookie-parser": "^1.4.6", "express": "^4.17.1", "express-rate-limit": "^7.5.1", @@ -58,7 +57,6 @@ "socket.io": "^4.5.4", "sqlite3": "^5.1.7", "ssrf-req-filter": "^1.1.0", - "whatwg-encoding": "^3.1.1", "xml2js": "^0.5.0" }, "devDependencies": { diff --git a/server/scanner/DaisyFileScanner.js b/server/scanner/DaisyFileScanner.js index e0b7cd84f..217709063 100644 --- a/server/scanner/DaisyFileScanner.js +++ b/server/scanner/DaisyFileScanner.js @@ -12,7 +12,7 @@ class DaisyFileScanner { * @param {Object} bookMetadata */ async scanBookDaisyFile(daisyLibraryFileObj, bookMetadata, audioFiles = []) { - const htmlText = await readTextFile(daisyLibraryFileObj.metadata.path, { detectEncoding: true, isHtml: true }) + const htmlText = await readTextFile(daisyLibraryFileObj.metadata.path) const daisyMetadata = htmlText ? parseDaisyMetadata(htmlText) : null if (daisyMetadata) { for (const key in daisyMetadata) { diff --git a/server/utils/fileUtils.js b/server/utils/fileUtils.js index c55261cd9..9a349bd54 100644 --- a/server/utils/fileUtils.js +++ b/server/utils/fileUtils.js @@ -6,8 +6,6 @@ const fs = require('../libs/fsExtra') const rra = require('../libs/recursiveReaddirAsync') const Logger = require('../Logger') const { AudioMimeType } = require('./constants') -const chardet = require('chardet') -const whatwgEncoding = require('whatwg-encoding') /** * Make sure folder separator is POSIX for Windows file paths. e.g. "C:\Users\Abs" becomes "C:/Users/Abs" @@ -117,57 +115,15 @@ function getIno(path) { } module.exports.getIno = getIno -/** - * @typedef ReadTextFileOptions - */ - -function detectTextEncoding(buffer) { - try { - const detectedEncoding = chardet.detect(buffer) - const labeledEncoding = detectedEncoding ? whatwgEncoding.labelToName(detectedEncoding) : null - if (labeledEncoding) { - return labeledEncoding - } - } catch {} - - return 'UTF-8' -} - -/** - * Decode raw text bytes with optional encoding detection. - * - * @param {Buffer} buffer - * @param {ReadTextFileOptions} [options] - * @returns {string} - */ -function decodeTextBuffer(buffer, options = {}) { - if (!buffer) return '' - const { detectEncoding = false, isHtml = false } = options - - if (!detectEncoding) { - return String(buffer) - } - - const fallbackEncoding = detectTextEncoding(buffer) - try { - // WHATWG decode handles BOM override and legacy encoding tables. - return whatwgEncoding.decode(buffer, fallbackEncoding) - } catch { - return String(buffer) - } -} -module.exports.decodeTextBuffer = decodeTextBuffer - /** * Read contents of file * @param {string} path - * @param {ReadTextFileOptions} [options] * @returns {string} */ -async function readTextFile(path, options = {}) { +async function readTextFile(path) { try { var data = await fs.readFile(path) - return decodeTextBuffer(data, options) + return String(data) } catch (error) { Logger.error(`[FileUtils] ReadTextFile error ${error}`) return '' diff --git a/test/server/utils/fileUtils.test.js b/test/server/utils/fileUtils.test.js index a0482f93b..b57a6fb86 100644 --- a/test/server/utils/fileUtils.test.js +++ b/test/server/utils/fileUtils.test.js @@ -6,26 +6,6 @@ const fs = require('fs') const Logger = require('../../../server/Logger') describe('fileUtils', () => { - describe('decodeTextBuffer', () => { - it('decodes html using charset declaration (windows-1252)', () => { - const htmlPrefix = Buffer.from('
M') - const htmlSuffix = Buffer.from('ller') - const input = Buffer.concat([htmlPrefix, Buffer.from([0xfc]), htmlSuffix]) - - const decoded = fileUtils.decodeTextBuffer(input, { detectEncoding: true, isHtml: true }) - expect(decoded).to.include('Müller') - }) - - it('falls back to windows-1252 for html without charset when utf-8 decoding is invalid', () => { - const htmlPrefix = Buffer.from('Gr') - const htmlSuffix = Buffer.from('n') - const input = Buffer.concat([htmlPrefix, Buffer.from([0xfc]), htmlSuffix]) - - const decoded = fileUtils.decodeTextBuffer(input, { detectEncoding: true, isHtml: true }) - expect(decoded).to.include('Grün') - }) - }) - it('shouldIgnoreFile', () => { global.isWin = process.platform === 'win32'