mirror of
https://github.com/advplyr/audiobookshelf.git
synced 2026-03-01 05:29:41 +00:00
Revert "try to replace html sniffing with chardet to fix ncc.html files with set encoding but strings that ignore that"
This reverts commit 3a1be51a83. Revert "try to properly interpret ncc.html encoding (seems to be a bit weird / incorrect sometimes)" This reverts commit fac4415595.
This commit is contained in:
parent
b05acce22b
commit
52a0b61b97
5 changed files with 3 additions and 120 deletions
51
package-lock.json
generated
51
package-lock.json
generated
|
|
@ -10,7 +10,6 @@
|
|||
"license": "GPL-3.0",
|
||||
"dependencies": {
|
||||
"axios": "^0.27.2",
|
||||
"chardet": "^2.1.1",
|
||||
"cookie-parser": "^1.4.6",
|
||||
"express": "^4.17.1",
|
||||
"express-rate-limit": "^7.5.1",
|
||||
|
|
@ -29,7 +28,6 @@
|
|||
"socket.io": "^4.5.4",
|
||||
"sqlite3": "^5.1.7",
|
||||
"ssrf-req-filter": "^1.1.0",
|
||||
"whatwg-encoding": "^3.1.1",
|
||||
"xml2js": "^0.5.0"
|
||||
},
|
||||
"bin": {
|
||||
|
|
@ -124,7 +122,6 @@
|
|||
"resolved": "https://registry.npmjs.org/@babel/core/-/core-7.23.3.tgz",
|
||||
"integrity": "sha512-Jg+msLuNuCJDyBvFv5+OKOUjWMZgd85bKjbICd3zWrKAo+bJ49HJufi7CQE0q0uR8NGyO6xkCACScNqyjHSZew==",
|
||||
"dev": true,
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@ampproject/remapping": "^2.2.0",
|
||||
"@babel/code-frame": "^7.22.13",
|
||||
|
|
@ -1052,7 +1049,6 @@
|
|||
"url": "https://github.com/sponsors/ai"
|
||||
}
|
||||
],
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"caniuse-lite": "^1.0.30001541",
|
||||
"electron-to-chromium": "^1.4.535",
|
||||
|
|
@ -1255,12 +1251,6 @@
|
|||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/chardet": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/chardet/-/chardet-2.1.1.tgz",
|
||||
"integrity": "sha512-PsezH1rqdV9VvyNhxxOW32/d75r01NY7TQCmOqomRo15ZSOKbpTFVsfjghxo6JloQUCGnH4k1LGu0R4yCLlWQQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/check-error": {
|
||||
"version": "1.0.3",
|
||||
"resolved": "https://registry.npmjs.org/check-error/-/check-error-1.0.3.tgz",
|
||||
|
|
@ -1867,7 +1857,6 @@
|
|||
"version": "4.18.2",
|
||||
"resolved": "https://registry.npmjs.org/express/-/express-4.18.2.tgz",
|
||||
"integrity": "sha512-5/PsL6iGPdfQ/lKM1UuielYgv3BUoJfz1aUwU9vHZ+J7gyvwdQXFEBIEIaxeGf0GIcreATNyBExtalisDbuMqQ==",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"accepts": "~1.3.8",
|
||||
"array-flatten": "1.1.1",
|
||||
|
|
@ -2124,21 +2113,6 @@
|
|||
"integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==",
|
||||
"devOptional": true
|
||||
},
|
||||
"node_modules/fsevents": {
|
||||
"version": "2.3.3",
|
||||
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
|
||||
"integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
|
||||
"dev": true,
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/function-bind": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
|
||||
|
|
@ -5384,31 +5358,6 @@
|
|||
"node": ">= 0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/whatwg-encoding": {
|
||||
"version": "3.1.1",
|
||||
"resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz",
|
||||
"integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==",
|
||||
"deprecated": "Use @exodus/bytes instead for a more spec-conformant and faster implementation",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"iconv-lite": "0.6.3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/whatwg-encoding/node_modules/iconv-lite": {
|
||||
"version": "0.6.3",
|
||||
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz",
|
||||
"integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"safer-buffer": ">= 2.1.2 < 3.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/which": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",
|
||||
|
|
|
|||
|
|
@ -39,7 +39,6 @@
|
|||
"license": "GPL-3.0",
|
||||
"dependencies": {
|
||||
"axios": "^0.27.2",
|
||||
"chardet": "^2.1.1",
|
||||
"cookie-parser": "^1.4.6",
|
||||
"express": "^4.17.1",
|
||||
"express-rate-limit": "^7.5.1",
|
||||
|
|
@ -58,7 +57,6 @@
|
|||
"socket.io": "^4.5.4",
|
||||
"sqlite3": "^5.1.7",
|
||||
"ssrf-req-filter": "^1.1.0",
|
||||
"whatwg-encoding": "^3.1.1",
|
||||
"xml2js": "^0.5.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ class DaisyFileScanner {
|
|||
* @param {Object} bookMetadata
|
||||
*/
|
||||
async scanBookDaisyFile(daisyLibraryFileObj, bookMetadata, audioFiles = []) {
|
||||
const htmlText = await readTextFile(daisyLibraryFileObj.metadata.path, { detectEncoding: true, isHtml: true })
|
||||
const htmlText = await readTextFile(daisyLibraryFileObj.metadata.path)
|
||||
const daisyMetadata = htmlText ? parseDaisyMetadata(htmlText) : null
|
||||
if (daisyMetadata) {
|
||||
for (const key in daisyMetadata) {
|
||||
|
|
|
|||
|
|
@ -6,8 +6,6 @@ const fs = require('../libs/fsExtra')
|
|||
const rra = require('../libs/recursiveReaddirAsync')
|
||||
const Logger = require('../Logger')
|
||||
const { AudioMimeType } = require('./constants')
|
||||
const chardet = require('chardet')
|
||||
const whatwgEncoding = require('whatwg-encoding')
|
||||
|
||||
/**
|
||||
* Make sure folder separator is POSIX for Windows file paths. e.g. "C:\Users\Abs" becomes "C:/Users/Abs"
|
||||
|
|
@ -117,57 +115,15 @@ function getIno(path) {
|
|||
}
|
||||
module.exports.getIno = getIno
|
||||
|
||||
/**
|
||||
* @typedef ReadTextFileOptions
|
||||
*/
|
||||
|
||||
function detectTextEncoding(buffer) {
|
||||
try {
|
||||
const detectedEncoding = chardet.detect(buffer)
|
||||
const labeledEncoding = detectedEncoding ? whatwgEncoding.labelToName(detectedEncoding) : null
|
||||
if (labeledEncoding) {
|
||||
return labeledEncoding
|
||||
}
|
||||
} catch {}
|
||||
|
||||
return 'UTF-8'
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode raw text bytes with optional encoding detection.
|
||||
*
|
||||
* @param {Buffer} buffer
|
||||
* @param {ReadTextFileOptions} [options]
|
||||
* @returns {string}
|
||||
*/
|
||||
function decodeTextBuffer(buffer, options = {}) {
|
||||
if (!buffer) return ''
|
||||
const { detectEncoding = false, isHtml = false } = options
|
||||
|
||||
if (!detectEncoding) {
|
||||
return String(buffer)
|
||||
}
|
||||
|
||||
const fallbackEncoding = detectTextEncoding(buffer)
|
||||
try {
|
||||
// WHATWG decode handles BOM override and legacy encoding tables.
|
||||
return whatwgEncoding.decode(buffer, fallbackEncoding)
|
||||
} catch {
|
||||
return String(buffer)
|
||||
}
|
||||
}
|
||||
module.exports.decodeTextBuffer = decodeTextBuffer
|
||||
|
||||
/**
|
||||
* Read contents of file
|
||||
* @param {string} path
|
||||
* @param {ReadTextFileOptions} [options]
|
||||
* @returns {string}
|
||||
*/
|
||||
async function readTextFile(path, options = {}) {
|
||||
async function readTextFile(path) {
|
||||
try {
|
||||
var data = await fs.readFile(path)
|
||||
return decodeTextBuffer(data, options)
|
||||
return String(data)
|
||||
} catch (error) {
|
||||
Logger.error(`[FileUtils] ReadTextFile error ${error}`)
|
||||
return ''
|
||||
|
|
|
|||
|
|
@ -6,26 +6,6 @@ const fs = require('fs')
|
|||
const Logger = require('../../../server/Logger')
|
||||
|
||||
describe('fileUtils', () => {
|
||||
describe('decodeTextBuffer', () => {
|
||||
it('decodes html using charset declaration (windows-1252)', () => {
|
||||
const htmlPrefix = Buffer.from('<html><head><meta charset="windows-1252"></head><body>M')
|
||||
const htmlSuffix = Buffer.from('ller</body></html>')
|
||||
const input = Buffer.concat([htmlPrefix, Buffer.from([0xfc]), htmlSuffix])
|
||||
|
||||
const decoded = fileUtils.decodeTextBuffer(input, { detectEncoding: true, isHtml: true })
|
||||
expect(decoded).to.include('Müller')
|
||||
})
|
||||
|
||||
it('falls back to windows-1252 for html without charset when utf-8 decoding is invalid', () => {
|
||||
const htmlPrefix = Buffer.from('<html><body>Gr')
|
||||
const htmlSuffix = Buffer.from('n</body></html>')
|
||||
const input = Buffer.concat([htmlPrefix, Buffer.from([0xfc]), htmlSuffix])
|
||||
|
||||
const decoded = fileUtils.decodeTextBuffer(input, { detectEncoding: true, isHtml: true })
|
||||
expect(decoded).to.include('Grün')
|
||||
})
|
||||
})
|
||||
|
||||
it('shouldIgnoreFile', () => {
|
||||
global.isWin = process.platform === 'win32'
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue