Try to properly interpret the encoding of ncc.html (the encoding seems to be a bit odd or incorrect sometimes)

2026-03-01 13:39:41 +00:00 · 2026-02-07 18:00:29 +01:00 · 2026-02-07 18:00:29 +01:00 · 687e62e1fa
commit 687e62e1fa
parent 6c9bf8c2bd
5 changed files with 132 additions and 5 deletions
--- a/test/server/utils/fileUtils.test.js
+++ b/test/server/utils/fileUtils.test.js
@@ -6,6 +6,26 @@ const fs = require('fs')
 const Logger = require('../../../server/Logger')

 describe('fileUtils', () => {
+  describe('decodeTextBuffer', () => {
+    it('decodes html using charset declaration (windows-1252)', () => {
+      const htmlPrefix = Buffer.from('<html><head><meta charset="windows-1252"></head><body>M')
+      const htmlSuffix = Buffer.from('ller</body></html>')
+      const input = Buffer.concat([htmlPrefix, Buffer.from([0xfc]), htmlSuffix])
+
+      const decoded = fileUtils.decodeTextBuffer(input, { detectEncoding: true, isHtml: true })
+      expect(decoded).to.include('Müller')
+    })
+
+    it('falls back to windows-1252 for html without charset when utf-8 decoding is invalid', () => {
+      const htmlPrefix = Buffer.from('<html><body>Gr')
+      const htmlSuffix = Buffer.from('n</body></html>')
+      const input = Buffer.concat([htmlPrefix, Buffer.from([0xfc]), htmlSuffix])
+
+      const decoded = fileUtils.decodeTextBuffer(input, { detectEncoding: true, isHtml: true })
+      expect(decoded).to.include('Grün')
+    })
+  })
+
  it('shouldIgnoreFile', () => {
    global.isWin = process.platform === 'win32'