From 6e05484307a2b8bff5b6ef32f7fa3e5ad1144475 Mon Sep 17 00:00:00 2001
From: Harry Rose
Date: Mon, 16 Mar 2026 20:59:00 +0000
Subject: [PATCH] Handle podcasts which use html lists and also have html tags
in the chapter titles
---
.../parsers/parsePodcastDescriptionForChapters.js | 10 +++++++---
.../parsePodcastDescriptionForChapters.test.js | 14 +++++++++++++-
2 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/server/utils/parsers/parsePodcastDescriptionForChapters.js b/server/utils/parsers/parsePodcastDescriptionForChapters.js
index 3f1b4c75..8a83f149 100644
--- a/server/utils/parsers/parsePodcastDescriptionForChapters.js
+++ b/server/utils/parsers/parsePodcastDescriptionForChapters.js
@@ -1,3 +1,4 @@
+const sanitizeHtml = require('../../libs/sanitizeHtml')
const Logger = require('../../Logger')
/**
@@ -27,13 +28,16 @@ module.exports.parse = (podcastDescription, audioDurationSecs) => {
const timestampRegex = /\b(\d{1,2}):(\d{1,2})(?::(\d{1,2}))?\b/
const chapterTitleRegex = /\b\d{1,2}:\d{1,2}(?::\d{1,2})?\b(?:\s+|\))(.+)$/
- const descriptionLineSplitRegex = /\<\s*\/\s*p\s*\>|\<\s*br\s*\s*\/\>|\n/
+
+ // Split on "</p>", "<br />", "\n", "</li>"
+ const descriptionLineSplitRegex = /\<\s*\/\s*p\s*\>|\<\s*br\s*\/\>|\n|\<\s*\/\s*li\s*\>/
var descriptionLines = podcastDescription.split(descriptionLineSplitRegex)
var newChapters = []
for (let i = 0; i < descriptionLines.length; i++) {
- let line = descriptionLines[i]
+ // Strip all HTML tags out
+ let line = sanitizeHtml(descriptionLines[i], { allowedTags: [] })
let match = timestampRegex.exec(line)
if (match == null) continue
@@ -85,7 +89,7 @@ module.exports.parse = (podcastDescription, audioDurationSecs) => {
newChapters[newChapters.length - 1].end = audioDurationSecs
}
- Logger.info(`[PodcastEpisode] Successfully generated ${newChapters.length} chapters`)
+ Logger.info(`Successfully generated ${newChapters.length} chapters`)
if (newChapters.length == 1) {
throw new Error('Only one chapter found, treating as invalid description')
diff --git a/test/server/utils/parsers/parsePodcastDescriptionForChapters.test.js b/test/server/utils/parsers/parsePodcastDescriptionForChapters.test.js
index 0fff4574..662c131d 100644
--- a/test/server/utils/parsers/parsePodcastDescriptionForChapters.test.js
+++ b/test/server/utils/parsers/parsePodcastDescriptionForChapters.test.js
@@ -1,5 +1,4 @@
const chai = require('chai')
-const PodcastEpisode = require('../../../../server/models/PodcastEpisode')
const expect = chai.expect
const parsePodcastDescriptionForChapters = require('../../../../server/utils/parsers/parsePodcastDescriptionForChapters')
@@ -69,6 +68,19 @@ describe('parsePodcastDescriptionForChapters', () => {
{ title: 'Chapter 7', id: 7, start: 2492, end: 2803 },
{ title: 'Chapter 8', id: 8, start: 2803, end: 3060 }
]
+ },
+ {
+ testName: 'Should handle html lists and chapters with html tags in the title',
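+ description: '<p>Introduction</p><p>Chapters</p><ul><li>00:00:00 <strong>Intro</strong></li><li>00:03:55 <strong>Chapter 1</strong></li><li>00:09:52 <strong>Chapter 2</strong></li><li>00:16:11 <strong>Chapter 3</strong></li><li>00:20:03 <strong>Chapter 4</strong></li><li>00:24:08 <strong>Chapter 5</strong></li></ul>',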
+ audioDuration: 4000,
+ expectedChapters: [
+ { title: 'Intro', id: 1, start: 0, end: 235 },
+ { title: 'Chapter 1', id: 2, start: 235, end: 592 },
+ { title: 'Chapter 2', id: 3, start: 592, end: 971 },
+ { title: 'Chapter 3', id: 4, start: 971, end: 1203 },
+ { title: 'Chapter 4', id: 5, start: 1203, end: 1448 },
+ { title: 'Chapter 5', id: 6, start: 1448, end: 4000 }
+ ]
}
]
testCasesTestingSuccess.forEach(function (testCase) {