From 9d4a2a8a598c500c0a221d82e0e5e983a3f3914b Mon Sep 17 00:00:00 2001 From: Harry Rose Date: Wed, 11 Mar 2026 17:22:23 +0000 Subject: [PATCH] Improve chapter generation code and extract it into its own function --- server/models/PodcastEpisode.js | 150 +++++++++++++++++--------------- 1 file changed, 81 insertions(+), 69 deletions(-) diff --git a/server/models/PodcastEpisode.js b/server/models/PodcastEpisode.js index 555c348ca..4b334d6ef 100644 --- a/server/models/PodcastEpisode.js +++ b/server/models/PodcastEpisode.js @@ -87,76 +87,12 @@ class PodcastEpisode extends Model { } else if (rssPodcastEpisode.chapters?.length) { podcastEpisode.chapters = rssPodcastEpisode.chapters.map((ch) => ({ ...ch })) } else { - const timeMarkerRegex = /\b(\d{1,2}):(\d{1,2})(?::(\d{1,2}))?\b/ - const chapterTitleRegex = /\b\d{1,2}:\d{1,2}(?::\d{1,2})?\b(.+)$/ - Logger.debug("Podcast episode doesn't have chapters, attempting to generate them from timestamps", rssPodcastEpisode.title) - - var errorMessage = null - var descriptionLines = podcastEpisode.description.split('

') - var chaptersToPush = [] - - for (let i = 0; i < descriptionLines.length; i++) { - let line = descriptionLines[i] - Logger.debug('Description Line:', line) - - let match = timeMarkerRegex.exec(line) - if (match == null) continue - - Logger.debug('Matches:', match) - - let first = match[1] - let second = match[2] - let third = match[3] - - let hours = 0 - let minutes = 0 - let seconds = 0 - - // If there's three components then we can assume its hh:mm:ss - if (first && second && third) { - hours = Number(first) - minutes = Number(second) - seconds = Number(third) - } else if (first && second) // otherwise assume mm:ss - { - minutes = Number(first) - seconds = Number(second) - } else { - // Unknown timestamp state - errorMessage = `Unknown timestamp format in description, line ${line}` - break - } - - let startTime = seconds + minutes * 60 + hours * 60 * 60 - let chapterTitleMatch = chapterTitleRegex.exec(line) - Logger.debug('Chapter Title Matches:', chapterTitleMatch) - - if (chapterTitleMatch == null && chapterTitleMatch.length >= 2) { - // Unknown chapter state - errorMessage = `Unable to get chapter title from description, line ${line}` - break - } - - let chapter = { title: chapterTitleMatch[1].trim(), id: i, start: startTime } - - if (chaptersToPush.length > 0) { - chaptersToPush[chaptersToPush.length - 1].end = startTime - } - - chaptersToPush.push(chapter) - - Logger.debug('Added chapter', chapter) - } - if (errorMessage == null) { - if (chaptersToPush.length > 0) { - chaptersToPush[chaptersToPush.length - 1].end = podcastEpisode.audioFile.duration - } - - podcastEpisode.chapters.push(...chaptersToPush) - Logger.debug(`Successfully gnerated ${podcastEpisode.chapters.length} chapters`) - } else { - Logger.error(`Unable generate chapters from podcast description, error '${errorMessage}`) + try { + let autoGeneratedChapters = PodcastEpisode.autoGenerateChaptersFromTimestamps(podcastEpisode.description, podcastEpisode.audioFile.duration) + podcastEpisode.chapters = autoGeneratedChapters + } catch (error) { + Logger.error(`[PodcastEpisode] createFromRssPodcastEpisode: Failed to auto generate chapters for "${podcastEpisode.title}"`, error) } } @@ -309,6 +245,82 @@ class PodcastEpisode extends Model { return json } + + /** + * + * @param {string} podcastDescription + * @param {number} audioDurationSecs + * @returns {ChapterObject[]} + */ + static autoGenerateChaptersFromTimestamps(podcastDescription, audioDurationSecs) { + if (podcastDescription == null) { + throw new Error('Description must not be null') + } + + if (audioDurationSecs == null) { + throw new Error('Audio duration must not be null') + } + + const timestampRegex = /\b(\d{1,2}):(\d{1,2})(?::(\d{1,2}))?\b/ + const chapterTitleRegex = /\b\d{1,2}:\d{1,2}(?::\d{1,2})?\b(?:\s+|\))(.+)$/ + const descriptionLineSplitRegex = /\<\s*\/\s*p\s*\>|\<\s*br\s*\s*\/\>|\n/ + + var descriptionLines = podcastDescription.split(descriptionLineSplitRegex) + var newChapters = [] + + for (let i = 0; i < descriptionLines.length; i++) { + let line = descriptionLines[i] + + let match = timestampRegex.exec(line) + if (match == null) continue + + let first = match[1] + let second = match[2] + let third = match[3] + + let hours = 0 + let minutes = 0 + let seconds = 0 + + // If there's three components then we can assume its hh:mm:ss + if (first && second && third) { + hours = Number(first) + minutes = Number(second) + seconds = Number(third) + } else if (first && second) // otherwise assume mm:ss + { + minutes = Number(first) + seconds = Number(second) + } + + let startTime = seconds + minutes * 60 + hours * 60 * 60 + let chapterTitleMatch = chapterTitleRegex.exec(line) + + if (chapterTitleMatch == null || chapterTitleMatch.length < 2) { + // Unknown chapter state + throw new Error(`Unable to get chapter title from description, line ${line}`) + } + + let chapter = { title: chapterTitleMatch[1].trim(), id: newChapters.length + 1, start: startTime } + + if (newChapters.length > 0) { + newChapters[newChapters.length - 1].end = startTime + } + + newChapters.push(chapter) + } + if (newChapters.length > 0) { + newChapters[newChapters.length - 1].end = audioDurationSecs + } + + Logger.debug(`Successfully gnerated ${newChapters.length} chapters`) + + if (newChapters.length == 1) { + throw new Error('Only one chapter found, treating as invalid description') + } + + return newChapters + } } module.exports = PodcastEpisode