From e8d65ceb88725e6a35ced5dfae7bffcc4967b3f5 Mon Sep 17 00:00:00 2001 From: Harry Rose Date: Tue, 10 Mar 2026 20:13:12 +0000 Subject: [PATCH 01/14] Commit first implementation of timestamp to chapter generation --- server/models/PodcastEpisode.js | 50 +++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/server/models/PodcastEpisode.js b/server/models/PodcastEpisode.js index 996f55f5..defd4748 100644 --- a/server/models/PodcastEpisode.js +++ b/server/models/PodcastEpisode.js @@ -1,5 +1,6 @@ const { DataTypes, Model } = require('sequelize') const libraryItemsPodcastFilters = require('../utils/queries/libraryItemsPodcastFilters') +const Logger = require('../Logger') /** * @typedef ChapterObject * @property {number} id @@ -85,6 +86,55 @@ class PodcastEpisode extends Model { podcastEpisode.chapters = audioFile.chapters.map((ch) => ({ ...ch })) } else if (rssPodcastEpisode.chapters?.length) { podcastEpisode.chapters = rssPodcastEpisode.chapters.map((ch) => ({ ...ch })) + } else { + const timeMarkerRegex = /\b(\d{1,2}):(\d{1,2})(?::(\d{1,2}))?\b/m + + Logger.debug("Podcast didn't have chapters", rssPodcastEpisode.title) + + var descriptionLines = podcastEpisode.description.split('

') + for (let i = 0; i < descriptionLines.length; i++) { + let line = descriptionLines[i] + Logger.debug('Description Line:', line) + + let match = timeMarkerRegex.exec(line) + if (match == null) continue + + Logger.debug('matches:', match) + + let first = match[1] + let second = match[2] + let third = match[3] + + let hours = 0 + let minutes = 0 + let seconds = 0 + + // If there's three components then we can assume its hh:mm:ss + if (first && second && third) { + hours = Number(first) + minutes = Number(second) + seconds = Number(third) + } else if (first && second) // otherwise assume mm:ss + { + minutes = Number(first) + seconds = Number(second) + } + + let startTime = seconds + minutes * 60 + hours * 60 * 60 + let chapter = { title: `Chapter ${i}`, id: i, start: startTime } + + if (podcastEpisode.chapters.length > 0) { + podcastEpisode.chapters[podcastEpisode.chapters.length - 1].end = startTime + } + + podcastEpisode.chapters.push(chapter) + + Logger.debug('Added chapter', chapter) + } + if (podcastEpisode.chapters.length > 0) { + podcastEpisode.chapters[podcastEpisode.chapters.length - 1].end = podcastEpisode.audioFile.duration + } + Logger.debug('Chapters', podcastEpisode.chapters) } return this.create(podcastEpisode) From b4b126e39f084d69a0bc585c2004bae7b3a3020b Mon Sep 17 00:00:00 2001 From: Harry Rose Date: Tue, 10 Mar 2026 20:43:57 +0000 Subject: [PATCH 02/14] Add chapter title scraping and improve error logging --- .devcontainer/devcontainer.json | 66 +++++++++++++++------------------ server/models/PodcastEpisode.js | 40 ++++++++++++++++---- 2 files changed, 61 insertions(+), 45 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 0213d517..4f4f24a7 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,40 +1,32 @@ // For format details, see https://aka.ms/devcontainer.json. 
For config options, see the // README at: https://github.com/devcontainers/templates/tree/main/src/javascript-node { - "name": "Audiobookshelf", - "build": { - "dockerfile": "Dockerfile", - // Update 'VARIANT' to pick a Node version: 18, 16, 14. - // Append -bullseye or -buster to pin to an OS version. - // Use -bullseye variants on local arm64/Apple Silicon. - "args": { - "VARIANT": "20" - } - }, - "mounts": [ - "source=abs-server-node_modules,target=${containerWorkspaceFolder}/node_modules,type=volume", - "source=abs-client-node_modules,target=${containerWorkspaceFolder}/client/node_modules,type=volume" - ], - // Features to add to the dev container. More info: https://containers.dev/features. - // "features": {}, - // Use 'forwardPorts' to make a list of ports inside the container available locally. - "forwardPorts": [ - 3000, - 3333 - ], - // Use 'postCreateCommand' to run commands after the container is created. - "postCreateCommand": "sh .devcontainer/post-create.sh", - // Configure tool-specific properties. - "customizations": { - // Configure properties specific to VS Code. - "vscode": { - // Add the IDs of extensions you want installed when the container is created. - "extensions": [ - "dbaeumer.vscode-eslint", - "octref.vetur" - ] - } - } - // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. - // "remoteUser": "root" -} \ No newline at end of file + "name": "Audiobookshelf", + "build": { + "dockerfile": "Dockerfile", + // Update 'VARIANT' to pick a Node version: 18, 16, 14. + // Append -bullseye or -buster to pin to an OS version. + // Use -bullseye variants on local arm64/Apple Silicon. 
+ "args": { + "VARIANT": "20" + } + }, + "mounts": ["source=abs-server-node_modules,target=${containerWorkspaceFolder}/node_modules,type=volume", "source=abs-client-node_modules,target=${containerWorkspaceFolder}/client/node_modules,type=volume", "source=/home/harry/Music/ABS-Dev,target=/podcasts,type=bind,consistency=cached"], + // Features to add to the dev container. More info: https://containers.dev/features. + // "features": {}, + // Use 'forwardPorts' to make a list of ports inside the container available locally. + "forwardPorts": [3000, 3333], + // Use 'postCreateCommand' to run commands after the container is created. + "postCreateCommand": "sh .devcontainer/post-create.sh", + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + // Add the IDs of extensions you want installed when the container is created. + "extensions": ["dbaeumer.vscode-eslint", "octref.vetur"] + } + }, + "runArgs": ["-p=3333:3333"] + // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. 
+ // "remoteUser": "root" +} diff --git a/server/models/PodcastEpisode.js b/server/models/PodcastEpisode.js index defd4748..f87cc76b 100644 --- a/server/models/PodcastEpisode.js +++ b/server/models/PodcastEpisode.js @@ -1,6 +1,7 @@ const { DataTypes, Model } = require('sequelize') const libraryItemsPodcastFilters = require('../utils/queries/libraryItemsPodcastFilters') const Logger = require('../Logger') +const { logger } = require('sequelize/lib/utils/logger') /** * @typedef ChapterObject * @property {number} id @@ -87,11 +88,15 @@ class PodcastEpisode extends Model { } else if (rssPodcastEpisode.chapters?.length) { podcastEpisode.chapters = rssPodcastEpisode.chapters.map((ch) => ({ ...ch })) } else { - const timeMarkerRegex = /\b(\d{1,2}):(\d{1,2})(?::(\d{1,2}))?\b/m + const timeMarkerRegex = /\b(\d{1,2}):(\d{1,2})(?::(\d{1,2}))?\b/ + const chapterTitleRegex = /\b\d{1,2}:\d{1,2}(?::\d{1,2})?\b(.+)$/ - Logger.debug("Podcast didn't have chapters", rssPodcastEpisode.title) + Logger.debug("Podcast episode doesn't have chapters, attempting to generate them from timestamps", rssPodcastEpisode.title) + var errorMessage = null var descriptionLines = podcastEpisode.description.split('

') + var chaptersToPush = [] + for (let i = 0; i < descriptionLines.length; i++) { let line = descriptionLines[i] Logger.debug('Description Line:', line) @@ -99,7 +104,7 @@ class PodcastEpisode extends Model { let match = timeMarkerRegex.exec(line) if (match == null) continue - Logger.debug('matches:', match) + Logger.debug('Matches:', match) let first = match[1] let second = match[2] @@ -118,23 +123,42 @@ class PodcastEpisode extends Model { { minutes = Number(first) seconds = Number(second) + } else { + // Unknown timestamp state + errorMessage = `Unknown timestamp format in description, line ${line}` + break } let startTime = seconds + minutes * 60 + hours * 60 * 60 - let chapter = { title: `Chapter ${i}`, id: i, start: startTime } + let chapterTitleMatch = chapterTitleRegex.exec(line) + Logger.debug('Chapter Title Matches:', chapterTitleMatch) + + if (chapterTitleMatch == null && chapterTitleMatch.length >= 2) { + // Unknown chapter state + errorMessage = `Unable to get chapter title from description, line ${line}` + break + } + + let chapter = { title: chapterTitleMatch[1].trim(), id: i, start: startTime } if (podcastEpisode.chapters.length > 0) { podcastEpisode.chapters[podcastEpisode.chapters.length - 1].end = startTime } - podcastEpisode.chapters.push(chapter) + chaptersToPush.push(chapter) Logger.debug('Added chapter', chapter) } - if (podcastEpisode.chapters.length > 0) { - podcastEpisode.chapters[podcastEpisode.chapters.length - 1].end = podcastEpisode.audioFile.duration + if (errorMessage == null) { + if (podcastEpisode.chapters.length > 0) { + podcastEpisode.chapters[podcastEpisode.chapters.length - 1].end = podcastEpisode.audioFile.duration + } + + podcastEpisode.chapters.push(...chaptersToPush) + Logger.debug(`Successfully gnerated ${podcastEpisode.chapters.length} chapters`) + } else { + logger.error(`Unable generate chapters from podcast description, error '${errorMessage}`) } - Logger.debug('Chapters', podcastEpisode.chapters) } return 
this.create(podcastEpisode) From e096a046039e868571b94d0015a1427599e461ec Mon Sep 17 00:00:00 2001 From: Harry Rose Date: Tue, 10 Mar 2026 20:45:15 +0000 Subject: [PATCH 03/14] Revert .devcontainer/devcontainer.json --- .devcontainer/devcontainer.json | 66 ++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4f4f24a7..0213d517 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,32 +1,40 @@ // For format details, see https://aka.ms/devcontainer.json. For config options, see the // README at: https://github.com/devcontainers/templates/tree/main/src/javascript-node { - "name": "Audiobookshelf", - "build": { - "dockerfile": "Dockerfile", - // Update 'VARIANT' to pick a Node version: 18, 16, 14. - // Append -bullseye or -buster to pin to an OS version. - // Use -bullseye variants on local arm64/Apple Silicon. - "args": { - "VARIANT": "20" - } - }, - "mounts": ["source=abs-server-node_modules,target=${containerWorkspaceFolder}/node_modules,type=volume", "source=abs-client-node_modules,target=${containerWorkspaceFolder}/client/node_modules,type=volume", "source=/home/harry/Music/ABS-Dev,target=/podcasts,type=bind,consistency=cached"], - // Features to add to the dev container. More info: https://containers.dev/features. - // "features": {}, - // Use 'forwardPorts' to make a list of ports inside the container available locally. - "forwardPorts": [3000, 3333], - // Use 'postCreateCommand' to run commands after the container is created. - "postCreateCommand": "sh .devcontainer/post-create.sh", - // Configure tool-specific properties. - "customizations": { - // Configure properties specific to VS Code. - "vscode": { - // Add the IDs of extensions you want installed when the container is created. 
- "extensions": ["dbaeumer.vscode-eslint", "octref.vetur"] - } - }, - "runArgs": ["-p=3333:3333"] - // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. - // "remoteUser": "root" -} + "name": "Audiobookshelf", + "build": { + "dockerfile": "Dockerfile", + // Update 'VARIANT' to pick a Node version: 18, 16, 14. + // Append -bullseye or -buster to pin to an OS version. + // Use -bullseye variants on local arm64/Apple Silicon. + "args": { + "VARIANT": "20" + } + }, + "mounts": [ + "source=abs-server-node_modules,target=${containerWorkspaceFolder}/node_modules,type=volume", + "source=abs-client-node_modules,target=${containerWorkspaceFolder}/client/node_modules,type=volume" + ], + // Features to add to the dev container. More info: https://containers.dev/features. + // "features": {}, + // Use 'forwardPorts' to make a list of ports inside the container available locally. + "forwardPorts": [ + 3000, + 3333 + ], + // Use 'postCreateCommand' to run commands after the container is created. + "postCreateCommand": "sh .devcontainer/post-create.sh", + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + // Add the IDs of extensions you want installed when the container is created. + "extensions": [ + "dbaeumer.vscode-eslint", + "octref.vetur" + ] + } + } + // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. 
+ // "remoteUser": "root" +} \ No newline at end of file From 256c341f06597c7c8ea4bc33de72cf31b8fd13cf Mon Sep 17 00:00:00 2001 From: Harry Rose Date: Tue, 10 Mar 2026 20:59:25 +0000 Subject: [PATCH 04/14] Update updating of end values to use new chaptersToPush temp array --- server/models/PodcastEpisode.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/server/models/PodcastEpisode.js b/server/models/PodcastEpisode.js index f87cc76b..e70dc809 100644 --- a/server/models/PodcastEpisode.js +++ b/server/models/PodcastEpisode.js @@ -141,8 +141,8 @@ class PodcastEpisode extends Model { let chapter = { title: chapterTitleMatch[1].trim(), id: i, start: startTime } - if (podcastEpisode.chapters.length > 0) { - podcastEpisode.chapters[podcastEpisode.chapters.length - 1].end = startTime + if (chaptersToPush.length > 0) { + chaptersToPush[chaptersToPush.length - 1].end = startTime } chaptersToPush.push(chapter) @@ -150,8 +150,8 @@ class PodcastEpisode extends Model { Logger.debug('Added chapter', chapter) } if (errorMessage == null) { - if (podcastEpisode.chapters.length > 0) { - podcastEpisode.chapters[podcastEpisode.chapters.length - 1].end = podcastEpisode.audioFile.duration + if (chaptersToPush.length > 0) { + chaptersToPush[chaptersToPush.length - 1].end = podcastEpisode.audioFile.duration } podcastEpisode.chapters.push(...chaptersToPush) From bb7fcc1420c3ba77723b23574ea3867d660bfae6 Mon Sep 17 00:00:00 2001 From: Harry Rose Date: Tue, 10 Mar 2026 21:07:26 +0000 Subject: [PATCH 05/14] Only use projects logger --- server/models/PodcastEpisode.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server/models/PodcastEpisode.js b/server/models/PodcastEpisode.js index e70dc809..555c348c 100644 --- a/server/models/PodcastEpisode.js +++ b/server/models/PodcastEpisode.js @@ -1,7 +1,6 @@ const { DataTypes, Model } = require('sequelize') const libraryItemsPodcastFilters = require('../utils/queries/libraryItemsPodcastFilters') const 
Logger = require('../Logger') -const { logger } = require('sequelize/lib/utils/logger') /** * @typedef ChapterObject * @property {number} id @@ -157,7 +156,7 @@ class PodcastEpisode extends Model { podcastEpisode.chapters.push(...chaptersToPush) Logger.debug(`Successfully gnerated ${podcastEpisode.chapters.length} chapters`) } else { - logger.error(`Unable generate chapters from podcast description, error '${errorMessage}`) + Logger.error(`Unable generate chapters from podcast description, error '${errorMessage}`) } } From 9d4a2a8a598c500c0a221d82e0e5e983a3f3914b Mon Sep 17 00:00:00 2001 From: Harry Rose Date: Wed, 11 Mar 2026 17:22:23 +0000 Subject: [PATCH 06/14] Improve chapter generation code and extract it into its own function --- server/models/PodcastEpisode.js | 150 +++++++++++++++++--------------- 1 file changed, 81 insertions(+), 69 deletions(-) diff --git a/server/models/PodcastEpisode.js b/server/models/PodcastEpisode.js index 555c348c..4b334d6e 100644 --- a/server/models/PodcastEpisode.js +++ b/server/models/PodcastEpisode.js @@ -87,76 +87,12 @@ class PodcastEpisode extends Model { } else if (rssPodcastEpisode.chapters?.length) { podcastEpisode.chapters = rssPodcastEpisode.chapters.map((ch) => ({ ...ch })) } else { - const timeMarkerRegex = /\b(\d{1,2}):(\d{1,2})(?::(\d{1,2}))?\b/ - const chapterTitleRegex = /\b\d{1,2}:\d{1,2}(?::\d{1,2})?\b(.+)$/ - Logger.debug("Podcast episode doesn't have chapters, attempting to generate them from timestamps", rssPodcastEpisode.title) - - var errorMessage = null - var descriptionLines = podcastEpisode.description.split('

') - var chaptersToPush = [] - - for (let i = 0; i < descriptionLines.length; i++) { - let line = descriptionLines[i] - Logger.debug('Description Line:', line) - - let match = timeMarkerRegex.exec(line) - if (match == null) continue - - Logger.debug('Matches:', match) - - let first = match[1] - let second = match[2] - let third = match[3] - - let hours = 0 - let minutes = 0 - let seconds = 0 - - // If there's three components then we can assume its hh:mm:ss - if (first && second && third) { - hours = Number(first) - minutes = Number(second) - seconds = Number(third) - } else if (first && second) // otherwise assume mm:ss - { - minutes = Number(first) - seconds = Number(second) - } else { - // Unknown timestamp state - errorMessage = `Unknown timestamp format in description, line ${line}` - break - } - - let startTime = seconds + minutes * 60 + hours * 60 * 60 - let chapterTitleMatch = chapterTitleRegex.exec(line) - Logger.debug('Chapter Title Matches:', chapterTitleMatch) - - if (chapterTitleMatch == null && chapterTitleMatch.length >= 2) { - // Unknown chapter state - errorMessage = `Unable to get chapter title from description, line ${line}` - break - } - - let chapter = { title: chapterTitleMatch[1].trim(), id: i, start: startTime } - - if (chaptersToPush.length > 0) { - chaptersToPush[chaptersToPush.length - 1].end = startTime - } - - chaptersToPush.push(chapter) - - Logger.debug('Added chapter', chapter) - } - if (errorMessage == null) { - if (chaptersToPush.length > 0) { - chaptersToPush[chaptersToPush.length - 1].end = podcastEpisode.audioFile.duration - } - - podcastEpisode.chapters.push(...chaptersToPush) - Logger.debug(`Successfully gnerated ${podcastEpisode.chapters.length} chapters`) - } else { - Logger.error(`Unable generate chapters from podcast description, error '${errorMessage}`) + try { + let autoGeneratedChapters = PodcastEpisode.autoGenerateChaptersFromTimestamps(podcastEpisode.description, podcastEpisode.audioFile.duration) + 
podcastEpisode.chapters = autoGeneratedChapters + } catch (error) { + Logger.error(`[PodcastEpisode] createFromRssPodcastEpisode: Failed to auto generate chapters for "${podcastEpisode.title}"`, error) } } @@ -309,6 +245,82 @@ class PodcastEpisode extends Model { return json } + + /** + * + * @param {string} podcastDescription + * @param {number} audioDurationSecs + * @returns {ChapterObject[]} + */ + static autoGenerateChaptersFromTimestamps(podcastDescription, audioDurationSecs) { + if (podcastDescription == null) { + throw new Error('Description must not be null') + } + + if (audioDurationSecs == null) { + throw new Error('Audio duration must not be null') + } + + const timestampRegex = /\b(\d{1,2}):(\d{1,2})(?::(\d{1,2}))?\b/ + const chapterTitleRegex = /\b\d{1,2}:\d{1,2}(?::\d{1,2})?\b(?:\s+|\))(.+)$/ + const descriptionLineSplitRegex = /\<\s*\/\s*p\s*\>|\<\s*br\s*\s*\/\>|\n/ + + var descriptionLines = podcastDescription.split(descriptionLineSplitRegex) + var newChapters = [] + + for (let i = 0; i < descriptionLines.length; i++) { + let line = descriptionLines[i] + + let match = timestampRegex.exec(line) + if (match == null) continue + + let first = match[1] + let second = match[2] + let third = match[3] + + let hours = 0 + let minutes = 0 + let seconds = 0 + + // If there's three components then we can assume its hh:mm:ss + if (first && second && third) { + hours = Number(first) + minutes = Number(second) + seconds = Number(third) + } else if (first && second) // otherwise assume mm:ss + { + minutes = Number(first) + seconds = Number(second) + } + + let startTime = seconds + minutes * 60 + hours * 60 * 60 + let chapterTitleMatch = chapterTitleRegex.exec(line) + + if (chapterTitleMatch == null || chapterTitleMatch.length < 2) { + // Unknown chapter state + throw new Error(`Unable to get chapter title from description, line ${line}`) + } + + let chapter = { title: chapterTitleMatch[1].trim(), id: newChapters.length + 1, start: startTime } + + if 
(newChapters.length > 0) { + newChapters[newChapters.length - 1].end = startTime + } + + newChapters.push(chapter) + } + if (newChapters.length > 0) { + newChapters[newChapters.length - 1].end = audioDurationSecs + } + + Logger.debug(`Successfully gnerated ${newChapters.length} chapters`) + + if (newChapters.length == 1) { + throw new Error('Only one chapter found, treating as invalid description') + } + + return newChapters + } } module.exports = PodcastEpisode From b3ba764d11dbf3619efe33ac8abd1d2807a3adbb Mon Sep 17 00:00:00 2001 From: Harry Rose Date: Wed, 11 Mar 2026 17:22:56 +0000 Subject: [PATCH 07/14] Add tests --- server/models/PodcastEpisode.test.js | 117 +++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 server/models/PodcastEpisode.test.js diff --git a/server/models/PodcastEpisode.test.js b/server/models/PodcastEpisode.test.js new file mode 100644 index 00000000..676c59f1 --- /dev/null +++ b/server/models/PodcastEpisode.test.js @@ -0,0 +1,117 @@ +const chai = require('chai') +const PodcastEpisode = require('./PodcastEpisode') +const Logger = require('../Logger') +const expect = chai.expect + +describe('PodcastEpisode', () => { + describe('autoGenerateChaptersFromTimestamps', () => { + var testCasesTestingSuccess = [ + { + testName: 'Should handle descriptions using html paragraphs', + description: '

Introduction text paragraph 1

Introduction text paragraph 2

00:48 Chatper 1

12:14 Chapter 2

20:56 Chapter 3

27:34 Chapter 4

32:00 Chapter 5

35:16 Chapter 6

41:32 Chapter 7

46:43 Chapter 8

', + audioDuration: 3060, + expectedChapters: [ + { title: 'Chatper 1', id: 1, start: 48, end: 734 }, + { title: 'Chapter 2', id: 2, start: 734, end: 1256 }, + { title: 'Chapter 3', id: 3, start: 1256, end: 1654 }, + { title: 'Chapter 4', id: 4, start: 1654, end: 1920 }, + { title: 'Chapter 5', id: 5, start: 1920, end: 2116 }, + { title: 'Chapter 6', id: 6, start: 2116, end: 2492 }, + { title: 'Chapter 7', id: 7, start: 2492, end: 2803 }, + { title: 'Chapter 8', id: 8, start: 2803, end: 3060 } + ] + }, + { + testName: 'Should handle descriptions using html line breaks', + description: '
Introduction text paragraph 1

Introduction text paragraph 2

0:00:00 Chapter 1
0:17:05 Chapter 2
0:33:58 Chapter 3
0:40:35 Chapter 4
Unrelated outro line
', + audioDuration: 2700, + expectedChapters: [ + { title: 'Chapter 1', id: 1, start: 0, end: 1025 }, + { title: 'Chapter 2', id: 2, start: 1025, end: 2038 }, + { title: 'Chapter 3', id: 3, start: 2038, end: 2435 }, + { title: 'Chapter 4', id: 4, start: 2435, end: 2700 } + ] + }, + { + testName: 'Should handle descriptions using unix new lines', + description: `Introduction text paragraph 1 + Introduction text paragraph 2 + 0:00:00 Chapter 1 + 0:17:05 Chapter 2 + 0:33:58 Chapter 3 + 0:40:35 Chapter 4 + Unrelated outro line`, + audioDuration: 2700, + expectedChapters: [ + { title: 'Chapter 1', id: 1, start: 0, end: 1025 }, + { title: 'Chapter 2', id: 2, start: 1025, end: 2038 }, + { title: 'Chapter 3', id: 3, start: 2038, end: 2435 }, + { title: 'Chapter 4', id: 4, start: 2435, end: 2700 } + ] + }, + { + testName: 'Should handle descriptions with no timestamps', + description: `Introduction text paragraph 1 + Introduction text paragraph 2`, + audioDuration: 2700, + expectedChapters: [] + }, + { + testName: 'Should handle timestampes in parentheses', + description: '

Introduction text paragraph 1

Introduction text paragraph 2

(00:48) Chatper 1

(12:14) Chapter 2

(20:56) Chapter 3

(27:34) Chapter 4

(32:00) Chapter 5

(35:16) Chapter 6

(41:32) Chapter 7

(46:43) Chapter 8

', + audioDuration: 3060, + expectedChapters: [ + { title: 'Chatper 1', id: 1, start: 48, end: 734 }, + { title: 'Chapter 2', id: 2, start: 734, end: 1256 }, + { title: 'Chapter 3', id: 3, start: 1256, end: 1654 }, + { title: 'Chapter 4', id: 4, start: 1654, end: 1920 }, + { title: 'Chapter 5', id: 5, start: 1920, end: 2116 }, + { title: 'Chapter 6', id: 6, start: 2116, end: 2492 }, + { title: 'Chapter 7', id: 7, start: 2492, end: 2803 }, + { title: 'Chapter 8', id: 8, start: 2803, end: 3060 } + ] + } + ] + testCasesTestingSuccess.forEach(function (testCase) { + it(testCase.testName, () => { + var chapters = PodcastEpisode.autoGenerateChaptersFromTimestamps(testCase.description, testCase.audioDuration) + expect(chapters).to.be.deep.equal(testCase.expectedChapters) + }) + }) + + var testCasesTestingFailure = [ + { + testName: 'Should throw if only one chapter found', + description: '

Introduction text paragraph 1

Introduction text paragraph 2

00:48 Chatper 1

', + audioDuration: 1000, + expectedError: 'Only one chapter found, treating as invalid description' + }, + { + testName: 'Should throw if description is null', + description: null, + audioDuration: 1000, + expectedError: 'Description must not be null' + }, + { + testName: 'Should throw if audio duration is null', + description: '', + audioDuration: null, + expectedError: 'Audio duration must not be null' + }, + { + testName: 'Should throw if chapter has no title', + description: '

Introduction text paragraph 1

Introduction text paragraph 2

00:48 Chatper 1

00:60:12

', + audioDuration: 1000, + expectedError: 'Unable to get chapter title from description' + } + ] + testCasesTestingFailure.forEach(function (testCase) { + it(testCase.testName, () => { + expect(() => { + var chapters = PodcastEpisode.autoGenerateChaptersFromTimestamps(testCase.description, testCase.audioDuration) + Logger.debug('Chapters', chapters) + }).to.throw(testCase.expectedError) + }) + }) + }) +}) From 32ea3e08d69887ac70877f202a42823b87e5c737 Mon Sep 17 00:00:00 2001 From: Harry Rose Date: Fri, 13 Mar 2026 20:11:03 +0000 Subject: [PATCH 08/14] Update logging to use info for key logs, also use [PodcastEpisode] prefix to match other logs --- server/models/PodcastEpisode.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/models/PodcastEpisode.js b/server/models/PodcastEpisode.js index 4b334d6e..745a71d2 100644 --- a/server/models/PodcastEpisode.js +++ b/server/models/PodcastEpisode.js @@ -87,7 +87,7 @@ class PodcastEpisode extends Model { } else if (rssPodcastEpisode.chapters?.length) { podcastEpisode.chapters = rssPodcastEpisode.chapters.map((ch) => ({ ...ch })) } else { - Logger.debug("Podcast episode doesn't have chapters, attempting to generate them from timestamps", rssPodcastEpisode.title) + Logger.info("[PodcastEpisode] New episode doesn't have chapters, attempting to generate them from timestamps", rssPodcastEpisode.title) try { let autoGeneratedChapters = PodcastEpisode.autoGenerateChaptersFromTimestamps(podcastEpisode.description, podcastEpisode.audioFile.duration) podcastEpisode.chapters = autoGeneratedChapters @@ -313,7 +313,7 @@ class PodcastEpisode extends Model { newChapters[newChapters.length - 1].end = audioDurationSecs } - Logger.debug(`Successfully gnerated ${newChapters.length} chapters`) + Logger.info(`[PodcastEpisode] Successfully gnerated ${newChapters.length} chapters`) if (newChapters.length == 1) { throw new Error('Only one chapter found, treating as invalid description') From 
12b04faed25d52d7818f1196b35c0e75f4650702 Mon Sep 17 00:00:00 2001 From: Harry Rose Date: Fri, 13 Mar 2026 20:14:14 +0000 Subject: [PATCH 09/14] Fix typo --- server/models/PodcastEpisode.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/models/PodcastEpisode.js b/server/models/PodcastEpisode.js index 745a71d2..3f1f4487 100644 --- a/server/models/PodcastEpisode.js +++ b/server/models/PodcastEpisode.js @@ -313,7 +313,7 @@ class PodcastEpisode extends Model { newChapters[newChapters.length - 1].end = audioDurationSecs } - Logger.info(`[PodcastEpisode] Successfully gnerated ${newChapters.length} chapters`) + Logger.info(`[PodcastEpisode] Successfully generated ${newChapters.length} chapters`) if (newChapters.length == 1) { throw new Error('Only one chapter found, treating as invalid description') From 4907e70a48dcbe4bd8314f3a0a0f8131e9e431d7 Mon Sep 17 00:00:00 2001 From: Harry Rose Date: Mon, 16 Mar 2026 18:42:01 +0000 Subject: [PATCH 10/14] - Add new migration to add an autoGenerateChapters column in the Podcasts table - Bump minor version (I wasn't sure if this was needed for the migration) - Feature is now controlled by the field in the podcast database object - Move parsing code and tests to existing utils/parsers/ dir - Add more test cases --- client/package.json | 2 +- package.json | 2 +- server/managers/PodcastManager.js | 2 +- ...34.0-add-auto-generate-podcast-chapters.js | 83 +++++++++++ server/models/Podcast.js | 11 ++ server/models/PodcastEpisode.js | 85 +---------- server/models/PodcastEpisode.test.js | 117 ---------------- .../parsePodcastDescriptionForChapters.js | 95 +++++++++++++ ...add-auto-generate-podcast-chapters.test.js | 64 +++++++++ ...parsePodcastDescriptionForChapters.test.js | 132 ++++++++++++++++++ 10 files changed, 393 insertions(+), 200 deletions(-) create mode 100644 server/migrations/v2.34.0-add-auto-generate-podcast-chapters.js delete mode 100644 server/models/PodcastEpisode.test.js create mode 100644 
server/utils/parsers/parsePodcastDescriptionForChapters.js create mode 100644 test/server/migrations/v2.34.0-add-auto-generate-podcast-chapters.test.js create mode 100644 test/server/utils/parsers/parsePodcastDescriptionForChapters.test.js diff --git a/client/package.json b/client/package.json index a1503a50..dd0f3a0c 100644 --- a/client/package.json +++ b/client/package.json @@ -1,6 +1,6 @@ { "name": "audiobookshelf-client", - "version": "2.33.0", + "version": "2.34.0", "buildNumber": 1, "description": "Self-hosted audiobook and podcast client", "main": "index.js", diff --git a/package.json b/package.json index 3108b517..10ba26f6 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "audiobookshelf", - "version": "2.33.0", + "version": "2.34.0", "buildNumber": 1, "description": "Self-hosted audiobook and podcast server", "main": "index.js", diff --git a/server/managers/PodcastManager.js b/server/managers/PodcastManager.js index bdf6fc76..82e0dbd3 100644 --- a/server/managers/PodcastManager.js +++ b/server/managers/PodcastManager.js @@ -204,7 +204,7 @@ class PodcastManager { return false } - const podcastEpisode = await Database.podcastEpisodeModel.createFromRssPodcastEpisode(this.currentDownload.rssPodcastEpisode, libraryItem.media.id, audioFile) + const podcastEpisode = await Database.podcastEpisodeModel.createFromRssPodcastEpisode(this.currentDownload.rssPodcastEpisode, libraryItem.media.id, libraryItem.media.autoGenerateChapters, audioFile) libraryItem.libraryFiles.push(libraryFile.toJSON()) // Re-calculating library item size because this wasnt being updated properly for podcasts in v2.20.0 and below diff --git a/server/migrations/v2.34.0-add-auto-generate-podcast-chapters.js b/server/migrations/v2.34.0-add-auto-generate-podcast-chapters.js new file mode 100644 index 00000000..c5567abe --- /dev/null +++ b/server/migrations/v2.34.0-add-auto-generate-podcast-chapters.js @@ -0,0 +1,83 @@ +const util = require('util') + +/** + * @typedef 
MigrationContext + * @property {import('sequelize').QueryInterface} queryInterface - a sequelize QueryInterface object. + * @property {import('../Logger')} logger - a Logger object. + * + * @typedef MigrationOptions + * @property {MigrationContext} context - an object containing the migration context. + */ + +const migrationVersion = '2.34.0' +const migrationName = `${migrationVersion}-add-auto-generate-podcast-chapters` +const loggerPrefix = `[${migrationVersion} migration]` + +/** + * This upward migration adds a boolean autoGenerateChapters column to the podcasts table and defaults it to false. + * + * @param {MigrationOptions} options - an object containing the migration context. + * @returns {Promise} - A promise that resolves when the migration is complete. + */ +async function up({ context: { queryInterface, logger } }) { + logger.info(`${loggerPrefix} UPGRADE BEGIN: ${migrationName}`) + + await addColumn(queryInterface, logger, 'podcasts', 'autoGenerateChapters', { type: queryInterface.sequelize.Sequelize.BOOLEAN, allowNull: false, defaultValue: false }) + + logger.info(`${loggerPrefix} UPGRADE END: ${migrationName}`) +} + +/** + * This downward migration removes the autoGenerateChapters column on the podcasts table. + * + * @param {MigrationOptions} options - an object containing the migration context. + * @returns {Promise} - A promise that resolves when the migration is complete. + */ +async function down({ context: { queryInterface, logger } }) { + logger.info(`${loggerPrefix} DOWNGRADE BEGIN: ${migrationName}`) + + await removeColumn(queryInterface, logger, 'podcasts', 'autoGenerateChapters') + + logger.info(`${loggerPrefix} DOWNGRADE END: ${migrationName}`) +} + +/** + * Utility function to add a column to a table. If the column already exists, it logs a message and continues. + * + * @param {import('sequelize').QueryInterface} queryInterface - a sequelize QueryInterface object. + * @param {import('../Logger')} logger - a Logger object.
+ * @param {string} table - the name of the table to add the column to. + * @param {string} column - the name of the column to add. + * @param {Object} options - the options for the column. + */ +async function addColumn(queryInterface, logger, table, column, options) { + logger.info(`${loggerPrefix} adding column "${column}" to table "${table}"`) + const tableDescription = await queryInterface.describeTable(table) + if (!tableDescription[column]) { + await queryInterface.addColumn(table, column, options) + logger.info(`${loggerPrefix} added column "${column}" to table "${table}"`) + } else { + logger.info(`${loggerPrefix} column "${column}" already exists in table "${table}"`) + } +} + +/** + * Utility function to remove a column from a table. If the column does not exist, it logs a message and continues. + * + * @param {import('sequelize').QueryInterface} queryInterface - a suquelize QueryInterface object. + * @param {import('../Logger')} logger - a Logger object. + * @param {string} table - the name of the table to remove the column from. + * @param {string} column - the name of the column to remove. 
+ */ +async function removeColumn(queryInterface, logger, table, column) { + logger.info(`${loggerPrefix} removing column "${column}" from table "${table}"`) + const tableDescription = await queryInterface.describeTable(table) + if (tableDescription[column]) { + await queryInterface.sequelize.query(`ALTER TABLE ${table} DROP COLUMN ${column}`) + logger.info(`${loggerPrefix} removed column "${column}" from table "${table}"`) + } else { + logger.info(`${loggerPrefix} column "${column}" does not exist in table "${table}"`) + } +} + +module.exports = { up, down } diff --git a/server/models/Podcast.js b/server/models/Podcast.js index a96e1dd0..bb0e0453 100644 --- a/server/models/Podcast.js +++ b/server/models/Podcast.js @@ -53,6 +53,8 @@ class Podcast extends Model { this.maxEpisodesToKeep /** @type {number} */ this.maxNewEpisodesToDownload + /** @type {boolean} */ + this.autoGenerateChapters /** @type {string} */ this.coverPath /** @type {string[]} */ @@ -106,6 +108,7 @@ class Podcast extends Model { explicit: !!payload.metadata.explicit, autoDownloadEpisodes: !!payload.autoDownloadEpisodes, autoDownloadSchedule: autoDownloadSchedule || global.ServerSettings.podcastEpisodeSchedule, + autoGenerateChapters: !!payload.autoGenerateChapters, lastEpisodeCheck: new Date(), maxEpisodesToKeep: 0, maxNewEpisodesToDownload: 3, @@ -145,6 +148,7 @@ class Podcast extends Model { autoDownloadEpisodes: DataTypes.BOOLEAN, autoDownloadSchedule: DataTypes.STRING, lastEpisodeCheck: DataTypes.DATE, + autoGenerateChapters: DataTypes.BOOLEAN, maxEpisodesToKeep: DataTypes.INTEGER, maxNewEpisodesToDownload: DataTypes.INTEGER, coverPath: DataTypes.STRING, @@ -273,6 +277,10 @@ class Podcast extends Model { this.autoDownloadSchedule = payload.autoDownloadSchedule hasUpdates = true } + if (payload.autoGenerateChapters !== undefined && payload.autoGenerateChapters !== this.autoGenerateChapters) { + this.autoGenerateChapters = !!payload.autoGenerateChapters + hasUpdates = true + } if (typeof 
payload.lastEpisodeCheck === 'number' && payload.lastEpisodeCheck !== this.lastEpisodeCheck?.valueOf()) { this.lastEpisodeCheck = payload.lastEpisodeCheck hasUpdates = true @@ -441,6 +449,7 @@ class Podcast extends Model { autoDownloadEpisodes: this.autoDownloadEpisodes, autoDownloadSchedule: this.autoDownloadSchedule, lastEpisodeCheck: this.lastEpisodeCheck?.valueOf() || null, + autoGenerateChapters: this.autoGenerateChapters, maxEpisodesToKeep: this.maxEpisodesToKeep, maxNewEpisodesToDownload: this.maxNewEpisodesToDownload } @@ -457,6 +466,7 @@ class Podcast extends Model { autoDownloadEpisodes: this.autoDownloadEpisodes, autoDownloadSchedule: this.autoDownloadSchedule, lastEpisodeCheck: this.lastEpisodeCheck?.valueOf() || null, + autoGenerateChapters: this.autoGenerateChapters, maxEpisodesToKeep: this.maxEpisodesToKeep, maxNewEpisodesToDownload: this.maxNewEpisodesToDownload, size: this.size @@ -481,6 +491,7 @@ class Podcast extends Model { autoDownloadEpisodes: this.autoDownloadEpisodes, autoDownloadSchedule: this.autoDownloadSchedule, lastEpisodeCheck: this.lastEpisodeCheck?.valueOf() || null, + autoGenerateChapters: this.autoGenerateChapters, maxEpisodesToKeep: this.maxEpisodesToKeep, maxNewEpisodesToDownload: this.maxNewEpisodesToDownload, size: this.size diff --git a/server/models/PodcastEpisode.js b/server/models/PodcastEpisode.js index 3f1f4487..c253f479 100644 --- a/server/models/PodcastEpisode.js +++ b/server/models/PodcastEpisode.js @@ -1,5 +1,6 @@ const { DataTypes, Model } = require('sequelize') const libraryItemsPodcastFilters = require('../utils/queries/libraryItemsPodcastFilters') +const parsePodcastDescriptionForChapters = require('../utils/parsers/parsePodcastDescriptionForChapters') const Logger = require('../Logger') /** * @typedef ChapterObject @@ -57,9 +58,10 @@ class PodcastEpisode extends Model { * * @param {import('../utils/podcastUtils').RssPodcastEpisode} rssPodcastEpisode * @param {string} podcastId + * @param {boolean} 
autoGenerateChapters * @param {import('../objects/files/AudioFile')} audioFile */ - static async createFromRssPodcastEpisode(rssPodcastEpisode, podcastId, audioFile) { + static async createFromRssPodcastEpisode(rssPodcastEpisode, podcastId, autoGenerateChapters, audioFile) { const podcastEpisode = { index: null, season: rssPodcastEpisode.season, @@ -86,11 +88,10 @@ class PodcastEpisode extends Model { podcastEpisode.chapters = audioFile.chapters.map((ch) => ({ ...ch })) } else if (rssPodcastEpisode.chapters?.length) { podcastEpisode.chapters = rssPodcastEpisode.chapters.map((ch) => ({ ...ch })) - } else { + } else if (autoGenerateChapters) { Logger.info("[PodcastEpisode] New episode doesn't have chapters, attempting to generate them from timestamps", rssPodcastEpisode.title) try { - let autoGeneratedChapters = PodcastEpisode.autoGenerateChaptersFromTimestamps(podcastEpisode.description, podcastEpisode.audioFile.duration) - podcastEpisode.chapters = autoGeneratedChapters + podcastEpisode.chapters = parsePodcastDescriptionForChapters.parse(podcastEpisode.description, podcastEpisode.audioFile.duration) } catch (error) { Logger.error(`[PodcastEpisode] createFromRssPodcastEpisode: Failed to auto generate chapters for "${podcastEpisode.title}"`, error) } @@ -245,82 +246,6 @@ class PodcastEpisode extends Model { return json } - - /** - * - * @param {string} podcastDescription - * @param {number} audioDurationSecs - * @returns {ChapterObject[]} - */ - static autoGenerateChaptersFromTimestamps(podcastDescription, audioDurationSecs) { - if (podcastDescription == null) { - throw new Error('Description must not be null') - } - - if (audioDurationSecs == null) { - throw new Error('Audio duration must not be null') - } - - const timestampRegex = /\b(\d{1,2}):(\d{1,2})(?::(\d{1,2}))?\b/ - const chapterTitleRegex = /\b\d{1,2}:\d{1,2}(?::\d{1,2})?\b(?:\s+|\))(.+)$/ - const descriptionLineSplitRegex = /\<\s*\/\s*p\s*\>|\<\s*br\s*\s*\/\>|\n/ - - var descriptionLines = 
podcastDescription.split(descriptionLineSplitRegex) - var newChapters = [] - - for (let i = 0; i < descriptionLines.length; i++) { - let line = descriptionLines[i] - - let match = timestampRegex.exec(line) - if (match == null) continue - - let first = match[1] - let second = match[2] - let third = match[3] - - let hours = 0 - let minutes = 0 - let seconds = 0 - - // If there's three components then we can assume its hh:mm:ss - if (first && second && third) { - hours = Number(first) - minutes = Number(second) - seconds = Number(third) - } else if (first && second) // otherwise assume mm:ss - { - minutes = Number(first) - seconds = Number(second) - } - - let startTime = seconds + minutes * 60 + hours * 60 * 60 - let chapterTitleMatch = chapterTitleRegex.exec(line) - - if (chapterTitleMatch == null || chapterTitleMatch.length < 2) { - // Unknown chapter state - throw new Error(`Unable to get chapter title from description, line ${line}`) - } - - let chapter = { title: chapterTitleMatch[1].trim(), id: newChapters.length + 1, start: startTime } - - if (newChapters.length > 0) { - newChapters[newChapters.length - 1].end = startTime - } - - newChapters.push(chapter) - } - if (newChapters.length > 0) { - newChapters[newChapters.length - 1].end = audioDurationSecs - } - - Logger.info(`[PodcastEpisode] Successfully generated ${newChapters.length} chapters`) - - if (newChapters.length == 1) { - throw new Error('Only one chapter found, treating as invalid description') - } - - return newChapters - } } module.exports = PodcastEpisode diff --git a/server/models/PodcastEpisode.test.js b/server/models/PodcastEpisode.test.js deleted file mode 100644 index 676c59f1..00000000 --- a/server/models/PodcastEpisode.test.js +++ /dev/null @@ -1,117 +0,0 @@ -const chai = require('chai') -const PodcastEpisode = require('./PodcastEpisode') -const Logger = require('../Logger') -const expect = chai.expect - -describe('PodcastEpisode', () => { - describe('autoGenerateChaptersFromTimestamps', () 
=> { - var testCasesTestingSuccess = [ - { - testName: 'Should handle descriptions using html paragraphs', - description: '

Introduction text paragraph 1

Introduction text paragraph 2

00:48 Chatper 1

12:14 Chapter 2

20:56 Chapter 3

27:34 Chapter 4

32:00 Chapter 5

35:16 Chapter 6

41:32 Chapter 7

46:43 Chapter 8

', - audioDuration: 3060, - expectedChapters: [ - { title: 'Chatper 1', id: 1, start: 48, end: 734 }, - { title: 'Chapter 2', id: 2, start: 734, end: 1256 }, - { title: 'Chapter 3', id: 3, start: 1256, end: 1654 }, - { title: 'Chapter 4', id: 4, start: 1654, end: 1920 }, - { title: 'Chapter 5', id: 5, start: 1920, end: 2116 }, - { title: 'Chapter 6', id: 6, start: 2116, end: 2492 }, - { title: 'Chapter 7', id: 7, start: 2492, end: 2803 }, - { title: 'Chapter 8', id: 8, start: 2803, end: 3060 } - ] - }, - { - testName: 'Should handle descriptions using html line breaks', - description: '
Introduction text paragraph 1

Introduction text paragraph 2

0:00:00 Chapter 1
0:17:05 Chapter 2
0:33:58 Chapter 3
0:40:35 Chapter 4
Unrelated outro line
', - audioDuration: 2700, - expectedChapters: [ - { title: 'Chapter 1', id: 1, start: 0, end: 1025 }, - { title: 'Chapter 2', id: 2, start: 1025, end: 2038 }, - { title: 'Chapter 3', id: 3, start: 2038, end: 2435 }, - { title: 'Chapter 4', id: 4, start: 2435, end: 2700 } - ] - }, - { - testName: 'Should handle descriptions using unix new lines', - description: `Introduction text paragraph 1 - Introduction text paragraph 2 - 0:00:00 Chapter 1 - 0:17:05 Chapter 2 - 0:33:58 Chapter 3 - 0:40:35 Chapter 4 - Unrelated outro line`, - audioDuration: 2700, - expectedChapters: [ - { title: 'Chapter 1', id: 1, start: 0, end: 1025 }, - { title: 'Chapter 2', id: 2, start: 1025, end: 2038 }, - { title: 'Chapter 3', id: 3, start: 2038, end: 2435 }, - { title: 'Chapter 4', id: 4, start: 2435, end: 2700 } - ] - }, - { - testName: 'Should handle descriptions with no timestamps', - description: `Introduction text paragraph 1 - Introduction text paragraph 2`, - audioDuration: 2700, - expectedChapters: [] - }, - { - testName: 'Should handle timestampes in parentheses', - description: '

Introduction text paragraph 1

Introduction text paragraph 2

(00:48) Chatper 1

(12:14) Chapter 2

(20:56) Chapter 3

(27:34) Chapter 4

(32:00) Chapter 5

(35:16) Chapter 6

(41:32) Chapter 7

(46:43) Chapter 8

', - audioDuration: 3060, - expectedChapters: [ - { title: 'Chatper 1', id: 1, start: 48, end: 734 }, - { title: 'Chapter 2', id: 2, start: 734, end: 1256 }, - { title: 'Chapter 3', id: 3, start: 1256, end: 1654 }, - { title: 'Chapter 4', id: 4, start: 1654, end: 1920 }, - { title: 'Chapter 5', id: 5, start: 1920, end: 2116 }, - { title: 'Chapter 6', id: 6, start: 2116, end: 2492 }, - { title: 'Chapter 7', id: 7, start: 2492, end: 2803 }, - { title: 'Chapter 8', id: 8, start: 2803, end: 3060 } - ] - } - ] - testCasesTestingSuccess.forEach(function (testCase) { - it(testCase.testName, () => { - var chapters = PodcastEpisode.autoGenerateChaptersFromTimestamps(testCase.description, testCase.audioDuration) - expect(chapters).to.be.deep.equal(testCase.expectedChapters) - }) - }) - - var testCasesTestingFailure = [ - { - testName: 'Should throw if only one chapter found', - description: '

Introduction text paragraph 1

Introduction text paragraph 2

00:48 Chatper 1

', - audioDuration: 1000, - expectedError: 'Only one chapter found, treating as invalid description' - }, - { - testName: 'Should throw if description is null', - description: null, - audioDuration: 1000, - expectedError: 'Description must not be null' - }, - { - testName: 'Should throw if audio duration is null', - description: '', - audioDuration: null, - expectedError: 'Audio duration must not be null' - }, - { - testName: 'Should throw if chapter has no title', - description: '

Introduction text paragraph 1

Introduction text paragraph 2

00:48 Chatper 1

00:60:12

', - audioDuration: 1000, - expectedError: 'Unable to get chapter title from description' - } - ] - testCasesTestingFailure.forEach(function (testCase) { - it(testCase.testName, () => { - expect(() => { - var chapters = PodcastEpisode.autoGenerateChaptersFromTimestamps(testCase.description, testCase.audioDuration) - Logger.debug('Chapters', chapters) - }).to.throw(testCase.expectedError) - }) - }) - }) -}) diff --git a/server/utils/parsers/parsePodcastDescriptionForChapters.js b/server/utils/parsers/parsePodcastDescriptionForChapters.js new file mode 100644 index 00000000..3f1b4c75 --- /dev/null +++ b/server/utils/parsers/parsePodcastDescriptionForChapters.js @@ -0,0 +1,95 @@ +const Logger = require('../../Logger') + +/** + * Parse podcast descriptions for timestamps and generate chapters + * The following formats are supports: + * + * MM:SS Chapter name + * HH:MM:SS Chapter name + * (HH:MM:SS) Chapter name + * + * Descriptions have to use

,
or \n to split up lines in order to be supported + * + * See test suite for more input examples + * + * @param {string} podcastDescription + * @param {number} audioDurationSecs + * @returns {ChapterObject[]} + */ +module.exports.parse = (podcastDescription, audioDurationSecs) => { + if (podcastDescription == null) { + throw new Error('Description must not be null') + } + + if (audioDurationSecs == null) { + throw new Error('Audio duration must not be null') + } + + const timestampRegex = /\b(\d{1,2}):(\d{1,2})(?::(\d{1,2}))?\b/ + const chapterTitleRegex = /\b\d{1,2}:\d{1,2}(?::\d{1,2})?\b(?:\s+|\))(.+)$/ + const descriptionLineSplitRegex = /\<\s*\/\s*p\s*\>|\<\s*br\s*\s*\/\>|\n/ + + var descriptionLines = podcastDescription.split(descriptionLineSplitRegex) + var newChapters = [] + + for (let i = 0; i < descriptionLines.length; i++) { + let line = descriptionLines[i] + + let match = timestampRegex.exec(line) + if (match == null) continue + + let first = match[1] + let second = match[2] + let third = match[3] + + let hours = 0 + let minutes = 0 + let seconds = 0 + + // If there's three components then we can assume its hh:mm:ss + if (first && second && third) { + hours = Number(first) + minutes = Number(second) + seconds = Number(third) + } else if (first && second) // otherwise assume mm:ss + { + minutes = Number(first) + seconds = Number(second) + } + + if (minutes > 59 || seconds > 59) { + throw new Error(`Timestamp contains invalid minutes or seconds field '${minutes}::${seconds}'`) + } + + let startTime = seconds + minutes * 60 + hours * 60 * 60 + if (startTime > audioDurationSecs) { + throw new Error(`Chapter found that starts after over audio duration. 
Duration: ${audioDurationSecs}s - Chapter start ${startTime}s`) + } + + let chapterTitleMatch = chapterTitleRegex.exec(line) + + if (chapterTitleMatch == null || chapterTitleMatch.length < 2) { + // Unknown chapter state + throw new Error(`Unable to get chapter title from description, line ${line}`) + } + + let chapter = { title: chapterTitleMatch[1].trim(), id: newChapters.length + 1, start: startTime } + + if (newChapters.length > 0) { + newChapters[newChapters.length - 1].end = startTime + } + + newChapters.push(chapter) + } + if (newChapters.length > 0) { + newChapters[newChapters.length - 1].end = audioDurationSecs + } + + Logger.info(`[PodcastEpisode] Successfully generated ${newChapters.length} chapters`) + + if (newChapters.length == 1) { + throw new Error('Only one chapter found, treating as invalid description') + } + + return newChapters +} diff --git a/test/server/migrations/v2.34.0-add-auto-generate-podcast-chapters.test.js b/test/server/migrations/v2.34.0-add-auto-generate-podcast-chapters.test.js new file mode 100644 index 00000000..04822be4 --- /dev/null +++ b/test/server/migrations/v2.34.0-add-auto-generate-podcast-chapters.test.js @@ -0,0 +1,64 @@ +const chai = require('chai') +const sinon = require('sinon') +const { expect } = chai + +const { DataTypes, Sequelize } = require('sequelize') +const Logger = require('../../../server/Logger') + +const { up, down } = require('../../../server/migrations/v2.34.0-add-auto-generate-podcast-chapters') + +describe('Migration v2.34.0-add-auto-generate-podcast-chapters', () => { + let sequelize + let queryInterface + let loggerInfoStub + + beforeEach(async () => { + sequelize = new Sequelize({ dialect: 'sqlite', storage: ':memory:', logging: false }) + queryInterface = sequelize.getQueryInterface() + loggerInfoStub = sinon.stub(Logger, 'info') + + await queryInterface.createTable('podcasts', { + id: { type: DataTypes.INTEGER, allowNull: false, primaryKey: true, unique: true }, + title: { type: DataTypes.STRING, 
allowNull: false }, + titleIgnorePrefix: { type: DataTypes.STRING, allowNull: false } + }) + + await queryInterface.bulkInsert('podcasts', [ + { id: 1, title: 'The Podcast 1', titleIgnorePrefix: 'Podcast 1, The' }, + { id: 2, title: 'The Podcast 2', titleIgnorePrefix: 'Podcast 2, The' } + ]) + }) + + afterEach(() => { + sinon.restore() + }) + + describe('up', () => { + it('should add autoGenerateChapters column to podcasts', async () => { + await up({ context: { queryInterface, logger: Logger } }) + + const [podcasts] = await queryInterface.sequelize.query('SELECT * FROM podcasts') + expect(podcasts).to.deep.equal([ + { id: 1, autoGenerateChapters: 0, title: 'The Podcast 1', titleIgnorePrefix: 'Podcast 1, The' }, + { id: 2, autoGenerateChapters: 0, title: 'The Podcast 2', titleIgnorePrefix: 'Podcast 2, The' } + ]) + }) + }) + + describe('down', () => { + it('should remove autoGenerateChapters column from podcasts', async () => { + await up({ context: { queryInterface, logger: Logger } }) + try { + await down({ context: { queryInterface, logger: Logger } }) + } catch (error) { + console.log(error) + } + + const [podcasts] = await queryInterface.sequelize.query('SELECT * FROM podcasts') + expect(podcasts).to.deep.equal([ + { id: 1, title: 'The Podcast 1', titleIgnorePrefix: 'Podcast 1, The' }, + { id: 2, title: 'The Podcast 2', titleIgnorePrefix: 'Podcast 2, The' } + ]) + }) + }) +}) diff --git a/test/server/utils/parsers/parsePodcastDescriptionForChapters.test.js b/test/server/utils/parsers/parsePodcastDescriptionForChapters.test.js new file mode 100644 index 00000000..0fff4574 --- /dev/null +++ b/test/server/utils/parsers/parsePodcastDescriptionForChapters.test.js @@ -0,0 +1,132 @@ +const chai = require('chai') +const PodcastEpisode = require('../../../../server/models/PodcastEpisode') +const expect = chai.expect +const parsePodcastDescriptionForChapters = require('../../../../server/utils/parsers/parsePodcastDescriptionForChapters') + 
+describe('parsePodcastDescriptionForChapters', () => { + var testCasesTestingSuccess = [ + { + testName: 'Should handle descriptions using html paragraphs', + description: '

Introduction text paragraph 1

Introduction text paragraph 2

00:48 Chatper 1

12:14 Chapter 2

20:56 Chapter 3

27:34 Chapter 4

32:00 Chapter 5

35:16 Chapter 6

41:32 Chapter 7

46:43 Chapter 8

', + audioDuration: 3060, + expectedChapters: [ + { title: 'Chatper 1', id: 1, start: 48, end: 734 }, + { title: 'Chapter 2', id: 2, start: 734, end: 1256 }, + { title: 'Chapter 3', id: 3, start: 1256, end: 1654 }, + { title: 'Chapter 4', id: 4, start: 1654, end: 1920 }, + { title: 'Chapter 5', id: 5, start: 1920, end: 2116 }, + { title: 'Chapter 6', id: 6, start: 2116, end: 2492 }, + { title: 'Chapter 7', id: 7, start: 2492, end: 2803 }, + { title: 'Chapter 8', id: 8, start: 2803, end: 3060 } + ] + }, + { + testName: 'Should handle descriptions using html line breaks', + description: '
Introduction text paragraph 1

Introduction text paragraph 2

0:00:00 Chapter 1
0:17:05 Chapter 2
0:33:58 Chapter 3
0:40:35 Chapter 4
Unrelated outro line
', + audioDuration: 2700, + expectedChapters: [ + { title: 'Chapter 1', id: 1, start: 0, end: 1025 }, + { title: 'Chapter 2', id: 2, start: 1025, end: 2038 }, + { title: 'Chapter 3', id: 3, start: 2038, end: 2435 }, + { title: 'Chapter 4', id: 4, start: 2435, end: 2700 } + ] + }, + { + testName: 'Should handle descriptions using unix new lines', + description: `Introduction text paragraph 1 + Introduction text paragraph 2 + 0:00:00 Chapter 1 + 0:17:05 Chapter 2 + 0:33:58 Chapter 3 + 0:40:35 Chapter 4 + Unrelated outro line`, + audioDuration: 2700, + expectedChapters: [ + { title: 'Chapter 1', id: 1, start: 0, end: 1025 }, + { title: 'Chapter 2', id: 2, start: 1025, end: 2038 }, + { title: 'Chapter 3', id: 3, start: 2038, end: 2435 }, + { title: 'Chapter 4', id: 4, start: 2435, end: 2700 } + ] + }, + { + testName: 'Should handle descriptions with no timestamps', + description: `Introduction text paragraph 1 + Introduction text paragraph 2`, + audioDuration: 2700, + expectedChapters: [] + }, + { + testName: 'Should handle timestampes in parentheses', + description: '

Introduction text paragraph 1

Introduction text paragraph 2

(00:48) Chatper 1

(12:14) Chapter 2

(20:56) Chapter 3

(27:34) Chapter 4

(32:00) Chapter 5

(35:16) Chapter 6

(41:32) Chapter 7

(46:43) Chapter 8

', + audioDuration: 3060, + expectedChapters: [ + { title: 'Chatper 1', id: 1, start: 48, end: 734 }, + { title: 'Chapter 2', id: 2, start: 734, end: 1256 }, + { title: 'Chapter 3', id: 3, start: 1256, end: 1654 }, + { title: 'Chapter 4', id: 4, start: 1654, end: 1920 }, + { title: 'Chapter 5', id: 5, start: 1920, end: 2116 }, + { title: 'Chapter 6', id: 6, start: 2116, end: 2492 }, + { title: 'Chapter 7', id: 7, start: 2492, end: 2803 }, + { title: 'Chapter 8', id: 8, start: 2803, end: 3060 } + ] + } + ] + testCasesTestingSuccess.forEach(function (testCase) { + it(testCase.testName, () => { + var chapters = parsePodcastDescriptionForChapters.parse(testCase.description, testCase.audioDuration) + expect(chapters).to.be.deep.equal(testCase.expectedChapters) + }) + }) + + var testCasesTestingFailure = [ + { + testName: 'Should throw if only one chapter found', + description: '

Introduction text paragraph 1

Introduction text paragraph 2

00:48 Chatper 1

', + audioDuration: 1000, + expectedError: 'Only one chapter found, treating as invalid description' + }, + { + testName: 'Should throw if invalid minutes', + description: '

Introduction text paragraph 1

Introduction text paragraph 2

75:48 Chatper 1

', + audioDuration: 1000, + expectedError: "Timestamp contains invalid minutes or seconds field '75::48'" + }, + { + testName: 'Should throw if invalid minutes', + description: '

Introduction text paragraph 1

Introduction text paragraph 2

00:90 Chatper 1

', + audioDuration: 1000, + expectedError: "Timestamp contains invalid minutes or seconds field '0::90'" + }, + { + testName: 'Should throw if chapter goes over lenght of audio file', + description: '

Introduction text paragraph 1

Introduction text paragraph 2

00:48 Chatper 1

01:00:01 Chatper 2

', + audioDuration: 3600, + expectedError: 'Chapter found that starts after over audio duration' + }, + { + testName: 'Should throw if description is null', + description: null, + audioDuration: 1000, + expectedError: 'Description must not be null' + }, + { + testName: 'Should throw if audio duration is null', + description: '', + audioDuration: null, + expectedError: 'Audio duration must not be null' + }, + { + testName: 'Should throw if chapter has no title', + description: '

Introduction text paragraph 1

Introduction text paragraph 2

00:48 Chatper 1

00:30:00

', + audioDuration: 3600, + expectedError: 'Unable to get chapter title from description' + } + ] + testCasesTestingFailure.forEach(function (testCase) { + it(testCase.testName, () => { + expect(() => { + parsePodcastDescriptionForChapters.parse(testCase.description, testCase.audioDuration) + }).to.throw(testCase.expectedError) + }) + }) +}) From 6e05484307a2b8bff5b6ef32f7fa3e5ad1144475 Mon Sep 17 00:00:00 2001 From: Harry Rose Date: Mon, 16 Mar 2026 20:59:00 +0000 Subject: [PATCH 11/14] Handle podcasts which use html lists and also have html tags in the chapter titles --- .../parsers/parsePodcastDescriptionForChapters.js | 10 +++++++--- .../parsePodcastDescriptionForChapters.test.js | 14 +++++++++++++- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/server/utils/parsers/parsePodcastDescriptionForChapters.js b/server/utils/parsers/parsePodcastDescriptionForChapters.js index 3f1b4c75..8a83f149 100644 --- a/server/utils/parsers/parsePodcastDescriptionForChapters.js +++ b/server/utils/parsers/parsePodcastDescriptionForChapters.js @@ -1,3 +1,4 @@ +const sanitizeHtml = require('../../libs/sanitizeHtml') const Logger = require('../../Logger') /** @@ -27,13 +28,16 @@ module.exports.parse = (podcastDescription, audioDurationSecs) => { const timestampRegex = /\b(\d{1,2}):(\d{1,2})(?::(\d{1,2}))?\b/ const chapterTitleRegex = /\b\d{1,2}:\d{1,2}(?::\d{1,2})?\b(?:\s+|\))(.+)$/ - const descriptionLineSplitRegex = /\<\s*\/\s*p\s*\>|\<\s*br\s*\s*\/\>|\n/ + + // Split on "

", "
", "\n", + const descriptionLineSplitRegex = /\<\s*\/\s*p\s*\>|\<\s*br\s*\/\>|\n|\<\s*\/\s*li\s*\>/ var descriptionLines = podcastDescription.split(descriptionLineSplitRegex) var newChapters = [] for (let i = 0; i < descriptionLines.length; i++) { - let line = descriptionLines[i] + // Strip all HTML tags out + let line = sanitizeHtml(descriptionLines[i], { allowedTags: [] }) let match = timestampRegex.exec(line) if (match == null) continue @@ -85,7 +89,7 @@ module.exports.parse = (podcastDescription, audioDurationSecs) => { newChapters[newChapters.length - 1].end = audioDurationSecs } - Logger.info(`[PodcastEpisode] Successfully generated ${newChapters.length} chapters`) + Logger.info(`Successfully generated ${newChapters.length} chapters`) if (newChapters.length == 1) { throw new Error('Only one chapter found, treating as invalid description') diff --git a/test/server/utils/parsers/parsePodcastDescriptionForChapters.test.js b/test/server/utils/parsers/parsePodcastDescriptionForChapters.test.js index 0fff4574..662c131d 100644 --- a/test/server/utils/parsers/parsePodcastDescriptionForChapters.test.js +++ b/test/server/utils/parsers/parsePodcastDescriptionForChapters.test.js @@ -1,5 +1,4 @@ const chai = require('chai') -const PodcastEpisode = require('../../../../server/models/PodcastEpisode') const expect = chai.expect const parsePodcastDescriptionForChapters = require('../../../../server/utils/parsers/parsePodcastDescriptionForChapters') @@ -69,6 +68,19 @@ describe('parsePodcastDescriptionForChapters', () => { { title: 'Chapter 7', id: 7, start: 2492, end: 2803 }, { title: 'Chapter 8', id: 8, start: 2803, end: 3060 } ] + }, + { + testName: 'Should handle html lists and chapters with html tags in the title', + description: '

Introduction



Chapters
  • 00:00:00 Intro
  • 00:03:55 Chapter 1
  • 00:09:52 Chapter 2
  • 00:16:11 Chapter 3
  • 00:20:03 Chapter 4
  • 00:24:08 Chapter 5
  • ', + audioDuration: 4000, + expectedChapters: [ + { title: 'Intro', id: 1, start: 0, end: 235 }, + { title: 'Chapter 1', id: 2, start: 235, end: 592 }, + { title: 'Chapter 2', id: 3, start: 592, end: 971 }, + { title: 'Chapter 3', id: 4, start: 971, end: 1203 }, + { title: 'Chapter 4', id: 5, start: 1203, end: 1448 }, + { title: 'Chapter 5', id: 6, start: 1448, end: 4000 } + ] } ] testCasesTestingSuccess.forEach(function (testCase) { From 0227302fc0604e6f9254b00f76549922d9ba4802 Mon Sep 17 00:00:00 2001 From: Harry Rose Date: Tue, 17 Mar 2026 18:52:56 +0000 Subject: [PATCH 12/14] Handle chapters names that are very long, add examples to tests --- .../parsers/parsePodcastDescriptionForChapters.js | 13 ++++++++++++- .../parsePodcastDescriptionForChapters.test.js | 13 +++++++++++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/server/utils/parsers/parsePodcastDescriptionForChapters.js b/server/utils/parsers/parsePodcastDescriptionForChapters.js index 8a83f149..3f90e847 100644 --- a/server/utils/parsers/parsePodcastDescriptionForChapters.js +++ b/server/utils/parsers/parsePodcastDescriptionForChapters.js @@ -26,6 +26,12 @@ module.exports.parse = (podcastDescription, audioDurationSecs) => { throw new Error('Audio duration must not be null') } + Logger.info('Description!', podcastDescription) + + // This number is arbitrary, but there have been examples where descriptions of the chapter are on the same line as the chapter title + // This results in a unpleasant UX where the chapter is very long, it's also possible that an overly long chapter title is the result of a parsing failure + const maxChapterTitleLength = 200 + const timestampRegex = /\b(\d{1,2}):(\d{1,2})(?::(\d{1,2}))?\b/ const chapterTitleRegex = /\b\d{1,2}:\d{1,2}(?::\d{1,2})?\b(?:\s+|\))(.+)$/ @@ -77,7 +83,12 @@ module.exports.parse = (podcastDescription, audioDurationSecs) => { throw new Error(`Unable to get chapter title from description, line ${line}`) } - let chapter = { title: 
chapterTitleMatch[1].trim(), id: newChapters.length + 1, start: startTime } + let chapterTitle = chapterTitleMatch[1].trim() + if (chapterTitle.length > maxChapterTitleLength) { + throw new Error(`Chapter title too long, possible parsing falure, line ${line}`) + } + + let chapter = { title: chapterTitle, id: newChapters.length + 1, start: startTime } if (newChapters.length > 0) { newChapters[newChapters.length - 1].end = startTime diff --git a/test/server/utils/parsers/parsePodcastDescriptionForChapters.test.js b/test/server/utils/parsers/parsePodcastDescriptionForChapters.test.js index 662c131d..24ca3f0a 100644 --- a/test/server/utils/parsers/parsePodcastDescriptionForChapters.test.js +++ b/test/server/utils/parsers/parsePodcastDescriptionForChapters.test.js @@ -20,6 +20,7 @@ describe('parsePodcastDescriptionForChapters', () => { ] }, { + // Example: https://podcasts.apple.com/us/podcast/giant-bombcast-931-bleepbloop-remote/id274450056?i=1000754550540 testName: 'Should handle descriptions using html line breaks', description: '
    Introduction text paragraph 1

    Introduction text paragraph 2

    0:00:00 Chapter 1
    0:17:05 Chapter 2
    0:33:58 Chapter 3
    0:40:35 Chapter 4
    Unrelated outro line
    ', audioDuration: 2700, @@ -31,6 +32,7 @@ describe('parsePodcastDescriptionForChapters', () => { ] }, { + // Example: https://podcasts.apple.com/us/podcast/xboxs-big-helix-reveal-witcher-4-path-tracing-crimson/id1596728253?i=1000755411491 testName: 'Should handle descriptions using unix new lines', description: `Introduction text paragraph 1 Introduction text paragraph 2 @@ -49,8 +51,7 @@ describe('parsePodcastDescriptionForChapters', () => { }, { testName: 'Should handle descriptions with no timestamps', - description: `Introduction text paragraph 1 - Introduction text paragraph 2`, + description: 'Lorem ipsum dolor sit amet consectetur adipiscing elit quisque faucibus ex sapien vitae pellentesque sem placerat in id cursus mi pretium tellus duis convallis tempus leo eu aenean sed diam urna tempor pulvinar vivamus fringilla lacus nec metus bibendum egestas.', audioDuration: 2700, expectedChapters: [] }, @@ -70,6 +71,7 @@ describe('parsePodcastDescriptionForChapters', () => { ] }, { + // Example here: https://podcasts.apple.com/gb/podcast/daniel-priestley-plumbers-will-earn-more-than-lawyers/id1291423644?i=1000755513967 testName: 'Should handle html lists and chapters with html tags in the title', description: '

    Introduction



    Chapters
    • 00:00:00 Intro
    • 00:03:55 Chapter 1
    • 00:09:52 Chapter 2
    • 00:16:11 Chapter 3
    • 00:20:03 Chapter 4
    • 00:24:08 Chapter 5
    • ', audioDuration: 4000, @@ -132,6 +134,13 @@ describe('parsePodcastDescriptionForChapters', () => { description: '

      Introduction text paragraph 1

      Introduction text paragraph 2

      00:48 Chatper 1

      00:30:00

      ', audioDuration: 3600, expectedError: 'Unable to get chapter title from description' + }, + { + // Example here: https://podcasts.apple.com/us/podcast/is-your-personal-finance-indecision-costing-you-plus/id1256091892?i=1000636624926 + testName: 'Should throw if chapter is too long', + description: '

      01:19 Chapter 1

      10:00 Chapter 2: Lorem ipsum dolor sit amet consectetur adipiscing elit quisque faucibus ex sapien vitae pellentesque sem placerat in id cursus mi pretium tellus duis convallis tempus leo eu aenean sed diam urna tempor pulvinar vivamus fringilla>

      ', + audioDuration: 3600, + expectedError: 'Chapter title too long, possible parsing falure' } ] testCasesTestingFailure.forEach(function (testCase) { From 8710816a6fe6a4dcb2b5f892ec6c03c4d095fe5b Mon Sep 17 00:00:00 2001 From: Harry Rose Date: Tue, 17 Mar 2026 18:59:12 +0000 Subject: [PATCH 13/14] Remove autoGenerateChapters flag, migration and version bump --- client/package.json | 2 +- package.json | 2 +- server/managers/PodcastManager.js | 2 +- ...34.0-add-auto-generate-podcast-chapters.js | 83 ------------------- server/models/Podcast.js | 11 --- server/models/PodcastEpisode.js | 7 +- .../parsePodcastDescriptionForChapters.js | 2 - ...add-auto-generate-podcast-chapters.test.js | 64 -------------- 8 files changed, 6 insertions(+), 167 deletions(-) delete mode 100644 server/migrations/v2.34.0-add-auto-generate-podcast-chapters.js delete mode 100644 test/server/migrations/v2.34.0-add-auto-generate-podcast-chapters.test.js diff --git a/client/package.json b/client/package.json index dd0f3a0c..a1503a50 100644 --- a/client/package.json +++ b/client/package.json @@ -1,6 +1,6 @@ { "name": "audiobookshelf-client", - "version": "2.34.0", + "version": "2.33.0", "buildNumber": 1, "description": "Self-hosted audiobook and podcast client", "main": "index.js", diff --git a/package.json b/package.json index 10ba26f6..3108b517 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "audiobookshelf", - "version": "2.34.0", + "version": "2.33.0", "buildNumber": 1, "description": "Self-hosted audiobook and podcast server", "main": "index.js", diff --git a/server/managers/PodcastManager.js b/server/managers/PodcastManager.js index 82e0dbd3..bdf6fc76 100644 --- a/server/managers/PodcastManager.js +++ b/server/managers/PodcastManager.js @@ -204,7 +204,7 @@ class PodcastManager { return false } - const podcastEpisode = await Database.podcastEpisodeModel.createFromRssPodcastEpisode(this.currentDownload.rssPodcastEpisode, libraryItem.media.id, 
libraryItem.media.autoGenerateChapters, audioFile) + const podcastEpisode = await Database.podcastEpisodeModel.createFromRssPodcastEpisode(this.currentDownload.rssPodcastEpisode, libraryItem.media.id, audioFile) libraryItem.libraryFiles.push(libraryFile.toJSON()) // Re-calculating library item size because this wasnt being updated properly for podcasts in v2.20.0 and below diff --git a/server/migrations/v2.34.0-add-auto-generate-podcast-chapters.js b/server/migrations/v2.34.0-add-auto-generate-podcast-chapters.js deleted file mode 100644 index c5567abe..00000000 --- a/server/migrations/v2.34.0-add-auto-generate-podcast-chapters.js +++ /dev/null @@ -1,83 +0,0 @@ -const util = require('util') - -/** - * @typedef MigrationContext - * @property {import('sequelize').QueryInterface} queryInterface - a suquelize QueryInterface object. - * @property {import('../Logger')} logger - a Logger object. - * - * @typedef MigrationOptions - * @property {MigrationContext} context - an object containing the migration context. - */ - -const migrationVersion = '2.34.0' -const migrationName = `${migrationVersion}-add-auto-generate-podcast-chapters` -const loggerPrefix = `[${migrationVersion} migration]` - -/** - * This upward migration adds a boolean autoGenerateChapters column to the podcasts table and defaults it to false. - * - * @param {MigrationOptions} options - an object containing the migration context. - * @returns {Promise} - A promise that resolves when the migration is complete. 
- */ -async function up({ context: { queryInterface, logger } }) { - logger.info(`${loggerPrefix} UPGRADE BEGIN: ${migrationName}`) - - await addColumn(queryInterface, logger, 'podcasts', 'autoGenerateChapters', { type: queryInterface.sequelize.Sequelize.BOOLEAN, allowNull: false, defaultValue: false }) - - logger.info(`${loggerPrefix} UPGRADE END: ${migrationName}`) -} - -/** - * This downward migration removes the autoGenerateChapters column on the podcasts table, - * - * @param {MigrationOptions} options - an object containing the migration context. - * @returns {Promise} - A promise that resolves when the migration is complete. - */ -async function down({ context: { queryInterface, logger } }) { - logger.info(`${loggerPrefix} DOWNGRADE BEGIN: ${migrationName}`) - - await removeColumn(queryInterface, logger, 'podcasts', 'autoGenerateChapters') - - logger.info(`${loggerPrefix} DOWNGRADE END: ${migrationName}`) -} - -/** - * Utility function to add a column to a table. If the column already exists, it logs a message and continues. - * - * @param {import('sequelize').QueryInterface} queryInterface - a suquelize QueryInterface object. - * @param {import('../Logger')} logger - a Logger object. - * @param {string} table - the name of the table to add the column to. - * @param {string} column - the name of the column to add. - * @param {Object} options - the options for the column. - */ -async function addColumn(queryInterface, logger, table, column, options) { - logger.info(`${loggerPrefix} adding column "${column}" to table "${table}"`) - const tableDescription = await queryInterface.describeTable(table) - if (!tableDescription[column]) { - await queryInterface.addColumn(table, column, options) - logger.info(`${loggerPrefix} added column "${column}" to table "${table}"`) - } else { - logger.info(`${loggerPrefix} column "${column}" already exists in table "${table}"`) - } -} - -/** - * Utility function to remove a column from a table. 
If the column does not exist, it logs a message and continues. - * - * @param {import('sequelize').QueryInterface} queryInterface - a suquelize QueryInterface object. - * @param {import('../Logger')} logger - a Logger object. - * @param {string} table - the name of the table to remove the column from. - * @param {string} column - the name of the column to remove. - */ -async function removeColumn(queryInterface, logger, table, column) { - logger.info(`${loggerPrefix} removing column "${column}" from table "${table}"`) - const tableDescription = await queryInterface.describeTable(table) - if (tableDescription[column]) { - await queryInterface.sequelize.query(`ALTER TABLE ${table} DROP COLUMN ${column}`) - logger.info(`${loggerPrefix} removed column "${column}" from table "${table}"`) - } else { - logger.info(`${loggerPrefix} column "${column}" does not exist in table "${table}"`) - } -} - -module.exports = { up, down } diff --git a/server/models/Podcast.js b/server/models/Podcast.js index bb0e0453..a96e1dd0 100644 --- a/server/models/Podcast.js +++ b/server/models/Podcast.js @@ -53,8 +53,6 @@ class Podcast extends Model { this.maxEpisodesToKeep /** @type {number} */ this.maxNewEpisodesToDownload - /** @type {boolean} */ - this.autoGenerateChapters /** @type {string} */ this.coverPath /** @type {string[]} */ @@ -108,7 +106,6 @@ class Podcast extends Model { explicit: !!payload.metadata.explicit, autoDownloadEpisodes: !!payload.autoDownloadEpisodes, autoDownloadSchedule: autoDownloadSchedule || global.ServerSettings.podcastEpisodeSchedule, - autoGenerateChapters: !!payload.autoGenerateChapters, lastEpisodeCheck: new Date(), maxEpisodesToKeep: 0, maxNewEpisodesToDownload: 3, @@ -148,7 +145,6 @@ class Podcast extends Model { autoDownloadEpisodes: DataTypes.BOOLEAN, autoDownloadSchedule: DataTypes.STRING, lastEpisodeCheck: DataTypes.DATE, - autoGenerateChapters: DataTypes.BOOLEAN, maxEpisodesToKeep: DataTypes.INTEGER, maxNewEpisodesToDownload: DataTypes.INTEGER, 
coverPath: DataTypes.STRING, @@ -277,10 +273,6 @@ class Podcast extends Model { this.autoDownloadSchedule = payload.autoDownloadSchedule hasUpdates = true } - if (payload.autoGenerateChapters !== undefined && payload.autoGenerateChapters !== this.autoGenerateChapters) { - this.autoGenerateChapters = !!payload.autoGenerateChapters - hasUpdates = true - } if (typeof payload.lastEpisodeCheck === 'number' && payload.lastEpisodeCheck !== this.lastEpisodeCheck?.valueOf()) { this.lastEpisodeCheck = payload.lastEpisodeCheck hasUpdates = true @@ -449,7 +441,6 @@ class Podcast extends Model { autoDownloadEpisodes: this.autoDownloadEpisodes, autoDownloadSchedule: this.autoDownloadSchedule, lastEpisodeCheck: this.lastEpisodeCheck?.valueOf() || null, - autoGenerateChapters: this.autoGenerateChapters, maxEpisodesToKeep: this.maxEpisodesToKeep, maxNewEpisodesToDownload: this.maxNewEpisodesToDownload } @@ -466,7 +457,6 @@ class Podcast extends Model { autoDownloadEpisodes: this.autoDownloadEpisodes, autoDownloadSchedule: this.autoDownloadSchedule, lastEpisodeCheck: this.lastEpisodeCheck?.valueOf() || null, - autoGenerateChapters: this.autoGenerateChapters, maxEpisodesToKeep: this.maxEpisodesToKeep, maxNewEpisodesToDownload: this.maxNewEpisodesToDownload, size: this.size @@ -491,7 +481,6 @@ class Podcast extends Model { autoDownloadEpisodes: this.autoDownloadEpisodes, autoDownloadSchedule: this.autoDownloadSchedule, lastEpisodeCheck: this.lastEpisodeCheck?.valueOf() || null, - autoGenerateChapters: this.autoGenerateChapters, maxEpisodesToKeep: this.maxEpisodesToKeep, maxNewEpisodesToDownload: this.maxNewEpisodesToDownload, size: this.size diff --git a/server/models/PodcastEpisode.js b/server/models/PodcastEpisode.js index c253f479..7c107a87 100644 --- a/server/models/PodcastEpisode.js +++ b/server/models/PodcastEpisode.js @@ -58,10 +58,9 @@ class PodcastEpisode extends Model { * * @param {import('../utils/podcastUtils').RssPodcastEpisode} rssPodcastEpisode * @param {string} 
podcastId - * @param {boolean} autoGenerateChapters * @param {import('../objects/files/AudioFile')} audioFile */ - static async createFromRssPodcastEpisode(rssPodcastEpisode, podcastId, autoGenerateChapters, audioFile) { + static async createFromRssPodcastEpisode(rssPodcastEpisode, podcastId, audioFile) { const podcastEpisode = { index: null, season: rssPodcastEpisode.season, @@ -88,8 +87,8 @@ class PodcastEpisode extends Model { podcastEpisode.chapters = audioFile.chapters.map((ch) => ({ ...ch })) } else if (rssPodcastEpisode.chapters?.length) { podcastEpisode.chapters = rssPodcastEpisode.chapters.map((ch) => ({ ...ch })) - } else if (autoGenerateChapters) { - Logger.info("[PodcastEpisode] New episode doesn't have chapters, attempting to generate them from timestamps", rssPodcastEpisode.title) + } else { + Logger.debug("[PodcastEpisode] New episode doesn't have chapters, attempting to generate them from timestamps", rssPodcastEpisode.title) try { podcastEpisode.chapters = parsePodcastDescriptionForChapters.parse(podcastEpisode.description, podcastEpisode.audioFile.duration) } catch (error) { diff --git a/server/utils/parsers/parsePodcastDescriptionForChapters.js b/server/utils/parsers/parsePodcastDescriptionForChapters.js index 3f90e847..53585d64 100644 --- a/server/utils/parsers/parsePodcastDescriptionForChapters.js +++ b/server/utils/parsers/parsePodcastDescriptionForChapters.js @@ -26,8 +26,6 @@ module.exports.parse = (podcastDescription, audioDurationSecs) => { throw new Error('Audio duration must not be null') } - Logger.info('Description!', podcastDescription) - // This number is arbitrary, but there have been examples where descriptions of the chapter are on the same line as the chapter title // This results in a unpleasant UX where the chapter is very long, it's also possible that an overly long chapter title is the result of a parsing failure const maxChapterTitleLength = 200 diff --git 
a/test/server/migrations/v2.34.0-add-auto-generate-podcast-chapters.test.js b/test/server/migrations/v2.34.0-add-auto-generate-podcast-chapters.test.js deleted file mode 100644 index 04822be4..00000000 --- a/test/server/migrations/v2.34.0-add-auto-generate-podcast-chapters.test.js +++ /dev/null @@ -1,64 +0,0 @@ -const chai = require('chai') -const sinon = require('sinon') -const { expect } = chai - -const { DataTypes, Sequelize } = require('sequelize') -const Logger = require('../../../server/Logger') - -const { up, down } = require('../../../server/migrations/v2.34.0-add-auto-generate-podcast-chapters') - -describe('Migration v2.34.0-add-auto-generate-podcast-chapters', () => { - let sequelize - let queryInterface - let loggerInfoStub - - beforeEach(async () => { - sequelize = new Sequelize({ dialect: 'sqlite', storage: ':memory:', logging: false }) - queryInterface = sequelize.getQueryInterface() - loggerInfoStub = sinon.stub(Logger, 'info') - - await queryInterface.createTable('podcasts', { - id: { type: DataTypes.INTEGER, allowNull: false, primaryKey: true, unique: true }, - title: { type: DataTypes.STRING, allowNull: false }, - titleIgnorePrefix: { type: DataTypes.STRING, allowNull: false } - }) - - await queryInterface.bulkInsert('podcasts', [ - { id: 1, title: 'The Podcast 1', titleIgnorePrefix: 'Podcast 1, The' }, - { id: 2, title: 'The Podcast 2', titleIgnorePrefix: 'Podcast 2, The' } - ]) - }) - - afterEach(() => { - sinon.restore() - }) - - describe('up', () => { - it('should add autoGenerateChapters column to podcasts', async () => { - await up({ context: { queryInterface, logger: Logger } }) - - const [podcasts] = await queryInterface.sequelize.query('SELECT * FROM podcasts') - expect(podcasts).to.deep.equal([ - { id: 1, autoGenerateChapters: 0, title: 'The Podcast 1', titleIgnorePrefix: 'Podcast 1, The' }, - { id: 2, autoGenerateChapters: 0, title: 'The Podcast 2', titleIgnorePrefix: 'Podcast 2, The' } - ]) - }) - }) - - describe('down', () => { - 
it('should remove autoGenerateChapters column from podcasts', async () => { - await up({ context: { queryInterface, logger: Logger } }) - try { - await down({ context: { queryInterface, logger: Logger } }) - } catch (error) { - console.log(error) - } - - const [podcasts] = await queryInterface.sequelize.query('SELECT * FROM podcasts') - expect(podcasts).to.deep.equal([ - { id: 1, title: 'The Podcast 1', titleIgnorePrefix: 'Podcast 1, The' }, - { id: 2, title: 'The Podcast 2', titleIgnorePrefix: 'Podcast 2, The' } - ]) - }) - }) -}) From 7f88d4b0369fa8f5fb4bd51e5a2abc9a9de7b0b1 Mon Sep 17 00:00:00 2001 From: Harry Rose Date: Tue, 17 Mar 2026 19:43:09 +0000 Subject: [PATCH 14/14] Early out if the description doesn't contain any timestamps --- server/models/PodcastEpisode.js | 6 +++++- .../parsers/parsePodcastDescriptionForChapters.js | 8 ++++++-- .../parsePodcastDescriptionForChapters.test.js | 13 +++++++++++++ 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/server/models/PodcastEpisode.js b/server/models/PodcastEpisode.js index 7c107a87..fdef2c50 100644 --- a/server/models/PodcastEpisode.js +++ b/server/models/PodcastEpisode.js @@ -91,8 +91,12 @@ class PodcastEpisode extends Model { Logger.debug("[PodcastEpisode] New episode doesn't have chapters, attempting to generate them from timestamps", rssPodcastEpisode.title) try { podcastEpisode.chapters = parsePodcastDescriptionForChapters.parse(podcastEpisode.description, podcastEpisode.audioFile.duration) + + if (podcastEpisode.chapters.length > 0) { + Logger.info(`[PodcastEpisode] Successfully generated ${podcastEpisode.chapters.length} chapters`) + } } catch (error) { - Logger.error(`[PodcastEpisode] createFromRssPodcastEpisode: Failed to auto generate chapters for "${podcastEpisode.title}"`, error) + Logger.error(`[PodcastEpisode] createFromRssPodcastEpisode: Failed to generate chapters for "${podcastEpisode.title}"`, error) } } diff --git a/server/utils/parsers/parsePodcastDescriptionForChapters.js 
b/server/utils/parsers/parsePodcastDescriptionForChapters.js index 53585d64..1fa59f83 100644 --- a/server/utils/parsers/parsePodcastDescriptionForChapters.js +++ b/server/utils/parsers/parsePodcastDescriptionForChapters.js @@ -36,6 +36,12 @@ module.exports.parse = (podcastDescription, audioDurationSecs) => { // Split on "

      ", "
      ", "\n", const descriptionLineSplitRegex = /\<\s*\/\s*p\s*\>|\<\s*br\s*\/\>|\n|\<\s*\/\s*li\s*\>/ + // Early out if there aren't any timestamps in the entire description + if (timestampRegex.exec(podcastDescription) == null) { + Logger.debug('No timestamps found in description, bailing out early') + return [] + } + var descriptionLines = podcastDescription.split(descriptionLineSplitRegex) var newChapters = [] @@ -98,8 +104,6 @@ module.exports.parse = (podcastDescription, audioDurationSecs) => { newChapters[newChapters.length - 1].end = audioDurationSecs } - Logger.info(`Successfully generated ${newChapters.length} chapters`) - if (newChapters.length == 1) { throw new Error('Only one chapter found, treating as invalid description') } diff --git a/test/server/utils/parsers/parsePodcastDescriptionForChapters.test.js b/test/server/utils/parsers/parsePodcastDescriptionForChapters.test.js index 24ca3f0a..c4765415 100644 --- a/test/server/utils/parsers/parsePodcastDescriptionForChapters.test.js +++ b/test/server/utils/parsers/parsePodcastDescriptionForChapters.test.js @@ -1,8 +1,21 @@ const chai = require('chai') const expect = chai.expect const parsePodcastDescriptionForChapters = require('../../../../server/utils/parsers/parsePodcastDescriptionForChapters') +const sinon = require('sinon') +const Logger = require('../../../../server/Logger') describe('parsePodcastDescriptionForChapters', () => { + it("should early out if description doens't contain timestamps", () => { + let loggerDebugStub = sinon.stub(Logger, 'debug') + let description = '

      Introduction text paragraph 1

      Introduction text paragraph 2

      ' + let chapters = parsePodcastDescriptionForChapters.parse(description, 1000) + + expect(chapters).to.be.empty + expect(loggerDebugStub.calledWith('No timestamps found in description, bailing out early')).to.be.true + + sinon.restore() + }) + var testCasesTestingSuccess = [ { testName: 'Should handle descriptions using html paragraphs',