mirror of
https://github.com/advplyr/audiobookshelf.git
synced 2026-05-16 16:31:30 +00:00
Merge 95fb522e8d into 47ea6b5092
This commit is contained in:
commit
3485db8ee8
3 changed files with 291 additions and 0 deletions
|
|
@ -1,5 +1,7 @@
|
|||
const { DataTypes, Model } = require('sequelize')
|
||||
const libraryItemsPodcastFilters = require('../utils/queries/libraryItemsPodcastFilters')
|
||||
const parsePodcastDescriptionForChapters = require('../utils/parsers/parsePodcastDescriptionForChapters')
|
||||
const Logger = require('../Logger')
|
||||
/**
|
||||
* @typedef ChapterObject
|
||||
* @property {number} id
|
||||
|
|
@ -85,6 +87,17 @@ class PodcastEpisode extends Model {
|
|||
podcastEpisode.chapters = audioFile.chapters.map((ch) => ({ ...ch }))
|
||||
} else if (rssPodcastEpisode.chapters?.length) {
|
||||
podcastEpisode.chapters = rssPodcastEpisode.chapters.map((ch) => ({ ...ch }))
|
||||
} else {
|
||||
Logger.debug("[PodcastEpisode] New episode doesn't have chapters, attempting to generate them from timestamps", rssPodcastEpisode.title)
|
||||
try {
|
||||
podcastEpisode.chapters = parsePodcastDescriptionForChapters.parse(podcastEpisode.description, podcastEpisode.audioFile.duration)
|
||||
|
||||
if (podcastEpisode.chapters.length > 0) {
|
||||
Logger.info(`[PodcastEpisode] Successfully generated ${podcastEpisode.chapters.length} chapters`)
|
||||
}
|
||||
} catch (error) {
|
||||
Logger.error(`[PodcastEpisode] createFromRssPodcastEpisode: Failed to generate chapters for "${podcastEpisode.title}"`, error)
|
||||
}
|
||||
}
|
||||
|
||||
return this.create(podcastEpisode)
|
||||
|
|
|
|||
112
server/utils/parsers/parsePodcastDescriptionForChapters.js
Normal file
112
server/utils/parsers/parsePodcastDescriptionForChapters.js
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
const sanitizeHtml = require('../../libs/sanitizeHtml')
|
||||
const Logger = require('../../Logger')
|
||||
|
||||
/**
 * Parse a podcast description for timestamps and generate chapters from them.
 * The following timestamp formats are supported:
 *
 * MM:SS Chapter name
 * HH:MM:SS Chapter name
 * (HH:MM:SS) Chapter name
 *
 * Descriptions have to use </p>, <br> (also <br/> or <br />), </li> or \n to split up lines
 * in order to be supported. Tag names are matched case-insensitively.
 *
 * Every line that contains a timestamp becomes one chapter. A chapter ends where the next
 * chapter starts; the last chapter ends at audioDurationSecs. Chapter ids start at 1.
 *
 * See test suite for more input examples
 *
 * @param {string} podcastDescription
 * @param {number} audioDurationSecs
 * @returns {ChapterObject[]} the generated chapters, or an empty array when the description contains no timestamp
 * @throws {Error} when an argument is null/undefined, a timestamp has minutes or seconds above 59,
 *   a chapter starts after the audio duration, a chapter title cannot be extracted or is too long,
 *   or only a single chapter was found
 */
module.exports.parse = (podcastDescription, audioDurationSecs) => {
  if (podcastDescription == null) {
    throw new Error('Description must not be null')
  }

  if (audioDurationSecs == null) {
    throw new Error('Audio duration must not be null')
  }

  // This number is arbitrary, but there have been examples where descriptions of the chapter are on the same line as the chapter title
  // This results in an unpleasant UX where the chapter is very long, it's also possible that an overly long chapter title is the result of a parsing failure
  const maxChapterTitleLength = 200

  // Captures (first):(second)[:(third)] - either mm:ss or hh:mm:ss
  const timestampRegex = /\b(\d{1,2}):(\d{1,2})(?::(\d{1,2}))?\b/
  // Captures everything after a timestamp that is followed by whitespace or a closing parenthesis
  const chapterTitleRegex = /\b\d{1,2}:\d{1,2}(?::\d{1,2})?\b(?:\s+|\))(.+)$/

  // Split on "</p>", "<br>", "<br/>", "<br />", "\n", "</li>"
  // The slash in <br> is optional: a plain <br> would otherwise not be split on and, once the tags
  // are stripped below, neighbouring timestamp lines would be merged into a single line
  const descriptionLineSplitRegex = /<\s*\/\s*p\s*>|<\s*br\s*\/?\s*>|\n|<\s*\/\s*li\s*>/i

  // Early out if there aren't any timestamps in the entire description
  if (timestampRegex.exec(podcastDescription) == null) {
    Logger.debug('No timestamps found in description, bailing out early')
    return []
  }

  const descriptionLines = podcastDescription.split(descriptionLineSplitRegex)
  const newChapters = []

  for (const descriptionLine of descriptionLines) {
    // Strip all HTML tags out
    const line = sanitizeHtml(descriptionLine, { allowedTags: [] })

    const match = timestampRegex.exec(line)
    if (match == null) continue

    const [, first, second, third] = match

    let hours = 0
    let minutes = 0
    let seconds = 0

    if (first && second && third) {
      // If there's three components then we can assume its hh:mm:ss
      hours = Number(first)
      minutes = Number(second)
      seconds = Number(third)
    } else if (first && second) {
      // otherwise assume mm:ss
      minutes = Number(first)
      seconds = Number(second)
    }

    if (minutes > 59 || seconds > 59) {
      throw new Error(`Timestamp contains invalid minutes or seconds field '${minutes}::${seconds}'`)
    }

    const startTime = seconds + minutes * 60 + hours * 60 * 60
    if (startTime > audioDurationSecs) {
      throw new Error(`Chapter found that starts after over audio duration. Duration: ${audioDurationSecs}s - Chapter start ${startTime}s`)
    }

    const chapterTitleMatch = chapterTitleRegex.exec(line)

    if (chapterTitleMatch == null || chapterTitleMatch.length < 2) {
      // Unknown chapter state
      throw new Error(`Unable to get chapter title from description, line ${line}`)
    }

    const chapterTitle = chapterTitleMatch[1].trim()
    if (chapterTitle.length > maxChapterTitleLength) {
      throw new Error(`Chapter title too long, possible parsing falure, line ${line}`)
    }

    // The previous chapter ends where this one starts
    if (newChapters.length > 0) {
      newChapters[newChapters.length - 1].end = startTime
    }

    newChapters.push({ title: chapterTitle, id: newChapters.length + 1, start: startTime })
  }

  // The last chapter runs until the end of the audio
  if (newChapters.length > 0) {
    newChapters[newChapters.length - 1].end = audioDurationSecs
  }

  // A lone timestamp is more likely a mention of a time than a chapter list
  if (newChapters.length === 1) {
    throw new Error('Only one chapter found, treating as invalid description')
  }

  return newChapters
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue