diff --git a/server/utils/parsers/parsePodcastDescriptionForChapters.js b/server/utils/parsers/parsePodcastDescriptionForChapters.js index 8a83f149..3f90e847 100644 --- a/server/utils/parsers/parsePodcastDescriptionForChapters.js +++ b/server/utils/parsers/parsePodcastDescriptionForChapters.js @@ -26,6 +26,12 @@ module.exports.parse = (podcastDescription, audioDurationSecs) => { throw new Error('Audio duration must not be null') } + Logger.info('Description!', podcastDescription) + + // This number is arbitrary, but there have been examples where descriptions of the chapter are on the same line as the chapter title. + // This results in an unpleasant UX where the chapter title is very long; it's also possible that an overly long chapter title is the result of a parsing failure. + const maxChapterTitleLength = 200 + const timestampRegex = /\b(\d{1,2}):(\d{1,2})(?::(\d{1,2}))?\b/ const chapterTitleRegex = /\b\d{1,2}:\d{1,2}(?::\d{1,2})?\b(?:\s+|\))(.+)$/ @@ -77,7 +83,12 @@ module.exports.parse = (podcastDescription, audioDurationSecs) => { throw new Error(`Unable to get chapter title from description, line ${line}`) } - let chapter = { title: chapterTitleMatch[1].trim(), id: newChapters.length + 1, start: startTime } + let chapterTitle = chapterTitleMatch[1].trim() + if (chapterTitle.length > maxChapterTitleLength) { + throw new Error(`Chapter title too long, possible parsing falure, line ${line}`) + } + + let chapter = { title: chapterTitle, id: newChapters.length + 1, start: startTime } if (newChapters.length > 0) { newChapters[newChapters.length - 1].end = startTime diff --git a/test/server/utils/parsers/parsePodcastDescriptionForChapters.test.js b/test/server/utils/parsers/parsePodcastDescriptionForChapters.test.js index 662c131d..24ca3f0a 100644 --- a/test/server/utils/parsers/parsePodcastDescriptionForChapters.test.js +++ b/test/server/utils/parsers/parsePodcastDescriptionForChapters.test.js @@ -20,6 +20,7 @@ describe('parsePodcastDescriptionForChapters', () => { ] }, { 
+ // Example: https://podcasts.apple.com/us/podcast/giant-bombcast-931-bleepbloop-remote/id274450056?i=1000754550540 testName: 'Should handle descriptions using html line breaks', description: '
Introduction text paragraph 1

Introduction text paragraph 2

0:00:00 Chapter 1
0:17:05 Chapter 2
0:33:58 Chapter 3
0:40:35 Chapter 4
Unrelated outro line
', audioDuration: 2700, @@ -31,6 +32,7 @@ describe('parsePodcastDescriptionForChapters', () => { ] }, { + // Example: https://podcasts.apple.com/us/podcast/xboxs-big-helix-reveal-witcher-4-path-tracing-crimson/id1596728253?i=1000755411491 testName: 'Should handle descriptions using unix new lines', description: `Introduction text paragraph 1 Introduction text paragraph 2 @@ -49,8 +51,7 @@ describe('parsePodcastDescriptionForChapters', () => { }, { testName: 'Should handle descriptions with no timestamps', - description: `Introduction text paragraph 1 - Introduction text paragraph 2`, + description: 'Lorem ipsum dolor sit amet consectetur adipiscing elit quisque faucibus ex sapien vitae pellentesque sem placerat in id cursus mi pretium tellus duis convallis tempus leo eu aenean sed diam urna tempor pulvinar vivamus fringilla lacus nec metus bibendum egestas.', audioDuration: 2700, expectedChapters: [] }, @@ -70,6 +71,7 @@ describe('parsePodcastDescriptionForChapters', () => { ] }, { + // Example here: https://podcasts.apple.com/gb/podcast/daniel-priestley-plumbers-will-earn-more-than-lawyers/id1291423644?i=1000755513967 testName: 'Should handle html lists and chapters with html tags in the title', description: '

Introduction



Chapters