Improved subtitle parsing to account for bare colon in title

This commit is contained in:
Khashayar Toodehfallah 2026-02-06 14:31:09 -05:00
parent fe13456a2b
commit c15cb48def
4 changed files with 22 additions and 5 deletions

View file

@@ -227,7 +227,7 @@ class BookFinder {
title = this.#removeAuthorFromTitle(title)
const titleTransformers = [
[/([,:;_]| by ).*/g, ''], // Remove subtitle
[/(: |[,;_]| by ).*/g, ''], // Remove subtitle
[/(^| )\d+k(bps)?( |$)/, ' '], // Remove bitrate
[/ (2nd|3rd|\d+th)\s+ed(\.|ition)?/g, ''], // Remove edition
[/(^| |\.)(m4b|m4a|mp3)( |$)/g, ''], // Remove file-type
@@ -646,11 +646,11 @@ class BookFinder {
module.exports = new BookFinder()
function hasSubtitle(title) {
return title.includes(':') || title.includes(' - ')
return title.includes(': ') || title.includes(' - ')
}
function stripSubtitle(title) {
if (title.includes(':')) {
return title.split(':')[0].trim()
if (title.includes(': ')) {
return title.split(': ')[0].trim()
} else if (title.includes(' - ')) {
return title.split(' - ')[0].trim()
}

View file

@@ -22,7 +22,7 @@ function parseNfoMetadata(nfoText) {
switch (key) {
case 'title':
{
const titleMatch = value.match(/^(.*?):(.*)$/)
const titleMatch = value.match(/^(.*?): (.*)$/)
if (titleMatch) {
metadata.title = titleMatch[1].trim()
metadata.subtitle = titleMatch[2].trim()

View file

@@ -35,7 +35,10 @@ describe('TitleCandidates', () => {
['adds candidate, removing author', `anna karenina by ${cleanAuthor}`, ['anna karenina']],
['does not add empty candidate after removing author', cleanAuthor, []],
['adds candidate, removing subtitle', 'anna karenina: subtitle', ['anna karenina']],
['adds candidate, not stripping subtitle for bare colon in title', '10:04', ['10:04']],
['adds candidate, not stripping subtitle for colon between words without space', 'making the mission:impossible movies', ['making the mission:impossible movies']],
['adds candidate + variant, removing "by ..."', 'anna karenina by arnold schwarzenegger', ['anna karenina', 'anna karenina by arnold schwarzenegger']],
['adds candidate + variant, removing "by ..." when title has bare colon', '10:04 by ben lerner', ['10:04', '10:04 by ben lerner']],
['adds candidate + variant, removing bitrate', 'anna karenina 64kbps', ['anna karenina', 'anna karenina 64kbps']],
['adds candidate + variant, removing edition 1', 'anna karenina 2nd edition', ['anna karenina', 'anna karenina 2nd edition']],
['adds candidate + variant, removing edition 2', 'anna karenina 4th ed.', ['anna karenina', 'anna karenina 4th ed.']],

View file

@@ -21,6 +21,20 @@ describe('parseNfoMetadata', () => {
expect(result.subtitle).to.equal('A Novel')
})
it('does not split title on bare colon without space', () => {
const nfoText = 'Title: 10:04'
const result = parseNfoMetadata(nfoText)
expect(result.title).to.equal('10:04')
expect(result.subtitle).to.be.undefined
})
it('does not split title on colon between words without space', () => {
const nfoText = 'Title: Making the Mission:Impossible Movies'
const result = parseNfoMetadata(nfoText)
expect(result.title).to.equal('Making the Mission:Impossible Movies')
expect(result.subtitle).to.be.undefined
})
it('parses authors', () => {
const nfoText = 'Author: F. Scott Fitzgerald'
const result = parseNfoMetadata(nfoText)