Improved subtitle parsing so a bare colon in a title (no following space) is not treated as a subtitle separator

2026-07-15 05:41:34 +00:00 · 2026-02-06 14:31:09 -05:00 · 2026-02-06 14:31:09 -05:00 · c15cb48def
commit c15cb48def
parent fe13456a2b
4 changed files with 22 additions and 5 deletions
--- a/server/finders/BookFinder.js
+++ b/server/finders/BookFinder.js
@@ -227,7 +227,7 @@ class BookFinder {
      title = this.#removeAuthorFromTitle(title)

      const titleTransformers = [
-        [/([,:;_]| by ).*/g, ''], // Remove subtitle
+        [/(: |[,;_]| by ).*/g, ''], // Remove subtitle
        [/(^| )\d+k(bps)?( |$)/, ' '], // Remove bitrate
        [/ (2nd|3rd|\d+th)\s+ed(\.|ition)?/g, ''], // Remove edition
        [/(^| |\.)(m4b|m4a|mp3)( |$)/g, ''], // Remove file-type
@@ -646,11 +646,11 @@ class BookFinder {
 module.exports = new BookFinder()

 function hasSubtitle(title) {
-  return title.includes(':') || title.includes(' - ')
+  return title.includes(': ') || title.includes(' - ')
 }
 function stripSubtitle(title) {
-  if (title.includes(':')) {
-    return title.split(':')[0].trim()
+  if (title.includes(': ')) {
+    return title.split(': ')[0].trim()
  } else if (title.includes(' - ')) {
    return title.split(' - ')[0].trim()
  }
--- a/server/utils/parsers/parseNfoMetadata.js
+++ b/server/utils/parsers/parseNfoMetadata.js
@@ -22,7 +22,7 @@ function parseNfoMetadata(nfoText) {
      switch (key) {
        case 'title':
          {
-            const titleMatch = value.match(/^(.*?):(.*)$/)
+            const titleMatch = value.match(/^(.*?): (.*)$/)
            if (titleMatch) {
              metadata.title = titleMatch[1].trim()
              metadata.subtitle = titleMatch[2].trim()
--- a/test/server/finders/BookFinder.test.js
+++ b/test/server/finders/BookFinder.test.js
@@ -35,7 +35,10 @@ describe('TitleCandidates', () => {
        ['adds candidate, removing author', `anna karenina by ${cleanAuthor}`, ['anna karenina']],
        ['does not add empty candidate after removing author', cleanAuthor, []],
        ['adds candidate, removing subtitle', 'anna karenina: subtitle', ['anna karenina']],
+        ['adds candidate, not stripping subtitle for bare colon in title', '10:04', ['10:04']],
+        ['adds candidate, not stripping subtitle for colon between words without space', 'making the mission:impossible movies', ['making the mission:impossible movies']],
        ['adds candidate + variant, removing "by ..."', 'anna karenina by arnold schwarzenegger', ['anna karenina', 'anna karenina by arnold schwarzenegger']],
+        ['adds candidate + variant, removing "by ..." when title has bare colon', '10:04 by ben lerner', ['10:04', '10:04 by ben lerner']],
        ['adds candidate + variant, removing bitrate', 'anna karenina 64kbps', ['anna karenina', 'anna karenina 64kbps']],
        ['adds candidate + variant, removing edition 1', 'anna karenina 2nd edition', ['anna karenina', 'anna karenina 2nd edition']],
        ['adds candidate + variant, removing edition 2', 'anna karenina 4th ed.', ['anna karenina', 'anna karenina 4th ed.']],
--- a/test/server/utils/parsers/parseNfoMetadata.test.js
+++ b/test/server/utils/parsers/parseNfoMetadata.test.js
@@ -21,6 +21,20 @@ describe('parseNfoMetadata', () => {
    expect(result.subtitle).to.equal('A Novel')
  })

+  it('does not split title on bare colon without space', () => {
+    const nfoText = 'Title: 10:04'
+    const result = parseNfoMetadata(nfoText)
+    expect(result.title).to.equal('10:04')
+    expect(result.subtitle).to.be.undefined
+  })
+
+  it('does not split title on colon between words without space', () => {
+    const nfoText = 'Title: Making the Mission:Impossible Movies'
+    const result = parseNfoMetadata(nfoText)
+    expect(result.title).to.equal('Making the Mission:Impossible Movies')
+    expect(result.subtitle).to.be.undefined
+  })
+
  it('parses authors', () => {
    const nfoText = 'Author: F. Scott Fitzgerald'
    const result = parseNfoMetadata(nfoText)