diff --git a/server/scanner/AudioFileScanner.js b/server/scanner/AudioFileScanner.js index 2ba4230d..8b875b33 100644 --- a/server/scanner/AudioFileScanner.js +++ b/server/scanner/AudioFileScanner.js @@ -5,6 +5,7 @@ const { LogLevel } = require('../utils/constants') const { parseOverdriveMediaMarkersAsChapters } = require('../utils/parsers/parseOverdriveMediaMarkers') const parseNameString = require('../utils/parsers/parseNameString') const parseSeriesString = require('../utils/parsers/parseSeriesString') +const parseDate = require('../utils/parsers/parseDate') const LibraryItem = require('../models/LibraryItem') const AudioFile = require('../objects/files/AudioFile') @@ -431,6 +432,10 @@ class AudioFileScanner { tag: 'tagDate', key: 'pubDate' }, + { + tag: 'tagDate', + key: 'date' + }, { tag: 'tagDisc', key: 'season' @@ -462,9 +467,9 @@ class AudioFileScanner { if (value && typeof value === 'string') { value = value.trim() // Trim whitespace - if (mapping.key === 'pubDate') { - const pubJsDate = new Date(value) - if (pubJsDate && !isNaN(pubJsDate)) { + if ((mapping.key === 'pubDate' || mapping.key === 'date') && !podcastEpisode.pubDate) { + const pubJsDate = parseDate.parse(value) + if (pubJsDate) { podcastEpisode.publishedAt = pubJsDate.valueOf() podcastEpisode.pubDate = value scanLogger.addLog(LogLevel.DEBUG, `Mapping metadata to key ${tagToUse} => ${mapping.key}: ${podcastEpisode[mapping.key]}`) diff --git a/server/utils/parsers/parseDate.js b/server/utils/parsers/parseDate.js new file mode 100644 index 00000000..b43db38f --- /dev/null +++ b/server/utils/parsers/parseDate.js @@ -0,0 +1,54 @@ +/** + * Parse a date string with multiple fallback formats + * + * @example + * parse('2024-01-15') => Date object for Jan 15, 2024 + * parse('20240325') => Date object for Mar 25, 2024 + * parse('250312') => Date object for Mar 12, 2025 (year <= 50 as 20xx) + * parse('750312') => Date object for Mar 12, 1975 (year > 50 as 19xx) + * + * @param {string} dateString The date 
/**
 * Build a Date at local midnight from calendar parts, rejecting impossible dates
 *
 * The Date constructor rolls overflow into the following month (Feb 31 => Mar 2)
 * and maps years 0-99 to 1900-1999, so the parts are applied with setFullYear
 * and then read back to confirm nothing shifted.
 *
 * @param {number} year Full year
 * @param {number} month Month of the year (1-12)
 * @param {number} day Day of the month (1-31)
 * @returns {Date|null} Date object or null if the parts are not a real calendar date
 */
function buildLocalDate(year, month, day) {
  const date = new Date(2000, 0, 1)
  date.setFullYear(year, month - 1, day)

  const isSameDay = date.getFullYear() === year && date.getMonth() === month - 1 && date.getDate() === day
  return isSameDay ? date : null
}

/**
 * Parse a date string with multiple fallback formats
 *
 * The string is first handed to `new Date()`. That result is only accepted when
 * its year is within 0-9999, because some engines read a bare digit string as a
 * huge year. Otherwise the compact all-digit forms YYYYMMDD and YYMMDD are tried.
 *
 * Compact forms are built at local midnight, whereas a date-only ISO string
 * ('2024-01-15') is interpreted by `new Date()` as UTC midnight.
 *
 * @example
 * parse('2024-01-15') => Date object for Jan 15, 2024
 * parse('20240325') => Date object for Mar 25, 2024
 * parse('250312') => Date object for Mar 12, 2025 (year <= 50 as 20xx)
 * parse('750312') => Date object for Mar 12, 1975 (year > 50 as 19xx)
 * parse('20240231') => null (Feb 31 does not exist)
 *
 * @param {string} dateString The date string to parse
 * @returns {Date|null} Date object or null if unparseable
 */
function parseDate(dateString) {
  if (!dateString || typeof dateString !== 'string') return null

  const nativeDate = new Date(dateString)
  if (!Number.isNaN(nativeDate.getTime())) {
    const nativeYear = nativeDate.getFullYear()
    if (nativeYear >= 0 && nativeYear <= 9999) {
      return nativeDate
    }
  }

  // YYYYMMDD (8 digits) or YYMMDD (6 digits); the 4-digit year alternative is tried first
  const compactMatch = dateString.match(/^(\d{4}|\d{2})(\d{2})(\d{2})$/)
  if (!compactMatch) return null

  const [, yearPart, monthPart, dayPart] = compactMatch
  let year = Number.parseInt(yearPart, 10)
  if (yearPart.length === 2) {
    // Two-digit year pivot: 51-99 => 19xx, 00-50 => 20xx
    year += year > 50 ? 1900 : 2000
  }

  return buildLocalDate(year, Number.parseInt(monthPart, 10), Number.parseInt(dayPart, 10))
}

// Under CommonJS this always runs; the guard only matters where `module` is not defined
if (typeof module !== 'undefined' && module.exports) {
  module.exports.parse = parseDate
}
const chai = require('chai')
const expect = chai.expect
const { parse } = require('../../../../server/utils/parsers/parseDate')

// Mocha suite for the parseDate helper.
//
// Strings handled by `new Date()` in ISO form (date-only, 'Z'-suffixed, or a bare
// year) are interpreted as UTC, so those cases assert with the getUTC* accessors
// to stay independent of the machine's timezone. The compact YYYYMMDD / YYMMDD
// fallbacks are built in local time, so those cases assert with local accessors.
describe('parseDate', () => {
  describe('parse', () => {
    it('returns null for empty input', () => {
      expect(parse('')).to.be.null
      expect(parse(null)).to.be.null
      expect(parse(undefined)).to.be.null
    })

    it('returns null for non-string input', () => {
      expect(parse(20250101)).to.be.null
      expect(parse({})).to.be.null
      expect(parse([])).to.be.null
    })

    it('parses ISO 8601 date string with new Date()', () => {
      const result = parse('2024-01-15')
      expect(result).to.be.instanceOf(Date)
      // Date-only ISO strings are UTC midnight
      expect(result.getUTCFullYear()).to.equal(2024)
      expect(result.getUTCMonth()).to.equal(0)
      expect(result.getUTCDate()).to.equal(15)
    })

    it('parses date string with time using new Date()', () => {
      const result = parse('2024-06-20T14:30:00Z')
      expect(result).to.be.instanceOf(Date)
      expect(result.getUTCFullYear()).to.equal(2024)
      expect(result.getUTCMonth()).to.equal(5)
      expect(result.getUTCDate()).to.equal(20)
    })

    it('parses YYYYMMDD format when new Date() fails', () => {
      const result = parse('20240325')
      expect(result).to.be.instanceOf(Date)
      expect(result.getFullYear()).to.equal(2024)
      expect(result.getMonth()).to.equal(2)
      expect(result.getDate()).to.equal(25)
    })

    it('parses YYYYMMDD format with leading zeros', () => {
      const result = parse('20240105')
      expect(result).to.be.instanceOf(Date)
      expect(result.getFullYear()).to.equal(2024)
      expect(result.getMonth()).to.equal(0)
      expect(result.getDate()).to.equal(5)
    })

    it('parses YYMMDD format (2-digit year > 50 as 19xx)', () => {
      const result = parse('750312')
      expect(result).to.be.instanceOf(Date)
      expect(result.getFullYear()).to.equal(1975)
      expect(result.getMonth()).to.equal(2)
      expect(result.getDate()).to.equal(12)
    })

    it('parses YYMMDD format (2-digit year <= 50 as 20xx)', () => {
      const result = parse('250312')
      expect(result).to.be.instanceOf(Date)
      expect(result.getFullYear()).to.equal(2025)
      expect(result.getMonth()).to.equal(2)
      expect(result.getDate()).to.equal(12)
    })

    it('parses YYMMDD format with leading zeros', () => {
      const result = parse('990105')
      expect(result).to.be.instanceOf(Date)
      expect(result.getFullYear()).to.equal(1999)
      expect(result.getMonth()).to.equal(0)
      expect(result.getDate()).to.equal(5)
    })

    it('prefers new Date() parsing over YYYYMMDD when both could work', () => {
      const result = parse('2024-01-15')
      expect(result).to.be.instanceOf(Date)
      expect(result.getUTCFullYear()).to.equal(2024)
    })

    it('rejects month overflow in YYYYMMDD', () => {
      expect(parse('20241301')).to.be.null
    })

    it('rejects day overflow in YYYYMMDD', () => {
      expect(parse('20240235')).to.be.null
    })

    it('returns null for invalid strings', () => {
      expect(parse('not a date')).to.be.null
      expect(parse('hello world')).to.be.null
      expect(parse('202401')).to.be.null
    })

    it('parses separator-delimited dates with new Date()', () => {
      expect(parse('2024-01-15')).to.be.instanceOf(Date)
      expect(parse('2024/01/15')).to.be.instanceOf(Date)
      expect(parse('2024.01.15')).to.be.instanceOf(Date)
    })

    it('parses 4-digit year string as year', () => {
      const result = parse('2024')
      expect(result).to.be.instanceOf(Date)
      // A bare year is an ISO date-only form, i.e. Jan 1 at UTC midnight
      expect(result.getUTCFullYear()).to.equal(2024)
      expect(result.getUTCMonth()).to.equal(0)
      expect(result.getUTCDate()).to.equal(1)
    })
  })
})