This commit is contained in:
Toni Barth 2026-02-23 19:14:14 -07:00 committed by GitHub
commit 487cf59a98
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 408 additions and 6 deletions

View file

@ -127,7 +127,7 @@ export default {
autoScanCronExpression: null,
hideSingleBookSeries: false,
onlyShowLaterBooksInContinueSeries: false,
metadataPrecedence: ['folderStructure', 'audioMetatags', 'nfoFile', 'txtFiles', 'opfFile', 'absMetadata'],
metadataPrecedence: ['folderStructure', 'audioMetatags', 'nfoFile', 'txtFiles', 'opfFile', 'daisyFile', 'absMetadata'],
markAsFinishedPercentComplete: null,
markAsFinishedTimeRemaining: 10
}

View file

@ -81,6 +81,11 @@ export default {
name: 'OPF file',
include: true
},
daisyFile: {
id: 'daisyFile',
name: 'DAISY ncc.html file',
include: true
},
absMetadata: {
id: 'absMetadata',
name: 'Audiobookshelf metadata file',
@ -157,4 +162,4 @@ export default {
this.init()
}
}
</script>
</script>

View file

@ -82,7 +82,7 @@ class Library extends Model {
}
static get defaultMetadataPrecedence() {
return ['folderStructure', 'audioMetatags', 'nfoFile', 'txtFiles', 'opfFile', 'absMetadata']
return ['folderStructure', 'audioMetatags', 'nfoFile', 'txtFiles', 'opfFile', 'daisyFile', 'absMetadata']
}
/**

View file

@ -23,6 +23,7 @@ const CoverManager = require('../managers/CoverManager')
const LibraryScan = require('./LibraryScan')
const OpfFileScanner = require('./OpfFileScanner')
const NfoFileScanner = require('./NfoFileScanner')
const DaisyFileScanner = require('./DaisyFileScanner')
const AbsMetadataFileScanner = require('./AbsMetadataFileScanner')
/**
@ -792,6 +793,14 @@ class BookScanner {
await OpfFileScanner.scanBookOpfFile(this.libraryItemData.metadataOpfLibraryFile, this.bookMetadata)
}
/**
* Metadata from DAISY ncc.html file
*/
async daisyFile() {
if (!this.libraryItemData.metadataDaisyNccLibraryFile) return
await DaisyFileScanner.scanBookDaisyFile(this.libraryItemData.metadataDaisyNccLibraryFile, this.bookMetadata, this.audioFiles)
}
/**
* Metadata from metadata.json
*/

View file

@ -0,0 +1,99 @@
const { parseDaisyMetadata } = require('../utils/parsers/parseDaisyMetadata')
const { readTextFile } = require('../utils/fileUtils')
const Path = require('path')
/**
 * Applies metadata parsed from a DAISY ncc.html file to the metadata object
 * that is being assembled for a book during a library scan.
 */
class DaisyFileScanner {
  /**
   * Parse metadata from DAISY ncc.html file found in library scan and update bookMetadata.
   *
   * List-valued keys (tags, genres, authors, narrators) only replace the existing
   * value when the parsed list is non-empty; every other key is copied when truthy.
   * Chapter titles are merged into existing chapters by index, or - when
   * bookMetadata has no chapters yet - combined with the audio file durations.
   *
   * @param {import('../models/LibraryItem').LibraryFileObject} daisyLibraryFileObj
   * @param {Object} bookMetadata - mutated in place
   * @param {import('../models/Book').AudioFileObject[]} [audioFiles=[]] - used to derive chapter timings when bookMetadata has no chapters
   * @returns {Promise<void>}
   */
  async scanBookDaisyFile(daisyLibraryFileObj, bookMetadata, audioFiles = []) {
    const htmlText = await readTextFile(daisyLibraryFileObj.metadata.path)
    const daisyMetadata = htmlText ? parseDaisyMetadata(htmlText) : null
    if (!daisyMetadata) return

    for (const [key, value] of Object.entries(daisyMetadata)) {
      switch (key) {
        case 'tags':
        case 'genres':
        case 'authors':
        case 'narrators':
          // An empty list must not wipe values supplied by other metadata sources.
          if (value?.length) bookMetadata[key] = value
          break
        case 'chapters': {
          if (!value?.length) break
          if (bookMetadata.chapters?.length) {
            // DAISY ncc.html provides chapter names; preserve existing timings if available.
            bookMetadata.chapters = bookMetadata.chapters.map((chapter, index) => {
              const daisyTitle = value[index]?.title
              if (!daisyTitle) return chapter
              return {
                ...chapter,
                id: chapter.id ?? index,
                title: daisyTitle
              }
            })
          } else {
            const chaptersFromFiles = this.buildChaptersFromAudioFiles(audioFiles, value)
            if (chaptersFromFiles.length) {
              bookMetadata.chapters = chaptersFromFiles
            }
          }
          break
        }
        default:
          if (value) bookMetadata[key] = value
      }
    }
  }

  /**
   * Build chapter timings from ordered audio files while applying DAISY chapter titles.
   * Falls back to file basenames if DAISY has fewer titles than files (or no
   * title list is given at all), and to "Chapter N" when a file has no filename.
   * Audio files without a duration are skipped and do not consume a title.
   *
   * @param {import('../models/Book').AudioFileObject[]} audioFiles
   * @param {{title:string}[]} [daisyChapters=[]]
   * @returns {import('../models/Book').ChapterObject[]}
   */
  buildChaptersFromAudioFiles(audioFiles, daisyChapters = []) {
    if (!audioFiles?.length) return []

    // The default only covers undefined; an explicit null must not crash the lookup below.
    const daisyTitles = daisyChapters ?? []
    const chapters = []
    let currentStartTime = 0

    for (const audioFile of audioFiles) {
      if (!audioFile.duration) continue

      // Ids are consecutive over the chapters actually emitted.
      const chapterId = chapters.length
      const filename = audioFile.metadata?.filename
      const fallbackTitle = filename ? Path.basename(filename, Path.extname(filename)) : `Chapter ${chapterId + 1}`

      chapters.push({
        id: chapterId,
        start: currentStartTime,
        end: currentStartTime + audioFile.duration,
        title: daisyTitles[chapterId]?.title || fallbackTitle
      })
      currentStartTime += audioFile.duration
    }

    return chapters
  }
}
module.exports = new DaisyFileScanner()

View file

@ -173,6 +173,11 @@ class LibraryItemScanData {
return this.libraryFiles.find(lf => lf.metadata.ext.toLowerCase() === '.nfo')
}
/** @type {LibraryItem.LibraryFileObject} */
get metadataDaisyNccLibraryFile() {
return this.libraryFiles.find(lf => lf.metadata.filename?.toLowerCase() === 'ncc.html')
}
/**
*
* @param {LibraryItem} existingLibraryItem
@ -374,4 +379,4 @@ class LibraryItemScanData {
}
}
}
module.exports = LibraryItemScanData
module.exports = LibraryItemScanData

View file

@ -0,0 +1,177 @@
const h = require('htmlparser2')
const parseNameString = require('./parseNameString')
/**
 * Look up the values collected for one meta tag name, dropping falsy entries.
 *
 * @param {Object<string, string[]>} metaTags - values grouped by meta tag name
 * @param {string} tagName
 * @returns {string[]} empty array when nothing is stored under tagName
 */
function getValues(metaTags, tagName) {
  const recorded = metaTags[tagName]
  if (recorded === undefined || recorded === null) return []
  return recorded.filter(Boolean) || []
}
/**
 * Return the first value of the first tag name (in the given order) that has any values.
 *
 * @param {Object<string, string[]>} metaTags - values grouped by meta tag name
 * @param {string[]} tagNames - tag names to try, highest priority first
 * @returns {string|null} null when none of the tag names has a value
 */
function getFirstValue(metaTags, tagNames) {
  let firstValue = null
  for (const candidate of tagNames) {
    const found = getValues(metaTags, candidate)
    if (found.length === 0) continue
    firstValue = found[0]
    break
  }
  return firstValue
}
/**
 * Turn raw name strings into a de-duplicated list of individual names.
 * Each value goes through parseNameString.parse; when that yields no `names`,
 * the value is split on semicolons instead. First-seen order is preserved.
 *
 * @param {string[]} values
 * @returns {string[]}
 */
function parseNameValues(values) {
  const uniqueNames = values.reduce((seen, rawValue) => {
    const candidates = parseNameString.parse(rawValue)?.names || rawValue.split(/\s*;\s*/).filter((n) => n)
    candidates.forEach((name) => seen.add(name))
    return seen
  }, new Set())
  return Array.from(uniqueNames)
}
/**
 * Split every value on commas or semicolons (swallowing the whitespace around
 * the separator) and return the non-empty pieces without duplicates, in
 * first-seen order.
 *
 * @param {string[]} values
 * @returns {string[]}
 */
function parseStringList(values) {
  const pieces = values.flatMap((value) => value.split(/\s*[;,]\s*/))
  return [...new Set(pieces.filter(Boolean))]
}
/**
 * Pull the first run of four digits out of a string.
 *
 * @param {string|null|undefined} str
 * @returns {string|null} the four digits as a string, or null when str is empty or has no such run
 */
function extractYear(str) {
  if (str) {
    const [year] = str.match(/\d{4}/) ?? []
    return year ?? null
  }
  return null
}
/**
 * Extract an ISBN- or ASIN-shaped token from a free-form identifier string.
 * For 'isbn' the token is a digit/dash/space run (optionally ending in x); for any
 * other type it is a standalone 10-character alphanumeric word. Surrounding
 * non-alphanumeric characters are stripped from the result.
 *
 * @param {string|null|undefined} identifier
 * @param {string} identifierType - 'isbn' selects the ISBN pattern, anything else the ASIN pattern
 * @returns {string|null} null when identifier is empty or nothing matches
 */
function extractIdentifierValue(identifier, identifierType) {
  if (!identifier) return null

  let pattern
  if (identifierType === 'isbn') {
    pattern = /(?:^|[^a-z0-9])(97[89][\d\- ]{9,16}[\dx]|[\d\- ]{9,14}[\dx])(?:$|[^a-z0-9])/i
  } else {
    pattern = /(?:^|[^a-z0-9])([a-z0-9]{10})(?:$|[^a-z0-9])/i
  }

  const found = identifier.trim().match(pattern)
  if (found === null) return null

  const candidate = found[1] || found[0]
  return candidate.replace(/^[^a-z0-9]+|[^a-z0-9]+$/gi, '').trim()
}
/**
 * Find an ISBN or ASIN among the collected meta tags.
 *
 * Lookup order:
 *  1. a type-specific tag (`dc:identifier:isbn` / `dc:identifier:asin`, then a tag
 *     named after the type itself),
 *  2. generic identifier values whose text mentions the type,
 *  3. any generic identifier value from which a token can be extracted.
 * Generic values come from `dc:identifier` and `ncc:identifier`, plus `dc:source`
 * for ISBN lookups only.
 *
 * @param {Object<string, string[]>} metaTags - values grouped by meta tag name
 * @param {string} identifierType - 'isbn' or 'asin'
 * @returns {string|null}
 */
function parseIdentifier(metaTags, identifierType) {
  const isIsbn = identifierType === 'isbn'

  const typedValue = getFirstValue(metaTags, [isIsbn ? 'dc:identifier:isbn' : 'dc:identifier:asin', identifierType])
  const typedResult = typedValue ? extractIdentifierValue(typedValue, identifierType) : null
  if (typedResult) return typedResult

  const candidates = getValues(metaTags, 'dc:identifier').concat(
    getValues(metaTags, 'ncc:identifier'),
    isIsbn ? getValues(metaTags, 'dc:source') : []
  )

  // Values that name the identifier type are tried before the unlabeled fallback pass.
  let labelPattern = null
  if (isIsbn) labelPattern = /isbn/i
  else if (identifierType === 'asin') labelPattern = /asin/i
  const labeled = labelPattern ? candidates.filter((candidate) => labelPattern.test(candidate)) : []

  for (const candidate of [...labeled, ...candidates]) {
    const extracted = extractIdentifierValue(candidate, identifierType)
    if (extracted) return extracted
  }
  return null
}
/**
 * Parse book metadata and chapter titles out of the text of a DAISY ncc.html file.
 *
 * `<meta name content>` pairs are grouped by lower-cased name, the `<title>` text is
 * kept as a fallback title, and the text of every h1-h6 heading becomes a chapter
 * title (whitespace collapsed, empty headings dropped).
 *
 * @param {string} htmlText
 * @returns {Object|null} null when htmlText is empty; otherwise an object that always
 *   has authors, narrators, genres, tags and chapters (possibly empty arrays) and
 *   carries title, publishedYear, publisher, description, language, isbn and asin
 *   only when a value was found
 */
function parseDaisyMetadata(htmlText) {
  if (!htmlText) return null

  // Null-prototype dictionary: meta names come straight from the file, and on a plain
  // object a name such as "constructor" or "__proto__" would resolve to an inherited
  // member, so the array below would never be created and push() would throw.
  const metaTags = Object.create(null)
  const chapterTitles = []
  let titleText = ''
  let inTitle = false
  let currentHeadingName = null
  let currentHeadingText = ''

  const parser = new h.Parser(
    {
      onopentag: (name, attribs) => {
        if (name === 'title') {
          inTitle = true
        }
        if (/^h[1-6]$/.test(name)) {
          currentHeadingName = name
          currentHeadingText = ''
        }
        if (name !== 'meta') return

        const tagName = attribs.name?.trim().toLowerCase()
        const content = attribs.content?.trim()
        if (!tagName || !content) return

        // A meta name may repeat, so every name maps to a list of values.
        if (!metaTags[tagName]) metaTags[tagName] = []
        metaTags[tagName].push(content)
      },
      ontext: (text) => {
        if (inTitle) titleText += text
        if (currentHeadingName) currentHeadingText += text
      },
      onclosetag: (name) => {
        if (name === 'title') {
          inTitle = false
        }
        if (name === currentHeadingName) {
          const chapterTitle = currentHeadingText.replace(/\s+/g, ' ').trim()
          if (chapterTitle) {
            chapterTitles.push(chapterTitle)
          }
          currentHeadingName = null
          currentHeadingText = ''
        }
      }
    },
    { decodeEntities: true }
  )
  parser.write(htmlText)
  parser.end()

  const metadata = {
    title: getFirstValue(metaTags, ['dc:title']) || titleText.trim() || null,
    authors: parseNameValues(getValues(metaTags, 'dc:creator')),
    narrators: parseNameValues(getValues(metaTags, 'ncc:narrator')),
    publishedYear: extractYear(getFirstValue(metaTags, ['dc:date', 'ncc:revisiondate'])),
    publisher: getFirstValue(metaTags, ['dc:publisher']),
    description: getFirstValue(metaTags, ['dc:description']),
    language: getFirstValue(metaTags, ['dc:language']),
    genres: parseStringList([...getValues(metaTags, 'dc:subject'), ...getValues(metaTags, 'ncc:subject')]),
    tags: parseStringList([...getValues(metaTags, 'ncc:keywords'), ...getValues(metaTags, 'dc:tag')]),
    isbn: parseIdentifier(metaTags, 'isbn'),
    asin: parseIdentifier(metaTags, 'asin'),
    chapters: chapterTitles.map((title) => ({ title }))
  }

  // Keys without a value are left out entirely rather than reported as null.
  return Object.fromEntries(Object.entries(metadata).filter(([, value]) => value !== null))
}
module.exports = { parseDaisyMetadata }

View file

@ -24,7 +24,10 @@ function isMediaFile(mediaType, ext, audiobooksOnly = false) {
return globals.SupportedAudioTypes.includes(extclean) || globals.SupportedEbookTypes.includes(extclean)
}
function isScannableNonMediaFile(ext) {
function isScannableNonMediaFile(ext, filename = '') {
const filenameLower = filename.toLowerCase()
if (filenameLower === 'ncc.html') return true
if (!ext) return false
const extclean = ext.slice(1).toLowerCase()
return globals.TextFileTypes.includes(extclean) || globals.MetadataFileTypes.includes(extclean) || globals.SupportedImageTypes.includes(extclean)
@ -58,7 +61,7 @@ function groupFileItemsIntoLibraryItemDirs(mediaType, fileItems, audiobooksOnly,
/** @type {import('./fileUtils').FilePathItem[]} */
const otherFileItems = []
itemsFiltered.forEach((item) => {
if (isMediaFile(mediaType, item.extension, audiobooksOnly) || (includeNonMediaFiles && isScannableNonMediaFile(item.extension))) {
if (isMediaFile(mediaType, item.extension, audiobooksOnly) || (includeNonMediaFiles && isScannableNonMediaFile(item.extension, item.name))) {
mediaFileItems.push(item)
} else {
otherFileItems.push(item)

View file

@ -0,0 +1,86 @@
const chai = require('chai')
const expect = chai.expect
const { parseDaisyMetadata } = require('../../../../server/utils/parsers/parseDaisyMetadata')
// Unit tests for parseDaisyMetadata, driven by inline ncc.html fixtures.
describe('parseDaisyMetadata', () => {
// Empty input yields null rather than an empty metadata object.
it('returns null if htmlText is empty', () => {
const result = parseDaisyMetadata('')
expect(result).to.be.null
})
// One fixture carrying the common dc:* / ncc:* meta tags; checks each mapped field.
it('parses common metadata values from DAISY ncc.html', () => {
const nccHtml = `
<html>
<head>
<title>Fallback Title</title>
<meta name="dc:title" content="The DAISY Book">
<meta name="dc:creator" content="Jane Doe & Richard Roe">
<meta name="ncc:narrator" content="Reader One; Reader Two">
<meta name="dc:publisher" content="Talking Books Inc">
<meta name="dc:date" content="2021-06-04">
<meta name="dc:language" content="en">
<meta name="dc:subject" content="Fiction, Mystery">
<meta name="ncc:keywords" content="audio; daisy">
<meta name="dc:identifier" content="ISBN 978-1-4028-9462-6">
<meta name="dc:identifier:asin" content="ASIN: B012345678">
</head>
</html>
`
const result = parseDaisyMetadata(nccHtml)
// dc:title takes priority over the <title> element.
expect(result.title).to.equal('The DAISY Book')
expect(result.authors).to.deep.equal(['Jane Doe', 'Richard Roe'])
expect(result.narrators).to.deep.equal(['Reader One', 'Reader Two'])
expect(result.publisher).to.equal('Talking Books Inc')
// Only the year is kept from dc:date.
expect(result.publishedYear).to.equal('2021')
expect(result.language).to.equal('en')
expect(result.genres).to.deep.equal(['Fiction', 'Mystery'])
expect(result.tags).to.deep.equal(['audio', 'daisy'])
// The "ISBN " / "ASIN: " labels are stripped from the extracted identifiers.
expect(result.isbn).to.equal('978-1-4028-9462-6')
expect(result.asin).to.equal('B012345678')
})
// Without dc:title the <title> element supplies the title.
it('falls back to title tag when dc:title is not set', () => {
const nccHtml = `
<html>
<head>
<title>Title From Head</title>
</head>
</html>
`
const result = parseDaisyMetadata(nccHtml)
expect(result.title).to.equal('Title From Head')
})
// dc:source is accepted as an ISBN source when no identifier tag is present.
it('parses isbn from dc:source in DAISY ncc.html', () => {
const nccHtml = `
<html>
<head>
<meta name="dc:source" content="ISBN 978-0-553-38016-3">
</head>
</html>
`
const result = parseDaisyMetadata(nccHtml)
expect(result.isbn).to.equal('978-0-553-38016-3')
})
// Headings of any level become chapter titles in document order; link markup is ignored.
it('parses chapter names from heading entries in ncc.html', () => {
const nccHtml = `
<html>
<body>
<h1><a href="book.smil#id1">Chapter 1</a></h1>
<h2><a href="book.smil#id2">Chapter 2: The Road</a></h2>
<h3>Part 1</h3>
</body>
</html>
`
const result = parseDaisyMetadata(nccHtml)
expect(result.chapters).to.deep.equal([
{ title: 'Chapter 1' },
{ title: 'Chapter 2: The Road' },
{ title: 'Part 1' }
])
})
})

View file

@ -49,4 +49,22 @@ describe('scanUtils', async () => {
'Author/Series2/Book5/deeply/nested': ['cd 01/audiofile.mp3', 'cd 02/audiofile.mp3']
})
})
// An ncc.html file on its own (no audio files) must still be grouped under its
// book directory when non-media files are included in the grouping.
it('should include DAISY ncc.html changes when includeNonMediaFiles is enabled', async () => {
const filePath = 'Author/Book3/ncc.html'
const dirname = Path.dirname(filePath)
// Build the single file item by hand from the path.
const fileItems = [
{
name: Path.basename(filePath),
reldirpath: dirname === '.' ? '' : dirname,
extension: Path.extname(filePath),
deep: filePath.split('/').length - 1
}
]
// Arguments: mediaType 'book', audiobooksOnly = false, includeNonMediaFiles = true.
const libraryItemGrouping = scanUtils.groupFileItemsIntoLibraryItemDirs('book', fileItems, false, true)
expect(libraryItemGrouping).to.deep.equal({
'Author/Book3': ['ncc.html']
})
})
})