First iteration of parsing metadata and chapter names from the DAISY ncc.html file

This commit is contained in:
Toni Barth 2026-02-07 16:45:40 +01:00
parent fe13456a2b
commit 6c9bf8c2bd
10 changed files with 394 additions and 6 deletions

View file

@ -0,0 +1,73 @@
const chai = require('chai')
const expect = chai.expect
const { parseDaisyMetadata } = require('../../../../server/utils/parsers/parseDaisyMetadata')
/**
 * Test suite for parseDaisyMetadata, driven by inline ncc.html fixtures.
 *
 * Scenarios covered:
 *  - empty input yields null
 *  - <meta> entries are mapped onto the metadata fields asserted below
 *  - <title> is used as the title when no dc:title meta entry is present
 *  - heading elements are reported as chapters
 *
 * The fixture template literals are intentionally left unindented so the
 * markup handed to the parser stays exactly as written.
 */
describe('parseDaisyMetadata', () => {
  it('returns null if htmlText is empty', () => {
    expect(parseDaisyMetadata('')).to.be.null
  })

  it('parses common metadata values from DAISY ncc.html', () => {
    // One <meta> entry per asserted field. Multi-value entries use '&', ';' and ','
    // as separators, and the identifier entries carry "ISBN " / "ASIN: " prefixes
    // that are expected to be absent from the parsed values.
    const metadataMarkup = `
<html>
<head>
<title>Fallback Title</title>
<meta name="dc:title" content="The DAISY Book">
<meta name="dc:creator" content="Jane Doe & Richard Roe">
<meta name="ncc:narrator" content="Reader One; Reader Two">
<meta name="dc:publisher" content="Talking Books Inc">
<meta name="dc:date" content="2021-06-04">
<meta name="dc:language" content="en">
<meta name="dc:subject" content="Fiction, Mystery">
<meta name="ncc:keywords" content="audio; daisy">
<meta name="dc:identifier" content="ISBN 978-1-4028-9462-6">
<meta name="dc:identifier:asin" content="ASIN: B012345678">
</head>
</html>
`
    const { title, authors, narrators, publisher, publishedYear, language, genres, tags, isbn, asin } = parseDaisyMetadata(metadataMarkup)

    // dc:title is expected to win over the <title> element when both are present
    expect(title).to.equal('The DAISY Book')
    expect(authors).to.deep.equal(['Jane Doe', 'Richard Roe'])
    expect(narrators).to.deep.equal(['Reader One', 'Reader Two'])
    expect(publisher).to.equal('Talking Books Inc')
    // Only the year portion of dc:date is expected, as a string
    expect(publishedYear).to.equal('2021')
    expect(language).to.equal('en')
    expect(genres).to.deep.equal(['Fiction', 'Mystery'])
    expect(tags).to.deep.equal(['audio', 'daisy'])
    expect(isbn).to.equal('978-1-4028-9462-6')
    expect(asin).to.equal('B012345678')
  })

  it('falls back to title tag when dc:title is not set', () => {
    // No <meta> entries at all: the <title> element is the only title source
    const titleOnlyMarkup = `
<html>
<head>
<title>Title From Head</title>
</head>
</html>
`
    const { title } = parseDaisyMetadata(titleOnlyMarkup)

    expect(title).to.equal('Title From Head')
  })

  it('parses chapter names from heading entries in ncc.html', () => {
    // Mixes linked headings (h1, h2) with a plain-text heading (h3)
    const headingMarkup = `
<html>
<body>
<h1><a href="book.smil#id1">Chapter 1</a></h1>
<h2><a href="book.smil#id2">Chapter 2: The Road</a></h2>
<h3>Part 1</h3>
</body>
</html>
`
    const { chapters } = parseDaisyMetadata(headingMarkup)

    // Chapters are expected in document order, carrying only the heading text
    const expectedChapters = [{ title: 'Chapter 1' }, { title: 'Chapter 2: The Road' }, { title: 'Part 1' }]
    expect(chapters).to.deep.equal(expectedChapters)
  })
})

View file

@ -49,4 +49,22 @@ describe('scanUtils', async () => {
'Author/Series2/Book5/deeply/nested': ['cd 01/audiofile.mp3', 'cd 02/audiofile.mp3']
})
})
// Feeds a single ncc.html file item through groupFileItemsIntoLibraryItemDirs and
// checks that it is grouped under its parent directory.
// NOTE(review): judging by the test title, the trailing `true` argument is presumably
// the includeNonMediaFiles flag — confirm against the scanUtils signature.
it('should include DAISY ncc.html changes when includeNonMediaFiles is enabled', async () => {
  const nccPath = 'Author/Book3/ncc.html'
  const parentDir = Path.dirname(nccPath)

  // A file sitting directly in the root has dirname '.', which is represented as ''
  let reldirpath = parentDir
  if (parentDir === '.') {
    reldirpath = ''
  }

  const nccFileItem = {
    name: Path.basename(nccPath),
    reldirpath,
    extension: Path.extname(nccPath),
    // Number of '/' separators in the relative path
    deep: nccPath.split('/').length - 1
  }

  const grouped = scanUtils.groupFileItemsIntoLibraryItemDirs('book', [nccFileItem], false, true)

  const expectedGrouping = { 'Author/Book3': ['ncc.html'] }
  expect(grouped).to.deep.equal(expectedGrouping)
})
})