mirror of
https://github.com/advplyr/audiobookshelf.git
synced 2026-05-22 19:31:31 +00:00
update
This commit is contained in:
parent
6527b8b0f5
commit
f4ce4a4bde
4 changed files with 272 additions and 1 deletions
|
|
@ -66,6 +66,11 @@ export default {
|
||||||
name: 'Audio file meta tags OR ebook metadata',
|
name: 'Audio file meta tags OR ebook metadata',
|
||||||
include: true
|
include: true
|
||||||
},
|
},
|
||||||
|
openAIPathMetadata: {
|
||||||
|
id: 'openAIPathMetadata',
|
||||||
|
name: 'OpenAI path and filename inference',
|
||||||
|
include: false
|
||||||
|
},
|
||||||
nfoFile: {
|
nfoFile: {
|
||||||
id: 'nfoFile',
|
id: 'nfoFile',
|
||||||
name: 'NFO file',
|
name: 'NFO file',
|
||||||
|
|
@ -157,4 +162,4 @@ export default {
|
||||||
this.init()
|
this.init()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,17 @@ class OpenAI {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
summarizeScanMetadataForLog(metadata) {
|
||||||
|
return JSON.stringify({
|
||||||
|
title: metadata.title || null,
|
||||||
|
authors: metadata.authors || [],
|
||||||
|
seriesName: metadata.seriesName || null,
|
||||||
|
sequence: metadata.sequence || null,
|
||||||
|
publishedYear: metadata.publishedYear || null,
|
||||||
|
reason: metadata.reason || ''
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
normalizePathForPrompt(filePath) {
|
normalizePathForPrompt(filePath) {
|
||||||
if (!filePath || typeof filePath !== 'string') return null
|
if (!filePath || typeof filePath !== 'string') return null
|
||||||
return filePath.replace(/\\/g, '/')
|
return filePath.replace(/\\/g, '/')
|
||||||
|
|
@ -189,6 +200,78 @@ class OpenAI {
|
||||||
return sequence
|
return sequence
|
||||||
}
|
}
|
||||||
|
|
||||||
|
normalizeOptionalString(value, maxLength = 300) {
|
||||||
|
if (value === null || value === undefined) return null
|
||||||
|
if (typeof value !== 'string') return null
|
||||||
|
const normalized = value.replace(/\s+/g, ' ').trim()
|
||||||
|
if (!normalized) return null
|
||||||
|
return normalized.slice(0, maxLength)
|
||||||
|
}
|
||||||
|
|
||||||
|
normalizeStringArray(value, maxItems = 10, maxLength = 120) {
|
||||||
|
if (!Array.isArray(value)) return []
|
||||||
|
|
||||||
|
const deduped = []
|
||||||
|
const seen = new Set()
|
||||||
|
for (const item of value) {
|
||||||
|
const normalized = this.normalizeOptionalString(item, maxLength)
|
||||||
|
if (!normalized) continue
|
||||||
|
const key = normalized.toLowerCase()
|
||||||
|
if (seen.has(key)) continue
|
||||||
|
seen.add(key)
|
||||||
|
deduped.push(normalized)
|
||||||
|
if (deduped.length >= maxItems) break
|
||||||
|
}
|
||||||
|
return deduped
|
||||||
|
}
|
||||||
|
|
||||||
|
normalizePublishedYear(value) {
|
||||||
|
if (value === null || value === undefined) return null
|
||||||
|
if (typeof value === 'number' && Number.isInteger(value)) value = String(value)
|
||||||
|
if (typeof value !== 'string') return null
|
||||||
|
const normalized = value.trim()
|
||||||
|
if (!/^\d{4}$/.test(normalized)) return null
|
||||||
|
return normalized
|
||||||
|
}
|
||||||
|
|
||||||
|
normalizeIsbn(value) {
|
||||||
|
const normalized = this.normalizeOptionalString(value, 20)
|
||||||
|
if (!normalized) return null
|
||||||
|
const compact = normalized.replace(/[-\s]/g, '')
|
||||||
|
if (!/^(?:\d{10}|\d{13}|[0-9X]{10})$/i.test(compact)) return null
|
||||||
|
return compact
|
||||||
|
}
|
||||||
|
|
||||||
|
normalizeAsin(value) {
|
||||||
|
const normalized = this.normalizeOptionalString(value, 10)
|
||||||
|
if (!normalized) return null
|
||||||
|
return /^[A-Z0-9]{10}$/i.test(normalized) ? normalized.toUpperCase() : null
|
||||||
|
}
|
||||||
|
|
||||||
|
validateScanMetadataPayload(payload) {
|
||||||
|
const book = payload?.book && typeof payload.book === 'object' ? payload.book : payload
|
||||||
|
if (!book || typeof book !== 'object' || Array.isArray(book)) {
|
||||||
|
throw new Error('OpenAI returned invalid scan metadata payload')
|
||||||
|
}
|
||||||
|
|
||||||
|
const seriesName = this.normalizeSeriesName(book.seriesName)
|
||||||
|
|
||||||
|
return {
|
||||||
|
title: this.normalizeOptionalString(book.title),
|
||||||
|
subtitle: this.normalizeOptionalString(book.subtitle),
|
||||||
|
publishedYear: this.normalizePublishedYear(book.publishedYear),
|
||||||
|
publisher: this.normalizeOptionalString(book.publisher),
|
||||||
|
isbn: this.normalizeIsbn(book.isbn),
|
||||||
|
asin: this.normalizeAsin(book.asin),
|
||||||
|
language: this.normalizeOptionalString(book.language, 40),
|
||||||
|
authors: this.normalizeStringArray(book.authors),
|
||||||
|
narrators: this.normalizeStringArray(book.narrators),
|
||||||
|
seriesName,
|
||||||
|
sequence: seriesName ? this.normalizeSequence(book.sequence) : null,
|
||||||
|
reason: this.normalizeOptionalString(book.reason, 600) || ''
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
validateBookIds(resultBooks, books) {
|
validateBookIds(resultBooks, books) {
|
||||||
if (!Array.isArray(resultBooks) || resultBooks.length !== books.length) {
|
if (!Array.isArray(resultBooks) || resultBooks.length !== books.length) {
|
||||||
throw new Error('OpenAI returned an invalid number of books')
|
throw new Error('OpenAI returned an invalid number of books')
|
||||||
|
|
@ -470,6 +553,102 @@ ${JSON.stringify(books, null, 2)}`
|
||||||
})
|
})
|
||||||
return validated
|
return validated
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async inferBookMetadataFromScan(libraryItemData, audioFiles = [], ebookFileScanData = null) {
|
||||||
|
if (!this.isConfigured) {
|
||||||
|
throw new Error('OpenAI API key is not configured')
|
||||||
|
}
|
||||||
|
|
||||||
|
const folderContext = this.getFolderContext(libraryItemData)
|
||||||
|
const audioFileCandidates = (audioFiles || []).slice(0, 25).map((audioFile) => ({
|
||||||
|
relPath: this.normalizePathForPrompt(audioFile.metadata?.relPath),
|
||||||
|
filename: audioFile.metadata?.filename || null,
|
||||||
|
duration: audioFile.duration || null,
|
||||||
|
trackNumber: audioFile.trackNumFromMeta || audioFile.metaTags?.trackNumber || null,
|
||||||
|
discNumber: audioFile.discNumFromMeta || audioFile.metaTags?.discNumber || null,
|
||||||
|
metaTags: {
|
||||||
|
tagTitle: audioFile.metaTags?.tagTitle || null,
|
||||||
|
tagAlbum: audioFile.metaTags?.tagAlbum || null,
|
||||||
|
tagArtist: audioFile.metaTags?.tagArtist || null,
|
||||||
|
tagAlbumArtist: audioFile.metaTags?.tagAlbumArtist || null,
|
||||||
|
tagSeries: audioFile.metaTags?.tagSeries || null,
|
||||||
|
tagSeriesPart: audioFile.metaTags?.tagSeriesPart || null,
|
||||||
|
tagSubtitle: audioFile.metaTags?.tagSubtitle || null,
|
||||||
|
tagDate: audioFile.metaTags?.tagDate || null,
|
||||||
|
tagASIN: audioFile.metaTags?.tagASIN || null
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
|
||||||
|
const ebookMetadata = ebookFileScanData?.metadata
|
||||||
|
? {
|
||||||
|
title: ebookFileScanData.metadata.title || null,
|
||||||
|
subtitle: ebookFileScanData.metadata.subtitle || null,
|
||||||
|
authors: ebookFileScanData.metadata.authors || [],
|
||||||
|
narrators: ebookFileScanData.metadata.narrators || [],
|
||||||
|
series: ebookFileScanData.metadata.series || [],
|
||||||
|
publishedYear: ebookFileScanData.metadata.publishedYear || null,
|
||||||
|
isbn: ebookFileScanData.metadata.isbn || null,
|
||||||
|
asin: ebookFileScanData.metadata.asin || null
|
||||||
|
}
|
||||||
|
: null
|
||||||
|
|
||||||
|
const currentPathMetadata = {
|
||||||
|
title: libraryItemData.mediaMetadata?.title || null,
|
||||||
|
subtitle: libraryItemData.mediaMetadata?.subtitle || null,
|
||||||
|
authors: libraryItemData.mediaMetadata?.authors || [],
|
||||||
|
narrators: libraryItemData.mediaMetadata?.narrators || [],
|
||||||
|
seriesName: libraryItemData.mediaMetadata?.seriesName || null,
|
||||||
|
sequence: libraryItemData.mediaMetadata?.seriesSequence || null,
|
||||||
|
publishedYear: libraryItemData.mediaMetadata?.publishedYear || null
|
||||||
|
}
|
||||||
|
|
||||||
|
Logger.info(`[OpenAI] Inferring scan metadata for "${libraryItemData.relPath}"`)
|
||||||
|
|
||||||
|
const prompt = `You infer audiobook metadata from weak or messy directory structures.
|
||||||
|
|
||||||
|
Return only valid JSON in this shape:
|
||||||
|
{
|
||||||
|
"book": {
|
||||||
|
"title": "Book title or null",
|
||||||
|
"subtitle": "Subtitle or null",
|
||||||
|
"authors": ["Author Name"],
|
||||||
|
"narrators": ["Narrator Name"],
|
||||||
|
"seriesName": "Series name or null",
|
||||||
|
"sequence": "1 or null",
|
||||||
|
"publishedYear": "2004 or null",
|
||||||
|
"publisher": "Publisher or null",
|
||||||
|
"isbn": "ISBN or null",
|
||||||
|
"asin": "ASIN or null",
|
||||||
|
"language": "Language or null",
|
||||||
|
"reason": "brief reason"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
- Infer metadata from full path, relative path, folder names, filenames, and any provided tag metadata.
|
||||||
|
- Prefer title/author/series evidence that is explicit in filenames or tags.
|
||||||
|
- Use null when uncertain.
|
||||||
|
- If a series is provided, sequence may be null when it cannot be inferred confidently.
|
||||||
|
- Do not invent authors or series when there is weak evidence.
|
||||||
|
- Respond with one book object only.
|
||||||
|
|
||||||
|
Current path-derived metadata:
|
||||||
|
${JSON.stringify(currentPathMetadata, null, 2)}
|
||||||
|
|
||||||
|
Folder context:
|
||||||
|
${JSON.stringify(folderContext, null, 2)}
|
||||||
|
|
||||||
|
Audio files:
|
||||||
|
${JSON.stringify(audioFileCandidates, null, 2)}
|
||||||
|
|
||||||
|
Ebook metadata:
|
||||||
|
${JSON.stringify(ebookMetadata, null, 2)}`
|
||||||
|
|
||||||
|
const payload = await this.createResponse(prompt)
|
||||||
|
const validated = this.validateScanMetadataPayload(payload)
|
||||||
|
Logger.info(`[OpenAI] Scan-metadata result for "${libraryItemData.relPath}" ${this.summarizeScanMetadataForLog(validated)}`)
|
||||||
|
return validated
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = OpenAI
|
module.exports = OpenAI
|
||||||
|
|
|
||||||
|
|
@ -19,12 +19,15 @@ const LibraryFile = require('../objects/files/LibraryFile')
|
||||||
|
|
||||||
const RssFeedManager = require('../managers/RssFeedManager')
|
const RssFeedManager = require('../managers/RssFeedManager')
|
||||||
const CoverManager = require('../managers/CoverManager')
|
const CoverManager = require('../managers/CoverManager')
|
||||||
|
const OpenAI = require('../providers/OpenAI')
|
||||||
|
|
||||||
const LibraryScan = require('./LibraryScan')
|
const LibraryScan = require('./LibraryScan')
|
||||||
const OpfFileScanner = require('./OpfFileScanner')
|
const OpfFileScanner = require('./OpfFileScanner')
|
||||||
const NfoFileScanner = require('./NfoFileScanner')
|
const NfoFileScanner = require('./NfoFileScanner')
|
||||||
const AbsMetadataFileScanner = require('./AbsMetadataFileScanner')
|
const AbsMetadataFileScanner = require('./AbsMetadataFileScanner')
|
||||||
|
|
||||||
|
// Module-level OpenAI provider instance; used by openAIPathMetadata below
const openAI = new OpenAI()
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Metadata for books pulled from files
|
* Metadata for books pulled from files
|
||||||
* @typedef BookMetadataObject
|
* @typedef BookMetadataObject
|
||||||
|
|
@ -756,6 +759,43 @@ class BookScanner {
|
||||||
return null
|
return null
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async openAIPathMetadata() {
|
||||||
|
if (!openAI.isConfigured) {
|
||||||
|
this.libraryScan.addLog(LogLevel.DEBUG, `Skipping OpenAI path metadata for "${this.libraryItemData.relPath}" because OpenAI is not configured`)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
const inferredMetadata = await openAI
|
||||||
|
.inferBookMetadataFromScan(this.libraryItemData, this.audioFiles, this.ebookFileScanData)
|
||||||
|
.catch((error) => {
|
||||||
|
this.libraryScan.addLog(LogLevel.WARN, `OpenAI path metadata failed for "${this.libraryItemData.relPath}": ${error.message}`)
|
||||||
|
return null
|
||||||
|
})
|
||||||
|
if (!inferredMetadata) return
|
||||||
|
|
||||||
|
const directKeys = ['title', 'subtitle', 'publishedYear', 'publisher', 'isbn', 'asin', 'language']
|
||||||
|
directKeys.forEach((key) => {
|
||||||
|
if (inferredMetadata[key]) {
|
||||||
|
this.bookMetadata[key] = inferredMetadata[key]
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
if (inferredMetadata.authors.length) {
|
||||||
|
this.bookMetadata.authors = inferredMetadata.authors
|
||||||
|
}
|
||||||
|
if (inferredMetadata.narrators.length) {
|
||||||
|
this.bookMetadata.narrators = inferredMetadata.narrators
|
||||||
|
}
|
||||||
|
if (inferredMetadata.seriesName) {
|
||||||
|
this.bookMetadata.series = [
|
||||||
|
{
|
||||||
|
name: inferredMetadata.seriesName,
|
||||||
|
sequence: inferredMetadata.sequence || null
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Metadata from .nfo file
|
* Metadata from .nfo file
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -132,4 +132,51 @@ describe('OpenAI', () => {
|
||||||
expect(result[0].sequence).to.equal('1')
|
expect(result[0].sequence).to.equal('1')
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Covers both accepted payload shapes: wrapped in `book` and the bare book object
describe('validateScanMetadataPayload', () => {
  it('normalizes valid scan metadata', () => {
    const wrappedPayload = {
      book: {
        title: ' Neuromancer ',
        subtitle: '20th Anniversary Edition',
        authors: ['William Gibson', 'William Gibson'],
        narrators: ['Robertson Dean'],
        seriesName: 'Sprawl Trilogy',
        sequence: '1',
        publishedYear: '1984',
        asin: 'B000FC11ZG',
        isbn: '978-0441569595',
        language: 'English',
        reason: 'path and tags match'
      }
    }

    const { title, authors, seriesName, sequence, asin, isbn } = openAI.validateScanMetadataPayload(wrappedPayload)

    expect(title).to.equal('Neuromancer')
    expect(authors).to.deep.equal(['William Gibson'])
    expect(seriesName).to.equal('Sprawl Trilogy')
    expect(sequence).to.equal('1')
    expect(asin).to.equal('B000FC11ZG')
    expect(isbn).to.equal('9780441569595')
  })

  it('returns nulls for malformed scan metadata fields', () => {
    const barePayload = {
      title: '',
      authors: [''],
      seriesName: 'Series Name',
      sequence: 'not-a-sequence',
      publishedYear: '84',
      asin: 'bad',
      isbn: 'nope'
    }

    const { title, authors, seriesName, sequence, publishedYear, asin, isbn } = openAI.validateScanMetadataPayload(barePayload)

    expect(title).to.equal(null)
    expect(authors).to.deep.equal([])
    expect(seriesName).to.equal('Series Name')
    expect(sequence).to.equal(null)
    expect(publishedYear).to.equal(null)
    expect(asin).to.equal(null)
    expect(isbn).to.equal(null)
  })
})
|
||||||
})
|
})
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue