This commit is contained in:
korjik 2026-04-22 09:41:28 -07:00
parent e5261d137f
commit 58776ca983
6 changed files with 352 additions and 2 deletions

View file

@ -127,6 +127,7 @@ export default {
autoScanCronExpression: null,
hideSingleBookSeries: false,
onlyShowLaterBooksInContinueSeries: false,
openAIDirectoryGrouping: false,
metadataPrecedence: ['folderStructure', 'audioMetatags', 'nfoFile', 'txtFiles', 'opfFile', 'absMetadata'],
markAsFinishedPercentComplete: null,
markAsFinishedTimeRemaining: 10

View file

@ -1,5 +1,12 @@
<template>
<div class="w-full h-full px-1 md:px-4 py-1 mb-4">
<div class="flex items-center justify-between md:justify-start mb-4">
<div class="flex items-center">
<ui-toggle-switch v-model="openAIDirectoryGrouping" @input="updated" />
<p class="pl-4 text-sm text-gray-300">Use OpenAI to interpret poor directory trees during library scans</p>
</div>
</div>
<div class="flex items-center justify-between mb-2">
<h2 class="text-base md:text-lg text-gray-200">{{ $strings.HeaderMetadataOrderOfPrecedence }}</h2>
<ui-btn small @click="resetToDefault">{{ $strings.ButtonResetToDefault }}</ui-btn>
@ -92,7 +99,8 @@ export default {
include: true
}
},
metadataSourceMapped: []
metadataSourceMapped: [],
openAIDirectoryGrouping: false
}
},
computed: {
@ -131,6 +139,7 @@ export default {
metadataSourceIds.reverse()
return {
settings: {
openAIDirectoryGrouping: !!this.openAIDirectoryGrouping,
metadataPrecedence: metadataSourceIds
}
}
@ -145,6 +154,7 @@ export default {
this.$emit('update', this.getLibraryData())
},
init() {
this.openAIDirectoryGrouping = !!this.librarySettings.openAIDirectoryGrouping
const metadataPrecedence = this.librarySettings.metadataPrecedence || []
this.metadataSourceMapped = metadataPrecedence.map((source) => this.metadataSourceData[source]).filter((s) => s)

View file

@ -11,6 +11,7 @@ const Logger = require('../Logger')
* @property {boolean} audiobooksOnly
* @property {boolean} hideSingleBookSeries Do not show series that only have 1 book
* @property {boolean} onlyShowLaterBooksInContinueSeries Skip showing books that are earlier than the max sequence read
* @property {boolean} openAIDirectoryGrouping Allow OpenAI to infer library-item grouping from poor directory structures
* @property {string[]} metadataPrecedence
* @property {number} markAsFinishedTimeRemaining Time remaining in seconds to mark as finished. (defaults to 10s)
* @property {number} markAsFinishedPercentComplete Percent complete to mark as finished (0-100). If this is set it will be used over markAsFinishedTimeRemaining.
@ -74,6 +75,7 @@ class Library extends Model {
epubsAllowScriptedContent: false,
hideSingleBookSeries: false,
onlyShowLaterBooksInContinueSeries: false,
openAIDirectoryGrouping: false,
metadataPrecedence: this.defaultMetadataPrecedence,
markAsFinishedPercentComplete: null,
markAsFinishedTimeRemaining: 10

View file

@ -42,6 +42,14 @@ class OpenAI {
})
}
summarizeDirectoryGroupingForLog(grouping) {
return JSON.stringify({
path: grouping.path,
groupId: grouping.groupId,
reason: grouping.reason || ''
})
}
normalizePathForPrompt(filePath) {
if (!filePath || typeof filePath !== 'string') return null
return filePath.replace(/\\/g, '/')
@ -272,6 +280,42 @@ class OpenAI {
}
}
validateDirectoryGroupingPayload(payload, mediaFiles) {
const resultFiles = payload?.files
if (!Array.isArray(resultFiles)) {
throw new Error('OpenAI returned invalid directory-grouping payload')
}
const expectedPaths = new Set(mediaFiles.map((file) => file.path))
const resultByPath = new Map()
resultFiles.forEach((file) => {
if (!expectedPaths.has(file?.path)) {
Logger.warn(`[OpenAI] Ignoring unknown media path "${file?.path}" in directory-grouping response`)
return
}
if (resultByPath.has(file.path)) {
Logger.warn(`[OpenAI] Ignoring duplicate media path "${file.path}" in directory-grouping response`)
return
}
resultByPath.set(file.path, file)
})
return mediaFiles.map((file) => {
const result = resultByPath.get(file.path)
const groupId = this.normalizeOptionalString(result?.groupId, 120) || file.path
const reason = this.normalizeOptionalString(result?.reason, 600) || (result ? '' : 'OpenAI omitted this media file; kept it as its own item')
if (!result) {
Logger.warn(`[OpenAI] Missing directory-grouping result for media path "${file.path}" - keeping it separate`)
}
return {
path: file.path,
groupId,
reason
}
})
}
validateBookIds(resultBooks, books) {
if (!Array.isArray(resultBooks) || resultBooks.length !== books.length) {
throw new Error('OpenAI returned an invalid number of books')
@ -649,6 +693,55 @@ ${JSON.stringify(ebookMetadata, null, 2)}`
Logger.info(`[OpenAI] Scan-metadata result for "${libraryItemData.relPath}" ${this.summarizeScanMetadataForLog(validated)}`)
return validated
}
async inferDirectoryGroupingFromPaths(containerPath, mediaFiles) {
if (!this.isConfigured) {
throw new Error('OpenAI API key is not configured')
}
if (!Array.isArray(mediaFiles) || !mediaFiles.length) {
return []
}
Logger.info(`[OpenAI] Inferring directory grouping for "${containerPath}" with ${mediaFiles.length} media files`)
mediaFiles.forEach((file) => {
Logger.info(`[OpenAI] Directory-grouping candidate ${JSON.stringify(file)}`)
})
const prompt = `You infer logical audiobook item grouping from messy filesystem paths.
Return only valid JSON in this shape:
{
"files": [
{
"path": "relative/path/to/media-file.m4b",
"groupId": "short-group-label",
"reason": "brief reason"
}
]
}
Rules:
- Include every provided media file exactly once.
- Files that belong to the same logical audiobook item must share the same groupId.
- Files for different books must use different groupIds even if they are in the same series container.
- Use path, filename, parent directories, and current grouping hints as evidence.
- Prefer preserving existing grouping when it already looks reasonable.
- Do not merge different titled books just because they share a series or author folder.
- groupId only needs to be stable within this one response.
Container path:
${JSON.stringify(containerPath)}
Media files:
${JSON.stringify(mediaFiles, null, 2)}`
const payload = await this.createResponse(prompt)
const validated = this.validateDirectoryGroupingPayload(payload, mediaFiles)
validated.forEach((grouping) => {
Logger.info(`[OpenAI] Directory-grouping result ${this.summarizeDirectoryGroupingForLog(grouping)}`)
})
return validated
}
}
module.exports = OpenAI

View file

@ -7,6 +7,7 @@ const Database = require('../Database')
const fs = require('../libs/fsExtra')
const fileUtils = require('../utils/fileUtils')
const scanUtils = require('../utils/scandir')
const globals = require('../utils/globals')
const { LogLevel, ScanResult } = require('../utils/constants')
const libraryFilters = require('../utils/queries/libraryFilters')
const TaskManager = require('../managers/TaskManager')
@ -14,6 +15,10 @@ const LibraryItemScanner = require('./LibraryItemScanner')
const LibraryScan = require('./LibraryScan')
const LibraryItemScanData = require('./LibraryItemScanData')
const Task = require('../objects/Task')
const OpenAI = require('../providers/OpenAI')
// Module-level OpenAI provider instance used by the scanner's directory-grouping step
const openAI = new OpenAI()
// Matches a whole directory name of the form "cd", "disc" or "disk" followed by optional whitespace
// and 1-3 digits (case-insensitive), e.g. "CD1", "Disc 02", "disk 3"
const DISC_DIR_REGEX = /^(cd|dis[ck])\s*\d{1,3}$/i
class LibraryScanner {
constructor() {
@ -309,7 +314,10 @@ class LibraryScanner {
}
const fileItems = await fileUtils.recurseFiles(folderPath)
const libraryItemGrouping = scanUtils.groupFileItemsIntoLibraryItemDirs(library.mediaType, fileItems, library.settings.audiobooksOnly)
let libraryItemGrouping = scanUtils.groupFileItemsIntoLibraryItemDirs(library.mediaType, fileItems, library.settings.audiobooksOnly)
if (library.mediaType === 'book' && library.settings.openAIDirectoryGrouping && openAI.isConfigured) {
libraryItemGrouping = await this.applyOpenAIDirectoryGrouping(folderPath, fileItems, libraryItemGrouping, library.settings.audiobooksOnly)
}
if (!Object.keys(libraryItemGrouping).length) {
Logger.error(`Root path has no media folders: ${folderPath}`)
@ -368,6 +376,201 @@ class LibraryScanner {
return items
}
expandGroupingFiles(groupPath, groupedFiles) {
if (groupPath === groupedFiles) return [groupPath]
return groupedFiles.map((file) => {
if (file === groupPath || file.startsWith(groupPath + '/')) return file
return Path.posix.join(groupPath, file)
})
}
getOpenAIDirectoryGroupingCandidates(fileItems, mediaType, audiobooksOnly, libraryItemGrouping) {
if (mediaType !== 'book') return []
const mediaFileItems = fileItems.filter((item) => isMediaFilePath(mediaType, item.path, audiobooksOnly))
const candidatesByContainer = new Map()
mediaFileItems.forEach((item) => {
const topLevelDir = item.path.split('/').filter(Boolean)[0]
if (!topLevelDir || !item.path.includes('/')) return
if (!candidatesByContainer.has(topLevelDir)) {
candidatesByContainer.set(topLevelDir, [])
}
candidatesByContainer.get(topLevelDir).push(item)
})
return [...candidatesByContainer.entries()]
.map(([containerPath, containerMediaFileItems]) => {
const defaultGroupKeys = Object.keys(libraryItemGrouping).filter((groupPath) => groupPath === containerPath || groupPath.startsWith(containerPath + '/'))
const hasDirectMediaFileInContainer = containerMediaFileItems.some((item) => Path.posix.dirname(item.path) === containerPath)
const hasMixedDefaultFileAndDirectoryGroups =
defaultGroupKeys.some((groupPath) => Path.posix.extname(groupPath)) && defaultGroupKeys.some((groupPath) => !Path.posix.extname(groupPath))
const maxRelativeDepth = Math.max(...containerMediaFileItems.map((item) => Path.posix.relative(containerPath, item.path).split('/').filter(Boolean).length))
const suspicious = defaultGroupKeys.length <= 1 || hasDirectMediaFileInContainer || hasMixedDefaultFileAndDirectoryGroups || maxRelativeDepth > 2
if (!suspicious || containerMediaFileItems.length < 2 || containerMediaFileItems.length > 40) {
return null
}
const groupingHints = containerMediaFileItems.map((item) => ({
path: item.path,
filename: item.name,
parentDir: item.reldirpath || '',
folderHierarchy: item.path.split('/').slice(0, -1).filter(Boolean),
currentGroup: defaultGroupKeys.find((groupPath) => this.expandGroupingFiles(groupPath, libraryItemGrouping[groupPath]).includes(item.path)) || null
}))
return {
containerPath,
groupingHints
}
})
.filter(Boolean)
}
getDirectoryGroupingDescriptor(containerPath, mediaPaths) {
const sortedMediaPaths = [...mediaPaths].sort((a, b) => a.localeCompare(b))
if (sortedMediaPaths.length === 1) {
const mediaDir = Path.posix.dirname(sortedMediaPaths[0])
if (mediaDir && mediaDir !== '.' && mediaDir !== containerPath) {
return {
groupPath: mediaDir,
isFile: false
}
}
return {
groupPath: sortedMediaPaths[0],
isFile: true
}
}
const splitPaths = sortedMediaPaths.map((mediaPath) => mediaPath.split('/'))
const commonParts = []
for (let i = 0; i < Math.min(...splitPaths.map((parts) => parts.length - 1)); i++) {
const segment = splitPaths[0][i]
if (splitPaths.every((parts) => parts[i] === segment)) {
commonParts.push(segment)
} else {
break
}
}
const commonDir = commonParts.join('/')
if (commonDir) {
const canUseFolderGroup = sortedMediaPaths.every((mediaPath) => {
const relativeParts = Path.posix.relative(commonDir, mediaPath).split('/').filter(Boolean)
return relativeParts.length === 1 || (relativeParts.length === 2 && DISC_DIR_REGEX.test(relativeParts[0]))
})
if (canUseFolderGroup) {
return {
groupPath: commonDir,
isFile: false
}
}
}
return {
groupPath: sortedMediaPaths[0],
isFile: true
}
}
buildLibraryItemGroupingFromOpenAIAssignments(containerPath, fileItems, assignments, mediaType, audiobooksOnly) {
const mediaPathsByGroupId = new Map()
assignments.forEach((assignment) => {
if (!mediaPathsByGroupId.has(assignment.groupId)) {
mediaPathsByGroupId.set(assignment.groupId, [])
}
mediaPathsByGroupId.get(assignment.groupId).push(assignment.path)
})
const groupRecords = [...mediaPathsByGroupId.entries()].map(([groupId, mediaPaths]) => {
const descriptor = this.getDirectoryGroupingDescriptor(containerPath, mediaPaths)
return {
groupId,
descriptor,
mediaPaths: [...mediaPaths].sort((a, b) => a.localeCompare(b)),
files: []
}
})
groupRecords.forEach((groupRecord) => {
if (groupRecord.descriptor.isFile) {
groupRecord.files.push(...groupRecord.mediaPaths)
} else {
groupRecord.files.push(...groupRecord.mediaPaths.map((mediaPath) => Path.posix.relative(groupRecord.descriptor.groupPath, mediaPath)))
}
})
const nonMediaItems = fileItems.filter((item) => !isMediaFilePath(mediaType, item.path, audiobooksOnly))
nonMediaItems.forEach((item) => {
const itemStem = Path.basename(item.name, item.extension)
let matchingGroup = null
const basenameMatches = groupRecords.filter((groupRecord) =>
groupRecord.mediaPaths.some((mediaPath) => Path.posix.dirname(mediaPath) === item.reldirpath && Path.basename(mediaPath, Path.extname(mediaPath)) === itemStem)
)
if (basenameMatches.length === 1) {
matchingGroup = basenameMatches[0]
}
if (!matchingGroup) {
const directoryMatches = groupRecords.filter((groupRecord) => {
if (!groupRecord.descriptor.isFile) {
return item.path.startsWith(groupRecord.descriptor.groupPath + '/')
}
return Path.posix.dirname(groupRecord.descriptor.groupPath) === item.reldirpath
})
if (directoryMatches.length === 1) {
matchingGroup = directoryMatches[0]
}
}
if (!matchingGroup) return
const fileEntry = matchingGroup.descriptor.isFile ? item.path : Path.posix.relative(matchingGroup.descriptor.groupPath, item.path)
if (!matchingGroup.files.includes(fileEntry)) {
matchingGroup.files.push(fileEntry)
}
})
return groupRecords.reduce((acc, groupRecord) => {
acc[groupRecord.descriptor.groupPath] = [...new Set(groupRecord.files)]
return acc
}, {})
}
async applyOpenAIDirectoryGrouping(folderPath, fileItems, libraryItemGrouping, audiobooksOnly) {
const candidates = this.getOpenAIDirectoryGroupingCandidates(fileItems, 'book', audiobooksOnly, libraryItemGrouping)
if (!candidates.length) return libraryItemGrouping
let updatedGrouping = { ...libraryItemGrouping }
for (const candidate of candidates) {
Logger.info(`[LibraryScanner] Evaluating OpenAI directory grouping for "${candidate.containerPath}" with ${candidate.groupingHints.length} media files`)
const containerFileItems = fileItems.filter((item) => item.path === candidate.containerPath || item.path.startsWith(candidate.containerPath + '/'))
const assignments = await openAI.inferDirectoryGroupingFromPaths(candidate.containerPath, candidate.groupingHints).catch((error) => {
Logger.warn(`[LibraryScanner] OpenAI directory grouping failed for "${candidate.containerPath}": ${error.message}`)
return null
})
if (!assignments?.length) continue
const aiGrouping = this.buildLibraryItemGroupingFromOpenAIAssignments(candidate.containerPath, containerFileItems, assignments, 'book', audiobooksOnly)
if (!Object.keys(aiGrouping).length) continue
updatedGrouping = Object.fromEntries(
Object.entries(updatedGrouping).filter(([groupPath]) => !(groupPath === candidate.containerPath || groupPath.startsWith(candidate.containerPath + '/')))
)
updatedGrouping = {
...updatedGrouping,
...aiGrouping
}
Logger.info(`[LibraryScanner] Applied OpenAI directory grouping for "${candidate.containerPath}" -> ${Object.keys(aiGrouping).length} library items`)
}
return updatedGrouping
}
/**
* Scan files changed from Watcher
* @param {import('../Watcher').PendingFileUpdate[]} fileUpdates
@ -650,6 +853,14 @@ function ItemToFileInoMatch(libraryItem1, libraryItem2) {
return libraryItem1.isFile && libraryItem2.libraryFiles.some((lf) => lf.ino === libraryItem1.ino)
}
/**
 * Decide from the file extension whether a path is a media file for the given library type.
 *
 * The extension is compared in lower case against `globals.SupportedAudioTypes` and, for book
 * libraries that are not audiobooks-only, also against `globals.SupportedEbookTypes`.
 *
 * @param {string} mediaType `'podcast'` restricts the check to audio types
 * @param {string} filepath
 * @param {boolean} [audiobooksOnly=false] restrict the check to audio types
 * @returns {boolean} false when the path has no extension or the extension is not supported
 */
function isMediaFilePath(mediaType, filepath, audiobooksOnly = false) {
  const extension = Path.extname(filepath).slice(1).toLowerCase()
  if (!extension) return false

  const isAudio = globals.SupportedAudioTypes.includes(extension)
  if (mediaType === 'podcast' || audiobooksOnly) {
    return isAudio
  }
  return isAudio || globals.SupportedEbookTypes.includes(extension)
}
/**
 * Check whether two library items carry the same `ino` value.
 *
 * @param {{ ino: * }} libraryItem1
 * @param {{ ino: * }} libraryItem2
 * @returns {boolean} true when both `ino` values are strictly equal
 */
function ItemToItemInoMatch(libraryItem1, libraryItem2) {
  const { ino: firstIno } = libraryItem1
  const { ino: secondIno } = libraryItem2
  return firstIno === secondIno
}

View file

@ -179,4 +179,37 @@ describe('OpenAI', () => {
expect(result.isbn).to.equal(null)
})
})
// Covers validateDirectoryGroupingPayload: normalization of valid rows, backfilling of missing or
// unusable rows, first-row-wins handling of repeated paths, and rejection of malformed payloads.
describe('validateDirectoryGroupingPayload', () => {
  it('normalizes valid directory-grouping payload', () => {
    const result = openAI.validateDirectoryGroupingPayload(
      {
        files: [
          { path: 'Series Alpha/Book One.m4b', groupId: ' book-one ', reason: 'same book' },
          { path: 'Series Alpha/Disc 1/Book Two Part 1.mp3', groupId: 'book-two', reason: 'disc set' }
        ]
      },
      [{ path: 'Series Alpha/Book One.m4b' }, { path: 'Series Alpha/Disc 1/Book Two Part 1.mp3' }]
    )
    expect(result[0].groupId).to.equal('book-one')
    expect(result[1].groupId).to.equal('book-two')
  })

  it('backfills missing or invalid grouping rows', () => {
    const result = openAI.validateDirectoryGroupingPayload(
      {
        files: [
          { path: 'unknown/path.m4b', groupId: 'ignored' },
          { path: 'Series Alpha/Book One.m4b', groupId: '' }
        ]
      },
      [{ path: 'Series Alpha/Book One.m4b' }, { path: 'Series Alpha/Book Two.m4b' }]
    )
    expect(result[0].groupId).to.equal('Series Alpha/Book One.m4b')
    expect(result[1].groupId).to.equal('Series Alpha/Book Two.m4b')
    expect(result[1].reason).to.contain('omitted this media file')
  })

  it('keeps the first row when a media path is repeated', () => {
    const result = openAI.validateDirectoryGroupingPayload(
      {
        files: [
          { path: 'Series Alpha/Book One.m4b', groupId: 'first', reason: 'first row' },
          { path: 'Series Alpha/Book One.m4b', groupId: 'second', reason: 'repeated row' }
        ]
      },
      [{ path: 'Series Alpha/Book One.m4b' }]
    )
    expect(result).to.have.lengthOf(1)
    expect(result[0].groupId).to.equal('first')
  })

  it('throws when the payload has no files array', () => {
    const mediaFiles = [{ path: 'Series Alpha/Book One.m4b' }]
    expect(() => openAI.validateDirectoryGroupingPayload({}, mediaFiles)).to.throw('OpenAI returned invalid directory-grouping payload')
    expect(() => openAI.validateDirectoryGroupingPayload(null, mediaFiles)).to.throw('OpenAI returned invalid directory-grouping payload')
  })
})
})