@@ -83,6 +97,18 @@ export default {
}
this.$store.commit('globals/setConfirmPrompt', payload)
},
+ dedupeBooksWithAI() {
+ const payload = {
+ message: 'Deduplicate books in this library with AI? Duplicate items chosen for removal will be deleted from the database and file system.',
+ callback: (confirmed) => {
+ if (confirmed) {
+ this.runBookDedupe()
+ }
+ },
+ type: 'yesNo'
+ }
+ this.$store.commit('globals/setConfirmPrompt', payload)
+ },
runSeriesDetection(onlyMissing = true) {
this.$emit('update:processing', true)
this.$axios
@@ -102,6 +128,25 @@ export default {
this.$emit('update:processing', false)
})
},
+ runBookDedupe() {
+ this.$emit('update:processing', true)
+ this.$axios
+ .$post(`/api/libraries/${this.libraryId}/dedupe-books-with-ai?hard=1`)
+ .then((data) => {
+ if (!data.duplicatesRemoved) {
+ this.$toast.info(this.$strings.ToastNoUpdatesNecessary)
+ } else {
+ this.$toast.success(`AI removed ${data.duplicatesRemoved} duplicate books`)
+ }
+ })
+ .catch((error) => {
+ console.error('Failed to dedupe books with AI', error)
+ this.$toast.error(error.response?.data || this.$strings.ToastFailedToUpdate)
+ })
+ .finally(() => {
+ this.$emit('update:processing', false)
+ })
+ },
removeAllMetadataClick(ext) {
const payload = {
message: this.$getString('MessageConfirmRemoveMetadataFiles', [ext]),
diff --git a/server/controllers/LibraryController.js b/server/controllers/LibraryController.js
index 045b6651..ea01a7be 100644
--- a/server/controllers/LibraryController.js
+++ b/server/controllers/LibraryController.js
@@ -1565,6 +1565,97 @@ class LibraryController {
return [...groups.values()]
}
+ normalizeBookTitleForAIDedupe(title) {
+ if (!title || typeof title !== 'string') return null
+ return title
+ .toLowerCase()
+      .replace(/\([^)]*\)/g, ' ')
+      .replace(/\[[^\]]*]/g, ' ')
+      .replace(/[^a-z0-9]+/g, ' ')
+      .replace(/\b(unabridged|abridged|audiobook|audio book)\b/g, ' ')
+      .replace(/\s+/g, ' ')
+ .trim()
+ }
+
+ groupLibraryBooksForAIDedupe(libraryItems) {
+ const parent = new Map()
+ const find = (id) => {
+ if (parent.get(id) !== id) {
+ parent.set(id, find(parent.get(id)))
+ }
+ return parent.get(id)
+ }
+ const union = (a, b) => {
+ const rootA = find(a)
+ const rootB = find(b)
+ if (rootA !== rootB) parent.set(rootB, rootA)
+ }
+
+ libraryItems.forEach((libraryItem) => parent.set(libraryItem.id, libraryItem.id))
+
+ const candidateMaps = [new Map(), new Map(), new Map()]
+ libraryItems.forEach((libraryItem) => {
+ const metadata = libraryItem.media.oldMetadataToJSON()
+ const primaryAuthor = metadata.authors?.[0]?.name?.trim().toLowerCase() || null
+ const normalizedTitle = LibraryController.prototype.normalizeBookTitleForAIDedupe.call(this, metadata.title || '')
+ const isbn = metadata.isbn?.replace(/[-\s]/g, '').toLowerCase() || null
+ const asin = metadata.asin?.trim().toLowerCase() || null
+
+ const candidateKeys = []
+ if (primaryAuthor && normalizedTitle) candidateKeys.push([candidateMaps[0], `${primaryAuthor}::${normalizedTitle}`])
+ if (isbn) candidateKeys.push([candidateMaps[1], `isbn::${isbn}`])
+ if (asin) candidateKeys.push([candidateMaps[2], `asin::${asin}`])
+
+ candidateKeys.forEach(([candidateMap, key]) => {
+ if (!candidateMap.has(key)) candidateMap.set(key, [])
+ candidateMap.get(key).push(libraryItem)
+ })
+ })
+
+ candidateMaps.forEach((candidateMap) => {
+ candidateMap.forEach((groupItems) => {
+ if (groupItems.length < 2) return
+ for (let i = 1; i < groupItems.length; i++) {
+ union(groupItems[0].id, groupItems[i].id)
+ }
+ })
+ })
+
+ const grouped = new Map()
+ libraryItems.forEach((libraryItem) => {
+ const root = find(libraryItem.id)
+ if (!grouped.has(root)) grouped.set(root, [])
+ grouped.get(root).push(libraryItem)
+ })
+
+ return [...grouped.values()]
+ .filter((groupItems) => groupItems.length > 1)
+ .map((groupItems) => ({
+ label: groupItems.map((libraryItem) => libraryItem.media.title).join(' | '),
+        libraryItems: [...groupItems].sort((a, b) => (a.media.title || '').localeCompare(b.media.title || ''))
+ }))
+ }
+
+ getDeleteDependenciesForLibraryItem(libraryItem) {
+ const mediaItemIds = []
+ const authorIds = []
+ const seriesIds = []
+
+ mediaItemIds.push(libraryItem.media.id)
+ if (libraryItem.media.authors?.length) {
+ authorIds.push(...libraryItem.media.authors.map((author) => author.id))
+ }
+ if (libraryItem.media.series?.length) {
+ seriesIds.push(...libraryItem.media.series.map((series) => series.id))
+ }
+
+ return {
+ mediaItemIds,
+ authorIds,
+ seriesIds
+ }
+ }
+
/**
* POST: /api/libraries/:id/detect-series-with-ai
*
@@ -1706,6 +1797,91 @@ class LibraryController {
}
}
+ /**
+ * POST: /api/libraries/:id/dedupe-books-with-ai
+ *
+ * @this {import('../routers/ApiRouter')}
+ *
+ * @param {LibraryControllerRequest} req
+ * @param {Response} res
+ */
+ async dedupeBooksWithAI(req, res) {
+ if (!req.user.canUpdate) {
+ Logger.warn(`[LibraryController] User "${req.user.username}" attempted AI dedupe without update permissions`)
+ return res.sendStatus(403)
+ }
+ if (req.library.mediaType !== 'book') {
+ return res.status(400).send('AI book dedupe is only available for book libraries')
+ }
+ if (!openAI.isConfigured) {
+ return res.status(400).send('OpenAI is not configured')
+ }
+
+    const hardDelete = req.query.hard === '1'
+
+ try {
+ const libraryItems = await LibraryController.prototype.getLibraryBooksForAISeriesDetection.call(this, req.library.id)
+ const candidateGroups = LibraryController.prototype.groupLibraryBooksForAIDedupe.call(this, libraryItems)
+
+ let groupsProcessed = 0
+ let duplicatesRemoved = 0
+ const removedIds = new Set()
+ const authorIdsToCheck = new Set()
+ const seriesIdsToCheck = new Set()
+
+ for (const candidateGroup of candidateGroups) {
+ const activeLibraryItems = candidateGroup.libraryItems.filter((libraryItem) => !removedIds.has(libraryItem.id))
+ if (activeLibraryItems.length < 2) continue
+
+ Logger.info(`[LibraryController] AI dedupe evaluating candidate group "${candidateGroup.label}" with ${activeLibraryItems.length} books`)
+ const decisions = await openAI.detectDuplicateBooks(activeLibraryItems)
+ groupsProcessed++
+
+ for (const decision of decisions) {
+ for (const duplicateId of decision.duplicateIds) {
+ if (removedIds.has(duplicateId) || duplicateId === decision.keepId) continue
+ const duplicateItem = activeLibraryItems.find((libraryItem) => libraryItem.id === duplicateId)
+ if (!duplicateItem) continue
+
+ Logger.info(
+ `[LibraryController] AI dedupe removing duplicate "${duplicateItem.media.title}" (${duplicateItem.id}) keeping "${decision.keepId}" reason="${decision.reason || ''}"`
+ )
+
+ const deleteDependencies = LibraryController.prototype.getDeleteDependenciesForLibraryItem.call(this, duplicateItem)
+ await this.handleDeleteLibraryItem(duplicateItem.id, deleteDependencies.mediaItemIds, req.library.id)
+ if (hardDelete) {
+ await fs.remove(duplicateItem.path).catch((error) => {
+ Logger.error(`[LibraryController] Failed to hard-delete duplicate item path "${duplicateItem.path}"`, error)
+ })
+ }
+
+ deleteDependencies.authorIds.forEach((authorId) => authorIdsToCheck.add(authorId))
+ deleteDependencies.seriesIds.forEach((seriesId) => seriesIdsToCheck.add(seriesId))
+ removedIds.add(duplicateItem.id)
+ duplicatesRemoved++
+ }
+ }
+ }
+
+ await this.checkRemoveAuthorsWithNoBooks([...authorIdsToCheck])
+ await this.checkRemoveEmptySeries([...seriesIdsToCheck])
+ await Database.resetLibraryIssuesFilterData(req.library.id)
+
+ Logger.info(
+ `[LibraryController] AI book dedupe completed for library "${req.library.name}" - groupsProcessed=${groupsProcessed}, duplicatesRemoved=${duplicatesRemoved}, hardDelete=${hardDelete}`
+ )
+
+ res.json({
+ groupsProcessed,
+ duplicatesRemoved,
+ hardDelete
+ })
+ } catch (error) {
+ Logger.error(`[LibraryController] Failed AI dedupe for library "${req.library.name}"`, error)
+ res.status(500).send(error.message || 'Failed to dedupe books with AI')
+ }
+ }
+
/**
*
* @param {RequestWithUser} req
diff --git a/server/providers/OpenAI.js b/server/providers/OpenAI.js
index 6d08cd99..1b301282 100644
--- a/server/providers/OpenAI.js
+++ b/server/providers/OpenAI.js
@@ -50,6 +50,14 @@ class OpenAI {
})
}
+ summarizeDuplicateDecisionForLog(decision) {
+ return JSON.stringify({
+ keepId: decision.keepId,
+ duplicateIds: decision.duplicateIds,
+ reason: decision.reason || ''
+ })
+ }
+
normalizePathForPrompt(filePath) {
if (!filePath || typeof filePath !== 'string') return null
return filePath.replace(/\\/g, '/')
@@ -316,6 +324,51 @@ class OpenAI {
})
}
+ validateDuplicateBooksPayload(payload, books) {
+ const resultGroups = Array.isArray(payload?.groups) ? payload.groups : []
+ const expectedIds = new Set(books.map((book) => book.id))
+ const consumedIds = new Set()
+ const validated = []
+
+ resultGroups.forEach((group) => {
+ const keepId = this.normalizeOptionalString(group?.keepId, 120)
+ if (!keepId || !expectedIds.has(keepId)) {
+ Logger.warn(`[OpenAI] Ignoring duplicate-books group with invalid keepId "${group?.keepId}"`)
+ return
+ }
+ if (consumedIds.has(keepId)) {
+ Logger.warn(`[OpenAI] Ignoring duplicate-books group because keepId "${keepId}" was already used`)
+ return
+ }
+
+ const duplicateIds = Array.isArray(group?.duplicateIds)
+ ? group.duplicateIds
+ .map((duplicateId) => this.normalizeOptionalString(duplicateId, 120))
+ .filter((duplicateId) => duplicateId && expectedIds.has(duplicateId) && duplicateId !== keepId)
+ : []
+
+ const dedupedDuplicateIds = []
+ const seenDuplicateIds = new Set()
+ duplicateIds.forEach((duplicateId) => {
+ if (seenDuplicateIds.has(duplicateId) || consumedIds.has(duplicateId)) return
+ seenDuplicateIds.add(duplicateId)
+ dedupedDuplicateIds.push(duplicateId)
+ })
+
+ if (!dedupedDuplicateIds.length) return
+
+ consumedIds.add(keepId)
+ dedupedDuplicateIds.forEach((duplicateId) => consumedIds.add(duplicateId))
+ validated.push({
+ keepId,
+ duplicateIds: dedupedDuplicateIds,
+ reason: this.normalizeOptionalString(group?.reason, 600) || ''
+ })
+ })
+
+ return validated
+ }
+
validateBookIds(resultBooks, books) {
if (!Array.isArray(resultBooks) || resultBooks.length !== books.length) {
throw new Error('OpenAI returned an invalid number of books')
@@ -742,6 +795,93 @@ ${JSON.stringify(mediaFiles, null, 2)}`
})
return validated
}
+
+ async detectDuplicateBooks(libraryItems) {
+ if (!this.isConfigured) {
+ throw new Error('OpenAI API key is not configured')
+ }
+
+ const books = libraryItems.map((libraryItem) => {
+ const metadata = libraryItem.media.oldMetadataToJSON()
+ const folderContext = this.getFolderContext(libraryItem)
+ const metadataCompletenessScore = [
+ metadata.title,
+ metadata.subtitle,
+ metadata.description,
+ metadata.isbn,
+ metadata.asin,
+ metadata.publisher,
+ metadata.language,
+ metadata.publishedYear,
+ metadata.authors?.length ? 'authors' : null,
+ metadata.series?.length ? 'series' : null,
+ metadata.narrators?.length ? 'narrators' : null,
+ libraryItem.media.coverPath ? 'cover' : null
+ ].filter(Boolean).length
+
+ return {
+ id: libraryItem.id,
+ title: metadata.title || null,
+ subtitle: metadata.subtitle || null,
+ authors: (metadata.authors || []).map((author) => author.name),
+ narrators: metadata.narrators || [],
+ series: (metadata.series || []).map((series) => ({ name: series.name, sequence: series.sequence || null })),
+ publishedYear: metadata.publishedYear || null,
+ description: this.cleanDescription(metadata.description),
+ language: metadata.language || null,
+ abridged: !!metadata.abridged,
+ explicit: !!metadata.explicit,
+ isbn: metadata.isbn || null,
+ asin: metadata.asin || null,
+ duration: libraryItem.media.duration || null,
+ size: libraryItem.media.size || libraryItem.size || null,
+ numAudioFiles: libraryItem.media.audioFiles?.length || 0,
+ numChapters: libraryItem.media.chapters?.length || 0,
+ hasCover: !!libraryItem.media.coverPath,
+ ebookFormat: libraryItem.media.ebookFile?.ebookFormat || null,
+ isFile: !!libraryItem.isFile,
+ fullPath: folderContext.fullPath,
+ relPath: folderContext.relPath,
+ metadataCompletenessScore
+ }
+ })
+
+ Logger.info(`[OpenAI] Evaluating duplicate books for ${books.length} candidates`)
+ books.forEach((book) => {
+      Logger.debug(`[OpenAI] Duplicate-books candidate ${JSON.stringify(book)}`)
+ })
+
+ const prompt = `You identify duplicate audiobook library items that represent the same underlying book/work and choose which copy to keep.
+
+Return only valid JSON in this shape:
+{
+ "groups": [
+ {
+ "keepId": "library-item-id-to-keep",
+ "duplicateIds": ["library-item-id-to-remove"],
+ "reason": "brief reason"
+ }
+ ]
+}
+
+Rules:
+- Only mark items as duplicates if they are clearly the same book/work.
+- Books in the same series are not duplicates unless they are the same title/work.
+- Different abridged vs unabridged editions, different languages, dramatizations, companions, or supplemental books are not duplicates unless the evidence strongly indicates they are just duplicate copies.
+- Prefer keeping the copy with richer metadata, cleaner path naming, cover art, more complete file data, and generally better organization.
+- Do not include a group if no duplicates should be removed.
+- Do not include the same id in more than one group.
+
+Books:
+${JSON.stringify(books, null, 2)}`
+
+ const payload = await this.createResponse(prompt)
+ const validated = this.validateDuplicateBooksPayload(payload, books)
+ validated.forEach((decision) => {
+ Logger.info(`[OpenAI] Duplicate-books result ${this.summarizeDuplicateDecisionForLog(decision)}`)
+ })
+ return validated
+ }
}
module.exports = OpenAI
diff --git a/server/routers/ApiRouter.js b/server/routers/ApiRouter.js
index faa6ba2b..6002f975 100644
--- a/server/routers/ApiRouter.js
+++ b/server/routers/ApiRouter.js
@@ -90,6 +90,7 @@ class ApiRouter {
this.router.get('/libraries/:id/matchall', LibraryController.middleware.bind(this), LibraryController.matchAll.bind(this))
this.router.post('/libraries/:id/scan', LibraryController.middleware.bind(this), LibraryController.scan.bind(this))
this.router.post('/libraries/:id/detect-series-with-ai', LibraryController.middleware.bind(this), LibraryController.detectSeriesWithAI.bind(this))
+ this.router.post('/libraries/:id/dedupe-books-with-ai', LibraryController.middleware.bind(this), LibraryController.dedupeBooksWithAI.bind(this))
this.router.get('/libraries/:id/recent-episodes', LibraryController.middleware.bind(this), LibraryController.getRecentEpisodes.bind(this))
this.router.get('/libraries/:id/opml', LibraryController.middleware.bind(this), LibraryController.getOPMLFile.bind(this))
this.router.post('/libraries/order', LibraryController.reorder.bind(this))
diff --git a/test/server/providers/OpenAI.test.js b/test/server/providers/OpenAI.test.js
index 53fed180..4e86a93b 100644
--- a/test/server/providers/OpenAI.test.js
+++ b/test/server/providers/OpenAI.test.js
@@ -212,4 +212,61 @@ describe('OpenAI', () => {
expect(result[1].reason).to.contain('omitted this media file')
})
})
+
+ describe('validateDuplicateBooksPayload', () => {
+ it('normalizes valid duplicate-book groups', () => {
+ const result = openAI.validateDuplicateBooksPayload(
+ {
+ groups: [
+ {
+ keepId: 'a',
+ duplicateIds: ['b', 'c'],
+ reason: 'same book'
+ }
+ ]
+ },
+ [{ id: 'a' }, { id: 'b' }, { id: 'c' }]
+ )
+
+ expect(result).to.deep.equal([
+ {
+ keepId: 'a',
+ duplicateIds: ['b', 'c'],
+ reason: 'same book'
+ }
+ ])
+ })
+
+ it('ignores invalid and overlapping duplicate-book groups', () => {
+ const result = openAI.validateDuplicateBooksPayload(
+ {
+ groups: [
+ {
+ keepId: 'a',
+ duplicateIds: ['b', 'missing', 'a'],
+ reason: 'primary match'
+ },
+ {
+ keepId: 'b',
+ duplicateIds: ['c'],
+ reason: 'should be skipped because b was consumed'
+ },
+ {
+ keepId: 'z',
+ duplicateIds: ['c']
+ }
+ ]
+ },
+ [{ id: 'a' }, { id: 'b' }, { id: 'c' }]
+ )
+
+ expect(result).to.deep.equal([
+ {
+ keepId: 'a',
+ duplicateIds: ['b'],
+ reason: 'primary match'
+ }
+ ])
+ })
+ })
})