mirror of
https://github.com/advplyr/audiobookshelf.git
synced 2026-03-04 15:09:44 +00:00
feat: implement duplicate title normalized filter
This commit is contained in:
parent
aa85106681
commit
ead215e777
13 changed files with 276 additions and 1 deletions
159
server/migrations/v2.32.9-add-title-normalized-columns.js
Normal file
159
server/migrations/v2.32.9-add-title-normalized-columns.js
Normal file
|
|
@ -0,0 +1,159 @@
|
|||
const util = require('util')
|
||||
const { getNormalizedTitle } = require('../utils')
|
||||
|
||||
/**
|
||||
* @typedef MigrationContext
|
||||
* @property {import('sequelize').QueryInterface} queryInterface - a sequelize QueryInterface object.
|
||||
* @property {import('../Logger')} logger - a Logger object.
|
||||
*
|
||||
* @typedef MigrationOptions
|
||||
* @property {MigrationContext} context - an object containing the migration context.
|
||||
*/
|
||||
|
||||
// Version tag of this migration; the migration name and the log prefix below are both built from it.
const migrationVersion = '2.32.9'
|
||||
// Full migration name, reported in the UPGRADE/DOWNGRADE BEGIN and END log lines.
const migrationName = `${migrationVersion}-add-title-normalized-columns`
|
||||
// Prefix placed in front of every log message written by this migration.
const loggerPrefix = `[${migrationVersion} migration]`
|
||||
|
||||
/**
 * Upgrade migration.
 *
 * Steps, in order:
 *  1. add a nullable `titleNormalized` STRING column to `libraryItems`, `books` and `podcasts`
 *  2. backfill `books.titleNormalized` and `podcasts.titleNormalized` from the existing titles
 *  3. copy the backfilled values onto the matching `libraryItems` rows (joined on `mediaId`)
 *  4. add triggers that propagate later changes of `titleNormalized` from books/podcasts to `libraryItems`
 *  5. add an index on `libraryItems` (`libraryId`, `mediaType`, `titleNormalized` NOCASE)
 *
 * @param {MigrationOptions} options - an object containing the migration context.
 * @returns {Promise<void>} resolves once every step has completed.
 */
async function up({ context: { queryInterface, logger } }) {
  logger.info(`${loggerPrefix} UPGRADE BEGIN: ${migrationName}`)

  // 1. Add columns
  for (const table of ['libraryItems', 'books', 'podcasts']) {
    // A fresh options object is built for each call so the calls never share state.
    await addColumn(queryInterface, logger, table, 'titleNormalized', { type: queryInterface.sequelize.Sequelize.STRING, allowNull: true })
  }

  // 2. Backfill data for books, then podcasts
  await backfillTitleNormalized(queryInterface, logger, 'books')
  await backfillTitleNormalized(queryInterface, logger, 'podcasts')

  // 3. Copy from books/podcasts to libraryItems
  await copyColumn(queryInterface, logger, 'books', 'titleNormalized', 'id', 'libraryItems', 'titleNormalized', 'mediaId')
  await copyColumn(queryInterface, logger, 'podcasts', 'titleNormalized', 'id', 'libraryItems', 'titleNormalized', 'mediaId')

  // 4. Add triggers
  await addTrigger(queryInterface, logger, 'books', 'titleNormalized', 'id', 'libraryItems', 'titleNormalized', 'mediaId')
  await addTrigger(queryInterface, logger, 'podcasts', 'titleNormalized', 'id', 'libraryItems', 'titleNormalized', 'mediaId')

  // 5. Add index on libraryItems
  await addIndex(queryInterface, logger, 'libraryItems', ['libraryId', 'mediaType', { name: 'titleNormalized', collate: 'NOCASE' }])

  logger.info(`${loggerPrefix} UPGRADE END: ${migrationName}`)
}

/**
 * Compute and store `titleNormalized` for every row of `table` that has a title.
 * Rows whose title is falsy (null or empty) are skipped, so their `titleNormalized` is left as it is.
 * Rows are updated one at a time, each with its own parameterized UPDATE.
 *
 * @param {import('sequelize').QueryInterface} queryInterface
 * @param {import('../Logger')} logger
 * @param {string} table - name of a table with `id` and `title` columns; interpolated into the SQL,
 *   so it must be a trusted identifier (only called with 'books' and 'podcasts' here).
 * @returns {Promise<void>}
 */
async function backfillTitleNormalized(queryInterface, logger, table) {
  logger.info(`${loggerPrefix} Backfilling titleNormalized for ${table}`)
  const rows = await queryInterface.sequelize.query(`SELECT id, title FROM ${table}`, { type: queryInterface.sequelize.QueryTypes.SELECT })
  for (const row of rows) {
    if (!row.title) continue
    const titleNormalized = getNormalizedTitle(row.title)
    await queryInterface.sequelize.query(`UPDATE ${table} SET titleNormalized = :titleNormalized WHERE id = :id`, {
      replacements: { titleNormalized, id: row.id }
    })
  }
}
|
||||
|
||||
/**
 * Downgrade migration.
 *
 * Undoes the upgrade: removes the `libraryItems` index on (`libraryId`, `mediaType`, `titleNormalized`),
 * drops the books/podcasts triggers, then removes the `titleNormalized` column from
 * `libraryItems`, `books` and `podcasts` (in that order).
 *
 * @param {MigrationOptions} options - an object containing the migration context.
 * @returns {Promise<void>} resolves once every step has completed.
 */
async function down({ context: { queryInterface, logger } }) {
  const itemsTable = 'libraryItems'
  const normalizedColumn = 'titleNormalized'
  const mediaTables = ['books', 'podcasts']

  logger.info(`${loggerPrefix} DOWNGRADE BEGIN: ${migrationName}`)

  await removeIndex(queryInterface, logger, itemsTable, ['libraryId', 'mediaType', normalizedColumn])

  for (const mediaTable of mediaTables) {
    await removeTrigger(queryInterface, logger, itemsTable, normalizedColumn, mediaTable)
  }

  for (const table of [itemsTable, ...mediaTables]) {
    await removeColumn(queryInterface, logger, table, normalizedColumn)
  }

  logger.info(`${loggerPrefix} DOWNGRADE END: ${migrationName}`)
}
|
||||
|
||||
/**
 * Add an index on `columns` to `tableName`, tolerating an index that already exists.
 *
 * @param {import('sequelize').QueryInterface} queryInterface
 * @param {import('../Logger')} logger
 * @param {string} tableName
 * @param {(string|{name: string, collate?: string})[]} columns - column names, or objects carrying a `name`.
 * @returns {Promise<void>}
 * @throws rethrows any error other than a SequelizeDatabaseError whose message contains "already exists".
 */
async function addIndex(queryInterface, logger, tableName, columns) {
  const columnString = columns.map((column) => util.inspect(column)).join(', ')
  // Computed for the log lines only: no name is passed to queryInterface.addIndex below.
  const indexName = convertToSnakeCase(`${tableName}_${columns.map((column) => (typeof column === 'string' ? column : column.name)).join('_')}`)
  try {
    logger.info(`${loggerPrefix} adding index on [${columnString}] to table ${tableName}. index name: ${indexName}`)
    await queryInterface.addIndex(tableName, columns)
    logger.info(`${loggerPrefix} added index on [${columnString}] to table ${tableName}. index name: ${indexName}`)
  } catch (error) {
    if (error.name === 'SequelizeDatabaseError' && error.message.includes('already exists')) {
      // Re-running the migration is fine: an existing index is reported, not treated as a failure.
      logger.info(`${loggerPrefix} index [${columnString}] for table "${tableName}" already exists`)
    } else {
      throw error
    }
  }
}
|
||||
|
||||
/**
 * Remove the index on `columns` from `tableName`.
 * This is best-effort: a failure is logged and never propagated to the caller.
 *
 * @param {import('sequelize').QueryInterface} queryInterface
 * @param {import('../Logger')} logger
 * @param {string} tableName
 * @param {(string|{name: string})[]} columns - column names, or objects carrying a `name`.
 * @returns {Promise<void>}
 */
async function removeIndex(queryInterface, logger, tableName, columns) {
  // Resolve object-style columns to their name so the log never shows "[object Object]".
  const columnString = columns.map((column) => (typeof column === 'string' ? column : column.name)).join(', ')
  logger.info(`${loggerPrefix} removing index [${columnString}] from table "${tableName}"`)
  try {
    await queryInterface.removeIndex(tableName, columns)
    logger.info(`${loggerPrefix} removed index [${columnString}] from table "${tableName}"`)
  } catch (error) {
    // Deliberately non-fatal, but the reason is recorded instead of being silently discarded.
    const reason = error && error.message ? error.message : String(error)
    logger.info(`${loggerPrefix} could not remove index [${columnString}] from table "${tableName}": ${reason}`)
  }
}
|
||||
|
||||
/**
 * Add `column` to `table` unless the table description already lists it.
 *
 * @param {import('sequelize').QueryInterface} queryInterface
 * @param {import('../Logger')} logger
 * @param {string} table
 * @param {string} column
 * @param {Object} options - column definition handed to queryInterface.addColumn.
 * @returns {Promise<void>}
 */
async function addColumn(queryInterface, logger, table, column, options) {
  logger.info(`${loggerPrefix} adding column "${column}" to table "${table}"`)

  const existingColumns = await queryInterface.describeTable(table)
  if (existingColumns[column]) {
    // Nothing to do: the column is already there.
    logger.info(`${loggerPrefix} column "${column}" already exists in table "${table}"`)
    return
  }

  await queryInterface.addColumn(table, column, options)
  logger.info(`${loggerPrefix} added column "${column}" to table "${table}"`)
}
|
||||
|
||||
/**
 * Remove `column` from `table`, logging before and after the removal.
 *
 * @param {import('sequelize').QueryInterface} queryInterface
 * @param {import('../Logger')} logger
 * @param {string} table
 * @param {string} column
 * @returns {Promise<void>}
 */
async function removeColumn(queryInterface, logger, table, column) {
  const target = `column "${column}" from table "${table}"`

  logger.info(`${loggerPrefix} removing ${target}`)
  await queryInterface.removeColumn(table, column)
  logger.info(`${loggerPrefix} removed ${target}`)
}
|
||||
|
||||
/**
 * Copy `sourceColumn` of `sourceTable` into `targetColumn` of `targetTable` for every pair of rows
 * where `targetTable.targetIdColumn` equals `sourceTable.sourceIdColumn`.
 *
 * All table and column names are interpolated straight into the SQL text, so they must be trusted
 * identifiers.
 *
 * @param {import('sequelize').QueryInterface} queryInterface
 * @param {import('../Logger')} logger
 * @param {string} sourceTable
 * @param {string} sourceColumn
 * @param {string} sourceIdColumn
 * @param {string} targetTable
 * @param {string} targetColumn
 * @param {string} targetIdColumn
 * @returns {Promise<void>}
 */
async function copyColumn(queryInterface, logger, sourceTable, sourceColumn, sourceIdColumn, targetTable, targetColumn, targetIdColumn) {
  const what = `column "${sourceColumn}" from table "${sourceTable}" to table "${targetTable}"`
  const copySql = `
    UPDATE ${targetTable}
    SET ${targetColumn} = ${sourceTable}.${sourceColumn}
    FROM ${sourceTable}
    WHERE ${targetTable}.${targetIdColumn} = ${sourceTable}.${sourceIdColumn}
  `

  logger.info(`${loggerPrefix} copying ${what}`)
  await queryInterface.sequelize.query(copySql)
  logger.info(`${loggerPrefix} copied ${what}`)
}
|
||||
|
||||
/**
 * (Re)create the trigger that mirrors `sourceTable.sourceColumn` onto `targetTable.targetColumn`.
 *
 * The trigger fires after an UPDATE of `sourceColumn` on `sourceTable` and writes the new value into
 * the `targetTable` rows whose `targetIdColumn` equals the updated row's `sourceIdColumn`.
 * Any trigger with the same name is dropped first, so calling this repeatedly is safe.
 *
 * All table and column names are interpolated straight into the SQL text, so they must be trusted
 * identifiers.
 *
 * @param {import('sequelize').QueryInterface} queryInterface
 * @param {import('../Logger')} logger
 * @param {string} sourceTable
 * @param {string} sourceColumn
 * @param {string} sourceIdColumn
 * @param {string} targetTable
 * @param {string} targetColumn
 * @param {string} targetIdColumn
 * @returns {Promise<void>}
 */
async function addTrigger(queryInterface, logger, sourceTable, sourceColumn, sourceIdColumn, targetTable, targetColumn, targetIdColumn) {
  logger.info(`${loggerPrefix} adding trigger to update ${targetTable}.${targetColumn} when ${sourceTable}.${sourceColumn} is updated`)

  // Must produce the same name as removeTrigger so the downgrade can drop it.
  const name = convertToSnakeCase(`update_${targetTable}_${targetColumn}_from_${sourceTable}`)
  const { sequelize } = queryInterface

  await sequelize.query(`DROP TRIGGER IF EXISTS ${name}`)

  const createSql = `
    CREATE TRIGGER ${name}
      AFTER UPDATE OF ${sourceColumn} ON ${sourceTable}
      FOR EACH ROW
      BEGIN
        UPDATE ${targetTable}
          SET ${targetColumn} = NEW.${sourceColumn}
        WHERE ${targetTable}.${targetIdColumn} = NEW.${sourceIdColumn};
      END;
  `
  await sequelize.query(createSql)

  logger.info(`${loggerPrefix} added trigger.`)
}
|
||||
|
||||
/**
 * Drop the trigger named after `targetTable`, `targetColumn` and `sourceTable`, if it exists.
 *
 * @param {import('sequelize').QueryInterface} queryInterface
 * @param {import('../Logger')} logger
 * @param {string} targetTable
 * @param {string} targetColumn
 * @param {string} sourceTable
 * @returns {Promise<void>}
 */
async function removeTrigger(queryInterface, logger, targetTable, targetColumn, sourceTable) {
  logger.info(`${loggerPrefix} removing trigger`)

  // Same naming scheme as addTrigger: update_<target>_<column>_from_<source>, snake-cased.
  const name = convertToSnakeCase(`update_${targetTable}_${targetColumn}_from_${sourceTable}`)
  const dropSql = `DROP TRIGGER IF EXISTS ${name}`
  await queryInterface.sequelize.query(dropSql)
}
|
||||
|
||||
/**
 * Convert a camelCase identifier to snake_case: an underscore is inserted before every ASCII
 * uppercase letter and the whole string is then lowercased.
 *
 * @example "libraryItems_titleNormalized" => "library_items_title_normalized"
 * @param {string} str
 * @returns {string}
 */
function convertToSnakeCase(str) {
  const withUnderscores = str.replace(/[A-Z]/g, (upper) => `_${upper}`)
  return withUnderscores.toLowerCase()
}
|
||||
|
||||
module.exports = { up, down }
|
||||
|
|
@ -146,6 +146,7 @@ class Book extends Model {
|
|||
},
|
||||
title: DataTypes.STRING,
|
||||
titleIgnorePrefix: DataTypes.STRING,
|
||||
titleNormalized: DataTypes.STRING,
|
||||
subtitle: DataTypes.STRING,
|
||||
publishedYear: DataTypes.STRING,
|
||||
publishedDate: DataTypes.STRING,
|
||||
|
|
@ -407,7 +408,9 @@ class Book extends Model {
|
|||
this[key] = payload.metadata[key] || null
|
||||
|
||||
if (key === 'title') {
|
||||
const { getTitleIgnorePrefix, getNormalizedTitle } = require('../utils')
|
||||
this.titleIgnorePrefix = getTitleIgnorePrefix(this.title)
|
||||
this.titleNormalized = getNormalizedTitle(this.title)
|
||||
}
|
||||
|
||||
hasUpdates = true
|
||||
|
|
|
|||
|
|
@ -78,6 +78,8 @@ class LibraryItem extends Model {
|
|||
/** @type {string} */
|
||||
this.titleIgnorePrefix // Only used for sorting
|
||||
/** @type {string} */
|
||||
this.titleNormalized // Only used for sorting
|
||||
/** @type {string} */
|
||||
this.authorNamesFirstLast // Only used for sorting
|
||||
/** @type {string} */
|
||||
this.authorNamesLastFirst // Only used for sorting
|
||||
|
|
@ -687,6 +689,7 @@ class LibraryItem extends Model {
|
|||
extraData: DataTypes.JSON,
|
||||
title: DataTypes.STRING,
|
||||
titleIgnorePrefix: DataTypes.STRING,
|
||||
titleNormalized: DataTypes.STRING,
|
||||
authorNamesFirstLast: DataTypes.STRING,
|
||||
authorNamesLastFirst: DataTypes.STRING,
|
||||
isNotConsolidated: {
|
||||
|
|
@ -719,6 +722,9 @@ class LibraryItem extends Model {
|
|||
{
|
||||
fields: ['libraryId', 'mediaType', { name: 'titleIgnorePrefix', collate: 'NOCASE' }]
|
||||
},
|
||||
{
|
||||
fields: ['libraryId', 'mediaType', { name: 'titleNormalized', collate: 'NOCASE' }]
|
||||
},
|
||||
{
|
||||
fields: ['libraryId', 'mediaType', { name: 'authorNamesFirstLast', collate: 'NOCASE' }]
|
||||
},
|
||||
|
|
@ -795,6 +801,7 @@ class LibraryItem extends Model {
|
|||
if (instance.media) {
|
||||
instance.title = instance.media.title
|
||||
instance.titleIgnorePrefix = instance.media.titleIgnorePrefix
|
||||
instance.titleNormalized = instance.media.titleNormalized
|
||||
if (instance.isBook) {
|
||||
if (instance.media.authors !== undefined) {
|
||||
instance.authorNamesFirstLast = instance.media.authorName
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
const { DataTypes, Model } = require('sequelize')
|
||||
const { getTitlePrefixAtEnd, getTitleIgnorePrefix } = require('../utils')
|
||||
const { getTitlePrefixAtEnd, getTitleIgnorePrefix, getNormalizedTitle } = require('../utils')
|
||||
const Logger = require('../Logger')
|
||||
const libraryItemsPodcastFilters = require('../utils/queries/libraryItemsPodcastFilters')
|
||||
const htmlSanitizer = require('../utils/htmlSanitizer')
|
||||
|
|
@ -93,6 +93,7 @@ class Podcast extends Model {
|
|||
{
|
||||
title,
|
||||
titleIgnorePrefix: getTitleIgnorePrefix(title),
|
||||
titleNormalized: getNormalizedTitle(title),
|
||||
author: typeof payload.metadata.author === 'string' ? payload.metadata.author : null,
|
||||
releaseDate: typeof payload.metadata.releaseDate === 'string' ? payload.metadata.releaseDate : null,
|
||||
feedURL: typeof payload.metadata.feedUrl === 'string' ? payload.metadata.feedUrl : null,
|
||||
|
|
@ -130,6 +131,7 @@ class Podcast extends Model {
|
|||
},
|
||||
title: DataTypes.STRING,
|
||||
titleIgnorePrefix: DataTypes.STRING,
|
||||
titleNormalized: DataTypes.STRING,
|
||||
author: DataTypes.STRING,
|
||||
releaseDate: DataTypes.STRING,
|
||||
feedURL: DataTypes.STRING,
|
||||
|
|
@ -257,6 +259,7 @@ class Podcast extends Model {
|
|||
|
||||
if (key === 'title') {
|
||||
this.titleIgnorePrefix = getTitleIgnorePrefix(this.title)
|
||||
this.titleNormalized = getNormalizedTitle(this.title)
|
||||
}
|
||||
|
||||
hasUpdates = true
|
||||
|
|
|
|||
|
|
@ -191,6 +191,18 @@ module.exports.getTitleIgnorePrefix = (title) => {
|
|||
return getTitleParts(title)[0]
|
||||
}
|
||||
|
||||
/**
 * Get normalized title to use for grouping duplicates
 * Starts from the same value getTitleIgnorePrefix returns (falling back to the full title when that
 * is empty), lowercases it and removes every character that is not a Unicode letter
 * (numbers, punctuation, spaces).
 * The text is composed to Unicode NFC first so that the same title yields the same key whether its
 * accented letters are precomposed or written as base letter + combining mark.
 * @param {string} title
 * @returns {string} normalized title, or an empty string when title is falsy
 */
module.exports.getNormalizedTitle = (title) => {
  if (!title) return ''
  const sortTitle = getTitleParts(title)[0] || title
  // A combining mark is not \p{L}: without composing first, decomposed "e" + U+0301 would become "e"
  // while precomposed "é" stays "é", and the two spellings would never be grouped together.
  return sortTitle.normalize('NFC').toLowerCase().replace(/[^\p{L}]/gu, '')
}
|
||||
|
||||
/**
|
||||
* Put sorting prefix at the end of title
|
||||
* @example "The Good Book" => "Good Book, The"
|
||||
|
|
|
|||
|
|
@ -515,6 +515,10 @@ module.exports = {
|
|||
isInvalid: true
|
||||
}
|
||||
]
|
||||
} else if (filterGroup === 'duplicates') {
|
||||
libraryItemWhere['titleNormalized'] = {
|
||||
[Sequelize.Op.in]: Sequelize.literal(`(SELECT titleNormalized FROM libraryItems WHERE libraryId = '${libraryId}' AND titleNormalized IS NOT NULL AND titleNormalized != '' GROUP BY titleNormalized HAVING COUNT(titleNormalized) > 1)`)
|
||||
}
|
||||
} else if (filterGroup === 'progress' && user) {
|
||||
const mediaProgressWhere = {
|
||||
userId: user.id
|
||||
|
|
|
|||
|
|
@ -168,6 +168,10 @@ module.exports = {
|
|||
isInvalid: true
|
||||
}
|
||||
]
|
||||
} else if (filterGroup === 'duplicates') {
|
||||
libraryItemWhere['titleNormalized'] = {
|
||||
[Sequelize.Op.in]: Sequelize.literal(`(SELECT titleNormalized FROM libraryItems WHERE libraryId = '${libraryId}' AND titleNormalized IS NOT NULL AND titleNormalized != '' GROUP BY titleNormalized HAVING COUNT(titleNormalized) > 1)`)
|
||||
}
|
||||
} else if (filterGroup === 'recent') {
|
||||
libraryItemWhere['createdAt'] = {
|
||||
[Sequelize.Op.gte]: new Date(new Date() - 60 * 24 * 60 * 60 * 1000) // 60 days ago
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue