Fix: Large OPML import timeouts #3118

- Added OPML API endpoints for /parse and /create, removed the old /api/podcasts/opml endpoint
- Show task for OPML import and create failed tasks for failed feeds
This commit is contained in:
advplyr 2024-07-16 17:05:52 -05:00
parent b1bc472205
commit 37ad1cced2
9 changed files with 258 additions and 93 deletions

View file

@ -14,6 +14,15 @@ const CoverManager = require('../managers/CoverManager')
const LibraryItem = require('../objects/LibraryItem')
class PodcastController {
/**
* POST /api/podcasts
* Create podcast
*
* @this import('../routers/ApiRouter')
*
* @param {import('express').Request} req
* @param {import('express').Response} res
*/
async create(req, res) {
if (!req.user.isAdminOrUp) {
Logger.error(`[PodcastController] Non-admin user "${req.user.username}" attempted to create podcast`)
@ -133,6 +142,14 @@ class PodcastController {
res.json({ podcast })
}
/**
* POST: /api/podcasts/opml/parse
*
* @this import('../routers/ApiRouter')
*
* @param {import('express').Request} req
* @param {import('express').Response} res
*/
async getFeedsFromOPMLText(req, res) {
if (!req.user.isAdminOrUp) {
Logger.error(`[PodcastController] Non-admin user "${req.user.username}" attempted to get feeds from opml`)
@ -143,8 +160,44 @@ class PodcastController {
return res.sendStatus(400)
}
const rssFeedsData = await this.podcastManager.getOPMLFeeds(req.body.opmlText)
res.json(rssFeedsData)
res.json({
feeds: this.podcastManager.getParsedOPMLFileFeeds(req.body.opmlText)
})
}
/**
* POST: /api/podcasts/opml/create
*
* @this import('../routers/ApiRouter')
*
* @param {import('express').Request} req
* @param {import('express').Response} res
*/
async bulkCreatePodcastsFromOpmlFeedUrls(req, res) {
if (!req.user.isAdminOrUp) {
Logger.error(`[PodcastController] Non-admin user "${req.user.username}" attempted to bulk create podcasts`)
return res.sendStatus(403)
}
const rssFeeds = req.body.feeds
if (!Array.isArray(rssFeeds) || !rssFeeds.length || rssFeeds.some((feed) => !validateUrl(feed))) {
return res.status(400).send('Invalid request body. "feeds" must be an array of RSS feed URLs')
}
const libraryId = req.body.libraryId
const folderId = req.body.folderId
if (!libraryId || !folderId) {
return res.status(400).send('Invalid request body. "libraryId" and "folderId" are required')
}
const folder = await Database.libraryFolderModel.findByPk(folderId)
if (!folder || folder.libraryId !== libraryId) {
return res.status(404).send('Folder not found')
}
const autoDownloadEpisodes = !!req.body.autoDownloadEpisodes
this.podcastManager.createPodcastsFromFeedUrls(rssFeeds, folder, autoDownloadEpisodes, this.cronManager)
res.sendStatus(200)
}
async checkNewEpisodes(req, res) {

View file

@ -5,7 +5,7 @@ const Database = require('../Database')
const fs = require('../libs/fsExtra')
const { getPodcastFeed } = require('../utils/podcastUtils')
const { removeFile, downloadFile } = require('../utils/fileUtils')
const { removeFile, downloadFile, sanitizeFilename, filePathToPOSIX, getFileTimestampsWithIno } = require('../utils/fileUtils')
const { levenshteinDistance } = require('../utils/index')
const opmlParser = require('../utils/parsers/parseOPML')
const opmlGenerator = require('../utils/generators/opmlGenerator')
@ -13,11 +13,13 @@ const prober = require('../utils/prober')
const ffmpegHelpers = require('../utils/ffmpegHelpers')
const TaskManager = require('./TaskManager')
const CoverManager = require('../managers/CoverManager')
const LibraryFile = require('../objects/files/LibraryFile')
const PodcastEpisodeDownload = require('../objects/PodcastEpisodeDownload')
const PodcastEpisode = require('../objects/entities/PodcastEpisode')
const AudioFile = require('../objects/files/AudioFile')
const LibraryItem = require('../objects/LibraryItem')
class PodcastManager {
constructor(watcher, notificationManager) {
@ -350,19 +352,23 @@ class PodcastManager {
return matches.sort((a, b) => a.levenshtein - b.levenshtein)
}
getParsedOPMLFileFeeds(opmlText) {
return opmlParser.parse(opmlText)
}
async getOPMLFeeds(opmlText) {
var extractedFeeds = opmlParser.parse(opmlText)
if (!extractedFeeds || !extractedFeeds.length) {
const extractedFeeds = opmlParser.parse(opmlText)
if (!extractedFeeds?.length) {
Logger.error('[PodcastManager] getOPMLFeeds: No RSS feeds found in OPML')
return {
error: 'No RSS feeds found in OPML'
}
}
var rssFeedData = []
const rssFeedData = []
for (let feed of extractedFeeds) {
var feedData = await getPodcastFeed(feed.feedUrl, true)
const feedData = await getPodcastFeed(feed.feedUrl, true)
if (feedData) {
feedData.metadata.feedUrl = feed.feedUrl
rssFeedData.push(feedData)
@ -392,5 +398,115 @@ class PodcastManager {
queue: this.downloadQueue.filter((item) => !libraryId || item.libraryId === libraryId).map((item) => item.toJSONForClient())
}
}
/**
*
* @param {string[]} rssFeedUrls
* @param {import('../models/LibraryFolder')} folder
* @param {boolean} autoDownloadEpisodes
* @param {import('../managers/CronManager')} cronManager
*/
async createPodcastsFromFeedUrls(rssFeedUrls, folder, autoDownloadEpisodes, cronManager) {
const task = TaskManager.createAndAddTask('opml-import', 'OPML import', `Creating podcasts from ${rssFeedUrls.length} RSS feeds`, true, null)
let numPodcastsAdded = 0
Logger.info(`[PodcastManager] createPodcastsFromFeedUrls: Importing ${rssFeedUrls.length} RSS feeds to folder "${folder.path}"`)
for (const feedUrl of rssFeedUrls) {
const feed = await getPodcastFeed(feedUrl).catch(() => null)
if (!feed?.episodes) {
TaskManager.createAndEmitFailedTask('opml-import-feed', 'OPML import feed', `Importing RSS feed "${feedUrl}"`, 'Failed to get podcast feed')
Logger.error(`[PodcastManager] createPodcastsFromFeedUrls: Failed to get podcast feed for "${feedUrl}"`)
continue
}
const podcastFilename = sanitizeFilename(feed.metadata.title)
const podcastPath = filePathToPOSIX(`${folder.path}/${podcastFilename}`)
// Check if a library item with this podcast folder exists already
const existingLibraryItem =
(await Database.libraryItemModel.count({
where: {
path: podcastPath
}
})) > 0
if (existingLibraryItem) {
Logger.error(`[PodcastManager] createPodcastsFromFeedUrls: Podcast already exists at path "${podcastPath}"`)
TaskManager.createAndEmitFailedTask('opml-import-feed', 'OPML import feed', `Creating podcast "${feed.metadata.title}"`, 'Podcast already exists at path')
continue
}
const successCreatingPath = await fs
.ensureDir(podcastPath)
.then(() => true)
.catch((error) => {
Logger.error(`[PodcastManager] Failed to ensure podcast dir "${podcastPath}"`, error)
return false
})
if (!successCreatingPath) {
Logger.error(`[PodcastManager] createPodcastsFromFeedUrls: Failed to create podcast folder at "${podcastPath}"`)
TaskManager.createAndEmitFailedTask('opml-import-feed', 'OPML import feed', `Creating podcast "${feed.metadata.title}"`, 'Failed to create podcast folder')
continue
}
const newPodcastMetadata = {
title: feed.metadata.title,
author: feed.metadata.author,
description: feed.metadata.description,
releaseDate: '',
genres: [...feed.metadata.categories],
feedUrl: feed.metadata.feedUrl,
imageUrl: feed.metadata.image,
itunesPageUrl: '',
itunesId: '',
itunesArtistId: '',
language: '',
numEpisodes: feed.numEpisodes
}
const libraryItemFolderStats = await getFileTimestampsWithIno(podcastPath)
const libraryItemPayload = {
path: podcastPath,
relPath: podcastFilename,
folderId: folder.id,
libraryId: folder.libraryId,
ino: libraryItemFolderStats.ino,
mtimeMs: libraryItemFolderStats.mtimeMs || 0,
ctimeMs: libraryItemFolderStats.ctimeMs || 0,
birthtimeMs: libraryItemFolderStats.birthtimeMs || 0,
media: {
metadata: newPodcastMetadata,
autoDownloadEpisodes
}
}
const libraryItem = new LibraryItem()
libraryItem.setData('podcast', libraryItemPayload)
// Download and save cover image
if (newPodcastMetadata.imageUrl) {
// TODO: Scan cover image to library files
// Podcast cover will always go into library item folder
const coverResponse = await CoverManager.downloadCoverFromUrl(libraryItem, newPodcastMetadata.imageUrl, true)
if (coverResponse) {
if (coverResponse.error) {
Logger.error(`[PodcastManager] createPodcastsFromFeedUrls: Download cover error from "${newPodcastMetadata.imageUrl}": ${coverResponse.error}`)
} else if (coverResponse.cover) {
libraryItem.media.coverPath = coverResponse.cover
}
}
}
await Database.createLibraryItem(libraryItem)
SocketAuthority.emitter('item_added', libraryItem.toJSONExpanded())
// Turn on podcast auto download cron if not already on
if (libraryItem.media.autoDownloadEpisodes) {
cronManager.checkUpdatePodcastCron(libraryItem)
}
numPodcastsAdded++
}
task.setFinished(`Added ${numPodcastsAdded} podcasts`)
TaskManager.taskFinished(task)
Logger.info(`[PodcastManager] createPodcastsFromFeedUrls: Finished OPML import. Created ${numPodcastsAdded} podcasts out of ${rssFeedUrls.length} RSS feed URLs`)
}
}
module.exports = PodcastManager

View file

@ -9,8 +9,8 @@ class TaskManager {
/**
* Add task and emit socket task_started event
*
* @param {Task} task
*
* @param {Task} task
*/
addTask(task) {
this.tasks.push(task)
@ -19,24 +19,24 @@ class TaskManager {
/**
* Remove task and emit task_finished event
*
* @param {Task} task
*
* @param {Task} task
*/
taskFinished(task) {
if (this.tasks.some(t => t.id === task.id)) {
this.tasks = this.tasks.filter(t => t.id !== task.id)
if (this.tasks.some((t) => t.id === task.id)) {
this.tasks = this.tasks.filter((t) => t.id !== task.id)
SocketAuthority.emitter('task_finished', task.toJSON())
}
}
/**
* Create new task and add
*
* @param {string} action
* @param {string} title
* @param {string} description
* @param {boolean} showSuccess
* @param {Object} [data]
*
* @param {string} action
* @param {string} title
* @param {string} description
* @param {boolean} showSuccess
* @param {Object} [data]
*/
createAndAddTask(action, title, description, showSuccess, data = {}) {
const task = new Task()
@ -44,5 +44,21 @@ class TaskManager {
this.addTask(task)
return task
}
/**
* Create new failed task and add
*
* @param {string} action
* @param {string} title
* @param {string} description
* @param {string} errorMessage
*/
createAndEmitFailedTask(action, title, description, errorMessage) {
const task = new Task()
task.setData(action, title, description, false)
task.setFailed(errorMessage)
SocketAuthority.emitter('task_started', task.toJSON())
return task
}
}
module.exports = new TaskManager()
module.exports = new TaskManager()

View file

@ -60,7 +60,7 @@ class Library extends Model {
/**
* Convert expanded Library to oldLibrary
* @param {Library} libraryExpanded
* @returns {Promise<oldLibrary>}
* @returns {oldLibrary}
*/
static getOldLibrary(libraryExpanded) {
const folders = libraryExpanded.libraryFolders.map((folder) => {

View file

@ -45,6 +45,7 @@ class ApiRouter {
this.backupManager = Server.backupManager
/** @type {import('../Watcher')} */
this.watcher = Server.watcher
/** @type {import('../managers/PodcastManager')} */
this.podcastManager = Server.podcastManager
this.audioMetadataManager = Server.audioMetadataManager
this.rssFeedManager = Server.rssFeedManager
@ -239,7 +240,8 @@ class ApiRouter {
//
this.router.post('/podcasts', PodcastController.create.bind(this))
this.router.post('/podcasts/feed', PodcastController.getPodcastFeed.bind(this))
this.router.post('/podcasts/opml', PodcastController.getFeedsFromOPMLText.bind(this))
this.router.post('/podcasts/opml/parse', PodcastController.getFeedsFromOPMLText.bind(this))
this.router.post('/podcasts/opml/create', PodcastController.bulkCreatePodcastsFromOpmlFeedUrls.bind(this))
this.router.get('/podcasts/:id/checknew', PodcastController.middleware.bind(this), PodcastController.checkNewEpisodes.bind(this))
this.router.get('/podcasts/:id/downloads', PodcastController.middleware.bind(this), PodcastController.getEpisodeDownloads.bind(this))
this.router.get('/podcasts/:id/clear-queue', PodcastController.middleware.bind(this), PodcastController.clearEpisodeDownloadQueue.bind(this))

View file

@ -1,17 +1,21 @@
const h = require('htmlparser2')
const Logger = require('../../Logger')
/**
*
* @param {string} opmlText
* @returns {Array<{title: string, feedUrl: string}>}
*/
function parse(opmlText) {
var feeds = []
var parser = new h.Parser({
onopentag: (name, attribs) => {
if (name === "outline" && attribs.type === 'rss') {
if (name === 'outline' && attribs.type === 'rss') {
if (!attribs.xmlurl) {
Logger.error('[parseOPML] Invalid opml outline tag has no xmlurl attribute')
} else {
feeds.push({
title: attribs.title || 'No Title',
text: attribs.text || '',
title: attribs.title || attribs.text || '',
feedUrl: attribs.xmlurl
})
}
@ -21,4 +25,4 @@ function parse(opmlText) {
parser.write(opmlText)
return feeds
}
module.exports.parse = parse
module.exports.parse = parse