Update book finder and cover matching - includes LibGen provider

This commit is contained in:
Mark Cooper 2021-08-21 09:15:44 -05:00
parent be7e2576f1
commit 30700c1eb0
14 changed files with 379 additions and 76 deletions

View file

@ -14,8 +14,10 @@ class ApiController {
}
init() {
this.router.get('/find/covers', this.findCovers.bind(this))
this.router.get('/find/:method', this.find.bind(this))
this.router.get('/audiobooks', this.getAudiobooks.bind(this))
this.router.get('/audiobook/:id', this.getAudiobook.bind(this))
this.router.delete('/audiobook/:id', this.deleteAudiobook.bind(this))
@ -36,6 +38,11 @@ class ApiController {
this.scanner.find(req, res)
}
findCovers(req, res) {
console.log('Find covers', req.query)
this.scanner.findCovers(req, res)
}
async getMetadata(req, res) {
var metadata = await this.scanner.fetchMetadata(req.params.id, req.params.trackIndex)
res.json(metadata)

View file

@ -1,5 +1,7 @@
const OpenLibrary = require('./providers/OpenLibrary')
const LibGen = require('./providers/LibGen')
const Logger = require('./Logger')
const { levenshteinDistance } = require('./utils/index')
class BookFinder {
constructor() {
@ -15,19 +17,142 @@ class BookFinder {
return book
}
async search(query, provider = 'openlibrary') {
var books = null
stripSubtitle(title) {
if (title.includes(':')) {
return title.split(':')[0].trim()
} else if (title.includes(' - ')) {
return title.split(' - ')[0].trim()
}
return title
}
cleanTitleForCompares(title) {
// Remove subtitle if there (i.e. "Cool Book: Coolest Ever" becomes "Cool Book")
var stripped = this.stripSubtitle(title)
// Remove text in paranthesis (i.e. "Ender's Game (Ender's Saga)" becomes "Ender's Game")
var cleaned = stripped.replace(/ *\([^)]*\) */g, "")
// Remove single quotes (i.e. "Ender's Game" becomes "Enders Game")
cleaned = cleaned.replace(/'/g, '')
return cleaned.toLowerCase()
}
filterSearchResults(books, title, author, maxTitleDistance, maxAuthorDistance) {
var searchTitle = this.cleanTitleForCompares(title)
return books.map(b => {
b.cleanedTitle = this.cleanTitleForCompares(b.title)
b.titleDistance = levenshteinDistance(b.cleanedTitle, title)
if (author) {
b.authorDistance = levenshteinDistance(b.author || '', author)
}
b.totalDistance = b.titleDistance + (b.authorDistance || 0)
b.totalPossibleDistance = b.title.length
if (b.cleanedTitle.includes(searchTitle) && searchTitle.length > 4) {
b.includesSearch = searchTitle
} else if (b.title.includes(searchTitle) && searchTitle.length > 4) {
b.includesSearch = searchTitle
}
if (author && b.author) b.totalPossibleDistance += b.author.length
return b
}).filter(b => {
if (b.includesSearch) { // If search was found in result title exactly then skip over leven distance check
Logger.debug(`Exact search was found inside title ${b.cleanedTitle}/${b.includesSearch}`)
} else if (b.titleDistance > maxTitleDistance) {
Logger.debug(`Filtering out search result title distance = ${b.titleDistance}: "${b.cleanedTitle}"/"${searchTitle}"`)
return false
}
if (author && b.authorDistance > maxAuthorDistance) {
Logger.debug(`Filtering out search result "${b.title}", author distance = ${b.authorDistance}: "${b.author}"/"${author}"`)
return false
}
if (b.totalPossibleDistance < 4 && b.totalDistance > 0) return false
return true
})
}
async getLibGenResults(title, author, maxTitleDistance, maxAuthorDistance) {
var books = await this.libGen.search(title)
Logger.info(`LibGen Book Search Results: ${books.length || 0}`)
if (books.errorCode) {
Logger.error(`LibGen Search Error ${books.errorCode}`)
return []
}
var booksFiltered = this.filterSearchResults(books, title, author, maxTitleDistance, maxAuthorDistance)
if (!booksFiltered.length && books.length) {
Logger.info(`Search has ${books.length} matches, but no close title matches`)
}
return booksFiltered
}
async getOpenLibResults(title, author, maxTitleDistance, maxAuthorDistance) {
var books = await this.openLibrary.searchTitle(title)
Logger.info(`OpenLib Book Search Results: ${books.length || 0}`)
if (books.errorCode) {
Logger.error(`OpenLib Search Error ${books.errorCode}`)
return []
}
var booksFiltered = this.filterSearchResults(books, title, author, maxTitleDistance, maxAuthorDistance)
if (!booksFiltered.length && books.length) {
Logger.info(`Search has ${books.length} matches, but no close title matches`)
}
return booksFiltered
}
async search(provider, title, author, options = {}) {
var books = []
var maxTitleDistance = !isNaN(options.titleDistance) ? Number(options.titleDistance) : 4
var maxAuthorDistance = !isNaN(options.authorDistance) ? Number(options.authorDistance) : 4
Logger.info(`Book Search, title: "${title}", author: "${author}", provider: ${provider}`)
if (provider === 'libgen') {
books = await this.libGen.search(query)
return books
books = await this.getLibGenResults(title, author, maxTitleDistance, maxAuthorDistance)
} else if (provider === 'openlibrary') {
books = await this.getOpenLibResults(title, author, maxTitleDistance, maxAuthorDistance)
} else if (provider === 'all') {
var lbBooks = await this.getLibGenResults(title, author, maxTitleDistance, maxAuthorDistance)
var olBooks = await this.getOpenLibResults(title, author, maxTitleDistance, maxAuthorDistance)
books = books.concat(lbBooks, olBooks)
} else {
var olBooks = await this.getOpenLibResults(title, author, maxTitleDistance, maxAuthorDistance)
var hasCloseMatch = olBooks.find(b => (b.totalDistance < 4 && b.totalPossibleDistance > 4))
if (hasCloseMatch) {
books = olBooks
} else {
Logger.info(`Book Search, LibGen has no close matches - get openlib results also`)
var lbBooks = await this.getLibGenResults(title, author, maxTitleDistance, maxAuthorDistance)
books = books.concat(lbBooks)
}
if (!books.length && author) {
Logger.info(`Book Search, no matches for title and author.. check title only`)
return this.search(provider, title, null, options)
}
}
books = await this.openLibrary.search(query)
if (books.errorCode) {
console.error('Books not found')
}
return books
return books.sort((a, b) => {
return a.totalDistance - b.totalDistance
})
}
async findCovers(provider, title, author, options = {}) {
var searchResults = await this.search(provider, title, author, options)
console.log('Find Covers search results', searchResults)
var covers = []
searchResults.forEach((result) => {
if (result.covers && result.covers.length) {
covers = covers.concat(result.covers)
}
if (result.cover) {
covers.push(result.cover)
}
})
return covers
}
}
module.exports = BookFinder

View file

@ -77,14 +77,18 @@ class Scanner {
var result = null
if (method === 'isbn') {
console.log('Search', query, 'via ISBN')
result = await this.bookFinder.findByISBN(query)
} else if (method === 'search') {
console.log('Search', query, 'via query')
result = await this.bookFinder.search(query)
result = await this.bookFinder.search(query.provider, query.title, query.author || null)
}
res.json(result)
}
async findCovers(req, res) {
var query = req.query
var result = await this.bookFinder.findCovers(query.provider, query.title, query.author || null)
res.json(result)
}
}
module.exports = Scanner

View file

@ -10,28 +10,40 @@ class LibGen {
console.log(`${this.mirror} is currently fastest`)
}
async search(query) {
async search(queryTitle) {
if (!this.mirror) {
await this.init()
}
queryTitle = queryTitle.replace(/'/g, '')
var options = {
mirror: this.mirror,
query: query,
query: queryTitle,
search_in: 'title'
}
var httpsMirror = this.mirror
if (httpsMirror.startsWith('http:')) {
httpsMirror = httpsMirror.replace('http:', 'https:')
}
// console.log('LibGen Search Options', options)
try {
const data = await libgen.search(options)
let n = data.length
console.log(`${n} results for "${options.query}"`)
// console.log(`${n} results for "${options.query}"`)
var cleanedResults = []
while (n--) {
console.log('');
console.log('Title: ' + data[n].title)
console.log('Author: ' + data[n].author)
console.log('Download: ' +
'http://gen.lib.rus.ec/book/index.php?md5=' +
data[n].md5.toLowerCase())
var resultObj = {
id: data[n].id,
title: data[n].title,
author: data[n].author,
publisher: data[n].publisher,
description: data[n].descr,
cover: `${httpsMirror}/covers/${data[n].coverurl}`,
year: data[n].year
}
if (!resultObj.title) continue;
cleanedResults.push(resultObj)
}
return data
return cleanedResults
} catch (err) {
console.error(err)
return {

View file

@ -50,7 +50,7 @@ class OpenLibrary {
return {
title: doc.title,
author: doc.author_name ? doc.author_name.join(', ') : null,
first_publish_year: doc.first_publish_year,
year: doc.first_publish_year,
edition: doc.cover_edition_key,
cover: doc.cover_edition_key ? `https://covers.openlibrary.org/b/OLID/${doc.cover_edition_key}-L.jpg` : null,
...worksData
@ -68,5 +68,17 @@ class OpenLibrary {
var searchDocs = await Promise.all(lookupData.docs.map(d => this.cleanSearchDoc(d)))
return searchDocs
}
async searchTitle(title) {
title = title.replace(/'/g, '')
var lookupData = await this.get(`/search.json?title=${title}`)
if (!lookupData) {
return {
errorCode: 404
}
}
var searchDocs = await Promise.all(lookupData.docs.map(d => this.cleanSearchDoc(d)))
return searchDocs
}
}
module.exports = OpenLibrary

26
server/utils/index.js Normal file
View file

@ -0,0 +1,26 @@
/**
 * Computes the Levenshtein (edit) distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn one string into the other.
 *
 * `null` / `undefined` inputs are treated as empty strings instead of
 * throwing; any other non-string input is converted with `String()`.
 *
 * @param {string} str1 - first string
 * @param {string} str2 - second string
 * @param {boolean} [caseSensitive=false] - when false both strings are lowercased first
 * @returns {number} the edit distance (0 when the strings are equal)
 */
const levenshteinDistance = (str1, str2, caseSensitive = false) => {
  let source = str1 == null ? '' : String(str1);
  let target = str2 == null ? '' : String(str2);
  if (!caseSensitive) {
    source = source.toLowerCase();
    target = target.toLowerCase();
  }

  // Trivial cases: identical strings, or one side empty
  if (source === target) return 0;
  if (source.length === 0) return target.length;
  if (target.length === 0) return source.length;

  // Each row of the edit table depends only on the row above it, so keep
  // a single previous row instead of the whole matrix.
  let previousRow = Array.from({ length: source.length + 1 }, (_, i) => i);
  for (let j = 1; j <= target.length; j += 1) {
    const currentRow = [j];
    for (let i = 1; i <= source.length; i += 1) {
      const substitutionCost = source[i - 1] === target[j - 1] ? 0 : 1;
      currentRow[i] = Math.min(
        currentRow[i - 1] + 1, // deletion
        previousRow[i] + 1, // insertion
        previousRow[i - 1] + substitutionCost // substitution
      );
    }
    previousRow = currentRow;
  }
  return previousRow[source.length];
}
module.exports.levenshteinDistance = levenshteinDistance