mirror of
https://github.com/advplyr/audiobookshelf.git
synced 2026-02-03 16:59:41 +00:00
Add:OPML Upload for bulk adding podcasts #588
This commit is contained in:
parent
e5469cc0f8
commit
514893646a
16 changed files with 359 additions and 18 deletions
346
server/utils/parsers/parseFullName.js
Normal file
346
server/utils/parsers/parseFullName.js
Normal file
|
|
@ -0,0 +1,346 @@
|
|||
|
||||
|
||||
// https://github.com/RateGravity/parse-full-name/blob/master/index.js
|
||||
module.exports = (nameToParse, partToReturn, fixCase, stopOnError, useLongLists) => {
|
||||
|
||||
var i, j, k, l, m, n, part, comma, titleList, suffixList, prefixList, regex,
|
||||
partToCheck, partFound, partsFoundCount, firstComma, remainingCommas,
|
||||
nameParts = [], nameCommas = [null], partsFound = [],
|
||||
conjunctionList = ['&', 'and', 'et', 'e', 'of', 'the', 'und', 'y'],
|
||||
parsedName = {
|
||||
title: '', first: '', middle: '', last: '', nick: '', suffix: '', error: []
|
||||
};
|
||||
|
||||
// Validate inputs, or set to defaults
|
||||
partToReturn = partToReturn && ['title', 'first', 'middle', 'last', 'nick',
|
||||
'suffix', 'error'].indexOf(partToReturn.toLowerCase()) > -1 ?
|
||||
partToReturn.toLowerCase() : 'all';
|
||||
// 'all' = return object with all parts, others return single part
|
||||
if (fixCase === false) fixCase = 0;
|
||||
if (fixCase === true) fixCase = 1;
|
||||
fixCase = fixCase !== 'undefined' && (fixCase === 0 || fixCase === 1) ?
|
||||
fixCase : -1; // -1 = fix case only if input is all upper or lowercase
|
||||
if (stopOnError === true) stopOnError = 1;
|
||||
stopOnError = stopOnError && stopOnError === 1 ? 1 : 0;
|
||||
// false = output warnings on parse error, but don't stop
|
||||
if (useLongLists === true) useLongLists = 1;
|
||||
useLongLists = useLongLists && useLongLists === 1 ? 1 : 0; // 0 = short lists
|
||||
|
||||
// If stopOnError = 1, throw error, otherwise return error messages in array
|
||||
function handleError(errorMessage) {
|
||||
if (stopOnError) {
|
||||
throw 'Error: ' + errorMessage;
|
||||
} else {
|
||||
parsedName.error.push('Error: ' + errorMessage);
|
||||
}
|
||||
}
|
||||
|
||||
// If fixCase = 1, fix case of parsedName parts before returning
|
||||
function fixParsedNameCase(fixedCaseName, fixCaseNow) {
|
||||
var forceCaseList = ['e', 'y', 'av', 'af', 'da', 'dal', 'de', 'del', 'der', 'di',
|
||||
'la', 'le', 'van', 'der', 'den', 'vel', 'von', 'II', 'III', 'IV', 'J.D.', 'LL.M.',
|
||||
'M.D.', 'D.O.', 'D.C.', 'Ph.D.'];
|
||||
var forceCaseListIndex;
|
||||
var namePartLabels = [];
|
||||
var namePartWords;
|
||||
if (fixCaseNow) {
|
||||
namePartLabels = Object.keys(parsedName)
|
||||
.filter(function (v) { return v !== 'error'; });
|
||||
for (i = 0, l = namePartLabels.length; i < l; i++) {
|
||||
if (fixedCaseName[namePartLabels[i]]) {
|
||||
namePartWords = (fixedCaseName[namePartLabels[i]] + '').split(' ');
|
||||
for (j = 0, m = namePartWords.length; j < m; j++) {
|
||||
forceCaseListIndex = forceCaseList
|
||||
.map(function (v) { return v.toLowerCase(); })
|
||||
.indexOf(namePartWords[j].toLowerCase());
|
||||
if (forceCaseListIndex > -1) { // Set case of words in forceCaseList
|
||||
namePartWords[j] = forceCaseList[forceCaseListIndex];
|
||||
} else if (namePartWords[j].length === 1) { // Uppercase initials
|
||||
namePartWords[j] = namePartWords[j].toUpperCase();
|
||||
} else if (
|
||||
namePartWords[j].length > 2 &&
|
||||
namePartWords[j].slice(0, 1) ===
|
||||
namePartWords[j].slice(0, 1).toUpperCase() &&
|
||||
namePartWords[j].slice(1, 2) ===
|
||||
namePartWords[j].slice(1, 2).toLowerCase() &&
|
||||
namePartWords[j].slice(2) ===
|
||||
namePartWords[j].slice(2).toUpperCase()
|
||||
) { // Detect McCASE and convert to McCase
|
||||
namePartWords[j] = namePartWords[j].slice(0, 3) +
|
||||
namePartWords[j].slice(3).toLowerCase();
|
||||
} else if (
|
||||
namePartLabels[j] === 'suffix' &&
|
||||
nameParts[j].slice(-1) !== '.' &&
|
||||
!suffixList.indexOf(nameParts[j].toLowerCase())
|
||||
) { // Convert suffix abbreviations to UPPER CASE
|
||||
if (namePartWords[j] === namePartWords[j].toLowerCase()) {
|
||||
namePartWords[j] = namePartWords[j].toUpperCase();
|
||||
}
|
||||
} else { // Convert to Title Case
|
||||
namePartWords[j] = namePartWords[j].slice(0, 1).toUpperCase() +
|
||||
namePartWords[j].slice(1).toLowerCase();
|
||||
}
|
||||
}
|
||||
fixedCaseName[namePartLabels[i]] = namePartWords.join(' ');
|
||||
}
|
||||
}
|
||||
}
|
||||
return fixedCaseName;
|
||||
}
|
||||
|
||||
// If no input name, or input name is not a string, abort
|
||||
if (!nameToParse || typeof nameToParse !== 'string') {
|
||||
handleError('No input');
|
||||
parsedName = fixParsedNameCase(parsedName, fixCase);
|
||||
return partToReturn === 'all' ? parsedName : parsedName[partToReturn];
|
||||
} else {
|
||||
nameToParse = nameToParse.trim();
|
||||
}
|
||||
|
||||
// Auto-detect fixCase: fix if nameToParse is all upper or all lowercase
|
||||
if (fixCase === -1) {
|
||||
fixCase = (
|
||||
nameToParse === nameToParse.toUpperCase() ||
|
||||
nameToParse === nameToParse.toLowerCase() ? 1 : 0
|
||||
);
|
||||
}
|
||||
|
||||
// Initilize lists of prefixs, suffixs, and titles to detect
|
||||
// Note: These list entries must be all lowercase
|
||||
if (useLongLists) {
|
||||
suffixList = ['esq', 'esquire', 'jr', 'jnr', 'sr', 'snr', '2', 'ii', 'iii', 'iv',
|
||||
'v', 'clu', 'chfc', 'cfp', 'md', 'phd', 'j.d.', 'll.m.', 'm.d.', 'd.o.', 'd.c.',
|
||||
'p.c.', 'ph.d.'];
|
||||
prefixList = ['a', 'ab', 'antune', 'ap', 'abu', 'al', 'alm', 'alt', 'bab', 'bäck',
|
||||
'bar', 'bath', 'bat', 'beau', 'beck', 'ben', 'berg', 'bet', 'bin', 'bint', 'birch',
|
||||
'björk', 'björn', 'bjur', 'da', 'dahl', 'dal', 'de', 'degli', 'dele', 'del',
|
||||
'della', 'der', 'di', 'dos', 'du', 'e', 'ek', 'el', 'escob', 'esch', 'fleisch',
|
||||
'fitz', 'fors', 'gott', 'griff', 'haj', 'haug', 'holm', 'ibn', 'kauf', 'kil',
|
||||
'koop', 'kvarn', 'la', 'le', 'lind', 'lönn', 'lund', 'mac', 'mhic', 'mic', 'mir',
|
||||
'na', 'naka', 'neder', 'nic', 'ni', 'nin', 'nord', 'norr', 'ny', 'o', 'ua', 'ui\'',
|
||||
'öfver', 'ost', 'över', 'öz', 'papa', 'pour', 'quarn', 'skog', 'skoog', 'sten',
|
||||
'stor', 'ström', 'söder', 'ter', 'ter', 'tre', 'türk', 'van', 'väst', 'väster',
|
||||
'vest', 'von'];
|
||||
titleList = ['mr', 'mrs', 'ms', 'miss', 'dr', 'herr', 'monsieur', 'hr', 'frau',
|
||||
'a v m', 'admiraal', 'admiral', 'air cdre', 'air commodore', 'air marshal',
|
||||
'air vice marshal', 'alderman', 'alhaji', 'ambassador', 'baron', 'barones',
|
||||
'brig', 'brig gen', 'brig general', 'brigadier', 'brigadier general',
|
||||
'brother', 'canon', 'capt', 'captain', 'cardinal', 'cdr', 'chief', 'cik', 'cmdr',
|
||||
'coach', 'col', 'col dr', 'colonel', 'commandant', 'commander', 'commissioner',
|
||||
'commodore', 'comte', 'comtessa', 'congressman', 'conseiller', 'consul',
|
||||
'conte', 'contessa', 'corporal', 'councillor', 'count', 'countess',
|
||||
'crown prince', 'crown princess', 'dame', 'datin', 'dato', 'datuk',
|
||||
'datuk seri', 'deacon', 'deaconess', 'dean', 'dhr', 'dipl ing', 'doctor',
|
||||
'dott', 'dott sa', 'dr', 'dr ing', 'dra', 'drs', 'embajador', 'embajadora', 'en',
|
||||
'encik', 'eng', 'eur ing', 'exma sra', 'exmo sr', 'f o', 'father',
|
||||
'first lieutient', 'first officer', 'flt lieut', 'flying officer', 'fr',
|
||||
'frau', 'fraulein', 'fru', 'gen', 'generaal', 'general', 'governor', 'graaf',
|
||||
'gravin', 'group captain', 'grp capt', 'h e dr', 'h h', 'h m', 'h r h', 'hajah',
|
||||
'haji', 'hajim', 'her highness', 'her majesty', 'herr', 'high chief',
|
||||
'his highness', 'his holiness', 'his majesty', 'hon', 'hr', 'hra', 'ing', 'ir',
|
||||
'jonkheer', 'judge', 'justice', 'khun ying', 'kolonel', 'lady', 'lcda', 'lic',
|
||||
'lieut', 'lieut cdr', 'lieut col', 'lieut gen', 'lord', 'm', 'm l', 'm r',
|
||||
'madame', 'mademoiselle', 'maj gen', 'major', 'master', 'mevrouw', 'miss',
|
||||
'mlle', 'mme', 'monsieur', 'monsignor', 'mr', 'mrs', 'ms', 'mstr', 'nti', 'pastor',
|
||||
'president', 'prince', 'princess', 'princesse', 'prinses', 'prof', 'prof dr',
|
||||
'prof sir', 'professor', 'puan', 'puan sri', 'rabbi', 'rear admiral', 'rev',
|
||||
'rev canon', 'rev dr', 'rev mother', 'reverend', 'rva', 'senator', 'sergeant',
|
||||
'sheikh', 'sheikha', 'sig', 'sig na', 'sig ra', 'sir', 'sister', 'sqn ldr', 'sr',
|
||||
'sr d', 'sra', 'srta', 'sultan', 'tan sri', 'tan sri dato', 'tengku', 'teuku',
|
||||
'than puying', 'the hon dr', 'the hon justice', 'the hon miss', 'the hon mr',
|
||||
'the hon mrs', 'the hon ms', 'the hon sir', 'the very rev', 'toh puan', 'tun',
|
||||
'vice admiral', 'viscount', 'viscountess', 'wg cdr', 'ind', 'misc', 'mx'];
|
||||
} else {
|
||||
suffixList = ['esq', 'esquire', 'jr', 'jnr', 'sr', 'snr', '2', 'ii', 'iii', 'iv',
|
||||
'md', 'phd', 'j.d.', 'll.m.', 'm.d.', 'd.o.', 'd.c.', 'p.c.', 'ph.d.'];
|
||||
prefixList = ['ab', 'bar', 'bin', 'da', 'dal', 'de', 'de la', 'del', 'della', 'der',
|
||||
'di', 'du', 'ibn', 'l\'', 'la', 'le', 'san', 'st', 'st.', 'ste', 'ter', 'van',
|
||||
'van de', 'van der', 'van den', 'vel', 'ver', 'vere', 'von'];
|
||||
titleList = ['dr', 'miss', 'mr', 'mrs', 'ms', 'prof', 'sir', 'frau', 'herr', 'hr',
|
||||
'monsieur', 'captain', 'doctor', 'judge', 'officer', 'professor', 'ind', 'misc',
|
||||
'mx'];
|
||||
}
|
||||
|
||||
// Nickname: remove and store parts with surrounding punctuation as nicknames
|
||||
regex = /\s(?:[‘’']([^‘’']+)[‘’']|[“”"]([^“”"]+)[“”"]|\[([^\]]+)\]|\(([^\)]+)\)),?\s/g;
|
||||
partFound = (' ' + nameToParse + ' ').match(regex);
|
||||
if (partFound) partsFound = partsFound.concat(partFound);
|
||||
partsFoundCount = partsFound.length;
|
||||
if (partsFoundCount === 1) {
|
||||
parsedName.nick = partsFound[0].slice(2).slice(0, -2);
|
||||
if (parsedName.nick.slice(-1) === ',') {
|
||||
parsedName.nick = parsedName.nick.slice(0, -1);
|
||||
}
|
||||
nameToParse = (' ' + nameToParse + ' ').replace(partsFound[0], ' ').trim();
|
||||
partsFound = [];
|
||||
} else if (partsFoundCount > 1) {
|
||||
handleError(partsFoundCount + ' nicknames found');
|
||||
for (i = 0; i < partsFoundCount; i++) {
|
||||
nameToParse = (' ' + nameToParse + ' ')
|
||||
.replace(partsFound[i], ' ').trim();
|
||||
partsFound[i] = partsFound[i].slice(2).slice(0, -2);
|
||||
if (partsFound[i].slice(-1) === ',') {
|
||||
partsFound[i] = partsFound[i].slice(0, -1);
|
||||
}
|
||||
}
|
||||
parsedName.nick = partsFound.join(', ');
|
||||
partsFound = [];
|
||||
}
|
||||
if (!nameToParse.trim().length) {
|
||||
parsedName = fixParsedNameCase(parsedName, fixCase);
|
||||
return partToReturn === 'all' ? parsedName : parsedName[partToReturn];
|
||||
}
|
||||
|
||||
// Split remaining nameToParse into parts, remove and store preceding commas
|
||||
for (i = 0, n = nameToParse.split(' '), l = n.length; i < l; i++) {
|
||||
part = n[i];
|
||||
comma = null;
|
||||
if (part.slice(-1) === ',') {
|
||||
comma = ',';
|
||||
part = part.slice(0, -1);
|
||||
}
|
||||
nameParts.push(part);
|
||||
nameCommas.push(comma);
|
||||
}
|
||||
|
||||
// Suffix: remove and store matching parts as suffixes
|
||||
for (l = nameParts.length, i = l - 1; i > 0; i--) {
|
||||
partToCheck = (nameParts[i].slice(-1) === '.' ?
|
||||
nameParts[i].slice(0, -1).toLowerCase() : nameParts[i].toLowerCase());
|
||||
if (
|
||||
suffixList.indexOf(partToCheck) > -1 ||
|
||||
suffixList.indexOf(partToCheck + '.') > -1
|
||||
) {
|
||||
partsFound = nameParts.splice(i, 1).concat(partsFound);
|
||||
if (nameCommas[i] === ',') { // Keep comma, either before or after
|
||||
nameCommas.splice(i + 1, 1);
|
||||
} else {
|
||||
nameCommas.splice(i, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
partsFoundCount = partsFound.length;
|
||||
if (partsFoundCount === 1) {
|
||||
parsedName.suffix = partsFound[0];
|
||||
partsFound = [];
|
||||
} else if (partsFoundCount > 1) {
|
||||
handleError(partsFoundCount + ' suffixes found');
|
||||
parsedName.suffix = partsFound.join(', ');
|
||||
partsFound = [];
|
||||
}
|
||||
if (!nameParts.length) {
|
||||
parsedName = fixParsedNameCase(parsedName, fixCase);
|
||||
return partToReturn === 'all' ? parsedName : parsedName[partToReturn];
|
||||
}
|
||||
|
||||
// Title: remove and store matching parts as titles
|
||||
for (l = nameParts.length, i = l - 1; i >= 0; i--) {
|
||||
partToCheck = (nameParts[i].slice(-1) === '.' ?
|
||||
nameParts[i].slice(0, -1).toLowerCase() : nameParts[i].toLowerCase());
|
||||
if (
|
||||
titleList.indexOf(partToCheck) > -1 ||
|
||||
titleList.indexOf(partToCheck + '.') > -1
|
||||
) {
|
||||
partsFound = nameParts.splice(i, 1).concat(partsFound);
|
||||
if (nameCommas[i] === ',') { // Keep comma, either before or after
|
||||
nameCommas.splice(i + 1, 1);
|
||||
} else {
|
||||
nameCommas.splice(i, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
partsFoundCount = partsFound.length;
|
||||
if (partsFoundCount === 1) {
|
||||
parsedName.title = partsFound[0];
|
||||
partsFound = [];
|
||||
} else if (partsFoundCount > 1) {
|
||||
handleError(partsFoundCount + ' titles found');
|
||||
parsedName.title = partsFound.join(', ');
|
||||
partsFound = [];
|
||||
}
|
||||
if (!nameParts.length) {
|
||||
parsedName = fixParsedNameCase(parsedName, fixCase);
|
||||
return partToReturn === 'all' ? parsedName : parsedName[partToReturn];
|
||||
}
|
||||
|
||||
// Join name prefixes to following names
|
||||
if (nameParts.length > 1) {
|
||||
for (i = nameParts.length - 2; i >= 0; i--) {
|
||||
if (prefixList.indexOf(nameParts[i].toLowerCase()) > -1) {
|
||||
nameParts[i] = nameParts[i] + ' ' + nameParts[i + 1];
|
||||
nameParts.splice(i + 1, 1);
|
||||
nameCommas.splice(i + 1, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Join conjunctions to surrounding names
|
||||
if (nameParts.length > 2) {
|
||||
for (i = nameParts.length - 3; i >= 0; i--) {
|
||||
if (conjunctionList.indexOf(nameParts[i + 1].toLowerCase()) > -1) {
|
||||
nameParts[i] = nameParts[i] + ' ' + nameParts[i + 1] + ' ' + nameParts[i + 2];
|
||||
nameParts.splice(i + 1, 2);
|
||||
nameCommas.splice(i + 1, 2);
|
||||
i--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Suffix: remove and store items after extra commas as suffixes
|
||||
nameCommas.pop();
|
||||
firstComma = nameCommas.indexOf(',');
|
||||
remainingCommas = nameCommas.filter(function (v) { return v !== null; }).length;
|
||||
if (firstComma > 1 || remainingCommas > 1) {
|
||||
for (i = nameParts.length - 1; i >= 2; i--) {
|
||||
if (nameCommas[i] === ',') {
|
||||
partsFound = nameParts.splice(i, 1).concat(partsFound);
|
||||
nameCommas.splice(i, 1);
|
||||
remainingCommas--;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (partsFound.length) {
|
||||
if (parsedName.suffix) {
|
||||
partsFound = [parsedName.suffix].concat(partsFound);
|
||||
}
|
||||
parsedName.suffix = partsFound.join(', ');
|
||||
partsFound = [];
|
||||
}
|
||||
|
||||
// Last name: remove and store last name
|
||||
if (remainingCommas > 0) {
|
||||
if (remainingCommas > 1) {
|
||||
handleError((remainingCommas - 1) + ' extra commas found');
|
||||
}
|
||||
// Remove and store all parts before first comma as last name
|
||||
if (nameCommas.indexOf(',')) {
|
||||
parsedName.last = nameParts.splice(0, nameCommas.indexOf(',')).join(' ');
|
||||
nameCommas.splice(0, nameCommas.indexOf(','));
|
||||
}
|
||||
} else {
|
||||
// Remove and store last part as last name
|
||||
parsedName.last = nameParts.pop();
|
||||
}
|
||||
if (!nameParts.length) {
|
||||
parsedName = fixParsedNameCase(parsedName, fixCase);
|
||||
return partToReturn === 'all' ? parsedName : parsedName[partToReturn];
|
||||
}
|
||||
|
||||
// First name: remove and store first part as first name
|
||||
parsedName.first = nameParts.shift();
|
||||
if (!nameParts.length) {
|
||||
parsedName = fixParsedNameCase(parsedName, fixCase);
|
||||
return partToReturn === 'all' ? parsedName : parsedName[partToReturn];
|
||||
}
|
||||
|
||||
// Middle name: store all remaining parts as middle name
|
||||
if (nameParts.length > 2) {
|
||||
handleError(nameParts.length + ' middle names');
|
||||
}
|
||||
parsedName.middle = nameParts.join(' ');
|
||||
|
||||
parsedName = fixParsedNameCase(parsedName, fixCase);
|
||||
return partToReturn === 'all' ? parsedName : parsedName[partToReturn];
|
||||
};
|
||||
94
server/utils/parsers/parseNameString.js
Normal file
94
server/utils/parsers/parseNameString.js
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
//
|
||||
// This takes a string and parses out first and last names
|
||||
// accepts comma separated lists e.g. "Jon Smith, Jane Smith" or "Smith, Jon, Smith, Jane"
|
||||
// can be separated by "&" e.g. "Jon Smith & Jane Smith" or "Smith, Jon & Smith, Jane"
|
||||
//
|
||||
const parseFullName = require('./parseFullName')
|
||||
|
||||
/**
 * Parse one name with parseFullName and reduce it to a first/last pair.
 * Any middle name is appended to the first name.
 *
 * @param {string} name
 * @returns {{first_name: string, last_name: string}}
 */
function parseName(name) {
  const { first, middle, last } = parseFullName(name)
  const first_name = first && middle ? first + ' ' + middle : first
  return { first_name, last_name: last }
}
|
||||
|
||||
// Check if this name segment is of the format "Last, First" or "First Last"
|
||||
// returns true if it is "Last, First"
|
||||
/**
 * Decide whether a name segment is a last name on its own.
 *
 * @param {string} name
 * @returns {boolean} true when the segment has no spaces, or when
 *   parseFullName finds no first name in it (i.e. "von Mises")
 */
function checkIsALastName(name) {
  const hasSpace = name.includes(' ')
  if (!hasSpace) return true // a single word is treated as a last name

  // Has spaces: it is a last name only if no first name could be parsed out
  return !parseFullName(name).first
}
|
||||
|
||||
// Handle name already in First Last format and return Last, First
|
||||
/**
 * Convert a "First Last" name into "Last, First".
 * When only one of the two parts is found, that part is returned alone.
 *
 * @param {string} firstLast
 * @returns {string}
 */
module.exports.nameToLastFirst = (firstLast) => {
  const { first_name, last_name } = parseName(firstLast)
  if (!last_name) return first_name
  if (!first_name) return last_name
  return `${last_name}, ${first_name}`
}
|
||||
|
||||
// Handle any name string
|
||||
/**
 * Parse a string holding one or more names into "First Last" and
 * "Last, First" forms. Names may be separated by "," and/or "&", and may be
 * written either as "First Last" or as "Last, First".
 *
 * @param {string} nameString
 * @returns {{nameFL: string, nameLF: string, names: string[]}|null} null when
 *   nameString is empty or not a string
 */
module.exports.parse = (nameString) => {
  if (!nameString || typeof nameString !== 'string') return null

  // Split on "&" first, then on ",", e.g. "Friedman, Milton & Friedman, Rose"
  let splitNames = nameString
    .split('&')
    .reduce((chunks, group) => chunks.concat(group.split(',')), [])
    .map((chunk) => chunk.trim())

  let names = []

  if (splitNames.length === 1) {
    // 1 name FIRST LAST
    names.push(parseName(nameString))
  } else if (checkIsALastName(splitNames[0])) {
    // LAST,FIRST pairs. An odd chunk count means there is a straggler
    // (could be roman numerals or a suffix), ignore it
    if (splitNames.length % 2 !== 0) {
      splitNames = splitNames.slice(0, -1)
    }
    for (let i = 0; i < splitNames.length; i += 2) {
      names.push({
        first_name: splitNames[i + 1],
        last_name: splitNames[i]
      })
    }
  } else {
    names = splitNames.map((segment) => parseName(segment))
  }

  // Filter out names that have no first and last
  names = names.filter((n) => n.first_name || n.last_name)

  // Join only the parts that exist so a missing last name cannot leave a
  // trailing space ("Jon ") or a leading separator (", Jon")
  const joinParts = (parts, separator) => parts.filter(Boolean).join(separator)
  const namesArray = names.map((a) => joinParts([a.first_name, a.last_name], ' '))
  const lastFirst = names.map((a) => joinParts([a.last_name, a.first_name], ', ')).join(', ')

  return {
    nameFL: namesArray.join(', '), // String of comma separated first last
    nameLF: lastFirst, // String of comma separated last, first
    names: namesArray // Array of first last
  }
}
|
||||
24
server/utils/parsers/parseOPML.js
Normal file
24
server/utils/parsers/parseOPML.js
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
const h = require('htmlparser2')
|
||||
const Logger = require('../../Logger')
|
||||
|
||||
/**
 * Extract podcast feeds from OPML text.
 * Every <outline type="rss"> tag with an xmlurl attribute becomes one feed;
 * outline tags of that type without an xmlurl are logged and skipped.
 *
 * @param {string} opmlText
 * @returns {{title: string, text: string, feedUrl: string}[]}
 */
function parse(opmlText) {
  const feeds = []
  const parser = new h.Parser({
    onopentag: (name, attribs) => {
      if (name !== 'outline' || attribs.type !== 'rss') return

      if (!attribs.xmlurl) {
        Logger.error('[parseOPML] Invalid opml outline tag has no xmlurl attribute')
        return
      }
      feeds.push({
        title: attribs.title || 'No Title',
        text: attribs.text || '',
        feedUrl: attribs.xmlurl
      })
    }
  })
  parser.write(opmlText)
  // Signal end of input so the parser is finalized
  parser.end()
  return feeds
}
|
||||
module.exports.parse = parse
|
||||
147
server/utils/parsers/parseOpfMetadata.js
Normal file
147
server/utils/parsers/parseOpfMetadata.js
Normal file
|
|
@ -0,0 +1,147 @@
|
|||
const { xmlToJSON } = require('../index')
|
||||
const htmlSanitizer = require('../htmlSanitizer')
|
||||
|
||||
/**
 * Map the dc:creator entries of an OPF metadata object to
 * { value, role, fileAs } records.
 *
 * @param {Object} metadata
 * @returns {Array<Object|false>|null} null when there are no dc:creator
 *   entries; entries that are not objects, or that lack attributes ('$') or
 *   text ('_'), are returned as false
 */
function parseCreators(metadata) {
  const creators = metadata['dc:creator']
  if (!creators || !creators.length) return null

  return creators.map((creator) => {
    const isUsable = typeof creator === 'object' && creator['$'] && creator['_']
    if (!isUsable) return false

    const attributes = creator['$']
    return {
      value: creator['_'],
      role: attributes['opf:role'] || null,
      fileAs: attributes['opf:file-as'] || null
    }
  })
}
|
||||
|
||||
/**
 * Find the value of the first creator with the given role.
 *
 * @param {Array|null} creators - result of parseCreators
 * @param {string} role - e.g. 'aut' or 'nrt'
 * @returns {*} the matching creator's value, or null when there is no match
 */
function fetchCreator(creators, role) {
  const hasCreators = Boolean(creators && creators.length)
  if (!hasCreators) return null

  const match = creators.find((entry) => entry.role === role)
  if (!match) return null
  return match.value
}
|
||||
|
||||
/**
 * Read the first value stored under a tag, if that value is a string.
 *
 * @param {Object} metadata
 * @param {string} tag - key to read, e.g. 'dc:title'
 * @returns {string|null} null when the tag is missing, empty, or its first
 *   value is not a string
 */
function fetchTagString(metadata, tag) {
  const values = metadata[tag]
  if (!values || !values.length) return null

  const firstValue = values[0]
  return typeof firstValue === 'string' ? firstValue : null
}
|
||||
|
||||
/**
 * Extract the year from the dc:date tag.
 *
 * @param {Object} metadata
 * @returns {string|null} the text before the first '-' when it is 4
 *   characters long and numeric, otherwise null
 */
function fetchDate(metadata) {
  const date = fetchTagString(metadata, 'dc:date')
  if (!date) return null

  const [year] = date.split('-')
  const looksLikeYear = year.length === 4 && !isNaN(year)
  return looksLikeYear ? year : null
}
|
||||
|
||||
/**
 * Read the dc:publisher tag.
 *
 * @param {Object} metadata
 * @returns {string|null}
 */
function fetchPublisher(metadata) {
  const publisher = fetchTagString(metadata, 'dc:publisher')
  return publisher
}
|
||||
|
||||
/**
 * Find the ISBN among the dc:identifier entries.
 *
 * @param {Object} metadata
 * @returns {*} the text of the first identifier whose opf:scheme attribute
 *   is 'ISBN', or null when there is none or its text is empty
 */
function fetchISBN(metadata) {
  const identifiers = metadata['dc:identifier']
  if (!identifiers || !identifiers.length) return null

  const isbnEntry = identifiers.find((identifier) => {
    const attributes = identifier['$']
    return attributes && attributes['opf:scheme'] === 'ISBN'
  })
  if (!isbnEntry) return null
  return isbnEntry['_'] || null
}
|
||||
|
||||
/**
 * Read the dc:title tag.
 *
 * @param {Object} metadata
 * @returns {string|null}
 */
function fetchTitle(metadata) {
  const title = fetchTagString(metadata, 'dc:title')
  return title
}
|
||||
|
||||
/**
 * Read the dc:description tag as plain text.
 *
 * @param {Object} metadata
 * @returns {string|null} the description with all HTML tags stripped, or
 *   null when there is no description
 */
function fetchDescription(metadata) {
  const description = fetchTagString(metadata, 'dc:description')
  if (!description) return null

  // check if description is HTML or plain text. only plain text allowed
  // calibre stores < and > as &lt; and &gt;, so decode those entities first
  // to let the sanitizer see (and strip) the real tags
  const decoded = description.replace(/&lt;/g, '<').replace(/&gt;/g, '>')
  return htmlSanitizer.stripAllTags(decoded)
}
|
||||
|
||||
/**
 * Collect the genres listed under dc:subject.
 *
 * @param {Object} metadata
 * @returns {string[]} the non-empty string entries, or [] when the tag is
 *   missing or empty
 */
function fetchGenres(metadata) {
  const subjects = metadata['dc:subject']
  if (!subjects || !subjects.length) return []

  return subjects.filter((subject) => typeof subject === 'string' && subject !== '')
}
|
||||
|
||||
/**
 * Read the dc:language tag.
 *
 * @param {Object} metadata
 * @returns {string|null}
 */
function fetchLanguage(metadata) {
  const language = fetchTagString(metadata, 'dc:language')
  return language
}
|
||||
|
||||
/**
 * Read the calibre:series entry.
 *
 * @param {Object} metadataMeta - entries keyed by name, each an array of values
 * @returns {string|null} null when metadataMeta is missing or has no series
 */
function fetchSeries(metadataMeta) {
  return metadataMeta ? fetchTagString(metadataMeta, 'calibre:series') : null
}
|
||||
|
||||
/**
 * Read the calibre:series_index entry.
 *
 * @param {Object} metadataMeta - entries keyed by name, each an array of values
 * @returns {string|null} null when metadataMeta is missing or has no index
 */
function fetchVolumeNumber(metadataMeta) {
  return metadataMeta ? fetchTagString(metadataMeta, 'calibre:series_index') : null
}
|
||||
|
||||
/**
 * Determine the narrator(s) of a book.
 * A creator with role 'nrt' wins; otherwise the calibre custom column
 * "#narrators" stored in metadata.meta is used.
 *
 * @param {Array|null} creators - result of parseCreators
 * @param {Object} metadata - metadata.meta is read when no 'nrt' creator exists
 * @returns {string|null} narrator value, the calibre narrators joined with
 *   ", ", or null when nothing usable is found
 */
function fetchNarrators(creators, metadata) {
  const roleNrt = fetchCreator(creators, 'nrt')
  if (typeof metadata.meta === 'undefined' || roleNrt != null) return roleNrt

  const rawNarrators = fetchTagString(metadata.meta, 'calibre:user_metadata:#narrators')
  if (!rawNarrators) return null

  try {
    // The JSON is stored with its double quotes encoded as &quot;
    const narratorsJSON = JSON.parse(rawNarrators.replace(/&quot;/g, '"'))
    return narratorsJSON['#value#'].join(', ')
  } catch {
    // Best effort: malformed or unexpectedly shaped JSON means no narrators
    return null
  }
}
|
||||
|
||||
/**
 * Collect the tags listed under dc:tag.
 *
 * @param {Object} metadata
 * @returns {string[]} the string entries, or [] when the tag is missing or
 *   empty
 */
function fetchTags(metadata) {
  const tags = metadata['dc:tag']
  const hasTags = Boolean(tags && tags.length)
  return hasTags ? tags.filter((entry) => typeof entry === 'string') : []
}
|
||||
|
||||
/**
 * Remove a namespace prefix from a key, e.g. "ns0:package" -> "package".
 *
 * @param {string} str
 * @returns {string} the text after the last ':', or '' for a falsy input
 */
function stripPrefix(str) {
  if (!str) return ''
  const segments = str.split(':')
  return segments[segments.length - 1]
}
|
||||
|
||||
/**
 * Parse the XML of an OPF file into book metadata.
 *
 * @param {string} xml
 * @returns {Promise<Object|null>} title, author, narrator, publishedYear,
 *   publisher, isbn, description, genres, language, series, sequence and
 *   tags; null when the XML yields no JSON, no package element or no metadata
 */
module.exports.parseOpfMetadataXML = async (xml) => {
  const json = await xmlToJSON(xml)
  if (!json) return null

  // Handle <package ...> or with prefix <ns0:package ...>
  const packageKey = Object.keys(json).find((key) => stripPrefix(key) === 'package')
  if (!packageKey) return null

  const packageNode = json[packageKey]
  // Text before the first ':' (the whole key when there is no ':')
  const [prefix] = packageKey.split(':')

  // Try the prefixed child first, then fall back to the unprefixed one
  let metadata = (prefix && packageNode[`${prefix}:metadata`]) || packageNode.metadata
  if (!metadata) return null

  if (Array.isArray(metadata)) {
    if (!metadata.length) return null
    metadata = metadata[0]
  }

  const metaEntries = (prefix && metadata[`${prefix}:meta`]) || metadata.meta

  // Replace the raw <meta> list with a name -> [content] lookup
  metadata.meta = {}
  if (metaEntries && metaEntries.length) {
    for (const entry of metaEntries) {
      const attributes = entry && entry['$']
      if (attributes && attributes.name) {
        metadata.meta[attributes.name] = [attributes.content || '']
      }
    }
  }

  const creators = parseCreators(metadata)
  return {
    title: fetchTitle(metadata),
    author: fetchCreator(creators, 'aut'),
    narrator: fetchNarrators(creators, metadata),
    publishedYear: fetchDate(metadata),
    publisher: fetchPublisher(metadata),
    isbn: fetchISBN(metadata),
    description: fetchDescription(metadata),
    genres: fetchGenres(metadata),
    language: fetchLanguage(metadata),
    series: fetchSeries(metadata.meta),
    sequence: fetchVolumeNumber(metadata.meta),
    tags: fetchTags(metadata)
  }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue