mirror of
https://github.com/advplyr/audiobookshelf.git
synced 2026-05-12 22:41:29 +00:00
Redesign Smart Speed to adjust playbackRate dynamically instead of dropping samples; fix TimeMapper bugs by mapping AudioContext time to media time; and prevent the SilenceMap memory leak by capping the number of stored regions
This commit is contained in:
parent
fa2460868e
commit
545c77a2dc
4 changed files with 32 additions and 314 deletions
|
|
@ -29,7 +29,6 @@ export default class LocalAudioPlayer extends EventEmitter {
|
|||
|
||||
this.silenceMap = new SilenceMap()
|
||||
this.silenceDetectorNode = null
|
||||
this.silenceCompressorNode = null
|
||||
this.timeMapper = new TimeMapper([], 1.0)
|
||||
this.smartSpeedRatio = 2.0
|
||||
this.enableSmartSpeed = false
|
||||
|
|
@ -102,10 +101,8 @@ export default class LocalAudioPlayer extends EventEmitter {
|
|||
}
|
||||
|
||||
updateSmartSpeedRegions() {
|
||||
if (this.silenceCompressorNode) {
|
||||
this.silenceCompressorNode.port.postMessage({ type: 'set-regions', regions: this.silenceMap.getRegions() })
|
||||
}
|
||||
this.timeMapper = new TimeMapper(this.silenceMap.getRegions(), this.smartSpeedRatio)
|
||||
this.emit('timeSaved', this.timeMapper.totalTimeSaved())
|
||||
}
|
||||
|
||||
async initSilenceDetector() {
|
||||
|
|
@ -114,24 +111,27 @@ export default class LocalAudioPlayer extends EventEmitter {
|
|||
|
||||
try {
|
||||
await this.audioContext.audioWorklet.addModule('/client/players/smart-speed/SilenceDetectorProcessor.js')
|
||||
await this.audioContext.audioWorklet.addModule('/client/players/smart-speed/SilenceCompressorProcessor.js')
|
||||
this.silenceDetectorNode = new AudioWorkletNode(this.audioContext, 'silence-detector')
|
||||
this.silenceCompressorNode = new AudioWorkletNode(this.audioContext, 'silence-compressor')
|
||||
this.silenceCompressorNode.port.postMessage({ type: 'set-ratio', value: this.smartSpeedRatio })
|
||||
this.silenceCompressorNode.port.onmessage = (event) => {
|
||||
const msg = event.data
|
||||
if (msg.type === 'time-saved') {
|
||||
this.emit('timeSaved', msg.ms)
|
||||
}
|
||||
}
|
||||
|
||||
this.silenceDetectorNode.port.onmessage = (event) => {
|
||||
const msg = event.data
|
||||
if (msg.type === 'silence-start') {
|
||||
this._silenceStartTime = msg.time
|
||||
// Map AudioContext time to Media time
|
||||
const delayMs = this.audioContext.currentTime * 1000 - msg.time
|
||||
this._silenceStartTime = this.player.currentTime * 1000 - delayMs
|
||||
|
||||
// Dynamically increase playback rate
|
||||
if (this.enableSmartSpeed) {
|
||||
this.player.playbackRate = this.defaultPlaybackRate * this.smartSpeedRatio
|
||||
}
|
||||
} else if (msg.type === 'silence-end') {
|
||||
if (this.enableSmartSpeed) {
|
||||
this.player.playbackRate = this.defaultPlaybackRate
|
||||
}
|
||||
if (this._silenceStartTime !== null) {
|
||||
this.silenceMap.addRegion(this._silenceStartTime, msg.time)
|
||||
const delayMs = this.audioContext.currentTime * 1000 - msg.time
|
||||
const silenceEndTime = this.player.currentTime * 1000 - delayMs
|
||||
this.silenceMap.addRegion(this._silenceStartTime, silenceEndTime)
|
||||
this._silenceStartTime = null
|
||||
this.updateSmartSpeedRegions()
|
||||
}
|
||||
|
|
@ -140,8 +140,7 @@ export default class LocalAudioPlayer extends EventEmitter {
|
|||
|
||||
this.audioSourceNode.disconnect()
|
||||
this.audioSourceNode.connect(this.silenceDetectorNode)
|
||||
this.silenceDetectorNode.connect(this.silenceCompressorNode)
|
||||
this.silenceCompressorNode.connect(this.audioContext.destination)
|
||||
this.silenceDetectorNode.connect(this.audioContext.destination)
|
||||
|
||||
this._silenceStartTime = null
|
||||
console.log('[LocalPlayer] Silence detector initialised')
|
||||
|
|
@ -160,15 +159,14 @@ export default class LocalAudioPlayer extends EventEmitter {
|
|||
}
|
||||
this.silenceDetectorNode = null
|
||||
}
|
||||
if (this.silenceCompressorNode) {
|
||||
try {
|
||||
this.silenceCompressorNode.disconnect()
|
||||
} catch (err) {}
|
||||
this.silenceCompressorNode = null
|
||||
}
|
||||
this.silenceMap.reset()
|
||||
this.updateSmartSpeedRegions()
|
||||
this._silenceStartTime = null
|
||||
|
||||
// Reset playback rate in case we were in the middle of a silence region
|
||||
if (this.player && this.player.playbackRate !== this.defaultPlaybackRate) {
|
||||
this.player.playbackRate = this.defaultPlaybackRate
|
||||
}
|
||||
}
|
||||
|
||||
evtPlay() {
|
||||
|
|
@ -388,11 +386,6 @@ export default class LocalAudioPlayer extends EventEmitter {
|
|||
var currentTrackOffset = this.currentTrack.startOffset || 0
|
||||
if (!this.player) return 0
|
||||
|
||||
if (this.enableSmartSpeed) {
|
||||
var audioMs = this.player.currentTime * 1000
|
||||
var wallMs = this.timeMapper.audioToWallClock(audioMs)
|
||||
return currentTrackOffset + (wallMs / 1000)
|
||||
}
|
||||
return currentTrackOffset + this.player.currentTime
|
||||
}
|
||||
|
||||
|
|
@ -420,16 +413,16 @@ export default class LocalAudioPlayer extends EventEmitter {
|
|||
seek(time, playWhenReady) {
|
||||
if (!this.player) return
|
||||
|
||||
// Map wall-clock seek time to audio time before resetting regions
|
||||
var mappedTime = time
|
||||
if (this.enableSmartSpeed && time >= (this.currentTrack.startOffset || 0) && time <= (this.currentTrack.startOffset || 0) + (this.currentTrack.duration || Infinity)) {
|
||||
var offsetTime = mappedTime - (this.currentTrack.startOffset || 0)
|
||||
mappedTime = (this.currentTrack.startOffset || 0) + (this.timeMapper.wallClockToAudio(offsetTime * 1000) / 1000)
|
||||
}
|
||||
|
||||
this.silenceMap.reset()
|
||||
this.updateSmartSpeedRegions()
|
||||
this.playWhenReady = playWhenReady
|
||||
|
||||
// Reset playback rate in case we were in a silence region
|
||||
if (this.enableSmartSpeed && this.player.playbackRate !== this.defaultPlaybackRate) {
|
||||
this.player.playbackRate = this.defaultPlaybackRate
|
||||
}
|
||||
|
||||
if (this.isHlsTranscode) {
|
||||
// Seeking HLS stream
|
||||
|
|
|
|||
|
|
@ -1,122 +0,0 @@
|
|||
/**
 * AudioWorklet processor that shortens silence by resampling it.
 *
 * Messages accepted on the port:
 *   - `{ type: 'set-regions', regions: [{ start, end }] }` replaces the region
 *     list. Bounds are compared against the render clock in milliseconds, and
 *     regions shorter than `minRegionMs` are discarded.
 *   - `{ type: 'set-ratio', value }` sets the compression ratio.
 *
 * While the frame being read lies inside a region and the ratio is above 1,
 * the read position advances by `ratio` input frames per output frame, using
 * linear interpolation between neighbouring frames. The block therefore
 * produces fewer output frames than it consumed, and the unused tail of the
 * output block is zero-filled. The running total of skipped time is posted
 * back as `{ type: 'time-saved', ms }`.
 */
class SilenceCompressorProcessor extends AudioWorkletProcessor {
  constructor() {
    super()
    this.regions = []
    this.ratio = 1.0
    this.totalCompressedMs = 0
    this.rampDurationSec = 0.005 // 5ms
    this.minRegionMs = 200 // shorter regions are never compressed

    this.port.onmessage = (event) => {
      const msg = event.data
      if (!msg) return

      if (msg.type === 'set-regions') {
        // A missing or malformed payload clears the list instead of throwing
        // on the audio thread.
        const incoming = Array.isArray(msg.regions) ? msg.regions : []
        this.regions = incoming.filter((r) => r && r.end - r.start >= this.minRegionMs)
      } else if (msg.type === 'set-ratio') {
        // NaN, Infinity and non-numbers are ignored: an infinite step would
        // make the time-saved total infinite.
        if (Number.isFinite(msg.value)) this.ratio = msg.value
      }
    }
  }

  /**
   * Finds the first stored region whose bounds (inclusive) contain `timeMs`.
   *
   * @param {number} timeMs - Time in milliseconds.
   * @returns {{start: number, end: number}|null} The matching region, or null.
   */
  getActiveRegion(timeMs) {
    return this.regions.find((r) => timeMs >= r.start && timeMs <= r.end) || null
  }

  /**
   * Gain applied to a frame inside `region`: rises linearly from 0 to 1 over
   * the first `rampDurationSec` of the region, falls linearly from 1 to 0 over
   * the last `rampDurationSec`, and is 1 in between. The entry ramp takes
   * precedence when both apply.
   *
   * @param {number} timeMs - Time of the frame in milliseconds.
   * @param {{start: number, end: number}} region - Region containing `timeMs`.
   * @returns {number} Gain factor for the frame.
   */
  calculateRampGain(timeMs, region) {
    const rampMs = this.rampDurationSec * 1000
    const sinceStart = timeMs - region.start
    const untilEnd = region.end - timeMs

    if (sinceStart < rampMs) return sinceStart / rampMs
    if (untilEnd < rampMs) return untilEnd / rampMs
    return 1.0
  }

  /**
   * Renders one block, compressing frames that fall inside a silence region.
   *
   * @param {Float32Array[][]} inputs - Only `inputs[0]` is read.
   * @param {Float32Array[][]} outputs - Only `outputs[0]` is written.
   * @param {Object} parameters - Unused.
   * @returns {boolean} Always true, so the processor is kept alive.
   */
  process(inputs, outputs, parameters) {
    const input = inputs[0]
    const output = outputs[0]

    if (!input || !input.length || !output || !output.length) return true

    const numFrames = input[0].length
    // Only channels that exist on both sides can be copied. Indexing the
    // output by the input channel count throws when the output has fewer.
    const numChannels = Math.min(input.length, output.length)
    const sampleRateC = typeof sampleRate !== 'undefined' ? sampleRate : 48000
    // Use currentTime if available, otherwise fall back to 0 (for tests)
    const currentTimeSec = typeof currentTime !== 'undefined' ? currentTime : 0
    // With a ratio of 1 or less nothing is compressed, so the per-frame region
    // lookup can be skipped entirely.
    const compressing = this.ratio > 1.0

    let inputIndex = 0
    let outputIndex = 0
    let savedSecThisBlock = 0

    // outputIndex never overtakes inputIndex (step >= 1), so the loop bound on
    // inputIndex also keeps every write inside the block.
    while (inputIndex < numFrames) {
      const sampleTimeSec = currentTimeSec + inputIndex / sampleRateC
      const sampleTimeMs = sampleTimeSec * 1000
      const region = compressing ? this.getActiveRegion(sampleTimeMs) : null

      const step = region ? this.ratio : 1.0
      const rampGain = region ? this.calculateRampGain(sampleTimeMs, region) : 1.0

      const intIndex = Math.floor(inputIndex)
      const frac = inputIndex - intIndex

      for (let c = 0; c < numChannels; c++) {
        const inChannel = input[c]
        let sample = inChannel[intIndex]
        // Linear interpolation for fractional read positions; the last frame
        // of the block has no right-hand neighbour and is used as-is.
        if (frac > 0 && intIndex + 1 < numFrames) {
          sample += frac * (inChannel[intIndex + 1] - sample)
        }
        output[c][outputIndex] = sample * rampGain
      }

      inputIndex += step
      outputIndex += 1

      if (step > 1.0) {
        savedSecThisBlock += (step - 1.0) / sampleRateC
      }
    }

    // Fill the rest of the output buffer with 0s if we compressed
    for (let c = 0; c < numChannels; c++) {
      output[c].fill(0, outputIndex, numFrames)
    }

    if (savedSecThisBlock > 0) {
      this.totalCompressedMs += savedSecThisBlock * 1000
      this.port.postMessage({ type: 'time-saved', ms: this.totalCompressedMs })
    }

    return true
  }
}

// Register with the worklet runtime when running inside an AudioWorkletGlobalScope.
if (typeof registerProcessor !== 'undefined') {
  registerProcessor('silence-compressor', SilenceCompressorProcessor)
}

// Export for CommonJS consumers (unit tests).
if (typeof module !== 'undefined') {
  module.exports = SilenceCompressorProcessor
}
|
||||
|
|
@ -38,6 +38,12 @@ class SilenceMap {
|
|||
}
|
||||
|
||||
this._regions = merged
|
||||
|
||||
// Cap the number of regions to prevent memory leaks for long audiobooks
|
||||
// Assuming each region is ~1 second, 5000 regions is over an hour of silence
|
||||
if (this._regions.length > 5000) {
|
||||
this._regions = this._regions.slice(-5000)
|
||||
}
|
||||
}
|
||||
|
||||
getCompressedOffset(atTimeMs, ratio) {
|
||||
|
|
|
|||
|
|
@ -1,159 +0,0 @@
|
|||
const chai = require('chai')
const expect = chai.expect

// Stand-in for the worklet MessagePort: records every message the processor posts.
class MockMessagePort {
  constructor() {
    this.messages = []
  }

  postMessage(msg) {
    this.messages.push(msg)
  }
}

// Stand-in for the AudioWorkletProcessor base class, which only exists inside
// an AudioWorkletGlobalScope.
class AudioWorkletProcessor {
  constructor() {
    this.port = new MockMessagePort()
  }
}

global.AudioWorkletProcessor = AudioWorkletProcessor
global.registerProcessor = (name, constructor) => {
  global.RegisteredProcessor = constructor
}
global.currentTime = 0

// Require the processor file which will call registerProcessor
require('../../../../client/players/smart-speed/SilenceCompressorProcessor')
const SilenceCompressorProcessor = global.RegisteredProcessor

const SAMPLE_RATE = 48000
const BLOCK_SIZE = 128

describe('SilenceCompressorProcessor', () => {
  let processor

  beforeEach(() => {
    // The processor reads `currentTime` and `sampleRate` as globals. Pin both
    // for every test so that no test depends on what a previous one left behind.
    global.currentTime = 0
    global.sampleRate = SAMPLE_RATE
    processor = new SilenceCompressorProcessor()
  })

  afterEach(() => {
    delete global.sampleRate
    global.currentTime = 0
  })

  // One mono input block filled with 1.0 so that gain changes are easy to see.
  function createProcessInputs(numFrames) {
    return [[new Float32Array(numFrames).fill(1.0)]]
  }

  // One mono, zero-initialised output block.
  function createProcessOutputs(numFrames) {
    return [[new Float32Array(numFrames)]]
  }

  function expectPassthrough(outputs) {
    for (let i = 0; i < BLOCK_SIZE; i++) {
      expect(outputs[0][0][i]).to.equal(1.0)
    }
  }

  describe('Must Pass (GREEN)', () => {
    it('1. With no regions, all samples pass through unchanged', () => {
      const inputs = createProcessInputs(BLOCK_SIZE)
      const outputs = createProcessOutputs(BLOCK_SIZE)

      processor.process(inputs, outputs, {})

      expectPassthrough(outputs)
    })

    it('2. With region, samples within region are dropped at correct ratio', () => {
      processor.port.onmessage({ data: { type: 'set-ratio', value: 2.0 } })
      processor.port.onmessage({ data: { type: 'set-regions', regions: [{ start: 0, end: 1000 }] } })

      // Start 100ms into the region: well past the 5ms entry ramp and well
      // before the exit ramp, so the gain is exactly 1 for the whole block.
      global.currentTime = 0.1

      const inputs = createProcessInputs(BLOCK_SIZE)
      // Make input values equal to their index so we can see which frames were read
      for (let i = 0; i < BLOCK_SIZE; i++) inputs[0][0][i] = i

      const outputs = createProcessOutputs(BLOCK_SIZE)
      processor.process(inputs, outputs, {})

      // Ratio 2.0 reads every other input frame: 64 frames are written...
      const written = BLOCK_SIZE / 2
      for (let i = 0; i < written; i++) {
        expect(outputs[0][0][i]).to.equal(2 * i)
      }
      // ...and the rest of the block is silence.
      for (let i = written; i < BLOCK_SIZE; i++) {
        expect(outputs[0][0][i]).to.equal(0)
      }

      // One input frame is skipped per output frame written.
      const lastMessage = processor.port.messages[processor.port.messages.length - 1]
      expect(lastMessage.type).to.equal('time-saved')
      expect(lastMessage.ms).to.be.closeTo((written / SAMPLE_RATE) * 1000, 1e-9)
    })

    it('3. Crossfade ramp at region entry (first 5ms gain 0→1)', () => {
      processor.port.onmessage({ data: { type: 'set-ratio', value: 2.0 } })
      processor.port.onmessage({ data: { type: 'set-regions', regions: [{ start: 0, end: 1000 }] } })

      const inputs = createProcessInputs(BLOCK_SIZE)
      const outputs = createProcessOutputs(BLOCK_SIZE)

      processor.process(inputs, outputs, {})

      // Entry ramp: gain goes from 0 to 1 over 5ms (240 samples at 48kHz)
      // At index 0, gain should be 0.
      expect(outputs[0][0][0]).to.equal(0)
      // Output frame 10 reads input frame 20, i.e. 20/48 ms into the region.
      expect(outputs[0][0][10]).to.be.greaterThan(0)
      expect(outputs[0][0][10]).to.be.lessThan(1)
      expect(outputs[0][0][10]).to.be.closeTo(20 / 48 / 5, 1e-6)
    })

    it('4. Crossfade ramp at region exit (last 5ms gain 1→0)', () => {
      processor.port.onmessage({ data: { type: 'set-ratio', value: 2.0 } })
      processor.port.onmessage({ data: { type: 'set-regions', regions: [{ start: 0, end: 200 }] } }) // 200ms region

      // Start at 199ms: 1ms before the region ends, inside the 5ms exit ramp
      global.currentTime = 0.199

      const inputs = createProcessInputs(BLOCK_SIZE)
      const outputs = createProcessOutputs(BLOCK_SIZE)
      processor.process(inputs, outputs, {})

      // 1ms before the end the exit-ramp gain is 1/5...
      expect(outputs[0][0][0]).to.be.closeTo(0.2, 1e-6)
      // ...and it keeps falling as the read position approaches the region end.
      expect(outputs[0][0][10]).to.be.greaterThan(0)
      expect(outputs[0][0][10]).to.be.lessThan(outputs[0][0][0])
    })

    it('5. Regions shorter than 200ms pass through unchanged', () => {
      processor.port.onmessage({ data: { type: 'set-ratio', value: 2.0 } })
      processor.port.onmessage({ data: { type: 'set-regions', regions: [{ start: 0, end: 199 }] } })

      const inputs = createProcessInputs(BLOCK_SIZE)
      const outputs = createProcessOutputs(BLOCK_SIZE)
      processor.process(inputs, outputs, {})

      expect(processor.regions.length).to.equal(0)
      expectPassthrough(outputs)
    })

    it('6. ratio=1.0 passes all audio through unchanged', () => {
      processor.port.onmessage({ data: { type: 'set-ratio', value: 1.0 } })
      processor.port.onmessage({ data: { type: 'set-regions', regions: [{ start: 0, end: 1000 }] } })

      const inputs = createProcessInputs(BLOCK_SIZE)
      const outputs = createProcessOutputs(BLOCK_SIZE)
      processor.process(inputs, outputs, {})

      expectPassthrough(outputs)
    })

    it('7. set-regions message updates internal regions', () => {
      processor.port.onmessage({ data: { type: 'set-regions', regions: [{ start: 100, end: 500 }] } })
      expect(processor.regions.length).to.equal(1)
      expect(processor.regions[0].start).to.equal(100)
    })

    it('8. set-ratio message updates internal ratio', () => {
      processor.port.onmessage({ data: { type: 'set-ratio', value: 2.5 } })
      expect(processor.ratio).to.equal(2.5)
    })
  })
})
|
||||
Loading…
Add table
Add a link
Reference in a new issue