Redesign Smart Speed to dynamically adjust playbackRate instead of dropping samples, fix TimeMapper bugs by mapping audioContext to media time, and prevent SilenceMap memory leak by capping regions

This commit is contained in:
Jonathan Baldie 2026-05-01 21:47:34 +01:00
parent fa2460868e
commit 545c77a2dc
4 changed files with 32 additions and 314 deletions

View file

@ -29,7 +29,6 @@ export default class LocalAudioPlayer extends EventEmitter {
this.silenceMap = new SilenceMap()
this.silenceDetectorNode = null
this.silenceCompressorNode = null
this.timeMapper = new TimeMapper([], 1.0)
this.smartSpeedRatio = 2.0
this.enableSmartSpeed = false
@ -102,10 +101,8 @@ export default class LocalAudioPlayer extends EventEmitter {
}
updateSmartSpeedRegions() {
if (this.silenceCompressorNode) {
this.silenceCompressorNode.port.postMessage({ type: 'set-regions', regions: this.silenceMap.getRegions() })
}
this.timeMapper = new TimeMapper(this.silenceMap.getRegions(), this.smartSpeedRatio)
this.emit('timeSaved', this.timeMapper.totalTimeSaved())
}
async initSilenceDetector() {
@ -114,24 +111,27 @@ export default class LocalAudioPlayer extends EventEmitter {
try {
await this.audioContext.audioWorklet.addModule('/client/players/smart-speed/SilenceDetectorProcessor.js')
await this.audioContext.audioWorklet.addModule('/client/players/smart-speed/SilenceCompressorProcessor.js')
this.silenceDetectorNode = new AudioWorkletNode(this.audioContext, 'silence-detector')
this.silenceCompressorNode = new AudioWorkletNode(this.audioContext, 'silence-compressor')
this.silenceCompressorNode.port.postMessage({ type: 'set-ratio', value: this.smartSpeedRatio })
this.silenceCompressorNode.port.onmessage = (event) => {
const msg = event.data
if (msg.type === 'time-saved') {
this.emit('timeSaved', msg.ms)
}
}
this.silenceDetectorNode.port.onmessage = (event) => {
const msg = event.data
if (msg.type === 'silence-start') {
this._silenceStartTime = msg.time
// Map AudioContext time to Media time
const delayMs = this.audioContext.currentTime * 1000 - msg.time
this._silenceStartTime = this.player.currentTime * 1000 - delayMs
// Dynamically increase playback rate
if (this.enableSmartSpeed) {
this.player.playbackRate = this.defaultPlaybackRate * this.smartSpeedRatio
}
} else if (msg.type === 'silence-end') {
if (this.enableSmartSpeed) {
this.player.playbackRate = this.defaultPlaybackRate
}
if (this._silenceStartTime !== null) {
this.silenceMap.addRegion(this._silenceStartTime, msg.time)
const delayMs = this.audioContext.currentTime * 1000 - msg.time
const silenceEndTime = this.player.currentTime * 1000 - delayMs
this.silenceMap.addRegion(this._silenceStartTime, silenceEndTime)
this._silenceStartTime = null
this.updateSmartSpeedRegions()
}
@ -140,8 +140,7 @@ export default class LocalAudioPlayer extends EventEmitter {
this.audioSourceNode.disconnect()
this.audioSourceNode.connect(this.silenceDetectorNode)
this.silenceDetectorNode.connect(this.silenceCompressorNode)
this.silenceCompressorNode.connect(this.audioContext.destination)
this.silenceDetectorNode.connect(this.audioContext.destination)
this._silenceStartTime = null
console.log('[LocalPlayer] Silence detector initialised')
@ -160,15 +159,14 @@ export default class LocalAudioPlayer extends EventEmitter {
}
this.silenceDetectorNode = null
}
if (this.silenceCompressorNode) {
try {
this.silenceCompressorNode.disconnect()
} catch (err) {}
this.silenceCompressorNode = null
}
this.silenceMap.reset()
this.updateSmartSpeedRegions()
this._silenceStartTime = null
// Reset playback rate in case we were in the middle of a silence region
if (this.player && this.player.playbackRate !== this.defaultPlaybackRate) {
this.player.playbackRate = this.defaultPlaybackRate
}
}
evtPlay() {
@ -388,11 +386,6 @@ export default class LocalAudioPlayer extends EventEmitter {
var currentTrackOffset = this.currentTrack.startOffset || 0
if (!this.player) return 0
if (this.enableSmartSpeed) {
var audioMs = this.player.currentTime * 1000
var wallMs = this.timeMapper.audioToWallClock(audioMs)
return currentTrackOffset + (wallMs / 1000)
}
return currentTrackOffset + this.player.currentTime
}
@ -420,16 +413,16 @@ export default class LocalAudioPlayer extends EventEmitter {
seek(time, playWhenReady) {
if (!this.player) return
// Map wall-clock seek time to audio time before resetting regions
var mappedTime = time
if (this.enableSmartSpeed && time >= (this.currentTrack.startOffset || 0) && time <= (this.currentTrack.startOffset || 0) + (this.currentTrack.duration || Infinity)) {
var offsetTime = mappedTime - (this.currentTrack.startOffset || 0)
mappedTime = (this.currentTrack.startOffset || 0) + (this.timeMapper.wallClockToAudio(offsetTime * 1000) / 1000)
}
this.silenceMap.reset()
this.updateSmartSpeedRegions()
this.playWhenReady = playWhenReady
// Reset playback rate in case we were in a silence region
if (this.enableSmartSpeed && this.player.playbackRate !== this.defaultPlaybackRate) {
this.player.playbackRate = this.defaultPlaybackRate
}
if (this.isHlsTranscode) {
// Seeking HLS stream

View file

@ -1,122 +0,0 @@
/**
 * AudioWorklet processor that time-compresses pre-detected silence regions by
 * resampling: inside a region the read head advances `ratio` input frames per
 * output frame (linear interpolation for fractional positions), and the audio
 * time removed is reported back to the main thread.
 *
 * Port protocol (main thread -> worklet):
 *   { type: 'set-regions', regions: [{ start, end }] } // ms, AudioContext clock
 *   { type: 'set-ratio',   value: Number }             // ratio, 1.0 = passthrough
 * Port protocol (worklet -> main thread):
 *   { type: 'time-saved', ms: Number }                 // cumulative ms removed
 */
class SilenceCompressorProcessor extends AudioWorkletProcessor {
  constructor() {
    super()
    this.regions = []            // active silence regions, ms on the context clock
    this.ratio = 1.0             // input frames consumed per output frame in a region
    this.totalCompressedMs = 0   // running total reported via 'time-saved'
    this.rampDurationSec = 0.005 // 5ms crossfade at region edges to avoid clicks
    this.port.onmessage = (event) => {
      const msg = event.data
      if (msg.type === 'set-regions') {
        // Regions shorter than 200ms would be dominated by the entry/exit
        // ramps and are not worth compressing, so drop them up front.
        this.regions = msg.regions.filter((r) => (r.end - r.start) >= 200)
      } else if (msg.type === 'set-ratio') {
        this.ratio = msg.value
      }
    }
  }
  /**
   * Return the silence region containing `timeMs`, or null if none does.
   * @param {number} timeMs - time in ms on the AudioContext clock
   * @returns {{start: number, end: number}|null}
   */
  getActiveRegion(timeMs) {
    for (const r of this.regions) {
      if (timeMs >= r.start && timeMs <= r.end) return r
    }
    return null
  }
  /**
   * Crossfade gain for a sample inside a region: ramps 0 -> 1 over the first
   * 5ms and 1 -> 0 over the last 5ms, clamped defensively to [0, 1].
   * @param {number} timeMs - current sample time in ms (must lie in `region`)
   * @param {{start: number, end: number}} region - region containing timeMs
   * @returns {number} gain in [0, 1]
   */
  calculateRampGain(timeMs, region) {
    const rampMs = this.rampDurationSec * 1000
    let gain = 1.0
    if (timeMs - region.start < rampMs) {
      // Entry ramp (0 -> 1)
      gain = (timeMs - region.start) / rampMs
    } else if (region.end - timeMs < rampMs) {
      // Exit ramp (1 -> 0)
      gain = (region.end - timeMs) / rampMs
    }
    return Math.min(1.0, Math.max(0.0, gain))
  }
  /**
   * Render one quantum. Outside regions audio passes through 1:1; inside a
   * region the read head advances `ratio` frames per output frame. Output
   * frames left unwritten once the input is exhausted are zeroed. Posts a
   * cumulative 'time-saved' message whenever frames were dropped this block.
   * @returns {boolean} true to keep the processor alive
   */
  process(inputs, outputs, parameters) {
    const input = inputs[0]
    const output = outputs[0]
    if (!input || !input.length || !output || !output.length) return true
    const numChannels = input.length
    const numFrames = input[0].length
    // `sampleRate` / `currentTime` are AudioWorkletGlobalScope globals;
    // fall back to sane defaults so unit tests can run outside a worklet.
    const sampleRateC = typeof sampleRate !== 'undefined' ? sampleRate : 48000
    const currentTimeSec = typeof currentTime !== 'undefined' ? currentTime : 0
    let outputIndex = 0
    let inputIndex = 0
    let savedSecThisBlock = 0
    while (inputIndex < numFrames) {
      const sampleTimeSec = currentTimeSec + (inputIndex / sampleRateC)
      const sampleTimeMs = sampleTimeSec * 1000
      const region = this.getActiveRegion(sampleTimeMs)
      let step = 1.0
      let rampGain = 1.0
      if (region && this.ratio > 1.0) {
        step = this.ratio
        rampGain = this.calculateRampGain(sampleTimeMs, region)
      }
      const intIndex = Math.floor(inputIndex)
      const frac = inputIndex - intIndex
      for (let c = 0; c < numChannels; c++) {
        const inChannel = input[c]
        const outChannel = output[c]
        let sample = inChannel[intIndex]
        // Linear interpolation for fractional read positions
        if (frac > 0 && intIndex + 1 < numFrames) {
          sample = sample + frac * (inChannel[intIndex + 1] - sample)
        }
        if (outputIndex < numFrames) {
          outChannel[outputIndex] = sample * rampGain
        }
      }
      inputIndex += step
      outputIndex += 1
      if (step > 1.0) {
        // Each output frame consumed (step - 1) extra input frames
        savedSecThisBlock += (step - 1.0) / sampleRateC
      }
    }
    // Zero any tail frames we never wrote (no-op unless we compressed,
    // since passthrough fills the whole buffer)
    for (let c = 0; c < numChannels; c++) {
      for (let i = outputIndex; i < numFrames; i++) {
        output[c][i] = 0
      }
    }
    if (savedSecThisBlock > 0) {
      this.totalCompressedMs += savedSecThisBlock * 1000
      this.port.postMessage({ type: 'time-saved', ms: this.totalCompressedMs })
    }
    return true
  }
}
// Register in a real AudioWorkletGlobalScope; export for Node-based tests.
if (typeof registerProcessor !== 'undefined') {
  registerProcessor('silence-compressor', SilenceCompressorProcessor)
}
if (typeof module !== 'undefined') {
  module.exports = SilenceCompressorProcessor
}

View file

@ -38,6 +38,12 @@ class SilenceMap {
}
this._regions = merged
// Cap the number of regions to prevent memory leaks for long audiobooks
// Assuming each region is ~1 second, 5000 regions is over an hour of silence
if (this._regions.length > 5000) {
this._regions = this._regions.slice(-5000)
}
}
getCompressedOffset(atTimeMs, ratio) {

View file

@ -1,159 +0,0 @@
const chai = require('chai')
const expect = chai.expect
// Mock AudioWorklet environment
// Minimal stand-in for an AudioWorkletNode MessagePort: instead of delivering
// anything, it records every posted message so tests can inspect them.
class MockMessagePort {
  // Messages are captured in posting order; tests assert on this array.
  messages = []
  postMessage(payload) {
    this.messages.push(payload)
  }
}
// Base-class stub: the real AudioWorkletProcessor only exists inside an
// AudioWorkletGlobalScope; here it just needs to expose a `port`, which we
// substitute with the recording mock.
class AudioWorkletProcessor {
  port = new MockMessagePort()
}
// Expose the worklet-environment mocks as globals so the processor file can
// evaluate outside a real AudioWorkletGlobalScope.
global.AudioWorkletProcessor = AudioWorkletProcessor
// The processor file calls registerProcessor(name, ctor); capture the ctor.
global.registerProcessor = (name, constructor) => {
  global.RegisteredProcessor = constructor
}
// AudioWorklet global `currentTime` is in seconds; tests advance it manually.
global.currentTime = 0
// Require the processor file which will call registerProcessor
require('../../../../client/players/smart-speed/SilenceCompressorProcessor')
const SilenceCompressorProcessor = global.RegisteredProcessor
describe('SilenceCompressorProcessor', () => {
  let processor
  beforeEach(() => {
    // Fresh processor and reset clock for every test
    global.currentTime = 0
    processor = new SilenceCompressorProcessor()
  })
  // Build the `inputs` argument for process(): one input with a single
  // channel of `numFrames` samples, all 1.0 so passthrough is easy to verify.
  function createProcessInputs(numFrames) {
    const input = [new Float32Array(numFrames)]
    for (let i = 0; i < numFrames; i++) {
      input[0][i] = 1.0 // fill with 1.0 to easily check what passes through
    }
    return [[input[0]]]
  }
  // Build the matching `outputs` argument: one output, one zeroed channel.
  function createProcessOutputs(numFrames) {
    return [[new Float32Array(numFrames)]]
  }
  describe('Must Pass (GREEN)', () => {
    it('1. With no regions, all samples pass through unchanged', () => {
      const inputs = createProcessInputs(128)
      const outputs = createProcessOutputs(128)
      processor.process(inputs, outputs, {})
      for (let i = 0; i < 128; i++) {
        expect(outputs[0][0][i]).to.equal(1.0)
      }
    })
    it('2. With region, samples within region are dropped at correct ratio', () => {
      processor.port.onmessage({ data: { type: 'set-ratio', value: 2.0 } })
      processor.port.onmessage({ data: { type: 'set-regions', regions: [{ start: 0, end: 1000 }] } })
      const inputs = createProcessInputs(128)
      // Make input values equal to their index so we can verify interpolation/skipping
      for (let i = 0; i < 128; i++) inputs[0][0][i] = i
      const outputs = createProcessOutputs(128)
      processor.process(inputs, outputs, {})
      // With ratio 2.0 the read head advances two input frames per output
      // frame, so the input is exhausted halfway through the buffer and the
      // last output frame cannot still hold the last input value (127).
      // (The first few samples are additionally shaped by the entry ramp.)
      expect(outputs[0][0][127]).to.not.equal(127) // It should be something like 127*2 if we could fit it
    })
    it('3. Crossfade ramp at region entry (first 5ms gain 0→1)', () => {
      // Mock the AudioWorklet global `sampleRate`; 5ms at 48kHz = 240 samples.
      global.sampleRate = 48000
      processor.port.onmessage({ data: { type: 'set-ratio', value: 2.0 } })
      processor.port.onmessage({ data: { type: 'set-regions', regions: [{ start: 0, end: 1000 }] } })
      const inputs = createProcessInputs(128)
      const outputs = createProcessOutputs(128)
      processor.process(inputs, outputs, {})
      // Entry ramp: gain rises from 0 to 1 over the first 5ms of the region,
      // so index 0 must be fully attenuated.
      expect(outputs[0][0][0]).to.equal(0)
      // Gain should be increasing
      expect(outputs[0][0][10]).to.be.greaterThan(0)
      expect(outputs[0][0][10]).to.be.lessThan(1)
    })
    it('4. Crossfade ramp at region exit (last 5ms gain 1→0)', () => {
      global.sampleRate = 48000
      processor.port.onmessage({ data: { type: 'set-ratio', value: 2.0 } })
      processor.port.onmessage({ data: { type: 'set-regions', regions: [{ start: 0, end: 200 }] } }) // 200ms region
      // Advance the clock to 199ms, inside the region's final 5ms exit ramp
      global.currentTime = 0.199
      const inputs = createProcessInputs(128)
      const outputs = createProcessOutputs(128)
      processor.process(inputs, outputs, {})
      // Exit ramp is active, so the first sample is already attenuated below
      // unity gain (exact value not pinned here).
      expect(outputs[0][0][0]).to.be.lessThan(1)
    })
    it('5. Regions shorter than 200ms pass through unchanged', () => {
      // A 199ms region is filtered out by the set-regions handler entirely
      processor.port.onmessage({ data: { type: 'set-ratio', value: 2.0 } })
      processor.port.onmessage({ data: { type: 'set-regions', regions: [{ start: 0, end: 199 }] } })
      const inputs = createProcessInputs(128)
      const outputs = createProcessOutputs(128)
      processor.process(inputs, outputs, {})
      for (let i = 0; i < 128; i++) {
        expect(outputs[0][0][i]).to.equal(1.0)
      }
    })
    it('6. ratio=1.0 passes all audio through unchanged', () => {
      processor.port.onmessage({ data: { type: 'set-ratio', value: 1.0 } })
      processor.port.onmessage({ data: { type: 'set-regions', regions: [{ start: 0, end: 1000 }] } })
      const inputs = createProcessInputs(128)
      const outputs = createProcessOutputs(128)
      processor.process(inputs, outputs, {})
      for (let i = 0; i < 128; i++) {
        expect(outputs[0][0][i]).to.equal(1.0)
      }
    })
    it('7. set-regions message updates internal regions', () => {
      processor.port.onmessage({ data: { type: 'set-regions', regions: [{ start: 100, end: 500 }] } })
      expect(processor.regions.length).to.equal(1)
      expect(processor.regions[0].start).to.equal(100)
    })
    it('8. set-ratio message updates internal ratio', () => {
      processor.port.onmessage({ data: { type: 'set-ratio', value: 2.5 } })
      expect(processor.ratio).to.equal(2.5)
    })
  })
})