From 19906ee3068f3593107d3d66b5a0dc0631d40683 Mon Sep 17 00:00:00 2001 From: hiroshihorie <548776+hiroshihorie@users.noreply.github.com> Date: Wed, 4 Sep 2024 11:59:51 +0900 Subject: [PATCH 1/9] Create FloatRingBuffer.swift --- Sources/LiveKit/Support/FloatRingBuffer.swift | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 Sources/LiveKit/Support/FloatRingBuffer.swift diff --git a/Sources/LiveKit/Support/FloatRingBuffer.swift b/Sources/LiveKit/Support/FloatRingBuffer.swift new file mode 100644 index 000000000..e17f9c821 --- /dev/null +++ b/Sources/LiveKit/Support/FloatRingBuffer.swift @@ -0,0 +1,46 @@ +/* + * Copyright 2024 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import Foundation + +// Simple ring-buffer used for internal audio processing. Not thread-safe. +class FloatRingBuffer { + private var _isFull = false + private var _buffer: [Float] + private var _head: Int = 0 + + init(size: Int) { + _buffer = Array(repeating: 0, count: size) + } + + func write(_ value: Float) { + _buffer[_head] = value + _head = (_head + 1) % _buffer.count + if _head == 0 { _isFull = true } + } + + func write(_ sequence: [Float]) { + for value in sequence { + write(value) + } + } + + // Returns nil if buffer is not full. + func read() -> [Float]? { + guard _isFull else { return nil } + return Array(_buffer[_head ..< _buffer.count] + _buffer[0 ..< _head]) + } +} From cc30821eb0ec72d2714c6600673b0c4e1f908813 Mon Sep 17 00:00:00 2001 From: hiroshihorie <548776+hiroshihorie@users.noreply.github.com> Date: Wed, 4 Sep 2024 14:23:08 +0900 Subject: [PATCH 2/9] Processor --- .../LiveKit/Convenience/AudioProcessing.swift | 66 +++++- Sources/LiveKit/Support/FFTProcessor.swift | 189 ++++++++++++++++++ 2 files changed, 254 insertions(+), 1 deletion(-) create mode 100755 Sources/LiveKit/Support/FFTProcessor.swift diff --git a/Sources/LiveKit/Convenience/AudioProcessing.swift b/Sources/LiveKit/Convenience/AudioProcessing.swift index 3d33c3c94..9d4443f30 100644 --- a/Sources/LiveKit/Convenience/AudioProcessing.swift +++ b/Sources/LiveKit/Convenience/AudioProcessing.swift @@ -43,7 +43,8 @@ public extension LKAudioBuffer { guard let targetBufferPointer = pcmBuffer.floatChannelData else { return nil } // Optimized version - var normalizationFactor: Float = 1.0 / 32768.0 + let factor = Float(Int16.max) + var normalizationFactor: Float = 1.0 / factor // Or use 32768.0 for i in 0 ..< channels { vDSP_vsmul(rawBuffer(forChannel: i), @@ -98,3 +99,66 @@ public extension Sequence where Iterator.Element == AudioLevel { peak: totalSums.peakSum / Float(count)) } } + +public class AudioVisualizeProcessor { + static let _bufferSize = 1024 + + // MARK: - Public + + public let minFrequency: Float + public let maxFrequency: Float + public let bandsCount: Int + + public var bands: [Float]? + + // MARK: - Private + + public init(minFrequency: Float = 10, maxFrequency: Float = 8000, bandsCount: Int = 100) { + self.minFrequency = minFrequency + self.maxFrequency = maxFrequency + self.bandsCount = bandsCount + _processor = FFTProcessor(bufferSize: Self._bufferSize) + } + + // MARK: - Private + + private let _ringBuffer = FloatRingBuffer(size: _bufferSize) + private let _processor: FFTProcessor + + public func add(pcmBuffer: AVAudioPCMBuffer) { + guard let floatChannelData = pcmBuffer.floatChannelData else { return } + // Get the float array. + let floats = Array(UnsafeBufferPointer(start: floatChannelData[0], count: Int(pcmBuffer.frameLength))) + // Write to ring buffer. + _ringBuffer.write(floats) + // Get full size buffer if ready, otherwise return for this cycle. + guard let buffer = _ringBuffer.read() else { return } + + let fftRes = _processor.process(buffer: buffer) + let bands = fftRes.computeBands(minFrequency: minFrequency, + maxFrequency: maxFrequency, + bandsCount: bandsCount, + sampleRate: Float(pcmBuffer.format.sampleRate)) + + let maxDB: Float = 64.0 + let minDB: Float = -32.0 + let headroom = maxDB - minDB + + var result: [Float] = Array(repeating: 0.0, count: bands.magnitudes.count) + + var i = 0 + for magnitude in bands.magnitudes { + // Incoming magnitudes are linear, making it impossible to see very low or very high values. Decibels to the rescue! + var magnitudeDB = magnitude.toDecibels + + // Normalize the incoming magnitude so that -Inf = 0 + magnitudeDB = max(0, magnitudeDB + abs(minDB)) + + let dbRatio = min(1.0, magnitudeDB / headroom) + result[i] = dbRatio + i += 1 + } + + self.bands = result + } +} diff --git a/Sources/LiveKit/Support/FFTProcessor.swift b/Sources/LiveKit/Support/FFTProcessor.swift new file mode 100755 index 000000000..64b58a006 --- /dev/null +++ b/Sources/LiveKit/Support/FFTProcessor.swift @@ -0,0 +1,189 @@ +/* + * Copyright 2024 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import Accelerate +import Foundation + +extension Float { + /// The Nyquist frequency is sampleRate / 2. + var nyquistFrequency: Float { self / 2.0 } + + var toDecibels: Float { + // Avoid log of zero or negative values by using a very small value. + let minMagnitude: Float = 0.0000001 + return 20 * log10(max(magnitude, minMagnitude)) + } +} + +public struct FFTComputeBandsResult { + let count: Int + let magnitudes: [Float] + let frequencies: [Float] +} + +public class FFTResult { + // Result of fft operation. + public let magnitudes: [Float] + + init(magnitudes: [Float]) { + self.magnitudes = magnitudes + } + + // MARK: - Public + + /// Applies logical banding on top of the spectrum data. The bands are spaced linearly throughout the spectrum. + func computeBands(minFrequency: Float, + maxFrequency: Float, + bandsCount: Int, + sampleRate: Float) -> FFTComputeBandsResult + { + let actualMaxFrequency = min(sampleRate.nyquistFrequency, maxFrequency) + + var bandMagnitudes = [Float](repeating: 0.0, count: bandsCount) + var bandFrequencies = [Float](repeating: 0.0, count: bandsCount) + + let magLowerRange = _magnitudeIndex(for: minFrequency, sampleRate: sampleRate) + let magUpperRange = _magnitudeIndex(for: actualMaxFrequency, sampleRate: sampleRate) + let ratio = Float(magUpperRange - magLowerRange) / Float(bandsCount) + + for i in 0 ..< bandsCount { + let magsStartIdx = Int(floorf(Float(i) * ratio)) + magLowerRange + let magsEndIdx = Int(floorf(Float(i + 1) * ratio)) + magLowerRange + var magsAvg: Float + if magsEndIdx == magsStartIdx { + // Can happen when numberOfBands < # of magnitudes. No need to average anything. + magsAvg = magnitudes[magsStartIdx] + } else { + magsAvg = _computeAverage(magnitudes, magsStartIdx, magsEndIdx) + } + bandMagnitudes[i] = magsAvg + bandFrequencies[i] = _averageFrequencyInRange(magsStartIdx, magsEndIdx, sampleRate: sampleRate) + } + + return FFTComputeBandsResult(count: bandsCount, + magnitudes: bandMagnitudes, + frequencies: bandFrequencies) + } + + // MARK: - Private + + @inline(__always) private func _magnitudeIndex(for frequency: Float, sampleRate: Float) -> Int { + Int(Float(magnitudes.count) * frequency / sampleRate.nyquistFrequency) + } + + @inline(__always) private func _computeAverage(_ array: [Float], _ startIdx: Int, _ stopIdx: Int) -> Float { + var mean: Float = 0 + array.withUnsafeBufferPointer { bufferPtr in + let ptr = bufferPtr.baseAddress! + startIdx + vDSP_meanv(ptr, 1, &mean, UInt(stopIdx - startIdx)) + } + return mean + } + + /// The average bandwidth throughout the spectrum (nyquist / magnitudes.count) + @inline(__always) func _computeBandwidth(for sampleRate: Float) -> Float { + sampleRate.nyquistFrequency / Float(magnitudes.count) + } + + @inline(__always) private func _averageFrequencyInRange(_ startIndex: Int, _ endIndex: Int, sampleRate: Float) -> Float { + let bandwidth = _computeBandwidth(for: sampleRate) + return (bandwidth * Float(startIndex) + bandwidth * Float(endIndex)) / 2 + } +} + +class FFTProcessor { + // MARK: - Public + + public enum WindowType { + case none + case hanning + case hamming + } + + public let bufferSize: Int + + /// Supplying a window type (hanning or hamming) smooths the edges of the incoming waveform and reduces output errors from the FFT function. + /// https://en.wikipedia.org/wiki/Spectral_leakage + public let windowType: WindowType + + // MARK: - Private + + private let bufferHalfSize: Int + private let bufferLog2Size: Int + private var window: [Float] = [] + private var fftSetup: FFTSetup + + private var complexBuffer: DSPSplitComplex! + private var realPointer: UnsafeMutablePointer + private var imaginaryPointer: UnsafeMutablePointer + + init(bufferSize inBufferSize: Int, windowType: WindowType = .hamming) { + bufferSize = inBufferSize + self.windowType = windowType + bufferHalfSize = inBufferSize / 2 + + let bufferSizeFloat = Float(inBufferSize) + + // bufferSize must be a power of 2. + let lg2 = logbf(bufferSizeFloat) + assert(remainderf(bufferSizeFloat, powf(2.0, lg2)) == 0, "bufferSize must be a power of 2") + bufferLog2Size = Int(log2f(bufferSizeFloat)) + + // Create fft setup. + fftSetup = vDSP_create_fftsetup(UInt(bufferLog2Size), FFTRadix(FFT_RADIX2))! + + // Allocate memory for the real and imaginary parts. + realPointer = UnsafeMutablePointer.allocate(capacity: bufferHalfSize) + imaginaryPointer = UnsafeMutablePointer.allocate(capacity: bufferHalfSize) + + // Initialize the memory to zero. + realPointer.initialize(repeating: 0.0, count: bufferHalfSize) + imaginaryPointer.initialize(repeating: 0.0, count: bufferHalfSize) + + // Init the complexBuffer. + complexBuffer = DSPSplitComplex(realp: realPointer, imagp: imaginaryPointer) + } + + deinit { + // destroy the fft setup object + vDSP_destroy_fftsetup(fftSetup) + + realPointer.deallocate() + imaginaryPointer.deallocate() + } + + func process(buffer: [Float]) -> FFTResult { + // Ensure the input buffer is the correct size (twice the half buffer size, since it is interleaved). + guard buffer.count == bufferSize else { + fatalError("Input buffer size does not match the initialized buffer size.") + } + + // Convert the interleaved real and imaginary parts to a split complex form. + buffer.withUnsafeBufferPointer { bufferPtr in + let complexPtr = UnsafeRawPointer(bufferPtr.baseAddress!).bindMemory(to: DSPComplex.self, capacity: bufferHalfSize) + vDSP_ctoz(complexPtr, 2, &complexBuffer, 1, UInt(bufferHalfSize)) + } + + // Perform a forward FFT. + vDSP_fft_zrip(fftSetup, &complexBuffer, 1, UInt(bufferLog2Size), Int32(FFT_FORWARD)) + + // Calculate magnitudes. + var magnitudes = [Float](repeating: 0.0, count: bufferHalfSize) + vDSP_zvmags(&complexBuffer, 1, &magnitudes, 1, UInt(bufferHalfSize)) + + return FFTResult(magnitudes: magnitudes) + } +} From a5ba133f4be76f6e63118545b8d7fffe9b3aba1b Mon Sep 17 00:00:00 2001 From: hiroshihorie <548776+hiroshihorie@users.noreply.github.com> Date: Wed, 4 Sep 2024 15:31:49 +0900 Subject: [PATCH 3/9] Optimize --- Sources/LiveKit/Support/FFTProcessor.swift | 81 +++++----------------- 1 file changed, 19 insertions(+), 62 deletions(-) diff --git a/Sources/LiveKit/Support/FFTProcessor.swift b/Sources/LiveKit/Support/FFTProcessor.swift index 64b58a006..a79a01e08 100755 --- a/Sources/LiveKit/Support/FFTProcessor.swift +++ b/Sources/LiveKit/Support/FFTProcessor.swift @@ -13,17 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - import Accelerate import Foundation extension Float { - /// The Nyquist frequency is sampleRate / 2. var nyquistFrequency: Float { self / 2.0 } var toDecibels: Float { - // Avoid log of zero or negative values by using a very small value. - let minMagnitude: Float = 0.0000001 + let minMagnitude: Float = 1e-7 return 20 * log10(max(magnitude, minMagnitude)) } } @@ -35,23 +32,14 @@ public struct FFTComputeBandsResult { } public class FFTResult { - // Result of fft operation. public let magnitudes: [Float] init(magnitudes: [Float]) { self.magnitudes = magnitudes } - // MARK: - Public - - /// Applies logical banding on top of the spectrum data. The bands are spaced linearly throughout the spectrum. - func computeBands(minFrequency: Float, - maxFrequency: Float, - bandsCount: Int, - sampleRate: Float) -> FFTComputeBandsResult - { + func computeBands(minFrequency: Float, maxFrequency: Float, bandsCount: Int, sampleRate: Float) -> FFTComputeBandsResult { let actualMaxFrequency = min(sampleRate.nyquistFrequency, maxFrequency) - var bandMagnitudes = [Float](repeating: 0.0, count: bandsCount) var bandFrequencies = [Float](repeating: 0.0, count: bandsCount) @@ -62,39 +50,32 @@ public class FFTResult { for i in 0 ..< bandsCount { let magsStartIdx = Int(floorf(Float(i) * ratio)) + magLowerRange let magsEndIdx = Int(floorf(Float(i + 1) * ratio)) + magLowerRange - var magsAvg: Float - if magsEndIdx == magsStartIdx { - // Can happen when numberOfBands < # of magnitudes. No need to average anything. - magsAvg = magnitudes[magsStartIdx] - } else { - magsAvg = _computeAverage(magnitudes, magsStartIdx, magsEndIdx) - } - bandMagnitudes[i] = magsAvg + + bandMagnitudes[i] = magsEndIdx == magsStartIdx + ? magnitudes[magsStartIdx] + : _computeAverage(magnitudes, magsStartIdx, magsEndIdx) + bandFrequencies[i] = _averageFrequencyInRange(magsStartIdx, magsEndIdx, sampleRate: sampleRate) } - return FFTComputeBandsResult(count: bandsCount, - magnitudes: bandMagnitudes, - frequencies: bandFrequencies) + return FFTComputeBandsResult(count: bandsCount, magnitudes: bandMagnitudes, frequencies: bandFrequencies) } - // MARK: - Private - @inline(__always) private func _magnitudeIndex(for frequency: Float, sampleRate: Float) -> Int { Int(Float(magnitudes.count) * frequency / sampleRate.nyquistFrequency) } @inline(__always) private func _computeAverage(_ array: [Float], _ startIdx: Int, _ stopIdx: Int) -> Float { var mean: Float = 0 + let count = stopIdx - startIdx array.withUnsafeBufferPointer { bufferPtr in let ptr = bufferPtr.baseAddress! + startIdx - vDSP_meanv(ptr, 1, &mean, UInt(stopIdx - startIdx)) + vDSP_meanv(ptr, 1, &mean, UInt(count)) } return mean } - /// The average bandwidth throughout the spectrum (nyquist / magnitudes.count) - @inline(__always) func _computeBandwidth(for sampleRate: Float) -> Float { + @inline(__always) private func _computeBandwidth(for sampleRate: Float) -> Float { sampleRate.nyquistFrequency / Float(magnitudes.count) } @@ -105,8 +86,6 @@ public class FFTResult { } class FFTProcessor { - // MARK: - Public - public enum WindowType { case none case hanning @@ -114,73 +93,51 @@ class FFTProcessor { } public let bufferSize: Int - - /// Supplying a window type (hanning or hamming) smooths the edges of the incoming waveform and reduces output errors from the FFT function. - /// https://en.wikipedia.org/wiki/Spectral_leakage public let windowType: WindowType - // MARK: - Private - private let bufferHalfSize: Int private let bufferLog2Size: Int private var window: [Float] = [] private var fftSetup: FFTSetup - - private var complexBuffer: DSPSplitComplex! + private var complexBuffer: DSPSplitComplex private var realPointer: UnsafeMutablePointer private var imaginaryPointer: UnsafeMutablePointer - init(bufferSize inBufferSize: Int, windowType: WindowType = .hamming) { - bufferSize = inBufferSize + init(bufferSize: Int, windowType: WindowType = .hamming) { + self.bufferSize = bufferSize self.windowType = windowType - bufferHalfSize = inBufferSize / 2 + bufferHalfSize = bufferSize / 2 + bufferLog2Size = Int(log2f(Float(bufferSize))) - let bufferSizeFloat = Float(inBufferSize) - - // bufferSize must be a power of 2. - let lg2 = logbf(bufferSizeFloat) - assert(remainderf(bufferSizeFloat, powf(2.0, lg2)) == 0, "bufferSize must be a power of 2") - bufferLog2Size = Int(log2f(bufferSizeFloat)) - - // Create fft setup. fftSetup = vDSP_create_fftsetup(UInt(bufferLog2Size), FFTRadix(FFT_RADIX2))! - // Allocate memory for the real and imaginary parts. - realPointer = UnsafeMutablePointer.allocate(capacity: bufferHalfSize) - imaginaryPointer = UnsafeMutablePointer.allocate(capacity: bufferHalfSize) + realPointer = .allocate(capacity: bufferHalfSize) + imaginaryPointer = .allocate(capacity: bufferHalfSize) - // Initialize the memory to zero. realPointer.initialize(repeating: 0.0, count: bufferHalfSize) imaginaryPointer.initialize(repeating: 0.0, count: bufferHalfSize) - // Init the complexBuffer. complexBuffer = DSPSplitComplex(realp: realPointer, imagp: imaginaryPointer) } deinit { - // destroy the fft setup object vDSP_destroy_fftsetup(fftSetup) - realPointer.deallocate() imaginaryPointer.deallocate() } func process(buffer: [Float]) -> FFTResult { - // Ensure the input buffer is the correct size (twice the half buffer size, since it is interleaved). guard buffer.count == bufferSize else { - fatalError("Input buffer size does not match the initialized buffer size.") + fatalError("Input buffer size mismatch.") } - // Convert the interleaved real and imaginary parts to a split complex form. buffer.withUnsafeBufferPointer { bufferPtr in let complexPtr = UnsafeRawPointer(bufferPtr.baseAddress!).bindMemory(to: DSPComplex.self, capacity: bufferHalfSize) vDSP_ctoz(complexPtr, 2, &complexBuffer, 1, UInt(bufferHalfSize)) } - // Perform a forward FFT. vDSP_fft_zrip(fftSetup, &complexBuffer, 1, UInt(bufferLog2Size), Int32(FFT_FORWARD)) - // Calculate magnitudes. var magnitudes = [Float](repeating: 0.0, count: bufferHalfSize) vDSP_zvmags(&complexBuffer, 1, &magnitudes, 1, UInt(bufferHalfSize)) From c09022d8f3e34e89a81f9b8da35ae937618a7d7f Mon Sep 17 00:00:00 2001 From: hiroshihorie <548776+hiroshihorie@users.noreply.github.com> Date: Tue, 10 Sep 2024 09:37:41 +0900 Subject: [PATCH 4/9] Optimize --- .../LiveKit/Convenience/AudioProcessing.swift | 56 ++++++++----------- Sources/LiveKit/Support/FFTProcessor.swift | 1 + Sources/LiveKit/Support/FloatRingBuffer.swift | 9 ++- 3 files changed, 32 insertions(+), 34 deletions(-) diff --git a/Sources/LiveKit/Convenience/AudioProcessing.swift b/Sources/LiveKit/Convenience/AudioProcessing.swift index 9d4443f30..73204025f 100644 --- a/Sources/LiveKit/Convenience/AudioProcessing.swift +++ b/Sources/LiveKit/Convenience/AudioProcessing.swift @@ -101,7 +101,7 @@ public extension Sequence where Iterator.Element == AudioLevel { } public class AudioVisualizeProcessor { - static let _bufferSize = 1024 + static let bufferSize = 1024 // MARK: - Public @@ -109,56 +109,48 @@ public class AudioVisualizeProcessor { public let maxFrequency: Float public let bandsCount: Int - public var bands: [Float]? + public private(set) var bands: [Float]? // MARK: - Private + private let ringBuffer = FloatRingBuffer(size: AudioVisualizeProcessor.bufferSize) + private let processor: FFTProcessor + public init(minFrequency: Float = 10, maxFrequency: Float = 8000, bandsCount: Int = 100) { self.minFrequency = minFrequency self.maxFrequency = maxFrequency self.bandsCount = bandsCount - _processor = FFTProcessor(bufferSize: Self._bufferSize) + processor = FFTProcessor(bufferSize: Self.bufferSize) } - // MARK: - Private - - private let _ringBuffer = FloatRingBuffer(size: _bufferSize) - private let _processor: FFTProcessor - public func add(pcmBuffer: AVAudioPCMBuffer) { guard let floatChannelData = pcmBuffer.floatChannelData else { return } + // Get the float array. let floats = Array(UnsafeBufferPointer(start: floatChannelData[0], count: Int(pcmBuffer.frameLength))) - // Write to ring buffer. - _ringBuffer.write(floats) - // Get full size buffer if ready, otherwise return for this cycle. - guard let buffer = _ringBuffer.read() else { return } + ringBuffer.write(floats) - let fftRes = _processor.process(buffer: buffer) - let bands = fftRes.computeBands(minFrequency: minFrequency, - maxFrequency: maxFrequency, - bandsCount: bandsCount, - sampleRate: Float(pcmBuffer.format.sampleRate)) + // Get full-size buffer if available, otherwise return + guard let buffer = ringBuffer.read() else { return } + // Process FFT and compute frequency bands + let fftRes = processor.process(buffer: buffer) + let bands = fftRes.computeBands( + minFrequency: minFrequency, + maxFrequency: maxFrequency, + bandsCount: bandsCount, + sampleRate: Float(pcmBuffer.format.sampleRate) + ) + + // Constants for decibel conversion let maxDB: Float = 64.0 let minDB: Float = -32.0 let headroom = maxDB - minDB - var result: [Float] = Array(repeating: 0.0, count: bands.magnitudes.count) - - var i = 0 - for magnitude in bands.magnitudes { - // Incoming magnitudes are linear, making it impossible to see very low or very high values. Decibels to the rescue! - var magnitudeDB = magnitude.toDecibels - - // Normalize the incoming magnitude so that -Inf = 0 - magnitudeDB = max(0, magnitudeDB + abs(minDB)) - - let dbRatio = min(1.0, magnitudeDB / headroom) - result[i] = dbRatio - i += 1 + // Normalize magnitudes to decibel ratio using a functional approach + self.bands = bands.magnitudes.map { magnitude in + let magnitudeDB = max(0, magnitude.toDecibels + abs(minDB)) + return min(1.0, magnitudeDB / headroom) } - - self.bands = result } } diff --git a/Sources/LiveKit/Support/FFTProcessor.swift b/Sources/LiveKit/Support/FFTProcessor.swift index a79a01e08..28ba6828c 100755 --- a/Sources/LiveKit/Support/FFTProcessor.swift +++ b/Sources/LiveKit/Support/FFTProcessor.swift @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + import Accelerate import Foundation diff --git a/Sources/LiveKit/Support/FloatRingBuffer.swift b/Sources/LiveKit/Support/FloatRingBuffer.swift index e17f9c821..524396626 100644 --- a/Sources/LiveKit/Support/FloatRingBuffer.swift +++ b/Sources/LiveKit/Support/FloatRingBuffer.swift @@ -38,9 +38,14 @@ class FloatRingBuffer { } } - // Returns nil if buffer is not full. func read() -> [Float]? { guard _isFull else { return nil } - return Array(_buffer[_head ..< _buffer.count] + _buffer[0 ..< _head]) + + if _head == 0 { + return _buffer // Return the entire buffer if _head is at the start + } else { + // Return the buffer in the correct order + return Array(_buffer[_head ..< _buffer.count] + _buffer[0 ..< _head]) + } } } From fec59e1e7cf096107e4d2b6bf73141f456009c58 Mon Sep 17 00:00:00 2001 From: hiroshihorie <548776+hiroshihorie@users.noreply.github.com> Date: Tue, 24 Sep 2024 13:59:13 +0900 Subject: [PATCH 5/9] Apply windowType --- Sources/LiveKit/Support/FFTProcessor.swift | 34 ++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/Sources/LiveKit/Support/FFTProcessor.swift b/Sources/LiveKit/Support/FFTProcessor.swift index 28ba6828c..ef66dd250 100755 --- a/Sources/LiveKit/Support/FFTProcessor.swift +++ b/Sources/LiveKit/Support/FFTProcessor.swift @@ -93,8 +93,14 @@ class FFTProcessor { case hamming } + public enum ScaleType { + case linear + case logarithmic + } + public let bufferSize: Int public let windowType: WindowType + public let scaleType: ScaleType private let bufferHalfSize: Int private let bufferLog2Size: Int @@ -104,9 +110,11 @@ class FFTProcessor { private var realPointer: UnsafeMutablePointer private var imaginaryPointer: UnsafeMutablePointer - init(bufferSize: Int, windowType: WindowType = .hamming) { + init(bufferSize: Int, scaleType: ScaleType = .linear, windowType: WindowType = .hanning) { self.bufferSize = bufferSize + self.scaleType = scaleType self.windowType = windowType + bufferHalfSize = bufferSize / 2 bufferLog2Size = Int(log2f(Float(bufferSize))) @@ -119,6 +127,7 @@ class FFTProcessor { imaginaryPointer.initialize(repeating: 0.0, count: bufferHalfSize) complexBuffer = DSPSplitComplex(realp: realPointer, imagp: imaginaryPointer) + setupWindow() } deinit { @@ -127,18 +136,39 @@ class FFTProcessor { imaginaryPointer.deallocate() } + private func setupWindow() { + window = [Float](repeating: 1.0, count: bufferSize) + switch windowType { + case .none: + break + case .hanning: + vDSP_hann_window(&window, UInt(bufferSize), Int32(vDSP_HANN_NORM)) + case .hamming: + vDSP_hamm_window(&window, UInt(bufferSize), 0) + } + } + func process(buffer: [Float]) -> FFTResult { guard buffer.count == bufferSize else { fatalError("Input buffer size mismatch.") } - buffer.withUnsafeBufferPointer { bufferPtr in + // Create a new array to hold the windowed buffer + var windowedBuffer = [Float](repeating: 0.0, count: bufferSize) + + // Multiply the input buffer by the window coefficients + vDSP_vmul(buffer, 1, window, 1, &windowedBuffer, 1, UInt(bufferSize)) + + // Convert the real input to split complex form + windowedBuffer.withUnsafeBufferPointer { bufferPtr in let complexPtr = UnsafeRawPointer(bufferPtr.baseAddress!).bindMemory(to: DSPComplex.self, capacity: bufferHalfSize) vDSP_ctoz(complexPtr, 2, &complexBuffer, 1, UInt(bufferHalfSize)) } + // Perform the FFT vDSP_fft_zrip(fftSetup, &complexBuffer, 1, UInt(bufferLog2Size), Int32(FFT_FORWARD)) + // Calculate magnitudes var magnitudes = [Float](repeating: 0.0, count: bufferHalfSize) vDSP_zvmags(&complexBuffer, 1, &magnitudes, 1, UInt(bufferHalfSize)) From 6588a95cfa616ea55a66940a28cbf343ee5b62d8 Mon Sep 17 00:00:00 2001 From: hiroshihorie <548776+hiroshihorie@users.noreply.github.com> Date: Sun, 29 Sep 2024 18:16:47 +0900 Subject: [PATCH 6/9] centering, min max db param --- .../LiveKit/Convenience/AudioProcessing.swift | 54 ++++++++++++++++--- Sources/LiveKit/Protocols/AudioRenderer.swift | 21 ++++++++ Sources/LiveKit/Support/FFTProcessor.swift | 25 ++++++--- 3 files changed, 87 insertions(+), 13 deletions(-) diff --git a/Sources/LiveKit/Convenience/AudioProcessing.swift b/Sources/LiveKit/Convenience/AudioProcessing.swift index 73204025f..b709abe5e 100644 --- a/Sources/LiveKit/Convenience/AudioProcessing.swift +++ b/Sources/LiveKit/Convenience/AudioProcessing.swift @@ -107,7 +107,10 @@ public class AudioVisualizeProcessor { public let minFrequency: Float public let maxFrequency: Float + public let minDB: Float + public let maxDB: Float public let bandsCount: Int + public let isCentered: Bool public private(set) var bands: [Float]? @@ -116,10 +119,20 @@ public class AudioVisualizeProcessor { private let ringBuffer = FloatRingBuffer(size: AudioVisualizeProcessor.bufferSize) private let processor: FFTProcessor - public init(minFrequency: Float = 10, maxFrequency: Float = 8000, bandsCount: Int = 100) { + public init(minFrequency: Float = 10, + maxFrequency: Float = 8000, + minDB: Float = -32.0, + maxDB: Float = 32.0, + bandsCount: Int = 100, + isCentered: Bool = false) + { self.minFrequency = minFrequency self.maxFrequency = maxFrequency + self.minDB = minDB + self.maxDB = maxDB self.bandsCount = bandsCount + self.isCentered = isCentered + processor = FFTProcessor(bufferSize: Self.bufferSize) } @@ -136,21 +149,50 @@ public class AudioVisualizeProcessor { // Process FFT and compute frequency bands let fftRes = processor.process(buffer: buffer) let bands = fftRes.computeBands( - minFrequency: minFrequency, + minFrequency: 0, maxFrequency: maxFrequency, bandsCount: bandsCount, sampleRate: Float(pcmBuffer.format.sampleRate) ) - // Constants for decibel conversion - let maxDB: Float = 64.0 - let minDB: Float = -32.0 let headroom = maxDB - minDB // Normalize magnitudes to decibel ratio using a functional approach - self.bands = bands.magnitudes.map { magnitude in + var normalizedBands = bands.magnitudes.map { magnitude in let magnitudeDB = max(0, magnitude.toDecibels + abs(minDB)) return min(1.0, magnitudeDB / headroom) } + + // If centering is enabled, rearrange the normalized bands + if isCentered { + // Sort the normalized bands from highest to lowest + normalizedBands.sort(by: >) + + // Center the sorted bands + self.bands = centerBands(normalizedBands) + } else { + self.bands = normalizedBands + } + } + + /// Centers the sorted bands by placing higher values in the middle. + private func centerBands(_ sortedBands: [Float]) -> [Float] { + var centeredBands = [Float](repeating: 0, count: sortedBands.count) + var leftIndex = sortedBands.count / 2 + var rightIndex = leftIndex + + for (index, value) in sortedBands.enumerated() { + if index % 2 == 0 { + // Place value to the right + centeredBands[rightIndex] = value + rightIndex += 1 + } else { + // Place value to the left + leftIndex -= 1 + centeredBands[leftIndex] = value + } + } + + return centeredBands } } diff --git a/Sources/LiveKit/Protocols/AudioRenderer.swift b/Sources/LiveKit/Protocols/AudioRenderer.swift index 535e606aa..4a2ae055a 100644 --- a/Sources/LiveKit/Protocols/AudioRenderer.swift +++ b/Sources/LiveKit/Protocols/AudioRenderer.swift @@ -33,9 +33,30 @@ class AudioRendererAdapter: NSObject, LKRTCAudioRenderer { private weak var target: AudioRenderer? private let targetHashValue: Int + struct GlobalState { + var instanceCount: Int = 0 + } + + private static var _state = StateSync(GlobalState()) + init(target: AudioRenderer) { self.target = target targetHashValue = ObjectIdentifier(target).hashValue + + let count = Self._state.mutate { + $0.instanceCount += 1 + return $0.instanceCount + } + + print("AudioRendererAdapter instance count: \(count)") + } + + deinit { + let count = Self._state.mutate { + $0.instanceCount -= 1 + return $0.instanceCount + } + print("AudioRendererAdapter instance count: \(count)") } func render(pcmBuffer: AVAudioPCMBuffer) { diff --git a/Sources/LiveKit/Support/FFTProcessor.swift b/Sources/LiveKit/Support/FFTProcessor.swift index ef66dd250..ecd2afe5c 100755 --- a/Sources/LiveKit/Support/FFTProcessor.swift +++ b/Sources/LiveKit/Support/FFTProcessor.swift @@ -34,9 +34,11 @@ public struct FFTComputeBandsResult { public class FFTResult { public let magnitudes: [Float] + private let scaleType: FFTProcessor.ScaleType - init(magnitudes: [Float]) { + init(magnitudes: [Float], scaleType: FFTProcessor.ScaleType) { self.magnitudes = magnitudes + self.scaleType = scaleType } func computeBands(minFrequency: Float, maxFrequency: Float, bandsCount: Int, sampleRate: Float) -> FFTComputeBandsResult { @@ -52,10 +54,18 @@ public class FFTResult { let magsStartIdx = Int(floorf(Float(i) * ratio)) + magLowerRange let magsEndIdx = Int(floorf(Float(i + 1) * ratio)) + magLowerRange - bandMagnitudes[i] = magsEndIdx == magsStartIdx - ? magnitudes[magsStartIdx] - : _computeAverage(magnitudes, magsStartIdx, magsEndIdx) - + let count = magsEndIdx - magsStartIdx + if count > 0 { + if scaleType == .linear { + // Linear scale averaging + bandMagnitudes[i] = _computeAverage(magnitudes, magsStartIdx, magsEndIdx) + } + } else { + // Single value case + bandMagnitudes[i] = magnitudes[magsStartIdx] + } + + // Compute average frequency bandFrequencies[i] = _averageFrequencyInRange(magsStartIdx, magsEndIdx, sampleRate: sampleRate) } @@ -109,6 +119,7 @@ class FFTProcessor { private var complexBuffer: DSPSplitComplex private var realPointer: UnsafeMutablePointer private var imaginaryPointer: UnsafeMutablePointer + private var zeroDBReference: Float = 1.0 init(bufferSize: Int, scaleType: ScaleType = .linear, windowType: WindowType = .hanning) { self.bufferSize = bufferSize @@ -170,8 +181,8 @@ class FFTProcessor { // Calculate magnitudes var magnitudes = [Float](repeating: 0.0, count: bufferHalfSize) - vDSP_zvmags(&complexBuffer, 1, &magnitudes, 1, UInt(bufferHalfSize)) + vDSP_zvabs(&complexBuffer, 1, &magnitudes, 1, UInt(bufferHalfSize)) - return FFTResult(magnitudes: magnitudes) + return FFTResult(magnitudes: magnitudes, scaleType: scaleType) } } From 0a8f253fb613eaa0653a7c996bdc6bc4b500a636 Mon Sep 17 00:00:00 2001 From: hiroshihorie <548776+hiroshihorie@users.noreply.github.com> Date: Sun, 29 Sep 2024 18:25:28 +0900 Subject: [PATCH 7/9] smoothing 1 --- .../LiveKit/Convenience/AudioProcessing.swift | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/Sources/LiveKit/Convenience/AudioProcessing.swift b/Sources/LiveKit/Convenience/AudioProcessing.swift index b709abe5e..c3b74d72e 100644 --- a/Sources/LiveKit/Convenience/AudioProcessing.swift +++ b/Sources/LiveKit/Convenience/AudioProcessing.swift @@ -111,6 +111,7 @@ public class AudioVisualizeProcessor { public let maxDB: Float public let bandsCount: Int public let isCentered: Bool + public let smoothingFactor: Float public private(set) var bands: [Float]? @@ -124,7 +125,8 @@ public class AudioVisualizeProcessor { minDB: Float = -32.0, maxDB: Float = 32.0, bandsCount: Int = 100, - isCentered: Bool = false) + isCentered: Bool = false, + smoothingFactor: Float = 0.1) // Smoothing factor for smoother transitions { self.minFrequency = minFrequency self.maxFrequency = maxFrequency @@ -132,8 +134,10 @@ public class AudioVisualizeProcessor { self.maxDB = maxDB self.bandsCount = bandsCount self.isCentered = isCentered + self.smoothingFactor = smoothingFactor processor = FFTProcessor(bufferSize: Self.bufferSize) + bands = [Float](repeating: 0.0, count: bandsCount) } public func add(pcmBuffer: AVAudioPCMBuffer) { @@ -165,13 +169,13 @@ public class AudioVisualizeProcessor { // If centering is enabled, rearrange the normalized bands if isCentered { - // Sort the normalized bands from highest to lowest normalizedBands.sort(by: >) + normalizedBands = centerBands(normalizedBands) + } - // Center the sorted bands - self.bands = centerBands(normalizedBands) - } else { - self.bands = normalizedBands + // Smooth transition between old and new bands + self.bands = zip(self.bands ?? [], normalizedBands).map { old, new in + old * (1.0 - smoothingFactor) + new * smoothingFactor } } From 8e3dbe9861d3068f7bc7b62f158ac844e2ef75c6 Mon Sep 17 00:00:00 2001 From: hiroshihorie <548776+hiroshihorie@users.noreply.github.com> Date: Sun, 29 Sep 2024 18:33:46 +0900 Subject: [PATCH 8/9] smoothing 2 --- .../LiveKit/Convenience/AudioProcessing.swift | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/Sources/LiveKit/Convenience/AudioProcessing.swift b/Sources/LiveKit/Convenience/AudioProcessing.swift index c3b74d72e..340866d55 100644 --- a/Sources/LiveKit/Convenience/AudioProcessing.swift +++ b/Sources/LiveKit/Convenience/AudioProcessing.swift @@ -126,7 +126,7 @@ public class AudioVisualizeProcessor { maxDB: Float = 32.0, bandsCount: Int = 100, isCentered: Bool = false, - smoothingFactor: Float = 0.1) // Smoothing factor for smoother transitions + smoothingFactor: Float = 0.2) // Smoothing factor for smoother transitions { self.minFrequency = minFrequency self.maxFrequency = maxFrequency @@ -173,9 +173,9 @@ public class AudioVisualizeProcessor { normalizedBands = centerBands(normalizedBands) } - // Smooth transition between old and new bands + // Smooth transition using an easing function self.bands = zip(self.bands ?? [], normalizedBands).map { old, new in - old * (1.0 - smoothingFactor) + new * smoothingFactor + _smoothTransition(from: old, to: new, factor: smoothingFactor) } } @@ -199,4 +199,19 @@ public class AudioVisualizeProcessor { return centeredBands } + + /// Applies an easing function to smooth the transition. + private func _smoothTransition(from oldValue: Float, to newValue: Float, factor: Float) -> Float { + // Calculate the delta change between the old and new value + let delta = newValue - oldValue + // Apply an ease-in-out cubic easing curve + let easedFactor = _easeInOutCubic(t: factor) + // Calculate and return the smoothed value + return oldValue + delta * easedFactor + } + + /// Easing function: ease-in-out cubic + private func _easeInOutCubic(t: Float) -> Float { + t < 0.5 ? 4 * t * t * t : 1 - pow(-2 * t + 2, 3) / 2 + } } From 053aa0cde0b0aacd7ad684496895192aab10a7b2 Mon Sep 17 00:00:00 2001 From: hiroshihorie <548776+hiroshihorie@users.noreply.github.com> Date: Mon, 30 Sep 2024 01:11:50 +0900 Subject: [PATCH 9/9] Optimize --- .../LiveKit/Convenience/AudioProcessing.swift | 2 +- Sources/LiveKit/Protocols/AudioRenderer.swift | 44 +++---------------- .../Track/Remote/RemoteAudioTrack.swift | 26 +++++++++-- 3 files changed, 30 insertions(+), 42 deletions(-) diff --git a/Sources/LiveKit/Convenience/AudioProcessing.swift b/Sources/LiveKit/Convenience/AudioProcessing.swift index 340866d55..4341350d4 100644 --- a/Sources/LiveKit/Convenience/AudioProcessing.swift +++ b/Sources/LiveKit/Convenience/AudioProcessing.swift @@ -126,7 +126,7 @@ public class AudioVisualizeProcessor { maxDB: Float = 32.0, bandsCount: Int = 100, isCentered: Bool = false, - smoothingFactor: Float = 0.2) // Smoothing factor for smoother transitions + smoothingFactor: Float = 0.3) // Smoothing factor for smoother transitions { self.minFrequency = minFrequency self.maxFrequency = maxFrequency diff --git a/Sources/LiveKit/Protocols/AudioRenderer.swift b/Sources/LiveKit/Protocols/AudioRenderer.swift index 4a2ae055a..7675a00da 100644 --- a/Sources/LiveKit/Protocols/AudioRenderer.swift +++ b/Sources/LiveKit/Protocols/AudioRenderer.swift @@ -29,47 +29,17 @@ public protocol AudioRenderer { func render(pcmBuffer: AVAudioPCMBuffer) } -class AudioRendererAdapter: NSObject, LKRTCAudioRenderer { - private weak var target: AudioRenderer? - private let targetHashValue: Int +class AudioRendererAdapter: MulticastDelegate, LKRTCAudioRenderer { + // + typealias Delegate = AudioRenderer - struct GlobalState { - var instanceCount: Int = 0 + init() { + super.init(label: "AudioRendererAdapter") } - private static var _state = StateSync(GlobalState()) - - init(target: AudioRenderer) { - self.target = target - targetHashValue = ObjectIdentifier(target).hashValue - - let count = Self._state.mutate { - $0.instanceCount += 1 - return $0.instanceCount - } - - print("AudioRendererAdapter instance count: \(count)") - } - - deinit { - let count = Self._state.mutate { - $0.instanceCount -= 1 - return $0.instanceCount - } - print("AudioRendererAdapter instance count: \(count)") - } + // MARK: - LKRTCAudioRenderer func render(pcmBuffer: AVAudioPCMBuffer) { - target?.render(pcmBuffer: pcmBuffer) - } - - // Proxy the equality operators - override func isEqual(_ object: Any?) -> Bool { - guard let other = object as? AudioRendererAdapter else { return false } - return targetHashValue == other.targetHashValue - } - - override var hash: Int { - targetHashValue + notify { $0.render(pcmBuffer: pcmBuffer) } } } diff --git a/Sources/LiveKit/Track/Remote/RemoteAudioTrack.swift b/Sources/LiveKit/Track/Remote/RemoteAudioTrack.swift index 6348e3eb4..94821a347 100644 --- a/Sources/LiveKit/Track/Remote/RemoteAudioTrack.swift +++ b/Sources/LiveKit/Track/Remote/RemoteAudioTrack.swift @@ -14,6 +14,7 @@ * limitations under the License. */ +import AVFoundation import CoreMedia #if swift(>=5.9) @@ -36,6 +37,8 @@ public class RemoteAudioTrack: Track, RemoteTrack, AudioTrack { } } + private lazy var _adapter = AudioRendererAdapter() + init(name: String, source: Track.Source, track: LKRTCMediaStreamTrack, @@ -48,14 +51,29 @@ public class RemoteAudioTrack: Track, RemoteTrack, AudioTrack { reportStatistics: reportStatistics) } - public func add(audioRenderer: AudioRenderer) { + deinit { + // Directly remove the adapter without unnecessary checks guard let audioTrack = mediaTrack as? LKRTCAudioTrack else { return } - audioTrack.add(AudioRendererAdapter(target: audioRenderer)) + audioTrack.remove(_adapter) + } + + public func add(audioRenderer: AudioRenderer) { + let wasEmpty = _adapter.countDelegates == 0 + _adapter.add(delegate: audioRenderer) + // Attach adapter only if it wasn't attached before + if wasEmpty { + guard let audioTrack = mediaTrack as? LKRTCAudioTrack else { return } + audioTrack.add(_adapter) + } } public func remove(audioRenderer: AudioRenderer) { - guard let audioTrack = mediaTrack as? LKRTCAudioTrack else { return } - audioTrack.remove(AudioRendererAdapter(target: audioRenderer)) + _adapter.remove(delegate: audioRenderer) + // Remove adapter only if there are no more delegates + if _adapter.countDelegates == 0 { + guard let audioTrack = mediaTrack as? LKRTCAudioTrack else { return } + audioTrack.remove(_adapter) + } } // MARK: - Internal