Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Audio visualization helpers #474

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
119 changes: 118 additions & 1 deletion Sources/LiveKit/Convenience/AudioProcessing.swift
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ public extension LKAudioBuffer {
guard let targetBufferPointer = pcmBuffer.floatChannelData else { return nil }

// Optimized version
var normalizationFactor: Float = 1.0 / 32768.0
let factor = Float(Int16.max)
var normalizationFactor: Float = 1.0 / factor // Or use 32768.0

for i in 0 ..< channels {
vDSP_vsmul(rawBuffer(forChannel: i),
Expand Down Expand Up @@ -98,3 +99,119 @@ public extension Sequence where Iterator.Element == AudioLevel {
peak: totalSums.peakSum / Float(count))
}
}

/// Converts incoming PCM audio into a fixed number of normalized, smoothed frequency bands
/// intended for driving an audio visualization.
///
/// Samples from the first channel are accumulated in a ring buffer. Whenever the ring buffer
/// yields a buffer, it is run through an FFT, folded into `bandsCount` bands, mapped onto
/// `0...1` using the `minDB`/`maxDB` window and blended with the previous `bands` values.
public class AudioVisualizeProcessor {
    /// Number of samples handed to the FFT per analysis pass.
    static let bufferSize = 1024

    // MARK: - Public

    /// Lowest frequency (Hz) included in the computed bands.
    public let minFrequency: Float
    /// Highest frequency (Hz) included in the computed bands.
    public let maxFrequency: Float
    /// Lower decibel bound of the normalization window.
    public let minDB: Float
    /// Upper decibel bound of the normalization window.
    public let maxDB: Float
    /// Number of bands produced per analysis pass.
    public let bandsCount: Int
    /// When `true`, bands are sorted by level and re-arranged so the loudest sit in the middle.
    public let isCentered: Bool
    /// Input to the easing curve that decides how far each update moves towards the new value.
    public let smoothingFactor: Float

    /// Most recent smoothed band levels, each in `0...1`.
    public private(set) var bands: [Float]?

    // MARK: - Private

    private let ringBuffer = FloatRingBuffer(size: AudioVisualizeProcessor.bufferSize)
    private let processor: FFTProcessor

    /// Creates a processor.
    ///
    /// - Parameters:
    ///   - minFrequency: Lowest frequency (Hz) to visualize.
    ///   - maxFrequency: Highest frequency (Hz) to visualize.
    ///   - minDB: Lower decibel bound used for normalization.
    ///   - maxDB: Upper decibel bound used for normalization.
    ///   - bandsCount: Number of bands to produce.
    ///   - isCentered: Whether to arrange the loudest bands in the middle.
    ///   - smoothingFactor: Smoothing factor for smoother transitions.
    public init(minFrequency: Float = 10,
                maxFrequency: Float = 8000,
                minDB: Float = -32.0,
                maxDB: Float = 32.0,
                bandsCount: Int = 100,
                isCentered: Bool = false,
                smoothingFactor: Float = 0.3)
    {
        self.minFrequency = minFrequency
        self.maxFrequency = maxFrequency
        self.minDB = minDB
        self.maxDB = maxDB
        self.bandsCount = bandsCount
        self.isCentered = isCentered
        self.smoothingFactor = smoothingFactor

        processor = FFTProcessor(bufferSize: Self.bufferSize)
        bands = [Float](repeating: 0.0, count: bandsCount)
    }

    /// Feeds a PCM buffer into the processor and, when enough samples are available,
    /// refreshes `bands`.
    ///
    /// Buffers without float channel data are ignored. Only the first channel is analysed.
    ///
    /// - Parameter pcmBuffer: Audio to analyse.
    public func add(pcmBuffer: AVAudioPCMBuffer) {
        guard let floatChannelData = pcmBuffer.floatChannelData else { return }

        // Copy the first channel's samples into the ring buffer.
        let samples = Array(UnsafeBufferPointer(start: floatChannelData[0], count: Int(pcmBuffer.frameLength)))
        ringBuffer.write(samples)

        // Get full-size buffer if available, otherwise return
        guard let buffer = ringBuffer.read() else { return }

        let sampleRate = Float(pcmBuffer.format.sampleRate)

        // Honour the configured lower bound (previously hard-coded to 0). It is only used when
        // it leaves a non-empty range below the effective upper bound; otherwise fall back to 0
        // so an out-of-range configuration behaves exactly as it did before.
        let effectiveMaxFrequency = min(maxFrequency, sampleRate / 2)
        let effectiveMinFrequency: Float =
            (minFrequency >= 0 && minFrequency < effectiveMaxFrequency) ? minFrequency : 0

        // Process FFT and compute frequency bands
        let fftRes = processor.process(buffer: buffer)
        let bands = fftRes.computeBands(
            minFrequency: effectiveMinFrequency,
            maxFrequency: maxFrequency,
            bandsCount: bandsCount,
            sampleRate: sampleRate
        )

        let headroom = maxDB - minDB

        // Shift each magnitude (in dB) up by |minDB|, clamp at 0, then scale by the headroom
        // and cap at 1.
        var normalizedBands = bands.magnitudes.map { magnitude in
            let magnitudeDB = max(0, magnitude.toDecibels + abs(minDB))
            return min(1.0, magnitudeDB / headroom)
        }

        // If centering is enabled, rearrange the normalized bands
        if isCentered {
            normalizedBands.sort(by: >)
            normalizedBands = centerBands(normalizedBands)
        }

        // Blend the previous values towards the new ones.
        self.bands = zip(self.bands ?? [], normalizedBands).map { old, new in
            _smoothTransition(from: old, to: new, factor: smoothingFactor)
        }
    }

    /// Centers the sorted bands by placing higher values in the middle.
    ///
    /// Values are taken in order and placed alternately to the right and to the left of the
    /// midpoint, so a descending input ends up peaking at the centre.
    private func centerBands(_ sortedBands: [Float]) -> [Float] {
        var centeredBands = [Float](repeating: 0, count: sortedBands.count)
        var leftIndex = sortedBands.count / 2
        var rightIndex = leftIndex

        for (index, value) in sortedBands.enumerated() {
            if index % 2 == 0 {
                // Place value to the right
                centeredBands[rightIndex] = value
                rightIndex += 1
            } else {
                // Place value to the left
                leftIndex -= 1
                centeredBands[leftIndex] = value
            }
        }

        return centeredBands
    }

    /// Moves `oldValue` towards `newValue` by the eased `factor`.
    private func _smoothTransition(from oldValue: Float, to newValue: Float, factor: Float) -> Float {
        // Calculate the delta change between the old and new value
        let delta = newValue - oldValue
        // Apply an ease-in-out cubic easing curve
        let easedFactor = _easeInOutCubic(t: factor)
        // Calculate and return the smoothed value
        return oldValue + delta * easedFactor
    }

    /// Easing function: ease-in-out cubic
    private func _easeInOutCubic(t: Float) -> Float {
        t < 0.5 ? 4 * t * t * t : 1 - pow(-2 * t + 2, 3) / 2
    }
}
25 changes: 8 additions & 17 deletions Sources/LiveKit/Protocols/AudioRenderer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -29,26 +29,17 @@ public protocol AudioRenderer {
func render(pcmBuffer: AVAudioPCMBuffer)
}

class AudioRendererAdapter: NSObject, LKRTCAudioRenderer {
private weak var target: AudioRenderer?
private let targetHashValue: Int
class AudioRendererAdapter: MulticastDelegate<AudioRenderer>, LKRTCAudioRenderer {
//
typealias Delegate = AudioRenderer

init(target: AudioRenderer) {
self.target = target
targetHashValue = ObjectIdentifier(target).hashValue
init() {
super.init(label: "AudioRendererAdapter")
}

func render(pcmBuffer: AVAudioPCMBuffer) {
target?.render(pcmBuffer: pcmBuffer)
}
// MARK: - LKRTCAudioRenderer

// Proxy the equality operators
override func isEqual(_ object: Any?) -> Bool {
guard let other = object as? AudioRendererAdapter else { return false }
return targetHashValue == other.targetHashValue
}

override var hash: Int {
targetHashValue
func render(pcmBuffer: AVAudioPCMBuffer) {
notify { $0.render(pcmBuffer: pcmBuffer) }
}
}
188 changes: 188 additions & 0 deletions Sources/LiveKit/Support/FFTProcessor.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
/*
* Copyright 2024 LiveKit
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import Accelerate
import Foundation

extension Float {
    /// Half of this value; when the receiver is a sample rate this is its Nyquist frequency.
    var nyquistFrequency: Float {
        self / 2.0
    }

    /// The absolute value of the receiver expressed in decibels (`20 * log10`).
    ///
    /// Values whose magnitude is below `1e-7` are floored to `1e-7` so the logarithm stays finite.
    var toDecibels: Float {
        let floorLevel: Float = 1e-7
        let clamped = max(magnitude, floorLevel)
        return 20 * log10(clamped)
    }
}

/// Output of `FFTResult.computeBands`: per-band magnitudes and frequencies.
public struct FFTComputeBandsResult {
/// Number of bands that were requested.
let count: Int
/// One magnitude value per band.
let magnitudes: [Float]
/// One frequency value per band, computed as the midpoint of the band's bin range.
let frequencies: [Float]
}

public class FFTResult {
public let magnitudes: [Float]
private let scaleType: FFTProcessor.ScaleType

/// Wraps a set of FFT magnitudes together with the scale used when folding them into bands.
///
/// - Parameters:
///   - magnitudes: Magnitude of each FFT bin.
///   - scaleType: Scale consulted by `computeBands`.
init(magnitudes: [Float], scaleType: FFTProcessor.ScaleType) {
    self.scaleType = scaleType
    self.magnitudes = magnitudes
}

/// Folds the FFT magnitudes into `bandsCount` contiguous bands between two frequencies.
///
/// The requested range is capped at the Nyquist frequency and converted to bin indices, which
/// are clamped to the valid bin range so that out-of-range or inverted frequency bounds can
/// never index outside `magnitudes`.
///
/// - Parameters:
///   - minFrequency: Lower frequency bound in Hz.
///   - maxFrequency: Upper frequency bound in Hz (capped at `sampleRate / 2`).
///   - bandsCount: Number of bands to produce.
///   - sampleRate: Sample rate in Hz of the audio the FFT was computed from.
/// - Returns: Per-band magnitudes and midpoint frequencies. All values are zero when there
///   are no bins, no bands, or `sampleRate` is not positive.
func computeBands(minFrequency: Float, maxFrequency: Float, bandsCount: Int, sampleRate: Float) -> FFTComputeBandsResult {
    var bandMagnitudes = [Float](repeating: 0.0, count: bandsCount)
    var bandFrequencies = [Float](repeating: 0.0, count: bandsCount)

    // Without bins, bands or a usable sample rate the index math below is undefined.
    guard bandsCount > 0, !magnitudes.isEmpty, sampleRate > 0 else {
        return FFTComputeBandsResult(count: bandsCount, magnitudes: bandMagnitudes, frequencies: bandFrequencies)
    }

    let binCount = magnitudes.count
    let actualMaxFrequency = min(sampleRate.nyquistFrequency, maxFrequency)

    // Clamp to 0...binCount and keep lower <= upper so every derived index is valid.
    let rawLower = _magnitudeIndex(for: minFrequency, sampleRate: sampleRate)
    let rawUpper = _magnitudeIndex(for: actualMaxFrequency, sampleRate: sampleRate)
    let magLowerRange = min(max(rawLower, 0), binCount)
    let magUpperRange = min(max(rawUpper, magLowerRange), binCount)
    let ratio = Float(magUpperRange - magLowerRange) / Float(bandsCount)

    for i in 0 ..< bandsCount {
        let magsStartIdx = min(Int(floorf(Float(i) * ratio)) + magLowerRange, magUpperRange)
        let magsEndIdx = min(Int(floorf(Float(i + 1) * ratio)) + magLowerRange, magUpperRange)

        if magsEndIdx > magsStartIdx {
            if scaleType == .linear {
                // Linear scale averaging
                bandMagnitudes[i] = _computeAverage(magnitudes, magsStartIdx, magsEndIdx)
            }
            // NOTE(review): no other scale type is handled here, so such bands stay at 0.
        } else {
            // Single value case; the start index can equal binCount when the range is empty
            // at the very top of the spectrum, so pin it to the last bin.
            bandMagnitudes[i] = magnitudes[min(magsStartIdx, binCount - 1)]
        }

        // Compute average frequency
        bandFrequencies[i] = _averageFrequencyInRange(magsStartIdx, magsEndIdx, sampleRate: sampleRate)
    }

    return FFTComputeBandsResult(count: bandsCount, magnitudes: bandMagnitudes, frequencies: bandFrequencies)
}

/// Maps a frequency to a bin index by scaling it against the Nyquist frequency of `sampleRate`.
@inline(__always) private func _magnitudeIndex(for frequency: Float, sampleRate: Float) -> Int {
    let binCount = Float(magnitudes.count)
    let nyquist = sampleRate.nyquistFrequency
    return Int(binCount * frequency / nyquist)
}

@inline(__always) private func _computeAverage(_ array: [Float], _ startIdx: Int, _ stopIdx: Int) -> Float {
var mean: Float = 0
let count = stopIdx - startIdx
array.withUnsafeBufferPointer { bufferPtr in
let ptr = bufferPtr.baseAddress! + startIdx
vDSP_meanv(ptr, 1, &mean, UInt(count))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vDSP_Length(count)

}
return mean
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can simplify this function using ArraySlice to give responsibility for safe pointer arithmetic back to swift instead:

    @inline(__always) private func _computeAverage(_ array: ArraySlice<Float>) -> Float {
        var mean: Float = 0
        array.withUnsafeBufferPointer { bufferPtr in
            vDSP_meanv(bufferPtr.baseAddress!, 1, &mean, vDSP_Length(array.count))
        }
        return mean
    }

then you call it above with _computeAverage(magnitudes[magsStartIdx..<magsEndIdx])


/// Width in Hz of a single bin: the Nyquist frequency divided by the number of magnitudes.
@inline(__always) private func _computeBandwidth(for sampleRate: Float) -> Float {
    let binCount = Float(magnitudes.count)
    return sampleRate.nyquistFrequency / binCount
}

/// Midpoint frequency of the bin range `startIndex...endIndex`, in Hz.
@inline(__always) private func _averageFrequencyInRange(_ startIndex: Int, _ endIndex: Int, sampleRate: Float) -> Float {
    let binWidth = _computeBandwidth(for: sampleRate)
    let lowerEdge = binWidth * Float(startIndex)
    let upperEdge = binWidth * Float(endIndex)
    return (lowerEdge + upperEdge) / 2
}
}

class FFTProcessor {
/// Window applied to the input samples before the FFT.
public enum WindowType {
/// No window is generated; the coefficients stay at their initial value of 1.0.
case none
/// Coefficients generated with `vDSP_hann_window`.
case hanning
/// Coefficients generated with `vDSP_hamm_window`.
case hamming
}

/// Scale carried into `FFTResult` and consulted when folding magnitudes into bands.
public enum ScaleType {
/// Bands are the mean of the magnitudes they cover.
case linear
/// NOTE(review): `computeBands` only handles `.linear` for multi-bin bands; confirm the intended behavior for this case.
case logarithmic
}

public let bufferSize: Int
public let windowType: WindowType
public let scaleType: ScaleType

private let bufferHalfSize: Int
private let bufferLog2Size: Int
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you should consider declaring these int types as vDSP_Length instead. At the very least, they should be UInt here and cast to vDSP_Length when passed to accelerate later

private var window: [Float] = []
private var fftSetup: FFTSetup
private var complexBuffer: DSPSplitComplex
private var realPointer: UnsafeMutablePointer<Float>
private var imaginaryPointer: UnsafeMutablePointer<Float>
private var zeroDBReference: Float = 1.0

/// Creates a processor for buffers of `bufferSize` samples.
///
/// Allocates the FFT setup and the split-complex working storage (half the buffer size each
/// for the real and imaginary parts), then builds the window coefficients.
///
/// - Parameters:
///   - bufferSize: Number of samples per processed buffer.
///   - scaleType: Scale passed on to every `FFTResult`.
///   - windowType: Window applied to the samples before the FFT.
init(bufferSize: Int, scaleType: ScaleType = .linear, windowType: WindowType = .hanning) {
    let halfSize = bufferSize / 2
    let log2Size = Int(log2f(Float(bufferSize)))

    self.bufferSize = bufferSize
    self.scaleType = scaleType
    self.windowType = windowType
    bufferHalfSize = halfSize
    bufferLog2Size = log2Size

    fftSetup = vDSP_create_fftsetup(UInt(log2Size), FFTRadix(FFT_RADIX2))!

    // Zero-filled backing storage for the split-complex buffer.
    let real = UnsafeMutablePointer<Float>.allocate(capacity: halfSize)
    let imaginary = UnsafeMutablePointer<Float>.allocate(capacity: halfSize)
    real.initialize(repeating: 0.0, count: halfSize)
    imaginary.initialize(repeating: 0.0, count: halfSize)

    realPointer = real
    imaginaryPointer = imaginary
    complexBuffer = DSPSplitComplex(realp: real, imagp: imaginary)

    setupWindow()
}

deinit {
vDSP_destroy_fftsetup(fftSetup)
realPointer.deallocate()
imaginaryPointer.deallocate()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there a reason you're managing raw pointers in this class rather than just using arrays and nested withUnsafeMutableBufferPointer calls as in the apple sample code? looks scary to me 🤷 https://developer.apple.com/documentation/accelerate/vdsp/fast_fourier_transforms/finding_the_component_frequencies_in_a_composite_sine_wave

}

/// Rebuilds `window` with `bufferSize` coefficients for the configured `windowType`.
private func setupWindow() {
    let length = UInt(bufferSize)
    // Start from all ones so `.none` leaves the samples untouched when multiplied.
    var coefficients = [Float](repeating: 1.0, count: bufferSize)

    switch windowType {
    case .hanning:
        vDSP_hann_window(&coefficients, length, Int32(vDSP_HANN_NORM))
    case .hamming:
        vDSP_hamm_window(&coefficients, length, 0)
    case .none:
        break
    }

    window = coefficients
}

/// Applies the window to `buffer`, runs a forward FFT and returns the magnitude of each bin.
///
/// - Parameter buffer: Exactly `bufferSize` samples; any other length is a fatal error.
/// - Returns: An `FFTResult` holding `bufferSize / 2` magnitudes and the processor's scale type.
func process(buffer: [Float]) -> FFTResult {
    if buffer.count != bufferSize {
        fatalError("Input buffer size mismatch.")
    }

    let sampleCount = UInt(bufferSize)
    let halfCount = UInt(bufferHalfSize)

    // Multiply the samples by the window coefficients into a scratch array.
    var windowed = [Float](repeating: 0.0, count: bufferSize)
    vDSP_vmul(buffer, 1, window, 1, &windowed, 1, sampleCount)

    // View the windowed samples as interleaved complex pairs and pack them into split-complex form.
    windowed.withUnsafeBufferPointer { samples in
        let interleaved = UnsafeRawPointer(samples.baseAddress!).bindMemory(to: DSPComplex.self, capacity: bufferHalfSize)
        vDSP_ctoz(interleaved, 2, &complexBuffer, 1, halfCount)
    }

    // Forward FFT over the packed data.
    vDSP_fft_zrip(fftSetup, &complexBuffer, 1, UInt(bufferLog2Size), Int32(FFT_FORWARD))

    // Absolute value of every complex bin.
    var spectrum = [Float](repeating: 0.0, count: bufferHalfSize)
    vDSP_zvabs(&complexBuffer, 1, &spectrum, 1, halfCount)

    return FFTResult(magnitudes: spectrum, scaleType: scaleType)
}
}
Loading
Loading