-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathaudio_module_multi.py
More file actions
274 lines (224 loc) · 9.36 KB
/
audio_module_multi.py
File metadata and controls
274 lines (224 loc) · 9.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
"""
Enhanced Audio Controller with Multi-Object Support - ULTRA LOW LATENCY VERSION.
Generates unique audio signatures for different object types.
Optimized for real-time performance with minimal allocations.
"""
import numpy as np
import threading
import config
from typing import List, Dict
try:
import sounddevice as sd
AUDIO_STREAM_AVAILABLE = True
except ImportError:
sd = None
AUDIO_STREAM_AVAILABLE = False
class AudioSignatureGenerator:
    """Generates unique audio waveforms for different object types."""

    @staticmethod
    def generate_waveform(waveform_type, frequency, duration, sample_rate):
        """Synthesize a mono waveform with a short fade at both ends.

        Args:
            waveform_type: One of "sine", "square", "sawtooth" or "pulse".
                Any other value falls back to a sine wave.
            frequency: Tone frequency in Hz. For "pulse" it only sets the
                burst repetition rate (``frequency / 60`` bursts per second).
            duration: Length of the waveform in seconds.
            sample_rate: Number of samples per second.

        Returns:
            A 1-D float32 NumPy array holding ``int(sample_rate * duration)``
            samples whose peak absolute amplitude is at most 1.0. The array
            is empty when that sample count is zero.
        """
        t = np.linspace(0, duration, int(sample_rate * duration), False)
        phase = 2 * np.pi * frequency * t

        if waveform_type == "square":
            wave = np.sign(np.sin(phase))
        elif waveform_type == "sawtooth":
            wave = 2 * (t * frequency - np.floor(0.5 + t * frequency))
        elif waveform_type == "pulse":
            # Short bursts of a 10 Hz sine occupying the first 10% of every
            # pulse period; the remainder of each period is silence.
            pulse_freq = frequency / 60
            wave = np.zeros_like(t)
            in_burst = (t * pulse_freq) % 1.0 < 0.1
            wave[in_burst] = np.sin(2 * np.pi * 10 * t[in_burst])
        else:
            # "sine" and every unrecognised type.
            wave = np.sin(phase)

        # Nothing to normalise or fade; reductions on empty arrays raise.
        if wave.size == 0:
            return wave.astype(np.float32)

        # Normalise by the peak *absolute* value. Dividing by the positive
        # maximum alone explodes when that maximum is a tiny rounding
        # residue while the negative peak is at full scale.
        peak = np.abs(wave).max()
        if peak > 0:
            wave = wave / peak

        # Apply a 10 ms linear fade in/out to avoid clicks. Skip it when the
        # fade would be zero samples long: wave[-0:] is the whole array and
        # cannot be multiplied by an empty ramp.
        fade_samples = int(0.01 * sample_rate)
        if fade_samples > 0 and len(wave) > 2 * fade_samples:
            wave[:fade_samples] *= np.linspace(0, 1, fade_samples)
            wave[-fade_samples:] *= np.linspace(1, 0, fade_samples)

        return wave.astype(np.float32)
class MultiAudioController:
    """
    Audio controller that mixes several spatial audio sources into one stereo stream.

    Each source is identified by an object id and carries an azimuth, a
    volume, the name of a pre-generated signature waveform and a playback
    position inside that waveform. The stream callback loops every source's
    signature, pans it between the left and right channel and sums the result
    into pre-allocated buffers.
    """

    # Class-level default so resume_stream() works even when start_stream()
    # has never been called on the instance.
    shared_state = None

    def __init__(self):
        """Initialize multi-audio controller from the values in ``config``."""
        self.sample_rate = config.SAMPLE_RATE
        self.buffer_size = config.AUDIO_BUFFER_SIZE

        # Audio signatures cache: signature name -> float32 waveform
        self.signatures = {}
        self._preload_signatures()

        # Active audio sources: object id -> dict with "azimuth", "volume",
        # "signature" and "position" keys
        self.sources = {}
        self.sources_lock = threading.RLock()  # Reentrant for safety

        # Pre-allocated output buffers (avoid repeated allocations in callback)
        self._left_buffer = np.zeros(self.buffer_size, dtype=np.float32)
        self._right_buffer = np.zeros(self.buffer_size, dtype=np.float32)
        self._index_buffer = np.zeros(self.buffer_size, dtype=np.int32)

        # Stream
        self.stream = None
        self.running = False
        self.shared_state = None

        print(
            f"[AUDIO] Multi-AudioController initialized | Sample Rate: {self.sample_rate} Hz"
        )

    def _preload_signatures(self):
        """Pre-generate a 0.5 second waveform for every entry in config.AUDIO_SIGNATURES."""
        for obj_type, sig_config in config.AUDIO_SIGNATURES.items():
            waveform_type = sig_config.get("waveform", "sine")
            frequency = sig_config.get("freq", 440)

            signature = AudioSignatureGenerator.generate_waveform(
                waveform_type, frequency, 0.5, self.sample_rate
            )
            self.signatures[obj_type] = signature
            print(
                f"  [AUDIO] Loaded signature: {obj_type} ({waveform_type} @ {frequency} Hz)"
            )

    def _audio_callback(self, outdata, frames, time_info, status):
        """
        Fill ``outdata`` with the stereo mix of all active sources.

        Args:
            outdata: 2-D output array; columns 0 and 1 receive the left and
                right channel for the first ``frames`` rows.
            frames: Number of frames to produce. Must not exceed
                ``self.buffer_size`` (start_stream() guarantees this).
            time_info: Unused; part of the stream callback signature.
            status: Stream status flags; printed when truthy.

        Sources whose signature is unknown (and for which no "default"
        signature exists) or empty are skipped, so a bad entry yields
        silence for that source instead of an exception in the callback.
        """
        if status:
            print(f"Audio stream status: {status}")

        # Views onto the pre-allocated buffers, reset to silence.
        left = self._left_buffer[:frames]
        right = self._right_buffer[:frames]
        left[:] = 0
        right[:] = 0

        # Quick check if any sources active (avoid lock if none)
        if not self.sources:
            outdata[:frames, 0] = left
            outdata[:frames, 1] = right
            return

        # Snapshot under the lock so mixing runs without holding it.
        with self.sources_lock:
            active_sources = {k: dict(v) for k, v in self.sources.items()}

        fallback_signature = self.signatures.get("default")
        indices = self._index_buffer[:frames]
        new_positions = {}

        for obj_id, source_data in active_sources.items():
            signature = self.signatures.get(
                source_data.get("signature", "default"), fallback_signature
            )
            if signature is None or len(signature) == 0:
                continue
            n_sig_samples = len(signature)

            # Looping read of `frames` samples. Taking the indices modulo the
            # signature length handles a single wrap, signatures shorter than
            # one block, and stale positions past the end alike.
            start = source_data.get("position", 0) % n_sig_samples
            indices[:] = np.arange(start, start + frames, dtype=np.int32)
            indices %= n_sig_samples
            samples = signature[indices]
            new_positions[obj_id] = int((start + frames) % n_sig_samples)

            # Constant-power pan: azimuth maps to an angle in [0, pi/2].
            azimuth = source_data.get("azimuth", 0)
            volume = source_data.get("volume", 0.5)
            pan = max(-1.0, min(1.0, azimuth / config.MAX_AZIMUTH_DEGREES))
            angle = (pan + 1.0) * 0.785398  # pi/4
            left_gain = np.cos(angle) * volume
            right_gain = np.sin(angle) * volume

            # Mix into buffers
            left += samples * left_gain
            right += samples * right_gain

        # Write playback positions back in one locked pass; sources removed
        # while we were mixing are simply ignored.
        if new_positions:
            with self.sources_lock:
                for obj_id, new_position in new_positions.items():
                    entry = self.sources.get(obj_id)
                    if entry is not None:
                        entry["position"] = new_position

        # Normalize only when the *final* mix would clip.
        if frames > 0:
            peak = max(float(np.abs(left).max()), float(np.abs(right).max()))
            if peak > 1.0:
                left /= peak
                right /= peak

        # Output stereo (direct assignment)
        outdata[:frames, 0] = left
        outdata[:frames, 1] = right

    def start_stream(self, shared_state=None):
        """Start the audio stream.

        Args:
            shared_state: Opaque object stored on the instance and passed
                again by resume_stream().

        Returns:
            True when the stream is running (including when it already was),
            False when it could not be started.
        """
        if self.running:
            return True

        self.shared_state = shared_state
        stream = None
        try:
            if sd is None:
                raise RuntimeError(
                    "sounddevice is not installed; audio output is unavailable"
                )
            # Use smaller buffer for lower latency
            buffer_size = min(512, self.buffer_size)
            stream = sd.OutputStream(
                samplerate=self.sample_rate,
                channels=2,
                blocksize=buffer_size,
                callback=self._audio_callback,
                latency="low",  # Explicit low latency mode
            )
            stream.start()
            self.stream = stream
            self.running = True
            print(
                f"[AUDIO] Multi-audio stream started (buffer={buffer_size}, latency=low)"
            )
            return True
        except Exception as e:
            # Release a stream that was created but failed to start.
            if stream is not None:
                try:
                    stream.close()
                except Exception as close_err:
                    print(f"[ERROR] Error closing audio stream: {close_err}")
            self.stream = None
            self.running = False
            print(f"[ERROR] Failed to start audio stream: {e}")
            return False

    def pause_stream(self):
        """Pause the audio stream (implemented as a full stop)."""
        self.stop_stream()
        print("[AUDIO] Multi-audio stream paused.")

    def resume_stream(self):
        """Resume the audio stream with the most recently supplied shared state."""
        if self.start_stream(self.shared_state):
            print("[AUDIO] Multi-audio stream resumed.")

    def stop_stream(self):
        """Stop and close the audio stream; a no-op when it is not running."""
        if not self.running:
            return

        # Reset state first so a failure below cannot leave the controller
        # permanently marked as running with a dead stream.
        stream, self.stream = self.stream, None
        self.running = False
        try:
            if stream is not None:
                try:
                    stream.stop()
                finally:
                    stream.close()
            print("[AUDIO] Multi-audio stream stopped.")
        except Exception as e:
            print(f"[ERROR] Error stopping audio stream: {e}")

    def update_source(self, obj_id, azimuth, volume, signature_name="default"):
        """Update or add an audio source.

        Args:
            obj_id: Key identifying the source.
            azimuth: Clamped to +/- config.MAX_AZIMUTH_DEGREES.
            volume: Clamped to the range [0.0, 1.0].
            signature_name: Name of the signature waveform to play.

        A new source starts at playback position 0; an existing source keeps
        its current position.
        """
        with self.sources_lock:
            if obj_id not in self.sources:
                self.sources[obj_id] = {"position": 0}

            self.sources[obj_id].update(
                {
                    "azimuth": max(
                        -config.MAX_AZIMUTH_DEGREES,
                        min(config.MAX_AZIMUTH_DEGREES, azimuth),
                    ),
                    "volume": max(0.0, min(1.0, volume)),
                    "signature": signature_name,
                }
            )

    def remove_source(self, obj_id):
        """Remove an audio source; unknown ids are ignored."""
        with self.sources_lock:
            self.sources.pop(obj_id, None)

    def clear_sources(self):
        """Clear all audio sources."""
        with self.sources_lock:
            self.sources.clear()

    # Backward compatibility
    def update_position(self, azimuth, elevation, volume):
        """Legacy single-object interface: updates source 0 with the "phone" signature.

        ``elevation`` is accepted for compatibility and not used.
        """
        self.update_source(0, azimuth, volume, "phone")