redo noisegate to fix voice presence

This commit is contained in:
Logan Cusano
2025-07-14 21:34:08 -04:00
parent f893209f36
commit 9040462171
2 changed files with 123 additions and 154 deletions

View File

@@ -1,152 +1,115 @@
import audioop
import math
import pyaudio
import discord
import asyncio
from discord import VoiceClient, OpusNotLoaded
from internal.logger import create_logger
LOGGER = create_logger(__name__)
DISCORD_FRAME_SIZE = 3840
SILENT_FRAME = b'\x00' * DISCORD_FRAME_SIZE
# Constants for audio processing
SAMPLES_PER_FRAME = 960 # For 20ms audio at 48kHz
CHANNELS = 2
SAMPLE_RATE = 48000
FRAME_SIZE = SAMPLES_PER_FRAME * CHANNELS * 2 # 16-bit PCM (2 bytes)
# NOTE(review): indentation is missing in this view and the span appears to
# interleave two definitions (AudioStream.__init__ and the start of
# AudioTransmitter.__init__) — confirm against the repository before editing.
class AudioStream:
# ... This class remains unchanged from the previous version ...
# Builds the keyword arguments that init_stream() later passes to
# PyAudio.open(): 16-bit samples plus the caller's channel count, sample
# rate, input/output flags and frames-per-buffer.
def __init__(self, _channels: int = 2, _sample_rate: int = 48000, _frames_per_buffer: int = 960,
_input_device_index: int = None, _output_device_index: int = None, _input: bool = True,
_output: bool = True, _init_on_startup: bool = True, **kwargs): # Accept extra kwargs
self.paInstance_kwargs = {
'format': pyaudio.paInt16,
'channels': _channels,
'rate': _sample_rate,
'input': _input,
'output': _output,
'frames_per_buffer': _frames_per_buffer
}
# AudioTransmitter's constructor rejects a voice client that is not connected
# (ValueError) or that has no Opus encoder (OpusNotLoaded).
class AudioTransmitter:
def __init__(self, voice_client: VoiceClient, noise_gate_threshold: int, loop: asyncio.AbstractEventLoop, input_device_index: int):
if not voice_client.is_connected():
raise ValueError("VoiceClient is not connected.")
if not hasattr(voice_client, 'encoder') or not voice_client.encoder:
raise OpusNotLoaded("VoiceClient has not initialized its Opus encoder.")
# NOTE(review): the names used from here on (_input_device_index, _input,
# _output_device_index, _output, _init_on_startup) are parameters of
# AudioStream.__init__, not of AudioTransmitter.__init__ — presumably these
# lines are the remainder of AudioStream.__init__; confirm.
# A device index is recorded only when the matching direction is enabled;
# otherwise a warning is logged and the index is ignored.
if _input_device_index is not None:
if _input:
self.paInstance_kwargs['input_device_index'] = _input_device_index
else:
LOGGER.warning("[AudioStream.__init__]:\tInput was not enabled."
" Reinitialize with '_input=True'")
if _output_device_index is not None:
if _output:
self.paInstance_kwargs['output_device_index'] = _output_device_index
else:
LOGGER.warning("[AudioStream.__init__]:\tOutput was not enabled."
" Reinitialize with '_output=True'")
if _init_on_startup:
LOGGER.info("Creating PyAudio instance")
self.paInstance = pyaudio.PyAudio()
self.stream = None
# The stream is opened during construction only when at least one device
# index was supplied and _init_on_startup is set.
if _output_device_index is not None or _input_device_index is not None:
if _init_on_startup:
LOGGER.info("Init stream")
self.init_stream()
def init_stream(self, _new_output_device_index: int = None, _new_input_device_index: int = None):
    """Open a fresh PyAudio stream, optionally switching devices first.

    A new device index is stored in ``self.paInstance_kwargs`` only when the
    matching direction flag ('input' / 'output') is truthy there; otherwise a
    warning is logged and the index is ignored. Any currently active stream
    is closed via ``close_if_open()`` before the new one is opened.

    Args:
        _new_output_device_index: Output device to switch to, or None to
            keep the current setting.
        _new_input_device_index: Input device to switch to, or None to keep
            the current setting.
    """
    # (direction flag, kwarg to set, requested index, warning when disabled);
    # input is handled before output.
    overrides = (
        ('input', 'input_device_index', _new_input_device_index,
         "[AudioStream.init_stream]:\tInput was not enabled when initialized."),
        ('output', 'output_device_index', _new_output_device_index,
         "[AudioStream.init_stream]:\tOutput was not enabled when initialized."),
    )
    for enabled_key, device_key, device_index, disabled_msg in overrides:
        if device_index is None:
            continue
        if self.paInstance_kwargs[enabled_key]:
            self.paInstance_kwargs[device_key] = device_index
        else:
            LOGGER.warning(disabled_msg)

    # Drop whatever stream is still running before opening the replacement.
    self.close_if_open()
    self.stream = self.paInstance.open(**self.paInstance_kwargs)
def close_if_open(self):
    """Stop and close ``self.stream`` when one exists and reports itself active.

    Does nothing when there is no stream or the stream is not active.
    """
    if not (self.stream and self.stream.is_active()):
        return
    self.stream.stop_stream()
    self.stream.close()
    LOGGER.debug("[ReopenStream.close_if_open]:\t Stream was open; It was closed.")
# AudioStream subclass that plays a NoiseGateStream source through a voice
# connection. NOTE(review): indentation is missing in this view, so statement
# nesting below is inferred — confirm against the repository.
class NoiseGate(AudioStream):
# Stores the voice connection and the gate threshold, forwards the remaining
# keyword arguments to AudioStream.__init__, and builds the NoiseGateStream
# source with the supplied event loop. THRESHOLD is compared by
# NoiseGateStream.read against 20 * log10(rms) of each buffer.
def __init__(self, _voice_connection, _noise_gate_threshold: int, loop: asyncio.AbstractEventLoop, **kwargs):
super(NoiseGate, self).__init__(**kwargs)
self.voice_connection = _voice_connection
self.THRESHOLD = _noise_gate_threshold
# **THE FIX**: Pass the loop down to the stream source.
self.NGStream = NoiseGateStream(self, loop)
# Starts the PyAudio stream, then delegates to core() to begin playback.
def run(self) -> None:
LOGGER.debug("Starting stream")
self.stream.start_stream()
self.core()
# Starts playing NGStream only when the voice connection is connected and
# nothing is already playing on it.
def core(self):
if self.voice_connection.is_connected() and not self.voice_connection.is_playing():
LOGGER.debug("Playing stream to discord")
self.voice_connection.play(self.NGStream)
# Releases everything: schedules speak(False) on the stream's loop if it was
# speaking, stops playback, closes the PyAudio stream and terminates PyAudio.
# NOTE(review): it is unclear from this view whether voice_connection.stop()
# runs only when the connection is still connected — confirm.
async def close(self):
LOGGER.debug("Closing NoiseGate resources...")
if self.voice_connection and self.voice_connection.is_connected():
# Set speaking to false on close
if self.NGStream.is_speaking:
asyncio.run_coroutine_threadsafe(self.voice_connection.ws.speak(False), self.NGStream.loop)
self.voice_connection.stop()
self.close_if_open()
if self.paInstance:
self.paInstance.terminate()
LOGGER.debug("NoiseGate resources closed.")
# NOTE(review): this span appears to interleave NoiseGateStream.__init__ with
# the body of a different constructor; indentation is missing in this view.
# Confirm against the repository before editing.
class NoiseGateStream(discord.AudioSource):
def __init__(self, noise_gate_instance: NoiseGate, loop: asyncio.AbstractEventLoop):
super(NoiseGateStream, self).__init__()
self.noise_gate = noise_gate_instance
# NOTE(review): voice_client and noise_gate_threshold are not parameters of
# this __init__; they match AudioTransmitter.__init__'s signature —
# presumably these two assignments belong there.
self.voice_client = voice_client
self.threshold = noise_gate_threshold
self.loop = loop
self.is_speaking = True
# Number of below-threshold reads for which the gate is still kept open.
self.NG_fadeout = 12
self.NG_fadeout_count = 0
# NOTE(review): input_device_index is likewise not a parameter here.
self.input_device_index = input_device_index
# NOTE(review): the statements after this header create a PyAudio instance and
# initialise transmitter state rather than returning a frame — presumably they
# are constructor code for AudioTransmitter, not the body of read(); confirm.
def read(self):
self.pa = pyaudio.PyAudio()
# Input-only 16-bit stream using the module's CHANNELS, SAMPLE_RATE and
# SAMPLES_PER_FRAME constants on the selected input device.
self.stream = self.pa.open(
format=pyaudio.paInt16,
channels=CHANNELS,
rate=SAMPLE_RATE,
input=True,
frames_per_buffer=SAMPLES_PER_FRAME,
input_device_index=self.input_device_index
)
self._is_running = False
self._is_speaking = False
self.ng_fadeout_count = 0
self.NG_FADEOUT_FRAMES = 12 # 240ms fadeout time
async def _set_speaking(self, speaking: bool):
"""Safely sets the speaking state if it has changed."""
if self._is_speaking != speaking:
self._is_speaking = speaking
await self.voice_client.ws.speak(speaking)
# Main transmit loop: marks the transmitter running, starts the input stream,
# then per iteration reads SAMPLES_PER_FRAME frames in an executor, evaluates
# the noise gate, Opus-encodes and sends the audio while the gate is open, and
# sleeps 20 ms. _cleanup() is awaited in the finally clause.
# NOTE(review): this span appears to interleave the loop with a synchronous
# frame-returning implementation (the lines using self.noise_gate, curr_buffer
# and `return SILENT_FRAME`); indentation is missing in this view. Confirm
# against the repository before editing.
async def start(self):
"""Starts the main audio transmission loop."""
self._is_running = True
self.stream.start_stream()
LOGGER.info("Audio transmitter started.")
try:
# NOTE(review): presumably part of the frame-returning implementation — when
# the voice connection is gone, speaking is cleared and silence is returned.
if not self.noise_gate.voice_connection.is_connected():
if self.is_speaking:
asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop)
self.is_speaking = False
return SILENT_FRAME
while self._is_running:
# Read audio data in a separate thread to not block the event loop
pcm_data = await self.loop.run_in_executor(
None, self.stream.read, SAMPLES_PER_FRAME
)
# NOTE(review): second read path; a buffer whose length differs from
# DISCORD_FRAME_SIZE is replaced by silence.
curr_buffer = self.noise_gate.stream.read(960, exception_on_overflow=False)
if len(curr_buffer) != DISCORD_FRAME_SIZE:
if self.is_speaking:
asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop)
self.is_speaking = False
return SILENT_FRAME
gate_is_open = self._check_noise_gate(pcm_data)
# Inline gate: RMS of 16-bit samples converted to dB via 20 * log10(rms); a
# buffer at or above THRESHOLD re-arms the fade-out counter, and quieter
# buffers keep the gate open while that counter is positive.
buffer_rms = audioop.rms(curr_buffer, 2)
gate_is_open = False
if buffer_rms > 0:
buffer_decibel = 20 * math.log10(buffer_rms)
if buffer_decibel >= self.noise_gate.THRESHOLD:
self.NG_fadeout_count = self.NG_fadeout
gate_is_open = True
elif self.NG_fadeout_count > 0:
self.NG_fadeout_count -= 1
gate_is_open = True
# Speaking state is only sent to the websocket when it actually changes.
if gate_is_open and not self.is_speaking:
asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(True), self.loop)
self.is_speaking = True
elif not gate_is_open and self.is_speaking:
asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop)
self.is_speaking = False
return bytes(curr_buffer) if gate_is_open else SILENT_FRAME
if gate_is_open:
# If gate is open, ensure speaking is on and send audio
await self._set_speaking(True)
# Encode PCM data to Opus
encoded_packets = self.voice_client.encoder.encode(pcm_data, SAMPLES_PER_FRAME)
# Send each encoded packet
for packet in encoded_packets:
self.voice_client.send_audio_packet(packet)
else:
# If gate is closed, ensure speaking is off
await self._set_speaking(False)
# Wait for the next 20ms interval
# NOTE(review): the executor read above presumably already blocks for about
# one frame of audio, so this extra sleep may make the loop fall behind the
# capture rate — confirm with the PyAudio blocking-read behaviour.
await asyncio.sleep(0.02)
except asyncio.CancelledError:
LOGGER.info("Audio transmitter task cancelled.")
except Exception as e:
LOGGER.error(f"Unhandled exception in NoiseGateStream.read: {e}", exc_info=True)
if self.is_speaking:
asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop)
self.is_speaking = False
return SILENT_FRAME
LOGGER.error(f"Error in audio transmitter loop: {e}", exc_info=True)
finally:
await self._cleanup()
def _check_noise_gate(self, pcm_data: bytes) -> bool:
"""Applies the noise gate logic to raw PCM data."""
rms = audioop.rms(pcm_data, 2)
if rms == 0: return False
db = 20 * math.log10(rms)
if db >= self.threshold:
self.ng_fadeout_count = self.NG_FADEOUT_FRAMES
return True
elif self.ng_fadeout_count > 0:
self.ng_fadeout_count -= 1
return True
return False
async def stop(self):
    """Ask the transmission loop to finish by clearing ``self._is_running``.

    Only the flag is changed here; the loop in ``start`` checks it at the
    top of each iteration.
    """
    self._is_running = False
async def _cleanup(self):
    """Release the speaking state, the input stream and the PyAudio instance.

    Steps, in order: clear the speaking state if it is set, stop the stream
    if it is active, close the stream, terminate PyAudio. Each later step
    runs even when an earlier one raises, so a failing websocket call (for
    example on an already-closed connection) no longer leaves the audio
    device open. The first exception still propagates to the caller once
    the remaining steps have run.

    NOTE(review): indentation is missing in the reviewed view of this file;
    ``self.stream.close()`` is assumed to run whether or not the stream was
    active — confirm against the repository.
    """
    LOGGER.info("Cleaning up transmitter resources.")
    try:
        if self._is_speaking:
            await self._set_speaking(False)
    finally:
        try:
            if self.stream.is_active():
                self.stream.stop_stream()
            self.stream.close()
        finally:
            # Always give the PortAudio resources back, even if closing failed.
            self.pa.terminate()