From 9040462171822e8bdb21e44a2958ad7429c4f5a5 Mon Sep 17 00:00:00 2001 From: Logan Cusano Date: Mon, 14 Jul 2025 21:34:08 -0400 Subject: [PATCH] redo noisegate to fix voice presense --- app/internal/NoiseGatev2.py | 233 +++++++++++++++--------------------- app/internal/bot_manager.py | 44 ++++--- 2 files changed, 123 insertions(+), 154 deletions(-) diff --git a/app/internal/NoiseGatev2.py b/app/internal/NoiseGatev2.py index dd61760..d34de5b 100644 --- a/app/internal/NoiseGatev2.py +++ b/app/internal/NoiseGatev2.py @@ -1,152 +1,115 @@ import audioop import math import pyaudio -import discord import asyncio +from discord import VoiceClient, OpusNotLoaded from internal.logger import create_logger LOGGER = create_logger(__name__) -DISCORD_FRAME_SIZE = 3840 -SILENT_FRAME = b'\x00' * DISCORD_FRAME_SIZE +# Constants for audio processing +SAMPLES_PER_FRAME = 960 # For 20ms audio at 48kHz +CHANNELS = 2 +SAMPLE_RATE = 48000 +FRAME_SIZE = SAMPLES_PER_FRAME * CHANNELS * 2 # 16-bit PCM (2 bytes) -class AudioStream: - # ... This class remains unchanged from the previous version ... - def __init__(self, _channels: int = 2, _sample_rate: int = 48000, _frames_per_buffer: int = 960, - _input_device_index: int = None, _output_device_index: int = None, _input: bool = True, - _output: bool = True, _init_on_startup: bool = True, **kwargs): # Accept extra kwargs - self.paInstance_kwargs = { - 'format': pyaudio.paInt16, - 'channels': _channels, - 'rate': _sample_rate, - 'input': _input, - 'output': _output, - 'frames_per_buffer': _frames_per_buffer - } +class AudioTransmitter: + def __init__(self, voice_client: VoiceClient, noise_gate_threshold: int, loop: asyncio.AbstractEventLoop, input_device_index: int): + if not voice_client.is_connected(): + raise ValueError("VoiceClient is not connected.") + if not hasattr(voice_client, 'encoder') or not voice_client.encoder: + raise OpusNotLoaded("VoiceClient has not initialized its Opus encoder.") - if _input_device_index is not None: - if _input: - self.paInstance_kwargs['input_device_index'] = _input_device_index - else: - LOGGER.warning("[AudioStream.__init__]:\tInput was not enabled." - " Reinitialize with '_input=True'") - if _output_device_index is not None: - if _output: - self.paInstance_kwargs['output_device_index'] = _output_device_index - else: - LOGGER.warning("[AudioStream.__init__]:\tOutput was not enabled." - " Reinitialize with '_output=True'") - if _init_on_startup: - LOGGER.info("Creating PyAudio instance") - self.paInstance = pyaudio.PyAudio() - self.stream = None - if _output_device_index is not None or _input_device_index is not None: - if _init_on_startup: - LOGGER.info("Init stream") - self.init_stream() - - def init_stream(self, _new_output_device_index: int = None, _new_input_device_index: int = None): - if _new_input_device_index is not None: - if self.paInstance_kwargs['input']: - self.paInstance_kwargs['input_device_index'] = _new_input_device_index - else: - LOGGER.warning("[AudioStream.init_stream]:\tInput was not enabled when initialized.") - if _new_output_device_index is not None: - if self.paInstance_kwargs['output']: - self.paInstance_kwargs['output_device_index'] = _new_output_device_index - else: - LOGGER.warning("[AudioStream.init_stream]:\tOutput was not enabled when initialized.") - self.close_if_open() - self.stream = self.paInstance.open(**self.paInstance_kwargs) - - def close_if_open(self): - if self.stream and self.stream.is_active(): - self.stream.stop_stream() - self.stream.close() - LOGGER.debug("[ReopenStream.close_if_open]:\t Stream was open; It was closed.") - -class NoiseGate(AudioStream): - def __init__(self, _voice_connection, _noise_gate_threshold: int, loop: asyncio.AbstractEventLoop, **kwargs): - super(NoiseGate, self).__init__(**kwargs) - self.voice_connection = _voice_connection - self.THRESHOLD = _noise_gate_threshold - # **THE FIX**: Pass the loop down to the stream source. - self.NGStream = NoiseGateStream(self, loop) - - def run(self) -> None: - LOGGER.debug("Starting stream") - self.stream.start_stream() - self.core() - - def core(self): - if self.voice_connection.is_connected() and not self.voice_connection.is_playing(): - LOGGER.debug("Playing stream to discord") - self.voice_connection.play(self.NGStream) - - async def close(self): - LOGGER.debug("Closing NoiseGate resources...") - if self.voice_connection and self.voice_connection.is_connected(): - # Set speaking to false on close - if self.NGStream.is_speaking: - asyncio.run_coroutine_threadsafe(self.voice_connection.ws.speak(False), self.NGStream.loop) - self.voice_connection.stop() - - self.close_if_open() - - if self.paInstance: - self.paInstance.terminate() - - LOGGER.debug("NoiseGate resources closed.") - -class NoiseGateStream(discord.AudioSource): - def __init__(self, noise_gate_instance: NoiseGate, loop: asyncio.AbstractEventLoop): - super(NoiseGateStream, self).__init__() - self.noise_gate = noise_gate_instance + self.voice_client = voice_client + self.threshold = noise_gate_threshold self.loop = loop - self.is_speaking = True - - self.NG_fadeout = 12 - self.NG_fadeout_count = 0 + self.input_device_index = input_device_index - def read(self): + self.pa = pyaudio.PyAudio() + self.stream = self.pa.open( + format=pyaudio.paInt16, + channels=CHANNELS, + rate=SAMPLE_RATE, + input=True, + frames_per_buffer=SAMPLES_PER_FRAME, + input_device_index=self.input_device_index + ) + + self._is_running = False + self._is_speaking = False + self.ng_fadeout_count = 0 + self.NG_FADEOUT_FRAMES = 12 # 240ms fadeout time + + async def _set_speaking(self, speaking: bool): + """Safely sets the speaking state if it has changed.""" + if self._is_speaking != speaking: + self._is_speaking = speaking + await self.voice_client.ws.speak(speaking) + + async def start(self): + """Starts the main audio transmission loop.""" + self._is_running = True + self.stream.start_stream() + LOGGER.info("Audio transmitter started.") try: - if not self.noise_gate.voice_connection.is_connected(): - if self.is_speaking: - asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop) - self.is_speaking = False - return SILENT_FRAME + while self._is_running: + # Read audio data in a separate thread to not block the event loop + pcm_data = await self.loop.run_in_executor( + None, self.stream.read, SAMPLES_PER_FRAME + ) - curr_buffer = self.noise_gate.stream.read(960, exception_on_overflow=False) - if len(curr_buffer) != DISCORD_FRAME_SIZE: - if self.is_speaking: - asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop) - self.is_speaking = False - return SILENT_FRAME + gate_is_open = self._check_noise_gate(pcm_data) - buffer_rms = audioop.rms(curr_buffer, 2) - - gate_is_open = False - if buffer_rms > 0: - buffer_decibel = 20 * math.log10(buffer_rms) - if buffer_decibel >= self.noise_gate.THRESHOLD: - self.NG_fadeout_count = self.NG_fadeout - gate_is_open = True - elif self.NG_fadeout_count > 0: - self.NG_fadeout_count -= 1 - gate_is_open = True - - if gate_is_open and not self.is_speaking: - asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(True), self.loop) - self.is_speaking = True - elif not gate_is_open and self.is_speaking: - asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop) - self.is_speaking = False - - return bytes(curr_buffer) if gate_is_open else SILENT_FRAME + if gate_is_open: + # If gate is open, ensure speaking is on and send audio + await self._set_speaking(True) + + # Encode PCM data to Opus + encoded_packets = self.voice_client.encoder.encode(pcm_data, SAMPLES_PER_FRAME) + + # Send each encoded packet + for packet in encoded_packets: + self.voice_client.send_audio_packet(packet) + else: + # If gate is closed, ensure speaking is off + await self._set_speaking(False) + # Wait for the next 20ms interval + await asyncio.sleep(0.02) + except asyncio.CancelledError: + LOGGER.info("Audio transmitter task cancelled.") except Exception as e: - LOGGER.error(f"Unhandled exception in NoiseGateStream.read: {e}", exc_info=True) - if self.is_speaking: - asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop) - self.is_speaking = False - return SILENT_FRAME \ No newline at end of file + LOGGER.error(f"Error in audio transmitter loop: {e}", exc_info=True) + finally: + await self._cleanup() + + def _check_noise_gate(self, pcm_data: bytes) -> bool: + """Applies the noise gate logic to raw PCM data.""" + rms = audioop.rms(pcm_data, 2) + if rms == 0: return False + + db = 20 * math.log10(rms) + + if db >= self.threshold: + self.ng_fadeout_count = self.NG_FADEOUT_FRAMES + return True + elif self.ng_fadeout_count > 0: + self.ng_fadeout_count -= 1 + return True + + return False + + async def stop(self): + """Stops the transmission loop.""" + self._is_running = False + + async def _cleanup(self): + """Cleans up all resources.""" + LOGGER.info("Cleaning up transmitter resources.") + if self._is_speaking: + await self._set_speaking(False) + + if self.stream.is_active(): + self.stream.stop_stream() + self.stream.close() + self.pa.terminate() \ No newline at end of file diff --git a/app/internal/bot_manager.py b/app/internal/bot_manager.py index 654d3a9..c159202 100644 --- a/app/internal/bot_manager.py +++ b/app/internal/bot_manager.py @@ -4,7 +4,7 @@ import os from discord import VoiceClient, VoiceChannel, opus, Activity, ActivityType, Intents from discord.ext import commands from typing import Optional, Dict -from internal.NoiseGatev2 import NoiseGate +from internal.NoiseGatev2 import AudioTransmitter from internal.logger import create_logger LOGGER = create_logger(__name__) @@ -18,7 +18,6 @@ class DiscordBotManager: def __init__(self): self.bot: Optional[commands.Bot] = None self.bot_task: Optional[asyncio.Task] = None - # This dictionary will hold both the client and its audio stream handler self.voice_connections: Dict[int, Dict] = {} self.token: Optional[str] = None self.loop = asyncio.get_event_loop() @@ -83,7 +82,6 @@ class DiscordBotManager: async def stop_bot(self): async with self.lock: if self.bot: - # Disconnect from all voice channels cleanly for guild_id in list(self.voice_connections.keys()): await self.leave_voice_channel(guild_id) await self.bot.close() @@ -112,20 +110,23 @@ class DiscordBotManager: await asyncio.wait_for(self._voice_ready_event.wait(), timeout=15.0) LOGGER.info("Bot voice connection is ready.") - stream_handler = NoiseGate( - _voice_connection=voice_client, - _noise_gate_threshold=ng_threshold, - # **THE FIX**: Pass the event loop to the stream handler. + # Create and start the new AudioTransmitter + transmitter = AudioTransmitter( + voice_client=voice_client, + noise_gate_threshold=ng_threshold, loop=self.loop, - _input_device_index=device_id + input_device_index=device_id ) - stream_handler.run() + + # Start the transmitter's main loop as a background task + transmitter_task = self.loop.create_task(transmitter.start()) self.voice_connections[guild_id] = { "client": voice_client, - "stream": stream_handler + "transmitter": transmitter, + "task": transmitter_task } - LOGGER.info(f"Joined guild {guild_id} and audio stream is now running.") + LOGGER.info(f"Joined guild {guild_id} and audio transmitter is running.") except asyncio.TimeoutError: LOGGER.error(f"Timeout waiting for bot to join voice channel {channel_id}.") @@ -140,12 +141,16 @@ class DiscordBotManager: connection_info = self.voice_connections.get(guild_id) if not connection_info: raise RuntimeError("Not connected to the specified guild's voice channel.") - stream_handler = connection_info.get('stream') - if stream_handler: - LOGGER.info(f"Stopping audio stream for guild {guild_id}.") - await stream_handler.close() + # Stop the transmitter task and clean up its resources + transmitter = connection_info.get("transmitter") + task = connection_info.get("task") + if transmitter: + LOGGER.info(f"Stopping audio transmitter for guild {guild_id}.") + await transmitter.stop() + if task and not task.done(): + task.cancel() - voice_client = connection_info.get('client') + voice_client = connection_info.get("client") if voice_client and voice_client.is_connected(): await voice_client.disconnect() @@ -153,15 +158,16 @@ class DiscordBotManager: LOGGER.info(f"Left guild {guild_id} voice channel.") async def load_opus(self): + # ... this method is unchanged ... processor = platform.machine() script_dir = os.path.dirname(os.path.abspath(__file__)) LOGGER.debug(f"Processor: {processor}, OS: {os.name}") try: - if os.name == 'nt': # Windows + if os.name == 'nt': if processor == "AMD64": opus.load_opus(os.path.join(script_dir, './opus/libopus_amd64.dll')) LOGGER.info("Loaded OPUS library for AMD64") - else: # Linux / other + else: if processor == "aarch64": opus.load_opus(os.path.join(script_dir, './opus/libopus_aarcch64.so')) LOGGER.info("Loaded OPUS library for aarch64") @@ -169,7 +175,6 @@ class DiscordBotManager: opus.load_opus(os.path.join(script_dir, './opus/libopus_armv7l.so')) LOGGER.info("Loaded OPUS library for armv7l") else: - # Fallback for other Linux archs like x86_64 opus.load_opus('libopus.so.0') LOGGER.info(f"Loaded system OPUS library for {processor}") except Exception as e: @@ -177,6 +182,7 @@ class DiscordBotManager: raise RuntimeError("Could not load a valid Opus library. Voice functionality will fail.") async def set_presence(self, system_name: str): + # ... this method is unchanged ... if not self.bot or not self.bot.is_ready(): LOGGER.warning("Bot is not ready, cannot set presence.") return