redo noisegate to fix voice presense

2025-07-14 21:34:08 -04:00
parent f893209f36
commit 9040462171
2 changed files with 123 additions and 154 deletions
@@ -1,152 +1,115 @@
 import audioop
 import math
 import pyaudio
 import discord
 import asyncio
 from discord import VoiceClient, OpusNotLoaded
 from internal.logger import create_logger
 LOGGER = create_logger(__name__)
-DISCORD_FRAME_SIZE = 3840
+# Constants for audio processing
-SILENT_FRAME = b'\x00' * DISCORD_FRAME_SIZE
+SAMPLES_PER_FRAME = 960  # For 20ms audio at 48kHz
 CHANNELS = 2
 SAMPLE_RATE = 48000
 FRAME_SIZE = SAMPLES_PER_FRAME * CHANNELS * 2  # 16-bit PCM (2 bytes)
-class AudioStream:
+class AudioTransmitter:
-    # ... This class remains unchanged from the previous version ...
+    def __init__(self, voice_client: VoiceClient, noise_gate_threshold: int, loop: asyncio.AbstractEventLoop, input_device_index: int):
-    def __init__(self, _channels: int = 2, _sample_rate: int = 48000, _frames_per_buffer: int = 960,
+        if not voice_client.is_connected():
-                 _input_device_index: int = None, _output_device_index: int = None, _input: bool = True,
+            raise ValueError("VoiceClient is not connected.")
-                 _output: bool = True, _init_on_startup: bool = True, **kwargs): # Accept extra kwargs
+        if not hasattr(voice_client, 'encoder') or not voice_client.encoder:
-        self.paInstance_kwargs = {
+             raise OpusNotLoaded("VoiceClient has not initialized its Opus encoder.")
            'format': pyaudio.paInt16,
            'channels': _channels,
            'rate': _sample_rate,
            'input': _input,
            'output': _output,
            'frames_per_buffer': _frames_per_buffer
        }
-        if _input_device_index is not None:
+        self.voice_client = voice_client
-            if _input:
+        self.threshold = noise_gate_threshold
-                self.paInstance_kwargs['input_device_index'] = _input_device_index
+        self.loop = loop
-            else:
+        self.input_device_index = input_device_index
                LOGGER.warning("[AudioStream.__init__]:\tInput was not enabled."
                               " Reinitialize with '_input=True'")
        if _output_device_index is not None:
            if _output:
                self.paInstance_kwargs['output_device_index'] = _output_device_index
            else:
                LOGGER.warning("[AudioStream.__init__]:\tOutput was not enabled."
                               " Reinitialize with '_output=True'")
        if _init_on_startup:
            LOGGER.info("Creating PyAudio instance")
            self.paInstance = pyaudio.PyAudio()
            self.stream = None
        if _output_device_index is not None or _input_device_index is not None:
            if _init_on_startup:
                LOGGER.info("Init stream")
                self.init_stream()
-    def init_stream(self, _new_output_device_index: int = None, _new_input_device_index: int = None):
+        self.pa = pyaudio.PyAudio()
-        if _new_input_device_index is not None:
+        self.stream = self.pa.open(
-            if self.paInstance_kwargs['input']:
+            format=pyaudio.paInt16,
-                self.paInstance_kwargs['input_device_index'] = _new_input_device_index
+            channels=CHANNELS,
-            else:
+            rate=SAMPLE_RATE,
-                LOGGER.warning("[AudioStream.init_stream]:\tInput was not enabled when initialized.")
+            input=True,
-        if _new_output_device_index is not None:
+            frames_per_buffer=SAMPLES_PER_FRAME,
-            if self.paInstance_kwargs['output']:
+            input_device_index=self.input_device_index
-                self.paInstance_kwargs['output_device_index'] = _new_output_device_index
+        )
            else:
                LOGGER.warning("[AudioStream.init_stream]:\tOutput was not enabled when initialized.")
        self.close_if_open()
        self.stream = self.paInstance.open(**self.paInstance_kwargs)
-    def close_if_open(self):
+        self._is_running = False
-        if self.stream and self.stream.is_active():
+        self._is_speaking = False
        self.ng_fadeout_count = 0
        self.NG_FADEOUT_FRAMES = 12  # 240ms fadeout time
    async def _set_speaking(self, speaking: bool):
        """Safely sets the speaking state if it has changed."""
        if self._is_speaking != speaking:
            self._is_speaking = speaking
            await self.voice_client.ws.speak(speaking)
    async def start(self):
        """Starts the main audio transmission loop."""
        self._is_running = True
        self.stream.start_stream()
        LOGGER.info("Audio transmitter started.")
        try:
            while self._is_running:
                # Read audio data in a separate thread to not block the event loop
                pcm_data = await self.loop.run_in_executor(
                    None, self.stream.read, SAMPLES_PER_FRAME
                )
                gate_is_open = self._check_noise_gate(pcm_data)
                if gate_is_open:
                    # If gate is open, ensure speaking is on and send audio
                    await self._set_speaking(True)
                    # Encode PCM data to Opus
                    encoded_packets = self.voice_client.encoder.encode(pcm_data, SAMPLES_PER_FRAME)
                    # Send each encoded packet
                    for packet in encoded_packets:
                        self.voice_client.send_audio_packet(packet)
                else:
                    # If gate is closed, ensure speaking is off
                    await self._set_speaking(False)
                # Wait for the next 20ms interval
                await asyncio.sleep(0.02)
        except asyncio.CancelledError:
            LOGGER.info("Audio transmitter task cancelled.")
        except Exception as e:
            LOGGER.error(f"Error in audio transmitter loop: {e}", exc_info=True)
        finally:
            await self._cleanup()
    def _check_noise_gate(self, pcm_data: bytes) -> bool:
        """Applies the noise gate logic to raw PCM data."""
        rms = audioop.rms(pcm_data, 2)
        if rms == 0: return False
        db = 20 * math.log10(rms)
        if db >= self.threshold:
            self.ng_fadeout_count = self.NG_FADEOUT_FRAMES
            return True
        elif self.ng_fadeout_count > 0:
            self.ng_fadeout_count -= 1
            return True
        return False
    async def stop(self):
        """Stops the transmission loop."""
        self._is_running = False
    async def _cleanup(self):
        """Cleans up all resources."""
        LOGGER.info("Cleaning up transmitter resources.")
        if self._is_speaking:
            await self._set_speaking(False)
        if self.stream.is_active():
            self.stream.stop_stream()
        self.stream.close()
-            LOGGER.debug("[ReopenStream.close_if_open]:\t Stream was open; It was closed.")
+        self.pa.terminate()
 class NoiseGate(AudioStream):
    def __init__(self, _voice_connection, _noise_gate_threshold: int, loop: asyncio.AbstractEventLoop, **kwargs):
        super(NoiseGate, self).__init__(**kwargs)
        self.voice_connection = _voice_connection
        self.THRESHOLD = _noise_gate_threshold
        # **THE FIX**: Pass the loop down to the stream source.
        self.NGStream = NoiseGateStream(self, loop)
    def run(self) -> None:
        LOGGER.debug("Starting stream")
        self.stream.start_stream()
        self.core()
    def core(self):
        if self.voice_connection.is_connected() and not self.voice_connection.is_playing():
            LOGGER.debug("Playing stream to discord")
            self.voice_connection.play(self.NGStream)
    async def close(self):
        LOGGER.debug("Closing NoiseGate resources...")
        if self.voice_connection and self.voice_connection.is_connected():
            # Set speaking to false on close
            if self.NGStream.is_speaking:
                 asyncio.run_coroutine_threadsafe(self.voice_connection.ws.speak(False), self.NGStream.loop)
            self.voice_connection.stop()
        self.close_if_open()
        if self.paInstance:
            self.paInstance.terminate()
        LOGGER.debug("NoiseGate resources closed.")
 class NoiseGateStream(discord.AudioSource):
    def __init__(self, noise_gate_instance: NoiseGate, loop: asyncio.AbstractEventLoop):
        super(NoiseGateStream, self).__init__()
        self.noise_gate = noise_gate_instance
        self.loop = loop
        self.is_speaking = True
        self.NG_fadeout = 12
        self.NG_fadeout_count = 0
    def read(self):
        try:
            if not self.noise_gate.voice_connection.is_connected():
                if self.is_speaking:
                    asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop)
                    self.is_speaking = False
                return SILENT_FRAME
            curr_buffer = self.noise_gate.stream.read(960, exception_on_overflow=False)
            if len(curr_buffer) != DISCORD_FRAME_SIZE:
                if self.is_speaking:
                    asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop)
                    self.is_speaking = False
                return SILENT_FRAME
            buffer_rms = audioop.rms(curr_buffer, 2)
            gate_is_open = False
            if buffer_rms > 0:
                buffer_decibel = 20 * math.log10(buffer_rms)
                if buffer_decibel >= self.noise_gate.THRESHOLD:
                    self.NG_fadeout_count = self.NG_fadeout
                    gate_is_open = True
                elif self.NG_fadeout_count > 0:
                    self.NG_fadeout_count -= 1
                    gate_is_open = True
            if gate_is_open and not self.is_speaking:
                asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(True), self.loop)
                self.is_speaking = True
            elif not gate_is_open and self.is_speaking:
                asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop)
                self.is_speaking = False
            return bytes(curr_buffer) if gate_is_open else SILENT_FRAME
        except Exception as e:
            LOGGER.error(f"Unhandled exception in NoiseGateStream.read: {e}", exc_info=True)
            if self.is_speaking:
                asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop)
                self.is_speaking = False
            return SILENT_FRAME
@@ -4,7 +4,7 @@ import os
 from discord import VoiceClient, VoiceChannel, opus, Activity, ActivityType, Intents
 from discord.ext import commands
 from typing import Optional, Dict
-from internal.NoiseGatev2 import NoiseGate
+from internal.NoiseGatev2 import AudioTransmitter
 from internal.logger import create_logger
 LOGGER = create_logger(__name__)
@@ -18,7 +18,6 @@ class DiscordBotManager:
    def __init__(self):
        self.bot: Optional[commands.Bot] = None
        self.bot_task: Optional[asyncio.Task] = None
        # This dictionary will hold both the client and its audio stream handler
        self.voice_connections: Dict[int, Dict] = {}
        self.token: Optional[str] = None
        self.loop = asyncio.get_event_loop()
@@ -83,7 +82,6 @@ class DiscordBotManager:
    async def stop_bot(self):
        async with self.lock:
            if self.bot:
                # Disconnect from all voice channels cleanly
                for guild_id in list(self.voice_connections.keys()):
                    await self.leave_voice_channel(guild_id)
                await self.bot.close()
@@ -112,20 +110,23 @@ class DiscordBotManager:
            await asyncio.wait_for(self._voice_ready_event.wait(), timeout=15.0)
            LOGGER.info("Bot voice connection is ready.")
-            stream_handler = NoiseGate(
+            # Create and start the new AudioTransmitter
-                _voice_connection=voice_client,
+            transmitter = AudioTransmitter(
-                _noise_gate_threshold=ng_threshold,
+                voice_client=voice_client,
-                # **THE FIX**: Pass the event loop to the stream handler.
+                noise_gate_threshold=ng_threshold,
                loop=self.loop, 
-                _input_device_index=device_id
+                input_device_index=device_id
            )
-            stream_handler.run()
+            
            # Start the transmitter's main loop as a background task
            transmitter_task = self.loop.create_task(transmitter.start())
            self.voice_connections[guild_id] = {
                "client": voice_client,
-                "stream": stream_handler
+                "transmitter": transmitter,
                "task": transmitter_task
            }
-            LOGGER.info(f"Joined guild {guild_id} and audio stream is now running.")
+            LOGGER.info(f"Joined guild {guild_id} and audio transmitter is running.")
        except asyncio.TimeoutError:
            LOGGER.error(f"Timeout waiting for bot to join voice channel {channel_id}.")
@@ -140,12 +141,16 @@ class DiscordBotManager:
        connection_info = self.voice_connections.get(guild_id)
        if not connection_info: raise RuntimeError("Not connected to the specified guild's voice channel.")
-        stream_handler = connection_info.get('stream')
+        # Stop the transmitter task and clean up its resources
-        if stream_handler:
+        transmitter = connection_info.get("transmitter")
-            LOGGER.info(f"Stopping audio stream for guild {guild_id}.")
+        task = connection_info.get("task")
-            await stream_handler.close()
+        if transmitter:
            LOGGER.info(f"Stopping audio transmitter for guild {guild_id}.")
            await transmitter.stop()
        if task and not task.done():
            task.cancel()
-        voice_client = connection_info.get('client')
+        voice_client = connection_info.get("client")
        if voice_client and voice_client.is_connected():
            await voice_client.disconnect()
@@ -153,15 +158,16 @@ class DiscordBotManager:
        LOGGER.info(f"Left guild {guild_id} voice channel.")
    async def load_opus(self):
        # ... this method is unchanged ...
        processor = platform.machine()
        script_dir = os.path.dirname(os.path.abspath(__file__))
        LOGGER.debug(f"Processor: {processor}, OS: {os.name}")
        try:
-            if os.name == 'nt': # Windows
+            if os.name == 'nt':
                if processor == "AMD64":
                    opus.load_opus(os.path.join(script_dir, './opus/libopus_amd64.dll'))
                    LOGGER.info("Loaded OPUS library for AMD64")
-            else: # Linux / other
+            else:
                if processor == "aarch64":
                    opus.load_opus(os.path.join(script_dir, './opus/libopus_aarcch64.so'))
                    LOGGER.info("Loaded OPUS library for aarch64")
@@ -169,7 +175,6 @@ class DiscordBotManager:
                    opus.load_opus(os.path.join(script_dir, './opus/libopus_armv7l.so'))
                    LOGGER.info("Loaded OPUS library for armv7l")
                else:
                    # Fallback for other Linux archs like x86_64
                    opus.load_opus('libopus.so.0')
                    LOGGER.info(f"Loaded system OPUS library for {processor}")
        except Exception as e:
@@ -177,6 +182,7 @@ class DiscordBotManager:
            raise RuntimeError("Could not load a valid Opus library. Voice functionality will fail.")
    async def set_presence(self, system_name: str):
        # ... this method is unchanged ...
        if not self.bot or not self.bot.is_ready():
            LOGGER.warning("Bot is not ready, cannot set presence.")
            return