redo noisegate to fix voice presence

2025-07-14 21:34:08 -04:00
parent f893209f36
commit 9040462171
2 changed files with 123 additions and 154 deletions
--- a/app/internal/NoiseGatev2.py
+++ b/app/internal/NoiseGatev2.py
@@ -1,152 +1,115 @@
 import audioop
 import math
 import pyaudio
-import discord
 import asyncio
+from discord import VoiceClient, OpusNotLoaded
 from internal.logger import create_logger

 LOGGER = create_logger(__name__)

-DISCORD_FRAME_SIZE = 3840
-SILENT_FRAME = b'\x00' * DISCORD_FRAME_SIZE
+# Constants for audio processing: one frame is 20 ms of 16-bit stereo PCM at 48 kHz.
+SAMPLES_PER_FRAME = 960  # Samples per channel in one frame (48000 Hz * 0.020 s)
+CHANNELS = 2  # Stereo capture
+SAMPLE_RATE = 48000  # Hz
+FRAME_SIZE = SAMPLES_PER_FRAME * CHANNELS * 2  # Bytes per frame: 2 bytes per 16-bit sample

-class AudioStream:
-    # ... This class remains unchanged from the previous version ...
-    def __init__(self, _channels: int = 2, _sample_rate: int = 48000, _frames_per_buffer: int = 960,
-                 _input_device_index: int = None, _output_device_index: int = None, _input: bool = True,
-                 _output: bool = True, _init_on_startup: bool = True, **kwargs): # Accept extra kwargs
-        self.paInstance_kwargs = {
-            'format': pyaudio.paInt16,
-            'channels': _channels,
-            'rate': _sample_rate,
-            'input': _input,
-            'output': _output,
-            'frames_per_buffer': _frames_per_buffer
-        }
+class AudioTransmitter:
+    def __init__(self, voice_client: VoiceClient, noise_gate_threshold: int, loop: asyncio.AbstractEventLoop, input_device_index: int):
+        """Validate the voice client and open the PyAudio input stream.
+
+        Args:
+            voice_client: Voice client that must already be connected and expose
+                a truthy ``encoder`` attribute.
+            noise_gate_threshold: Level compared against ``20 * log10(rms)`` of
+                each captured frame to decide whether the gate opens.
+            loop: Event loop whose executor runs the blocking stream read.
+            input_device_index: PyAudio index of the capture device.
+
+        Raises:
+            ValueError: If ``voice_client`` is not connected.
+            OpusNotLoaded: If ``voice_client`` has no usable ``encoder``.
+        """
+        if not voice_client.is_connected():
+            raise ValueError("VoiceClient is not connected.")
+        # A missing attribute and a falsy placeholder are rejected the same way.
+        if not getattr(voice_client, 'encoder', None):
+            raise OpusNotLoaded("VoiceClient has not initialized its Opus encoder.")

-        if _input_device_index is not None:
-            if _input:
-                self.paInstance_kwargs['input_device_index'] = _input_device_index
-            else:
-                LOGGER.warning("[AudioStream.__init__]:\tInput was not enabled."
-                               " Reinitialize with '_input=True'")
-        if _output_device_index is not None:
-            if _output:
-                self.paInstance_kwargs['output_device_index'] = _output_device_index
-            else:
-                LOGGER.warning("[AudioStream.__init__]:\tOutput was not enabled."
-                               " Reinitialize with '_output=True'")
-        if _init_on_startup:
-            LOGGER.info("Creating PyAudio instance")
-            self.paInstance = pyaudio.PyAudio()
-            self.stream = None
-        if _output_device_index is not None or _input_device_index is not None:
-            if _init_on_startup:
-                LOGGER.info("Init stream")
-                self.init_stream()
-
-    def init_stream(self, _new_output_device_index: int = None, _new_input_device_index: int = None):
-        if _new_input_device_index is not None:
-            if self.paInstance_kwargs['input']:
-                self.paInstance_kwargs['input_device_index'] = _new_input_device_index
-            else:
-                LOGGER.warning("[AudioStream.init_stream]:\tInput was not enabled when initialized.")
-        if _new_output_device_index is not None:
-            if self.paInstance_kwargs['output']:
-                self.paInstance_kwargs['output_device_index'] = _new_output_device_index
-            else:
-                LOGGER.warning("[AudioStream.init_stream]:\tOutput was not enabled when initialized.")
-        self.close_if_open()
-        self.stream = self.paInstance.open(**self.paInstance_kwargs)
-
-    def close_if_open(self):
-        if self.stream and self.stream.is_active():
-            self.stream.stop_stream()
-            self.stream.close()
-            LOGGER.debug("[ReopenStream.close_if_open]:\t Stream was open; It was closed.")
-            
-class NoiseGate(AudioStream):
-    def __init__(self, _voice_connection, _noise_gate_threshold: int, loop: asyncio.AbstractEventLoop, **kwargs):
-        super(NoiseGate, self).__init__(**kwargs)
-        self.voice_connection = _voice_connection
-        self.THRESHOLD = _noise_gate_threshold
-        # **THE FIX**: Pass the loop down to the stream source.
-        self.NGStream = NoiseGateStream(self, loop)
-
-    def run(self) -> None:
-        LOGGER.debug("Starting stream")
-        self.stream.start_stream()
-        self.core()
-
-    def core(self):
-        if self.voice_connection.is_connected() and not self.voice_connection.is_playing():
-            LOGGER.debug("Playing stream to discord")
-            self.voice_connection.play(self.NGStream)
-
-    async def close(self):
-        LOGGER.debug("Closing NoiseGate resources...")
-        if self.voice_connection and self.voice_connection.is_connected():
-            # Set speaking to false on close
-            if self.NGStream.is_speaking:
-                 asyncio.run_coroutine_threadsafe(self.voice_connection.ws.speak(False), self.NGStream.loop)
-            self.voice_connection.stop()
-        
-        self.close_if_open()
-        
-        if self.paInstance:
-            self.paInstance.terminate()
-        
-        LOGGER.debug("NoiseGate resources closed.")
-
-class NoiseGateStream(discord.AudioSource):
-    def __init__(self, noise_gate_instance: NoiseGate, loop: asyncio.AbstractEventLoop):
-        super(NoiseGateStream, self).__init__()
-        self.noise_gate = noise_gate_instance
+        self.voice_client = voice_client
+        self.threshold = noise_gate_threshold
         self.loop = loop
-        self.is_speaking = True
-        
-        self.NG_fadeout = 12
-        self.NG_fadeout_count = 0
+        self.input_device_index = input_device_index

-    def read(self):
+        self.pa = pyaudio.PyAudio()
+        try:
+            self.stream = self.pa.open(
+                format=pyaudio.paInt16,
+                channels=CHANNELS,
+                rate=SAMPLE_RATE,
+                input=True,
+                frames_per_buffer=SAMPLES_PER_FRAME,
+                input_device_index=self.input_device_index
+            )
+        except Exception:
+            # Opening the device failed: release the PyAudio instance before
+            # propagating, otherwise it would be leaked.
+            self.pa.terminate()
+            raise
+
+        self._is_running = False   # Loop flag, set by start() and cleared by stop()
+        self._is_speaking = False  # Last speaking state sent through _set_speaking()
+        self.ng_fadeout_count = 0  # Remaining hold frames after the last loud frame
+        self.NG_FADEOUT_FRAMES = 12  # 240ms fadeout time
+
+    async def _set_speaking(self, speaking: bool):
+        """Send the speaking state over the voice websocket, but only on a change.
+
+        Args:
+            speaking: The state to announce; ignored when it equals the state
+                that was recorded last.
+        """
+        if self._is_speaking == speaking:
+            return
+        self._is_speaking = speaking
+        await self.voice_client.ws.speak(speaking)
+
+    async def start(self):
+        """Run the capture/gate/transmit loop until stop() is called or an error occurs.
+
+        Each iteration reads one frame from the input stream, applies the noise
+        gate, updates the speaking state and, while the gate is open, sends the
+        frame to the voice client. Cancellation and other exceptions are logged
+        rather than re-raised, and _cleanup() always runs on exit.
+        """
+        self._is_running = True
+        self.stream.start_stream()
+        LOGGER.info("Audio transmitter started.")
         try:
-            if not self.noise_gate.voice_connection.is_connected():
-                if self.is_speaking:
-                    asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop)
-                    self.is_speaking = False
-                return SILENT_FRAME
+            while self._is_running:
+                # The blocking read runs in the default executor so the event loop
+                # stays responsive. It returns once a whole frame has been captured,
+                # so it also paces the loop; no additional sleep is needed (an extra
+                # 20 ms sleep would halve the read rate and overflow the input buffer).
+                # An input overflow drops samples instead of raising.
+                pcm_data = await self.loop.run_in_executor(
+                    None,
+                    lambda: self.stream.read(SAMPLES_PER_FRAME, exception_on_overflow=False),
+                )

-            curr_buffer = self.noise_gate.stream.read(960, exception_on_overflow=False)
-            if len(curr_buffer) != DISCORD_FRAME_SIZE:
-                if self.is_speaking:
-                    asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop)
-                    self.is_speaking = False
-                return SILENT_FRAME
+                gate_is_open = self._check_noise_gate(pcm_data)

-            buffer_rms = audioop.rms(curr_buffer, 2)
-            
-            gate_is_open = False
-            if buffer_rms > 0:
-                buffer_decibel = 20 * math.log10(buffer_rms)
-                if buffer_decibel >= self.noise_gate.THRESHOLD:
-                    self.NG_fadeout_count = self.NG_fadeout
-                    gate_is_open = True
-                elif self.NG_fadeout_count > 0:
-                    self.NG_fadeout_count -= 1
-                    gate_is_open = True
-
-            if gate_is_open and not self.is_speaking:
-                asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(True), self.loop)
-                self.is_speaking = True
-            elif not gate_is_open and self.is_speaking:
-                asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop)
-                self.is_speaking = False
-
-            return bytes(curr_buffer) if gate_is_open else SILENT_FRAME
+                if gate_is_open:
+                    # If gate is open, ensure speaking is on and send audio
+                    await self._set_speaking(True)
+
+                    # Hand over the raw PCM frame and let the voice client Opus-encode
+                    # it. The encoder returns a single bytes packet, so encoding here
+                    # and looping over the result would iterate over individual bytes.
+                    self.voice_client.send_audio_packet(pcm_data, encode=True)
+                else:
+                    # If gate is closed, ensure speaking is off
+                    await self._set_speaking(False)

+        except asyncio.CancelledError:
+            LOGGER.info("Audio transmitter task cancelled.")
         except Exception as e:
-            LOGGER.error(f"Unhandled exception in NoiseGateStream.read: {e}", exc_info=True)
-            if self.is_speaking:
-                asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop)
-                self.is_speaking = False
-            return SILENT_FRAME
+            LOGGER.error(f"Error in audio transmitter loop: {e}", exc_info=True)
+        finally:
+            await self._cleanup()
+
+    def _check_noise_gate(self, pcm_data: bytes) -> bool:
+        """Decide whether one frame of 16-bit PCM passes the noise gate.
+
+        A frame whose level (``20 * log10(rms)``) reaches ``self.threshold``
+        opens the gate and re-arms the hold counter. Quieter frames keep the
+        gate open while the hold counter lasts, consuming one count each.
+
+        Args:
+            pcm_data: Raw PCM bytes, interpreted as 2-byte samples.
+
+        Returns:
+            True if the frame should be transmitted, False otherwise.
+        """
+        rms = audioop.rms(pcm_data, 2)
+        # log10(0) is undefined. Digital silence is treated as infinitely quiet so
+        # it goes through the same hold logic as any other below-threshold frame
+        # instead of closing the gate mid-hold and freezing the counter.
+        level_db = 20 * math.log10(rms) if rms > 0 else float("-inf")
+
+        if level_db >= self.threshold:
+            self.ng_fadeout_count = self.NG_FADEOUT_FRAMES
+            return True
+        if self.ng_fadeout_count > 0:
+            self.ng_fadeout_count -= 1
+            return True
+        return False
+
+    async def stop(self):
+        """Request the transmission loop to end.
+
+        Only clears the running flag; the loop in start() sees it the next time
+        it evaluates its ``while`` condition.
+        """
+        self._is_running = False
+
+    async def _cleanup(self):
+        """Clear the speaking state and release the PyAudio stream and instance.
+
+        The audio resources are released even if clearing the speaking state
+        fails; that failure is logged instead of being raised.
+        """
+        LOGGER.info("Cleaning up transmitter resources.")
+        try:
+            # _set_speaking() does nothing when the state is already False.
+            await self._set_speaking(False)
+        except Exception as e:
+            LOGGER.warning(f"Could not clear speaking state during cleanup: {e}")
+        finally:
+            try:
+                if self.stream.is_active():
+                    self.stream.stop_stream()
+                self.stream.close()
+            finally:
+                # Terminate PyAudio even if stopping or closing the stream raised.
+                self.pa.terminate()