Revert voice activity changes

revert
fix bot manager after revert
2025-07-14 22:25:36 -04:00 · 2025-07-14 22:20:22 -04:00 · 2025-07-14 22:09:21 -04:00 · 2025-07-14 22:06:36 -04:00 · 2025-07-14 21:58:03 -04:00 · 2025-07-14 21:52:08 -04:00
3 changed files with 166 additions and 278 deletions
--- a/app/internal/NoiseGatev2.py
+++ b/app/internal/NoiseGatev2.py
@@ -1,215 +1,98 @@
 import audioop
 import math
 import time
 import pyaudio
-import discord
+import asyncio
 import numpy
 from internal.logger import create_logger
-voice_connection = None
+# You need to import the base AudioSource class from your specific library.
 # This is a common path, but yours might be different.
 from discord import AudioSource
 LOGGER = create_logger(__name__)
-
+# Constants for audio processing
-# noinspection PyUnresolvedReferences
+SAMPLES_PER_FRAME = 960
-class AudioStream:
+CHANNELS = 2
-    def __init__(self, _channels: int = 2, _sample_rate: int = 48000, _frames_per_buffer: int = 1024,
+SAMPLE_RATE = 48000
-                 _input_device_index: int = None, _output_device_index: int = None, _input: bool = True,
+FRAME_SIZE = SAMPLES_PER_FRAME * CHANNELS * 2  # 16-bit PCM
-                 _output: bool = True, _init_on_startup: bool = True):
+SILENT_FRAME = b'\x00' * FRAME_SIZE
        self.paInstance_kwargs = {
            'format': pyaudio.paInt16,
            'channels': _channels,
            'rate': _sample_rate,
            'input': _input,
            'output': _output,
            'frames_per_buffer': _frames_per_buffer
        }
        if _input_device_index:
            if _input:
                self.paInstance_kwargs['input_device_index'] = _input_device_index
            else:
                LOGGER.warning("[AudioStream.__init__]:\tInput was not enabled."
                               " Reinitialize with '_input=True'")
        if _output_device_index:
            if _output:
                self.paInstance_kwargs['output_device_index'] = _output_device_index
            else:
                LOGGER.warning("[AudioStream.__init__]:\tOutput was not enabled."
                               " Reinitialize with '_output=True'")
        if _init_on_startup:
            # Init PyAudio instance
            LOGGER.info("Creating PyAudio instance")
            self.paInstance = pyaudio.PyAudio()
            # Define and initialize stream object if we have been passed a device ID (pyaudio.open)
            self.stream = None
        if _output_device_index or _input_device_index:
            if _init_on_startup:
                LOGGER.info("Init stream")
                self.init_stream()
    def init_stream(self, _new_output_device_index: int = None, _new_input_device_index: int = None):
        # Check what device was asked to be changed (or set)
        if _new_input_device_index:
            if self.paInstance_kwargs['input']:
                self.paInstance_kwargs['input_device_index'] = _new_input_device_index
            else:
                LOGGER.warning("[AudioStream.init_stream]:\tInput was not enabled when initialized."
                               " Reinitialize with '_input=True'")
        if _new_output_device_index:
            if self.paInstance_kwargs['output']:
                self.paInstance_kwargs['output_device_index'] = _new_output_device_index
            else:
                LOGGER.warning("[AudioStream.init_stream]:\tOutput was not enabled when initialized."
                               " Reinitialize with '_output=True'")
        self.close_if_open()
        # Open the stream
        self.stream = self.paInstance.open(**self.paInstance_kwargs)
    def close_if_open(self):
        # Stop the stream if it is started
        if self.stream:
            if self.stream.is_active():
                self.stream.stop_stream()
                self.stream.close()
                LOGGER.debug("[ReopenStream.close_if_open]:\t Stream was open; It was closed.")
    def list_devices(self, _display_input_devices: bool = True, _display_output_devices: bool = True):
        LOGGER.info('Getting a list of the devices connected')
        info = self.paInstance.get_host_api_info_by_index(0)
        numdevices = info.get('deviceCount')
        devices = {
            'Input': {},
            'Output': {}
        }
        for i in range(0, numdevices):
            if (self.paInstance.get_device_info_by_host_api_device_index(0, i).get('maxInputChannels')) > 0:
                input_device = self.paInstance.get_device_info_by_host_api_device_index(0, i).get('name')
                devices['Input'][i] = input_device
                if _display_input_devices:
                    LOGGER.debug(f"Input Device id {i} - {input_device}")
            if (self.paInstance.get_device_info_by_host_api_device_index(0, i).get('maxOutputChannels')) > 0:
                output_device = self.paInstance.get_device_info_by_host_api_device_index(0, i).get('name')
                devices['Output'][i] = output_device
                if _display_output_devices:
                    LOGGER.debug(f"Output Device id {i} - {output_device}")
        return devices
    async def stop(self):
        await voice_connection.disconnect()
        self.close_if_open()
        self.stream.close()
        self.paInstance.terminate()
-# noinspection PyUnresolvedReferences
+class NoiseGateSource(AudioSource):
-class NoiseGate(AudioStream):
+    def __init__(self, audio_stream, threshold: int):
-    def __init__(self, _voice_connection, _noise_gate_threshold: int, **kwargs):
+        self.audio_stream = audio_stream
-        super(NoiseGate, self).__init__(_init_on_startup=True, **kwargs)
+        self.threshold = threshold
-        global voice_connection
+        self.ng_fadeout_count = 0
-        voice_connection = _voice_connection
+        self.NG_FADEOUT_FRAMES = 12  # 240ms fadeout time
        self.THRESHOLD = _noise_gate_threshold
        self.NGStream = NoiseGateStream(self)
        self.Voice_Connection_Thread = None
-    def run(self) -> None:
+    def read(self) -> bytes:
-        global voice_connection
+        """
-        # Start the audio stream
+        Reads data from the audio stream, applies the noise gate,
-        LOGGER.debug("Starting stream")
+        and returns a 20ms audio frame.
-        self.stream.start_stream()
+        """
        # Start the stream to discord
        self.core()
    def core(self, error=None):
        if error:
            LOGGER.warning(error)
        while not voice_connection.is_connected():
            time.sleep(.2)
        if not voice_connection.is_playing():
            LOGGER.debug("Playing stream to discord")
            voice_connection.play(self.NGStream, after=self.core)
    async def close(self):
        LOGGER.debug("Closing")
        await voice_connection.disconnect()
        if self.stream.is_active:
            self.stream.stop_stream()
            LOGGER.debug("Stopping stream")
 # noinspection PyUnresolvedReferences
 class NoiseGateStream(discord.AudioSource):
    def __init__(self, _stream):
        super(NoiseGateStream, self).__init__()
        self.stream = _stream       # The actual audio stream object
        self.NG_fadeout = 240 / 20    # Fadeout value used to hold the noisegate after de-triggering
        self.NG_fadeout_count = 0   # A count set when the noisegate is triggered and was de-triggered
        self.process_set_count = 0  # Counts how many processes have been made
    def read(self):
        try:
-            while voice_connection.is_connected():
+            # Read a frame's worth of data from the input stream.
-                curr_buffer = bytearray(self.stream.stream.read(960))
+            pcm_data = self.audio_stream.read(SAMPLES_PER_FRAME, exception_on_overflow=False)
                buffer_rms = audioop.rms(curr_buffer, 2)
                if buffer_rms > 0:
                    buffer_decibel = 20 * math.log10(buffer_rms)
-                    if self.process_set_count % 10 == 0:
+            # Ensure we have a full frame of data.
-                        if buffer_decibel >= self.stream.THRESHOLD:
+            if len(pcm_data) != FRAME_SIZE:
-                            LOGGER.debug(f"[Noisegate Open] {buffer_decibel} db")
+                return SILENT_FRAME
                        else:
                            LOGGER.debug(f"[Noisegate Closed] {buffer_decibel} db")
-                    if buffer_decibel >= self.stream.THRESHOLD:
+            # Calculate volume to check against the threshold.
-                        self.NG_fadeout_count = self.NG_fadeout
+            rms = audioop.rms(pcm_data, 2)
-                        self.process_set_count += 1
+            if rms == 0:
-                        if curr_buffer:
+                # If there's no volume, check if we're in the fadeout period.
-                            return bytes(curr_buffer)
+                if self.ng_fadeout_count > 0:
                    self.ng_fadeout_count -= 1
                    return pcm_data # Return the (silent) data to complete the fade
                return SILENT_FRAME
-                    else:
+            db = 20 * math.log10(rms)
                        if self.NG_fadeout_count > 0:
                            self.NG_fadeout_count -= 1
                            LOGGER.debug(f"Frames in fadeout remaining: {self.NG_fadeout_count}")
                            self.process_set_count += 1
                            if curr_buffer:
                                return bytes(curr_buffer)
-        except OSError as e:
+            # If volume is above the threshold, send the audio and reset fadeout.
-            LOGGER.warning(e)
+            if db >= self.threshold:
                self.ng_fadeout_count = self.NG_FADEOUT_FRAMES
                return pcm_data
            # If below threshold but still in the fadeout period, send the audio.
            if self.ng_fadeout_count > 0:
                self.ng_fadeout_count -= 1
                return pcm_data
            # Otherwise, the gate is closed. Send silence.
            return SILENT_FRAME
        except Exception as e:
            LOGGER.error(f"Error in NoiseGateSource.read: {e}", exc_info=True)
            return SILENT_FRAME
    def cleanup(self) -> None:
        """Called when the player stops."""
        # The AudioStreamManager now handles cleanup.
        LOGGER.info("Audio source cleanup called.")
        pass
-    def audio_datalist_set_volume(self, datalist, volume):
+class AudioStreamManager:
-        """ Change value of list of audio chunks """
+    """Manages the PyAudio instance and input stream."""
-        sound_level = (volume / 100.)
+    def __init__(self, input_device_index: int):
        self.pa = pyaudio.PyAudio()
        self.stream = self.pa.open(
            format=pyaudio.paInt16,
            channels=CHANNELS,
            rate=SAMPLE_RATE,
            input=True,
            frames_per_buffer=SAMPLES_PER_FRAME,
            input_device_index=input_device_index
        )
        self.stream.start_stream()
        LOGGER.info(f"Audio stream started on device {input_device_index}")
-        for i in range(len(datalist)):
+    def get_stream(self):
-            chunk = numpy.fromstring(datalist[i], numpy.int16)
+        return self.stream
-            chunk = chunk * sound_level
+    def terminate(self):
-
+        if self.stream and self.stream.is_active():
-            datalist[i] = chunk.astype(numpy.int16)
+            self.stream.stop_stream()
-
+            self.stream.close()
-
+        self.pa.terminate()
-if __name__ == '__main__':
+        LOGGER.info("PyAudio instance terminated.")
    input_index = int(input("Input:\t"))
    output_index = int(input("Output:\t"))
    ng = NoiseGate(_input_device_index=input_index, _output_device_index=output_index)
    ng.list_devices()
    ng.start()
--- a/app/internal/bot_manager.py
+++ b/app/internal/bot_manager.py
@@ -4,12 +4,11 @@ import os
 from discord import VoiceClient, VoiceChannel, opus, Activity, ActivityType, Intents
 from discord.ext import commands
 from typing import Optional, Dict
 from internal.NoiseGatev2 import NoiseGate
 from internal.logger import create_logger
 from internal.NoiseGatev2 import AudioStreamManager, NoiseGateSource
 LOGGER = create_logger(__name__)
 # Configure discord intents
 intents = Intents.default()
 intents.voice_states = True
 intents.guilds = True
@@ -18,7 +17,7 @@ class DiscordBotManager:
    def __init__(self):
        self.bot: Optional[commands.Bot] = None
        self.bot_task: Optional[asyncio.Task] = None
-        self.voice_clients: Dict[int, VoiceClient] = {}
+        self.voice_connections: Dict[int, Dict] = {}
        self.token: Optional[str] = None
        self.loop = asyncio.get_event_loop()
        self.lock = asyncio.Lock()
@@ -38,127 +37,133 @@ class DiscordBotManager:
            @self.bot.event
            async def on_ready():
                LOGGER.info(f'Logged in as {self.bot.user}')
                # Set the event when on_ready is called
                self._ready_event.set()
            @self.bot.event
            async def on_voice_state_update(member, before, after):
-                if member == self.bot.user and before.channel is None and after.channel is not None:
+                if member != self.bot.user: return
-                    print(f"{member.name} joined voice channel {after.channel.name}")
+                if before.channel is None and after.channel is not None:
                    LOGGER.info(f"{member.name} joined voice channel {after.channel.name}")
                    self._voice_ready_event.set()
                elif before.channel is not None and after.channel is not None and before.channel != after.channel:
                    LOGGER.info(f"{member.name} was moved to voice channel {after.channel.name}")
                    if not self._voice_ready_event.is_set(): self._voice_ready_event.set()
                elif before.channel is not None and after.channel is None:
                    LOGGER.warning(f"{member.name} left voice channel {before.channel.name}")
                    guild_id = before.channel.guild.id
                    if guild_id in self.voice_connections:
                        LOGGER.warning(f"Bot was disconnected from {guild_id} unexpectedly. Cleaning up...")
                        await self.leave_voice_channel(guild_id)
                    self._voice_ready_event.clear()
            @self.bot.event
            async def on_disconnect():
                LOGGER.warning("Bot has been disconnected from Discord.")
            # Load Opus for the current CPU
            await self.load_opus()
            # Create the task to run the bot in the background
            self.bot_task = self.loop.create_task(self.bot.start(token))
        # Wait for the on_ready event to be set by the bot task
        LOGGER.info("Waiting for bot to become ready...")
        try:
            await asyncio.wait_for(self._ready_event.wait(), timeout=60.0)
-            LOGGER.info("Bot is ready, start_bot returning.")
+            LOGGER.info("Bot is ready.")
            return
        except asyncio.TimeoutError:
-            LOGGER.error("Timeout waiting for bot to become ready. Bot might have failed to start.")
+            LOGGER.error("Timeout waiting for bot to become ready.")
-            if self.bot_task and not self.bot_task.done():
+            if self.bot_task and not self.bot_task.done(): self.bot_task.cancel()
                self.bot_task.cancel()
            raise RuntimeError("Bot failed to become ready within timeout.")
    async def stop_bot(self):
        async with self.lock:
            if self.bot:
                for guild_id in list(self.voice_connections.keys()):
                    await self.leave_voice_channel(guild_id)
                await self.bot.close()
                self.bot = None
            if self.bot_task:
-                await self.bot_task
+                self.bot_task.cancel()
                self.bot_task = None
-            self.voice_clients.clear()
+            self.voice_connections.clear()
            self._ready_event.clear()
            LOGGER.info("Bot has been stopped.")
    async def join_voice_channel(self, guild_id: int, channel_id: int, ng_threshold: int = 50, device_id: int = 4):
-        if not self.bot:
+        if not self.bot: raise RuntimeError("Bot is not running.")
            raise RuntimeError("Bot is not running.")
        guild = self.bot.get_guild(guild_id)
-        if not guild:
+        if not guild: raise ValueError("Guild not found.")
-            raise ValueError("Guild not found.")
+        if not opus.is_loaded(): raise RuntimeError("Opus is not loaded.")
        if not opus.is_loaded():
            raise RuntimeError("Opus is not loaded.")
        channel = guild.get_channel(channel_id)
-        if not isinstance(channel, VoiceChannel):
+        if not isinstance(channel, VoiceChannel): raise ValueError("Channel is not a voice channel.")
-            raise ValueError("Channel is not a voice channel.")
+        if guild_id in self.voice_connections: raise RuntimeError("Already connected to this guild's voice channel.")
        if guild_id in self.voice_clients:
            raise RuntimeError("Already connected to this guild's voice channel.")
        try:
            self._voice_ready_event.clear()
            voice_client = await channel.connect(timeout=60.0, reconnect=True)
-            LOGGER.debug("Voice Connected.")
+            await asyncio.wait_for(self._voice_ready_event.wait(), timeout=15.0)
            streamHandler = NoiseGate(
                _input_device_index=device_id,
                _voice_connection=voice_client,
                _noise_gate_threshold=ng_threshold)
            streamHandler.run()
            LOGGER.debug("Stream is running.")
            self.voice_clients[guild_id] = voice_client
            LOGGER.info(f"Joined guild {guild_id} voice channel {channel_id} and stream is running.")
        except Exception as e:
            LOGGER.error(f"Failed to connect to voice channel: {e}")
-        LOGGER.info("Waiting for bot to join voice...")
+            audio_manager = AudioStreamManager(input_device_index=device_id)
-        try:
+            audio_source = NoiseGateSource(audio_manager.get_stream(), threshold=ng_threshold)
-            await asyncio.wait_for(self._voice_ready_event.wait(), timeout=60.0)
+
-            LOGGER.info("Bot joined voice, returning.")
+            voice_client.play(audio_source, after=lambda e: LOGGER.error(f'Player error: {e}') if e else None)
-            return
+            
-        except asyncio.TimeoutError:
+            self.voice_connections[guild_id] = {
-            LOGGER.error("Timeout waiting for bot to join voice.")
+                "client": voice_client,
-            raise RuntimeError("Bot failed to join voice within timeout.")
+                "audio_manager": audio_manager
            }
            LOGGER.info(f"Joined guild {guild_id} and started audio stream.")
        except Exception as e:
            LOGGER.error(f"Failed to connect to voice channel: {e}", exc_info=True)
            if guild_id in self.voice_connections: # Cleanup if join fails midway
                await self.leave_voice_channel(guild_id)
            raise
    async def leave_voice_channel(self, guild_id: int):
-        if not self.bot:
+        if not self.bot: raise RuntimeError("Bot is not running.")
            raise RuntimeError("Bot is not running.")
-        voice_client = self.voice_clients.get(guild_id)
+        connection_info = self.voice_connections.get(guild_id)
-        if not voice_client:
+        if not connection_info: raise RuntimeError("Not connected to the specified guild's voice channel.")
            raise RuntimeError("Not connected to the specified guild's voice channel.")
        voice_client = connection_info.get("client")
        if voice_client and voice_client.is_connected():
            voice_client.stop()
            await voice_client.disconnect()
-        del self.voice_clients[guild_id]
+
        audio_manager = connection_info.get("audio_manager")
        if audio_manager:
            audio_manager.terminate()
        # Use pop to safely remove the key
        self.voice_connections.pop(guild_id, None)
        LOGGER.info(f"Left guild {guild_id} voice channel.")
    async def load_opus(self):
-        """ Load the proper OPUS library for the device being used """
+        if opus.is_loaded():
            LOGGER.info("Opus library is already loaded.")
            return
        processor = platform.machine()
        script_dir = os.path.dirname(os.path.abspath(__file__))
-        LOGGER.debug("Processor: ", processor)
+
        LOGGER.debug(f"Attempting to load Opus. Processor: {processor}, OS: {os.name}")
        try:
            if os.name == 'nt':
                if processor == "AMD64":
                    opus.load_opus(os.path.join(script_dir, './opus/libopus_amd64.dll'))
                    LOGGER.info("Loaded OPUS library for AMD64")
                return "AMD64"
            else:
                if processor == "aarch64":
                    opus.load_opus(os.path.join(script_dir, './opus/libopus_aarcch64.so'))
                    LOGGER.info("Loaded OPUS library for aarch64")
                return "aarch64"
                elif processor == "armv7l":
                    opus.load_opus(os.path.join(script_dir, './opus/libopus_armv7l.so'))
                    LOGGER.info("Loaded OPUS library for armv7l")
-                return "armv7l"
+                else:
                    opus.load_opus('libopus.so.0')
                LOGGER.info(f"Attempted to load system OPUS library for {processor}")
-    async def set_presence(self, system_name: str):
+        except Exception as e:
-        """ Set the presence (activity) of the bot """
+            LOGGER.error(f"Failed to load OPUS library: {e}")
-        if not self.bot:
+            raise RuntimeError("Could not load a valid Opus library. Voice functionality will fail.")
            LOGGER.warning("Bot is not running, cannot set presence.")
            return
-        try:
+        if not opus.is_loaded():
-            activity = Activity(type=ActivityType.listening, name=system_name)
+            raise RuntimeError("Opus library could not be loaded. Please ensure it is installed correctly.")
            await self.bot.change_presence(activity=activity)
            LOGGER.info(f"Bot presence set to 'Listening to {system_name}'")
        except Exception as pe:
            LOGGER.error(f"Unable to set presence: '{pe}'")
--- a/app/routers/bot.py
+++ b/app/routers/bot.py
@@ -50,7 +50,7 @@ def create_bot_router(bot_manager: DiscordBotManager):
    async def get_status():
        status = {
            "bot_running": bot_manager.bot is not None and not bot_manager.bot.is_closed(),
-            "connected_guilds": list(bot_manager.voice_clients.keys()),
+            "connected_guilds": list(bot_manager.voice_connections.keys()),
            "active_token": bot_manager.token
        }
        return status
Author	SHA1	Message	Date
Logan Cusano	aee6e40792	Revert voice activity changes	2025-07-14 22:25:36 -04:00
Logan Cusano	84cef3119f	revert	2025-07-14 22:20:22 -04:00
Logan Cusano	abb2d2f042	fix bot manager after revert	2025-07-14 22:09:21 -04:00
Logan Cusano	cd2ea546b8	revert noisegate	2025-07-14 22:06:36 -04:00
Logan Cusano	e3566fb367	add more debug	2025-07-14 21:58:03 -04:00
Logan Cusano	0a0d8d3af9	fix typo	2025-07-14 21:52:08 -04:00
Logan Cusano	bbd866a8a6	add debug	2025-07-14 21:49:53 -04:00
Logan Cusano	4f93c99a52	Fix opus error import	2025-07-14 21:41:31 -04:00
Logan Cusano	9040462171	redo noisegate to fix voice presense	2025-07-14 21:34:08 -04:00
Logan Cusano	f893209f36	fix voice activity	2025-07-14 21:27:18 -04:00
Logan Cusano	09e0541bec	fix join loop	2025-07-14 21:20:58 -04:00
Logan Cusano	abd78c83d2	stream blank when no sound	2025-07-14 21:17:09 -04:00
Logan Cusano	a634ea2260	fix depend in bot router	2025-07-14 21:05:45 -04:00
Logan Cusano	e7ff28da6e	Update bot manager and noisegate functions to try and improve the logic	2025-07-14 21:04:14 -04:00
Logan Cusano	b5191ef4d0	working on #7	2025-07-14 20:37:12 -04:00