15 Commits

Author SHA1 Message Date
Logan Cusano
aee6e40792 Revert voice activity changes 2025-07-14 22:25:36 -04:00
Logan Cusano
84cef3119f revert 2025-07-14 22:20:22 -04:00
Logan Cusano
abb2d2f042 fix bot manager after revert 2025-07-14 22:09:21 -04:00
Logan Cusano
cd2ea546b8 revert noisegate 2025-07-14 22:06:36 -04:00
Logan Cusano
e3566fb367 add more debug 2025-07-14 21:58:03 -04:00
Logan Cusano
0a0d8d3af9 fix typo 2025-07-14 21:52:08 -04:00
Logan Cusano
bbd866a8a6 add debug 2025-07-14 21:49:53 -04:00
Logan Cusano
4f93c99a52 Fix opus error import 2025-07-14 21:41:31 -04:00
Logan Cusano
9040462171 redo noisegate to fix voice presense 2025-07-14 21:34:08 -04:00
Logan Cusano
f893209f36 fix voice activity 2025-07-14 21:27:18 -04:00
Logan Cusano
09e0541bec fix join loop 2025-07-14 21:20:58 -04:00
Logan Cusano
abd78c83d2 stream blank when no sound 2025-07-14 21:17:09 -04:00
Logan Cusano
a634ea2260 fix depend in bot router 2025-07-14 21:05:45 -04:00
Logan Cusano
e7ff28da6e Update bot manager and noisegate functions to try and improve the logic 2025-07-14 21:04:14 -04:00
Logan Cusano
b5191ef4d0 working on #7 2025-07-14 20:37:12 -04:00
3 changed files with 166 additions and 278 deletions

View File

@@ -1,215 +1,98 @@
import audioop
import math
import time
import pyaudio
import discord
import numpy
import asyncio
from internal.logger import create_logger
voice_connection = None
# AudioSource is the base class that the custom audio source (NoiseGateSource) subclasses.
# It is imported from the top-level discord package here; adjust the import if your library exposes it elsewhere.
from discord import AudioSource
LOGGER = create_logger(__name__)
# noinspection PyUnresolvedReferences
class AudioStream:
    """Owns a PyAudio instance and, optionally, one open PyAudio stream.

    The keyword arguments handed to ``PyAudio.open`` are kept in
    ``self.paInstance_kwargs`` so the stream can be reopened later with a
    different input or output device (see ``init_stream``).
    """

    def __init__(self, _channels: int = 2, _sample_rate: int = 48000, _frames_per_buffer: int = 1024,
                 _input_device_index: int = None, _output_device_index: int = None, _input: bool = True,
                 _output: bool = True, _init_on_startup: bool = True):
        """Collect the stream settings and optionally open the stream.

        Args:
            _channels: Channel count passed to ``PyAudio.open``.
            _sample_rate: Sample rate passed to ``PyAudio.open`` as ``rate``.
            _frames_per_buffer: Buffer size passed to ``PyAudio.open``.
            _input_device_index: Input device id, or None to leave it unset.
            _output_device_index: Output device id, or None to leave it unset.
            _input: Whether the stream is opened for input.
            _output: Whether the stream is opened for output.
            _init_on_startup: Create the PyAudio instance now and, if a device
                id was given, open the stream immediately.
        """
        self.paInstance_kwargs = {
            'format': pyaudio.paInt16,
            'channels': _channels,
            'rate': _sample_rate,
            'input': _input,
            'output': _output,
            'frames_per_buffer': _frames_per_buffer
        }

        # Compare against None rather than truthiness: list_devices() reports
        # device ids starting at 0, so 0 must be accepted as a real device.
        if _input_device_index is not None:
            if _input:
                self.paInstance_kwargs['input_device_index'] = _input_device_index
            else:
                LOGGER.warning("[AudioStream.__init__]:\tInput was not enabled."
                               " Reinitialize with '_input=True'")

        if _output_device_index is not None:
            if _output:
                self.paInstance_kwargs['output_device_index'] = _output_device_index
            else:
                LOGGER.warning("[AudioStream.__init__]:\tOutput was not enabled."
                               " Reinitialize with '_output=True'")

        # Always define both attributes so later calls never hit AttributeError
        # when the instance was built with _init_on_startup=False.
        self.paInstance = None
        self.stream = None

        if _init_on_startup:
            self._get_pa()
            # Open the stream right away only if we were given a device id.
            if _output_device_index is not None or _input_device_index is not None:
                LOGGER.info("Init stream")
                self.init_stream()

    def _get_pa(self):
        """Return the PyAudio instance, creating it on first use."""
        if self.paInstance is None:
            LOGGER.info("Creating PyAudio instance")
            self.paInstance = pyaudio.PyAudio()
        return self.paInstance

    def init_stream(self, _new_output_device_index: int = None, _new_input_device_index: int = None):
        """(Re)open the stream, optionally switching input/output device.

        A device id is only applied when the matching direction was enabled at
        construction time; otherwise a warning is logged and the id is ignored.
        Any stream that is currently open is closed first.
        """
        if _new_input_device_index is not None:
            if self.paInstance_kwargs['input']:
                self.paInstance_kwargs['input_device_index'] = _new_input_device_index
            else:
                LOGGER.warning("[AudioStream.init_stream]:\tInput was not enabled when initialized."
                               " Reinitialize with '_input=True'")

        if _new_output_device_index is not None:
            if self.paInstance_kwargs['output']:
                self.paInstance_kwargs['output_device_index'] = _new_output_device_index
            else:
                LOGGER.warning("[AudioStream.init_stream]:\tOutput was not enabled when initialized."
                               " Reinitialize with '_output=True'")

        self.close_if_open()

        # Open the stream
        self.stream = self._get_pa().open(**self.paInstance_kwargs)

    def close_if_open(self):
        """Stop the stream if it is active, then close it; no-op without a stream."""
        if self.stream:
            if self.stream.is_active():
                self.stream.stop_stream()
            self.stream.close()
            LOGGER.debug("[ReopenStream.close_if_open]:\t Stream was open; It was closed.")

    def list_devices(self, _display_input_devices: bool = True, _display_output_devices: bool = True):
        """Return the devices of host API 0, grouped by direction.

        Args:
            _display_input_devices: Log each input device at debug level.
            _display_output_devices: Log each output device at debug level.

        Returns:
            ``{'Input': {id: name}, 'Output': {id: name}}``. A device that has
            both input and output channels appears in both maps.
        """
        LOGGER.info('Getting a list of the devices connected')
        pa = self._get_pa()
        device_count = pa.get_host_api_info_by_index(0).get('deviceCount')

        devices = {
            'Input': {},
            'Output': {}
        }

        for i in range(device_count):
            # Query the device once and reuse the result for both directions.
            device_info = pa.get_device_info_by_host_api_device_index(0, i)
            device_name = device_info.get('name')

            if device_info.get('maxInputChannels') > 0:
                devices['Input'][i] = device_name
                if _display_input_devices:
                    LOGGER.debug(f"Input Device id {i} - {device_name}")

            if device_info.get('maxOutputChannels') > 0:
                devices['Output'][i] = device_name
                if _display_output_devices:
                    LOGGER.debug(f"Output Device id {i} - {device_name}")

        return devices

    async def stop(self):
        """Disconnect the shared voice connection and release audio resources."""
        # voice_connection is a module-level global that stays None until a
        # NoiseGate publishes one, so guard against using it unset.
        if voice_connection is not None:
            await voice_connection.disconnect()
        # close_if_open() already closes the stream; closing it a second time
        # here (or dereferencing a None stream) is what used to break stop().
        self.close_if_open()
        if self.paInstance is not None:
            self.paInstance.terminate()
# Constants for audio processing
# 960 samples per channel at 48000 Hz corresponds to one 20 ms frame.
SAMPLES_PER_FRAME = 960
CHANNELS = 2
SAMPLE_RATE = 48000
FRAME_SIZE = SAMPLES_PER_FRAME * CHANNELS * 2 # 16-bit PCM
# All-zero buffer of FRAME_SIZE (3840) bytes; NoiseGateSource.read returns it when the gate is closed.
SILENT_FRAME = b'\x00' * FRAME_SIZE
# noinspection PyUnresolvedReferences
class NoiseGate(AudioStream):
# Build the underlying AudioStream (always with _init_on_startup=True), publish the
# voice connection through the module-level `voice_connection` global, and create
# the NoiseGateStream that core() plays to Discord.
# NOTE(review): passing `_init_on_startup` inside **kwargs would collide with the
# explicit keyword below — confirm no caller does that.
def __init__(self, _voice_connection, _noise_gate_threshold: int, **kwargs):
super(NoiseGate, self).__init__(_init_on_startup=True, **kwargs)
# The connection is stored module-wide, not on the instance; other code in this module reads the global.
global voice_connection
voice_connection = _voice_connection
# NoiseGateStream.read compares each buffer's decibel value against this threshold.
self.THRESHOLD = _noise_gate_threshold
self.NGStream = NoiseGateStream(self)
# Not assigned anywhere else in the visible code.
self.Voice_Connection_Thread = None
class NoiseGateSource(AudioSource):
# Keep the input stream to read from and the gate threshold; read() compares
# the threshold against 20 * log10(rms) of each frame.
def __init__(self, audio_stream, threshold: int):
self.audio_stream = audio_stream
self.threshold = threshold
# Frames left during which audio is still passed through after the level drops below the threshold.
self.ng_fadeout_count = 0
self.NG_FADEOUT_FRAMES = 12 # 240ms fadeout time
# Start the PyAudio stream, then hand over to core() to begin playback to Discord.
# NOTE(review): core() waits with time.sleep() until the voice connection reports
# connected, so this call blocks the calling thread — confirm callers expect that.
def run(self) -> None:
# The global is only read further down the call chain (in core()); nothing is assigned here.
global voice_connection
# Start the audio stream
LOGGER.debug("Starting stream")
self.stream.start_stream()
# Start the stream to discord
self.core()
# Begin playing the noise-gated stream on the shared voice connection.
# This method is also registered as the `after=` callback of play(), so it runs
# again when playback ends; `error` is whatever that callback receives.
def core(self, error=None):
if error:
LOGGER.warning(error)
# Poll until the voice connection is up (sleeps 0.2 s per iteration).
while not voice_connection.is_connected():
time.sleep(.2)
# Only start a player if one is not already running.
if not voice_connection.is_playing():
LOGGER.debug("Playing stream to discord")
voice_connection.play(self.NGStream, after=self.core)
async def close(self):
    """Disconnect the shared voice connection and stop the audio stream.

    The stream is only stopped when it exists and reports itself active.
    """
    LOGGER.debug("Closing")
    await voice_connection.disconnect()
    # is_active must be *called*: the bare bound method is always truthy, so
    # stop_stream() used to run even on a stream that was not active.
    if self.stream is not None and self.stream.is_active():
        self.stream.stop_stream()
        LOGGER.debug("Stopping stream")
# noinspection PyUnresolvedReferences
class NoiseGateStream(discord.AudioSource):
# Wrap the object passed in so read() can pull audio from it.
# read() accesses `self.stream.stream` and `self.stream.THRESHOLD`, so `_stream`
# is expected to expose both attributes (the NoiseGate passes itself in).
def __init__(self, _stream):
super(NoiseGateStream, self).__init__()
self.stream = _stream # The actual audio stream object
# 240 / 20 evaluates to 12.0 — presumably 240 ms of hold expressed in 20 ms frames; confirm.
self.NG_fadeout = 240 / 20 # Fadeout value used to hold the noisegate after de-triggering
self.NG_fadeout_count = 0 # A count set when the noisegate is triggered and was de-triggered
self.process_set_count = 0 # Counts how many processes have been made
def read(self):
def read(self) -> bytes:
"""
Reads data from the audio stream, applies the noise gate,
and returns a 20ms audio frame.
"""
try:
while voice_connection.is_connected():
curr_buffer = bytearray(self.stream.stream.read(960))
buffer_rms = audioop.rms(curr_buffer, 2)
if buffer_rms > 0:
buffer_decibel = 20 * math.log10(buffer_rms)
# Read a frame's worth of data from the input stream.
pcm_data = self.audio_stream.read(SAMPLES_PER_FRAME, exception_on_overflow=False)
if self.process_set_count % 10 == 0:
if buffer_decibel >= self.stream.THRESHOLD:
LOGGER.debug(f"[Noisegate Open] {buffer_decibel} db")
else:
LOGGER.debug(f"[Noisegate Closed] {buffer_decibel} db")
# Ensure we have a full frame of data.
if len(pcm_data) != FRAME_SIZE:
return SILENT_FRAME
if buffer_decibel >= self.stream.THRESHOLD:
self.NG_fadeout_count = self.NG_fadeout
self.process_set_count += 1
if curr_buffer:
return bytes(curr_buffer)
# Calculate volume to check against the threshold.
rms = audioop.rms(pcm_data, 2)
if rms == 0:
# If there's no volume, check if we're in the fadeout period.
if self.ng_fadeout_count > 0:
self.ng_fadeout_count -= 1
return pcm_data # Return the (silent) data to complete the fade
return SILENT_FRAME
else:
if self.NG_fadeout_count > 0:
self.NG_fadeout_count -= 1
LOGGER.debug(f"Frames in fadeout remaining: {self.NG_fadeout_count}")
self.process_set_count += 1
if curr_buffer:
return bytes(curr_buffer)
db = 20 * math.log10(rms)
except OSError as e:
LOGGER.warning(e)
# If volume is above the threshold, send the audio and reset fadeout.
if db >= self.threshold:
self.ng_fadeout_count = self.NG_FADEOUT_FRAMES
return pcm_data
# If below threshold but still in the fadeout period, send the audio.
if self.ng_fadeout_count > 0:
self.ng_fadeout_count -= 1
return pcm_data
# Otherwise, the gate is closed. Send silence.
return SILENT_FRAME
except Exception as e:
LOGGER.error(f"Error in NoiseGateSource.read: {e}", exc_info=True)
return SILENT_FRAME
# Intentionally releases nothing: this source does not own the input stream it
# reads from, so it only logs that cleanup was requested.
def cleanup(self) -> None:
"""Called when the player stops."""
# The AudioStreamManager now handles cleanup.
LOGGER.info("Audio source cleanup called.")
pass
def audio_datalist_set_volume(self, datalist, volume):
""" Change value of list of audio chunks """
sound_level = (volume / 100.)
class AudioStreamManager:
"""Manages the PyAudio instance and input stream."""
def __init__(self, input_device_index: int):
    """Create the PyAudio instance and open + start the input stream.

    Args:
        input_device_index: Id of the input device to capture from.

    Raises:
        Exception: Whatever ``open()`` / ``start_stream()`` raised; the
            PyAudio instance (and a partially opened stream) is released
            before the error is re-raised.
    """
    self.pa = pyaudio.PyAudio()
    self.stream = None
    try:
        self.stream = self.pa.open(
            format=pyaudio.paInt16,
            channels=CHANNELS,
            rate=SAMPLE_RATE,
            input=True,
            frames_per_buffer=SAMPLES_PER_FRAME,
            input_device_index=input_device_index
        )
        self.stream.start_stream()
    except Exception:
        # A failed constructor never hands the object to the caller, so nobody
        # else can call terminate(); release what was acquired here.
        if self.stream is not None:
            self.stream.close()
            self.stream = None
        self.pa.terminate()
        raise
    LOGGER.info(f"Audio stream started on device {input_device_index}")
for i in range(len(datalist)):
chunk = numpy.fromstring(datalist[i], numpy.int16)
# Return the input stream opened in __init__ (the same object each call, not a copy).
def get_stream(self):
return self.stream
chunk = chunk * sound_level
datalist[i] = chunk.astype(numpy.int16)
if __name__ == '__main__':
input_index = int(input("Input:\t"))
output_index = int(input("Output:\t"))
ng = NoiseGate(_input_device_index=input_index, _output_device_index=output_index)
ng.list_devices()
ng.start()
def terminate(self):
    """Stop and close the input stream, then terminate PyAudio.

    Safe to call more than once: after the first call the stream and the
    PyAudio references are cleared and later calls return immediately.
    """
    if self.pa is None:
        # Already terminated by an earlier call.
        return
    try:
        if self.stream is not None:
            # Stop only a running stream, but always close it.
            if self.stream.is_active():
                self.stream.stop_stream()
            self.stream.close()
    finally:
        # Release PyAudio even if stopping/closing the stream raised.
        self.stream = None
        self.pa.terminate()
        self.pa = None
        LOGGER.info("PyAudio instance terminated.")

View File

@@ -4,12 +4,11 @@ import os
from discord import VoiceClient, VoiceChannel, opus, Activity, ActivityType, Intents
from discord.ext import commands
from typing import Optional, Dict
from internal.NoiseGatev2 import NoiseGate
from internal.logger import create_logger
from internal.NoiseGatev2 import AudioStreamManager, NoiseGateSource
LOGGER = create_logger(__name__)
# Configure discord intents
intents = Intents.default()
intents.voice_states = True
intents.guilds = True
@@ -18,7 +17,7 @@ class DiscordBotManager:
def __init__(self):
self.bot: Optional[commands.Bot] = None
self.bot_task: Optional[asyncio.Task] = None
self.voice_clients: Dict[int, VoiceClient] = {}
self.voice_connections: Dict[int, Dict] = {}
self.token: Optional[str] = None
self.loop = asyncio.get_event_loop()
self.lock = asyncio.Lock()
@@ -38,127 +37,133 @@ class DiscordBotManager:
@self.bot.event
async def on_ready():
LOGGER.info(f'Logged in as {self.bot.user}')
# Set the event when on_ready is called
self._ready_event.set()
@self.bot.event
async def on_voice_state_update(member, before, after):
if member == self.bot.user and before.channel is None and after.channel is not None:
print(f"{member.name} joined voice channel {after.channel.name}")
if member != self.bot.user: return
if before.channel is None and after.channel is not None:
LOGGER.info(f"{member.name} joined voice channel {after.channel.name}")
self._voice_ready_event.set()
elif before.channel is not None and after.channel is not None and before.channel != after.channel:
LOGGER.info(f"{member.name} was moved to voice channel {after.channel.name}")
if not self._voice_ready_event.is_set(): self._voice_ready_event.set()
elif before.channel is not None and after.channel is None:
LOGGER.warning(f"{member.name} left voice channel {before.channel.name}")
guild_id = before.channel.guild.id
if guild_id in self.voice_connections:
LOGGER.warning(f"Bot was disconnected from {guild_id} unexpectedly. Cleaning up...")
await self.leave_voice_channel(guild_id)
self._voice_ready_event.clear()
@self.bot.event
async def on_disconnect():
LOGGER.warning("Bot has been disconnected from Discord.")
# Load Opus for the current CPU
await self.load_opus()
# Create the task to run the bot in the background
self.bot_task = self.loop.create_task(self.bot.start(token))
# Wait for the on_ready event to be set by the bot task
LOGGER.info("Waiting for bot to become ready...")
try:
await asyncio.wait_for(self._ready_event.wait(), timeout=60.0)
LOGGER.info("Bot is ready, start_bot returning.")
return
LOGGER.info("Bot is ready.")
except asyncio.TimeoutError:
LOGGER.error("Timeout waiting for bot to become ready. Bot might have failed to start.")
if self.bot_task and not self.bot_task.done():
self.bot_task.cancel()
LOGGER.error("Timeout waiting for bot to become ready.")
if self.bot_task and not self.bot_task.done(): self.bot_task.cancel()
raise RuntimeError("Bot failed to become ready within timeout.")
async def stop_bot(self):
async with self.lock:
if self.bot:
for guild_id in list(self.voice_connections.keys()):
await self.leave_voice_channel(guild_id)
await self.bot.close()
self.bot = None
if self.bot_task:
await self.bot_task
self.bot_task.cancel()
self.bot_task = None
self.voice_clients.clear()
self.voice_connections.clear()
self._ready_event.clear()
LOGGER.info("Bot has been stopped.")
async def join_voice_channel(self, guild_id: int, channel_id: int, ng_threshold: int = 50, device_id: int = 4):
if not self.bot:
raise RuntimeError("Bot is not running.")
if not self.bot: raise RuntimeError("Bot is not running.")
guild = self.bot.get_guild(guild_id)
if not guild:
raise ValueError("Guild not found.")
if not opus.is_loaded():
raise RuntimeError("Opus is not loaded.")
if not guild: raise ValueError("Guild not found.")
if not opus.is_loaded(): raise RuntimeError("Opus is not loaded.")
channel = guild.get_channel(channel_id)
if not isinstance(channel, VoiceChannel):
raise ValueError("Channel is not a voice channel.")
if guild_id in self.voice_clients:
raise RuntimeError("Already connected to this guild's voice channel.")
if not isinstance(channel, VoiceChannel): raise ValueError("Channel is not a voice channel.")
if guild_id in self.voice_connections: raise RuntimeError("Already connected to this guild's voice channel.")
try:
self._voice_ready_event.clear()
voice_client = await channel.connect(timeout=60.0, reconnect=True)
LOGGER.debug("Voice Connected.")
streamHandler = NoiseGate(
_input_device_index=device_id,
_voice_connection=voice_client,
_noise_gate_threshold=ng_threshold)
streamHandler.run()
LOGGER.debug("Stream is running.")
self.voice_clients[guild_id] = voice_client
LOGGER.info(f"Joined guild {guild_id} voice channel {channel_id} and stream is running.")
except Exception as e:
LOGGER.error(f"Failed to connect to voice channel: {e}")
await asyncio.wait_for(self._voice_ready_event.wait(), timeout=15.0)
LOGGER.info("Waiting for bot to join voice...")
try:
await asyncio.wait_for(self._voice_ready_event.wait(), timeout=60.0)
LOGGER.info("Bot joined voice, returning.")
return
except asyncio.TimeoutError:
LOGGER.error("Timeout waiting for bot to join voice.")
raise RuntimeError("Bot failed to join voice within timeout.")
audio_manager = AudioStreamManager(input_device_index=device_id)
audio_source = NoiseGateSource(audio_manager.get_stream(), threshold=ng_threshold)
voice_client.play(audio_source, after=lambda e: LOGGER.error(f'Player error: {e}') if e else None)
self.voice_connections[guild_id] = {
"client": voice_client,
"audio_manager": audio_manager
}
LOGGER.info(f"Joined guild {guild_id} and started audio stream.")
except Exception as e:
LOGGER.error(f"Failed to connect to voice channel: {e}", exc_info=True)
if guild_id in self.voice_connections: # Cleanup if join fails midway
await self.leave_voice_channel(guild_id)
raise
async def leave_voice_channel(self, guild_id: int):
if not self.bot:
raise RuntimeError("Bot is not running.")
if not self.bot: raise RuntimeError("Bot is not running.")
voice_client = self.voice_clients.get(guild_id)
if not voice_client:
raise RuntimeError("Not connected to the specified guild's voice channel.")
connection_info = self.voice_connections.get(guild_id)
if not connection_info: raise RuntimeError("Not connected to the specified guild's voice channel.")
voice_client = connection_info.get("client")
if voice_client and voice_client.is_connected():
voice_client.stop()
await voice_client.disconnect()
del self.voice_clients[guild_id]
audio_manager = connection_info.get("audio_manager")
if audio_manager:
audio_manager.terminate()
# Use pop to safely remove the key
self.voice_connections.pop(guild_id, None)
LOGGER.info(f"Left guild {guild_id} voice channel.")
async def load_opus(self):
""" Load the proper OPUS library for the device being used """
if opus.is_loaded():
LOGGER.info("Opus library is already loaded.")
return
processor = platform.machine()
script_dir = os.path.dirname(os.path.abspath(__file__))
LOGGER.debug("Processor: ", processor)
LOGGER.debug(f"Attempting to load Opus. Processor: {processor}, OS: {os.name}")
try:
if os.name == 'nt':
if processor == "AMD64":
opus.load_opus(os.path.join(script_dir, './opus/libopus_amd64.dll'))
LOGGER.info("Loaded OPUS library for AMD64")
return "AMD64"
else:
if processor == "aarch64":
opus.load_opus(os.path.join(script_dir, './opus/libopus_aarcch64.so'))
LOGGER.info("Loaded OPUS library for aarch64")
return "aarch64"
elif processor == "armv7l":
opus.load_opus(os.path.join(script_dir, './opus/libopus_armv7l.so'))
LOGGER.info("Loaded OPUS library for armv7l")
return "armv7l"
else:
opus.load_opus('libopus.so.0')
LOGGER.info(f"Attempted to load system OPUS library for {processor}")
async def set_presence(self, system_name: str):
""" Set the presence (activity) of the bot """
if not self.bot:
LOGGER.warning("Bot is not running, cannot set presence.")
return
except Exception as e:
LOGGER.error(f"Failed to load OPUS library: {e}")
raise RuntimeError("Could not load a valid Opus library. Voice functionality will fail.")
try:
activity = Activity(type=ActivityType.listening, name=system_name)
await self.bot.change_presence(activity=activity)
LOGGER.info(f"Bot presence set to 'Listening to {system_name}'")
except Exception as pe:
LOGGER.error(f"Unable to set presence: '{pe}'")
if not opus.is_loaded():
raise RuntimeError("Opus library could not be loaded. Please ensure it is installed correctly.")

View File

@@ -50,7 +50,7 @@ def create_bot_router(bot_manager: DiscordBotManager):
async def get_status():
status = {
"bot_running": bot_manager.bot is not None and not bot_manager.bot.is_closed(),
"connected_guilds": list(bot_manager.voice_clients.keys()),
"connected_guilds": list(bot_manager.voice_connections.keys()),
"active_token": bot_manager.token
}
return status