redo noisegate to fix voice presense

This commit is contained in:
Logan Cusano
2025-07-14 21:34:08 -04:00
parent f893209f36
commit 9040462171
2 changed files with 123 additions and 154 deletions

View File

@@ -1,152 +1,115 @@
import audioop import audioop
import math import math
import pyaudio import pyaudio
import discord
import asyncio import asyncio
from discord import VoiceClient, OpusNotLoaded
from internal.logger import create_logger from internal.logger import create_logger
LOGGER = create_logger(__name__) LOGGER = create_logger(__name__)
DISCORD_FRAME_SIZE = 3840 # Constants for audio processing
SILENT_FRAME = b'\x00' * DISCORD_FRAME_SIZE SAMPLES_PER_FRAME = 960 # For 20ms audio at 48kHz
CHANNELS = 2
SAMPLE_RATE = 48000
FRAME_SIZE = SAMPLES_PER_FRAME * CHANNELS * 2 # 16-bit PCM (2 bytes)
class AudioStream: class AudioTransmitter:
# ... This class remains unchanged from the previous version ... def __init__(self, voice_client: VoiceClient, noise_gate_threshold: int, loop: asyncio.AbstractEventLoop, input_device_index: int):
def __init__(self, _channels: int = 2, _sample_rate: int = 48000, _frames_per_buffer: int = 960, if not voice_client.is_connected():
_input_device_index: int = None, _output_device_index: int = None, _input: bool = True, raise ValueError("VoiceClient is not connected.")
_output: bool = True, _init_on_startup: bool = True, **kwargs): # Accept extra kwargs if not hasattr(voice_client, 'encoder') or not voice_client.encoder:
self.paInstance_kwargs = { raise OpusNotLoaded("VoiceClient has not initialized its Opus encoder.")
'format': pyaudio.paInt16,
'channels': _channels,
'rate': _sample_rate,
'input': _input,
'output': _output,
'frames_per_buffer': _frames_per_buffer
}
if _input_device_index is not None: self.voice_client = voice_client
if _input: self.threshold = noise_gate_threshold
self.paInstance_kwargs['input_device_index'] = _input_device_index
else:
LOGGER.warning("[AudioStream.__init__]:\tInput was not enabled."
" Reinitialize with '_input=True'")
if _output_device_index is not None:
if _output:
self.paInstance_kwargs['output_device_index'] = _output_device_index
else:
LOGGER.warning("[AudioStream.__init__]:\tOutput was not enabled."
" Reinitialize with '_output=True'")
if _init_on_startup:
LOGGER.info("Creating PyAudio instance")
self.paInstance = pyaudio.PyAudio()
self.stream = None
if _output_device_index is not None or _input_device_index is not None:
if _init_on_startup:
LOGGER.info("Init stream")
self.init_stream()
def init_stream(self, _new_output_device_index: int = None, _new_input_device_index: int = None):
if _new_input_device_index is not None:
if self.paInstance_kwargs['input']:
self.paInstance_kwargs['input_device_index'] = _new_input_device_index
else:
LOGGER.warning("[AudioStream.init_stream]:\tInput was not enabled when initialized.")
if _new_output_device_index is not None:
if self.paInstance_kwargs['output']:
self.paInstance_kwargs['output_device_index'] = _new_output_device_index
else:
LOGGER.warning("[AudioStream.init_stream]:\tOutput was not enabled when initialized.")
self.close_if_open()
self.stream = self.paInstance.open(**self.paInstance_kwargs)
def close_if_open(self):
if self.stream and self.stream.is_active():
self.stream.stop_stream()
self.stream.close()
LOGGER.debug("[ReopenStream.close_if_open]:\t Stream was open; It was closed.")
class NoiseGate(AudioStream):
def __init__(self, _voice_connection, _noise_gate_threshold: int, loop: asyncio.AbstractEventLoop, **kwargs):
super(NoiseGate, self).__init__(**kwargs)
self.voice_connection = _voice_connection
self.THRESHOLD = _noise_gate_threshold
# **THE FIX**: Pass the loop down to the stream source.
self.NGStream = NoiseGateStream(self, loop)
def run(self) -> None:
LOGGER.debug("Starting stream")
self.stream.start_stream()
self.core()
def core(self):
if self.voice_connection.is_connected() and not self.voice_connection.is_playing():
LOGGER.debug("Playing stream to discord")
self.voice_connection.play(self.NGStream)
async def close(self):
LOGGER.debug("Closing NoiseGate resources...")
if self.voice_connection and self.voice_connection.is_connected():
# Set speaking to false on close
if self.NGStream.is_speaking:
asyncio.run_coroutine_threadsafe(self.voice_connection.ws.speak(False), self.NGStream.loop)
self.voice_connection.stop()
self.close_if_open()
if self.paInstance:
self.paInstance.terminate()
LOGGER.debug("NoiseGate resources closed.")
class NoiseGateStream(discord.AudioSource):
def __init__(self, noise_gate_instance: NoiseGate, loop: asyncio.AbstractEventLoop):
super(NoiseGateStream, self).__init__()
self.noise_gate = noise_gate_instance
self.loop = loop self.loop = loop
self.is_speaking = True self.input_device_index = input_device_index
self.NG_fadeout = 12
self.NG_fadeout_count = 0
def read(self): self.pa = pyaudio.PyAudio()
self.stream = self.pa.open(
format=pyaudio.paInt16,
channels=CHANNELS,
rate=SAMPLE_RATE,
input=True,
frames_per_buffer=SAMPLES_PER_FRAME,
input_device_index=self.input_device_index
)
self._is_running = False
self._is_speaking = False
self.ng_fadeout_count = 0
self.NG_FADEOUT_FRAMES = 12 # 240ms fadeout time
async def _set_speaking(self, speaking: bool):
"""Safely sets the speaking state if it has changed."""
if self._is_speaking != speaking:
self._is_speaking = speaking
await self.voice_client.ws.speak(speaking)
async def start(self):
"""Starts the main audio transmission loop."""
self._is_running = True
self.stream.start_stream()
LOGGER.info("Audio transmitter started.")
try: try:
if not self.noise_gate.voice_connection.is_connected(): while self._is_running:
if self.is_speaking: # Read audio data in a separate thread to not block the event loop
asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop) pcm_data = await self.loop.run_in_executor(
self.is_speaking = False None, self.stream.read, SAMPLES_PER_FRAME
return SILENT_FRAME )
curr_buffer = self.noise_gate.stream.read(960, exception_on_overflow=False) gate_is_open = self._check_noise_gate(pcm_data)
if len(curr_buffer) != DISCORD_FRAME_SIZE:
if self.is_speaking:
asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop)
self.is_speaking = False
return SILENT_FRAME
buffer_rms = audioop.rms(curr_buffer, 2) if gate_is_open:
# If gate is open, ensure speaking is on and send audio
gate_is_open = False await self._set_speaking(True)
if buffer_rms > 0:
buffer_decibel = 20 * math.log10(buffer_rms) # Encode PCM data to Opus
if buffer_decibel >= self.noise_gate.THRESHOLD: encoded_packets = self.voice_client.encoder.encode(pcm_data, SAMPLES_PER_FRAME)
self.NG_fadeout_count = self.NG_fadeout
gate_is_open = True # Send each encoded packet
elif self.NG_fadeout_count > 0: for packet in encoded_packets:
self.NG_fadeout_count -= 1 self.voice_client.send_audio_packet(packet)
gate_is_open = True else:
# If gate is closed, ensure speaking is off
if gate_is_open and not self.is_speaking: await self._set_speaking(False)
asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(True), self.loop)
self.is_speaking = True
elif not gate_is_open and self.is_speaking:
asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop)
self.is_speaking = False
return bytes(curr_buffer) if gate_is_open else SILENT_FRAME
# Wait for the next 20ms interval
await asyncio.sleep(0.02)
except asyncio.CancelledError:
LOGGER.info("Audio transmitter task cancelled.")
except Exception as e: except Exception as e:
LOGGER.error(f"Unhandled exception in NoiseGateStream.read: {e}", exc_info=True) LOGGER.error(f"Error in audio transmitter loop: {e}", exc_info=True)
if self.is_speaking: finally:
asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop) await self._cleanup()
self.is_speaking = False
return SILENT_FRAME def _check_noise_gate(self, pcm_data: bytes) -> bool:
"""Applies the noise gate logic to raw PCM data."""
rms = audioop.rms(pcm_data, 2)
if rms == 0: return False
db = 20 * math.log10(rms)
if db >= self.threshold:
self.ng_fadeout_count = self.NG_FADEOUT_FRAMES
return True
elif self.ng_fadeout_count > 0:
self.ng_fadeout_count -= 1
return True
return False
async def stop(self):
"""Stops the transmission loop."""
self._is_running = False
async def _cleanup(self):
"""Cleans up all resources."""
LOGGER.info("Cleaning up transmitter resources.")
if self._is_speaking:
await self._set_speaking(False)
if self.stream.is_active():
self.stream.stop_stream()
self.stream.close()
self.pa.terminate()

View File

@@ -4,7 +4,7 @@ import os
from discord import VoiceClient, VoiceChannel, opus, Activity, ActivityType, Intents from discord import VoiceClient, VoiceChannel, opus, Activity, ActivityType, Intents
from discord.ext import commands from discord.ext import commands
from typing import Optional, Dict from typing import Optional, Dict
from internal.NoiseGatev2 import NoiseGate from internal.NoiseGatev2 import AudioTransmitter
from internal.logger import create_logger from internal.logger import create_logger
LOGGER = create_logger(__name__) LOGGER = create_logger(__name__)
@@ -18,7 +18,6 @@ class DiscordBotManager:
def __init__(self): def __init__(self):
self.bot: Optional[commands.Bot] = None self.bot: Optional[commands.Bot] = None
self.bot_task: Optional[asyncio.Task] = None self.bot_task: Optional[asyncio.Task] = None
# This dictionary will hold both the client and its audio stream handler
self.voice_connections: Dict[int, Dict] = {} self.voice_connections: Dict[int, Dict] = {}
self.token: Optional[str] = None self.token: Optional[str] = None
self.loop = asyncio.get_event_loop() self.loop = asyncio.get_event_loop()
@@ -83,7 +82,6 @@ class DiscordBotManager:
async def stop_bot(self): async def stop_bot(self):
async with self.lock: async with self.lock:
if self.bot: if self.bot:
# Disconnect from all voice channels cleanly
for guild_id in list(self.voice_connections.keys()): for guild_id in list(self.voice_connections.keys()):
await self.leave_voice_channel(guild_id) await self.leave_voice_channel(guild_id)
await self.bot.close() await self.bot.close()
@@ -112,20 +110,23 @@ class DiscordBotManager:
await asyncio.wait_for(self._voice_ready_event.wait(), timeout=15.0) await asyncio.wait_for(self._voice_ready_event.wait(), timeout=15.0)
LOGGER.info("Bot voice connection is ready.") LOGGER.info("Bot voice connection is ready.")
stream_handler = NoiseGate( # Create and start the new AudioTransmitter
_voice_connection=voice_client, transmitter = AudioTransmitter(
_noise_gate_threshold=ng_threshold, voice_client=voice_client,
# **THE FIX**: Pass the event loop to the stream handler. noise_gate_threshold=ng_threshold,
loop=self.loop, loop=self.loop,
_input_device_index=device_id input_device_index=device_id
) )
stream_handler.run()
# Start the transmitter's main loop as a background task
transmitter_task = self.loop.create_task(transmitter.start())
self.voice_connections[guild_id] = { self.voice_connections[guild_id] = {
"client": voice_client, "client": voice_client,
"stream": stream_handler "transmitter": transmitter,
"task": transmitter_task
} }
LOGGER.info(f"Joined guild {guild_id} and audio stream is now running.") LOGGER.info(f"Joined guild {guild_id} and audio transmitter is running.")
except asyncio.TimeoutError: except asyncio.TimeoutError:
LOGGER.error(f"Timeout waiting for bot to join voice channel {channel_id}.") LOGGER.error(f"Timeout waiting for bot to join voice channel {channel_id}.")
@@ -140,12 +141,16 @@ class DiscordBotManager:
connection_info = self.voice_connections.get(guild_id) connection_info = self.voice_connections.get(guild_id)
if not connection_info: raise RuntimeError("Not connected to the specified guild's voice channel.") if not connection_info: raise RuntimeError("Not connected to the specified guild's voice channel.")
stream_handler = connection_info.get('stream') # Stop the transmitter task and clean up its resources
if stream_handler: transmitter = connection_info.get("transmitter")
LOGGER.info(f"Stopping audio stream for guild {guild_id}.") task = connection_info.get("task")
await stream_handler.close() if transmitter:
LOGGER.info(f"Stopping audio transmitter for guild {guild_id}.")
await transmitter.stop()
if task and not task.done():
task.cancel()
voice_client = connection_info.get('client') voice_client = connection_info.get("client")
if voice_client and voice_client.is_connected(): if voice_client and voice_client.is_connected():
await voice_client.disconnect() await voice_client.disconnect()
@@ -153,15 +158,16 @@ class DiscordBotManager:
LOGGER.info(f"Left guild {guild_id} voice channel.") LOGGER.info(f"Left guild {guild_id} voice channel.")
async def load_opus(self): async def load_opus(self):
# ... this method is unchanged ...
processor = platform.machine() processor = platform.machine()
script_dir = os.path.dirname(os.path.abspath(__file__)) script_dir = os.path.dirname(os.path.abspath(__file__))
LOGGER.debug(f"Processor: {processor}, OS: {os.name}") LOGGER.debug(f"Processor: {processor}, OS: {os.name}")
try: try:
if os.name == 'nt': # Windows if os.name == 'nt':
if processor == "AMD64": if processor == "AMD64":
opus.load_opus(os.path.join(script_dir, './opus/libopus_amd64.dll')) opus.load_opus(os.path.join(script_dir, './opus/libopus_amd64.dll'))
LOGGER.info("Loaded OPUS library for AMD64") LOGGER.info("Loaded OPUS library for AMD64")
else: # Linux / other else:
if processor == "aarch64": if processor == "aarch64":
opus.load_opus(os.path.join(script_dir, './opus/libopus_aarcch64.so')) opus.load_opus(os.path.join(script_dir, './opus/libopus_aarcch64.so'))
LOGGER.info("Loaded OPUS library for aarch64") LOGGER.info("Loaded OPUS library for aarch64")
@@ -169,7 +175,6 @@ class DiscordBotManager:
opus.load_opus(os.path.join(script_dir, './opus/libopus_armv7l.so')) opus.load_opus(os.path.join(script_dir, './opus/libopus_armv7l.so'))
LOGGER.info("Loaded OPUS library for armv7l") LOGGER.info("Loaded OPUS library for armv7l")
else: else:
# Fallback for other Linux archs like x86_64
opus.load_opus('libopus.so.0') opus.load_opus('libopus.so.0')
LOGGER.info(f"Loaded system OPUS library for {processor}") LOGGER.info(f"Loaded system OPUS library for {processor}")
except Exception as e: except Exception as e:
@@ -177,6 +182,7 @@ class DiscordBotManager:
raise RuntimeError("Could not load a valid Opus library. Voice functionality will fail.") raise RuntimeError("Could not load a valid Opus library. Voice functionality will fail.")
async def set_presence(self, system_name: str): async def set_presence(self, system_name: str):
# ... this method is unchanged ...
if not self.bot or not self.bot.is_ready(): if not self.bot or not self.bot.is_ready():
LOGGER.warning("Bot is not ready, cannot set presence.") LOGGER.warning("Bot is not ready, cannot set presence.")
return return