redo noisegate to fix voice presense

This commit is contained in:
Logan Cusano
2025-07-14 21:34:08 -04:00
parent f893209f36
commit 9040462171
2 changed files with 123 additions and 154 deletions

View File

@@ -1,152 +1,115 @@
import audioop
import math
import pyaudio
import discord
import asyncio
from discord import VoiceClient, OpusNotLoaded
from internal.logger import create_logger
LOGGER = create_logger(__name__)
DISCORD_FRAME_SIZE = 3840
SILENT_FRAME = b'\x00' * DISCORD_FRAME_SIZE
# Constants for audio processing
SAMPLES_PER_FRAME = 960 # For 20ms audio at 48kHz
CHANNELS = 2
SAMPLE_RATE = 48000
FRAME_SIZE = SAMPLES_PER_FRAME * CHANNELS * 2 # 16-bit PCM (2 bytes)
class AudioStream:
# ... This class remains unchanged from the previous version ...
def __init__(self, _channels: int = 2, _sample_rate: int = 48000, _frames_per_buffer: int = 960,
_input_device_index: int = None, _output_device_index: int = None, _input: bool = True,
_output: bool = True, _init_on_startup: bool = True, **kwargs): # Accept extra kwargs
self.paInstance_kwargs = {
'format': pyaudio.paInt16,
'channels': _channels,
'rate': _sample_rate,
'input': _input,
'output': _output,
'frames_per_buffer': _frames_per_buffer
}
class AudioTransmitter:
def __init__(self, voice_client: VoiceClient, noise_gate_threshold: int, loop: asyncio.AbstractEventLoop, input_device_index: int):
if not voice_client.is_connected():
raise ValueError("VoiceClient is not connected.")
if not hasattr(voice_client, 'encoder') or not voice_client.encoder:
raise OpusNotLoaded("VoiceClient has not initialized its Opus encoder.")
if _input_device_index is not None:
if _input:
self.paInstance_kwargs['input_device_index'] = _input_device_index
else:
LOGGER.warning("[AudioStream.__init__]:\tInput was not enabled."
" Reinitialize with '_input=True'")
if _output_device_index is not None:
if _output:
self.paInstance_kwargs['output_device_index'] = _output_device_index
else:
LOGGER.warning("[AudioStream.__init__]:\tOutput was not enabled."
" Reinitialize with '_output=True'")
if _init_on_startup:
LOGGER.info("Creating PyAudio instance")
self.paInstance = pyaudio.PyAudio()
self.stream = None
if _output_device_index is not None or _input_device_index is not None:
if _init_on_startup:
LOGGER.info("Init stream")
self.init_stream()
def init_stream(self, _new_output_device_index: int = None, _new_input_device_index: int = None):
if _new_input_device_index is not None:
if self.paInstance_kwargs['input']:
self.paInstance_kwargs['input_device_index'] = _new_input_device_index
else:
LOGGER.warning("[AudioStream.init_stream]:\tInput was not enabled when initialized.")
if _new_output_device_index is not None:
if self.paInstance_kwargs['output']:
self.paInstance_kwargs['output_device_index'] = _new_output_device_index
else:
LOGGER.warning("[AudioStream.init_stream]:\tOutput was not enabled when initialized.")
self.close_if_open()
self.stream = self.paInstance.open(**self.paInstance_kwargs)
def close_if_open(self):
if self.stream and self.stream.is_active():
self.stream.stop_stream()
self.stream.close()
LOGGER.debug("[ReopenStream.close_if_open]:\t Stream was open; It was closed.")
class NoiseGate(AudioStream):
def __init__(self, _voice_connection, _noise_gate_threshold: int, loop: asyncio.AbstractEventLoop, **kwargs):
super(NoiseGate, self).__init__(**kwargs)
self.voice_connection = _voice_connection
self.THRESHOLD = _noise_gate_threshold
# **THE FIX**: Pass the loop down to the stream source.
self.NGStream = NoiseGateStream(self, loop)
def run(self) -> None:
LOGGER.debug("Starting stream")
self.stream.start_stream()
self.core()
def core(self):
if self.voice_connection.is_connected() and not self.voice_connection.is_playing():
LOGGER.debug("Playing stream to discord")
self.voice_connection.play(self.NGStream)
async def close(self):
LOGGER.debug("Closing NoiseGate resources...")
if self.voice_connection and self.voice_connection.is_connected():
# Set speaking to false on close
if self.NGStream.is_speaking:
asyncio.run_coroutine_threadsafe(self.voice_connection.ws.speak(False), self.NGStream.loop)
self.voice_connection.stop()
self.close_if_open()
if self.paInstance:
self.paInstance.terminate()
LOGGER.debug("NoiseGate resources closed.")
class NoiseGateStream(discord.AudioSource):
def __init__(self, noise_gate_instance: NoiseGate, loop: asyncio.AbstractEventLoop):
super(NoiseGateStream, self).__init__()
self.noise_gate = noise_gate_instance
self.voice_client = voice_client
self.threshold = noise_gate_threshold
self.loop = loop
self.is_speaking = True
self.NG_fadeout = 12
self.NG_fadeout_count = 0
self.input_device_index = input_device_index
def read(self):
self.pa = pyaudio.PyAudio()
self.stream = self.pa.open(
format=pyaudio.paInt16,
channels=CHANNELS,
rate=SAMPLE_RATE,
input=True,
frames_per_buffer=SAMPLES_PER_FRAME,
input_device_index=self.input_device_index
)
self._is_running = False
self._is_speaking = False
self.ng_fadeout_count = 0
self.NG_FADEOUT_FRAMES = 12 # 240ms fadeout time
async def _set_speaking(self, speaking: bool):
"""Safely sets the speaking state if it has changed."""
if self._is_speaking != speaking:
self._is_speaking = speaking
await self.voice_client.ws.speak(speaking)
async def start(self):
"""Starts the main audio transmission loop."""
self._is_running = True
self.stream.start_stream()
LOGGER.info("Audio transmitter started.")
try:
if not self.noise_gate.voice_connection.is_connected():
if self.is_speaking:
asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop)
self.is_speaking = False
return SILENT_FRAME
while self._is_running:
# Read audio data in a separate thread to not block the event loop
pcm_data = await self.loop.run_in_executor(
None, self.stream.read, SAMPLES_PER_FRAME
)
curr_buffer = self.noise_gate.stream.read(960, exception_on_overflow=False)
if len(curr_buffer) != DISCORD_FRAME_SIZE:
if self.is_speaking:
asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop)
self.is_speaking = False
return SILENT_FRAME
gate_is_open = self._check_noise_gate(pcm_data)
buffer_rms = audioop.rms(curr_buffer, 2)
gate_is_open = False
if buffer_rms > 0:
buffer_decibel = 20 * math.log10(buffer_rms)
if buffer_decibel >= self.noise_gate.THRESHOLD:
self.NG_fadeout_count = self.NG_fadeout
gate_is_open = True
elif self.NG_fadeout_count > 0:
self.NG_fadeout_count -= 1
gate_is_open = True
if gate_is_open and not self.is_speaking:
asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(True), self.loop)
self.is_speaking = True
elif not gate_is_open and self.is_speaking:
asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop)
self.is_speaking = False
return bytes(curr_buffer) if gate_is_open else SILENT_FRAME
if gate_is_open:
# If gate is open, ensure speaking is on and send audio
await self._set_speaking(True)
# Encode PCM data to Opus
encoded_packets = self.voice_client.encoder.encode(pcm_data, SAMPLES_PER_FRAME)
# Send each encoded packet
for packet in encoded_packets:
self.voice_client.send_audio_packet(packet)
else:
# If gate is closed, ensure speaking is off
await self._set_speaking(False)
# Wait for the next 20ms interval
await asyncio.sleep(0.02)
except asyncio.CancelledError:
LOGGER.info("Audio transmitter task cancelled.")
except Exception as e:
LOGGER.error(f"Unhandled exception in NoiseGateStream.read: {e}", exc_info=True)
if self.is_speaking:
asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop)
self.is_speaking = False
return SILENT_FRAME
LOGGER.error(f"Error in audio transmitter loop: {e}", exc_info=True)
finally:
await self._cleanup()
def _check_noise_gate(self, pcm_data: bytes) -> bool:
"""Applies the noise gate logic to raw PCM data."""
rms = audioop.rms(pcm_data, 2)
if rms == 0: return False
db = 20 * math.log10(rms)
if db >= self.threshold:
self.ng_fadeout_count = self.NG_FADEOUT_FRAMES
return True
elif self.ng_fadeout_count > 0:
self.ng_fadeout_count -= 1
return True
return False
async def stop(self):
"""Stops the transmission loop."""
self._is_running = False
async def _cleanup(self):
"""Cleans up all resources."""
LOGGER.info("Cleaning up transmitter resources.")
if self._is_speaking:
await self._set_speaking(False)
if self.stream.is_active():
self.stream.stop_stream()
self.stream.close()
self.pa.terminate()

View File

@@ -4,7 +4,7 @@ import os
from discord import VoiceClient, VoiceChannel, opus, Activity, ActivityType, Intents
from discord.ext import commands
from typing import Optional, Dict
from internal.NoiseGatev2 import NoiseGate
from internal.NoiseGatev2 import AudioTransmitter
from internal.logger import create_logger
LOGGER = create_logger(__name__)
@@ -18,7 +18,6 @@ class DiscordBotManager:
def __init__(self):
self.bot: Optional[commands.Bot] = None
self.bot_task: Optional[asyncio.Task] = None
# This dictionary will hold both the client and its audio stream handler
self.voice_connections: Dict[int, Dict] = {}
self.token: Optional[str] = None
self.loop = asyncio.get_event_loop()
@@ -83,7 +82,6 @@ class DiscordBotManager:
async def stop_bot(self):
async with self.lock:
if self.bot:
# Disconnect from all voice channels cleanly
for guild_id in list(self.voice_connections.keys()):
await self.leave_voice_channel(guild_id)
await self.bot.close()
@@ -112,20 +110,23 @@ class DiscordBotManager:
await asyncio.wait_for(self._voice_ready_event.wait(), timeout=15.0)
LOGGER.info("Bot voice connection is ready.")
stream_handler = NoiseGate(
_voice_connection=voice_client,
_noise_gate_threshold=ng_threshold,
# **THE FIX**: Pass the event loop to the stream handler.
# Create and start the new AudioTransmitter
transmitter = AudioTransmitter(
voice_client=voice_client,
noise_gate_threshold=ng_threshold,
loop=self.loop,
_input_device_index=device_id
input_device_index=device_id
)
stream_handler.run()
# Start the transmitter's main loop as a background task
transmitter_task = self.loop.create_task(transmitter.start())
self.voice_connections[guild_id] = {
"client": voice_client,
"stream": stream_handler
"transmitter": transmitter,
"task": transmitter_task
}
LOGGER.info(f"Joined guild {guild_id} and audio stream is now running.")
LOGGER.info(f"Joined guild {guild_id} and audio transmitter is running.")
except asyncio.TimeoutError:
LOGGER.error(f"Timeout waiting for bot to join voice channel {channel_id}.")
@@ -140,12 +141,16 @@ class DiscordBotManager:
connection_info = self.voice_connections.get(guild_id)
if not connection_info: raise RuntimeError("Not connected to the specified guild's voice channel.")
stream_handler = connection_info.get('stream')
if stream_handler:
LOGGER.info(f"Stopping audio stream for guild {guild_id}.")
await stream_handler.close()
# Stop the transmitter task and clean up its resources
transmitter = connection_info.get("transmitter")
task = connection_info.get("task")
if transmitter:
LOGGER.info(f"Stopping audio transmitter for guild {guild_id}.")
await transmitter.stop()
if task and not task.done():
task.cancel()
voice_client = connection_info.get('client')
voice_client = connection_info.get("client")
if voice_client and voice_client.is_connected():
await voice_client.disconnect()
@@ -153,15 +158,16 @@ class DiscordBotManager:
LOGGER.info(f"Left guild {guild_id} voice channel.")
async def load_opus(self):
# ... this method is unchanged ...
processor = platform.machine()
script_dir = os.path.dirname(os.path.abspath(__file__))
LOGGER.debug(f"Processor: {processor}, OS: {os.name}")
try:
if os.name == 'nt': # Windows
if os.name == 'nt':
if processor == "AMD64":
opus.load_opus(os.path.join(script_dir, './opus/libopus_amd64.dll'))
LOGGER.info("Loaded OPUS library for AMD64")
else: # Linux / other
else:
if processor == "aarch64":
opus.load_opus(os.path.join(script_dir, './opus/libopus_aarcch64.so'))
LOGGER.info("Loaded OPUS library for aarch64")
@@ -169,7 +175,6 @@ class DiscordBotManager:
opus.load_opus(os.path.join(script_dir, './opus/libopus_armv7l.so'))
LOGGER.info("Loaded OPUS library for armv7l")
else:
# Fallback for other Linux archs like x86_64
opus.load_opus('libopus.so.0')
LOGGER.info(f"Loaded system OPUS library for {processor}")
except Exception as e:
@@ -177,6 +182,7 @@ class DiscordBotManager:
raise RuntimeError("Could not load a valid Opus library. Voice functionality will fail.")
async def set_presence(self, system_name: str):
# ... this method is unchanged ...
if not self.bot or not self.bot.is_ready():
LOGGER.warning("Bot is not ready, cannot set presence.")
return