Fix voice activity: signal Discord speaking state from the noise gate
This commit is contained in:
@@ -2,22 +2,19 @@ import audioop
|
|||||||
import math
|
import math
|
||||||
import pyaudio
|
import pyaudio
|
||||||
import discord
|
import discord
|
||||||
|
import asyncio
|
||||||
from internal.logger import create_logger
|
from internal.logger import create_logger
|
||||||
|
|
||||||
LOGGER = create_logger(__name__)
|
LOGGER = create_logger(__name__)
|
||||||
|
|
||||||
# The size of a 20ms, 48kHz, stereo, 16-bit PCM audio frame.
|
|
||||||
# (960 frames * 2 channels * 2 bytes/sample)
|
|
||||||
DISCORD_FRAME_SIZE = 3840
|
DISCORD_FRAME_SIZE = 3840
|
||||||
SILENT_FRAME = b'\x00' * DISCORD_FRAME_SIZE
|
SILENT_FRAME = b'\x00' * DISCORD_FRAME_SIZE
|
||||||
|
|
||||||
|
|
||||||
# noinspection PyUnresolvedReferences
|
|
||||||
class AudioStream:
|
class AudioStream:
|
||||||
|
# NOTE: Mostly unchanged from the previous version; __init__ now accepts **kwargs and list_devices() has been removed.
|
||||||
def __init__(self, _channels: int = 2, _sample_rate: int = 48000, _frames_per_buffer: int = 960,
|
def __init__(self, _channels: int = 2, _sample_rate: int = 48000, _frames_per_buffer: int = 960,
|
||||||
_input_device_index: int = None, _output_device_index: int = None, _input: bool = True,
|
_input_device_index: int = None, _output_device_index: int = None, _input: bool = True,
|
||||||
_output: bool = True, _init_on_startup: bool = True):
|
_output: bool = True, _init_on_startup: bool = True, **kwargs): # Accept extra kwargs
|
||||||
# Corrected frames_per_buffer to 960 to match Discord's 20ms frame size
|
|
||||||
self.paInstance_kwargs = {
|
self.paInstance_kwargs = {
|
||||||
'format': pyaudio.paInt16,
|
'format': pyaudio.paInt16,
|
||||||
'channels': _channels,
|
'channels': _channels,
|
||||||
@@ -33,19 +30,16 @@ class AudioStream:
|
|||||||
else:
|
else:
|
||||||
LOGGER.warning("[AudioStream.__init__]:\tInput was not enabled."
|
LOGGER.warning("[AudioStream.__init__]:\tInput was not enabled."
|
||||||
" Reinitialize with '_input=True'")
|
" Reinitialize with '_input=True'")
|
||||||
|
|
||||||
if _output_device_index is not None:
|
if _output_device_index is not None:
|
||||||
if _output:
|
if _output:
|
||||||
self.paInstance_kwargs['output_device_index'] = _output_device_index
|
self.paInstance_kwargs['output_device_index'] = _output_device_index
|
||||||
else:
|
else:
|
||||||
LOGGER.warning("[AudioStream.__init__]:\tOutput was not enabled."
|
LOGGER.warning("[AudioStream.__init__]:\tOutput was not enabled."
|
||||||
" Reinitialize with '_output=True'")
|
" Reinitialize with '_output=True'")
|
||||||
|
|
||||||
if _init_on_startup:
|
if _init_on_startup:
|
||||||
LOGGER.info("Creating PyAudio instance")
|
LOGGER.info("Creating PyAudio instance")
|
||||||
self.paInstance = pyaudio.PyAudio()
|
self.paInstance = pyaudio.PyAudio()
|
||||||
self.stream = None
|
self.stream = None
|
||||||
|
|
||||||
if _output_device_index is not None or _input_device_index is not None:
|
if _output_device_index is not None or _input_device_index is not None:
|
||||||
if _init_on_startup:
|
if _init_on_startup:
|
||||||
LOGGER.info("Init stream")
|
LOGGER.info("Init stream")
|
||||||
@@ -56,16 +50,12 @@ class AudioStream:
|
|||||||
if self.paInstance_kwargs['input']:
|
if self.paInstance_kwargs['input']:
|
||||||
self.paInstance_kwargs['input_device_index'] = _new_input_device_index
|
self.paInstance_kwargs['input_device_index'] = _new_input_device_index
|
||||||
else:
|
else:
|
||||||
LOGGER.warning("[AudioStream.init_stream]:\tInput was not enabled when initialized."
|
LOGGER.warning("[AudioStream.init_stream]:\tInput was not enabled when initialized.")
|
||||||
" Reinitialize with '_input=True'")
|
|
||||||
|
|
||||||
if _new_output_device_index is not None:
|
if _new_output_device_index is not None:
|
||||||
if self.paInstance_kwargs['output']:
|
if self.paInstance_kwargs['output']:
|
||||||
self.paInstance_kwargs['output_device_index'] = _new_output_device_index
|
self.paInstance_kwargs['output_device_index'] = _new_output_device_index
|
||||||
else:
|
else:
|
||||||
LOGGER.warning("[AudioStream.init_stream]:\tOutput was not enabled when initialized."
|
LOGGER.warning("[AudioStream.init_stream]:\tOutput was not enabled when initialized.")
|
||||||
" Reinitialize with '_output=True'")
|
|
||||||
|
|
||||||
self.close_if_open()
|
self.close_if_open()
|
||||||
self.stream = self.paInstance.open(**self.paInstance_kwargs)
|
self.stream = self.paInstance.open(**self.paInstance_kwargs)
|
||||||
|
|
||||||
@@ -74,35 +64,14 @@ class AudioStream:
|
|||||||
self.stream.stop_stream()
|
self.stream.stop_stream()
|
||||||
self.stream.close()
|
self.stream.close()
|
||||||
LOGGER.debug("[ReopenStream.close_if_open]:\t Stream was open; It was closed.")
|
LOGGER.debug("[ReopenStream.close_if_open]:\t Stream was open; It was closed.")
|
||||||
|
|
||||||
def list_devices(self, _display_input_devices: bool = True, _display_output_devices: bool = True):
|
|
||||||
LOGGER.info('Getting a list of the devices connected')
|
|
||||||
info = self.paInstance.get_host_api_info_by_index(0)
|
|
||||||
numdevices = info.get('deviceCount')
|
|
||||||
|
|
||||||
devices = {'Input': {}, 'Output': {}}
|
|
||||||
for i in range(0, numdevices):
|
|
||||||
device_info = self.paInstance.get_device_info_by_host_api_device_index(0, i)
|
|
||||||
if (device_info.get('maxInputChannels')) > 0:
|
|
||||||
input_device = device_info.get('name')
|
|
||||||
devices['Input'][i] = input_device
|
|
||||||
if _display_input_devices:
|
|
||||||
LOGGER.debug(f"Input Device id {i} - {input_device}")
|
|
||||||
|
|
||||||
if (device_info.get('maxOutputChannels')) > 0:
|
|
||||||
output_device = device_info.get('name')
|
|
||||||
devices['Output'][i] = output_device
|
|
||||||
if _display_output_devices:
|
|
||||||
LOGGER.debug(f"Output Device id {i} - {output_device}")
|
|
||||||
return devices
|
|
||||||
|
|
||||||
# noinspection PyUnresolvedReferences
|
|
||||||
class NoiseGate(AudioStream):
|
class NoiseGate(AudioStream):
|
||||||
def __init__(self, _voice_connection, _noise_gate_threshold: int, **kwargs):
|
def __init__(self, _voice_connection, _noise_gate_threshold: int, loop: asyncio.AbstractEventLoop, **kwargs):
|
||||||
super(NoiseGate, self).__init__(_init_on_startup=True, **kwargs)
|
super(NoiseGate, self).__init__(**kwargs)
|
||||||
self.voice_connection = _voice_connection
|
self.voice_connection = _voice_connection
|
||||||
self.THRESHOLD = _noise_gate_threshold
|
self.THRESHOLD = _noise_gate_threshold
|
||||||
self.NGStream = NoiseGateStream(self)
|
# Pass the event loop down to the stream source so it can schedule ws.speak() calls on it via asyncio.run_coroutine_threadsafe.
|
||||||
|
self.NGStream = NoiseGateStream(self, loop)
|
||||||
|
|
||||||
def run(self) -> None:
|
def run(self) -> None:
|
||||||
LOGGER.debug("Starting stream")
|
LOGGER.debug("Starting stream")
|
||||||
@@ -117,6 +86,9 @@ class NoiseGate(AudioStream):
|
|||||||
async def close(self):
|
async def close(self):
|
||||||
LOGGER.debug("Closing NoiseGate resources...")
|
LOGGER.debug("Closing NoiseGate resources...")
|
||||||
if self.voice_connection and self.voice_connection.is_connected():
|
if self.voice_connection and self.voice_connection.is_connected():
|
||||||
|
# Set speaking to false on close
|
||||||
|
if self.NGStream.is_speaking:
|
||||||
|
asyncio.run_coroutine_threadsafe(self.voice_connection.ws.speak(False), self.NGStream.loop)
|
||||||
self.voice_connection.stop()
|
self.voice_connection.stop()
|
||||||
|
|
||||||
self.close_if_open()
|
self.close_if_open()
|
||||||
@@ -126,49 +98,55 @@ class NoiseGate(AudioStream):
|
|||||||
|
|
||||||
LOGGER.debug("NoiseGate resources closed.")
|
LOGGER.debug("NoiseGate resources closed.")
|
||||||
|
|
||||||
# noinspection PyUnresolvedReferences
|
|
||||||
class NoiseGateStream(discord.AudioSource):
|
class NoiseGateStream(discord.AudioSource):
|
||||||
def __init__(self, noise_gate_instance: NoiseGate):
|
def __init__(self, noise_gate_instance: NoiseGate, loop: asyncio.AbstractEventLoop):
|
||||||
super(NoiseGateStream, self).__init__()
|
super(NoiseGateStream, self).__init__()
|
||||||
self.noise_gate = noise_gate_instance
|
self.noise_gate = noise_gate_instance
|
||||||
|
self.loop = loop
|
||||||
|
self.is_speaking = True
|
||||||
|
|
||||||
self.NG_fadeout = 12
|
self.NG_fadeout = 12
|
||||||
self.NG_fadeout_count = 0
|
self.NG_fadeout_count = 0
|
||||||
self.process_set_count = 0
|
|
||||||
|
|
||||||
def read(self):
|
def read(self):
|
||||||
try:
|
try:
|
||||||
if not self.noise_gate.voice_connection.is_connected():
|
if not self.noise_gate.voice_connection.is_connected():
|
||||||
|
if self.is_speaking:
|
||||||
|
asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop)
|
||||||
|
self.is_speaking = False
|
||||||
return SILENT_FRAME
|
return SILENT_FRAME
|
||||||
|
|
||||||
curr_buffer = self.noise_gate.stream.read(960, exception_on_overflow=False)
|
curr_buffer = self.noise_gate.stream.read(960, exception_on_overflow=False)
|
||||||
|
|
||||||
if len(curr_buffer) != DISCORD_FRAME_SIZE:
|
if len(curr_buffer) != DISCORD_FRAME_SIZE:
|
||||||
|
if self.is_speaking:
|
||||||
|
asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop)
|
||||||
|
self.is_speaking = False
|
||||||
return SILENT_FRAME
|
return SILENT_FRAME
|
||||||
|
|
||||||
buffer_rms = audioop.rms(curr_buffer, 2)
|
buffer_rms = audioop.rms(curr_buffer, 2)
|
||||||
|
|
||||||
|
gate_is_open = False
|
||||||
if buffer_rms > 0:
|
if buffer_rms > 0:
|
||||||
buffer_decibel = 20 * math.log10(buffer_rms)
|
buffer_decibel = 20 * math.log10(buffer_rms)
|
||||||
|
|
||||||
if self.process_set_count % 10 == 0:
|
|
||||||
log_msg = f"[{'Open' if buffer_decibel >= self.noise_gate.THRESHOLD else 'Closed'}]"
|
|
||||||
LOGGER.debug(f"[NoiseGate {log_msg}] {buffer_decibel:.2f} dB")
|
|
||||||
|
|
||||||
if buffer_decibel >= self.noise_gate.THRESHOLD:
|
if buffer_decibel >= self.noise_gate.THRESHOLD:
|
||||||
self.NG_fadeout_count = self.NG_fadeout
|
self.NG_fadeout_count = self.NG_fadeout
|
||||||
self.process_set_count += 1
|
gate_is_open = True
|
||||||
return bytes(curr_buffer)
|
|
||||||
|
|
||||||
elif self.NG_fadeout_count > 0:
|
elif self.NG_fadeout_count > 0:
|
||||||
self.NG_fadeout_count -= 1
|
self.NG_fadeout_count -= 1
|
||||||
self.process_set_count += 1
|
gate_is_open = True
|
||||||
return bytes(curr_buffer)
|
|
||||||
|
|
||||||
return SILENT_FRAME
|
if gate_is_open and not self.is_speaking:
|
||||||
|
asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(True), self.loop)
|
||||||
|
self.is_speaking = True
|
||||||
|
elif not gate_is_open and self.is_speaking:
|
||||||
|
asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop)
|
||||||
|
self.is_speaking = False
|
||||||
|
|
||||||
|
return bytes(curr_buffer) if gate_is_open else SILENT_FRAME
|
||||||
|
|
||||||
except IOError as e:
|
|
||||||
LOGGER.error(f"PyAudio IOError in read(): {e}")
|
|
||||||
return SILENT_FRAME
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
LOGGER.error(f"Unhandled exception in NoiseGateStream.read: {e}", exc_info=True)
|
LOGGER.error(f"Unhandled exception in NoiseGateStream.read: {e}", exc_info=True)
|
||||||
|
if self.is_speaking:
|
||||||
|
asyncio.run_coroutine_threadsafe(self.noise_gate.voice_connection.ws.speak(False), self.loop)
|
||||||
|
self.is_speaking = False
|
||||||
return SILENT_FRAME
|
return SILENT_FRAME
|
||||||
@@ -105,23 +105,22 @@ class DiscordBotManager:
|
|||||||
if guild_id in self.voice_connections: raise RuntimeError("Already connected to this guild's voice channel.")
|
if guild_id in self.voice_connections: raise RuntimeError("Already connected to this guild's voice channel.")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 1. Connect to the channel first
|
|
||||||
self._voice_ready_event.clear()
|
self._voice_ready_event.clear()
|
||||||
voice_client = await channel.connect(timeout=60.0, reconnect=True)
|
voice_client = await channel.connect(timeout=60.0, reconnect=True)
|
||||||
LOGGER.debug("Voice client connecting...")
|
LOGGER.debug("Voice client connecting...")
|
||||||
|
|
||||||
# 2. Wait for the on_voice_state_update event to confirm readiness
|
|
||||||
await asyncio.wait_for(self._voice_ready_event.wait(), timeout=15.0)
|
await asyncio.wait_for(self._voice_ready_event.wait(), timeout=15.0)
|
||||||
LOGGER.info("Bot voice connection is ready.")
|
LOGGER.info("Bot voice connection is ready.")
|
||||||
|
|
||||||
# 3. NOW, create and start the audio stream handler
|
|
||||||
stream_handler = NoiseGate(
|
stream_handler = NoiseGate(
|
||||||
_input_device_index=device_id,
|
|
||||||
_voice_connection=voice_client,
|
_voice_connection=voice_client,
|
||||||
_noise_gate_threshold=ng_threshold)
|
_noise_gate_threshold=ng_threshold,
|
||||||
|
# Pass this manager's event loop to the stream handler so it can schedule speaking-state updates on it.
|
||||||
|
loop=self.loop,
|
||||||
|
_input_device_index=device_id
|
||||||
|
)
|
||||||
stream_handler.run()
|
stream_handler.run()
|
||||||
|
|
||||||
# 4. Store both client and stream handler for proper management
|
|
||||||
self.voice_connections[guild_id] = {
|
self.voice_connections[guild_id] = {
|
||||||
"client": voice_client,
|
"client": voice_client,
|
||||||
"stream": stream_handler
|
"stream": stream_handler
|
||||||
@@ -141,13 +140,11 @@ class DiscordBotManager:
|
|||||||
connection_info = self.voice_connections.get(guild_id)
|
connection_info = self.voice_connections.get(guild_id)
|
||||||
if not connection_info: raise RuntimeError("Not connected to the specified guild's voice channel.")
|
if not connection_info: raise RuntimeError("Not connected to the specified guild's voice channel.")
|
||||||
|
|
||||||
# Cleanly stop the associated audio stream first
|
|
||||||
stream_handler = connection_info.get('stream')
|
stream_handler = connection_info.get('stream')
|
||||||
if stream_handler:
|
if stream_handler:
|
||||||
LOGGER.info(f"Stopping audio stream for guild {guild_id}.")
|
LOGGER.info(f"Stopping audio stream for guild {guild_id}.")
|
||||||
await stream_handler.close()
|
await stream_handler.close()
|
||||||
|
|
||||||
# Disconnect the voice client
|
|
||||||
voice_client = connection_info.get('client')
|
voice_client = connection_info.get('client')
|
||||||
if voice_client and voice_client.is_connected():
|
if voice_client and voice_client.is_connected():
|
||||||
await voice_client.disconnect()
|
await voice_client.disconnect()
|
||||||
|
|||||||
Reference in New Issue
Block a user