From 0db1b1606197abc247348f3425019fe630c6944a Mon Sep 17 00:00:00 2001
From: Eidolon <furyhunter600@gmail.com>
Date: Sat, 13 Sep 2025 15:30:36 -0500
Subject: [PATCH] Adjust some voice chat parameters and behavior

1. If the input sample queue grows too long, frames are skipped instead
   of being encoded. This prevents sending a burst of voice packets at once.
2. The Opus frame size is doubled to 20ms to reduce UDP overhead.
3. The Opus bitrate is reduced from its default to 10kbps, to prevent
   potential UDP MTU size overruns.
4. The server ignores Opus frames beyond a per-tic limit (3) from each
   player, preventing excessive voice packet propagation.
5. Push-to-talk no longer consumes its key events (so it no longer eats
   chat typing), and it no longer activates while chat mode is on.
---
 src/d_clisrv.c        | 31 ++++++++++++++++++++++++++++---
 src/dummy/i_sound.c   |  5 +++++
 src/hu_stuff.c        |  6 ++----
 src/i_sound.h         |  1 +
 src/s_sound.c         |  5 +++++
 src/s_sound.h         |  1 +
 src/sdl/new_sound.cpp | 17 ++++++++++++++++-
 7 files changed, 58 insertions(+), 8 deletions(-)

diff --git a/src/d_clisrv.c b/src/d_clisrv.c
index a4cd1941b..5e9a6e857 100644
--- a/src/d_clisrv.c
+++ b/src/d_clisrv.c
@@ -200,9 +200,11 @@ tic_t firstconnectattempttime = 0;
 
 static OpusDecoder *g_player_opus_decoders[MAXPLAYERS];
 static UINT64 g_player_opus_lastframe[MAXPLAYERS];
+static UINT32 g_player_voice_frames_this_tic[MAXPLAYERS];
+#define MAX_PLAYER_VOICE_FRAMES_PER_TIC 3
 static OpusEncoder *g_local_opus_encoder;
 static UINT64 g_local_opus_frame = 0;
-#define SRB2_VOICE_OPUS_FRAME_SIZE 480
+#define SRB2_VOICE_OPUS_FRAME_SIZE 960
 static float g_local_voice_buffer[SRB2_VOICE_OPUS_FRAME_SIZE];
 static INT32 g_local_voice_buffer_len = 0;
 static INT32 g_local_voice_threshold_time = 0;
@@ -3698,12 +3700,13 @@ static void InitializeLocalVoiceEncoder(void)
 	}
 	int error;
 	encoder = opus_encoder_create(48000, 1, OPUS_APPLICATION_VOIP, &error);
-	opus_encoder_ctl(encoder, OPUS_SET_VBR(0));
 	if (error != OPUS_OK)
 	{
 		CONS_Alert(CONS_WARNING, "Failed to create Opus voice encoder: opus error %d\n", error);
 		encoder = NULL;
 	}
+	opus_encoder_ctl(encoder, OPUS_SET_VBR(0));
+	opus_encoder_ctl(encoder, OPUS_SET_BITRATE(10000));
 	g_local_opus_encoder = encoder;
 	g_local_opus_frame = 0;
 }
@@ -5373,6 +5376,12 @@ static void PT_HandleVoiceServer(SINT8 node)
 		// ignore, they should not be able to broadcast voice
 		return;
 	}
+	g_player_voice_frames_this_tic[playernum] += 1;
+	if (g_player_voice_frames_this_tic[playernum] > MAX_PLAYER_VOICE_FRAMES_PER_TIC)
+	{
+		// ignore; they sent too many voice frames this tic
+		return;
+	}
 
 	// Preserve terminal bit, blank all other bits
 	pl->flags &= VOICE_PAK_FLAGS_TERMINAL_BIT;
@@ -5401,6 +5410,7 @@ static void PT_HandleVoiceServer(SINT8 node)
 			HSendPacket(pnode, false, 0, doomcom->datalength - BASEPACKETSIZE);
 		}
 	}
+
 	PT_HandleVoiceClient(node, true);
 }
 
@@ -6867,6 +6877,12 @@ boolean TryRunTics(tic_t realtics)
 			{
 				break;
 			}
+
+			// Reset received voice frames per tic for all players
+			for (int i = 0; i < MAXPLAYERS; i++)
+			{
+				g_player_voice_frames_this_tic[i] = 0;
+			}
 		}
 
 		if (F_IsDeferredContinueCredits())
@@ -7489,7 +7505,8 @@ void NetVoiceUpdate(void)
 	{
 		// We need to drain the input queue completely, so do this in a full loop
 
-		INT32 to_read = (SRB2_VOICE_OPUS_FRAME_SIZE - g_local_voice_buffer_len) * sizeof(float);
+		UINT32 to_read = (SRB2_VOICE_OPUS_FRAME_SIZE - g_local_voice_buffer_len) * sizeof(float);
+
 		if (to_read > 0)
 		{
 			// Attempt to fill the voice frame buffer
@@ -7507,6 +7524,14 @@ void NetVoiceUpdate(void)
 			continue;
 		}
 
+		if (S_SoundInputRemainingSamples() > 5 * SRB2_VOICE_OPUS_FRAME_SIZE)
+		{
+			// If more than 5 frames' worth of samples (100ms) are still queued, drop this frame instead of encoding it.
+			// This drains the input queue without sending a burst of packets that might back up in the network driver.
+			g_local_voice_buffer_len = 0;
+			continue;
+		}
+
 		// Amp of +10 dB is appromiately "twice as loud"
 		float ampfactor = powf(10, (float) cv_voice_inputamp.value / 20.f);
 		for (int i = 0; i < g_local_voice_buffer_len; i++)
diff --git a/src/dummy/i_sound.c b/src/dummy/i_sound.c
index 74b052208..4147548c3 100644
--- a/src/dummy/i_sound.c
+++ b/src/dummy/i_sound.c
@@ -240,6 +240,11 @@ UINT32 I_SoundInputDequeueSamples(void *data, UINT32 len)
 	return 0;
 }
 
+UINT32 I_SoundInputRemainingSamples(void)
+{
+	return 0;
+}
+
 void I_QueueVoiceFrameFromPlayer(INT32 playernum, void *data, UINT32 len, boolean terminal)
 {
 }
diff --git a/src/hu_stuff.c b/src/hu_stuff.c
index e252fc1f4..fe66f84bf 100644
--- a/src/hu_stuff.c
+++ b/src/hu_stuff.c
@@ -710,7 +710,7 @@ static void Got_Saycmd(const UINT8 **p, INT32 playernum)
 			}
 		}
 	}
-	
+
 	if (LUA_HookPlayerMsg(playernum, target, flags, msg))
 		return;
 
@@ -1116,15 +1116,13 @@ boolean HU_Responder(event_t *ev)
 		|| ev->data1 == gamecontrol[0][gc_voicepushtotalk][2]
 		|| ev->data1 == gamecontrol[0][gc_voicepushtotalk][3])
 	{
-		if (ev->type == ev_keydown)
+		if (ev->type == ev_keydown && !chat_on)
 		{
 			g_voicepushtotalk_on = true;
-			return true;
 		}
 		else if (ev->type == ev_keyup)
 		{
 			g_voicepushtotalk_on = false;
-			return true;
 		}
 	}
 
diff --git a/src/i_sound.h b/src/i_sound.h
index f0c4814e1..6791ed88e 100644
--- a/src/i_sound.h
+++ b/src/i_sound.h
@@ -252,6 +252,7 @@ boolean I_FadeInPlaySong(UINT32 ms, boolean looping);
 boolean I_SoundInputIsEnabled(void);
 boolean I_SoundInputSetEnabled(boolean enabled);
 UINT32 I_SoundInputDequeueSamples(void *data, UINT32 len);
+UINT32 I_SoundInputRemainingSamples(void);
 
 // VOICE CHAT
 
diff --git a/src/s_sound.c b/src/s_sound.c
index 05907cc22..121147978 100644
--- a/src/s_sound.c
+++ b/src/s_sound.c
@@ -2872,6 +2872,11 @@ UINT32 S_SoundInputDequeueSamples(void *data, UINT32 len)
 	return I_SoundInputDequeueSamples(data, len);
 }
 
+UINT32 S_SoundInputRemainingSamples(void)
+{
+	return I_SoundInputRemainingSamples();
+}
+
 static INT32 g_playerlastvoiceactive[MAXPLAYERS];
 
 void S_QueueVoiceFrameFromPlayer(INT32 playernum, void *data, UINT32 len, boolean terminal)
diff --git a/src/s_sound.h b/src/s_sound.h
index 72a0b3387..6883c642f 100644
--- a/src/s_sound.h
+++ b/src/s_sound.h
@@ -295,6 +295,7 @@ void S_StopSoundByNum(sfxenum_t sfxnum);
 boolean S_SoundInputIsEnabled(void);
 boolean S_SoundInputSetEnabled(boolean enabled);
 UINT32 S_SoundInputDequeueSamples(void *data, UINT32 len);
+UINT32 S_SoundInputRemainingSamples(void);
 
 void S_QueueVoiceFrameFromPlayer(INT32 playernum, void *data, UINT32 len, boolean terminal);
 void S_SetPlayerVoiceActive(INT32 playernum);
diff --git a/src/sdl/new_sound.cpp b/src/sdl/new_sound.cpp
index 1fa9deea2..92f121652 100644
--- a/src/sdl/new_sound.cpp
+++ b/src/sdl/new_sound.cpp
@@ -1013,7 +1013,7 @@ boolean I_SoundInputSetEnabled(boolean enabled)
 		SDL_AudioSpec input_desired {};
 		input_desired.format = AUDIO_F32SYS;
 		input_desired.channels = 1;
-		input_desired.samples = 2048;
+		input_desired.samples = 1024;
 		input_desired.freq = 48000;
 		SDL_AudioSpec input_obtained {};
 		g_input_device_id = SDL_OpenAudioDevice(nullptr, SDL_TRUE, &input_desired, &input_obtained, 0);
@@ -1022,6 +1022,11 @@ boolean I_SoundInputSetEnabled(boolean enabled)
 			CONS_Alert(CONS_WARNING, "Failed to open input audio device: %s\n", SDL_GetError());
 			return false;
 		}
+		if (input_obtained.freq != 48000 || input_obtained.format != AUDIO_F32SYS || input_obtained.channels != 1)
+		{
+			CONS_Alert(CONS_WARNING, "Input audio device has unexpected unusable format: %s\n", SDL_GetError());
+			return false;
+		}
 		SDL_PauseAudioDevice(g_input_device_id, SDL_FALSE);
 	}
 	else if (g_input_device_id != 0 && !enabled)
@@ -1051,6 +1056,16 @@ UINT32 I_SoundInputDequeueSamples(void *data, UINT32 len)
 	return ret;
 }
 
+UINT32 I_SoundInputRemainingSamples(void)
+{
+	if (!g_input_device_id)
+	{
+		return 0;
+	}
+	UINT32 avail = SDL_GetQueuedAudioSize(g_input_device_id);
+	return avail / sizeof(float);
+}
+
 void I_QueueVoiceFrameFromPlayer(INT32 playernum, void *data, UINT32 len, boolean terminal)
 {
 	if (!sound_started)