Adjust some voice chat parameters and behavior

1. If the sample queue is too long, frames will be skipped. This prevents sending a burst of voice packets all at once.
2. The Opus frame size is doubled to 20ms to reduce UDP overhead.
3. The Opus bitrate is reduced from its default to 10kbps, to prevent potential UDP MTU size overruns.
4. The server will ignore Opus frames past a certain limit per tic from each player, preventing excessive voice packet propagation.
5. Push to talk no longer eats chat typing, and no longer responds while chat mode is on.
2026-03-16 08:01:47 +00:00 · 2025-09-13 15:30:36 -05:00 · 2025-09-13 15:30:36 -05:00 · 0db1b16061
commit 0db1b16061
parent 68d83f6870
7 changed files with 58 additions and 8 deletions
--- a/src/d_clisrv.c
+++ b/src/d_clisrv.c
@ -200,9 +200,11 @@ tic_t firstconnectattempttime = 0;

 static OpusDecoder *g_player_opus_decoders[MAXPLAYERS];
 static UINT64 g_player_opus_lastframe[MAXPLAYERS];
+static UINT32 g_player_voice_frames_this_tic[MAXPLAYERS];
+#define MAX_PLAYER_VOICE_FRAMES_PER_TIC 3
 static OpusEncoder *g_local_opus_encoder;
 static UINT64 g_local_opus_frame = 0;
-#define SRB2_VOICE_OPUS_FRAME_SIZE 480
+#define SRB2_VOICE_OPUS_FRAME_SIZE 960
 static float g_local_voice_buffer[SRB2_VOICE_OPUS_FRAME_SIZE];
 static INT32 g_local_voice_buffer_len = 0;
 static INT32 g_local_voice_threshold_time = 0;
@ -3698,12 +3700,13 @@ static void InitializeLocalVoiceEncoder(void)
 	}
 	int error;
 	encoder = opus_encoder_create(48000, 1, OPUS_APPLICATION_VOIP, &error);
-	opus_encoder_ctl(encoder, OPUS_SET_VBR(0));
 	if (error != OPUS_OK)
 	{
 		CONS_Alert(CONS_WARNING, "Failed to create Opus voice encoder: opus error %d\n", error);
 		encoder = NULL;
 	}
+	opus_encoder_ctl(encoder, OPUS_SET_VBR(0));
+	opus_encoder_ctl(encoder, OPUS_SET_BITRATE(10000));
 	g_local_opus_encoder = encoder;
 	g_local_opus_frame = 0;
 }
@ -5373,6 +5376,12 @@ static void PT_HandleVoiceServer(SINT8 node)
 		// ignore, they should not be able to broadcast voice
 		return;
 	}
+	g_player_voice_frames_this_tic[playernum] += 1;
+	if (g_player_voice_frames_this_tic[playernum] > MAX_PLAYER_VOICE_FRAMES_PER_TIC)
+	{
+		// ignore; they sent too many voice frames this tic
+		return;
+	}

 	// Preserve terminal bit, blank all other bits
 	pl->flags &= VOICE_PAK_FLAGS_TERMINAL_BIT;
@ -5401,6 +5410,7 @@ static void PT_HandleVoiceServer(SINT8 node)
 			HSendPacket(pnode, false, 0, doomcom->datalength - BASEPACKETSIZE);
 		}
 	}
+
 	PT_HandleVoiceClient(node, true);
 }

@ -6867,6 +6877,12 @@ boolean TryRunTics(tic_t realtics)
 			{
 				break;
 			}
+
+			// Reset received voice frames per tic for all players
+			for (int i = 0; i < MAXPLAYERS; i++)
+			{
+				g_player_voice_frames_this_tic[i] = 0;
+			}
 		}

 		if (F_IsDeferredContinueCredits())
@ -7489,7 +7505,8 @@ void NetVoiceUpdate(void)
 	{
 		// We need to drain the input queue completely, so do this in a full loop

-		INT32 to_read = (SRB2_VOICE_OPUS_FRAME_SIZE - g_local_voice_buffer_len) * sizeof(float);
+		UINT32 to_read = (SRB2_VOICE_OPUS_FRAME_SIZE - g_local_voice_buffer_len) * sizeof(float);
+
 		if (to_read > 0)
 		{
 			// Attempt to fill the voice frame buffer
@ -7507,6 +7524,14 @@ void NetVoiceUpdate(void)
 			continue;
 		}

+		if (S_SoundInputRemainingSamples() > 5 * SRB2_VOICE_OPUS_FRAME_SIZE)
+		{
+			// If there are too many frames worth of samples to dequeue (100ms), skip this frame instead of encoding.
+			// This is so we drain the queue without sending too many packets that might queue up on the network driver.
+			g_local_voice_buffer_len = 0;
+			continue;
+		}
+
 		// Amp of +10 dB is appromiately "twice as loud"
 		float ampfactor = powf(10, (float) cv_voice_inputamp.value / 20.f);
 		for (int i = 0; i < g_local_voice_buffer_len; i++)
--- a/src/dummy/i_sound.c
+++ b/src/dummy/i_sound.c
@ -240,6 +240,11 @@ UINT32 I_SoundInputDequeueSamples(void *data, UINT32 len)
 	return 0;
 }

+UINT32 I_SoundInputRemainingSamples(void)
+{
+	return 0; // dummy sound backend: always reports no queued input samples
+}
+
 void I_QueueVoiceFrameFromPlayer(INT32 playernum, void *data, UINT32 len, boolean terminal)
 {
 }
--- a/src/hu_stuff.c
+++ b/src/hu_stuff.c
@ -710,7 +710,7 @@ static void Got_Saycmd(const UINT8 **p, INT32 playernum)
 			}
 		}
 	}
-	
+
 	if (LUA_HookPlayerMsg(playernum, target, flags, msg))
 		return;

@ -1116,15 +1116,13 @@ boolean HU_Responder(event_t *ev)
 		|| ev->data1 == gamecontrol[0][gc_voicepushtotalk][2]
 		|| ev->data1 == gamecontrol[0][gc_voicepushtotalk][3])
 	{
-		if (ev->type == ev_keydown)
+		if (ev->type == ev_keydown && !chat_on)
 		{
 			g_voicepushtotalk_on = true;
-			return true;
 		}
 		else if (ev->type == ev_keyup)
 		{
 			g_voicepushtotalk_on = false;
-			return true;
 		}
 	}

--- a/src/i_sound.h
+++ b/src/i_sound.h
@ -252,6 +252,7 @@ boolean I_FadeInPlaySong(UINT32 ms, boolean looping);
 boolean I_SoundInputIsEnabled(void);
 boolean I_SoundInputSetEnabled(boolean enabled);
 UINT32 I_SoundInputDequeueSamples(void *data, UINT32 len);
+UINT32 I_SoundInputRemainingSamples(void);

 // VOICE CHAT

--- a/src/s_sound.c
+++ b/src/s_sound.c
@ -2872,6 +2872,11 @@ UINT32 S_SoundInputDequeueSamples(void *data, UINT32 len)
 	return I_SoundInputDequeueSamples(data, len);
 }

+UINT32 S_SoundInputRemainingSamples(void)
+{
+	return I_SoundInputRemainingSamples(); // thin wrapper: forwards to the I_ sound interface implementation
+}
+
 static INT32 g_playerlastvoiceactive[MAXPLAYERS];

 void S_QueueVoiceFrameFromPlayer(INT32 playernum, void *data, UINT32 len, boolean terminal)
--- a/src/s_sound.h
+++ b/src/s_sound.h
@ -295,6 +295,7 @@ void S_StopSoundByNum(sfxenum_t sfxnum);
 boolean S_SoundInputIsEnabled(void);
 boolean S_SoundInputSetEnabled(boolean enabled);
 UINT32 S_SoundInputDequeueSamples(void *data, UINT32 len);
+UINT32 S_SoundInputRemainingSamples(void);

 void S_QueueVoiceFrameFromPlayer(INT32 playernum, void *data, UINT32 len, boolean terminal);
 void S_SetPlayerVoiceActive(INT32 playernum);
--- a/src/sdl/new_sound.cpp
+++ b/src/sdl/new_sound.cpp
@ -1013,7 +1013,7 @@ boolean I_SoundInputSetEnabled(boolean enabled)
 		SDL_AudioSpec input_desired {};
 		input_desired.format = AUDIO_F32SYS;
 		input_desired.channels = 1;
-		input_desired.samples = 2048;
+		input_desired.samples = 1024;
 		input_desired.freq = 48000;
 		SDL_AudioSpec input_obtained {};
 		g_input_device_id = SDL_OpenAudioDevice(nullptr, SDL_TRUE, &input_desired, &input_obtained, 0);
@ -1022,6 +1022,11 @@ boolean I_SoundInputSetEnabled(boolean enabled)
 			CONS_Alert(CONS_WARNING, "Failed to open input audio device: %s\n", SDL_GetError());
 			return false;
 		}
+		if (input_obtained.freq != 48000 || input_obtained.format != AUDIO_F32SYS || input_obtained.channels != 1)
+		{
+			CONS_Alert(CONS_WARNING, "Input audio device has unexpected unusable format: %s\n", SDL_GetError());
+			return false;
+		}
 		SDL_PauseAudioDevice(g_input_device_id, SDL_FALSE);
 	}
 	else if (g_input_device_id != 0 && !enabled)
@ -1051,6 +1056,16 @@ UINT32 I_SoundInputDequeueSamples(void *data, UINT32 len)
 	return ret;
 }

+UINT32 I_SoundInputRemainingSamples(void)
+{
+	// Without an input device id there is nothing queued to report.
+	if (g_input_device_id == 0)
+	{
+		return 0;
+	}
+	return SDL_GetQueuedAudioSize(g_input_device_id) / sizeof(float); // queued bytes -> float samples
+}
+
 void I_QueueVoiceFrameFromPlayer(INT32 playernum, void *data, UINT32 len, boolean terminal)
 {
 	if (!sound_started)