Merge branch 'denormals-are-a-fuck' into 'master'

Ignore float denormals on ia32/64 for voice and sound

Closes ring-racers#283

See merge request kart-krew-dev/ring-racers-internal!2917
This commit is contained in:
Eidolon 2025-10-01 14:25:16 -05:00
commit 341c62cc07
7 changed files with 74 additions and 4 deletions

View file

@ -7379,7 +7379,9 @@ void NetKeepAlive(void)
FileSendTicker();
// Update voice whenever possible.
NetVoiceUpdate();
{
NetVoiceUpdate();
}
}
// If a tree falls in the forest but nobody is around to hear it, does it make a tic?
@ -7580,12 +7582,16 @@ void NetVoiceUpdate(void)
UINT8 *encoded = NULL;
float *subframe_buffer = NULL;
float *denoise_buffer = NULL;
ps_voiceupdatetime = I_GetPreciseTime();
if (dedicated)
{
ps_voiceupdatetime = I_GetPreciseTime() - ps_voiceupdatetime;
return;
}
floatdenormalstate_t dnzstate = M_EnterFloatDenormalToZero();
UINT32 bytes_dequed = 0;
bytes_dequed = S_SoundInputDequeueSamples((void*)(g_local_voice_buffer + g_local_voice_buffer_len), SRB2_VOICE_MAX_DEQUEUE_BYTES - (g_local_voice_buffer_len * sizeof(float)));
@ -7724,9 +7730,12 @@ void NetVoiceUpdate(void)
g_local_voice_buffer_len -= buffer_offset;
}
M_ExitFloatDenormalToZero(dnzstate);
if (denoise_buffer) Z_Free(denoise_buffer);
if (subframe_buffer) Z_Free(subframe_buffer);
if (encoded) Z_Free(encoded);
ps_voiceupdatetime = I_GetPreciseTime() - ps_voiceupdatetime;
return;
}

View file

@ -32,6 +32,11 @@
// Extended map support.
#include <ctype.h>
#if defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(__i386__) || defined(__i386) || defined(_M_IX86)
#include <immintrin.h>
#define NEED_INTEL_DENORMAL_BIT 1
#endif
#include "doomdef.h"
#include "g_game.h"
#include "m_misc.h"
@ -2805,3 +2810,44 @@ const char * M_Ftrim (double f)
return &dig[1];/* skip the 0 */
}
}
/**
 * Begin a section in which floating point denormals are treated as zero.
 *
 * When NEED_INTEL_DENORMAL_BIT is defined, the current flush-to-zero and
 * denormals-are-zero modes are read first, then each mode that is not
 * already on is switched on.
 *
 * \return Saved state to hand to M_ExitFloatDenormalToZero: bit 0 (value 1)
 *         is set if flush-to-zero was already on, bit 1 (value 2) is set if
 *         denormals-are-zero was already on. Always 0 when
 *         NEED_INTEL_DENORMAL_BIT is not defined.
 */
floatdenormalstate_t M_EnterFloatDenormalToZero(void)
{
#ifdef NEED_INTEL_DENORMAL_BIT
	// Sample both modes before touching either of them.
	const int ftz_already_on = (_MM_GET_FLUSH_ZERO_MODE() == _MM_FLUSH_ZERO_ON);
	const int daz_already_on = (_MM_GET_DENORMALS_ZERO_MODE() == _MM_DENORMALS_ZERO_ON);
	floatdenormalstate_t saved = 0;

	if (ftz_already_on)
	{
		saved |= 1; // remember that the caller already had flush-to-zero on
	}
	else
	{
		_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
	}

	if (daz_already_on)
	{
		saved |= 2; // remember that the caller already had denormals-are-zero on
	}
	else
	{
		_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
	}

	return saved;
#else
	// Nothing to toggle in this build; there is no state to remember.
	return 0;
#endif
}
/**
 * End a denormal-to-zero section, putting back the modes that the matching
 * M_EnterFloatDenormalToZero call had to switch on.
 *
 * \param previous Value returned by M_EnterFloatDenormalToZero. Bit 0
 *                 (value 1) means flush-to-zero was already on before the
 *                 section; bit 1 (value 2) means denormals-are-zero was
 *                 already on. A mode whose bit is clear is switched off
 *                 here; a mode whose bit is set is left untouched.
 *
 * When NEED_INTEL_DENORMAL_BIT is not defined this does nothing.
 */
void M_ExitFloatDenormalToZero(floatdenormalstate_t previous)
{
#ifdef NEED_INTEL_DENORMAL_BIT
	const int ftz_was_on = ((previous & 1) != 0);
	const int daz_was_on = ((previous & 2) != 0);

	// Only undo what the enter call changed.
	if (!ftz_was_on)
	{
		_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF);
	}

	if (!daz_was_on)
	{
		_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_OFF);
	}
#else
	// Silence the unused-parameter warning in builds with nothing to restore.
	(void)previous;
#endif
}

View file

@ -167,6 +167,13 @@ FUNCMATH UINT8 M_CountBits(UINT32 num, UINT8 size);
extern char configfile[MAX_WADPATH];
/** Saved denormal-handling state passed from M_EnterFloatDenormalToZero to
 *  M_ExitFloatDenormalToZero. Callers should treat it as opaque.
 *  NOTE(review): the implementation in this change uses bit 0 for
 *  "flush-to-zero was already on" and bit 1 for "denormals-are-zero was
 *  already on"; confirm against m_misc before relying on the layout. */
typedef INT32 floatdenormalstate_t;
/** Enable floating point denormal-to-zero section, if necessary.
 *  Returns the previous state, to be passed to M_ExitFloatDenormalToZero. */
floatdenormalstate_t M_EnterFloatDenormalToZero(void);
/** Exit floating point denormal-to-zero section, if necessary, restoring previous state.
 *  `previous` is the value returned by the matching M_EnterFloatDenormalToZero call. */
void M_ExitFloatDenormalToZero(floatdenormalstate_t previous);
#ifdef __cplusplus
} // extern "C"
#endif

View file

@ -61,6 +61,8 @@ int ps_checkposition_calls = 0;
precise_t ps_lua_thinkframe_time = 0;
int ps_lua_mobjhooks = 0;
precise_t ps_voiceupdatetime = 0;
// dynamically allocated resizeable array for thinkframe hook stats
ps_hookinfo_t *thinkframe_hooks = NULL;
int thinkframe_hooks_length = 0;
@ -208,6 +210,7 @@ static void M_DrawRenderStats(void)
perfstatrow_t tictime_row[] = {
{"logic ", "Game logic: ", &ps_tictime},
{"voice ", "Voice update: ", &ps_voiceupdatetime},
{0}
};

View file

@ -45,6 +45,8 @@ extern int ps_checkposition_calls;
extern precise_t ps_lua_thinkframe_time;
extern int ps_lua_mobjhooks;
extern precise_t ps_voiceupdatetime;
struct ps_hookinfo_t
{
precise_t time_taken;

View file

@ -30,10 +30,10 @@ void P_TestLine(line_t* ld)
g_lines.emplace_back(ld);
}
extern "C" consvar_t cv_showgremlins;
line_t* P_SweepTestLines(fixed_t ax, fixed_t ay, fixed_t bx, fixed_t by, fixed_t r, vector2_t* return_normal)
{
extern consvar_t cv_showgremlins;
using namespace srb2::math;
using namespace srb2::sweep;
@ -96,7 +96,7 @@ line_t* P_SweepTestLines(fixed_t ax, fixed_t ay, fixed_t bx, fixed_t by, fixed_t
if (cv_showgremlins.value)
{
mobj_t *mo = g_tm.thing;
if (mo)
{
mobj_t *x = P_SpawnMobj(mo->x, mo->y, mo->z, MT_THOK);

View file

@ -31,6 +31,7 @@
#include "../doomdef.h"
#include "../i_sound.h"
#include "../m_misc.h"
#include "../s_sound.h"
#include "../sounds.h"
#include "../w_wad.h"
@ -258,6 +259,8 @@ void audio_callback(void* userdata, Uint8* buffer, int len)
tracy::SetThreadName("SDL Audio Thread");
FrameMarkStart(kAudio);
ZoneScoped;
floatdenormalstate_t dtzstate = M_EnterFloatDenormalToZero();
auto dtzrestore = srb2::finally([dtzstate] { M_ExitFloatDenormalToZero(dtzstate); });
// The SDL Audio lock is implied to be held during callback.