From 87db19548120e282b139df00ea71a9cd52adf645 Mon Sep 17 00:00:00 2001 From: James R Date: Wed, 17 Jan 2024 11:30:19 -0800 Subject: [PATCH] R_DrawColumnTemplate: minor optimization for non-power-of-2 textures This is very likely a platform-specific optimization. The optimization replaces a very hot std::clamp call. However, the code is nearly functionally identical to the libstdc++ implementation, aside from being completely inlined. I can't say why my version is faster, since I haven't read any assembly output. More details in source code comments. --- src/r_draw_column.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/r_draw_column.cpp b/src/r_draw_column.cpp index fefbb9fbf..58aa7cf59 100644 --- a/src/r_draw_column.cpp +++ b/src/r_draw_column.cpp @@ -263,7 +263,19 @@ static void R_DrawColumnTemplate(drawcolumndata_t *dc) // -1 is the lower clamp bound because column posts have a "safe" byte before the real data // and a few bytes after as well - *dest = R_DrawColumnPixel(dc, dest, std::clamp(frac >> FRACBITS, npow2min, npow2max)); + //*dest = R_DrawColumnPixel(dc, dest, std::clamp(frac >> FRACBITS, npow2min, npow2max)); + { + // jartha: faster on my AMD FX-6300 CPU. + // Faster than ternaries, faster than std::min/std::max. Don't ask me why. + // I tested by viewing a non-PO2 texture from a consistent distance so it covered the entire screen. + // The framerate difference was about 50 frames at 640x400. + INT32 n = frac >> FRACBITS; + if (n < npow2min) + n = npow2min; + if (n > npow2max) + n = npow2max; + *dest = R_DrawColumnPixel(dc, dest, n); + } dest += vid.width;