NPO2 slope span optimization

# Conflicts:
#	src/CMakeLists.txt
#	src/r_draw.c
#	src/sdl/Srb2SDL-vc10.vcxproj
This commit is contained in:
toaster 2022-03-18 16:18:47 +00:00
parent 37e16db41e
commit 0c12a389e1
5 changed files with 2195 additions and 80 deletions

2082
src/libdivide.h Normal file

File diff suppressed because it is too large. Load diff

View file

@ -27,6 +27,7 @@
#include "console.h" // Until buffering gets finished #include "console.h" // Until buffering gets finished
#include "k_color.h" // SRB2kart #include "k_color.h" // SRB2kart
#include "i_threads.h" #include "i_threads.h"
#include "libdivide.h" // used by NPO2 tilted span functions
#ifdef HWRENDER #ifdef HWRENDER
#include "hardware/hw_main.h" #include "hardware/hw_main.h"

View file

@ -106,6 +106,9 @@ void R_DrawTiltedSpan_NPO2_8(void)
double endz, endu, endv; double endz, endu, endv;
UINT32 stepu, stepv; UINT32 stepu, stepv;
struct libdivide_u32_t x_divider = libdivide_u32_gen(ds_flatwidth);
struct libdivide_u32_t y_divider = libdivide_u32_gen(ds_flatheight);
iz = ds_szp->z + ds_szp->y*(centery-ds_y) + ds_szp->x*(ds_x1-centerx); iz = ds_szp->z + ds_szp->y*(centery-ds_y) + ds_szp->x*(ds_x1-centerx);
// Lighting is simple. It's just linear interpolation from start to end // Lighting is simple. It's just linear interpolation from start to end
@ -145,12 +148,13 @@ void R_DrawTiltedSpan_NPO2_8(void)
// Carefully align all of my Friends. // Carefully align all of my Friends.
if (x < 0) if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth); x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0) if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
x %= ds_flatwidth; y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
y %= ds_flatheight;
*dest = colormap[source[((y * ds_flatwidth) + x)]]; *dest = colormap[source[((y * ds_flatwidth) + x)]];
} }
@ -194,12 +198,13 @@ void R_DrawTiltedSpan_NPO2_8(void)
// Carefully align all of my Friends. // Carefully align all of my Friends.
if (x < 0) if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth); x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0) if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
x %= ds_flatwidth; y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
y %= ds_flatheight;
*dest = colormap[source[((y * ds_flatwidth) + x)]]; *dest = colormap[source[((y * ds_flatwidth) + x)]];
} }
@ -225,12 +230,13 @@ void R_DrawTiltedSpan_NPO2_8(void)
// Carefully align all of my Friends. // Carefully align all of my Friends.
if (x < 0) if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth); x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0) if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
x %= ds_flatwidth; y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
y %= ds_flatheight;
*dest = colormap[source[((y * ds_flatwidth) + x)]]; *dest = colormap[source[((y * ds_flatwidth) + x)]];
} }
@ -261,12 +267,13 @@ void R_DrawTiltedSpan_NPO2_8(void)
// Carefully align all of my Friends. // Carefully align all of my Friends.
if (x < 0) if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth); x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0) if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
x %= ds_flatwidth; y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
y %= ds_flatheight;
*dest = colormap[source[((y * ds_flatwidth) + x)]]; *dest = colormap[source[((y * ds_flatwidth) + x)]];
} }
@ -299,6 +306,9 @@ void R_DrawTiltedTranslucentSpan_NPO2_8(void)
double endz, endu, endv; double endz, endu, endv;
UINT32 stepu, stepv; UINT32 stepu, stepv;
struct libdivide_u32_t x_divider = libdivide_u32_gen(ds_flatwidth);
struct libdivide_u32_t y_divider = libdivide_u32_gen(ds_flatheight);
iz = ds_szp->z + ds_szp->y*(centery-ds_y) + ds_szp->x*(ds_x1-centerx); iz = ds_szp->z + ds_szp->y*(centery-ds_y) + ds_szp->x*(ds_x1-centerx);
// Lighting is simple. It's just linear interpolation from start to end // Lighting is simple. It's just linear interpolation from start to end
@ -337,12 +347,13 @@ void R_DrawTiltedTranslucentSpan_NPO2_8(void)
// Carefully align all of my Friends. // Carefully align all of my Friends.
if (x < 0) if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth); x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0) if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
x %= ds_flatwidth; y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
y %= ds_flatheight;
*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dest); *dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dest);
} }
@ -386,12 +397,13 @@ void R_DrawTiltedTranslucentSpan_NPO2_8(void)
// Carefully align all of my Friends. // Carefully align all of my Friends.
if (x < 0) if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth); x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0) if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
x %= ds_flatwidth; y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
y %= ds_flatheight;
*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dest); *dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dest);
} }
@ -417,12 +429,13 @@ void R_DrawTiltedTranslucentSpan_NPO2_8(void)
// Carefully align all of my Friends. // Carefully align all of my Friends.
if (x < 0) if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth); x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0) if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
x %= ds_flatwidth; y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
y %= ds_flatheight;
*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dest); *dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dest);
} }
@ -453,12 +466,13 @@ void R_DrawTiltedTranslucentSpan_NPO2_8(void)
// Carefully align all of my Friends. // Carefully align all of my Friends.
if (x < 0) if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth); x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0) if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
x %= ds_flatwidth; y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
y %= ds_flatheight;
*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dest); *dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dest);
} }
@ -490,6 +504,9 @@ void R_DrawTiltedSplat_NPO2_8(void)
double endz, endu, endv; double endz, endu, endv;
UINT32 stepu, stepv; UINT32 stepu, stepv;
struct libdivide_u32_t x_divider = libdivide_u32_gen(ds_flatwidth);
struct libdivide_u32_t y_divider = libdivide_u32_gen(ds_flatheight);
iz = ds_szp->z + ds_szp->y*(centery-ds_y) + ds_szp->x*(ds_x1-centerx); iz = ds_szp->z + ds_szp->y*(centery-ds_y) + ds_szp->x*(ds_x1-centerx);
// Lighting is simple. It's just linear interpolation from start to end // Lighting is simple. It's just linear interpolation from start to end
@ -529,12 +546,13 @@ void R_DrawTiltedSplat_NPO2_8(void)
// Carefully align all of my Friends. // Carefully align all of my Friends.
if (x < 0) if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth); x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0) if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
x %= ds_flatwidth; y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
y %= ds_flatheight;
val = source[((y * ds_flatwidth) + x)]; val = source[((y * ds_flatwidth) + x)];
} }
@ -582,12 +600,13 @@ void R_DrawTiltedSplat_NPO2_8(void)
// Carefully align all of my Friends. // Carefully align all of my Friends.
if (x < 0) if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth); x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0) if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
x %= ds_flatwidth; y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
y %= ds_flatheight;
val = source[((y * ds_flatwidth) + x)]; val = source[((y * ds_flatwidth) + x)];
} }
@ -615,12 +634,13 @@ void R_DrawTiltedSplat_NPO2_8(void)
// Carefully align all of my Friends. // Carefully align all of my Friends.
if (x < 0) if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth); x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0) if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
x %= ds_flatwidth; y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
y %= ds_flatheight;
val = source[((y * ds_flatwidth) + x)]; val = source[((y * ds_flatwidth) + x)];
} }
@ -654,12 +674,13 @@ void R_DrawTiltedSplat_NPO2_8(void)
// Carefully align all of my Friends. // Carefully align all of my Friends.
if (x < 0) if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth); x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0) if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
x %= ds_flatwidth; y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
y %= ds_flatheight;
val = source[((y * ds_flatwidth) + x)]; val = source[((y * ds_flatwidth) + x)];
} }
@ -1401,6 +1422,9 @@ void R_DrawTiltedTranslucentWaterSpan_NPO2_8(void)
double endz, endu, endv; double endz, endu, endv;
UINT32 stepu, stepv; UINT32 stepu, stepv;
struct libdivide_u32_t x_divider = libdivide_u32_gen(ds_flatwidth);
struct libdivide_u32_t y_divider = libdivide_u32_gen(ds_flatheight);
iz = ds_szp->z + ds_szp->y*(centery-ds_y) + ds_szp->x*(ds_x1-centerx); iz = ds_szp->z + ds_szp->y*(centery-ds_y) + ds_szp->x*(ds_x1-centerx);
// Lighting is simple. It's just linear interpolation from start to end // Lighting is simple. It's just linear interpolation from start to end
@ -1440,12 +1464,13 @@ void R_DrawTiltedTranslucentWaterSpan_NPO2_8(void)
// Carefully align all of my Friends. // Carefully align all of my Friends.
if (x < 0) if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth); x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0) if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
x %= ds_flatwidth; y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
y %= ds_flatheight;
*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dsrc++); *dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dsrc++);
} }
@ -1489,12 +1514,13 @@ void R_DrawTiltedTranslucentWaterSpan_NPO2_8(void)
// Carefully align all of my Friends. // Carefully align all of my Friends.
if (x < 0) if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth); x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0) if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
x %= ds_flatwidth; y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
y %= ds_flatheight;
*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dsrc++); *dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dsrc++);
} }
@ -1520,12 +1546,13 @@ void R_DrawTiltedTranslucentWaterSpan_NPO2_8(void)
// Carefully align all of my Friends. // Carefully align all of my Friends.
if (x < 0) if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth); x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0) if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
x %= ds_flatwidth; y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
y %= ds_flatheight;
*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dsrc++); *dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dsrc++);
} }
@ -1556,12 +1583,13 @@ void R_DrawTiltedTranslucentWaterSpan_NPO2_8(void)
// Carefully align all of my Friends. // Carefully align all of my Friends.
if (x < 0) if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth); x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0) if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
x %= ds_flatwidth; y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
y %= ds_flatheight;
*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dsrc++); *dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dsrc++);
} }

View file

@ -249,6 +249,7 @@
<ClInclude Include="..\i_video.h" /> <ClInclude Include="..\i_video.h" />
<ClInclude Include="..\keys.h" /> <ClInclude Include="..\keys.h" />
<ClInclude Include="..\k_kart.h" /> <ClInclude Include="..\k_kart.h" />
<ClInclude Include="..\libdivide.h" />
<ClInclude Include="..\lua_hook.h" /> <ClInclude Include="..\lua_hook.h" />
<ClInclude Include="..\lua_hud.h" /> <ClInclude Include="..\lua_hud.h" />
<ClInclude Include="..\lua_libs.h" /> <ClInclude Include="..\lua_libs.h" />

View file

@ -402,6 +402,9 @@
<ClInclude Include="..\tables.h"> <ClInclude Include="..\tables.h">
<Filter>P_Play</Filter> <Filter>P_Play</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\libdivide.h">
<Filter>R_Rend</Filter>
</ClInclude>
<ClInclude Include="..\r_bsp.h"> <ClInclude Include="..\r_bsp.h">
<Filter>R_Rend</Filter> <Filter>R_Rend</Filter>
</ClInclude> </ClInclude>