diff --git a/src/r_things.c b/src/r_things.c index 2fe008dd8..f067094c3 100644 --- a/src/r_things.c +++ b/src/r_things.c @@ -86,6 +86,35 @@ static spriteframe_t sprtemp[64]; static size_t maxframe; static const char *spritename; +// +// Clipping against drawsegs optimization, from prboom-plus +// +// TODO: This should be done with proper subsector pass through +// sprites which would ideally remove the need to do it at all. +// Unfortunately, SRB2's drawing loop has lots of annoying +// changes from Doom for portals, which make it hard to implement. + +typedef struct drawseg_xrange_item_s +{ + INT16 x1, x2; + drawseg_t *user; +} drawseg_xrange_item_t; + +typedef struct drawsegs_xrange_s +{ + drawseg_xrange_item_t *items; + INT32 count; +} drawsegs_xrange_t; + +#define DS_RANGES_COUNT 3 +static drawsegs_xrange_t drawsegs_xranges[DS_RANGES_COUNT]; + +static drawseg_xrange_item_t *drawsegs_xrange; +static size_t drawsegs_xrange_size = 0; +static INT32 drawsegs_xrange_count = 0; + +#define CLIP_UNDEF -2 + // ========================================================================== // // Sprite loading routines: support sprites in pwad, dehacked sprite renaming, @@ -2933,7 +2962,7 @@ static void R_DrawPrecipitationSprite(vissprite_t *spr) // R_ClipVisSprite // Clips vissprites without drawing, so that portals can work. -Red -void R_ClipVisSprite(vissprite_t *spr, INT32 x1, INT32 x2, drawseg_t* dsstart, portal_t* portal) +void R_ClipVisSprite(vissprite_t *spr, INT32 x1, INT32 x2, portal_t* portal) { drawseg_t *ds; INT32 x; @@ -2944,7 +2973,9 @@ void R_ClipVisSprite(vissprite_t *spr, INT32 x1, INT32 x2, drawseg_t* dsstart, p INT32 silhouette; for (x = x1; x <= x2; x++) - spr->clipbot[x] = spr->cliptop[x] = -2; + { + spr->clipbot[x] = spr->cliptop[x] = CLIP_UNDEF; + } // Scan drawsegs from end to start for obscuring segs. 
// The first drawseg that has a greater scale @@ -2953,82 +2984,96 @@ void R_ClipVisSprite(vissprite_t *spr, INT32 x1, INT32 x2, drawseg_t* dsstart, p // Pointer check was originally nonportable // and buggy, by going past LEFT end of array: - // for (ds = ds_p-1; ds >= drawsegs; ds--) old buggy code - for (ds = ds_p; ds-- > dsstart;) + // e6y: optimization: scan only the drawseg x-range list selected by R_ClipSprites. NOTE(review): &drawsegs_xrange[-1] below forms a pointer before the start of the array, which is undefined behavior in C; consider starting at drawsegs_xrange and comparing before incrementing. + if (drawsegs_xrange_size) { - // determine if the drawseg obscures the sprite - if (ds->x1 > x2 || - ds->x2 < x1 || - (!ds->silhouette - && !ds->maskedtexturecol)) + const drawseg_xrange_item_t *last = &drawsegs_xrange[drawsegs_xrange_count - 1]; + drawseg_xrange_item_t *curr = &drawsegs_xrange[-1]; + + while (++curr <= last) { - // does not cover sprite - continue; - } - - if (ds->portalpass != 66) - { - if (ds->portalpass > 0 && ds->portalpass <= portalrender) - continue; // is a portal - - if (ds->scale1 > ds->scale2) + // determine if the drawseg obscures the sprite. NOTE(review): the removed code tested the x1/x2 arguments here, which R_ClipSprites sets to 0/viewwidth for SC_SPLAT sprites; this tests spr->x1/spr->x2 instead, so confirm that is intended. + if (curr->x1 > spr->x2 || curr->x2 < spr->x1) { - lowscale = ds->scale2; - scale = ds->scale1; - } - else - { - lowscale = ds->scale1; - scale = ds->scale2; - } - - if (scale < spr->sortscale || - (lowscale < spr->sortscale && - !R_PointOnSegSide (spr->gx, spr->gy, ds->curline))) - { - // masked mid texture? - /*if (ds->maskedtexturecol) - R_RenderMaskedSegRange (ds, r1, r2);*/ - // seg is behind sprite + // does not cover sprite continue; } - } - r1 = ds->x1 < x1 ? x1 : ds->x1; - r2 = ds->x2 > x2 ? 
x2 : ds->x2; + ds = curr->user; - // clip this piece of the sprite - silhouette = ds->silhouette; - - if (spr->gz >= ds->bsilheight) - silhouette &= ~SIL_BOTTOM; - - if (spr->gzt <= ds->tsilheight) - silhouette &= ~SIL_TOP; - - if (silhouette == SIL_BOTTOM) - { - // bottom sil - for (x = r1; x <= r2; x++) - if (spr->clipbot[x] == -2) - spr->clipbot[x] = ds->sprbottomclip[x]; - } - else if (silhouette == SIL_TOP) - { - // top sil - for (x = r1; x <= r2; x++) - if (spr->cliptop[x] == -2) - spr->cliptop[x] = ds->sprtopclip[x]; - } - else if (silhouette == (SIL_TOP|SIL_BOTTOM)) - { - // both - for (x = r1; x <= r2; x++) + if (!ds->silhouette && !ds->maskedtexturecol) { - if (spr->clipbot[x] == -2) - spr->clipbot[x] = ds->sprbottomclip[x]; - if (spr->cliptop[x] == -2) - spr->cliptop[x] = ds->sprtopclip[x]; + // does not cover sprite (R_ClipSprites already applies this filter when building the list) + continue; + } + + if (ds->portalpass != 66) // unused? + { + if (ds->portalpass > 0 && ds->portalpass <= portalrender) + continue; // is a portal + + if (ds->scale1 > ds->scale2) + { + lowscale = ds->scale2; + scale = ds->scale1; + } + else + { + lowscale = ds->scale1; + scale = ds->scale2; + } + + if (scale < spr->sortscale || + (lowscale < spr->sortscale && + !R_PointOnSegSide (spr->gx, spr->gy, ds->curline))) + { + // masked mid texture? + /* + if (ds->maskedtexturecol) + R_RenderMaskedSegRange (ds, r1, r2); + */ + + // seg is behind sprite + continue; + } + } + + r1 = ds->x1 < x1 ? x1 : ds->x1; + r2 = ds->x2 > x2 ? 
x2 : ds->x2; + + // clip this piece of the sprite + silhouette = ds->silhouette; + + if (spr->gz >= ds->bsilheight) + silhouette &= ~SIL_BOTTOM; + + if (spr->gzt <= ds->tsilheight) + silhouette &= ~SIL_TOP; + + if (silhouette == SIL_BOTTOM) + { + // bottom sil + for (x = r1; x <= r2; x++) + if (spr->clipbot[x] == CLIP_UNDEF) + spr->clipbot[x] = ds->sprbottomclip[x]; + } + else if (silhouette == SIL_TOP) + { + // top sil + for (x = r1; x <= r2; x++) + if (spr->cliptop[x] == CLIP_UNDEF) + spr->cliptop[x] = ds->sprtopclip[x]; + } + else if (silhouette == (SIL_TOP|SIL_BOTTOM)) + { + // both + for (x = r1; x <= r2; x++) + { + if (spr->clipbot[x] == CLIP_UNDEF) + spr->clipbot[x] = ds->sprbottomclip[x]; + if (spr->cliptop[x] == CLIP_UNDEF) + spr->cliptop[x] = ds->sprtopclip[x]; + } } } } @@ -3044,13 +3089,13 @@ void R_ClipVisSprite(vissprite_t *spr, INT32 x1, INT32 x2, drawseg_t* dsstart, p if (mh <= 0 || (phs != -1 && viewz > sectors[phs].floorheight)) { // clip bottom for (x = x1; x <= x2; x++) - if (spr->clipbot[x] == -2 || h < spr->clipbot[x]) + if (spr->clipbot[x] == CLIP_UNDEF || h < spr->clipbot[x]) spr->clipbot[x] = (INT16)h; } else // clip top { for (x = x1; x <= x2; x++) - if (spr->cliptop[x] == -2 || h > spr->cliptop[x]) + if (spr->cliptop[x] == CLIP_UNDEF || h > spr->cliptop[x]) spr->cliptop[x] = (INT16)h; } } @@ -3062,13 +3107,13 @@ void R_ClipVisSprite(vissprite_t *spr, INT32 x1, INT32 x2, drawseg_t* dsstart, p if (phs != -1 && viewz >= sectors[phs].ceilingheight) { // clip bottom for (x = x1; x <= x2; x++) - if (spr->clipbot[x] == -2 || h < spr->clipbot[x]) + if (spr->clipbot[x] == CLIP_UNDEF || h < spr->clipbot[x]) spr->clipbot[x] = (INT16)h; } else // clip top { for (x = x1; x <= x2; x++) - if (spr->cliptop[x] == -2 || h > spr->cliptop[x]) + if (spr->cliptop[x] == CLIP_UNDEF || h > spr->cliptop[x]) spr->cliptop[x] = (INT16)h; } } @@ -3077,10 +3122,10 @@ void R_ClipVisSprite(vissprite_t *spr, INT32 x1, INT32 x2, drawseg_t* dsstart, p { for (x = x1; x <= 
x2; x++) { - if (spr->cliptop[x] == -2 || spr->szt > spr->cliptop[x]) + if (spr->cliptop[x] == CLIP_UNDEF || spr->szt > spr->cliptop[x]) spr->cliptop[x] = spr->szt; - if (spr->clipbot[x] == -2 || spr->sz < spr->clipbot[x]) + if (spr->clipbot[x] == CLIP_UNDEF || spr->sz < spr->clipbot[x]) spr->clipbot[x] = spr->sz; } } @@ -3088,7 +3133,7 @@ void R_ClipVisSprite(vissprite_t *spr, INT32 x1, INT32 x2, drawseg_t* dsstart, p { for (x = x1; x <= x2; x++) { - if (spr->cliptop[x] == -2 || spr->szt > spr->cliptop[x]) + if (spr->cliptop[x] == CLIP_UNDEF || spr->szt > spr->cliptop[x]) spr->cliptop[x] = spr->szt; } } @@ -3096,7 +3141,7 @@ void R_ClipVisSprite(vissprite_t *spr, INT32 x1, INT32 x2, drawseg_t* dsstart, p { for (x = x1; x <= x2; x++) { - if (spr->clipbot[x] == -2 || spr->sz < spr->clipbot[x]) + if (spr->clipbot[x] == CLIP_UNDEF || spr->sz < spr->clipbot[x]) spr->clipbot[x] = spr->sz; } } @@ -3106,10 +3151,10 @@ void R_ClipVisSprite(vissprite_t *spr, INT32 x1, INT32 x2, drawseg_t* dsstart, p // check for unclipped columns for (x = x1; x <= x2; x++) { - if (spr->clipbot[x] == -2) + if (spr->clipbot[x] == CLIP_UNDEF) spr->clipbot[x] = (INT16)viewheight; - if (spr->cliptop[x] == -2) + if (spr->cliptop[x] == CLIP_UNDEF) //Fab : 26-04-98: was -1, now clips against console bottom spr->cliptop[x] = (INT16)con_clipviewtop; } @@ -3140,12 +3185,89 @@ void R_ClipVisSprite(vissprite_t *spr, INT32 x1, INT32 x2, drawseg_t* dsstart, p void R_ClipSprites(drawseg_t* dsstart, portal_t* portal) { + const size_t maxdrawsegs = ds_p - dsstart; + const INT32 cx = BASEVIDWIDTH / 2; // NOTE(review): split column is fixed at BASEVIDWIDTH / 2 while sprite columns run up to viewwidth; confirm whether viewwidth / 2 was intended + drawseg_t* ds; + INT32 i; + + // e6y + // Reduces cache misses in the following R_DrawSprite(). + // Makes sense for scenes with a huge number of drawsegs. 
+ // ~12% speed improvement on epic.wad map05 + for (i = 0; i < DS_RANGES_COUNT; i++) + { + drawsegs_xranges[i].count = 0; + } + + if (visspritecount - clippedvissprites <= 0) + { + return; + } + + if (drawsegs_xrange_size < maxdrawsegs) + { + drawsegs_xrange_size = 2 * maxdrawsegs; + + for (i = 0; i < DS_RANGES_COUNT; i++) + { + drawsegs_xranges[i].items = Z_Realloc( + drawsegs_xranges[i].items, + drawsegs_xrange_size * sizeof(drawsegs_xranges[i].items[0]), + PU_STATIC, NULL + ); + } + } + + for (ds = ds_p; ds-- > dsstart;) + { + if (ds->silhouette || ds->maskedtexturecol) + { + drawsegs_xranges[0].items[drawsegs_xranges[0].count].x1 = ds->x1; + drawsegs_xranges[0].items[drawsegs_xranges[0].count].x2 = ds->x2; + drawsegs_xranges[0].items[drawsegs_xranges[0].count].user = ds; + + // e6y: ~13% speed improvement on sunder.wad map10 + if (ds->x1 < cx) + { + drawsegs_xranges[1].items[drawsegs_xranges[1].count] = + drawsegs_xranges[0].items[drawsegs_xranges[0].count]; + drawsegs_xranges[1].count++; + } + + if (ds->x2 >= cx) + { + drawsegs_xranges[2].items[drawsegs_xranges[2].count] = + drawsegs_xranges[0].items[drawsegs_xranges[0].count]; + drawsegs_xranges[2].count++; + } + + drawsegs_xranges[0].count++; + } + } + for (; clippedvissprites < visspritecount; clippedvissprites++) { vissprite_t *spr = R_GetVisSprite(clippedvissprites); INT32 x1 = (spr->cut & SC_SPLAT) ? 0 : spr->x1; INT32 x2 = (spr->cut & SC_SPLAT) ? 
viewwidth : spr->x2; - R_ClipVisSprite(spr, x1, x2, dsstart, portal); + + if (x2 < cx) + { + drawsegs_xrange = drawsegs_xranges[1].items; + drawsegs_xrange_count = drawsegs_xranges[1].count; + } + else if (x1 >= cx) + { + drawsegs_xrange = drawsegs_xranges[2].items; + drawsegs_xrange_count = drawsegs_xranges[2].count; + } + else + { + drawsegs_xrange = drawsegs_xranges[0].items; + drawsegs_xrange_count = drawsegs_xranges[0].count; + } + + R_ClipVisSprite(spr, x1, x2, portal); } } diff --git a/src/r_things.h b/src/r_things.h index db695cf3c..bd7449d3e 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -218,7 +218,7 @@ typedef struct vissprite_s extern UINT32 visspritecount; void R_ClipSprites(drawseg_t* dsstart, portal_t* portal); -void R_ClipVisSprite(vissprite_t *spr, INT32 x1, INT32 x2, drawseg_t* dsstart, portal_t* portal); +void R_ClipVisSprite(vissprite_t *spr, INT32 x1, INT32 x2, portal_t* portal); UINT8 *R_GetSpriteTranslation(vissprite_t *vis);