Implement hardware resolve path for both color and depth targets.

This commit is contained in:
Skyth 2025-02-03 21:11:03 +03:00
parent 7487cfd337
commit 472b136272
7 changed files with 170 additions and 95 deletions

View file

@ -27,6 +27,8 @@
//# define D3D12_DEBUG_LAYER_GPU_BASED_VALIDATION_ENABLED //# define D3D12_DEBUG_LAYER_GPU_BASED_VALIDATION_ENABLED
#endif #endif
//#define D3D12_DEBUG_SET_STABLE_POWER_STATE
// Old Windows SDK versions don't provide this macro, so we work around it by making sure it is defined. // Old Windows SDK versions don't provide this macro, so we work around it by making sure it is defined.
#ifndef D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE #ifndef D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE
#define D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE (D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE) #define D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE (D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE)
@ -692,6 +694,20 @@ namespace plume {
); );
} }
// Translates the backend-agnostic RenderResolveMode into the matching
// D3D12_RESOLVE_MODE value. Any value other than MIN, MAX or AVERAGE trips an
// assertion and then falls back to D3D12_RESOLVE_MODE_AVERAGE.
static D3D12_RESOLVE_MODE toD3D12(RenderResolveMode resolveMode) {
    if (resolveMode == RenderResolveMode::MIN) {
        return D3D12_RESOLVE_MODE_MIN;
    }

    if (resolveMode == RenderResolveMode::MAX) {
        return D3D12_RESOLVE_MODE_MAX;
    }

    // Everything else resolves by averaging; only a value outside the enum is
    // treated as a programming error.
    if (resolveMode != RenderResolveMode::AVERAGE) {
        assert(false && "Unknown resolve mode.");
    }

    return D3D12_RESOLVE_MODE_AVERAGE;
}
static void setObjectName(ID3D12Object *object, const std::string &name) { static void setObjectName(ID3D12Object *object, const std::string &name) {
const std::wstring wideCharName = Utf8ToUtf16(name); const std::wstring wideCharName = Utf8ToUtf16(name);
object->SetName(wideCharName.c_str()); object->SetName(wideCharName.c_str());
@ -1916,7 +1932,7 @@ namespace plume {
resetSamplePositions(); resetSamplePositions();
} }
void D3D12CommandList::resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect) { void D3D12CommandList::resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect, RenderResolveMode resolveMode) {
assert(dstTexture != nullptr); assert(dstTexture != nullptr);
assert(srcTexture != nullptr); assert(srcTexture != nullptr);
@ -1931,7 +1947,7 @@ namespace plume {
} }
setSamplePositions(interfaceDstTexture); setSamplePositions(interfaceDstTexture);
d3d->ResolveSubresourceRegion(interfaceDstTexture->d3d, 0, dstX, dstY, interfaceSrcTexture->d3d, 0, (srcRect != nullptr) ? &rect : nullptr, toDXGI(interfaceDstTexture->desc.format), D3D12_RESOLVE_MODE_AVERAGE); d3d->ResolveSubresourceRegion(interfaceDstTexture->d3d, 0, dstX, dstY, interfaceSrcTexture->d3d, 0, (srcRect != nullptr) ? &rect : nullptr, toDXGI(interfaceDstTexture->desc.format), toD3D12(resolveMode));
resetSamplePositions(); resetSamplePositions();
} }
@ -3373,6 +3389,10 @@ namespace plume {
return; return;
} }
#ifdef D3D12_DEBUG_SET_STABLE_POWER_STATE
d3d->SetStablePowerState(TRUE);
#endif
D3D12MA::ALLOCATOR_DESC allocatorDesc = {}; D3D12MA::ALLOCATOR_DESC allocatorDesc = {};
allocatorDesc.pDevice = d3d; allocatorDesc.pDevice = d3d;
allocatorDesc.pAdapter = adapter; allocatorDesc.pAdapter = adapter;

View file

@ -192,7 +192,7 @@ namespace plume {
void copyBuffer(const RenderBuffer *dstBuffer, const RenderBuffer *srcBuffer) override; void copyBuffer(const RenderBuffer *dstBuffer, const RenderBuffer *srcBuffer) override;
void copyTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) override; void copyTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) override;
void resolveTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) override; void resolveTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) override;
void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect) override; void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect, RenderResolveMode resolveMode) override;
void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) override; void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) override;
void buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) override; void buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) override;
void discardTexture(const RenderTexture* texture) override; void discardTexture(const RenderTexture* texture) override;

View file

@ -143,7 +143,7 @@ namespace plume {
virtual void copyBuffer(const RenderBuffer *dstBuffer, const RenderBuffer *srcBuffer) = 0; virtual void copyBuffer(const RenderBuffer *dstBuffer, const RenderBuffer *srcBuffer) = 0;
virtual void copyTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) = 0; virtual void copyTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) = 0;
virtual void resolveTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) = 0; virtual void resolveTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) = 0;
virtual void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect = nullptr) = 0; virtual void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect = nullptr, RenderResolveMode resolveMode = RenderResolveMode::AVERAGE) = 0;
virtual void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) = 0; virtual void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) = 0;
virtual void buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) = 0; virtual void buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) = 0;
virtual void discardTexture(const RenderTexture* texture) = 0; // D3D12 only. virtual void discardTexture(const RenderTexture* texture) = 0; // D3D12 only.

View file

@ -483,6 +483,12 @@ namespace plume {
CPU CPU
}; };
// Modes that can be passed to resolveTextureRegion to select how a resolve is
// performed. The enumerator values are spelled out but match the implicit ones.
enum class RenderResolveMode {
    MIN = 0,     // Mapped to D3D12_RESOLVE_MODE_MIN by the D3D12 backend.
    MAX = 1,     // Mapped to D3D12_RESOLVE_MODE_MAX by the D3D12 backend.
    AVERAGE = 2  // Mapped to D3D12_RESOLVE_MODE_AVERAGE; default argument of resolveTextureRegion and the only mode the Vulkan backend asserts as supported.
};
// Global functions. // Global functions.
constexpr uint32_t RenderFormatSize(RenderFormat format) { constexpr uint32_t RenderFormatSize(RenderFormat format) {

View file

@ -3074,12 +3074,13 @@ namespace plume {
} }
// Whole-texture resolve: forwards to resolveTextureRegion with destination offset (0, 0) and no source rectangle.
// In this side-by-side diff row the left column is the previous call and the right column is the updated call,
// which passes RenderResolveMode::AVERAGE explicitly.
void VulkanCommandList::resolveTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) { void VulkanCommandList::resolveTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) {
resolveTextureRegion(dstTexture, 0, 0, srcTexture, nullptr); resolveTextureRegion(dstTexture, 0, 0, srcTexture, nullptr, RenderResolveMode::AVERAGE);
} }
void VulkanCommandList::resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect) { void VulkanCommandList::resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect, RenderResolveMode resolveMode) {
assert(dstTexture != nullptr); assert(dstTexture != nullptr);
assert(srcTexture != nullptr); assert(srcTexture != nullptr);
assert(resolveMode == RenderResolveMode::AVERAGE && "Vulkan only supports AVERAGE resolve mode.");
thread_local std::vector<VkImageResolve> imageResolves; thread_local std::vector<VkImageResolve> imageResolves;
imageResolves.clear(); imageResolves.clear();

View file

@ -315,7 +315,7 @@ namespace plume {
void copyBuffer(const RenderBuffer *dstBuffer, const RenderBuffer *srcBuffer) override; void copyBuffer(const RenderBuffer *dstBuffer, const RenderBuffer *srcBuffer) override;
void copyTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) override; void copyTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) override;
void resolveTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) override; void resolveTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) override;
void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect) override; void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect, RenderResolveMode resolveMode) override;
void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) override; void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) override;
void buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) override; void buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) override;
void discardTexture(const RenderTexture* texture) override; void discardTexture(const RenderTexture* texture) override;

View file

@ -235,6 +235,9 @@ static bool g_vulkan = false;
static constexpr bool g_vulkan = true; static constexpr bool g_vulkan = true;
#endif #endif
// Compile-time switch: when true, multisampled surfaces in the stretch-rect path are resolved through the
// command list's resolveTexture/resolveTextureRegion calls instead of the shader-based resolve.
static constexpr bool g_hardwareResolve = true;
// Compile-time switch: when true, D32_FLOAT surfaces may also take the hardware resolve path, but only when
// not running on Vulkan and g_capabilities.sampleLocations is set; otherwise they use the shader resolve.
static constexpr bool g_hardwareDepthResolve = true;
static std::unique_ptr<RenderInterface> g_interface; static std::unique_ptr<RenderInterface> g_interface;
static std::unique_ptr<RenderDevice> g_device; static std::unique_ptr<RenderDevice> g_device;
@ -3024,10 +3027,35 @@ static bool PopulateBarriersForStretchRect(GuestSurface* renderTarget, GuestSurf
{ {
if (surface != nullptr && !surface->destinationTextures.empty()) if (surface != nullptr && !surface->destinationTextures.empty())
{ {
AddBarrier(surface, RenderTextureLayout::SHADER_READ); const bool multiSampling = surface->sampleCount != RenderSampleCount::COUNT_1;
RenderTextureLayout srcLayout;
RenderTextureLayout dstLayout;
bool shaderResolve = true;
if (multiSampling && g_hardwareResolve)
{
// Hardware depth resolve is only supported on D3D12 when programmable sample positions are available.
bool hardwareDepthResolveAvailable = g_hardwareDepthResolve && !g_vulkan && g_capabilities.sampleLocations;
if (surface->format != RenderFormat::D32_FLOAT || hardwareDepthResolveAvailable)
{
srcLayout = RenderTextureLayout::RESOLVE_SOURCE;
dstLayout = RenderTextureLayout::RESOLVE_DEST;
shaderResolve = false;
}
}
if (shaderResolve)
{
srcLayout = RenderTextureLayout::SHADER_READ;
dstLayout = (surface->format == RenderFormat::D32_FLOAT ? RenderTextureLayout::DEPTH_WRITE : RenderTextureLayout::COLOR_WRITE);
}
AddBarrier(surface, srcLayout);
for (const auto texture : surface->destinationTextures) for (const auto texture : surface->destinationTextures)
AddBarrier(texture, texture->format == RenderFormat::D32_FLOAT ? RenderTextureLayout::DEPTH_WRITE : RenderTextureLayout::COLOR_WRITE); AddBarrier(texture, dstLayout);
addedAny = true; addedAny = true;
} }
@ -3047,6 +3075,25 @@ static void ExecutePendingStretchRectCommands(GuestSurface* renderTarget, GuestS
const bool multiSampling = surface->sampleCount != RenderSampleCount::COUNT_1; const bool multiSampling = surface->sampleCount != RenderSampleCount::COUNT_1;
for (const auto texture : surface->destinationTextures) for (const auto texture : surface->destinationTextures)
{
bool shaderResolve = true;
if (multiSampling && g_hardwareResolve)
{
bool hardwareDepthResolveAvailable = g_hardwareDepthResolve && !g_vulkan && g_capabilities.sampleLocations;
if (surface->format != RenderFormat::D32_FLOAT || hardwareDepthResolveAvailable)
{
if (surface->format == RenderFormat::D32_FLOAT)
commandList->resolveTextureRegion(texture->texture, 0, 0, surface->texture, nullptr, RenderResolveMode::MIN);
else
commandList->resolveTexture(texture->texture, surface->texture);
shaderResolve = false;
}
}
if (shaderResolve)
{ {
RenderPipeline* pipeline = nullptr; RenderPipeline* pipeline = nullptr;
@ -3156,6 +3203,7 @@ static void ExecutePendingStretchRectCommands(GuestSurface* renderTarget, GuestS
g_dirtyStates.vertexShaderConstants = true; // The push constant call invalidates vertex shader constants. g_dirtyStates.vertexShaderConstants = true; // The push constant call invalidates vertex shader constants.
g_dirtyStates.depthBias = true; // Static depth bias in copy pipeline invalidates dynamic depth bias. g_dirtyStates.depthBias = true; // Static depth bias in copy pipeline invalidates dynamic depth bias.
} }
}
texture->sourceSurface = nullptr; texture->sourceSurface = nullptr;