Initial work for GPU upload heap & UMA.

This commit is contained in:
Skyth 2025-03-18 19:26:59 +03:00
parent d0368665dd
commit 07fe67092f
5 changed files with 64 additions and 28 deletions

View file

@@ -310,7 +310,11 @@ endif()
if (UNLEASHED_RECOMP_D3D12) if (UNLEASHED_RECOMP_D3D12)
find_package(directx-headers CONFIG REQUIRED) find_package(directx-headers CONFIG REQUIRED)
find_package(directx12-agility CONFIG REQUIRED) find_package(directx12-agility CONFIG REQUIRED)
target_compile_definitions(UnleashedRecomp PRIVATE UNLEASHED_RECOMP_D3D12) target_compile_definitions(UnleashedRecomp PRIVATE
UNLEASHED_RECOMP_D3D12
D3D12MA_USING_DIRECTX_HEADERS
D3D12MA_OPTIONS16_SUPPORTED
)
endif() endif()
if (CMAKE_SYSTEM_NAME MATCHES "Linux") if (CMAKE_SYSTEM_NAME MATCHES "Linux")

View file

@@ -442,6 +442,8 @@ namespace plume {
return D3D12_HEAP_TYPE_UPLOAD; return D3D12_HEAP_TYPE_UPLOAD;
case RenderHeapType::READBACK: case RenderHeapType::READBACK:
return D3D12_HEAP_TYPE_READBACK; return D3D12_HEAP_TYPE_READBACK;
case RenderHeapType::GPU_UPLOAD:
return D3D12_HEAP_TYPE_GPU_UPLOAD;
default: default:
assert(false && "Unknown heap type."); assert(false && "Unknown heap type.");
return D3D12_HEAP_TYPE_DEFAULT; return D3D12_HEAP_TYPE_DEFAULT;
@@ -3391,12 +3393,14 @@ namespace plume {
triangleFanSupportOption = d3d12Options15.TriangleFanSupported; triangleFanSupportOption = d3d12Options15.TriangleFanSupported;
} }
// Check if dynamic depth bias is supported. // Check if dynamic depth bias and GPU upload heap are supported.
bool dynamicDepthBiasOption = false; bool dynamicDepthBiasOption = false;
bool gpuUploadHeapOption = false;
D3D12_FEATURE_DATA_D3D12_OPTIONS16 d3d12Options16 = {}; D3D12_FEATURE_DATA_D3D12_OPTIONS16 d3d12Options16 = {};
res = deviceOption->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS16, &d3d12Options16, sizeof(d3d12Options16)); res = deviceOption->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS16, &d3d12Options16, sizeof(d3d12Options16));
if (SUCCEEDED(res)) { if (SUCCEEDED(res)) {
dynamicDepthBiasOption = d3d12Options16.DynamicDepthBiasSupported; dynamicDepthBiasOption = d3d12Options16.DynamicDepthBiasSupported;
gpuUploadHeapOption = d3d12Options16.GPUUploadHeapSupported;
} }
// Check if the architecture has UMA. // Check if the architecture has UMA.
@@ -3431,6 +3435,7 @@ namespace plume {
capabilities.triangleFan = triangleFanSupportOption; capabilities.triangleFan = triangleFanSupportOption;
capabilities.dynamicDepthBias = dynamicDepthBiasOption; capabilities.dynamicDepthBias = dynamicDepthBiasOption;
capabilities.uma = uma; capabilities.uma = uma;
capabilities.gpuUploadHeap = gpuUploadHeapOption;
description.name = deviceName; description.name = deviceName;
description.dedicatedVideoMemory = adapterDesc.DedicatedVideoMemory; description.dedicatedVideoMemory = adapterDesc.DedicatedVideoMemory;
description.vendor = RenderDeviceVendor(adapterDesc.VendorId); description.vendor = RenderDeviceVendor(adapterDesc.VendorId);

View file

@@ -351,7 +351,8 @@ namespace plume {
UNKNOWN, UNKNOWN,
DEFAULT, DEFAULT,
UPLOAD, UPLOAD,
READBACK READBACK,
GPU_UPLOAD
}; };
enum class RenderTextureArrangement { enum class RenderTextureArrangement {
@@ -1807,6 +1808,9 @@ namespace plume {
// UMA. // UMA.
bool uma = false; bool uma = false;
// GPU Upload heap.
bool gpuUploadHeap = false;
}; };
struct RenderInterfaceCapabilities { struct RenderInterfaceCapabilities {

View file

@@ -808,6 +808,12 @@ namespace plume {
bufferInfo.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; bufferInfo.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
createInfo.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; createInfo.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
break; break;
case RenderHeapType::GPU_UPLOAD:
bufferInfo.usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
bufferInfo.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
createInfo.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
createInfo.requiredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
break;
default: default:
assert(false && "Unknown heap type."); assert(false && "Unknown heap type.");
break; break;
@@ -3907,6 +3913,7 @@ namespace plume {
capabilities.preferHDR = memoryHeapSize > (512 * 1024 * 1024); capabilities.preferHDR = memoryHeapSize > (512 * 1024 * 1024);
capabilities.triangleFan = true; capabilities.triangleFan = true;
capabilities.dynamicDepthBias = true; capabilities.dynamicDepthBias = true;
capabilities.gpuUploadHeap = true; // TODO: Do a test buffer allocation with the required flags to set this.
// Fill Vulkan-only capabilities. // Fill Vulkan-only capabilities.
loadStoreOpNoneSupported = supportedOptionalExtensions.find(VK_EXT_LOAD_STORE_OP_NONE_EXTENSION_NAME) != supportedOptionalExtensions.end(); loadStoreOpNoneSupported = supportedOptionalExtensions.find(VK_EXT_LOAD_STORE_OP_NONE_EXTENSION_NAME) != supportedOptionalExtensions.end();

View file

@@ -2112,36 +2112,46 @@ static void* LockVertexBuffer(GuestBuffer* buffer, uint32_t, uint32_t, uint32_t
template<typename T> template<typename T>
static void UnlockBuffer(GuestBuffer* buffer, bool useCopyQueue) static void UnlockBuffer(GuestBuffer* buffer, bool useCopyQueue)
{ {
auto uploadBuffer = g_device->createBuffer(RenderBufferDesc::UploadBuffer(buffer->dataSize)); auto copyBuffer = [&](T* dest)
{
auto src = reinterpret_cast<const T*>(buffer->mappedMemory);
auto dest = reinterpret_cast<T*>(uploadBuffer->map()); for (size_t i = 0; i < buffer->dataSize; i += sizeof(T))
auto src = reinterpret_cast<const T*>(buffer->mappedMemory);
for (size_t i = 0; i < buffer->dataSize; i += sizeof(T))
{
*dest = ByteSwap(*src);
++dest;
++src;
}
uploadBuffer->unmap();
if (useCopyQueue)
{
ExecuteCopyCommandList([&]
{ {
g_copyCommandList->copyBufferRegion(buffer->buffer->at(0), uploadBuffer->at(0), buffer->dataSize); *dest = ByteSwap(*src);
}); ++dest;
++src;
}
};
if (useCopyQueue && (g_capabilities.uma || g_capabilities.gpuUploadHeap))
{
copyBuffer(reinterpret_cast<T*>(buffer->buffer->map()));
buffer->buffer->unmap();
} }
else else
{ {
auto& commandList = g_commandLists[g_frame]; auto uploadBuffer = g_device->createBuffer(RenderBufferDesc::UploadBuffer(buffer->dataSize));
copyBuffer(reinterpret_cast<T*>(uploadBuffer->map()));
uploadBuffer->unmap();
commandList->barriers(RenderBarrierStage::COPY, RenderBufferBarrier(buffer->buffer.get(), RenderBufferAccess::WRITE)); if (useCopyQueue)
commandList->copyBufferRegion(buffer->buffer->at(0), uploadBuffer->at(0), buffer->dataSize); {
commandList->barriers(RenderBarrierStage::GRAPHICS, RenderBufferBarrier(buffer->buffer.get(), RenderBufferAccess::READ)); ExecuteCopyCommandList([&]
{
g_copyCommandList->copyBufferRegion(buffer->buffer->at(0), uploadBuffer->at(0), buffer->dataSize);
});
}
else
{
auto& commandList = g_commandLists[g_frame];
g_tempBuffers[g_frame].emplace_back(std::move(uploadBuffer)); commandList->barriers(RenderBarrierStage::COPY, RenderBufferBarrier(buffer->buffer.get(), RenderBufferAccess::WRITE));
commandList->copyBufferRegion(buffer->buffer->at(0), uploadBuffer->at(0), buffer->dataSize);
commandList->barriers(RenderBarrierStage::GRAPHICS, RenderBufferBarrier(buffer->buffer.get(), RenderBufferAccess::READ));
g_tempBuffers[g_frame].emplace_back(std::move(uploadBuffer));
}
} }
} }
@@ -2339,6 +2349,7 @@ static void DrawProfiler()
ImGui::Text("Device Type: %s", DeviceTypeName(g_device->getDescription().type)); ImGui::Text("Device Type: %s", DeviceTypeName(g_device->getDescription().type));
ImGui::Text("VRAM: %.2f MiB", (double)(g_device->getDescription().dedicatedVideoMemory) / (1024.0 * 1024.0)); ImGui::Text("VRAM: %.2f MiB", (double)(g_device->getDescription().dedicatedVideoMemory) / (1024.0 * 1024.0));
ImGui::Text("UMA: %s", g_capabilities.uma ? "Supported" : "Unsupported"); ImGui::Text("UMA: %s", g_capabilities.uma ? "Supported" : "Unsupported");
ImGui::Text("GPU Upload Heap: %s", g_capabilities.gpuUploadHeap ? "Supported" : "Unsupported");
const char* sdlVideoDriver = SDL_GetCurrentVideoDriver(); const char* sdlVideoDriver = SDL_GetCurrentVideoDriver();
if (sdlVideoDriver != nullptr) if (sdlVideoDriver != nullptr)
@@ -3019,10 +3030,15 @@ static GuestTexture* CreateTexture(uint32_t width, uint32_t height, uint32_t dep
return texture; return texture;
} }
// Selects the heap type for guest vertex/index buffers: the CPU-writable
// GPU upload heap when the device reports support for it, otherwise a
// default (device-local) heap.
static RenderHeapType GetBufferHeapType()
{
    if (g_capabilities.gpuUploadHeap)
        return RenderHeapType::GPU_UPLOAD;

    return RenderHeapType::DEFAULT;
}
static GuestBuffer* CreateVertexBuffer(uint32_t length) static GuestBuffer* CreateVertexBuffer(uint32_t length)
{ {
auto buffer = g_userHeap.AllocPhysical<GuestBuffer>(ResourceType::VertexBuffer); auto buffer = g_userHeap.AllocPhysical<GuestBuffer>(ResourceType::VertexBuffer);
buffer->buffer = g_device->createBuffer(RenderBufferDesc::VertexBuffer(length, RenderHeapType::DEFAULT, RenderBufferFlag::INDEX)); buffer->buffer = g_device->createBuffer(RenderBufferDesc::VertexBuffer(length, GetBufferHeapType(), RenderBufferFlag::INDEX));
buffer->dataSize = length; buffer->dataSize = length;
#ifdef _DEBUG #ifdef _DEBUG
buffer->buffer->setName(fmt::format("Vertex Buffer {:X}", g_memory.MapVirtual(buffer))); buffer->buffer->setName(fmt::format("Vertex Buffer {:X}", g_memory.MapVirtual(buffer)));
@@ -3033,7 +3049,7 @@ static GuestBuffer* CreateVertexBuffer(uint32_t length)
static GuestBuffer* CreateIndexBuffer(uint32_t length, uint32_t, uint32_t format) static GuestBuffer* CreateIndexBuffer(uint32_t length, uint32_t, uint32_t format)
{ {
auto buffer = g_userHeap.AllocPhysical<GuestBuffer>(ResourceType::IndexBuffer); auto buffer = g_userHeap.AllocPhysical<GuestBuffer>(ResourceType::IndexBuffer);
buffer->buffer = g_device->createBuffer(RenderBufferDesc::IndexBuffer(length, RenderHeapType::DEFAULT)); buffer->buffer = g_device->createBuffer(RenderBufferDesc::IndexBuffer(length, GetBufferHeapType()));
buffer->dataSize = length; buffer->dataSize = length;
buffer->format = ConvertFormat(format); buffer->format = ConvertFormat(format);
buffer->guestFormat = format; buffer->guestFormat = format;