From 07fe67092fa9329f9d0f24ead17d7d267510290c Mon Sep 17 00:00:00 2001 From: Skyth <19259897+blueskythlikesclouds@users.noreply.github.com> Date: Tue, 18 Mar 2025 19:26:59 +0300 Subject: [PATCH] Initial work for GPU upload heap & UMA. --- UnleashedRecomp/CMakeLists.txt | 6 +- UnleashedRecomp/gpu/rhi/plume_d3d12.cpp | 7 +- .../gpu/rhi/plume_render_interface_types.h | 6 +- UnleashedRecomp/gpu/rhi/plume_vulkan.cpp | 7 ++ UnleashedRecomp/gpu/video.cpp | 66 ++++++++++++------- 5 files changed, 64 insertions(+), 28 deletions(-) diff --git a/UnleashedRecomp/CMakeLists.txt b/UnleashedRecomp/CMakeLists.txt index 66b60a44..7a6c2774 100644 --- a/UnleashedRecomp/CMakeLists.txt +++ b/UnleashedRecomp/CMakeLists.txt @@ -310,7 +310,11 @@ endif() if (UNLEASHED_RECOMP_D3D12) find_package(directx-headers CONFIG REQUIRED) find_package(directx12-agility CONFIG REQUIRED) - target_compile_definitions(UnleashedRecomp PRIVATE UNLEASHED_RECOMP_D3D12) + target_compile_definitions(UnleashedRecomp PRIVATE + UNLEASHED_RECOMP_D3D12 + D3D12MA_USING_DIRECTX_HEADERS + D3D12MA_OPTIONS16_SUPPORTED + ) endif() if (CMAKE_SYSTEM_NAME MATCHES "Linux") diff --git a/UnleashedRecomp/gpu/rhi/plume_d3d12.cpp b/UnleashedRecomp/gpu/rhi/plume_d3d12.cpp index 395630c2..1ac7dfc4 100644 --- a/UnleashedRecomp/gpu/rhi/plume_d3d12.cpp +++ b/UnleashedRecomp/gpu/rhi/plume_d3d12.cpp @@ -442,6 +442,8 @@ namespace plume { return D3D12_HEAP_TYPE_UPLOAD; case RenderHeapType::READBACK: return D3D12_HEAP_TYPE_READBACK; + case RenderHeapType::GPU_UPLOAD: + return D3D12_HEAP_TYPE_GPU_UPLOAD; default: assert(false && "Unknown heap type."); return D3D12_HEAP_TYPE_DEFAULT; @@ -3391,12 +3393,14 @@ namespace plume { triangleFanSupportOption = d3d12Options15.TriangleFanSupported; } - // Check if dynamic depth bias is supported. + // Check if dynamic depth bias and GPU upload heap are supported. bool dynamicDepthBiasOption = false; + bool gpuUploadHeapOption = false; D3D12_FEATURE_DATA_D3D12_OPTIONS16 d3d12Options16 = {}; res = deviceOption->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS16, &d3d12Options16, sizeof(d3d12Options16)); if (SUCCEEDED(res)) { dynamicDepthBiasOption = d3d12Options16.DynamicDepthBiasSupported; + gpuUploadHeapOption = d3d12Options16.GPUUploadHeapSupported; } // Check if the architecture has UMA. @@ -3431,6 +3435,7 @@ namespace plume { capabilities.triangleFan = triangleFanSupportOption; capabilities.dynamicDepthBias = dynamicDepthBiasOption; capabilities.uma = uma; + capabilities.gpuUploadHeap = gpuUploadHeapOption; description.name = deviceName; description.dedicatedVideoMemory = adapterDesc.DedicatedVideoMemory; description.vendor = RenderDeviceVendor(adapterDesc.VendorId); diff --git a/UnleashedRecomp/gpu/rhi/plume_render_interface_types.h b/UnleashedRecomp/gpu/rhi/plume_render_interface_types.h index b0be1592..568160a8 100644 --- a/UnleashedRecomp/gpu/rhi/plume_render_interface_types.h +++ b/UnleashedRecomp/gpu/rhi/plume_render_interface_types.h @@ -351,7 +351,8 @@ namespace plume { UNKNOWN, DEFAULT, UPLOAD, - READBACK + READBACK, + GPU_UPLOAD }; enum class RenderTextureArrangement { @@ -1807,6 +1808,9 @@ namespace plume { // UMA. bool uma = false; + + // GPU Upload heap. + bool gpuUploadHeap = false; }; struct RenderInterfaceCapabilities { diff --git a/UnleashedRecomp/gpu/rhi/plume_vulkan.cpp b/UnleashedRecomp/gpu/rhi/plume_vulkan.cpp index 477a431a..7269ad05 100644 --- a/UnleashedRecomp/gpu/rhi/plume_vulkan.cpp +++ b/UnleashedRecomp/gpu/rhi/plume_vulkan.cpp @@ -808,6 +808,12 @@ namespace plume { bufferInfo.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; createInfo.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; break; + case RenderHeapType::GPU_UPLOAD: + bufferInfo.usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + bufferInfo.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; + createInfo.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + createInfo.requiredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + break; default: assert(false && "Unknown heap type."); break; @@ -3907,6 +3913,7 @@ namespace plume { capabilities.preferHDR = memoryHeapSize > (512 * 1024 * 1024); capabilities.triangleFan = true; capabilities.dynamicDepthBias = true; + capabilities.gpuUploadHeap = true; // TODO: Do a test buffer allocation with the required flags to set this. // Fill Vulkan-only capabilities. loadStoreOpNoneSupported = supportedOptionalExtensions.find(VK_EXT_LOAD_STORE_OP_NONE_EXTENSION_NAME) != supportedOptionalExtensions.end(); diff --git a/UnleashedRecomp/gpu/video.cpp b/UnleashedRecomp/gpu/video.cpp index e24ce4de..4dfa2e7e 100644 --- a/UnleashedRecomp/gpu/video.cpp +++ b/UnleashedRecomp/gpu/video.cpp @@ -2112,36 +2112,46 @@ static void* LockVertexBuffer(GuestBuffer* buffer, uint32_t, uint32_t, uint32_t template static void UnlockBuffer(GuestBuffer* buffer, bool useCopyQueue) { - auto uploadBuffer = g_device->createBuffer(RenderBufferDesc::UploadBuffer(buffer->dataSize)); + auto copyBuffer = [&](T* dest) + { + auto src = reinterpret_cast(buffer->mappedMemory); - auto dest = reinterpret_cast(uploadBuffer->map()); - auto src = reinterpret_cast(buffer->mappedMemory); - - for (size_t i = 0; i < buffer->dataSize; i += sizeof(T)) - { - *dest = ByteSwap(*src); - ++dest; - ++src; - } - - uploadBuffer->unmap(); - - if (useCopyQueue) - { - ExecuteCopyCommandList([&] + for (size_t i = 0; i < buffer->dataSize; i += sizeof(T)) { - g_copyCommandList->copyBufferRegion(buffer->buffer->at(0), uploadBuffer->at(0), buffer->dataSize); - }); + *dest = ByteSwap(*src); + ++dest; + ++src; + } + }; + + if (useCopyQueue && (g_capabilities.uma || g_capabilities.gpuUploadHeap)) + { + copyBuffer(reinterpret_cast(buffer->buffer->map())); + buffer->buffer->unmap(); } else { - auto& commandList = g_commandLists[g_frame]; + auto uploadBuffer = g_device->createBuffer(RenderBufferDesc::UploadBuffer(buffer->dataSize)); + copyBuffer(reinterpret_cast(uploadBuffer->map())); + uploadBuffer->unmap(); - commandList->barriers(RenderBarrierStage::COPY, RenderBufferBarrier(buffer->buffer.get(), RenderBufferAccess::WRITE)); - commandList->copyBufferRegion(buffer->buffer->at(0), uploadBuffer->at(0), buffer->dataSize); - commandList->barriers(RenderBarrierStage::GRAPHICS, RenderBufferBarrier(buffer->buffer.get(), RenderBufferAccess::READ)); + if (useCopyQueue) + { + ExecuteCopyCommandList([&] + { + g_copyCommandList->copyBufferRegion(buffer->buffer->at(0), uploadBuffer->at(0), buffer->dataSize); + }); + } + else + { + auto& commandList = g_commandLists[g_frame]; - g_tempBuffers[g_frame].emplace_back(std::move(uploadBuffer)); + commandList->barriers(RenderBarrierStage::COPY, RenderBufferBarrier(buffer->buffer.get(), RenderBufferAccess::WRITE)); + commandList->copyBufferRegion(buffer->buffer->at(0), uploadBuffer->at(0), buffer->dataSize); + commandList->barriers(RenderBarrierStage::GRAPHICS, RenderBufferBarrier(buffer->buffer.get(), RenderBufferAccess::READ)); + + g_tempBuffers[g_frame].emplace_back(std::move(uploadBuffer)); + } } } @@ -2339,6 +2349,7 @@ static void DrawProfiler() ImGui::Text("Device Type: %s", DeviceTypeName(g_device->getDescription().type)); ImGui::Text("VRAM: %.2f MiB", (double)(g_device->getDescription().dedicatedVideoMemory) / (1024.0 * 1024.0)); ImGui::Text("UMA: %s", g_capabilities.uma ? "Supported" : "Unsupported"); + ImGui::Text("GPU Upload Heap: %s", g_capabilities.gpuUploadHeap ? "Supported" : "Unsupported"); const char* sdlVideoDriver = SDL_GetCurrentVideoDriver(); if (sdlVideoDriver != nullptr) @@ -3019,10 +3030,15 @@ static GuestTexture* CreateTexture(uint32_t width, uint32_t height, uint32_t dep return texture; } +static RenderHeapType GetBufferHeapType() +{ + return g_capabilities.gpuUploadHeap ? RenderHeapType::GPU_UPLOAD : RenderHeapType::DEFAULT; +} + static GuestBuffer* CreateVertexBuffer(uint32_t length) { auto buffer = g_userHeap.AllocPhysical(ResourceType::VertexBuffer); - buffer->buffer = g_device->createBuffer(RenderBufferDesc::VertexBuffer(length, RenderHeapType::DEFAULT, RenderBufferFlag::INDEX)); + buffer->buffer = g_device->createBuffer(RenderBufferDesc::VertexBuffer(length, GetBufferHeapType(), RenderBufferFlag::INDEX)); buffer->dataSize = length; #ifdef _DEBUG buffer->buffer->setName(fmt::format("Vertex Buffer {:X}", g_memory.MapVirtual(buffer))); @@ -3033,7 +3049,7 @@ static GuestBuffer* CreateVertexBuffer(uint32_t length) static GuestBuffer* CreateIndexBuffer(uint32_t length, uint32_t, uint32_t format) { auto buffer = g_userHeap.AllocPhysical(ResourceType::IndexBuffer); - buffer->buffer = g_device->createBuffer(RenderBufferDesc::IndexBuffer(length, RenderHeapType::DEFAULT)); + buffer->buffer = g_device->createBuffer(RenderBufferDesc::IndexBuffer(length, GetBufferHeapType())); buffer->dataSize = length; buffer->format = ConvertFormat(format); buffer->guestFormat = format;