mirror of
				https://github.com/hedge-dev/UnleashedRecomp.git
				synced 2025-10-30 07:11:05 +00:00 
			
		
		
		
	GPU Upload Heap & UMA. (#1421)
	
		
			
	
		
	
	
		
	
		
			Some checks are pending
		
		
	
	
		
			
				
	
				validate-internal / build (push) Waiting to run
				
			
		
		
	
	
				
					
				
			
		
			Some checks are pending
		
		
	
	validate-internal / build (push) Waiting to run
				
* Initial work for GPU upload heap & UMA.
* Finish D3D12 Support.
* Rework the logic for the GPU Upload Heap fallback.
* Only enable UMA on Vulkan on integrated GPUs.
* Fix D3D12 fallback condition.

---------

Co-authored-by: Dario <dariosamo@gmail.com>
This commit is contained in:
		
							parent
							
								
									1c1dc09006
								
							
						
					
					
						commit
						d15bb7a501
					
				
					 6 changed files with 114 additions and 37 deletions
				
			
		|  | @ -310,7 +310,11 @@ endif() | |||
if (UNLEASHED_RECOMP_D3D12)
    find_package(directx-headers CONFIG REQUIRED)
    find_package(directx12-agility CONFIG REQUIRED)
    # D3D12MA_USING_DIRECTX_HEADERS: build D3D12MA against DirectX-Headers.
    # D3D12MA_OPTIONS16_SUPPORTED: enable D3D12MA's GPU Upload heap support (OPTIONS16).
    target_compile_definitions(UnleashedRecomp PRIVATE
        UNLEASHED_RECOMP_D3D12
        D3D12MA_USING_DIRECTX_HEADERS
        D3D12MA_OPTIONS16_SUPPORTED
    )
endif()
| 
 | ||||
| if (CMAKE_SYSTEM_NAME MATCHES "Linux") | ||||
|  |  | |||
|  | @ -442,6 +442,8 @@ namespace plume { | |||
|             return D3D12_HEAP_TYPE_UPLOAD; | ||||
|         case RenderHeapType::READBACK: | ||||
|             return D3D12_HEAP_TYPE_READBACK; | ||||
|         case RenderHeapType::GPU_UPLOAD: | ||||
|             return D3D12_HEAP_TYPE_GPU_UPLOAD; | ||||
|         default: | ||||
|             assert(false && "Unknown heap type."); | ||||
|             return D3D12_HEAP_TYPE_DEFAULT; | ||||
|  | @ -2385,7 +2387,7 @@ namespace plume { | |||
|             range.End = readRange->end; | ||||
|         } | ||||
| 
 | ||||
|         void *outputData; | ||||
|         void *outputData = nullptr; | ||||
|         d3d->Map(subresource, (readRange != nullptr) ? &range : nullptr, &outputData); | ||||
|         return outputData; | ||||
|     } | ||||
|  | @ -2629,14 +2631,22 @@ namespace plume { | |||
| 
 | ||||
|     // D3D12Pool
 | ||||
| 
 | ||||
|     D3D12Pool::D3D12Pool(D3D12Device *device, const RenderPoolDesc &desc) { | ||||
|     D3D12Pool::D3D12Pool(D3D12Device *device, const RenderPoolDesc &desc, bool gpuUploadHeapFallback) { | ||||
|         assert(device != nullptr); | ||||
| 
 | ||||
|         this->device = device; | ||||
|         this->desc = desc; | ||||
| 
 | ||||
|         D3D12MA::POOL_DESC poolDesc = {}; | ||||
|         poolDesc.HeapProperties.Type = toD3D12(desc.heapType); | ||||
| 
 | ||||
|         // When using an UMA architecture without explicit support for GPU Upload heaps, we instead just make a custom heap with the same properties as Upload heaps.
 | ||||
|         if ((desc.heapType == RenderHeapType::GPU_UPLOAD) && gpuUploadHeapFallback) { | ||||
|             poolDesc.HeapProperties = device->d3d->GetCustomHeapProperties(0, D3D12_HEAP_TYPE_UPLOAD); | ||||
|         } | ||||
|         else { | ||||
|             poolDesc.HeapProperties.Type = toD3D12(desc.heapType); | ||||
|         } | ||||
| 
 | ||||
|         poolDesc.MinBlockCount = desc.minBlockCount; | ||||
|         poolDesc.MaxBlockCount = desc.maxBlockCount; | ||||
|         poolDesc.Flags |= desc.useLinearAlgorithm ? D3D12MA::POOL_FLAG_ALGORITHM_LINEAR : D3D12MA::POOL_FLAG_NONE; | ||||
|  | @ -3391,12 +3401,14 @@ namespace plume { | |||
|                 triangleFanSupportOption = d3d12Options15.TriangleFanSupported; | ||||
|             } | ||||
|              | ||||
|             // Check if dynamic depth bias is supported.
 | ||||
|             // Check if dynamic depth bias and GPU upload heap are supported.
 | ||||
|             bool dynamicDepthBiasOption = false; | ||||
|             bool gpuUploadHeapOption = false; | ||||
|             D3D12_FEATURE_DATA_D3D12_OPTIONS16 d3d12Options16 = {}; | ||||
|             res = deviceOption->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS16, &d3d12Options16, sizeof(d3d12Options16)); | ||||
|             if (SUCCEEDED(res)) { | ||||
|                 dynamicDepthBiasOption = d3d12Options16.DynamicDepthBiasSupported; | ||||
|                 gpuUploadHeapOption = d3d12Options16.GPUUploadHeapSupported; | ||||
|             } | ||||
| 
 | ||||
|             // Check if the architecture has UMA.
 | ||||
|  | @ -3431,6 +3443,11 @@ namespace plume { | |||
|                 capabilities.triangleFan = triangleFanSupportOption; | ||||
|                 capabilities.dynamicDepthBias = dynamicDepthBiasOption; | ||||
|                 capabilities.uma = uma; | ||||
| 
 | ||||
|                 // Pretend GPU Upload heaps are supported if UMA is supported, as the backend has a workaround using a custom pool for it.
 | ||||
|                 capabilities.gpuUploadHeap = uma || gpuUploadHeapOption; | ||||
|                 gpuUploadHeapFallback = uma && !gpuUploadHeapOption; | ||||
| 
 | ||||
|                 description.name = deviceName; | ||||
|                 description.dedicatedVideoMemory = adapterDesc.DedicatedVideoMemory; | ||||
|                 description.vendor = RenderDeviceVendor(adapterDesc.VendorId); | ||||
|  | @ -3528,6 +3545,13 @@ namespace plume { | |||
|         colorTargetHeapAllocator = std::make_unique<D3D12DescriptorHeapAllocator>(this, TargetDescriptorHeapSize, D3D12_DESCRIPTOR_HEAP_TYPE_RTV); | ||||
|         depthTargetHeapAllocator = std::make_unique<D3D12DescriptorHeapAllocator>(this, TargetDescriptorHeapSize, D3D12_DESCRIPTOR_HEAP_TYPE_DSV); | ||||
| 
 | ||||
|         // Create the custom upload pool that will be used as the fallback when using an UMA architecture without explicit support for GPU Upload heaps.
 | ||||
|         if (gpuUploadHeapFallback) { | ||||
|             RenderPoolDesc poolDesc; | ||||
|             poolDesc.heapType = RenderHeapType::GPU_UPLOAD; | ||||
|             customUploadPool = std::make_unique<D3D12Pool>(this, poolDesc, true); | ||||
|         } | ||||
| 
 | ||||
|         // Create a command queue only for retrieving the timestamp frequency. Delete it immediately afterwards.
 | ||||
|         std::unique_ptr<D3D12CommandQueue> timestampCommandQueue = std::make_unique<D3D12CommandQueue>(this, RenderCommandListType::DIRECT); | ||||
|         res = timestampCommandQueue->d3d->GetTimestampFrequency(×tampFrequency); | ||||
|  | @ -3577,7 +3601,12 @@ namespace plume { | |||
|     } | ||||
|      | ||||
|     std::unique_ptr<RenderBuffer> D3D12Device::createBuffer(const RenderBufferDesc &desc) { | ||||
|         return std::make_unique<D3D12Buffer>(this, nullptr, desc); | ||||
|         if ((desc.heapType == RenderHeapType::GPU_UPLOAD) && gpuUploadHeapFallback) { | ||||
|             return std::make_unique<D3D12Buffer>(this, customUploadPool.get(), desc); | ||||
|         } | ||||
|         else { | ||||
|             return std::make_unique<D3D12Buffer>(this, nullptr, desc); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     std::unique_ptr<RenderTexture> D3D12Device::createTexture(const RenderTextureDesc &desc) { | ||||
|  | @ -3589,7 +3618,7 @@ namespace plume { | |||
|     } | ||||
| 
 | ||||
|     std::unique_ptr<RenderPool> D3D12Device::createPool(const RenderPoolDesc &desc) { | ||||
|         return std::make_unique<D3D12Pool>(this, desc); | ||||
|         return std::make_unique<D3D12Pool>(this, desc, gpuUploadHeapFallback); | ||||
|     } | ||||
| 
 | ||||
|     std::unique_ptr<RenderPipelineLayout> D3D12Device::createPipelineLayout(const RenderPipelineLayoutDesc &desc) { | ||||
|  |  | |||
|  | @ -329,7 +329,7 @@ namespace plume { | |||
|         D3D12Device *device = nullptr; | ||||
|         RenderPoolDesc desc; | ||||
| 
 | ||||
|         D3D12Pool(D3D12Device *device, const RenderPoolDesc &desc); | ||||
|         D3D12Pool(D3D12Device *device, const RenderPoolDesc &desc, bool gpuUploadHeapFallback); | ||||
|         ~D3D12Pool() override; | ||||
|         std::unique_ptr<RenderBuffer> createBuffer(const RenderBufferDesc &desc) override; | ||||
|         std::unique_ptr<RenderTexture> createTexture(const RenderTextureDesc &desc) override; | ||||
|  | @ -430,9 +430,11 @@ namespace plume { | |||
|         std::unique_ptr<D3D12DescriptorHeapAllocator> samplerHeapAllocator; | ||||
|         std::unique_ptr<D3D12DescriptorHeapAllocator> colorTargetHeapAllocator; | ||||
|         std::unique_ptr<D3D12DescriptorHeapAllocator> depthTargetHeapAllocator; | ||||
|         std::unique_ptr<D3D12Pool> customUploadPool; | ||||
|         RenderDeviceCapabilities capabilities; | ||||
|         RenderDeviceDescription description; | ||||
|         uint64_t timestampFrequency = 1; | ||||
|         bool gpuUploadHeapFallback = false; | ||||
| 
 | ||||
|         D3D12Device(D3D12Interface *renderInterface, const std::string &preferredDeviceName); | ||||
|         ~D3D12Device() override; | ||||
|  |  | |||
|  | @ -351,7 +351,8 @@ namespace plume { | |||
|         UNKNOWN, | ||||
|         DEFAULT, | ||||
|         UPLOAD, | ||||
|         READBACK | ||||
|         READBACK, | ||||
|         GPU_UPLOAD | ||||
|     }; | ||||
| 
 | ||||
|     enum class RenderTextureArrangement { | ||||
|  | @ -1807,6 +1808,9 @@ namespace plume { | |||
| 
 | ||||
|         // UMA.
 | ||||
|         bool uma = false; | ||||
| 
 | ||||
|         // GPU Upload heap.
 | ||||
|         bool gpuUploadHeap = false; | ||||
|     }; | ||||
| 
 | ||||
|     struct RenderInterfaceCapabilities { | ||||
|  |  | |||
|  | @ -808,6 +808,12 @@ namespace plume { | |||
|             bufferInfo.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; | ||||
|             createInfo.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; | ||||
|             break; | ||||
|         case RenderHeapType::GPU_UPLOAD: | ||||
|             bufferInfo.usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT; | ||||
|             bufferInfo.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; | ||||
|             createInfo.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; | ||||
|             createInfo.requiredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; | ||||
|             break; | ||||
|         default: | ||||
|             assert(false && "Unknown heap type."); | ||||
|             break; | ||||
|  | @ -833,7 +839,7 @@ namespace plume { | |||
|         } | ||||
| 
 | ||||
|         if (res != VK_SUCCESS) { | ||||
|             fprintf(stderr, "vkCreateBuffer failed with error code 0x%X.\n", res); | ||||
|             fprintf(stderr, "vmaCreateBuffer failed with error code 0x%X.\n", res); | ||||
|             return; | ||||
|         } | ||||
|     } | ||||
|  | @ -3887,6 +3893,15 @@ namespace plume { | |||
|         VkDeviceSize memoryHeapSize = 0; | ||||
|         const VkPhysicalDeviceMemoryProperties *memoryProps = nullptr; | ||||
|         vmaGetMemoryProperties(allocator, &memoryProps); | ||||
| 
 | ||||
|         constexpr VkMemoryPropertyFlags uploadHeapPropertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; | ||||
|         bool hasHostVisibleDeviceLocalMemory = false; | ||||
|         for (uint32_t i = 0; i < memoryProps->memoryTypeCount; i++) { | ||||
|             if ((memoryProps->memoryTypes[i].propertyFlags & uploadHeapPropertyFlags) == uploadHeapPropertyFlags) { | ||||
|                 hasHostVisibleDeviceLocalMemory = true; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         for (uint32_t i = 0; i < memoryProps->memoryHeapCount; i++) { | ||||
|             if (memoryProps->memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) { | ||||
|                 memoryHeapSize = std::max(memoryProps->memoryHeaps[i].size, memoryHeapSize); | ||||
|  | @ -3907,6 +3922,8 @@ namespace plume { | |||
|         capabilities.preferHDR = memoryHeapSize > (512 * 1024 * 1024); | ||||
|         capabilities.triangleFan = true; | ||||
|         capabilities.dynamicDepthBias = true; | ||||
|         capabilities.uma = (description.type == RenderDeviceType::INTEGRATED) && hasHostVisibleDeviceLocalMemory; | ||||
|         capabilities.gpuUploadHeap = capabilities.uma; | ||||
| 
 | ||||
|         // Fill Vulkan-only capabilities.
 | ||||
|         loadStoreOpNoneSupported = supportedOptionalExtensions.find(VK_EXT_LOAD_STORE_OP_NONE_EXTENSION_NAME) != supportedOptionalExtensions.end(); | ||||
|  |  | |||
|  | @ -2114,40 +2114,54 @@ static void* LockVertexBuffer(GuestBuffer* buffer, uint32_t, uint32_t, uint32_t | |||
|     return LockBuffer(buffer, flags); | ||||
| } | ||||
| 
 | ||||
| static std::atomic<uint32_t> g_bufferUploadCount = 0; | ||||
| 
 | ||||
| template<typename T> | ||||
| static void UnlockBuffer(GuestBuffer* buffer, bool useCopyQueue) | ||||
| { | ||||
|     auto uploadBuffer = g_device->createBuffer(RenderBufferDesc::UploadBuffer(buffer->dataSize)); | ||||
|     auto copyBuffer = [&](T* dest) | ||||
|         { | ||||
|             auto src = reinterpret_cast<const T*>(buffer->mappedMemory); | ||||
| 
 | ||||
|     auto dest = reinterpret_cast<T*>(uploadBuffer->map()); | ||||
|     auto src = reinterpret_cast<const T*>(buffer->mappedMemory); | ||||
| 
 | ||||
|     for (size_t i = 0; i < buffer->dataSize; i += sizeof(T)) | ||||
|     { | ||||
|         *dest = ByteSwap(*src); | ||||
|         ++dest; | ||||
|         ++src; | ||||
|     } | ||||
| 
 | ||||
|     uploadBuffer->unmap(); | ||||
| 
 | ||||
|     if (useCopyQueue) | ||||
|     { | ||||
|         ExecuteCopyCommandList([&] | ||||
|             for (size_t i = 0; i < buffer->dataSize; i += sizeof(T)) | ||||
|             { | ||||
|                 g_copyCommandList->copyBufferRegion(buffer->buffer->at(0), uploadBuffer->at(0), buffer->dataSize); | ||||
|             }); | ||||
|                 *dest = ByteSwap(*src); | ||||
|                 ++dest; | ||||
|                 ++src; | ||||
|             } | ||||
|         }; | ||||
| 
 | ||||
|     if (useCopyQueue && g_capabilities.gpuUploadHeap) | ||||
|     { | ||||
|         copyBuffer(reinterpret_cast<T*>(buffer->buffer->map())); | ||||
|         buffer->buffer->unmap(); | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         auto& commandList = g_commandLists[g_frame]; | ||||
|         auto uploadBuffer = g_device->createBuffer(RenderBufferDesc::UploadBuffer(buffer->dataSize)); | ||||
|         copyBuffer(reinterpret_cast<T*>(uploadBuffer->map())); | ||||
|         uploadBuffer->unmap(); | ||||
| 
 | ||||
|         commandList->barriers(RenderBarrierStage::COPY, RenderBufferBarrier(buffer->buffer.get(), RenderBufferAccess::WRITE)); | ||||
|         commandList->copyBufferRegion(buffer->buffer->at(0), uploadBuffer->at(0), buffer->dataSize); | ||||
|         commandList->barriers(RenderBarrierStage::GRAPHICS, RenderBufferBarrier(buffer->buffer.get(), RenderBufferAccess::READ)); | ||||
|         if (useCopyQueue) | ||||
|         { | ||||
|             ExecuteCopyCommandList([&] | ||||
|                 { | ||||
|                     g_copyCommandList->copyBufferRegion(buffer->buffer->at(0), uploadBuffer->at(0), buffer->dataSize); | ||||
|                 }); | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|             auto& commandList = g_commandLists[g_frame]; | ||||
| 
 | ||||
|         g_tempBuffers[g_frame].emplace_back(std::move(uploadBuffer)); | ||||
|             commandList->barriers(RenderBarrierStage::COPY, RenderBufferBarrier(buffer->buffer.get(), RenderBufferAccess::WRITE)); | ||||
|             commandList->copyBufferRegion(buffer->buffer->at(0), uploadBuffer->at(0), buffer->dataSize); | ||||
|             commandList->barriers(RenderBarrierStage::GRAPHICS, RenderBufferBarrier(buffer->buffer.get(), RenderBufferAccess::READ)); | ||||
| 
 | ||||
|             g_tempBuffers[g_frame].emplace_back(std::move(uploadBuffer)); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     g_bufferUploadCount++; | ||||
| } | ||||
| 
 | ||||
| template<typename T> | ||||
|  | @ -2329,6 +2343,7 @@ static void DrawProfiler() | |||
|         ImGui::Text("Heap Allocated: %d MB", int32_t(diagnostics.allocated / (1024 * 1024))); | ||||
|         ImGui::Text("Physical Heap Allocated: %d MB", int32_t(physicalDiagnostics.allocated / (1024 * 1024))); | ||||
|         ImGui::Text("GPU Waits: %d", int32_t(g_waitForGPUCount)); | ||||
|         ImGui::Text("Buffer Uploads: %d", int32_t(g_bufferUploadCount)); | ||||
|         ImGui::NewLine(); | ||||
| 
 | ||||
|         ImGui::Text("Present Wait: %s", g_capabilities.presentWait ? "Supported" : "Unsupported"); | ||||
|  | @ -2344,6 +2359,7 @@ static void DrawProfiler() | |||
|         ImGui::Text("Device Type: %s", DeviceTypeName(g_device->getDescription().type)); | ||||
|         ImGui::Text("VRAM: %.2f MiB", (double)(g_device->getDescription().dedicatedVideoMemory) / (1024.0 * 1024.0)); | ||||
|         ImGui::Text("UMA: %s", g_capabilities.uma ? "Supported" : "Unsupported"); | ||||
|         ImGui::Text("GPU Upload Heap: %s", g_capabilities.gpuUploadHeap ? "Supported" : "Unsupported"); | ||||
| 
 | ||||
|         const char* sdlVideoDriver = SDL_GetCurrentVideoDriver(); | ||||
|         if (sdlVideoDriver != nullptr) | ||||
|  | @ -3024,10 +3040,15 @@ static GuestTexture* CreateTexture(uint32_t width, uint32_t height, uint32_t dep | |||
|     return texture; | ||||
| } | ||||
| 
 | ||||
| static RenderHeapType GetBufferHeapType() | ||||
| { | ||||
|     return g_capabilities.gpuUploadHeap ? RenderHeapType::GPU_UPLOAD : RenderHeapType::DEFAULT; | ||||
| } | ||||
| 
 | ||||
| static GuestBuffer* CreateVertexBuffer(uint32_t length)  | ||||
| { | ||||
|     auto buffer = g_userHeap.AllocPhysical<GuestBuffer>(ResourceType::VertexBuffer); | ||||
|     buffer->buffer = g_device->createBuffer(RenderBufferDesc::VertexBuffer(length, RenderHeapType::DEFAULT, RenderBufferFlag::INDEX)); | ||||
|     buffer->buffer = g_device->createBuffer(RenderBufferDesc::VertexBuffer(length, GetBufferHeapType(), RenderBufferFlag::INDEX)); | ||||
|     buffer->dataSize = length; | ||||
| #ifdef _DEBUG  | ||||
|     buffer->buffer->setName(fmt::format("Vertex Buffer {:X}", g_memory.MapVirtual(buffer))); | ||||
|  | @ -3038,7 +3059,7 @@ static GuestBuffer* CreateVertexBuffer(uint32_t length) | |||
| static GuestBuffer* CreateIndexBuffer(uint32_t length, uint32_t, uint32_t format) | ||||
| { | ||||
|     auto buffer = g_userHeap.AllocPhysical<GuestBuffer>(ResourceType::IndexBuffer); | ||||
|     buffer->buffer = g_device->createBuffer(RenderBufferDesc::IndexBuffer(length, RenderHeapType::DEFAULT)); | ||||
|     buffer->buffer = g_device->createBuffer(RenderBufferDesc::IndexBuffer(length, GetBufferHeapType())); | ||||
|     buffer->dataSize = length; | ||||
|     buffer->format = ConvertFormat(format); | ||||
|     buffer->guestFormat = format; | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Skyth (Asilkan)
						Skyth (Asilkan)