From 24830e132667d1725c3e813f9df238879acbea06 Mon Sep 17 00:00:00 2001 From: Skyth <19259897+blueskythlikesclouds@users.noreply.github.com> Date: Sun, 6 Oct 2024 20:12:17 +0300 Subject: [PATCH] Initial graphics implementation. --- .gitmodules | 3 + UnleashedRecomp/CMakeLists.txt | 17 +- UnleashedRecomp/gpu/Window.cpp | 18 +- UnleashedRecomp/gpu/Window.h | 4 +- UnleashedRecomp/gpu/rhi/rt64_d3d12.cpp | 3688 +++++++++++++++++ UnleashedRecomp/gpu/rhi/rt64_d3d12.h | 450 ++ .../gpu/rhi/rt64_render_interface.h | 241 ++ .../gpu/rhi/rt64_render_interface_builders.h | 278 ++ .../gpu/rhi/rt64_render_interface_types.h | 1748 ++++++++ UnleashedRecomp/gpu/video.cpp | 2192 +++++++++- UnleashedRecomp/gpu/video.h | 338 +- UnleashedRecomp/main.cpp | 23 +- UnleashedRecomp/misc_impl.cpp | 33 +- UnleashedRecomp/ppc/config/SWA.toml | 17 +- UnleashedRecomp/stdafx.h | 1 + thirdparty/D3D12MemoryAllocator | 1 + thirdparty/PowerRecomp | 2 +- thirdparty/ddspp | 1 + 18 files changed, 9001 insertions(+), 54 deletions(-) create mode 100644 UnleashedRecomp/gpu/rhi/rt64_d3d12.cpp create mode 100644 UnleashedRecomp/gpu/rhi/rt64_d3d12.h create mode 100644 UnleashedRecomp/gpu/rhi/rt64_render_interface.h create mode 100644 UnleashedRecomp/gpu/rhi/rt64_render_interface_builders.h create mode 100644 UnleashedRecomp/gpu/rhi/rt64_render_interface_types.h create mode 160000 thirdparty/D3D12MemoryAllocator create mode 160000 thirdparty/ddspp diff --git a/.gitmodules b/.gitmodules index 5d35850..cd2eacb 100644 --- a/.gitmodules +++ b/.gitmodules @@ -8,3 +8,6 @@ [submodule "thirdparty/unordered_dense"] path = thirdparty/unordered_dense url = https://github.com/martinus/unordered_dense.git +[submodule "thirdparty/D3D12MemoryAllocator"] + path = thirdparty/D3D12MemoryAllocator + url = https://github.com/GPUOpen-LibrariesAndSDKs/D3D12MemoryAllocator.git diff --git a/UnleashedRecomp/CMakeLists.txt b/UnleashedRecomp/CMakeLists.txt index 71729da..5acc354 100644 --- a/UnleashedRecomp/CMakeLists.txt +++ 
b/UnleashedRecomp/CMakeLists.txt @@ -6,13 +6,14 @@ add_compile_definitions(SWA_IMPL) add_compile_definitions(SDL_MAIN_HANDLED) # Microsoft wtf? -add_compile_definitions(_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR ) +add_compile_definitions( + _DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR + _HAS_EXCEPTIONS=0) add_compile_options( - "/D_HAS_EXCEPTIONS=0" "/fp:strict" "/GS-" - "/EHa-" + "/EHs-c-" "-march=sandybridge" "-fno-strict-aliasing") @@ -41,6 +42,7 @@ set(SWA_CPU_CXX_SOURCES set(SWA_GPU_CXX_SOURCES "gpu/window.cpp" "gpu/video.cpp" + "gpu/rhi/rt64_d3d12.cpp" ) set(SWA_APU_CXX_SOURCES @@ -76,8 +78,15 @@ target_link_libraries(UnleashedRecomp PUBLIC winmm ntdll comctl32 + d3d12 + dxgi ) -target_include_directories(UnleashedRecomp PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) +target_include_directories(UnleashedRecomp PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} + ${SWA_THIRDPARTY_ROOT}/ddspp + ${SWA_THIRDPARTY_ROOT}/D3D12MemoryAllocator/include + ${SWA_THIRDPARTY_ROOT}/D3D12MemoryAllocator/src) + target_precompile_headers(UnleashedRecomp PUBLIC ${SWA_PRECOMPILED_HEADERS}) target_compile_definitions(PowerRecomp PRIVATE CONFIG_FILE_PATH=\"${CMAKE_CURRENT_SOURCE_DIR}/ppc/config/SWA.toml\") diff --git a/UnleashedRecomp/gpu/Window.cpp b/UnleashedRecomp/gpu/Window.cpp index 1bddcd5..071225b 100644 --- a/UnleashedRecomp/gpu/Window.cpp +++ b/UnleashedRecomp/gpu/Window.cpp @@ -1,9 +1,7 @@ #include "window.h" #include #include - -SDL_Window* Window::s_window = nullptr; -void* Window::s_windowHandle = nullptr; +#include void Window::Init() { @@ -15,5 +13,19 @@ void Window::Init() SDL_EventState(SDL_SYSWMEVENT, SDL_ENABLE); s_window = SDL_CreateWindow(title, SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 1280, 720, SDL_WINDOW_RESIZABLE); + + SDL_SysWMinfo info; + SDL_VERSION(&info.version); + SDL_GetWindowWMInfo(s_window, &info); + s_windowHandle = info.info.win.window; } +// CApplication::Update +PPC_FUNC_IMPL(__imp__sub_822C1130); +PPC_FUNC(sub_822C1130) +{ + SDL_PumpEvents(); + 
SDL_FlushEvents(SDL_FIRSTEVENT, SDL_LASTEVENT); + + __imp__sub_822C1130(ctx, base); +} diff --git a/UnleashedRecomp/gpu/Window.h b/UnleashedRecomp/gpu/Window.h index a045738..f21c386 100644 --- a/UnleashedRecomp/gpu/Window.h +++ b/UnleashedRecomp/gpu/Window.h @@ -3,8 +3,8 @@ struct Window { - static SDL_Window* s_window; - static void* s_windowHandle; + static inline SDL_Window* s_window; + static inline HWND s_windowHandle; static void Init(); }; diff --git a/UnleashedRecomp/gpu/rhi/rt64_d3d12.cpp b/UnleashedRecomp/gpu/rhi/rt64_d3d12.cpp new file mode 100644 index 0000000..a755df0 --- /dev/null +++ b/UnleashedRecomp/gpu/rhi/rt64_d3d12.cpp @@ -0,0 +1,3688 @@ +// +// RT64 +// + +#include "rt64_d3d12.h" + +#include + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wtautological-undefined-compare" +#pragma clang diagnostic ignored "-Wswitch" +#endif + +#include "D3D12MemAlloc.cpp" + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +#ifndef NDEBUG +# define D3D12_DEBUG_LAYER_ENABLED +# define D3D12_DEBUG_LAYER_BREAK_ON_ERROR true +# define D3D12_DEBUG_LAYER_BREAK_ON_WARNING false +# define D3D12_DEBUG_LAYER_SUPRESS_SAMPLE_POSITIONS_ERROR // Suppress the error message that's been fixed in newer Agility SDK versions. +#endif + +// Old Windows SDK versions don't provide this macro, so we work around it by making sure it is defined. +#ifndef D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE +#define D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE (D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE) +#endif + +namespace RT64 { + static const uint32_t ShaderDescriptorHeapSize = 65536; + static const uint32_t SamplerDescriptorHeapSize = 1024; + static const uint32_t TargetDescriptorHeapSize = 16384; + + // Common functions. 
+ + static std::wstring Utf8ToUtf16(const std::string_view& value) { + std::wstring wideStr; + wideStr.resize(MultiByteToWideChar(CP_UTF8, 0, value.data(), value.size(), nullptr, 0)); + MultiByteToWideChar(CP_UTF8, 0, value.data(), value.size(), wideStr.data(), wideStr.size()); + return wideStr; + } + + static std::string Utf16ToUtf8(const std::wstring_view& value) { + std::string multiByteStr; + multiByteStr.resize(WideCharToMultiByte(CP_UTF8, 0, value.data(), value.size(), nullptr, 0, nullptr, FALSE)); + WideCharToMultiByte(CP_UTF8, 0, value.data(), value.size(), multiByteStr.data(), multiByteStr.size(), nullptr, FALSE); + return multiByteStr; + } + + static uint32_t roundUp(uint32_t value, uint32_t powerOf2Alignment) { + return (value + powerOf2Alignment - 1) & ~(powerOf2Alignment - 1); + } + + static uint64_t roundUp(uint64_t value, uint64_t powerOf2Alignment) { + return (value + powerOf2Alignment - 1) & ~(powerOf2Alignment - 1); + } + + static DXGI_FORMAT toDXGI(RenderFormat format) { + switch (format) { + case RenderFormat::UNKNOWN: + return DXGI_FORMAT_UNKNOWN; + case RenderFormat::R32G32B32A32_TYPELESS: + return DXGI_FORMAT_R32G32B32A32_TYPELESS; + case RenderFormat::R32G32B32A32_FLOAT: + return DXGI_FORMAT_R32G32B32A32_FLOAT; + case RenderFormat::R32G32B32A32_UINT: + return DXGI_FORMAT_R32G32B32A32_UINT; + case RenderFormat::R32G32B32A32_SINT: + return DXGI_FORMAT_R32G32B32A32_SINT; + case RenderFormat::R32G32B32_TYPELESS: + return DXGI_FORMAT_R32G32B32_TYPELESS; + case RenderFormat::R32G32B32_FLOAT: + return DXGI_FORMAT_R32G32B32_FLOAT; + case RenderFormat::R32G32B32_UINT: + return DXGI_FORMAT_R32G32B32_UINT; + case RenderFormat::R32G32B32_SINT: + return DXGI_FORMAT_R32G32B32_SINT; + case RenderFormat::R16G16B16A16_TYPELESS: + return DXGI_FORMAT_R16G16B16A16_TYPELESS; + case RenderFormat::R16G16B16A16_FLOAT: + return DXGI_FORMAT_R16G16B16A16_FLOAT; + case RenderFormat::R16G16B16A16_UNORM: + return DXGI_FORMAT_R16G16B16A16_UNORM; + case 
RenderFormat::R16G16B16A16_UINT: + return DXGI_FORMAT_R16G16B16A16_UINT; + case RenderFormat::R16G16B16A16_SNORM: + return DXGI_FORMAT_R16G16B16A16_SNORM; + case RenderFormat::R16G16B16A16_SINT: + return DXGI_FORMAT_R16G16B16A16_SINT; + case RenderFormat::R32G32_TYPELESS: + return DXGI_FORMAT_R32G32_TYPELESS; + case RenderFormat::R32G32_FLOAT: + return DXGI_FORMAT_R32G32_FLOAT; + case RenderFormat::R32G32_UINT: + return DXGI_FORMAT_R32G32_UINT; + case RenderFormat::R32G32_SINT: + return DXGI_FORMAT_R32G32_SINT; + case RenderFormat::R8G8B8A8_TYPELESS: + return DXGI_FORMAT_R8G8B8A8_TYPELESS; + case RenderFormat::R8G8B8A8_UNORM: + return DXGI_FORMAT_R8G8B8A8_UNORM; + case RenderFormat::R8G8B8A8_UINT: + return DXGI_FORMAT_R8G8B8A8_UINT; + case RenderFormat::R8G8B8A8_SNORM: + return DXGI_FORMAT_R8G8B8A8_SNORM; + case RenderFormat::R8G8B8A8_SINT: + return DXGI_FORMAT_R8G8B8A8_SINT; + case RenderFormat::B8G8R8A8_UNORM: + return DXGI_FORMAT_B8G8R8A8_UNORM; + case RenderFormat::R16G16_TYPELESS: + return DXGI_FORMAT_R16G16_TYPELESS; + case RenderFormat::R16G16_FLOAT: + return DXGI_FORMAT_R16G16_FLOAT; + case RenderFormat::R16G16_UNORM: + return DXGI_FORMAT_R16G16_UNORM; + case RenderFormat::R16G16_UINT: + return DXGI_FORMAT_R16G16_UINT; + case RenderFormat::R16G16_SNORM: + return DXGI_FORMAT_R16G16_SNORM; + case RenderFormat::R16G16_SINT: + return DXGI_FORMAT_R16G16_SINT; + case RenderFormat::R32_TYPELESS: + return DXGI_FORMAT_R32_TYPELESS; + case RenderFormat::D32_FLOAT: + return DXGI_FORMAT_D32_FLOAT; + case RenderFormat::R32_FLOAT: + return DXGI_FORMAT_R32_FLOAT; + case RenderFormat::R32_UINT: + return DXGI_FORMAT_R32_UINT; + case RenderFormat::R32_SINT: + return DXGI_FORMAT_R32_SINT; + case RenderFormat::R8G8_TYPELESS: + return DXGI_FORMAT_R8G8_TYPELESS; + case RenderFormat::R8G8_UNORM: + return DXGI_FORMAT_R8G8_UNORM; + case RenderFormat::R8G8_UINT: + return DXGI_FORMAT_R8G8_UINT; + case RenderFormat::R8G8_SNORM: + return DXGI_FORMAT_R8G8_SNORM; + case 
RenderFormat::R8G8_SINT: + return DXGI_FORMAT_R8G8_SINT; + case RenderFormat::R16_TYPELESS: + return DXGI_FORMAT_R16_TYPELESS; + case RenderFormat::R16_FLOAT: + return DXGI_FORMAT_R16_FLOAT; + case RenderFormat::D16_UNORM: + return DXGI_FORMAT_D16_UNORM; + case RenderFormat::R16_UNORM: + return DXGI_FORMAT_R16_UNORM; + case RenderFormat::R16_UINT: + return DXGI_FORMAT_R16_UINT; + case RenderFormat::R16_SNORM: + return DXGI_FORMAT_R16_SNORM; + case RenderFormat::R16_SINT: + return DXGI_FORMAT_R16_SINT; + case RenderFormat::R8_TYPELESS: + return DXGI_FORMAT_R8_TYPELESS; + case RenderFormat::R8_UNORM: + return DXGI_FORMAT_R8_UNORM; + case RenderFormat::R8_UINT: + return DXGI_FORMAT_R8_UINT; + case RenderFormat::R8_SNORM: + return DXGI_FORMAT_R8_SNORM; + case RenderFormat::R8_SINT: + return DXGI_FORMAT_R8_SINT; + case RenderFormat::BC1_TYPELESS: + return DXGI_FORMAT_BC1_TYPELESS; + case RenderFormat::BC1_UNORM: + return DXGI_FORMAT_BC1_UNORM; + case RenderFormat::BC1_UNORM_SRGB: + return DXGI_FORMAT_BC1_UNORM_SRGB; + case RenderFormat::BC2_TYPELESS: + return DXGI_FORMAT_BC2_TYPELESS; + case RenderFormat::BC2_UNORM: + return DXGI_FORMAT_BC2_UNORM; + case RenderFormat::BC2_UNORM_SRGB: + return DXGI_FORMAT_BC2_UNORM_SRGB; + case RenderFormat::BC3_TYPELESS: + return DXGI_FORMAT_BC3_TYPELESS; + case RenderFormat::BC3_UNORM: + return DXGI_FORMAT_BC3_UNORM; + case RenderFormat::BC3_UNORM_SRGB: + return DXGI_FORMAT_BC3_UNORM_SRGB; + case RenderFormat::BC4_TYPELESS: + return DXGI_FORMAT_BC4_TYPELESS; + case RenderFormat::BC4_UNORM: + return DXGI_FORMAT_BC4_UNORM; + case RenderFormat::BC4_SNORM: + return DXGI_FORMAT_BC4_SNORM; + case RenderFormat::BC5_TYPELESS: + return DXGI_FORMAT_BC5_TYPELESS; + case RenderFormat::BC5_UNORM: + return DXGI_FORMAT_BC5_UNORM; + case RenderFormat::BC5_SNORM: + return DXGI_FORMAT_BC5_SNORM; + case RenderFormat::BC6H_TYPELESS: + return DXGI_FORMAT_BC6H_TYPELESS; + case RenderFormat::BC6H_UF16: + return DXGI_FORMAT_BC6H_UF16; + case 
RenderFormat::BC6H_SF16: + return DXGI_FORMAT_BC6H_SF16; + case RenderFormat::BC7_TYPELESS: + return DXGI_FORMAT_BC7_TYPELESS; + case RenderFormat::BC7_UNORM: + return DXGI_FORMAT_BC7_UNORM; + case RenderFormat::BC7_UNORM_SRGB: + return DXGI_FORMAT_BC7_UNORM_SRGB; + default: + assert(false && "Unknown format."); + return DXGI_FORMAT_FORCE_UINT; + } + } + + static D3D12_BLEND toD3D12(RenderBlend blend) { + switch (blend) { + case RenderBlend::ZERO: + return D3D12_BLEND_ZERO; + case RenderBlend::ONE: + return D3D12_BLEND_ONE; + case RenderBlend::SRC_COLOR: + return D3D12_BLEND_SRC_COLOR; + case RenderBlend::INV_SRC_COLOR: + return D3D12_BLEND_INV_SRC_COLOR; + case RenderBlend::SRC_ALPHA: + return D3D12_BLEND_SRC_ALPHA; + case RenderBlend::INV_SRC_ALPHA: + return D3D12_BLEND_INV_SRC_ALPHA; + case RenderBlend::DEST_ALPHA: + return D3D12_BLEND_DEST_ALPHA; + case RenderBlend::INV_DEST_ALPHA: + return D3D12_BLEND_INV_DEST_ALPHA; + case RenderBlend::DEST_COLOR: + return D3D12_BLEND_DEST_COLOR; + case RenderBlend::INV_DEST_COLOR: + return D3D12_BLEND_INV_DEST_COLOR; + case RenderBlend::SRC_ALPHA_SAT: + return D3D12_BLEND_SRC_ALPHA_SAT; + case RenderBlend::BLEND_FACTOR: + return D3D12_BLEND_BLEND_FACTOR; + case RenderBlend::INV_BLEND_FACTOR: + return D3D12_BLEND_INV_BLEND_FACTOR; + case RenderBlend::SRC1_COLOR: + return D3D12_BLEND_SRC1_COLOR; + case RenderBlend::INV_SRC1_COLOR: + return D3D12_BLEND_INV_SRC1_COLOR; + case RenderBlend::SRC1_ALPHA: + return D3D12_BLEND_SRC1_ALPHA; + case RenderBlend::INV_SRC1_ALPHA: + return D3D12_BLEND_INV_SRC1_ALPHA; + default: + assert(false && "Unknown blend."); + return D3D12_BLEND_ZERO; + } + } + + static D3D12_BLEND_OP toD3D12(RenderBlendOperation operation) { + switch (operation) { + case RenderBlendOperation::ADD: + return D3D12_BLEND_OP_ADD; + case RenderBlendOperation::SUBTRACT: + return D3D12_BLEND_OP_SUBTRACT; + case RenderBlendOperation::REV_SUBTRACT: + return D3D12_BLEND_OP_REV_SUBTRACT; + case RenderBlendOperation::MIN: + 
return D3D12_BLEND_OP_MIN; + case RenderBlendOperation::MAX: + return D3D12_BLEND_OP_MAX; + default: + assert(false && "Unknown blend operation."); + return D3D12_BLEND_OP_ADD; + } + } + + static D3D12_COLOR_WRITE_ENABLE toD3D12(RenderColorWriteEnable enable) { + return D3D12_COLOR_WRITE_ENABLE( + ((uint32_t(enable) & uint32_t(RenderColorWriteEnable::RED)) ? D3D12_COLOR_WRITE_ENABLE_RED : 0x0) | + ((uint32_t(enable) & uint32_t(RenderColorWriteEnable::GREEN)) ? D3D12_COLOR_WRITE_ENABLE_GREEN : 0x0) | + ((uint32_t(enable) & uint32_t(RenderColorWriteEnable::BLUE)) ? D3D12_COLOR_WRITE_ENABLE_BLUE : 0x0) | + ((uint32_t(enable) & uint32_t(RenderColorWriteEnable::ALPHA)) ? D3D12_COLOR_WRITE_ENABLE_ALPHA : 0x0) + ); + } + + static D3D12_LOGIC_OP toD3D12(RenderLogicOperation operation) { + switch (operation) { + case RenderLogicOperation::CLEAR: + return D3D12_LOGIC_OP_CLEAR; + case RenderLogicOperation::SET: + return D3D12_LOGIC_OP_SET; + case RenderLogicOperation::COPY: + return D3D12_LOGIC_OP_COPY; + case RenderLogicOperation::COPY_INVERTED: + return D3D12_LOGIC_OP_COPY_INVERTED; + case RenderLogicOperation::NOOP: + return D3D12_LOGIC_OP_NOOP; + case RenderLogicOperation::INVERT: + return D3D12_LOGIC_OP_INVERT; + case RenderLogicOperation::AND: + return D3D12_LOGIC_OP_AND; + case RenderLogicOperation::NAND: + return D3D12_LOGIC_OP_NAND; + case RenderLogicOperation::OR: + return D3D12_LOGIC_OP_OR; + case RenderLogicOperation::NOR: + return D3D12_LOGIC_OP_NOR; + case RenderLogicOperation::XOR: + return D3D12_LOGIC_OP_XOR; + case RenderLogicOperation::EQUIV: + return D3D12_LOGIC_OP_EQUIV; + case RenderLogicOperation::AND_REVERSE: + return D3D12_LOGIC_OP_AND_REVERSE; + case RenderLogicOperation::AND_INVERTED: + return D3D12_LOGIC_OP_AND_INVERTED; + case RenderLogicOperation::OR_REVERSE: + return D3D12_LOGIC_OP_OR_REVERSE; + case RenderLogicOperation::OR_INVERTED: + return D3D12_LOGIC_OP_OR_INVERTED; + default: + assert(false && "Unknown logic operation."); + return 
D3D12_LOGIC_OP_CLEAR; + } + } + + static D3D12_FILTER toFilter(RenderFilter minFilter, RenderFilter magFilter, RenderMipmapMode mipmapMode, bool anisotropyEnabled, bool comparisonEnabled) { + assert(minFilter != RenderFilter::UNKNOWN); + assert(magFilter != RenderFilter::UNKNOWN); + assert(mipmapMode != RenderMipmapMode::UNKNOWN); + + if (anisotropyEnabled) { + return comparisonEnabled ? D3D12_FILTER_COMPARISON_ANISOTROPIC : D3D12_FILTER_ANISOTROPIC; + } + else { + uint32_t filterInt = 0; + filterInt |= (mipmapMode == RenderMipmapMode::LINEAR) ? 0x1 : 0x0; + filterInt |= (magFilter == RenderFilter::LINEAR) ? 0x4 : 0x0; + filterInt |= (minFilter == RenderFilter::LINEAR) ? 0x10 : 0x0; + filterInt |= comparisonEnabled ? 0x80 : 0x0; + return D3D12_FILTER(filterInt); + } + } + + static D3D12_TEXTURE_ADDRESS_MODE toD3D12(RenderTextureAddressMode addressMode) { + switch (addressMode) { + case RenderTextureAddressMode::WRAP: + return D3D12_TEXTURE_ADDRESS_MODE_WRAP; + case RenderTextureAddressMode::MIRROR: + return D3D12_TEXTURE_ADDRESS_MODE_MIRROR; + case RenderTextureAddressMode::CLAMP: + return D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + case RenderTextureAddressMode::BORDER: + return D3D12_TEXTURE_ADDRESS_MODE_BORDER; + case RenderTextureAddressMode::MIRROR_ONCE: + return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE; + default: + assert(false && "Unknown texture address mode."); + return D3D12_TEXTURE_ADDRESS_MODE_WRAP; + } + } + + static D3D12_STATIC_BORDER_COLOR toStaticBorderColor(RenderBorderColor borderColor) { + switch (borderColor) { + case RenderBorderColor::TRANSPARENT_BLACK: + return D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; + case RenderBorderColor::OPAQUE_BLACK: + return D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK; + case RenderBorderColor::OPAQUE_WHITE: + return D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE; + default: + assert(false && "Unknown static border color."); + return D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; + } + } + + static D3D12_SHADER_VISIBILITY 
toD3D12(RenderShaderVisibility visibility) { + switch (visibility) { + case RenderShaderVisibility::ALL: + return D3D12_SHADER_VISIBILITY_ALL; + case RenderShaderVisibility::VERTEX: + return D3D12_SHADER_VISIBILITY_VERTEX; + case RenderShaderVisibility::GEOMETRY: + return D3D12_SHADER_VISIBILITY_GEOMETRY; + case RenderShaderVisibility::PIXEL: + return D3D12_SHADER_VISIBILITY_PIXEL; + default: + assert(false && "Unknown shader visibility."); + return D3D12_SHADER_VISIBILITY_ALL; + } + } + + static D3D12_INPUT_CLASSIFICATION toD3D12(RenderInputSlotClassification classification) { + switch (classification) { + case RenderInputSlotClassification::PER_VERTEX_DATA: + return D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; + case RenderInputSlotClassification::PER_INSTANCE_DATA: + return D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA; + default: + assert(false && "Unknown input classification."); + return D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; + } + } + + static D3D12_DESCRIPTOR_RANGE_TYPE toRangeType(RenderDescriptorRangeType type) { + switch (type) { + case RenderDescriptorRangeType::FORMATTED_BUFFER: + case RenderDescriptorRangeType::TEXTURE: + case RenderDescriptorRangeType::STRUCTURED_BUFFER: + case RenderDescriptorRangeType::BYTE_ADDRESS_BUFFER: + case RenderDescriptorRangeType::ACCELERATION_STRUCTURE: + return D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + case RenderDescriptorRangeType::READ_WRITE_FORMATTED_BUFFER: + case RenderDescriptorRangeType::READ_WRITE_TEXTURE: + case RenderDescriptorRangeType::READ_WRITE_STRUCTURED_BUFFER: + case RenderDescriptorRangeType::READ_WRITE_BYTE_ADDRESS_BUFFER: + return D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + case RenderDescriptorRangeType::CONSTANT_BUFFER: + return D3D12_DESCRIPTOR_RANGE_TYPE_CBV; + case RenderDescriptorRangeType::SAMPLER: + return D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + default: + assert(false && "Unknown descriptor range type."); + return D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + } + } + + static D3D12_HEAP_TYPE toD3D12(RenderHeapType 
type) { + switch (type) { + case RenderHeapType::DEFAULT: + return D3D12_HEAP_TYPE_DEFAULT; + case RenderHeapType::UPLOAD: + return D3D12_HEAP_TYPE_UPLOAD; + case RenderHeapType::READBACK: + return D3D12_HEAP_TYPE_READBACK; + default: + assert(false && "Unknown heap type."); + return D3D12_HEAP_TYPE_DEFAULT; + } + } + + static D3D12_COMPARISON_FUNC toD3D12(RenderComparisonFunction function) { + switch (function) { + case RenderComparisonFunction::NEVER: + return D3D12_COMPARISON_FUNC_NEVER; + case RenderComparisonFunction::LESS: + return D3D12_COMPARISON_FUNC_LESS; + case RenderComparisonFunction::EQUAL: + return D3D12_COMPARISON_FUNC_EQUAL; + case RenderComparisonFunction::LESS_EQUAL: + return D3D12_COMPARISON_FUNC_LESS_EQUAL; + case RenderComparisonFunction::GREATER: + return D3D12_COMPARISON_FUNC_GREATER; + case RenderComparisonFunction::NOT_EQUAL: + return D3D12_COMPARISON_FUNC_NOT_EQUAL; + case RenderComparisonFunction::GREATER_EQUAL: + return D3D12_COMPARISON_FUNC_GREATER_EQUAL; + case RenderComparisonFunction::ALWAYS: + return D3D12_COMPARISON_FUNC_ALWAYS; + default: + assert(false && "Unknown comparison function."); + return D3D12_COMPARISON_FUNC_NEVER; + } + } + + static D3D12_PRIMITIVE_TOPOLOGY toD3D12(RenderPrimitiveTopology topology) { + switch (topology) { + case RenderPrimitiveTopology::POINT_LIST: + return D3D_PRIMITIVE_TOPOLOGY_POINTLIST; + case RenderPrimitiveTopology::LINE_LIST: + return D3D_PRIMITIVE_TOPOLOGY_LINELIST; + case RenderPrimitiveTopology::LINE_STRIP: + return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; + case RenderPrimitiveTopology::TRIANGLE_LIST: + return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + case RenderPrimitiveTopology::TRIANGLE_STRIP: + return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; + default: + assert(false && "Unknown primitive topology."); + return D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; + } + } + + static D3D12_PRIMITIVE_TOPOLOGY_TYPE toTopologyType(RenderPrimitiveTopology topologyType) { + switch (topologyType) { + case 
RenderPrimitiveTopology::POINT_LIST: + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; + case RenderPrimitiveTopology::LINE_LIST: + case RenderPrimitiveTopology::LINE_STRIP: + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; + case RenderPrimitiveTopology::TRIANGLE_LIST: + case RenderPrimitiveTopology::TRIANGLE_STRIP: + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + default: + assert(false && "Unknown primitive topology type."); + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED; + } + } + + static D3D12_RESOURCE_DIMENSION toD3D12(RenderTextureDimension dimension) { + switch (dimension) { + case RenderTextureDimension::UNKNOWN: + return D3D12_RESOURCE_DIMENSION_UNKNOWN; + case RenderTextureDimension::TEXTURE_1D: + return D3D12_RESOURCE_DIMENSION_TEXTURE1D; + case RenderTextureDimension::TEXTURE_2D: + return D3D12_RESOURCE_DIMENSION_TEXTURE2D; + case RenderTextureDimension::TEXTURE_3D: + return D3D12_RESOURCE_DIMENSION_TEXTURE3D; + default: + assert(false && "Unknown resource dimension."); + return D3D12_RESOURCE_DIMENSION_UNKNOWN; + } + } + + static D3D12_TEXTURE_LAYOUT toD3D12(RenderTextureArrangement arrangement) { + switch (arrangement) { + case RenderTextureArrangement::UNKNOWN: + return D3D12_TEXTURE_LAYOUT_UNKNOWN; + case RenderTextureArrangement::ROW_MAJOR: + return D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + default: + assert(false && "Unknown texture arrangement."); + return D3D12_TEXTURE_LAYOUT_UNKNOWN; + } + } + + static D3D12_RESOURCE_STATES toBufferState(RenderBarrierStages stages, RenderBufferAccessBits accessBits, RenderBufferFlags bufferFlags) { + // The only allowed state for acceleration structures. + if (bufferFlags & RenderBufferFlag::ACCELERATION_STRUCTURE) { + return D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE; + } + + // Use copy-optimized states. 
+ if (stages == RenderBarrierStage::COPY) { + if (accessBits == RenderBufferAccess::WRITE) { + return D3D12_RESOURCE_STATE_COPY_DEST; + } + else if (accessBits == RenderBufferAccess::READ) { + return D3D12_RESOURCE_STATE_COPY_SOURCE; + } + } + + // Use unordered access state if the buffer supports it and writing is enabled. + if ((accessBits & RenderBufferAccess::WRITE) && (bufferFlags & RenderBufferFlag::UNORDERED_ACCESS)) { + return D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + } + + // If both stages are required and the buffer is read-only, use the all shader resource state. + if (stages == (RenderBarrierStage::GRAPHICS | RenderBarrierStage::COMPUTE)) { + if (accessBits == RenderBufferAccess::READ) { + return D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE; + } + } + + // Use graphics pipeline states. + if (stages == RenderBarrierStage::GRAPHICS) { + if (accessBits == RenderBufferAccess::READ) { + if (bufferFlags & (RenderBufferFlag::VERTEX | RenderBufferFlag::CONSTANT)) { + return D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER; + } + + if (bufferFlags & RenderBufferFlag::INDEX) { + return D3D12_RESOURCE_STATE_INDEX_BUFFER; + } + + return D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + } + } + + // Fall back to common state. + return D3D12_RESOURCE_STATE_COMMON; + } + + static D3D12_RESOURCE_STATES toTextureState(RenderBarrierStages stages, RenderTextureLayout textureLayout, RenderTextureFlags textureFlags) { + switch (textureLayout) { + case RenderTextureLayout::GENERAL: + return (textureFlags & RenderTextureFlag::UNORDERED_ACCESS) ? 
D3D12_RESOURCE_STATE_UNORDERED_ACCESS : D3D12_RESOURCE_STATE_COMMON; + case RenderTextureLayout::SHADER_READ: + switch (stages) { + case RenderBarrierStage::GRAPHICS: + return D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + case RenderBarrierStage::COMPUTE: + return D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + default: + return D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE; + } + case RenderTextureLayout::COLOR_WRITE: + return D3D12_RESOURCE_STATE_RENDER_TARGET; + case RenderTextureLayout::DEPTH_WRITE: + return D3D12_RESOURCE_STATE_DEPTH_WRITE; + case RenderTextureLayout::DEPTH_READ: + return D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_DEPTH_READ; + case RenderTextureLayout::COPY_SOURCE: + return D3D12_RESOURCE_STATE_COPY_SOURCE; + case RenderTextureLayout::COPY_DEST: + return D3D12_RESOURCE_STATE_COPY_DEST; + case RenderTextureLayout::RESOLVE_SOURCE: + return D3D12_RESOURCE_STATE_RESOLVE_SOURCE; + case RenderTextureLayout::RESOLVE_DEST: + return D3D12_RESOURCE_STATE_RESOLVE_DEST; + case RenderTextureLayout::PRESENT: + return D3D12_RESOURCE_STATE_PRESENT; + default: + assert(false && "Unknown texture layout."); + return D3D12_RESOURCE_STATE_COMMON; + } + } + + static D3D12_TEXTURE_COPY_LOCATION toD3D12(const RenderTextureCopyLocation &location) { + D3D12_TEXTURE_COPY_LOCATION loc; + switch (location.type) { + case RenderTextureCopyType::SUBRESOURCE: { + const D3D12Texture *interfaceTexture = static_cast(location.texture); + loc.pResource = (interfaceTexture != nullptr) ? 
interfaceTexture->d3d : nullptr; + loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + loc.SubresourceIndex = location.subresource.index; + break; + } + case RenderTextureCopyType::PLACED_FOOTPRINT: { + const D3D12Buffer *interfaceBuffer = static_cast(location.buffer); + const uint32_t blockWidth = RenderFormatBlockWidth(location.placedFootprint.format); + const uint32_t blockCount = (location.placedFootprint.rowWidth + blockWidth - 1) / blockWidth; + loc.pResource = (interfaceBuffer != nullptr) ? interfaceBuffer->d3d : nullptr; + loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + loc.PlacedFootprint.Offset = location.placedFootprint.offset; + loc.PlacedFootprint.Footprint.Format = toDXGI(location.placedFootprint.format); + loc.PlacedFootprint.Footprint.Width = ((location.placedFootprint.width + blockWidth - 1) / blockWidth) * blockWidth; + loc.PlacedFootprint.Footprint.Height = ((location.placedFootprint.height + blockWidth - 1) / blockWidth) * blockWidth; + loc.PlacedFootprint.Footprint.Depth = location.placedFootprint.depth; + loc.PlacedFootprint.Footprint.RowPitch = blockCount * RenderFormatSize(location.placedFootprint.format); + + // Test for conditions that might not be reported if the hardware doesn't complain about them. + assert(((loc.PlacedFootprint.Offset % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT) == 0) && "Resulting offset must be aligned to 512 bytes in D3D12."); + assert(((loc.PlacedFootprint.Footprint.RowPitch % D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) == 0) && "Resulting row pitch must be aligned to 256 bytes in D3D12."); + + break; + } + default: { + assert(false && "Unknown texture copy type."); + } + } + + return loc; + } + + static D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS toRTASBuildFlags(bool preferFastBuild, bool preferFastTrace) { + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_NONE; + flags |= preferFastBuild ? 
D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_BUILD : D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_NONE; + flags |= preferFastTrace ? D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_TRACE : D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_NONE; + return flags; + } + + static void setObjectName(ID3D12Object *object, const std::string &name) { + const std::wstring wideCharName = Utf8ToUtf16(name); + object->SetName(wideCharName.c_str()); + } + + // D3D12DescriptorHeapAllocator + + D3D12DescriptorHeapAllocator::D3D12DescriptorHeapAllocator(D3D12Device *device, uint32_t heapSize, D3D12_DESCRIPTOR_HEAP_TYPE heapType) { + assert(device != nullptr); + assert(heapSize > 0); + + this->device = device; + this->heapSize = heapSize; + this->freeSize = heapSize; + + D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; + heapDesc.NumDescriptors = heapSize; + heapDesc.Type = heapType; + descriptorHandleIncrement = device->d3d->GetDescriptorHandleIncrementSize(heapDesc.Type); + + HRESULT res = device->d3d->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&hostHeap)); + if (FAILED(res)) { + fprintf(stderr, "CreateDescriptorHeap failed with error code 0x%lX.\n", res); + return; + } + + hostCPUDescriptorHandle = hostHeap->GetCPUDescriptorHandleForHeapStart(); + + const bool shaderVisible = (heapType == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) || (heapType == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + if (shaderVisible) { + heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + res = device->d3d->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&shaderHeap)); + if (FAILED(res)) { + fprintf(stderr, "CreateDescriptorHeap failed with error code 0x%lX.\n", res); + return; + } + + shaderCPUDescriptorHandle = shaderHeap->GetCPUDescriptorHandleForHeapStart(); + shaderGPUDescriptorHandle = shaderHeap->GetGPUDescriptorHandleForHeapStart(); + } + + addFreeBlock(0, heapSize); + } + + D3D12DescriptorHeapAllocator::~D3D12DescriptorHeapAllocator() { + if (hostHeap != nullptr) 
{ + hostHeap->Release(); + } + + if (shaderHeap != nullptr) { + shaderHeap->Release(); + } + } + + void D3D12DescriptorHeapAllocator::addFreeBlock(uint32_t offset, uint32_t size) { + OffsetFreeBlockMap::iterator blockOffsetIt = offsetFreeBlockMap.emplace(offset, size).first; + SizeFreeBlockMap::iterator blockSizeIt = sizeFreeBlockMap.emplace(size, blockOffsetIt); + blockOffsetIt->second.sizeMapIterator = blockSizeIt; + } + + uint32_t D3D12DescriptorHeapAllocator::allocate(uint32_t size) { + const std::scoped_lock lock(allocationMutex); + if (freeSize < size) { + return INVALID_OFFSET; + } + + SizeFreeBlockMap::iterator blockSizeIt = sizeFreeBlockMap.lower_bound(size); + if (blockSizeIt == sizeFreeBlockMap.end()) { + return INVALID_OFFSET; + } + + OffsetFreeBlockMap::iterator blockOffsetIt = blockSizeIt->second; + uint32_t retOffset = blockOffsetIt->first; + uint32_t newOffset = retOffset + size; + uint32_t newSize = blockOffsetIt->second.size - size; + sizeFreeBlockMap.erase(blockSizeIt); + offsetFreeBlockMap.erase(blockOffsetIt); + if (newSize > 0) { + addFreeBlock(newOffset, newSize); + } + + freeSize -= size; + return retOffset; + } + + void D3D12DescriptorHeapAllocator::free(uint32_t offset, uint32_t size) { + const std::scoped_lock lock(allocationMutex); + OffsetFreeBlockMap::iterator nextBlockIt = offsetFreeBlockMap.upper_bound(offset); + OffsetFreeBlockMap::iterator prevBlockIt = nextBlockIt; + if (prevBlockIt != offsetFreeBlockMap.begin()) { + prevBlockIt--; + } + else { + prevBlockIt = offsetFreeBlockMap.end(); + } + + freeSize += size; + + // The previous free block is contiguous. + if ((prevBlockIt != offsetFreeBlockMap.end()) && (offset == (prevBlockIt->first + prevBlockIt->second.size))) { + size = prevBlockIt->second.size + size; + offset = prevBlockIt->first; + sizeFreeBlockMap.erase(prevBlockIt->second.sizeMapIterator); + offsetFreeBlockMap.erase(prevBlockIt); + } + + // The next free block is contiguous. 
+ if ((nextBlockIt != offsetFreeBlockMap.end()) && ((offset + size) == nextBlockIt->first)) { + size = size + nextBlockIt->second.size; + sizeFreeBlockMap.erase(nextBlockIt->second.sizeMapIterator); + offsetFreeBlockMap.erase(nextBlockIt); + } + + addFreeBlock(offset, size); + } + + D3D12_CPU_DESCRIPTOR_HANDLE D3D12DescriptorHeapAllocator::getHostCPUHandleAt(uint32_t index) const { + assert(index < heapSize); + assert(hostCPUDescriptorHandle.ptr > 0); + return { hostCPUDescriptorHandle.ptr + uint64_t(index) * descriptorHandleIncrement }; + } + + D3D12_CPU_DESCRIPTOR_HANDLE D3D12DescriptorHeapAllocator::getShaderCPUHandleAt(uint32_t index) const { + assert(index < heapSize); + assert(shaderCPUDescriptorHandle.ptr > 0); + return { shaderCPUDescriptorHandle.ptr + uint64_t(index) * descriptorHandleIncrement }; + } + + D3D12_GPU_DESCRIPTOR_HANDLE D3D12DescriptorHeapAllocator::getShaderGPUHandleAt(uint32_t index) const { + assert(index < heapSize); + assert(shaderGPUDescriptorHandle.ptr > 0); + return { shaderGPUDescriptorHandle.ptr + uint64_t(index) * descriptorHandleIncrement }; + } + + // D3D12DescriptorSet + + D3D12DescriptorSet::D3D12DescriptorSet(D3D12Device *device, const RenderDescriptorSetDesc &desc) { + assert(device != nullptr); + + this->device = device; + + // Figure out the total amount of entries that will be required. 
+ uint32_t rangeCount = desc.descriptorRangesCount; + uint32_t viewDescriptorCount = 0; + uint32_t samplerDescriptorCount = 0; + auto addDescriptor = [&](const RenderDescriptorRange &range, uint32_t descriptorCount) { + descriptorTypes.emplace_back(range.type); + + bool isDynamicSampler = (range.type == RenderDescriptorRangeType::SAMPLER) && (range.immutableSampler == nullptr); + if (isDynamicSampler) { + descriptorHeapIndices.emplace_back(samplerDescriptorCount); + samplerDescriptorCount += descriptorCount; + } + else { + descriptorHeapIndices.emplace_back(viewDescriptorCount); + viewDescriptorCount += descriptorCount; + } + }; + + if (desc.lastRangeIsBoundless) { + assert((desc.descriptorRangesCount > 0) && "There must be at least one descriptor set to define the last range as boundless."); + rangeCount--; + } + + for (uint32_t i = 0; i < rangeCount; i++) { + const RenderDescriptorRange &range = desc.descriptorRanges[i]; + for (uint32_t j = 0; j < range.count; j++) { + addDescriptor(range, 1); + } + } + + if (desc.lastRangeIsBoundless) { + const RenderDescriptorRange &lastDescriptorRange = desc.descriptorRanges[desc.descriptorRangesCount - 1]; + addDescriptor(lastDescriptorRange, desc.boundlessRangeSize); + } + + if (!descriptorTypes.empty()) { + descriptorTypeMaxIndex = uint32_t(descriptorTypes.size()) - 1; + } + + if (viewDescriptorCount > 0) { + viewAllocation.offset = device->viewHeapAllocator->allocate(viewDescriptorCount); + if (viewAllocation.offset == D3D12DescriptorHeapAllocator::INVALID_OFFSET) { + fprintf(stderr, "Allocator was unable to find free space for the set."); + return; + } + + viewAllocation.count = viewDescriptorCount; + } + + if (samplerDescriptorCount > 0) { + samplerAllocation.offset = device->samplerHeapAllocator->allocate(samplerDescriptorCount); + if (samplerAllocation.offset == D3D12DescriptorHeapAllocator::INVALID_OFFSET) { + fprintf(stderr, "Allocator was unable to find free space for the set."); + return; + } + + 
samplerAllocation.count = samplerDescriptorCount; + } + } + + D3D12DescriptorSet::~D3D12DescriptorSet() { + if (viewAllocation.count > 0) { + device->viewHeapAllocator->free(viewAllocation.offset, viewAllocation.count); + } + + if (samplerAllocation.count > 0) { + device->samplerHeapAllocator->free(samplerAllocation.offset, samplerAllocation.count); + } + } + + void D3D12DescriptorSet::setBuffer(uint32_t descriptorIndex, const RenderBuffer *buffer, uint64_t bufferSize, const RenderBufferStructuredView *bufferStructuredView, const RenderBufferFormattedView *bufferFormattedView) { + const D3D12Buffer *interfaceBuffer = static_cast(buffer); + ID3D12Resource *nativeResource = (interfaceBuffer != nullptr) ? interfaceBuffer->d3d : nullptr; + uint32_t descriptorIndexClamped = std::min(descriptorIndex, descriptorTypeMaxIndex); + RenderDescriptorRangeType descriptorType = descriptorTypes[descriptorIndexClamped]; + switch (descriptorType) { + case RenderDescriptorRangeType::CONSTANT_BUFFER: { + uint64_t bufferViewSize = bufferSize; + if ((bufferSize == 0) && (interfaceBuffer != nullptr)) { + bufferViewSize = interfaceBuffer->desc.size; + } + + setCBV(descriptorIndex, nativeResource, bufferViewSize); + break; + } + case RenderDescriptorRangeType::FORMATTED_BUFFER: { + assert((bufferStructuredView == nullptr) && "Can't use structured view on texture buffers."); + if (nativeResource != nullptr) { + assert((bufferFormattedView != nullptr) && "A view must be provided for formatted buffers."); + const D3D12BufferFormattedView *interfaceBufferFormattedView = static_cast(bufferFormattedView); + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + srvDesc.Format = toDXGI(interfaceBufferFormattedView->format); + srvDesc.Buffer.Flags = (descriptorType == RenderDescriptorRangeType::BYTE_ADDRESS_BUFFER) ? 
D3D12_BUFFER_SRV_FLAG_RAW : D3D12_BUFFER_SRV_FLAG_NONE; + + // Figure out the number of elements from the format. + const uint64_t bufferViewSize = (bufferSize > 0) ? bufferSize : interfaceBuffer->desc.size; + srvDesc.Buffer.NumElements = UINT(bufferViewSize / RenderFormatSize(interfaceBufferFormattedView->format)); + setSRV(descriptorIndex, nativeResource, &srvDesc); + } + else { + setSRV(descriptorIndex, nullptr, nullptr); + } + + break; + } + case RenderDescriptorRangeType::STRUCTURED_BUFFER: + case RenderDescriptorRangeType::BYTE_ADDRESS_BUFFER: { + assert((bufferFormattedView == nullptr) && "Can't use formatted view on byte or structured buffers."); + if (nativeResource != nullptr) { + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + + const uint64_t bufferViewSize = (bufferSize > 0) ? bufferSize : interfaceBuffer->desc.size; + if (descriptorType == RenderDescriptorRangeType::BYTE_ADDRESS_BUFFER) { + srvDesc.Format = DXGI_FORMAT_R32_TYPELESS; + srvDesc.Buffer.NumElements = UINT(bufferViewSize / 4); + srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; + } + else { + assert((bufferStructuredView != nullptr) && "A view must be provided for structured buffers."); + assert(bufferStructuredView->structureByteStride > 0); + srvDesc.Buffer.FirstElement = bufferStructuredView->firstElement; + srvDesc.Buffer.StructureByteStride = bufferStructuredView->structureByteStride; + srvDesc.Buffer.NumElements = UINT(bufferViewSize / bufferStructuredView->structureByteStride); + } + + setSRV(descriptorIndex, nativeResource, &srvDesc); + } + else { + setSRV(descriptorIndex, nullptr, nullptr); + } + + break; + } + case RenderDescriptorRangeType::READ_WRITE_FORMATTED_BUFFER: { + assert((bufferStructuredView == nullptr) && "Can't use structured view on texture buffers."); + if (nativeResource != nullptr) { + assert((bufferFormattedView != nullptr) && "A 
view must be provided for formatted buffers."); + const D3D12BufferFormattedView *interfaceBufferFormatView = static_cast(bufferFormattedView); + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + uavDesc.Format = toDXGI(interfaceBufferFormatView->format); + uavDesc.Buffer.Flags = (descriptorType == RenderDescriptorRangeType::READ_WRITE_BYTE_ADDRESS_BUFFER) ? D3D12_BUFFER_UAV_FLAG_RAW : D3D12_BUFFER_UAV_FLAG_NONE; + + // Figure out the number of elements from the format. + const uint64_t bufferViewSize = (bufferSize > 0) ? bufferSize : interfaceBuffer->desc.size; + uavDesc.Buffer.NumElements = UINT(bufferViewSize / RenderFormatSize(interfaceBufferFormatView->format)); + setUAV(descriptorIndex, nativeResource, &uavDesc); + } + else { + setUAV(descriptorIndex, nullptr, nullptr); + } + + break; + } + case RenderDescriptorRangeType::READ_WRITE_STRUCTURED_BUFFER: + case RenderDescriptorRangeType::READ_WRITE_BYTE_ADDRESS_BUFFER: { + assert((bufferFormattedView == nullptr) && "Can't use formatted view on byte or structured buffers."); + if (nativeResource != nullptr) { + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + + const uint64_t bufferViewSize = (bufferSize > 0) ? 
bufferSize : interfaceBuffer->desc.size; + if (descriptorType == RenderDescriptorRangeType::READ_WRITE_BYTE_ADDRESS_BUFFER) { + uavDesc.Format = DXGI_FORMAT_R32_TYPELESS; + uavDesc.Buffer.NumElements = UINT(bufferViewSize / 4); + uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; + } + else { + assert((bufferStructuredView != nullptr) && "A view must be provided for structured buffers."); + assert(bufferStructuredView->structureByteStride > 0); + uavDesc.Buffer.FirstElement = bufferStructuredView->firstElement; + uavDesc.Buffer.StructureByteStride = bufferStructuredView->structureByteStride; + uavDesc.Buffer.NumElements = UINT(bufferViewSize / bufferStructuredView->structureByteStride); + } + + setUAV(descriptorIndex, nativeResource, &uavDesc); + } + else { + setUAV(descriptorIndex, nullptr, nullptr); + } + + break; + } + case RenderDescriptorRangeType::TEXTURE: + case RenderDescriptorRangeType::READ_WRITE_TEXTURE: + case RenderDescriptorRangeType::SAMPLER: + case RenderDescriptorRangeType::ACCELERATION_STRUCTURE: + assert(false && "Incompatible descriptor type."); + break; + default: + assert(false && "Unknown descriptor type."); + break; + } + } + + void D3D12DescriptorSet::setTexture(uint32_t descriptorIndex, const RenderTexture *texture, const RenderTextureLayout textureLayout, const RenderTextureView *textureView) { + // Texture state is ignored by D3D12 because image layout information is not required. + + const D3D12Texture *interfaceTexture = static_cast(texture); + ID3D12Resource *nativeResource = (interfaceTexture != nullptr) ? 
interfaceTexture->d3d : nullptr; + uint32_t descriptorIndexClamped = std::min(descriptorIndex, descriptorTypeMaxIndex); + RenderDescriptorRangeType descriptorType = descriptorTypes[descriptorIndexClamped]; + switch (descriptorType) { + case RenderDescriptorRangeType::TEXTURE: { + if ((nativeResource != nullptr) && (textureView != nullptr)) { + const D3D12TextureView *interfaceTextureView = static_cast(textureView); + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.Format = interfaceTextureView->format; + + const bool isMSAA = (interfaceTextureView->texture->desc.multisampling.sampleCount > RenderSampleCount::COUNT_1); + switch (interfaceTextureView->dimension) { + case RenderTextureViewDimension::TEXTURE_1D: + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; + srvDesc.Texture1D.MipLevels = interfaceTextureView->mipLevels; + break; + case RenderTextureViewDimension::TEXTURE_2D: + if (isMSAA) { + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMS; + } + else { + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = interfaceTextureView->mipLevels; + } + + break; + case RenderTextureViewDimension::TEXTURE_3D: + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; + srvDesc.Texture3D.MipLevels = interfaceTextureView->mipLevels; + break; + case RenderTextureViewDimension::TEXTURE_CUBE: + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; + srvDesc.TextureCube.MipLevels = interfaceTextureView->mipLevels; + break; + default: + assert(false && "Unknown texture dimension."); + break; + } + + setSRV(descriptorIndex, nativeResource, &srvDesc); + } + else { + setSRV(descriptorIndex, nativeResource, nullptr); + } + + break; + } + case RenderDescriptorRangeType::READ_WRITE_TEXTURE: { + if ((nativeResource != nullptr) && (textureView != nullptr)) { + const D3D12TextureView *interfaceTextureView = static_cast(textureView); + 
D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = interfaceTextureView->format; + + switch (interfaceTextureView->dimension) { + case RenderTextureViewDimension::TEXTURE_1D: + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1D; + uavDesc.Texture1D.MipSlice = interfaceTextureView->mipSlice; + break; + case RenderTextureViewDimension::TEXTURE_2D: + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + uavDesc.Texture2D.MipSlice = interfaceTextureView->mipSlice; + break; + case RenderTextureViewDimension::TEXTURE_3D: + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D; + uavDesc.Texture3D.MipSlice = interfaceTextureView->mipSlice; + break; + default: + assert(false && "Unknown texture dimension."); + break; + } + + setUAV(descriptorIndex, nativeResource, &uavDesc); + } + else { + setUAV(descriptorIndex, nativeResource, nullptr); + } + + break; + } + case RenderDescriptorRangeType::CONSTANT_BUFFER: + case RenderDescriptorRangeType::FORMATTED_BUFFER: + case RenderDescriptorRangeType::READ_WRITE_FORMATTED_BUFFER: + case RenderDescriptorRangeType::STRUCTURED_BUFFER: + case RenderDescriptorRangeType::BYTE_ADDRESS_BUFFER: + case RenderDescriptorRangeType::READ_WRITE_STRUCTURED_BUFFER: + case RenderDescriptorRangeType::READ_WRITE_BYTE_ADDRESS_BUFFER: + case RenderDescriptorRangeType::SAMPLER: + case RenderDescriptorRangeType::ACCELERATION_STRUCTURE: + assert(false && "Incompatible descriptor type."); + break; + default: + assert(false && "Unknown descriptor type."); + break; + } + } + + void D3D12DescriptorSet::setSampler(uint32_t descriptorIndex, const RenderSampler *sampler) { + if (sampler != nullptr) { + const D3D12Sampler *interfaceSampler = static_cast(sampler); + uint32_t descriptorIndexClamped = std::min(descriptorIndex, descriptorTypeMaxIndex); + uint32_t descriptorIndexRelative = (descriptorIndex - descriptorIndexClamped); + uint32_t descriptorHeapIndex = descriptorHeapIndices[descriptorIndexClamped]; + const D3D12_CPU_DESCRIPTOR_HANDLE 
cpuHandle = device->samplerHeapAllocator->getHostCPUHandleAt(samplerAllocation.offset + descriptorHeapIndex + descriptorIndexRelative); + device->d3d->CreateSampler(&interfaceSampler->samplerDesc, cpuHandle); + setHostModified(samplerAllocation, descriptorHeapIndex + descriptorIndexRelative); + } + } + + void D3D12DescriptorSet::setAccelerationStructure(uint32_t descriptorIndex, const RenderAccelerationStructure *accelerationStructure) { + const D3D12AccelerationStructure *interfaceAccelerationStructure = static_cast(accelerationStructure); + uint32_t descriptorIndexClamped = std::min(descriptorIndex, descriptorTypeMaxIndex); + RenderDescriptorRangeType descriptorType = descriptorTypes[descriptorIndexClamped]; + assert((descriptorType == RenderDescriptorRangeType::ACCELERATION_STRUCTURE) && "Incompatible descriptor type."); + + if (interfaceAccelerationStructure != nullptr) { + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_RAYTRACING_ACCELERATION_STRUCTURE; + srvDesc.RaytracingAccelerationStructure.Location = interfaceAccelerationStructure->buffer->d3d->GetGPUVirtualAddress(); + setSRV(descriptorIndex, nullptr, &srvDesc); + } + else { + setSRV(descriptorIndex, nullptr, nullptr); + } + } + + void D3D12DescriptorSet::setSRV(uint32_t descriptorIndex, ID3D12Resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *viewDesc) { + if ((resource != nullptr) || (viewDesc != nullptr)) { + uint32_t descriptorIndexClamped = std::min(descriptorIndex, descriptorTypeMaxIndex); + uint32_t descriptorIndexRelative = (descriptorIndex - descriptorIndexClamped); + uint32_t descriptorHeapIndex = descriptorHeapIndices[descriptorIndexClamped]; + const D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = device->viewHeapAllocator->getHostCPUHandleAt(viewAllocation.offset + descriptorHeapIndex + descriptorIndexRelative); + device->d3d->CreateShaderResourceView(resource, viewDesc, 
cpuHandle); + setHostModified(viewAllocation, descriptorHeapIndex + descriptorIndexRelative); + } + } + + void D3D12DescriptorSet::setUAV(uint32_t descriptorIndex, ID3D12Resource *resource, const D3D12_UNORDERED_ACCESS_VIEW_DESC *viewDesc) { + if ((resource != nullptr) || (viewDesc != nullptr)) { + uint32_t descriptorIndexClamped = std::min(descriptorIndex, descriptorTypeMaxIndex); + uint32_t descriptorIndexRelative = (descriptorIndex - descriptorIndexClamped); + uint32_t descriptorHeapIndex = descriptorHeapIndices[descriptorIndexClamped]; + const D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = device->viewHeapAllocator->getHostCPUHandleAt(viewAllocation.offset + descriptorHeapIndex + descriptorIndexRelative); + device->d3d->CreateUnorderedAccessView(resource, nullptr, viewDesc, cpuHandle); + setHostModified(viewAllocation, descriptorHeapIndex + descriptorIndexRelative); + } + } + + void D3D12DescriptorSet::setCBV(uint32_t descriptorIndex, ID3D12Resource *resource, uint64_t bufferSize) { + if (resource != nullptr) { + D3D12_CONSTANT_BUFFER_VIEW_DESC viewDesc = {}; + viewDesc.BufferLocation = resource->GetGPUVirtualAddress(); + viewDesc.SizeInBytes = UINT(roundUp(bufferSize, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)); + + uint32_t descriptorIndexClamped = std::min(descriptorIndex, descriptorTypeMaxIndex); + uint32_t descriptorIndexRelative = (descriptorIndex - descriptorIndexClamped); + uint32_t descriptorHeapIndex = descriptorHeapIndices[descriptorIndexClamped]; + const D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = device->viewHeapAllocator->getHostCPUHandleAt(viewAllocation.offset + descriptorHeapIndex + descriptorIndexRelative); + device->d3d->CreateConstantBufferView(&viewDesc, cpuHandle); + setHostModified(viewAllocation, descriptorHeapIndex + descriptorIndexRelative); + } + } + + void D3D12DescriptorSet::setHostModified(HeapAllocation &heapAllocation, uint32_t heapIndex) { + if (heapAllocation.hostModifiedCount == 0) { + heapAllocation.hostModifiedIndex = heapIndex; + 
heapAllocation.hostModifiedCount = 1; + } + else if (heapIndex < heapAllocation.hostModifiedIndex) { + heapAllocation.hostModifiedCount = heapAllocation.hostModifiedIndex + heapAllocation.hostModifiedCount - heapIndex; + heapAllocation.hostModifiedIndex = heapIndex; + } + else if (heapIndex >= (heapAllocation.hostModifiedIndex + heapAllocation.hostModifiedCount)) { + heapAllocation.hostModifiedCount = heapIndex - heapAllocation.hostModifiedIndex + 1; + } + } + + // D3D12SwapChain + + D3D12SwapChain::D3D12SwapChain(D3D12CommandQueue *commandQueue, RenderWindow renderWindow, uint32_t textureCount, RenderFormat format) { + assert(commandQueue != nullptr); + assert(renderWindow != 0); + + this->commandQueue = commandQueue; + this->renderWindow = renderWindow; + this->textureCount = textureCount; + this->format = format; + + // Store the native format representation. + nativeFormat = toDXGI(format); + + getWindowSize(width, height); + + DXGI_SWAP_CHAIN_DESC1 swapChainDesc = {}; + swapChainDesc.BufferCount = textureCount; + swapChainDesc.Width = width; + swapChainDesc.Height = height; + swapChainDesc.Format = nativeFormat; + swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + swapChainDesc.SampleDesc.Count = 1; + swapChainDesc.Flags = DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; + + IDXGISwapChain1 *swapChain1; + IDXGIFactory4 *dxgiFactory = commandQueue->device->renderInterface->dxgiFactory; + HRESULT res = dxgiFactory->CreateSwapChainForHwnd(commandQueue->d3d, renderWindow, &swapChainDesc, nullptr, nullptr, &swapChain1); + if (FAILED(res)) { + fprintf(stderr, "CreateSwapChainForHwnd failed with error code 0x%lX.\n", res); + return; + } + + res = dxgiFactory->MakeWindowAssociation(renderWindow, DXGI_MWA_NO_ALT_ENTER); + if (FAILED(res)) { + fprintf(stderr, "MakeWindowAssociation failed with error code 0x%lX.\n", res); + return; + } + + d3d = static_cast(swapChain1); + 
d3d->SetMaximumFrameLatency(1); + waitableObject = d3d->GetFrameLatencyWaitableObject(); + + textures.resize(textureCount); + + for (uint32_t i = 0; i < textureCount; i++) { + textures[i].device = commandQueue->device; + textures[i].desc.dimension = RenderTextureDimension::TEXTURE_2D; + textures[i].desc.format = format; + textures[i].desc.depth = 1; + textures[i].desc.mipLevels = 1; + textures[i].desc.arraySize = 1; + textures[i].desc.flags = RenderTextureFlag::RENDER_TARGET; + } + + setTextures(); + } + + D3D12SwapChain::~D3D12SwapChain() { + for (uint32_t i = 0; i < textureCount; i++) { + textures[i].releaseTargetHeap(); + + if (textures[i].d3d != nullptr) { + textures[i].d3d->Release(); + textures[i].d3d = nullptr; + } + } + + if (d3d != nullptr) { + d3d->Release(); + } + } + + bool D3D12SwapChain::present(uint32_t textureIndex, RenderCommandSemaphore **waitSemaphores, uint32_t waitSemaphoreCount) { + if (waitableObject != NULL) { + while (WaitForSingleObjectEx(waitableObject, 0, FALSE)); + } + + HRESULT res = d3d->Present(1, 0); + return SUCCEEDED(res); + } + + bool D3D12SwapChain::resize() { + getWindowSize(width, height); + + // Don't resize the swap chain at all if the window doesn't have a valid size. 
+ if ((width == 0) || (height == 0)) { + return false; + } + + for (uint32_t i = 0; i < textureCount; i++) { + textures[i].releaseTargetHeap(); + textures[i].d3d->Release(); + textures[i].d3d = nullptr; + } + + HRESULT res = d3d->ResizeBuffers(0, 0, 0, DXGI_FORMAT_UNKNOWN, DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT); + if (FAILED(res)) { + fprintf(stderr, "ResizeBuffers failed with error code 0x%lX.\n", res); + return false; + } + + setTextures(); + return true; + } + + bool D3D12SwapChain::needsResize() const { + uint32_t windowWidth, windowHeight; + getWindowSize(windowWidth, windowHeight); + return (d3d == nullptr) || (windowWidth != width) || (windowHeight != height); + } + + uint32_t D3D12SwapChain::getWidth() const { + return width; + } + + uint32_t D3D12SwapChain::getHeight() const { + return height; + } + + void D3D12SwapChain::getWindowSize(uint32_t &dstWidth, uint32_t &dstHeight) const { + RECT rect; + GetClientRect(renderWindow, &rect); + dstWidth = rect.right - rect.left; + dstHeight = rect.bottom - rect.top; + } + + void D3D12SwapChain::setTextures() { + assert(textureCount == textures.size()); + + for (uint32_t i = 0; i < textureCount; i++) { + d3d->GetBuffer(i, IID_PPV_ARGS(&textures[i].d3d)); + + textures[i].desc.width = width; + textures[i].desc.height = height; + textures[i].resourceStates = D3D12_RESOURCE_STATE_PRESENT; + textures[i].layout = RenderTextureLayout::PRESENT; + textures[i].createRenderTargetHeap(); + } + } + + RenderTexture *D3D12SwapChain::getTexture(uint32_t textureIndex) { + return &textures[textureIndex]; + } + + uint32_t D3D12SwapChain::getTextureCount() const { + return textureCount; + } + + bool D3D12SwapChain::acquireTexture(RenderCommandSemaphore *signalSemaphore, uint32_t *textureIndex) { + assert(textureIndex != nullptr); + *textureIndex = d3d->GetCurrentBackBufferIndex(); + return true; + } + + RenderWindow D3D12SwapChain::getWindow() const { + return renderWindow; + } + + bool D3D12SwapChain::isEmpty() const { + 
return (d3d == nullptr) || (width == 0) || (height == 0); + } + + uint32_t D3D12SwapChain::getRefreshRate() const { + return 0; + } + + // D3D12Framebuffer + + D3D12Framebuffer::D3D12Framebuffer(D3D12Device *device, const RenderFramebufferDesc &desc) { + assert(device != nullptr); + + this->device = device; + + if (desc.colorAttachmentsCount > 0) { + for (uint32_t i = 0; i < desc.colorAttachmentsCount; i++) { + const D3D12Texture *interfaceTexture = static_cast(desc.colorAttachments[i]); + assert((interfaceTexture->desc.flags & RenderTextureFlag::RENDER_TARGET) && "Color attachment must be a render target."); + colorTargets.emplace_back(interfaceTexture); + colorHandles.emplace_back(device->colorTargetHeapAllocator->getHostCPUHandleAt(interfaceTexture->targetAllocatorOffset)); + + if (i == 0) { + width = interfaceTexture->desc.width; + height = interfaceTexture->desc.height; + } + } + } + + if (desc.depthAttachment != nullptr) { + const D3D12Texture *interfaceTexture = static_cast(desc.depthAttachment); + assert((interfaceTexture->desc.flags & RenderTextureFlag::DEPTH_TARGET) && "Depth attachment must be a depth target."); + depthTarget = interfaceTexture; + + // The read-only handle is on the second slot on the DSV heap. 
+ if (desc.depthAttachmentReadOnly) { + depthHandle = device->depthTargetHeapAllocator->getHostCPUHandleAt(interfaceTexture->targetAllocatorOffset + 1); + } + else { + depthHandle = device->depthTargetHeapAllocator->getHostCPUHandleAt(interfaceTexture->targetAllocatorOffset); + } + + if (desc.colorAttachmentsCount == 0) { + width = interfaceTexture->desc.width; + height = interfaceTexture->desc.height; + } + } + } + + D3D12Framebuffer::~D3D12Framebuffer() { } + + uint32_t D3D12Framebuffer::getWidth() const { + return width; + } + + uint32_t D3D12Framebuffer::getHeight() const { + return height; + } + + // D3D12CommandList + + D3D12CommandList::D3D12CommandList(D3D12Device *device, RenderCommandListType type) { + assert(device != nullptr); + + this->device = device; + this->type = type; + + D3D12_COMMAND_LIST_TYPE commandListType; + switch (type) { + case RenderCommandListType::DIRECT: + commandListType = D3D12_COMMAND_LIST_TYPE_DIRECT; + break; + case RenderCommandListType::COMPUTE: + commandListType = D3D12_COMMAND_LIST_TYPE_COMPUTE; + break; + case RenderCommandListType::COPY: + commandListType = D3D12_COMMAND_LIST_TYPE_COPY; + break; + default: + assert(false && "Unknown command list type."); + return; + } + + HRESULT res = device->d3d->CreateCommandAllocator(commandListType, IID_PPV_ARGS(&commandAllocator)); + if (FAILED(res)) { + fprintf(stderr, "CreateCommandAllocator failed with error code 0x%lX.\n", res); + return; + } + + res = device->d3d->CreateCommandList(0, commandListType, commandAllocator, nullptr, IID_PPV_ARGS(&d3d)); + if (FAILED(res)) { + fprintf(stderr, "CreateCommandList failed with error code 0x%lX.\n", res); + return; + } + + d3d->Close(); + } + + D3D12CommandList::~D3D12CommandList() { + if (d3d != nullptr) { + d3d->Release(); + } + + if (commandAllocator != nullptr) { + commandAllocator->Release(); + } + } + + bool D3D12CommandList::isOpen() { + return open; + } + + void D3D12CommandList::begin() { + assert(!open); + + 
commandAllocator->Reset(); + d3d->Reset(commandAllocator, nullptr); + open = true; + } + + void D3D12CommandList::end() { + assert(open); + + // It's required to reset the sample positions before the command list ends. + resetSamplePositions(); + + d3d->Close(); + open = false; + targetFramebuffer = nullptr; + targetFramebufferSamplePositionsSet = false; + activeComputePipelineLayout = nullptr; + activeGraphicsPipelineLayout = nullptr; + activeGraphicsPipeline = nullptr; + activeTopology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; + descriptorHeapsSet = false; + } + + void D3D12CommandList::barriers(RenderBarrierStages stages, const RenderBufferBarrier *bufferBarriers, uint32_t bufferBarriersCount, const RenderTextureBarrier *textureBarriers, uint32_t textureBarriersCount) { + thread_local std::vector barrierVector; + barrierVector.clear(); + + auto makeBarrier = [&](ID3D12Resource *resource, D3D12_RESOURCE_STATES stateBefore, D3D12_RESOURCE_STATES stateAfter, bool supportsUAV, D3D12_RESOURCE_BARRIER &resourceBarrier) { + resourceBarrier = {}; + + if (stateBefore != stateAfter) { + resourceBarrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + resourceBarrier.Transition.StateBefore = stateBefore; + resourceBarrier.Transition.StateAfter = stateAfter; + resourceBarrier.Transition.pResource = resource; + resourceBarrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + return true; + } + else if (supportsUAV) { + resourceBarrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + resourceBarrier.UAV.pResource = resource; + return true; + } + else { + return false; + } + }; + + D3D12_RESOURCE_BARRIER resourceBarrier; + const RenderBufferFlags bufferUAVMask = RenderBufferFlag::UNORDERED_ACCESS | RenderBufferFlag::ACCELERATION_STRUCTURE; + for (uint32_t i = 0; i < bufferBarriersCount; i++) { + const RenderBufferBarrier &bufferBarrier = bufferBarriers[i]; + D3D12Buffer *interfaceBuffer = static_cast(bufferBarrier.buffer); + D3D12_RESOURCE_STATES stateBefore = 
interfaceBuffer->resourceStates; + D3D12_RESOURCE_STATES stateAfter = toBufferState(stages, bufferBarrier.accessBits, interfaceBuffer->desc.flags); + if (makeBarrier(interfaceBuffer->d3d, stateBefore, stateAfter, interfaceBuffer->desc.flags & bufferUAVMask, resourceBarrier)) { + barrierVector.emplace_back(resourceBarrier); + } + + interfaceBuffer->resourceStates = stateAfter; + } + + bool resetSamplePositionsRequired = false; + for (uint32_t i = 0; i < textureBarriersCount; i++) { + const RenderTextureBarrier &textureBarrier = textureBarriers[i]; + D3D12Texture *interfaceTexture = static_cast(textureBarrier.texture); + D3D12_RESOURCE_STATES stateBefore = interfaceTexture->resourceStates; + D3D12_RESOURCE_STATES stateAfter = toTextureState(stages, textureBarrier.layout, interfaceTexture->desc.flags); + bool madeBarrier = makeBarrier(interfaceTexture->d3d, stateBefore, stateAfter, interfaceTexture->desc.flags & RenderTextureFlag::UNORDERED_ACCESS, resourceBarrier); + interfaceTexture->resourceStates = stateAfter; + interfaceTexture->layout = textureBarrier.layout; + if (!madeBarrier) { + continue; + } + + // MSAA Depth targets with multisampling require separate barriers. 
+ const bool msaaDepthTarget = (interfaceTexture->desc.flags & RenderTextureFlag::DEPTH_TARGET) && (interfaceTexture->desc.multisampling.sampleCount > 1); + if (msaaDepthTarget && interfaceTexture->desc.multisampling.sampleLocationsEnabled) { + setSamplePositions(interfaceTexture); + d3d->ResourceBarrier(1, &resourceBarrier); + resetSamplePositionsRequired = true; + } + else { + barrierVector.emplace_back(resourceBarrier); + } + } + + if (resetSamplePositionsRequired) { + resetSamplePositions(); + } + + if (!barrierVector.empty()) { + d3d->ResourceBarrier(UINT(barrierVector.size()), barrierVector.data()); + } + } + + void D3D12CommandList::dispatch(uint32_t threadGroupCountX, uint32_t threadGroupCountY, uint32_t threadGroupCountZ) { + d3d->Dispatch(threadGroupCountX, threadGroupCountY, threadGroupCountZ); + } + + void D3D12CommandList::traceRays(uint32_t width, uint32_t height, uint32_t depth, RenderBufferReference shaderBindingTable, const RenderShaderBindingGroupsInfo &shaderBindingGroupsInfo) { + const D3D12Buffer *interfaceBuffer = static_cast(shaderBindingTable.ref); + assert(interfaceBuffer != nullptr); + assert((interfaceBuffer->desc.flags & RenderBufferFlag::SHADER_BINDING_TABLE) && "Buffer must allow being used as a shader binding table."); + + D3D12_GPU_VIRTUAL_ADDRESS tableAddress = interfaceBuffer->d3d->GetGPUVirtualAddress() + shaderBindingTable.offset; + const RenderShaderBindingGroupInfo &rayGen = shaderBindingGroupsInfo.rayGen; + const RenderShaderBindingGroupInfo &miss = shaderBindingGroupsInfo.miss; + const RenderShaderBindingGroupInfo &hitGroup = shaderBindingGroupsInfo.hitGroup; + const RenderShaderBindingGroupInfo &callable = shaderBindingGroupsInfo.callable; + D3D12_DISPATCH_RAYS_DESC desc; + desc.RayGenerationShaderRecord.StartAddress = (rayGen.size > 0) ? (tableAddress + rayGen.offset + rayGen.startIndex * rayGen.stride) : 0; + desc.RayGenerationShaderRecord.SizeInBytes = rayGen.size; + desc.MissShaderTable.StartAddress = (miss.size > 0) ? 
(tableAddress + miss.offset + miss.startIndex * miss.stride) : 0; + desc.MissShaderTable.SizeInBytes = miss.size; + desc.MissShaderTable.StrideInBytes = miss.stride; + desc.HitGroupTable.StartAddress = (hitGroup.size > 0) ? (tableAddress + hitGroup.offset + hitGroup.startIndex * hitGroup.stride) : 0; + desc.HitGroupTable.SizeInBytes = hitGroup.size; + desc.HitGroupTable.StrideInBytes = hitGroup.stride; + desc.CallableShaderTable.StartAddress = (callable.size > 0) ? (tableAddress + callable.offset + callable.startIndex * callable.stride) : 0; + desc.CallableShaderTable.SizeInBytes = callable.size; + desc.CallableShaderTable.StrideInBytes = callable.stride; + desc.Width = width; + desc.Height = height; + desc.Depth = depth; + d3d->DispatchRays(&desc); + } + + void D3D12CommandList::drawInstanced(uint32_t vertexCountPerInstance, uint32_t instanceCount, uint32_t startVertexLocation, uint32_t startInstanceLocation) { + checkTopology(); + checkFramebufferSamplePositions(); + d3d->DrawInstanced(vertexCountPerInstance, instanceCount, startVertexLocation, startInstanceLocation); + } + + void D3D12CommandList::drawIndexedInstanced(uint32_t indexCountPerInstance, uint32_t instanceCount, uint32_t startIndexLocation, int32_t baseVertexLocation, uint32_t startInstanceLocation) { + checkTopology(); + checkFramebufferSamplePositions(); + d3d->DrawIndexedInstanced(indexCountPerInstance, instanceCount, startIndexLocation, baseVertexLocation, startInstanceLocation); + } + + void D3D12CommandList::setPipeline(const RenderPipeline *pipeline) { + assert(pipeline != nullptr); + + const D3D12Pipeline *interfacePipeline = static_cast(pipeline); + switch (interfacePipeline->type) { + case D3D12Pipeline::Type::Compute: { + const D3D12ComputePipeline *computePipeline = static_cast(interfacePipeline); + d3d->SetPipelineState(computePipeline->d3d); + break; + } + case D3D12Pipeline::Type::Graphics: { + const D3D12GraphicsPipeline *graphicsPipeline = static_cast(interfacePipeline); + 
d3d->SetPipelineState(graphicsPipeline->d3d); + activeGraphicsPipeline = graphicsPipeline; + break; + } + case D3D12Pipeline::Type::Raytracing: { + const D3D12RaytracingPipeline *raytracingPipeline = static_cast(interfacePipeline); + d3d->SetPipelineState1(raytracingPipeline->stateObject); + break; + } + default: + assert(false && "Unknown pipeline type."); + break; + } + } + + void D3D12CommandList::setComputePipelineLayout(const RenderPipelineLayout *pipelineLayout) { + assert(pipelineLayout != nullptr); + + const D3D12PipelineLayout *interfacePipelineLayout = static_cast(pipelineLayout); + d3d->SetComputeRootSignature(interfacePipelineLayout->rootSignature); + activeComputePipelineLayout = interfacePipelineLayout; + } + + void D3D12CommandList::setComputePushConstants(uint32_t rangeIndex, const void *data) { + assert(activeComputePipelineLayout != nullptr); + assert(rangeIndex < activeComputePipelineLayout->pushConstantRanges.size()); + + const RenderPushConstantRange &range = activeComputePipelineLayout->pushConstantRanges[rangeIndex]; + assert((range.offset == 0) && "Offset behavior should be verified when compared to Vulkan."); + d3d->SetComputeRoot32BitConstants(rangeIndex, (range.size + sizeof(uint32_t) - 1) / sizeof(uint32_t), data, 0); + } + + void D3D12CommandList::setComputeDescriptorSet(RenderDescriptorSet *descriptorSet, uint32_t setIndex) { + setDescriptorSet(activeComputePipelineLayout, descriptorSet, setIndex, true); + } + + void D3D12CommandList::setGraphicsPipelineLayout(const RenderPipelineLayout *pipelineLayout) { + assert(pipelineLayout != nullptr); + + const D3D12PipelineLayout *interfacePipelineLayout = static_cast(pipelineLayout); + d3d->SetGraphicsRootSignature(interfacePipelineLayout->rootSignature); + activeGraphicsPipelineLayout = interfacePipelineLayout; + } + + void D3D12CommandList::setGraphicsPushConstants(uint32_t rangeIndex, const void *data) { + assert(activeGraphicsPipelineLayout != nullptr); + assert(rangeIndex < 
activeGraphicsPipelineLayout->pushConstantRanges.size()); + + const RenderPushConstantRange &range = activeGraphicsPipelineLayout->pushConstantRanges[rangeIndex]; + assert((range.offset == 0) && "Offset behavior should be verified when compared to Vulkan."); + d3d->SetGraphicsRoot32BitConstants(rangeIndex, (range.size + sizeof(uint32_t) - 1) / sizeof(uint32_t), data, 0); + } + + void D3D12CommandList::setGraphicsDescriptorSet(RenderDescriptorSet *descriptorSet, uint32_t setIndex) { + setDescriptorSet(activeGraphicsPipelineLayout, descriptorSet, setIndex, false); + } + + void D3D12CommandList::setGraphicsRootDescriptor(RenderBufferReference bufferReference, uint32_t rootDescriptorIndex) { + setRootDescriptor(activeGraphicsPipelineLayout, bufferReference, rootDescriptorIndex, false); + } + + void D3D12CommandList::setRaytracingPipelineLayout(const RenderPipelineLayout *pipelineLayout) { + setComputePipelineLayout(pipelineLayout); + } + + void D3D12CommandList::setRaytracingPushConstants(uint32_t rangeIndex, const void *data) { + setComputePushConstants(rangeIndex, data); + } + + void D3D12CommandList::setRaytracingDescriptorSet(RenderDescriptorSet *descriptorSet, uint32_t setIndex) { + setComputeDescriptorSet(descriptorSet, setIndex); + } + + void D3D12CommandList::setIndexBuffer(const RenderIndexBufferView *view) { + if (view != nullptr) { + const D3D12Buffer *interfaceBuffer = static_cast(view->buffer.ref); + D3D12_INDEX_BUFFER_VIEW bufferView; + bufferView.BufferLocation = (interfaceBuffer != nullptr) ? 
(interfaceBuffer->d3d->GetGPUVirtualAddress() + view->buffer.offset) : 0; + bufferView.Format = toDXGI(view->format); + bufferView.SizeInBytes = view->size; + d3d->IASetIndexBuffer(&bufferView); + } + else { + d3d->IASetIndexBuffer(nullptr); + } + } + + void D3D12CommandList::setVertexBuffers(uint32_t startSlot, const RenderVertexBufferView *views, uint32_t viewCount, const RenderInputSlot *inputSlots) { + if (views != nullptr) { + assert(inputSlots != nullptr); + + thread_local std::vector bufferViewVector; + bufferViewVector.resize(viewCount, {}); + for (uint32_t i = 0; i < viewCount; i++) { + const RenderVertexBufferView &renderView = views[i]; + const D3D12Buffer *interfaceBuffer = static_cast(renderView.buffer.ref); + bufferViewVector[i].BufferLocation = (interfaceBuffer != nullptr) ? (interfaceBuffer->d3d->GetGPUVirtualAddress() + renderView.buffer.offset) : 0; + bufferViewVector[i].SizeInBytes = renderView.size; + + bool slotFound = false; + for (uint32_t j = 0; j < viewCount; j++) { + if (inputSlots[j].index == (startSlot + i)) { + bufferViewVector[i].StrideInBytes = inputSlots[j].stride; + slotFound = true; + break; + } + } + + assert(slotFound && "Input slots must contain a slot with the same index as the view."); + } + + d3d->IASetVertexBuffers(startSlot, viewCount, bufferViewVector.data()); + } + else { + d3d->IASetVertexBuffers(startSlot, viewCount, nullptr); + } + } + + void D3D12CommandList::setViewports(const RenderViewport *viewports, uint32_t count) { + if (count > 1) { + thread_local std::vector viewportVector; + viewportVector.clear(); + for (uint32_t i = 0; i < count; i++) { + viewportVector.emplace_back(D3D12_VIEWPORT{ viewports[i].x, viewports[i].y, viewports[i].width, viewports[i].height, viewports[i].minDepth, viewports[i].maxDepth }); + } + + if (!viewportVector.empty()) { + d3d->RSSetViewports(UINT(viewportVector.size()), viewportVector.data()); + } + } + else { + // Single element fast path. 
+ D3D12_VIEWPORT viewport = D3D12_VIEWPORT{ viewports[0].x, viewports[0].y, viewports[0].width, viewports[0].height, viewports[0].minDepth, viewports[0].maxDepth }; + d3d->RSSetViewports(1, &viewport); + } + } + + void D3D12CommandList::setScissors(const RenderRect *scissorRects, uint32_t count) { + if (count > 1) { + thread_local std::vector rectVector; + rectVector.clear(); + for (uint32_t i = 0; i < count; i++) { + rectVector.emplace_back(D3D12_RECT{ scissorRects[i].left, scissorRects[i].top, scissorRects[i].right, scissorRects[i].bottom }); + } + + if (!rectVector.empty()) { + d3d->RSSetScissorRects(UINT(rectVector.size()), rectVector.data()); + } + } + else { + // Single element fast path. + D3D12_RECT scissor = D3D12_RECT{ scissorRects[0].left, scissorRects[0].top, scissorRects[0].right, scissorRects[0].bottom }; + d3d->RSSetScissorRects(1, &scissor); + } + } + + void D3D12CommandList::setFramebuffer(const RenderFramebuffer *framebuffer) { + if (framebuffer != nullptr) { + const D3D12Framebuffer *interfaceFramebuffer = static_cast(framebuffer); + for (const D3D12Texture *target : interfaceFramebuffer->colorTargets) { + assert((target->layout == RenderTextureLayout::COLOR_WRITE) && "Color targets must be in color write layout when setting the framebuffer."); + } + + if (interfaceFramebuffer->depthTarget != nullptr) { + const bool depthReadLayout = (interfaceFramebuffer->depthTarget->layout == RenderTextureLayout::DEPTH_READ); + const bool depthWriteLayout = (interfaceFramebuffer->depthTarget->layout == RenderTextureLayout::DEPTH_WRITE); + assert((depthReadLayout || depthWriteLayout) && "Depth target must be in depth read or write layout when setting the framebuffer."); + } + + const D3D12_CPU_DESCRIPTOR_HANDLE *colorDescriptors = !interfaceFramebuffer->colorHandles.empty() ? interfaceFramebuffer->colorHandles.data() : nullptr; + const D3D12_CPU_DESCRIPTOR_HANDLE *depthDescriptor = (interfaceFramebuffer->depthHandle.ptr != 0) ? 
&interfaceFramebuffer->depthHandle : nullptr; + d3d->OMSetRenderTargets(UINT(interfaceFramebuffer->colorHandles.size()), colorDescriptors, false, depthDescriptor); + targetFramebuffer = interfaceFramebuffer; + targetFramebufferSamplePositionsSet = false; + } + else { + d3d->OMSetRenderTargets(0, nullptr, false, nullptr); + targetFramebuffer = nullptr; + } + } + + void D3D12CommandList::clearColor(uint32_t attachmentIndex, RenderColor colorValue, const RenderRect *clearRects, uint32_t clearRectsCount) { + assert(targetFramebuffer != nullptr); + assert(attachmentIndex < targetFramebuffer->colorTargets.size()); + assert((clearRectsCount == 0) || (clearRects != nullptr)); + + checkFramebufferSamplePositions(); + + thread_local std::vector rectVector; + if (clearRectsCount > 0) { + rectVector.clear(); + for (uint32_t i = 0; i < clearRectsCount; i++) { + rectVector.emplace_back(D3D12_RECT{ clearRects[i].left, clearRects[i].top, clearRects[i].right, clearRects[i].bottom }); + } + } + + d3d->ClearRenderTargetView(targetFramebuffer->colorHandles[attachmentIndex], colorValue.rgba, clearRectsCount, (clearRectsCount > 0) ? rectVector.data() : nullptr); + } + + void D3D12CommandList::clearDepth(bool clearDepth, float depthValue, const RenderRect *clearRects, uint32_t clearRectsCount) { + assert(targetFramebuffer != nullptr); + assert(targetFramebuffer->depthHandle.ptr != 0); + assert((clearRectsCount == 0) || (clearRects != nullptr)); + + checkFramebufferSamplePositions(); + + thread_local std::vector rectVector; + if (clearRectsCount > 0) { + rectVector.clear(); + for (uint32_t i = 0; i < clearRectsCount; i++) { + rectVector.emplace_back(D3D12_RECT{ clearRects[i].left, clearRects[i].top, clearRects[i].right, clearRects[i].bottom }); + } + } + + D3D12_CLEAR_FLAGS clearFlags = {}; + clearFlags |= clearDepth ? 
D3D12_CLEAR_FLAG_DEPTH : D3D12_CLEAR_FLAGS(0); + d3d->ClearDepthStencilView(targetFramebuffer->depthHandle, clearFlags, depthValue, 0, clearRectsCount, (clearRectsCount > 0) ? rectVector.data() : nullptr); + } + + void D3D12CommandList::copyBufferRegion(RenderBufferReference dstBuffer, RenderBufferReference srcBuffer, uint64_t size) { + assert(dstBuffer.ref != nullptr); + assert(srcBuffer.ref != nullptr); + + const D3D12Buffer *interfaceDstBuffer = static_cast(dstBuffer.ref); + const D3D12Buffer *interfaceSrcBuffer = static_cast(srcBuffer.ref); + d3d->CopyBufferRegion(interfaceDstBuffer->d3d, dstBuffer.offset, interfaceSrcBuffer->d3d, srcBuffer.offset, size); + } + + void D3D12CommandList::copyTextureRegion(const RenderTextureCopyLocation &dstLocation, const RenderTextureCopyLocation &srcLocation, uint32_t dstX, uint32_t dstY, uint32_t dstZ, const RenderBox *srcBox) { + D3D12_BOX copyBox; + if (srcBox != nullptr) { + copyBox.left = srcBox->left; + copyBox.top = srcBox->top; + copyBox.front = srcBox->front; + copyBox.right = srcBox->right; + copyBox.bottom = srcBox->bottom; + copyBox.back = srcBox->back; + } + + const D3D12_TEXTURE_COPY_LOCATION copyDstLocation = toD3D12(dstLocation); + const D3D12_TEXTURE_COPY_LOCATION copySrcLocation = toD3D12(srcLocation); + setSamplePositions(dstLocation.texture); + d3d->CopyTextureRegion(©DstLocation, dstX, dstY, dstZ, ©SrcLocation, (srcBox != nullptr) ? 
©Box : nullptr); + resetSamplePositions(); + } + + void D3D12CommandList::copyBuffer(const RenderBuffer *dstBuffer, const RenderBuffer *srcBuffer) { + assert(dstBuffer != nullptr); + assert(srcBuffer != nullptr); + + const D3D12Buffer *interfaceDstBuffer = static_cast(dstBuffer); + const D3D12Buffer *interfaceSrcBuffer = static_cast(srcBuffer); + d3d->CopyResource(interfaceDstBuffer->d3d, interfaceSrcBuffer->d3d); + } + + void D3D12CommandList::copyTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) { + assert(dstTexture != nullptr); + assert(srcTexture != nullptr); + + const D3D12Texture *interfaceDstTexture = static_cast(dstTexture); + const D3D12Texture *interfaceSrcTexture = static_cast(srcTexture); + setSamplePositions(interfaceDstTexture); + d3d->CopyResource(interfaceDstTexture->d3d, interfaceSrcTexture->d3d); + resetSamplePositions(); + } + + void D3D12CommandList::resolveTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) { + assert(dstTexture != nullptr); + assert(srcTexture != nullptr); + + const D3D12Texture *interfaceDstTexture = static_cast(dstTexture); + const D3D12Texture *interfaceSrcTexture = static_cast(srcTexture); + setSamplePositions(interfaceDstTexture); + d3d->ResolveSubresource(interfaceDstTexture->d3d, 0, interfaceSrcTexture->d3d, 0, toDXGI(interfaceDstTexture->desc.format)); + resetSamplePositions(); + } + + void D3D12CommandList::resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect) { + assert(dstTexture != nullptr); + assert(srcTexture != nullptr); + + const D3D12Texture *interfaceDstTexture = static_cast(dstTexture); + const D3D12Texture *interfaceSrcTexture = static_cast(srcTexture); + D3D12_RECT rect; + if (srcRect != nullptr) { + rect.left = srcRect->left; + rect.top = srcRect->top; + rect.right = srcRect->right; + rect.bottom = srcRect->bottom; + } + + setSamplePositions(interfaceDstTexture); + 
d3d->ResolveSubresourceRegion(interfaceDstTexture->d3d, 0, dstX, dstY, interfaceSrcTexture->d3d, 0, (srcRect != nullptr) ? &rect : nullptr, toDXGI(interfaceDstTexture->desc.format), D3D12_RESOLVE_MODE_AVERAGE); + resetSamplePositions(); + } + + void D3D12CommandList::buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) { + assert(dstAccelerationStructure != nullptr); + assert(scratchBuffer.ref != nullptr); + + const D3D12AccelerationStructure *interfaceAccelerationStructure = static_cast(dstAccelerationStructure); + assert(interfaceAccelerationStructure->type == RenderAccelerationStructureType::BOTTOM_LEVEL); + + const D3D12Buffer *interfaceScratchBuffer = static_cast(scratchBuffer.ref); + assert((interfaceScratchBuffer->desc.flags & RenderBufferFlag::ACCELERATION_STRUCTURE_SCRATCH) && "Scratch buffer must be allowed."); + + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC buildDesc = {}; + buildDesc.Inputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL; + buildDesc.Inputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY; + buildDesc.Inputs.NumDescs = buildInfo.meshCount; + buildDesc.Inputs.pGeometryDescs = reinterpret_cast(buildInfo.buildData.data()); + buildDesc.Inputs.Flags = toRTASBuildFlags(buildInfo.preferFastBuild, buildInfo.preferFastTrace); + buildDesc.DestAccelerationStructureData = interfaceAccelerationStructure->buffer->d3d->GetGPUVirtualAddress() + interfaceAccelerationStructure->offset; + buildDesc.ScratchAccelerationStructureData = interfaceScratchBuffer->d3d->GetGPUVirtualAddress() + scratchBuffer.offset; + + d3d->BuildRaytracingAccelerationStructure(&buildDesc, 0, nullptr); + } + + void D3D12CommandList::buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) { + 
assert(dstAccelerationStructure != nullptr); + assert(scratchBuffer.ref != nullptr); + assert(instancesBuffer.ref != nullptr); + + const D3D12AccelerationStructure *interfaceAccelerationStructure = static_cast(dstAccelerationStructure); + assert(interfaceAccelerationStructure->type == RenderAccelerationStructureType::TOP_LEVEL);; + + const D3D12Buffer *interfaceScratchBuffer = static_cast(scratchBuffer.ref); + assert((interfaceScratchBuffer->desc.flags & RenderBufferFlag::ACCELERATION_STRUCTURE_SCRATCH) && "Scratch buffer must be allowed."); + + const D3D12Buffer *interfaceInstancesBuffer = static_cast(instancesBuffer.ref); + assert((interfaceInstancesBuffer->desc.flags & RenderBufferFlag::ACCELERATION_STRUCTURE_INPUT) && "Acceleration structure input must be allowed."); + + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC buildDesc = {}; + buildDesc.Inputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL; + buildDesc.Inputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY; + buildDesc.Inputs.NumDescs = buildInfo.instanceCount; + buildDesc.Inputs.InstanceDescs = interfaceInstancesBuffer->d3d->GetGPUVirtualAddress() + instancesBuffer.offset; + buildDesc.Inputs.Flags = toRTASBuildFlags(buildInfo.preferFastBuild, buildInfo.preferFastTrace); + buildDesc.DestAccelerationStructureData = interfaceAccelerationStructure->buffer->d3d->GetGPUVirtualAddress() + interfaceAccelerationStructure->offset; + buildDesc.ScratchAccelerationStructureData = interfaceScratchBuffer->d3d->GetGPUVirtualAddress() + scratchBuffer.offset; + + d3d->BuildRaytracingAccelerationStructure(&buildDesc, 0, nullptr); + } + + void D3D12CommandList::checkDescriptorHeaps() { + if (!descriptorHeapsSet) { + ID3D12DescriptorHeap *descriptorHeaps[] = { device->viewHeapAllocator->shaderHeap, device->samplerHeapAllocator->shaderHeap }; + d3d->SetDescriptorHeaps(std::size(descriptorHeaps), descriptorHeaps); + descriptorHeapsSet = true; + } + } + + void 
D3D12CommandList::notifyDescriptorHeapWasChangedExternally() { + descriptorHeapsSet = false; + } + + void D3D12CommandList::checkTopology() { + assert(activeGraphicsPipeline != nullptr); + assert(activeGraphicsPipeline->type == D3D12Pipeline::Type::Graphics); + + const D3D12GraphicsPipeline *graphicsPipeline = static_cast(activeGraphicsPipeline); + if (activeTopology != graphicsPipeline->topology) { + d3d->IASetPrimitiveTopology(graphicsPipeline->topology); + activeTopology = graphicsPipeline->topology; + } + } + + void D3D12CommandList::checkFramebufferSamplePositions() { + if (!targetFramebufferSamplePositionsSet && (targetFramebuffer != nullptr)) { + if (targetFramebuffer->depthTarget != nullptr) { + setSamplePositions(targetFramebuffer->depthTarget); + } + + targetFramebufferSamplePositionsSet = true; + } + } + + void D3D12CommandList::setSamplePositions(const RenderTexture *texture) { + assert(texture != nullptr); + + const D3D12Texture *interfaceTexture = static_cast(texture); + if (interfaceTexture->desc.multisampling.sampleLocationsEnabled) { + thread_local std::vector samplePositions; + samplePositions.resize(interfaceTexture->desc.multisampling.sampleCount); + for (uint32_t i = 0; i < interfaceTexture->desc.multisampling.sampleCount; i++) { + const RenderMultisamplingLocation &location = interfaceTexture->desc.multisampling.sampleLocations[i]; + samplePositions[i].X = location.x; + samplePositions[i].Y = location.y; + } + + d3d->SetSamplePositions(uint32_t(samplePositions.size()), 1, samplePositions.data()); + activeSamplePositions = true; + } + else { + resetSamplePositions(); + } + } + + void D3D12CommandList::resetSamplePositions() { + if (activeSamplePositions) { + d3d->SetSamplePositions(0, 0, nullptr); + activeSamplePositions = false; + targetFramebufferSamplePositionsSet = false; + } + } + + static void updateShaderVisibleSet(D3D12Device *device, D3D12DescriptorHeapAllocator *heapAllocator, D3D12DescriptorSet::HeapAllocation &heapAllocation, 
D3D12_DESCRIPTOR_HEAP_TYPE heapType) { + if (heapAllocation.hostModifiedCount == 0) { + return; + } + + const D3D12_CPU_DESCRIPTOR_HANDLE dstHandle = heapAllocator->getShaderCPUHandleAt(heapAllocation.offset + heapAllocation.hostModifiedIndex); + const D3D12_CPU_DESCRIPTOR_HANDLE srcHandle = heapAllocator->getHostCPUHandleAt(heapAllocation.offset + heapAllocation.hostModifiedIndex); + device->d3d->CopyDescriptorsSimple(heapAllocation.hostModifiedCount, dstHandle, srcHandle, heapType); + heapAllocation.hostModifiedIndex = 0; + heapAllocation.hostModifiedCount = 0; + } + + void D3D12CommandList::setDescriptorSet(const D3D12PipelineLayout *activePipelineLayout, RenderDescriptorSet *descriptorSet, uint32_t setIndex, bool setCompute) { + assert(descriptorSet != nullptr); + assert(activePipelineLayout != nullptr); + assert(setIndex < activePipelineLayout->setCount); + + // Copy descriptors if the shader visible heap is outdated. + D3D12DescriptorSet *interfaceDescriptorSet = static_cast(descriptorSet); + updateShaderVisibleSet(device, device->viewHeapAllocator.get(), interfaceDescriptorSet->viewAllocation, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + updateShaderVisibleSet(device, device->samplerHeapAllocator.get(), interfaceDescriptorSet->samplerAllocation, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + checkDescriptorHeaps(); + + setRootDescriptorTable(device->viewHeapAllocator.get(), interfaceDescriptorSet->viewAllocation, activePipelineLayout->setViewRootIndices[setIndex], setCompute); + setRootDescriptorTable(device->samplerHeapAllocator.get(), interfaceDescriptorSet->samplerAllocation, activePipelineLayout->setSamplerRootIndices[setIndex], setCompute); + } + + void D3D12CommandList::setRootDescriptorTable(D3D12DescriptorHeapAllocator *heapAllocator, D3D12DescriptorSet::HeapAllocation &heapAllocation, uint32_t rootIndex, bool setCompute) { + if (heapAllocation.count == 0) { + return; + } + + const D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = 
heapAllocator->getShaderGPUHandleAt(heapAllocation.offset); + if (setCompute) { + d3d->SetComputeRootDescriptorTable(rootIndex, gpuHandle); + } + else { + d3d->SetGraphicsRootDescriptorTable(rootIndex, gpuHandle); + } + } + + void D3D12CommandList::setRootDescriptor(const D3D12PipelineLayout* activePipelineLayout, RenderBufferReference bufferReference, uint32_t rootDescriptorIndex, bool setCompute) { + assert(rootDescriptorIndex < activePipelineLayout->rootDescriptorRootIndicesAndTypes.size()); + + D3D12_GPU_VIRTUAL_ADDRESS gpuVA = static_cast(bufferReference.ref)->d3d->GetGPUVirtualAddress() + bufferReference.offset; + const auto& [rootParamIndex, type] = activePipelineLayout->rootDescriptorRootIndicesAndTypes[rootDescriptorIndex]; + + if (setCompute) { + switch (type) { + case RenderRootDescriptorType::CONSTANT_BUFFER: + d3d->SetComputeRootConstantBufferView(rootParamIndex, gpuVA); + break; + case RenderRootDescriptorType::SHADER_RESOURCE: + d3d->SetComputeRootShaderResourceView(rootParamIndex, gpuVA); + break; + case RenderRootDescriptorType::UNORDERED_ACCESS: + d3d->SetComputeRootUnorderedAccessView(rootParamIndex, gpuVA); + break; + default: + assert(false && "Unknown root descriptor type."); + break; + } + } + else { + switch (type) { + case RenderRootDescriptorType::CONSTANT_BUFFER: + d3d->SetGraphicsRootConstantBufferView(rootParamIndex, gpuVA); + break; + case RenderRootDescriptorType::SHADER_RESOURCE: + d3d->SetGraphicsRootShaderResourceView(rootParamIndex, gpuVA); + break; + case RenderRootDescriptorType::UNORDERED_ACCESS: + d3d->SetGraphicsRootUnorderedAccessView(rootParamIndex, gpuVA); + break; + default: + assert(false && "Unknown root descriptor type."); + break; + } + } + } + + // D3D12CommandFence + + D3D12CommandFence::D3D12CommandFence(D3D12Device *device) { + assert(device != nullptr); + + this->device = device; + + HRESULT res = device->d3d->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&d3d)); + if (FAILED(res)) { + fprintf(stderr, 
"CreateFence failed with error code 0x%lX.\n", res); + return; + } + + fenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr); + fenceValue = 1; + } + + D3D12CommandFence::~D3D12CommandFence() { + if (d3d != nullptr) { + d3d->Release(); + } + + if (fenceEvent != 0) { + CloseHandle(fenceEvent); + } + } + + // D3D12CommandSemaphore + + D3D12CommandSemaphore::D3D12CommandSemaphore(D3D12Device *device) { + assert(device != nullptr); + + this->device = device; + + HRESULT res = device->d3d->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&d3d)); + if (FAILED(res)) { + fprintf(stderr, "CreateFence failed with error code 0x%lX.\n", res); + return; + } + } + + D3D12CommandSemaphore::~D3D12CommandSemaphore() { + if (d3d != nullptr) { + d3d->Release(); + } + } + + // D3D12CommandQueue + + D3D12CommandQueue::D3D12CommandQueue(D3D12Device *device, RenderCommandListType type) { + assert(device != nullptr); + + this->device = device; + this->type = type; + + D3D12_COMMAND_QUEUE_DESC queueDesc = { }; + queueDesc.Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL; + queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; + queueDesc.NodeMask = 0; + + switch (type) { + case RenderCommandListType::DIRECT: + queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + break; + case RenderCommandListType::COMPUTE: + queueDesc.Type = D3D12_COMMAND_LIST_TYPE_COMPUTE; + break; + case RenderCommandListType::COPY: + queueDesc.Type = D3D12_COMMAND_LIST_TYPE_COPY; + break; + default: + assert(false && "Unknown command list type."); + return; + } + + HRESULT res = device->d3d->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&d3d)); + if (FAILED(res)) { + fprintf(stderr, "CreateCommandQueue failed with error code 0x%lX.\n", res); + return; + } + } + + D3D12CommandQueue::~D3D12CommandQueue() { + if (d3d != nullptr) { + d3d->Release(); + } + } + + std::unique_ptr D3D12CommandQueue::createSwapChain(RenderWindow renderWindow, uint32_t bufferCount, RenderFormat format) { + return std::make_unique(this, renderWindow, 
bufferCount, format); + } + + void D3D12CommandQueue::executeCommandLists(const RenderCommandList **commandLists, uint32_t commandListCount, RenderCommandSemaphore **waitSemaphores, uint32_t waitSemaphoreCount, RenderCommandSemaphore **signalSemaphores, uint32_t signalSemaphoreCount, RenderCommandFence *signalFence) { + for (uint32_t i = 0; i < waitSemaphoreCount; i++) { + D3D12CommandSemaphore *interfaceSemaphore = static_cast(waitSemaphores[i]); + d3d->Wait(interfaceSemaphore->d3d, interfaceSemaphore->semaphoreValue); + } + + thread_local std::vector executionVector; + executionVector.clear(); + for (uint32_t i = 0; i < commandListCount; i++) { + const D3D12CommandList *interfaceCommandList = static_cast(commandLists[i]); + executionVector.emplace_back(static_cast(interfaceCommandList->d3d)); + } + + if (!executionVector.empty()) { + d3d->ExecuteCommandLists(UINT(executionVector.size()), executionVector.data()); + } + + for (uint32_t i = 0; i < signalSemaphoreCount; i++) { + D3D12CommandSemaphore *interfaceSemaphore = static_cast(signalSemaphores[i]); + interfaceSemaphore->semaphoreValue++; + d3d->Signal(interfaceSemaphore->d3d, interfaceSemaphore->semaphoreValue); + } + + if (signalFence != nullptr) { + D3D12CommandFence *interfaceFence = static_cast(signalFence); + d3d->Signal(interfaceFence->d3d, interfaceFence->fenceValue); + interfaceFence->d3d->SetEventOnCompletion(interfaceFence->fenceValue, interfaceFence->fenceEvent); + interfaceFence->fenceValue++; + } + } + + void D3D12CommandQueue::waitForCommandFence(RenderCommandFence *fence) { + assert(fence != nullptr); + + D3D12CommandFence *interfaceFence = static_cast(fence); + WaitForSingleObjectEx(interfaceFence->fenceEvent, INFINITE, FALSE); + } + + // D3D12Buffer + + D3D12Buffer::D3D12Buffer(D3D12Device *device, D3D12Pool *pool, const RenderBufferDesc &desc) { + assert(device != nullptr); + + this->device = device; + this->pool = pool; + this->desc = desc; + + D3D12_RESOURCE_DESC resourceDesc = {}; + 
resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + resourceDesc.Width = desc.size; + resourceDesc.Height = 1; + resourceDesc.DepthOrArraySize = 1; + resourceDesc.MipLevels = 1; + resourceDesc.Format = DXGI_FORMAT_UNKNOWN; + resourceDesc.SampleDesc.Count = 1; + resourceDesc.SampleDesc.Quality = 0; + resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + const uint32_t unorderedAccessMask = RenderBufferFlag::ACCELERATION_STRUCTURE | RenderBufferFlag::ACCELERATION_STRUCTURE_SCRATCH | RenderBufferFlag::UNORDERED_ACCESS; + resourceDesc.Flags |= (desc.flags & unorderedAccessMask) ? D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS : D3D12_RESOURCE_FLAG_NONE; + + // Default to acceleration structure state if allowed. + if ((desc.flags & RenderBufferFlag::ACCELERATION_STRUCTURE)) { + resourceStates |= D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE; + } + + // Resources on upload heap require generic read during creation. + if (desc.heapType == RenderHeapType::UPLOAD) { + resourceStates |= D3D12_RESOURCE_STATE_GENERIC_READ; + } + // Resources on readback heap require copy dest during creation. + else if (desc.heapType == RenderHeapType::READBACK) { + resourceStates |= D3D12_RESOURCE_STATE_COPY_DEST; + } + + D3D12MA::ALLOCATION_DESC allocationDesc = {}; + allocationDesc.Flags = desc.committed ? D3D12MA::ALLOCATION_FLAG_COMMITTED : D3D12MA::ALLOCATION_FLAG_NONE; + allocationDesc.HeapType = toD3D12(desc.heapType); + allocationDesc.CustomPool = (pool != nullptr) ? 
pool->d3d : nullptr; + + HRESULT res = device->allocator->CreateResource(&allocationDesc, &resourceDesc, resourceStates, nullptr, &allocation, IID_PPV_ARGS(&d3d)); + if (FAILED(res)) { + fprintf(stderr, "CreateResource failed with error code 0x%lX.\n", res); + return; + } + } + + D3D12Buffer::~D3D12Buffer() { + if (allocation != nullptr) { + d3d->Release(); + allocation->Release(); + } + } + + void *D3D12Buffer::map(uint32_t subresource, const RenderRange *readRange) { + D3D12_RANGE range; + if (readRange != nullptr) { + range.Begin = readRange->begin; + range.End = readRange->end; + } + + void *outputData; + d3d->Map(subresource, (readRange != nullptr) ? &range : nullptr, &outputData); + return outputData; + } + + void D3D12Buffer::unmap(uint32_t subresource, const RenderRange *writtenRange) { + D3D12_RANGE range; + if (writtenRange != nullptr) { + range.Begin = writtenRange->begin; + range.End = writtenRange->end; + } + + d3d->Unmap(subresource, (writtenRange != nullptr) ? &range : nullptr); + } + + std::unique_ptr D3D12Buffer::createBufferFormattedView(RenderFormat format) { + return std::make_unique(this, format); + } + + void D3D12Buffer::setName(const std::string &name) { + setObjectName(d3d, name); + } + + // D3D12BufferFormattedView + + D3D12BufferFormattedView::D3D12BufferFormattedView(D3D12Buffer *buffer, RenderFormat format) { + assert(buffer != nullptr); + assert((buffer->desc.flags & RenderBufferFlag::FORMATTED) && "Buffer must allow formatted views."); + + this->buffer = buffer; + this->format = format; + } + + D3D12BufferFormattedView::~D3D12BufferFormattedView() { } + + // D3D12TextureView + + D3D12TextureView::D3D12TextureView(D3D12Texture *texture, const RenderTextureViewDesc &desc) { + assert(texture != nullptr); + + this->texture = texture; + this->format = toDXGI(desc.format); + this->dimension = desc.dimension; + this->mipLevels = desc.mipLevels; + this->mipSlice = desc.mipSlice; + + // D3D12 and Vulkan disagree on whether D32 is usable as a 
texture view format. We just make D3D12 use R32 instead. + if (format == DXGI_FORMAT_D32_FLOAT) { + format = DXGI_FORMAT_R32_FLOAT; + } + } + + D3D12TextureView::~D3D12TextureView() { } + + // D3D12Texture + + D3D12Texture::D3D12Texture(D3D12Device *device, D3D12Pool *pool, const RenderTextureDesc &desc) { + assert(device != nullptr); + + this->device = device; + this->pool = pool; + this->desc = desc; + + const bool renderTarget = (desc.flags & RenderTextureFlag::RENDER_TARGET); + const bool depthTarget = (desc.flags & RenderTextureFlag::DEPTH_TARGET); + D3D12_RESOURCE_DESC resourceDesc = {}; + resourceDesc.Dimension = toD3D12(desc.dimension); + resourceDesc.Width = desc.width; + resourceDesc.Height = desc.height; + resourceDesc.DepthOrArraySize = desc.dimension == RenderTextureDimension::TEXTURE_3D ? desc.depth : desc.arraySize; + resourceDesc.MipLevels = desc.mipLevels; + resourceDesc.Format = toDXGI(desc.format); + resourceDesc.SampleDesc.Count = desc.multisampling.sampleCount; + resourceDesc.Layout = toD3D12(desc.textureArrangement); + resourceDesc.Flags |= renderTarget ? D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET : D3D12_RESOURCE_FLAG_NONE; + resourceDesc.Flags |= depthTarget ? D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL : D3D12_RESOURCE_FLAG_NONE; + resourceDesc.Flags |= (desc.flags & RenderTextureFlag::UNORDERED_ACCESS) ? D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS : D3D12_RESOURCE_FLAG_NONE; + + D3D12MA::ALLOCATION_DESC allocationDesc = {}; + allocationDesc.Flags = desc.committed ? D3D12MA::ALLOCATION_FLAG_COMMITTED : D3D12MA::ALLOCATION_FLAG_NONE; + allocationDesc.HeapType = D3D12_HEAP_TYPE_DEFAULT; + allocationDesc.CustomPool = (pool != nullptr) ? 
pool->d3d : nullptr; + + D3D12_CLEAR_VALUE optimizedClearValue; + if (desc.optimizedClearValue != nullptr) { + optimizedClearValue.Format = toDXGI(desc.optimizedClearValue->format); + memcpy(optimizedClearValue.Color, desc.optimizedClearValue->color.rgba, sizeof(optimizedClearValue.Color)); + } + + HRESULT res = device->allocator->CreateResource(&allocationDesc, &resourceDesc, resourceStates, (desc.optimizedClearValue != nullptr) ? &optimizedClearValue : nullptr, &allocation, IID_PPV_ARGS(&d3d)); + if (FAILED(res)) { + fprintf(stderr, "CreateResource failed with error code 0x%lX.\n", res); + return; + } + + if (renderTarget) { + createRenderTargetHeap(); + } + else if (depthTarget) { + createDepthStencilHeap(); + } + } + + D3D12Texture::~D3D12Texture() { + releaseTargetHeap(); + + if (allocation != nullptr) { + d3d->Release(); + allocation->Release(); + } + } + + std::unique_ptr D3D12Texture::createTextureView(const RenderTextureViewDesc &desc) { + return std::make_unique(this, desc); + } + + void D3D12Texture::setName(const std::string &name) { + setObjectName(d3d, name); + } + + void D3D12Texture::createRenderTargetHeap() { + targetAllocatorOffset = device->colorTargetHeapAllocator->allocate(1); + if (targetAllocatorOffset == D3D12DescriptorHeapAllocator::INVALID_OFFSET) { + fprintf(stderr, "Allocator was unable to find free space for the set."); + return; + } + + targetEntryCount = 1; + targetHeapDepth = false; + + D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {}; + rtvDesc.Format = toDXGI(desc.format); + + const bool isMSAA = (desc.multisampling.sampleCount > RenderSampleCount::COUNT_1); + switch (desc.dimension) { + case RenderTextureDimension::TEXTURE_1D: + rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1D; + rtvDesc.Texture1D.MipSlice = 0; + break; + case RenderTextureDimension::TEXTURE_2D: + if (isMSAA) { + rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMS; + } + else { + rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + 
rtvDesc.Texture2D.MipSlice = 0; + rtvDesc.Texture2D.PlaneSlice = 0; + } + + break; + case RenderTextureDimension::TEXTURE_3D: + rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE3D; + rtvDesc.Texture3D.MipSlice = 0; + rtvDesc.Texture3D.FirstWSlice = 0; + rtvDesc.Texture3D.WSize = 1; + break; + default: + assert(false && "Unsupported texture dimension for render target."); + break; + } + + const D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = device->colorTargetHeapAllocator->getHostCPUHandleAt(targetAllocatorOffset); + device->d3d->CreateRenderTargetView(d3d, &rtvDesc, cpuHandle); + } + + void D3D12Texture::createDepthStencilHeap() { + targetAllocatorOffset = device->depthTargetHeapAllocator->allocate(2); + if (targetAllocatorOffset == D3D12DescriptorHeapAllocator::INVALID_OFFSET) { + fprintf(stderr, "Allocator was unable to find free space for the set."); + return; + } + + targetEntryCount = 2; + targetHeapDepth = true; + + D3D12_DEPTH_STENCIL_VIEW_DESC dsvDesc = {}; + dsvDesc.Format = toDXGI(desc.format); + + const bool isMSAA = (desc.multisampling.sampleCount > RenderSampleCount::COUNT_1); + switch (desc.dimension) { + case RenderTextureDimension::TEXTURE_1D: + dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE1D; + dsvDesc.Texture1D.MipSlice = 0; + break; + case RenderTextureDimension::TEXTURE_2D: + if (isMSAA) { + dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DMS; + } + else { + dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D; + dsvDesc.Texture2D.MipSlice = 0; + } + + break; + default: + assert(false && "Unsupported texture dimension for depth target."); + break; + } + + const D3D12_CPU_DESCRIPTOR_HANDLE writeHandle = device->depthTargetHeapAllocator->getHostCPUHandleAt(targetAllocatorOffset); + const D3D12_CPU_DESCRIPTOR_HANDLE readOnlyHandle = device->depthTargetHeapAllocator->getHostCPUHandleAt(targetAllocatorOffset + 1); + device->d3d->CreateDepthStencilView(d3d, &dsvDesc, writeHandle); + + dsvDesc.Flags = D3D12_DSV_FLAG_READ_ONLY_DEPTH; + 
device->d3d->CreateDepthStencilView(d3d, &dsvDesc, readOnlyHandle); + } + + void D3D12Texture::releaseTargetHeap() { + if (targetEntryCount > 0) { + if (targetHeapDepth) { + device->depthTargetHeapAllocator->free(targetAllocatorOffset, targetEntryCount); + } + else { + device->colorTargetHeapAllocator->free(targetAllocatorOffset, targetEntryCount); + } + + targetEntryCount = 0; + } + } + + // D3D12AccelerationStructure + + D3D12AccelerationStructure::D3D12AccelerationStructure(D3D12Device *device, const RenderAccelerationStructureDesc &desc) { + assert(device != nullptr); + assert(desc.buffer.ref != nullptr); + + this->device = device; + this->buffer = static_cast(desc.buffer.ref); + this->offset = desc.buffer.offset; + this->size = desc.size; + this->type = desc.type; + + assert((buffer->desc.flags & RenderBufferFlag::ACCELERATION_STRUCTURE) && "Buffer must be enabled for acceleration structures."); + } + + D3D12AccelerationStructure::~D3D12AccelerationStructure() { } + + // D3D12Pool + + D3D12Pool::D3D12Pool(D3D12Device *device, const RenderPoolDesc &desc) { + assert(device != nullptr); + + this->device = device; + this->desc = desc; + + D3D12MA::POOL_DESC poolDesc = {}; + poolDesc.HeapProperties.Type = toD3D12(desc.heapType); + poolDesc.MinBlockCount = desc.minBlockCount; + poolDesc.MaxBlockCount = desc.maxBlockCount; + poolDesc.Flags |= desc.useLinearAlgorithm ? D3D12MA::POOL_FLAG_ALGORITHM_LINEAR : D3D12MA::POOL_FLAG_NONE; + poolDesc.HeapFlags |= desc.allowOnlyBuffers ? 
D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS : D3D12_HEAP_FLAG_NONE; + + HRESULT res = device->allocator->CreatePool(&poolDesc, &d3d); + if (FAILED(res)) { + fprintf(stderr, "CreatePool failed with error code 0x%lX.\n", res); + return; + } + } + + D3D12Pool::~D3D12Pool() { + if (d3d != nullptr) { + d3d->Release(); + } + } + + std::unique_ptr D3D12Pool::createBuffer(const RenderBufferDesc &desc) { + return std::make_unique(device, this, desc); + } + + std::unique_ptr D3D12Pool::createTexture(const RenderTextureDesc &desc) { + return std::make_unique(device, this, desc); + } + + // D3D12Shader + + D3D12Shader::D3D12Shader(D3D12Device *device, const void *data, uint64_t size, const char *entryPointName, RenderShaderFormat format) { + assert(device != nullptr); + assert(data != nullptr); + assert(size > 0); + assert(format != RenderShaderFormat::UNKNOWN); + assert(format == RenderShaderFormat::DXIL); + + this->device = device; + this->format = format; + this->entryPointName = (entryPointName != nullptr) ? std::string(entryPointName) : std::string(); + + const uint8_t *dataBytes = reinterpret_cast(data); + this->d3d = std::vector(dataBytes, dataBytes + size); + } + + D3D12Shader::~D3D12Shader() { } + + // D3D12Sampler + + D3D12Sampler::D3D12Sampler(D3D12Device *device, const RenderSamplerDesc &desc) { + assert(device != nullptr); + + this->device = device; + this->borderColor = desc.borderColor; + this->shaderVisibility = desc.shaderVisibility; + + samplerDesc.Filter = toFilter(desc.minFilter, desc.magFilter, desc.mipmapMode, desc.anisotropyEnabled, desc.comparisonEnabled); + samplerDesc.AddressU = toD3D12(desc.addressU); + samplerDesc.AddressV = toD3D12(desc.addressV); + samplerDesc.AddressW = toD3D12(desc.addressW); + samplerDesc.MipLODBias = desc.mipLODBias; + samplerDesc.MaxAnisotropy = desc.anisotropyEnabled ? desc.maxAnisotropy : 1; + samplerDesc.ComparisonFunc = desc.comparisonEnabled ? 
toD3D12(desc.comparisonFunc) : D3D12_COMPARISON_FUNC_NEVER; + samplerDesc.MinLOD = desc.minLOD; + samplerDesc.MaxLOD = desc.maxLOD; + + float *dstColor = samplerDesc.BorderColor; + switch (desc.borderColor) { + case RenderBorderColor::TRANSPARENT_BLACK: + dstColor[0] = 0.0f; + dstColor[1] = 0.0f; + dstColor[2] = 0.0f; + dstColor[3] = 0.0f; + break; + case RenderBorderColor::OPAQUE_BLACK: + dstColor[0] = 0.0f; + dstColor[1] = 0.0f; + dstColor[2] = 0.0f; + dstColor[3] = 1.0f; + break; + case RenderBorderColor::OPAQUE_WHITE: + dstColor[0] = 1.0f; + dstColor[1] = 1.0f; + dstColor[2] = 1.0f; + dstColor[3] = 1.0f; + break; + default: + assert(false && "Unknown border color."); + break; + } + } + + D3D12Sampler::~D3D12Sampler() { } + + // D3D12Pipeline + + D3D12Pipeline::D3D12Pipeline(D3D12Device *device, Type type) { + assert(device != nullptr); + + this->device = device; + this->type = type; + } + + D3D12Pipeline::~D3D12Pipeline() { } + + // D3D12ComputePipeline + + D3D12ComputePipeline::D3D12ComputePipeline(D3D12Device *device, const RenderComputePipelineDesc &desc) : D3D12Pipeline(device, Type::Compute) { + assert(desc.pipelineLayout != nullptr); + + const D3D12PipelineLayout *rootSignature = static_cast(desc.pipelineLayout); + const D3D12Shader *computeShader = static_cast(desc.computeShader); + D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {}; + psoDesc.pRootSignature = rootSignature->rootSignature; + psoDesc.CS.pShaderBytecode = computeShader->d3d.data(); + psoDesc.CS.BytecodeLength = computeShader->d3d.size(); + device->d3d->CreateComputePipelineState(&psoDesc, IID_PPV_ARGS(&d3d)); + } + + D3D12ComputePipeline::~D3D12ComputePipeline() { + if (d3d != nullptr) { + d3d->Release(); + } + } + + RenderPipelineProgram D3D12ComputePipeline::getProgram(const std::string &name) const { + assert(false && "Compute pipelines can't retrieve shader programs."); + return RenderPipelineProgram(); + } + + // D3D12GraphicsPipeline + + 
D3D12GraphicsPipeline::D3D12GraphicsPipeline(D3D12Device *device, const RenderGraphicsPipelineDesc &desc) : D3D12Pipeline(device, Type::Graphics) { + assert(desc.pipelineLayout != nullptr); + + topology = toD3D12(desc.primitiveTopology); + + const D3D12PipelineLayout *pipelineLayout = static_cast(desc.pipelineLayout); + const D3D12Shader *vertexShader = static_cast(desc.vertexShader); + const D3D12Shader *geometryShader = static_cast(desc.geometryShader); + const D3D12Shader *pixelShader = static_cast(desc.pixelShader); + D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; + psoDesc.pRootSignature = pipelineLayout->rootSignature; + psoDesc.VS.pShaderBytecode = (vertexShader != nullptr) ? vertexShader->d3d.data() : nullptr; + psoDesc.VS.BytecodeLength = (vertexShader != nullptr) ? vertexShader->d3d.size() : 0; + psoDesc.GS.pShaderBytecode = (geometryShader != nullptr) ? geometryShader->d3d.data() : nullptr; + psoDesc.GS.BytecodeLength = (geometryShader != nullptr) ? geometryShader->d3d.size() : 0; + psoDesc.PS.pShaderBytecode = (pixelShader != nullptr) ? pixelShader->d3d.data() : nullptr; + psoDesc.PS.BytecodeLength = (pixelShader != nullptr) ? 
pixelShader->d3d.size() : 0; + psoDesc.SampleMask = UINT_MAX; + psoDesc.SampleDesc.Count = desc.multisampling.sampleCount; + psoDesc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF; + psoDesc.PrimitiveTopologyType = toTopologyType(desc.primitiveTopology); + psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; + psoDesc.RasterizerState.DepthClipEnable = desc.depthClipEnabled; + psoDesc.RasterizerState.DepthBias = desc.depthBias; + psoDesc.RasterizerState.SlopeScaledDepthBias = desc.slopeScaledDepthBias; + + switch (desc.cullMode) { + case RenderCullMode::NONE: + psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; + break; + case RenderCullMode::FRONT: + psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_FRONT; + break; + case RenderCullMode::BACK: + psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_BACK; + break; + default: + assert(false && "Unknown cull mode."); + return; + } + + psoDesc.DepthStencilState.DepthEnable = desc.depthEnabled; + psoDesc.DepthStencilState.DepthWriteMask = desc.depthWriteEnabled ? 
D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO; + psoDesc.DepthStencilState.DepthFunc = toD3D12(desc.depthFunction); + psoDesc.NumRenderTargets = desc.renderTargetCount; + + for (uint32_t i = 0; i < desc.renderTargetCount; i++) { + psoDesc.RTVFormats[i] = toDXGI(desc.renderTargetFormat[i]); + + const RenderBlendDesc &renderDesc = desc.renderTargetBlend[i]; + D3D12_RENDER_TARGET_BLEND_DESC &targetDesc = psoDesc.BlendState.RenderTarget[i]; + targetDesc.BlendEnable = renderDesc.blendEnabled; + targetDesc.LogicOpEnable = desc.logicOpEnabled; + targetDesc.SrcBlend = toD3D12(renderDesc.srcBlend); + targetDesc.DestBlend = toD3D12(renderDesc.dstBlend); + targetDesc.BlendOp = toD3D12(renderDesc.blendOp); + targetDesc.SrcBlendAlpha = toD3D12(renderDesc.srcBlendAlpha); + targetDesc.DestBlendAlpha = toD3D12(renderDesc.dstBlendAlpha); + targetDesc.BlendOpAlpha = toD3D12(renderDesc.blendOpAlpha); + targetDesc.LogicOp = toD3D12(desc.logicOp); + targetDesc.RenderTargetWriteMask = renderDesc.renderTargetWriteMask; + } + + psoDesc.DSVFormat = toDXGI(desc.depthTargetFormat); + + std::vector inputElements; + for (uint32_t i = 0; i < desc.inputElementsCount; i++) { + const RenderInputElement &renderElement = desc.inputElements[i]; + D3D12_INPUT_ELEMENT_DESC inputElement; + inputElement.SemanticName = renderElement.semanticName; + inputElement.SemanticIndex = renderElement.semanticIndex; + inputElement.Format = toDXGI(renderElement.format); + inputElement.InputSlot = renderElement.slotIndex; + inputElement.AlignedByteOffset = renderElement.alignedByteOffset; + + // Read the corresponding input slot to find the input classification and instance data step rate. 
+ bool foundInputSlot = false; + D3D12_INPUT_CLASSIFICATION inputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; + uint32_t instanceDataStepRate = 0; + for (uint32_t j = 0; j < desc.inputSlotsCount; j++) { + if (renderElement.slotIndex == desc.inputSlots[j].index) { + inputSlotClass = toD3D12(desc.inputSlots[j].classification); + instanceDataStepRate = (inputSlotClass == D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA) ? 1 : 0; + foundInputSlot = true; + break; + } + } + + assert(foundInputSlot && "The slot index specified in the input element must exist in the input slots."); + inputElement.InputSlotClass = inputSlotClass; + inputElement.InstanceDataStepRate = instanceDataStepRate; + inputElements.emplace_back(inputElement); + } + + for (uint32_t i = 0; i < desc.inputSlotsCount; i++) { + inputSlots.emplace_back(desc.inputSlots[i]); + } + + psoDesc.InputLayout = { inputElements.data(), UINT(inputElements.size()) }; + + device->d3d->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&d3d)); + } + + D3D12GraphicsPipeline::~D3D12GraphicsPipeline() { + if (d3d != nullptr) { + d3d->Release(); + } + } + + RenderPipelineProgram D3D12GraphicsPipeline::getProgram(const std::string &name) const { + assert(false && "Graphics pipelines can't retrieve shader programs."); + return RenderPipelineProgram(); + } + + // D3D12RaytracingPipeline + + D3D12RaytracingPipeline::D3D12RaytracingPipeline(D3D12Device *device, const RenderRaytracingPipelineDesc &desc, const RenderPipeline *previousPipeline) : D3D12Pipeline(device, Type::Raytracing) { + assert(desc.librariesCount > 0); + assert(desc.pipelineLayout != nullptr); + + uint32_t subobjectCount = desc.librariesCount + desc.hitGroupsCount + 8; + uint32_t exportSymbolsCount = 0; + for (uint32_t i = 0; i < desc.librariesCount; i++) { + exportSymbolsCount += desc.libraries[i].symbolsCount; + } + + assert((exportSymbolsCount > 0) && "At least one symbol must be exported from the libraries."); + + std::vector 
subobjects(subobjectCount); + std::vector libraryDescs(desc.librariesCount); + std::vector hitGroupDescs(desc.hitGroupsCount); + std::vector exportDescs(exportSymbolsCount); + std::vector exportNames(exportSymbolsCount); + std::vector exportRenames(exportSymbolsCount); + std::unordered_set exportAssociationSet; + D3D12_SUBOBJECT_TO_EXPORTS_ASSOCIATION subobjectToExportsAssociation; + + uint32_t subobjectIndex = 0; + uint32_t exportsIndex = 0; + for (uint32_t i = 0; i < desc.librariesCount; i++) { + uint32_t exportsIndexStart = exportsIndex; + const RenderRaytracingPipelineLibrary &renderLibrary = desc.libraries[i]; + for (uint32_t j = 0; j < renderLibrary.symbolsCount; j++) { + D3D12_EXPORT_DESC &exportDesc = exportDescs[exportsIndex]; + std::wstring &exportName = exportNames[exportsIndex]; + std::wstring &exportRename = exportRenames[exportsIndex]; + exportName = Utf8ToUtf16(std::string(renderLibrary.symbols[j].importName)); + exportRename = (renderLibrary.symbols[j].exportName != nullptr) ? 
Utf8ToUtf16(std::string(renderLibrary.symbols[j].exportName)) : exportName; + exportDesc.Name = exportName.c_str(); + exportDesc.ExportToRename = exportRename.c_str(); + exportDesc.Flags = D3D12_EXPORT_FLAG_NONE; + exportAssociationSet.insert(exportRename); + exportsIndex++; + } + + const D3D12Shader *libraryShader = static_cast(renderLibrary.shader); + assert(libraryShader != nullptr); + + D3D12_DXIL_LIBRARY_DESC &libraryDesc = libraryDescs[i]; + libraryDesc.DXILLibrary.pShaderBytecode = libraryShader->d3d.data(); + libraryDesc.DXILLibrary.BytecodeLength = libraryShader->d3d.size(); + libraryDesc.pExports = &exportDescs[exportsIndexStart]; + libraryDesc.NumExports = exportsIndex - exportsIndexStart; + + D3D12_STATE_SUBOBJECT &subobject = subobjects[subobjectIndex++]; + subobject.Type = D3D12_STATE_SUBOBJECT_TYPE_DXIL_LIBRARY; + subobject.pDesc = &libraryDescs[i]; + } + + auto fillHitGroupString = [&exportAssociationSet](LPCWSTR &dstPtr, std::wstring &dstString, const char *srcString, bool associateToSet) { + if (srcString != nullptr) { + dstString = Utf8ToUtf16(srcString); + dstPtr = dstString.c_str(); + } + else { + dstPtr = nullptr; + } + + if (associateToSet) { + exportAssociationSet.insert(dstString); + } + else { + exportAssociationSet.erase(dstString); + } + }; + + std::vector hitGroupNames(desc.hitGroupsCount * 4); + uint32_t hitGroupNameIndex = 0; + for (uint32_t i = 0; i < desc.hitGroupsCount; i++) { + const RenderRaytracingPipelineHitGroup &renderHitGroup = desc.hitGroups[i]; + assert(renderHitGroup.hitGroupName != nullptr); + + D3D12_HIT_GROUP_DESC &hitGroupDesc = hitGroupDescs[i]; + hitGroupDesc.Type = D3D12_HIT_GROUP_TYPE_TRIANGLES; + fillHitGroupString(hitGroupDesc.HitGroupExport, hitGroupNames[hitGroupNameIndex++], renderHitGroup.hitGroupName, true); + fillHitGroupString(hitGroupDesc.ClosestHitShaderImport, hitGroupNames[hitGroupNameIndex++], renderHitGroup.closestHitName, false); + fillHitGroupString(hitGroupDesc.AnyHitShaderImport, 
hitGroupNames[hitGroupNameIndex++], renderHitGroup.anyHitName, false); + fillHitGroupString(hitGroupDesc.IntersectionShaderImport, hitGroupNames[hitGroupNameIndex++], renderHitGroup.intersectionName, false); + + D3D12_STATE_SUBOBJECT &subobject = subobjects[subobjectIndex++]; + subobject.Type = D3D12_STATE_SUBOBJECT_TYPE_HIT_GROUP; + subobject.pDesc = &hitGroupDescs[i]; + } + + D3D12_RAYTRACING_SHADER_CONFIG shaderDesc = {}; + shaderDesc.MaxPayloadSizeInBytes = desc.maxPayloadSize; + shaderDesc.MaxAttributeSizeInBytes = desc.maxAttributeSize; + + D3D12_STATE_SUBOBJECT &shaderConfigSubobject = subobjects[subobjectIndex++]; + shaderConfigSubobject.Type = D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_SHADER_CONFIG; + shaderConfigSubobject.pDesc = &shaderDesc; + + std::vector exportPointers; + exportPointers.reserve(exportAssociationSet.size()); + for (const std::wstring &exportAssociation : exportAssociationSet) { + exportPointers.emplace_back(exportAssociation.c_str()); + } + + D3D12_SUBOBJECT_TO_EXPORTS_ASSOCIATION exportsAssociation = {}; + exportsAssociation.pExports = exportPointers.data(); + exportsAssociation.NumExports = static_cast(exportPointers.size()); + exportsAssociation.pSubobjectToAssociate = &shaderConfigSubobject; + + D3D12_STATE_SUBOBJECT &exportAssociationSubobject = subobjects[subobjectIndex++]; + exportAssociationSubobject.Type = D3D12_STATE_SUBOBJECT_TYPE_SUBOBJECT_TO_EXPORTS_ASSOCIATION; + exportAssociationSubobject.pDesc = &exportsAssociation; + + std::vector associatedSymbolsNames(exportSymbolsCount); + std::vector associatedSymbolsNamesPointers(exportSymbolsCount); + uint32_t associatedSymbolCount = 0; + for (uint32_t i = 0; i < desc.librariesCount; i++) { + const RenderRaytracingPipelineLibrary &renderLibrary = desc.libraries[i]; + for (uint32_t j = 0; j < renderLibrary.symbolsCount; j++) { + const char *symbolName = (renderLibrary.symbols[j].exportName != nullptr) ? 
renderLibrary.symbols[j].exportName : renderLibrary.symbols[j].importName; + associatedSymbolsNames[associatedSymbolCount] = Utf8ToUtf16(symbolName); + associatedSymbolsNamesPointers[associatedSymbolCount] = associatedSymbolsNames[associatedSymbolCount].c_str(); + associatedSymbolCount++; + } + } + + pipelineLayout = static_cast(desc.pipelineLayout); + + D3D12_STATE_SUBOBJECT &rootSignatureSubobject = subobjects[subobjectIndex++]; + rootSignatureSubobject.Type = D3D12_STATE_SUBOBJECT_TYPE_LOCAL_ROOT_SIGNATURE; + rootSignatureSubobject.pDesc = &pipelineLayout->rootSignature; + + subobjectToExportsAssociation.pExports = associatedSymbolsNamesPointers.data(); + subobjectToExportsAssociation.NumExports = associatedSymbolCount; + subobjectToExportsAssociation.pSubobjectToAssociate = &rootSignatureSubobject; + + D3D12_STATE_SUBOBJECT &rootSignatureAssocationSubobject = subobjects[subobjectIndex++]; + rootSignatureAssocationSubobject.Type = D3D12_STATE_SUBOBJECT_TYPE_SUBOBJECT_TO_EXPORTS_ASSOCIATION; + rootSignatureAssocationSubobject.pDesc = &subobjectToExportsAssociation; + + const D3D12PipelineLayout *interfaceGlobalPipelineLayout = static_cast(device->rtDummyGlobalPipelineLayout.get()); + D3D12_STATE_SUBOBJECT &globalRootSignatureSubobject = subobjects[subobjectIndex++]; + globalRootSignatureSubobject.Type = D3D12_STATE_SUBOBJECT_TYPE_GLOBAL_ROOT_SIGNATURE; + globalRootSignatureSubobject.pDesc = &interfaceGlobalPipelineLayout->rootSignature; + + const D3D12PipelineLayout *interfaceLocalPipelineLayout = static_cast(device->rtDummyLocalPipelineLayout.get()); + D3D12_STATE_SUBOBJECT &localRootSignatureSubobject = subobjects[subobjectIndex++]; + localRootSignatureSubobject.Type = D3D12_STATE_SUBOBJECT_TYPE_LOCAL_ROOT_SIGNATURE; + localRootSignatureSubobject.pDesc = &interfaceLocalPipelineLayout->rootSignature; + + D3D12_STATE_OBJECT_CONFIG stateConfig; + stateConfig.Flags = desc.stateUpdateEnabled ? 
D3D12_STATE_OBJECT_FLAG_ALLOW_STATE_OBJECT_ADDITIONS : D3D12_STATE_OBJECT_FLAG_NONE; + + D3D12_STATE_SUBOBJECT &stateConfigSubobject = subobjects[subobjectIndex++]; + stateConfigSubobject.Type = D3D12_STATE_SUBOBJECT_TYPE_STATE_OBJECT_CONFIG; + stateConfigSubobject.pDesc = &stateConfig; + + D3D12_RAYTRACING_PIPELINE_CONFIG pipelineConfig = {}; + pipelineConfig.MaxTraceRecursionDepth = desc.maxRecursionDepth; + + D3D12_STATE_SUBOBJECT &pipelineConfigSubobject = subobjects[subobjectIndex++]; + pipelineConfigSubobject.Type = D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_PIPELINE_CONFIG; + pipelineConfigSubobject.pDesc = &pipelineConfig; + + assert(uint32_t(subobjects.size()) == subobjectIndex); + + D3D12_STATE_OBJECT_DESC pipelineDesc = {}; + pipelineDesc.Type = D3D12_STATE_OBJECT_TYPE_RAYTRACING_PIPELINE; + pipelineDesc.pSubobjects = subobjects.data(); + pipelineDesc.NumSubobjects = UINT(subobjects.size()); + + if (desc.stateUpdateEnabled && (previousPipeline != nullptr)) { + assert(static_cast(previousPipeline)->type == Type::Raytracing); + const D3D12RaytracingPipeline *previousRaytracingPipeline = static_cast(previousPipeline); + HRESULT res = device->d3d->AddToStateObject(&pipelineDesc, previousRaytracingPipeline->stateObject, IID_PPV_ARGS(&stateObject)); + if (FAILED(res)) { + fprintf(stderr, "AddToStateObject failed with error code 0x%lX.\n", res); + return; + } + } + else { + HRESULT res = device->d3d->CreateStateObject(&pipelineDesc, IID_PPV_ARGS(&stateObject)); + if (FAILED(res)) { + fprintf(stderr, "CreateStateObject failed with error code 0x%lX.\n", res); + return; + } + } + + HRESULT res = stateObject->QueryInterface(IID_PPV_ARGS(&stateObjectProperties)); + if (FAILED(res)) { + fprintf(stderr, "QueryInterface failed with error code 0x%lX.\n", res); + return; + } + + // Cache all the programs compiled into the PSO into a name map. 
+ programShaderIdentifiers.reserve(exportAssociationSet.size()); + for (const std::wstring &exportAssociation : exportAssociationSet) { + void *shaderIdentifier = stateObjectProperties->GetShaderIdentifier(exportAssociation.c_str()); + const std::string exportName = Utf16ToUtf8(exportAssociation); + uint32_t programIndex = uint32_t(programShaderIdentifiers.size()); + programShaderIdentifiers.emplace_back(shaderIdentifier); + nameProgramMap[exportName] = { programIndex }; + } + } + + D3D12RaytracingPipeline::~D3D12RaytracingPipeline() { + if (stateObjectProperties != nullptr) { + stateObjectProperties->Release(); + } + + if (stateObject != nullptr) { + stateObject->Release(); + } + } + + RenderPipelineProgram D3D12RaytracingPipeline::getProgram(const std::string &name) const { + auto it = nameProgramMap.find(name); + assert((it != nameProgramMap.end()) && "Program must exist in the PSO."); + return it->second; + } + + // D3D12PipelineLayout + + D3D12PipelineLayout::D3D12PipelineLayout(D3D12Device *device, const RenderPipelineLayoutDesc &desc) { + assert(device != nullptr); + + this->device = device; + this->setCount = desc.descriptorSetDescsCount; + + thread_local std::vector rootParameters; + thread_local std::vector staticSamplers; + rootParameters.clear(); + staticSamplers.clear(); + + // Push constants will be the first root parameters of the signature. 
+ for (uint32_t i = 0; i < desc.pushConstantRangesCount; i++) { + const RenderPushConstantRange &range = desc.pushConstantRanges[i]; + D3D12_ROOT_PARAMETER rootParameter = {}; + rootParameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + rootParameter.Constants.ShaderRegister = range.binding; + rootParameter.Constants.RegisterSpace = range.set; + rootParameter.Constants.Num32BitValues = (range.size + sizeof(uint32_t) - 1) / sizeof(uint32_t); + rootParameters.emplace_back(rootParameter); + pushConstantRanges.emplace_back(range); + } + + // Figure out the total size of ranges that will be needed first. + uint32_t viewRangesCount = 0; + uint32_t samplerRangesCount = 0; + for (uint32_t i = 0; i < desc.descriptorSetDescsCount; i++) { + const RenderDescriptorSetDesc &descriptorSetDesc = desc.descriptorSetDescs[i]; + for (uint32_t j = 0; j < descriptorSetDesc.descriptorRangesCount; j++) { + const RenderDescriptorRange &renderRange = descriptorSetDesc.descriptorRanges[j]; + if (renderRange.immutableSampler != nullptr) { + continue; + } + else if (renderRange.type == RenderDescriptorRangeType::SAMPLER) { + samplerRangesCount++; + } + else { + viewRangesCount++; + } + } + } + + thread_local std::vector viewRanges; + thread_local std::vector samplerRanges; + uint32_t viewRangeIndex = 0; + uint32_t samplerRangeIndex = 0; + viewRanges.resize(viewRangesCount); + samplerRanges.resize(samplerRangesCount); + + // Descriptor sets will be created as descriptor table parameters. 
+ for (uint32_t i = 0; i < desc.descriptorSetDescsCount; i++) { + uint32_t viewTableOffset = 0; + uint32_t viewTableSize = 0; + uint32_t samplerTableOffset = 0; + uint32_t samplerTableSize = 0; + const RenderDescriptorSetDesc &descriptorSetDesc = desc.descriptorSetDescs[i]; + for (uint32_t j = 0; j < descriptorSetDesc.descriptorRangesCount; j++) { + // D3D12 requires specifying boundless arrays by setting the descriptor count to UINT_MAX. + const RenderDescriptorRange &renderRange = descriptorSetDesc.descriptorRanges[j]; + const bool isRangeBoundless = (descriptorSetDesc.lastRangeIsBoundless && (j == (descriptorSetDesc.descriptorRangesCount - 1))); + + // Immutable samplers are converted to static samplers and filtered out of the table entirely. + if (renderRange.immutableSampler != nullptr) { + for (uint32_t k = 0; k < renderRange.count; k++) { + const D3D12Sampler *sampler = static_cast(renderRange.immutableSampler[k]); + const D3D12_SAMPLER_DESC &samplerDesc = sampler->samplerDesc; + D3D12_STATIC_SAMPLER_DESC staticSampler = {}; + staticSampler.Filter = samplerDesc.Filter; + staticSampler.AddressU = samplerDesc.AddressU; + staticSampler.AddressV = samplerDesc.AddressV; + staticSampler.AddressW = samplerDesc.AddressW; + staticSampler.MipLODBias = samplerDesc.MipLODBias; + staticSampler.MaxAnisotropy = samplerDesc.MaxAnisotropy; + staticSampler.ComparisonFunc = samplerDesc.ComparisonFunc; + staticSampler.BorderColor = toStaticBorderColor(sampler->borderColor); + staticSampler.MinLOD = samplerDesc.MinLOD; + staticSampler.MaxLOD = samplerDesc.MaxLOD; + staticSampler.ShaderRegister = renderRange.binding; + staticSampler.RegisterSpace = i; + staticSampler.ShaderVisibility = toD3D12(sampler->shaderVisibility); + staticSamplers.emplace_back(staticSampler); + } + } + // Dynamic samplers must use a different type of heap. 
+ else if (renderRange.type == RenderDescriptorRangeType::SAMPLER) { + D3D12_DESCRIPTOR_RANGE &descriptorRange = samplerRanges[samplerRangeIndex + samplerTableSize]; + descriptorRange.RangeType = toRangeType(renderRange.type); + descriptorRange.NumDescriptors = isRangeBoundless ? UINT_MAX : renderRange.count; + descriptorRange.BaseShaderRegister = renderRange.binding; + descriptorRange.RegisterSpace = i; + descriptorRange.OffsetInDescriptorsFromTableStart = samplerTableOffset; + samplerTableSize++; + samplerTableOffset += renderRange.count; + } + else { + D3D12_DESCRIPTOR_RANGE &descriptorRange = viewRanges[viewRangeIndex + viewTableSize]; + descriptorRange.RangeType = toRangeType(renderRange.type); + descriptorRange.NumDescriptors = isRangeBoundless ? UINT_MAX : renderRange.count; + descriptorRange.BaseShaderRegister = renderRange.binding; + descriptorRange.RegisterSpace = i; + descriptorRange.OffsetInDescriptorsFromTableStart = viewTableOffset; + viewTableSize++; + viewTableOffset += renderRange.count; + } + } + + setViewRootIndices.emplace_back(uint32_t(rootParameters.size())); + + if (viewTableSize > 0) { + D3D12_ROOT_PARAMETER rootParameter = {}; + rootParameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParameter.DescriptorTable.pDescriptorRanges = &viewRanges[viewRangeIndex]; + rootParameter.DescriptorTable.NumDescriptorRanges = viewTableSize; + rootParameters.emplace_back(rootParameter); + viewRangeIndex += viewTableSize; + } + + setSamplerRootIndices.emplace_back(uint32_t(rootParameters.size())); + + if (samplerTableSize > 0) { + D3D12_ROOT_PARAMETER rootParameter = {}; + rootParameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParameter.DescriptorTable.pDescriptorRanges = &samplerRanges[samplerRangeIndex]; + rootParameter.DescriptorTable.NumDescriptorRanges = samplerTableSize; + 
rootParameters.emplace_back(rootParameter); + samplerRangeIndex += samplerTableSize; + } + } + + // Add root descriptors last. + for (uint32_t i = 0; i < desc.rootDescriptorDescsCount; i++) { + const RenderRootDescriptorDesc& rootDescriptorDesc = desc.rootDescriptorDescs[i]; + + D3D12_ROOT_PARAMETER rootParameter = {}; + rootParameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParameter.Descriptor.ShaderRegister = rootDescriptorDesc.shaderRegister; + rootParameter.Descriptor.RegisterSpace = rootDescriptorDesc.registerSpace; + + switch (rootDescriptorDesc.type) { + case RenderRootDescriptorType::CONSTANT_BUFFER: + rootParameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + break; + case RenderRootDescriptorType::SHADER_RESOURCE: + rootParameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV; + break; + case RenderRootDescriptorType::UNORDERED_ACCESS: + rootParameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV; + break; + default: + assert(false && "Invalid root descriptor type."); + break; + } + + rootDescriptorRootIndicesAndTypes.emplace_back(uint32_t(rootParameters.size()), rootDescriptorDesc.type); + rootParameters.push_back(rootParameter); + } + + // Store the total amount of root parameters. + rootCount = rootParameters.size(); + + // Fill root signature desc. + D3D12_ROOT_SIGNATURE_DESC rootSignatureDesc = {}; + rootSignatureDesc.Flags |= desc.isLocal ? D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE : D3D12_ROOT_SIGNATURE_FLAG_NONE; + rootSignatureDesc.Flags |= desc.allowInputLayout ? D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT : D3D12_ROOT_SIGNATURE_FLAG_NONE; + rootSignatureDesc.pParameters = !rootParameters.empty() ? rootParameters.data() : nullptr; + rootSignatureDesc.NumParameters = UINT(rootParameters.size()); + rootSignatureDesc.pStaticSamplers = !staticSamplers.empty() ? staticSamplers.data() : nullptr; + rootSignatureDesc.NumStaticSamplers = UINT(staticSamplers.size()); + + // Serialize the root signature. 
+ ID3DBlob *signatureBlob; + ID3DBlob *errorBlob; + HRESULT res = D3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1_0, &signatureBlob, &errorBlob); + if (FAILED(res)) { + fprintf(stderr, "%s\n", (char *)(errorBlob->GetBufferPointer())); + return; + } + + res = device->d3d->CreateRootSignature(0, signatureBlob->GetBufferPointer(), signatureBlob->GetBufferSize(), IID_PPV_ARGS(&rootSignature)); + if (FAILED(res)) { + fprintf(stderr, "CreateRootSignature failed with error code 0x%lX.\n", res); + return; + } + } + + D3D12PipelineLayout::~D3D12PipelineLayout() { + if (rootSignature != nullptr) { + rootSignature->Release(); + } + } + + // D3D12Device + + D3D12Device::D3D12Device(D3D12Interface *renderInterface) { + assert(renderInterface != nullptr); + + this->renderInterface = renderInterface; + + // Detect adapter to use that will offer the best performance and features. + HRESULT res; + UINT adapterIndex = 0; + IDXGIAdapter1 *adapterOption = nullptr; + while (renderInterface->dxgiFactory->EnumAdapters1(adapterIndex++, &adapterOption) != DXGI_ERROR_NOT_FOUND) { + DXGI_ADAPTER_DESC1 adapterDesc; + adapterOption->GetDesc1(&adapterDesc); + + // Ignore remote or software adapters. + if (adapterDesc.Flags & (DXGI_ADAPTER_FLAG_REMOTE | DXGI_ADAPTER_FLAG_SOFTWARE)) { + adapterOption->Release(); + continue; + } + + ID3D12Device8 *deviceOption = nullptr; + res = D3D12CreateDevice(adapterOption, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&deviceOption)); + if (FAILED(res)) { + adapterOption->Release(); + continue; + } + + // Determine the shader model supported by the device. 
+# if SM_5_1_SUPPORTED + const D3D_SHADER_MODEL supportedShaderModels[] = { D3D_SHADER_MODEL_6_0, D3D_SHADER_MODEL_5_1 }; +# else + const D3D_SHADER_MODEL supportedShaderModels[] = { D3D_SHADER_MODEL_6_0 }; +# endif + D3D12_FEATURE_DATA_SHADER_MODEL dataShaderModel = {}; + for (uint32_t i = 0; i < _countof(supportedShaderModels); i++) { + dataShaderModel.HighestShaderModel = supportedShaderModels[i]; + res = deviceOption->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &dataShaderModel, sizeof(dataShaderModel)); + if (res != E_INVALIDARG) { + if (FAILED(res)) { + deviceOption->Release(); + adapterOption->Release(); + continue; + } + + break; + } + } + + // Determine if the device supports sample locations. + bool samplePositionsOption = false; + D3D12_FEATURE_DATA_D3D12_OPTIONS2 d3d12Options2 = {}; + res = deviceOption->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS2, &d3d12Options2, sizeof(d3d12Options2)); + if (SUCCEEDED(res)) { + samplePositionsOption = d3d12Options2.ProgrammableSamplePositionsTier >= D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_1; + } + + // Determine if the device supports raytracing. + bool rtSupportOption = false; + bool rtStateUpdateSupportOption = false; + D3D12_FEATURE_DATA_D3D12_OPTIONS5 d3d12Options5 = {}; + res = deviceOption->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS5, &d3d12Options5, sizeof(d3d12Options5)); + if (SUCCEEDED(res)) { + rtSupportOption = d3d12Options5.RaytracingTier >= D3D12_RAYTRACING_TIER_1_0; + rtStateUpdateSupportOption = d3d12Options5.RaytracingTier >= D3D12_RAYTRACING_TIER_1_1; + } + + // Pick this adapter and device if it has better feature support than the current one. 
+ bool preferOverNothing = (adapter == nullptr) || (d3d == nullptr); + bool preferVideoMemory = adapterDesc.DedicatedVideoMemory > description.dedicatedVideoMemory; + bool preferUserChoice = false;//wcsstr(adapterDesc.Description, L"AMD") != nullptr; + bool preferOption = preferOverNothing || preferVideoMemory || preferUserChoice; + if (preferOption) { + if (d3d != nullptr) { + d3d->Release(); + } + + if (adapter != nullptr) { + adapter->Release(); + } + + adapter = adapterOption; + d3d = deviceOption; + shaderModel = dataShaderModel.HighestShaderModel; + capabilities.raytracing = rtSupportOption; + capabilities.raytracingStateUpdate = rtStateUpdateSupportOption; + capabilities.sampleLocations = samplePositionsOption; + description.name = Utf16ToUtf8(adapterDesc.Description); + description.dedicatedVideoMemory = adapterDesc.DedicatedVideoMemory; + + if (preferUserChoice) { + break; + } + } + else { + deviceOption->Release(); + adapterOption->Release(); + } + } + + if (d3d == nullptr) { + fprintf(stderr, "Unable to create a D3D12 device with the required features.\n"); + return; + } + + D3D12MA::ALLOCATOR_DESC allocatorDesc = {}; + allocatorDesc.pDevice = d3d; + allocatorDesc.pAdapter = adapter; + allocatorDesc.Flags = D3D12MA::ALLOCATOR_FLAG_DEFAULT_POOLS_NOT_ZEROED | D3D12MA::ALLOCATOR_FLAG_MSAA_TEXTURES_ALWAYS_COMMITTED | D3D12MA::ALLOCATOR_FLAG_DONT_PREFER_SMALL_BUFFERS_COMMITTED; + + res = D3D12MA::CreateAllocator(&allocatorDesc, &allocator); + if (FAILED(res)) { + fprintf(stderr, "D3D12MA::CreateAllocator failed with error code 0x%lX.\n", res); + release(); + return; + } + + if (capabilities.raytracing) { + RenderPipelineLayoutDesc pipelineLayoutDesc; + rtDummyGlobalPipelineLayout = createPipelineLayout(pipelineLayoutDesc); + + pipelineLayoutDesc.isLocal = true; + rtDummyLocalPipelineLayout = createPipelineLayout(pipelineLayoutDesc); + } + +# ifdef D3D12_DEBUG_LAYER_ENABLED + // Add it to the debug layer info queue if available. 
+ ID3D12InfoQueue *infoQueue; + res = d3d->QueryInterface(IID_PPV_ARGS(&infoQueue)); + if (SUCCEEDED(res)) { + D3D12_MESSAGE_SEVERITY severities[] = { + D3D12_MESSAGE_SEVERITY_INFO + }; + + D3D12_MESSAGE_ID denyIds[] = { + D3D12_MESSAGE_ID_COMMAND_LIST_DRAW_VERTEX_BUFFER_NOT_SET, + D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE, + D3D12_MESSAGE_ID_CLEARDEPTHSTENCILVIEW_MISMATCHINGCLEARVALUE, + D3D12_MESSAGE_ID_DRAW_EMPTY_SCISSOR_RECTANGLE, + D3D12_MESSAGE_ID_HEAP_ADDRESS_RANGE_INTERSECTS_MULTIPLE_BUFFERS, + D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_RENDERTARGETVIEW_NOT_SET, +# ifdef D3D12_DEBUG_LAYER_SUPRESS_SAMPLE_POSITIONS_ERROR + D3D12_MESSAGE_ID_SAMPLEPOSITIONS_MISMATCH_RECORDTIME_ASSUMEDFROMCLEAR, + D3D12_MESSAGE_ID_SAMPLEPOSITIONS_MISMATCH_DEFERRED, +# endif + }; + + D3D12_INFO_QUEUE_FILTER newFilter = {}; + newFilter.DenyList.NumSeverities = _countof(severities); + newFilter.DenyList.pSeverityList = severities; + newFilter.DenyList.NumIDs = _countof(denyIds); + newFilter.DenyList.pIDList = denyIds; + infoQueue->PushStorageFilter(&newFilter); + + infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, true); + infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, D3D12_DEBUG_LAYER_BREAK_ON_ERROR); + infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, D3D12_DEBUG_LAYER_BREAK_ON_WARNING); + } +# endif + + // Fill capabilities. + capabilities.descriptorIndexing = true; + capabilities.scalarBlockLayout = true; + capabilities.presentWait = true; + capabilities.preferHDR = description.dedicatedVideoMemory > (512 * 1024 * 1024); + + // Create descriptor heaps allocator. 
+ viewHeapAllocator = std::make_unique(this, ShaderDescriptorHeapSize, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + samplerHeapAllocator = std::make_unique(this, SamplerDescriptorHeapSize, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + colorTargetHeapAllocator = std::make_unique(this, TargetDescriptorHeapSize, D3D12_DESCRIPTOR_HEAP_TYPE_RTV); + depthTargetHeapAllocator = std::make_unique(this, TargetDescriptorHeapSize, D3D12_DESCRIPTOR_HEAP_TYPE_DSV); + } + + D3D12Device::~D3D12Device() { + viewHeapAllocator.reset(); + samplerHeapAllocator.reset(); + rtDummyGlobalPipelineLayout.reset(); + rtDummyLocalPipelineLayout.reset(); + release(); + } + + std::unique_ptr D3D12Device::createCommandList(RenderCommandListType type) { + return std::make_unique(this, type); + } + + std::unique_ptr D3D12Device::createDescriptorSet(const RenderDescriptorSetDesc &desc) { + return std::make_unique(this, desc); + } + + std::unique_ptr D3D12Device::createShader(const void *data, uint64_t size, const char *entryPointName, RenderShaderFormat format) { + return std::make_unique(this, data, size, entryPointName, format); + } + + std::unique_ptr D3D12Device::createSampler(const RenderSamplerDesc &desc) { + return std::make_unique(this, desc); + } + + std::unique_ptr D3D12Device::createComputePipeline(const RenderComputePipelineDesc &desc) { + return std::make_unique(this, desc); + } + + std::unique_ptr D3D12Device::createGraphicsPipeline(const RenderGraphicsPipelineDesc &desc) { + return std::make_unique(this, desc); + } + + std::unique_ptr D3D12Device::createRaytracingPipeline(const RenderRaytracingPipelineDesc &desc, const RenderPipeline *previousPipeline) { + return std::make_unique(this, desc, previousPipeline); + } + + std::unique_ptr D3D12Device::createCommandQueue(RenderCommandListType type) { + return std::make_unique(this, type); + } + + std::unique_ptr D3D12Device::createBuffer(const RenderBufferDesc &desc) { + return std::make_unique(this, nullptr, desc); + } + + std::unique_ptr 
D3D12Device::createTexture(const RenderTextureDesc &desc) { + return std::make_unique(this, nullptr, desc); + } + + std::unique_ptr D3D12Device::createAccelerationStructure(const RenderAccelerationStructureDesc &desc) { + return std::make_unique(this, desc); + } + + std::unique_ptr D3D12Device::createPool(const RenderPoolDesc &desc) { + return std::make_unique(this, desc); + } + + std::unique_ptr D3D12Device::createPipelineLayout(const RenderPipelineLayoutDesc &desc) { + return std::make_unique(this, desc); + } + + std::unique_ptr D3D12Device::createCommandFence() { + return std::make_unique(this); + } + + std::unique_ptr D3D12Device::createCommandSemaphore() { + return std::make_unique(this); + } + + std::unique_ptr D3D12Device::createFramebuffer(const RenderFramebufferDesc &desc) { + return std::make_unique(this, desc); + } + + void D3D12Device::setBottomLevelASBuildInfo(RenderBottomLevelASBuildInfo &buildInfo, const RenderBottomLevelASMesh *meshes, uint32_t meshCount, bool preferFastBuild, bool preferFastTrace) { + assert(meshes != nullptr); + assert(meshCount > 0); + + buildInfo.buildData.resize(sizeof(D3D12_RAYTRACING_GEOMETRY_DESC) * meshCount, 0); + + D3D12_RAYTRACING_GEOMETRY_DESC *geometryDescs = reinterpret_cast(buildInfo.buildData.data()); + for (uint32_t i = 0; i < meshCount; i++) { + const RenderBottomLevelASMesh &mesh = meshes[i]; + const D3D12Buffer *interfaceIndexBuffer = static_cast(mesh.indexBuffer.ref); + const D3D12Buffer *interfaceVertexBuffer = static_cast(mesh.vertexBuffer.ref); + assert((interfaceIndexBuffer == nullptr) || ((interfaceIndexBuffer->desc.flags & RenderBufferFlag::ACCELERATION_STRUCTURE_INPUT) && "Acceleration structure input must be allowed on index buffer.")); + assert((interfaceVertexBuffer == nullptr) || ((interfaceVertexBuffer->desc.flags & RenderBufferFlag::ACCELERATION_STRUCTURE_INPUT) && "Acceleration structure input must be allowed on vertex buffer.")); + + D3D12_RAYTRACING_GEOMETRY_DESC &geometryDesc = 
geometryDescs[i]; + geometryDesc.Type = D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES; + geometryDesc.Flags = D3D12_RAYTRACING_GEOMETRY_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION; + geometryDesc.Flags |= mesh.isOpaque ? D3D12_RAYTRACING_GEOMETRY_FLAG_OPAQUE : D3D12_RAYTRACING_GEOMETRY_FLAG_NONE; + geometryDesc.Triangles.Transform3x4 = 0; + geometryDesc.Triangles.IndexFormat = toDXGI(mesh.indexFormat); + geometryDesc.Triangles.VertexFormat = toDXGI(mesh.vertexFormat); + geometryDesc.Triangles.IndexCount = mesh.indexCount; + geometryDesc.Triangles.VertexCount = mesh.vertexCount; + geometryDesc.Triangles.IndexBuffer = (interfaceIndexBuffer != nullptr) ? (interfaceIndexBuffer->d3d->GetGPUVirtualAddress() + mesh.indexBuffer.offset) : 0; + geometryDesc.Triangles.VertexBuffer.StartAddress = (interfaceVertexBuffer != nullptr) ? (interfaceVertexBuffer->d3d->GetGPUVirtualAddress() + mesh.vertexBuffer.offset) : 0; + geometryDesc.Triangles.VertexBuffer.StrideInBytes = mesh.vertexStride; + } + + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS inputs = {}; + inputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL; + inputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY; + inputs.NumDescs = meshCount; + inputs.Flags = toRTASBuildFlags(preferFastBuild, preferFastTrace); + inputs.pGeometryDescs = geometryDescs; + + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO info = {}; + d3d->GetRaytracingAccelerationStructurePrebuildInfo(&inputs, &info); + + buildInfo.meshCount = meshCount; + buildInfo.preferFastBuild = preferFastBuild; + buildInfo.preferFastTrace = preferFastTrace; + buildInfo.scratchSize = roundUp(info.ScratchDataSizeInBytes, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); + buildInfo.accelerationStructureSize = roundUp(info.ResultDataMaxSizeInBytes, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); + } + + void D3D12Device::setTopLevelASBuildInfo(RenderTopLevelASBuildInfo &buildInfo, const RenderTopLevelASInstance *instances, uint32_t instanceCount, bool 
preferFastBuild, bool preferFastTrace) { + assert(instances != nullptr); + assert(instanceCount > 0); + + uint64_t bufferSize = roundUp(sizeof(D3D12_RAYTRACING_INSTANCE_DESC) * instanceCount, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); + buildInfo.instancesBufferData.resize(bufferSize, 0); + + D3D12_RAYTRACING_INSTANCE_DESC *instanceDescs = reinterpret_cast(buildInfo.instancesBufferData.data()); + for (uint32_t i = 0; i < instanceCount; i++) { + const RenderTopLevelASInstance &instance = instances[i]; + const D3D12Buffer *interfaceBottomLevelAS = static_cast(instance.bottomLevelAS.ref); + assert(interfaceBottomLevelAS != nullptr); + + D3D12_RAYTRACING_INSTANCE_DESC &instanceDesc = instanceDescs[i]; + instanceDesc.InstanceID = instance.instanceID; + instanceDesc.InstanceMask = instance.instanceMask; + instanceDesc.InstanceContributionToHitGroupIndex = instance.instanceContributionToHitGroupIndex; + instanceDesc.Flags = instance.cullDisable ? D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_CULL_DISABLE : D3D12_RAYTRACING_INSTANCE_FLAG_NONE; + instanceDesc.AccelerationStructure = interfaceBottomLevelAS->d3d->GetGPUVirtualAddress() + instance.bottomLevelAS.offset; + memcpy(instanceDesc.Transform, instance.transform.m, sizeof(instanceDesc.Transform)); + } + + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS inputs = {}; + inputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL; + inputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY; + inputs.Flags = toRTASBuildFlags(preferFastBuild, preferFastTrace); + inputs.NumDescs = instanceCount; + + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO info = {}; + d3d->GetRaytracingAccelerationStructurePrebuildInfo(&inputs, &info); + + buildInfo.instanceCount = instanceCount; + buildInfo.preferFastBuild = preferFastBuild; + buildInfo.preferFastTrace = preferFastTrace; + buildInfo.scratchSize = roundUp(info.ScratchDataSizeInBytes, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); + buildInfo.accelerationStructureSize = 
roundUp(info.ResultDataMaxSizeInBytes, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); + } + + void D3D12Device::setShaderBindingTableInfo(RenderShaderBindingTableInfo &tableInfo, const RenderShaderBindingGroups &groups, const RenderPipeline *pipeline, RenderDescriptorSet **descriptorSets, uint32_t descriptorSetCount) { + assert(pipeline != nullptr); + assert(descriptorSets != nullptr); + + const D3D12RaytracingPipeline *raytracingPipeline = static_cast(pipeline); + assert((raytracingPipeline->type == D3D12Pipeline::Type::Raytracing) && "Only raytracing pipelines can be used to build shader binding tables."); + assert((raytracingPipeline->pipelineLayout->setCount <= descriptorSetCount) && "There must be enough descriptor sets available for the pipeline."); + + uint64_t tableSize = 0; + auto setGroup = [&](RenderShaderBindingGroupInfo &groupInfo, const RenderShaderBindingGroup &renderGroup) { + groupInfo.startIndex = 0; + + if (renderGroup.pipelineProgramsCount == 0) { + groupInfo.stride = 0; + groupInfo.offset = 0; + groupInfo.size = 0; + } + else { + groupInfo.stride = roundUp(D3D12_RAYTRACING_SHADER_RECORD_BYTE_ALIGNMENT + sizeof(UINT64) * raytracingPipeline->pipelineLayout->rootCount, D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT); + groupInfo.offset = tableSize; + groupInfo.size = groupInfo.stride * renderGroup.pipelineProgramsCount; + tableSize += groupInfo.size; + } + }; + + setGroup(tableInfo.groups.rayGen, groups.rayGen); + setGroup(tableInfo.groups.miss, groups.miss); + setGroup(tableInfo.groups.hitGroup, groups.hitGroup); + setGroup(tableInfo.groups.callable, groups.callable); + + tableSize = roundUp(tableSize, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); + + tableInfo.tableBufferData.clear(); + tableInfo.tableBufferData.resize(tableSize, 0); + + thread_local std::vector descriptorHandles; + descriptorHandles.clear(); + descriptorHandles.resize(raytracingPipeline->pipelineLayout->rootCount, 0); + + for (uint32_t i = 0; i < 
raytracingPipeline->pipelineLayout->setCount; i++) { + const D3D12DescriptorSet *interfaceDescriptorSet = static_cast(descriptorSets[i]); + if (interfaceDescriptorSet != nullptr) { + if (interfaceDescriptorSet->viewAllocation.count > 0) { + uint32_t viewRootIndex = raytracingPipeline->pipelineLayout->setViewRootIndices[i]; + descriptorHandles[viewRootIndex] = viewHeapAllocator->getShaderGPUHandleAt(interfaceDescriptorSet->viewAllocation.offset).ptr; + } + + if (interfaceDescriptorSet->samplerAllocation.count > 0) { + uint32_t samplerRootIndex = raytracingPipeline->pipelineLayout->setSamplerRootIndices[i]; + descriptorHandles[samplerRootIndex] = samplerHeapAllocator->getShaderGPUHandleAt(interfaceDescriptorSet->samplerAllocation.offset).ptr; + } + } + } + + auto copyGroupData = [&](RenderShaderBindingGroupInfo &groupInfo, const RenderShaderBindingGroup &renderGroup) { + for (uint32_t i = 0; i < renderGroup.pipelineProgramsCount; i++) { + void *shaderId = raytracingPipeline->programShaderIdentifiers[renderGroup.pipelinePrograms[i].programIndex]; + uint64_t tableOffset = groupInfo.offset + i * groupInfo.stride; + memcpy(&tableInfo.tableBufferData[tableOffset], shaderId, D3D12_RAYTRACING_SHADER_RECORD_BYTE_ALIGNMENT); + + if (raytracingPipeline->pipelineLayout->rootCount > 0) { + UINT64 *tableDescriptorHandles = reinterpret_cast(&tableInfo.tableBufferData[tableOffset + D3D12_RAYTRACING_SHADER_RECORD_BYTE_ALIGNMENT]); + memcpy(tableDescriptorHandles, descriptorHandles.data(), sizeof(UINT64) * raytracingPipeline->pipelineLayout->rootCount); + } + } + }; + + copyGroupData(tableInfo.groups.rayGen, groups.rayGen); + copyGroupData(tableInfo.groups.miss, groups.miss); + copyGroupData(tableInfo.groups.hitGroup, groups.hitGroup); + copyGroupData(tableInfo.groups.callable, groups.callable); + } + + const RenderDeviceCapabilities &D3D12Device::getCapabilities() const { + return capabilities; + } + + const RenderDeviceDescription &D3D12Device::getDescription() const { + return 
description; + } + + RenderSampleCounts D3D12Device::getSampleCountsSupported(RenderFormat format) const { + HRESULT res; + RenderSampleCounts countsSupported = RenderSampleCount::COUNT_0; + D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS multisampleLevel = {}; + RenderSampleCounts testCount = RenderSampleCount::COUNT_1; + while (testCount <= RenderSampleCount::COUNT_MAX) { + multisampleLevel.SampleCount = testCount; + multisampleLevel.Format = toDXGI(format); + + res = d3d->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &multisampleLevel, sizeof(multisampleLevel)); + if (SUCCEEDED(res)) { + if (multisampleLevel.NumQualityLevels > 0) { + countsSupported |= testCount; + } + } + + testCount = testCount << 1; + } + + return countsSupported; + } + + void D3D12Device::release() { + if (d3d != nullptr) { + d3d->Release(); + d3d = nullptr; + } + + if (adapter != nullptr) { + adapter->Release(); + adapter = nullptr; + } + } + + bool D3D12Device::isValid() const { + return d3d != nullptr; + } + + // D3D12Interface + + D3D12Interface::D3D12Interface() { + // Create DXGI Factory. + UINT dxgiFactoryFlags = 0; + +# ifdef D3D12_DEBUG_LAYER_ENABLED + ID3D12Debug *debugController; + if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(&debugController)))) { + debugController->EnableDebugLayer(); + + // Enable additional debug layers. + dxgiFactoryFlags |= DXGI_CREATE_FACTORY_DEBUG; + } +# endif + + HRESULT res = CreateDXGIFactory2(dxgiFactoryFlags, IID_PPV_ARGS(&dxgiFactory)); + if (FAILED(res)) { + fprintf(stderr, "CreateDXGIFactory2 failed with error code 0x%lX.\n", res); + return; + } + + // Fill capabilities. + capabilities.shaderFormat = RenderShaderFormat::DXIL; + } + + D3D12Interface::~D3D12Interface() { + if (dxgiFactory != nullptr) { + dxgiFactory->Release(); + } + } + + std::unique_ptr D3D12Interface::createDevice() { + std::unique_ptr createdDevice = std::make_unique(this); + return createdDevice->isValid() ? 
std::move(createdDevice) : nullptr;
+    }
+
+    const RenderInterfaceCapabilities &D3D12Interface::getCapabilities() const {
+        return capabilities;
+    }
+
+    bool D3D12Interface::isValid() const { // True once the DXGI factory pointer is set.
+        return dxgiFactory != nullptr;
+    }
+
+    // Global creation function.
+
+    std::unique_ptr CreateD3D12Interface() { // NOTE(review): template arguments are missing in this text (here and on the casts/#includes below) — restore before building.
+        std::unique_ptr createdInterface = std::make_unique();
+        return createdInterface->isValid() ? std::move(createdInterface) : nullptr; // Null when the interface failed to initialize.
+    }
+};
diff --git a/UnleashedRecomp/gpu/rhi/rt64_d3d12.h b/UnleashedRecomp/gpu/rhi/rt64_d3d12.h
new file mode 100644
index 0000000..e4fe615
--- /dev/null
+++ b/UnleashedRecomp/gpu/rhi/rt64_d3d12.h
@@ -0,0 +1,450 @@
+//
+// RT64
+//
+
+#pragma once
+
+#include "rt64_render_interface.h"
+
+#include
+#include
+#include
+
+#include
+#include
+
+#include "D3D12MemAlloc.h"
+
+namespace RT64 {
+    struct D3D12Buffer;
+    struct D3D12CommandQueue;
+    struct D3D12Device;
+    struct D3D12GraphicsPipeline;
+    struct D3D12Interface;
+    struct D3D12Pipeline;
+    struct D3D12Pool;
+    struct D3D12PipelineLayout;
+    struct D3D12Texture;
+
+    struct D3D12DescriptorHeapAllocator { // Hands out ranges of descriptors from a host heap and a shader heap of one heap type.
+        enum : uint32_t {
+            INVALID_OFFSET = 0xFFFFFFFFU // Presumably what allocate() returns on failure — confirm in the .cpp.
+        };
+
+        // Reference implementation http://diligentgraphics.com/diligent-engine/architecture/d3d12/variable-size-memory-allocations-manager/
+        struct FreeBlock;
+
+        typedef std::map OffsetFreeBlockMap; // Going by the name: free blocks looked up by offset.
+        typedef std::multimap SizeFreeBlockMap; // Going by the name: free blocks looked up by size.
+
+        struct FreeBlock {
+            uint32_t size;
+            SizeFreeBlockMap::iterator sizeMapIterator; // Position of this block's entry in the size map.
+
+            FreeBlock(uint32_t size) {
+                this->size = size;
+            }
+        };
+
+        ID3D12DescriptorHeap *hostHeap = nullptr;
+        ID3D12DescriptorHeap *shaderHeap = nullptr;
+        uint32_t heapSize = 0;
+        uint32_t freeSize = 0;
+        D3D12Device *device = nullptr;
+        D3D12_CPU_DESCRIPTOR_HANDLE hostCPUDescriptorHandle = {};
+        D3D12_CPU_DESCRIPTOR_HANDLE shaderCPUDescriptorHandle = {};
+        D3D12_GPU_DESCRIPTOR_HANDLE shaderGPUDescriptorHandle = {};
+        UINT descriptorHandleIncrement = 0;
+        OffsetFreeBlockMap offsetFreeBlockMap;
+        SizeFreeBlockMap sizeFreeBlockMap;
+        std::mutex allocationMutex; // Presumably guards allocate()/free() — confirm in the .cpp.
+
+        D3D12DescriptorHeapAllocator(D3D12Device *device, uint32_t heapSize, D3D12_DESCRIPTOR_HEAP_TYPE heapType);
+        ~D3D12DescriptorHeapAllocator();
+        void addFreeBlock(uint32_t offset, uint32_t size);
+        uint32_t allocate(uint32_t size);
+        void free(uint32_t offset, uint32_t size);
+        D3D12_CPU_DESCRIPTOR_HANDLE getHostCPUHandleAt(uint32_t index) const;
+        D3D12_CPU_DESCRIPTOR_HANDLE getShaderCPUHandleAt(uint32_t index) const;
+        D3D12_GPU_DESCRIPTOR_HANDLE getShaderGPUHandleAt(uint32_t index) const;
+    };
+
+    struct D3D12DescriptorSet : RenderDescriptorSet { // D3D12 implementation of RenderDescriptorSet.
+        D3D12Device *device = nullptr;
+
+        struct HeapAllocation { // A range inside a descriptor heap allocator plus a host-modified sub-range.
+            uint32_t offset = 0;
+            uint32_t count = 0;
+            uint32_t hostModifiedIndex = 0;
+            uint32_t hostModifiedCount = 0;
+        };
+
+        HeapAllocation viewAllocation; // Range used for view descriptors (read by the shader binding table code via viewHeapAllocator).
+        HeapAllocation samplerAllocation; // Range used for sampler descriptors (read via samplerHeapAllocator).
+        std::vector descriptorTypes;
+        std::vector descriptorHeapIndices;
+        uint32_t descriptorTypeMaxIndex = 0;
+
+        D3D12DescriptorSet(D3D12Device *device, const RenderDescriptorSetDesc &desc);
+        ~D3D12DescriptorSet() override;
+        void setBuffer(uint32_t descriptorIndex, const RenderBuffer *buffer, uint64_t bufferSize, const RenderBufferStructuredView *bufferStructuredView, const RenderBufferFormattedView *bufferFormattedView) override;
+        void setTexture(uint32_t descriptorIndex, const RenderTexture *texture, RenderTextureLayout textureLayout, const RenderTextureView *textureView) override;
+        void setSampler(uint32_t descriptorIndex, const RenderSampler *sampler) override;
+        void setAccelerationStructure(uint32_t descriptorIndex, const RenderAccelerationStructure *accelerationStructure) override;
+        void setSRV(uint32_t descriptorIndex, ID3D12Resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *viewDesc);
+        void setUAV(uint32_t descriptorIndex, ID3D12Resource *resource, const D3D12_UNORDERED_ACCESS_VIEW_DESC *viewDesc);
+        void setCBV(uint32_t descriptorIndex, ID3D12Resource *resource, uint64_t bufferSize);
+        void setHostModified(HeapAllocation &heapAllocation, uint32_t heapIndex);
+    };
+
+    struct D3D12SwapChain : RenderSwapChain { // D3D12 implementation of RenderSwapChain on top of IDXGISwapChain3.
+        IDXGISwapChain3 *d3d = nullptr;
+        HANDLE waitableObject = 0;
+        D3D12CommandQueue *commandQueue = nullptr;
+        RenderWindow renderWindow = {};
+        std::vector textures;
+        uint32_t textureCount = 0;
+        RenderFormat format = RenderFormat::UNKNOWN;
+        DXGI_FORMAT nativeFormat = DXGI_FORMAT_UNKNOWN;
+        uint32_t width = 0;
+        uint32_t height = 0;
+        uint32_t refreshRate = 0;
+
+        D3D12SwapChain(D3D12CommandQueue *commandQueue, RenderWindow renderWindow, uint32_t textureCount, RenderFormat format);
+        ~D3D12SwapChain() override;
+        bool present(uint32_t textureIndex, RenderCommandSemaphore **waitSemaphores, uint32_t waitSemaphoreCount) override;
+        bool resize() override;
+        bool needsResize() const override;
+        uint32_t getWidth() const override;
+        uint32_t getHeight() const override;
+        RenderTexture *getTexture(uint32_t textureIndex) override;
+        uint32_t getTextureCount() const override;
+        bool acquireTexture(RenderCommandSemaphore *signalSemaphore, uint32_t *textureIndex) override;
+        RenderWindow getWindow() const override;
+        bool isEmpty() const override;
+        uint32_t getRefreshRate() const override;
+        void getWindowSize(uint32_t &dstWidth, uint32_t &dstHeight) const;
+        void setTextures();
+    };
+
+    struct D3D12Framebuffer : RenderFramebuffer { // D3D12 implementation of RenderFramebuffer: color/depth targets and their CPU descriptor handles.
+        D3D12Device *device = nullptr;
+        uint32_t width = 0;
+        uint32_t height = 0;
+        std::vector colorTargets;
+        const D3D12Texture *depthTarget = nullptr;
+        std::vector colorHandles;
+        D3D12_CPU_DESCRIPTOR_HANDLE depthHandle = {};
+
+        D3D12Framebuffer(D3D12Device *device, const RenderFramebufferDesc &desc);
+        ~D3D12Framebuffer() override;
+        uint32_t getWidth() const override;
+        uint32_t getHeight() const override;
+    };
+
+    struct D3D12CommandList : RenderCommandList { // D3D12 implementation of RenderCommandList.
+        ID3D12GraphicsCommandList4 *d3d = nullptr;
+        ID3D12CommandAllocator *commandAllocator = nullptr;
+        D3D12Device *device = nullptr;
+        RenderCommandListType type =
RenderCommandListType::UNKNOWN;
+        const D3D12Framebuffer *targetFramebuffer = nullptr;
+        bool targetFramebufferSamplePositionsSet = false;
+        bool open = false;
+        const D3D12PipelineLayout *activeComputePipelineLayout = nullptr;
+        const D3D12PipelineLayout *activeGraphicsPipelineLayout = nullptr;
+        const D3D12GraphicsPipeline *activeGraphicsPipeline = nullptr;
+        bool descriptorHeapsSet = false;
+        D3D12_PRIMITIVE_TOPOLOGY activeTopology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
+        bool activeSamplePositions = false;
+
+        D3D12CommandList(D3D12Device *device, RenderCommandListType type);
+        ~D3D12CommandList() override;
+        bool isOpen() override;
+        void begin() override;
+        void end() override;
+        void barriers(RenderBarrierStages stages, const RenderBufferBarrier *bufferBarriers, uint32_t bufferBarriersCount, const RenderTextureBarrier *textureBarriers, uint32_t textureBarriersCount) override;
+        void dispatch(uint32_t threadGroupCountX, uint32_t threadGroupCountY, uint32_t threadGroupCountZ) override;
+        void traceRays(uint32_t width, uint32_t height, uint32_t depth, RenderBufferReference shaderBindingTable, const RenderShaderBindingGroupsInfo &shaderBindingGroupsInfo) override;
+        void drawInstanced(uint32_t vertexCountPerInstance, uint32_t instanceCount, uint32_t startVertexLocation, uint32_t startInstanceLocation) override;
+        void drawIndexedInstanced(uint32_t indexCountPerInstance, uint32_t instanceCount, uint32_t startIndexLocation, int32_t baseVertexLocation, uint32_t startInstanceLocation) override;
+        void setPipeline(const RenderPipeline *pipeline) override;
+        void setComputePipelineLayout(const RenderPipelineLayout *pipelineLayout) override;
+        void setComputePushConstants(uint32_t rangeIndex, const void *data) override;
+        void setComputeDescriptorSet(RenderDescriptorSet *descriptorSet, uint32_t setIndex) override;
+        void setGraphicsPipelineLayout(const RenderPipelineLayout *pipelineLayout) override;
+        void setGraphicsPushConstants(uint32_t rangeIndex, const void *data) override;
+        void setGraphicsDescriptorSet(RenderDescriptorSet *descriptorSet, uint32_t setIndex) override;
+        void setGraphicsRootDescriptor(RenderBufferReference bufferReference, uint32_t rootDescriptorIndex) override;
+        void setRaytracingPipelineLayout(const RenderPipelineLayout *pipelineLayout) override;
+        void setRaytracingPushConstants(uint32_t rangeIndex, const void *data) override;
+        void setRaytracingDescriptorSet(RenderDescriptorSet *descriptorSet, uint32_t setIndex) override;
+        void setIndexBuffer(const RenderIndexBufferView *view) override;
+        void setVertexBuffers(uint32_t startSlot, const RenderVertexBufferView *views, uint32_t viewCount, const RenderInputSlot *inputSlots) override;
+        void setViewports(const RenderViewport *viewports, uint32_t count) override;
+        void setScissors(const RenderRect *scissorRects, uint32_t count) override;
+        void setFramebuffer(const RenderFramebuffer *framebuffer) override;
+        void clearColor(uint32_t attachmentIndex, RenderColor colorValue, const RenderRect *clearRects, uint32_t clearRectsCount) override;
+        void clearDepth(bool clearDepth, float depthValue, const RenderRect *clearRects, uint32_t clearRectsCount) override;
+        void copyBufferRegion(RenderBufferReference dstBuffer, RenderBufferReference srcBuffer, uint64_t size) override;
+        void copyTextureRegion(const RenderTextureCopyLocation &dstLocation, const RenderTextureCopyLocation &srcLocation, uint32_t dstX, uint32_t dstY, uint32_t dstZ, const RenderBox *srcBox) override;
+        void copyBuffer(const RenderBuffer *dstBuffer, const RenderBuffer *srcBuffer) override;
+        void copyTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) override;
+        void resolveTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) override;
+        void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect) override;
+        void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) override;
+        void buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) override;
+        void checkDescriptorHeaps(); // The methods from here on are not overrides: D3D12-specific helpers of this command list.
+        void notifyDescriptorHeapWasChangedExternally();
+        void checkTopology();
+        void checkFramebufferSamplePositions();
+        void setSamplePositions(const RenderTexture *texture);
+        void resetSamplePositions();
+        void setDescriptorSet(const D3D12PipelineLayout *activePipelineLayout, RenderDescriptorSet *descriptorSet, uint32_t setIndex, bool setCompute);
+        void setRootDescriptorTable(D3D12DescriptorHeapAllocator *heapAllocator, D3D12DescriptorSet::HeapAllocation &heapAllocation, uint32_t rootIndex, bool setCompute);
+        void setRootDescriptor(const D3D12PipelineLayout *activePipelineLayout, RenderBufferReference bufferReference, uint32_t setIndex, bool setCompute);
+    };
+
+    struct D3D12CommandFence : RenderCommandFence { // D3D12 implementation of RenderCommandFence: an ID3D12Fence with an event handle and a counter.
+        ID3D12Fence *d3d = nullptr;
+        D3D12Device *device = nullptr;
+        HANDLE fenceEvent = 0;
+        UINT64 fenceValue = 0;
+
+        D3D12CommandFence(D3D12Device *device);
+        ~D3D12CommandFence() override;
+    };
+
+    struct D3D12CommandSemaphore : RenderCommandSemaphore { // D3D12 implementation of RenderCommandSemaphore: an ID3D12Fence with a counter.
+        ID3D12Fence *d3d = nullptr;
+        D3D12Device *device = nullptr;
+        UINT64 semaphoreValue = 0;
+
+        D3D12CommandSemaphore(D3D12Device *device);
+        ~D3D12CommandSemaphore() override;
+    };
+
+    struct D3D12CommandQueue : RenderCommandQueue { // D3D12 implementation of RenderCommandQueue; also the factory for swap chains.
+        ID3D12CommandQueue *d3d = nullptr;
+        D3D12Device *device = nullptr;
+        RenderCommandListType type = RenderCommandListType::UNKNOWN;
+
+        D3D12CommandQueue(D3D12Device *device, RenderCommandListType type);
+        ~D3D12CommandQueue() override;
+        std::unique_ptr createSwapChain(RenderWindow renderWindow, uint32_t textureCount, RenderFormat format) override;
+        void executeCommandLists(const RenderCommandList **commandLists,
uint32_t commandListCount, RenderCommandSemaphore **waitSemaphores, uint32_t waitSemaphoreCount, RenderCommandSemaphore **signalSemaphores, uint32_t signalSemaphoreCount, RenderCommandFence *signalFence) override;
+        void waitForCommandFence(RenderCommandFence *fence) override;
+    };
+
+    struct D3D12Buffer : RenderBuffer { // D3D12 implementation of RenderBuffer: the resource, its D3D12MA allocation and the optional owning pool.
+        ID3D12Resource *d3d = nullptr;
+        D3D12_RESOURCE_STATES resourceStates = D3D12_RESOURCE_STATE_COMMON;
+        D3D12Device *device = nullptr;
+        D3D12MA::Allocation *allocation = nullptr;
+        D3D12Pool *pool = nullptr; // Null when created straight from the device (D3D12Device::createBuffer passes nullptr).
+        RenderBufferDesc desc;
+
+        D3D12Buffer() = default;
+        D3D12Buffer(D3D12Device *device, D3D12Pool *pool, const RenderBufferDesc &desc);
+        ~D3D12Buffer() override;
+        void *map(uint32_t subresource, const RenderRange *readRange) override;
+        void unmap(uint32_t subresource, const RenderRange *writtenRange) override;
+        std::unique_ptr createBufferFormattedView(RenderFormat format) override;
+        void setName(const std::string &name) override;
+    };
+
+    struct D3D12BufferFormattedView : RenderBufferFormattedView { // Pairs a buffer with the format it should be viewed as.
+        RenderFormat format = RenderFormat::UNKNOWN;
+        D3D12Buffer *buffer = nullptr;
+
+        D3D12BufferFormattedView(D3D12Buffer *buffer, RenderFormat format);
+        ~D3D12BufferFormattedView() override;
+    };
+
+    struct D3D12Texture : RenderTexture { // D3D12 implementation of RenderTexture, including its slot range in a render/depth target heap.
+        ID3D12Resource *d3d = nullptr;
+        D3D12_RESOURCE_STATES resourceStates = D3D12_RESOURCE_STATE_COMMON;
+        RenderTextureLayout layout = RenderTextureLayout::UNKNOWN;
+        D3D12Device *device = nullptr;
+        D3D12MA::Allocation *allocation = nullptr;
+        D3D12Pool *pool = nullptr; // Null when created straight from the device (D3D12Device::createTexture passes nullptr).
+        RenderTextureDesc desc;
+        uint32_t targetAllocatorOffset = 0;
+        uint32_t targetEntryCount = 0;
+        bool targetHeapDepth = false; // Presumably selects the depth target heap over the color one — confirm in the .cpp.
+
+        D3D12Texture() = default;
+        D3D12Texture(D3D12Device *device, D3D12Pool *pool, const RenderTextureDesc &desc);
+        ~D3D12Texture() override;
+        std::unique_ptr createTextureView(const RenderTextureViewDesc &desc) override;
+        void setName(const std::string &name) override;
+        void createRenderTargetHeap();
+        void createDepthStencilHeap();
+        void releaseTargetHeap();
+    };
+
+    struct D3D12TextureView : RenderTextureView { // View parameters (format, dimension, mip range) over a D3D12Texture.
+        DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN;
+        D3D12Texture *texture = nullptr;
+        RenderTextureViewDimension dimension = RenderTextureViewDimension::UNKNOWN;
+        uint32_t mipLevels = 0;
+        uint32_t mipSlice = 0;
+
+        D3D12TextureView(D3D12Texture *texture, const RenderTextureViewDesc &desc);
+        ~D3D12TextureView() override;
+    };
+
+    struct D3D12AccelerationStructure :RenderAccelerationStructure { // A typed offset/size range inside a D3D12Buffer.
+        D3D12Device *device = nullptr;
+        const D3D12Buffer *buffer = nullptr;
+        uint64_t offset = 0;
+        uint64_t size = 0;
+        RenderAccelerationStructureType type = RenderAccelerationStructureType::UNKNOWN;
+
+        D3D12AccelerationStructure(D3D12Device *device, const RenderAccelerationStructureDesc &desc);
+        ~D3D12AccelerationStructure() override;
+    };
+
+    struct D3D12Pool : RenderPool { // D3D12 implementation of RenderPool on top of a D3D12MA::Pool.
+        D3D12MA::Pool *d3d = nullptr;
+        D3D12Device *device = nullptr;
+        RenderPoolDesc desc;
+
+        D3D12Pool(D3D12Device *device, const RenderPoolDesc &desc);
+        ~D3D12Pool() override;
+        std::unique_ptr createBuffer(const RenderBufferDesc &desc) override;
+        std::unique_ptr createTexture(const RenderTextureDesc &desc) override;
+    };
+
+    struct D3D12Shader : RenderShader { // D3D12 implementation of RenderShader: shader data, entry point name and format.
+        std::vector d3d;
+        std::string entryPointName;
+        D3D12Device *device = nullptr;
+        RenderShaderFormat format = RenderShaderFormat::UNKNOWN;
+
+        D3D12Shader(D3D12Device *device, const void *data, uint64_t size, const char *entryPointName, RenderShaderFormat format);
+        ~D3D12Shader() override;
+    };
+
+    struct D3D12Sampler : RenderSampler { // D3D12 implementation of RenderSampler: holds the native sampler description.
+        D3D12_SAMPLER_DESC samplerDesc = {};
+        D3D12Device *device = nullptr;
+        RenderBorderColor borderColor = RenderBorderColor::UNKNOWN;
+        RenderShaderVisibility shaderVisibility = RenderShaderVisibility::UNKNOWN;
+
+        D3D12Sampler(D3D12Device *device, const RenderSamplerDesc &desc);
+        ~D3D12Sampler() override;
+    };
+
+    struct D3D12Pipeline : RenderPipeline { // Common base of the three pipeline kinds; 'type' tells them apart.
+        enum class Type {
+            Unknown,
+            Compute,
+            Graphics,
+            Raytracing
+        };
+
+        D3D12Device *device = nullptr;
+        Type type = Type::Unknown; // Checked by D3D12Device::setShaderBindingTableInfo before it treats a pipeline as a raytracing one.
+
+        D3D12Pipeline(D3D12Device *device, Type type);
+        virtual ~D3D12Pipeline() override;
+    };
+
+    struct D3D12ComputePipeline : D3D12Pipeline {
+        ID3D12PipelineState *d3d = nullptr;
+
+        D3D12ComputePipeline(D3D12Device *device, const RenderComputePipelineDesc &desc);
+        ~D3D12ComputePipeline() override;
+        virtual RenderPipelineProgram getProgram(const std::string &name) const override;
+    };
+
+    struct D3D12GraphicsPipeline : D3D12Pipeline {
+        ID3D12PipelineState *d3d = nullptr;
+        std::vector inputSlots;
+        D3D12_PRIMITIVE_TOPOLOGY topology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
+
+        D3D12GraphicsPipeline(D3D12Device *device, const RenderGraphicsPipelineDesc &desc);
+        ~D3D12GraphicsPipeline() override;
+        virtual RenderPipelineProgram getProgram(const std::string &name) const override;
+    };
+
+    struct D3D12RaytracingPipeline : D3D12Pipeline {
+        ID3D12StateObject *stateObject = nullptr;
+        ID3D12StateObjectProperties *stateObjectProperties = nullptr;
+        std::vector programShaderIdentifiers; // Indexed by programIndex when shader binding table records are written.
+        std::unordered_map nameProgramMap;
+        const D3D12PipelineLayout *pipelineLayout = nullptr;
+
+        D3D12RaytracingPipeline(D3D12Device *device, const RenderRaytracingPipelineDesc &desc, const RenderPipeline *previousPipeline);
+        ~D3D12RaytracingPipeline() override;
+        virtual RenderPipelineProgram getProgram(const std::string &name) const override;
+    };
+
+    struct D3D12PipelineLayout : RenderPipelineLayout { // D3D12 implementation of RenderPipelineLayout: the root signature plus the root parameter index of each binding.
+        ID3D12RootSignature *rootSignature = nullptr;
+        D3D12Device *device = nullptr;
+        std::vector pushConstantRanges;
+        std::vector setViewRootIndices; // Root parameter index of each set's view table.
+        std::vector setSamplerRootIndices; // Root parameter index of each set's sampler table.
+        std::vector> rootDescriptorRootIndicesAndTypes; // One (root parameter index, descriptor type) entry per root descriptor.
+        uint32_t setCount = 0;
+        uint32_t rootCount = 0; // Total number of root parameters.
+
+        D3D12PipelineLayout(D3D12Device *device, const RenderPipelineLayoutDesc &desc);
+        ~D3D12PipelineLayout() override;
+    };
+
+    struct D3D12Device : RenderDevice {
+        ID3D12Device8 *d3d = nullptr;
+        D3D12Interface
*renderInterface = nullptr; + IDXGIAdapter1 *adapter = nullptr; + D3D12MA::Allocator *allocator = nullptr; + D3D_SHADER_MODEL shaderModel = D3D_SHADER_MODEL(0); + std::unique_ptr rtDummyGlobalPipelineLayout; + std::unique_ptr rtDummyLocalPipelineLayout; + std::unique_ptr viewHeapAllocator; + std::unique_ptr samplerHeapAllocator; + std::unique_ptr colorTargetHeapAllocator; + std::unique_ptr depthTargetHeapAllocator; + RenderDeviceCapabilities capabilities; + RenderDeviceDescription description; + + D3D12Device(D3D12Interface *renderInterface); + ~D3D12Device() override; + std::unique_ptr createCommandList(RenderCommandListType type) override; + std::unique_ptr createDescriptorSet(const RenderDescriptorSetDesc &desc) override; + std::unique_ptr createShader(const void *data, uint64_t size, const char *entryPointName, RenderShaderFormat format) override; + std::unique_ptr createSampler(const RenderSamplerDesc &desc) override; + std::unique_ptr createComputePipeline(const RenderComputePipelineDesc &desc) override; + std::unique_ptr createGraphicsPipeline(const RenderGraphicsPipelineDesc &desc) override; + std::unique_ptr createRaytracingPipeline(const RenderRaytracingPipelineDesc &desc, const RenderPipeline *previousPipeline) override; + std::unique_ptr createCommandQueue(RenderCommandListType type) override; + std::unique_ptr createBuffer(const RenderBufferDesc &desc) override; + std::unique_ptr createTexture(const RenderTextureDesc &desc) override; + std::unique_ptr createAccelerationStructure(const RenderAccelerationStructureDesc &desc) override; + std::unique_ptr createPool(const RenderPoolDesc &desc) override; + std::unique_ptr createPipelineLayout(const RenderPipelineLayoutDesc &desc) override; + std::unique_ptr createCommandFence() override; + std::unique_ptr createCommandSemaphore() override; + std::unique_ptr createFramebuffer(const RenderFramebufferDesc &desc) override; + void setBottomLevelASBuildInfo(RenderBottomLevelASBuildInfo &buildInfo, const 
RenderBottomLevelASMesh *meshes, uint32_t meshCount, bool preferFastBuild, bool preferFastTrace) override; + void setTopLevelASBuildInfo(RenderTopLevelASBuildInfo &buildInfo, const RenderTopLevelASInstance *instances, uint32_t instanceCount, bool preferFastBuild, bool preferFastTrace) override; + void setShaderBindingTableInfo(RenderShaderBindingTableInfo &tableInfo, const RenderShaderBindingGroups &groups, const RenderPipeline *pipeline, RenderDescriptorSet **descriptorSets, uint32_t descriptorSetCount) override; + const RenderDeviceCapabilities &getCapabilities() const override; + const RenderDeviceDescription &getDescription() const override; + RenderSampleCounts getSampleCountsSupported(RenderFormat format) const override; + void release(); + bool isValid() const; + }; + + struct D3D12Interface : RenderInterface { + IDXGIFactory4 *dxgiFactory = nullptr; + RenderInterfaceCapabilities capabilities; + + D3D12Interface(); + ~D3D12Interface() override; + std::unique_ptr createDevice() override; + const RenderInterfaceCapabilities &getCapabilities() const override; + bool isValid() const; + }; +}; diff --git a/UnleashedRecomp/gpu/rhi/rt64_render_interface.h b/UnleashedRecomp/gpu/rhi/rt64_render_interface.h new file mode 100644 index 0000000..67d436d --- /dev/null +++ b/UnleashedRecomp/gpu/rhi/rt64_render_interface.h @@ -0,0 +1,241 @@ +// +// RT64 +// + +#pragma once + +#include + +#include "rt64_render_interface_types.h" + +namespace RT64 { + // Interfaces. + + struct RenderBufferFormattedView { + virtual ~RenderBufferFormattedView() { } + }; + + struct RenderBuffer { + virtual ~RenderBuffer() { } + virtual void *map(uint32_t subresource = 0, const RenderRange *readRange = nullptr) = 0; + virtual void unmap(uint32_t subresource = 0, const RenderRange *writtenRange = nullptr) = 0; + virtual std::unique_ptr createBufferFormattedView(RenderFormat format) = 0; + virtual void setName(const std::string &name) = 0; + + // Concrete implementation shortcuts. 
+        // Convenience wrapper: returns RenderBufferReference(this, offset).
+        inline RenderBufferReference at(uint64_t offset) const {
+            return RenderBufferReference(this, offset);
+        }
+    };
+
+    // Abstract texture view handle. It declares only a virtual destructor, so a backend
+    // type such as D3D12TextureView can be destroyed through a pointer to this base.
+    struct RenderTextureView {
+        virtual ~RenderTextureView() { }
+    };
+
+    // Abstract texture interface; D3D12Texture derives from it and overrides both methods.
+    struct RenderTexture {
+        virtual ~RenderTexture() { }
+        // Creates a view of this texture described by `desc`.
+        // NOTE(review): std::unique_ptr has no template argument in this copy of the patch;
+        // confirm the pointee type against the original header.
+        virtual std::unique_ptr createTextureView(const RenderTextureViewDesc &desc) = 0;
+        // Assigns a name to the texture. Presumably a debug label; confirm against the backend.
+        virtual void setName(const std::string &name) = 0;
+    };
+
+    // Abstract acceleration structure handle (virtual destructor only); D3D12AccelerationStructure derives from it.
+    struct RenderAccelerationStructure {
+        virtual ~RenderAccelerationStructure() { }
+    };
+
+    // Abstract shader handle (virtual destructor only); D3D12Shader derives from it.
+    struct RenderShader {
+        virtual ~RenderShader() { }
+    };
+
+    // Abstract sampler handle (virtual destructor only); D3D12Sampler derives from it.
+    struct RenderSampler {
+        virtual ~RenderSampler() { }
+    };
+
+    // Abstract pipeline interface; D3D12Pipeline derives from it and the compute, graphics
+    // and raytracing pipelines override getProgram().
+    struct RenderPipeline {
+        virtual ~RenderPipeline() { }
+        // Returns the RenderPipelineProgram for `name`. The lookup itself lives in the backend
+        // (D3D12RaytracingPipeline keeps a nameProgramMap); behavior for unknown names is not
+        // visible here, so confirm before relying on it.
+        virtual RenderPipelineProgram getProgram(const std::string &name) const = 0;
+    };
+
+    // Abstract pipeline layout handle (virtual destructor only); D3D12PipelineLayout derives from it.
+    struct RenderPipelineLayout {
+        virtual ~RenderPipelineLayout() { }
+    };
+
+    // Abstract fence handle (virtual destructor only); D3D12CommandFence derives from it.
+    struct RenderCommandFence {
+        virtual ~RenderCommandFence() { }
+    };
+
+    // Abstract semaphore handle (virtual destructor only); D3D12CommandSemaphore derives from it.
+    struct RenderCommandSemaphore {
+        virtual ~RenderCommandSemaphore() { }
+    };
+
+    struct RenderDescriptorSet {
+        // Descriptor indices correspond to the index assuming the descriptor set is one contiguous array. They DO NOT correspond to the bindings, which can be sparse.
+        // User code should derive these indices on its own by looking at the order the bindings were assigned during set creation along with the descriptor count and
+        // assume it was all allocated in one contiguous array. This allows efficient mapping between Vulkan and D3D12's descriptor models.
+ + virtual ~RenderDescriptorSet() { } + virtual void setBuffer(uint32_t descriptorIndex, const RenderBuffer *buffer, uint64_t bufferSize = 0, const RenderBufferStructuredView *bufferStructuredView = nullptr, const RenderBufferFormattedView *bufferFormattedView = nullptr) = 0; + virtual void setTexture(uint32_t descriptorIndex, const RenderTexture *texture, RenderTextureLayout textureLayout, const RenderTextureView *textureView = nullptr) = 0; + virtual void setSampler(uint32_t descriptorIndex, const RenderSampler *sampler) = 0; + virtual void setAccelerationStructure(uint32_t descriptorIndex, const RenderAccelerationStructure *accelerationStructure) = 0; + }; + + struct RenderSwapChain { + virtual ~RenderSwapChain() { } + virtual bool present(uint32_t textureIndex, RenderCommandSemaphore **waitSemaphores, uint32_t waitSemaphoreCount) = 0; + virtual bool resize() = 0; + virtual bool needsResize() const = 0; + virtual uint32_t getWidth() const = 0; + virtual uint32_t getHeight() const = 0; + virtual RenderTexture *getTexture(uint32_t textureIndex) = 0; + virtual uint32_t getTextureCount() const = 0; + virtual bool acquireTexture(RenderCommandSemaphore *signalSemaphore, uint32_t *textureIndex) = 0; + virtual RenderWindow getWindow() const = 0; + virtual bool isEmpty() const = 0; + + // Only valid if displayTiming is enabled in capabilities. 
+ virtual uint32_t getRefreshRate() const = 0; + }; + + struct RenderFramebuffer { + virtual ~RenderFramebuffer() { } + virtual uint32_t getWidth() const = 0; + virtual uint32_t getHeight() const = 0; + }; + + struct RenderCommandList { + virtual ~RenderCommandList() { } + virtual bool isOpen() = 0; + virtual void begin() = 0; + virtual void end() = 0; + virtual void barriers(RenderBarrierStages stages, const RenderBufferBarrier *bufferBarriers, uint32_t bufferBarriersCount, const RenderTextureBarrier *textureBarriers, uint32_t textureBarriersCount) = 0; + virtual void dispatch(uint32_t threadGroupCountX, uint32_t threadGroupCountY, uint32_t threadGroupCountZ) = 0; + virtual void traceRays(uint32_t width, uint32_t height, uint32_t depth, RenderBufferReference shaderBindingTable, const RenderShaderBindingGroupsInfo &shaderBindingGroupsInfo) = 0; + virtual void drawInstanced(uint32_t vertexCountPerInstance, uint32_t instanceCount, uint32_t startVertexLocation, uint32_t startInstanceLocation) = 0; + virtual void drawIndexedInstanced(uint32_t indexCountPerInstance, uint32_t instanceCount, uint32_t startIndexLocation, int32_t baseVertexLocation, uint32_t startInstanceLocation) = 0; + virtual void setPipeline(const RenderPipeline *pipeline) = 0; + virtual void setComputePipelineLayout(const RenderPipelineLayout *pipelineLayout) = 0; + virtual void setComputePushConstants(uint32_t rangeIndex, const void *data) = 0; + virtual void setComputeDescriptorSet(RenderDescriptorSet *descriptorSet, uint32_t setIndex) = 0; + virtual void setGraphicsPipelineLayout(const RenderPipelineLayout *pipelineLayout) = 0; + virtual void setGraphicsPushConstants(uint32_t rangeIndex, const void *data) = 0; + virtual void setGraphicsDescriptorSet(RenderDescriptorSet *descriptorSet, uint32_t setIndex) = 0; + virtual void setGraphicsRootDescriptor(RenderBufferReference bufferReference, uint32_t rootDescriptorIndex) = 0; + virtual void setRaytracingPipelineLayout(const RenderPipelineLayout 
*pipelineLayout) = 0; + virtual void setRaytracingPushConstants(uint32_t rangeIndex, const void *data) = 0; + virtual void setRaytracingDescriptorSet(RenderDescriptorSet *descriptorSet, uint32_t setIndex) = 0; + virtual void setIndexBuffer(const RenderIndexBufferView *view) = 0; + virtual void setVertexBuffers(uint32_t startSlot, const RenderVertexBufferView *views, uint32_t viewCount, const RenderInputSlot *inputSlots) = 0; + virtual void setViewports(const RenderViewport *viewports, uint32_t count) = 0; + virtual void setScissors(const RenderRect *scissorRects, uint32_t count) = 0; + virtual void setFramebuffer(const RenderFramebuffer *framebuffer) = 0; + virtual void clearColor(uint32_t attachmentIndex = 0, RenderColor colorValue = RenderColor(), const RenderRect *clearRects = nullptr, uint32_t clearRectsCount = 0) = 0; + virtual void clearDepth(bool clearDepth = true, float depthValue = 1.0f, const RenderRect *clearRects = nullptr, uint32_t clearRectsCount = 0) = 0; + virtual void copyBufferRegion(RenderBufferReference dstBuffer, RenderBufferReference srcBuffer, uint64_t size) = 0; + virtual void copyTextureRegion(const RenderTextureCopyLocation &dstLocation, const RenderTextureCopyLocation &srcLocation, uint32_t dstX = 0, uint32_t dstY = 0, uint32_t dstZ = 0, const RenderBox *srcBox = nullptr) = 0; + virtual void copyBuffer(const RenderBuffer *dstBuffer, const RenderBuffer *srcBuffer) = 0; + virtual void copyTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) = 0; + virtual void resolveTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) = 0; + virtual void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect = nullptr) = 0; + virtual void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) = 0; + virtual void 
buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) = 0;
+
+        // Concrete implementation shortcuts.
+        // Every overload below is non-virtual and only forwards to the pure-virtual
+        // barriers(stages, bufferBarriers, bufferBarriersCount, textureBarriers, textureBarriersCount),
+        // setViewports(viewports, count) or setScissors(scissorRects, count).
+        // NOTE(review): by normal C++ name hiding, a derived class that declares its own
+        // barriers()/setViewports()/setScissors() hides these overloads for calls made through
+        // the derived type; they stay reachable through a RenderCommandList pointer or reference.
+        // NOTE(review): the std::vector parameters have lost their template arguments in this
+        // copy of the patch. Since .data() is passed to the RenderBufferBarrier* / RenderTextureBarrier*
+        // parameters, the element types are presumably those; confirm against the original header.
+
+        // Single buffer barrier, no texture barriers.
+        inline void barriers(RenderBarrierStages stages, const RenderBufferBarrier &barrier) {
+            barriers(stages, &barrier, 1, nullptr, 0);
+        }
+
+        // Single texture barrier, no buffer barriers.
+        inline void barriers(RenderBarrierStages stages, const RenderTextureBarrier &barrier) {
+            barriers(stages, nullptr, 0, &barrier, 1);
+        }
+
+        // One buffer barrier and one texture barrier in the same call.
+        inline void barriers(RenderBarrierStages stages, const RenderBufferBarrier &bufferBarrier, const RenderTextureBarrier &textureBarrier) {
+            barriers(stages, &bufferBarrier, 1, &textureBarrier, 1);
+        }
+
+        // Array of buffer barriers, no texture barriers.
+        inline void barriers(RenderBarrierStages stages, const RenderBufferBarrier *bufferBarriers, uint32_t bufferBarriersCount) {
+            barriers(stages, bufferBarriers, bufferBarriersCount, nullptr, 0);
+        }
+
+        // Vector of buffer barriers; the size is narrowed to uint32_t.
+        inline void barriers(RenderBarrierStages stages, const std::vector &bufferBarriers) {
+            barriers(stages, bufferBarriers.data(), uint32_t(bufferBarriers.size()), nullptr, 0);
+        }
+
+        // Array of texture barriers, no buffer barriers.
+        inline void barriers(RenderBarrierStages stages, const RenderTextureBarrier *textureBarriers, uint32_t textureBarriersCount) {
+            barriers(stages, nullptr, 0, textureBarriers, textureBarriersCount);
+        }
+
+        // Vector of texture barriers; the size is narrowed to uint32_t.
+        inline void barriers(RenderBarrierStages stages, const std::vector &textureBarriers) {
+            barriers(stages, nullptr, 0, textureBarriers.data(), uint32_t(textureBarriers.size()));
+        }
+
+        // Vector of buffer barriers plus vector of texture barriers; both sizes are narrowed to uint32_t.
+        inline void barriers(RenderBarrierStages stages, const std::vector &bufferBarriers, const std::vector &textureBarriers) {
+            barriers(stages, bufferBarriers.data(), uint32_t(bufferBarriers.size()), textureBarriers.data(), uint32_t(textureBarriers.size()));
+        }
+
+        // Single viewport.
+        inline void setViewports(const RenderViewport &viewport) {
+            setViewports(&viewport, 1);
+        }
+
+        // Single scissor rectangle.
+        inline void setScissors(const RenderRect &scissorRect) {
+            setScissors(&scissorRect, 1);
+        }
+    };
+
+    struct RenderCommandQueue {
+ 
virtual ~RenderCommandQueue() { } + virtual std::unique_ptr createSwapChain(RenderWindow renderWindow, uint32_t textureCount, RenderFormat format) = 0; + virtual void executeCommandLists(const RenderCommandList **commandLists, uint32_t commandListCount, RenderCommandSemaphore **waitSemaphores = nullptr, uint32_t waitSemaphoreCount = 0, RenderCommandSemaphore **signalSemaphores = nullptr, uint32_t signalSemaphoreCount = 0, RenderCommandFence *signalFence = nullptr) = 0; + virtual void waitForCommandFence(RenderCommandFence *fence) = 0; + + // Concrete implementation shortcuts. + inline void executeCommandLists(const RenderCommandList *commandList, RenderCommandFence *signalFence = nullptr) { + executeCommandLists(&commandList, 1, nullptr, 0, nullptr, 0, signalFence); + } + }; + + struct RenderPool { + virtual ~RenderPool() { } + virtual std::unique_ptr createBuffer(const RenderBufferDesc &desc) = 0; + virtual std::unique_ptr createTexture(const RenderTextureDesc &desc) = 0; + }; + + struct RenderDevice { + virtual ~RenderDevice() { } + virtual std::unique_ptr createCommandList(RenderCommandListType type) = 0; + virtual std::unique_ptr createDescriptorSet(const RenderDescriptorSetDesc &desc) = 0; + virtual std::unique_ptr createShader(const void *data, uint64_t size, const char *entryPointName, RenderShaderFormat format) = 0; + virtual std::unique_ptr createSampler(const RenderSamplerDesc &desc) = 0; + virtual std::unique_ptr createComputePipeline(const RenderComputePipelineDesc &desc) = 0; + virtual std::unique_ptr createGraphicsPipeline(const RenderGraphicsPipelineDesc &desc) = 0; + virtual std::unique_ptr createRaytracingPipeline(const RenderRaytracingPipelineDesc &desc, const RenderPipeline *previousPipeline = nullptr) = 0; + virtual std::unique_ptr createCommandQueue(RenderCommandListType type) = 0; + virtual std::unique_ptr createBuffer(const RenderBufferDesc &desc) = 0; + virtual std::unique_ptr createTexture(const RenderTextureDesc &desc) = 0; + virtual 
std::unique_ptr createAccelerationStructure(const RenderAccelerationStructureDesc &desc) = 0; + virtual std::unique_ptr createPool(const RenderPoolDesc &desc) = 0; + virtual std::unique_ptr createPipelineLayout(const RenderPipelineLayoutDesc &desc) = 0; + virtual std::unique_ptr createCommandFence() = 0; + virtual std::unique_ptr createCommandSemaphore() = 0; + virtual std::unique_ptr createFramebuffer(const RenderFramebufferDesc &desc) = 0; + virtual void setBottomLevelASBuildInfo(RenderBottomLevelASBuildInfo &buildInfo, const RenderBottomLevelASMesh *meshes, uint32_t meshCount, bool preferFastBuild = true, bool preferFastTrace = false) = 0; + virtual void setTopLevelASBuildInfo(RenderTopLevelASBuildInfo &buildInfo, const RenderTopLevelASInstance *instances, uint32_t instanceCount, bool preferFastBuild = true, bool preferFastTrace = false) = 0; + virtual void setShaderBindingTableInfo(RenderShaderBindingTableInfo &tableInfo, const RenderShaderBindingGroups &groups, const RenderPipeline *pipeline, RenderDescriptorSet **descriptorSets, uint32_t descriptorSetCount) = 0; + virtual const RenderDeviceCapabilities &getCapabilities() const = 0; + virtual const RenderDeviceDescription &getDescription() const = 0; + virtual RenderSampleCounts getSampleCountsSupported(RenderFormat format) const = 0; + }; + + struct RenderInterface { + virtual ~RenderInterface() { } + virtual std::unique_ptr createDevice() = 0; + virtual const RenderInterfaceCapabilities &getCapabilities() const = 0; + }; + + extern void RenderInterfaceTest(RenderInterface *renderInterface); + extern void TestInitialize(RenderInterface* renderInterface, RenderWindow window); + extern void TestDraw(); + extern void TestResize(); + extern void TestShutdown(); +}; + +#include "rt64_render_interface_builders.h" diff --git a/UnleashedRecomp/gpu/rhi/rt64_render_interface_builders.h b/UnleashedRecomp/gpu/rhi/rt64_render_interface_builders.h new file mode 100644 index 0000000..07a7b45 --- /dev/null +++ 
b/UnleashedRecomp/gpu/rhi/rt64_render_interface_builders.h @@ -0,0 +1,278 @@ +// +// RT64 +// + +#pragma once + +#include + +namespace RT64 { + struct RenderDescriptorSetBuilder { + std::list> samplerPointerVectorList; + std::vector descriptorRanges; + RenderDescriptorSetDesc descriptorSetDesc; + bool open = false; + uint32_t setIndex = 0; + + RenderDescriptorSetBuilder() = default; + + void begin() { + assert(!open && "Builder must be closed."); + + descriptorSetDesc = RenderDescriptorSetDesc(); + + samplerPointerVectorList.clear(); + descriptorRanges.clear(); + + open = true; + setIndex = 0; + } + + uint32_t addConstantBuffer(uint32_t binding, uint32_t count = 1) { + return addRange(RenderDescriptorRange(RenderDescriptorRangeType::CONSTANT_BUFFER, binding, count, nullptr)); + } + + uint32_t addFormattedBuffer(uint32_t binding, uint32_t count = 1) { + return addRange(RenderDescriptorRange(RenderDescriptorRangeType::FORMATTED_BUFFER, binding, count, nullptr)); + } + + uint32_t addReadWriteFormattedBuffer(uint32_t binding, uint32_t count = 1) { + return addRange(RenderDescriptorRange(RenderDescriptorRangeType::READ_WRITE_FORMATTED_BUFFER, binding, count, nullptr)); + } + + uint32_t addTexture(uint32_t binding, uint32_t count = 1) { + return addRange(RenderDescriptorRange(RenderDescriptorRangeType::TEXTURE, binding, count, nullptr)); + } + + uint32_t addReadWriteTexture(uint32_t binding, uint32_t count = 1) { + return addRange(RenderDescriptorRange(RenderDescriptorRangeType::READ_WRITE_TEXTURE, binding, count, nullptr)); + } + + uint32_t addSampler(uint32_t binding, uint32_t count = 1) { + return addRange(RenderDescriptorRange(RenderDescriptorRangeType::SAMPLER, binding, count, nullptr)); + } + + uint32_t addImmutableSampler(uint32_t binding, const RenderSampler *immutableSampler) { + assert(immutableSampler != nullptr); + + return addImmutableSampler(binding, &immutableSampler); + } + + uint32_t addImmutableSampler(uint32_t binding, const RenderSampler 
**immutableSampler, uint32_t count = 1) { + assert(immutableSampler != nullptr); + + samplerPointerVectorList.emplace_back(std::vector(immutableSampler, immutableSampler + count)); + return addRange(RenderDescriptorRange(RenderDescriptorRangeType::SAMPLER, binding, count, samplerPointerVectorList.back().data())); + } + + uint32_t addStructuredBuffer(uint32_t binding, uint32_t count = 1) { + return addRange(RenderDescriptorRange(RenderDescriptorRangeType::STRUCTURED_BUFFER, binding, count, nullptr)); + } + + uint32_t addReadWriteStructuredBuffer(uint32_t binding, uint32_t count = 1) { + return addRange(RenderDescriptorRange(RenderDescriptorRangeType::READ_WRITE_STRUCTURED_BUFFER, binding, count, nullptr)); + } + + uint32_t addByteAddressBuffer(uint32_t binding, uint32_t count = 1) { + return addRange(RenderDescriptorRange(RenderDescriptorRangeType::BYTE_ADDRESS_BUFFER, binding, count, nullptr)); + } + + uint32_t addReadWriteByteAddressBuffer(uint32_t binding, uint32_t count = 1) { + return addRange(RenderDescriptorRange(RenderDescriptorRangeType::READ_WRITE_BYTE_ADDRESS_BUFFER, binding, count, nullptr)); + } + + uint32_t addAccelerationStructure(uint32_t binding, uint32_t count = 1) { + return addRange(RenderDescriptorRange(RenderDescriptorRangeType::ACCELERATION_STRUCTURE, binding, count, nullptr)); + } + + uint32_t addRange(const RenderDescriptorRange &range) { + assert(open && "Builder must be open."); + + uint32_t returnValue = setIndex; + descriptorRanges.emplace_back(range); + descriptorSetDesc.descriptorRangesCount++; + setIndex += range.count; + return returnValue; + } + + void end(bool lastRangeIsBoundless = false, uint32_t boundlessRangeSize = 0) { + assert(open && "Builder must be open."); + + descriptorSetDesc.lastRangeIsBoundless = lastRangeIsBoundless; + descriptorSetDesc.boundlessRangeSize = boundlessRangeSize; + descriptorSetDesc.descriptorRanges = descriptorRanges.data(); + open = false; + } + + std::unique_ptr create(RenderDevice *device) const { + 
assert(!open && "Builder must be closed."); + + return device->createDescriptorSet(descriptorSetDesc); + } + }; + + struct RenderDescriptorSetBase { + RenderDescriptorSetBuilder builder; + std::unique_ptr descriptorSet; + + void create(RenderDevice *device) { + descriptorSet = builder.create(device); + } + + RenderDescriptorSet *get() const { + return descriptorSet.get(); + } + + void setBuffer(uint32_t descriptorIndex, const RenderBuffer *buffer, uint64_t bufferSize = 0, const RenderBufferStructuredView *bufferStructuredView = nullptr, const RenderBufferFormattedView *bufferFormattedView = nullptr) { + descriptorSet->setBuffer(descriptorIndex, buffer, bufferSize, bufferStructuredView, bufferFormattedView); + } + + void setBuffer(uint32_t descriptorIndex, const RenderBuffer *buffer, uint64_t bufferSize, const RenderBufferStructuredView &bufferStructuredView) { + descriptorSet->setBuffer(descriptorIndex, buffer, bufferSize, &bufferStructuredView); + } + + void setBuffer(uint32_t descriptorIndex, const RenderBuffer *buffer, uint64_t bufferSize, const RenderBufferFormattedView *bufferFormattedView) { + descriptorSet->setBuffer(descriptorIndex, buffer, bufferSize, nullptr, bufferFormattedView); + } + + void setBuffer(uint32_t descriptorIndex, const RenderBuffer *buffer, const RenderBufferStructuredView &bufferStructuredView) { + descriptorSet->setBuffer(descriptorIndex, buffer, 0, &bufferStructuredView); + } + + void setBuffer(uint32_t descriptorIndex, const RenderBuffer *buffer, const RenderBufferFormattedView *bufferFormattedView) { + descriptorSet->setBuffer(descriptorIndex, buffer, 0, nullptr, bufferFormattedView); + } + + void setTexture(uint32_t descriptorIndex, const RenderTexture *texture, const RenderTextureLayout textureLayout, const RenderTextureView *textureView = nullptr) { + descriptorSet->setTexture(descriptorIndex, texture, textureLayout, textureView); + } + + void setSampler(uint32_t descriptorIndex, const RenderSampler *sampler) { + 
descriptorSet->setSampler(descriptorIndex, sampler); + } + + void setAccelerationStructure(uint32_t descriptorIndex, const RenderAccelerationStructure *accelerationStructure) { + descriptorSet->setAccelerationStructure(descriptorIndex, accelerationStructure); + } + }; + + struct RenderDescriptorSetInclusionFilter { + const uint32_t *bindings = nullptr; + uint32_t bindingsCount = 0; + }; + + struct RenderPipelineLayoutBuilder { + std::vector pushConstantRanges; + std::list> samplerPointerVectorList; + std::vector descriptorRanges; + std::vector descriptorSetDescs; + std::vector descriptorRangeIndexPerSet; + std::vector rootDescriptorDescs; + RenderPipelineLayoutDesc layoutDesc; + bool open = false; + + // Start filling the description. + void begin(bool isLocal = false, bool allowInputLayout = false) { + assert(!open && "Builder must be closed."); + + layoutDesc = RenderPipelineLayoutDesc(); + layoutDesc.isLocal = isLocal; + layoutDesc.allowInputLayout = allowInputLayout; + + pushConstantRanges.clear(); + samplerPointerVectorList.clear(); + descriptorRanges.clear(); + descriptorSetDescs.clear(); + descriptorRangeIndexPerSet.clear(); + rootDescriptorDescs.clear(); + + open = true; + } + + // Returns push constant index. + uint32_t addPushConstant(uint32_t binding, uint32_t set, uint32_t size, RenderShaderStageFlags stageFlags, uint32_t offset = 0) { + assert(open && "Builder must be open."); + + uint32_t returnValue = layoutDesc.pushConstantRangesCount; + pushConstantRanges.emplace_back(RenderPushConstantRange(binding, set, offset, size, stageFlags)); + layoutDesc.pushConstantRangesCount++; + return returnValue; + } + + // Returns set index. 
+        // Copies descriptorSetDesc and every range it points at into this builder. Immutable
+        // sampler pointer arrays are duplicated into samplerPointerVectorList and the copied
+        // range is re-pointed at the duplicate.
+        uint32_t addDescriptorSet(const RenderDescriptorSetDesc &descriptorSetDesc) {
+            assert(open && "Builder must be open.");
+
+            // A description that reports ranges but carries no range array would be dereferenced
+            // in the loop below. RenderDescriptorSetBuilder only assigns the array in its end().
+            assert(((descriptorSetDesc.descriptorRangesCount == 0) || (descriptorSetDesc.descriptorRanges != nullptr)) && "Descriptor set has ranges but no range array.");
+
+            uint32_t returnValue = layoutDesc.descriptorSetDescsCount;
+
+            // Record where this set's ranges begin inside the shared descriptorRanges vector.
+            descriptorRangeIndexPerSet.emplace_back(uint32_t(descriptorRanges.size()));
+            descriptorSetDescs.emplace_back(descriptorSetDesc);
+
+            for (uint32_t j = 0; j < descriptorSetDesc.descriptorRangesCount; j++) {
+                descriptorRanges.emplace_back(descriptorSetDesc.descriptorRanges[j]);
+
+                // Copy the immutable sampler pointers to a local vector list.
+                // The reference stays valid: descriptorRanges is not modified again in this iteration.
+                auto &copiedRange = descriptorRanges.back();
+                if (copiedRange.immutableSampler != nullptr) {
+                    const RenderSampler **immutableSampler = copiedRange.immutableSampler;
+                    samplerPointerVectorList.emplace_back(std::vector(immutableSampler, immutableSampler + copiedRange.count));
+                    copiedRange.immutableSampler = samplerPointerVectorList.back().data();
+                }
+            }
+
+            layoutDesc.descriptorSetDescsCount++;
+
+            return returnValue;
+        }
+
+        // Returns set index.
+        // Forwards the description held by a RenderDescriptorSetBuilder.
+        uint32_t addDescriptorSet(const RenderDescriptorSetBuilder &descriptorSetBuilder) {
+            return addDescriptorSet(descriptorSetBuilder.descriptorSetDesc);
+        }
+
+        // Returns set index.
+        // Forwards the builder owned by a RenderDescriptorSetBase.
+        uint32_t addDescriptorSet(const RenderDescriptorSetBase &descriptorSetBase) {
+            return addDescriptorSet(descriptorSetBase.builder);
+        }
+
+        // Returns root descriptor index. D3D12 only.
+        // The entry is constructed in place from the three arguments.
+        uint32_t addRootDescriptor(uint32_t shaderRegister, uint32_t registerSpace, RenderRootDescriptorType type) {
+            assert(open && "Builder must be open.");
+
+            uint32_t returnValue = layoutDesc.rootDescriptorDescsCount;
+            rootDescriptorDescs.emplace_back(shaderRegister, registerSpace, type);
+            ++layoutDesc.rootDescriptorDescsCount;
+
+            return returnValue;
+        }
+
+        // Finish the description.
+ void end() { + assert(open && "Builder must be open."); + + if (layoutDesc.pushConstantRangesCount > 0) { + layoutDesc.pushConstantRanges = pushConstantRanges.data(); + } + + if (layoutDesc.descriptorSetDescsCount > 0) { + for (uint32_t i = 0; i < layoutDesc.descriptorSetDescsCount; i++) { + const uint32_t rangeIndex = descriptorRangeIndexPerSet[i]; + descriptorSetDescs[i].descriptorRanges = &descriptorRanges[rangeIndex]; + } + + layoutDesc.descriptorSetDescs = descriptorSetDescs.data(); + } + + if (layoutDesc.rootDescriptorDescsCount > 0) { + layoutDesc.rootDescriptorDescs = rootDescriptorDescs.data(); + } + + open = false; + } + + // Create a pipeline layout with the final description. + std::unique_ptr create(RenderDevice *device) const { + assert(!open && "Builder must be closed."); + + return device->createPipelineLayout(layoutDesc); + } + }; +} diff --git a/UnleashedRecomp/gpu/rhi/rt64_render_interface_types.h b/UnleashedRecomp/gpu/rhi/rt64_render_interface_types.h new file mode 100644 index 0000000..92dc616 --- /dev/null +++ b/UnleashedRecomp/gpu/rhi/rt64_render_interface_types.h @@ -0,0 +1,1748 @@ +// +// RT64 +// + +#pragma once + +#include +#include +#include +#include +#include +#include + +#if defined(_WIN64) +#include +#elif defined(__ANDROID__) +#include "android/native_window.h" +#elif defined(__linux__) +#include "X11/Xlib.h" +#undef None +#undef Status +#undef LockMask +#undef ControlMask +#undef Success +#elif defined(__APPLE__) +typedef struct _NSWindow NSWindow; +#endif + +namespace RT64 { +#if defined(_WIN64) + // Native HWND handle to the target window. 
+ typedef HWND RenderWindow; +#elif defined(__ANDROID__) + typedef ANativeWindow* RenderWindow; +#elif defined(__linux__) + struct RenderWindow { + Display* display; + Window window; + bool operator==(const struct RenderWindow& rhs) const { + return display == rhs.display && window == rhs.window; + } + bool operator!=(const struct RenderWindow& rhs) const { return !(*this == rhs); } + }; +#elif defined(__APPLE__) + struct RenderWindow { + NSWindow* window; + bool operator==(const struct RenderWindow& rhs) const { + return window == rhs.window; + } + bool operator!=(const struct RenderWindow& rhs) const { return !(*this == rhs); } + }; +#else + static_assert(false, "RenderWindow was not defined for this platform."); +#endif + + struct RenderBuffer; + struct RenderDescriptorSet; + struct RenderPipeline; + struct RenderPipelineLayout; + struct RenderSampler; + struct RenderShader; + struct RenderTexture; + + // Enums. + + enum class RenderFormat { + UNKNOWN, + R32G32B32A32_TYPELESS, + R32G32B32A32_FLOAT, + R32G32B32A32_UINT, + R32G32B32A32_SINT, + R32G32B32_TYPELESS, + R32G32B32_FLOAT, + R32G32B32_UINT, + R32G32B32_SINT, + R16G16B16A16_TYPELESS, + R16G16B16A16_FLOAT, + R16G16B16A16_UNORM, + R16G16B16A16_UINT, + R16G16B16A16_SNORM, + R16G16B16A16_SINT, + R32G32_TYPELESS, + R32G32_FLOAT, + R32G32_UINT, + R32G32_SINT, + R8G8B8A8_TYPELESS, + R8G8B8A8_UNORM, + R8G8B8A8_UINT, + R8G8B8A8_SNORM, + R8G8B8A8_SINT, + B8G8R8A8_UNORM, + R16G16_TYPELESS, + R16G16_FLOAT, + R16G16_UNORM, + R16G16_UINT, + R16G16_SNORM, + R16G16_SINT, + R32_TYPELESS, + D32_FLOAT, + R32_FLOAT, + R32_UINT, + R32_SINT, + R8G8_TYPELESS, + R8G8_UNORM, + R8G8_UINT, + R8G8_SNORM, + R8G8_SINT, + R16_TYPELESS, + R16_FLOAT, + D16_UNORM, + R16_UNORM, + R16_UINT, + R16_SNORM, + R16_SINT, + R8_TYPELESS, + R8_UNORM, + R8_UINT, + R8_SNORM, + R8_SINT, + BC1_TYPELESS, + BC1_UNORM, + BC1_UNORM_SRGB, + BC2_TYPELESS, + BC2_UNORM, + BC2_UNORM_SRGB, + BC3_TYPELESS, + BC3_UNORM, + BC3_UNORM_SRGB, + BC4_TYPELESS, + BC4_UNORM, 
+ BC4_SNORM, + BC5_TYPELESS, + BC5_UNORM, + BC5_SNORM, + BC6H_TYPELESS, + BC6H_UF16, + BC6H_SF16, + BC7_TYPELESS, + BC7_UNORM, + BC7_UNORM_SRGB + }; + + enum class RenderTextureDimension { + UNKNOWN, + TEXTURE_1D, + TEXTURE_2D, + TEXTURE_3D + }; + + enum class RenderTextureViewDimension { + UNKNOWN, + TEXTURE_1D, + TEXTURE_2D, + TEXTURE_3D, + TEXTURE_CUBE + }; + + enum class RenderCommandListType { + UNKNOWN, + DIRECT, + COMPUTE, + COPY + }; + + enum class RenderPrimitiveTopology { + UNKNOWN, + POINT_LIST, + LINE_LIST, + LINE_STRIP, + TRIANGLE_LIST, + TRIANGLE_STRIP + }; + + enum class RenderSRVType { + UNKNOWN, + BUFFER, + TEXTURE_1D, + TEXTURE_2D, + TEXTURE_3D + }; + + enum class RenderUAVType { + UNKNOWN, + BUFFER, + TEXTURE_1D, + TEXTURE_2D, + TEXTURE_3D + }; + + enum class RenderCullMode { + UNKNOWN, + NONE, + FRONT, + BACK + }; + + enum class RenderComparisonFunction { + UNKNOWN, + NEVER, + LESS, + EQUAL, + LESS_EQUAL, + GREATER, + NOT_EQUAL, + GREATER_EQUAL, + ALWAYS + }; + + enum class RenderInputSlotClassification { + UNKNOWN, + PER_VERTEX_DATA, + PER_INSTANCE_DATA + }; + + enum class RenderBlend { + UNKNOWN, + ZERO, + ONE, + SRC_COLOR, + INV_SRC_COLOR, + SRC_ALPHA, + INV_SRC_ALPHA, + DEST_ALPHA, + INV_DEST_ALPHA, + DEST_COLOR, + INV_DEST_COLOR, + SRC_ALPHA_SAT, + BLEND_FACTOR, + INV_BLEND_FACTOR, + SRC1_COLOR, + INV_SRC1_COLOR, + SRC1_ALPHA, + INV_SRC1_ALPHA + }; + + enum class RenderBlendOperation { + UNKNOWN, + ADD, + SUBTRACT, + REV_SUBTRACT, + MIN, + MAX + }; + + enum class RenderColorWriteEnable : uint8_t { + UNKNOWN = 0x0, + RED = 0x1, + GREEN = 0x2, + BLUE = 0x4, + ALPHA = 0x8, + ALL = RED | GREEN | BLUE | ALPHA + }; + + enum class RenderLogicOperation { + UNKNOWN, + CLEAR, + SET, + COPY, + COPY_INVERTED, + NOOP, + INVERT, + AND, + NAND, + OR, + NOR, + XOR, + EQUIV, + AND_REVERSE, + AND_INVERTED, + OR_REVERSE, + OR_INVERTED + }; + + enum class RenderFilter { + UNKNOWN, + NEAREST, + LINEAR + }; + + enum class RenderMipmapMode { + UNKNOWN, + NEAREST, 
+ LINEAR + }; + + enum class RenderTextureAddressMode { + UNKNOWN, + WRAP, + MIRROR, + CLAMP, + BORDER, + MIRROR_ONCE + }; + + enum class RenderBorderColor { + UNKNOWN, + TRANSPARENT_BLACK, + OPAQUE_BLACK, + OPAQUE_WHITE + }; + + enum class RenderShaderVisibility { + UNKNOWN, + ALL, + VERTEX, + GEOMETRY, + PIXEL + }; + + enum class RenderDescriptorRangeType { + UNKNOWN, + CONSTANT_BUFFER, + FORMATTED_BUFFER, + READ_WRITE_FORMATTED_BUFFER, + TEXTURE, + READ_WRITE_TEXTURE, + SAMPLER, + STRUCTURED_BUFFER, + READ_WRITE_STRUCTURED_BUFFER, + BYTE_ADDRESS_BUFFER, + READ_WRITE_BYTE_ADDRESS_BUFFER, + ACCELERATION_STRUCTURE + }; + + enum class RenderRootDescriptorType { + UNKNOWN, + CONSTANT_BUFFER, + SHADER_RESOURCE, + UNORDERED_ACCESS + }; + + enum class RenderHeapType { + UNKNOWN, + DEFAULT, + UPLOAD, + READBACK + }; + + enum class RenderTextureArrangement { + UNKNOWN, + ROW_MAJOR + }; + + enum class RenderShaderFormat { + UNKNOWN, + DXIL, + SPIRV + }; + + enum class RenderRaytracingPipelineLibrarySymbolType { + UNKNOWN, + RAYGEN, + MISS, + CLOSEST_HIT, + ANY_HIT, + INTERSECTION, + CALLABLE + }; + + enum class RenderAccelerationStructureType { + UNKNOWN, + TOP_LEVEL, + BOTTOM_LEVEL + }; + + namespace RenderShaderStageFlag { + enum Bits : uint32_t { + NONE = 0U, + VERTEX = 1U << 0, + GEOMETRY = 1U << 1, + PIXEL = 1U << 2, + COMPUTE = 1U << 3, + RAYGEN = 1U << 4, + ANY_HIT = 1U << 5, + CLOSEST_HIT = 1U << 6, + MISS = 1U << 7, + INTERSECTION = 1U << 8, + CALLABLE = 1U << 9 + }; + }; + + typedef uint32_t RenderShaderStageFlags; + + namespace RenderBufferFlag { + enum Bits : uint32_t { + NONE = 0U, + VERTEX = 1U << 0, + INDEX = 1U << 1, + STORAGE = 1U << 2, + CONSTANT = 1U << 3, + FORMATTED = 1U << 4, + ACCELERATION_STRUCTURE = 1U << 5, + ACCELERATION_STRUCTURE_INPUT = 1U << 6, + ACCELERATION_STRUCTURE_SCRATCH = 1U << 7, + SHADER_BINDING_TABLE = 1U << 8, + UNORDERED_ACCESS = 1U << 9 + }; + }; + + typedef uint32_t RenderBufferFlags; + + namespace RenderTextureFlag { + enum Bits 
: uint32_t { + NONE = 0U, + RENDER_TARGET = 1U << 0, + DEPTH_TARGET = 1U << 1, + STORAGE = 1U << 2, + UNORDERED_ACCESS = 1U << 3 + }; + }; + + typedef uint32_t RenderTextureFlags; + + namespace RenderBarrierStage { + enum Bits : uint32_t { + NONE = 0U, + GRAPHICS = 1U << 0, + COMPUTE = 1U << 1, + COPY = 1U << 2, + GRAPHICS_AND_COMPUTE = GRAPHICS | COMPUTE, + ALL = GRAPHICS | COMPUTE | COPY + }; + }; + + typedef uint32_t RenderBarrierStages; + + namespace RenderBufferAccess { + enum Bits : uint32_t { + NONE = 0U, + READ = 1U << 0, + WRITE = 1U << 1 + }; + }; + + typedef uint32_t RenderBufferAccessBits; + + enum class RenderTextureLayout { + UNKNOWN, + GENERAL, + SHADER_READ, + COLOR_WRITE, + DEPTH_WRITE, + DEPTH_READ, + COPY_SOURCE, + COPY_DEST, + RESOLVE_SOURCE, + RESOLVE_DEST, + PRESENT + }; + + namespace RenderSampleCount { + enum Bits : uint32_t { + COUNT_0 = 0x0, + COUNT_1 = 0x1, + COUNT_2 = 0x2, + COUNT_4 = 0x4, + COUNT_8 = 0x8, + COUNT_16 = 0x10, + COUNT_32 = 0x20, + COUNT_64 = 0x40, + COUNT_MAX = COUNT_64 + }; + }; + + typedef uint32_t RenderSampleCounts; + + // Global functions. 
+ + constexpr uint32_t RenderFormatSize(RenderFormat format) { + switch (format) { + case RenderFormat::R32G32B32A32_TYPELESS: + case RenderFormat::R32G32B32A32_FLOAT: + case RenderFormat::R32G32B32A32_UINT: + case RenderFormat::R32G32B32A32_SINT: + return 16; + case RenderFormat::R32G32B32_TYPELESS: + case RenderFormat::R32G32B32_FLOAT: + case RenderFormat::R32G32B32_UINT: + case RenderFormat::R32G32B32_SINT: + return 12; + case RenderFormat::R16G16B16A16_TYPELESS: + case RenderFormat::R16G16B16A16_FLOAT: + case RenderFormat::R16G16B16A16_UNORM: + case RenderFormat::R16G16B16A16_UINT: + case RenderFormat::R16G16B16A16_SNORM: + case RenderFormat::R16G16B16A16_SINT: + case RenderFormat::R32G32_TYPELESS: + case RenderFormat::R32G32_FLOAT: + case RenderFormat::R32G32_UINT: + case RenderFormat::R32G32_SINT: + return 8; + case RenderFormat::R8G8B8A8_TYPELESS: + case RenderFormat::R8G8B8A8_UNORM: + case RenderFormat::R8G8B8A8_UINT: + case RenderFormat::R8G8B8A8_SNORM: + case RenderFormat::R8G8B8A8_SINT: + case RenderFormat::B8G8R8A8_UNORM: + case RenderFormat::R16G16_TYPELESS: + case RenderFormat::R16G16_FLOAT: + case RenderFormat::R16G16_UNORM: + case RenderFormat::R16G16_UINT: + case RenderFormat::R16G16_SNORM: + case RenderFormat::R16G16_SINT: + case RenderFormat::R32_TYPELESS: + case RenderFormat::D32_FLOAT: + case RenderFormat::R32_FLOAT: + case RenderFormat::R32_UINT: + case RenderFormat::R32_SINT: + return 4; + case RenderFormat::R8G8_TYPELESS: + case RenderFormat::R8G8_UNORM: + case RenderFormat::R8G8_UINT: + case RenderFormat::R8G8_SNORM: + case RenderFormat::R8G8_SINT: + case RenderFormat::R16_TYPELESS: + case RenderFormat::R16_FLOAT: + case RenderFormat::D16_UNORM: + case RenderFormat::R16_UNORM: + case RenderFormat::R16_UINT: + case RenderFormat::R16_SNORM: + case RenderFormat::R16_SINT: + return 2; + case RenderFormat::R8_TYPELESS: + case RenderFormat::R8_UNORM: + case RenderFormat::R8_UINT: + case RenderFormat::R8_SNORM: + case RenderFormat::R8_SINT: + 
return 1; + case RenderFormat::BC1_UNORM: + case RenderFormat::BC1_UNORM_SRGB: + case RenderFormat::BC1_TYPELESS: + case RenderFormat::BC4_UNORM: + case RenderFormat::BC4_SNORM: + case RenderFormat::BC4_TYPELESS: + return 8; + case RenderFormat::BC2_UNORM: + case RenderFormat::BC2_UNORM_SRGB: + case RenderFormat::BC2_TYPELESS: + case RenderFormat::BC3_UNORM: + case RenderFormat::BC3_UNORM_SRGB: + case RenderFormat::BC3_TYPELESS: + case RenderFormat::BC5_UNORM: + case RenderFormat::BC5_SNORM: + case RenderFormat::BC6H_UF16: + case RenderFormat::BC6H_SF16: + case RenderFormat::BC7_UNORM: + case RenderFormat::BC7_UNORM_SRGB: + return 16; + default: + assert(false && "Unknown format."); + return 1; + } + } + + constexpr uint32_t RenderFormatBlockWidth(RenderFormat format) { + switch (format) { + case RenderFormat::R32G32B32A32_TYPELESS: + case RenderFormat::R32G32B32A32_FLOAT: + case RenderFormat::R32G32B32A32_UINT: + case RenderFormat::R32G32B32A32_SINT: + case RenderFormat::R32G32B32_TYPELESS: + case RenderFormat::R32G32B32_FLOAT: + case RenderFormat::R32G32B32_UINT: + case RenderFormat::R32G32B32_SINT: + case RenderFormat::R16G16B16A16_TYPELESS: + case RenderFormat::R16G16B16A16_FLOAT: + case RenderFormat::R16G16B16A16_UNORM: + case RenderFormat::R16G16B16A16_UINT: + case RenderFormat::R16G16B16A16_SNORM: + case RenderFormat::R16G16B16A16_SINT: + case RenderFormat::R32G32_TYPELESS: + case RenderFormat::R32G32_FLOAT: + case RenderFormat::R32G32_UINT: + case RenderFormat::R32G32_SINT: + case RenderFormat::R8G8B8A8_TYPELESS: + case RenderFormat::R8G8B8A8_UNORM: + case RenderFormat::R8G8B8A8_UINT: + case RenderFormat::R8G8B8A8_SNORM: + case RenderFormat::R8G8B8A8_SINT: + case RenderFormat::B8G8R8A8_UNORM: + case RenderFormat::R16G16_TYPELESS: + case RenderFormat::R16G16_FLOAT: + case RenderFormat::R16G16_UNORM: + case RenderFormat::R16G16_UINT: + case RenderFormat::R16G16_SNORM: + case RenderFormat::R16G16_SINT: + case RenderFormat::R32_TYPELESS: + case 
RenderFormat::D32_FLOAT: + case RenderFormat::R32_FLOAT: + case RenderFormat::R32_UINT: + case RenderFormat::R32_SINT: + case RenderFormat::R8G8_TYPELESS: + case RenderFormat::R8G8_UNORM: + case RenderFormat::R8G8_UINT: + case RenderFormat::R8G8_SNORM: + case RenderFormat::R8G8_SINT: + case RenderFormat::R16_TYPELESS: + case RenderFormat::R16_FLOAT: + case RenderFormat::D16_UNORM: + case RenderFormat::R16_UNORM: + case RenderFormat::R16_UINT: + case RenderFormat::R16_SNORM: + case RenderFormat::R16_SINT: + case RenderFormat::R8_TYPELESS: + case RenderFormat::R8_UNORM: + case RenderFormat::R8_UINT: + case RenderFormat::R8_SNORM: + case RenderFormat::R8_SINT: + return 1; + case RenderFormat::BC1_TYPELESS: + case RenderFormat::BC1_UNORM: + case RenderFormat::BC1_UNORM_SRGB: + case RenderFormat::BC2_TYPELESS: + case RenderFormat::BC2_UNORM: + case RenderFormat::BC2_UNORM_SRGB: + case RenderFormat::BC3_TYPELESS: + case RenderFormat::BC3_UNORM: + case RenderFormat::BC3_UNORM_SRGB: + case RenderFormat::BC4_TYPELESS: + case RenderFormat::BC4_UNORM: + case RenderFormat::BC4_SNORM: + case RenderFormat::BC5_TYPELESS: + case RenderFormat::BC5_UNORM: + case RenderFormat::BC5_SNORM: + case RenderFormat::BC6H_TYPELESS: + case RenderFormat::BC6H_UF16: + case RenderFormat::BC6H_SF16: + case RenderFormat::BC7_TYPELESS: + case RenderFormat::BC7_UNORM: + case RenderFormat::BC7_UNORM_SRGB: + return 4; + default: + assert(false && "Unknown format."); + return 1; + } + }; + + // Concrete structs. 
+ + struct RenderColor { + union { + struct { + float rgba[4]; + }; + + struct { + float r; + float g; + float b; + float a; + }; + }; + + RenderColor() { + r = 0.0f; + g = 0.0f; + b = 0.0f; + a = 1.0f; + } + + RenderColor(float r, float g, float b, float a = 1.0f) { + this->r = r; + this->g = g; + this->b = b; + this->a = a; + } + }; + + struct RenderAffineTransform { + float m[3][4] = {}; + + RenderAffineTransform() { + m[0][0] = 1.0f; + m[1][1] = 1.0f; + m[2][2] = 1.0f; + } + }; + + struct RenderDepth { + float depth = 1.0f; + + RenderDepth() = default; + + RenderDepth(float depth) { + this->depth = depth; + } + }; + + struct RenderMultisamplingLocation { + // Valid range is [-8, 7]. + int8_t x = 0; + int8_t y = 0; + }; + + struct RenderMultisampling { + RenderSampleCounts sampleCount = RenderSampleCount::COUNT_1; + RenderMultisamplingLocation sampleLocations[16] = {}; + bool sampleLocationsEnabled = false; + + RenderMultisampling() = default; + + RenderMultisampling(RenderSampleCounts sampleCount) { + this->sampleCount = sampleCount; + } + }; + + struct RenderBufferReference { + const RenderBuffer *ref = nullptr; + uint64_t offset = 0; + + RenderBufferReference() = default; + + RenderBufferReference(const RenderBuffer *ref) { + this->ref = ref; + offset = 0; + } + + RenderBufferReference(const RenderBuffer *ref, uint64_t offset) { + this->ref = ref; + this->offset = offset; + } + + bool operator==(const RenderBufferReference& rhs) const { + return ref == rhs.ref && offset == rhs.offset; + } + + bool operator!=(const RenderBufferReference& rhs) const { + return !(*this == rhs); + } + }; + + struct RenderBufferBarrier { + RenderBuffer *buffer = nullptr; + RenderBufferAccessBits accessBits = RenderBufferAccess::NONE; + + RenderBufferBarrier() = default; + + RenderBufferBarrier(RenderBuffer *buffer, RenderBufferAccessBits accessBits) { + this->buffer = buffer; + this->accessBits = accessBits; + } + }; + + struct RenderBufferStructuredView { + uint32_t 
structureByteStride = 0; + uint32_t firstElement = 0; + + RenderBufferStructuredView() = default; + + RenderBufferStructuredView(uint32_t structureByteStride, uint32_t firstElement = 0) { + this->structureByteStride = structureByteStride; + this->firstElement = firstElement; + } + }; + + struct RenderTextureBarrier { + RenderTexture *texture = nullptr; + RenderTextureLayout layout = RenderTextureLayout::UNKNOWN; + + RenderTextureBarrier() = default; + + RenderTextureBarrier(RenderTexture *texture, RenderTextureLayout layout) { + this->texture = texture; + this->layout = layout; + } + }; + + struct RenderClearValue { + RenderFormat format = RenderFormat::UNKNOWN; + union { + RenderColor color; + RenderDepth depth; + }; + + RenderClearValue() : color{} {} + + static RenderClearValue Color(RenderColor color, RenderFormat format) { + RenderClearValue clear = {}; + clear.format = format; + clear.color = color; + return clear; + } + + static RenderClearValue Depth(RenderDepth depth, RenderFormat format) { + RenderClearValue clear = {}; + clear.format = format; + clear.depth = depth; + return clear; + } + }; + + struct RenderBufferDesc { + uint64_t size = 0; + RenderHeapType heapType = RenderHeapType::UNKNOWN; + RenderBufferFlags flags = RenderBufferFlag::NONE; + bool committed = false; + + RenderBufferDesc() = default; + + static RenderBufferDesc DefaultBuffer(uint64_t size, RenderBufferFlags flags = RenderBufferFlag::NONE) { + RenderBufferDesc desc; + desc.size = size; + desc.heapType = RenderHeapType::DEFAULT; + desc.flags = flags; + return desc; + } + + static RenderBufferDesc UploadBuffer(uint64_t size, RenderBufferFlags flags = RenderBufferFlag::NONE) { + RenderBufferDesc desc; + desc.heapType = RenderHeapType::UPLOAD; + desc.size = size; + desc.flags = flags; + return desc; + } + + static RenderBufferDesc ReadbackBuffer(uint64_t size, RenderBufferFlags flags = RenderBufferFlag::NONE) { + RenderBufferDesc desc; + desc.heapType = RenderHeapType::READBACK; + desc.size 
= size; + desc.flags = flags; + return desc; + } + + static RenderBufferDesc VertexBuffer(uint64_t size, RenderHeapType heapType, RenderBufferFlags flags = RenderBufferFlag::NONE) { + RenderBufferDesc desc; + desc.size = size; + desc.heapType = heapType; + desc.flags = flags | RenderBufferFlag::VERTEX; + return desc; + } + + static RenderBufferDesc IndexBuffer(uint64_t size, RenderHeapType heapType, RenderBufferFlags flags = RenderBufferFlag::NONE) { + RenderBufferDesc desc; + desc.size = size; + desc.heapType = heapType; + desc.flags = flags | RenderBufferFlag::INDEX; + return desc; + } + + static RenderBufferDesc AccelerationStructureBuffer(uint64_t size) { + RenderBufferDesc desc; + desc.size = size; + desc.heapType = RenderHeapType::DEFAULT; + desc.flags = RenderBufferFlag::ACCELERATION_STRUCTURE; + return desc; + } + }; + + struct RenderTextureDesc { + RenderTextureDimension dimension = RenderTextureDimension::UNKNOWN; + uint32_t width = 0; + uint32_t height = 0; + uint16_t depth = 0; + uint16_t mipLevels = 0; + uint16_t arraySize = 0; + RenderMultisampling multisampling; + RenderFormat format = RenderFormat::UNKNOWN; + RenderTextureArrangement textureArrangement = RenderTextureArrangement::UNKNOWN; + const RenderClearValue *optimizedClearValue = nullptr; + RenderTextureFlags flags = RenderTextureFlag::NONE; + bool committed = false; + + RenderTextureDesc() = default; + + static RenderTextureDesc Texture(RenderTextureDimension dimension, uint32_t width, uint32_t height, uint16_t depth, uint16_t mipLevels, uint16_t arraySize, RenderFormat format, RenderTextureFlags flags = RenderTextureFlag::NONE) { + RenderTextureDesc desc; + desc.dimension = dimension; + desc.width = width; + desc.height = height; + desc.depth = depth; + desc.mipLevels = mipLevels; + desc.arraySize = arraySize; + desc.format = format; + desc.flags = flags; + return desc; + } + + static RenderTextureDesc Texture1D(uint32_t width, uint16_t mipLevels, RenderFormat format, RenderTextureFlags 
flags = RenderTextureFlag::NONE) { + return Texture(RenderTextureDimension::TEXTURE_1D, width, 1, 1, mipLevels, 1, format, flags); + } + + static RenderTextureDesc Texture2D(uint32_t width, uint32_t height, uint16_t mipLevels, RenderFormat format, RenderTextureFlags flags = RenderTextureFlag::NONE) { + return Texture(RenderTextureDimension::TEXTURE_2D, width, height, 1, mipLevels, 1, format, flags); + } + + static RenderTextureDesc Texture3D(uint32_t width, uint32_t height, uint32_t depth, uint16_t mipLevels, RenderFormat format, RenderTextureFlags flags = RenderTextureFlag::NONE) { + return Texture(RenderTextureDimension::TEXTURE_3D, width, height, depth, mipLevels, 1, format, flags); + } + + static RenderTextureDesc ColorTarget(uint32_t width, uint32_t height, RenderFormat format, RenderMultisampling multisampling = RenderMultisampling(), const RenderClearValue *optimizedClearValue = nullptr, RenderTextureFlags flags = RenderTextureFlag::NONE) { + RenderTextureDesc desc; + desc.committed = true; + desc.dimension = RenderTextureDimension::TEXTURE_2D; + desc.width = width; + desc.height = height; + desc.depth = 1; + desc.mipLevels = 1; + desc.arraySize = 1; + desc.format = format; + desc.multisampling = multisampling; + desc.flags = flags | RenderTextureFlag::RENDER_TARGET; + desc.optimizedClearValue = optimizedClearValue; + return desc; + } + + static RenderTextureDesc DepthTarget(uint32_t width, uint32_t height, RenderFormat format, RenderMultisampling multisampling = RenderMultisampling(), const RenderClearValue *optimizedClearValue = nullptr, RenderTextureFlags flags = RenderTextureFlag::NONE) { + RenderTextureDesc desc; + desc.committed = true; + desc.dimension = RenderTextureDimension::TEXTURE_2D; + desc.width = width; + desc.height = height; + desc.depth = 1; + desc.mipLevels = 1; + desc.arraySize = 1; + desc.format = format; + desc.multisampling = multisampling; + desc.flags = flags | RenderTextureFlag::DEPTH_TARGET; + desc.optimizedClearValue = 
optimizedClearValue; + return desc; + } + }; + + struct RenderTextureViewDesc { + RenderFormat format = RenderFormat::UNKNOWN; + RenderTextureViewDimension dimension = RenderTextureViewDimension::UNKNOWN; + uint32_t mipLevels = 0; + uint32_t mipSlice = 0; + + RenderTextureViewDesc() = default; + + static RenderTextureViewDesc Texture1D(RenderFormat format, uint32_t mipLevels = 1) { + RenderTextureViewDesc viewDesc; + viewDesc.format = format; + viewDesc.dimension = RenderTextureViewDimension::TEXTURE_1D; + viewDesc.mipLevels = mipLevels; + return viewDesc; + } + + static RenderTextureViewDesc Texture2D(RenderFormat format, uint32_t mipLevels = 1) { + RenderTextureViewDesc viewDesc; + viewDesc.format = format; + viewDesc.dimension = RenderTextureViewDimension::TEXTURE_2D; + viewDesc.mipLevels = mipLevels; + return viewDesc; + } + + static RenderTextureViewDesc Texture3D(RenderFormat format, uint32_t mipLevels = 1) { + RenderTextureViewDesc viewDesc; + viewDesc.format = format; + viewDesc.dimension = RenderTextureViewDimension::TEXTURE_3D; + viewDesc.mipLevels = mipLevels; + return viewDesc; + } + + static RenderTextureViewDesc TextureCube(RenderFormat format, uint32_t mipLevels = 1) { + RenderTextureViewDesc viewDesc; + viewDesc.format = format; + viewDesc.dimension = RenderTextureViewDimension::TEXTURE_CUBE; + viewDesc.mipLevels = mipLevels; + return viewDesc; + } + }; + + struct RenderAccelerationStructureDesc { + RenderAccelerationStructureType type = RenderAccelerationStructureType::UNKNOWN; + RenderBufferReference buffer; + uint64_t size = 0; + + RenderAccelerationStructureDesc() = default; + + RenderAccelerationStructureDesc(RenderAccelerationStructureType type, RenderBufferReference buffer, uint64_t size) { + this->type = type; + this->buffer = buffer; + this->size = size; + } + }; + + enum class RenderTextureCopyType { + UNKNOWN, + SUBRESOURCE, + PLACED_FOOTPRINT + }; + + struct RenderTextureCopyLocation { + const RenderTexture *texture = nullptr; + const 
RenderBuffer *buffer = nullptr; + RenderTextureCopyType type = RenderTextureCopyType::UNKNOWN; + + union { + struct { + RenderFormat format; + uint32_t width; + uint32_t height; + uint32_t depth; + uint32_t rowWidth; + uint64_t offset; + } placedFootprint; + + struct { + uint32_t index; + } subresource; + }; + + static RenderTextureCopyLocation PlacedFootprint(const RenderBuffer *buffer, RenderFormat format, uint32_t width, uint32_t height, uint32_t depth, uint32_t rowWidth, uint64_t offset = 0) { + RenderTextureCopyLocation loc; + loc.buffer = buffer; + loc.type = RenderTextureCopyType::PLACED_FOOTPRINT; + loc.placedFootprint.format = format; + loc.placedFootprint.width = width; + loc.placedFootprint.height = height; + loc.placedFootprint.depth = depth; + loc.placedFootprint.rowWidth = rowWidth; + loc.placedFootprint.offset = offset; + return loc; + } + + static RenderTextureCopyLocation Subresource(const RenderTexture *texture, uint32_t index = 0) { + RenderTextureCopyLocation loc; + loc.texture = texture; + loc.type = RenderTextureCopyType::SUBRESOURCE; + loc.subresource.index = index; + return loc; + } + }; + + struct RenderPoolDesc { + RenderHeapType heapType = RenderHeapType::UNKNOWN; + uint32_t minBlockCount = 0; + uint32_t maxBlockCount = 0; + bool useLinearAlgorithm = false; + bool allowOnlyBuffers = false; + }; + + struct RenderInputSlot { + uint32_t index = 0; + uint32_t stride = 0; + RenderInputSlotClassification classification = RenderInputSlotClassification::UNKNOWN; + + RenderInputSlot() = default; + + RenderInputSlot(uint32_t index, uint32_t stride, RenderInputSlotClassification classification = RenderInputSlotClassification::PER_VERTEX_DATA) { + this->index = index; + this->stride = stride; + this->classification = classification; + } + }; + + struct RenderInputElement { + // Semantic name and index and location must be specified for both backends, but each attribute will only be read by the backend that uses them. 
+ const char *semanticName = nullptr; + uint32_t semanticIndex = 0; + uint32_t location = 0; + RenderFormat format = RenderFormat::UNKNOWN; + uint32_t slotIndex = 0; + uint32_t alignedByteOffset = 0; + + RenderInputElement() = default; + + RenderInputElement(const char *semanticName, uint32_t semanticIndex, uint32_t location, RenderFormat format, uint32_t slotIndex, uint32_t alignedByteOffset) { + this->semanticName = semanticName; + this->semanticIndex = semanticIndex; + this->location = location; + this->format = format; + this->slotIndex = slotIndex; + this->alignedByteOffset = alignedByteOffset; + } + }; + + struct RenderBlendDesc { + bool blendEnabled = false; + RenderBlend srcBlend = RenderBlend::UNKNOWN; + RenderBlend dstBlend = RenderBlend::UNKNOWN; + RenderBlendOperation blendOp = RenderBlendOperation::UNKNOWN; + RenderBlend srcBlendAlpha = RenderBlend::UNKNOWN; + RenderBlend dstBlendAlpha = RenderBlend::UNKNOWN; + RenderBlendOperation blendOpAlpha = RenderBlendOperation::UNKNOWN; + uint8_t renderTargetWriteMask = uint8_t(RenderColorWriteEnable::ALL); + + static RenderBlendDesc Copy() { + RenderBlendDesc desc; + desc.srcBlend = RenderBlend::ONE; + desc.dstBlend = RenderBlend::ZERO; + desc.blendOp = RenderBlendOperation::ADD; + desc.srcBlendAlpha = RenderBlend::ONE; + desc.dstBlendAlpha = RenderBlend::ZERO; + desc.blendOpAlpha = RenderBlendOperation::ADD; + return desc; + } + + static RenderBlendDesc AlphaBlend() { + RenderBlendDesc desc; + desc.blendEnabled = true; + desc.srcBlend = RenderBlend::SRC_ALPHA; + desc.dstBlend = RenderBlend::INV_SRC_ALPHA; + desc.blendOp = RenderBlendOperation::ADD; + desc.srcBlendAlpha = RenderBlend::ONE; + desc.dstBlendAlpha = RenderBlend::INV_SRC_ALPHA; + desc.blendOpAlpha = RenderBlendOperation::ADD; + return desc; + } + }; + + struct RenderSpecConstant { + uint32_t index = 0; + uint32_t value = 0; + + RenderSpecConstant() = default; + + RenderSpecConstant(uint32_t index, uint32_t value) { + this->index = index; + 
this->value = value; + } + }; + + struct RenderComputePipelineDesc { + const RenderPipelineLayout *pipelineLayout = nullptr; + const RenderShader *computeShader = nullptr; + const RenderSpecConstant *specConstants = nullptr; + uint32_t specConstantsCount = 0; + + RenderComputePipelineDesc() = default; + + RenderComputePipelineDesc(const RenderPipelineLayout *pipelineLayout, const RenderShader *computeShader) { + this->pipelineLayout = pipelineLayout; + this->computeShader = computeShader; + } + }; + + struct RenderGraphicsPipelineDesc { + static const uint32_t MaxRenderTargets = 8; + + const RenderPipelineLayout *pipelineLayout = nullptr; + const RenderShader *vertexShader = nullptr; + const RenderShader *geometryShader = nullptr; + const RenderShader *pixelShader = nullptr; + RenderComparisonFunction depthFunction = RenderComparisonFunction::NEVER; + bool depthClipEnabled = false; + int32_t depthBias = 0; + float slopeScaledDepthBias = 0.0f; + bool depthEnabled = false; + bool depthWriteEnabled = false; + RenderMultisampling multisampling; + RenderPrimitiveTopology primitiveTopology = RenderPrimitiveTopology::TRIANGLE_LIST; + RenderCullMode cullMode = RenderCullMode::NONE; + RenderFormat renderTargetFormat[MaxRenderTargets] = {}; + RenderBlendDesc renderTargetBlend[MaxRenderTargets] = {}; + uint32_t renderTargetCount = 0; + bool logicOpEnabled = false; + RenderLogicOperation logicOp = RenderLogicOperation::NOOP; + RenderFormat depthTargetFormat = RenderFormat::UNKNOWN; + const RenderInputSlot *inputSlots = nullptr; + uint32_t inputSlotsCount = 0; + const RenderInputElement *inputElements = nullptr; + uint32_t inputElementsCount = 0; + const RenderSpecConstant *specConstants = nullptr; + uint32_t specConstantsCount = 0; + }; + + struct RenderRaytracingPipelineLibrarySymbol { + const char *importName = nullptr; + RenderRaytracingPipelineLibrarySymbolType type = RenderRaytracingPipelineLibrarySymbolType::UNKNOWN; + const char *exportName = nullptr; + const 
RenderSpecConstant *specConstants = nullptr; + uint32_t specConstantsCount = 0; + + RenderRaytracingPipelineLibrarySymbol() = default; + + RenderRaytracingPipelineLibrarySymbol(const char *importName, RenderRaytracingPipelineLibrarySymbolType type, const char *exportName = nullptr, const RenderSpecConstant *specConstants = nullptr, uint32_t specConstantsCount = 0) { + this->importName = importName; + this->type = type; + this->specConstants = specConstants; + this->exportName = exportName; + this->specConstantsCount = specConstantsCount; + } + }; + + struct RenderRaytracingPipelineLibrary { + const RenderShader *shader = nullptr; + const RenderRaytracingPipelineLibrarySymbol *symbols = nullptr; + uint32_t symbolsCount = 0; + + RenderRaytracingPipelineLibrary() = default; + + RenderRaytracingPipelineLibrary(const RenderShader *shader, const RenderRaytracingPipelineLibrarySymbol *symbols, uint32_t symbolsCount) { + this->shader = shader; + this->symbols = symbols; + this->symbolsCount = symbolsCount; + } + }; + + struct RenderRaytracingPipelineHitGroup { + const char *hitGroupName = nullptr; + const char *closestHitName = nullptr; + const char *anyHitName = nullptr; + const char *intersectionName = nullptr; + + RenderRaytracingPipelineHitGroup() = default; + + RenderRaytracingPipelineHitGroup(const char *hitGroupName, const char *closestHitName = nullptr, const char *anyHitName = nullptr, const char *intersectionName = nullptr) { + this->hitGroupName = hitGroupName; + this->closestHitName = closestHitName; + this->anyHitName = anyHitName; + this->intersectionName = intersectionName; + } + }; + + struct RenderRaytracingPipelineDesc { + const RenderRaytracingPipelineLibrary *libraries = nullptr; + uint32_t librariesCount = 0; + const RenderRaytracingPipelineHitGroup *hitGroups = nullptr; + uint32_t hitGroupsCount = 0; + const RenderPipelineLayout *pipelineLayout = nullptr; + uint32_t maxPayloadSize = 0; + uint32_t maxAttributeSize = 2 * sizeof(float); + uint32_t 
maxRecursionDepth = 1; + + // IMPORTANT: State update support must be true for this option to work. The pipeline creation will not work if this option + // is enabled and the device doesn't support it. This option is only supported by Raytracing Tier 1.1 devices. + bool stateUpdateEnabled = false; + }; + + struct RenderPipelineProgram { + uint32_t programIndex = 0; + + RenderPipelineProgram() = default; + + RenderPipelineProgram(uint32_t programIndex) { + this->programIndex = programIndex; + } + }; + + struct RenderSamplerDesc { + RenderFilter minFilter = RenderFilter::LINEAR; + RenderFilter magFilter = RenderFilter::LINEAR; + RenderMipmapMode mipmapMode = RenderMipmapMode::LINEAR; + RenderTextureAddressMode addressU = RenderTextureAddressMode::WRAP; + RenderTextureAddressMode addressV = RenderTextureAddressMode::WRAP; + RenderTextureAddressMode addressW = RenderTextureAddressMode::WRAP; + float mipLODBias = 0.0f; + uint32_t maxAnisotropy = 16; + bool anisotropyEnabled = false; + RenderComparisonFunction comparisonFunc = RenderComparisonFunction::LESS_EQUAL; + bool comparisonEnabled = false; + RenderBorderColor borderColor = RenderBorderColor::OPAQUE_BLACK; + float minLOD = 0.0f; + float maxLOD = FLT_MAX; + RenderShaderVisibility shaderVisibility = RenderShaderVisibility::ALL; + + RenderSamplerDesc() = default; + }; + + struct RenderDescriptorRange { + // The type of descriptor range. The descriptor can't change this during its lifetime. + RenderDescriptorRangeType type = RenderDescriptorRangeType::UNKNOWN; + + // How many descriptors should be assigned and allocated for this range. When the range + // is boundless (see RenderDescriptorSetDesc::lastRangeIsBoundless), this indicates the upper + // bound of the variable sized array (TBD if this implies additional memory consumption). + uint32_t count = 0; + + // The shader binding number the descriptor will correspond to. 
+ uint32_t binding = 0; + + // An optional immutable sampler to build in statically into the pipeline layout. + const RenderSampler **immutableSampler = nullptr; + + RenderDescriptorRange() = default; + + RenderDescriptorRange(RenderDescriptorRangeType type, uint32_t binding, uint32_t count, const RenderSampler **immutableSampler = nullptr) { + this->type = type; + this->binding = binding; + this->count = count; + this->immutableSampler = immutableSampler; + } + }; + + struct RenderDescriptorSetDesc { + const RenderDescriptorRange *descriptorRanges = nullptr; + uint32_t descriptorRangesCount = 0; + bool lastRangeIsBoundless = false; + uint32_t boundlessRangeSize = 0; + + RenderDescriptorSetDesc() = default; + + RenderDescriptorSetDesc(const RenderDescriptorRange *descriptorRanges, uint32_t descriptorRangesCount, bool lastRangeIsBoundless = false, uint32_t boundlessRangeSize = 0) { + this->descriptorRanges = descriptorRanges; + this->descriptorRangesCount = descriptorRangesCount; + this->lastRangeIsBoundless = lastRangeIsBoundless; + this->boundlessRangeSize = boundlessRangeSize; + } + }; + + struct RenderPushConstantRange { + uint32_t binding = 0; + uint32_t set = 0; + uint32_t offset = 0; // Must be aligned to 4-bytes for DX12. + uint32_t size = 0; + RenderShaderStageFlags stageFlags = RenderShaderStageFlag::NONE; + + RenderPushConstantRange() = default; + + RenderPushConstantRange(uint32_t binding, uint32_t set, uint32_t offset, uint32_t size, RenderShaderStageFlags stageFlags) { + this->binding = binding; + this->set = set; + this->offset = offset; + this->size = size; + this->stageFlags = stageFlags; + } + }; + + // D3D12 only. 
+ struct RenderRootDescriptorDesc { + uint32_t shaderRegister = 0; + uint32_t registerSpace = 0; + RenderRootDescriptorType type = RenderRootDescriptorType::UNKNOWN; + + RenderRootDescriptorDesc() = default; + + RenderRootDescriptorDesc(uint32_t shaderRegister, uint32_t registerSpace, RenderRootDescriptorType type) { + this->shaderRegister = shaderRegister; + this->registerSpace = registerSpace; + this->type = type; + } + }; + + struct RenderPipelineLayoutDesc { + const RenderPushConstantRange *pushConstantRanges = nullptr; + uint32_t pushConstantRangesCount = 0; + const RenderDescriptorSetDesc *descriptorSetDescs = nullptr; + uint32_t descriptorSetDescsCount = 0; + const RenderRootDescriptorDesc* rootDescriptorDescs = nullptr; + uint32_t rootDescriptorDescsCount = 0; + bool isLocal = false; + bool allowInputLayout = false; + + RenderPipelineLayoutDesc() = default; + + RenderPipelineLayoutDesc(const RenderPushConstantRange *pushConstantRanges, uint32_t pushConstantRangesCount, const RenderDescriptorSetDesc *descriptorSetDescs, uint32_t descriptorSetDescsCount, bool isLocal = false, bool allowInputLayout = false) { + this->pushConstantRanges = pushConstantRanges; + this->pushConstantRangesCount = pushConstantRangesCount; + this->descriptorSetDescs = descriptorSetDescs; + this->descriptorSetDescsCount = descriptorSetDescsCount; + this->isLocal = isLocal; + this->allowInputLayout = allowInputLayout; + } + }; + + struct RenderIndexBufferView { + RenderBufferReference buffer; + uint32_t size = 0; + RenderFormat format = RenderFormat::UNKNOWN; + + RenderIndexBufferView() = default; + + RenderIndexBufferView(RenderBufferReference buffer, uint32_t size, RenderFormat format) { + this->buffer = buffer; + this->size = size; + this->format = format; + } + }; + + struct RenderVertexBufferView { + RenderBufferReference buffer; + uint32_t size = 0; + + RenderVertexBufferView() = default; + + RenderVertexBufferView(RenderBufferReference buffer, uint32_t size) { + this->buffer = 
buffer; + this->size = size; + } + }; + + struct RenderSRV { + RenderSRVType type = RenderSRVType::UNKNOWN; + RenderFormat format = RenderFormat::UNKNOWN; + + union { + struct { + uint32_t firstElement; + uint32_t structureByteStride; + bool raw; + } buffer; + + struct { + uint32_t mipLevels; + } texture; + }; + + RenderSRV() = default; + + RenderSRV(RenderSRVType type, RenderFormat format) { + this->type = type; + this->format = format; + } + + static RenderSRV Buffer(RenderFormat format, uint32_t firstElement = 0, bool raw = false) { + RenderSRV srv(RenderSRVType::BUFFER, format); + srv.buffer.firstElement = firstElement; + srv.buffer.structureByteStride = 0; + srv.buffer.raw = raw; + return srv; + } + + static RenderSRV StructuredBuffer(uint32_t strideInBytes, uint32_t firstElement = 0, bool raw = false) { + RenderSRV srv(RenderSRVType::BUFFER, RenderFormat::UNKNOWN); + srv.buffer.firstElement = firstElement; + srv.buffer.structureByteStride = strideInBytes; + srv.buffer.raw = raw; + return srv; + } + + static RenderSRV Texture1D(RenderFormat format = RenderFormat::UNKNOWN, uint32_t mipLevels = 1) { + RenderSRV srv(RenderSRVType::TEXTURE_1D, format); + srv.texture.mipLevels = mipLevels; + return srv; + } + + static RenderSRV Texture2D(RenderFormat format = RenderFormat::UNKNOWN, uint32_t mipLevels = 1) { + RenderSRV srv(RenderSRVType::TEXTURE_2D, format); + srv.texture.mipLevels = mipLevels; + return srv; + } + + static RenderSRV Texture3D(RenderFormat format = RenderFormat::UNKNOWN, uint32_t mipLevels = 1) { + RenderSRV srv(RenderSRVType::TEXTURE_3D, format); + srv.texture.mipLevels = mipLevels; + return srv; + } + }; + + struct RenderUAV { + RenderUAVType type = RenderUAVType::UNKNOWN; + RenderFormat format = RenderFormat::UNKNOWN; + + union { + struct { + uint32_t firstElement; + uint32_t structureByteStride; + bool raw; + } buffer; + + struct { + uint32_t mipSlice; + } texture; + }; + + RenderUAV() = default; + + RenderUAV(RenderUAVType type, RenderFormat 
format) { + this->type = type; + this->format = format; + } + + static RenderUAV Buffer(RenderFormat format, uint32_t firstElement = 0, bool raw = false) { + RenderUAV uav(RenderUAVType::BUFFER, format); + uav.buffer.firstElement = firstElement; + uav.buffer.structureByteStride = 0; + uav.buffer.raw = raw; + return uav; + } + + static RenderUAV StructuredBuffer(uint32_t strideInBytes, uint32_t firstElement = 0, bool raw = false) { + RenderUAV uav(RenderUAVType::BUFFER, RenderFormat::UNKNOWN); + uav.buffer.firstElement = firstElement; + uav.buffer.structureByteStride = strideInBytes; + uav.buffer.raw = raw; + return uav; + } + + static RenderUAV Texture1D(RenderFormat format = RenderFormat::UNKNOWN, uint32_t mipSlice = 0) { + RenderUAV uav(RenderUAVType::TEXTURE_1D, format); + uav.texture.mipSlice = mipSlice; + return uav; + } + + static RenderUAV Texture2D(RenderFormat format = RenderFormat::UNKNOWN, uint32_t mipSlice = 0) { + RenderUAV uav(RenderUAVType::TEXTURE_2D, format); + uav.texture.mipSlice = mipSlice; + return uav; + } + + static RenderUAV Texture3D(RenderFormat format = RenderFormat::UNKNOWN, uint32_t mipSlice = 0) { + RenderUAV uav(RenderUAVType::TEXTURE_3D, format); + uav.texture.mipSlice = mipSlice; + return uav; + } + }; + + struct RenderViewport { + float x = 0.0f; + float y = 0.0f; + float width = 0.0f; + float height = 0.0f; + float minDepth = 0.0f; + float maxDepth = 1.0f; + + RenderViewport() = default; + + RenderViewport(float x, float y, float width, float height, float minDepth = 0.0f, float maxDepth = 1.0f) { + this->x = x; + this->y = y; + this->width = width; + this->height = height; + this->minDepth = minDepth; + this->maxDepth = maxDepth; + } + + bool operator==(const RenderViewport &v) const { + return (x == v.x) && (y == v.y) && (width == v.width) && (height == v.height) && (minDepth == v.minDepth) && (maxDepth == v.maxDepth); + } + + bool operator!=(const RenderViewport &v) const { + return (x != v.x) || (y != v.y) || (width != 
v.width) || (height != v.height) || (minDepth != v.minDepth) || (maxDepth != v.maxDepth); + } + + bool isEmpty() const { + return (width <= 0.0f) || (height <= 0.0f); + } + }; + + struct RenderRect { + int32_t left = 0; + int32_t top = 0; + int32_t right = 0; + int32_t bottom = 0; + + RenderRect() = default; + + RenderRect(int32_t left, int32_t top, int32_t right, int32_t bottom) { + this->left = left; + this->top = top; + this->right = right; + this->bottom = bottom; + } + + bool operator==(const RenderRect &v) const { + return (left == v.left) && (top == v.top) && (right == v.right) && (bottom == v.bottom); + } + + bool operator!=(const RenderRect &v) const { + return (left != v.left) || (top != v.top) || (right != v.right) || (bottom != v.bottom); + } + + bool isEmpty() const { + return (left >= right) || (top >= bottom); + } + }; + + struct RenderBox { + int32_t left = 0; + int32_t top = 0; + int32_t front = 0; + int32_t right = 0; + int32_t bottom = 0; + int32_t back = 0; + + RenderBox() = default; + + RenderBox(int32_t left, int32_t top, int32_t right, int32_t bottom, int32_t front = 0, int32_t back = 1) { + this->left = left; + this->top = top; + this->front = front; + this->right = right; + this->bottom = bottom; + this->back = back; + } + }; + + struct RenderRange { + uint64_t begin = 0; + uint64_t end = 0; + + RenderRange() = default; + + RenderRange(uint64_t begin, uint64_t end) { + this->begin = begin; + this->end = end; + } + }; + + struct RenderFramebufferDesc { + const RenderTexture **colorAttachments = nullptr; + uint32_t colorAttachmentsCount = 0; + const RenderTexture *depthAttachment = nullptr; + bool depthAttachmentReadOnly = false; + + RenderFramebufferDesc() = default; + + RenderFramebufferDesc(const RenderTexture **colorAttachments, uint32_t colorAttachmentsCount, const RenderTexture *depthAttachment = nullptr, bool depthAttachmentReadOnly = false) { + this->colorAttachments = colorAttachments; + this->colorAttachmentsCount = 
colorAttachmentsCount; + this->depthAttachment = depthAttachment; + this->depthAttachmentReadOnly = depthAttachmentReadOnly; + } + }; + + struct RenderBottomLevelASMesh { + RenderBufferReference indexBuffer; + RenderBufferReference vertexBuffer; + RenderFormat indexFormat = RenderFormat::UNKNOWN; + RenderFormat vertexFormat = RenderFormat::UNKNOWN; + uint32_t indexCount = 0; + uint32_t vertexCount = 0; + uint32_t vertexStride = 0; + bool isOpaque = false; + + RenderBottomLevelASMesh() = default; + + RenderBottomLevelASMesh(RenderBufferReference indexBuffer, RenderBufferReference vertexBuffer, RenderFormat indexFormat, RenderFormat vertexFormat, uint32_t indexCount, uint32_t vertexCount, uint32_t vertexStride, bool isOpaque) { + this->indexBuffer = indexBuffer; + this->vertexBuffer = vertexBuffer; + this->indexFormat = indexFormat; + this->vertexFormat = vertexFormat; + this->indexCount = indexCount; + this->vertexCount = vertexCount; + this->vertexStride = vertexStride; + this->isOpaque = isOpaque; + } + }; + + struct RenderBottomLevelASBuildInfo { + uint32_t meshCount = 0; + uint32_t primitiveCount = 0; + bool preferFastBuild = false; + bool preferFastTrace = false; + uint64_t scratchSize = 0; + uint64_t accelerationStructureSize = 0; + + // Private backend data. Can go unused. 
+ std::vector buildData; + }; + + struct RenderTopLevelASInstance { + RenderBufferReference bottomLevelAS; + uint32_t instanceID = 0; + uint32_t instanceMask = 0; + uint32_t instanceContributionToHitGroupIndex = 0; + bool cullDisable = false; + RenderAffineTransform transform; + + RenderTopLevelASInstance() = default; + + RenderTopLevelASInstance(RenderBufferReference bottomLevelAS, uint32_t instanceID, uint32_t instanceMask, uint32_t instanceContributionToHitGroupIndex, bool cullDisable, RenderAffineTransform transform) { + this->bottomLevelAS = bottomLevelAS; + this->instanceID = instanceID; + this->instanceMask = instanceMask; + this->instanceContributionToHitGroupIndex = instanceContributionToHitGroupIndex; + this->cullDisable = cullDisable; + this->transform = transform; + } + }; + + struct RenderTopLevelASBuildInfo { + // The instances buffer data must be uploaded to the GPU by the API user. + std::vector instancesBufferData; + uint32_t instanceCount = 0; + bool preferFastBuild = false; + bool preferFastTrace = false; + uint64_t scratchSize = 0; + uint64_t accelerationStructureSize = 0; + + // Private backend data. Can go unused. 
+ std::vector buildData; + }; + + struct RenderShaderBindingGroup { + const RenderPipelineProgram *pipelinePrograms = nullptr; + uint32_t pipelineProgramsCount = 0; + + RenderShaderBindingGroup() = default; + + RenderShaderBindingGroup(const RenderPipelineProgram *pipelinePrograms, uint32_t pipelineProgramsCount) { + this->pipelinePrograms = pipelinePrograms; + this->pipelineProgramsCount = pipelineProgramsCount; + } + }; + + struct RenderShaderBindingGroups { + RenderShaderBindingGroup rayGen; + RenderShaderBindingGroup miss; + RenderShaderBindingGroup hitGroup; + RenderShaderBindingGroup callable; + + RenderShaderBindingGroups() = default; + + RenderShaderBindingGroups(RenderShaderBindingGroup rayGen, RenderShaderBindingGroup miss, RenderShaderBindingGroup hitGroup, RenderShaderBindingGroup callable = RenderShaderBindingGroup()) { + this->rayGen = rayGen; + this->miss = miss; + this->hitGroup = hitGroup; + this->callable = callable; + } + }; + + struct RenderShaderBindingGroupInfo { + uint64_t offset = 0; + uint64_t size = 0; + uint32_t stride = 0; + + // Convenience index for selecting a different binding in the table. offset must add startIndex * stride. + uint32_t startIndex = 0; + }; + + struct RenderShaderBindingGroupsInfo { + RenderShaderBindingGroupInfo rayGen; + RenderShaderBindingGroupInfo miss; + RenderShaderBindingGroupInfo hitGroup; + RenderShaderBindingGroupInfo callable; + }; + + struct RenderShaderBindingTableInfo { + // The table buffer data must be uploaded to the GPU by the API user and submitted to dispatchRays(). + std::vector tableBufferData; + + // This info will be requested by dispatchRays(). + RenderShaderBindingGroupsInfo groups; + }; + + struct RenderDeviceDescription { + std::string name = "Unknown"; + uint32_t driverVersion = 0; + uint64_t dedicatedVideoMemory = 0; + }; + + struct RenderDeviceCapabilities { + // Raytracing. + bool raytracing = false; + bool raytracingStateUpdate = false; + + // MSAA. 
+ bool sampleLocations = false; + + // Bindless resources. + bool descriptorIndexing = false; + bool scalarBlockLayout = false; + + // Present. + bool presentWait = false; + bool displayTiming = false; + + // HDR. + bool preferHDR = false; + }; + + struct RenderInterfaceCapabilities { + RenderShaderFormat shaderFormat = RenderShaderFormat::UNKNOWN; + }; +}; diff --git a/UnleashedRecomp/gpu/video.cpp b/UnleashedRecomp/gpu/video.cpp index 73f23d9..5627c09 100644 --- a/UnleashedRecomp/gpu/video.cpp +++ b/UnleashedRecomp/gpu/video.cpp @@ -1,39 +1,2191 @@ #include + +#include +#include +#include +#include +#include + #include "video.h" #include "window.h" -#include "kernel/function.h" -void VdInitializeSystem() +namespace RT64 { - Window::Init(); + extern std::unique_ptr CreateD3D12Interface(); + extern std::unique_ptr CreateVulkanInterface(); } -void* VdGetGlobalDevice() +struct PipelineState +{ + GuestShader* vertexShader = nullptr; + GuestShader* pixelShader = nullptr; + GuestVertexDeclaration* vertexDeclaration = nullptr; + bool instancing = false; + bool zEnable = true; + bool zWriteEnable = true; + RenderBlend srcBlend = RenderBlend::ONE; + RenderBlend destBlend = RenderBlend::ZERO; + RenderCullMode cullMode = RenderCullMode::NONE; + RenderComparisonFunction zFunc = RenderComparisonFunction::LESS; + bool alphaBlendEnable = false; + RenderBlendOperation blendOp = RenderBlendOperation::ADD; + float slopeScaledDepthBias = 0.0f; + int32_t depthBias = 0; + RenderBlend srcBlendAlpha = RenderBlend::ONE; + RenderBlend destBlendAlpha = RenderBlend::ZERO; + RenderBlendOperation blendOpAlpha = RenderBlendOperation::ADD; + uint32_t colorWriteEnable{}; + RenderPrimitiveTopology primitiveTopology = RenderPrimitiveTopology::TRIANGLE_LIST; + uint8_t vertexStrides[16]{}; + RenderFormat renderTargetFormat{}; + RenderFormat depthStencilFormat{}; +}; + +struct SharedConstants +{ + uint32_t textureIndices[16]{}; + uint32_t samplerIndices[16]{}; + uint32_t alphaTestMode{}; + float 
alphaThreshold{}; + uint32_t booleans{}; + uint32_t swappedTexcoords{}; + uint32_t inputLayoutFlags{}; +}; + +static GuestSurface* g_renderTarget; +static GuestSurface* g_depthStencil; +static RenderViewport g_viewport(0.0f, 0.0f, 1280.0f, 720.0f); +static PipelineState g_pipelineState; +static SharedConstants g_sharedConstants; +static RenderSamplerDesc g_samplerDescs[16]; +static bool g_scissorTestEnable = false; +static RenderRect g_scissorRect; +static RenderVertexBufferView g_vertexBufferViews[16]; +static RenderInputSlot g_inputSlots[16]; +static RenderIndexBufferView g_indexBufferView; + +struct DirtyStates +{ + bool renderTargetAndDepthStencil; + bool viewport; + bool pipelineState; + bool sharedConstants; + bool scissorRect; + bool vertexShaderConstants; + uint8_t vertexStreamFirst; + uint8_t vertexStreamLast; + bool indices; + bool pixelShaderConstants; + + DirtyStates(bool value) + : renderTargetAndDepthStencil(value) + , viewport(value) + , pipelineState(value) + , sharedConstants(value) + , scissorRect(value) + , vertexShaderConstants(value) + , vertexStreamFirst(value ? 0 : 255) + , vertexStreamLast(value ? 
15 : 0) + , indices(value) + , pixelShaderConstants(value) + { + } +}; + +static DirtyStates g_dirtyStates(true); + +template +static void SetDirtyValue(bool& dirtyState, T& dest, const T& src) +{ + if (dest != src) + { + dest = src; + dirtyState = true; + } +} + +static std::unique_ptr g_interface; +static std::unique_ptr g_device; + +static constexpr size_t NUM_FRAMES = 2; + +static uint32_t g_frame = 0; +static uint32_t g_nextFrame = 1; + +static std::unique_ptr g_queue; +static std::unique_ptr g_commandLists[NUM_FRAMES]; +static std::unique_ptr g_commandFences[NUM_FRAMES]; + +static Mutex g_copyMutex; +static std::unique_ptr g_copyQueue; +static std::unique_ptr g_copyCommandList; +static std::unique_ptr g_copyCommandFence; + +static std::unique_ptr g_swapChain; +static GuestSurface* g_backBuffer; + +struct std::unique_ptr g_textureDescriptorSet; +struct std::unique_ptr g_samplerDescriptorSet; + +struct TextureDescriptorAllocator +{ + Mutex mutex; + uint32_t capacity = 0; + std::vector freed; + + uint32_t allocate() + { + std::lock_guard lock(mutex); + + uint32_t value; + if (!freed.empty()) + { + value = freed.back(); + freed.pop_back(); + } + else + { + value = ++capacity; + } + + return value; + } + + void free(uint32_t value) + { + assert(value != NULL); + std::lock_guard lock(mutex); + freed.push_back(value); + } +}; + +static TextureDescriptorAllocator g_textureDescriptorAllocator; + +static std::unique_ptr g_pipelineLayout; +static xxHashMap> g_pipelines; + +static xxHashMap>> g_samplerStates; + +static Mutex g_vertexDeclarationMutex; +static xxHashMap g_vertexDeclarations; + +struct UploadBuffer +{ + static constexpr size_t SIZE = 16 * 1024 * 1024; + + std::unique_ptr buffer; + uint8_t* memory = nullptr; +}; + +struct UploadAllocator +{ + std::vector buffers; + uint32_t index = 0; + uint32_t offset = 0; + + std::pair allocate(uint32_t size, uint32_t alignment) + { + assert(size <= UploadBuffer::SIZE); + + offset = (offset + alignment - 1) & ~(alignment - 
1); + + if (offset + size > UploadBuffer::SIZE) + { + ++index; + offset = 0; + } + + if (buffers.size() <= index) + buffers.resize(index + 1); + + auto& buffer = buffers[index]; + if (buffer.buffer == nullptr) + { + buffer.buffer = g_device->createBuffer(RenderBufferDesc::UploadBuffer(UploadBuffer::SIZE)); + buffer.memory = reinterpret_cast(buffer.buffer->map()); + } + + auto ref = buffer.buffer->at(offset); + offset += size; + + return { ref, buffer.memory + ref.offset }; + } + + template + RenderBufferReference allocate(const T* memory, uint32_t size, uint32_t alignment) + { + auto result = allocate(size, alignment); + + if constexpr (TByteSwap) + { + auto destination = reinterpret_cast(result.second); + + for (size_t i = 0; i < size; i += sizeof(T)) + { + *destination = std::byteswap(*memory); + ++destination; + ++memory; + } + } + else + { + memcpy(result.second, memory, size); + } + + return result.first; + } + + void reset() + { + index = 0; + offset = 0; + } +}; + +static UploadAllocator g_uploadAllocators[NUM_FRAMES]; + +static Mutex g_tempMutex; +static std::vector> g_tempTextures[NUM_FRAMES]; +static std::vector> g_tempBuffers[NUM_FRAMES]; +static std::vector g_tempDescriptorIndices[NUM_FRAMES]; + +static void SetRenderState(GuestDevice* device, uint32_t value) +{ +} + +static void SetRenderStateZEnable(GuestDevice* device, uint32_t value) +{ + SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.zEnable, value != 0); +} + +static void SetRenderStateZWriteEnable(GuestDevice* device, uint32_t value) +{ + SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.zWriteEnable, value != 0); +} + +static void SetRenderStateAlphaTestEnable(GuestDevice* device, uint32_t value) +{ + SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.alphaTestMode, value ? 
1u : 0); +} + +static RenderBlend ConvertBlendMode(uint32_t blendMode) +{ + switch (blendMode) + { + case D3DBLEND_ZERO: + return RenderBlend::ZERO; + case D3DBLEND_ONE: + return RenderBlend::ONE; + case D3DBLEND_SRCCOLOR: + return RenderBlend::SRC_COLOR; + case D3DBLEND_INVSRCCOLOR: + return RenderBlend::INV_SRC_COLOR; + case D3DBLEND_SRCALPHA: + return RenderBlend::SRC_ALPHA; + case D3DBLEND_INVSRCALPHA: + return RenderBlend::INV_SRC_ALPHA; + case D3DBLEND_DESTCOLOR: + return RenderBlend::DEST_COLOR; + case D3DBLEND_INVDESTCOLOR: + return RenderBlend::INV_DEST_COLOR; + case D3DBLEND_DESTALPHA: + return RenderBlend::DEST_ALPHA; + case D3DBLEND_INVDESTALPHA: + return RenderBlend::INV_DEST_ALPHA; + default: + assert(false && "Invalid blend mode"); + return RenderBlend::ZERO; + } +} + +static void SetRenderStateSrcBlend(GuestDevice* device, uint32_t value) +{ + SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.srcBlend, ConvertBlendMode(value)); +} + +static void SetRenderStateDestBlend(GuestDevice* device, uint32_t value) +{ + SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.destBlend, ConvertBlendMode(value)); +} + +static void SetRenderStateCullMode(GuestDevice* device, uint32_t value) +{ + RenderCullMode cullMode; + + switch (value) { + case D3DCULL_NONE: + case D3DCULL_NONE_2: + cullMode = RenderCullMode::NONE; + break; + case D3DCULL_CW: + cullMode = RenderCullMode::FRONT; + break; + case D3DCULL_CCW: + cullMode = RenderCullMode::BACK; + break; + default: + assert(false && "Invalid cull mode"); + cullMode = RenderCullMode::NONE; + break; + } + + SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.cullMode, cullMode); +} + +static void SetRenderStateZFunc(GuestDevice* device, uint32_t value) +{ + RenderComparisonFunction comparisonFunc; + + switch (value) + { + case D3DCMP_NEVER: + comparisonFunc = RenderComparisonFunction::NEVER; + break; + case D3DCMP_LESS: + comparisonFunc = RenderComparisonFunction::LESS; + break; + case 
D3DCMP_EQUAL: + comparisonFunc = RenderComparisonFunction::EQUAL; + break; + case D3DCMP_LESSEQUAL: + comparisonFunc = RenderComparisonFunction::LESS_EQUAL; + break; + case D3DCMP_GREATER: + comparisonFunc = RenderComparisonFunction::GREATER; + break; + case D3DCMP_NOTEQUAL: + comparisonFunc = RenderComparisonFunction::NOT_EQUAL; + break; + case D3DCMP_GREATEREQUAL: + comparisonFunc = RenderComparisonFunction::GREATER_EQUAL; + break; + case D3DCMP_ALWAYS: + comparisonFunc = RenderComparisonFunction::ALWAYS; + break; + default: + assert(false && "Unknown comparison function"); + comparisonFunc = RenderComparisonFunction::NEVER; + break; + } + + SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.zFunc, comparisonFunc); +} + +static void SetRenderStateAlphaRef(GuestDevice* device, uint32_t value) +{ + SetDirtyValue(g_dirtyStates.pipelineState, g_sharedConstants.alphaThreshold, float(value) / 256.0f); +} + +static void SetRenderStateAlphaBlendEnable(GuestDevice* device, uint32_t value) +{ + SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.alphaBlendEnable, value != 0); +} + +static RenderBlendOperation ConvertBlendOp(uint32_t blendOp) +{ + switch (blendOp) + { + case D3DBLENDOP_ADD: + return RenderBlendOperation::ADD; + case D3DBLENDOP_SUBTRACT: + return RenderBlendOperation::SUBTRACT; + case D3DBLENDOP_REVSUBTRACT: + return RenderBlendOperation::REV_SUBTRACT; + case D3DBLENDOP_MIN: + return RenderBlendOperation::MIN; + case D3DBLENDOP_MAX: + return RenderBlendOperation::MAX; + default: + assert(false && "Unknown blend operation"); + return RenderBlendOperation::ADD; + } +} + +static void SetRenderStateBlendOp(GuestDevice* device, uint32_t value) +{ + SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.blendOp, ConvertBlendOp(value)); +} + +static void SetRenderStateScissorTestEnable(GuestDevice* device, uint32_t value) +{ + SetDirtyValue(g_dirtyStates.scissorRect, g_scissorTestEnable, value != 0); +} + +static void 
SetRenderStateSlopeScaledDepthBias(GuestDevice* device, uint32_t value) +{ + SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.slopeScaledDepthBias, *reinterpret_cast(&value)); +} + +static void SetRenderStateDepthBias(GuestDevice* device, uint32_t value) +{ + SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.depthBias, int32_t(*reinterpret_cast(&value) * (1 << 24))); +} + +static void SetRenderStateSrcBlendAlpha(GuestDevice* device, uint32_t value) +{ + SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.srcBlendAlpha, ConvertBlendMode(value)); +} + +static void SetRenderStateDestBlendAlpha(GuestDevice* device, uint32_t value) +{ + SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.destBlendAlpha, ConvertBlendMode(value)); +} + +static void SetRenderStateBlendOpAlpha(GuestDevice* device, uint32_t value) +{ + SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.blendOpAlpha, ConvertBlendOp(value)); +} + +static void SetRenderStateColorWriteEnable(GuestDevice* device, uint32_t value) +{ + SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.colorWriteEnable, value); +} + +static const std::pair g_setRenderStateFunctions[] = +{ + { D3DRS_ZENABLE, GuestFunction }, + { D3DRS_ZWRITEENABLE, GuestFunction }, + { D3DRS_ALPHATESTENABLE, GuestFunction }, + { D3DRS_SRCBLEND, GuestFunction }, + { D3DRS_DESTBLEND, GuestFunction }, + { D3DRS_CULLMODE, GuestFunction }, + { D3DRS_ZFUNC, GuestFunction }, + { D3DRS_ALPHAREF, GuestFunction }, + { D3DRS_ALPHABLENDENABLE, GuestFunction }, + { D3DRS_BLENDOP, GuestFunction }, + { D3DRS_SCISSORTESTENABLE, GuestFunction }, + { D3DRS_SLOPESCALEDEPTHBIAS, GuestFunction }, + { D3DRS_DEPTHBIAS, GuestFunction }, + { D3DRS_SRCBLENDALPHA, GuestFunction }, + { D3DRS_DESTBLENDALPHA, GuestFunction }, + { D3DRS_BLENDOPALPHA, GuestFunction }, + { D3DRS_COLORWRITEENABLE, GuestFunction } +}; + +static void CreateHostDevice() +{ + for (uint32_t i = 0; i < 16; i++) + g_inputSlots[i].index = i; + + 
Window::Init(); + + g_interface = CreateD3D12Interface(); + g_device = g_interface->createDevice(); + + g_queue = g_device->createCommandQueue(RenderCommandListType::DIRECT); + + for (auto& commandList : g_commandLists) + commandList = g_device->createCommandList(RenderCommandListType::DIRECT); + + for (auto& commandFence : g_commandFences) + commandFence = g_device->createCommandFence(); + + g_copyQueue = g_device->createCommandQueue(RenderCommandListType::COPY); + g_copyCommandList = g_device->createCommandList(RenderCommandListType::COPY); + g_copyCommandFence = g_device->createCommandFence(); + + g_swapChain = g_queue->createSwapChain(Window::s_windowHandle, 2, RenderFormat::R8G8B8A8_UNORM); + + RenderPipelineLayoutBuilder pipelineLayoutBuilder; + pipelineLayoutBuilder.begin(false, true); + + constexpr size_t TEXTURE_DESCRIPTOR_SIZE = 65536; + constexpr size_t SAMPLER_DESCRIPTOR_SIZE = 1024; + + RenderDescriptorSetBuilder descriptorSetBuilder; + descriptorSetBuilder.begin(); + descriptorSetBuilder.addTexture(0, TEXTURE_DESCRIPTOR_SIZE); + descriptorSetBuilder.end(true, TEXTURE_DESCRIPTOR_SIZE); + + g_textureDescriptorSet = descriptorSetBuilder.create(g_device.get()); + pipelineLayoutBuilder.addDescriptorSet(descriptorSetBuilder); + pipelineLayoutBuilder.addDescriptorSet(descriptorSetBuilder); + pipelineLayoutBuilder.addDescriptorSet(descriptorSetBuilder); + + descriptorSetBuilder.begin(); + descriptorSetBuilder.addSampler(0, SAMPLER_DESCRIPTOR_SIZE); + descriptorSetBuilder.end(true, SAMPLER_DESCRIPTOR_SIZE); + + g_samplerDescriptorSet = descriptorSetBuilder.create(g_device.get()); + pipelineLayoutBuilder.addDescriptorSet(descriptorSetBuilder); + + pipelineLayoutBuilder.addRootDescriptor(0, 4, RenderRootDescriptorType::CONSTANT_BUFFER); + pipelineLayoutBuilder.addRootDescriptor(1, 4, RenderRootDescriptorType::CONSTANT_BUFFER); + pipelineLayoutBuilder.addRootDescriptor(2, 4, RenderRootDescriptorType::CONSTANT_BUFFER); + pipelineLayoutBuilder.end(); + + 
g_pipelineLayout = pipelineLayoutBuilder.create(g_device.get()); +} + +static void BeginCommandList() +{ + g_renderTarget = g_backBuffer; + g_depthStencil = nullptr; + + g_pipelineState.renderTargetFormat = g_backBuffer->format; + g_pipelineState.depthStencilFormat = RenderFormat::UNKNOWN; + + uint32_t textureIndex = 0; + g_swapChain->acquireTexture(nullptr, &textureIndex); + g_backBuffer->texture = g_swapChain->getTexture(textureIndex); + + auto& commandList = g_commandLists[g_frame]; + + commandList->begin(); + commandList->barriers(RenderBarrierStage::GRAPHICS, RenderTextureBarrier(g_backBuffer->texture, RenderTextureLayout::COLOR_WRITE)); + commandList->setGraphicsPipelineLayout(g_pipelineLayout.get()); + commandList->setGraphicsDescriptorSet(g_textureDescriptorSet.get(), 0); + commandList->setGraphicsDescriptorSet(g_textureDescriptorSet.get(), 1); + commandList->setGraphicsDescriptorSet(g_textureDescriptorSet.get(), 2); + commandList->setGraphicsDescriptorSet(g_samplerDescriptorSet.get(), 3); +} + +static void ExecuteDummyNextFrame() +{ + auto& commandList = g_commandLists[g_nextFrame]; + commandList->begin(); + commandList->end(); + g_queue->executeCommandLists(commandList.get(), g_commandFences[g_nextFrame].get()); +} + +static uint32_t CreateDevice(uint32_t a1, uint32_t a2, uint32_t a3, uint32_t a4, uint32_t a5, be* a6) +{ + CreateHostDevice(); + + g_backBuffer = g_userHeap.AllocPhysical(ResourceType::RenderTarget); + g_backBuffer->format = RenderFormat::R8G8B8A8_UNORM; + + ExecuteDummyNextFrame(); + BeginCommandList(); + + auto device = g_userHeap.AllocPhysical(); + memset(device, 0, sizeof(*device)); + + uint32_t functionOffset = 'D3D'; + g_codeCache.Insert(functionOffset, reinterpret_cast(GuestFunction)); + + for (size_t i = 0; i < _countof(device->setRenderStateFunctions); i++) + device->setRenderStateFunctions[i] = functionOffset; + + for (auto& [state, function] : g_setRenderStateFunctions) + { + functionOffset += 4; + 
g_codeCache.Insert(functionOffset, function); + device->setRenderStateFunctions[state / 4] = functionOffset; + } + + for (size_t i = 0; i < _countof(device->setSamplerStateFunctions); i++) + device->setSamplerStateFunctions[i] = *reinterpret_cast(g_memory.Translate(0x8330F3DC + i * 0xC)); + + *a6 = g_memory.MapVirtual(device); + + return 0; +} + +static void DestructResource(GuestResource* resource) +{ + switch (resource->type) + { + case ResourceType::Texture: + case ResourceType::VolumeTexture: + { + const auto texture = reinterpret_cast(resource); + + if (texture->mappedMemory != nullptr) + g_userHeap.Free(texture->mappedMemory); + + { + std::lock_guard lock(g_tempMutex); + g_tempTextures[g_frame].emplace_back(std::move(texture->texture)); + g_tempDescriptorIndices[g_frame].push_back(texture->descriptorIndex); + } + + texture->~GuestTexture(); + break; + } + + case ResourceType::VertexBuffer: + case ResourceType::IndexBuffer: + { + const auto buffer = reinterpret_cast(resource); + + if (buffer->mappedMemory != nullptr) + g_userHeap.Free(buffer->mappedMemory); + + { + std::lock_guard lock(g_tempMutex); + g_tempBuffers[g_frame].emplace_back(std::move(buffer->buffer)); + } + + buffer->~GuestBuffer(); + break; + } + + case ResourceType::RenderTarget: + case ResourceType::DepthStencil: + { + const auto surface = reinterpret_cast(resource); + + { + std::lock_guard lock(g_tempMutex); + g_tempTextures[g_frame].emplace_back(std::move(surface->textureHolder)); + } + + surface->~GuestSurface(); + break; + } + + case ResourceType::VertexDeclaration: + reinterpret_cast(resource)->~GuestVertexDeclaration(); + break; + + case ResourceType::VertexShader: + case ResourceType::PixelShader: + reinterpret_cast(resource)->~GuestShader(); + break; + } + + g_userHeap.Free(resource); +} + +static constexpr uint32_t PITCH_ALIGNMENT = 0x100; +static constexpr uint32_t PLACEMENT_ALIGNMENT = 0x200; + +static void LockTextureRect(GuestTexture* texture, uint32_t, GuestLockedRect* lockedRect) 
+{ + uint32_t pitch = (texture->width * RenderFormatSize(texture->format) + PITCH_ALIGNMENT - 1) & ~(PITCH_ALIGNMENT - 1); + + if (texture->mappedMemory == nullptr) + texture->mappedMemory = g_userHeap.AllocPhysical(pitch * texture->height, 0x10); + + lockedRect->pitch = pitch; + lockedRect->bits = g_memory.MapVirtual(texture->mappedMemory); +} + +template +static void ExecuteCopyCommandList(const T& function) +{ + std::lock_guard lock(g_copyMutex); + + g_copyCommandList->begin(); + function(); + g_copyCommandList->end(); + g_copyQueue->executeCommandLists(g_copyCommandList.get(), g_copyCommandFence.get()); + g_copyQueue->waitForCommandFence(g_copyCommandFence.get()); +} + +static void UnlockTextureRect(GuestTexture* texture) +{ +} + +static void* LockBuffer(GuestBuffer* buffer, uint32_t flags) +{ + buffer->lockedReadOnly = (flags & 0x10) != 0; + + if (buffer->mappedMemory == nullptr) + buffer->mappedMemory = g_userHeap.AllocPhysical(buffer->dataSize, 0x10); + + return buffer->mappedMemory; +} + +static void* LockVertexBuffer(GuestBuffer* buffer, uint32_t, uint32_t, uint32_t flags) +{ + return LockBuffer(buffer, flags); +} + +template +static void UnlockBuffer(GuestBuffer* buffer) +{ + if (!buffer->lockedReadOnly) + { + auto uploadBuffer = g_device->createBuffer(RenderBufferDesc::UploadBuffer(buffer->dataSize)); + + auto dest = reinterpret_cast(uploadBuffer->map()); + auto src = reinterpret_cast(buffer->mappedMemory); + + for (size_t i = 0; i < buffer->dataSize; i += sizeof(T)) + { + *dest = std::byteswap(*src); + ++dest; + ++src; + } + + uploadBuffer->unmap(); + + ExecuteCopyCommandList([&] + { + g_copyCommandList->copyBufferRegion(buffer->buffer->at(0), uploadBuffer->at(0), buffer->dataSize); + }); + } +} + +static void UnlockVertexBuffer(GuestBuffer* buffer) +{ + UnlockBuffer(buffer); +} + +static void GetVertexBufferDesc(GuestBuffer* buffer, GuestBufferDesc* desc) +{ + desc->size = buffer->dataSize; +} + +static void* LockIndexBuffer(GuestBuffer* buffer, 
uint32_t, uint32_t, uint32_t flags) +{ + return LockBuffer(buffer, flags); +} + +static void UnlockIndexBuffer(GuestBuffer* buffer) +{ + UnlockBuffer(buffer); +} + +static void GetIndexBufferDesc(GuestBuffer* buffer, GuestBufferDesc* desc) +{ + desc->format = buffer->guestFormat; + desc->size = buffer->dataSize; +} + +static void GetSurfaceDesc(GuestSurface* surface, GuestSurfaceDesc* desc) +{ + if (surface->textureHolder != nullptr) + { + desc->width = surface->width; + desc->height = surface->height; + } + else + { + desc->width = 1280; + desc->height = 720; + } +} + +static void GetVertexDeclaration(GuestVertexDeclaration* vertexDeclaration, GuestVertexElement* vertexElements, be* count) +{ + memcpy(vertexElements, vertexDeclaration->vertexElements.get(), vertexDeclaration->vertexElementCount * sizeof(GuestVertexElement)); + *count = vertexDeclaration->vertexElementCount; +} + +static uint32_t HashVertexDeclaration(uint32_t vertexDeclaration) +{ + // Vertex declarations are cached on host side, so the pointer itself can be used. 
+ return vertexDeclaration; +} + +static void Present() +{ + auto& commandList = g_commandLists[g_frame]; + commandList->barriers(RenderBarrierStage::GRAPHICS, RenderTextureBarrier(g_backBuffer->texture, RenderTextureLayout::PRESENT)); + commandList->end(); + + g_queue->executeCommandLists(commandList.get(), g_commandFences[g_frame].get()); + g_swapChain->present(0, nullptr, 0); + + g_frame = g_nextFrame; + g_nextFrame = (g_frame + 1) % NUM_FRAMES; + + g_queue->waitForCommandFence(g_commandFences[g_frame].get()); + + { + std::lock_guard lock(g_tempMutex); + + g_tempBuffers[g_frame].clear(); + g_tempTextures[g_frame].clear(); + + for (auto index : g_tempDescriptorIndices[g_frame]) + g_textureDescriptorAllocator.free(index); + + g_tempDescriptorIndices[g_frame].clear(); + } + + g_dirtyStates = DirtyStates(true); + g_uploadAllocators[g_frame].reset(); + + BeginCommandList(); +} + +static GuestSurface* GetBackBuffer() +{ + g_backBuffer->AddRef(); + return g_backBuffer; +} + +static RenderFormat ConvertFormat(uint32_t format) +{ + switch (format) + { + case D3DFMT_A16B16G16R16F: + case D3DFMT_A16B16G16R16F_2: + return RenderFormat::R16G16B16A16_FLOAT; + case D3DFMT_A8B8G8R8: + case D3DFMT_A8R8G8B8: + case D3DFMT_X8R8G8B8: + return RenderFormat::R8G8B8A8_UNORM; + case D3DFMT_D24FS8: + case D3DFMT_D24S8: + return RenderFormat::D32_FLOAT; + case D3DFMT_G16R16F: + case D3DFMT_G16R16F_2: + return RenderFormat::R16G16_FLOAT; + case D3DFMT_INDEX16: + return RenderFormat::R16_UINT; + case D3DFMT_INDEX32: + return RenderFormat::R32_UINT; + case D3DFMT_L8: + case D3DFMT_L8_2: + return RenderFormat::R8_UNORM; + default: + assert(false && "Unknown format"); + return RenderFormat::R16G16B16A16_FLOAT; + } +} + +static GuestTexture* CreateTexture(uint32_t width, uint32_t height, uint32_t depth, uint32_t levels, uint32_t usage, uint32_t format, uint32_t pool, uint32_t type) +{ + const auto texture = g_userHeap.AllocPhysical(type == 17 ? 
ResourceType::VolumeTexture : ResourceType::Texture); + + RenderTextureDesc desc; + desc.dimension = texture->type == ResourceType::VolumeTexture ? RenderTextureDimension::TEXTURE_3D : RenderTextureDimension::TEXTURE_2D; + desc.width = width; + desc.height = height; + desc.depth = depth; + desc.mipLevels = levels; + desc.arraySize = 1; + desc.format = ConvertFormat(format); + texture->texture = g_device->createTexture(desc); + + RenderTextureViewDesc viewDesc; + viewDesc.format = desc.format == RenderFormat::D32_FLOAT ? RenderFormat::R32_FLOAT : desc.format; + viewDesc.dimension = texture->type == ResourceType::VolumeTexture ? RenderTextureViewDimension::TEXTURE_3D : RenderTextureViewDimension::TEXTURE_2D; + viewDesc.mipLevels = levels; + texture->textureView = texture->texture->createTextureView(viewDesc); + + texture->width = width; + texture->height = height; + texture->depth = depth; + texture->format = desc.format; + texture->descriptorIndex = g_textureDescriptorAllocator.allocate(); + + g_textureDescriptorSet->setTexture(texture->descriptorIndex, texture->texture.get(), RenderTextureLayout::SHADER_READ, texture->textureView.get()); + + return texture; +} + +static GuestBuffer* CreateVertexBuffer(uint32_t length) +{ + auto buffer = g_userHeap.AllocPhysical(ResourceType::VertexBuffer); + buffer->buffer = g_device->createBuffer(RenderBufferDesc::VertexBuffer(length, RenderHeapType::DEFAULT)); + buffer->dataSize = length; +#ifdef _DEBUG + buffer->buffer->setName(std::format("Vertex Buffer {:X}", g_memory.MapVirtual(buffer))); +#endif + return buffer; +} + +static GuestBuffer* CreateIndexBuffer(uint32_t length, uint32_t, uint32_t format) +{ + auto buffer = g_userHeap.AllocPhysical(ResourceType::IndexBuffer); + buffer->buffer = g_device->createBuffer(RenderBufferDesc::IndexBuffer(length, RenderHeapType::DEFAULT)); + buffer->dataSize = length; + buffer->format = ConvertFormat(format); + buffer->guestFormat = format; +#ifdef _DEBUG + 
buffer->buffer->setName(std::format("Index Buffer {:X}", g_memory.MapVirtual(buffer))); +#endif + return buffer; +} + +static GuestSurface* CreateSurface(uint32_t width, uint32_t height, uint32_t format, uint32_t multiSample) +{ + RenderTextureDesc desc; + desc.dimension = RenderTextureDimension::TEXTURE_2D; + desc.width = width; + desc.height = height; + desc.depth = 1; + desc.mipLevels = 1; + desc.arraySize = 1; + //desc.multisampling.sampleCount = multiSample != 0 ? RenderSampleCount::COUNT_4 : RenderSampleCount::COUNT_1; + desc.format = ConvertFormat(format); + desc.flags = desc.format == RenderFormat::D32_FLOAT ? RenderTextureFlag::DEPTH_TARGET : RenderTextureFlag::RENDER_TARGET; + desc.committed = true; + + auto surface = g_userHeap.AllocPhysical(desc.format == RenderFormat::D32_FLOAT ? + ResourceType::DepthStencil : ResourceType::RenderTarget); + + surface->textureHolder = g_device->createTexture(desc); + surface->texture = surface->textureHolder.get(); + surface->width = width; + surface->height = height; + surface->format = desc.format; + + return surface; +} + +static void StretchRect(GuestDevice* device, uint32_t flags, uint32_t, GuestTexture* texture) +{ + const bool isDepthStencil = (flags & 0x4) != 0; + const auto surface = isDepthStencil ? g_depthStencil : g_renderTarget; + + RenderTextureBarrier srcBarriers[] = + { + RenderTextureBarrier(surface->texture, RenderTextureLayout::COPY_SOURCE), + RenderTextureBarrier(texture->texture.get(), RenderTextureLayout::COPY_DEST) + }; + + auto& commandList = g_commandLists[g_frame]; + + commandList->barriers(RenderBarrierStage::GRAPHICS, srcBarriers, std::size(srcBarriers)); + commandList->copyTexture(texture->texture.get(), surface->texture); + + RenderTextureBarrier dstBarriers[] = + { + RenderTextureBarrier(surface->texture, isDepthStencil ? 
RenderTextureLayout::DEPTH_WRITE : RenderTextureLayout::COLOR_WRITE), + RenderTextureBarrier(texture->texture.get(), RenderTextureLayout::SHADER_READ) + }; + + commandList->barriers(RenderBarrierStage::GRAPHICS, dstBarriers, std::size(dstBarriers)); +} + +static void SetRenderTarget(GuestDevice* device, uint32_t index, GuestSurface* renderTarget) +{ + if (renderTarget != nullptr) + g_commandLists[g_frame]->barriers(RenderBarrierStage::GRAPHICS, RenderTextureBarrier(renderTarget->texture, RenderTextureLayout::COLOR_WRITE)); + + SetDirtyValue(g_dirtyStates.renderTargetAndDepthStencil, g_renderTarget, renderTarget); + SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.renderTargetFormat, renderTarget != nullptr ? renderTarget->format : RenderFormat::UNKNOWN); +} + +static GuestSurface* GetDepthStencilSurface(GuestDevice* device) { return nullptr; } -// CApplication::Update -PPC_FUNC_IMPL(__imp__sub_822C1130); -PPC_FUNC(sub_822C1130) +static void SetDepthStencilSurface(GuestDevice* device, GuestSurface* depthStencil) { - SDL_PumpEvents(); - SDL_FlushEvents(SDL_FIRSTEVENT, SDL_LASTEVENT); + if (depthStencil != nullptr) + g_commandLists[g_frame]->barriers(RenderBarrierStage::GRAPHICS, RenderTextureBarrier(depthStencil->texture, RenderTextureLayout::DEPTH_WRITE)); - __imp__sub_822C1130(ctx, base); + SetDirtyValue(g_dirtyStates.renderTargetAndDepthStencil, g_depthStencil, depthStencil); + SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.depthStencilFormat, depthStencil != nullptr ? 
depthStencil->format : RenderFormat::UNKNOWN); } -// Skip logo -PPC_FUNC(sub_82547DF0) +static void FlushFramebuffer() { - sub_825517C8(ctx, base); + if (g_dirtyStates.renderTargetAndDepthStencil) + { + GuestSurface* framebufferContainer = nullptr; + RenderTexture* framebufferKey = nullptr; + + if (g_renderTarget != nullptr && g_depthStencil != nullptr) + { + framebufferContainer = g_depthStencil; // Backbuffer texture changes per frame so we can't use the depth stencil as the key. + framebufferKey = g_renderTarget->texture; + } + else if (g_renderTarget != nullptr && g_depthStencil == nullptr) + { + framebufferContainer = g_renderTarget; + framebufferKey = g_renderTarget->texture; // Backbuffer texture changes per frame so we can't assume nullptr for it. + } + else if (g_renderTarget == nullptr && g_depthStencil != nullptr) + { + framebufferContainer = g_depthStencil; + framebufferKey = nullptr; + } + + auto& commandList = g_commandLists[g_frame]; + + if (framebufferContainer != nullptr) + { + auto& framebuffer = framebufferContainer->framebuffers[framebufferKey]; + + if (framebuffer == nullptr) + { + RenderFramebufferDesc desc; + + if (g_renderTarget != nullptr) + { + desc.colorAttachments = const_cast(&g_renderTarget->texture); + desc.colorAttachmentsCount = 1; + } + + if (g_depthStencil != nullptr) + desc.depthAttachment = g_depthStencil->texture; + + framebuffer = g_device->createFramebuffer(desc); + } + + commandList->setFramebuffer(framebuffer.get()); + } + else + { + commandList->setFramebuffer(nullptr); + } + + g_dirtyStates.renderTargetAndDepthStencil = false; + } } -// Direct3D stubs -GUEST_FUNCTION_STUB(sub_824EB290); -GUEST_FUNCTION_STUB(sub_82BDA8C0); +static void Clear(GuestDevice* device, uint32_t flags, uint32_t, be* color, double z) +{ + FlushFramebuffer(); + + auto& commandList = g_commandLists[g_frame]; + + if (g_renderTarget != nullptr && (flags & D3DCLEAR_TARGET) != 0) + commandList->clearColor(0, RenderColor(color[0], color[1], color[2], 
color[3])); + + if (g_depthStencil != nullptr && (flags & D3DCLEAR_ZBUFFER) != 0) + commandList->clearDepth(true, float(z)); +} + +static void SetViewport(GuestDevice* device, GuestViewport* viewport) +{ + SetDirtyValue(g_dirtyStates.viewport, g_viewport.x, viewport->x); + SetDirtyValue(g_dirtyStates.viewport, g_viewport.y, viewport->y); + SetDirtyValue(g_dirtyStates.viewport, g_viewport.width, viewport->width); + SetDirtyValue(g_dirtyStates.viewport, g_viewport.height, viewport->height); + SetDirtyValue(g_dirtyStates.viewport, g_viewport.minDepth, viewport->minZ); + SetDirtyValue(g_dirtyStates.viewport, g_viewport.maxDepth, viewport->maxZ); + + g_dirtyStates.scissorRect |= g_dirtyStates.viewport; +} + +static void GetViewport(GuestDevice* device, GuestViewport* viewport) +{ + viewport->x = g_viewport.x; + viewport->y = g_viewport.y; + viewport->width = g_viewport.width; + viewport->height = g_viewport.height; + viewport->minZ = g_viewport.minDepth; + viewport->maxZ = g_viewport.maxDepth; +} + +static void SetTexture(GuestDevice* device, uint32_t index, GuestTexture* texture) +{ + if (texture != nullptr) + g_commandLists[g_frame]->barriers(RenderBarrierStage::GRAPHICS, RenderTextureBarrier(texture->texture.get(), RenderTextureLayout::SHADER_READ)); + + SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.textureIndices[index], texture != nullptr ? 
texture->descriptorIndex : NULL); +} + +static void SetScissorRect(GuestDevice* device, GuestRect* rect) +{ + SetDirtyValue(g_dirtyStates.scissorRect, g_scissorRect.top, rect->top); + SetDirtyValue(g_dirtyStates.scissorRect, g_scissorRect.left, rect->left); + SetDirtyValue(g_dirtyStates.scissorRect, g_scissorRect.bottom, rect->bottom); + SetDirtyValue(g_dirtyStates.scissorRect, g_scissorRect.right, rect->right); +} + +static RenderPipeline* CreateGraphicsPipeline(const PipelineState& pipelineState) +{ + auto& pipeline = g_pipelines[XXH3_64bits(&pipelineState, sizeof(PipelineState))]; + if (pipeline == nullptr) + { + RenderGraphicsPipelineDesc desc; + desc.pipelineLayout = g_pipelineLayout.get(); + desc.vertexShader = pipelineState.vertexShader->shader.get(); + desc.pixelShader = pipelineState.pixelShader != nullptr ? pipelineState.pixelShader->shader.get() : nullptr; + desc.depthFunction = pipelineState.zFunc; + desc.depthEnabled = pipelineState.zEnable; + desc.depthWriteEnabled = pipelineState.zWriteEnable; + desc.depthBias = pipelineState.depthBias; + desc.slopeScaledDepthBias = pipelineState.slopeScaledDepthBias; + desc.depthClipEnabled = true; + desc.primitiveTopology = pipelineState.primitiveTopology; + desc.cullMode = pipelineState.cullMode; + desc.renderTargetFormat[0] = pipelineState.renderTargetFormat; + desc.renderTargetBlend[0].blendEnabled = pipelineState.alphaBlendEnable; + desc.renderTargetBlend[0].srcBlend = pipelineState.srcBlend; + desc.renderTargetBlend[0].dstBlend = pipelineState.destBlend; + desc.renderTargetBlend[0].blendOp = pipelineState.blendOp; + desc.renderTargetBlend[0].srcBlendAlpha = pipelineState.srcBlendAlpha; + desc.renderTargetBlend[0].dstBlendAlpha = pipelineState.destBlendAlpha; + desc.renderTargetBlend[0].blendOpAlpha = pipelineState.blendOpAlpha; + desc.renderTargetBlend[0].renderTargetWriteMask = pipelineState.colorWriteEnable; + desc.renderTargetCount = pipelineState.renderTargetFormat != RenderFormat::UNKNOWN ? 
1 : 0; + desc.depthTargetFormat = pipelineState.depthStencilFormat; + desc.inputElements = pipelineState.vertexDeclaration->inputElements.get(); + desc.inputElementsCount = pipelineState.vertexDeclaration->inputElementCount; + + RenderInputSlot inputSlots[16]{}; + uint32_t inputSlotIndices[16]{}; + uint32_t inputSlotCount = 0; + + for (size_t i = 0; i < pipelineState.vertexDeclaration->inputElementCount; i++) + { + auto& inputElement = pipelineState.vertexDeclaration->inputElements[i]; + auto& inputSlotIndex = inputSlotIndices[inputElement.slotIndex]; + + if (inputSlotIndex == NULL) + inputSlotIndex = ++inputSlotCount; + + auto& inputSlot = inputSlots[inputSlotIndex - 1]; + inputSlot.index = inputElement.slotIndex; + inputSlot.stride = pipelineState.vertexStrides[inputElement.slotIndex]; + + if (pipelineState.instancing && inputElement.slotIndex != 0 && inputElement.slotIndex != 15) + inputSlot.classification = RenderInputSlotClassification::PER_INSTANCE_DATA; + else + inputSlot.classification = RenderInputSlotClassification::PER_VERTEX_DATA; + } + + desc.inputSlots = inputSlots; + desc.inputSlotsCount = inputSlotCount; + + pipeline = g_device->createGraphicsPipeline(desc); + } + return pipeline.get(); +} + +static RenderTextureAddressMode ConvertTextureAddressMode(size_t value) +{ + switch (value) + { + case D3DTADDRESS_WRAP: + return RenderTextureAddressMode::WRAP; + case D3DTADDRESS_MIRROR: + return RenderTextureAddressMode::MIRROR; + case D3DTADDRESS_CLAMP: + return RenderTextureAddressMode::CLAMP; + case D3DTADDRESS_MIRRORONCE: + return RenderTextureAddressMode::MIRROR_ONCE; + case D3DTADDRESS_BORDER: + return RenderTextureAddressMode::BORDER; + default: + assert(false && "Unknown texture address mode"); + return RenderTextureAddressMode::UNKNOWN; + } +} + +static RenderFilter ConvertTextureFilter(uint32_t value) +{ + switch (value) + { + case D3DTEXF_POINT: + case D3DTEXF_NONE: + return RenderFilter::NEAREST; + case D3DTEXF_LINEAR: + return 
RenderFilter::LINEAR; + default: + assert(false && "Unknown texture filter"); + return RenderFilter::UNKNOWN; + } +} + +static RenderBorderColor ConvertBorderColor(uint32_t value) +{ + switch (value) + { + case 0: + return RenderBorderColor::TRANSPARENT_BLACK; + case 1: + return RenderBorderColor::OPAQUE_WHITE; + default: + assert(false && "Unknown border color"); + return RenderBorderColor::UNKNOWN; + } +} + +static void FlushRenderState(GuestDevice* device) +{ + FlushFramebuffer(); + + auto& commandList = g_commandLists[g_frame]; + + if (g_dirtyStates.viewport) + commandList->setViewports(g_viewport); + + if (g_dirtyStates.pipelineState) + commandList->setPipeline(CreateGraphicsPipeline(g_pipelineState)); + + constexpr size_t BOOL_MASK = 0x100000000000000ull; + if ((device->dirtyFlags[4].get() & BOOL_MASK) != 0) + { + uint32_t booleans = device->vertexShaderBoolConstants [0].get() & 0xFF; + booleans |= (device->pixelShaderBoolConstants[0].get() & 0xFF) << 16; + + SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.booleans, booleans); + + device->dirtyFlags[4] = device->dirtyFlags[4].get() & ~BOOL_MASK; + } + + for (size_t i = 0; i < 16; i++) + { + const size_t mask = 0x8000000000000000ull >> (i + 32); + if (device->dirtyFlags[3].get() & mask) + { + const auto addressU = ConvertTextureAddressMode((device->samplerStates[i].data[0].get() >> 10) & 0x7); + const auto addressV = ConvertTextureAddressMode((device->samplerStates[i].data[0].get() >> 13) & 0x7); + const auto addressW = ConvertTextureAddressMode((device->samplerStates[i].data[0].get() >> 16) & 0x7); + const auto magFilter = ConvertTextureFilter((device->samplerStates[i].data[3].get() >> 19) & 0x3); + const auto minFilter = ConvertTextureFilter((device->samplerStates[i].data[3].get() >> 21) & 0x3); + const auto mipFilter = ConvertTextureFilter((device->samplerStates[i].data[3].get() >> 23) & 0x3); + const auto borderColor = ConvertBorderColor(device->samplerStates[i].data[5].get() & 0x3); + + 
auto& samplerDesc = g_samplerDescs[i]; + + bool dirty = false; + + SetDirtyValue(dirty, samplerDesc.addressU, addressU); + SetDirtyValue(dirty, samplerDesc.addressV, addressV); + SetDirtyValue(dirty, samplerDesc.addressW, addressW); + SetDirtyValue(dirty, samplerDesc.minFilter, minFilter); + SetDirtyValue(dirty, samplerDesc.magFilter, magFilter); + SetDirtyValue(dirty, samplerDesc.mipmapMode, RenderMipmapMode(mipFilter)); + SetDirtyValue(dirty, samplerDesc.borderColor, borderColor); + + if (dirty) + { + auto& [descriptorIndex, sampler] = g_samplerStates[XXH3_64bits(&samplerDesc, sizeof(RenderSamplerDesc))]; + if (descriptorIndex == NULL) + { + descriptorIndex = g_samplerStates.size(); + sampler = g_device->createSampler(samplerDesc); + + g_samplerDescriptorSet->setSampler(descriptorIndex, sampler.get()); + } + + SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.samplerIndices[i], descriptorIndex); + } + } + } + + auto& uploadAllocator = g_uploadAllocators[g_frame]; + + if (g_dirtyStates.sharedConstants) + { + auto sharedConstants = uploadAllocator.allocate(&g_sharedConstants, sizeof(g_sharedConstants), 0x100); + commandList->setGraphicsRootDescriptor(sharedConstants, 2); + } + + if (g_dirtyStates.scissorRect) + { + const auto scissorRect = g_scissorTestEnable ? 
g_scissorRect : RenderRect( + g_viewport.x, + g_viewport.y, + g_viewport.x + g_viewport.width, + g_viewport.y + g_viewport.height); + + commandList->setScissors(scissorRect); + } + + if (g_dirtyStates.vertexShaderConstants || device->dirtyFlags[0] != 0) + { + auto vertexShaderConstants = uploadAllocator.allocate(device->vertexShaderFloatConstants, 0x1000, 0x100); + commandList->setGraphicsRootDescriptor(vertexShaderConstants, 0); + + device->dirtyFlags[0] = 0; + } + + if (g_dirtyStates.vertexStreamFirst <= g_dirtyStates.vertexStreamLast) + { + commandList->setVertexBuffers( + g_dirtyStates.vertexStreamFirst, + g_vertexBufferViews + g_dirtyStates.vertexStreamFirst, + g_dirtyStates.vertexStreamLast - g_dirtyStates.vertexStreamFirst + 1, + g_inputSlots + g_dirtyStates.vertexStreamFirst); + } + + if (g_dirtyStates.indices) + commandList->setIndexBuffer(&g_indexBufferView); + + if (g_dirtyStates.pixelShaderConstants || device->dirtyFlags[1] != 0) + { + auto pixelShaderConstants = uploadAllocator.allocate(device->pixelShaderFloatConstants, 0xE00, 0x100); + commandList->setGraphicsRootDescriptor(pixelShaderConstants, 1); + + device->dirtyFlags[1] = 0; + } + + g_dirtyStates = DirtyStates(false); +} + +static RenderPrimitiveTopology ConvertPrimitiveType(uint32_t primitiveType) +{ + switch (primitiveType) + { + case D3DPT_POINTLIST: + return RenderPrimitiveTopology::POINT_LIST; + case D3DPT_LINELIST: + return RenderPrimitiveTopology::LINE_LIST; + case D3DPT_LINESTRIP: + return RenderPrimitiveTopology::LINE_STRIP; + case D3DPT_TRIANGLELIST: + return RenderPrimitiveTopology::TRIANGLE_LIST; + case D3DPT_TRIANGLESTRIP: + return RenderPrimitiveTopology::TRIANGLE_STRIP; + default: + assert(false && "Unknown primitive type"); + return RenderPrimitiveTopology::UNKNOWN; + } +} + +static void SetPrimitiveType(uint32_t primitiveType) +{ + SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.primitiveTopology, ConvertPrimitiveType(primitiveType)); +} + +static bool 
TemporarySkipRendering(uint32_t primitiveType) +{ + return primitiveType == D3DPT_QUADLIST || + primitiveType == D3DPT_TRIANGLEFAN || + g_pipelineState.vertexShader == nullptr || + g_pipelineState.vertexShader->shader == nullptr; +} + +static uint32_t CheckInstancing() +{ + uint32_t indexCount = 0; + + SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.instancing, g_pipelineState.vertexDeclaration->indexVertexStream != 0); + if (g_pipelineState.instancing) + { + // Index buffer is passed as a vertex stream + indexCount = g_vertexBufferViews[g_pipelineState.vertexDeclaration->indexVertexStream].size / 4; + } + + return indexCount; +} + +static void DrawPrimitive(GuestDevice* device, uint32_t primitiveType, uint32_t startVertex, uint32_t primitiveCount) +{ + if (TemporarySkipRendering(primitiveType)) + return; + + SetPrimitiveType(primitiveType); + + uint32_t indexCount = CheckInstancing(); + if (indexCount > 0) + { + auto& vertexBufferView = g_vertexBufferViews[g_pipelineState.vertexDeclaration->indexVertexStream]; + + SetDirtyValue(g_dirtyStates.indices, g_indexBufferView.buffer, vertexBufferView.buffer); + SetDirtyValue(g_dirtyStates.indices, g_indexBufferView.size, vertexBufferView.size); + SetDirtyValue(g_dirtyStates.indices, g_indexBufferView.format, RenderFormat::R32_UINT); + } + + FlushRenderState(device); + + auto& commandList = g_commandLists[g_frame]; + + if (indexCount > 0) + commandList->drawIndexedInstanced(indexCount, primitiveCount / indexCount, 0, 0, 0); + else + commandList->drawInstanced(primitiveCount, 1, startVertex, 0); +} + +static void DrawIndexedPrimitive(GuestDevice* device, uint32_t primitiveType, int32_t baseVertexIndex, uint32_t startIndex, uint32_t primCount) +{ + if (TemporarySkipRendering(primitiveType)) + return; + + CheckInstancing(); + SetPrimitiveType(primitiveType); + FlushRenderState(device); + g_commandLists[g_frame]->drawIndexedInstanced(primCount, 1, startIndex, baseVertexIndex, 0); +} + +static void 
DrawPrimitiveUP(GuestDevice* device, uint32_t primitiveType, uint32_t primitiveCount, void* vertexStreamZeroData, uint32_t vertexStreamZeroStride) +{ + if (TemporarySkipRendering(primitiveType)) + return; + + CheckInstancing(); + SetPrimitiveType(primitiveType); + SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.vertexStrides[0], uint8_t(vertexStreamZeroStride)); + + auto& vertexBufferView = g_vertexBufferViews[0]; + vertexBufferView.size = primitiveCount * vertexStreamZeroStride; + vertexBufferView.buffer = g_uploadAllocators[g_frame].allocate(reinterpret_cast(vertexStreamZeroData), vertexBufferView.size, 0x4); + g_inputSlots[0].stride = vertexStreamZeroStride; + g_dirtyStates.vertexStreamFirst = 0; + + FlushRenderState(device); + g_commandLists[g_frame]->drawInstanced(primitiveCount, 1, 0, 0); +} + +static const char* ConvertDeclUsage(uint32_t usage) +{ + switch (usage) + { + case D3DDECLUSAGE_POSITION: + return "POSITION"; + case D3DDECLUSAGE_BLENDWEIGHT: + return "BLENDWEIGHT"; + case D3DDECLUSAGE_BLENDINDICES: + return "BLENDINDICES"; + case D3DDECLUSAGE_NORMAL: + return "NORMAL"; + case D3DDECLUSAGE_PSIZE: + return "PSIZE"; + case D3DDECLUSAGE_TEXCOORD: + return "TEXCOORD"; + case D3DDECLUSAGE_TANGENT: + return "TANGENT"; + case D3DDECLUSAGE_BINORMAL: + return "BINORMAL"; + case D3DDECLUSAGE_TESSFACTOR: + return "TESSFACTOR"; + case D3DDECLUSAGE_POSITIONT: + return "POSITIONT"; + case D3DDECLUSAGE_COLOR: + return "COLOR"; + case D3DDECLUSAGE_FOG: + return "FOG"; + case D3DDECLUSAGE_DEPTH: + return "DEPTH"; + case D3DDECLUSAGE_SAMPLE: + return "SAMPLE"; + default: + assert(false && "Unknown usage"); + return "UNKNOWN"; + } +} + +static RenderFormat ConvertDeclType(uint32_t type) +{ + switch (type) + { + case D3DDECLTYPE_FLOAT1: + return RenderFormat::R32_FLOAT; + case D3DDECLTYPE_FLOAT2: + return RenderFormat::R32G32_FLOAT; + case D3DDECLTYPE_FLOAT3: + return RenderFormat::R32G32B32_FLOAT; + case D3DDECLTYPE_FLOAT4: + return 
RenderFormat::R32G32B32A32_FLOAT; + case D3DDECLTYPE_D3DCOLOR: + return RenderFormat::B8G8R8A8_UNORM; + case D3DDECLTYPE_UBYTE4: + case D3DDECLTYPE_UBYTE4_2: + return RenderFormat::R8G8B8A8_UINT; + case D3DDECLTYPE_SHORT2: + return RenderFormat::R16G16_SINT; + case D3DDECLTYPE_SHORT4: + return RenderFormat::R16G16B16A16_SINT; + case D3DDECLTYPE_UBYTE4N: + case D3DDECLTYPE_UBYTE4N_2: + return RenderFormat::R8G8B8A8_UNORM; + case D3DDECLTYPE_SHORT2N: + return RenderFormat::R16G16_SNORM; + case D3DDECLTYPE_SHORT4N: + return RenderFormat::R16G16B16A16_SNORM; + case D3DDECLTYPE_USHORT2N: + return RenderFormat::R16G16_UNORM; + case D3DDECLTYPE_USHORT4N: + return RenderFormat::R16G16B16A16_UNORM; + case D3DDECLTYPE_UINT1: + return RenderFormat::R32_UINT; + case D3DDECLTYPE_DEC3N_2: + case D3DDECLTYPE_DEC3N_3: + return RenderFormat::R32_UINT; + case D3DDECLTYPE_FLOAT16_2: + return RenderFormat::R16G16_FLOAT; + case D3DDECLTYPE_FLOAT16_4: + return RenderFormat::R16G16B16A16_FLOAT; + default: + assert(false && "Unknown type"); + return RenderFormat::UNKNOWN; + } +} + +static GuestVertexDeclaration* CreateVertexDeclaration(GuestVertexElement* vertexElements) +{ + size_t vertexElementCount = 0; + auto vertexElement = vertexElements; + + while (vertexElement->stream != 0xFF && vertexElement->type != D3DDECLTYPE_UNUSED) + { + vertexElement->padding = 0; + ++vertexElement; + ++vertexElementCount; + } + + std::lock_guard lock(g_vertexDeclarationMutex); + + auto& vertexDeclaration = g_vertexDeclarations[ + XXH3_64bits(vertexElements, vertexElementCount * sizeof(GuestVertexElement))]; + + if (vertexDeclaration == nullptr) + { + vertexDeclaration = g_userHeap.AllocPhysical(ResourceType::VertexDeclaration); + + static std::vector inputElements; + inputElements.clear(); + + vertexElement = vertexElements; + while (vertexElement->stream != 0xFF && vertexElement->type != D3DDECLTYPE_UNUSED) + { + auto& inputElement = inputElements.emplace_back(); + + inputElement.semanticName = 
ConvertDeclUsage(vertexElement->usage); + inputElement.semanticIndex = vertexElement->usageIndex; + inputElement.location = (vertexElement->usage * 4) + vertexElement->usageIndex; + inputElement.format = ConvertDeclType(vertexElement->type); + inputElement.slotIndex = vertexElement->stream; + inputElement.alignedByteOffset = vertexElement->offset; + + switch (vertexElement->usage) + { + case D3DDECLUSAGE_POSITION: + if (vertexElement->usageIndex == 1) + vertexDeclaration->indexVertexStream = vertexElement->stream; + break; + + case D3DDECLUSAGE_BLENDWEIGHT: + case D3DDECLUSAGE_BLENDINDICES: + vertexDeclaration->inputLayoutFlags |= INPUT_LAYOUT_FLAG_HAS_BONE_WEIGHTS; + break; + + case D3DDECLUSAGE_NORMAL: + case D3DDECLUSAGE_TANGENT: + case D3DDECLUSAGE_BINORMAL: + if (vertexElement->type == D3DDECLTYPE_FLOAT3) + inputElement.format = RenderFormat::R32G32B32_UINT; + else + vertexDeclaration->inputLayoutFlags |= INPUT_LAYOUT_FLAG_HAS_R11G11B10_NORMAL; + break; + + case D3DDECLUSAGE_TEXCOORD: + switch (vertexElement->type) + { + case D3DDECLTYPE_SHORT2: + case D3DDECLTYPE_SHORT4: + case D3DDECLTYPE_SHORT2N: + case D3DDECLTYPE_SHORT4N: + case D3DDECLTYPE_USHORT2N: + case D3DDECLTYPE_USHORT4N: + case D3DDECLTYPE_FLOAT16_2: + case D3DDECLTYPE_FLOAT16_4: + vertexDeclaration->swappedTexcoords |= 1 << vertexElement->usageIndex; + break; + } + + break; + } + + ++vertexElement; + } + + auto addInputElement = [&](uint32_t usage, uint32_t usageIndex) + { + uint32_t location = (usage * 4) + usageIndex; + + for (auto& inputElement : inputElements) + { + if (inputElement.location == location) + return; + } + + auto format = RenderFormat::R32_FLOAT; + switch (usage) + { + case D3DDECLUSAGE_NORMAL: + case D3DDECLUSAGE_TANGENT: + case D3DDECLUSAGE_BINORMAL: + case D3DDECLUSAGE_BLENDINDICES: + format = RenderFormat::R32_UINT; + break; + } + + inputElements.emplace_back(ConvertDeclUsage(usage), usageIndex, location, format, 15, 0); + }; + + addInputElement(D3DDECLUSAGE_POSITION, 0); + 
addInputElement(D3DDECLUSAGE_NORMAL, 0); + addInputElement(D3DDECLUSAGE_TANGENT, 0); + addInputElement(D3DDECLUSAGE_BINORMAL, 0); + addInputElement(D3DDECLUSAGE_TEXCOORD, 0); + addInputElement(D3DDECLUSAGE_TEXCOORD, 1); + addInputElement(D3DDECLUSAGE_TEXCOORD, 2); + addInputElement(D3DDECLUSAGE_TEXCOORD, 3); + addInputElement(D3DDECLUSAGE_COLOR, 0); + addInputElement(D3DDECLUSAGE_COLOR, 1); + addInputElement(D3DDECLUSAGE_BLENDWEIGHT, 0); + addInputElement(D3DDECLUSAGE_BLENDINDICES, 0); + + vertexDeclaration->inputElements = std::make_unique(inputElements.size()); + std::copy(inputElements.begin(), inputElements.end(), vertexDeclaration->inputElements.get()); + + vertexDeclaration->vertexElements = std::make_unique(vertexElementCount + 1); + std::copy(vertexElements, vertexElements + vertexElementCount + 1, vertexDeclaration->vertexElements.get()); + + vertexDeclaration->inputElementCount = uint32_t(inputElements.size()); + vertexDeclaration->vertexElementCount = vertexElementCount + 1; + } + + vertexDeclaration->AddRef(); + return vertexDeclaration; +} + +static void SetVertexDeclaration(GuestDevice* device, GuestVertexDeclaration* vertexDeclaration) +{ + if (vertexDeclaration != nullptr) + { + SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.swappedTexcoords, vertexDeclaration->swappedTexcoords); + SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.inputLayoutFlags, vertexDeclaration->inputLayoutFlags); + } + SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.vertexDeclaration, vertexDeclaration); + device->vertexDeclaration = vertexDeclaration; +} + +static GuestShader* CreateVertexShader(const uint32_t* function) +{ + auto vertexShader = g_userHeap.AllocPhysical(ResourceType::VertexShader); + if (*function == 0x43425844) + vertexShader->shader = g_device->createShader(function, function[6], "main", RenderShaderFormat::DXIL); + + return vertexShader; +} + +static void SetVertexShader(GuestDevice* device, GuestShader* shader) +{ + 
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.vertexShader, shader); +} + +static void SetStreamSource(GuestDevice* device, uint32_t index, GuestBuffer* buffer, uint32_t offset, uint32_t stride) +{ + SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.vertexStrides[index], uint8_t(buffer != nullptr ? stride : 0)); + + bool dirty = false; + + SetDirtyValue(dirty, g_vertexBufferViews[index].buffer, buffer != nullptr ? buffer->buffer->at(offset) : RenderBufferReference{}); + SetDirtyValue(dirty, g_vertexBufferViews[index].size, buffer != nullptr ? (buffer->dataSize - offset) : 0u); + SetDirtyValue(dirty, g_inputSlots[index].stride, buffer != nullptr ? stride : 0u); + + if (dirty) + { + g_dirtyStates.vertexStreamFirst = std::min(g_dirtyStates.vertexStreamFirst, index); + g_dirtyStates.vertexStreamLast = std::max(g_dirtyStates.vertexStreamLast, index); + } +} + +static void SetIndices(GuestDevice* device, GuestBuffer* buffer) +{ + SetDirtyValue(g_dirtyStates.indices, g_indexBufferView.buffer, buffer != nullptr ? buffer->buffer->at(0) : RenderBufferReference{}); + SetDirtyValue(g_dirtyStates.indices, g_indexBufferView.format, buffer != nullptr ? buffer->format : RenderFormat::UNKNOWN); + SetDirtyValue(g_dirtyStates.indices, g_indexBufferView.size, buffer != nullptr ? 
buffer->dataSize : 0u);
+}
+
+// Allocates a guest pixel shader object. The host shader is only created when the blob starts
+// with 0x43425844 (the bytes "DXBC" read as a little-endian dword); the dword at index 6 is
+// passed as the blob size. Any other blob leaves `shader` empty.
+static GuestShader* CreatePixelShader(const uint32_t* function)
+{
+    auto pixelShader = g_userHeap.AllocPhysical<GuestShader>(ResourceType::PixelShader);
+    if (*function == 0x43425844)
+        pixelShader->shader = g_device->createShader(function, function[6], "main", RenderShaderFormat::DXIL);
+
+    return pixelShader;
+}
+
+// Records the pixel shader in the pending pipeline state, flagging it dirty when it changes.
+static void SetPixelShader(GuestDevice* device, GuestShader* shader)
+{
+    SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.pixelShader, shader);
+}
+
+// Fills mip 0 and mip 1 of a 4-bytes-per-texel volume texture from `data` and uploads both.
+//
+// texture  - destination; width/height/depth/format are read from it.
+// function - selects the source layout. The two recognised values look like guest addresses of
+//            fill callbacks (TODO confirm):
+//              0x82BC7820: four big-endian floats per texel, scaled by 255 and stored in order.
+//              0x82BC78A8: four bytes per texel, stored in reversed byte order.
+//            Any other value leaves mip 0 of the upload buffer unwritten.
+// data     - tightly packed source texels, x fastest, then y, then z.
+//
+// Mip 1 is a box filter of mip 0. Its extents are max(1, extent / 2); texels of an odd extent
+// that fall outside the last complete pair are not averaged, so the accumulator can no longer be
+// indexed out of bounds.
+static void D3DXFillVolumeTexture(GuestTexture* texture, uint32_t function, void* data)
+{
+    const uint32_t mipWidth = std::max(1u, texture->width / 2);
+    const uint32_t mipHeight = std::max(1u, texture->height / 2);
+    const uint32_t mipDepth = std::max(1u, texture->depth / 2);
+
+    // Source texels folded into one mip texel: 2 along every axis that actually halves.
+    const float samplesPerTexel = float(
+        (texture->width > 1 ? 2 : 1) * (texture->height > 1 ? 2 : 1) * (texture->depth > 1 ? 2 : 1));
+
+    uint32_t rowPitch0 = (texture->width * 4 + PITCH_ALIGNMENT - 1) & ~(PITCH_ALIGNMENT - 1);
+    uint32_t slicePitch0 = (rowPitch0 * texture->height * texture->depth + PLACEMENT_ALIGNMENT - 1) & ~(PLACEMENT_ALIGNMENT - 1);
+
+    uint32_t rowPitch1 = (mipWidth * 4 + PITCH_ALIGNMENT - 1) & ~(PITCH_ALIGNMENT - 1);
+    uint32_t slicePitch1 = (rowPitch1 * mipHeight * mipDepth + PLACEMENT_ALIGNMENT - 1) & ~(PLACEMENT_ALIGNMENT - 1);
+
+    auto uploadBuffer = g_device->createBuffer(RenderBufferDesc::UploadBuffer(slicePitch0 + slicePitch1));
+    uint8_t* mappedData = reinterpret_cast<uint8_t*>(uploadBuffer->map());
+
+    // Per-channel running sums for mip 1; kept thread_local so the allocation is reused.
+    thread_local std::vector<float> mipData;
+    mipData.assign(size_t(mipWidth) * mipHeight * mipDepth * 4, 0.0f);
+
+    for (size_t z = 0; z < texture->depth; z++)
+    {
+        for (size_t y = 0; y < texture->height; y++)
+        {
+            for (size_t x = 0; x < texture->width; x++)
+            {
+                auto dest = mappedData + z * rowPitch0 * texture->height + y * rowPitch0 + x * sizeof(uint32_t);
+                size_t index = z * texture->width * texture->height + y * texture->width + x;
+
+                // False only for the trailing texel of an odd extent, which has no mip texel.
+                const bool hasMipTexel = (x / 2 < mipWidth) && (y / 2 < mipHeight) && (z / 2 < mipDepth);
+                size_t mipIndex = ((z / 2) * mipWidth * mipHeight + (y / 2) * mipWidth + x / 2) * 4;
+
+                if (function == 0x82BC7820)
+                {
+                    auto src = reinterpret_cast<be<float>*>(data) + index * 4;
+
+                    for (size_t c = 0; c < 4; c++)
+                    {
+                        // Clamp first: converting an out-of-range float to uint8_t is undefined.
+                        const float scaled = std::min(std::max(src[c] * 255.0f, 0.0f), 255.0f);
+                        const uint8_t value = static_cast<uint8_t>(scaled);
+
+                        dest[c] = value;
+                        if (hasMipTexel)
+                            mipData[mipIndex + c] += value;
+                    }
+                }
+                else if (function == 0x82BC78A8)
+                {
+                    auto src = reinterpret_cast<uint8_t*>(data) + index * 4;
+
+                    for (size_t c = 0; c < 4; c++)
+                    {
+                        const uint8_t value = src[3 - c];
+
+                        dest[c] = value;
+                        if (hasMipTexel)
+                            mipData[mipIndex + c] += value;
+                    }
+                }
+            }
+        }
+    }
+
+    for (size_t z = 0; z < mipDepth; z++)
+    {
+        for (size_t y = 0; y < mipHeight; y++)
+        {
+            for (size_t x = 0; x < mipWidth; x++)
+            {
+                auto dest = mappedData + slicePitch0 + z * rowPitch1 * mipHeight + y * rowPitch1 + x * sizeof(uint32_t);
+                size_t index = (z * mipWidth * mipHeight + y * mipWidth + x) * 4;
+
+                for (size_t c = 0; c < 4; c++)
+                    dest[c] = static_cast<uint8_t>(mipData[index + c] / samplesPerTexel);
+            }
+        }
+    }
+
+    // Same order as MakePictureData: finish CPU writes and unmap before recording the copy.
+    uploadBuffer->unmap();
+
+    ExecuteCopyCommandList([&]
+        {
+            g_copyCommandList->copyTextureRegion(
+                RenderTextureCopyLocation::Subresource(texture->texture.get(), 0),
+                RenderTextureCopyLocation::PlacedFootprint(uploadBuffer.get(), texture->format, texture->width, texture->height, texture->depth, rowPitch0 / RenderFormatSize(texture->format), 0));
+
+            g_copyCommandList->copyTextureRegion(
+                RenderTextureCopyLocation::Subresource(texture->texture.get(), 1),
+                RenderTextureCopyLocation::PlacedFootprint(uploadBuffer.get(), texture->format, mipWidth, mipHeight, mipDepth, rowPitch1 / RenderFormatSize(texture->format), slicePitch0));
+        });
+}
+
+struct GuestPictureData
+{
+    be vtable;
+    uint8_t flags;
+    be name;
+    be texture;
+    be type;
+}; + +static RenderTextureDimension ConvertTextureDimension(ddspp::TextureType type) +{ + switch (type) + { + case ddspp::Texture1D: + return RenderTextureDimension::TEXTURE_1D; + case ddspp::Texture2D: + case ddspp::Cubemap: + return RenderTextureDimension::TEXTURE_2D; + case ddspp::Texture3D: + return RenderTextureDimension::TEXTURE_3D; + default: + assert(false && "Unknown texture type from DDS."); + return RenderTextureDimension::UNKNOWN; + } +} + +static RenderTextureViewDimension ConvertTextureViewDimension(ddspp::TextureType type) +{ + switch (type) + { + case ddspp::Texture1D: + return RenderTextureViewDimension::TEXTURE_1D; + case ddspp::Texture2D: + return RenderTextureViewDimension::TEXTURE_2D; + case ddspp::Texture3D: + return RenderTextureViewDimension::TEXTURE_3D; + case ddspp::Cubemap: + return RenderTextureViewDimension::TEXTURE_CUBE; + default: + assert(false && "Unknown texture type from DDS."); + return RenderTextureViewDimension::UNKNOWN; + } +} + +static RenderFormat ConvertDXGIFormat(ddspp::DXGIFormat format) +{ + switch (format) + { + case ddspp::R32G32B32A32_TYPELESS: + return RenderFormat::R32G32B32A32_TYPELESS; + case ddspp::R32G32B32A32_FLOAT: + return RenderFormat::R32G32B32A32_FLOAT; + case ddspp::R32G32B32A32_UINT: + return RenderFormat::R32G32B32A32_UINT; + case ddspp::R32G32B32A32_SINT: + return RenderFormat::R32G32B32A32_SINT; + case ddspp::R32G32B32_TYPELESS: + return RenderFormat::R32G32B32_TYPELESS; + case ddspp::R32G32B32_FLOAT: + return RenderFormat::R32G32B32_FLOAT; + case ddspp::R32G32B32_UINT: + return RenderFormat::R32G32B32_UINT; + case ddspp::R32G32B32_SINT: + return RenderFormat::R32G32B32_SINT; + case ddspp::R16G16B16A16_TYPELESS: + return RenderFormat::R16G16B16A16_TYPELESS; + case ddspp::R16G16B16A16_FLOAT: + return RenderFormat::R16G16B16A16_FLOAT; + case ddspp::R16G16B16A16_UNORM: + return RenderFormat::R16G16B16A16_UNORM; + case ddspp::R16G16B16A16_UINT: + return RenderFormat::R16G16B16A16_UINT; + case 
ddspp::R16G16B16A16_SNORM: + return RenderFormat::R16G16B16A16_SNORM; + case ddspp::R16G16B16A16_SINT: + return RenderFormat::R16G16B16A16_SINT; + case ddspp::R32G32_TYPELESS: + return RenderFormat::R32G32_TYPELESS; + case ddspp::R32G32_FLOAT: + return RenderFormat::R32G32_FLOAT; + case ddspp::R32G32_UINT: + return RenderFormat::R32G32_UINT; + case ddspp::R32G32_SINT: + return RenderFormat::R32G32_SINT; + case ddspp::R8G8B8A8_TYPELESS: + return RenderFormat::R8G8B8A8_TYPELESS; + case ddspp::R8G8B8A8_UNORM: + return RenderFormat::R8G8B8A8_UNORM; + case ddspp::R8G8B8A8_UINT: + return RenderFormat::R8G8B8A8_UINT; + case ddspp::R8G8B8A8_SNORM: + return RenderFormat::R8G8B8A8_SNORM; + case ddspp::R8G8B8A8_SINT: + return RenderFormat::R8G8B8A8_SINT; + case ddspp::B8G8R8A8_UNORM: + return RenderFormat::B8G8R8A8_UNORM; + case ddspp::R16G16_TYPELESS: + return RenderFormat::R16G16_TYPELESS; + case ddspp::R16G16_FLOAT: + return RenderFormat::R16G16_FLOAT; + case ddspp::R16G16_UNORM: + return RenderFormat::R16G16_UNORM; + case ddspp::R16G16_UINT: + return RenderFormat::R16G16_UINT; + case ddspp::R16G16_SNORM: + return RenderFormat::R16G16_SNORM; + case ddspp::R16G16_SINT: + return RenderFormat::R16G16_SINT; + case ddspp::R32_TYPELESS: + return RenderFormat::R32_TYPELESS; + case ddspp::D32_FLOAT: + return RenderFormat::D32_FLOAT; + case ddspp::R32_FLOAT: + return RenderFormat::R32_FLOAT; + case ddspp::R32_UINT: + return RenderFormat::R32_UINT; + case ddspp::R32_SINT: + return RenderFormat::R32_SINT; + case ddspp::R8G8_TYPELESS: + return RenderFormat::R8G8_TYPELESS; + case ddspp::R8G8_UNORM: + return RenderFormat::R8G8_UNORM; + case ddspp::R8G8_UINT: + return RenderFormat::R8G8_UINT; + case ddspp::R8G8_SNORM: + return RenderFormat::R8G8_SNORM; + case ddspp::R8G8_SINT: + return RenderFormat::R8G8_SINT; + case ddspp::R16_TYPELESS: + return RenderFormat::R16_TYPELESS; + case ddspp::R16_FLOAT: + return RenderFormat::R16_FLOAT; + case ddspp::D16_UNORM: + return 
RenderFormat::D16_UNORM; + case ddspp::R16_UNORM: + return RenderFormat::R16_UNORM; + case ddspp::R16_UINT: + return RenderFormat::R16_UINT; + case ddspp::R16_SNORM: + return RenderFormat::R16_SNORM; + case ddspp::R16_SINT: + return RenderFormat::R16_SINT; + case ddspp::R8_TYPELESS: + return RenderFormat::R8_TYPELESS; + case ddspp::R8_UNORM: + return RenderFormat::R8_UNORM; + case ddspp::R8_UINT: + return RenderFormat::R8_UINT; + case ddspp::R8_SNORM: + return RenderFormat::R8_SNORM; + case ddspp::R8_SINT: + return RenderFormat::R8_SINT; + case ddspp::BC1_TYPELESS: + return RenderFormat::BC1_TYPELESS; + case ddspp::BC1_UNORM: + return RenderFormat::BC1_UNORM; + case ddspp::BC1_UNORM_SRGB: + return RenderFormat::BC1_UNORM_SRGB; + case ddspp::BC2_TYPELESS: + return RenderFormat::BC2_TYPELESS; + case ddspp::BC2_UNORM: + return RenderFormat::BC2_UNORM; + case ddspp::BC2_UNORM_SRGB: + return RenderFormat::BC2_UNORM_SRGB; + case ddspp::BC3_TYPELESS: + return RenderFormat::BC3_TYPELESS; + case ddspp::BC3_UNORM: + return RenderFormat::BC3_UNORM; + case ddspp::BC3_UNORM_SRGB: + return RenderFormat::BC3_UNORM_SRGB; + case ddspp::BC4_TYPELESS: + return RenderFormat::BC4_TYPELESS; + case ddspp::BC4_UNORM: + return RenderFormat::BC4_UNORM; + case ddspp::BC4_SNORM: + return RenderFormat::BC4_SNORM; + case ddspp::BC5_TYPELESS: + return RenderFormat::BC5_TYPELESS; + case ddspp::BC5_UNORM: + return RenderFormat::BC5_UNORM; + case ddspp::BC5_SNORM: + return RenderFormat::BC5_SNORM; + case ddspp::BC6H_TYPELESS: + return RenderFormat::BC6H_TYPELESS; + case ddspp::BC6H_UF16: + return RenderFormat::BC6H_UF16; + case ddspp::BC6H_SF16: + return RenderFormat::BC6H_SF16; + case ddspp::BC7_TYPELESS: + return RenderFormat::BC7_TYPELESS; + case ddspp::BC7_UNORM: + return RenderFormat::BC7_UNORM; + case ddspp::BC7_UNORM_SRGB: + return RenderFormat::BC7_UNORM_SRGB; + default: + assert(false && "Unsupported format from DDS."); + return RenderFormat::UNKNOWN; + } +} + +static void 
MakePictureData(GuestPictureData* pictureData, uint8_t* data, uint32_t dataSize)
+{
+    // Creates the host texture for a guest picture from an in-memory DDS file.
+    //
+    // pictureData - guest object; receives the texture pointer and type on success.
+    // data        - start of the DDS blob (header followed by the pixel payload).
+    // dataSize    - size of the blob in bytes; the copy never reads past data + dataSize.
+    //
+    // Bit 0 of flags is tested on entry and set on exit whether or not decoding succeeded, so
+    // the work is attempted once per picture (presumably an "already made" marker -- confirm).
+    if ((pictureData->flags & 0x1) != 0)
+        return;
+
+    ddspp::Descriptor ddsDesc;
+    if (ddspp::decode_header(data, ddsDesc) != ddspp::Error)
+    {
+        const auto texture = g_userHeap.AllocPhysical<GuestTexture>(ResourceType::Texture);
+
+        RenderTextureDesc desc;
+        desc.dimension = ConvertTextureDimension(ddsDesc.type);
+        desc.width = ddsDesc.width;
+        desc.height = ddsDesc.height;
+        desc.depth = ddsDesc.depth;
+        desc.mipLevels = ddsDesc.numMips;
+        // A cubemap is created as a 2D array with six faces per cube.
+        desc.arraySize = ddsDesc.type == ddspp::TextureType::Cubemap ? ddsDesc.arraySize * 6 : ddsDesc.arraySize;
+        desc.format = ConvertDXGIFormat(ddsDesc.format);
+        texture->texture = g_device->createTexture(desc);
+#ifdef _DEBUG
+        texture->texture->setName(reinterpret_cast<char*>(g_memory.Translate(pictureData->name + 2)));
+#endif
+
+        RenderTextureViewDesc viewDesc;
+        viewDesc.format = desc.format;
+        viewDesc.dimension = ConvertTextureViewDimension(ddsDesc.type);
+        viewDesc.mipLevels = ddsDesc.numMips;
+        texture->textureView = texture->texture->createTextureView(viewDesc);
+        texture->descriptorIndex = g_textureDescriptorAllocator.allocate();
+        g_textureDescriptorSet->setTexture(texture->descriptorIndex, texture->texture.get(), RenderTextureLayout::SHADER_READ, texture->textureView.get());
+
+        // One entry per subresource, in the order they are stored in the file
+        // (all mips of array slice 0, then all mips of slice 1, ...).
+        struct Slice
+        {
+            uint32_t width;
+            uint32_t height;
+            uint32_t depth;
+            uint32_t srcOffset;   // offset into the DDS payload (tightly packed rows)
+            uint32_t dstOffset;   // offset into the upload buffer (aligned rows)
+            uint32_t srcRowPitch;
+            uint32_t dstRowPitch;
+            uint32_t rowCount;    // rows of pixels, or rows of blocks for compressed formats
+        };
+
+        std::vector<Slice> slices;
+        uint32_t curSrcOffset = 0;
+        uint32_t curDstOffset = 0;
+
+        for (uint32_t arraySlice = 0; arraySlice < desc.arraySize; arraySlice++)
+        {
+            for (uint32_t mipSlice = 0; mipSlice < ddsDesc.numMips; mipSlice++)
+            {
+                auto& slice = slices.emplace_back();
+
+                slice.width = std::max(1u, ddsDesc.width >> mipSlice);
+                slice.height = std::max(1u, ddsDesc.height >> mipSlice);
+                slice.depth = std::max(1u, ddsDesc.depth >> mipSlice);
+                slice.srcOffset = curSrcOffset;
+                slice.dstOffset = curDstOffset;
+                // Row size in bits first, then rounded up to whole bytes.
+                uint32_t rowPitch = ((slice.width + ddsDesc.blockWidth - 1) / ddsDesc.blockWidth) * ddsDesc.bitsPerPixelOrBlock;
+                slice.srcRowPitch = (rowPitch + 7) / 8;
+                slice.dstRowPitch = (slice.srcRowPitch + PITCH_ALIGNMENT - 1) & ~(PITCH_ALIGNMENT - 1);
+                slice.rowCount = (slice.height + ddsDesc.blockHeight - 1) / ddsDesc.blockHeight;
+
+                curSrcOffset += slice.srcRowPitch * slice.rowCount * slice.depth;
+                curDstOffset += (slice.dstRowPitch * slice.rowCount * slice.depth + PLACEMENT_ALIGNMENT - 1) & ~(PLACEMENT_ALIGNMENT - 1);
+            }
+        }
+
+        auto uploadBuffer = g_device->createBuffer(RenderBufferDesc::UploadBuffer(curDstOffset));
+        uint8_t* mappedMemory = reinterpret_cast<uint8_t*>(uploadBuffer->map());
+
+        // Payload bytes that really follow the header; a truncated file yields fewer than the
+        // header promises, and those rows are zero-filled instead of read out of bounds.
+        const size_t payloadSize = dataSize > ddsDesc.headerSize ? dataSize - ddsDesc.headerSize : 0;
+
+        for (auto& slice : slices)
+        {
+            const size_t totalRows = size_t(slice.rowCount) * slice.depth;
+
+            size_t presentRows = 0;
+            if (slice.srcRowPitch != 0 && slice.srcOffset < payloadSize)
+                presentRows = std::min<size_t>(totalRows, (payloadSize - slice.srcOffset) / slice.srcRowPitch);
+
+            uint8_t* srcData = data + ddsDesc.headerSize + slice.srcOffset;
+            uint8_t* dstData = mappedMemory + slice.dstOffset;
+
+            if (slice.srcRowPitch == slice.dstRowPitch)
+            {
+                // Pitches agree, so the rows are contiguous on both sides.
+                memcpy(dstData, srcData, size_t(slice.srcRowPitch) * presentRows);
+            }
+            else
+            {
+                for (size_t i = 0; i < presentRows; i++)
+                {
+                    memcpy(dstData, srcData, slice.srcRowPitch);
+                    srcData += slice.srcRowPitch;
+                    dstData += slice.dstRowPitch;
+                }
+            }
+
+            if (presentRows < totalRows)
+            {
+                memset(mappedMemory + slice.dstOffset + presentRows * slice.dstRowPitch, 0,
+                    (totalRows - presentRows) * slice.dstRowPitch);
+            }
+        }
+
+        uploadBuffer->unmap();
+
+        ExecuteCopyCommandList([&]
+            {
+                for (size_t i = 0; i < slices.size(); i++)
+                {
+                    auto& slice = slices[i];
+
+                    // The footprint row width is given in texels, so convert the byte pitch back.
+                    g_copyCommandList->copyTextureRegion(
+                        RenderTextureCopyLocation::Subresource(texture->texture.get(), i),
+                        RenderTextureCopyLocation::PlacedFootprint(uploadBuffer.get(), desc.format, slice.width, slice.height, slice.depth, (slice.dstRowPitch * 8) / ddsDesc.bitsPerPixelOrBlock * ddsDesc.blockWidth, slice.dstOffset));
+                }
+            });
+
+        pictureData->texture = g_memory.MapVirtual(texture);
+        pictureData->type = 0;
+    }
+
+    pictureData->flags |= 0x1;
+}
+
+// Mid-asm hook: overwrites both registers with 0.0.
+void HalfPixelOffsetMidAsmHook(PPCRegister& f9, PPCRegister& f0)
+{
+    f9.f64 = 0.0;
+    f0.f64 = 0.0;
+}
+
+// Mid-asm hook: doubles the value held in r3.
+void IndexBufferLengthMidAsmHook(PPCRegister& r3)
+{
+    r3.u64 *= 2;
+}
+
+void
SetShadowResolutionMidAsmHook(PPCRegister& r11) +{ + r11.u64 = 4096; +} + +static void SetResolution(be* device) +{ + device[46] = 1920; + device[47] = 1080; +} + +static uint32_t StubFunction() +{ + return 0; +} + +GUEST_FUNCTION_HOOK(sub_82BD99B0, CreateDevice); + +GUEST_FUNCTION_HOOK(sub_82BE6230, DestructResource); + +GUEST_FUNCTION_HOOK(sub_82BE9300, LockTextureRect); +GUEST_FUNCTION_HOOK(sub_82BE7780, UnlockTextureRect); + +GUEST_FUNCTION_HOOK(sub_82BE6B98, LockVertexBuffer); +GUEST_FUNCTION_HOOK(sub_82BE6BE8, UnlockVertexBuffer); +GUEST_FUNCTION_HOOK(sub_82BE61D0, GetVertexBufferDesc); + +GUEST_FUNCTION_HOOK(sub_82BE6CA8, LockIndexBuffer); +GUEST_FUNCTION_HOOK(sub_82BE6CF0, UnlockIndexBuffer); +GUEST_FUNCTION_HOOK(sub_82BE6200, GetIndexBufferDesc); + +GUEST_FUNCTION_HOOK(sub_82BE96F0, GetSurfaceDesc); + +GUEST_FUNCTION_HOOK(sub_82BE04B0, GetVertexDeclaration); +GUEST_FUNCTION_HOOK(sub_82BE0530, HashVertexDeclaration); + +GUEST_FUNCTION_HOOK(sub_82BDA8C0, Present); +GUEST_FUNCTION_HOOK(sub_82BDD330, GetBackBuffer); + +GUEST_FUNCTION_HOOK(sub_82BE9498, CreateTexture); +GUEST_FUNCTION_HOOK(sub_82BE6AD0, CreateVertexBuffer); +GUEST_FUNCTION_HOOK(sub_82BE6BF8, CreateIndexBuffer); +GUEST_FUNCTION_HOOK(sub_82BE95B8, CreateSurface); + +GUEST_FUNCTION_HOOK(sub_82BF6400, StretchRect); + +GUEST_FUNCTION_HOOK(sub_82BDD9F0, SetRenderTarget); +GUEST_FUNCTION_HOOK(sub_82BDD2F0, GetDepthStencilSurface); +GUEST_FUNCTION_HOOK(sub_82BDDD38, SetDepthStencilSurface); + +GUEST_FUNCTION_HOOK(sub_82BFE4C8, Clear); + +GUEST_FUNCTION_HOOK(sub_82BDD8C0, SetViewport); +GUEST_FUNCTION_HOOK(sub_82BDD0A8, GetViewport); + +GUEST_FUNCTION_HOOK(sub_82BE9818, SetTexture); +GUEST_FUNCTION_HOOK(sub_82BDCFB0, SetScissorRect); + +GUEST_FUNCTION_HOOK(sub_82BE5900, DrawPrimitive); +GUEST_FUNCTION_HOOK(sub_82BE5CF0, DrawIndexedPrimitive); +GUEST_FUNCTION_HOOK(sub_82BE52F8, DrawPrimitiveUP); + +GUEST_FUNCTION_HOOK(sub_82BE0428, CreateVertexDeclaration); +GUEST_FUNCTION_HOOK(sub_82BE02E0, 
SetVertexDeclaration); + +GUEST_FUNCTION_HOOK(sub_82BE1A80, CreateVertexShader); +GUEST_FUNCTION_HOOK(sub_82BE0110, SetVertexShader); + +GUEST_FUNCTION_HOOK(sub_82BDD0F8, SetStreamSource); +GUEST_FUNCTION_HOOK(sub_82BDD218, SetIndices); + +GUEST_FUNCTION_HOOK(sub_82BE1990, CreatePixelShader); +GUEST_FUNCTION_HOOK(sub_82BDFE58, SetPixelShader); + +GUEST_FUNCTION_HOOK(sub_82C00910, D3DXFillVolumeTexture); + +GUEST_FUNCTION_HOOK(sub_82E43FC8, MakePictureData); + +GUEST_FUNCTION_HOOK(sub_82E9EE38, SetResolution); +GUEST_FUNCTION_HOOK(sub_82BE77B0, StubFunction); + +GUEST_FUNCTION_STUB(sub_822C15D8); +GUEST_FUNCTION_STUB(sub_822C1810); +GUEST_FUNCTION_STUB(sub_82BD97A8); +GUEST_FUNCTION_STUB(sub_82BD97E8); +GUEST_FUNCTION_STUB(sub_82BDD370); // SetGammaRamp GUEST_FUNCTION_STUB(sub_82BE05B8); - -// Movie player stubs -GUEST_FUNCTION_STUB(sub_82AE3638); -GUEST_FUNCTION_STUB(sub_82AE2BF8); +GUEST_FUNCTION_STUB(sub_82BE9C98); +GUEST_FUNCTION_STUB(sub_82BEA308); +GUEST_FUNCTION_STUB(sub_82CD5D68); +GUEST_FUNCTION_STUB(sub_82BE9B28); +GUEST_FUNCTION_STUB(sub_82BEA018); +GUEST_FUNCTION_STUB(sub_82BEA7C0); +GUEST_FUNCTION_STUB(sub_82BFFF88); // D3DXFilterTexture +GUEST_FUNCTION_STUB(sub_82E9EF90); // D3DXFillTexture diff --git a/UnleashedRecomp/gpu/video.h b/UnleashedRecomp/gpu/video.h index 9ab4054..70e4339 100644 --- a/UnleashedRecomp/gpu/video.h +++ b/UnleashedRecomp/gpu/video.h @@ -1,4 +1,338 @@ #pragma once -void VdInitializeSystem(); -SWA_API void* VdGetGlobalDevice(); +#include "rhi/rt64_render_interface.h" + +#define D3DCLEAR_TARGET 0x1 +#define D3DCLEAR_ZBUFFER 0x10 + +using namespace RT64; + +struct GuestSamplerState +{ + be data[6]; +}; + +struct GuestDevice +{ + be dirtyFlags[8]; + + be setRenderStateFunctions[0x65]; + uint32_t setSamplerStateFunctions[0x14]; + + uint8_t padding224[0x25C]; + + GuestSamplerState samplerStates[0x20]; + + uint32_t vertexShaderFloatConstants[0x400]; + uint32_t pixelShaderFloatConstants[0x400]; + + be vertexShaderBoolConstants[0x4]; + be 
pixelShaderBoolConstants[0x4]; + + uint8_t padding27A0[0x68C]; + struct GuestVertexDeclaration* vertexDeclaration; + uint8_t padding2E30[0x2FD0]; +}; + +enum class ResourceType +{ + Texture, + VolumeTexture, + VertexBuffer, + IndexBuffer, + RenderTarget, + DepthStencil, + VertexDeclaration, + VertexShader, + PixelShader +}; + +struct GuestResource +{ + uint32_t unused = 0; + be refCount = 1; + ResourceType type; + + GuestResource(ResourceType type) : type(type) + { + } + + void AddRef() + { + uint32_t originalValue, incrementedValue; + do + { + originalValue = refCount.value; + incrementedValue = std::byteswap(std::byteswap(originalValue) + 1); + } while (InterlockedCompareExchange(reinterpret_cast(&refCount), incrementedValue, originalValue) != originalValue); + } +}; + +enum GuestFormat +{ + D3DFMT_A16B16G16R16F = 0x1A22AB60, + D3DFMT_A16B16G16R16F_2 = 0x1A2201BF, + D3DFMT_A8B8G8R8 = 0x1A200186, + D3DFMT_A8R8G8B8 = 0x18280186, + D3DFMT_D24FS8 = 0x1A220197, + D3DFMT_D24S8 = 0x2D200196, + D3DFMT_G16R16F = 0x2D22AB9F, + D3DFMT_G16R16F_2 = 0x2D20AB8D, + D3DFMT_INDEX16 = 1, + D3DFMT_INDEX32 = 6, + D3DFMT_L8 = 0x28000102, + D3DFMT_L8_2 = 0x28000002, + D3DFMT_X8R8G8B8 = 0x28280086, + D3DFMT_UNKNOWN = 0xFFFFFFFF +}; + +// Texture/VolumeTexture +struct GuestTexture : GuestResource +{ + std::unique_ptr texture; + std::unique_ptr textureView; + uint32_t width = 0; + uint32_t height = 0; + uint32_t depth = 0; + RenderFormat format = RenderFormat::UNKNOWN; + void* mappedMemory = nullptr; + uint32_t descriptorIndex = 0; +}; + +struct GuestLockedRect +{ + be pitch; + be bits; +}; + +struct GuestBufferDesc +{ + be format; + be type; + be usage; + be pool; + be size; + be fvf; +}; + +// VertexBuffer/IndexBuffer +struct GuestBuffer : GuestResource +{ + std::unique_ptr buffer; + void* mappedMemory = nullptr; + uint32_t dataSize = 0; + RenderFormat format = RenderFormat::UNKNOWN; + uint32_t guestFormat = 0; + bool lockedReadOnly = false; +}; + +struct GuestSurfaceDesc +{ + be 
format; + be type; + be usage; + be pool; + be multiSampleType; + be multiSampleQuality; + be width; + be height; +}; + +// RenderTarget/DepthStencil +struct GuestSurface : GuestResource +{ + std::unique_ptr textureHolder; + RenderTexture* texture = nullptr; + uint32_t width = 0; + uint32_t height = 0; + RenderFormat format = RenderFormat::UNKNOWN; + ankerl::unordered_dense::map> framebuffers; +}; + +enum GuestDeclType +{ + D3DDECLTYPE_FLOAT1 = 0x2C83A4, + D3DDECLTYPE_FLOAT2 = 0x2C23A5, + D3DDECLTYPE_FLOAT3 = 0x2A23B9, + D3DDECLTYPE_FLOAT4 = 0x1A23A6, + D3DDECLTYPE_D3DCOLOR = 0x182886, + D3DDECLTYPE_UBYTE4 = 0x1A2286, + D3DDECLTYPE_UBYTE4_2 = 0x1A2386, + D3DDECLTYPE_SHORT2 = 0x2C2359, + D3DDECLTYPE_SHORT4 = 0x1A235A, + D3DDECLTYPE_UBYTE4N = 0x1A2086, + D3DDECLTYPE_UBYTE4N_2 = 0x1A2186, + D3DDECLTYPE_SHORT2N = 0x2C2159, + D3DDECLTYPE_SHORT4N = 0x1A215A, + D3DDECLTYPE_USHORT2N = 0x2C2059, + D3DDECLTYPE_USHORT4N = 0x1A205A, + D3DDECLTYPE_UINT1 = 0x2C82A1, + D3DDECLTYPE_UDEC3 = 0x2A2287, + D3DDECLTYPE_DEC3N = 0x2A2187, + D3DDECLTYPE_DEC3N_2 = 0x2A2190, + D3DDECLTYPE_DEC3N_3 = 0x2A2390, + D3DDECLTYPE_FLOAT16_2 = 0x2C235F, + D3DDECLTYPE_FLOAT16_4 = 0x1A2360, + D3DDECLTYPE_UNUSED = 0xFFFFFFFF +}; + +enum GuestDeclUsage +{ + D3DDECLUSAGE_POSITION = 0, + D3DDECLUSAGE_BLENDWEIGHT = 1, + D3DDECLUSAGE_BLENDINDICES = 2, + D3DDECLUSAGE_NORMAL = 3, + D3DDECLUSAGE_PSIZE = 4, + D3DDECLUSAGE_TEXCOORD = 5, + D3DDECLUSAGE_TANGENT = 6, + D3DDECLUSAGE_BINORMAL = 7, + D3DDECLUSAGE_TESSFACTOR = 8, + D3DDECLUSAGE_POSITIONT = 9, + D3DDECLUSAGE_COLOR = 10, + D3DDECLUSAGE_FOG = 11, + D3DDECLUSAGE_DEPTH = 12, + D3DDECLUSAGE_SAMPLE = 13 +}; + +struct GuestVertexElement +{ + be stream; + be offset; + be type; + uint8_t method; + uint8_t usage; + uint8_t usageIndex; + uint8_t padding; +}; + +enum InputLayoutFlags +{ + INPUT_LAYOUT_FLAG_HAS_R11G11B10_NORMAL = 1 << 0, + INPUT_LAYOUT_FLAG_HAS_BONE_WEIGHTS = 1 << 1 +}; + +struct GuestVertexDeclaration : GuestResource +{ + std::unique_ptr 
inputElements; + std::unique_ptr vertexElements; + uint32_t inputElementCount = 0; + uint32_t vertexElementCount = 0; + uint32_t swappedTexcoords = 0; + uint32_t inputLayoutFlags = 0; + uint32_t indexVertexStream = 0; +}; + +// VertexShader/PixelShader +struct GuestShader : GuestResource +{ + std::unique_ptr shader; +}; + +struct GuestViewport +{ + be x; + be y; + be width; + be height; + be minZ; + be maxZ; +}; + +struct GuestRect +{ + be left; + be top; + be right; + be bottom; +}; + +enum GuestRenderState +{ + D3DRS_ZENABLE = 40, + D3DRS_ZFUNC = 44, + D3DRS_ZWRITEENABLE = 48, + D3DRS_CULLMODE = 56, + D3DRS_ALPHABLENDENABLE = 60, + D3DRS_SRCBLEND = 72, + D3DRS_DESTBLEND = 76, + D3DRS_BLENDOP = 80, + D3DRS_SRCBLENDALPHA = 84, + D3DRS_DESTBLENDALPHA = 88, + D3DRS_BLENDOPALPHA = 92, + D3DRS_ALPHATESTENABLE = 96, + D3DRS_ALPHAREF = 100, + D3DRS_SCISSORTESTENABLE = 200, + D3DRS_SLOPESCALEDEPTHBIAS = 204, + D3DRS_DEPTHBIAS = 208, + D3DRS_COLORWRITEENABLE = 212 +}; + +enum GuestCullMode +{ + D3DCULL_NONE = 0, + D3DCULL_CW = 2, + D3DCULL_NONE_2 = 4, + D3DCULL_CCW = 6 +}; + +enum GuestBlendMode +{ + D3DBLEND_ZERO = 0, + D3DBLEND_ONE = 1, + D3DBLEND_SRCCOLOR = 4, + D3DBLEND_INVSRCCOLOR = 5, + D3DBLEND_SRCALPHA = 6, + D3DBLEND_INVSRCALPHA = 7, + D3DBLEND_DESTCOLOR = 8, + D3DBLEND_INVDESTCOLOR = 9, + D3DBLEND_DESTALPHA = 10, + D3DBLEND_INVDESTALPHA = 11 +}; + +enum GuestBlendOp +{ + D3DBLENDOP_ADD = 0, + D3DBLENDOP_SUBTRACT = 1, + D3DBLENDOP_MIN = 2, + D3DBLENDOP_MAX = 3, + D3DBLENDOP_REVSUBTRACT = 4 +}; + +enum GuestCmpFunc +{ + D3DCMP_NEVER = 0, + D3DCMP_LESS = 1, + D3DCMP_EQUAL = 2, + D3DCMP_LESSEQUAL = 3, + D3DCMP_GREATER = 4, + D3DCMP_NOTEQUAL = 5, + D3DCMP_GREATEREQUAL = 6, + D3DCMP_ALWAYS = 7 +}; + +enum GuestPrimitiveType +{ + D3DPT_POINTLIST = 1, + D3DPT_LINELIST = 2, + D3DPT_LINESTRIP = 3, + D3DPT_TRIANGLELIST = 4, + D3DPT_TRIANGLEFAN = 5, + D3DPT_TRIANGLESTRIP = 6, + D3DPT_QUADLIST = 13 +}; + +enum GuestTextureFilterType +{ + D3DTEXF_POINT = 0, + D3DTEXF_LINEAR = 
1, + D3DTEXF_NONE = 2 +}; + +enum GuestTextureAddress +{ + D3DTADDRESS_WRAP = 0, + D3DTADDRESS_MIRROR = 1, + D3DTADDRESS_CLAMP = 2, + D3DTADDRESS_MIRRORONCE = 3, + D3DTADDRESS_BORDER = 6 +}; diff --git a/UnleashedRecomp/main.cpp b/UnleashedRecomp/main.cpp index af2ae66..fbd8fbf 100644 --- a/UnleashedRecomp/main.cpp +++ b/UnleashedRecomp/main.cpp @@ -55,6 +55,27 @@ void KiSystemStartup() // OS mounts game data to D: XamContentCreateEx(0, "D", &gameContent, OPEN_EXISTING, nullptr, nullptr, 0, 0, nullptr); + WIN32_FIND_DATAA fdata; + const auto findHandle = FindFirstFileA(".\\dlc\\*.*", &fdata); + if (findHandle != INVALID_HANDLE_VALUE) + { + char strBuf[256]; + do + { + if (strcmp(fdata.cFileName, ".") == 0 || strcmp(fdata.cFileName, "..") == 0) + { + continue; + } + + if (fdata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + { + snprintf(strBuf, sizeof(strBuf), ".\\dlc\\%s", fdata.cFileName); + XamRegisterContent(XamMakeContent(XCONTENTTYPE_DLC, fdata.cFileName), strBuf); + } + } while (FindNextFileA(findHandle, &fdata)); + FindClose(findHandle); + } + XAudioInitializeSystem(); hid::Init(); } @@ -107,8 +128,6 @@ int main() uint32_t entry = LdrLoadModule(FileSystem::TransformPath(GAME_XEX_PATH)); - VdInitializeSystem(); - GuestThread::Start(entry); return 0; diff --git a/UnleashedRecomp/misc_impl.cpp b/UnleashedRecomp/misc_impl.cpp index 94b3b30..2ff7517 100644 --- a/UnleashedRecomp/misc_impl.cpp +++ b/UnleashedRecomp/misc_impl.cpp @@ -1,19 +1,6 @@ #include "stdafx.h" #include -// TODO: Multiplatform support -void CriErrNotify1(const char* message) -{ - OutputDebugStringA(message); - OutputDebugStringA("\n"); -} - -void CriErrNotify2(uint32_t category, const char* message) -{ - OutputDebugStringA(message); - OutputDebugStringA("\n"); -} - BOOL QueryPerformanceCounterImpl(LARGE_INTEGER* lpPerformanceCount) { BOOL result = QueryPerformanceCounter(lpPerformanceCount); @@ -53,13 +40,21 @@ GUEST_FUNCTION_HOOK(sub_831CCAA0, memset); GUEST_FUNCTION_HOOK(sub_82BD4CA8, 
OutputDebugStringA); -#ifdef _DEBUG -GUEST_FUNCTION_HOOK(sub_8312EB48, CriErrNotify1); -GUEST_FUNCTION_HOOK(sub_83185B00, CriErrNotify1); -GUEST_FUNCTION_HOOK(sub_831683E0, CriErrNotify2); -#endif - GUEST_FUNCTION_HOOK(sub_82BD4AC8, QueryPerformanceCounterImpl); GUEST_FUNCTION_HOOK(sub_831CD040, QueryPerformanceFrequencyImpl); GUEST_FUNCTION_HOOK(sub_82BD4BC0, GlobalMemoryStatusImpl); + +// Movie player stubs +GUEST_FUNCTION_STUB(sub_82AE3638); +GUEST_FUNCTION_STUB(sub_82AE2BF8); + +// Logo skip +PPC_FUNC(sub_82547DF0) +{ + ctx.r4.u64 = 0; + ctx.r5.u64 = 0; + ctx.r6.u64 = 1; + ctx.r7.u64 = 0; + sub_825517C8(ctx, base); +} diff --git a/UnleashedRecomp/ppc/config/SWA.toml b/UnleashedRecomp/ppc/config/SWA.toml index cdfa922..625d50d 100644 --- a/UnleashedRecomp/ppc/config/SWA.toml +++ b/UnleashedRecomp/ppc/config/SWA.toml @@ -77,4 +77,19 @@ invalid_instructions = [ { data = 0x8324B3BC, size = 8 }, # C Specific Frame Handler { data = 0x831C8B50, size = 8 }, { data = 0x00485645, size = 44 } # End of .text -] \ No newline at end of file +] + +[[midasm_hook]] +name = "HalfPixelOffsetMidAsmHook" +address = 0x82E9FD64 +registers = ["f9", "f0"] + +[[midasm_hook]] +name = "IndexBufferLengthMidAsmHook" +address = 0x82E26244 +registers = ["r3"] + +[[midasm_hook]] +name = "SetShadowResolutionMidAsmHook" +address = 0x82BAD87C +registers = ["r11"] \ No newline at end of file diff --git a/UnleashedRecomp/stdafx.h b/UnleashedRecomp/stdafx.h index 7c42db8..07e5945 100644 --- a/UnleashedRecomp/stdafx.h +++ b/UnleashedRecomp/stdafx.h @@ -9,6 +9,7 @@ #include #include #include +#include #include "framework.h" #include "Mutex.h" diff --git a/thirdparty/D3D12MemoryAllocator b/thirdparty/D3D12MemoryAllocator new file mode 160000 index 0000000..e00c4a7 --- /dev/null +++ b/thirdparty/D3D12MemoryAllocator @@ -0,0 +1 @@ +Subproject commit e00c4a7c85cf9c28c6f4a6cc75032736f416410f diff --git a/thirdparty/PowerRecomp b/thirdparty/PowerRecomp index 1f5d7c3..516e23f 160000 --- 
a/thirdparty/PowerRecomp +++ b/thirdparty/PowerRecomp @@ -1 +1 @@ -Subproject commit 1f5d7c32e8205ef10dab458394ebc760fb5a0bdc +Subproject commit 516e23f1f7781265a99ee90264c2b1f8f58378b3 diff --git a/thirdparty/ddspp b/thirdparty/ddspp new file mode 160000 index 0000000..1390499 --- /dev/null +++ b/thirdparty/ddspp @@ -0,0 +1 @@ +Subproject commit 1390499ec9f7b82e7a9cbdeb2e6191808e981f84