#include #include #include #include #include #include #include #include #include "imgui_snapshot.h" #include "gpu/video.h" #include "ui/window.h" #include "config.h" #include "shader/copy_vs.hlsl.dxil.h" #include "shader/copy_vs.hlsl.spirv.h" #include "shader/imgui_ps.hlsl.dxil.h" #include "shader/imgui_ps.hlsl.spirv.h" #include "shader/imgui_vs.hlsl.dxil.h" #include "shader/imgui_vs.hlsl.spirv.h" #include "shader/movie_vs.hlsl.dxil.h" #include "shader/movie_vs.hlsl.spirv.h" #include "shader/movie_ps.hlsl.dxil.h" #include "shader/movie_ps.hlsl.spirv.h" #include "shader/resolve_msaa_depth_2x.hlsl.dxil.h" #include "shader/resolve_msaa_depth_2x.hlsl.spirv.h" #include "shader/resolve_msaa_depth_4x.hlsl.dxil.h" #include "shader/resolve_msaa_depth_4x.hlsl.spirv.h" #include "shader/resolve_msaa_depth_8x.hlsl.dxil.h" #include "shader/resolve_msaa_depth_8x.hlsl.spirv.h" extern "C" { __declspec(dllexport) unsigned long NvOptimusEnablement = 0x00000001; __declspec(dllexport) int AmdPowerXpressRequestHighPerformance = 1; } namespace RT64 { extern std::unique_ptr CreateD3D12Interface(); extern std::unique_ptr CreateVulkanInterface(); } struct PipelineState { GuestShader* vertexShader = nullptr; GuestShader* pixelShader = nullptr; GuestVertexDeclaration* vertexDeclaration = nullptr; bool instancing = false; bool zEnable = true; bool zWriteEnable = true; RenderBlend srcBlend = RenderBlend::ONE; RenderBlend destBlend = RenderBlend::ZERO; RenderCullMode cullMode = RenderCullMode::NONE; RenderComparisonFunction zFunc = RenderComparisonFunction::LESS; bool alphaBlendEnable = false; RenderBlendOperation blendOp = RenderBlendOperation::ADD; float slopeScaledDepthBias = 0.0f; int32_t depthBias = 0; RenderBlend srcBlendAlpha = RenderBlend::ONE; RenderBlend destBlendAlpha = RenderBlend::ZERO; RenderBlendOperation blendOpAlpha = RenderBlendOperation::ADD; uint32_t colorWriteEnable = uint32_t(RenderColorWriteEnable::ALL); RenderPrimitiveTopology primitiveTopology = 
RenderPrimitiveTopology::TRIANGLE_LIST; uint8_t vertexStrides[16]{}; RenderFormat renderTargetFormat{}; RenderFormat depthStencilFormat{}; RenderSampleCounts sampleCount = RenderSampleCount::COUNT_1; bool enableAlphaToCoverage = false; }; enum class AlphaTestMode : uint32_t { Disabled, AlphaThreshold, AlphaToCoverage }; struct SharedConstants { uint32_t texture2DIndices[16]{}; uint32_t texture3DIndices[16]{}; uint32_t textureCubeIndices[16]{}; uint32_t samplerIndices[16]{}; AlphaTestMode alphaTestMode{}; float alphaThreshold{}; uint32_t booleans{}; uint32_t swappedTexcoords{}; uint32_t inputLayoutFlags{}; uint32_t enableGIBicubicFiltering{}; }; static GuestSurface* g_renderTarget; static GuestSurface* g_depthStencil; static RenderFramebuffer* g_framebuffer; static RenderViewport g_viewport(0.0f, 0.0f, 1280.0f, 720.0f); static bool g_halfPixel = true; static uint32_t g_zFunc; static PipelineState g_pipelineState; static SharedConstants g_sharedConstants; static RenderSamplerDesc g_samplerDescs[16]; static bool g_scissorTestEnable = false; static RenderRect g_scissorRect; static RenderVertexBufferView g_vertexBufferViews[16]; static RenderInputSlot g_inputSlots[16]; static RenderIndexBufferView g_indexBufferView({}, 0, RenderFormat::R16_UINT); struct DirtyStates { bool renderTargetAndDepthStencil; bool viewport; bool pipelineState; bool sharedConstants; bool scissorRect; bool vertexShaderConstants; uint8_t vertexStreamFirst; uint8_t vertexStreamLast; bool indices; bool pixelShaderConstants; DirtyStates(bool value) : renderTargetAndDepthStencil(value) , viewport(value) , pipelineState(value) , sharedConstants(value) , scissorRect(value) , vertexShaderConstants(value) , vertexStreamFirst(value ? 0 : 255) , vertexStreamLast(value ? 
15 : 0) , indices(value) , pixelShaderConstants(value) { } }; static DirtyStates g_dirtyStates(true); template static FORCEINLINE void SetDirtyValue(bool& dirtyState, T& dest, const T& src) { if (dest != src) { dest = src; dirtyState = true; } } static bool g_vulkan; static std::unique_ptr g_interface; static std::unique_ptr g_device; static bool g_triangleFanSupported; static constexpr size_t NUM_FRAMES = 2; static uint32_t g_frame = 0; static uint32_t g_nextFrame = 1; static std::unique_ptr g_queue; static std::unique_ptr g_commandLists[NUM_FRAMES]; static std::unique_ptr g_commandFences[NUM_FRAMES]; static bool g_commandListStates[NUM_FRAMES]; static Mutex g_copyMutex; static std::unique_ptr g_copyQueue; static std::unique_ptr g_copyCommandList; static std::unique_ptr g_copyCommandFence; static std::unique_ptr g_swapChain; static bool g_swapChainValid; static bool g_needsResize; static std::unique_ptr g_acquireSemaphores[NUM_FRAMES]; static std::unique_ptr g_renderSemaphores[NUM_FRAMES]; static uint32_t g_backBufferIndex; static GuestSurface* g_backBuffer; struct std::unique_ptr g_textureDescriptorSet; struct std::unique_ptr g_samplerDescriptorSet; enum { TEXTURE_DESCRIPTOR_NULL_TEXTURE_2D, TEXTURE_DESCRIPTOR_NULL_TEXTURE_3D, TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE, TEXTURE_DESCRIPTOR_NULL_COUNT }; struct TextureDescriptorAllocator { Mutex mutex; uint32_t capacity = TEXTURE_DESCRIPTOR_NULL_COUNT; std::vector freed; uint32_t allocate() { std::lock_guard lock(mutex); uint32_t value; if (!freed.empty()) { value = freed.back(); freed.pop_back(); } else { value = capacity; ++capacity; } return value; } void free(uint32_t value) { assert(value != NULL); std::lock_guard lock(mutex); freed.push_back(value); } }; static std::unique_ptr g_blankTextures[TEXTURE_DESCRIPTOR_NULL_COUNT]; static std::unique_ptr g_blankTextureViews[TEXTURE_DESCRIPTOR_NULL_COUNT]; static TextureDescriptorAllocator g_textureDescriptorAllocator; static std::unique_ptr g_pipelineLayout; static 
xxHashMap> g_pipelines; static xxHashMap>> g_samplerStates; static Mutex g_vertexDeclarationMutex; static xxHashMap g_vertexDeclarations; struct UploadBuffer { static constexpr size_t SIZE = 16 * 1024 * 1024; std::unique_ptr buffer; uint8_t* memory = nullptr; uint64_t deviceAddress = 0; }; struct UploadAllocation { const RenderBuffer* buffer; uint64_t offset; uint8_t* memory; uint64_t deviceAddress; }; struct UploadAllocator { std::vector buffers; uint32_t index = 0; uint32_t offset = 0; Mutex mutex; UploadAllocation allocate(uint32_t size, uint32_t alignment) { std::lock_guard lock(mutex); assert(size <= UploadBuffer::SIZE); offset = (offset + alignment - 1) & ~(alignment - 1); if (offset + size > UploadBuffer::SIZE) { ++index; offset = 0; } if (buffers.size() <= index) buffers.resize(index + 1); auto& buffer = buffers[index]; if (buffer.buffer == nullptr) { buffer.buffer = g_device->createBuffer(RenderBufferDesc::UploadBuffer(UploadBuffer::SIZE, RenderBufferFlag::CONSTANT | RenderBufferFlag::VERTEX | RenderBufferFlag::INDEX)); buffer.memory = reinterpret_cast(buffer.buffer->map()); buffer.deviceAddress = buffer.buffer->getDeviceAddress(); } auto ref = buffer.buffer->at(offset); offset += size; return { ref.ref, ref.offset, buffer.memory + ref.offset, buffer.deviceAddress + ref.offset }; } template UploadAllocation allocate(const T* memory, uint32_t size, uint32_t alignment) { auto result = allocate(size, alignment); if constexpr (TByteSwap) { auto destination = reinterpret_cast(result.memory); for (size_t i = 0; i < size; i += sizeof(T)) { *destination = std::byteswap(*memory); ++destination; ++memory; } } else { memcpy(result.memory, memory, size); } return result; } void reset() { index = 0; offset = 0; } }; static UploadAllocator g_uploadAllocators[NUM_FRAMES]; static std::vector g_tempResources[NUM_FRAMES]; static std::vector> g_tempBuffers[NUM_FRAMES]; template struct PrimitiveIndexData { std::vector indexData; RenderBufferReference indexBuffer; uint32_t 
currentIndexCount = 0; uint32_t prepare(uint32_t guestPrimCount) { uint32_t primCount; uint32_t indexCountPerPrimitive; switch (PrimitiveType) { case D3DPT_TRIANGLEFAN: primCount = guestPrimCount - 2; indexCountPerPrimitive = 3; break; case D3DPT_QUADLIST: primCount = guestPrimCount / 4; indexCountPerPrimitive = 6; break; default: assert(false && "Unknown primitive type."); break; } uint32_t indexCount = primCount * indexCountPerPrimitive; if (indexData.size() < indexCount) { const size_t oldPrimCount = indexData.size() / indexCountPerPrimitive; indexData.resize(indexCount); for (size_t i = oldPrimCount; i < primCount; i++) { switch (PrimitiveType) { case D3DPT_TRIANGLEFAN: { indexData[i * 3 + 0] = 0; indexData[i * 3 + 1] = static_cast(i + 1); indexData[i * 3 + 2] = static_cast(i + 2); break; } case D3DPT_QUADLIST: { indexData[i * 6 + 0] = static_cast(i * 4 + 0); indexData[i * 6 + 1] = static_cast(i * 4 + 1); indexData[i * 6 + 2] = static_cast(i * 4 + 2); indexData[i * 6 + 3] = static_cast(i * 4 + 0); indexData[i * 6 + 4] = static_cast(i * 4 + 2); indexData[i * 6 + 5] = static_cast(i * 4 + 3); break; } default: assert(false && "Unknown primitive type."); break; } } } if (indexBuffer == NULL || currentIndexCount < indexCount) { auto allocation = g_uploadAllocators[g_frame].allocate(indexData.data(), indexCount * 2, 2); indexBuffer = allocation.buffer->at(allocation.offset); currentIndexCount = indexCount; } SetDirtyValue(g_dirtyStates.indices, g_indexBufferView.buffer, indexBuffer); SetDirtyValue(g_dirtyStates.indices, g_indexBufferView.size, indexCount * 2); SetDirtyValue(g_dirtyStates.indices, g_indexBufferView.format, RenderFormat::R16_UINT); return indexCount; } void reset() { indexBuffer = {}; currentIndexCount = 0; } }; static PrimitiveIndexData g_triangleFanIndexData; static PrimitiveIndexData g_quadIndexData; static void DestructTempResources() { for (auto resource : g_tempResources[g_frame]) { switch (resource->type) { case ResourceType::Texture: case 
ResourceType::VolumeTexture: { const auto texture = reinterpret_cast(resource); if (texture->mappedMemory != nullptr) g_userHeap.Free(texture->mappedMemory); g_textureDescriptorAllocator.free(texture->descriptorIndex); texture->~GuestTexture(); break; } case ResourceType::VertexBuffer: case ResourceType::IndexBuffer: { const auto buffer = reinterpret_cast(resource); if (buffer->mappedMemory != nullptr) g_userHeap.Free(buffer->mappedMemory); buffer->~GuestBuffer(); break; } case ResourceType::RenderTarget: case ResourceType::DepthStencil: { const auto surface = reinterpret_cast(resource); if (surface->descriptorIndex != NULL) g_textureDescriptorAllocator.free(surface->descriptorIndex); surface->~GuestSurface(); break; } case ResourceType::VertexDeclaration: reinterpret_cast(resource)->~GuestVertexDeclaration(); break; case ResourceType::VertexShader: case ResourceType::PixelShader: reinterpret_cast(resource)->~GuestShader(); break; } g_userHeap.Free(resource); } g_tempResources[g_frame].clear(); g_tempBuffers[g_frame].clear(); } static uint32_t g_mainThreadId; static ankerl::unordered_dense::map g_barrierMap; static void AddBarrier(GuestBaseTexture* texture, RenderTextureLayout layout) { if (texture != nullptr && texture->layout != layout) { g_barrierMap[texture->texture] = layout; texture->layout = layout; } } static std::vector g_barriers; static void FlushBarriers() { if (!g_barrierMap.empty()) { for (auto& [texture, layout] : g_barrierMap) g_barriers.emplace_back(texture, layout); g_commandLists[g_frame]->barriers(RenderBarrierStage::GRAPHICS | RenderBarrierStage::COPY, g_barriers); g_barrierMap.clear(); g_barriers.clear(); } } static std::unique_ptr g_shaderCache; static void LoadShaderCache() { const size_t decompressedSize = g_vulkan ? g_spirvCacheDecompressedSize : g_dxilCacheDecompressedSize; g_shaderCache = std::make_unique(decompressedSize); ZSTD_decompress(g_shaderCache.get(), decompressedSize, g_vulkan ? 
g_compressedSpirvCache : g_compressedDxilCache, g_vulkan ? g_spirvCacheCompressedSize : g_dxilCacheCompressedSize); } enum class RenderCommandType { SetRenderState, DestructResource, UnlockTextureRect, UnlockBuffer16, UnlockBuffer32, DrawImGui, Present, StretchRect, SetRenderTarget, SetDepthStencilSurface, Clear, SetViewport, SetTexture, SetScissorRect, SetSamplerState, SetBooleans, SetVertexShaderConstants, SetPixelShaderConstants, DrawPrimitive, DrawIndexedPrimitive, DrawPrimitiveUP, SetVertexDeclaration, SetVertexShader, SetStreamSource, SetIndices, SetPixelShader }; struct RenderCommand { RenderCommandType type; union { struct { GuestRenderState type; uint32_t value; } setRenderState; struct { GuestResource* resource; } destructResource; struct { GuestTexture* texture; } unlockTextureRect; struct { GuestBuffer* buffer; } unlockBuffer; struct { GuestDevice* device; uint32_t flags; GuestTexture* texture; } stretchRect; struct { GuestSurface* renderTarget; } setRenderTarget; struct { GuestSurface* depthStencil; } setDepthStencilSurface; struct { uint32_t flags; float color[4]; float z; } clear; struct { float x; float y; float width; float height; float minDepth; float maxDepth; } setViewport; struct { uint32_t index; GuestTexture* texture; } setTexture; struct { int32_t left; int32_t top; int32_t right; int32_t bottom; } setScissorRect; struct { uint32_t index; uint32_t data0; uint32_t data3; uint32_t data5; } setSamplerState; struct { uint32_t booleans; } setBooleans; struct { UploadAllocation allocation; } setVertexShaderConstants; struct { UploadAllocation allocation; } setPixelShaderConstants; struct { uint32_t primitiveType; uint32_t startVertex; uint32_t primitiveCount; } drawPrimitive; struct { uint32_t primitiveType; int32_t baseVertexIndex; uint32_t startIndex; uint32_t primCount; } drawIndexedPrimitive; struct { uint32_t primitiveType; uint32_t primitiveCount; UploadAllocation vertexStreamZeroData; uint32_t vertexStreamZeroStride; } drawPrimitiveUP; 
struct { GuestVertexDeclaration* vertexDeclaration; } setVertexDeclaration; struct { GuestShader* shader; } setVertexShader; struct { uint32_t index; GuestBuffer* buffer; uint32_t offset; uint32_t stride; } setStreamSource; struct { GuestBuffer* buffer; } setIndices; struct { GuestShader* shader; } setPixelShader; }; }; static moodycamel::BlockingConcurrentQueue g_renderQueue; template static void SetRenderState(GuestDevice* device, uint32_t value) { RenderCommand cmd; cmd.type = RenderCommandType::SetRenderState; cmd.setRenderState.type = TType; cmd.setRenderState.value = value; g_renderQueue.enqueue(cmd); } static void SetRenderStateUnimplemented(GuestDevice* device, uint32_t value) { } static void SetAlphaTestMode(bool enable) { AlphaTestMode alphaTestMode = AlphaTestMode::Disabled; if (enable) { if (Config::AlphaToCoverage && g_renderTarget != nullptr && g_renderTarget->sampleCount != RenderSampleCount::COUNT_1) alphaTestMode = AlphaTestMode::AlphaToCoverage; else alphaTestMode = AlphaTestMode::AlphaThreshold; } SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.alphaTestMode, alphaTestMode); SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.enableAlphaToCoverage, alphaTestMode == AlphaTestMode::AlphaToCoverage); } static RenderBlend ConvertBlendMode(uint32_t blendMode) { switch (blendMode) { case D3DBLEND_ZERO: return RenderBlend::ZERO; case D3DBLEND_ONE: return RenderBlend::ONE; case D3DBLEND_SRCCOLOR: return RenderBlend::SRC_COLOR; case D3DBLEND_INVSRCCOLOR: return RenderBlend::INV_SRC_COLOR; case D3DBLEND_SRCALPHA: return RenderBlend::SRC_ALPHA; case D3DBLEND_INVSRCALPHA: return RenderBlend::INV_SRC_ALPHA; case D3DBLEND_DESTCOLOR: return RenderBlend::DEST_COLOR; case D3DBLEND_INVDESTCOLOR: return RenderBlend::INV_DEST_COLOR; case D3DBLEND_DESTALPHA: return RenderBlend::DEST_ALPHA; case D3DBLEND_INVDESTALPHA: return RenderBlend::INV_DEST_ALPHA; default: assert(false && "Invalid blend mode"); return RenderBlend::ZERO; } } // The game 
renders the main scene with reverse Z where the viewport's minDepth and maxDepth // values are swapped. We negate this to improve compatibility with old hardware. static RenderComparisonFunction ConvertComparisonFunc(uint32_t cmpFunc, bool reverseZ) { switch (cmpFunc) { case D3DCMP_LESS: return reverseZ ? RenderComparisonFunction::GREATER : RenderComparisonFunction::LESS; case D3DCMP_LESSEQUAL: return reverseZ ? RenderComparisonFunction::GREATER_EQUAL : RenderComparisonFunction::LESS_EQUAL; case D3DCMP_GREATER: return reverseZ ? RenderComparisonFunction::LESS : RenderComparisonFunction::GREATER; case D3DCMP_GREATEREQUAL: return reverseZ ? RenderComparisonFunction::LESS_EQUAL : RenderComparisonFunction::GREATER_EQUAL; case D3DCMP_NEVER: return RenderComparisonFunction::NEVER; case D3DCMP_EQUAL: return RenderComparisonFunction::EQUAL; case D3DCMP_NOTEQUAL: return RenderComparisonFunction::NOT_EQUAL; case D3DCMP_ALWAYS: return RenderComparisonFunction::ALWAYS; default: assert(false && "Unknown comparison function"); return RenderComparisonFunction::NEVER; } } static RenderBlendOperation ConvertBlendOp(uint32_t blendOp) { switch (blendOp) { case D3DBLENDOP_ADD: return RenderBlendOperation::ADD; case D3DBLENDOP_SUBTRACT: return RenderBlendOperation::SUBTRACT; case D3DBLENDOP_REVSUBTRACT: return RenderBlendOperation::REV_SUBTRACT; case D3DBLENDOP_MIN: return RenderBlendOperation::MIN; case D3DBLENDOP_MAX: return RenderBlendOperation::MAX; default: assert(false && "Unknown blend operation"); return RenderBlendOperation::ADD; } } static void ProcSetRenderState(const RenderCommand& cmd) { uint32_t value = cmd.setRenderState.value; switch (cmd.setRenderState.type) { case D3DRS_ZENABLE: { SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.zEnable, value != 0); g_dirtyStates.renderTargetAndDepthStencil |= g_dirtyStates.pipelineState; break; } case D3DRS_ZWRITEENABLE: { SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.zWriteEnable, value != 0); break; } case 
D3DRS_ALPHATESTENABLE:
    {
        SetAlphaTestMode(value != 0);
        break;
    }
    case D3DRS_SRCBLEND:
    {
        SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.srcBlend, ConvertBlendMode(value));
        break;
    }
    case D3DRS_DESTBLEND:
    {
        SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.destBlend, ConvertBlendMode(value));
        break;
    }
    case D3DRS_CULLMODE:
    {
        RenderCullMode cullMode;

        switch (value)
        {
        case D3DCULL_NONE:
        case D3DCULL_NONE_2:
            cullMode = RenderCullMode::NONE;
            break;
        case D3DCULL_CW:
            cullMode = RenderCullMode::FRONT;
            break;
        case D3DCULL_CCW:
            cullMode = RenderCullMode::BACK;
            break;
        default:
            // Fall back to no culling so release builds stay well defined.
            assert(false && "Invalid cull mode");
            cullMode = RenderCullMode::NONE;
            break;
        }

        SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.cullMode, cullMode);
        break;
    }
    case D3DRS_ZFUNC:
    {
        // The raw guest value is remembered in g_zFunc; the converted function
        // depends on whether the current viewport depth range is reversed.
        g_zFunc = value;
        SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.zFunc, ConvertComparisonFunc(value, g_viewport.minDepth >= g_viewport.maxDepth));
        break;
    }
    case D3DRS_ALPHAREF:
    {
        // The threshold is stored in g_sharedConstants, so it is the shared
        // constants that must be flagged dirty (as SetAlphaTestMode does for
        // alphaTestMode), not the pipeline state.
        SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.alphaThreshold, float(value) / 256.0f);
        break;
    }
    case D3DRS_ALPHABLENDENABLE:
    {
        SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.alphaBlendEnable, value != 0);
        break;
    }
    case D3DRS_BLENDOP:
    {
        SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.blendOp, ConvertBlendOp(value));
        break;
    }
    case D3DRS_SCISSORTESTENABLE:
    {
        SetDirtyValue(g_dirtyStates.scissorRect, g_scissorTestEnable, value != 0);
        break;
    }
    case D3DRS_SLOPESCALEDEPTHBIAS:
    {
        // The 32-bit render state value carries the bit pattern of a float.
        SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.slopeScaledDepthBias, std::bit_cast<float>(value));
        break;
    }
    case D3DRS_DEPTHBIAS:
    {
        // Float bit pattern again, scaled by 2^24 to obtain the integer bias.
        SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.depthBias, int32_t(std::bit_cast<float>(value) * (1 << 24)));
        break;
    }
    case D3DRS_SRCBLENDALPHA:
    {
        SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.srcBlendAlpha, ConvertBlendMode(value));
        break;
    }
    case D3DRS_DESTBLENDALPHA:
    {
        SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.destBlendAlpha,
ConvertBlendMode(value)); break; } case D3DRS_BLENDOPALPHA: { SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.blendOpAlpha, ConvertBlendOp(value)); break; } case D3DRS_COLORWRITEENABLE: { SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.colorWriteEnable, value); g_dirtyStates.renderTargetAndDepthStencil |= g_dirtyStates.pipelineState; break; } } } static const std::pair g_setRenderStateFunctions[] = { { D3DRS_ZENABLE, GuestFunction> }, { D3DRS_ZWRITEENABLE, GuestFunction> }, { D3DRS_ALPHATESTENABLE, GuestFunction> }, { D3DRS_SRCBLEND, GuestFunction> }, { D3DRS_DESTBLEND, GuestFunction> }, { D3DRS_CULLMODE, GuestFunction> }, { D3DRS_ZFUNC, GuestFunction> }, { D3DRS_ALPHAREF, GuestFunction> }, { D3DRS_ALPHABLENDENABLE, GuestFunction> }, { D3DRS_BLENDOP, GuestFunction> }, { D3DRS_SCISSORTESTENABLE, GuestFunction> }, { D3DRS_SLOPESCALEDEPTHBIAS, GuestFunction> }, { D3DRS_DEPTHBIAS, GuestFunction> }, { D3DRS_SRCBLENDALPHA, GuestFunction> }, { D3DRS_DESTBLENDALPHA, GuestFunction> }, { D3DRS_BLENDOPALPHA, GuestFunction> }, { D3DRS_COLORWRITEENABLE, GuestFunction> } }; static std::unique_ptr g_resolveMsaaDepthPipelines[3]; #define CREATE_SHADER(NAME) \ g_device->createShader( \ g_vulkan ? g_##NAME##_spirv : g_##NAME##_dxil, \ g_vulkan ? sizeof(g_##NAME##_spirv) : sizeof(g_##NAME##_dxil), \ "main", \ g_vulkan ? 
RenderShaderFormat::SPIRV : RenderShaderFormat::DXIL) static bool DetectWine() { HMODULE dllHandle = GetModuleHandle("ntdll.dll"); return dllHandle != nullptr && GetProcAddress(dllHandle, "wine_get_version") != nullptr; } static constexpr size_t TEXTURE_DESCRIPTOR_SIZE = 65536; static constexpr size_t SAMPLER_DESCRIPTOR_SIZE = 1024; static std::unique_ptr g_imFontTexture; static std::unique_ptr g_imFontTextureView; static uint32_t g_imFontTextureDescriptorIndex; static bool g_imPendingBarrier = true; static std::unique_ptr g_imPipelineLayout; static std::unique_ptr g_imPipeline; static ImDrawDataSnapshot g_imSnapshot; template static void ExecuteCopyCommandList(const T& function) { std::lock_guard lock(g_copyMutex); g_copyCommandList->begin(); function(); g_copyCommandList->end(); g_copyQueue->executeCommandLists(g_copyCommandList.get(), g_copyCommandFence.get()); g_copyQueue->waitForCommandFence(g_copyCommandFence.get()); } static constexpr uint32_t PITCH_ALIGNMENT = 0x100; static constexpr uint32_t PLACEMENT_ALIGNMENT = 0x200; static void CreateImGuiBackend() { ImGuiIO& io = ImGui::GetIO(); io.BackendFlags |= ImGuiBackendFlags_RendererHasVtxOffset; ImGui_ImplSDL2_InitForOther(Window::s_pWindow); uint8_t* pixels; int width, height; io.Fonts->GetTexDataAsAlpha8(&pixels, &width, &height); RenderTextureDesc textureDesc; textureDesc.dimension = RenderTextureDimension::TEXTURE_2D; textureDesc.width = width; textureDesc.height = height; textureDesc.depth = 1; textureDesc.mipLevels = 1; textureDesc.arraySize = 1; textureDesc.format = RenderFormat::R8_UNORM; g_imFontTexture = g_device->createTexture(textureDesc); uint32_t rowPitch = (width + PITCH_ALIGNMENT - 1) & ~(PITCH_ALIGNMENT - 1); uint32_t slicePitch = (rowPitch * height + PLACEMENT_ALIGNMENT - 1) & ~(PLACEMENT_ALIGNMENT - 1); auto uploadBuffer = g_device->createBuffer(RenderBufferDesc::UploadBuffer(slicePitch)); uint8_t* mappedMemory = reinterpret_cast(uploadBuffer->map()); if (rowPitch == width) { 
memcpy(mappedMemory, pixels, slicePitch); } else { for (size_t i = 0; i < height; i++) { memcpy(mappedMemory, pixels, width); pixels += width; mappedMemory += rowPitch; } } uploadBuffer->unmap(); ExecuteCopyCommandList([&] { g_copyCommandList->barriers(RenderBarrierStage::COPY, RenderTextureBarrier(g_imFontTexture.get(), RenderTextureLayout::COPY_DEST)); g_copyCommandList->copyTextureRegion( RenderTextureCopyLocation::Subresource(g_imFontTexture.get(), 0), RenderTextureCopyLocation::PlacedFootprint(uploadBuffer.get(), RenderFormat::R8_UNORM, width, height, 1, rowPitch, 0)); }); RenderTextureViewDesc textureViewDesc; textureViewDesc.format = textureDesc.format; textureViewDesc.dimension = RenderTextureViewDimension::TEXTURE_2D; textureViewDesc.mipLevels = 1; textureViewDesc.componentMapping = RenderComponentMapping(RenderSwizzle::ONE, RenderSwizzle::ONE, RenderSwizzle::ONE, RenderSwizzle::R); g_imFontTextureView = g_imFontTexture->createTextureView(textureViewDesc); g_imFontTextureDescriptorIndex = g_textureDescriptorAllocator.allocate(); g_textureDescriptorSet->setTexture(g_imFontTextureDescriptorIndex, g_imFontTexture.get(), RenderTextureLayout::SHADER_READ, g_imFontTextureView.get()); io.Fonts->SetTexID(ImTextureID(g_imFontTextureDescriptorIndex)); RenderPipelineLayoutBuilder pipelineLayoutBuilder; pipelineLayoutBuilder.begin(false, true); RenderDescriptorSetBuilder descriptorSetBuilder; descriptorSetBuilder.begin(); descriptorSetBuilder.addTexture(0, TEXTURE_DESCRIPTOR_SIZE); descriptorSetBuilder.end(true, TEXTURE_DESCRIPTOR_SIZE); pipelineLayoutBuilder.addDescriptorSet(descriptorSetBuilder); descriptorSetBuilder.begin(); descriptorSetBuilder.addSampler(0, SAMPLER_DESCRIPTOR_SIZE); descriptorSetBuilder.end(true, SAMPLER_DESCRIPTOR_SIZE); pipelineLayoutBuilder.addDescriptorSet(descriptorSetBuilder); pipelineLayoutBuilder.addPushConstant(0, 2, 12, RenderShaderStageFlag::VERTEX | RenderShaderStageFlag::PIXEL); pipelineLayoutBuilder.end(); g_imPipelineLayout = 
pipelineLayoutBuilder.create(g_device.get()); auto vertexShader = CREATE_SHADER(imgui_vs); auto pixelShader = CREATE_SHADER(imgui_ps); RenderInputElement inputElements[3]; inputElements[0] = RenderInputElement("POSITION", 0, 0, RenderFormat::R32G32_FLOAT, 0, offsetof(ImDrawVert, pos)); inputElements[1] = RenderInputElement("TEXCOORD", 0, 1, RenderFormat::R32G32_FLOAT, 0, offsetof(ImDrawVert, uv)); inputElements[2] = RenderInputElement("COLOR", 0, 2, RenderFormat::R8G8B8A8_UNORM, 0, offsetof(ImDrawVert, col)); RenderInputSlot inputSlot(0, sizeof(ImDrawVert)); RenderGraphicsPipelineDesc pipelineDesc; pipelineDesc.pipelineLayout = g_imPipelineLayout.get(); pipelineDesc.vertexShader = vertexShader.get(); pipelineDesc.pixelShader = pixelShader.get(); pipelineDesc.renderTargetFormat[0] = RenderFormat::B8G8R8A8_UNORM; pipelineDesc.renderTargetBlend[0] = RenderBlendDesc::AlphaBlend(); pipelineDesc.renderTargetCount = 1; pipelineDesc.inputElements = inputElements; pipelineDesc.inputElementsCount = std::size(inputElements); pipelineDesc.inputSlots = &inputSlot; pipelineDesc.inputSlotsCount = 1; g_imPipeline = g_device->createGraphicsPipeline(pipelineDesc); } static void CreateHostDevice() { for (uint32_t i = 0; i < 16; i++) g_inputSlots[i].index = i; IMGUI_CHECKVERSION(); ImGui::CreateContext(); Window::Init(); g_vulkan = DetectWine() || Config::GraphicsAPI == EGraphicsAPI::Vulkan; LoadShaderCache(); g_interface = g_vulkan ? 
CreateVulkanInterface() : CreateD3D12Interface(); g_device = g_interface->createDevice(); g_triangleFanSupported = g_device->getCapabilities().triangleFan; g_queue = g_device->createCommandQueue(RenderCommandListType::DIRECT); for (auto& commandList : g_commandLists) commandList = g_device->createCommandList(RenderCommandListType::DIRECT); for (auto& commandFence : g_commandFences) commandFence = g_device->createCommandFence(); g_copyQueue = g_device->createCommandQueue(RenderCommandListType::COPY); g_copyCommandList = g_device->createCommandList(RenderCommandListType::COPY); g_copyCommandFence = g_device->createCommandFence(); g_swapChain = g_queue->createSwapChain(Window::s_handle, Config::TripleBuffering ? 3 : 2, RenderFormat::B8G8R8A8_UNORM); g_swapChain->setVsyncEnabled(Config::VSync); g_swapChainValid = !g_swapChain->needsResize(); for (auto& acquireSemaphore : g_acquireSemaphores) acquireSemaphore = g_device->createCommandSemaphore(); for (auto& renderSemaphore : g_renderSemaphores) renderSemaphore = g_device->createCommandSemaphore(); g_mainThreadId = GetCurrentThreadId(); RenderPipelineLayoutBuilder pipelineLayoutBuilder; pipelineLayoutBuilder.begin(false, true); RenderDescriptorSetBuilder descriptorSetBuilder; descriptorSetBuilder.begin(); descriptorSetBuilder.addTexture(0, TEXTURE_DESCRIPTOR_SIZE); descriptorSetBuilder.end(true, TEXTURE_DESCRIPTOR_SIZE); g_textureDescriptorSet = descriptorSetBuilder.create(g_device.get()); for (size_t i = 0; i < TEXTURE_DESCRIPTOR_NULL_COUNT; i++) { auto& texture = g_blankTextures[i]; auto& textureView = g_blankTextureViews[i]; RenderTextureDesc desc; desc.width = 1; desc.height = 1; desc.depth = 1; desc.mipLevels = 1; desc.format = RenderFormat::R8_UNORM; RenderTextureViewDesc viewDesc; viewDesc.format = desc.format; viewDesc.componentMapping = RenderComponentMapping(RenderSwizzle::ZERO, RenderSwizzle::ZERO, RenderSwizzle::ZERO, RenderSwizzle::ZERO); viewDesc.mipLevels = 1; switch (i) { case 
TEXTURE_DESCRIPTOR_NULL_TEXTURE_2D: desc.dimension = RenderTextureDimension::TEXTURE_2D; desc.arraySize = 1; viewDesc.dimension = RenderTextureViewDimension::TEXTURE_2D; break; case TEXTURE_DESCRIPTOR_NULL_TEXTURE_3D: desc.dimension = RenderTextureDimension::TEXTURE_3D; desc.arraySize = 1; viewDesc.dimension = RenderTextureViewDimension::TEXTURE_3D; break; case TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE: desc.dimension = RenderTextureDimension::TEXTURE_2D; desc.arraySize = 6; desc.flags = RenderTextureFlag::CUBE; viewDesc.dimension = RenderTextureViewDimension::TEXTURE_CUBE; break; default: assert(false && "Unknown null descriptor dimension"); break; } texture = g_device->createTexture(desc); textureView = texture->createTextureView(viewDesc); g_textureDescriptorSet->setTexture(i, texture.get(), RenderTextureLayout::SHADER_READ, textureView.get()); } pipelineLayoutBuilder.addDescriptorSet(descriptorSetBuilder); pipelineLayoutBuilder.addDescriptorSet(descriptorSetBuilder); pipelineLayoutBuilder.addDescriptorSet(descriptorSetBuilder); descriptorSetBuilder.begin(); descriptorSetBuilder.addSampler(0, SAMPLER_DESCRIPTOR_SIZE); descriptorSetBuilder.end(true, SAMPLER_DESCRIPTOR_SIZE); g_samplerDescriptorSet = descriptorSetBuilder.create(g_device.get()); auto& [descriptorIndex, sampler] = g_samplerStates[XXH3_64bits(&g_samplerDescs[0], sizeof(RenderSamplerDesc))]; descriptorIndex = 1; sampler = g_device->createSampler(g_samplerDescs[0]); g_samplerDescriptorSet->setSampler(0, sampler.get()); pipelineLayoutBuilder.addDescriptorSet(descriptorSetBuilder); if (g_vulkan) { pipelineLayoutBuilder.addPushConstant(0, 4, 24, RenderShaderStageFlag::VERTEX | RenderShaderStageFlag::PIXEL); } else { pipelineLayoutBuilder.addRootDescriptor(0, 4, RenderRootDescriptorType::CONSTANT_BUFFER); pipelineLayoutBuilder.addRootDescriptor(1, 4, RenderRootDescriptorType::CONSTANT_BUFFER); pipelineLayoutBuilder.addRootDescriptor(2, 4, RenderRootDescriptorType::CONSTANT_BUFFER); 
pipelineLayoutBuilder.addPushConstant(3, 4, 4, RenderShaderStageFlag::PIXEL); // For copy/resolve shaders. } pipelineLayoutBuilder.end(); g_pipelineLayout = pipelineLayoutBuilder.create(g_device.get()); auto copyShader = CREATE_SHADER(copy_vs); for (size_t i = 0; i < std::size(g_resolveMsaaDepthPipelines); i++) { std::unique_ptr pixelShader; switch (i) { case 0: pixelShader = CREATE_SHADER(resolve_msaa_depth_2x); break; case 1: pixelShader = CREATE_SHADER(resolve_msaa_depth_4x); break; case 2: pixelShader = CREATE_SHADER(resolve_msaa_depth_8x); break; } RenderGraphicsPipelineDesc desc; desc.pipelineLayout = g_pipelineLayout.get(); desc.vertexShader = copyShader.get(); desc.pixelShader = pixelShader.get(); desc.depthFunction = RenderComparisonFunction::ALWAYS; desc.depthEnabled = true; desc.depthWriteEnabled = true; desc.depthTargetFormat = RenderFormat::D32_FLOAT; g_resolveMsaaDepthPipelines[i] = g_device->createGraphicsPipeline(desc); } CreateImGuiBackend(); } static void WaitForGPU() { if (g_vulkan) { g_device->waitIdle(); } else { for (size_t i = 0; i < NUM_FRAMES; i++) { if (g_commandListStates[i]) { g_queue->waitForCommandFence(g_commandFences[i].get()); g_commandListStates[i] = false; } } g_queue->executeCommandLists(nullptr, g_commandFences[0].get()); g_queue->waitForCommandFence(g_commandFences[0].get()); } } static bool g_pendingRenderThread; static void WaitForRenderThread() { while (g_pendingRenderThread) Sleep(0); } static void BeginCommandList() { g_renderTarget = g_backBuffer; g_depthStencil = nullptr; g_framebuffer = nullptr; g_pipelineState.renderTargetFormat = g_backBuffer->format; g_pipelineState.depthStencilFormat = RenderFormat::UNKNOWN; g_swapChainValid &= !g_swapChain->needsResize(); if (!g_swapChainValid) { WaitForGPU(); g_backBuffer->framebuffers.clear(); g_swapChainValid = g_swapChain->resize(); g_needsResize = g_swapChainValid; } if (g_swapChainValid) g_swapChainValid = g_swapChain->acquireTexture(g_acquireSemaphores[g_frame].get(), 
&g_backBufferIndex);

    // Render into the acquired swap chain image when one is available, otherwise into the
    // placeholder texture owned by the back buffer surface.
    if (g_swapChainValid)
        g_backBuffer->texture = g_swapChain->getTexture(g_backBufferIndex);
    else
        g_backBuffer->texture = g_backBuffer->textureHolder.get();

    g_backBuffer->layout = RenderTextureLayout::UNKNOWN;

    // Every texture slot starts the frame pointing at the null descriptor of its dimension.
    for (size_t i = 0; i < 16; i++) {
        g_sharedConstants.texture2DIndices[i] = TEXTURE_DESCRIPTOR_NULL_TEXTURE_2D;
        g_sharedConstants.texture3DIndices[i] = TEXTURE_DESCRIPTOR_NULL_TEXTURE_3D;
        g_sharedConstants.textureCubeIndices[i] = TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE;
    }

    g_sharedConstants.enableGIBicubicFiltering = (Config::GITextureFiltering == EGITextureFiltering::Bicubic);

    // Begin recording and bind the shared layout: the texture descriptor set goes to sets 0-2,
    // the sampler descriptor set to set 3.
    auto& commandList = g_commandLists[g_frame];

    commandList->begin();
    commandList->setGraphicsPipelineLayout(g_pipelineLayout.get());
    commandList->setGraphicsDescriptorSet(g_textureDescriptorSet.get(), 0);
    commandList->setGraphicsDescriptorSet(g_textureDescriptorSet.get(), 1);
    commandList->setGraphicsDescriptorSet(g_textureDescriptorSet.get(), 2);
    commandList->setGraphicsDescriptorSet(g_samplerDescriptorSet.get(), 3);
}

// Creates the host device, the 1280x720 back buffer surface and the guest-visible device structure.
// a1..a5 are not referenced by the body; the mapped address of the new device is stored through a6.
// Always returns 0.
// NOTE(review): the template arguments of be / AllocPhysical are missing in this text; confirm against the original file.
static uint32_t CreateDevice(uint32_t a1, uint32_t a2, uint32_t a3, uint32_t a4, uint32_t a5, be* a6) {
    CreateHostDevice();

    g_backBuffer = g_userHeap.AllocPhysical(ResourceType::RenderTarget);
    g_backBuffer->width = 1280;
    g_backBuffer->height = 720;
    g_backBuffer->format = RenderFormat::B8G8R8A8_UNORM;
    // 16x16 placeholder that BeginCommandList() binds while no swap chain image is available.
    g_backBuffer->textureHolder = g_device->createTexture(RenderTextureDesc::Texture2D(16, 16, 1, g_backBuffer->format, RenderTextureFlag::RENDER_TARGET));

    BeginCommandList();

    // Move every blank (null descriptor) texture into the SHADER_READ layout.
    RenderTextureBarrier blankTextureBarriers[TEXTURE_DESCRIPTOR_NULL_COUNT];
    for (size_t i = 0; i < TEXTURE_DESCRIPTOR_NULL_COUNT; i++)
        blankTextureBarriers[i] = RenderTextureBarrier(g_blankTextures[i].get(), RenderTextureLayout::SHADER_READ);

    g_commandLists[g_frame]->barriers(RenderBarrierStage::NONE, blankTextureBarriers, std::size(blankTextureBarriers));

    auto device = g_userHeap.AllocPhysical();
    memset(device, 0, sizeof(*device));

    uint32_t functionOffset = 0x443344; // D3D
g_codeCache.Insert(functionOffset, reinterpret_cast(GuestFunction)); for (size_t i = 0; i < _countof(device->setRenderStateFunctions); i++) device->setRenderStateFunctions[i] = functionOffset; for (auto& [state, function] : g_setRenderStateFunctions) { functionOffset += 4; g_codeCache.Insert(functionOffset, function); device->setRenderStateFunctions[state / 4] = functionOffset; } for (size_t i = 0; i < _countof(device->setSamplerStateFunctions); i++) device->setSamplerStateFunctions[i] = *reinterpret_cast(g_memory.Translate(0x8330F3DC + i * 0xC)); device->viewport.width = 1280.0f; device->viewport.height = 720.0f; device->viewport.maxZ = 1.0f; *a6 = g_memory.MapVirtual(device); return 0; } static void DestructResource(GuestResource* resource) { RenderCommand cmd; cmd.type = RenderCommandType::DestructResource; cmd.destructResource.resource = resource; g_renderQueue.enqueue(cmd); } static void ProcDestructResource(const RenderCommand& cmd) { const auto& args = cmd.destructResource; g_tempResources[g_frame].push_back(args.resource); } static uint32_t ComputeTexturePitch(GuestTexture* texture) { return (texture->width * RenderFormatSize(texture->format) + PITCH_ALIGNMENT - 1) & ~(PITCH_ALIGNMENT - 1); } static void LockTextureRect(GuestTexture* texture, uint32_t, GuestLockedRect* lockedRect) { uint32_t pitch = ComputeTexturePitch(texture); uint32_t slicePitch = pitch * texture->height; if (texture->mappedMemory == nullptr) texture->mappedMemory = g_userHeap.AllocPhysical(slicePitch, 0x10); lockedRect->pitch = pitch; lockedRect->bits = g_memory.MapVirtual(texture->mappedMemory); } static void UnlockTextureRect(GuestTexture* texture) { RenderCommand cmd; cmd.type = RenderCommandType::UnlockTextureRect; cmd.unlockTextureRect.texture = texture; g_renderQueue.enqueue(cmd); } static void ProcUnlockTextureRect(const RenderCommand& cmd) { const auto& args = cmd.unlockTextureRect; AddBarrier(args.texture, RenderTextureLayout::COPY_DEST); FlushBarriers(); uint32_t pitch = 
ComputeTexturePitch(args.texture);
    uint32_t slicePitch = pitch * args.texture->height;

    // Stage the whole slice in the upload allocator of the current frame.
    auto allocation = g_uploadAllocators[g_frame].allocate(slicePitch, PLACEMENT_ALIGNMENT);
    memcpy(allocation.memory, args.texture->mappedMemory, slicePitch);

    // The footprint row length is derived as pitch / RenderFormatSize(format).
    g_commandLists[g_frame]->copyTextureRegion(
        RenderTextureCopyLocation::Subresource(args.texture->texture, 0),
        RenderTextureCopyLocation::PlacedFootprint(allocation.buffer, args.texture->format, args.texture->width, args.texture->height, 1, pitch / RenderFormatSize(args.texture->format), allocation.offset));
}

// Shared lock implementation for vertex and index buffers. Records whether bit 0x10 of flags
// (stored as lockedReadOnly) was set, allocates dataSize bytes of staging memory on first use
// and returns that memory.
static void* LockBuffer(GuestBuffer* buffer, uint32_t flags) {
    buffer->lockedReadOnly = (flags & 0x10) != 0;

    if (buffer->mappedMemory == nullptr)
        buffer->mappedMemory = g_userHeap.AllocPhysical(buffer->dataSize, 0x10);

    return buffer->mappedMemory;
}

// Vertex buffer lock entry point; the two unnamed parameters are ignored.
static void* LockVertexBuffer(GuestBuffer* buffer, uint32_t, uint32_t, uint32_t flags) {
    return LockBuffer(buffer, flags);
}

// Byte-swaps the staged buffer contents element by element (element type T) into a new upload
// buffer and copies the result into the GPU buffer, either through the copy command list
// (useCopyQueue) or through the command list of the current frame.
// NOTE(review): the template parameter list and the reinterpret_cast target types are missing in
// this text (T is used but never declared here); confirm against the original file.
// NOTE(review): the loop advances by sizeof(T) while i < dataSize, so a dataSize that is not a
// multiple of sizeof(T) would touch bytes past the end -- confirm that callers guarantee this.
template static void UnlockBuffer(GuestBuffer* buffer, bool useCopyQueue) {
    auto uploadBuffer = g_device->createBuffer(RenderBufferDesc::UploadBuffer(buffer->dataSize));

    auto dest = reinterpret_cast(uploadBuffer->map());
    auto src = reinterpret_cast(buffer->mappedMemory);

    for (size_t i = 0; i < buffer->dataSize; i += sizeof(T)) {
        *dest = std::byteswap(*src);
        ++dest;
        ++src;
    }

    uploadBuffer->unmap();

    if (useCopyQueue) {
        ExecuteCopyCommandList([&] {
            g_copyCommandList->copyBufferRegion(buffer->buffer->at(0), uploadBuffer->at(0), buffer->dataSize);
        });
    } else {
        auto& commandList = g_commandLists[g_frame];

        // WRITE barrier before the copy, READ barrier for the graphics stage afterwards.
        commandList->barriers(RenderBarrierStage::COPY, RenderBufferBarrier(buffer->buffer.get(), RenderBufferAccess::WRITE));
        commandList->copyBufferRegion(buffer->buffer->at(0), uploadBuffer->at(0), buffer->dataSize);
        commandList->barriers(RenderBarrierStage::GRAPHICS, RenderBufferBarrier(buffer->buffer.get(), RenderBufferAccess::READ));

        // Ownership of the upload buffer moves to the temporary buffer list of the current frame.
        g_tempBuffers[g_frame].emplace_back(std::move(uploadBuffer));
    }
}

// Unlock entry point shared by vertex and index buffers; does nothing for read-only locks.
// NOTE(review): template parameter list missing in this text, see above.
template static void UnlockBuffer(GuestBuffer* buffer) {
    if
(!buffer->lockedReadOnly) {
        if (GetCurrentThreadId() == g_mainThreadId) {
            // On the main thread the upload is deferred to the render queue; the command type
            // is selected by the element size (2 bytes -> UnlockBuffer16, otherwise UnlockBuffer32).
            RenderCommand cmd;
            cmd.type = (sizeof(T) == 2) ? RenderCommandType::UnlockBuffer16 : RenderCommandType::UnlockBuffer32;
            cmd.unlockBuffer.buffer = buffer;
            g_renderQueue.enqueue(cmd);
        } else {
            // On any other thread the upload is performed immediately through the copy queue path.
            UnlockBuffer(buffer, true);
        }
    }
}

// Handles UnlockBuffer16 by uploading through the command list of the current frame.
// NOTE(review): the explicit template arguments of the UnlockBuffer calls in this span are missing
// in this text; confirm against the original file.
static void ProcUnlockBuffer16(const RenderCommand& cmd) {
    UnlockBuffer(cmd.unlockBuffer.buffer, false);
}

// Handles UnlockBuffer32 by uploading through the command list of the current frame.
static void ProcUnlockBuffer32(const RenderCommand& cmd) {
    UnlockBuffer(cmd.unlockBuffer.buffer, false);
}

// Vertex buffer unlock entry point.
static void UnlockVertexBuffer(GuestBuffer* buffer) {
    UnlockBuffer(buffer);
}

// Reports the size of the vertex buffer.
static void GetVertexBufferDesc(GuestBuffer* buffer, GuestBufferDesc* desc) {
    desc->size = buffer->dataSize;
}

// Index buffer lock entry point; the two unnamed parameters are ignored.
static void* LockIndexBuffer(GuestBuffer* buffer, uint32_t, uint32_t, uint32_t flags) {
    return LockBuffer(buffer, flags);
}

// Index buffer unlock entry point.
static void UnlockIndexBuffer(GuestBuffer* buffer) {
    UnlockBuffer(buffer);
}

// Reports the guest format and the size of the index buffer.
static void GetIndexBufferDesc(GuestBuffer* buffer, GuestBufferDesc* desc) {
    desc->format = buffer->guestFormat;
    desc->size = buffer->dataSize;
}

// Reports the dimensions of the surface.
static void GetSurfaceDesc(GuestSurface* surface, GuestSurfaceDesc* desc) {
    desc->width = surface->width;
    desc->height = surface->height;
}

// Copies all vertex elements of the declaration into vertexElements and stores their count in *count.
// NOTE(review): the template argument of be is missing in this text; confirm against the original file.
static void GetVertexDeclaration(GuestVertexDeclaration* vertexDeclaration, GuestVertexElement* vertexElements, be* count) {
    memcpy(vertexElements, vertexDeclaration->vertexElements.get(), vertexDeclaration->vertexElementCount * sizeof(GuestVertexElement));
    *count = vertexDeclaration->vertexElementCount;
}

// Returns the value it is given unchanged.
static uint32_t HashVertexDeclaration(uint32_t vertexDeclaration) {
    // Vertex declarations are cached on host side, so the pointer itself can be used.
return vertexDeclaration; } static void DrawImGui() { ImGui_ImplSDL2_NewFrame(); ImGui::NewFrame(); // ImGui logic here ImGui::Render(); auto drawData = ImGui::GetDrawData(); if (drawData->CmdListsCount != 0) { g_imSnapshot.SnapUsingSwap(drawData, ImGui::GetTime()); RenderCommand cmd; cmd.type = RenderCommandType::DrawImGui; g_renderQueue.enqueue(cmd); } } static void ProcDrawImGui(const RenderCommand& cmd) { auto& commandList = g_commandLists[g_frame]; if (g_imPendingBarrier) { commandList->barriers(RenderBarrierStage::GRAPHICS, RenderTextureBarrier(g_imFontTexture.get(), RenderTextureLayout::SHADER_READ)); g_imPendingBarrier = false; } commandList->setGraphicsPipelineLayout(g_imPipelineLayout.get()); commandList->setPipeline(g_imPipeline.get()); commandList->setGraphicsDescriptorSet(g_textureDescriptorSet.get(), 0); commandList->setGraphicsDescriptorSet(g_samplerDescriptorSet.get(), 1); auto& drawData = g_imSnapshot.DrawData; commandList->setViewports(RenderViewport(drawData.DisplayPos.x, drawData.DisplayPos.y, drawData.DisplaySize.x, drawData.DisplaySize.y)); float inverseDisplaySize[] = { 1.0f / drawData.DisplaySize.x, 1.0f / drawData.DisplaySize.y }; commandList->setGraphicsPushConstants(0, inverseDisplaySize, 4, 8); for (int i = 0; i < drawData.CmdListsCount; i++) { auto& drawList = drawData.CmdLists[i]; auto vertexBufferAllocation = g_uploadAllocators[g_frame].allocate(drawList->VtxBuffer.Data, drawList->VtxBuffer.Size * sizeof(ImDrawVert), alignof(ImDrawVert)); auto indexBufferAllocation = g_uploadAllocators[g_frame].allocate(drawList->IdxBuffer.Data, drawList->IdxBuffer.Size * sizeof(uint16_t), alignof(uint16_t)); const RenderVertexBufferView vertexBufferView(vertexBufferAllocation.buffer->at(vertexBufferAllocation.offset), drawList->VtxBuffer.Size * sizeof(ImDrawVert)); const RenderInputSlot inputSlot(0, sizeof(ImDrawVert)); commandList->setVertexBuffers(0, &vertexBufferView, 1, &inputSlot); const RenderIndexBufferView 
indexBufferView(indexBufferAllocation.buffer->at(indexBufferAllocation.offset), drawList->IdxBuffer.Size * sizeof(uint16_t), RenderFormat::R16_UINT);
        commandList->setIndexBuffer(&indexBufferView);

        for (int j = 0; j < drawList->CmdBuffer.Size; j++) {
            auto& drawCmd = drawList->CmdBuffer[j];

            // Skip commands whose clip rectangle is empty or inverted.
            if (drawCmd.ClipRect.z <= drawCmd.ClipRect.x || drawCmd.ClipRect.w <= drawCmd.ClipRect.y)
                continue;

            // The ImGui texture id is pushed to the shader as a descriptor index.
            uint32_t descriptorIndex = uint32_t(drawCmd.GetTexID());
            commandList->setGraphicsPushConstants(0, &descriptorIndex, 0, 4);
            commandList->setScissors(RenderRect(int32_t(drawCmd.ClipRect.x), int32_t(drawCmd.ClipRect.y), int32_t(drawCmd.ClipRect.z), int32_t(drawCmd.ClipRect.w)));
            commandList->drawIndexedInstanced(drawCmd.ElemCount, 1, drawCmd.IdxOffset, drawCmd.VtxOffset, 0);
        }
    }
}

// Builds the ImGui frame, waits until the previous present is no longer pending, marks a new
// present as pending and enqueues the Present render command.
static void Present() {
    DrawImGui();
    WaitForRenderThread();

    g_pendingRenderThread = true;

    RenderCommand cmd;
    cmd.type = RenderCommandType::Present;
    g_renderQueue.enqueue(cmd);
}

// Handles Present: finishes and submits the command list of the current frame, presents when
// the swap chain is valid, then advances to the next frame.
static void ProcPresent(const RenderCommand& cmd) {
    if (g_swapChainValid)
        AddBarrier(g_backBuffer, RenderTextureLayout::PRESENT);

    FlushBarriers();

    auto& commandList = g_commandLists[g_frame];
    commandList->end();

    if (g_swapChainValid) {
        // Submission waits on the acquire semaphore of this frame and signals its render
        // semaphore, which is then handed to present().
        const RenderCommandList* commandLists[] = { commandList.get() };
        RenderCommandSemaphore* waitSemaphores[] = { g_acquireSemaphores[g_frame].get() };
        RenderCommandSemaphore* signalSemaphores[] = { g_renderSemaphores[g_frame].get() };

        g_queue->executeCommandLists(
            commandLists, std::size(commandLists),
            waitSemaphores, std::size(waitSemaphores),
            signalSemaphores, std::size(signalSemaphores),
            g_commandFences[g_frame].get());

        g_swapChainValid = g_swapChain->present(g_backBufferIndex, signalSemaphores, std::size(signalSemaphores));
    } else {
        // Without a valid swap chain the work is still submitted, just never presented.
        g_queue->executeCommandLists(commandList.get(), g_commandFences[g_frame].get());
    }

    // Mark this frame as submitted and move on to the next frame slot.
    g_commandListStates[g_frame] = true;

    g_frame = g_nextFrame;
    g_nextFrame = (g_frame + 1) % NUM_FRAMES;

    // If the slot being reused is still flagged as submitted, wait for its fence first.
    if (g_commandListStates[g_frame]) {
g_queue->waitForCommandFence(g_commandFences[g_frame].get());
        g_commandListStates[g_frame] = false;
    }

    // Reset all per-frame state for the slot that is about to be recorded.
    g_dirtyStates = DirtyStates(true);
    g_uploadAllocators[g_frame].reset();
    DestructTempResources();
    g_triangleFanIndexData.reset();
    g_quadIndexData.reset();

    BeginCommandList();

    // Releases any caller spinning in WaitForRenderThread().
    g_pendingRenderThread = false;
}

// Returns the back buffer surface after adding a reference to it.
static GuestSurface* GetBackBuffer() {
    g_backBuffer->AddRef();
    return g_backBuffer;
}

// Maps a guest D3DFMT_* value to the host RenderFormat. Both depth formats map to D32_FLOAT and
// the three 8-bit RGBA/RGBX variants map to R8G8B8A8_UNORM. Unknown values assert and fall back
// to R16G16B16A16_FLOAT.
static RenderFormat ConvertFormat(uint32_t format) {
    switch (format) {
    case D3DFMT_A16B16G16R16F:
    case D3DFMT_A16B16G16R16F_2:
        return RenderFormat::R16G16B16A16_FLOAT;
    case D3DFMT_A8B8G8R8:
    case D3DFMT_A8R8G8B8:
    case D3DFMT_X8R8G8B8:
        return RenderFormat::R8G8B8A8_UNORM;
    case D3DFMT_D24FS8:
    case D3DFMT_D24S8:
        return RenderFormat::D32_FLOAT;
    case D3DFMT_G16R16F:
    case D3DFMT_G16R16F_2:
        return RenderFormat::R16G16_FLOAT;
    case D3DFMT_INDEX16:
        return RenderFormat::R16_UINT;
    case D3DFMT_INDEX32:
        return RenderFormat::R32_UINT;
    case D3DFMT_L8:
    case D3DFMT_L8_2:
        return RenderFormat::R8_UNORM;
    default:
        assert(false && "Unknown format");
        return RenderFormat::R16G16B16A16_FLOAT;
    }
}

// Creates a guest texture together with its host texture, view and descriptor.
// A type of 17 selects a volume (3D) texture, anything else a 2D texture.
// The usage and pool parameters are not referenced by the body.
// NOTE(review): the template argument of AllocPhysical is missing in this text; confirm against the original file.
static GuestTexture* CreateTexture(uint32_t width, uint32_t height, uint32_t depth, uint32_t levels, uint32_t usage, uint32_t format, uint32_t pool, uint32_t type) {
    const auto texture = g_userHeap.AllocPhysical(type == 17 ? ResourceType::VolumeTexture : ResourceType::Texture);

    RenderTextureDesc desc;
    desc.dimension = texture->type == ResourceType::VolumeTexture ? RenderTextureDimension::TEXTURE_3D : RenderTextureDimension::TEXTURE_2D;
    desc.width = width;
    desc.height = height;
    desc.depth = depth;
    desc.mipLevels = levels;
    desc.arraySize = 1;
    desc.format = ConvertFormat(format);
    // Only D32_FLOAT textures are flagged as depth targets.
    desc.flags = (desc.format == RenderFormat::D32_FLOAT) ?
RenderTextureFlag::DEPTH_TARGET : RenderTextureFlag::NONE;

    texture->textureHolder = g_device->createTexture(desc);
    texture->texture = texture->textureHolder.get();

    RenderTextureViewDesc viewDesc;
    viewDesc.format = desc.format;
    viewDesc.dimension = texture->type == ResourceType::VolumeTexture ? RenderTextureViewDimension::TEXTURE_3D : RenderTextureViewDimension::TEXTURE_2D;
    viewDesc.mipLevels = levels;

    // Guest formats whose channel layout differs from the host format get a swizzled view.
    switch (format) {
    case D3DFMT_D24FS8:
    case D3DFMT_D24S8:
    case D3DFMT_L8:
    case D3DFMT_L8_2:
        // Single channel formats replicate red into RGB with opaque alpha.
        viewDesc.componentMapping = RenderComponentMapping(RenderSwizzle::R, RenderSwizzle::R, RenderSwizzle::R, RenderSwizzle::ONE);
        break;
    case D3DFMT_X8R8G8B8:
        viewDesc.componentMapping = RenderComponentMapping(RenderSwizzle::G, RenderSwizzle::B, RenderSwizzle::A, RenderSwizzle::ONE);
        break;
    }

    texture->textureView = texture->texture->createTextureView(viewDesc);

    texture->width = width;
    texture->height = height;
    texture->depth = depth;
    texture->format = desc.format;
    texture->viewDimension = viewDesc.dimension;

    // Register the view in the shared texture descriptor set.
    texture->descriptorIndex = g_textureDescriptorAllocator.allocate();
    g_textureDescriptorSet->setTexture(texture->descriptorIndex, texture->texture, RenderTextureLayout::SHADER_READ, texture->textureView.get());

#ifdef _DEBUG
    texture->texture->setName(std::format("Texture {:X}", g_memory.MapVirtual(texture)));
#endif

    return texture;
}

// Creates a guest vertex buffer of the given byte length backed by a DEFAULT heap buffer.
// The buffer is created with RenderBufferFlag::INDEX in addition to being a vertex buffer.
// NOTE(review): the template argument of AllocPhysical is missing in this text; confirm against the original file.
static GuestBuffer* CreateVertexBuffer(uint32_t length) {
    auto buffer = g_userHeap.AllocPhysical(ResourceType::VertexBuffer);
    buffer->buffer = g_device->createBuffer(RenderBufferDesc::VertexBuffer(length, RenderHeapType::DEFAULT, RenderBufferFlag::INDEX));
    buffer->dataSize = length;

#ifdef _DEBUG
    buffer->buffer->setName(std::format("Vertex Buffer {:X}", g_memory.MapVirtual(buffer)));
#endif

    return buffer;
}

// Creates a guest index buffer of the given byte length; the unnamed second parameter is ignored.
// NOTE(review): the template argument of AllocPhysical is missing in this text; confirm against the original file.
static GuestBuffer* CreateIndexBuffer(uint32_t length, uint32_t, uint32_t format) {
    auto buffer = g_userHeap.AllocPhysical(ResourceType::IndexBuffer);
    buffer->buffer = g_device->createBuffer(RenderBufferDesc::IndexBuffer(length,
RenderHeapType::DEFAULT));
    buffer->dataSize = length;
    buffer->format = ConvertFormat(format);
    buffer->guestFormat = format;

#ifdef _DEBUG
    buffer->buffer->setName(std::format("Index Buffer {:X}", g_memory.MapVirtual(buffer)));
#endif

    return buffer;
}

// Creates a render target or depth stencil surface. The kind is decided by the converted format
// (D32_FLOAT means depth stencil). MSAA is used only when multiSample is non-zero and
// Config::MSAA is greater than 1.
// NOTE(review): the template argument of AllocPhysical is missing in this text; confirm against the original file.
static GuestSurface* CreateSurface(uint32_t width, uint32_t height, uint32_t format, uint32_t multiSample) {
    RenderTextureDesc desc;
    desc.dimension = RenderTextureDimension::TEXTURE_2D;
    desc.width = width;
    desc.height = height;
    desc.depth = 1;
    desc.mipLevels = 1;
    desc.arraySize = 1;
    desc.multisampling.sampleCount = multiSample != 0 && Config::MSAA > 1 ? Config::MSAA : RenderSampleCount::COUNT_1;
    desc.format = ConvertFormat(format);
    desc.flags = desc.format == RenderFormat::D32_FLOAT ? RenderTextureFlag::DEPTH_TARGET : RenderTextureFlag::RENDER_TARGET;

    auto surface = g_userHeap.AllocPhysical(desc.format == RenderFormat::D32_FLOAT ? ResourceType::DepthStencil : ResourceType::RenderTarget);
    surface->textureHolder = g_device->createTexture(desc);
    surface->texture = surface->textureHolder.get();
    surface->width = width;
    surface->height = height;
    surface->format = desc.format;
    surface->guestFormat = format;
    surface->sampleCount = desc.multisampling.sampleCount;

    // Multisampled depth surfaces also get a shader-readable view and descriptor; the depth
    // resolve in ProcStretchRect() pushes this descriptor index to its pixel shader.
    if (desc.multisampling.sampleCount != RenderSampleCount::COUNT_1 && desc.format == RenderFormat::D32_FLOAT) {
        RenderTextureViewDesc viewDesc;
        viewDesc.dimension = RenderTextureViewDimension::TEXTURE_2D;
        viewDesc.format = RenderFormat::D32_FLOAT;
        viewDesc.mipLevels = 1;
        surface->textureView = surface->textureHolder->createTextureView(viewDesc);
        surface->descriptorIndex = g_textureDescriptorAllocator.allocate();
        g_textureDescriptorSet->setTexture(surface->descriptorIndex, surface->textureHolder.get(), RenderTextureLayout::SHADER_READ, surface->textureView.get());
    }

#ifdef _DEBUG
    surface->texture->setName(std::format("{} {:X}", desc.flags & RenderTextureFlag::RENDER_TARGET ? "Render Target" : "Depth Stencil", g_memory.MapVirtual(surface)));
#endif

    return surface;
}

// Applies the viewport and the scissor rectangle to the command list of the current frame when
// they are flagged dirty, and clears the respective dirty flags.
static void FlushViewport() {
    // True only while the back buffer is bound and backed by something other than its placeholder texture.
    bool renderingToBackBuffer = g_renderTarget == g_backBuffer &&
        g_backBuffer->texture != g_backBuffer->textureHolder.get();

    auto& commandList = g_commandLists[g_frame];

    if (g_dirtyStates.viewport) {
        auto viewport = g_viewport;

        if (g_halfPixel) {
            viewport.x += 0.5f;
            viewport.y += 0.5f;
        }

        // Guest coordinates are relative to 1280x720; rescale them to the swap chain size.
        if (renderingToBackBuffer) {
            uint32_t width = g_swapChain->getWidth();
            uint32_t height = g_swapChain->getHeight();

            viewport.x *= width / 1280.0f;
            viewport.y *= height / 720.0f;
            viewport.width *= width / 1280.0f;
            viewport.height *= height / 720.0f;
        }

        // A viewport with minDepth >= maxDepth has both depth bounds mirrored around 1.0.
        if (viewport.minDepth >= viewport.maxDepth) {
            viewport.minDepth = 1.0f - viewport.minDepth;
            viewport.maxDepth = 1.0f - viewport.maxDepth;
        }

        commandList->setViewports(viewport);

        g_dirtyStates.viewport = false;
    }

    if (g_dirtyStates.scissorRect) {
        // With the scissor test disabled, the scissor rectangle covers the viewport.
        auto scissorRect = g_scissorTestEnable ? g_scissorRect : RenderRect(
            g_viewport.x,
            g_viewport.y,
            g_viewport.x + g_viewport.width,
            g_viewport.y + g_viewport.height);

        if (renderingToBackBuffer) {
            uint32_t width = g_swapChain->getWidth();
            uint32_t height = g_swapChain->getHeight();

            scissorRect.left = scissorRect.left * width / 1280;
            scissorRect.top = scissorRect.top * height / 720;
            scissorRect.right = scissorRect.right * width / 1280;
            scissorRect.bottom = scissorRect.bottom * height / 720;
        }

        commandList->setScissors(scissorRect);

        g_dirtyStates.scissorRect = false;
    }
}

// Updates the half-pixel setting through SetDirtyValue (tied to the viewport dirty flag) and
// returns the previous value.
static bool SetHalfPixel(bool enable) {
    bool oldValue = g_halfPixel;
    SetDirtyValue(g_dirtyStates.viewport, g_halfPixel, enable);
    return oldValue;
}

// Enqueues a StretchRect render command; device and the unnamed third parameter are ignored.
static void StretchRect(GuestDevice* device, uint32_t flags, uint32_t, GuestTexture* texture) {
    RenderCommand cmd;
    cmd.type = RenderCommandType::StretchRect;
    cmd.stretchRect.flags = flags;
    cmd.stretchRect.texture = texture;
    g_renderQueue.enqueue(cmd);
}

// Handles StretchRect: copies or resolves the bound render target / depth stencil into the given texture.
static void ProcStretchRect(const RenderCommand& cmd) {
    const auto& args = cmd.stretchRect;

    const bool isDepthStencil =
(args.flags & 0x4) != 0; // Bit 0x4 of the flags selects the depth stencil as the source.
    const auto surface = isDepthStencil ? g_depthStencil : g_renderTarget;
    const bool multiSampling = surface->sampleCount != RenderSampleCount::COUNT_1;

    // Pick source/destination layouts for the three paths:
    //  - multisampled depth: resolved by drawing with a shader that reads the source,
    //  - multisampled color: resolveTexture(),
    //  - single sampled: copyTexture().
    RenderTextureLayout srcLayout;
    RenderTextureLayout dstLayout;

    if (multiSampling) {
        if (isDepthStencil) {
            srcLayout = RenderTextureLayout::SHADER_READ;
            dstLayout = RenderTextureLayout::DEPTH_WRITE;
        } else {
            srcLayout = RenderTextureLayout::RESOLVE_SOURCE;
            dstLayout = RenderTextureLayout::RESOLVE_DEST;
        }
    } else {
        srcLayout = RenderTextureLayout::COPY_SOURCE;
        dstLayout = RenderTextureLayout::COPY_DEST;
    }

    AddBarrier(surface, srcLayout);
    AddBarrier(args.texture, dstLayout);
    FlushBarriers();

    auto& commandList = g_commandLists[g_frame];
    if (multiSampling) {
        if (isDepthStencil) {
            // Select the resolve pipeline matching the sample count (2x, 4x, 8x).
            uint32_t pipelineIndex = 0;

            switch (g_depthStencil->sampleCount) {
            case RenderSampleCount::COUNT_2:
                pipelineIndex = 0;
                break;
            case RenderSampleCount::COUNT_4:
                pipelineIndex = 1;
                break;
            case RenderSampleCount::COUNT_8:
                pipelineIndex = 2;
                break;
            default:
                assert(false && "Unsupported MSAA sample count");
                break;
            }

            // The destination texture lazily gets a depth-only framebuffer of its own.
            if (args.texture->framebuffer == nullptr) {
                RenderFramebufferDesc desc;
                desc.depthAttachment = args.texture->texture;
                args.texture->framebuffer = g_device->createFramebuffer(desc);
            }

            if (g_framebuffer != args.texture->framebuffer.get()) {
                commandList->setFramebuffer(args.texture->framebuffer.get());
                g_framebuffer = args.texture->framebuffer.get();
            }

            // The resolve draw runs with the half-pixel offset disabled; it is restored afterwards.
            bool oldHalfPixel = SetHalfPixel(false);
            FlushViewport();

            // The descriptor index of the source depth surface is passed to the shader as a push constant.
            commandList->setPipeline(g_resolveMsaaDepthPipelines[pipelineIndex].get());
            commandList->setGraphicsPushConstants(0, &g_depthStencil->descriptorIndex, 0, sizeof(uint32_t));
            commandList->drawInstanced(6, 1, 0, 0);

            // The draw above replaced the bound framebuffer and pipeline, so flag them for rebinding.
            g_dirtyStates.renderTargetAndDepthStencil = true;
            g_dirtyStates.pipelineState = true;

            if (g_vulkan)
                g_dirtyStates.vertexShaderConstants = true;

            SetHalfPixel(oldHalfPixel);
        } else {
            commandList->resolveTexture(args.texture->texture, surface->texture);
        }
    } else {
        commandList->copyTexture(args.texture->texture,
surface->texture); } AddBarrier(args.texture, RenderTextureLayout::SHADER_READ); } static void SetRenderTarget(GuestDevice* device, uint32_t index, GuestSurface* renderTarget) { RenderCommand cmd; cmd.type = RenderCommandType::SetRenderTarget; cmd.setRenderTarget.renderTarget = renderTarget; g_renderQueue.enqueue(cmd); } static void ProcSetRenderTarget(const RenderCommand& cmd) { const auto& args = cmd.setRenderTarget; SetDirtyValue(g_dirtyStates.renderTargetAndDepthStencil, g_renderTarget, args.renderTarget); SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.renderTargetFormat, args.renderTarget != nullptr ? args.renderTarget->format : RenderFormat::UNKNOWN); SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.sampleCount, args.renderTarget != nullptr ? args.renderTarget->sampleCount : RenderSampleCount::COUNT_1); // When alpha to coverage is enabled, update the alpha test mode as it's dependent on sample count. SetAlphaTestMode(g_sharedConstants.alphaTestMode != AlphaTestMode::Disabled); } static void SetDepthStencilSurface(GuestDevice* device, GuestSurface* depthStencil) { RenderCommand cmd; cmd.type = RenderCommandType::SetDepthStencilSurface; cmd.setDepthStencilSurface.depthStencil = depthStencil; g_renderQueue.enqueue(cmd); } static void ProcSetDepthStencilSurface(const RenderCommand& cmd) { const auto& args = cmd.setDepthStencilSurface; SetDirtyValue(g_dirtyStates.renderTargetAndDepthStencil, g_depthStencil, args.depthStencil); SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.depthStencilFormat, args.depthStencil != nullptr ? 
args.depthStencil->format : RenderFormat::UNKNOWN); } static void SetFramebuffer(GuestSurface* renderTarget, GuestSurface* depthStencil, bool settingForClear) { if (settingForClear || g_dirtyStates.renderTargetAndDepthStencil) { GuestSurface* framebufferContainer = nullptr; RenderTexture* framebufferKey = nullptr; if (renderTarget != nullptr && depthStencil != nullptr) { framebufferContainer = depthStencil; // Backbuffer texture changes per frame so we can't use the depth stencil as the key. framebufferKey = renderTarget->texture; } else if (renderTarget != nullptr && depthStencil == nullptr) { framebufferContainer = renderTarget; framebufferKey = renderTarget->texture; // Backbuffer texture changes per frame so we can't assume nullptr for it. } else if (renderTarget == nullptr && depthStencil != nullptr) { framebufferContainer = depthStencil; framebufferKey = nullptr; } auto& commandList = g_commandLists[g_frame]; if (framebufferContainer != nullptr) { auto& framebuffer = framebufferContainer->framebuffers[framebufferKey]; if (framebuffer == nullptr) { RenderFramebufferDesc desc; if (renderTarget != nullptr) { desc.colorAttachments = const_cast(&renderTarget->texture); desc.colorAttachmentsCount = 1; } if (depthStencil != nullptr) desc.depthAttachment = depthStencil->texture; framebuffer = g_device->createFramebuffer(desc); } if (g_framebuffer != framebuffer.get()) { commandList->setFramebuffer(framebuffer.get()); g_framebuffer = framebuffer.get(); } } else if (g_framebuffer != nullptr) { commandList->setFramebuffer(nullptr); g_framebuffer = nullptr; } g_dirtyStates.renderTargetAndDepthStencil = settingForClear; } } static void Clear(GuestDevice* device, uint32_t flags, uint32_t, be* color, double z) { RenderCommand cmd; cmd.type = RenderCommandType::Clear; cmd.clear.flags = flags; cmd.clear.color[0] = color[0]; cmd.clear.color[1] = color[1]; cmd.clear.color[2] = color[2]; cmd.clear.color[3] = color[3]; cmd.clear.z = float(z); g_renderQueue.enqueue(cmd); } static 
void ProcClear(const RenderCommand& cmd) { const auto& args = cmd.clear; AddBarrier(g_renderTarget, RenderTextureLayout::COLOR_WRITE); AddBarrier(g_depthStencil, RenderTextureLayout::DEPTH_WRITE); FlushBarriers(); bool canClearInOnePass = (g_renderTarget == nullptr) || (g_depthStencil == nullptr) || (g_renderTarget->width == g_depthStencil->width && g_renderTarget->height == g_depthStencil->height); if (canClearInOnePass) SetFramebuffer(g_renderTarget, g_depthStencil, true); auto& commandList = g_commandLists[g_frame]; if (g_renderTarget != nullptr && (args.flags & D3DCLEAR_TARGET) != 0) { if (!canClearInOnePass) SetFramebuffer(g_renderTarget, nullptr, true); commandList->clearColor(0, RenderColor(args.color[0], args.color[1], args.color[2], args.color[3])); } if (g_depthStencil != nullptr && (args.flags & D3DCLEAR_ZBUFFER) != 0) { if (!canClearInOnePass) SetFramebuffer(nullptr, g_depthStencil, true); // The condition here is done by the game to determine reverse Z. commandList->clearDepth(true, g_depthStencil->guestFormat == D3DFMT_D24FS8 ? 
(1.0f - args.z) : args.z); } } static void SetViewport(GuestDevice* device, GuestViewport* viewport) { RenderCommand cmd; cmd.type = RenderCommandType::SetViewport; cmd.setViewport.x = viewport->x; cmd.setViewport.y = viewport->y; cmd.setViewport.width = viewport->width; cmd.setViewport.height = viewport->height; cmd.setViewport.minDepth = viewport->minZ; cmd.setViewport.maxDepth = viewport->maxZ; g_renderQueue.enqueue(cmd); device->viewport.x = float(viewport->x); device->viewport.y = float(viewport->y); device->viewport.width = float(viewport->width); device->viewport.height = float(viewport->height); device->viewport.minZ = viewport->minZ; device->viewport.maxZ = viewport->maxZ; } static void ProcSetViewport(const RenderCommand& cmd) { const auto& args = cmd.setViewport; SetDirtyValue(g_dirtyStates.viewport, g_viewport.x, args.x); SetDirtyValue(g_dirtyStates.viewport, g_viewport.y, args.y); SetDirtyValue(g_dirtyStates.viewport, g_viewport.width, args.width); SetDirtyValue(g_dirtyStates.viewport, g_viewport.height, args.height); SetDirtyValue(g_dirtyStates.viewport, g_viewport.minDepth, args.minDepth); SetDirtyValue(g_dirtyStates.viewport, g_viewport.maxDepth, args.maxDepth); g_dirtyStates.scissorRect |= g_dirtyStates.viewport; // Update Z function as it's dependent on reverse Z. SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.zFunc, ConvertComparisonFunc(g_zFunc, args.minDepth >= args.maxDepth)); } static void SetTexture(GuestDevice* device, uint32_t index, GuestTexture* texture) { RenderCommand cmd; cmd.type = RenderCommandType::SetTexture; cmd.setTexture.index = index; cmd.setTexture.texture = texture; g_renderQueue.enqueue(cmd); } static void ProcSetTexture(const RenderCommand& cmd) { const auto& args = cmd.setTexture; AddBarrier(args.texture, RenderTextureLayout::SHADER_READ); auto viewDimension = args.texture != nullptr ? 
args.texture->viewDimension : RenderTextureViewDimension::UNKNOWN;

    // Exactly one of the three index tables receives the real descriptor index; the other two
    // (or all three for a null texture) fall back to the null descriptor of their dimension.
    // The first ternary needs no explicit null check because UNKNOWN never equals TEXTURE_2D.
    SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture2DIndices[args.index],
        viewDimension == RenderTextureViewDimension::TEXTURE_2D ? args.texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_2D);

    SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture3DIndices[args.index], args.texture != nullptr &&
        viewDimension == RenderTextureViewDimension::TEXTURE_3D ? args.texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_3D);

    SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.textureCubeIndices[args.index], args.texture != nullptr &&
        viewDimension == RenderTextureViewDimension::TEXTURE_CUBE ? args.texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE);
}

// Enqueues a SetScissorRect render command with the four edges of rect; device is not forwarded.
static void SetScissorRect(GuestDevice* device, GuestRect* rect) {
    RenderCommand cmd;
    cmd.type = RenderCommandType::SetScissorRect;
    cmd.setScissorRect.top = rect->top;
    cmd.setScissorRect.left = rect->left;
    cmd.setScissorRect.bottom = rect->bottom;
    cmd.setScissorRect.right = rect->right;
    g_renderQueue.enqueue(cmd);
}

// Handles SetScissorRect: updates the cached scissor rectangle edge by edge through SetDirtyValue,
// tied to the scissorRect dirty flag.
static void ProcSetScissorRect(const RenderCommand& cmd) {
    const auto& args = cmd.setScissorRect;

    SetDirtyValue(g_dirtyStates.scissorRect, g_scissorRect.top, args.top);
    SetDirtyValue(g_dirtyStates.scissorRect, g_scissorRect.left, args.left);
    SetDirtyValue(g_dirtyStates.scissorRect, g_scissorRect.bottom, args.bottom);
    SetDirtyValue(g_dirtyStates.scissorRect, g_scissorRect.right, args.right);
}

// Returns the graphics pipeline for the given state, creating and caching it on first use.
// The state is taken by value because it is normalized before being hashed.
static RenderPipeline* CreateGraphicsPipeline(PipelineState pipelineState) {
    // Sanitize to prevent state leaking.
if (!pipelineState.zEnable) { pipelineState.zWriteEnable = false; pipelineState.zFunc = RenderComparisonFunction::LESS; pipelineState.slopeScaledDepthBias = 0.0f; pipelineState.depthBias = 0; pipelineState.depthStencilFormat = RenderFormat::UNKNOWN; } if (!pipelineState.colorWriteEnable) { pipelineState.alphaBlendEnable = false; pipelineState.renderTargetFormat = RenderFormat::UNKNOWN; } if (!pipelineState.alphaBlendEnable) { pipelineState.srcBlend = RenderBlend::ONE; pipelineState.destBlend = RenderBlend::ZERO; pipelineState.blendOp = RenderBlendOperation::ADD; pipelineState.srcBlendAlpha = RenderBlend::ONE; pipelineState.destBlendAlpha = RenderBlend::ZERO; pipelineState.blendOpAlpha = RenderBlendOperation::ADD; } auto& pipeline = g_pipelines[XXH3_64bits(&pipelineState, sizeof(PipelineState))]; if (pipeline == nullptr) { RenderGraphicsPipelineDesc desc; desc.pipelineLayout = g_pipelineLayout.get(); desc.vertexShader = pipelineState.vertexShader->shader.get(); desc.pixelShader = pipelineState.pixelShader != nullptr ? 
pipelineState.pixelShader->shader.get() : nullptr; desc.depthFunction = pipelineState.zFunc; desc.depthEnabled = pipelineState.zEnable; desc.depthWriteEnabled = pipelineState.zWriteEnable; desc.depthBias = pipelineState.depthBias; desc.slopeScaledDepthBias = pipelineState.slopeScaledDepthBias; desc.depthClipEnabled = true; desc.primitiveTopology = pipelineState.primitiveTopology; desc.cullMode = pipelineState.cullMode; desc.renderTargetFormat[0] = pipelineState.renderTargetFormat; desc.renderTargetBlend[0].blendEnabled = pipelineState.alphaBlendEnable; desc.renderTargetBlend[0].srcBlend = pipelineState.srcBlend; desc.renderTargetBlend[0].dstBlend = pipelineState.destBlend; desc.renderTargetBlend[0].blendOp = pipelineState.blendOp; desc.renderTargetBlend[0].srcBlendAlpha = pipelineState.srcBlendAlpha; desc.renderTargetBlend[0].dstBlendAlpha = pipelineState.destBlendAlpha; desc.renderTargetBlend[0].blendOpAlpha = pipelineState.blendOpAlpha; desc.renderTargetBlend[0].renderTargetWriteMask = pipelineState.colorWriteEnable; desc.renderTargetCount = pipelineState.renderTargetFormat != RenderFormat::UNKNOWN ? 
1 : 0; desc.depthTargetFormat = pipelineState.depthStencilFormat; desc.multisampling.sampleCount = pipelineState.sampleCount; desc.alphaToCoverageEnabled = pipelineState.enableAlphaToCoverage; desc.inputElements = pipelineState.vertexDeclaration->inputElements.get(); desc.inputElementsCount = pipelineState.vertexDeclaration->inputElementCount; RenderInputSlot inputSlots[16]{}; uint32_t inputSlotIndices[16]{}; uint32_t inputSlotCount = 0; for (size_t i = 0; i < pipelineState.vertexDeclaration->inputElementCount; i++) { auto& inputElement = pipelineState.vertexDeclaration->inputElements[i]; auto& inputSlotIndex = inputSlotIndices[inputElement.slotIndex]; if (inputSlotIndex == NULL) inputSlotIndex = ++inputSlotCount; auto& inputSlot = inputSlots[inputSlotIndex - 1]; inputSlot.index = inputElement.slotIndex; inputSlot.stride = pipelineState.vertexStrides[inputElement.slotIndex]; if (pipelineState.instancing && inputElement.slotIndex != 0 && inputElement.slotIndex != 15) inputSlot.classification = RenderInputSlotClassification::PER_INSTANCE_DATA; else inputSlot.classification = RenderInputSlotClassification::PER_VERTEX_DATA; } desc.inputSlots = inputSlots; desc.inputSlotsCount = inputSlotCount; pipeline = g_device->createGraphicsPipeline(desc); } return pipeline.get(); } static RenderTextureAddressMode ConvertTextureAddressMode(size_t value) { switch (value) { case D3DTADDRESS_WRAP: return RenderTextureAddressMode::WRAP; case D3DTADDRESS_MIRROR: return RenderTextureAddressMode::MIRROR; case D3DTADDRESS_CLAMP: return RenderTextureAddressMode::CLAMP; case D3DTADDRESS_MIRRORONCE: return RenderTextureAddressMode::MIRROR_ONCE; case D3DTADDRESS_BORDER: return RenderTextureAddressMode::BORDER; default: assert(false && "Unknown texture address mode"); return RenderTextureAddressMode::UNKNOWN; } } static RenderFilter ConvertTextureFilter(uint32_t value) { switch (value) { case D3DTEXF_POINT: case D3DTEXF_NONE: return RenderFilter::NEAREST; case D3DTEXF_LINEAR: return 
RenderFilter::LINEAR;
    default:
        assert(false && "Unknown texture filter");
        return RenderFilter::UNKNOWN;
    }
}

// Maps the guest border color selector onto the renderer's border color:
// 0 is transparent black, 1 is opaque white. Anything else trips the assert
// and yields RenderBorderColor::UNKNOWN.
static RenderBorderColor ConvertBorderColor(uint32_t value)
{
    switch (value)
    {
    case 0:
        return RenderBorderColor::TRANSPARENT_BLACK;
    case 1:
        return RenderBorderColor::OPAQUE_WHITE;
    default:
        assert(false && "Unknown border color");
        return RenderBorderColor::UNKNOWN;
    }
}

// Small fixed-capacity batch of render commands that is handed to
// g_renderQueue with a single bulk enqueue.
struct LocalRenderCommandQueue
{
    RenderCommand commands[20];
    uint32_t count = 0;

    // Returns the next free command slot. Capacity is only checked by the assert.
    RenderCommand& enqueue()
    {
        assert(count < std::size(commands));
        return commands[count++];
    }

    // Pushes the first `count` commands to g_renderQueue. `count` is not reset here.
    void submit()
    {
        g_renderQueue.enqueue_bulk(commands, count);
    }
};

// Turns the dirty state recorded on the guest device into render commands
// appended to `queue`: shader booleans, the 16 sampler states, and the
// vertex/pixel shader float constants. Each handled dirty bit is cleared.
static void FlushRenderStateForMainThread(GuestDevice* device, LocalRenderCommandQueue& queue)
{
    constexpr size_t BOOL_MASK = 0x100000000000000ull;

    if ((device->dirtyFlags[4].get() & BOOL_MASK) != 0)
    {
        auto& cmd = queue.enqueue();
        cmd.type = RenderCommandType::SetBooleans;

        // Low 8 bits of the vertex shader booleans land in bits 0-7,
        // low 8 bits of the pixel shader booleans in bits 16-23.
        cmd.setBooleans.booleans = (device->vertexShaderBoolConstants[0].get() & 0xFF) | ((device->pixelShaderBoolConstants[0].get() & 0xFF) << 16);

        device->dirtyFlags[4] = device->dirtyFlags[4].get() & ~BOOL_MASK;
    }

    for (uint32_t i = 0; i < 16; i++)
    {
        // Sampler i is tracked by bit (31 - i) of dirtyFlags[3].
        const size_t mask = 0x8000000000000000ull >> (i + 32);

        if (device->dirtyFlags[3].get() & mask)
        {
            auto& cmd = queue.enqueue();
            cmd.type = RenderCommandType::SetSamplerState;
            cmd.setSamplerState.index = i;

            // Only words 0, 3 and 5 of the guest sampler state are forwarded.
            cmd.setSamplerState.data0 = device->samplerStates[i].data[0];
            cmd.setSamplerState.data3 = device->samplerStates[i].data[3];
            cmd.setSamplerState.data5 = device->samplerStates[i].data[5];

            device->dirtyFlags[3] = device->dirtyFlags[3].get() & ~mask;
        }
    }

    if (g_dirtyStates.vertexShaderConstants || device->dirtyFlags[0] != 0)
    {
        // NOTE(review): presumably required before touching the per-frame
        // upload allocator from this thread — confirm.
        WaitForRenderThread();

        auto& cmd = queue.enqueue();
        cmd.type = RenderCommandType::SetVertexShaderConstants;
        cmd.setVertexShaderConstants.allocation = g_uploadAllocators[g_frame].allocate(device->vertexShaderFloatConstants, 0x1000, 0x100);

        device->dirtyFlags[0] = 0;
    }

    if (g_dirtyStates.pixelShaderConstants || device->dirtyFlags[1] !=
0) { WaitForRenderThread(); auto& cmd = queue.enqueue(); cmd.type = RenderCommandType::SetPixelShaderConstants; cmd.setPixelShaderConstants.allocation = g_uploadAllocators[g_frame].allocate(device->pixelShaderFloatConstants, 0xE00, 0x100); device->dirtyFlags[1] = 0; } } static void ProcSetBooleans(const RenderCommand& cmd) { SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.booleans, cmd.setBooleans.booleans); } static void ProcSetSamplerState(const RenderCommand& cmd) { const auto& args = cmd.setSamplerState; const auto addressU = ConvertTextureAddressMode((args.data0 >> 10) & 0x7); const auto addressV = ConvertTextureAddressMode((args.data0 >> 13) & 0x7); const auto addressW = ConvertTextureAddressMode((args.data0 >> 16) & 0x7); auto magFilter = ConvertTextureFilter((args.data3 >> 19) & 0x3); auto minFilter = ConvertTextureFilter((args.data3 >> 21) & 0x3); auto mipFilter = ConvertTextureFilter((args.data3 >> 23) & 0x3); const auto borderColor = ConvertBorderColor(args.data5 & 0x3); bool anisotropyEnabled = mipFilter == RenderFilter::LINEAR; if (anisotropyEnabled) { magFilter = RenderFilter::LINEAR; minFilter = RenderFilter::LINEAR; } auto& samplerDesc = g_samplerDescs[args.index]; bool dirty = false; SetDirtyValue(dirty, samplerDesc.addressU, addressU); SetDirtyValue(dirty, samplerDesc.addressV, addressV); SetDirtyValue(dirty, samplerDesc.addressW, addressW); SetDirtyValue(dirty, samplerDesc.minFilter, minFilter); SetDirtyValue(dirty, samplerDesc.magFilter, magFilter); SetDirtyValue(dirty, samplerDesc.mipmapMode, RenderMipmapMode(mipFilter)); SetDirtyValue(dirty, samplerDesc.anisotropyEnabled, anisotropyEnabled); SetDirtyValue(dirty, samplerDesc.borderColor, borderColor); if (dirty) { auto& [descriptorIndex, sampler] = g_samplerStates[XXH3_64bits(&samplerDesc, sizeof(RenderSamplerDesc))]; if (descriptorIndex == NULL) { descriptorIndex = g_samplerStates.size(); sampler = g_device->createSampler(samplerDesc); 
g_samplerDescriptorSet->setSampler(descriptorIndex - 1, sampler.get()); } SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.samplerIndices[args.index], descriptorIndex - 1); } } static void SetRootDescriptor(const UploadAllocation& allocation, size_t index) { auto& commandList = g_commandLists[g_frame]; if (g_vulkan) commandList->setGraphicsPushConstants(0, &allocation.deviceAddress, 8 * index, 8); else commandList->setGraphicsRootDescriptor(allocation.buffer->at(allocation.offset), index); } static void ProcSetVertexShaderConstants(const RenderCommand& cmd) { SetRootDescriptor(cmd.setVertexShaderConstants.allocation, 0); } static void ProcSetPixelShaderConstants(const RenderCommand& cmd) { SetRootDescriptor(cmd.setPixelShaderConstants.allocation, 1); } static void FlushRenderStateForRenderThread() { auto renderTarget = g_pipelineState.colorWriteEnable ? g_renderTarget : nullptr; auto depthStencil = g_pipelineState.zEnable ? g_depthStencil : nullptr; AddBarrier(renderTarget, RenderTextureLayout::COLOR_WRITE); AddBarrier(depthStencil, RenderTextureLayout::DEPTH_WRITE); FlushBarriers(); SetFramebuffer(renderTarget, depthStencil, false); FlushViewport(); auto& commandList = g_commandLists[g_frame]; if (g_dirtyStates.pipelineState) commandList->setPipeline(CreateGraphicsPipeline(g_pipelineState)); if (g_dirtyStates.sharedConstants) { auto sharedConstants = g_uploadAllocators[g_frame].allocate(&g_sharedConstants, sizeof(g_sharedConstants), 0x100); SetRootDescriptor(sharedConstants, 2); } if (g_dirtyStates.vertexStreamFirst <= g_dirtyStates.vertexStreamLast) { commandList->setVertexBuffers( g_dirtyStates.vertexStreamFirst, g_vertexBufferViews + g_dirtyStates.vertexStreamFirst, g_dirtyStates.vertexStreamLast - g_dirtyStates.vertexStreamFirst + 1, g_inputSlots + g_dirtyStates.vertexStreamFirst); } if (g_dirtyStates.indices && (!g_vulkan || g_indexBufferView.buffer.ref != nullptr)) commandList->setIndexBuffer(&g_indexBufferView); g_dirtyStates = 
DirtyStates(false); } static RenderPrimitiveTopology ConvertPrimitiveType(uint32_t primitiveType) { switch (primitiveType) { case D3DPT_POINTLIST: return RenderPrimitiveTopology::POINT_LIST; case D3DPT_LINELIST: return RenderPrimitiveTopology::LINE_LIST; case D3DPT_LINESTRIP: return RenderPrimitiveTopology::LINE_STRIP; case D3DPT_TRIANGLELIST: case D3DPT_QUADLIST: return RenderPrimitiveTopology::TRIANGLE_LIST; case D3DPT_TRIANGLESTRIP: return RenderPrimitiveTopology::TRIANGLE_STRIP; case D3DPT_TRIANGLEFAN: return g_triangleFanSupported ? RenderPrimitiveTopology::TRIANGLE_FAN : RenderPrimitiveTopology::TRIANGLE_LIST; default: assert(false && "Unknown primitive type"); return RenderPrimitiveTopology::UNKNOWN; } } static void SetPrimitiveType(uint32_t primitiveType) { SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.primitiveTopology, ConvertPrimitiveType(primitiveType)); } static uint32_t CheckInstancing() { uint32_t indexCount = 0; SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.instancing, g_pipelineState.vertexDeclaration->indexVertexStream != 0); if (g_pipelineState.instancing) { // Index buffer is passed as a vertex stream indexCount = g_vertexBufferViews[g_pipelineState.vertexDeclaration->indexVertexStream].size / 4; } return indexCount; } static void DrawPrimitive(GuestDevice* device, uint32_t primitiveType, uint32_t startVertex, uint32_t primitiveCount) { LocalRenderCommandQueue queue; FlushRenderStateForMainThread(device, queue); auto& cmd = queue.enqueue(); cmd.type = RenderCommandType::DrawPrimitive; cmd.drawPrimitive.primitiveType = primitiveType; cmd.drawPrimitive.startVertex = startVertex; cmd.drawPrimitive.primitiveCount = primitiveCount; queue.submit(); } static void ProcDrawPrimitive(const RenderCommand& cmd) { const auto& args = cmd.drawPrimitive; SetPrimitiveType(args.primitiveType); uint32_t indexCount = CheckInstancing(); if (indexCount > 0) { auto& vertexBufferView = 
g_vertexBufferViews[g_pipelineState.vertexDeclaration->indexVertexStream]; SetDirtyValue(g_dirtyStates.indices, g_indexBufferView.buffer, vertexBufferView.buffer); SetDirtyValue(g_dirtyStates.indices, g_indexBufferView.size, vertexBufferView.size); SetDirtyValue(g_dirtyStates.indices, g_indexBufferView.format, RenderFormat::R32_UINT); } FlushRenderStateForRenderThread(); auto& commandList = g_commandLists[g_frame]; if (indexCount > 0) commandList->drawIndexedInstanced(indexCount, args.primitiveCount / indexCount, 0, 0, 0); else commandList->drawInstanced(args.primitiveCount, 1, args.startVertex, 0); } static void DrawIndexedPrimitive(GuestDevice* device, uint32_t primitiveType, int32_t baseVertexIndex, uint32_t startIndex, uint32_t primCount) { LocalRenderCommandQueue queue; FlushRenderStateForMainThread(device, queue); auto& cmd = queue.enqueue(); cmd.type = RenderCommandType::DrawIndexedPrimitive; cmd.drawIndexedPrimitive.primitiveType = primitiveType; cmd.drawIndexedPrimitive.baseVertexIndex = baseVertexIndex; cmd.drawIndexedPrimitive.startIndex = startIndex; cmd.drawIndexedPrimitive.primCount = primCount; queue.submit(); } static void ProcDrawIndexedPrimitive(const RenderCommand& cmd) { const auto& args = cmd.drawIndexedPrimitive; CheckInstancing(); SetPrimitiveType(args.primitiveType); FlushRenderStateForRenderThread(); g_commandLists[g_frame]->drawIndexedInstanced(args.primCount, 1, args.startIndex, args.baseVertexIndex, 0); } static void DrawPrimitiveUP(GuestDevice* device, uint32_t primitiveType, uint32_t primitiveCount, void* vertexStreamZeroData, uint32_t vertexStreamZeroStride) { LocalRenderCommandQueue queue; FlushRenderStateForMainThread(device, queue); WaitForRenderThread(); auto& cmd = queue.enqueue(); cmd.type = RenderCommandType::DrawPrimitiveUP; cmd.drawPrimitiveUP.primitiveType = primitiveType; cmd.drawPrimitiveUP.primitiveCount = primitiveCount; cmd.drawPrimitiveUP.vertexStreamZeroData = 
g_uploadAllocators[g_frame].allocate(reinterpret_cast(vertexStreamZeroData), primitiveCount * vertexStreamZeroStride, 0x4);
    // NOTE(review): the reinterpret_cast above has no target type in this copy
    // of the file — looks like a lost template argument; confirm against upstream.
    cmd.drawPrimitiveUP.vertexStreamZeroStride = vertexStreamZeroStride;

    queue.submit();
}

// Render-thread handler for DrawPrimitiveUP: binds the uploaded vertex data as
// stream 0 and draws it. Quad lists (and triangle fans when the device lacks
// fan support) are drawn through a prepared index buffer instead.
static void ProcDrawPrimitiveUP(const RenderCommand& cmd)
{
    const auto& args = cmd.drawPrimitiveUP;

    CheckInstancing();
    SetPrimitiveType(args.primitiveType);

    // The pipeline keeps strides as uint8_t, so the stride is narrowed here.
    SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.vertexStrides[0], uint8_t(args.vertexStreamZeroStride));

    auto& vertexBufferView = g_vertexBufferViews[0];
    vertexBufferView.size = args.primitiveCount * args.vertexStreamZeroStride;
    vertexBufferView.buffer = args.vertexStreamZeroData.buffer->at(args.vertexStreamZeroData.offset);
    g_inputSlots[0].stride = args.vertexStreamZeroStride;

    // Force stream 0 to be rebound on the next flush.
    g_dirtyStates.vertexStreamFirst = 0;

    // NOTE(review): prepare() presumably also binds the generated index
    // buffer and returns the index count — confirm.
    uint32_t indexCount = 0;
    if (args.primitiveType == D3DPT_QUADLIST)
        indexCount = g_quadIndexData.prepare(args.primitiveCount);
    else if (!g_triangleFanSupported && args.primitiveType == D3DPT_TRIANGLEFAN)
        indexCount = g_triangleFanIndexData.prepare(args.primitiveCount);

    FlushRenderStateForRenderThread();

    if (indexCount != 0)
        g_commandLists[g_frame]->drawIndexedInstanced(indexCount, 1, 0, 0, 0);
    else
        g_commandLists[g_frame]->drawInstanced(args.primitiveCount, 1, 0, 0);
}

// Maps a guest D3DDECLUSAGE_* value to its semantic name string.
static const char* ConvertDeclUsage(uint32_t usage)
{
    switch (usage)
    {
    case D3DDECLUSAGE_POSITION:
        return "POSITION";
    case D3DDECLUSAGE_BLENDWEIGHT:
        return "BLENDWEIGHT";
    case D3DDECLUSAGE_BLENDINDICES:
        return "BLENDINDICES";
    case D3DDECLUSAGE_NORMAL:
        return "NORMAL";
    case D3DDECLUSAGE_PSIZE:
        return "PSIZE";
    case D3DDECLUSAGE_TEXCOORD:
        return "TEXCOORD";
    case D3DDECLUSAGE_TANGENT:
        return "TANGENT";
    case D3DDECLUSAGE_BINORMAL:
        return "BINORMAL";
    case D3DDECLUSAGE_TESSFACTOR:
        return "TESSFACTOR";
    case D3DDECLUSAGE_POSITIONT:
        return "POSITIONT";
    case D3DDECLUSAGE_COLOR:
        return "COLOR";
    case D3DDECLUSAGE_FOG:
        return "FOG";
    case D3DDECLUSAGE_DEPTH:
        return "DEPTH";
    case D3DDECLUSAGE_SAMPLE:
        return "SAMPLE";
    default:
assert(false && "Unknown usage");
        return "UNKNOWN";
    }
}

// Maps a guest D3DDECLTYPE_* vertex element type onto a RenderFormat.
// The DEC3N types are exposed as a raw R32_UINT. Unknown values trip the
// assert and yield RenderFormat::UNKNOWN.
static RenderFormat ConvertDeclType(uint32_t type)
{
    switch (type)
    {
    case D3DDECLTYPE_FLOAT1:
        return RenderFormat::R32_FLOAT;
    case D3DDECLTYPE_FLOAT2:
        return RenderFormat::R32G32_FLOAT;
    case D3DDECLTYPE_FLOAT3:
        return RenderFormat::R32G32B32_FLOAT;
    case D3DDECLTYPE_FLOAT4:
        return RenderFormat::R32G32B32A32_FLOAT;
    case D3DDECLTYPE_D3DCOLOR:
        return RenderFormat::B8G8R8A8_UNORM;
    case D3DDECLTYPE_UBYTE4:
    case D3DDECLTYPE_UBYTE4_2:
        return RenderFormat::R8G8B8A8_UINT;
    case D3DDECLTYPE_SHORT2:
        return RenderFormat::R16G16_SINT;
    case D3DDECLTYPE_SHORT4:
        return RenderFormat::R16G16B16A16_SINT;
    case D3DDECLTYPE_UBYTE4N:
    case D3DDECLTYPE_UBYTE4N_2:
        return RenderFormat::R8G8B8A8_UNORM;
    case D3DDECLTYPE_SHORT2N:
        return RenderFormat::R16G16_SNORM;
    case D3DDECLTYPE_SHORT4N:
        return RenderFormat::R16G16B16A16_SNORM;
    case D3DDECLTYPE_USHORT2N:
        return RenderFormat::R16G16_UNORM;
    case D3DDECLTYPE_USHORT4N:
        return RenderFormat::R16G16B16A16_UNORM;
    case D3DDECLTYPE_UINT1:
        return RenderFormat::R32_UINT;
    case D3DDECLTYPE_DEC3N_2:
    case D3DDECLTYPE_DEC3N_3:
        return RenderFormat::R32_UINT;
    case D3DDECLTYPE_FLOAT16_2:
        return RenderFormat::R16G16_FLOAT;
    case D3DDECLTYPE_FLOAT16_4:
        return RenderFormat::R16G16B16A16_FLOAT;
    default:
        assert(false && "Unknown type");
        return RenderFormat::UNKNOWN;
    }
}

// Returns the vertex declaration for a guest element list, creating and
// caching it on first use. Declarations are cached by the hash of the element
// list (terminator excluded), and every call adds a reference to the result.
// Note that the `padding` field of each input element is zeroed in place
// before hashing.
static GuestVertexDeclaration* CreateVertexDeclaration(GuestVertexElement* vertexElements)
{
    // Count elements up to the terminator (stream 0xFF or an UNUSED type).
    size_t vertexElementCount = 0;
    auto vertexElement = vertexElements;

    while (vertexElement->stream != 0xFF && vertexElement->type != D3DDECLTYPE_UNUSED)
    {
        vertexElement->padding = 0;
        ++vertexElement;
        ++vertexElementCount;
    }

    // The lock covers both the cache lookup and the static scratch vector below.
    std::lock_guard lock(g_vertexDeclarationMutex);

    auto& vertexDeclaration = g_vertexDeclarations[
        XXH3_64bits(vertexElements, vertexElementCount * sizeof(GuestVertexElement))];

    if (vertexDeclaration == nullptr)
    {
        vertexDeclaration = g_userHeap.AllocPhysical(ResourceType::VertexDeclaration);

        // Scratch list reused across calls.
        // NOTE(review): the element type of this vector is missing in this copy
        // of the file — looks like a lost template argument; confirm against upstream.
        static std::vector inputElements;
inputElements.clear();

        // Fixed (usage, usageIndex) -> input location table.
        // TEXCOORD7 and POSITION1 both map to location 15.
        struct Location
        {
            uint32_t usage;
            uint32_t usageIndex;
            uint32_t location;
        };

        constexpr Location locations[] =
        {
            { D3DDECLUSAGE_POSITION, 0, 0 },
            { D3DDECLUSAGE_NORMAL, 0, 1 },
            { D3DDECLUSAGE_TANGENT, 0, 2 },
            { D3DDECLUSAGE_BINORMAL, 0, 3 },
            { D3DDECLUSAGE_TEXCOORD, 0, 4 },
            { D3DDECLUSAGE_TEXCOORD, 1, 5 },
            { D3DDECLUSAGE_TEXCOORD, 2, 6 },
            { D3DDECLUSAGE_TEXCOORD, 3, 7 },
            { D3DDECLUSAGE_COLOR, 0, 8 },
            { D3DDECLUSAGE_BLENDINDICES, 0, 9 },
            { D3DDECLUSAGE_BLENDWEIGHT, 0, 10 },
            { D3DDECLUSAGE_COLOR, 1, 11 },
            { D3DDECLUSAGE_TEXCOORD, 4, 12 },
            { D3DDECLUSAGE_TEXCOORD, 5, 13 },
            { D3DDECLUSAGE_TEXCOORD, 6, 14 },
            { D3DDECLUSAGE_TEXCOORD, 7, 15 },
            { D3DDECLUSAGE_POSITION, 1, 15 }
        };

        // Second pass over the guest elements: translate each one into an input element.
        vertexElement = vertexElements;
        while (vertexElement->stream != 0xFF && vertexElement->type != D3DDECLTYPE_UNUSED)
        {
            // POSITION elements with usage index 2 are dropped entirely.
            if (vertexElement->usage == D3DDECLUSAGE_POSITION && vertexElement->usageIndex == 2)
            {
                ++vertexElement;
                continue;
            }

            auto& inputElement = inputElements.emplace_back();

            inputElement.semanticName = ConvertDeclUsage(vertexElement->usage);
            inputElement.semanticIndex = vertexElement->usageIndex;

            // ~0 marks "no location found"; the assert below catches
            // usage/index pairs that are missing from the table.
            inputElement.location = ~0;

            for (auto& location : locations)
            {
                if (location.usage == vertexElement->usage && location.usageIndex == vertexElement->usageIndex)
                {
                    inputElement.location = location.location;
                    break;
                }
            }

            assert(inputElement.location != ~0);

            inputElement.format = ConvertDeclType(vertexElement->type);
            inputElement.slotIndex = vertexElement->stream;
            inputElement.alignedByteOffset = vertexElement->offset;

            switch (vertexElement->usage)
            {
            case D3DDECLUSAGE_POSITION:
                // POSITION1 names the stream that carries the indices for instanced draws.
                if (vertexElement->usageIndex == 1)
                    vertexDeclaration->indexVertexStream = vertexElement->stream;
                break;

            case D3DDECLUSAGE_BLENDWEIGHT:
            case D3DDECLUSAGE_BLENDINDICES:
                vertexDeclaration->inputLayoutFlags |= INPUT_LAYOUT_FLAG_HAS_BONE_WEIGHTS;
                break;

            case D3DDECLUSAGE_NORMAL:
            case D3DDECLUSAGE_TANGENT:
            case D3DDECLUSAGE_BINORMAL:
                // FLOAT3 vectors are handed over as raw 32-bit integers; any other
                // type sets the R11G11B10 flag instead.
                // NOTE(review): presumably the shader reinterprets/decodes these — confirm.
                if (vertexElement->type == D3DDECLTYPE_FLOAT3)
                    inputElement.format =
RenderFormat::R32G32B32_UINT;
                else
                    vertexDeclaration->inputLayoutFlags |= INPUT_LAYOUT_FLAG_HAS_R11G11B10_NORMAL;
                break;

            case D3DDECLUSAGE_TEXCOORD:
                // 16-bit texcoord types set the bit for their usage index in swappedTexcoords.
                switch (vertexElement->type)
                {
                case D3DDECLTYPE_SHORT2:
                case D3DDECLTYPE_SHORT4:
                case D3DDECLTYPE_SHORT2N:
                case D3DDECLTYPE_SHORT4N:
                case D3DDECLTYPE_USHORT2N:
                case D3DDECLTYPE_USHORT4N:
                case D3DDECLTYPE_FLOAT16_2:
                case D3DDECLTYPE_FLOAT16_4:
                    vertexDeclaration->swappedTexcoords |= 1 << vertexElement->usageIndex;
                    break;
                }
                break;
            }

            ++vertexElement;
        }

        // Adds a filler element for a usage the guest declaration did not
        // provide. Nothing is added when the location is already occupied.
        auto addInputElement = [&](uint32_t usage, uint32_t usageIndex)
        {
            uint32_t location = ~0;

            for (auto& alsoLocation : locations)
            {
                if (alsoLocation.usage == usage && alsoLocation.usageIndex == usageIndex)
                {
                    location = alsoLocation.location;
                    break;
                }
            }

            assert(location != ~0);

            for (auto& inputElement : inputElements)
            {
                if (inputElement.location == location)
                    return;
            }

            // Filler elements are single 32-bit values: UINT for the usages
            // listed below, FLOAT for everything else.
            auto format = RenderFormat::R32_FLOAT;

            switch (usage)
            {
            case D3DDECLUSAGE_NORMAL:
            case D3DDECLUSAGE_TANGENT:
            case D3DDECLUSAGE_BINORMAL:
            case D3DDECLUSAGE_BLENDINDICES:
                format = RenderFormat::R32_UINT;
                break;
            }

            // NOTE(review): the trailing 15, 0 are presumably slot index 15 and
            // byte offset 0 — confirm against the input element's constructor.
            inputElements.emplace_back(ConvertDeclUsage(usage), usageIndex, location, format, 15, 0);
        };

        addInputElement(D3DDECLUSAGE_POSITION, 0);
        addInputElement(D3DDECLUSAGE_NORMAL, 0);
        addInputElement(D3DDECLUSAGE_TANGENT, 0);
        addInputElement(D3DDECLUSAGE_BINORMAL, 0);
        addInputElement(D3DDECLUSAGE_TEXCOORD, 0);
        addInputElement(D3DDECLUSAGE_TEXCOORD, 1);
        addInputElement(D3DDECLUSAGE_TEXCOORD, 2);
        addInputElement(D3DDECLUSAGE_TEXCOORD, 3);
        addInputElement(D3DDECLUSAGE_COLOR, 0);
        addInputElement(D3DDECLUSAGE_BLENDWEIGHT, 0);
        addInputElement(D3DDECLUSAGE_BLENDINDICES, 0);

        // NOTE(review): both make_unique calls below have no element type in
        // this copy of the file — looks like lost template arguments; confirm.
        vertexDeclaration->inputElements = std::make_unique(inputElements.size());
        std::copy(inputElements.begin(), inputElements.end(), vertexDeclaration->inputElements.get());

        // The guest elements are copied together with their terminator, hence the + 1.
        vertexDeclaration->vertexElements = std::make_unique(vertexElementCount + 1);
        std::copy(vertexElements, vertexElements + vertexElementCount + 1,
vertexDeclaration->vertexElements.get());

        vertexDeclaration->inputElementCount = uint32_t(inputElements.size());
        vertexDeclaration->vertexElementCount = vertexElementCount + 1;
    }

    // Every call adds a reference, whether the declaration was just created or found in the cache.
    vertexDeclaration->AddRef();
    return vertexDeclaration;
}

// Main-thread entry point: queues the declaration change for the render
// thread and mirrors it into the guest device structure.
static void SetVertexDeclaration(GuestDevice* device, GuestVertexDeclaration* vertexDeclaration)
{
    RenderCommand cmd;
    cmd.type = RenderCommandType::SetVertexDeclaration;
    cmd.setVertexDeclaration.vertexDeclaration = vertexDeclaration;
    g_renderQueue.enqueue(cmd);

    device->vertexDeclaration = g_memory.MapVirtual(vertexDeclaration);
}

// Render-thread handler: applies the declaration to the pipeline state and,
// for a non-null declaration, copies its texcoord/input-layout flags into the
// shared constants. A null declaration leaves the shared constants untouched.
static void ProcSetVertexDeclaration(const RenderCommand& cmd)
{
    auto& args = cmd.setVertexDeclaration;

    if (args.vertexDeclaration != nullptr)
    {
        SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.swappedTexcoords, args.vertexDeclaration->swappedTexcoords);
        SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.inputLayoutFlags, args.vertexDeclaration->inputLayoutFlags);
    }

    SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.vertexDeclaration, args.vertexDeclaration);
}

// Looks up a guest shader in the precompiled shader cache by the hash of its
// bytecode and returns the matching GuestShader, creating the host shader
// (SPIR-V on Vulkan, DXIL otherwise) the first time an entry is used.
// A shader with no cache entry still gets a GuestShader object, but no host
// shader is attached to it.
// NOTE(review): the element type of `be` in the parameter is missing in this
// copy of the file — looks like a lost template argument; confirm against upstream.
static GuestShader* CreateShader(const be* function, ResourceType resourceType)
{
    // The hashed byte length is the sum of words 1 and 2 of the shader blob.
    XXH64_hash_t hash = XXH3_64bits(function, function[1] + function[2]);

    // g_shaderCacheEntries is searched with lower_bound, so it must be sorted by hash.
    auto end = g_shaderCacheEntries + g_shaderCacheEntryCount;
    auto findResult = std::lower_bound(g_shaderCacheEntries, end, hash, [](ShaderCacheEntry& lhs, XXH64_hash_t rhs)
        {
            return lhs.hash < rhs;
        });

    GuestShader* shader = nullptr;

    if (findResult != end && findResult->hash == hash)
    {
        if (findResult->userData == nullptr)
        {
            shader = g_userHeap.AllocPhysical(resourceType);

            if (g_vulkan)
                shader->shader = g_device->createShader(g_shaderCache.get() + findResult->spirvOffset, findResult->spirvSize, "main", RenderShaderFormat::SPIRV);
            else
                shader->shader = g_device->createShader(g_shaderCache.get() + findResult->dxilOffset, findResult->dxilSize, "main", RenderShaderFormat::DXIL);

            // The cache entry remembers the shader so later lookups reuse it.
            findResult->userData = shader;
        }
        else
        {
            shader = reinterpret_cast(findResult->userData);
        }
    }

    if
(shader == nullptr) shader = g_userHeap.AllocPhysical(resourceType); else shader->AddRef(); return shader; } static GuestShader* CreateVertexShader(const be* function) { return CreateShader(function, ResourceType::VertexShader); } static void SetVertexShader(GuestDevice* device, GuestShader* shader) { RenderCommand cmd; cmd.type = RenderCommandType::SetVertexShader; cmd.setVertexShader.shader = shader; g_renderQueue.enqueue(cmd); } static void ProcSetVertexShader(const RenderCommand& cmd) { SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.vertexShader, cmd.setVertexShader.shader); } static void SetStreamSource(GuestDevice* device, uint32_t index, GuestBuffer* buffer, uint32_t offset, uint32_t stride) { RenderCommand cmd; cmd.type = RenderCommandType::SetStreamSource; cmd.setStreamSource.index = index; cmd.setStreamSource.buffer = buffer; cmd.setStreamSource.offset = offset; cmd.setStreamSource.stride = stride; g_renderQueue.enqueue(cmd); } static void ProcSetStreamSource(const RenderCommand& cmd) { const auto& args = cmd.setStreamSource; SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.vertexStrides[args.index], uint8_t(args.buffer != nullptr ? args.stride : 0)); bool dirty = false; SetDirtyValue(dirty, g_vertexBufferViews[args.index].buffer, args.buffer != nullptr ? args.buffer->buffer->at(args.offset) : RenderBufferReference{}); SetDirtyValue(dirty, g_vertexBufferViews[args.index].size, args.buffer != nullptr ? (args.buffer->dataSize - args.offset) : 0u); SetDirtyValue(dirty, g_inputSlots[args.index].stride, args.buffer != nullptr ? 
args.stride : 0u); if (dirty) { g_dirtyStates.vertexStreamFirst = std::min(g_dirtyStates.vertexStreamFirst, args.index); g_dirtyStates.vertexStreamLast = std::max(g_dirtyStates.vertexStreamLast, args.index); } } static void SetIndices(GuestDevice* device, GuestBuffer* buffer) { RenderCommand cmd; cmd.type = RenderCommandType::SetIndices; cmd.setIndices.buffer = buffer; g_renderQueue.enqueue(cmd); } static void ProcSetIndices(const RenderCommand& cmd) { const auto& args = cmd.setIndices; SetDirtyValue(g_dirtyStates.indices, g_indexBufferView.buffer, args.buffer != nullptr ? args.buffer->buffer->at(0) : RenderBufferReference{}); SetDirtyValue(g_dirtyStates.indices, g_indexBufferView.format, args.buffer != nullptr ? args.buffer->format : RenderFormat::R16_UINT); SetDirtyValue(g_dirtyStates.indices, g_indexBufferView.size, args.buffer != nullptr ? args.buffer->dataSize : 0u); } static GuestShader* CreatePixelShader(const be* function) { return CreateShader(function, ResourceType::PixelShader); } static void SetPixelShader(GuestDevice* device, GuestShader* shader) { RenderCommand cmd; cmd.type = RenderCommandType::SetPixelShader; cmd.setPixelShader.shader = shader; g_renderQueue.enqueue(cmd); } static void ProcSetPixelShader(const RenderCommand& cmd) { SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.pixelShader, cmd.setPixelShader.shader); } static std::thread g_renderThread([] { RenderCommand commands[32]; while (true) { size_t count = g_renderQueue.wait_dequeue_bulk(commands, std::size(commands)); for (size_t i = 0; i < count; i++) { auto& cmd = commands[i]; switch (cmd.type) { case RenderCommandType::SetRenderState: ProcSetRenderState(cmd); break; case RenderCommandType::DestructResource: ProcDestructResource(cmd); break; case RenderCommandType::UnlockTextureRect: ProcUnlockTextureRect(cmd); break; case RenderCommandType::UnlockBuffer16: ProcUnlockBuffer16(cmd); break; case RenderCommandType::UnlockBuffer32: ProcUnlockBuffer32(cmd); break; case 
RenderCommandType::DrawImGui: ProcDrawImGui(cmd); break;
                case RenderCommandType::Present: ProcPresent(cmd); break;
                case RenderCommandType::StretchRect: ProcStretchRect(cmd); break;
                case RenderCommandType::SetRenderTarget: ProcSetRenderTarget(cmd); break;
                case RenderCommandType::SetDepthStencilSurface: ProcSetDepthStencilSurface(cmd); break;
                case RenderCommandType::Clear: ProcClear(cmd); break;
                case RenderCommandType::SetViewport: ProcSetViewport(cmd); break;
                case RenderCommandType::SetTexture: ProcSetTexture(cmd); break;
                case RenderCommandType::SetScissorRect: ProcSetScissorRect(cmd); break;
                case RenderCommandType::SetSamplerState: ProcSetSamplerState(cmd); break;
                case RenderCommandType::SetBooleans: ProcSetBooleans(cmd); break;
                case RenderCommandType::SetVertexShaderConstants: ProcSetVertexShaderConstants(cmd); break;
                case RenderCommandType::SetPixelShaderConstants: ProcSetPixelShaderConstants(cmd); break;
                case RenderCommandType::DrawPrimitive: ProcDrawPrimitive(cmd); break;
                case RenderCommandType::DrawIndexedPrimitive: ProcDrawIndexedPrimitive(cmd); break;
                case RenderCommandType::DrawPrimitiveUP: ProcDrawPrimitiveUP(cmd); break;
                case RenderCommandType::SetVertexDeclaration: ProcSetVertexDeclaration(cmd); break;
                case RenderCommandType::SetVertexShader: ProcSetVertexShader(cmd); break;
                case RenderCommandType::SetStreamSource: ProcSetStreamSource(cmd); break;
                case RenderCommandType::SetIndices: ProcSetIndices(cmd); break;
                case RenderCommandType::SetPixelShader: ProcSetPixelShader(cmd); break;
                default:
                    // A command type without a handler is a programming error.
                    assert(false && "Unrecognized render command type.");
                    break;
                }
            }
        }
    });

// Host replacement for the guest's D3DXFillTexture. Only one case is handled:
// a 1x1 R8_UNORM texture filled by the guest callback at address 0x82BA2150,
// which is written as a single 0xFF texel. Every other combination is a no-op.
// `data` is unused.
static void D3DXFillTexture(GuestTexture* texture, uint32_t function, void* data)
{
    if (texture->width == 1 && texture->height == 1 && texture->format == RenderFormat::R8_UNORM && function == 0x82BA2150)
    {
        auto uploadBuffer = g_device->createBuffer(RenderBufferDesc::UploadBuffer(PLACEMENT_ALIGNMENT));

        // NOTE(review): the reinterpret_cast below has no target type in this
        // copy of the file — looks like a lost template argument; confirm.
        uint8_t* mappedData = reinterpret_cast(uploadBuffer->map());
        *mappedData = 0xFF;
uploadBuffer->unmap();

        // Copy the single texel from the upload buffer into subresource 0.
        ExecuteCopyCommandList([&]
            {
                g_copyCommandList->barriers(RenderBarrierStage::COPY, RenderTextureBarrier(texture->texture, RenderTextureLayout::COPY_DEST));

                g_copyCommandList->copyTextureRegion(
                    RenderTextureCopyLocation::Subresource(texture->texture, 0),
                    RenderTextureCopyLocation::PlacedFootprint(uploadBuffer.get(), texture->format, 1, 1, 1, PLACEMENT_ALIGNMENT, 0));
            });

        // Record the layout the texture was left in by the copy.
        texture->layout = RenderTextureLayout::COPY_DEST;
    }
}

// Host replacement for the guest's D3DXFillVolumeTexture. Fills mip 0 of a
// 4-bytes-per-texel volume texture from `data` and builds mip 1 by averaging
// 2x2x2 blocks. `function` is the guest address of the fill callback and
// selects how `data` is interpreted; unrecognized callbacks leave the mip 0
// bytes unwritten.
// NOTE(review): the mip indexing assumes width, height and depth are even —
// with an odd dimension mipIndex can run past the end of mipData; confirm the
// callers only pass even sizes.
static void D3DXFillVolumeTexture(GuestTexture* texture, uint32_t function, void* data)
{
    // Row and slice pitches for mip 0 and mip 1, rounded up to the copy alignment requirements.
    uint32_t rowPitch0 = (texture->width * 4 + PITCH_ALIGNMENT - 1) & ~(PITCH_ALIGNMENT - 1);
    uint32_t slicePitch0 = (rowPitch0 * texture->height * texture->depth + PLACEMENT_ALIGNMENT - 1) & ~(PLACEMENT_ALIGNMENT - 1);

    uint32_t rowPitch1 = ((texture->width / 2) * 4 + PITCH_ALIGNMENT - 1) & ~(PITCH_ALIGNMENT - 1);
    uint32_t slicePitch1 = (rowPitch1 * (texture->height / 2) * (texture->depth / 2) + PLACEMENT_ALIGNMENT - 1) & ~(PLACEMENT_ALIGNMENT - 1);

    // One upload buffer holds mip 0 followed by mip 1 (at offset slicePitch0).
    auto uploadBuffer = g_device->createBuffer(RenderBufferDesc::UploadBuffer(slicePitch0 + slicePitch1));
    uint8_t* mappedData = reinterpret_cast(uploadBuffer->map());

    // Per-thread accumulator for mip 1: four channel sums per mip texel.
    // NOTE(review): the vector's element type is missing in this copy of the
    // file; the memset below sizes it in floats — confirm against upstream.
    thread_local std::vector mipData;
    mipData.resize((texture->width / 2) * (texture->height / 2) * (texture->depth / 2) * 4);
    memset(mipData.data(), 0, mipData.size() * sizeof(float));

    for (size_t z = 0; z < texture->depth; z++)
    {
        for (size_t y = 0; y < texture->height; y++)
        {
            for (size_t x = 0; x < texture->width; x++)
            {
                auto dest = mappedData + z * rowPitch0 * texture->height + y * rowPitch0 + x * sizeof(uint32_t);

                // index: texel position in the tightly packed source;
                // mipIndex: first channel of the mip 1 texel covering this texel.
                size_t index = z * texture->width * texture->height + y * texture->width + x;
                size_t mipIndex = ((z / 2) * (texture->width / 2) * (texture->height / 2) + (y / 2) * (texture->width / 2) + x / 2) * 4;

                if (function == 0x82BC7820)
                {
                    // Source channels are scaled by 255 before being stored as bytes.
                    // NOTE(review): the casts in this branch have lost their
                    // template arguments in this copy of the file — confirm.
                    auto src = reinterpret_cast*>(data) + index * 4;

                    float r = static_cast(src[0] * 255.0f);
                    float g = static_cast(src[1] * 255.0f);
                    float b = static_cast(src[2] * 255.0f);
                    float a = static_cast(src[3]
* 255.0f);

                    dest[0] = r;
                    dest[1] = g;
                    dest[2] = b;
                    dest[3] = a;

                    mipData[mipIndex + 0] += r;
                    mipData[mipIndex + 1] += g;
                    mipData[mipIndex + 2] += b;
                    mipData[mipIndex + 3] += a;
                }
                else if (function == 0x82BC78A8)
                {
                    // Source is four values per texel, written out in reversed channel order.
                    auto src = reinterpret_cast(data) + index * 4;

                    dest[0] = src[3];
                    dest[1] = src[2];
                    dest[2] = src[1];
                    dest[3] = src[0];

                    mipData[mipIndex + 0] += src[3];
                    mipData[mipIndex + 1] += src[2];
                    mipData[mipIndex + 2] += src[1];
                    mipData[mipIndex + 3] += src[0];
                }
            }
        }
    }

    // Write mip 1: each texel is the average of the 8 mip 0 texels accumulated above.
    for (size_t z = 0; z < texture->depth / 2; z++)
    {
        for (size_t y = 0; y < texture->height / 2; y++)
        {
            for (size_t x = 0; x < texture->width / 2; x++)
            {
                auto dest = mappedData + slicePitch0 + z * rowPitch1 * (texture->height / 2) + y * rowPitch1 + x * sizeof(uint32_t);
                size_t index = (z * (texture->width / 2) * (texture->height / 2) + y * (texture->width / 2) + x) * 4;

                dest[0] = static_cast(mipData[index + 0] / 8.0f);
                dest[1] = static_cast(mipData[index + 1] / 8.0f);
                dest[2] = static_cast(mipData[index + 2] / 8.0f);
                dest[3] = static_cast(mipData[index + 3] / 8.0f);
            }
        }
    }

    uploadBuffer->unmap();

    // Copy both mips from the upload buffer into subresources 0 and 1.
    ExecuteCopyCommandList([&]
        {
            g_copyCommandList->barriers(RenderBarrierStage::COPY, RenderTextureBarrier(texture->texture, RenderTextureLayout::COPY_DEST));

            g_copyCommandList->copyTextureRegion(
                RenderTextureCopyLocation::Subresource(texture->texture, 0),
                RenderTextureCopyLocation::PlacedFootprint(uploadBuffer.get(), texture->format, texture->width, texture->height, texture->depth, rowPitch0 / RenderFormatSize(texture->format), 0));

            g_copyCommandList->copyTextureRegion(
                RenderTextureCopyLocation::Subresource(texture->texture, 1),
                RenderTextureCopyLocation::PlacedFootprint(uploadBuffer.get(), texture->format, texture->width / 2, texture->height / 2, texture->depth / 2, rowPitch1 / RenderFormatSize(texture->format), slicePitch0));
        });

    // Record the layout the texture was left in by the copy.
    texture->layout = RenderTextureLayout::COPY_DEST;
}

// Guest-side picture object layout.
// NOTE(review): the `be` fields have lost their element types in this copy of
// the file — confirm the field widths against upstream.
struct GuestPictureData
{
    be vtable;
    uint8_t flags;
    be name;
    be texture;
    be type;
};

static RenderTextureDimension
ConvertTextureDimension(ddspp::TextureType type)
{
    // Maps a DDS texture type onto the dimension used to create the texture.
    // Cubemaps are created as 2D textures. Unknown types trip the assert and
    // yield RenderTextureDimension::UNKNOWN.
    switch (type)
    {
    case ddspp::Texture1D:
        return RenderTextureDimension::TEXTURE_1D;
    case ddspp::Texture2D:
    case ddspp::Cubemap:
        return RenderTextureDimension::TEXTURE_2D;
    case ddspp::Texture3D:
        return RenderTextureDimension::TEXTURE_3D;
    default:
        assert(false && "Unknown texture type from DDS.");
        return RenderTextureDimension::UNKNOWN;
    }
}

// Maps a DDS texture type onto the dimension used for the texture view.
// Unlike the texture dimension, cubemaps get their own TEXTURE_CUBE value.
// Unknown types trip the assert and yield RenderTextureViewDimension::UNKNOWN.
static RenderTextureViewDimension ConvertTextureViewDimension(ddspp::TextureType type)
{
    switch (type)
    {
    case ddspp::Texture1D:
        return RenderTextureViewDimension::TEXTURE_1D;
    case ddspp::Texture2D:
        return RenderTextureViewDimension::TEXTURE_2D;
    case ddspp::Texture3D:
        return RenderTextureViewDimension::TEXTURE_3D;
    case ddspp::Cubemap:
        return RenderTextureViewDimension::TEXTURE_CUBE;
    default:
        assert(false && "Unknown texture type from DDS.");
        return RenderTextureViewDimension::UNKNOWN;
    }
}

// Maps a DXGI format read from a DDS header onto the matching RenderFormat.
// Formats without a case trip the assert and yield RenderFormat::UNKNOWN.
static RenderFormat ConvertDXGIFormat(ddspp::DXGIFormat format)
{
    switch (format)
    {
    case ddspp::R32G32B32A32_TYPELESS:
        return RenderFormat::R32G32B32A32_TYPELESS;
    case ddspp::R32G32B32A32_FLOAT:
        return RenderFormat::R32G32B32A32_FLOAT;
    case ddspp::R32G32B32A32_UINT:
        return RenderFormat::R32G32B32A32_UINT;
    case ddspp::R32G32B32A32_SINT:
        return RenderFormat::R32G32B32A32_SINT;
    case ddspp::R32G32B32_TYPELESS:
        return RenderFormat::R32G32B32_TYPELESS;
    case ddspp::R32G32B32_FLOAT:
        return RenderFormat::R32G32B32_FLOAT;
    case ddspp::R32G32B32_UINT:
        return RenderFormat::R32G32B32_UINT;
    case ddspp::R32G32B32_SINT:
        return RenderFormat::R32G32B32_SINT;
    case ddspp::R16G16B16A16_TYPELESS:
        return RenderFormat::R16G16B16A16_TYPELESS;
    case ddspp::R16G16B16A16_FLOAT:
        return RenderFormat::R16G16B16A16_FLOAT;
    case ddspp::R16G16B16A16_UNORM:
        return RenderFormat::R16G16B16A16_UNORM;
    case ddspp::R16G16B16A16_UINT:
        return RenderFormat::R16G16B16A16_UINT;
    case ddspp::R16G16B16A16_SNORM:
        return RenderFormat::R16G16B16A16_SNORM;
    case ddspp::R16G16B16A16_SINT:
        return RenderFormat::R16G16B16A16_SINT;
    case ddspp::R32G32_TYPELESS:
return RenderFormat::R32G32_TYPELESS;
    case ddspp::R32G32_FLOAT:
        return RenderFormat::R32G32_FLOAT;
    case ddspp::R32G32_UINT:
        return RenderFormat::R32G32_UINT;
    case ddspp::R32G32_SINT:
        return RenderFormat::R32G32_SINT;
    case ddspp::R8G8B8A8_TYPELESS:
        return RenderFormat::R8G8B8A8_TYPELESS;
    case ddspp::R8G8B8A8_UNORM:
        return RenderFormat::R8G8B8A8_UNORM;
    case ddspp::R8G8B8A8_UINT:
        return RenderFormat::R8G8B8A8_UINT;
    case ddspp::R8G8B8A8_SNORM:
        return RenderFormat::R8G8B8A8_SNORM;
    case ddspp::R8G8B8A8_SINT:
        return RenderFormat::R8G8B8A8_SINT;
    case ddspp::B8G8R8A8_UNORM:
        return RenderFormat::B8G8R8A8_UNORM;
    // The X8 variant has no dedicated format here and reuses the A8 one.
    case ddspp::B8G8R8X8_UNORM:
        return RenderFormat::B8G8R8A8_UNORM;
    case ddspp::R16G16_TYPELESS:
        return RenderFormat::R16G16_TYPELESS;
    case ddspp::R16G16_FLOAT:
        return RenderFormat::R16G16_FLOAT;
    case ddspp::R16G16_UNORM:
        return RenderFormat::R16G16_UNORM;
    case ddspp::R16G16_UINT:
        return RenderFormat::R16G16_UINT;
    case ddspp::R16G16_SNORM:
        return RenderFormat::R16G16_SNORM;
    case ddspp::R16G16_SINT:
        return RenderFormat::R16G16_SINT;
    case ddspp::R32_TYPELESS:
        return RenderFormat::R32_TYPELESS;
    case ddspp::D32_FLOAT:
        return RenderFormat::D32_FLOAT;
    case ddspp::R32_FLOAT:
        return RenderFormat::R32_FLOAT;
    case ddspp::R32_UINT:
        return RenderFormat::R32_UINT;
    case ddspp::R32_SINT:
        return RenderFormat::R32_SINT;
    case ddspp::R8G8_TYPELESS:
        return RenderFormat::R8G8_TYPELESS;
    case ddspp::R8G8_UNORM:
        return RenderFormat::R8G8_UNORM;
    case ddspp::R8G8_UINT:
        return RenderFormat::R8G8_UINT;
    case ddspp::R8G8_SNORM:
        return RenderFormat::R8G8_SNORM;
    case ddspp::R8G8_SINT:
        return RenderFormat::R8G8_SINT;
    case ddspp::R16_TYPELESS:
        return RenderFormat::R16_TYPELESS;
    case ddspp::R16_FLOAT:
        return RenderFormat::R16_FLOAT;
    case ddspp::D16_UNORM:
        return RenderFormat::D16_UNORM;
    case ddspp::R16_UNORM:
        return RenderFormat::R16_UNORM;
    case ddspp::R16_UINT:
        return RenderFormat::R16_UINT;
    case ddspp::R16_SNORM:
        return RenderFormat::R16_SNORM;
    case ddspp::R16_SINT:
        return RenderFormat::R16_SINT;
    case
ddspp::R8_TYPELESS:
        return RenderFormat::R8_TYPELESS;
    case ddspp::R8_UNORM:
        return RenderFormat::R8_UNORM;
    case ddspp::R8_UINT:
        return RenderFormat::R8_UINT;
    case ddspp::R8_SNORM:
        return RenderFormat::R8_SNORM;
    case ddspp::R8_SINT:
        return RenderFormat::R8_SINT;
    // Block-compressed formats.
    case ddspp::BC1_TYPELESS:
        return RenderFormat::BC1_TYPELESS;
    case ddspp::BC1_UNORM:
        return RenderFormat::BC1_UNORM;
    case ddspp::BC1_UNORM_SRGB:
        return RenderFormat::BC1_UNORM_SRGB;
    case ddspp::BC2_TYPELESS:
        return RenderFormat::BC2_TYPELESS;
    case ddspp::BC2_UNORM:
        return RenderFormat::BC2_UNORM;
    case ddspp::BC2_UNORM_SRGB:
        return RenderFormat::BC2_UNORM_SRGB;
    case ddspp::BC3_TYPELESS:
        return RenderFormat::BC3_TYPELESS;
    case ddspp::BC3_UNORM:
        return RenderFormat::BC3_UNORM;
    case ddspp::BC3_UNORM_SRGB:
        return RenderFormat::BC3_UNORM_SRGB;
    case ddspp::BC4_TYPELESS:
        return RenderFormat::BC4_TYPELESS;
    case ddspp::BC4_UNORM:
        return RenderFormat::BC4_UNORM;
    case ddspp::BC4_SNORM:
        return RenderFormat::BC4_SNORM;
    case ddspp::BC5_TYPELESS:
        return RenderFormat::BC5_TYPELESS;
    case ddspp::BC5_UNORM:
        return RenderFormat::BC5_UNORM;
    case ddspp::BC5_SNORM:
        return RenderFormat::BC5_SNORM;
    case ddspp::BC6H_TYPELESS:
        return RenderFormat::BC6H_TYPELESS;
    case ddspp::BC6H_UF16:
        return RenderFormat::BC6H_UF16;
    case ddspp::BC6H_SF16:
        return RenderFormat::BC6H_SF16;
    case ddspp::BC7_TYPELESS:
        return RenderFormat::BC7_TYPELESS;
    case ddspp::BC7_UNORM:
        return RenderFormat::BC7_UNORM;
    case ddspp::BC7_UNORM_SRGB:
        return RenderFormat::BC7_UNORM_SRGB;
    default:
        assert(false && "Unsupported format from DDS.");
        return RenderFormat::UNKNOWN;
    }
}

static void MakePictureData(GuestPictureData* pictureData, uint8_t* data, uint32_t dataSize)
{
    if ((pictureData->flags & 0x1) == 0)
    {
        ddspp::Descriptor ddsDesc;
        if (ddspp::decode_header(data, ddsDesc) != ddspp::Error)
        {
            const auto texture = g_userHeap.AllocPhysical(ResourceType::Texture);

            RenderTextureDesc desc;
            desc.dimension = ConvertTextureDimension(ddsDesc.type);
            desc.width = ddsDesc.width;
desc.height = ddsDesc.height; desc.depth = ddsDesc.depth; desc.mipLevels = ddsDesc.numMips; desc.arraySize = ddsDesc.type == ddspp::TextureType::Cubemap ? ddsDesc.arraySize * 6 : ddsDesc.arraySize; desc.format = ConvertDXGIFormat(ddsDesc.format); desc.flags = ddsDesc.type == ddspp::TextureType::Cubemap ? RenderTextureFlag::CUBE : RenderTextureFlag::NONE; texture->textureHolder = g_device->createTexture(desc); texture->texture = texture->textureHolder.get(); texture->layout = RenderTextureLayout::COPY_DEST; #ifdef _DEBUG texture->texture->setName(reinterpret_cast(g_memory.Translate(pictureData->name + 2))); #endif RenderTextureViewDesc viewDesc; viewDesc.format = desc.format; viewDesc.dimension = ConvertTextureViewDimension(ddsDesc.type); viewDesc.mipLevels = ddsDesc.numMips; texture->textureView = texture->texture->createTextureView(viewDesc); texture->descriptorIndex = g_textureDescriptorAllocator.allocate(); g_textureDescriptorSet->setTexture(texture->descriptorIndex, texture->texture, RenderTextureLayout::SHADER_READ, texture->textureView.get()); texture->viewDimension = viewDesc.dimension; struct Slice { uint32_t width; uint32_t height; uint32_t depth; uint32_t srcOffset; uint32_t dstOffset; uint32_t srcRowPitch; uint32_t dstRowPitch; uint32_t rowCount; }; std::vector slices; uint32_t curSrcOffset = 0; uint32_t curDstOffset = 0; for (uint32_t arraySlice = 0; arraySlice < desc.arraySize; arraySlice++) { for (uint32_t mipSlice = 0; mipSlice < ddsDesc.numMips; mipSlice++) { auto& slice = slices.emplace_back(); slice.width = std::max(1u, ddsDesc.width >> mipSlice); slice.height = std::max(1u, ddsDesc.height >> mipSlice); slice.depth = std::max(1u, ddsDesc.depth >> mipSlice); slice.srcOffset = curSrcOffset; slice.dstOffset = curDstOffset; uint32_t rowPitch = ((slice.width + ddsDesc.blockWidth - 1) / ddsDesc.blockWidth) * ddsDesc.bitsPerPixelOrBlock; slice.srcRowPitch = (rowPitch + 7) / 8; slice.dstRowPitch = (slice.srcRowPitch + PITCH_ALIGNMENT - 1) & 
~(PITCH_ALIGNMENT - 1); slice.rowCount = (slice.height + ddsDesc.blockHeight - 1) / ddsDesc.blockHeight; curSrcOffset += slice.srcRowPitch * slice.rowCount * slice.depth; curDstOffset += (slice.dstRowPitch * slice.rowCount * slice.depth + PLACEMENT_ALIGNMENT - 1) & ~(PLACEMENT_ALIGNMENT - 1); } } auto uploadBuffer = g_device->createBuffer(RenderBufferDesc::UploadBuffer(curDstOffset)); uint8_t* mappedMemory = reinterpret_cast(uploadBuffer->map()); for (auto& slice : slices) { uint8_t* srcData = data + ddsDesc.headerSize + slice.srcOffset; uint8_t* dstData = mappedMemory + slice.dstOffset; if (slice.srcRowPitch == slice.dstRowPitch) { memcpy(dstData, srcData, slice.srcRowPitch * slice.rowCount * slice.depth); } else { for (size_t i = 0; i < slice.rowCount * slice.depth; i++) { memcpy(dstData, srcData, slice.srcRowPitch); srcData += slice.srcRowPitch; dstData += slice.dstRowPitch; } } } uploadBuffer->unmap(); ExecuteCopyCommandList([&] { g_copyCommandList->barriers(RenderBarrierStage::COPY, RenderTextureBarrier(texture->texture, RenderTextureLayout::COPY_DEST)); for (size_t i = 0; i < slices.size(); i++) { auto& slice = slices[i]; g_copyCommandList->copyTextureRegion( RenderTextureCopyLocation::Subresource(texture->texture, i), RenderTextureCopyLocation::PlacedFootprint(uploadBuffer.get(), desc.format, slice.width, slice.height, slice.depth, (slice.dstRowPitch * 8) / ddsDesc.bitsPerPixelOrBlock * ddsDesc.blockWidth, slice.dstOffset)); } }); pictureData->texture = g_memory.MapVirtual(texture); pictureData->type = 0; } else { int width, height; void* stbImage = stbi_load_from_memory(data, dataSize, &width, &height, nullptr, 4); if (stbImage != nullptr) { const auto texture = g_userHeap.AllocPhysical(ResourceType::Texture); texture->textureHolder = g_device->createTexture(RenderTextureDesc::Texture2D(width, height, 1, RenderFormat::R8G8B8A8_UNORM)); texture->texture = texture->textureHolder.get(); texture->viewDimension = RenderTextureViewDimension::TEXTURE_2D; 
texture->layout = RenderTextureLayout::COPY_DEST; texture->descriptorIndex = g_textureDescriptorAllocator.allocate(); g_textureDescriptorSet->setTexture(texture->descriptorIndex, texture->texture, RenderTextureLayout::SHADER_READ); uint32_t rowPitch = (width * 4 + PITCH_ALIGNMENT - 1) & ~(PITCH_ALIGNMENT - 1); uint32_t slicePitch = rowPitch * height; auto uploadBuffer = g_device->createBuffer(RenderBufferDesc::UploadBuffer(slicePitch)); uint8_t* mappedMemory = reinterpret_cast(uploadBuffer->map()); if (rowPitch == (width * 4)) { memcpy(mappedMemory, stbImage, slicePitch); } else { auto data = reinterpret_cast(stbImage); for (size_t i = 0; i < height; i++) { memcpy(mappedMemory, data, width * 4); data += width * 4; mappedMemory += rowPitch; } } uploadBuffer->unmap(); stbi_image_free(stbImage); ExecuteCopyCommandList([&] { g_copyCommandList->barriers(RenderBarrierStage::COPY, RenderTextureBarrier(texture->texture, RenderTextureLayout::COPY_DEST)); g_copyCommandList->copyTextureRegion( RenderTextureCopyLocation::Subresource(texture->texture, 0), RenderTextureCopyLocation::PlacedFootprint(uploadBuffer.get(), RenderFormat::R8G8B8A8_UNORM, width, height, 1, rowPitch / 4, 0)); }); pictureData->texture = g_memory.MapVirtual(texture); pictureData->type = 0; } } } } void IndexBufferLengthMidAsmHook(PPCRegister& r3) { r3.u64 *= 2; } void SetShadowResolutionMidAsmHook(PPCRegister& r11) { auto res = (int32_t)Config::ShadowResolution.Value; if (res > 0) r11.u64 = res; } static void SetResolution(be* device) { uint32_t width = uint32_t(g_swapChain->getWidth() * Config::ResolutionScale); uint32_t height = uint32_t(g_swapChain->getHeight() * Config::ResolutionScale); device[46] = width == 0 ? 880 : width; device[47] = height == 0 ? 
720 : height; } static uint32_t StubFunction() { return 0; } static GuestShader* g_movieVertexShader; static GuestShader* g_moviePixelShader; static GuestVertexDeclaration* g_movieVertexDeclaration; static void ScreenShaderInit(be* a1, uint32_t a2, uint32_t a3, GuestVertexElement* vertexElements) { if (g_moviePixelShader == nullptr) { g_moviePixelShader = g_userHeap.AllocPhysical(ResourceType::PixelShader); g_moviePixelShader->shader = CREATE_SHADER(movie_ps); } if (g_movieVertexShader == nullptr) { g_movieVertexShader = g_userHeap.AllocPhysical(ResourceType::VertexShader); g_movieVertexShader->shader = CREATE_SHADER(movie_vs); } if (g_movieVertexDeclaration == nullptr) g_movieVertexDeclaration = CreateVertexDeclaration(vertexElements); g_moviePixelShader->AddRef(); g_movieVertexShader->AddRef(); g_movieVertexDeclaration->AddRef(); a1[2] = g_memory.MapVirtual(g_moviePixelShader); a1[3] = g_memory.MapVirtual(g_movieVertexShader); a1[4] = g_memory.MapVirtual(g_movieVertexDeclaration); } void MovieRendererMidAsmHook(PPCRegister& r3) { auto device = reinterpret_cast(g_memory.Translate(r3.u32)); // Force linear filtering & clamp addressing for (size_t i = 0; i < 3; i++) { device->samplerStates[i].data[0] = (device->samplerStates[i].data[0].get() & ~0x7fc00) | 0x24800; device->samplerStates[i].data[3] = (device->samplerStates[i].data[3].get() & ~0x1f80000) | 0x1280000; } device->dirtyFlags[3] = device->dirtyFlags[3].get() | 0xe0000000ull; } static PPCRegister g_r4; static PPCRegister g_r5; // CRenderDirectorFxPipeline::Initialize PPC_FUNC_IMPL(__imp__sub_8258C8A0); PPC_FUNC(sub_8258C8A0) { g_r4 = ctx.r4; g_r5 = ctx.r5; __imp__sub_8258C8A0(ctx, base); } // CRenderDirectorFxPipeline::Update PPC_FUNC_IMPL(__imp__sub_8258CAE0); PPC_FUNC(sub_8258CAE0) { if (g_needsResize) { auto r3 = ctx.r3; ctx.r4 = g_r4; ctx.r5 = g_r5; __imp__sub_8258C8A0(ctx, base); ctx.r3 = r3; g_needsResize = false; } __imp__sub_8258CAE0(ctx, base); } void PostProcessResolutionFix(PPCRegister& r4, 
PPCRegister& f1, PPCRegister& f2) { auto device = reinterpret_cast*>(g_memory.Translate(r4.u32)); uint32_t width = device[46].get(); uint32_t height = device[47].get(); #if 0 // TODO: Figure out why this breaks for height > weight double factor; if (width > height) factor = 720.0 / double(height); else factor = 1280.0 / double(width); #else double factor = 720.0 / double(height); #endif f1.f64 *= factor; f2.f64 *= factor; } void LightShaftAspectRatioFix(PPCRegister& f28, PPCRegister& f0) { f28.f64 = f0.f64; } static const be g_particleTestIndexBuffer[] = { 0, 1, 2, 0, 2, 3, 0, 3, 4, 0, 4, 5 }; bool ParticleTestIndexBufferMidAsmHook(PPCRegister& r30) { if (!g_triangleFanSupported) { auto buffer = CreateIndexBuffer(sizeof(g_particleTestIndexBuffer), 0, D3DFMT_INDEX16); void* memory = LockIndexBuffer(buffer, 0, 0, 0); memcpy(memory, g_particleTestIndexBuffer, sizeof(g_particleTestIndexBuffer)); UnlockIndexBuffer(buffer); r30.u32 = g_memory.MapVirtual(buffer); return true; } return false; } void ParticleTestDrawIndexedPrimitiveMidAsmHook(PPCRegister& r7) { if (!g_triangleFanSupported) r7.u64 = std::size(g_particleTestIndexBuffer); } GUEST_FUNCTION_HOOK(sub_82BD99B0, CreateDevice); GUEST_FUNCTION_HOOK(sub_82BE6230, DestructResource); GUEST_FUNCTION_HOOK(sub_82BE9300, LockTextureRect); GUEST_FUNCTION_HOOK(sub_82BE7780, UnlockTextureRect); GUEST_FUNCTION_HOOK(sub_82BE6B98, LockVertexBuffer); GUEST_FUNCTION_HOOK(sub_82BE6BE8, UnlockVertexBuffer); GUEST_FUNCTION_HOOK(sub_82BE61D0, GetVertexBufferDesc); GUEST_FUNCTION_HOOK(sub_82BE6CA8, LockIndexBuffer); GUEST_FUNCTION_HOOK(sub_82BE6CF0, UnlockIndexBuffer); GUEST_FUNCTION_HOOK(sub_82BE6200, GetIndexBufferDesc); GUEST_FUNCTION_HOOK(sub_82BE96F0, GetSurfaceDesc); GUEST_FUNCTION_HOOK(sub_82BE04B0, GetVertexDeclaration); GUEST_FUNCTION_HOOK(sub_82BE0530, HashVertexDeclaration); GUEST_FUNCTION_HOOK(sub_82BDA8C0, Present); GUEST_FUNCTION_HOOK(sub_82BDD330, GetBackBuffer); GUEST_FUNCTION_HOOK(sub_82BE9498, CreateTexture); 
// Guest function hooks, continued: each entry pairs a guest function symbol with
// the host function that takes its place.

// Resource creation.
GUEST_FUNCTION_HOOK(sub_82BE6AD0, CreateVertexBuffer);
GUEST_FUNCTION_HOOK(sub_82BE6BF8, CreateIndexBuffer);
GUEST_FUNCTION_HOOK(sub_82BE95B8, CreateSurface);

GUEST_FUNCTION_HOOK(sub_82BF6400, StretchRect);

// Render state.
GUEST_FUNCTION_HOOK(sub_82BDD9F0, SetRenderTarget);
GUEST_FUNCTION_HOOK(sub_82BDDD38, SetDepthStencilSurface);

GUEST_FUNCTION_HOOK(sub_82BFE4C8, Clear);

GUEST_FUNCTION_HOOK(sub_82BDD8C0, SetViewport);

GUEST_FUNCTION_HOOK(sub_82BE9818, SetTexture);
GUEST_FUNCTION_HOOK(sub_82BDCFB0, SetScissorRect);

// Draw calls.
GUEST_FUNCTION_HOOK(sub_82BE5900, DrawPrimitive);
GUEST_FUNCTION_HOOK(sub_82BE5CF0, DrawIndexedPrimitive);
GUEST_FUNCTION_HOOK(sub_82BE52F8, DrawPrimitiveUP);

// Vertex input and shaders.
GUEST_FUNCTION_HOOK(sub_82BE0428, CreateVertexDeclaration);
GUEST_FUNCTION_HOOK(sub_82BE02E0, SetVertexDeclaration);

GUEST_FUNCTION_HOOK(sub_82BE1A80, CreateVertexShader);
GUEST_FUNCTION_HOOK(sub_82BE0110, SetVertexShader);

GUEST_FUNCTION_HOOK(sub_82BDD0F8, SetStreamSource);
GUEST_FUNCTION_HOOK(sub_82BDD218, SetIndices);

GUEST_FUNCTION_HOOK(sub_82BE1990, CreatePixelShader);
GUEST_FUNCTION_HOOK(sub_82BDFE58, SetPixelShader);

// Texture fill helpers.
GUEST_FUNCTION_HOOK(sub_82C003B8, D3DXFillTexture);
GUEST_FUNCTION_HOOK(sub_82C00910, D3DXFillVolumeTexture);

// Game-specific hooks.
GUEST_FUNCTION_HOOK(sub_82E43FC8, MakePictureData);

GUEST_FUNCTION_HOOK(sub_82E9EE38, SetResolution);

GUEST_FUNCTION_HOOK(sub_82BE77B0, StubFunction);

GUEST_FUNCTION_HOOK(sub_82AE2BF8, ScreenShaderInit);

// Guest functions registered through GUEST_FUNCTION_STUB rather than hooked to a
// host function. Presumably the macro (defined elsewhere) turns them into no-ops — confirm.
GUEST_FUNCTION_STUB(sub_822C15D8);
GUEST_FUNCTION_STUB(sub_822C1810);
GUEST_FUNCTION_STUB(sub_82BD97A8);
GUEST_FUNCTION_STUB(sub_82BD97E8);
GUEST_FUNCTION_STUB(sub_82BDD370); // SetGammaRamp
GUEST_FUNCTION_STUB(sub_82BE05B8);
GUEST_FUNCTION_STUB(sub_82BE9C98);
GUEST_FUNCTION_STUB(sub_82BEA308);
GUEST_FUNCTION_STUB(sub_82CD5D68);
GUEST_FUNCTION_STUB(sub_82BE9B28);
GUEST_FUNCTION_STUB(sub_82BEA018);
GUEST_FUNCTION_STUB(sub_82BEA7C0);
GUEST_FUNCTION_STUB(sub_82BFFF88); // D3DXFilterTexture
GUEST_FUNCTION_STUB(sub_82BD96D0);