From 17f1fcdcbcdff902eb2b8ee677cee3b942a27f40 Mon Sep 17 00:00:00 2001
From: Skyth <19259897+blueskythlikesclouds@users.noreply.github.com>
Date: Wed, 27 Nov 2024 19:58:34 +0300
Subject: [PATCH] Precompile sparkle shaders in loading screens.

---
 .../api/Hedgehog/Sparkle/hhParticleMaterial.h |  41 ++++
 UnleashedRecomp/api/SWA.h                     |   1 +
 UnleashedRecomp/gpu/video.cpp                 | 205 +++++++++++++++---
 UnleashedRecomp/gpu/video.h                   |  13 ++
 UnleashedRecompLib/config/SWA.toml            |  18 +-
 5 files changed, 240 insertions(+), 38 deletions(-)
 create mode 100644 UnleashedRecomp/api/Hedgehog/Sparkle/hhParticleMaterial.h
diff --git a/UnleashedRecomp/api/Hedgehog/Sparkle/hhParticleMaterial.h b/UnleashedRecomp/api/Hedgehog/Sparkle/hhParticleMaterial.h
new file mode 100644
index 00000000..75bb1b33
--- /dev/null
+++ b/UnleashedRecomp/api/Hedgehog/Sparkle/hhParticleMaterial.h
@@ -0,0 +1,41 @@
+#pragma once
+
+#include <boost/smart_ptr/shared_ptr.h>
+
+#include <Hedgehog/Base/Container/hhVector.h>
+#include <Hedgehog/Database/System/hhDatabaseData.h>
+
+namespace Hedgehog::Mirage
+{
+    class CShaderListData;
+}
+
+namespace Hedgehog::Sparkle
+{
+    class CParticleMaterial : public Hedgehog::Database::CDatabaseData
+    {
+    public:
+        enum EBlendMode
+        {
+            eBlendMode_Zero,
+            eBlendMode_Typical,
+            eBlendMode_Add,
+            eBlendMode_Subtract
+        };
+
+        hh::vector<boost::anonymous_shared_ptr> m_spFieldC;
+        boost::shared_ptr<Hedgehog::Mirage::CShaderListData> m_spDefaultShaderListData; // BillboardParticle_d[v]
+        boost::shared_ptr<Hedgehog::Mirage::CShaderListData> m_spShaderListData;
+        bool m_Field2C;
+        be<uint32_t> m_BlendMode;
+        be<uint32_t> m_AddressMode;
+        Hedgehog::Base::CSharedString m_MaterialName;
+        Hedgehog::Base::CSharedString m_TextureName;
+        Hedgehog::Base::CSharedString m_DeflectionTextureName;
+        Hedgehog::Base::CSharedString m_ShaderName;
+        be<float> m_Field48;
+        be<float> m_Field4C;
+    };
+
+    SWA_ASSERT_SIZEOF(CParticleMaterial, 0x50);
+}
diff --git a/UnleashedRecomp/api/SWA.h b/UnleashedRecomp/api/SWA.h
index c1691418..f72f0e19 100644
--- a/UnleashedRecomp/api/SWA.h
+++ b/UnleashedRecomp/api/SWA.h
@@ -42,6 +42,7 @@
 #include "Hedgehog/MirageCore/RenderData/hhVertexShaderCodeData.h"
 #include "Hedgehog/MirageCore/RenderData/hhVertexShaderData.h"
 #include "Hedgehog/MirageCore/Renderable/hhRenderable.h"
+#include "Hedgehog/Sparkle/hhParticleMaterial.h"
 #include "Hedgehog/Universe/Engine/hhMessageActor.h"
 #include "Hedgehog/Universe/Engine/hhMessageProcess.h"
 #include "Hedgehog/Universe/Engine/hhUpdateInfo.h"
diff --git a/UnleashedRecomp/gpu/video.cpp b/UnleashedRecomp/gpu/video.cpp
index e2cc5de0..2ccb663f 100644
--- a/UnleashedRecomp/gpu/video.cpp
+++ b/UnleashedRecomp/gpu/video.cpp
@@ -247,8 +247,8 @@ static std::string g_pipelineDebugText;
 static Mutex g_debugMutex;
 #endif
 
-static std::atomic<uint32_t> g_compilingModelCount;
-static std::atomic<uint32_t> g_pendingModelCount;
+static std::atomic<uint32_t> g_compilingDataCount;
+static std::atomic<uint32_t> g_pendingDataCount;
 
 static xxHashMap<std::pair<uint32_t, std::unique_ptr<RenderSampler>>> g_samplerStates;
 
@@ -1684,8 +1684,8 @@ static void DrawImGui()
         ImGui::Text("Pipelines Created In Render Thread: %d", g_pipelinesCreatedInRenderThread.load());
         ImGui::Text("Pipelines Created Asynchronously: %d", g_pipelinesCreatedAsynchronously.load());
         ImGui::Text("Pipelines Dropped: %d", g_pipelinesDropped.load());
-        ImGui::Text("Compiling Model Count: %d", g_compilingModelCount.load());
-        ImGui::Text("Pending Model Count: %d", g_pendingModelCount.load());
+        ImGui::Text("Compiling Data Count: %d", g_compilingDataCount.load());
+        ImGui::Text("Pending Data Count: %d", g_pendingDataCount.load());
 
         std::lock_guard lock(g_debugMutex);
         ImGui::TextUnformatted(g_pipelineDebugText.c_str());
@@ -4354,8 +4354,9 @@ enum
 
 static constexpr uint32_t MODEL_DATA_VFTABLE = 0x82073A44;
 static constexpr uint32_t TERRAIN_MODEL_DATA_VFTABLE = 0x8211D25C;
+static constexpr uint32_t PARTICLE_MATERIAL_VFTABLE = 0x8211F198;
 
-static moodycamel::BlockingConcurrentQueue<boost::shared_ptr<Hedgehog::Database::CDatabaseData>> g_compilingModelQueue;
+static moodycamel::BlockingConcurrentQueue<boost::shared_ptr<Hedgehog::Database::CDatabaseData>> g_compilingDataQueue;
 
 // Having this separate, because I don't want to lock a mutex in the render thread before
 // every single draw. Might be worth profiling to see if it actually has an impact and merge them.
@@ -4657,6 +4658,122 @@ static void CompileMeshPipelines(const T& modelData, const CompilationArgs& args
         CompileMeshPipeline(mesh.get(), MeshLayer::PunchThrough, args);
 }
 
+static void CompileParticleMaterialPipeline(const Hedgehog::Sparkle::CParticleMaterial& material)
+{
+    auto& shaderList = material.m_spShaderListData;
+    if (shaderList.get() == nullptr)
+        return;
+
+    guest_stack_var<Hedgehog::Base::CStringSymbol> defaultSymbol(reinterpret_cast<const char*>(g_memory.Translate(0x8202DDBC)));
+    auto defaultFindResult = shaderList->m_PixelShaderPermutations.find(*defaultSymbol);
+    if (defaultFindResult == shaderList->m_PixelShaderPermutations.end())
+        return;
+
+    guest_stack_var<Hedgehog::Base::CStringSymbol> noneSymbol(reinterpret_cast<const char*>(g_memory.Translate(0x8200D938)));
+    auto noneFindResult = defaultFindResult->second.m_VertexShaderPermutations.find(*noneSymbol);
+    if (noneFindResult == defaultFindResult->second.m_VertexShaderPermutations.end())
+        return;
+
+    // All the particle models in the game come with the unoptimized format, so we can assume it.
+    uint8_t unoptimizedVertexElements[144] = 
+    {
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x2A, 0x23, 0xB9, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x0C, 0x00, 0x2A, 0x23, 0xB9, 0x00, 0x03, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x18, 0x00, 0x2A, 0x23, 0xB9, 0x00, 0x06, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x24, 0x00, 0x2A, 0x23, 0xB9, 0x00, 0x07, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x30, 0x00, 0x2C, 0x23, 0xA5, 0x00, 0x05, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x38, 0x00, 0x2C, 0x23, 0xA5, 0x00, 0x05, 0x01, 0x00,
+        0x00, 0x00, 0x00, 0x40, 0x00, 0x2C, 0x23, 0xA5, 0x00, 0x05, 0x02, 0x00,
+        0x00, 0x00, 0x00, 0x48, 0x00, 0x2C, 0x23, 0xA5, 0x00, 0x05, 0x03, 0x00,
+        0x00, 0x00, 0x00, 0x50, 0x00, 0x1A, 0x23, 0xA6, 0x00, 0x0A, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x60, 0x00, 0x1A, 0x23, 0x86, 0x00, 0x02, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x64, 0x00, 0x1A, 0x20, 0x86, 0x00, 0x01, 0x00, 0x00,
+        0x00, 0xFF, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00
+    };
+
+    auto unoptimizedVertexDeclaration = CreateVertexDeclaration(reinterpret_cast<GuestVertexElement*>(unoptimizedVertexElements));
+    auto sparkleVertexDeclaration = CreateVertexDeclaration(reinterpret_cast<GuestVertexElement*>(g_memory.Translate(0x8211F540)));
+
+    bool isMeshShader = strstr(shaderList->m_TypeAndName.c_str(), "Mesh") != nullptr;
+
+    PipelineState pipelineState{};
+    pipelineState.vertexShader = reinterpret_cast<GuestShader*>(noneFindResult->second->m_VertexShaders.begin()->second->m_spCode->m_pD3DVertexShader.get());
+    pipelineState.pixelShader = reinterpret_cast<GuestShader*>(defaultFindResult->second.m_PixelShaders.begin()->second->m_spCode->m_pD3DPixelShader.get());
+    pipelineState.vertexDeclaration = isMeshShader ? unoptimizedVertexDeclaration : sparkleVertexDeclaration;
+    pipelineState.zWriteEnable = false;
+    pipelineState.srcBlend = RenderBlend::SRC_ALPHA;
+    pipelineState.destBlend = RenderBlend::INV_SRC_ALPHA;
+    pipelineState.zFunc = RenderComparisonFunction::GREATER_EQUAL;
+    pipelineState.alphaBlendEnable = true;
+    pipelineState.srcBlendAlpha = RenderBlend::SRC_ALPHA;
+    pipelineState.destBlendAlpha = RenderBlend::INV_SRC_ALPHA;
+    pipelineState.primitiveTopology = RenderPrimitiveTopology::TRIANGLE_STRIP;
+    pipelineState.vertexStrides[0] = isMeshShader ? 104 : 28;
+    pipelineState.renderTargetFormat = RenderFormat::R16G16B16A16_FLOAT;
+    pipelineState.depthStencilFormat = RenderFormat::D32_FLOAT;
+    pipelineState.sampleCount = Config::MSAA > 1 ? Config::MSAA : 1;
+    pipelineState.specConstants = SPEC_CONSTANT_REVERSE_Z;
+
+    switch (material.m_BlendMode.get())
+    {
+    case Hedgehog::Sparkle::CParticleMaterial::eBlendMode_Zero:
+        // TODO: What are the render states for this??
+        break;
+    case Hedgehog::Sparkle::CParticleMaterial::eBlendMode_Typical:
+        // Leave default.
+        break;
+    case Hedgehog::Sparkle::CParticleMaterial::eBlendMode_Add:
+        pipelineState.destBlend = RenderBlend::ONE;
+        break;
+    case Hedgehog::Sparkle::CParticleMaterial::eBlendMode_Subtract:
+        // TODO: Is this correct?
+        pipelineState.destBlend = RenderBlend::ONE;
+        pipelineState.blendOp = RenderBlendOperation::SUBTRACT;
+        break;
+    }
+
+    auto createGraphicsPipeline = [&](PipelineState& pipelineStateToCreate)
+        {
+            SanitizePipelineState(pipelineStateToCreate);
+            CreateGraphicsPipelineInPipelineThread(pipelineStateToCreate, shaderList->m_TypeAndName.c_str() + 3);
+        };
+
+    // TODO: See if this is necessary for everything.
+    RenderCullMode cullModes[] = { RenderCullMode::NONE, RenderCullMode::BACK };
+
+    for (auto cullMode : cullModes)
+    {
+        pipelineState.cullMode = cullMode;
+        createGraphicsPipeline(pipelineState);
+
+        bool planarReflectionEnabled = reinterpret_cast<bool*>(g_memory.Translate(0x832FA0D8));
+
+        auto noMsaaPipelineState = pipelineState;
+        noMsaaPipelineState.sampleCount = 1;
+
+        if (planarReflectionEnabled)
+            createGraphicsPipeline(noMsaaPipelineState);
+
+        if (!isMeshShader)
+        {
+            // Previous compilation was for locus particles. This one will be for quads.
+            auto quadPipelineState = pipelineState;
+            quadPipelineState.primitiveTopology = RenderPrimitiveTopology::TRIANGLE_LIST;
+            createGraphicsPipeline(quadPipelineState);
+
+            if (planarReflectionEnabled)
+            {
+                auto noMsaaQuadPipelineState = noMsaaPipelineState;
+                noMsaaQuadPipelineState.primitiveTopology = RenderPrimitiveTopology::TRIANGLE_LIST;
+                createGraphicsPipeline(noMsaaQuadPipelineState);
+            }
+        }
+    }
+
+    unoptimizedVertexDeclaration->Release();
+    sparkleVertexDeclaration->Release();
+}
+
 static void PipelineCompilerThread()
 {
     GuestThread::SetThreadName(GetCurrentThreadId(), "Pipeline Compiler Thread");
@@ -4667,7 +4784,7 @@ static void PipelineCompilerThread()
     while (true)
     {
         boost::shared_ptr<Hedgehog::Database::CDatabaseData> databaseData;
-        g_compilingModelQueue.wait_dequeue(databaseData);
+        g_compilingDataQueue.wait_dequeue(databaseData);
 
         if (stack == nullptr)
         {
@@ -4681,9 +4798,15 @@ static void PipelineCompilerThread()
         if (databaseData->m_pVftable.ptr == TERRAIN_MODEL_DATA_VFTABLE)
         {
             CompileMeshPipelines(*reinterpret_cast<Hedgehog::Mirage::CTerrainModelData*>(databaseData.get()), {});
+        }        
+        else if (databaseData->m_pVftable.ptr == PARTICLE_MATERIAL_VFTABLE)
+        {
+            CompileParticleMaterialPipeline(*reinterpret_cast<Hedgehog::Sparkle::CParticleMaterial*>(databaseData.get()));
         }
         else
         {
+            assert(databaseData->m_pVftable.ptr == MODEL_DATA_VFTABLE);
+
             auto modelData = reinterpret_cast<Hedgehog::Mirage::CModelData*>(databaseData.get());
 
             CompilationArgs args{};
@@ -4708,8 +4831,8 @@ static void PipelineCompilerThread()
 
         databaseData->m_Flags &= ~eDatabaseDataFlags_CompilingPipelines;
 
-        if ((--g_compilingModelCount) == 0)
-            g_compilingModelCount.notify_all();
+        if ((--g_compilingDataCount) == 0)
+            g_compilingDataCount.notify_all();
     }
 
     g_userHeap.Free(stack);
@@ -4723,8 +4846,8 @@ PPC_FUNC(sub_825369A0)
 {
     // Wait for pipeline compilations to finish.
     uint32_t value;
-    while ((value = g_compilingModelCount.load()) != 0)
-        g_compilingModelCount.wait(value);
+    while ((value = g_compilingDataCount.load()) != 0)
+        g_compilingDataCount.wait(value);
 
     __imp__sub_825369A0(ctx, base);
 }
@@ -4757,37 +4880,51 @@ PPC_FUNC(sub_82E243D8)
     }
 }
 
-static Mutex g_pendingModelMutex;
-static std::vector<boost::shared_ptr<Hedgehog::Database::CDatabaseData>> g_pendingModelQueue;
+// CParticleMaterial::CheckMadeAll
+PPC_FUNC_IMPL(__imp__sub_82E87598);
+PPC_FUNC(sub_82E87598)
+{   
+    if (reinterpret_cast<Hedgehog::Database::CDatabaseData*>(base + ctx.r3.u32)->m_Flags & eDatabaseDataFlags_CompilingPipelines)
+    {
+        ctx.r3.u64 = 0;
+    }
+    else
+    {
+        __imp__sub_82E87598(ctx, base);
+    }
+}
 
-void GetModelDataMidAsmHook(PPCRegister& r1, PPCRegister& r31)
+static Mutex g_pendingModelMutex;
+static std::vector<boost::shared_ptr<Hedgehog::Database::CDatabaseData>> g_pendingDataQueue;
+
+void GetDatabaseDataMidAsmHook(PPCRegister& r1, PPCRegister& r4)
 {
     auto& databaseData = *reinterpret_cast<boost::shared_ptr<Hedgehog::Database::CDatabaseData>*>(
         g_memory.Translate(r1.u32 + 0x58));
 
-    if (!databaseData->IsMadeOne() && r31.u32 != NULL)
+    if (!databaseData->IsMadeOne() && r4.u32 != NULL)
     {
         if (databaseData->m_pVftable.ptr == MODEL_DATA_VFTABLE)
         {
             // Ignore particle models, the materials they point at don't actually
             // get used and give the threads unnecessary work.
-            bool isParticleModel = *reinterpret_cast<be<uint32_t>*>(g_memory.Translate(r31.u32 + 4)) != 5 &&
+            bool isParticleModel = *reinterpret_cast<be<uint32_t>*>(g_memory.Translate(r4.u32 + 4)) != 5 &&
                 strncmp(databaseData->m_TypeAndName.c_str() + 2, "eff_", 4) == 0;
 
             if (isParticleModel)
                 return;
         }
 
-        ++g_compilingModelCount;
+        ++g_compilingDataCount;
         databaseData->m_Flags |= eDatabaseDataFlags_CompilingPipelines;
 
         {
             std::lock_guard lock(g_pendingModelMutex);
-            g_pendingModelQueue.push_back(databaseData);
+            g_pendingDataQueue.push_back(databaseData);
         }
 
-        ++g_pendingModelCount;
-        g_pendingModelCount.notify_all();
+        ++g_pendingDataCount;
+        g_pendingDataCount.notify_all();
     }
 }
 
@@ -4878,38 +5015,38 @@ static void ModelConsumerThread()
 {
     GuestThread::SetThreadName(GetCurrentThreadId(), "Model Consumer Thread");
 
-    std::vector<boost::shared_ptr<Hedgehog::Database::CDatabaseData>> localPendingModelQueue;
+    std::vector<boost::shared_ptr<Hedgehog::Database::CDatabaseData>> localPendingDataQueue;
 
     while (true)
     {
         // Wait for models to arrive.
-        uint32_t pendingModelCount;
-        while ((pendingModelCount = g_pendingModelCount.load()) == 0)
-            g_pendingModelCount.wait(pendingModelCount);
+        uint32_t pendingDataCount;
+        while ((pendingDataCount = g_pendingDataCount.load()) == 0)
+            g_pendingDataCount.wait(pendingDataCount);
 
         {
             std::lock_guard lock(g_pendingModelMutex);
-            localPendingModelQueue.insert(localPendingModelQueue.end(), g_pendingModelQueue.begin(), g_pendingModelQueue.end());
-            g_pendingModelQueue.clear();
+            localPendingDataQueue.insert(localPendingDataQueue.end(), g_pendingDataQueue.begin(), g_pendingDataQueue.end());
+            g_pendingDataQueue.clear();
         }
 
         bool allHandled = true;
 
-        for (auto& pendingModel : localPendingModelQueue)
+        for (auto& pendingData : localPendingDataQueue)
         {
-            if (pendingModel.get() != nullptr)
+            if (pendingData.get() != nullptr)
             {
                 bool ready = false;
 
-                if (pendingModel->m_pVftable.ptr == TERRAIN_MODEL_DATA_VFTABLE)
-                    ready = CheckMadeAll(*reinterpret_cast<Hedgehog::Mirage::CTerrainModelData*>(pendingModel.get()));
+                if (pendingData->m_pVftable.ptr == MODEL_DATA_VFTABLE)
+                    ready = CheckMadeAll(*reinterpret_cast<Hedgehog::Mirage::CModelData*>(pendingData.get()));
                 else
-                    ready = CheckMadeAll(*reinterpret_cast<Hedgehog::Mirage::CModelData*>(pendingModel.get()));
+                    ready = pendingData->IsMadeOne();
 
-                if (ready || pendingModel.unique())
+                if (ready || pendingData.unique())
                 {
-                    g_compilingModelQueue.enqueue(std::move(pendingModel));
-                    --g_pendingModelCount;
+                    g_compilingDataQueue.enqueue(std::move(pendingData));
+                    --g_pendingDataCount;
                 }
                 else
                 {
@@ -4919,7 +5056,7 @@ static void ModelConsumerThread()
         }
 
         if (allHandled)
-            localPendingModelQueue.clear();
+            localPendingDataQueue.clear();
     }
 }
 
diff --git a/UnleashedRecomp/gpu/video.h b/UnleashedRecomp/gpu/video.h
index 37f7f190..6d08603b 100644
--- a/UnleashedRecomp/gpu/video.h
+++ b/UnleashedRecomp/gpu/video.h
@@ -80,6 +80,19 @@ struct GuestResource
             incrementedValue = std::byteswap(std::byteswap(originalValue) + 1);
         } while (InterlockedCompareExchange(reinterpret_cast<LONG*>(&refCount), incrementedValue, originalValue) != originalValue);
     }
+
+    void Release()
+    {
+        uint32_t originalValue, decrementedValue;
+        do
+        {
+            originalValue = refCount.value;
+            decrementedValue = std::byteswap(std::byteswap(originalValue) - 1);
+        } while (InterlockedCompareExchange(reinterpret_cast<LONG*>(&refCount), decrementedValue, originalValue) != originalValue);
+
+        // Normally we are supposed to release here, so only use this
+        // function when you know you won't be the one destructing it.
+    }
 };
 
 enum GuestFormat
diff --git a/UnleashedRecompLib/config/SWA.toml b/UnleashedRecompLib/config/SWA.toml
index c6f34255..0e2e975e 100644
--- a/UnleashedRecompLib/config/SWA.toml
+++ b/UnleashedRecompLib/config/SWA.toml
@@ -429,11 +429,21 @@ address = 0x82BA9E7C
 registers = ["r10"]
 
 [[midasm_hook]]
-name = "GetModelDataMidAsmHook"
-address = 0x82E38688
+name = "GetDatabaseDataMidAsmHook"
+address = 0x82E38688 # Model
 registers = ["r1", "r31"]
 
 [[midasm_hook]]
-name = "GetModelDataMidAsmHook"
-address = 0x82E39650
+name = "GetDatabaseDataMidAsmHook"
+address = 0x82E39650 # Terrain Model
 registers = ["r1", "r31"]
+
+[[midasm_hook]]
+name = "GetDatabaseDataMidAsmHook"
+address = 0x827D614C # Particle Material Binary
+registers = ["r1", "r29"]
+
+[[midasm_hook]]
+name = "GetDatabaseDataMidAsmHook"
+address = 0x827D6018 # Particle Material XML
+registers = ["r1", "r30"]