Precompile sparkle shaders in loading screens.

This commit is contained in:
Skyth 2024-11-27 19:58:34 +03:00
parent b271c37839
commit 17f1fcdcbc
5 changed files with 240 additions and 38 deletions

View file

@ -0,0 +1,41 @@
#pragma once
#include <boost/smart_ptr/shared_ptr.h>
#include <Hedgehog/Base/Container/hhVector.h>
#include <Hedgehog/Database/System/hhDatabaseData.h>
namespace Hedgehog::Mirage
{
class CShaderListData;
}
namespace Hedgehog::Sparkle
{
class CParticleMaterial : public Hedgehog::Database::CDatabaseData
{
public:
enum EBlendMode
{
eBlendMode_Zero,
eBlendMode_Typical,
eBlendMode_Add,
eBlendMode_Subtract
};
hh::vector<boost::anonymous_shared_ptr> m_spFieldC;
boost::shared_ptr<Hedgehog::Mirage::CShaderListData> m_spDefaultShaderListData; // BillboardParticle_d[v]
boost::shared_ptr<Hedgehog::Mirage::CShaderListData> m_spShaderListData;
bool m_Field2C;
be<uint32_t> m_BlendMode;
be<uint32_t> m_AddressMode;
Hedgehog::Base::CSharedString m_MaterialName;
Hedgehog::Base::CSharedString m_TextureName;
Hedgehog::Base::CSharedString m_DeflectionTextureName;
Hedgehog::Base::CSharedString m_ShaderName;
be<float> m_Field48;
be<float> m_Field4C;
};
SWA_ASSERT_SIZEOF(CParticleMaterial, 0x50);
}

View file

@ -42,6 +42,7 @@
#include "Hedgehog/MirageCore/RenderData/hhVertexShaderCodeData.h" #include "Hedgehog/MirageCore/RenderData/hhVertexShaderCodeData.h"
#include "Hedgehog/MirageCore/RenderData/hhVertexShaderData.h" #include "Hedgehog/MirageCore/RenderData/hhVertexShaderData.h"
#include "Hedgehog/MirageCore/Renderable/hhRenderable.h" #include "Hedgehog/MirageCore/Renderable/hhRenderable.h"
#include "Hedgehog/Sparkle/hhParticleMaterial.h"
#include "Hedgehog/Universe/Engine/hhMessageActor.h" #include "Hedgehog/Universe/Engine/hhMessageActor.h"
#include "Hedgehog/Universe/Engine/hhMessageProcess.h" #include "Hedgehog/Universe/Engine/hhMessageProcess.h"
#include "Hedgehog/Universe/Engine/hhUpdateInfo.h" #include "Hedgehog/Universe/Engine/hhUpdateInfo.h"

View file

@ -247,8 +247,8 @@ static std::string g_pipelineDebugText;
static Mutex g_debugMutex; static Mutex g_debugMutex;
#endif #endif
static std::atomic<uint32_t> g_compilingModelCount; static std::atomic<uint32_t> g_compilingDataCount;
static std::atomic<uint32_t> g_pendingModelCount; static std::atomic<uint32_t> g_pendingDataCount;
static xxHashMap<std::pair<uint32_t, std::unique_ptr<RenderSampler>>> g_samplerStates; static xxHashMap<std::pair<uint32_t, std::unique_ptr<RenderSampler>>> g_samplerStates;
@ -1684,8 +1684,8 @@ static void DrawImGui()
ImGui::Text("Pipelines Created In Render Thread: %d", g_pipelinesCreatedInRenderThread.load()); ImGui::Text("Pipelines Created In Render Thread: %d", g_pipelinesCreatedInRenderThread.load());
ImGui::Text("Pipelines Created Asynchronously: %d", g_pipelinesCreatedAsynchronously.load()); ImGui::Text("Pipelines Created Asynchronously: %d", g_pipelinesCreatedAsynchronously.load());
ImGui::Text("Pipelines Dropped: %d", g_pipelinesDropped.load()); ImGui::Text("Pipelines Dropped: %d", g_pipelinesDropped.load());
ImGui::Text("Compiling Model Count: %d", g_compilingModelCount.load()); ImGui::Text("Compiling Data Count: %d", g_compilingDataCount.load());
ImGui::Text("Pending Model Count: %d", g_pendingModelCount.load()); ImGui::Text("Pending Data Count: %d", g_pendingDataCount.load());
std::lock_guard lock(g_debugMutex); std::lock_guard lock(g_debugMutex);
ImGui::TextUnformatted(g_pipelineDebugText.c_str()); ImGui::TextUnformatted(g_pipelineDebugText.c_str());
@ -4354,8 +4354,9 @@ enum
static constexpr uint32_t MODEL_DATA_VFTABLE = 0x82073A44; static constexpr uint32_t MODEL_DATA_VFTABLE = 0x82073A44;
static constexpr uint32_t TERRAIN_MODEL_DATA_VFTABLE = 0x8211D25C; static constexpr uint32_t TERRAIN_MODEL_DATA_VFTABLE = 0x8211D25C;
static constexpr uint32_t PARTICLE_MATERIAL_VFTABLE = 0x8211F198;
static moodycamel::BlockingConcurrentQueue<boost::shared_ptr<Hedgehog::Database::CDatabaseData>> g_compilingModelQueue; static moodycamel::BlockingConcurrentQueue<boost::shared_ptr<Hedgehog::Database::CDatabaseData>> g_compilingDataQueue;
// Having this separate, because I don't want to lock a mutex in the render thread before // Having this separate, because I don't want to lock a mutex in the render thread before
// every single draw. Might be worth profiling to see if it actually has an impact and merge them. // every single draw. Might be worth profiling to see if it actually has an impact and merge them.
@ -4657,6 +4658,122 @@ static void CompileMeshPipelines(const T& modelData, const CompilationArgs& args
CompileMeshPipeline(mesh.get(), MeshLayer::PunchThrough, args); CompileMeshPipeline(mesh.get(), MeshLayer::PunchThrough, args);
} }
static void CompileParticleMaterialPipeline(const Hedgehog::Sparkle::CParticleMaterial& material)
{
auto& shaderList = material.m_spShaderListData;
if (shaderList.get() == nullptr)
return;
guest_stack_var<Hedgehog::Base::CStringSymbol> defaultSymbol(reinterpret_cast<const char*>(g_memory.Translate(0x8202DDBC)));
auto defaultFindResult = shaderList->m_PixelShaderPermutations.find(*defaultSymbol);
if (defaultFindResult == shaderList->m_PixelShaderPermutations.end())
return;
guest_stack_var<Hedgehog::Base::CStringSymbol> noneSymbol(reinterpret_cast<const char*>(g_memory.Translate(0x8200D938)));
auto noneFindResult = defaultFindResult->second.m_VertexShaderPermutations.find(*noneSymbol);
if (noneFindResult == defaultFindResult->second.m_VertexShaderPermutations.end())
return;
// All the particle models in the game come with the unoptimized format, so we can assume it.
uint8_t unoptimizedVertexElements[144] =
{
0x00, 0x00, 0x00, 0x00, 0x00, 0x2A, 0x23, 0xB9, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x0C, 0x00, 0x2A, 0x23, 0xB9, 0x00, 0x03, 0x00, 0x00,
0x00, 0x00, 0x00, 0x18, 0x00, 0x2A, 0x23, 0xB9, 0x00, 0x06, 0x00, 0x00,
0x00, 0x00, 0x00, 0x24, 0x00, 0x2A, 0x23, 0xB9, 0x00, 0x07, 0x00, 0x00,
0x00, 0x00, 0x00, 0x30, 0x00, 0x2C, 0x23, 0xA5, 0x00, 0x05, 0x00, 0x00,
0x00, 0x00, 0x00, 0x38, 0x00, 0x2C, 0x23, 0xA5, 0x00, 0x05, 0x01, 0x00,
0x00, 0x00, 0x00, 0x40, 0x00, 0x2C, 0x23, 0xA5, 0x00, 0x05, 0x02, 0x00,
0x00, 0x00, 0x00, 0x48, 0x00, 0x2C, 0x23, 0xA5, 0x00, 0x05, 0x03, 0x00,
0x00, 0x00, 0x00, 0x50, 0x00, 0x1A, 0x23, 0xA6, 0x00, 0x0A, 0x00, 0x00,
0x00, 0x00, 0x00, 0x60, 0x00, 0x1A, 0x23, 0x86, 0x00, 0x02, 0x00, 0x00,
0x00, 0x00, 0x00, 0x64, 0x00, 0x1A, 0x20, 0x86, 0x00, 0x01, 0x00, 0x00,
0x00, 0xFF, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00
};
auto unoptimizedVertexDeclaration = CreateVertexDeclaration(reinterpret_cast<GuestVertexElement*>(unoptimizedVertexElements));
auto sparkleVertexDeclaration = CreateVertexDeclaration(reinterpret_cast<GuestVertexElement*>(g_memory.Translate(0x8211F540)));
bool isMeshShader = strstr(shaderList->m_TypeAndName.c_str(), "Mesh") != nullptr;
PipelineState pipelineState{};
pipelineState.vertexShader = reinterpret_cast<GuestShader*>(noneFindResult->second->m_VertexShaders.begin()->second->m_spCode->m_pD3DVertexShader.get());
pipelineState.pixelShader = reinterpret_cast<GuestShader*>(defaultFindResult->second.m_PixelShaders.begin()->second->m_spCode->m_pD3DPixelShader.get());
pipelineState.vertexDeclaration = isMeshShader ? unoptimizedVertexDeclaration : sparkleVertexDeclaration;
pipelineState.zWriteEnable = false;
pipelineState.srcBlend = RenderBlend::SRC_ALPHA;
pipelineState.destBlend = RenderBlend::INV_SRC_ALPHA;
pipelineState.zFunc = RenderComparisonFunction::GREATER_EQUAL;
pipelineState.alphaBlendEnable = true;
pipelineState.srcBlendAlpha = RenderBlend::SRC_ALPHA;
pipelineState.destBlendAlpha = RenderBlend::INV_SRC_ALPHA;
pipelineState.primitiveTopology = RenderPrimitiveTopology::TRIANGLE_STRIP;
pipelineState.vertexStrides[0] = isMeshShader ? 104 : 28;
pipelineState.renderTargetFormat = RenderFormat::R16G16B16A16_FLOAT;
pipelineState.depthStencilFormat = RenderFormat::D32_FLOAT;
pipelineState.sampleCount = Config::MSAA > 1 ? Config::MSAA : 1;
pipelineState.specConstants = SPEC_CONSTANT_REVERSE_Z;
switch (material.m_BlendMode.get())
{
case Hedgehog::Sparkle::CParticleMaterial::eBlendMode_Zero:
// TODO: What are the render states for this??
break;
case Hedgehog::Sparkle::CParticleMaterial::eBlendMode_Typical:
// Leave default.
break;
case Hedgehog::Sparkle::CParticleMaterial::eBlendMode_Add:
pipelineState.destBlend = RenderBlend::ONE;
break;
case Hedgehog::Sparkle::CParticleMaterial::eBlendMode_Subtract:
// TODO: Is this correct?
pipelineState.destBlend = RenderBlend::ONE;
pipelineState.blendOp = RenderBlendOperation::SUBTRACT;
break;
}
auto createGraphicsPipeline = [&](PipelineState& pipelineStateToCreate)
{
SanitizePipelineState(pipelineStateToCreate);
CreateGraphicsPipelineInPipelineThread(pipelineStateToCreate, shaderList->m_TypeAndName.c_str() + 3);
};
// TODO: See if this is necessary for everything.
RenderCullMode cullModes[] = { RenderCullMode::NONE, RenderCullMode::BACK };
for (auto cullMode : cullModes)
{
pipelineState.cullMode = cullMode;
createGraphicsPipeline(pipelineState);
bool planarReflectionEnabled = reinterpret_cast<bool*>(g_memory.Translate(0x832FA0D8));
auto noMsaaPipelineState = pipelineState;
noMsaaPipelineState.sampleCount = 1;
if (planarReflectionEnabled)
createGraphicsPipeline(noMsaaPipelineState);
if (!isMeshShader)
{
// Previous compilation was for locus particles. This one will be for quads.
auto quadPipelineState = pipelineState;
quadPipelineState.primitiveTopology = RenderPrimitiveTopology::TRIANGLE_LIST;
createGraphicsPipeline(quadPipelineState);
if (planarReflectionEnabled)
{
auto noMsaaQuadPipelineState = noMsaaPipelineState;
noMsaaQuadPipelineState.primitiveTopology = RenderPrimitiveTopology::TRIANGLE_LIST;
createGraphicsPipeline(noMsaaQuadPipelineState);
}
}
}
unoptimizedVertexDeclaration->Release();
sparkleVertexDeclaration->Release();
}
static void PipelineCompilerThread() static void PipelineCompilerThread()
{ {
GuestThread::SetThreadName(GetCurrentThreadId(), "Pipeline Compiler Thread"); GuestThread::SetThreadName(GetCurrentThreadId(), "Pipeline Compiler Thread");
@ -4667,7 +4784,7 @@ static void PipelineCompilerThread()
while (true) while (true)
{ {
boost::shared_ptr<Hedgehog::Database::CDatabaseData> databaseData; boost::shared_ptr<Hedgehog::Database::CDatabaseData> databaseData;
g_compilingModelQueue.wait_dequeue(databaseData); g_compilingDataQueue.wait_dequeue(databaseData);
if (stack == nullptr) if (stack == nullptr)
{ {
@ -4682,8 +4799,14 @@ static void PipelineCompilerThread()
{ {
CompileMeshPipelines(*reinterpret_cast<Hedgehog::Mirage::CTerrainModelData*>(databaseData.get()), {}); CompileMeshPipelines(*reinterpret_cast<Hedgehog::Mirage::CTerrainModelData*>(databaseData.get()), {});
} }
else if (databaseData->m_pVftable.ptr == PARTICLE_MATERIAL_VFTABLE)
{
CompileParticleMaterialPipeline(*reinterpret_cast<Hedgehog::Sparkle::CParticleMaterial*>(databaseData.get()));
}
else else
{ {
assert(databaseData->m_pVftable.ptr == MODEL_DATA_VFTABLE);
auto modelData = reinterpret_cast<Hedgehog::Mirage::CModelData*>(databaseData.get()); auto modelData = reinterpret_cast<Hedgehog::Mirage::CModelData*>(databaseData.get());
CompilationArgs args{}; CompilationArgs args{};
@ -4708,8 +4831,8 @@ static void PipelineCompilerThread()
databaseData->m_Flags &= ~eDatabaseDataFlags_CompilingPipelines; databaseData->m_Flags &= ~eDatabaseDataFlags_CompilingPipelines;
if ((--g_compilingModelCount) == 0) if ((--g_compilingDataCount) == 0)
g_compilingModelCount.notify_all(); g_compilingDataCount.notify_all();
} }
g_userHeap.Free(stack); g_userHeap.Free(stack);
@ -4723,8 +4846,8 @@ PPC_FUNC(sub_825369A0)
{ {
// Wait for pipeline compilations to finish. // Wait for pipeline compilations to finish.
uint32_t value; uint32_t value;
while ((value = g_compilingModelCount.load()) != 0) while ((value = g_compilingDataCount.load()) != 0)
g_compilingModelCount.wait(value); g_compilingDataCount.wait(value);
__imp__sub_825369A0(ctx, base); __imp__sub_825369A0(ctx, base);
} }
@ -4757,37 +4880,51 @@ PPC_FUNC(sub_82E243D8)
} }
} }
static Mutex g_pendingModelMutex; // CParticleMaterial::CheckMadeAll
static std::vector<boost::shared_ptr<Hedgehog::Database::CDatabaseData>> g_pendingModelQueue; PPC_FUNC_IMPL(__imp__sub_82E87598);
PPC_FUNC(sub_82E87598)
{
if (reinterpret_cast<Hedgehog::Database::CDatabaseData*>(base + ctx.r3.u32)->m_Flags & eDatabaseDataFlags_CompilingPipelines)
{
ctx.r3.u64 = 0;
}
else
{
__imp__sub_82E87598(ctx, base);
}
}
void GetModelDataMidAsmHook(PPCRegister& r1, PPCRegister& r31) static Mutex g_pendingModelMutex;
static std::vector<boost::shared_ptr<Hedgehog::Database::CDatabaseData>> g_pendingDataQueue;
void GetDatabaseDataMidAsmHook(PPCRegister& r1, PPCRegister& r4)
{ {
auto& databaseData = *reinterpret_cast<boost::shared_ptr<Hedgehog::Database::CDatabaseData>*>( auto& databaseData = *reinterpret_cast<boost::shared_ptr<Hedgehog::Database::CDatabaseData>*>(
g_memory.Translate(r1.u32 + 0x58)); g_memory.Translate(r1.u32 + 0x58));
if (!databaseData->IsMadeOne() && r31.u32 != NULL) if (!databaseData->IsMadeOne() && r4.u32 != NULL)
{ {
if (databaseData->m_pVftable.ptr == MODEL_DATA_VFTABLE) if (databaseData->m_pVftable.ptr == MODEL_DATA_VFTABLE)
{ {
// Ignore particle models, the materials they point at don't actually // Ignore particle models, the materials they point at don't actually
// get used and give the threads unnecessary work. // get used and give the threads unnecessary work.
bool isParticleModel = *reinterpret_cast<be<uint32_t>*>(g_memory.Translate(r31.u32 + 4)) != 5 && bool isParticleModel = *reinterpret_cast<be<uint32_t>*>(g_memory.Translate(r4.u32 + 4)) != 5 &&
strncmp(databaseData->m_TypeAndName.c_str() + 2, "eff_", 4) == 0; strncmp(databaseData->m_TypeAndName.c_str() + 2, "eff_", 4) == 0;
if (isParticleModel) if (isParticleModel)
return; return;
} }
++g_compilingModelCount; ++g_compilingDataCount;
databaseData->m_Flags |= eDatabaseDataFlags_CompilingPipelines; databaseData->m_Flags |= eDatabaseDataFlags_CompilingPipelines;
{ {
std::lock_guard lock(g_pendingModelMutex); std::lock_guard lock(g_pendingModelMutex);
g_pendingModelQueue.push_back(databaseData); g_pendingDataQueue.push_back(databaseData);
} }
++g_pendingModelCount; ++g_pendingDataCount;
g_pendingModelCount.notify_all(); g_pendingDataCount.notify_all();
} }
} }
@ -4878,38 +5015,38 @@ static void ModelConsumerThread()
{ {
GuestThread::SetThreadName(GetCurrentThreadId(), "Model Consumer Thread"); GuestThread::SetThreadName(GetCurrentThreadId(), "Model Consumer Thread");
std::vector<boost::shared_ptr<Hedgehog::Database::CDatabaseData>> localPendingModelQueue; std::vector<boost::shared_ptr<Hedgehog::Database::CDatabaseData>> localPendingDataQueue;
while (true) while (true)
{ {
// Wait for models to arrive. // Wait for models to arrive.
uint32_t pendingModelCount; uint32_t pendingDataCount;
while ((pendingModelCount = g_pendingModelCount.load()) == 0) while ((pendingDataCount = g_pendingDataCount.load()) == 0)
g_pendingModelCount.wait(pendingModelCount); g_pendingDataCount.wait(pendingDataCount);
{ {
std::lock_guard lock(g_pendingModelMutex); std::lock_guard lock(g_pendingModelMutex);
localPendingModelQueue.insert(localPendingModelQueue.end(), g_pendingModelQueue.begin(), g_pendingModelQueue.end()); localPendingDataQueue.insert(localPendingDataQueue.end(), g_pendingDataQueue.begin(), g_pendingDataQueue.end());
g_pendingModelQueue.clear(); g_pendingDataQueue.clear();
} }
bool allHandled = true; bool allHandled = true;
for (auto& pendingModel : localPendingModelQueue) for (auto& pendingData : localPendingDataQueue)
{ {
if (pendingModel.get() != nullptr) if (pendingData.get() != nullptr)
{ {
bool ready = false; bool ready = false;
if (pendingModel->m_pVftable.ptr == TERRAIN_MODEL_DATA_VFTABLE) if (pendingData->m_pVftable.ptr == MODEL_DATA_VFTABLE)
ready = CheckMadeAll(*reinterpret_cast<Hedgehog::Mirage::CTerrainModelData*>(pendingModel.get())); ready = CheckMadeAll(*reinterpret_cast<Hedgehog::Mirage::CModelData*>(pendingData.get()));
else else
ready = CheckMadeAll(*reinterpret_cast<Hedgehog::Mirage::CModelData*>(pendingModel.get())); ready = pendingData->IsMadeOne();
if (ready || pendingModel.unique()) if (ready || pendingData.unique())
{ {
g_compilingModelQueue.enqueue(std::move(pendingModel)); g_compilingDataQueue.enqueue(std::move(pendingData));
--g_pendingModelCount; --g_pendingDataCount;
} }
else else
{ {
@ -4919,7 +5056,7 @@ static void ModelConsumerThread()
} }
if (allHandled) if (allHandled)
localPendingModelQueue.clear(); localPendingDataQueue.clear();
} }
} }

View file

@ -80,6 +80,19 @@ struct GuestResource
incrementedValue = std::byteswap(std::byteswap(originalValue) + 1); incrementedValue = std::byteswap(std::byteswap(originalValue) + 1);
} while (InterlockedCompareExchange(reinterpret_cast<LONG*>(&refCount), incrementedValue, originalValue) != originalValue); } while (InterlockedCompareExchange(reinterpret_cast<LONG*>(&refCount), incrementedValue, originalValue) != originalValue);
} }
void Release()
{
uint32_t originalValue, decrementedValue;
do
{
originalValue = refCount.value;
decrementedValue = std::byteswap(std::byteswap(originalValue) - 1);
} while (InterlockedCompareExchange(reinterpret_cast<LONG*>(&refCount), decrementedValue, originalValue) != originalValue);
// Normally we are supposed to release here, so only use this
// function when you know you won't be the one destructing it.
}
}; };
enum GuestFormat enum GuestFormat

View file

@ -429,11 +429,21 @@ address = 0x82BA9E7C
registers = ["r10"] registers = ["r10"]
[[midasm_hook]] [[midasm_hook]]
name = "GetModelDataMidAsmHook" name = "GetDatabaseDataMidAsmHook"
address = 0x82E38688 address = 0x82E38688 # Model
registers = ["r1", "r31"] registers = ["r1", "r31"]
[[midasm_hook]] [[midasm_hook]]
name = "GetModelDataMidAsmHook" name = "GetDatabaseDataMidAsmHook"
address = 0x82E39650 address = 0x82E39650 # Terrain Model
registers = ["r1", "r31"] registers = ["r1", "r31"]
[[midasm_hook]]
name = "GetDatabaseDataMidAsmHook"
address = 0x827D614C # Particle Material Binary
registers = ["r1", "r29"]
[[midasm_hook]]
name = "GetDatabaseDataMidAsmHook"
address = 0x827D6018 # Particle Material XML
registers = ["r1", "r30"]