UnleashedRecomp/UnleashedRecomp/gpu/video.cpp

3942 lines
135 KiB
C++

#include <stdafx.h>
#include <kernel/function.h>
#include <kernel/heap.h>
#include <cpu/code_cache.h>
#include <cpu/guest_code.h>
#include <kernel/memory.h>
#include <xxHashMap.h>
#include <shader/shader_cache.h>
#include "imgui_snapshot.h"
#include "gpu/video.h"
#include "ui/window.h"
#include "config.h"
#include "shader/copy_vs.hlsl.dxil.h"
#include "shader/copy_vs.hlsl.spirv.h"
#include "shader/imgui_ps.hlsl.dxil.h"
#include "shader/imgui_ps.hlsl.spirv.h"
#include "shader/imgui_vs.hlsl.dxil.h"
#include "shader/imgui_vs.hlsl.spirv.h"
#include "shader/movie_vs.hlsl.dxil.h"
#include "shader/movie_vs.hlsl.spirv.h"
#include "shader/movie_ps.hlsl.dxil.h"
#include "shader/movie_ps.hlsl.spirv.h"
#include "shader/resolve_msaa_depth_2x.hlsl.dxil.h"
#include "shader/resolve_msaa_depth_2x.hlsl.spirv.h"
#include "shader/resolve_msaa_depth_4x.hlsl.dxil.h"
#include "shader/resolve_msaa_depth_4x.hlsl.spirv.h"
#include "shader/resolve_msaa_depth_8x.hlsl.dxil.h"
#include "shader/resolve_msaa_depth_8x.hlsl.spirv.h"
extern "C"
{
__declspec(dllexport) unsigned long NvOptimusEnablement = 0x00000001;
__declspec(dllexport) int AmdPowerXpressRequestHighPerformance = 1;
}
namespace RT64
{
extern std::unique_ptr<RenderInterface> CreateD3D12Interface();
extern std::unique_ptr<RenderInterface> CreateVulkanInterface();
}
struct PipelineState
{
GuestShader* vertexShader = nullptr;
GuestShader* pixelShader = nullptr;
GuestVertexDeclaration* vertexDeclaration = nullptr;
bool instancing = false;
bool zEnable = true;
bool zWriteEnable = true;
RenderBlend srcBlend = RenderBlend::ONE;
RenderBlend destBlend = RenderBlend::ZERO;
RenderCullMode cullMode = RenderCullMode::NONE;
RenderComparisonFunction zFunc = RenderComparisonFunction::LESS;
bool alphaBlendEnable = false;
RenderBlendOperation blendOp = RenderBlendOperation::ADD;
float slopeScaledDepthBias = 0.0f;
int32_t depthBias = 0;
RenderBlend srcBlendAlpha = RenderBlend::ONE;
RenderBlend destBlendAlpha = RenderBlend::ZERO;
RenderBlendOperation blendOpAlpha = RenderBlendOperation::ADD;
uint32_t colorWriteEnable = uint32_t(RenderColorWriteEnable::ALL);
RenderPrimitiveTopology primitiveTopology = RenderPrimitiveTopology::TRIANGLE_LIST;
uint8_t vertexStrides[16]{};
RenderFormat renderTargetFormat{};
RenderFormat depthStencilFormat{};
RenderSampleCounts sampleCount = RenderSampleCount::COUNT_1;
bool enableAlphaToCoverage = false;
};
enum class AlphaTestMode : uint32_t
{
Disabled,
AlphaThreshold,
AlphaToCoverage
};
struct SharedConstants
{
uint32_t texture2DIndices[16]{};
uint32_t texture3DIndices[16]{};
uint32_t textureCubeIndices[16]{};
uint32_t samplerIndices[16]{};
AlphaTestMode alphaTestMode{};
float alphaThreshold{};
uint32_t booleans{};
uint32_t swappedTexcoords{};
uint32_t inputLayoutFlags{};
uint32_t enableGIBicubicFiltering{};
};
static GuestSurface* g_renderTarget;
static GuestSurface* g_depthStencil;
static RenderFramebuffer* g_framebuffer;
static RenderViewport g_viewport(0.0f, 0.0f, 1280.0f, 720.0f);
static bool g_halfPixel = true;
static uint32_t g_zFunc;
static PipelineState g_pipelineState;
static SharedConstants g_sharedConstants;
static RenderSamplerDesc g_samplerDescs[16];
static bool g_scissorTestEnable = false;
static RenderRect g_scissorRect;
static RenderVertexBufferView g_vertexBufferViews[16];
static RenderInputSlot g_inputSlots[16];
static RenderIndexBufferView g_indexBufferView({}, 0, RenderFormat::R16_UINT);
struct DirtyStates
{
bool renderTargetAndDepthStencil;
bool viewport;
bool pipelineState;
bool sharedConstants;
bool scissorRect;
bool vertexShaderConstants;
uint8_t vertexStreamFirst;
uint8_t vertexStreamLast;
bool indices;
bool pixelShaderConstants;
DirtyStates(bool value)
: renderTargetAndDepthStencil(value)
, viewport(value)
, pipelineState(value)
, sharedConstants(value)
, scissorRect(value)
, vertexShaderConstants(value)
, vertexStreamFirst(value ? 0 : 255)
, vertexStreamLast(value ? 15 : 0)
, indices(value)
, pixelShaderConstants(value)
{
}
};
static DirtyStates g_dirtyStates(true);
template<typename T>
static FORCEINLINE void SetDirtyValue(bool& dirtyState, T& dest, const T& src)
{
if (dest != src)
{
dest = src;
dirtyState = true;
}
}
static bool g_vulkan;
static std::unique_ptr<RenderInterface> g_interface;
static std::unique_ptr<RenderDevice> g_device;
static bool g_triangleFanSupported;
static constexpr size_t NUM_FRAMES = 2;
static uint32_t g_frame = 0;
static uint32_t g_nextFrame = 1;
static std::unique_ptr<RenderCommandQueue> g_queue;
static std::unique_ptr<RenderCommandList> g_commandLists[NUM_FRAMES];
static std::unique_ptr<RenderCommandFence> g_commandFences[NUM_FRAMES];
static bool g_commandListStates[NUM_FRAMES];
static Mutex g_copyMutex;
static std::unique_ptr<RenderCommandQueue> g_copyQueue;
static std::unique_ptr<RenderCommandList> g_copyCommandList;
static std::unique_ptr<RenderCommandFence> g_copyCommandFence;
static std::unique_ptr<RenderSwapChain> g_swapChain;
static bool g_swapChainValid;
static bool g_needsResize;
static std::unique_ptr<RenderCommandSemaphore> g_acquireSemaphores[NUM_FRAMES];
static std::unique_ptr<RenderCommandSemaphore> g_renderSemaphores[NUM_FRAMES];
static uint32_t g_backBufferIndex;
static GuestSurface* g_backBuffer;
struct std::unique_ptr<RenderDescriptorSet> g_textureDescriptorSet;
struct std::unique_ptr<RenderDescriptorSet> g_samplerDescriptorSet;
enum
{
TEXTURE_DESCRIPTOR_NULL_TEXTURE_2D,
TEXTURE_DESCRIPTOR_NULL_TEXTURE_3D,
TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE,
TEXTURE_DESCRIPTOR_NULL_COUNT
};
struct TextureDescriptorAllocator
{
Mutex mutex;
uint32_t capacity = TEXTURE_DESCRIPTOR_NULL_COUNT;
std::vector<uint32_t> freed;
uint32_t allocate()
{
std::lock_guard lock(mutex);
uint32_t value;
if (!freed.empty())
{
value = freed.back();
freed.pop_back();
}
else
{
value = capacity;
++capacity;
}
return value;
}
void free(uint32_t value)
{
assert(value != NULL);
std::lock_guard lock(mutex);
freed.push_back(value);
}
};
static std::unique_ptr<RenderTexture> g_blankTextures[TEXTURE_DESCRIPTOR_NULL_COUNT];
static std::unique_ptr<RenderTextureView> g_blankTextureViews[TEXTURE_DESCRIPTOR_NULL_COUNT];
static TextureDescriptorAllocator g_textureDescriptorAllocator;
static std::unique_ptr<RenderPipelineLayout> g_pipelineLayout;
static xxHashMap<std::unique_ptr<RenderPipeline>> g_pipelines;
static xxHashMap<std::pair<uint32_t, std::unique_ptr<RenderSampler>>> g_samplerStates;
static Mutex g_vertexDeclarationMutex;
static xxHashMap<GuestVertexDeclaration*> g_vertexDeclarations;
struct UploadBuffer
{
static constexpr size_t SIZE = 16 * 1024 * 1024;
std::unique_ptr<RenderBuffer> buffer;
uint8_t* memory = nullptr;
uint64_t deviceAddress = 0;
};
struct UploadAllocation
{
const RenderBuffer* buffer;
uint64_t offset;
uint8_t* memory;
uint64_t deviceAddress;
};
struct UploadAllocator
{
std::vector<UploadBuffer> buffers;
uint32_t index = 0;
uint32_t offset = 0;
Mutex mutex;
UploadAllocation allocate(uint32_t size, uint32_t alignment)
{
std::lock_guard lock(mutex);
assert(size <= UploadBuffer::SIZE);
offset = (offset + alignment - 1) & ~(alignment - 1);
if (offset + size > UploadBuffer::SIZE)
{
++index;
offset = 0;
}
if (buffers.size() <= index)
buffers.resize(index + 1);
auto& buffer = buffers[index];
if (buffer.buffer == nullptr)
{
buffer.buffer = g_device->createBuffer(RenderBufferDesc::UploadBuffer(UploadBuffer::SIZE, RenderBufferFlag::CONSTANT | RenderBufferFlag::VERTEX | RenderBufferFlag::INDEX));
buffer.memory = reinterpret_cast<uint8_t*>(buffer.buffer->map());
buffer.deviceAddress = buffer.buffer->getDeviceAddress();
}
auto ref = buffer.buffer->at(offset);
offset += size;
return { ref.ref, ref.offset, buffer.memory + ref.offset, buffer.deviceAddress + ref.offset };
}
template<bool TByteSwap, typename T>
UploadAllocation allocate(const T* memory, uint32_t size, uint32_t alignment)
{
auto result = allocate(size, alignment);
if constexpr (TByteSwap)
{
auto destination = reinterpret_cast<T*>(result.memory);
for (size_t i = 0; i < size; i += sizeof(T))
{
*destination = std::byteswap(*memory);
++destination;
++memory;
}
}
else
{
memcpy(result.memory, memory, size);
}
return result;
}
void reset()
{
index = 0;
offset = 0;
}
};
static UploadAllocator g_uploadAllocators[NUM_FRAMES];
static std::vector<GuestResource*> g_tempResources[NUM_FRAMES];
static std::vector<std::unique_ptr<RenderBuffer>> g_tempBuffers[NUM_FRAMES];
template<GuestPrimitiveType PrimitiveType>
struct PrimitiveIndexData
{
std::vector<uint16_t> indexData;
RenderBufferReference indexBuffer;
uint32_t currentIndexCount = 0;
uint32_t prepare(uint32_t guestPrimCount)
{
uint32_t primCount;
uint32_t indexCountPerPrimitive;
switch (PrimitiveType)
{
case D3DPT_TRIANGLEFAN:
primCount = guestPrimCount - 2;
indexCountPerPrimitive = 3;
break;
case D3DPT_QUADLIST:
primCount = guestPrimCount / 4;
indexCountPerPrimitive = 6;
break;
default:
assert(false && "Unknown primitive type.");
break;
}
uint32_t indexCount = primCount * indexCountPerPrimitive;
if (indexData.size() < indexCount)
{
const size_t oldPrimCount = indexData.size() / indexCountPerPrimitive;
indexData.resize(indexCount);
for (size_t i = oldPrimCount; i < primCount; i++)
{
switch (PrimitiveType)
{
case D3DPT_TRIANGLEFAN:
{
indexData[i * 3 + 0] = 0;
indexData[i * 3 + 1] = static_cast<uint16_t>(i + 1);
indexData[i * 3 + 2] = static_cast<uint16_t>(i + 2);
break;
}
case D3DPT_QUADLIST:
{
indexData[i * 6 + 0] = static_cast<uint16_t>(i * 4 + 0);
indexData[i * 6 + 1] = static_cast<uint16_t>(i * 4 + 1);
indexData[i * 6 + 2] = static_cast<uint16_t>(i * 4 + 2);
indexData[i * 6 + 3] = static_cast<uint16_t>(i * 4 + 0);
indexData[i * 6 + 4] = static_cast<uint16_t>(i * 4 + 2);
indexData[i * 6 + 5] = static_cast<uint16_t>(i * 4 + 3);
break;
}
default:
assert(false && "Unknown primitive type.");
break;
}
}
}
if (indexBuffer == NULL || currentIndexCount < indexCount)
{
auto allocation = g_uploadAllocators[g_frame].allocate<false>(indexData.data(), indexCount * 2, 2);
indexBuffer = allocation.buffer->at(allocation.offset);
currentIndexCount = indexCount;
}
SetDirtyValue(g_dirtyStates.indices, g_indexBufferView.buffer, indexBuffer);
SetDirtyValue(g_dirtyStates.indices, g_indexBufferView.size, indexCount * 2);
SetDirtyValue(g_dirtyStates.indices, g_indexBufferView.format, RenderFormat::R16_UINT);
return indexCount;
}
void reset()
{
indexBuffer = {};
currentIndexCount = 0;
}
};
static PrimitiveIndexData<D3DPT_TRIANGLEFAN> g_triangleFanIndexData;
static PrimitiveIndexData<D3DPT_QUADLIST> g_quadIndexData;
static void DestructTempResources()
{
for (auto resource : g_tempResources[g_frame])
{
switch (resource->type)
{
case ResourceType::Texture:
case ResourceType::VolumeTexture:
{
const auto texture = reinterpret_cast<GuestTexture*>(resource);
if (texture->mappedMemory != nullptr)
g_userHeap.Free(texture->mappedMemory);
g_textureDescriptorAllocator.free(texture->descriptorIndex);
texture->~GuestTexture();
break;
}
case ResourceType::VertexBuffer:
case ResourceType::IndexBuffer:
{
const auto buffer = reinterpret_cast<GuestBuffer*>(resource);
if (buffer->mappedMemory != nullptr)
g_userHeap.Free(buffer->mappedMemory);
buffer->~GuestBuffer();
break;
}
case ResourceType::RenderTarget:
case ResourceType::DepthStencil:
{
const auto surface = reinterpret_cast<GuestSurface*>(resource);
if (surface->descriptorIndex != NULL)
g_textureDescriptorAllocator.free(surface->descriptorIndex);
surface->~GuestSurface();
break;
}
case ResourceType::VertexDeclaration:
reinterpret_cast<GuestVertexDeclaration*>(resource)->~GuestVertexDeclaration();
break;
case ResourceType::VertexShader:
case ResourceType::PixelShader:
reinterpret_cast<GuestShader*>(resource)->~GuestShader();
break;
}
g_userHeap.Free(resource);
}
g_tempResources[g_frame].clear();
g_tempBuffers[g_frame].clear();
}
static uint32_t g_mainThreadId;
static ankerl::unordered_dense::map<RenderTexture*, RenderTextureLayout> g_barrierMap;
static void AddBarrier(GuestBaseTexture* texture, RenderTextureLayout layout)
{
if (texture != nullptr && texture->layout != layout)
{
g_barrierMap[texture->texture] = layout;
texture->layout = layout;
}
}
static std::vector<RenderTextureBarrier> g_barriers;
static void FlushBarriers()
{
if (!g_barrierMap.empty())
{
for (auto& [texture, layout] : g_barrierMap)
g_barriers.emplace_back(texture, layout);
g_commandLists[g_frame]->barriers(RenderBarrierStage::GRAPHICS | RenderBarrierStage::COPY, g_barriers);
g_barrierMap.clear();
g_barriers.clear();
}
}
static std::unique_ptr<uint8_t[]> g_shaderCache;
static void LoadShaderCache()
{
const size_t decompressedSize = g_vulkan ? g_spirvCacheDecompressedSize : g_dxilCacheDecompressedSize;
g_shaderCache = std::make_unique<uint8_t[]>(decompressedSize);
ZSTD_decompress(g_shaderCache.get(),
decompressedSize,
g_vulkan ? g_compressedSpirvCache : g_compressedDxilCache,
g_vulkan ? g_spirvCacheCompressedSize : g_dxilCacheCompressedSize);
}
enum class RenderCommandType
{
SetRenderState,
DestructResource,
UnlockTextureRect,
UnlockBuffer16,
UnlockBuffer32,
DrawImGui,
Present,
StretchRect,
SetRenderTarget,
SetDepthStencilSurface,
Clear,
SetViewport,
SetTexture,
SetScissorRect,
SetSamplerState,
SetBooleans,
SetVertexShaderConstants,
SetPixelShaderConstants,
DrawPrimitive,
DrawIndexedPrimitive,
DrawPrimitiveUP,
SetVertexDeclaration,
SetVertexShader,
SetStreamSource,
SetIndices,
SetPixelShader
};
struct RenderCommand
{
RenderCommandType type;
union
{
struct
{
GuestRenderState type;
uint32_t value;
} setRenderState;
struct
{
GuestResource* resource;
} destructResource;
struct
{
GuestTexture* texture;
} unlockTextureRect;
struct
{
GuestBuffer* buffer;
} unlockBuffer;
struct
{
GuestDevice* device;
uint32_t flags;
GuestTexture* texture;
} stretchRect;
struct
{
GuestSurface* renderTarget;
} setRenderTarget;
struct
{
GuestSurface* depthStencil;
} setDepthStencilSurface;
struct
{
uint32_t flags;
float color[4];
float z;
} clear;
struct
{
float x;
float y;
float width;
float height;
float minDepth;
float maxDepth;
} setViewport;
struct
{
uint32_t index;
GuestTexture* texture;
} setTexture;
struct
{
int32_t left;
int32_t top;
int32_t right;
int32_t bottom;
} setScissorRect;
struct
{
uint32_t index;
uint32_t data0;
uint32_t data3;
uint32_t data5;
} setSamplerState;
struct
{
uint32_t booleans;
} setBooleans;
struct
{
UploadAllocation allocation;
} setVertexShaderConstants;
struct
{
UploadAllocation allocation;
} setPixelShaderConstants;
struct
{
uint32_t primitiveType;
uint32_t startVertex;
uint32_t primitiveCount;
} drawPrimitive;
struct
{
uint32_t primitiveType;
int32_t baseVertexIndex;
uint32_t startIndex;
uint32_t primCount;
} drawIndexedPrimitive;
struct
{
uint32_t primitiveType;
uint32_t primitiveCount;
UploadAllocation vertexStreamZeroData;
uint32_t vertexStreamZeroStride;
} drawPrimitiveUP;
struct
{
GuestVertexDeclaration* vertexDeclaration;
} setVertexDeclaration;
struct
{
GuestShader* shader;
} setVertexShader;
struct
{
uint32_t index;
GuestBuffer* buffer;
uint32_t offset;
uint32_t stride;
} setStreamSource;
struct
{
GuestBuffer* buffer;
} setIndices;
struct
{
GuestShader* shader;
} setPixelShader;
};
};
static moodycamel::BlockingConcurrentQueue<RenderCommand> g_renderQueue;
template<GuestRenderState TType>
static void SetRenderState(GuestDevice* device, uint32_t value)
{
RenderCommand cmd;
cmd.type = RenderCommandType::SetRenderState;
cmd.setRenderState.type = TType;
cmd.setRenderState.value = value;
g_renderQueue.enqueue(cmd);
}
static void SetRenderStateUnimplemented(GuestDevice* device, uint32_t value)
{
}
static void SetAlphaTestMode(bool enable)
{
AlphaTestMode alphaTestMode = AlphaTestMode::Disabled;
if (enable)
{
if (Config::AlphaToCoverage && g_renderTarget != nullptr && g_renderTarget->sampleCount != RenderSampleCount::COUNT_1)
alphaTestMode = AlphaTestMode::AlphaToCoverage;
else
alphaTestMode = AlphaTestMode::AlphaThreshold;
}
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.alphaTestMode, alphaTestMode);
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.enableAlphaToCoverage, alphaTestMode == AlphaTestMode::AlphaToCoverage);
}
static RenderBlend ConvertBlendMode(uint32_t blendMode)
{
switch (blendMode)
{
case D3DBLEND_ZERO:
return RenderBlend::ZERO;
case D3DBLEND_ONE:
return RenderBlend::ONE;
case D3DBLEND_SRCCOLOR:
return RenderBlend::SRC_COLOR;
case D3DBLEND_INVSRCCOLOR:
return RenderBlend::INV_SRC_COLOR;
case D3DBLEND_SRCALPHA:
return RenderBlend::SRC_ALPHA;
case D3DBLEND_INVSRCALPHA:
return RenderBlend::INV_SRC_ALPHA;
case D3DBLEND_DESTCOLOR:
return RenderBlend::DEST_COLOR;
case D3DBLEND_INVDESTCOLOR:
return RenderBlend::INV_DEST_COLOR;
case D3DBLEND_DESTALPHA:
return RenderBlend::DEST_ALPHA;
case D3DBLEND_INVDESTALPHA:
return RenderBlend::INV_DEST_ALPHA;
default:
assert(false && "Invalid blend mode");
return RenderBlend::ZERO;
}
}
// The game renders the main scene with reverse Z where the viewport's minDepth and maxDepth
// values are swapped. We negate this to improve compatibility with old hardware.
static RenderComparisonFunction ConvertComparisonFunc(uint32_t cmpFunc, bool reverseZ)
{
switch (cmpFunc)
{
case D3DCMP_LESS:
return reverseZ ? RenderComparisonFunction::GREATER : RenderComparisonFunction::LESS;
case D3DCMP_LESSEQUAL:
return reverseZ ? RenderComparisonFunction::GREATER_EQUAL : RenderComparisonFunction::LESS_EQUAL;
case D3DCMP_GREATER:
return reverseZ ? RenderComparisonFunction::LESS : RenderComparisonFunction::GREATER;
case D3DCMP_GREATEREQUAL:
return reverseZ ? RenderComparisonFunction::LESS_EQUAL : RenderComparisonFunction::GREATER_EQUAL;
case D3DCMP_NEVER:
return RenderComparisonFunction::NEVER;
case D3DCMP_EQUAL:
return RenderComparisonFunction::EQUAL;
case D3DCMP_NOTEQUAL:
return RenderComparisonFunction::NOT_EQUAL;
case D3DCMP_ALWAYS:
return RenderComparisonFunction::ALWAYS;
default:
assert(false && "Unknown comparison function");
return RenderComparisonFunction::NEVER;
}
}
static RenderBlendOperation ConvertBlendOp(uint32_t blendOp)
{
switch (blendOp)
{
case D3DBLENDOP_ADD:
return RenderBlendOperation::ADD;
case D3DBLENDOP_SUBTRACT:
return RenderBlendOperation::SUBTRACT;
case D3DBLENDOP_REVSUBTRACT:
return RenderBlendOperation::REV_SUBTRACT;
case D3DBLENDOP_MIN:
return RenderBlendOperation::MIN;
case D3DBLENDOP_MAX:
return RenderBlendOperation::MAX;
default:
assert(false && "Unknown blend operation");
return RenderBlendOperation::ADD;
}
}
static void ProcSetRenderState(const RenderCommand& cmd)
{
uint32_t value = cmd.setRenderState.value;
switch (cmd.setRenderState.type)
{
case D3DRS_ZENABLE:
{
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.zEnable, value != 0);
g_dirtyStates.renderTargetAndDepthStencil |= g_dirtyStates.pipelineState;
break;
}
case D3DRS_ZWRITEENABLE:
{
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.zWriteEnable, value != 0);
break;
}
case D3DRS_ALPHATESTENABLE:
{
SetAlphaTestMode(value != 0);
break;
}
case D3DRS_SRCBLEND:
{
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.srcBlend, ConvertBlendMode(value));
break;
}
case D3DRS_DESTBLEND:
{
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.destBlend, ConvertBlendMode(value));
break;
}
case D3DRS_CULLMODE:
{
RenderCullMode cullMode;
switch (value) {
case D3DCULL_NONE:
case D3DCULL_NONE_2:
cullMode = RenderCullMode::NONE;
break;
case D3DCULL_CW:
cullMode = RenderCullMode::FRONT;
break;
case D3DCULL_CCW:
cullMode = RenderCullMode::BACK;
break;
default:
assert(false && "Invalid cull mode");
cullMode = RenderCullMode::NONE;
break;
}
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.cullMode, cullMode);
break;
}
case D3DRS_ZFUNC:
{
g_zFunc = value;
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.zFunc, ConvertComparisonFunc(value, g_viewport.minDepth >= g_viewport.maxDepth));
break;
}
case D3DRS_ALPHAREF:
{
SetDirtyValue(g_dirtyStates.pipelineState, g_sharedConstants.alphaThreshold, float(value) / 256.0f);
break;
}
case D3DRS_ALPHABLENDENABLE:
{
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.alphaBlendEnable, value != 0);
break;
}
case D3DRS_BLENDOP:
{
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.blendOp, ConvertBlendOp(value));
break;
}
case D3DRS_SCISSORTESTENABLE:
{
SetDirtyValue(g_dirtyStates.scissorRect, g_scissorTestEnable, value != 0);
break;
}
case D3DRS_SLOPESCALEDEPTHBIAS:
{
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.slopeScaledDepthBias, *reinterpret_cast<float*>(&value));
break;
}
case D3DRS_DEPTHBIAS:
{
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.depthBias, int32_t(*reinterpret_cast<float*>(&value) * (1 << 24)));
break;
}
case D3DRS_SRCBLENDALPHA:
{
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.srcBlendAlpha, ConvertBlendMode(value));
break;
}
case D3DRS_DESTBLENDALPHA:
{
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.destBlendAlpha, ConvertBlendMode(value));
break;
}
case D3DRS_BLENDOPALPHA:
{
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.blendOpAlpha, ConvertBlendOp(value));
break;
}
case D3DRS_COLORWRITEENABLE:
{
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.colorWriteEnable, value);
g_dirtyStates.renderTargetAndDepthStencil |= g_dirtyStates.pipelineState;
break;
}
}
}
static const std::pair<GuestRenderState, void*> g_setRenderStateFunctions[] =
{
{ D3DRS_ZENABLE, GuestFunction<SetRenderState<D3DRS_ZENABLE>> },
{ D3DRS_ZWRITEENABLE, GuestFunction<SetRenderState<D3DRS_ZWRITEENABLE>> },
{ D3DRS_ALPHATESTENABLE, GuestFunction<SetRenderState<D3DRS_ALPHATESTENABLE>> },
{ D3DRS_SRCBLEND, GuestFunction<SetRenderState<D3DRS_SRCBLEND>> },
{ D3DRS_DESTBLEND, GuestFunction<SetRenderState<D3DRS_DESTBLEND>> },
{ D3DRS_CULLMODE, GuestFunction<SetRenderState<D3DRS_CULLMODE>> },
{ D3DRS_ZFUNC, GuestFunction<SetRenderState<D3DRS_ZFUNC>> },
{ D3DRS_ALPHAREF, GuestFunction<SetRenderState<D3DRS_ALPHAREF>> },
{ D3DRS_ALPHABLENDENABLE, GuestFunction<SetRenderState<D3DRS_ALPHABLENDENABLE>> },
{ D3DRS_BLENDOP, GuestFunction<SetRenderState<D3DRS_BLENDOP>> },
{ D3DRS_SCISSORTESTENABLE, GuestFunction<SetRenderState<D3DRS_SCISSORTESTENABLE>> },
{ D3DRS_SLOPESCALEDEPTHBIAS, GuestFunction<SetRenderState<D3DRS_SLOPESCALEDEPTHBIAS>> },
{ D3DRS_DEPTHBIAS, GuestFunction<SetRenderState<D3DRS_DEPTHBIAS>> },
{ D3DRS_SRCBLENDALPHA, GuestFunction<SetRenderState<D3DRS_SRCBLENDALPHA>> },
{ D3DRS_DESTBLENDALPHA, GuestFunction<SetRenderState<D3DRS_DESTBLENDALPHA>> },
{ D3DRS_BLENDOPALPHA, GuestFunction<SetRenderState<D3DRS_BLENDOPALPHA>> },
{ D3DRS_COLORWRITEENABLE, GuestFunction<SetRenderState<D3DRS_COLORWRITEENABLE>> }
};
static std::unique_ptr<RenderPipeline> g_resolveMsaaDepthPipelines[3];
#define CREATE_SHADER(NAME) \
g_device->createShader( \
g_vulkan ? g_##NAME##_spirv : g_##NAME##_dxil, \
g_vulkan ? sizeof(g_##NAME##_spirv) : sizeof(g_##NAME##_dxil), \
"main", \
g_vulkan ? RenderShaderFormat::SPIRV : RenderShaderFormat::DXIL)
static bool DetectWine()
{
HMODULE dllHandle = GetModuleHandle("ntdll.dll");
return dllHandle != nullptr && GetProcAddress(dllHandle, "wine_get_version") != nullptr;
}
static constexpr size_t TEXTURE_DESCRIPTOR_SIZE = 65536;
static constexpr size_t SAMPLER_DESCRIPTOR_SIZE = 1024;
static std::unique_ptr<RenderTexture> g_imFontTexture;
static std::unique_ptr<RenderTextureView> g_imFontTextureView;
static uint32_t g_imFontTextureDescriptorIndex;
static bool g_imPendingBarrier = true;
static std::unique_ptr<RenderPipelineLayout> g_imPipelineLayout;
static std::unique_ptr<RenderPipeline> g_imPipeline;
static ImDrawDataSnapshot g_imSnapshot;
template<typename T>
static void ExecuteCopyCommandList(const T& function)
{
std::lock_guard lock(g_copyMutex);
g_copyCommandList->begin();
function();
g_copyCommandList->end();
g_copyQueue->executeCommandLists(g_copyCommandList.get(), g_copyCommandFence.get());
g_copyQueue->waitForCommandFence(g_copyCommandFence.get());
}
static constexpr uint32_t PITCH_ALIGNMENT = 0x100;
static constexpr uint32_t PLACEMENT_ALIGNMENT = 0x200;
static void CreateImGuiBackend()
{
ImGuiIO& io = ImGui::GetIO();
io.BackendFlags |= ImGuiBackendFlags_RendererHasVtxOffset;
ImGui_ImplSDL2_InitForOther(Window::s_pWindow);
uint8_t* pixels;
int width, height;
io.Fonts->GetTexDataAsAlpha8(&pixels, &width, &height);
RenderTextureDesc textureDesc;
textureDesc.dimension = RenderTextureDimension::TEXTURE_2D;
textureDesc.width = width;
textureDesc.height = height;
textureDesc.depth = 1;
textureDesc.mipLevels = 1;
textureDesc.arraySize = 1;
textureDesc.format = RenderFormat::R8_UNORM;
g_imFontTexture = g_device->createTexture(textureDesc);
uint32_t rowPitch = (width + PITCH_ALIGNMENT - 1) & ~(PITCH_ALIGNMENT - 1);
uint32_t slicePitch = (rowPitch * height + PLACEMENT_ALIGNMENT - 1) & ~(PLACEMENT_ALIGNMENT - 1);
auto uploadBuffer = g_device->createBuffer(RenderBufferDesc::UploadBuffer(slicePitch));
uint8_t* mappedMemory = reinterpret_cast<uint8_t*>(uploadBuffer->map());
if (rowPitch == width)
{
memcpy(mappedMemory, pixels, slicePitch);
}
else
{
for (size_t i = 0; i < height; i++)
{
memcpy(mappedMemory, pixels, width);
pixels += width;
mappedMemory += rowPitch;
}
}
uploadBuffer->unmap();
ExecuteCopyCommandList([&]
{
g_copyCommandList->barriers(RenderBarrierStage::COPY, RenderTextureBarrier(g_imFontTexture.get(), RenderTextureLayout::COPY_DEST));
g_copyCommandList->copyTextureRegion(
RenderTextureCopyLocation::Subresource(g_imFontTexture.get(), 0),
RenderTextureCopyLocation::PlacedFootprint(uploadBuffer.get(), RenderFormat::R8_UNORM, width, height, 1, rowPitch, 0));
});
RenderTextureViewDesc textureViewDesc;
textureViewDesc.format = textureDesc.format;
textureViewDesc.dimension = RenderTextureViewDimension::TEXTURE_2D;
textureViewDesc.mipLevels = 1;
textureViewDesc.componentMapping = RenderComponentMapping(RenderSwizzle::ONE, RenderSwizzle::ONE, RenderSwizzle::ONE, RenderSwizzle::R);
g_imFontTextureView = g_imFontTexture->createTextureView(textureViewDesc);
g_imFontTextureDescriptorIndex = g_textureDescriptorAllocator.allocate();
g_textureDescriptorSet->setTexture(g_imFontTextureDescriptorIndex, g_imFontTexture.get(), RenderTextureLayout::SHADER_READ, g_imFontTextureView.get());
io.Fonts->SetTexID(ImTextureID(g_imFontTextureDescriptorIndex));
RenderPipelineLayoutBuilder pipelineLayoutBuilder;
pipelineLayoutBuilder.begin(false, true);
RenderDescriptorSetBuilder descriptorSetBuilder;
descriptorSetBuilder.begin();
descriptorSetBuilder.addTexture(0, TEXTURE_DESCRIPTOR_SIZE);
descriptorSetBuilder.end(true, TEXTURE_DESCRIPTOR_SIZE);
pipelineLayoutBuilder.addDescriptorSet(descriptorSetBuilder);
descriptorSetBuilder.begin();
descriptorSetBuilder.addSampler(0, SAMPLER_DESCRIPTOR_SIZE);
descriptorSetBuilder.end(true, SAMPLER_DESCRIPTOR_SIZE);
pipelineLayoutBuilder.addDescriptorSet(descriptorSetBuilder);
pipelineLayoutBuilder.addPushConstant(0, 2, 12, RenderShaderStageFlag::VERTEX | RenderShaderStageFlag::PIXEL);
pipelineLayoutBuilder.end();
g_imPipelineLayout = pipelineLayoutBuilder.create(g_device.get());
auto vertexShader = CREATE_SHADER(imgui_vs);
auto pixelShader = CREATE_SHADER(imgui_ps);
RenderInputElement inputElements[3];
inputElements[0] = RenderInputElement("POSITION", 0, 0, RenderFormat::R32G32_FLOAT, 0, offsetof(ImDrawVert, pos));
inputElements[1] = RenderInputElement("TEXCOORD", 0, 1, RenderFormat::R32G32_FLOAT, 0, offsetof(ImDrawVert, uv));
inputElements[2] = RenderInputElement("COLOR", 0, 2, RenderFormat::R8G8B8A8_UNORM, 0, offsetof(ImDrawVert, col));
RenderInputSlot inputSlot(0, sizeof(ImDrawVert));
RenderGraphicsPipelineDesc pipelineDesc;
pipelineDesc.pipelineLayout = g_imPipelineLayout.get();
pipelineDesc.vertexShader = vertexShader.get();
pipelineDesc.pixelShader = pixelShader.get();
pipelineDesc.renderTargetFormat[0] = RenderFormat::B8G8R8A8_UNORM;
pipelineDesc.renderTargetBlend[0] = RenderBlendDesc::AlphaBlend();
pipelineDesc.renderTargetCount = 1;
pipelineDesc.inputElements = inputElements;
pipelineDesc.inputElementsCount = std::size(inputElements);
pipelineDesc.inputSlots = &inputSlot;
pipelineDesc.inputSlotsCount = 1;
g_imPipeline = g_device->createGraphicsPipeline(pipelineDesc);
}
static void CreateHostDevice()
{
for (uint32_t i = 0; i < 16; i++)
g_inputSlots[i].index = i;
IMGUI_CHECKVERSION();
ImGui::CreateContext();
Window::Init();
g_vulkan = DetectWine() || Config::GraphicsAPI == EGraphicsAPI::Vulkan;
LoadShaderCache();
g_interface = g_vulkan ? CreateVulkanInterface() : CreateD3D12Interface();
g_device = g_interface->createDevice();
g_triangleFanSupported = g_device->getCapabilities().triangleFan;
g_queue = g_device->createCommandQueue(RenderCommandListType::DIRECT);
for (auto& commandList : g_commandLists)
commandList = g_device->createCommandList(RenderCommandListType::DIRECT);
for (auto& commandFence : g_commandFences)
commandFence = g_device->createCommandFence();
g_copyQueue = g_device->createCommandQueue(RenderCommandListType::COPY);
g_copyCommandList = g_device->createCommandList(RenderCommandListType::COPY);
g_copyCommandFence = g_device->createCommandFence();
g_swapChain = g_queue->createSwapChain(Window::s_handle, Config::TripleBuffering ? 3 : 2, RenderFormat::B8G8R8A8_UNORM);
g_swapChain->setVsyncEnabled(Config::VSync);
g_swapChainValid = !g_swapChain->needsResize();
for (auto& acquireSemaphore : g_acquireSemaphores)
acquireSemaphore = g_device->createCommandSemaphore();
for (auto& renderSemaphore : g_renderSemaphores)
renderSemaphore = g_device->createCommandSemaphore();
g_mainThreadId = GetCurrentThreadId();
RenderPipelineLayoutBuilder pipelineLayoutBuilder;
pipelineLayoutBuilder.begin(false, true);
RenderDescriptorSetBuilder descriptorSetBuilder;
descriptorSetBuilder.begin();
descriptorSetBuilder.addTexture(0, TEXTURE_DESCRIPTOR_SIZE);
descriptorSetBuilder.end(true, TEXTURE_DESCRIPTOR_SIZE);
g_textureDescriptorSet = descriptorSetBuilder.create(g_device.get());
for (size_t i = 0; i < TEXTURE_DESCRIPTOR_NULL_COUNT; i++)
{
auto& texture = g_blankTextures[i];
auto& textureView = g_blankTextureViews[i];
RenderTextureDesc desc;
desc.width = 1;
desc.height = 1;
desc.depth = 1;
desc.mipLevels = 1;
desc.format = RenderFormat::R8_UNORM;
RenderTextureViewDesc viewDesc;
viewDesc.format = desc.format;
viewDesc.componentMapping = RenderComponentMapping(RenderSwizzle::ZERO, RenderSwizzle::ZERO, RenderSwizzle::ZERO, RenderSwizzle::ZERO);
viewDesc.mipLevels = 1;
switch (i)
{
case TEXTURE_DESCRIPTOR_NULL_TEXTURE_2D:
desc.dimension = RenderTextureDimension::TEXTURE_2D;
desc.arraySize = 1;
viewDesc.dimension = RenderTextureViewDimension::TEXTURE_2D;
break;
case TEXTURE_DESCRIPTOR_NULL_TEXTURE_3D:
desc.dimension = RenderTextureDimension::TEXTURE_3D;
desc.arraySize = 1;
viewDesc.dimension = RenderTextureViewDimension::TEXTURE_3D;
break;
case TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE:
desc.dimension = RenderTextureDimension::TEXTURE_2D;
desc.arraySize = 6;
desc.flags = RenderTextureFlag::CUBE;
viewDesc.dimension = RenderTextureViewDimension::TEXTURE_CUBE;
break;
default:
assert(false && "Unknown null descriptor dimension");
break;
}
texture = g_device->createTexture(desc);
textureView = texture->createTextureView(viewDesc);
g_textureDescriptorSet->setTexture(i, texture.get(), RenderTextureLayout::SHADER_READ, textureView.get());
}
pipelineLayoutBuilder.addDescriptorSet(descriptorSetBuilder);
pipelineLayoutBuilder.addDescriptorSet(descriptorSetBuilder);
pipelineLayoutBuilder.addDescriptorSet(descriptorSetBuilder);
descriptorSetBuilder.begin();
descriptorSetBuilder.addSampler(0, SAMPLER_DESCRIPTOR_SIZE);
descriptorSetBuilder.end(true, SAMPLER_DESCRIPTOR_SIZE);
g_samplerDescriptorSet = descriptorSetBuilder.create(g_device.get());
auto& [descriptorIndex, sampler] = g_samplerStates[XXH3_64bits(&g_samplerDescs[0], sizeof(RenderSamplerDesc))];
descriptorIndex = 1;
sampler = g_device->createSampler(g_samplerDescs[0]);
g_samplerDescriptorSet->setSampler(0, sampler.get());
pipelineLayoutBuilder.addDescriptorSet(descriptorSetBuilder);
if (g_vulkan)
{
pipelineLayoutBuilder.addPushConstant(0, 4, 24, RenderShaderStageFlag::VERTEX | RenderShaderStageFlag::PIXEL);
}
else
{
pipelineLayoutBuilder.addRootDescriptor(0, 4, RenderRootDescriptorType::CONSTANT_BUFFER);
pipelineLayoutBuilder.addRootDescriptor(1, 4, RenderRootDescriptorType::CONSTANT_BUFFER);
pipelineLayoutBuilder.addRootDescriptor(2, 4, RenderRootDescriptorType::CONSTANT_BUFFER);
pipelineLayoutBuilder.addPushConstant(3, 4, 4, RenderShaderStageFlag::PIXEL); // For copy/resolve shaders.
}
pipelineLayoutBuilder.end();
g_pipelineLayout = pipelineLayoutBuilder.create(g_device.get());
auto copyShader = CREATE_SHADER(copy_vs);
for (size_t i = 0; i < std::size(g_resolveMsaaDepthPipelines); i++)
{
std::unique_ptr<RenderShader> pixelShader;
switch (i)
{
case 0:
pixelShader = CREATE_SHADER(resolve_msaa_depth_2x);
break;
case 1:
pixelShader = CREATE_SHADER(resolve_msaa_depth_4x);
break;
case 2:
pixelShader = CREATE_SHADER(resolve_msaa_depth_8x);
break;
}
RenderGraphicsPipelineDesc desc;
desc.pipelineLayout = g_pipelineLayout.get();
desc.vertexShader = copyShader.get();
desc.pixelShader = pixelShader.get();
desc.depthFunction = RenderComparisonFunction::ALWAYS;
desc.depthEnabled = true;
desc.depthWriteEnabled = true;
desc.depthTargetFormat = RenderFormat::D32_FLOAT;
g_resolveMsaaDepthPipelines[i] = g_device->createGraphicsPipeline(desc);
}
CreateImGuiBackend();
}
static void WaitForGPU()
{
if (g_vulkan)
{
g_device->waitIdle();
}
else
{
for (size_t i = 0; i < NUM_FRAMES; i++)
{
if (g_commandListStates[i])
{
g_queue->waitForCommandFence(g_commandFences[i].get());
g_commandListStates[i] = false;
}
}
g_queue->executeCommandLists(nullptr, g_commandFences[0].get());
g_queue->waitForCommandFence(g_commandFences[0].get());
}
}
static bool g_pendingRenderThread;
static void WaitForRenderThread()
{
while (g_pendingRenderThread)
Sleep(0);
}
static void BeginCommandList()
{
g_renderTarget = g_backBuffer;
g_depthStencil = nullptr;
g_framebuffer = nullptr;
g_pipelineState.renderTargetFormat = g_backBuffer->format;
g_pipelineState.depthStencilFormat = RenderFormat::UNKNOWN;
g_swapChainValid &= !g_swapChain->needsResize();
if (!g_swapChainValid)
{
WaitForGPU();
g_backBuffer->framebuffers.clear();
g_swapChainValid = g_swapChain->resize();
g_needsResize = g_swapChainValid;
}
if (g_swapChainValid)
g_swapChainValid = g_swapChain->acquireTexture(g_acquireSemaphores[g_frame].get(), &g_backBufferIndex);
if (g_swapChainValid)
g_backBuffer->texture = g_swapChain->getTexture(g_backBufferIndex);
else
g_backBuffer->texture = g_backBuffer->textureHolder.get();
g_backBuffer->layout = RenderTextureLayout::UNKNOWN;
for (size_t i = 0; i < 16; i++)
{
g_sharedConstants.texture2DIndices[i] = TEXTURE_DESCRIPTOR_NULL_TEXTURE_2D;
g_sharedConstants.texture3DIndices[i] = TEXTURE_DESCRIPTOR_NULL_TEXTURE_3D;
g_sharedConstants.textureCubeIndices[i] = TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE;
}
g_sharedConstants.enableGIBicubicFiltering = (Config::GITextureFiltering == EGITextureFiltering::Bicubic);
auto& commandList = g_commandLists[g_frame];
commandList->begin();
commandList->setGraphicsPipelineLayout(g_pipelineLayout.get());
commandList->setGraphicsDescriptorSet(g_textureDescriptorSet.get(), 0);
commandList->setGraphicsDescriptorSet(g_textureDescriptorSet.get(), 1);
commandList->setGraphicsDescriptorSet(g_textureDescriptorSet.get(), 2);
commandList->setGraphicsDescriptorSet(g_samplerDescriptorSet.get(), 3);
}
static uint32_t CreateDevice(uint32_t a1, uint32_t a2, uint32_t a3, uint32_t a4, uint32_t a5, be<uint32_t>* a6)
{
CreateHostDevice();
g_backBuffer = g_userHeap.AllocPhysical<GuestSurface>(ResourceType::RenderTarget);
g_backBuffer->width = 1280;
g_backBuffer->height = 720;
g_backBuffer->format = RenderFormat::B8G8R8A8_UNORM;
g_backBuffer->textureHolder = g_device->createTexture(RenderTextureDesc::Texture2D(16, 16, 1, g_backBuffer->format, RenderTextureFlag::RENDER_TARGET));
BeginCommandList();
RenderTextureBarrier blankTextureBarriers[TEXTURE_DESCRIPTOR_NULL_COUNT];
for (size_t i = 0; i < TEXTURE_DESCRIPTOR_NULL_COUNT; i++)
blankTextureBarriers[i] = RenderTextureBarrier(g_blankTextures[i].get(), RenderTextureLayout::SHADER_READ);
g_commandLists[g_frame]->barriers(RenderBarrierStage::NONE, blankTextureBarriers, std::size(blankTextureBarriers));
auto device = g_userHeap.AllocPhysical<GuestDevice>();
memset(device, 0, sizeof(*device));
uint32_t functionOffset = 0x443344; // D3D
g_codeCache.Insert(functionOffset, reinterpret_cast<void*>(GuestFunction<SetRenderStateUnimplemented>));
for (size_t i = 0; i < _countof(device->setRenderStateFunctions); i++)
device->setRenderStateFunctions[i] = functionOffset;
for (auto& [state, function] : g_setRenderStateFunctions)
{
functionOffset += 4;
g_codeCache.Insert(functionOffset, function);
device->setRenderStateFunctions[state / 4] = functionOffset;
}
for (size_t i = 0; i < _countof(device->setSamplerStateFunctions); i++)
device->setSamplerStateFunctions[i] = *reinterpret_cast<uint32_t*>(g_memory.Translate(0x8330F3DC + i * 0xC));
device->viewport.width = 1280.0f;
device->viewport.height = 720.0f;
device->viewport.maxZ = 1.0f;
*a6 = g_memory.MapVirtual(device);
return 0;
}
static void DestructResource(GuestResource* resource)
{
RenderCommand cmd;
cmd.type = RenderCommandType::DestructResource;
cmd.destructResource.resource = resource;
g_renderQueue.enqueue(cmd);
}
static void ProcDestructResource(const RenderCommand& cmd)
{
const auto& args = cmd.destructResource;
g_tempResources[g_frame].push_back(args.resource);
}
static uint32_t ComputeTexturePitch(GuestTexture* texture)
{
return (texture->width * RenderFormatSize(texture->format) + PITCH_ALIGNMENT - 1) & ~(PITCH_ALIGNMENT - 1);
}
static void LockTextureRect(GuestTexture* texture, uint32_t, GuestLockedRect* lockedRect)
{
uint32_t pitch = ComputeTexturePitch(texture);
uint32_t slicePitch = pitch * texture->height;
if (texture->mappedMemory == nullptr)
texture->mappedMemory = g_userHeap.AllocPhysical(slicePitch, 0x10);
lockedRect->pitch = pitch;
lockedRect->bits = g_memory.MapVirtual(texture->mappedMemory);
}
static void UnlockTextureRect(GuestTexture* texture)
{
RenderCommand cmd;
cmd.type = RenderCommandType::UnlockTextureRect;
cmd.unlockTextureRect.texture = texture;
g_renderQueue.enqueue(cmd);
}
static void ProcUnlockTextureRect(const RenderCommand& cmd)
{
const auto& args = cmd.unlockTextureRect;
AddBarrier(args.texture, RenderTextureLayout::COPY_DEST);
FlushBarriers();
uint32_t pitch = ComputeTexturePitch(args.texture);
uint32_t slicePitch = pitch * args.texture->height;
auto allocation = g_uploadAllocators[g_frame].allocate(slicePitch, PLACEMENT_ALIGNMENT);
memcpy(allocation.memory, args.texture->mappedMemory, slicePitch);
g_commandLists[g_frame]->copyTextureRegion(
RenderTextureCopyLocation::Subresource(args.texture->texture, 0),
RenderTextureCopyLocation::PlacedFootprint(allocation.buffer, args.texture->format, args.texture->width, args.texture->height, 1, pitch / RenderFormatSize(args.texture->format), allocation.offset));
}
static void* LockBuffer(GuestBuffer* buffer, uint32_t flags)
{
buffer->lockedReadOnly = (flags & 0x10) != 0;
if (buffer->mappedMemory == nullptr)
buffer->mappedMemory = g_userHeap.AllocPhysical(buffer->dataSize, 0x10);
return buffer->mappedMemory;
}
static void* LockVertexBuffer(GuestBuffer* buffer, uint32_t, uint32_t, uint32_t flags)
{
return LockBuffer(buffer, flags);
}
template<typename T>
static void UnlockBuffer(GuestBuffer* buffer, bool useCopyQueue)
{
auto uploadBuffer = g_device->createBuffer(RenderBufferDesc::UploadBuffer(buffer->dataSize));
auto dest = reinterpret_cast<T*>(uploadBuffer->map());
auto src = reinterpret_cast<const T*>(buffer->mappedMemory);
for (size_t i = 0; i < buffer->dataSize; i += sizeof(T))
{
*dest = std::byteswap(*src);
++dest;
++src;
}
uploadBuffer->unmap();
if (useCopyQueue)
{
ExecuteCopyCommandList([&]
{
g_copyCommandList->copyBufferRegion(buffer->buffer->at(0), uploadBuffer->at(0), buffer->dataSize);
});
}
else
{
auto& commandList = g_commandLists[g_frame];
commandList->barriers(RenderBarrierStage::COPY, RenderBufferBarrier(buffer->buffer.get(), RenderBufferAccess::WRITE));
commandList->copyBufferRegion(buffer->buffer->at(0), uploadBuffer->at(0), buffer->dataSize);
commandList->barriers(RenderBarrierStage::GRAPHICS, RenderBufferBarrier(buffer->buffer.get(), RenderBufferAccess::READ));
g_tempBuffers[g_frame].emplace_back(std::move(uploadBuffer));
}
}
template<typename T>
static void UnlockBuffer(GuestBuffer* buffer)
{
if (!buffer->lockedReadOnly)
{
if (GetCurrentThreadId() == g_mainThreadId)
{
RenderCommand cmd;
cmd.type = (sizeof(T) == 2) ? RenderCommandType::UnlockBuffer16 : RenderCommandType::UnlockBuffer32;
cmd.unlockBuffer.buffer = buffer;
g_renderQueue.enqueue(cmd);
}
else
{
UnlockBuffer<T>(buffer, true);
}
}
}
static void ProcUnlockBuffer16(const RenderCommand& cmd)
{
UnlockBuffer<uint16_t>(cmd.unlockBuffer.buffer, false);
}
static void ProcUnlockBuffer32(const RenderCommand& cmd)
{
UnlockBuffer<uint32_t>(cmd.unlockBuffer.buffer, false);
}
static void UnlockVertexBuffer(GuestBuffer* buffer)
{
UnlockBuffer<uint32_t>(buffer);
}
static void GetVertexBufferDesc(GuestBuffer* buffer, GuestBufferDesc* desc)
{
desc->size = buffer->dataSize;
}
static void* LockIndexBuffer(GuestBuffer* buffer, uint32_t, uint32_t, uint32_t flags)
{
return LockBuffer(buffer, flags);
}
static void UnlockIndexBuffer(GuestBuffer* buffer)
{
UnlockBuffer<uint16_t>(buffer);
}
static void GetIndexBufferDesc(GuestBuffer* buffer, GuestBufferDesc* desc)
{
desc->format = buffer->guestFormat;
desc->size = buffer->dataSize;
}
static void GetSurfaceDesc(GuestSurface* surface, GuestSurfaceDesc* desc)
{
desc->width = surface->width;
desc->height = surface->height;
}
static void GetVertexDeclaration(GuestVertexDeclaration* vertexDeclaration, GuestVertexElement* vertexElements, be<uint32_t>* count)
{
memcpy(vertexElements, vertexDeclaration->vertexElements.get(), vertexDeclaration->vertexElementCount * sizeof(GuestVertexElement));
*count = vertexDeclaration->vertexElementCount;
}
static uint32_t HashVertexDeclaration(uint32_t vertexDeclaration)
{
// Vertex declarations are cached on host side, so the pointer itself can be used.
return vertexDeclaration;
}
static void DrawImGui()
{
ImGui_ImplSDL2_NewFrame();
ImGui::NewFrame();
// ImGui logic here
ImGui::Render();
auto drawData = ImGui::GetDrawData();
if (drawData->CmdListsCount != 0)
{
g_imSnapshot.SnapUsingSwap(drawData, ImGui::GetTime());
RenderCommand cmd;
cmd.type = RenderCommandType::DrawImGui;
g_renderQueue.enqueue(cmd);
}
}
static void ProcDrawImGui(const RenderCommand& cmd)
{
auto& commandList = g_commandLists[g_frame];
if (g_imPendingBarrier)
{
commandList->barriers(RenderBarrierStage::GRAPHICS, RenderTextureBarrier(g_imFontTexture.get(), RenderTextureLayout::SHADER_READ));
g_imPendingBarrier = false;
}
commandList->setGraphicsPipelineLayout(g_imPipelineLayout.get());
commandList->setPipeline(g_imPipeline.get());
commandList->setGraphicsDescriptorSet(g_textureDescriptorSet.get(), 0);
commandList->setGraphicsDescriptorSet(g_samplerDescriptorSet.get(), 1);
auto& drawData = g_imSnapshot.DrawData;
commandList->setViewports(RenderViewport(drawData.DisplayPos.x, drawData.DisplayPos.y, drawData.DisplaySize.x, drawData.DisplaySize.y));
float inverseDisplaySize[] = { 1.0f / drawData.DisplaySize.x, 1.0f / drawData.DisplaySize.y };
commandList->setGraphicsPushConstants(0, inverseDisplaySize, 4, 8);
for (int i = 0; i < drawData.CmdListsCount; i++)
{
auto& drawList = drawData.CmdLists[i];
auto vertexBufferAllocation = g_uploadAllocators[g_frame].allocate<false>(drawList->VtxBuffer.Data, drawList->VtxBuffer.Size * sizeof(ImDrawVert), alignof(ImDrawVert));
auto indexBufferAllocation = g_uploadAllocators[g_frame].allocate<false>(drawList->IdxBuffer.Data, drawList->IdxBuffer.Size * sizeof(uint16_t), alignof(uint16_t));
const RenderVertexBufferView vertexBufferView(vertexBufferAllocation.buffer->at(vertexBufferAllocation.offset), drawList->VtxBuffer.Size * sizeof(ImDrawVert));
const RenderInputSlot inputSlot(0, sizeof(ImDrawVert));
commandList->setVertexBuffers(0, &vertexBufferView, 1, &inputSlot);
const RenderIndexBufferView indexBufferView(indexBufferAllocation.buffer->at(indexBufferAllocation.offset), drawList->IdxBuffer.Size * sizeof(uint16_t), RenderFormat::R16_UINT);
commandList->setIndexBuffer(&indexBufferView);
for (int j = 0; j < drawList->CmdBuffer.Size; j++)
{
auto& drawCmd = drawList->CmdBuffer[j];
if (drawCmd.ClipRect.z <= drawCmd.ClipRect.x || drawCmd.ClipRect.w <= drawCmd.ClipRect.y)
continue;
uint32_t descriptorIndex = uint32_t(drawCmd.GetTexID());
commandList->setGraphicsPushConstants(0, &descriptorIndex, 0, 4);
commandList->setScissors(RenderRect(int32_t(drawCmd.ClipRect.x), int32_t(drawCmd.ClipRect.y), int32_t(drawCmd.ClipRect.z), int32_t(drawCmd.ClipRect.w)));
commandList->drawIndexedInstanced(drawCmd.ElemCount, 1, drawCmd.IdxOffset, drawCmd.VtxOffset, 0);
}
}
}
static void Present()
{
DrawImGui();
WaitForRenderThread();
g_pendingRenderThread = true;
RenderCommand cmd;
cmd.type = RenderCommandType::Present;
g_renderQueue.enqueue(cmd);
}
static void ProcPresent(const RenderCommand& cmd)
{
if (g_swapChainValid)
AddBarrier(g_backBuffer, RenderTextureLayout::PRESENT);
FlushBarriers();
auto& commandList = g_commandLists[g_frame];
commandList->end();
if (g_swapChainValid)
{
const RenderCommandList* commandLists[] = { commandList.get() };
RenderCommandSemaphore* waitSemaphores[] = { g_acquireSemaphores[g_frame].get() };
RenderCommandSemaphore* signalSemaphores[] = { g_renderSemaphores[g_frame].get() };
g_queue->executeCommandLists(
commandLists, std::size(commandLists),
waitSemaphores, std::size(waitSemaphores),
signalSemaphores, std::size(signalSemaphores),
g_commandFences[g_frame].get());
g_swapChainValid = g_swapChain->present(g_backBufferIndex, signalSemaphores, std::size(signalSemaphores));
}
else
{
g_queue->executeCommandLists(commandList.get(), g_commandFences[g_frame].get());
}
g_commandListStates[g_frame] = true;
g_frame = g_nextFrame;
g_nextFrame = (g_frame + 1) % NUM_FRAMES;
if (g_commandListStates[g_frame])
{
g_queue->waitForCommandFence(g_commandFences[g_frame].get());
g_commandListStates[g_frame] = false;
}
g_dirtyStates = DirtyStates(true);
g_uploadAllocators[g_frame].reset();
DestructTempResources();
g_triangleFanIndexData.reset();
g_quadIndexData.reset();
BeginCommandList();
g_pendingRenderThread = false;
}
static GuestSurface* GetBackBuffer()
{
g_backBuffer->AddRef();
return g_backBuffer;
}
static RenderFormat ConvertFormat(uint32_t format)
{
switch (format)
{
case D3DFMT_A16B16G16R16F:
case D3DFMT_A16B16G16R16F_2:
return RenderFormat::R16G16B16A16_FLOAT;
case D3DFMT_A8B8G8R8:
case D3DFMT_A8R8G8B8:
case D3DFMT_X8R8G8B8:
return RenderFormat::R8G8B8A8_UNORM;
case D3DFMT_D24FS8:
case D3DFMT_D24S8:
return RenderFormat::D32_FLOAT;
case D3DFMT_G16R16F:
case D3DFMT_G16R16F_2:
return RenderFormat::R16G16_FLOAT;
case D3DFMT_INDEX16:
return RenderFormat::R16_UINT;
case D3DFMT_INDEX32:
return RenderFormat::R32_UINT;
case D3DFMT_L8:
case D3DFMT_L8_2:
return RenderFormat::R8_UNORM;
default:
assert(false && "Unknown format");
return RenderFormat::R16G16B16A16_FLOAT;
}
}
static GuestTexture* CreateTexture(uint32_t width, uint32_t height, uint32_t depth, uint32_t levels, uint32_t usage, uint32_t format, uint32_t pool, uint32_t type)
{
const auto texture = g_userHeap.AllocPhysical<GuestTexture>(type == 17 ? ResourceType::VolumeTexture : ResourceType::Texture);
RenderTextureDesc desc;
desc.dimension = texture->type == ResourceType::VolumeTexture ? RenderTextureDimension::TEXTURE_3D : RenderTextureDimension::TEXTURE_2D;
desc.width = width;
desc.height = height;
desc.depth = depth;
desc.mipLevels = levels;
desc.arraySize = 1;
desc.format = ConvertFormat(format);
desc.flags = (desc.format == RenderFormat::D32_FLOAT) ? RenderTextureFlag::DEPTH_TARGET : RenderTextureFlag::NONE;
texture->textureHolder = g_device->createTexture(desc);
texture->texture = texture->textureHolder.get();
RenderTextureViewDesc viewDesc;
viewDesc.format = desc.format;
viewDesc.dimension = texture->type == ResourceType::VolumeTexture ? RenderTextureViewDimension::TEXTURE_3D : RenderTextureViewDimension::TEXTURE_2D;
viewDesc.mipLevels = levels;
switch (format)
{
case D3DFMT_D24FS8:
case D3DFMT_D24S8:
case D3DFMT_L8:
case D3DFMT_L8_2:
viewDesc.componentMapping = RenderComponentMapping(RenderSwizzle::R, RenderSwizzle::R, RenderSwizzle::R, RenderSwizzle::ONE);
break;
case D3DFMT_X8R8G8B8:
viewDesc.componentMapping = RenderComponentMapping(RenderSwizzle::G, RenderSwizzle::B, RenderSwizzle::A, RenderSwizzle::ONE);
break;
}
texture->textureView = texture->texture->createTextureView(viewDesc);
texture->width = width;
texture->height = height;
texture->depth = depth;
texture->format = desc.format;
texture->viewDimension = viewDesc.dimension;
texture->descriptorIndex = g_textureDescriptorAllocator.allocate();
g_textureDescriptorSet->setTexture(texture->descriptorIndex, texture->texture, RenderTextureLayout::SHADER_READ, texture->textureView.get());
#ifdef _DEBUG
texture->texture->setName(std::format("Texture {:X}", g_memory.MapVirtual(texture)));
#endif
return texture;
}
static GuestBuffer* CreateVertexBuffer(uint32_t length)
{
auto buffer = g_userHeap.AllocPhysical<GuestBuffer>(ResourceType::VertexBuffer);
buffer->buffer = g_device->createBuffer(RenderBufferDesc::VertexBuffer(length, RenderHeapType::DEFAULT, RenderBufferFlag::INDEX));
buffer->dataSize = length;
#ifdef _DEBUG
buffer->buffer->setName(std::format("Vertex Buffer {:X}", g_memory.MapVirtual(buffer)));
#endif
return buffer;
}
static GuestBuffer* CreateIndexBuffer(uint32_t length, uint32_t, uint32_t format)
{
auto buffer = g_userHeap.AllocPhysical<GuestBuffer>(ResourceType::IndexBuffer);
buffer->buffer = g_device->createBuffer(RenderBufferDesc::IndexBuffer(length, RenderHeapType::DEFAULT));
buffer->dataSize = length;
buffer->format = ConvertFormat(format);
buffer->guestFormat = format;
#ifdef _DEBUG
buffer->buffer->setName(std::format("Index Buffer {:X}", g_memory.MapVirtual(buffer)));
#endif
return buffer;
}
static GuestSurface* CreateSurface(uint32_t width, uint32_t height, uint32_t format, uint32_t multiSample)
{
RenderTextureDesc desc;
desc.dimension = RenderTextureDimension::TEXTURE_2D;
desc.width = width;
desc.height = height;
desc.depth = 1;
desc.mipLevels = 1;
desc.arraySize = 1;
desc.multisampling.sampleCount = multiSample != 0 && Config::MSAA > 1 ? Config::MSAA : RenderSampleCount::COUNT_1;
desc.format = ConvertFormat(format);
desc.flags = desc.format == RenderFormat::D32_FLOAT ? RenderTextureFlag::DEPTH_TARGET : RenderTextureFlag::RENDER_TARGET;
auto surface = g_userHeap.AllocPhysical<GuestSurface>(desc.format == RenderFormat::D32_FLOAT ?
ResourceType::DepthStencil : ResourceType::RenderTarget);
surface->textureHolder = g_device->createTexture(desc);
surface->texture = surface->textureHolder.get();
surface->width = width;
surface->height = height;
surface->format = desc.format;
surface->guestFormat = format;
surface->sampleCount = desc.multisampling.sampleCount;
if (desc.multisampling.sampleCount != RenderSampleCount::COUNT_1 && desc.format == RenderFormat::D32_FLOAT)
{
RenderTextureViewDesc viewDesc;
viewDesc.dimension = RenderTextureViewDimension::TEXTURE_2D;
viewDesc.format = RenderFormat::D32_FLOAT;
viewDesc.mipLevels = 1;
surface->textureView = surface->textureHolder->createTextureView(viewDesc);
surface->descriptorIndex = g_textureDescriptorAllocator.allocate();
g_textureDescriptorSet->setTexture(surface->descriptorIndex, surface->textureHolder.get(), RenderTextureLayout::SHADER_READ, surface->textureView.get());
}
#ifdef _DEBUG
surface->texture->setName(std::format("{} {:X}", desc.flags & RenderTextureFlag::RENDER_TARGET ? "Render Target" : "Depth Stencil", g_memory.MapVirtual(surface)));
#endif
return surface;
}
static void FlushViewport()
{
bool renderingToBackBuffer = g_renderTarget == g_backBuffer &&
g_backBuffer->texture != g_backBuffer->textureHolder.get();
auto& commandList = g_commandLists[g_frame];
if (g_dirtyStates.viewport)
{
auto viewport = g_viewport;
if (g_halfPixel)
{
viewport.x += 0.5f;
viewport.y += 0.5f;
}
if (renderingToBackBuffer)
{
uint32_t width = g_swapChain->getWidth();
uint32_t height = g_swapChain->getHeight();
viewport.x *= width / 1280.0f;
viewport.y *= height / 720.0f;
viewport.width *= width / 1280.0f;
viewport.height *= height / 720.0f;
}
if (viewport.minDepth >= viewport.maxDepth)
{
viewport.minDepth = 1.0f - viewport.minDepth;
viewport.maxDepth = 1.0f - viewport.maxDepth;
}
commandList->setViewports(viewport);
g_dirtyStates.viewport = false;
}
if (g_dirtyStates.scissorRect)
{
auto scissorRect = g_scissorTestEnable ? g_scissorRect : RenderRect(
g_viewport.x,
g_viewport.y,
g_viewport.x + g_viewport.width,
g_viewport.y + g_viewport.height);
if (renderingToBackBuffer)
{
uint32_t width = g_swapChain->getWidth();
uint32_t height = g_swapChain->getHeight();
scissorRect.left = scissorRect.left * width / 1280;
scissorRect.top = scissorRect.top * height / 720;
scissorRect.right = scissorRect.right * width / 1280;
scissorRect.bottom = scissorRect.bottom * height / 720;
}
commandList->setScissors(scissorRect);
g_dirtyStates.scissorRect = false;
}
}
static bool SetHalfPixel(bool enable)
{
bool oldValue = g_halfPixel;
SetDirtyValue(g_dirtyStates.viewport, g_halfPixel, enable);
return oldValue;
}
static void StretchRect(GuestDevice* device, uint32_t flags, uint32_t, GuestTexture* texture)
{
RenderCommand cmd;
cmd.type = RenderCommandType::StretchRect;
cmd.stretchRect.flags = flags;
cmd.stretchRect.texture = texture;
g_renderQueue.enqueue(cmd);
}
static void ProcStretchRect(const RenderCommand& cmd)
{
const auto& args = cmd.stretchRect;
const bool isDepthStencil = (args.flags & 0x4) != 0;
const auto surface = isDepthStencil ? g_depthStencil : g_renderTarget;
const bool multiSampling = surface->sampleCount != RenderSampleCount::COUNT_1;
RenderTextureLayout srcLayout;
RenderTextureLayout dstLayout;
if (multiSampling)
{
if (isDepthStencil)
{
srcLayout = RenderTextureLayout::SHADER_READ;
dstLayout = RenderTextureLayout::DEPTH_WRITE;
}
else
{
srcLayout = RenderTextureLayout::RESOLVE_SOURCE;
dstLayout = RenderTextureLayout::RESOLVE_DEST;
}
}
else
{
srcLayout = RenderTextureLayout::COPY_SOURCE;
dstLayout = RenderTextureLayout::COPY_DEST;
}
AddBarrier(surface, srcLayout);
AddBarrier(args.texture, dstLayout);
FlushBarriers();
auto& commandList = g_commandLists[g_frame];
if (multiSampling)
{
if (isDepthStencil)
{
uint32_t pipelineIndex = 0;
switch (g_depthStencil->sampleCount)
{
case RenderSampleCount::COUNT_2:
pipelineIndex = 0;
break;
case RenderSampleCount::COUNT_4:
pipelineIndex = 1;
break;
case RenderSampleCount::COUNT_8:
pipelineIndex = 2;
break;
default:
assert(false && "Unsupported MSAA sample count");
break;
}
if (args.texture->framebuffer == nullptr)
{
RenderFramebufferDesc desc;
desc.depthAttachment = args.texture->texture;
args.texture->framebuffer = g_device->createFramebuffer(desc);
}
if (g_framebuffer != args.texture->framebuffer.get())
{
commandList->setFramebuffer(args.texture->framebuffer.get());
g_framebuffer = args.texture->framebuffer.get();
}
bool oldHalfPixel = SetHalfPixel(false);
FlushViewport();
commandList->setPipeline(g_resolveMsaaDepthPipelines[pipelineIndex].get());
commandList->setGraphicsPushConstants(0, &g_depthStencil->descriptorIndex, 0, sizeof(uint32_t));
commandList->drawInstanced(6, 1, 0, 0);
g_dirtyStates.renderTargetAndDepthStencil = true;
g_dirtyStates.pipelineState = true;
if (g_vulkan)
g_dirtyStates.vertexShaderConstants = true;
SetHalfPixel(oldHalfPixel);
}
else
{
commandList->resolveTexture(args.texture->texture, surface->texture);
}
}
else
{
commandList->copyTexture(args.texture->texture, surface->texture);
}
AddBarrier(args.texture, RenderTextureLayout::SHADER_READ);
}
static void SetDefaultViewport(GuestDevice* device, GuestSurface* surface)
{
if (surface != nullptr)
{
RenderCommand cmd;
cmd.type = RenderCommandType::SetViewport;
cmd.setViewport.x = 0.0f;
cmd.setViewport.y = 0.0f;
cmd.setViewport.width = float(surface->width);
cmd.setViewport.height = float(surface->height);
cmd.setViewport.minDepth = 0.0f;
cmd.setViewport.maxDepth = 1.0f;
g_renderQueue.enqueue(cmd);
device->viewport.x = 0.0f;
device->viewport.y = 0.0f;
device->viewport.width = float(surface->width);
device->viewport.height = float(surface->height);
device->viewport.minZ = 0.0f;
device->viewport.maxZ = 1.0f;
}
}
static void SetRenderTarget(GuestDevice* device, uint32_t index, GuestSurface* renderTarget)
{
RenderCommand cmd;
cmd.type = RenderCommandType::SetRenderTarget;
cmd.setRenderTarget.renderTarget = renderTarget;
g_renderQueue.enqueue(cmd);
SetDefaultViewport(device, renderTarget);
}
static void ProcSetRenderTarget(const RenderCommand& cmd)
{
const auto& args = cmd.setRenderTarget;
SetDirtyValue(g_dirtyStates.renderTargetAndDepthStencil, g_renderTarget, args.renderTarget);
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.renderTargetFormat, args.renderTarget != nullptr ? args.renderTarget->format : RenderFormat::UNKNOWN);
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.sampleCount, args.renderTarget != nullptr ? args.renderTarget->sampleCount : RenderSampleCount::COUNT_1);
// When alpha to coverage is enabled, update the alpha test mode as it's dependent on sample count.
SetAlphaTestMode(g_sharedConstants.alphaTestMode != AlphaTestMode::Disabled);
}
static void SetDepthStencilSurface(GuestDevice* device, GuestSurface* depthStencil)
{
RenderCommand cmd;
cmd.type = RenderCommandType::SetDepthStencilSurface;
cmd.setDepthStencilSurface.depthStencil = depthStencil;
g_renderQueue.enqueue(cmd);
SetDefaultViewport(device, depthStencil);
}
static void ProcSetDepthStencilSurface(const RenderCommand& cmd)
{
const auto& args = cmd.setDepthStencilSurface;
SetDirtyValue(g_dirtyStates.renderTargetAndDepthStencil, g_depthStencil, args.depthStencil);
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.depthStencilFormat, args.depthStencil != nullptr ? args.depthStencil->format : RenderFormat::UNKNOWN);
}
static void SetFramebuffer(GuestSurface* renderTarget, GuestSurface* depthStencil, bool settingForClear)
{
if (settingForClear || g_dirtyStates.renderTargetAndDepthStencil)
{
GuestSurface* framebufferContainer = nullptr;
RenderTexture* framebufferKey = nullptr;
if (renderTarget != nullptr && depthStencil != nullptr)
{
framebufferContainer = depthStencil; // Backbuffer texture changes per frame so we can't use the depth stencil as the key.
framebufferKey = renderTarget->texture;
}
else if (renderTarget != nullptr && depthStencil == nullptr)
{
framebufferContainer = renderTarget;
framebufferKey = renderTarget->texture; // Backbuffer texture changes per frame so we can't assume nullptr for it.
}
else if (renderTarget == nullptr && depthStencil != nullptr)
{
framebufferContainer = depthStencil;
framebufferKey = nullptr;
}
auto& commandList = g_commandLists[g_frame];
if (framebufferContainer != nullptr)
{
auto& framebuffer = framebufferContainer->framebuffers[framebufferKey];
if (framebuffer == nullptr)
{
RenderFramebufferDesc desc;
if (renderTarget != nullptr)
{
desc.colorAttachments = const_cast<const RenderTexture**>(&renderTarget->texture);
desc.colorAttachmentsCount = 1;
}
if (depthStencil != nullptr)
desc.depthAttachment = depthStencil->texture;
framebuffer = g_device->createFramebuffer(desc);
}
if (g_framebuffer != framebuffer.get())
{
commandList->setFramebuffer(framebuffer.get());
g_framebuffer = framebuffer.get();
}
}
else if (g_framebuffer != nullptr)
{
commandList->setFramebuffer(nullptr);
g_framebuffer = nullptr;
}
g_dirtyStates.renderTargetAndDepthStencil = settingForClear;
}
}
static void Clear(GuestDevice* device, uint32_t flags, uint32_t, be<float>* color, double z)
{
RenderCommand cmd;
cmd.type = RenderCommandType::Clear;
cmd.clear.flags = flags;
cmd.clear.color[0] = color[0];
cmd.clear.color[1] = color[1];
cmd.clear.color[2] = color[2];
cmd.clear.color[3] = color[3];
cmd.clear.z = float(z);
g_renderQueue.enqueue(cmd);
}
static void ProcClear(const RenderCommand& cmd)
{
const auto& args = cmd.clear;
AddBarrier(g_renderTarget, RenderTextureLayout::COLOR_WRITE);
AddBarrier(g_depthStencil, RenderTextureLayout::DEPTH_WRITE);
FlushBarriers();
bool canClearInOnePass = (g_renderTarget == nullptr) || (g_depthStencil == nullptr) ||
(g_renderTarget->width == g_depthStencil->width && g_renderTarget->height == g_depthStencil->height);
if (canClearInOnePass)
SetFramebuffer(g_renderTarget, g_depthStencil, true);
auto& commandList = g_commandLists[g_frame];
if (g_renderTarget != nullptr && (args.flags & D3DCLEAR_TARGET) != 0)
{
if (!canClearInOnePass)
SetFramebuffer(g_renderTarget, nullptr, true);
commandList->clearColor(0, RenderColor(args.color[0], args.color[1], args.color[2], args.color[3]));
}
if (g_depthStencil != nullptr && (args.flags & D3DCLEAR_ZBUFFER) != 0)
{
if (!canClearInOnePass)
SetFramebuffer(nullptr, g_depthStencil, true);
// The condition here is done by the game to determine reverse Z.
commandList->clearDepth(true, g_depthStencil->guestFormat == D3DFMT_D24FS8 ? (1.0f - args.z) : args.z);
}
}
static void SetViewport(GuestDevice* device, GuestViewport* viewport)
{
RenderCommand cmd;
cmd.type = RenderCommandType::SetViewport;
cmd.setViewport.x = viewport->x;
cmd.setViewport.y = viewport->y;
cmd.setViewport.width = viewport->width;
cmd.setViewport.height = viewport->height;
cmd.setViewport.minDepth = viewport->minZ;
cmd.setViewport.maxDepth = viewport->maxZ;
g_renderQueue.enqueue(cmd);
device->viewport.x = float(viewport->x);
device->viewport.y = float(viewport->y);
device->viewport.width = float(viewport->width);
device->viewport.height = float(viewport->height);
device->viewport.minZ = viewport->minZ;
device->viewport.maxZ = viewport->maxZ;
}
static void ProcSetViewport(const RenderCommand& cmd)
{
const auto& args = cmd.setViewport;
SetDirtyValue<float>(g_dirtyStates.viewport, g_viewport.x, args.x);
SetDirtyValue<float>(g_dirtyStates.viewport, g_viewport.y, args.y);
SetDirtyValue<float>(g_dirtyStates.viewport, g_viewport.width, args.width);
SetDirtyValue<float>(g_dirtyStates.viewport, g_viewport.height, args.height);
SetDirtyValue<float>(g_dirtyStates.viewport, g_viewport.minDepth, args.minDepth);
SetDirtyValue<float>(g_dirtyStates.viewport, g_viewport.maxDepth, args.maxDepth);
g_dirtyStates.scissorRect |= g_dirtyStates.viewport;
// Update Z function as it's dependent on reverse Z.
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.zFunc, ConvertComparisonFunc(g_zFunc, args.minDepth >= args.maxDepth));
}
static void SetTexture(GuestDevice* device, uint32_t index, GuestTexture* texture)
{
RenderCommand cmd;
cmd.type = RenderCommandType::SetTexture;
cmd.setTexture.index = index;
cmd.setTexture.texture = texture;
g_renderQueue.enqueue(cmd);
}
static void ProcSetTexture(const RenderCommand& cmd)
{
const auto& args = cmd.setTexture;
AddBarrier(args.texture, RenderTextureLayout::SHADER_READ);
auto viewDimension = args.texture != nullptr ? args.texture->viewDimension : RenderTextureViewDimension::UNKNOWN;
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture2DIndices[args.index],
viewDimension == RenderTextureViewDimension::TEXTURE_2D ? args.texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_2D);
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture3DIndices[args.index], args.texture != nullptr &&
viewDimension == RenderTextureViewDimension::TEXTURE_3D ? args.texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_3D);
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.textureCubeIndices[args.index], args.texture != nullptr &&
viewDimension == RenderTextureViewDimension::TEXTURE_CUBE ? args.texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE);
}
static void SetScissorRect(GuestDevice* device, GuestRect* rect)
{
RenderCommand cmd;
cmd.type = RenderCommandType::SetScissorRect;
cmd.setScissorRect.top = rect->top;
cmd.setScissorRect.left = rect->left;
cmd.setScissorRect.bottom = rect->bottom;
cmd.setScissorRect.right = rect->right;
g_renderQueue.enqueue(cmd);
}
static void ProcSetScissorRect(const RenderCommand& cmd)
{
const auto& args = cmd.setScissorRect;
SetDirtyValue<int32_t>(g_dirtyStates.scissorRect, g_scissorRect.top, args.top);
SetDirtyValue<int32_t>(g_dirtyStates.scissorRect, g_scissorRect.left, args.left);
SetDirtyValue<int32_t>(g_dirtyStates.scissorRect, g_scissorRect.bottom, args.bottom);
SetDirtyValue<int32_t>(g_dirtyStates.scissorRect, g_scissorRect.right, args.right);
}
static RenderPipeline* CreateGraphicsPipeline(PipelineState pipelineState)
{
// Sanitize to prevent state leaking.
if (!pipelineState.zEnable)
{
pipelineState.zWriteEnable = false;
pipelineState.zFunc = RenderComparisonFunction::LESS;
pipelineState.slopeScaledDepthBias = 0.0f;
pipelineState.depthBias = 0;
pipelineState.depthStencilFormat = RenderFormat::UNKNOWN;
}
if (!pipelineState.colorWriteEnable)
{
pipelineState.alphaBlendEnable = false;
pipelineState.renderTargetFormat = RenderFormat::UNKNOWN;
}
if (!pipelineState.alphaBlendEnable)
{
pipelineState.srcBlend = RenderBlend::ONE;
pipelineState.destBlend = RenderBlend::ZERO;
pipelineState.blendOp = RenderBlendOperation::ADD;
pipelineState.srcBlendAlpha = RenderBlend::ONE;
pipelineState.destBlendAlpha = RenderBlend::ZERO;
pipelineState.blendOpAlpha = RenderBlendOperation::ADD;
}
auto& pipeline = g_pipelines[XXH3_64bits(&pipelineState, sizeof(PipelineState))];
if (pipeline == nullptr)
{
RenderGraphicsPipelineDesc desc;
desc.pipelineLayout = g_pipelineLayout.get();
desc.vertexShader = pipelineState.vertexShader->shader.get();
desc.pixelShader = pipelineState.pixelShader != nullptr ? pipelineState.pixelShader->shader.get() : nullptr;
desc.depthFunction = pipelineState.zFunc;
desc.depthEnabled = pipelineState.zEnable;
desc.depthWriteEnabled = pipelineState.zWriteEnable;
desc.depthBias = pipelineState.depthBias;
desc.slopeScaledDepthBias = pipelineState.slopeScaledDepthBias;
desc.depthClipEnabled = true;
desc.primitiveTopology = pipelineState.primitiveTopology;
desc.cullMode = pipelineState.cullMode;
desc.renderTargetFormat[0] = pipelineState.renderTargetFormat;
desc.renderTargetBlend[0].blendEnabled = pipelineState.alphaBlendEnable;
desc.renderTargetBlend[0].srcBlend = pipelineState.srcBlend;
desc.renderTargetBlend[0].dstBlend = pipelineState.destBlend;
desc.renderTargetBlend[0].blendOp = pipelineState.blendOp;
desc.renderTargetBlend[0].srcBlendAlpha = pipelineState.srcBlendAlpha;
desc.renderTargetBlend[0].dstBlendAlpha = pipelineState.destBlendAlpha;
desc.renderTargetBlend[0].blendOpAlpha = pipelineState.blendOpAlpha;
desc.renderTargetBlend[0].renderTargetWriteMask = pipelineState.colorWriteEnable;
desc.renderTargetCount = pipelineState.renderTargetFormat != RenderFormat::UNKNOWN ? 1 : 0;
desc.depthTargetFormat = pipelineState.depthStencilFormat;
desc.multisampling.sampleCount = pipelineState.sampleCount;
desc.alphaToCoverageEnabled = pipelineState.enableAlphaToCoverage;
desc.inputElements = pipelineState.vertexDeclaration->inputElements.get();
desc.inputElementsCount = pipelineState.vertexDeclaration->inputElementCount;
RenderInputSlot inputSlots[16]{};
uint32_t inputSlotIndices[16]{};
uint32_t inputSlotCount = 0;
for (size_t i = 0; i < pipelineState.vertexDeclaration->inputElementCount; i++)
{
auto& inputElement = pipelineState.vertexDeclaration->inputElements[i];
auto& inputSlotIndex = inputSlotIndices[inputElement.slotIndex];
if (inputSlotIndex == NULL)
inputSlotIndex = ++inputSlotCount;
auto& inputSlot = inputSlots[inputSlotIndex - 1];
inputSlot.index = inputElement.slotIndex;
inputSlot.stride = pipelineState.vertexStrides[inputElement.slotIndex];
if (pipelineState.instancing && inputElement.slotIndex != 0 && inputElement.slotIndex != 15)
inputSlot.classification = RenderInputSlotClassification::PER_INSTANCE_DATA;
else
inputSlot.classification = RenderInputSlotClassification::PER_VERTEX_DATA;
}
desc.inputSlots = inputSlots;
desc.inputSlotsCount = inputSlotCount;
pipeline = g_device->createGraphicsPipeline(desc);
}
return pipeline.get();
}
static RenderTextureAddressMode ConvertTextureAddressMode(size_t value)
{
switch (value)
{
case D3DTADDRESS_WRAP:
return RenderTextureAddressMode::WRAP;
case D3DTADDRESS_MIRROR:
return RenderTextureAddressMode::MIRROR;
case D3DTADDRESS_CLAMP:
return RenderTextureAddressMode::CLAMP;
case D3DTADDRESS_MIRRORONCE:
return RenderTextureAddressMode::MIRROR_ONCE;
case D3DTADDRESS_BORDER:
return RenderTextureAddressMode::BORDER;
default:
assert(false && "Unknown texture address mode");
return RenderTextureAddressMode::UNKNOWN;
}
}
static RenderFilter ConvertTextureFilter(uint32_t value)
{
switch (value)
{
case D3DTEXF_POINT:
case D3DTEXF_NONE:
return RenderFilter::NEAREST;
case D3DTEXF_LINEAR:
return RenderFilter::LINEAR;
default:
assert(false && "Unknown texture filter");
return RenderFilter::UNKNOWN;
}
}
static RenderBorderColor ConvertBorderColor(uint32_t value)
{
switch (value)
{
case 0:
return RenderBorderColor::TRANSPARENT_BLACK;
case 1:
return RenderBorderColor::OPAQUE_WHITE;
default:
assert(false && "Unknown border color");
return RenderBorderColor::UNKNOWN;
}
}
struct LocalRenderCommandQueue
{
RenderCommand commands[20];
uint32_t count = 0;
RenderCommand& enqueue()
{
assert(count < std::size(commands));
return commands[count++];
}
void submit()
{
g_renderQueue.enqueue_bulk(commands, count);
}
};
static void FlushRenderStateForMainThread(GuestDevice* device, LocalRenderCommandQueue& queue)
{
constexpr size_t BOOL_MASK = 0x100000000000000ull;
if ((device->dirtyFlags[4].get() & BOOL_MASK) != 0)
{
auto& cmd = queue.enqueue();
cmd.type = RenderCommandType::SetBooleans;
cmd.setBooleans.booleans = (device->vertexShaderBoolConstants[0].get() & 0xFF) | ((device->pixelShaderBoolConstants[0].get() & 0xFF) << 16);
device->dirtyFlags[4] = device->dirtyFlags[4].get() & ~BOOL_MASK;
}
for (uint32_t i = 0; i < 16; i++)
{
const size_t mask = 0x8000000000000000ull >> (i + 32);
if (device->dirtyFlags[3].get() & mask)
{
auto& cmd = queue.enqueue();
cmd.type = RenderCommandType::SetSamplerState;
cmd.setSamplerState.index = i;
cmd.setSamplerState.data0 = device->samplerStates[i].data[0];
cmd.setSamplerState.data3 = device->samplerStates[i].data[3];
cmd.setSamplerState.data5 = device->samplerStates[i].data[5];
device->dirtyFlags[3] = device->dirtyFlags[3].get() & ~mask;
}
}
if (g_dirtyStates.vertexShaderConstants || device->dirtyFlags[0] != 0)
{
WaitForRenderThread();
auto& cmd = queue.enqueue();
cmd.type = RenderCommandType::SetVertexShaderConstants;
cmd.setVertexShaderConstants.allocation = g_uploadAllocators[g_frame].allocate<true>(device->vertexShaderFloatConstants, 0x1000, 0x100);
device->dirtyFlags[0] = 0;
}
if (g_dirtyStates.pixelShaderConstants || device->dirtyFlags[1] != 0)
{
WaitForRenderThread();
auto& cmd = queue.enqueue();
cmd.type = RenderCommandType::SetPixelShaderConstants;
cmd.setPixelShaderConstants.allocation = g_uploadAllocators[g_frame].allocate<true>(device->pixelShaderFloatConstants, 0xE00, 0x100);
device->dirtyFlags[1] = 0;
}
}
static void ProcSetBooleans(const RenderCommand& cmd)
{
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.booleans, cmd.setBooleans.booleans);
}
static void ProcSetSamplerState(const RenderCommand& cmd)
{
const auto& args = cmd.setSamplerState;
const auto addressU = ConvertTextureAddressMode((args.data0 >> 10) & 0x7);
const auto addressV = ConvertTextureAddressMode((args.data0 >> 13) & 0x7);
const auto addressW = ConvertTextureAddressMode((args.data0 >> 16) & 0x7);
auto magFilter = ConvertTextureFilter((args.data3 >> 19) & 0x3);
auto minFilter = ConvertTextureFilter((args.data3 >> 21) & 0x3);
auto mipFilter = ConvertTextureFilter((args.data3 >> 23) & 0x3);
const auto borderColor = ConvertBorderColor(args.data5 & 0x3);
bool anisotropyEnabled = mipFilter == RenderFilter::LINEAR;
if (anisotropyEnabled)
{
magFilter = RenderFilter::LINEAR;
minFilter = RenderFilter::LINEAR;
}
auto& samplerDesc = g_samplerDescs[args.index];
bool dirty = false;
SetDirtyValue(dirty, samplerDesc.addressU, addressU);
SetDirtyValue(dirty, samplerDesc.addressV, addressV);
SetDirtyValue(dirty, samplerDesc.addressW, addressW);
SetDirtyValue(dirty, samplerDesc.minFilter, minFilter);
SetDirtyValue(dirty, samplerDesc.magFilter, magFilter);
SetDirtyValue(dirty, samplerDesc.mipmapMode, RenderMipmapMode(mipFilter));
SetDirtyValue(dirty, samplerDesc.anisotropyEnabled, anisotropyEnabled);
SetDirtyValue(dirty, samplerDesc.borderColor, borderColor);
if (dirty)
{
auto& [descriptorIndex, sampler] = g_samplerStates[XXH3_64bits(&samplerDesc, sizeof(RenderSamplerDesc))];
if (descriptorIndex == NULL)
{
descriptorIndex = g_samplerStates.size();
sampler = g_device->createSampler(samplerDesc);
g_samplerDescriptorSet->setSampler(descriptorIndex - 1, sampler.get());
}
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.samplerIndices[args.index], descriptorIndex - 1);
}
}
static void SetRootDescriptor(const UploadAllocation& allocation, size_t index)
{
auto& commandList = g_commandLists[g_frame];
if (g_vulkan)
commandList->setGraphicsPushConstants(0, &allocation.deviceAddress, 8 * index, 8);
else
commandList->setGraphicsRootDescriptor(allocation.buffer->at(allocation.offset), index);
}
static void ProcSetVertexShaderConstants(const RenderCommand& cmd)
{
SetRootDescriptor(cmd.setVertexShaderConstants.allocation, 0);
}
static void ProcSetPixelShaderConstants(const RenderCommand& cmd)
{
SetRootDescriptor(cmd.setPixelShaderConstants.allocation, 1);
}
static void FlushRenderStateForRenderThread()
{
auto renderTarget = g_pipelineState.colorWriteEnable ? g_renderTarget : nullptr;
auto depthStencil = g_pipelineState.zEnable ? g_depthStencil : nullptr;
AddBarrier(renderTarget, RenderTextureLayout::COLOR_WRITE);
AddBarrier(depthStencil, RenderTextureLayout::DEPTH_WRITE);
FlushBarriers();
SetFramebuffer(renderTarget, depthStencil, false);
FlushViewport();
auto& commandList = g_commandLists[g_frame];
if (g_dirtyStates.pipelineState)
commandList->setPipeline(CreateGraphicsPipeline(g_pipelineState));
if (g_dirtyStates.sharedConstants)
{
auto sharedConstants = g_uploadAllocators[g_frame].allocate<false>(&g_sharedConstants, sizeof(g_sharedConstants), 0x100);
SetRootDescriptor(sharedConstants, 2);
}
if (g_dirtyStates.vertexStreamFirst <= g_dirtyStates.vertexStreamLast)
{
commandList->setVertexBuffers(
g_dirtyStates.vertexStreamFirst,
g_vertexBufferViews + g_dirtyStates.vertexStreamFirst,
g_dirtyStates.vertexStreamLast - g_dirtyStates.vertexStreamFirst + 1,
g_inputSlots + g_dirtyStates.vertexStreamFirst);
}
if (g_dirtyStates.indices && (!g_vulkan || g_indexBufferView.buffer.ref != nullptr))
commandList->setIndexBuffer(&g_indexBufferView);
g_dirtyStates = DirtyStates(false);
}
static RenderPrimitiveTopology ConvertPrimitiveType(uint32_t primitiveType)
{
switch (primitiveType)
{
case D3DPT_POINTLIST:
return RenderPrimitiveTopology::POINT_LIST;
case D3DPT_LINELIST:
return RenderPrimitiveTopology::LINE_LIST;
case D3DPT_LINESTRIP:
return RenderPrimitiveTopology::LINE_STRIP;
case D3DPT_TRIANGLELIST:
case D3DPT_QUADLIST:
return RenderPrimitiveTopology::TRIANGLE_LIST;
case D3DPT_TRIANGLESTRIP:
return RenderPrimitiveTopology::TRIANGLE_STRIP;
case D3DPT_TRIANGLEFAN:
return g_triangleFanSupported ? RenderPrimitiveTopology::TRIANGLE_FAN : RenderPrimitiveTopology::TRIANGLE_LIST;
default:
assert(false && "Unknown primitive type");
return RenderPrimitiveTopology::UNKNOWN;
}
}
static void SetPrimitiveType(uint32_t primitiveType)
{
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.primitiveTopology, ConvertPrimitiveType(primitiveType));
}
static uint32_t CheckInstancing()
{
uint32_t indexCount = 0;
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.instancing, g_pipelineState.vertexDeclaration->indexVertexStream != 0);
if (g_pipelineState.instancing)
{
// Index buffer is passed as a vertex stream
indexCount = g_vertexBufferViews[g_pipelineState.vertexDeclaration->indexVertexStream].size / 4;
}
return indexCount;
}
static void DrawPrimitive(GuestDevice* device, uint32_t primitiveType, uint32_t startVertex, uint32_t primitiveCount)
{
LocalRenderCommandQueue queue;
FlushRenderStateForMainThread(device, queue);
auto& cmd = queue.enqueue();
cmd.type = RenderCommandType::DrawPrimitive;
cmd.drawPrimitive.primitiveType = primitiveType;
cmd.drawPrimitive.startVertex = startVertex;
cmd.drawPrimitive.primitiveCount = primitiveCount;
queue.submit();
}
static void ProcDrawPrimitive(const RenderCommand& cmd)
{
const auto& args = cmd.drawPrimitive;
SetPrimitiveType(args.primitiveType);
uint32_t indexCount = CheckInstancing();
if (indexCount > 0)
{
auto& vertexBufferView = g_vertexBufferViews[g_pipelineState.vertexDeclaration->indexVertexStream];
SetDirtyValue(g_dirtyStates.indices, g_indexBufferView.buffer, vertexBufferView.buffer);
SetDirtyValue(g_dirtyStates.indices, g_indexBufferView.size, vertexBufferView.size);
SetDirtyValue(g_dirtyStates.indices, g_indexBufferView.format, RenderFormat::R32_UINT);
}
FlushRenderStateForRenderThread();
auto& commandList = g_commandLists[g_frame];
if (indexCount > 0)
commandList->drawIndexedInstanced(indexCount, args.primitiveCount / indexCount, 0, 0, 0);
else
commandList->drawInstanced(args.primitiveCount, 1, args.startVertex, 0);
}
static void DrawIndexedPrimitive(GuestDevice* device, uint32_t primitiveType, int32_t baseVertexIndex, uint32_t startIndex, uint32_t primCount)
{
LocalRenderCommandQueue queue;
FlushRenderStateForMainThread(device, queue);
auto& cmd = queue.enqueue();
cmd.type = RenderCommandType::DrawIndexedPrimitive;
cmd.drawIndexedPrimitive.primitiveType = primitiveType;
cmd.drawIndexedPrimitive.baseVertexIndex = baseVertexIndex;
cmd.drawIndexedPrimitive.startIndex = startIndex;
cmd.drawIndexedPrimitive.primCount = primCount;
queue.submit();
}
static void ProcDrawIndexedPrimitive(const RenderCommand& cmd)
{
const auto& args = cmd.drawIndexedPrimitive;
CheckInstancing();
SetPrimitiveType(args.primitiveType);
FlushRenderStateForRenderThread();
g_commandLists[g_frame]->drawIndexedInstanced(args.primCount, 1, args.startIndex, args.baseVertexIndex, 0);
}
static void DrawPrimitiveUP(GuestDevice* device, uint32_t primitiveType, uint32_t primitiveCount, void* vertexStreamZeroData, uint32_t vertexStreamZeroStride)
{
LocalRenderCommandQueue queue;
FlushRenderStateForMainThread(device, queue);
WaitForRenderThread();
auto& cmd = queue.enqueue();
cmd.type = RenderCommandType::DrawPrimitiveUP;
cmd.drawPrimitiveUP.primitiveType = primitiveType;
cmd.drawPrimitiveUP.primitiveCount = primitiveCount;
cmd.drawPrimitiveUP.vertexStreamZeroData = g_uploadAllocators[g_frame].allocate<true>(reinterpret_cast<uint32_t*>(vertexStreamZeroData), primitiveCount * vertexStreamZeroStride, 0x4);
cmd.drawPrimitiveUP.vertexStreamZeroStride = vertexStreamZeroStride;
queue.submit();
}
static void ProcDrawPrimitiveUP(const RenderCommand& cmd)
{
const auto& args = cmd.drawPrimitiveUP;
CheckInstancing();
SetPrimitiveType(args.primitiveType);
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.vertexStrides[0], uint8_t(args.vertexStreamZeroStride));
auto& vertexBufferView = g_vertexBufferViews[0];
vertexBufferView.size = args.primitiveCount * args.vertexStreamZeroStride;
vertexBufferView.buffer = args.vertexStreamZeroData.buffer->at(args.vertexStreamZeroData.offset);
g_inputSlots[0].stride = args.vertexStreamZeroStride;
g_dirtyStates.vertexStreamFirst = 0;
uint32_t indexCount = 0;
if (args.primitiveType == D3DPT_QUADLIST)
indexCount = g_quadIndexData.prepare(args.primitiveCount);
else if (!g_triangleFanSupported && args.primitiveType == D3DPT_TRIANGLEFAN)
indexCount = g_triangleFanIndexData.prepare(args.primitiveCount);
FlushRenderStateForRenderThread();
if (indexCount != 0)
g_commandLists[g_frame]->drawIndexedInstanced(indexCount, 1, 0, 0, 0);
else
g_commandLists[g_frame]->drawInstanced(args.primitiveCount, 1, 0, 0);
}
static const char* ConvertDeclUsage(uint32_t usage)
{
switch (usage)
{
case D3DDECLUSAGE_POSITION:
return "POSITION";
case D3DDECLUSAGE_BLENDWEIGHT:
return "BLENDWEIGHT";
case D3DDECLUSAGE_BLENDINDICES:
return "BLENDINDICES";
case D3DDECLUSAGE_NORMAL:
return "NORMAL";
case D3DDECLUSAGE_PSIZE:
return "PSIZE";
case D3DDECLUSAGE_TEXCOORD:
return "TEXCOORD";
case D3DDECLUSAGE_TANGENT:
return "TANGENT";
case D3DDECLUSAGE_BINORMAL:
return "BINORMAL";
case D3DDECLUSAGE_TESSFACTOR:
return "TESSFACTOR";
case D3DDECLUSAGE_POSITIONT:
return "POSITIONT";
case D3DDECLUSAGE_COLOR:
return "COLOR";
case D3DDECLUSAGE_FOG:
return "FOG";
case D3DDECLUSAGE_DEPTH:
return "DEPTH";
case D3DDECLUSAGE_SAMPLE:
return "SAMPLE";
default:
assert(false && "Unknown usage");
return "UNKNOWN";
}
}
static RenderFormat ConvertDeclType(uint32_t type)
{
switch (type)
{
case D3DDECLTYPE_FLOAT1:
return RenderFormat::R32_FLOAT;
case D3DDECLTYPE_FLOAT2:
return RenderFormat::R32G32_FLOAT;
case D3DDECLTYPE_FLOAT3:
return RenderFormat::R32G32B32_FLOAT;
case D3DDECLTYPE_FLOAT4:
return RenderFormat::R32G32B32A32_FLOAT;
case D3DDECLTYPE_D3DCOLOR:
return RenderFormat::B8G8R8A8_UNORM;
case D3DDECLTYPE_UBYTE4:
case D3DDECLTYPE_UBYTE4_2:
return RenderFormat::R8G8B8A8_UINT;
case D3DDECLTYPE_SHORT2:
return RenderFormat::R16G16_SINT;
case D3DDECLTYPE_SHORT4:
return RenderFormat::R16G16B16A16_SINT;
case D3DDECLTYPE_UBYTE4N:
case D3DDECLTYPE_UBYTE4N_2:
return RenderFormat::R8G8B8A8_UNORM;
case D3DDECLTYPE_SHORT2N:
return RenderFormat::R16G16_SNORM;
case D3DDECLTYPE_SHORT4N:
return RenderFormat::R16G16B16A16_SNORM;
case D3DDECLTYPE_USHORT2N:
return RenderFormat::R16G16_UNORM;
case D3DDECLTYPE_USHORT4N:
return RenderFormat::R16G16B16A16_UNORM;
case D3DDECLTYPE_UINT1:
return RenderFormat::R32_UINT;
case D3DDECLTYPE_DEC3N_2:
case D3DDECLTYPE_DEC3N_3:
return RenderFormat::R32_UINT;
case D3DDECLTYPE_FLOAT16_2:
return RenderFormat::R16G16_FLOAT;
case D3DDECLTYPE_FLOAT16_4:
return RenderFormat::R16G16B16A16_FLOAT;
default:
assert(false && "Unknown type");
return RenderFormat::UNKNOWN;
}
}
static GuestVertexDeclaration* CreateVertexDeclaration(GuestVertexElement* vertexElements)
{
size_t vertexElementCount = 0;
auto vertexElement = vertexElements;
while (vertexElement->stream != 0xFF && vertexElement->type != D3DDECLTYPE_UNUSED)
{
vertexElement->padding = 0;
++vertexElement;
++vertexElementCount;
}
std::lock_guard lock(g_vertexDeclarationMutex);
auto& vertexDeclaration = g_vertexDeclarations[
XXH3_64bits(vertexElements, vertexElementCount * sizeof(GuestVertexElement))];
if (vertexDeclaration == nullptr)
{
vertexDeclaration = g_userHeap.AllocPhysical<GuestVertexDeclaration>(ResourceType::VertexDeclaration);
static std::vector<RenderInputElement> inputElements;
inputElements.clear();
struct Location
{
uint32_t usage;
uint32_t usageIndex;
uint32_t location;
};
constexpr Location locations[] =
{
{ D3DDECLUSAGE_POSITION, 0, 0 },
{ D3DDECLUSAGE_NORMAL, 0, 1 },
{ D3DDECLUSAGE_TANGENT, 0, 2 },
{ D3DDECLUSAGE_BINORMAL, 0, 3 },
{ D3DDECLUSAGE_TEXCOORD, 0, 4 },
{ D3DDECLUSAGE_TEXCOORD, 1, 5 },
{ D3DDECLUSAGE_TEXCOORD, 2, 6 },
{ D3DDECLUSAGE_TEXCOORD, 3, 7 },
{ D3DDECLUSAGE_COLOR, 0, 8 },
{ D3DDECLUSAGE_BLENDINDICES, 0, 9 },
{ D3DDECLUSAGE_BLENDWEIGHT, 0, 10 },
{ D3DDECLUSAGE_COLOR, 1, 11 },
{ D3DDECLUSAGE_TEXCOORD, 4, 12 },
{ D3DDECLUSAGE_TEXCOORD, 5, 13 },
{ D3DDECLUSAGE_TEXCOORD, 6, 14 },
{ D3DDECLUSAGE_TEXCOORD, 7, 15 },
{ D3DDECLUSAGE_POSITION, 1, 15 }
};
vertexElement = vertexElements;
while (vertexElement->stream != 0xFF && vertexElement->type != D3DDECLTYPE_UNUSED)
{
if (vertexElement->usage == D3DDECLUSAGE_POSITION && vertexElement->usageIndex == 2)
{
++vertexElement;
continue;
}
auto& inputElement = inputElements.emplace_back();
inputElement.semanticName = ConvertDeclUsage(vertexElement->usage);
inputElement.semanticIndex = vertexElement->usageIndex;
inputElement.location = ~0;
for (auto& location : locations)
{
if (location.usage == vertexElement->usage && location.usageIndex == vertexElement->usageIndex)
{
inputElement.location = location.location;
break;
}
}
assert(inputElement.location != ~0);
inputElement.format = ConvertDeclType(vertexElement->type);
inputElement.slotIndex = vertexElement->stream;
inputElement.alignedByteOffset = vertexElement->offset;
switch (vertexElement->usage)
{
case D3DDECLUSAGE_POSITION:
if (vertexElement->usageIndex == 1)
vertexDeclaration->indexVertexStream = vertexElement->stream;
break;
case D3DDECLUSAGE_BLENDWEIGHT:
case D3DDECLUSAGE_BLENDINDICES:
vertexDeclaration->inputLayoutFlags |= INPUT_LAYOUT_FLAG_HAS_BONE_WEIGHTS;
break;
case D3DDECLUSAGE_NORMAL:
case D3DDECLUSAGE_TANGENT:
case D3DDECLUSAGE_BINORMAL:
if (vertexElement->type == D3DDECLTYPE_FLOAT3)
inputElement.format = RenderFormat::R32G32B32_UINT;
else
vertexDeclaration->inputLayoutFlags |= INPUT_LAYOUT_FLAG_HAS_R11G11B10_NORMAL;
break;
case D3DDECLUSAGE_TEXCOORD:
switch (vertexElement->type)
{
case D3DDECLTYPE_SHORT2:
case D3DDECLTYPE_SHORT4:
case D3DDECLTYPE_SHORT2N:
case D3DDECLTYPE_SHORT4N:
case D3DDECLTYPE_USHORT2N:
case D3DDECLTYPE_USHORT4N:
case D3DDECLTYPE_FLOAT16_2:
case D3DDECLTYPE_FLOAT16_4:
vertexDeclaration->swappedTexcoords |= 1 << vertexElement->usageIndex;
break;
}
break;
}
++vertexElement;
}
auto addInputElement = [&](uint32_t usage, uint32_t usageIndex)
{
uint32_t location = ~0;
for (auto& alsoLocation : locations)
{
if (alsoLocation.usage == usage && alsoLocation.usageIndex == usageIndex)
{
location = alsoLocation.location;
break;
}
}
assert(location != ~0);
for (auto& inputElement : inputElements)
{
if (inputElement.location == location)
return;
}
auto format = RenderFormat::R32_FLOAT;
switch (usage)
{
case D3DDECLUSAGE_NORMAL:
case D3DDECLUSAGE_TANGENT:
case D3DDECLUSAGE_BINORMAL:
case D3DDECLUSAGE_BLENDINDICES:
format = RenderFormat::R32_UINT;
break;
}
inputElements.emplace_back(ConvertDeclUsage(usage), usageIndex, location, format, 15, 0);
};
addInputElement(D3DDECLUSAGE_POSITION, 0);
addInputElement(D3DDECLUSAGE_NORMAL, 0);
addInputElement(D3DDECLUSAGE_TANGENT, 0);
addInputElement(D3DDECLUSAGE_BINORMAL, 0);
addInputElement(D3DDECLUSAGE_TEXCOORD, 0);
addInputElement(D3DDECLUSAGE_TEXCOORD, 1);
addInputElement(D3DDECLUSAGE_TEXCOORD, 2);
addInputElement(D3DDECLUSAGE_TEXCOORD, 3);
addInputElement(D3DDECLUSAGE_COLOR, 0);
addInputElement(D3DDECLUSAGE_BLENDWEIGHT, 0);
addInputElement(D3DDECLUSAGE_BLENDINDICES, 0);
vertexDeclaration->inputElements = std::make_unique<RenderInputElement[]>(inputElements.size());
std::copy(inputElements.begin(), inputElements.end(), vertexDeclaration->inputElements.get());
vertexDeclaration->vertexElements = std::make_unique<GuestVertexElement[]>(vertexElementCount + 1);
std::copy(vertexElements, vertexElements + vertexElementCount + 1, vertexDeclaration->vertexElements.get());
vertexDeclaration->inputElementCount = uint32_t(inputElements.size());
vertexDeclaration->vertexElementCount = vertexElementCount + 1;
}
vertexDeclaration->AddRef();
return vertexDeclaration;
}
static void SetVertexDeclaration(GuestDevice* device, GuestVertexDeclaration* vertexDeclaration)
{
RenderCommand cmd;
cmd.type = RenderCommandType::SetVertexDeclaration;
cmd.setVertexDeclaration.vertexDeclaration = vertexDeclaration;
g_renderQueue.enqueue(cmd);
device->vertexDeclaration = g_memory.MapVirtual(vertexDeclaration);
}
static void ProcSetVertexDeclaration(const RenderCommand& cmd)
{
auto& args = cmd.setVertexDeclaration;
if (args.vertexDeclaration != nullptr)
{
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.swappedTexcoords, args.vertexDeclaration->swappedTexcoords);
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.inputLayoutFlags, args.vertexDeclaration->inputLayoutFlags);
}
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.vertexDeclaration, args.vertexDeclaration);
}
static GuestShader* CreateShader(const be<uint32_t>* function, ResourceType resourceType)
{
XXH64_hash_t hash = XXH3_64bits(function, function[1] + function[2]);
auto end = g_shaderCacheEntries + g_shaderCacheEntryCount;
auto findResult = std::lower_bound(g_shaderCacheEntries, end, hash, [](ShaderCacheEntry& lhs, XXH64_hash_t rhs)
{
return lhs.hash < rhs;
});
GuestShader* shader = nullptr;
if (findResult != end && findResult->hash == hash)
{
if (findResult->userData == nullptr)
{
shader = g_userHeap.AllocPhysical<GuestShader>(resourceType);
if (g_vulkan)
shader->shader = g_device->createShader(g_shaderCache.get() + findResult->spirvOffset, findResult->spirvSize, "main", RenderShaderFormat::SPIRV);
else
shader->shader = g_device->createShader(g_shaderCache.get() + findResult->dxilOffset, findResult->dxilSize, "main", RenderShaderFormat::DXIL);
findResult->userData = shader;
}
else
{
shader = reinterpret_cast<GuestShader*>(findResult->userData);
}
}
if (shader == nullptr)
shader = g_userHeap.AllocPhysical<GuestShader>(resourceType);
else
shader->AddRef();
return shader;
}
static GuestShader* CreateVertexShader(const be<uint32_t>* function)
{
return CreateShader(function, ResourceType::VertexShader);
}
static void SetVertexShader(GuestDevice* device, GuestShader* shader)
{
RenderCommand cmd;
cmd.type = RenderCommandType::SetVertexShader;
cmd.setVertexShader.shader = shader;
g_renderQueue.enqueue(cmd);
}
static void ProcSetVertexShader(const RenderCommand& cmd)
{
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.vertexShader, cmd.setVertexShader.shader);
}
static void SetStreamSource(GuestDevice* device, uint32_t index, GuestBuffer* buffer, uint32_t offset, uint32_t stride)
{
RenderCommand cmd;
cmd.type = RenderCommandType::SetStreamSource;
cmd.setStreamSource.index = index;
cmd.setStreamSource.buffer = buffer;
cmd.setStreamSource.offset = offset;
cmd.setStreamSource.stride = stride;
g_renderQueue.enqueue(cmd);
}
static void ProcSetStreamSource(const RenderCommand& cmd)
{
const auto& args = cmd.setStreamSource;
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.vertexStrides[args.index], uint8_t(args.buffer != nullptr ? args.stride : 0));
bool dirty = false;
SetDirtyValue(dirty, g_vertexBufferViews[args.index].buffer, args.buffer != nullptr ? args.buffer->buffer->at(args.offset) : RenderBufferReference{});
SetDirtyValue(dirty, g_vertexBufferViews[args.index].size, args.buffer != nullptr ? (args.buffer->dataSize - args.offset) : 0u);
SetDirtyValue(dirty, g_inputSlots[args.index].stride, args.buffer != nullptr ? args.stride : 0u);
if (dirty)
{
g_dirtyStates.vertexStreamFirst = std::min<uint8_t>(g_dirtyStates.vertexStreamFirst, args.index);
g_dirtyStates.vertexStreamLast = std::max<uint8_t>(g_dirtyStates.vertexStreamLast, args.index);
}
}
static void SetIndices(GuestDevice* device, GuestBuffer* buffer)
{
RenderCommand cmd;
cmd.type = RenderCommandType::SetIndices;
cmd.setIndices.buffer = buffer;
g_renderQueue.enqueue(cmd);
}
static void ProcSetIndices(const RenderCommand& cmd)
{
const auto& args = cmd.setIndices;
SetDirtyValue(g_dirtyStates.indices, g_indexBufferView.buffer, args.buffer != nullptr ? args.buffer->buffer->at(0) : RenderBufferReference{});
SetDirtyValue(g_dirtyStates.indices, g_indexBufferView.format, args.buffer != nullptr ? args.buffer->format : RenderFormat::R16_UINT);
SetDirtyValue(g_dirtyStates.indices, g_indexBufferView.size, args.buffer != nullptr ? args.buffer->dataSize : 0u);
}
static GuestShader* CreatePixelShader(const be<uint32_t>* function)
{
return CreateShader(function, ResourceType::PixelShader);
}
static void SetPixelShader(GuestDevice* device, GuestShader* shader)
{
RenderCommand cmd;
cmd.type = RenderCommandType::SetPixelShader;
cmd.setPixelShader.shader = shader;
g_renderQueue.enqueue(cmd);
}
static void ProcSetPixelShader(const RenderCommand& cmd)
{
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.pixelShader, cmd.setPixelShader.shader);
}
static std::thread g_renderThread([]
{
RenderCommand commands[32];
while (true)
{
size_t count = g_renderQueue.wait_dequeue_bulk(commands, std::size(commands));
for (size_t i = 0; i < count; i++)
{
auto& cmd = commands[i];
switch (cmd.type)
{
case RenderCommandType::SetRenderState: ProcSetRenderState(cmd); break;
case RenderCommandType::DestructResource: ProcDestructResource(cmd); break;
case RenderCommandType::UnlockTextureRect: ProcUnlockTextureRect(cmd); break;
case RenderCommandType::UnlockBuffer16: ProcUnlockBuffer16(cmd); break;
case RenderCommandType::UnlockBuffer32: ProcUnlockBuffer32(cmd); break;
case RenderCommandType::DrawImGui: ProcDrawImGui(cmd); break;
case RenderCommandType::Present: ProcPresent(cmd); break;
case RenderCommandType::StretchRect: ProcStretchRect(cmd); break;
case RenderCommandType::SetRenderTarget: ProcSetRenderTarget(cmd); break;
case RenderCommandType::SetDepthStencilSurface: ProcSetDepthStencilSurface(cmd); break;
case RenderCommandType::Clear: ProcClear(cmd); break;
case RenderCommandType::SetViewport: ProcSetViewport(cmd); break;
case RenderCommandType::SetTexture: ProcSetTexture(cmd); break;
case RenderCommandType::SetScissorRect: ProcSetScissorRect(cmd); break;
case RenderCommandType::SetSamplerState: ProcSetSamplerState(cmd); break;
case RenderCommandType::SetBooleans: ProcSetBooleans(cmd); break;
case RenderCommandType::SetVertexShaderConstants: ProcSetVertexShaderConstants(cmd); break;
case RenderCommandType::SetPixelShaderConstants: ProcSetPixelShaderConstants(cmd); break;
case RenderCommandType::DrawPrimitive: ProcDrawPrimitive(cmd); break;
case RenderCommandType::DrawIndexedPrimitive: ProcDrawIndexedPrimitive(cmd); break;
case RenderCommandType::DrawPrimitiveUP: ProcDrawPrimitiveUP(cmd); break;
case RenderCommandType::SetVertexDeclaration: ProcSetVertexDeclaration(cmd); break;
case RenderCommandType::SetVertexShader: ProcSetVertexShader(cmd); break;
case RenderCommandType::SetStreamSource: ProcSetStreamSource(cmd); break;
case RenderCommandType::SetIndices: ProcSetIndices(cmd); break;
case RenderCommandType::SetPixelShader: ProcSetPixelShader(cmd); break;
default: assert(false && "Unrecognized render command type."); break;
}
}
}
});
static void D3DXFillTexture(GuestTexture* texture, uint32_t function, void* data)
{
if (texture->width == 1 && texture->height == 1 && texture->format == RenderFormat::R8_UNORM && function == 0x82BA2150)
{
auto uploadBuffer = g_device->createBuffer(RenderBufferDesc::UploadBuffer(PLACEMENT_ALIGNMENT));
uint8_t* mappedData = reinterpret_cast<uint8_t*>(uploadBuffer->map());
*mappedData = 0xFF;
uploadBuffer->unmap();
ExecuteCopyCommandList([&]
{
g_copyCommandList->barriers(RenderBarrierStage::COPY, RenderTextureBarrier(texture->texture, RenderTextureLayout::COPY_DEST));
g_copyCommandList->copyTextureRegion(
RenderTextureCopyLocation::Subresource(texture->texture, 0),
RenderTextureCopyLocation::PlacedFootprint(uploadBuffer.get(), texture->format, 1, 1, 1, PLACEMENT_ALIGNMENT, 0));
});
texture->layout = RenderTextureLayout::COPY_DEST;
}
}
static void D3DXFillVolumeTexture(GuestTexture* texture, uint32_t function, void* data)
{
uint32_t rowPitch0 = (texture->width * 4 + PITCH_ALIGNMENT - 1) & ~(PITCH_ALIGNMENT - 1);
uint32_t slicePitch0 = (rowPitch0 * texture->height * texture->depth + PLACEMENT_ALIGNMENT - 1) & ~(PLACEMENT_ALIGNMENT - 1);
uint32_t rowPitch1 = ((texture->width / 2) * 4 + PITCH_ALIGNMENT - 1) & ~(PITCH_ALIGNMENT - 1);
uint32_t slicePitch1 = (rowPitch1 * (texture->height / 2) * (texture->depth / 2) + PLACEMENT_ALIGNMENT - 1) & ~(PLACEMENT_ALIGNMENT - 1);
auto uploadBuffer = g_device->createBuffer(RenderBufferDesc::UploadBuffer(slicePitch0 + slicePitch1));
uint8_t* mappedData = reinterpret_cast<uint8_t*>(uploadBuffer->map());
thread_local std::vector<float> mipData;
mipData.resize((texture->width / 2) * (texture->height / 2) * (texture->depth / 2) * 4);
memset(mipData.data(), 0, mipData.size() * sizeof(float));
for (size_t z = 0; z < texture->depth; z++)
{
for (size_t y = 0; y < texture->height; y++)
{
for (size_t x = 0; x < texture->width; x++)
{
auto dest = mappedData + z * rowPitch0 * texture->height + y * rowPitch0 + x * sizeof(uint32_t);
size_t index = z * texture->width * texture->height + y * texture->width + x;
size_t mipIndex = ((z / 2) * (texture->width / 2) * (texture->height / 2) + (y / 2) * (texture->width / 2) + x / 2) * 4;
if (function == 0x82BC7820)
{
auto src = reinterpret_cast<be<float>*>(data) + index * 4;
float r = static_cast<uint8_t>(src[0] * 255.0f);
float g = static_cast<uint8_t>(src[1] * 255.0f);
float b = static_cast<uint8_t>(src[2] * 255.0f);
float a = static_cast<uint8_t>(src[3] * 255.0f);
dest[0] = r;
dest[1] = g;
dest[2] = b;
dest[3] = a;
mipData[mipIndex + 0] += r;
mipData[mipIndex + 1] += g;
mipData[mipIndex + 2] += b;
mipData[mipIndex + 3] += a;
}
else if (function == 0x82BC78A8)
{
auto src = reinterpret_cast<uint8_t*>(data) + index * 4;
dest[0] = src[3];
dest[1] = src[2];
dest[2] = src[1];
dest[3] = src[0];
mipData[mipIndex + 0] += src[3];
mipData[mipIndex + 1] += src[2];
mipData[mipIndex + 2] += src[1];
mipData[mipIndex + 3] += src[0];
}
}
}
}
for (size_t z = 0; z < texture->depth / 2; z++)
{
for (size_t y = 0; y < texture->height / 2; y++)
{
for (size_t x = 0; x < texture->width / 2; x++)
{
auto dest = mappedData + slicePitch0 + z * rowPitch1 * (texture->height / 2) + y * rowPitch1 + x * sizeof(uint32_t);
size_t index = (z * (texture->width / 2) * (texture->height / 2) + y * (texture->width / 2) + x) * 4;
dest[0] = static_cast<uint8_t>(mipData[index + 0] / 8.0f);
dest[1] = static_cast<uint8_t>(mipData[index + 1] / 8.0f);
dest[2] = static_cast<uint8_t>(mipData[index + 2] / 8.0f);
dest[3] = static_cast<uint8_t>(mipData[index + 3] / 8.0f);
}
}
}
uploadBuffer->unmap();
ExecuteCopyCommandList([&]
{
g_copyCommandList->barriers(RenderBarrierStage::COPY, RenderTextureBarrier(texture->texture, RenderTextureLayout::COPY_DEST));
g_copyCommandList->copyTextureRegion(
RenderTextureCopyLocation::Subresource(texture->texture, 0),
RenderTextureCopyLocation::PlacedFootprint(uploadBuffer.get(), texture->format, texture->width, texture->height, texture->depth, rowPitch0 / RenderFormatSize(texture->format), 0));
g_copyCommandList->copyTextureRegion(
RenderTextureCopyLocation::Subresource(texture->texture, 1),
RenderTextureCopyLocation::PlacedFootprint(uploadBuffer.get(), texture->format, texture->width / 2, texture->height / 2, texture->depth / 2, rowPitch1 / RenderFormatSize(texture->format), slicePitch0));
});
texture->layout = RenderTextureLayout::COPY_DEST;
}
struct GuestPictureData
{
be<uint32_t> vtable;
uint8_t flags;
be<uint32_t> name;
be<uint32_t> texture;
be<uint32_t> type;
};
static RenderTextureDimension ConvertTextureDimension(ddspp::TextureType type)
{
switch (type)
{
case ddspp::Texture1D:
return RenderTextureDimension::TEXTURE_1D;
case ddspp::Texture2D:
case ddspp::Cubemap:
return RenderTextureDimension::TEXTURE_2D;
case ddspp::Texture3D:
return RenderTextureDimension::TEXTURE_3D;
default:
assert(false && "Unknown texture type from DDS.");
return RenderTextureDimension::UNKNOWN;
}
}
static RenderTextureViewDimension ConvertTextureViewDimension(ddspp::TextureType type)
{
switch (type)
{
case ddspp::Texture1D:
return RenderTextureViewDimension::TEXTURE_1D;
case ddspp::Texture2D:
return RenderTextureViewDimension::TEXTURE_2D;
case ddspp::Texture3D:
return RenderTextureViewDimension::TEXTURE_3D;
case ddspp::Cubemap:
return RenderTextureViewDimension::TEXTURE_CUBE;
default:
assert(false && "Unknown texture type from DDS.");
return RenderTextureViewDimension::UNKNOWN;
}
}
static RenderFormat ConvertDXGIFormat(ddspp::DXGIFormat format)
{
switch (format)
{
case ddspp::R32G32B32A32_TYPELESS:
return RenderFormat::R32G32B32A32_TYPELESS;
case ddspp::R32G32B32A32_FLOAT:
return RenderFormat::R32G32B32A32_FLOAT;
case ddspp::R32G32B32A32_UINT:
return RenderFormat::R32G32B32A32_UINT;
case ddspp::R32G32B32A32_SINT:
return RenderFormat::R32G32B32A32_SINT;
case ddspp::R32G32B32_TYPELESS:
return RenderFormat::R32G32B32_TYPELESS;
case ddspp::R32G32B32_FLOAT:
return RenderFormat::R32G32B32_FLOAT;
case ddspp::R32G32B32_UINT:
return RenderFormat::R32G32B32_UINT;
case ddspp::R32G32B32_SINT:
return RenderFormat::R32G32B32_SINT;
case ddspp::R16G16B16A16_TYPELESS:
return RenderFormat::R16G16B16A16_TYPELESS;
case ddspp::R16G16B16A16_FLOAT:
return RenderFormat::R16G16B16A16_FLOAT;
case ddspp::R16G16B16A16_UNORM:
return RenderFormat::R16G16B16A16_UNORM;
case ddspp::R16G16B16A16_UINT:
return RenderFormat::R16G16B16A16_UINT;
case ddspp::R16G16B16A16_SNORM:
return RenderFormat::R16G16B16A16_SNORM;
case ddspp::R16G16B16A16_SINT:
return RenderFormat::R16G16B16A16_SINT;
case ddspp::R32G32_TYPELESS:
return RenderFormat::R32G32_TYPELESS;
case ddspp::R32G32_FLOAT:
return RenderFormat::R32G32_FLOAT;
case ddspp::R32G32_UINT:
return RenderFormat::R32G32_UINT;
case ddspp::R32G32_SINT:
return RenderFormat::R32G32_SINT;
case ddspp::R8G8B8A8_TYPELESS:
return RenderFormat::R8G8B8A8_TYPELESS;
case ddspp::R8G8B8A8_UNORM:
return RenderFormat::R8G8B8A8_UNORM;
case ddspp::R8G8B8A8_UINT:
return RenderFormat::R8G8B8A8_UINT;
case ddspp::R8G8B8A8_SNORM:
return RenderFormat::R8G8B8A8_SNORM;
case ddspp::R8G8B8A8_SINT:
return RenderFormat::R8G8B8A8_SINT;
case ddspp::B8G8R8A8_UNORM:
return RenderFormat::B8G8R8A8_UNORM;
case ddspp::B8G8R8X8_UNORM:
return RenderFormat::B8G8R8A8_UNORM;
case ddspp::R16G16_TYPELESS:
return RenderFormat::R16G16_TYPELESS;
case ddspp::R16G16_FLOAT:
return RenderFormat::R16G16_FLOAT;
case ddspp::R16G16_UNORM:
return RenderFormat::R16G16_UNORM;
case ddspp::R16G16_UINT:
return RenderFormat::R16G16_UINT;
case ddspp::R16G16_SNORM:
return RenderFormat::R16G16_SNORM;
case ddspp::R16G16_SINT:
return RenderFormat::R16G16_SINT;
case ddspp::R32_TYPELESS:
return RenderFormat::R32_TYPELESS;
case ddspp::D32_FLOAT:
return RenderFormat::D32_FLOAT;
case ddspp::R32_FLOAT:
return RenderFormat::R32_FLOAT;
case ddspp::R32_UINT:
return RenderFormat::R32_UINT;
case ddspp::R32_SINT:
return RenderFormat::R32_SINT;
case ddspp::R8G8_TYPELESS:
return RenderFormat::R8G8_TYPELESS;
case ddspp::R8G8_UNORM:
return RenderFormat::R8G8_UNORM;
case ddspp::R8G8_UINT:
return RenderFormat::R8G8_UINT;
case ddspp::R8G8_SNORM:
return RenderFormat::R8G8_SNORM;
case ddspp::R8G8_SINT:
return RenderFormat::R8G8_SINT;
case ddspp::R16_TYPELESS:
return RenderFormat::R16_TYPELESS;
case ddspp::R16_FLOAT:
return RenderFormat::R16_FLOAT;
case ddspp::D16_UNORM:
return RenderFormat::D16_UNORM;
case ddspp::R16_UNORM:
return RenderFormat::R16_UNORM;
case ddspp::R16_UINT:
return RenderFormat::R16_UINT;
case ddspp::R16_SNORM:
return RenderFormat::R16_SNORM;
case ddspp::R16_SINT:
return RenderFormat::R16_SINT;
case ddspp::R8_TYPELESS:
return RenderFormat::R8_TYPELESS;
case ddspp::R8_UNORM:
return RenderFormat::R8_UNORM;
case ddspp::R8_UINT:
return RenderFormat::R8_UINT;
case ddspp::R8_SNORM:
return RenderFormat::R8_SNORM;
case ddspp::R8_SINT:
return RenderFormat::R8_SINT;
case ddspp::BC1_TYPELESS:
return RenderFormat::BC1_TYPELESS;
case ddspp::BC1_UNORM:
return RenderFormat::BC1_UNORM;
case ddspp::BC1_UNORM_SRGB:
return RenderFormat::BC1_UNORM_SRGB;
case ddspp::BC2_TYPELESS:
return RenderFormat::BC2_TYPELESS;
case ddspp::BC2_UNORM:
return RenderFormat::BC2_UNORM;
case ddspp::BC2_UNORM_SRGB:
return RenderFormat::BC2_UNORM_SRGB;
case ddspp::BC3_TYPELESS:
return RenderFormat::BC3_TYPELESS;
case ddspp::BC3_UNORM:
return RenderFormat::BC3_UNORM;
case ddspp::BC3_UNORM_SRGB:
return RenderFormat::BC3_UNORM_SRGB;
case ddspp::BC4_TYPELESS:
return RenderFormat::BC4_TYPELESS;
case ddspp::BC4_UNORM:
return RenderFormat::BC4_UNORM;
case ddspp::BC4_SNORM:
return RenderFormat::BC4_SNORM;
case ddspp::BC5_TYPELESS:
return RenderFormat::BC5_TYPELESS;
case ddspp::BC5_UNORM:
return RenderFormat::BC5_UNORM;
case ddspp::BC5_SNORM:
return RenderFormat::BC5_SNORM;
case ddspp::BC6H_TYPELESS:
return RenderFormat::BC6H_TYPELESS;
case ddspp::BC6H_UF16:
return RenderFormat::BC6H_UF16;
case ddspp::BC6H_SF16:
return RenderFormat::BC6H_SF16;
case ddspp::BC7_TYPELESS:
return RenderFormat::BC7_TYPELESS;
case ddspp::BC7_UNORM:
return RenderFormat::BC7_UNORM;
case ddspp::BC7_UNORM_SRGB:
return RenderFormat::BC7_UNORM_SRGB;
default:
assert(false && "Unsupported format from DDS.");
return RenderFormat::UNKNOWN;
}
}
static void MakePictureData(GuestPictureData* pictureData, uint8_t* data, uint32_t dataSize)
{
if ((pictureData->flags & 0x1) == 0)
{
ddspp::Descriptor ddsDesc;
if (ddspp::decode_header(data, ddsDesc) != ddspp::Error)
{
const auto texture = g_userHeap.AllocPhysical<GuestTexture>(ResourceType::Texture);
RenderTextureDesc desc;
desc.dimension = ConvertTextureDimension(ddsDesc.type);
desc.width = ddsDesc.width;
desc.height = ddsDesc.height;
desc.depth = ddsDesc.depth;
desc.mipLevels = ddsDesc.numMips;
desc.arraySize = ddsDesc.type == ddspp::TextureType::Cubemap ? ddsDesc.arraySize * 6 : ddsDesc.arraySize;
desc.format = ConvertDXGIFormat(ddsDesc.format);
desc.flags = ddsDesc.type == ddspp::TextureType::Cubemap ? RenderTextureFlag::CUBE : RenderTextureFlag::NONE;
texture->textureHolder = g_device->createTexture(desc);
texture->texture = texture->textureHolder.get();
texture->layout = RenderTextureLayout::COPY_DEST;
#ifdef _DEBUG
texture->texture->setName(reinterpret_cast<char*>(g_memory.Translate(pictureData->name + 2)));
#endif
RenderTextureViewDesc viewDesc;
viewDesc.format = desc.format;
viewDesc.dimension = ConvertTextureViewDimension(ddsDesc.type);
viewDesc.mipLevels = ddsDesc.numMips;
texture->textureView = texture->texture->createTextureView(viewDesc);
texture->descriptorIndex = g_textureDescriptorAllocator.allocate();
g_textureDescriptorSet->setTexture(texture->descriptorIndex, texture->texture, RenderTextureLayout::SHADER_READ, texture->textureView.get());
texture->viewDimension = viewDesc.dimension;
struct Slice
{
uint32_t width;
uint32_t height;
uint32_t depth;
uint32_t srcOffset;
uint32_t dstOffset;
uint32_t srcRowPitch;
uint32_t dstRowPitch;
uint32_t rowCount;
};
std::vector<Slice> slices;
uint32_t curSrcOffset = 0;
uint32_t curDstOffset = 0;
for (uint32_t arraySlice = 0; arraySlice < desc.arraySize; arraySlice++)
{
for (uint32_t mipSlice = 0; mipSlice < ddsDesc.numMips; mipSlice++)
{
auto& slice = slices.emplace_back();
slice.width = std::max(1u, ddsDesc.width >> mipSlice);
slice.height = std::max(1u, ddsDesc.height >> mipSlice);
slice.depth = std::max(1u, ddsDesc.depth >> mipSlice);
slice.srcOffset = curSrcOffset;
slice.dstOffset = curDstOffset;
uint32_t rowPitch = ((slice.width + ddsDesc.blockWidth - 1) / ddsDesc.blockWidth) * ddsDesc.bitsPerPixelOrBlock;
slice.srcRowPitch = (rowPitch + 7) / 8;
slice.dstRowPitch = (slice.srcRowPitch + PITCH_ALIGNMENT - 1) & ~(PITCH_ALIGNMENT - 1);
slice.rowCount = (slice.height + ddsDesc.blockHeight - 1) / ddsDesc.blockHeight;
curSrcOffset += slice.srcRowPitch * slice.rowCount * slice.depth;
curDstOffset += (slice.dstRowPitch * slice.rowCount * slice.depth + PLACEMENT_ALIGNMENT - 1) & ~(PLACEMENT_ALIGNMENT - 1);
}
}
auto uploadBuffer = g_device->createBuffer(RenderBufferDesc::UploadBuffer(curDstOffset));
uint8_t* mappedMemory = reinterpret_cast<uint8_t*>(uploadBuffer->map());
for (auto& slice : slices)
{
uint8_t* srcData = data + ddsDesc.headerSize + slice.srcOffset;
uint8_t* dstData = mappedMemory + slice.dstOffset;
if (slice.srcRowPitch == slice.dstRowPitch)
{
memcpy(dstData, srcData, slice.srcRowPitch * slice.rowCount * slice.depth);
}
else
{
for (size_t i = 0; i < slice.rowCount * slice.depth; i++)
{
memcpy(dstData, srcData, slice.srcRowPitch);
srcData += slice.srcRowPitch;
dstData += slice.dstRowPitch;
}
}
}
uploadBuffer->unmap();
ExecuteCopyCommandList([&]
{
g_copyCommandList->barriers(RenderBarrierStage::COPY, RenderTextureBarrier(texture->texture, RenderTextureLayout::COPY_DEST));
for (size_t i = 0; i < slices.size(); i++)
{
auto& slice = slices[i];
g_copyCommandList->copyTextureRegion(
RenderTextureCopyLocation::Subresource(texture->texture, i),
RenderTextureCopyLocation::PlacedFootprint(uploadBuffer.get(), desc.format, slice.width, slice.height, slice.depth, (slice.dstRowPitch * 8) / ddsDesc.bitsPerPixelOrBlock * ddsDesc.blockWidth, slice.dstOffset));
}
});
pictureData->texture = g_memory.MapVirtual(texture);
pictureData->type = 0;
}
else
{
int width, height;
void* stbImage = stbi_load_from_memory(data, dataSize, &width, &height, nullptr, 4);
if (stbImage != nullptr)
{
const auto texture = g_userHeap.AllocPhysical<GuestTexture>(ResourceType::Texture);
texture->textureHolder = g_device->createTexture(RenderTextureDesc::Texture2D(width, height, 1, RenderFormat::R8G8B8A8_UNORM));
texture->texture = texture->textureHolder.get();
texture->viewDimension = RenderTextureViewDimension::TEXTURE_2D;
texture->layout = RenderTextureLayout::COPY_DEST;
texture->descriptorIndex = g_textureDescriptorAllocator.allocate();
g_textureDescriptorSet->setTexture(texture->descriptorIndex, texture->texture, RenderTextureLayout::SHADER_READ);
uint32_t rowPitch = (width * 4 + PITCH_ALIGNMENT - 1) & ~(PITCH_ALIGNMENT - 1);
uint32_t slicePitch = rowPitch * height;
auto uploadBuffer = g_device->createBuffer(RenderBufferDesc::UploadBuffer(slicePitch));
uint8_t* mappedMemory = reinterpret_cast<uint8_t*>(uploadBuffer->map());
if (rowPitch == (width * 4))
{
memcpy(mappedMemory, stbImage, slicePitch);
}
else
{
auto data = reinterpret_cast<const uint8_t*>(stbImage);
for (size_t i = 0; i < height; i++)
{
memcpy(mappedMemory, data, width * 4);
data += width * 4;
mappedMemory += rowPitch;
}
}
uploadBuffer->unmap();
stbi_image_free(stbImage);
ExecuteCopyCommandList([&]
{
g_copyCommandList->barriers(RenderBarrierStage::COPY, RenderTextureBarrier(texture->texture, RenderTextureLayout::COPY_DEST));
g_copyCommandList->copyTextureRegion(
RenderTextureCopyLocation::Subresource(texture->texture, 0),
RenderTextureCopyLocation::PlacedFootprint(uploadBuffer.get(), RenderFormat::R8G8B8A8_UNORM, width, height, 1, rowPitch / 4, 0));
});
pictureData->texture = g_memory.MapVirtual(texture);
pictureData->type = 0;
}
}
}
}
void IndexBufferLengthMidAsmHook(PPCRegister& r3)
{
r3.u64 *= 2;
}
void SetShadowResolutionMidAsmHook(PPCRegister& r11)
{
auto res = (int32_t)Config::ShadowResolution.Value;
if (res > 0)
r11.u64 = res;
}
static void SetResolution(be<uint32_t>* device)
{
uint32_t width = uint32_t(g_swapChain->getWidth() * Config::ResolutionScale);
uint32_t height = uint32_t(g_swapChain->getHeight() * Config::ResolutionScale);
device[46] = width == 0 ? 880 : width;
device[47] = height == 0 ? 720 : height;
}
// The game does some weird stuff to render targets if they are above
// 1024x1024 resolution, setting this bool at address 20 seems to avoid all that.
PPC_FUNC(sub_82E9F048)
{
PPC_STORE_U8(ctx.r4.u32 + 20, 1);
PPC_STORE_U32(ctx.r4.u32 + 44, PPC_LOAD_U32(ctx.r4.u32 + 8)); // Width
PPC_STORE_U32(ctx.r4.u32 + 48, PPC_LOAD_U32(ctx.r4.u32 + 12)); // Height
}
static GuestShader* g_movieVertexShader;
static GuestShader* g_moviePixelShader;
static GuestVertexDeclaration* g_movieVertexDeclaration;
static void ScreenShaderInit(be<uint32_t>* a1, uint32_t a2, uint32_t a3, GuestVertexElement* vertexElements)
{
if (g_moviePixelShader == nullptr)
{
g_moviePixelShader = g_userHeap.AllocPhysical<GuestShader>(ResourceType::PixelShader);
g_moviePixelShader->shader = CREATE_SHADER(movie_ps);
}
if (g_movieVertexShader == nullptr)
{
g_movieVertexShader = g_userHeap.AllocPhysical<GuestShader>(ResourceType::VertexShader);
g_movieVertexShader->shader = CREATE_SHADER(movie_vs);
}
if (g_movieVertexDeclaration == nullptr)
g_movieVertexDeclaration = CreateVertexDeclaration(vertexElements);
g_moviePixelShader->AddRef();
g_movieVertexShader->AddRef();
g_movieVertexDeclaration->AddRef();
a1[2] = g_memory.MapVirtual(g_moviePixelShader);
a1[3] = g_memory.MapVirtual(g_movieVertexShader);
a1[4] = g_memory.MapVirtual(g_movieVertexDeclaration);
}
void MovieRendererMidAsmHook(PPCRegister& r3)
{
auto device = reinterpret_cast<GuestDevice*>(g_memory.Translate(r3.u32));
// Force linear filtering & clamp addressing
for (size_t i = 0; i < 3; i++)
{
device->samplerStates[i].data[0] = (device->samplerStates[i].data[0].get() & ~0x7fc00) | 0x24800;
device->samplerStates[i].data[3] = (device->samplerStates[i].data[3].get() & ~0x1f80000) | 0x1280000;
}
device->dirtyFlags[3] = device->dirtyFlags[3].get() | 0xe0000000ull;
}
static PPCRegister g_r4;
static PPCRegister g_r5;
// CRenderDirectorFxPipeline::Initialize
PPC_FUNC_IMPL(__imp__sub_8258C8A0);
PPC_FUNC(sub_8258C8A0)
{
g_r4 = ctx.r4;
g_r5 = ctx.r5;
__imp__sub_8258C8A0(ctx, base);
}
// CRenderDirectorFxPipeline::Update
PPC_FUNC_IMPL(__imp__sub_8258CAE0);
PPC_FUNC(sub_8258CAE0)
{
if (g_needsResize)
{
auto r3 = ctx.r3;
ctx.r4 = g_r4;
ctx.r5 = g_r5;
__imp__sub_8258C8A0(ctx, base);
ctx.r3 = r3;
g_needsResize = false;
}
__imp__sub_8258CAE0(ctx, base);
}
void PostProcessResolutionFix(PPCRegister& r4, PPCRegister& f1, PPCRegister& f2)
{
auto device = reinterpret_cast<be<uint32_t>*>(g_memory.Translate(r4.u32));
uint32_t width = device[46].get();
uint32_t height = device[47].get();
#if 0
// TODO: Figure out why this breaks for height > weight
double factor;
if (width > height)
factor = 720.0 / double(height);
else
factor = 1280.0 / double(width);
#else
double factor = 720.0 / double(height);
#endif
f1.f64 *= factor;
f2.f64 *= factor;
}
void LightShaftAspectRatioFix(PPCRegister& f28, PPCRegister& f0)
{
f28.f64 = f0.f64;
}
static const be<uint16_t> g_particleTestIndexBuffer[] =
{
0, 1, 2,
0, 2, 3,
0, 3, 4,
0, 4, 5
};
bool ParticleTestIndexBufferMidAsmHook(PPCRegister& r30)
{
if (!g_triangleFanSupported)
{
auto buffer = CreateIndexBuffer(sizeof(g_particleTestIndexBuffer), 0, D3DFMT_INDEX16);
void* memory = LockIndexBuffer(buffer, 0, 0, 0);
memcpy(memory, g_particleTestIndexBuffer, sizeof(g_particleTestIndexBuffer));
UnlockIndexBuffer(buffer);
r30.u32 = g_memory.MapVirtual(buffer);
return true;
}
return false;
}
void ParticleTestDrawIndexedPrimitiveMidAsmHook(PPCRegister& r7)
{
if (!g_triangleFanSupported)
r7.u64 = std::size(g_particleTestIndexBuffer);
}
GUEST_FUNCTION_HOOK(sub_82BD99B0, CreateDevice);
GUEST_FUNCTION_HOOK(sub_82BE6230, DestructResource);
GUEST_FUNCTION_HOOK(sub_82BE9300, LockTextureRect);
GUEST_FUNCTION_HOOK(sub_82BE7780, UnlockTextureRect);
GUEST_FUNCTION_HOOK(sub_82BE6B98, LockVertexBuffer);
GUEST_FUNCTION_HOOK(sub_82BE6BE8, UnlockVertexBuffer);
GUEST_FUNCTION_HOOK(sub_82BE61D0, GetVertexBufferDesc);
GUEST_FUNCTION_HOOK(sub_82BE6CA8, LockIndexBuffer);
GUEST_FUNCTION_HOOK(sub_82BE6CF0, UnlockIndexBuffer);
GUEST_FUNCTION_HOOK(sub_82BE6200, GetIndexBufferDesc);
GUEST_FUNCTION_HOOK(sub_82BE96F0, GetSurfaceDesc);
GUEST_FUNCTION_HOOK(sub_82BE04B0, GetVertexDeclaration);
GUEST_FUNCTION_HOOK(sub_82BE0530, HashVertexDeclaration);
GUEST_FUNCTION_HOOK(sub_82BDA8C0, Present);
GUEST_FUNCTION_HOOK(sub_82BDD330, GetBackBuffer);
GUEST_FUNCTION_HOOK(sub_82BE9498, CreateTexture);
GUEST_FUNCTION_HOOK(sub_82BE6AD0, CreateVertexBuffer);
GUEST_FUNCTION_HOOK(sub_82BE6BF8, CreateIndexBuffer);
GUEST_FUNCTION_HOOK(sub_82BE95B8, CreateSurface);
GUEST_FUNCTION_HOOK(sub_82BF6400, StretchRect);
GUEST_FUNCTION_HOOK(sub_82BDD9F0, SetRenderTarget);
GUEST_FUNCTION_HOOK(sub_82BDDD38, SetDepthStencilSurface);
GUEST_FUNCTION_HOOK(sub_82BFE4C8, Clear);
GUEST_FUNCTION_HOOK(sub_82BDD8C0, SetViewport);
GUEST_FUNCTION_HOOK(sub_82BE9818, SetTexture);
GUEST_FUNCTION_HOOK(sub_82BDCFB0, SetScissorRect);
GUEST_FUNCTION_HOOK(sub_82BE5900, DrawPrimitive);
GUEST_FUNCTION_HOOK(sub_82BE5CF0, DrawIndexedPrimitive);
GUEST_FUNCTION_HOOK(sub_82BE52F8, DrawPrimitiveUP);
GUEST_FUNCTION_HOOK(sub_82BE0428, CreateVertexDeclaration);
GUEST_FUNCTION_HOOK(sub_82BE02E0, SetVertexDeclaration);
GUEST_FUNCTION_HOOK(sub_82BE1A80, CreateVertexShader);
GUEST_FUNCTION_HOOK(sub_82BE0110, SetVertexShader);
GUEST_FUNCTION_HOOK(sub_82BDD0F8, SetStreamSource);
GUEST_FUNCTION_HOOK(sub_82BDD218, SetIndices);
GUEST_FUNCTION_HOOK(sub_82BE1990, CreatePixelShader);
GUEST_FUNCTION_HOOK(sub_82BDFE58, SetPixelShader);
GUEST_FUNCTION_HOOK(sub_82C003B8, D3DXFillTexture);
GUEST_FUNCTION_HOOK(sub_82C00910, D3DXFillVolumeTexture);
GUEST_FUNCTION_HOOK(sub_82E43FC8, MakePictureData);
GUEST_FUNCTION_HOOK(sub_82E9EE38, SetResolution);
GUEST_FUNCTION_HOOK(sub_82AE2BF8, ScreenShaderInit);
GUEST_FUNCTION_STUB(sub_822C15D8);
GUEST_FUNCTION_STUB(sub_822C1810);
GUEST_FUNCTION_STUB(sub_82BD97A8);
GUEST_FUNCTION_STUB(sub_82BD97E8);
GUEST_FUNCTION_STUB(sub_82BDD370); // SetGammaRamp
GUEST_FUNCTION_STUB(sub_82BE05B8);
GUEST_FUNCTION_STUB(sub_82BE9C98);
GUEST_FUNCTION_STUB(sub_82BEA308);
GUEST_FUNCTION_STUB(sub_82CD5D68);
GUEST_FUNCTION_STUB(sub_82BE9B28);
GUEST_FUNCTION_STUB(sub_82BEA018);
GUEST_FUNCTION_STUB(sub_82BEA7C0);
GUEST_FUNCTION_STUB(sub_82BFFF88); // D3DXFilterTexture
GUEST_FUNCTION_STUB(sub_82BD96D0);