mirror of
https://github.com/coop-deluxe/sm64coopdx.git
synced 2026-04-26 20:11:42 +00:00
Some optimizations
This commit is contained in:
parent
2e8f2ef360
commit
77a018a8c0
1 changed files with 37 additions and 18 deletions
|
|
@ -7,6 +7,9 @@ extern "C" {
|
|||
#undef max
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#ifdef __SSE__
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
|
||||
#define C_FIELD extern "C"
|
||||
|
||||
|
|
@ -31,6 +34,14 @@ static enum LEMode sMode = LE_MODE_AFFECT_ALL_SHADED_AND_COLORED;
|
|||
static enum LEToneMapping sToneMapping = LE_TONE_MAPPING_WEIGHTED;
|
||||
static bool sEnabled = false;
|
||||
|
||||
static inline f32 rsqrt(f32 value) {
|
||||
#ifdef __SSE__
|
||||
return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(value)));
|
||||
#else
|
||||
return 1.0f / sqrtf(value);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void color_set(Color color, u8 r, u8 g, u8 b) {
|
||||
color[0] = r;
|
||||
color[1] = g;
|
||||
|
|
@ -43,6 +54,14 @@ static inline void color_copy(Color dest, Color src) {
|
|||
dest[2] = src[2];
|
||||
}
|
||||
|
||||
|
||||
static inline u8 to_u8(float value) {
|
||||
int v = (int)value;
|
||||
v = v < 0 ? 0 : v;
|
||||
v = v > 255 ? 255 : v;
|
||||
return (u8)v;
|
||||
}
|
||||
|
||||
C_FIELD bool le_is_enabled(void) {
|
||||
// this is needed because we don't want to make vanilla darker,
|
||||
// and we don't want to set the ambient color to { 255, 255, 255 }
|
||||
|
|
@ -72,21 +91,21 @@ C_FIELD void le_set_ambient_color(u8 r, u8 g, u8 b) {
|
|||
}
|
||||
|
||||
static inline void le_tone_map_total_weighted(Color out, Color inAmbient, Vec3f inColor, float weight) {
|
||||
out[0] = std::clamp((s32)((inAmbient[0] + inColor[0]) / weight), 0, 255);
|
||||
out[1] = std::clamp((s32)((inAmbient[1] + inColor[1]) / weight), 0, 255);
|
||||
out[2] = std::clamp((s32)((inAmbient[2] + inColor[2]) / weight), 0, 255);
|
||||
out[0] = to_u8((inAmbient[0] + inColor[0]) / weight);
|
||||
out[1] = to_u8((inAmbient[1] + inColor[1]) / weight);
|
||||
out[2] = to_u8((inAmbient[2] + inColor[2]) / weight);
|
||||
}
|
||||
|
||||
static inline void le_tone_map_weighted(Color out, Color inAmbient, Vec3f inColor, float weight) {
|
||||
out[0] = std::clamp((s32)(inAmbient[0] + (inColor[0] / weight)), 0, 255);
|
||||
out[1] = std::clamp((s32)(inAmbient[1] + (inColor[1] / weight)), 0, 255);
|
||||
out[2] = std::clamp((s32)(inAmbient[2] + (inColor[2] / weight)), 0, 255);
|
||||
out[0] = to_u8(inAmbient[0] + (inColor[0] / weight));
|
||||
out[1] = to_u8(inAmbient[1] + (inColor[1] / weight));
|
||||
out[2] = to_u8(inAmbient[2] + (inColor[2] / weight));
|
||||
}
|
||||
|
||||
static inline void le_tone_map_clamp(Color out, Color inAmbient, Vec3f inColor) {
|
||||
out[0] = std::clamp((s32)(inAmbient[0] + inColor[0]), 0, 255);
|
||||
out[1] = std::clamp((s32)(inAmbient[1] + inColor[1]), 0, 255);
|
||||
out[2] = std::clamp((s32)(inAmbient[2] + inColor[2]), 0, 255);
|
||||
out[0] = to_u8(inAmbient[0] + inColor[0]);
|
||||
out[1] = to_u8(inAmbient[1] + inColor[1]);
|
||||
out[2] = to_u8(inAmbient[2] + inColor[2]);
|
||||
}
|
||||
|
||||
static inline void le_tone_map_reinhard(Color out, Color inAmbient, Vec3f inColor) {
|
||||
|
|
@ -94,9 +113,9 @@ static inline void le_tone_map_reinhard(Color out, Color inAmbient, Vec3f inColo
|
|||
inColor[1] += inAmbient[1];
|
||||
inColor[2] += inAmbient[2];
|
||||
|
||||
out[0] = std::clamp((s32)((inColor[0] / (inColor[0] + 255.0f)) * 255.0f), 0, 255);
|
||||
out[1] = std::clamp((s32)((inColor[1] / (inColor[1] + 255.0f)) * 255.0f), 0, 255);
|
||||
out[2] = std::clamp((s32)((inColor[2] / (inColor[2] + 255.0f)) * 255.0f), 0, 255);
|
||||
out[0] = to_u8((inColor[0] / (inColor[0] + 255.0f)) * 255.0f);
|
||||
out[1] = to_u8((inColor[1] / (inColor[1] + 255.0f)) * 255.0f);
|
||||
out[2] = to_u8((inColor[2] / (inColor[2] + 255.0f)) * 255.0f);
|
||||
}
|
||||
|
||||
static inline void le_tone_map(Color out, Color inAmbient, Vec3f inColor, float weight) {
|
||||
|
|
@ -126,13 +145,13 @@ static inline OPTIMIZE_O3 void le_calculate_light_contribution(const LELight& li
|
|||
f32 att = 1.0f - (dist2 / radius2);
|
||||
f32 brightness = att * light.intensity * lightIntensityScalar;
|
||||
|
||||
// normalize diff
|
||||
f32 invLen = 1.0f / sqrtf(dist2);
|
||||
diffX *= invLen;
|
||||
diffY *= invLen;
|
||||
diffZ *= invLen;
|
||||
|
||||
if (light.useSurfaceNormals && normal) {
|
||||
// normalize diff
|
||||
f32 invLen = rsqrt(dist2);
|
||||
diffX *= invLen;
|
||||
diffY *= invLen;
|
||||
diffZ *= invLen;
|
||||
|
||||
// lambert term
|
||||
f32 nl = (normal[0] * diffX) + (normal[1] * diffY) + (normal[2] * diffZ);
|
||||
if (nl <= 0.0f) { return; }
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue