sm64coopdx/src/pc/utils/misc.c
EmeraldLockdown d750fee179
Resolve issue with windows compilation due to use of strcasestr (#1246)
Fixes issue introduced by bb142d5769 because someone didn't check the C standard to verify it was ok to use (it was me).

`strcasestr` is not a part of the C standard; it is a GNU extension, and Windows does not play well with it. The solution was to define a fairly simple custom function partially based on the glibc implementation. The function should be reviewed.
2026-05-16 15:00:12 +10:00

673 lines
No EOL
20 KiB
C

#ifdef _WIN32
#include <windows.h>
#else
#include <unistd.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <PR/ultratypes.h>
#include <stdbool.h>
#include <time.h>
#include <float.h>
#include <ctype.h>
#include "misc.h"
#include "course_table.h"
#include "game/area.h"
#include "game/level_update.h"
#include "game/save_file.h"
#include "engine/math_util.h"
#include "pc/configfile.h"
#include "pc/pc_main.h"
#include "pc/update_checker.h"
// Hermite smooth-step: maps x from the range [edge0, edge1] onto [0, 1] using
// the curve 3t^2 - 2t^3. Values of x outside the range are clamped to 0 or 1.
float smooth_step(float edge0, float edge1, float x) {
    float ratio = (x - edge0) / (edge1 - edge0);
    if (ratio < 0) {
        ratio = 0;
    } else if (ratio > 1) {
        ratio = 1;
    }
    float squared = ratio * ratio;
    return squared * (3.0 - 2.0 * ratio);
}
// Recomputes the total star count of the current save file and stores it in
// the numStars field of every entry of gMarioStates.
void update_all_mario_stars(void) {
    const s32 totalStars = save_file_get_total_star_count(gCurrSaveFileNum - 1, COURSE_MIN - 1, COURSE_MAX - 1);
    s32 playerIndex = 0;
    while (playerIndex < MAX_PLAYERS) {
        gMarioStates[playerIndex].numStars = totalStars;
        playerIndex++;
    }
}
// Reads the current time into clock_time. The clock is chosen at compile time
// from _POSIX_MONOTONIC_CLOCK:
//   undefined or negative -> CLOCK_REALTIME
//   positive              -> CLOCK_MONOTONIC
//   zero                  -> try CLOCK_MONOTONIC at runtime and fall back to
//                            CLOCK_REALTIME if that call reports failure
static void _clock_gettime(struct timespec* clock_time) {
#if !defined _POSIX_MONOTONIC_CLOCK || _POSIX_MONOTONIC_CLOCK < 0
clock_gettime(CLOCK_REALTIME, clock_time);
#elif _POSIX_MONOTONIC_CLOCK > 0
clock_gettime(CLOCK_MONOTONIC, clock_time);
#else
// a non-zero return means the monotonic clock could not be read
if (clock_gettime(CLOCK_MONOTONIC, clock_time))
clock_gettime(CLOCK_REALTIME, clock_time);
#endif
#ifdef DEVELOPMENT
// give each instance a random offset for testing purposes
// (currently disabled: the whole implementation below is commented out)
/*
static s32 randomOffset1 = 0;
static s32 randomOffset2 = 0;
if (randomOffset1 == 0) {
time_t t;
srand((unsigned)time(&t));
randomOffset1 = rand();
randomOffset2 = rand();
}
clock_time->tv_sec += randomOffset1;
clock_time->tv_nsec += randomOffset2;
*/
#endif
}
static u64 clock_elapsed_ns(void) {
static bool sClockInitialized = false;
static u64 clock_start_ns;
if (!sClockInitialized) {
struct timespec clock_start;
_clock_gettime(&clock_start);
clock_start_ns = ((u64)clock_start.tv_sec) * 1000000000 + ((u64)clock_start.tv_nsec);
sClockInitialized = true;
}
struct timespec clock_current;
_clock_gettime(&clock_current);
u64 clock_current_ns = ((u64)clock_current.tv_sec) * 1000000000 + ((u64)clock_current.tv_nsec);
return (clock_current_ns - clock_start_ns);
}
f32 clock_elapsed(void) {
return (clock_elapsed_ns() / 1000000000.0f);
}
f64 clock_elapsed_f64(void) {
return (clock_elapsed_ns() / 1000000000.0);
}
u32 clock_elapsed_ticks(void) {
return (clock_elapsed_ns() * 3 / 100000000);
}
// Returns true when today's local date matches the given month and day.
//   month: 1-based month number (1 = January), compared against tm_mon + 1
//   day:   day of the month, compared against tm_mday
// Returns false if the local time cannot be determined.
bool clock_is_date(u8 month, u8 day) {
    time_t now = time(NULL);
    struct tm *localNow = localtime(&now);
    // localtime reports failure with a NULL pointer; never dereference it
    if (localNow == NULL) {
        return false;
    }
    return localNow->tm_mon == month - 1 && localNow->tm_mday == day;
}
// Blocks for delaySec seconds.
// OS sleeps are cheap but imprecise, busy-waiting is precise but wasteful, so
// this sleeps for most of the interval and spins for the last ~2 ms.
void precise_delay_f64(f64 delaySec) {
    const f64 spinWindow = 0.002; // switch to busy-waiting this close to the deadline
    const f64 deadline = clock_elapsed_f64() + delaySec;

    // coarse phase: sleep in whole milliseconds while outside the spin window
    for (;;) {
        const f64 timeLeft = deadline - clock_elapsed_f64();
        if (!(timeLeft > spinWindow)) {
            break;
        }
        const u32 wholeMs = (u32) ((timeLeft - spinWindow) * 1000.0);
        if (wholeMs < 1) {
            // less than one millisecond available to sleep
            break;
        }
        gWindowApi->delay(wholeMs);
    }

    // fine phase: spin until the deadline is reached
    while (clock_elapsed_f64() < deadline) {
    }
}
// Reads one line from fp into buffer as a NUL-terminated string.
//   buffer:    destination, must hold at least maxLength bytes
//   maxLength: capacity of buffer including the terminator; characters that
//              do not fit are read and discarded. With maxLength == 0 the
//              line is consumed but nothing is written.
//   fp:        stream to read from
// Reading stops at a newline, at end of file, or on a read error.
// Bytes outside printable ASCII (' '..'~') are dropped. A backslash escapes the
// following character: "\n" (backslash + 'n') stores a real newline, any other
// escaped character is stored as-is without the backslash.
void file_get_line(char* buffer, size_t maxLength, FILE* fp) {
    // one byte is reserved for the terminator; guard against size_t underflow
    const size_t capacity = (maxLength > 0) ? (maxLength - 1) : 0;
    size_t length = 0;

    // fgetc returns an int so that EOF is distinguishable from every valid
    // byte; comparing against EOF also ends the loop on a read error, which
    // feof() alone would never report
    int c;
    while ((c = fgetc(fp)) != EOF && c != '\n') {
        // make sure it's printable
        if (c < ' ' || c > '~') { continue; }

        // parse escape code
        if (c == '\\') {
            c = fgetc(fp);
            if (c == EOF || c == '\n') { break; }
            if (c == 'n') { c = '\n'; }
        }

        // append to buffer while there is room
        if (length < capacity) {
            buffer[length++] = (char)c;
        }
    }

    if (maxLength > 0) {
        buffer[length] = '\0';
    }
}
/////////////////
// Linear blend between a (delta = 0) and b (delta = 1).
f32 delta_interpolate_f32(f32 a, f32 b, f32 delta) {
    const f32 fromPart = a * (1.0f - delta);
    const f32 toPart = b * delta;
    return fromPart + toPart;
}
// Linear blend between the integers a (delta = 0) and b (delta = 1).
// The blend is computed in floating point and converted back to s32.
s32 delta_interpolate_s32(s32 a, s32 b, f32 delta) {
    const f32 fromPart = a * (1.0f - delta);
    const f32 toPart = b * delta;
    return fromPart + toPart;
}
// Component-wise linear blend of two float vectors: res = a*(1-delta) + b*delta.
void delta_interpolate_vec3f(VEC_OUT Vec3f res, Vec3f a, Vec3f b, f32 delta) {
    const f32 keep = 1.0f - delta;
    for (s32 axis = 0; axis < 3; axis++) {
        res[axis] = (a[axis] * keep) + (b[axis] * delta);
    }
}
// Component-wise linear blend of two short vectors: res = a*(1-delta) + b*delta.
// The blend is computed in floating point and converted on assignment.
void delta_interpolate_vec3s(VEC_OUT Vec3s res, Vec3s a, Vec3s b, f32 delta) {
    const f32 keep = 1.0f - delta;
    for (s32 axis = 0; axis < 3; axis++) {
        res[axis] = (a[axis] * keep) + (b[axis] * delta);
    }
}
// Component-wise linear blend of the first three s8 entries of a and b into res.
// The blend is computed in floating point and converted on assignment.
void delta_interpolate_normal(s8* res, s8* a, s8* b, f32 delta) {
    const f32 keep = 1.0f - delta;
    for (s32 axis = 0; axis < 3; axis++) {
        res[axis] = (a[axis] * keep) + (b[axis] * delta);
    }
}
// Component-wise linear blend of the first four u8 entries of a and b into res.
// The blend is computed in floating point and converted on assignment.
void delta_interpolate_rgba(u8* res, u8* a, u8* b, f32 delta) {
    const f32 keep = 1.0f - delta;
    for (s32 channel = 0; channel < 4; channel++) {
        res[channel] = (a[channel] * keep) + (b[channel] * delta);
    }
}
// Returns sqrt((1 + xPiece + yPiece + zPiece) / 4).
// The result is NaN when the sum inside the root is negative.
static f32 get_quat_compo_abs(f32 xPiece, f32 yPiece, f32 zPiece) {
    const f32 quarterSum = (1.0f + xPiece + yPiece + zPiece) * 0.25f;
    return sqrt(quarterSum);
}
// "a >= b" where NaN is treated as smaller than everything:
// a NaN `a` never wins, and a non-NaN `a` always wins against a NaN `b`.
static u8 float_ge_with_nan_check(f32 a, f32 b) {
    if (isnan(a)) {
        return 0;
    }
    if (isnan(b)) {
        return 1;
    }
    return a >= b;
}
// Converts a 3x3 rotation matrix into a rotation quaternion.
// this function expects an orthonormal rotation matrix
//   q: output, receives the four quaternion coefficients
//   a: input, the three rows of the rotation matrix
// The coefficient layout matches rot_quat_to_mtx_rot, which performs the
// reverse conversion.
static void rot_mat_to_rot_quat(Vec4f q, Vec3f a[3]) {
// get absolute value of coefficients
// (each one is derived from the matrix diagonal only, so the sign is unknown here)
q[0] = get_quat_compo_abs(a[0][0], a[1][1], a[2][2]);
q[1] = get_quat_compo_abs(a[0][0], -a[1][1], -a[2][2]);
q[2] = get_quat_compo_abs(-a[0][0], a[1][1], -a[2][2]);
q[3] = get_quat_compo_abs(-a[0][0], -a[1][1], a[2][2]);
// find the coefficient with greatest magnitude
// NaN checks are because of possible square root of negative number in get_quat_compo_abs
// the nested conditional below is a decision tree that yields the index (0-3) of that coefficient
int maxCompoMagCase = float_ge_with_nan_check(q[0], q[1])
? float_ge_with_nan_check(q[0], q[2])
? float_ge_with_nan_check(q[0], q[3])
? 0
: 3
: float_ge_with_nan_check(q[2], q[3])
? 2
: 3
: float_ge_with_nan_check(q[1], q[2])
? float_ge_with_nan_check(q[1], q[3])
? 1
: 3
: float_ge_with_nan_check(q[2], q[3])
? 2
: 3;
// adjust signs of coefficients; base on greatest magnitude to improve float accuracy
// the largest coefficient keeps its non-negative value; the other three are
// recomputed from off-diagonal matrix entries divided by it
f32 divFactor = 0;
// every value maxCompoMagCase can take (0-3) has a case, so no default is present
switch (maxCompoMagCase) {
case 0:
divFactor = 0.25f / q[0];
q[1] = (a[1][2] - a[2][1]) * divFactor;
q[2] = (a[2][0] - a[0][2]) * divFactor;
q[3] = (a[0][1] - a[1][0]) * divFactor;
return;
case 1:
divFactor = 0.25f / q[1];
q[0] = (a[1][2] - a[2][1]) * divFactor;
q[2] = (a[1][0] + a[0][1]) * divFactor;
q[3] = (a[2][0] + a[0][2]) * divFactor;
return;
case 2:
divFactor = 0.25f / q[2];
q[0] = (a[2][0] - a[0][2]) * divFactor;
q[1] = (a[1][0] + a[0][1]) * divFactor;
q[3] = (a[2][1] + a[1][2]) * divFactor;
return;
case 3:
divFactor = 0.25f / q[3];
q[0] = (a[0][1] - a[1][0]) * divFactor;
q[1] = (a[2][0] + a[0][2]) * divFactor;
q[2] = (a[2][1] + a[1][2]) * divFactor;
return;
}
}
// Builds the 3x3 rotation matrix `a` that corresponds to the rotation
// quaternion `q`. Every matrix entry is a sum or difference of doubled
// products of two quaternion coefficients.
static void rot_quat_to_mtx_rot(Vec3f a[3], Vec4f q) {
    // doubled squares
    const f32 two00 = 2.0f * sqr(q[0]);
    const f32 two11 = 2.0f * sqr(q[1]);
    const f32 two22 = 2.0f * sqr(q[2]);
    const f32 two33 = 2.0f * sqr(q[3]);
    // doubled mixed products
    const f32 two01 = 2.0f * q[0] * q[1];
    const f32 two02 = 2.0f * q[0] * q[2];
    const f32 two03 = 2.0f * q[0] * q[3];
    const f32 two12 = 2.0f * q[1] * q[2];
    const f32 two13 = 2.0f * q[1] * q[3];
    const f32 two23 = 2.0f * q[2] * q[3];

    // diagonal
    a[0][0] = two00 + two11 - 1.0f;
    a[1][1] = two00 + two22 - 1.0f;
    a[2][2] = two00 + two33 - 1.0f;

    // off-diagonal pairs
    a[0][1] = two12 + two03;
    a[1][0] = two12 - two03;

    a[0][2] = two13 - two02;
    a[2][0] = two13 + two02;

    a[1][2] = two23 + two01;
    a[2][1] = two23 - two01;
}
// rotation quaternion spherical linear interpolation
//   out: receives the interpolated quaternion; may be the same array as a or b
//   a:   quaternion at t = 0
//   b:   quaternion at t = 1
//   t:   interpolation factor
// When the two quaternions describe the same rotation (|dot| >= 1), b is
// copied to out unchanged.
static void rot_quat_slerp(Vec4f out, Vec4f a, Vec4f b, f32 t) {
    // credit where it's due:
    // Martin John Baker
    // https://www.euclideanspace.com/maths/algebra/realNormedAlgebra/quaternions/slerp/index.htm
    f32 halfCos = a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];

    // Work on private copies of both inputs: delta_interpolate_mtx_accurate
    // passes the same array as `out` and `b`, and memcpy between overlapping
    // objects is undefined. Copying also keeps `a` intact if it aliases `out`.
    f32 from[4] = { a[0], a[1], a[2], a[3] };
    f32 to[4] = { b[0], b[1], b[2], b[3] };
    int i;

    // if the abs of cos is 1, then there is no rotation
    if (fabs(halfCos) >= 1.0f) {
        for (i = 0; i < 4; ++i) {
            out[i] = to[i];
        }
        return;
    }

    // if cos is negative, rotation takes long path; invert to take short path
    if (halfCos < 0.0f) {
        for (i = 0; i < 4; ++i) {
            to[i] *= -1.0f;
        }
        halfCos *= -1.0f;
    }

    f32 halfTh = acos(halfCos);
    f32 halfSin = sqrt(1.0f - sqr(halfCos));
    if (halfSin == 0.0f) {
        // this shouldn't happen, but float imprecision can make it happen anyway
        halfSin = FLT_EPSILON;
    }

    // weights of the start and end quaternions
    f32 sat = sin((1 - t) * halfTh) / halfSin;
    f32 st = sin(t * halfTh) / halfSin;

    for (i = 0; i < 4; ++i) {
        out[i] = from[i] * sat + to[i] * st;
    }
}
// Divides the scale out of a shear value.
// A zero scale is treated as "no shear" instead of dividing by zero.
inline static f32 unmat_unscale_shear(f32 shear, f32 scale) {
    return (scale == 0.0f) ? 0.0f : (shear / scale);
}
// Decomposes the transformation matrix `mat` into scale, shear, rotation
// (as a quaternion) and translation, written to `tranfs`.
//
// matrix decomposition code is brought over from "unmatrix.c" from Graphics Gems II
// Author: Spencer W. Thomas of University of Michigan
// additional Graphics Gems II code by Andrew Glassner and Rod G. Bogart
// thanks to fgsfds for informing me of this
// http://www.realtimerendering.com/resources/GraphicsGems/gemsii/unmatrix.c
//
// matrix perspective is not used in SM64, so those indices are stripped from the output parameter
// return value was related to if matrix was non-singular, which was necessary for perspective
// since perspective is not used, the return value is also stripped
//
// additionally, rotation is not converted to euler angles
// instead, it is converted to a quaternion to avoid gimbal lock
//
// tranfs is returned as follows:
// scale(x, y, z), shear(xy, xz, zy), rotation(a, b, c, d), translation(x, y, z)
// i.e. indices 0-2 hold the scale, 3-5 the shear, 6-9 the quaternion and
// 10-12 the translation
//
// `mat` itself is not modified; the work is done on a local copy.
OPTIMIZE_O3 static void unmatrix(Mtx * mat, f32 tranfs[13]) {
int i = 0;
Vec3f axisVecs[3] = { 0 };
Vec3f yzCross = { 0 };
Mtx locMat = *mat;
// matrix normalization skipped since all SM64 matrices are normalized
// perspective is not used in SM64 matrices, so the steps for it are skipped
for (i = 0; i < 3; ++i) {
// translation (position)
tranfs[10 + i] = locMat.m[3][i];
// axis vector
// (first three entries of row i)
memcpy(axisVecs[i], locMat.m[i], sizeof(f32) * 3);
}
// X-scale
tranfs[0] = vec3f_length(axisVecs[0]);
// normalize X-axis vector
// (a zero-length axis is replaced by the unit X axis to avoid dividing by zero)
if (tranfs[0] == 0.0f) {
axisVecs[0][0] = 1.0f;
axisVecs[0][1] = 0.0f;
axisVecs[0][2] = 0.0f;
} else {
for (i = 0; i < 3; ++i) {
axisVecs[0][i] /= tranfs[0];
}
}
// XY-shear
tranfs[3] = vec3f_dot(axisVecs[0], axisVecs[1]);
// orthogonalize Y-axis vector to X-axis vector
vec3f_combine(axisVecs[1], axisVecs[1], axisVecs[0], 1.0f, -tranfs[3]);
// Y-scale
tranfs[1] = vec3f_length(axisVecs[1]);
// normalize Y-axis vector
// (a zero-length axis is replaced by the unit Y axis to avoid dividing by zero)
if (tranfs[1] == 0.0f) {
axisVecs[1][0] = 0.0f;
axisVecs[1][1] = 1.0f;
axisVecs[1][2] = 0.0f;
} else {
for (i = 0; i < 3; ++i) {
axisVecs[1][i] /= tranfs[1];
}
}
// unscale XY-shear
tranfs[3] = unmat_unscale_shear(tranfs[3], tranfs[1]);
// XZ-shear
tranfs[4] = vec3f_dot(axisVecs[0], axisVecs[2]);
// orthogonalize Z-axis vector to X-axis vector
vec3f_combine(axisVecs[2], axisVecs[2], axisVecs[0], 1.0f, -tranfs[4]);
// ZY-shear
tranfs[5] = vec3f_dot(axisVecs[1], axisVecs[2]);
// orthogonalize Z-axis vector to Y-axis vector
vec3f_combine(axisVecs[2], axisVecs[2], axisVecs[1], 1.0f, -tranfs[5]);
// Z-scale
tranfs[2] = vec3f_length(axisVecs[2]);
// normalize Z-axis vector
// (a zero-length axis is replaced by the unit Z axis to avoid dividing by zero)
if (tranfs[2] == 0.0f) {
axisVecs[2][0] = 0.0f;
axisVecs[2][1] = 0.0f;
axisVecs[2][2] = 1.0f;
} else {
for (i = 0; i < 3; ++i) {
axisVecs[2][i] /= tranfs[2];
}
}
// unscale remaining shears
tranfs[4] = unmat_unscale_shear(tranfs[4], tranfs[2]);
tranfs[5] = unmat_unscale_shear(tranfs[5], tranfs[2]);
// check if coordinate system needs to be flipped
// (X . (Y x Z) is negative when the three axes form a mirrored basis;
// negating all scales and axes leaves a pure rotation in axisVecs)
vec3f_cross(yzCross, axisVecs[1], axisVecs[2]);
if (vec3f_dot(axisVecs[0], yzCross) < 0.0f) {
for (i = 0; i < 3; ++i) {
tranfs[i] *= -1.0f;
vec3f_mul(axisVecs[i], -1.0f);
}
}
// rotation
// (the quaternion is written to tranfs[6..9])
rot_mat_to_rot_quat(tranfs + 6, axisVecs);
}
// builds a transformation matrix from a decomposed sequence from unmatrix
// see unmatrix for what tranfs means
//   mat:    output matrix, fully overwritten
//   tranfs: 13 values; as read below, indices 0-2 are the scale, 3-5 the
//           shear, 6-9 the rotation quaternion and 10-12 the translation
static void rematrix(Mtx * mat, f32 tranfs[13]) {
int i;
Vec3f rotAxes[3] = { 0 };
Mat4 rotMat = { 0 };
// start with the identity matrix
// (the diagonal is set to 1; the inner loop clears each off-diagonal pair
// [i][j] / [j][i] above and below it)
for (i = 0; i < 4; ++i) {
int j;
mat->m[i][i] = 1.0f;
for (j = 3; j > i; --j) {
mat->m[i][j] = mat->m[j][i] = 0.0f;
}
}
// scale
// (multiplies the first three entries of each of the first three rows)
for (i = 0; i < 3; ++i) {
vec3f_mul(mat->m[i], tranfs[i]);
}
// shear
// (the diagonal already holds the scale, so each shear is re-scaled here,
// reversing the division done by unmat_unscale_shear)
mat->m[1][0] = mat->m[1][1] * tranfs[3];
mat->m[2][0] = mat->m[2][2] * tranfs[4];
mat->m[2][1] = mat->m[2][2] * tranfs[5];
// rotate
// get the rotation matrix for the quat
rot_quat_to_mtx_rot(rotAxes, tranfs + 6);
// transfer to the Mat4 struct
// (3x3 rotation in the upper-left corner, last row/column taken from identity)
for (i = 0; i < 3; ++i) {
memcpy(rotMat[i], rotAxes[i], sizeof(f32) * 3);
rotMat[i][3] = rotMat[3][i] = 0.0f;
}
rotMat[3][3] = 1.0f;
// apply the rotation
// this is technically abuse of Mat4 vs. Mtx, but coop doesn't target N64 anyway
// NOTE(review): destination and first operand are the same matrix; this
// presumably relies on mtxf_mul tolerating that aliasing - confirm
mtxf_mul(mat->m, mat->m, rotMat);
// translate
for (i = 0; i < 3; ++i) {
mat->m[3][i] = tranfs[10 + i];
}
}
// Interpolates between matrices a and b by decomposing both with unmatrix,
// blending the parts, and rebuilding the result with rematrix.
// Scale, shear and translation are blended linearly; the rotation quaternion
// is blended with rot_quat_slerp.
inline static void delta_interpolate_mtx_accurate(Mtx* out, Mtx* a, Mtx* b, f32 delta) {
    f32 partsFrom[13] = { 0 };
    f32 partsTo[13] = { 0 };
    const f32 keep = 1.0f - delta;

    unmatrix(a, partsFrom);
    unmatrix(b, partsTo);

    // linear blend of everything except the quaternion stored at 6..9
    for (int slot = 0; slot < 13; ++slot) {
        if (slot >= 6 && slot < 10) {
            continue;
        }
        partsTo[slot] = partsFrom[slot] * keep + partsTo[slot] * delta;
    }

    // the blended result accumulates in partsTo
    rot_quat_slerp(partsTo + 6, partsFrom + 6, partsTo + 6, delta);
    rematrix(out, partsTo);
}
// Interpolates between matrices a (delta = 0) and b (delta = 1) into out.
// Uses the decomposition-based interpolation when configInterpolationMode is
// set on a build with pointers wider than 4 bytes; otherwise blends every
// matrix entry linearly.
void delta_interpolate_mtx(Mtx* out, Mtx* a, Mtx* b, f32 delta) {
    // HACK: Limit accurate interpolation to 64-bit builds
    if (sizeof(void*) > 4 && configInterpolationMode) {
        delta_interpolate_mtx_accurate(out, a, b, delta);
        return;
    }

    // this isn't the right way to do things.
    const f32 keep = 1.0f - delta;
    for (s32 cell = 0; cell < 16; cell++) {
        const s32 row = cell / 4;
        const s32 col = cell % 4;
        out->m[row][col] = (a->m[row][col] * keep) + (b->m[row][col] * delta);
    }
}
// Compares the three axis rows of the previous and current matrices.
// If any normalized axis pair has a dot product below -sqrt(2)/3, the matrix
// is considered to have changed "too much" and *mtx is pointed at *mtxPrev,
// so that both ends of the interpolation are the same matrix.
void detect_and_skip_mtx_interpolation(Mtx** mtxPrev, Mtx** mtx) {
    const f32 minDot = sqrt(2.0f) / -3.0f;
    bool changedTooMuch = false;

    for (s32 axis = 0; axis < 3; axis++) {
        Vec3f before;
        Vec3f after;
        vec3f_copy(before, (f32*)(*mtxPrev)->m[axis]);
        vec3f_normalize(before);
        vec3f_copy(after, (f32*)(*mtx)->m[axis]);
        vec3f_normalize(after);

        const f32 axisDot = vec3f_dot(before, after);
        if (axisDot < minDot) {
            changedTooMuch = true;
        }
    }

    if (changedTooMuch) {
        *mtx = *mtxPrev;
    }
}
// Joins `num_strings` strings with `seperator` between them into
// `output_buffer`, always leaving the result NUL-terminated.
//   output_buffer: destination; zero-filled first
//   buffer_size:   capacity of output_buffer in bytes, terminator included
//   strings:       array of num_strings NUL-terminated strings
//   num_strings:   number of entries in strings
//   seperator:     text placed between consecutive strings
// If the joined text does not fit and the separators alone leave more than 8
// bytes of room, the longest strings are shortened one character at a time
// until everything fits. Otherwise the output is simply cut off at the end
// of the buffer.
// Nothing is written when output_buffer is NULL or buffer_size is not positive.
void str_seperator_concat(char *output_buffer, int buffer_size, char** strings, int num_strings, char* seperator) {
    // a negative size would turn into a huge size_t inside memset
    if (output_buffer == NULL || buffer_size <= 0) { return; }

    // empty buffer
    memset(output_buffer, 0, (size_t)buffer_size);
    if (strings == NULL || num_strings <= 0) { return; }

    // Calculate the total length of all strings
    int string_length[num_strings];
    int total_length = 0;
    for (int i = 0; i < num_strings; i++) {
        string_length[i] = strlen(strings[i]);
        total_length += string_length[i];
    }

    // get the seperator length
    int seperator_length = strlen(seperator);
    int seperators_length = (num_strings - 1) * seperator_length;

    if (seperators_length + 8 < buffer_size) {
        // Shorten the largest string over and over until we fit
        while (total_length + seperators_length >= buffer_size) {
            int* largest = NULL;
            for (int i = 0; i < num_strings; i++) {
                // ">=" means that among equally long strings the last one is shortened
                if (largest == NULL || string_length[i] >= *largest) {
                    largest = &string_length[i];
                }
            }
            if (largest == NULL || *largest == 0) { break; }
            *largest = *largest - 1;
            total_length--;
        }
    }

    // Fill the buffer
    int buffer_index = 0;
    for (int i = 0; i < num_strings; i++) {
        // Concat string; snprintf writes at most (limit - 1) characters plus the terminator
        int remaining = buffer_size - buffer_index;
        int string_limit = (remaining < string_length[i] + 1) ? remaining : (string_length[i] + 1);
        if (string_limit <= 0) { break; }
        snprintf(&output_buffer[buffer_index], string_limit, "%s", strings[i]);
        buffer_index += string_length[i];

        // Concat seperator
        if (i != (num_strings - 1)) {
            remaining = buffer_size - buffer_index;
            int seperator_limit = (remaining < seperator_length + 1) ? remaining : (seperator_length + 1);
            if (seperator_limit <= 0) { break; }
            snprintf(&output_buffer[buffer_index], seperator_limit, "%s", seperator);
            buffer_index += seperator_length;
        }
    }
}
// Case-insensitive substring search (portable stand-in for strcasestr).
// Returns a pointer to the first position in haystack where needle matches
// when both are compared through tolower(), or NULL if there is no match.
// An empty needle matches at the start of haystack.
const char *strstr_lowercased(const char *haystack, const char *needle) {
    if (*needle == '\0') {
        return haystack;
    }

    for (const char *start = haystack; *start != '\0'; ++start) {
        // count how many leading characters of needle match at this position
        size_t matched = 0;
        while (start[matched] != '\0' && needle[matched] != '\0'
               && tolower((unsigned char)start[matched]) == tolower((unsigned char)needle[matched])) {
            ++matched;
        }
        // reaching the end of needle means every character matched
        if (needle[matched] == '\0') {
            return start;
        }
    }

    return NULL;
}
// Builds "<executable directory><PATH_SEPARATOR><updater file name>" and
// returns it. The result lives in a static buffer that is rewritten on every
// call, so the caller must not free it.
static char *get_update_path(void) {
    static char sUpdateExecFilePath[SYS_MAX_PATH];
#ifdef _WIN32
    const char *updaterName = "coopdx_updater.exe";
#else
    const char *updaterName = "coopdx_updater";
#endif
    // snprintf truncates if the combined path exceeds the buffer; that limit
    // should not be hit during normal use
    snprintf(sUpdateExecFilePath, sizeof(sUpdateExecFilePath), "%s%s%s", sys_exe_path_dir(), PATH_SEPARATOR, updaterName);
    return sUpdateExecFilePath;
}
// Returns true when the updater executable exists on disk and gUpdateMessage
// is set.
bool can_update_game(void) {
    // the file is not guaranteed to exist, so make sure we have the updater installed
    if (!fs_sys_file_exists(get_update_path())) {
        return false;
    }
    return gUpdateMessage ? true : false;
}
// Launches the external updater with the "--game-update" argument and ends
// the current process. This function does not return.
//   Windows: starts the updater as a new process, then exits with status 0
//            (also when the updater could not be started).
//   Others:  closes the standard streams and replaces the current process
//            with the updater; exits with status 1 if that fails.
void update_game(void) {
    const char *updateExecFilePath = get_update_path();
#ifdef _WIN32
    STARTUPINFOA si = { 0 };
    PROCESS_INFORMATION pi = { 0 };
    si.cb = sizeof(si);

    // room for the path plus the quotes and the argument added below
    char commandBuf[SYS_MAX_PATH + 32];
    // The executable path is quoted: with a NULL application name,
    // CreateProcessA takes the first whitespace-delimited token of the command
    // line as the program, so an unquoted path containing spaces would be
    // split in the wrong place.
    snprintf(commandBuf, sizeof(commandBuf), "\"%s\" --game-update", updateExecFilePath);

    if (CreateProcessA(NULL, commandBuf, NULL, NULL, FALSE, 0, NULL, NULL, &si, &pi)) {
        // the handles are not needed; the updater keeps running on its own
        CloseHandle(pi.hProcess);
        CloseHandle(pi.hThread);
    }
    exit(0);
#else
    fclose(stdin);
    fclose(stdout);
    fclose(stderr);
    // the argument list of a variadic exec call must end with a null pointer
    // of type char *; a bare NULL is not guaranteed to have pointer type
    execl(updateExecFilePath, "coopdx_updater", "--game-update", (char *)NULL);
    // only reached if execl failed
    exit(1);
#endif
}