Implement vctuxs/vcfpuxws128

2025-10-30 07:11:38 +00:00 · 2025-05-23 17:34:07 -04:00 · 2025-05-23 17:34:07 -04:00 · 3cb492f94b
commit 3cb492f94b
parent daa4c009dc
2 changed files with 42 additions and 0 deletions
--- a/XenonRecomp/recompiler.cpp
+++ b/XenonRecomp/recompiler.cpp
@ -1861,6 +1861,16 @@ bool Recompiler::Recompile(
            println("_mm_load_ps({}.f32)));", v(insn.operands[1]));
        break;

+    case PPC_INST_VCTUXS:
+    case PPC_INST_VCFPUXWS128:
+        printSetFlushMode(true);
+        print("\t_mm_store_si128((__m128i*){}.u32, _mm_vctuxs(", v(insn.operands[0]));
+        if (insn.operands[2] != 0)
+            println("_mm_mul_ps(_mm_load_ps({}.f32), _mm_set1_ps({}))));", v(insn.operands[1]), 1u << insn.operands[2]);
+        else
+            println("_mm_load_ps({}.f32)));", v(insn.operands[1]));
+        break;
+
    case PPC_INST_VCFSX:
    case PPC_INST_VCSXWFP128:
    {
--- a/XenonUtils/ppc_context.h
+++ b/XenonUtils/ppc_context.h
@ -676,4 +676,36 @@ inline uint64_t __mulhu(uint64_t a, uint64_t b) {
    return hi_hi + (hi_lo >> 32) + (lo_hi >> 32) + (cross >> 32);
 }

+inline __m128i _mm_vctuxs(__m128 src1)
+{
+    // Per-lane float -> unsigned 32-bit conversion with saturation. Step 1: clamp negative lanes to 0.
+    __m128 clamped = _mm_max_ps(src1, _mm_setzero_ps());
+
+    // 0x4F000000 is the bit pattern of 2^31f. For lanes in [2^31, 2^32): subtract 2^31, convert as signed, add 0x80000000 back.
+    __m128i big_result = _mm_add_epi32(
+        _mm_cvttps_epi32(
+            _mm_sub_ps(clamped, _mm_castsi128_ps(_mm_set1_epi32(0x4F000000)))
+        ), 
+        _mm_set1_epi32(0x80000000)
+    ); 
+    
+    // Per-lane select: big_result where clamped >= 2^31, otherwise the direct signed conversion of clamped.
+    __m128i result = _mm_blendv_epi8(
+        _mm_cvttps_epi32(clamped), 
+        big_result, 
+        _mm_castps_si128(
+            _mm_cmpge_ps(clamped, _mm_castsi128_ps(_mm_set1_epi32(0x4F000000)))
+        )
+    );
+    
+    // Force lanes with clamped >= 2^32 (0x4F800000) or a NaN input to 0xFFFFFFFF. NOTE(review): confirm NaN should saturate high rather than give 0 on the target CPU.
+    __m128 saturate_mask = _mm_or_ps(
+        _mm_cmpge_ps(
+            clamped, _mm_castsi128_ps(_mm_set1_epi32(0x4F800000))
+        ),
+        _mm_cmpunord_ps(src1, src1)
+    );
+    return _mm_blendv_epi8(result, _mm_set1_epi32(-1), _mm_castps_si128(saturate_mask));
+}
+
 #endif