Implement vctuxs/vcfpuxws128

2025-10-30 07:11:38 +00:00 · 2025-05-23 17:34:07 -04:00 · 2025-05-23 17:34:07 -04:00 · 7c13094ffd
commit 7c13094ffd
parent fb627549d8
2 changed files with 42 additions and 0 deletions
--- a/XenonRecomp/recompiler.cpp
+++ b/XenonRecomp/recompiler.cpp
@ -1861,6 +1861,16 @@ bool Recompiler::Recompile(
            println("_mm_load_ps({}.f32)));", v(insn.operands[1]));
        break;
    case PPC_INST_VCTUXS:
    case PPC_INST_VCFPUXWS128:
        printSetFlushMode(true);
        print("\t_mm_store_si128((__m128i*){}.u32, _mm_vctuxs(", v(insn.operands[0]));
        if (insn.operands[2] != 0)
            println("_mm_mul_ps(_mm_load_ps({}.f32), _mm_set1_ps({}))));", v(insn.operands[1]), 1u << insn.operands[2]);
        else
            println("_mm_load_ps({}.f32)));", v(insn.operands[1]));
        break;
    case PPC_INST_VCFSX:
    case PPC_INST_VCSXWFP128:
    {
--- a/XenonUtils/ppc_context.h
+++ b/XenonUtils/ppc_context.h
@ -676,4 +676,36 @@ inline uint64_t __mulhu(uint64_t a, uint64_t b) {
    return hi_hi + (hi_lo >> 32) + (lo_hi >> 32) + (cross >> 32);
 }
 // Converts four packed single-precision floats to four unsigned 32-bit
 // integers, truncating toward zero and saturating to [0, UINT_MAX].
 // Backs the code emitted for PPC vctuxs / vcfpuxws128; any power-of-two
 // scaling is applied by the caller before this helper runs.
 //
 // Per-lane results:
 //   x is NaN            -> 0           (Power ISA: NaN converts to zero)
 //   x <= 0 (incl. -inf) -> 0
 //   0 < x < 2^32        -> trunc(x)
 //   x >= 2^32 (incl. +inf) -> 0xFFFFFFFF
 inline __m128i _mm_vctuxs(__m128 src1)
 {
    const __m128 two31 = _mm_set1_ps(2147483648.0f); // 2^31, bit pattern 0x4F000000
    const __m128 two32 = _mm_set1_ps(4294967296.0f); // 2^32, bit pattern 0x4F800000

    // Clamp negatives to 0. MAXPS returns its second operand when either
    // input is NaN, so NaN lanes also become 0 here; the operand order matters.
    const __m128 clamped = _mm_max_ps(src1, _mm_setzero_ps());

    // Lanes that do not fit a signed 32-bit conversion, and lanes that do not
    // fit an unsigned 32-bit result at all.
    const __m128 needsBias = _mm_cmpge_ps(clamped, two31);
    const __m128 overflow = _mm_cmpge_ps(clamped, two32);

    // Rebase lanes in [2^31, 2^32) into signed range before converting.
    // Floats this large are multiples of 256, so the subtraction is exact.
    const __m128 rebased = _mm_sub_ps(clamped, _mm_and_ps(needsBias, two31));
    const __m128i converted = _mm_cvttps_epi32(rebased);

    // Add 2^31 back on the rebased lanes. Shifting the all-ones compare mask
    // left by 31 yields 0x80000000 on exactly those lanes and 0 elsewhere.
    const __m128i bias = _mm_slli_epi32(_mm_castps_si128(needsBias), 31);
    const __m128i result = _mm_add_epi32(converted, bias);

    // Saturate: OR-ing with the all-ones overflow mask forces 0xFFFFFFFF
    // regardless of what the conversion produced for out-of-range lanes.
    return _mm_or_si128(result, _mm_castps_si128(overflow));
 }
 #endif