Implement vctuxs/vcfpuxws128

This commit is contained in:
CRACKbomber 2025-05-23 17:34:07 -04:00 committed by Tom
parent fb627549d8
commit 7c13094ffd
2 changed files with 42 additions and 0 deletions

View file

@ -1861,6 +1861,16 @@ bool Recompiler::Recompile(
println("_mm_load_ps({}.f32)));", v(insn.operands[1])); println("_mm_load_ps({}.f32)));", v(insn.operands[1]));
break; break;
// vctuxs / vcfpuxws128: emit code that converts the float lanes of operand 1
// to unsigned 32-bit integers via the _mm_vctuxs helper and stores the result
// into the u32 lanes of operand 0.
case PPC_INST_VCTUXS:
case PPC_INST_VCFPUXWS128:
// NOTE(review): presumably selects the FP flush mode required for vector ops — confirm against printSetFlushMode.
printSetFlushMode(true);
// Opening half of the emitted statement; the branches below append the source expression and close it.
print("\t_mm_store_si128((__m128i*){}.u32, _mm_vctuxs(", v(insn.operands[0]));
// A non-zero operand 2 is a power-of-two scale: the source is multiplied by (1 << operand 2) before conversion.
if (insn.operands[2] != 0)
println("_mm_mul_ps(_mm_load_ps({}.f32), _mm_set1_ps({}))));", v(insn.operands[1]), 1u << insn.operands[2]);
else
// No scaling requested: convert the loaded source lanes directly.
println("_mm_load_ps({}.f32)));", v(insn.operands[1]));
break;
case PPC_INST_VCFSX: case PPC_INST_VCFSX:
case PPC_INST_VCSXWFP128: case PPC_INST_VCSXWFP128:
{ {

View file

@ -676,4 +676,36 @@ inline uint64_t __mulhu(uint64_t a, uint64_t b) {
return hi_hi + (hi_lo >> 32) + (lo_hi >> 32) + (cross >> 32); return hi_hi + (hi_lo >> 32) + (lo_hi >> 32) + (cross >> 32);
} }
// Converts four packed single-precision floats to unsigned 32-bit integers,
// truncating toward zero and saturating, following the PowerPC
// vctuxs / vcfpuxws128 conversion rules per lane:
//   NaN               -> 0x00000000
//   x <= 0 (and -inf) -> 0x00000000
//   0 < x < 2^32      -> trunc(x)
//   x >= 2^32 (+inf)  -> 0xFFFFFFFF
//
// src1: the (already scaled) source lanes.
// Returns the converted lanes as packed 32-bit integers.
//
// Only SSE2 operations are used; lane selection is done with compare masks
// (all-ones / all-zeros per lane) instead of SSE4.1 blends.
inline __m128i _mm_vctuxs(__m128 src1)
{
    const __m128 two_pow_31 = _mm_set1_ps(2147483648.0f);
    const __m128 two_pow_32 = _mm_set1_ps(4294967296.0f);

    // Zero out NaN lanes explicitly: cmpord yields an all-zero mask for NaN,
    // so the AND turns those lanes into +0.0 and they convert to 0.
    const __m128 ordered = _mm_and_ps(src1, _mm_cmpord_ps(src1, src1));

    // Clamp negative values (including -inf and -0.0) to +0.0.
    const __m128 clamped = _mm_max_ps(ordered, _mm_setzero_ps());

    // cvttps only covers the signed range, so lanes in [2^31, 2^32) are
    // rebased by subtracting 2^31 before the conversion and get the top bit
    // added back afterwards. The mask limits both steps to those lanes.
    const __m128 is_big = _mm_cmpge_ps(clamped, two_pow_31);
    const __m128 rebased = _mm_sub_ps(clamped, _mm_and_ps(is_big, two_pow_31));
    const __m128i top_bit = _mm_and_si128(
        _mm_castps_si128(is_big),
        _mm_set1_epi32(static_cast<int>(0x80000000u)));
    const __m128i converted = _mm_add_epi32(_mm_cvttps_epi32(rebased), top_bit);

    // Lanes >= 2^32 (including +inf) saturate to UINT_MAX; OR-ing the
    // all-ones compare mask forces every bit of those lanes.
    const __m128i overflow = _mm_castps_si128(_mm_cmpge_ps(clamped, two_pow_32));
    return _mm_or_si128(converted, overflow);
}
#endif #endif