diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp
index 27f1a3e..6268df9 100644
--- a/XenonRecomp/recompiler.cpp
+++ b/XenonRecomp/recompiler.cpp
@@ -2457,6 +2457,11 @@ bool Recompiler::Recompile(
         println("\t_mm_store_si128((__m128i*){}.u8, _mm_or_si128(_mm_andnot_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)), _mm_and_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8))));", v(insn.operands[0]), v(insn.operands[3]), v(insn.operands[1]), v(insn.operands[3]), v(insn.operands[2]));
         break;
 
+    case PPC_INST_VSL:
+        println("\t_mm_store_si128((__m128i*){}.u8, _mm_vsl(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));",
+            v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
+        break;
+
     case PPC_INST_VSLB:
         // TODO: vectorize
         for (size_t i = 0; i < 16; i++)
diff --git a/XenonUtils/ppc_context.h b/XenonUtils/ppc_context.h
index 2225748..7b30689 100644
--- a/XenonUtils/ppc_context.h
+++ b/XenonUtils/ppc_context.h
@@ -708,4 +708,24 @@ inline __m128i _mm_vctuxs(__m128 src1)
     return _mm_blendv_epi8(result, _mm_set1_epi32(-1), _mm_castps_si128(saturate_mask));
 }
 
+// Emulates the PowerPC vsl instruction: shifts the whole 128-bit value in a
+// left by 0-7 bits, carrying bits across the 64-bit lane boundary.
+// The carry below flows from bytes 0-7 into bytes 8-15, i.e. byte 0 is the
+// least significant byte, so the count is the low three bits of byte 0 of b.
+// NOTE(review): assumes b is stored in the same byte order as a - confirm.
+inline __m128i _mm_vsl(__m128i a, __m128i b)
+{
+    const int shift = _mm_cvtsi128_si32(b) & 0x7;
+
+    // Variable shift counts are passed in the low 64 bits of a vector.
+    const __m128i left = _mm_cvtsi32_si128(shift);
+    const __m128i right = _mm_cvtsi32_si128(64 - shift);
+
+    // Bits leaving the top of the low lane enter the bottom of the high lane.
+    // For shift == 0 the right-shift count is 64, which produces zero.
+    const __m128i carry = _mm_srl_epi64(_mm_slli_si128(a, 8), right);
+
+    return _mm_or_si128(_mm_sll_epi64(a, left), carry);
+}
+
 #endif