mirror of
				https://github.com/hedge-dev/XenonRecomp.git
				synced 2025-10-30 07:11:38 +00:00 
			
		
		
		
	Compare commits
	
		
			18 commits
		
	
	
		
			8237abe22e
			...
			1dc45bab73
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | 1dc45bab73 | ||
|   | 4d7f6d88a5 | ||
|   | bc864bbb31 | ||
|   | f752fceba9 | ||
|   | bb43ea52bf | ||
|   | f5855085b5 | ||
|   | 78d9bc1be2 | ||
|   | 5a945705de | ||
|   | 865319a39c | ||
|   | 6df2397610 | ||
|   | 49c5e3b4f5 | ||
|   | 0bfeaed44a | ||
|   | c017eb630a | ||
|   | 82b4cd3bb7 | ||
|   | c3934c624f | ||
|   | 1c571c8576 | ||
|   | 7b8e37aa37 | ||
|   | 0bf1fd5477 | 
					 13 changed files with 426 additions and 206 deletions
				
			
		
							
								
								
									
										10
									
								
								.gitignore
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										10
									
								
								.gitignore
									
										
									
									
										vendored
									
									
								
							|  | @ -397,3 +397,13 @@ FodyWeavers.xsd | |||
| 
 | ||||
| # JetBrains Rider | ||||
| *.sln.iml | ||||
| 
 | ||||
| # IntelliJ IDEs | ||||
| .idea/ | ||||
| 
 | ||||
| # macOS metadata | ||||
| *.DS_Store | ||||
| 
 | ||||
| # CMake Files | ||||
| **/cmake-build-debug | ||||
| **/CMakeCache.txt | ||||
|  |  | |||
							
								
								
									
										3
									
								
								.gitmodules
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.gitmodules
									
										
									
									
										vendored
									
									
								
							|  | @ -13,3 +13,6 @@ | |||
| [submodule "thirdparty/tiny-AES-c"] | ||||
| 	path = thirdparty/tiny-AES-c | ||||
| 	url = https://github.com/kokke/tiny-AES-c.git | ||||
| [submodule "thirdparty/simde"] | ||||
| 	path = thirdparty/simde | ||||
| 	url = https://github.com/simd-everywhere/simde-no-tests.git | ||||
|  |  | |||
							
								
								
									
										24
									
								
								README.md
									
										
									
									
									
								
							
							
						
						
									
										24
									
								
								README.md
									
										
									
									
									
								
							|  | @ -4,6 +4,8 @@ XenonRecomp is a tool that converts Xbox 360 executables into C++ code, which ca | |||
| 
 | ||||
| This project was heavily inspired by [N64: Recompiled](https://github.com/N64Recomp/N64Recomp), a similar tool for N64 executables. | ||||
| 
 | ||||
| **DISCLAIMER:** This project does not provide a runtime implementation. It only converts the game code to C++, which is not going to function correctly without a runtime backing it. **Making the game work is your responsibility.** | ||||
| 
 | ||||
| ## Implementation Details | ||||
| 
 | ||||
| ### Instructions | ||||
|  | @ -155,16 +157,16 @@ savevmx_64_address = 0x831B34E4 | |||
| 
 | ||||
| Xbox 360 binaries feature specialized register restore & save functions that act similarly to switch case fallthroughs. Every function that utilizes non-volatile registers either has an inlined version of these functions or explicitly calls them. The recompiler requires the starting address of each restore/save function in the TOML file to recompile them correctly. These functions could likely be auto-detected, but there is currently no mechanism for it. | ||||
| 
 | ||||
| Property|Description | ||||
| -|- | ||||
| restgprlr_14_address|Start address of the `__restgprlr_14` function. It starts with `ld r14, -0x98(r1)`, repeating the same operation for the rest of the non-volatile registers and restoring the link register at the end. | ||||
| savegprlr_14_address|Start address of the `__savegprlr_14` function. It starts with `std r14, -0x98(r1)`, repeating the same operation for the rest of the non-volatile registers and saving the link register at the end. | ||||
| restfpr_14_address|Start address of the `__restfpr_14` function. It starts with `lfd f14, -0x90(r12)`, repeating the same operation for the rest of the non-volatile FPU registers. | ||||
| savefpr_14_address|Start address of the `__savefpr_14` function. It starts with `stfd r14, -0x90(r12)`, repeating the same operation for the rest of the non-volatile FPU registers. | ||||
| restvmx_14_address|Start address of the `__restvmx_14` function. It starts with `li r11, -0x120` and `lvx v14, r11, r12`, repeating the same operation for the rest of the non-volatile VMX registers until `v31`. | ||||
| savevmx_14_address|Start address of the `__savevmx_14` function. It starts with `li r11, -0x120` and `stvx v14, r11, r12`, repeating the same operation for the rest of the non-volatile VMX registers until `v31`. | ||||
| restvmx_64_address|Start address of the `__restvmx_64` function. It starts with `li r11, -0x400` and `lvx128 v64, r11, r12`, repeating the same operation for the rest of the non-volatile VMX registers. | ||||
| savevmx_64_address|Start address of the `__savevmx_64` function. It starts with `li r11, -0x400` and `stvx128 v64, r11, r12`, repeating the same operation for the rest of the non-volatile VMX registers. | ||||
| Property|Description|Byte Pattern | ||||
| -|-|- | ||||
| restgprlr_14_address|Start address of the `__restgprlr_14` function. It starts with `ld r14, -0x98(r1)`, repeating the same operation for the rest of the non-volatile registers and restoring the link register at the end.|`e9 c1 ff 68` | ||||
| savegprlr_14_address|Start address of the `__savegprlr_14` function. It starts with `std r14, -0x98(r1)`, repeating the same operation for the rest of the non-volatile registers and saving the link register at the end.|`f9 c1 ff 68` | ||||
| restfpr_14_address|Start address of the `__restfpr_14` function. It starts with `lfd f14, -0x90(r12)`, repeating the same operation for the rest of the non-volatile FPU registers.|`c9 cc ff 70` | ||||
| savefpr_14_address|Start address of the `__savefpr_14` function. It starts with `stfd f14, -0x90(r12)`, repeating the same operation for the rest of the non-volatile FPU registers.|`d9 cc ff 70` | ||||
| restvmx_14_address|Start address of the `__restvmx_14` function. It starts with `li r11, -0x120` and `lvx v14, r11, r12`, repeating the same operation for the rest of the non-volatile VMX registers until `v31`.|`39 60 fe e0 7d cb 60 ce` | ||||
| savevmx_14_address|Start address of the `__savevmx_14` function. It starts with `li r11, -0x120` and `stvx v14, r11, r12`, repeating the same operation for the rest of the non-volatile VMX registers until `v31`.|`39 60 fe e0 7d cb 61 ce` | ||||
| restvmx_64_address|Start address of the `__restvmx_64` function. It starts with `li r11, -0x400` and `lvx128 v64, r11, r12`, repeating the same operation for the rest of the non-volatile VMX registers.|`39 60 fc 00 10 0b 60 cb` | ||||
| savevmx_64_address|Start address of the `__savevmx_64` function. It starts with `li r11, -0x400` and `stvx128 v64, r11, r12`, repeating the same operation for the rest of the non-volatile VMX registers.|`39 60 fc 00 10 0b 61 cb` | ||||
| 
 | ||||
| #### longjmp & setjmp | ||||
| 
 | ||||
|  | @ -255,4 +257,4 @@ On Windows, you can use the clang-cl toolset and open the project in Visual Stud | |||
| 
 | ||||
| ## Special Thanks | ||||
| 
 | ||||
| This project could not have been possible without the [Xenia](https://github.com/xenia-project/xenia) emulator, as many parts of the CPU code conversion process has been implemented by heavily referencing its PPC code translator. The project also uses code from [Xenia Canary](https://github.com/xenia-canary/xenia-canary) to patch XEX binaries. | ||||
| This project would not have been possible without the [Xenia](https://github.com/xenia-project/xenia) emulator, as many parts of the CPU code conversion process have been implemented by heavily referencing its PPC code translator. The project also uses code from [Xenia Canary](https://github.com/xenia-canary/xenia-canary) to patch XEX binaries. | ||||
|  |  | |||
|  | @ -16,4 +16,4 @@ | |||
| #include <xbox.h> | ||||
| #include <xxhash.h> | ||||
| #include <fmt/core.h> | ||||
| #include <xmmintrin.h> | ||||
| #include <x86/sse.h> | ||||
|  |  | |||
|  | @ -378,8 +378,9 @@ bool Recompiler::Recompile( | |||
|             else if (address == config.setJmpAddress) | ||||
|             { | ||||
|                 println("\t{} = ctx;", env()); | ||||
|                 println("\t{}.s64 = setjmp(*reinterpret_cast<jmp_buf*>(base + {}.u32));", r(3), r(3)); | ||||
|                 println("\tif ({}.s64 != 0) ctx = {};", r(3), env()); | ||||
|                 println("\t{}.s64 = setjmp(*reinterpret_cast<jmp_buf*>(base + {}.u32));", temp(), r(3)); | ||||
|                 println("\tif ({}.s64 != 0) ctx = {};", temp(), env()); | ||||
|                 println("\t{} = {};", r(3), temp()); | ||||
|             } | ||||
|             else | ||||
|             { | ||||
|  | @ -896,17 +897,17 @@ bool Recompiler::Recompile( | |||
| 
 | ||||
|     case PPC_INST_FCTID: | ||||
|         printSetFlushMode(false); | ||||
|         println("\t{}.s64 = ({}.f64 > double(LLONG_MAX)) ? LLONG_MAX : _mm_cvtsd_si64(_mm_load_sd(&{}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[1])); | ||||
|         println("\t{}.s64 = ({}.f64 > double(LLONG_MAX)) ? LLONG_MAX : simde_mm_cvtsd_si64(simde_mm_load_sd(&{}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[1])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_FCTIDZ: | ||||
|         printSetFlushMode(false); | ||||
|         println("\t{}.s64 = ({}.f64 > double(LLONG_MAX)) ? LLONG_MAX : _mm_cvttsd_si64(_mm_load_sd(&{}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[1])); | ||||
|         println("\t{}.s64 = ({}.f64 > double(LLONG_MAX)) ? LLONG_MAX : simde_mm_cvttsd_si64(simde_mm_load_sd(&{}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[1])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_FCTIWZ: | ||||
|         printSetFlushMode(false); | ||||
|         println("\t{}.s64 = ({}.f64 > double(INT_MAX)) ? INT_MAX : _mm_cvttsd_si32(_mm_load_sd(&{}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[1])); | ||||
|         println("\t{}.s64 = ({}.f64 > double(INT_MAX)) ? INT_MAX : simde_mm_cvttsd_si32(simde_mm_load_sd(&{}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[1])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_FDIV: | ||||
|  | @ -1138,10 +1139,10 @@ bool Recompiler::Recompile( | |||
|     case PPC_INST_LVX128: | ||||
|         // NOTE: for endian swapping, we reverse the whole vector instead of individual elements.
 | ||||
|         // this is accounted for in every instruction (eg. dp3 sums yzw instead of xyz)
 | ||||
|         print("\t_mm_store_si128((__m128i*){}.u8, _mm_shuffle_epi8(_mm_load_si128((__m128i*)(base + ((", v(insn.operands[0])); | ||||
|         print("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_shuffle_epi8(simde_mm_load_si128((simde__m128i*)(base + ((", v(insn.operands[0])); | ||||
|         if (insn.operands[1] != 0) | ||||
|             print("{}.u32 + ", r(insn.operands[1])); | ||||
|         println("{}.u32) & ~0xF))), _mm_load_si128((__m128i*)VectorMaskL)));", r(insn.operands[2])); | ||||
|         println("{}.u32) & ~0xF))), simde_mm_load_si128((simde__m128i*)VectorMaskL)));", r(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_LVLX: | ||||
|  | @ -1150,7 +1151,7 @@ bool Recompiler::Recompile( | |||
|         if (insn.operands[1] != 0) | ||||
|             print("{}.u32 + ", r(insn.operands[1])); | ||||
|         println("{}.u32;", r(insn.operands[2])); | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_shuffle_epi8(_mm_load_si128((__m128i*)(base + ({}.u32 & ~0xF))), _mm_load_si128((__m128i*)&VectorMaskL[({}.u32 & 0xF) * 16])));", v(insn.operands[0]), temp(), temp()); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_shuffle_epi8(simde_mm_load_si128((simde__m128i*)(base + ({}.u32 & ~0xF))), simde_mm_load_si128((simde__m128i*)&VectorMaskL[({}.u32 & 0xF) * 16])));", v(insn.operands[0]), temp(), temp()); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_LVRX: | ||||
|  | @ -1159,7 +1160,7 @@ bool Recompiler::Recompile( | |||
|         if (insn.operands[1] != 0) | ||||
|             print("{}.u32 + ", r(insn.operands[1])); | ||||
|         println("{}.u32;", r(insn.operands[2])); | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, {}.u32 & 0xF ? _mm_shuffle_epi8(_mm_load_si128((__m128i*)(base + ({}.u32 & ~0xF))), _mm_load_si128((__m128i*)&VectorMaskR[({}.u32 & 0xF) * 16])) : _mm_setzero_si128());", v(insn.operands[0]), temp(), temp(), temp()); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, {}.u32 & 0xF ? simde_mm_shuffle_epi8(simde_mm_load_si128((simde__m128i*)(base + ({}.u32 & ~0xF))), simde_mm_load_si128((simde__m128i*)&VectorMaskR[({}.u32 & 0xF) * 16])) : simde_mm_setzero_si128());", v(insn.operands[0]), temp(), temp(), temp()); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_LVSL: | ||||
|  | @ -1167,7 +1168,7 @@ bool Recompiler::Recompile( | |||
|         if (insn.operands[1] != 0) | ||||
|             print("{}.u32 + ", r(insn.operands[1])); | ||||
|         println("{}.u32;", r(insn.operands[2])); | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_load_si128((__m128i*)&VectorShiftTableL[({}.u32 & 0xF) * 16]));", v(insn.operands[0]), temp()); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_load_si128((simde__m128i*)&VectorShiftTableL[({}.u32 & 0xF) * 16]));", v(insn.operands[0]), temp()); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_LVSR: | ||||
|  | @ -1175,7 +1176,7 @@ bool Recompiler::Recompile( | |||
|         if (insn.operands[1] != 0) | ||||
|             print("{}.u32 + ", r(insn.operands[1])); | ||||
|         println("{}.u32;", r(insn.operands[2])); | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_load_si128((__m128i*)&VectorShiftTableR[({}.u32 & 0xF) * 16]));", v(insn.operands[0]), temp()); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_load_si128((simde__m128i*)&VectorShiftTableR[({}.u32 & 0xF) * 16]));", v(insn.operands[0]), temp()); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_LWA: | ||||
|  | @ -1240,7 +1241,7 @@ bool Recompiler::Recompile( | |||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_MFFS: | ||||
|         println("\t{}.u64 = ctx.fpscr.loadFromHost();", r(insn.operands[0])); | ||||
|         println("\t{}.u64 = ctx.fpscr.loadFromHost();", f(insn.operands[0])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_MFLR: | ||||
|  | @ -1634,10 +1635,10 @@ bool Recompiler::Recompile( | |||
| 
 | ||||
|     case PPC_INST_STVX: | ||||
|     case PPC_INST_STVX128: | ||||
|         print("\t_mm_store_si128((__m128i*)(base + (("); | ||||
|         print("\tsimde_mm_store_si128((simde__m128i*)(base + (("); | ||||
|         if (insn.operands[1] != 0) | ||||
|             print("{}.u32 + ", r(insn.operands[1])); | ||||
|         println("{}.u32) & ~0xF)), _mm_shuffle_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*)VectorMaskL)));", r(insn.operands[2]), v(insn.operands[0])); | ||||
|         println("{}.u32) & ~0xF)), simde_mm_shuffle_epi8(simde_mm_load_si128((simde__m128i*){}.u8), simde_mm_load_si128((simde__m128i*)VectorMaskL)));", r(insn.operands[2]), v(insn.operands[0])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_STW: | ||||
|  | @ -1736,77 +1737,77 @@ bool Recompiler::Recompile( | |||
|     case PPC_INST_VADDFP: | ||||
|     case PPC_INST_VADDFP128: | ||||
|         printSetFlushMode(true); | ||||
|         println("\t_mm_store_ps({}.f32, _mm_add_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_ps({}.f32, simde_mm_add_ps(simde_mm_load_ps({}.f32), simde_mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VADDSHS: | ||||
|         println("\t_mm_store_si128((__m128i*){}.s16, _mm_adds_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.s16, simde_mm_adds_epi16(simde_mm_load_si128((simde__m128i*){}.s16), simde_mm_load_si128((simde__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VADDUBM: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_add_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_add_epi8(simde_mm_load_si128((simde__m128i*){}.u8), simde_mm_load_si128((simde__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VADDUBS: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_adds_epu8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_adds_epu8(simde_mm_load_si128((simde__m128i*){}.u8), simde_mm_load_si128((simde__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VADDUHM: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u16, _mm_add_epi16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u16, simde_mm_add_epi16(simde_mm_load_si128((simde__m128i*){}.u16), simde_mm_load_si128((simde__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VADDUWM: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u32, _mm_add_epi32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u32, simde_mm_add_epi32(simde_mm_load_si128((simde__m128i*){}.u32), simde_mm_load_si128((simde__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VADDUWS: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u32, _mm_adds_epu32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u32, simde_mm_adds_epu32(simde_mm_load_si128((simde__m128i*){}.u32), simde_mm_load_si128((simde__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VAND: | ||||
|     case PPC_INST_VAND128: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_and_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_and_si128(simde_mm_load_si128((simde__m128i*){}.u8), simde_mm_load_si128((simde__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VANDC128: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_andnot_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_andnot_si128(simde_mm_load_si128((simde__m128i*){}.u8), simde_mm_load_si128((simde__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VAVGSB: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_avg_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_avg_epi8(simde_mm_load_si128((simde__m128i*){}.u8), simde_mm_load_si128((simde__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VAVGSH: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_avg_epi16(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_avg_epi16(simde_mm_load_si128((simde__m128i*){}.u8), simde_mm_load_si128((simde__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VAVGUB: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_avg_epu8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_avg_epu8(simde_mm_load_si128((simde__m128i*){}.u8), simde_mm_load_si128((simde__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VCTSXS: | ||||
|     case PPC_INST_VCFPSXWS128: | ||||
|         printSetFlushMode(true); | ||||
|         print("\t_mm_store_si128((__m128i*){}.s32, _mm_vctsxs(", v(insn.operands[0])); | ||||
|         print("\tsimde_mm_store_si128((simde__m128i*){}.s32, simde_mm_vctsxs(", v(insn.operands[0])); | ||||
|         if (insn.operands[2] != 0) | ||||
|             println("_mm_mul_ps(_mm_load_ps({}.f32), _mm_set1_ps({}))));", v(insn.operands[1]), 1u << insn.operands[2]); | ||||
|             println("simde_mm_mul_ps(simde_mm_load_ps({}.f32), simde_mm_set1_ps({}))));", v(insn.operands[1]), 1u << insn.operands[2]); | ||||
|         else | ||||
|             println("_mm_load_ps({}.f32)));", v(insn.operands[1])); | ||||
|             println("simde_mm_load_ps({}.f32)));", v(insn.operands[1])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VCFSX: | ||||
|     case PPC_INST_VCSXWFP128: | ||||
|     { | ||||
|         printSetFlushMode(true); | ||||
|         print("\t_mm_store_ps({}.f32, ", v(insn.operands[0])); | ||||
|         print("\tsimde_mm_store_ps({}.f32, ", v(insn.operands[0])); | ||||
|         if (insn.operands[2] != 0) | ||||
|         { | ||||
|             const float value = ldexp(1.0f, -int32_t(insn.operands[2])); | ||||
|             println("_mm_mul_ps(_mm_cvtepi32_ps(_mm_load_si128((__m128i*){}.u32)), _mm_castsi128_ps(_mm_set1_epi32(int(0x{:X})))));", v(insn.operands[1]), *reinterpret_cast<const uint32_t*>(&value)); | ||||
|             println("simde_mm_mul_ps(simde_mm_cvtepi32_ps(simde_mm_load_si128((simde__m128i*){}.u32)), simde_mm_castsi128_ps(simde_mm_set1_epi32(int(0x{:X})))));", v(insn.operands[1]), *reinterpret_cast<const uint32_t*>(&value)); | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|             println("_mm_cvtepi32_ps(_mm_load_si128((__m128i*){}.u32)));", v(insn.operands[1])); | ||||
|             println("simde_mm_cvtepi32_ps(simde_mm_load_si128((simde__m128i*){}.u32)));", v(insn.operands[1])); | ||||
|         } | ||||
|         break; | ||||
|     } | ||||
|  | @ -1815,15 +1816,15 @@ bool Recompiler::Recompile( | |||
|     case PPC_INST_VCUXWFP128: | ||||
|     { | ||||
|         printSetFlushMode(true); | ||||
|         print("\t_mm_store_ps({}.f32, ", v(insn.operands[0])); | ||||
|         print("\tsimde_mm_store_ps({}.f32, ", v(insn.operands[0])); | ||||
|         if (insn.operands[2] != 0) | ||||
|         { | ||||
|             const float value = ldexp(1.0f, -int32_t(insn.operands[2])); | ||||
|             println("_mm_mul_ps(_mm_cvtepu32_ps_(_mm_load_si128((__m128i*){}.u32)), _mm_castsi128_ps(_mm_set1_epi32(int(0x{:X})))));", v(insn.operands[1]), *reinterpret_cast<const uint32_t*>(&value)); | ||||
|             println("simde_mm_mul_ps(simde_mm_cvtepu32_ps_(simde_mm_load_si128((simde__m128i*){}.u32)), simde_mm_castsi128_ps(simde_mm_set1_epi32(int(0x{:X})))));", v(insn.operands[1]), *reinterpret_cast<const uint32_t*>(&value)); | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|             println("_mm_cvtepu32_ps_(_mm_load_si128((__m128i*){}.u32)));", v(insn.operands[1])); | ||||
|             println("simde_mm_cvtepu32_ps_(simde_mm_load_si128((simde__m128i*){}.u32)));", v(insn.operands[1])); | ||||
|         } | ||||
|         break; | ||||
|     } | ||||
|  | @ -1836,46 +1837,46 @@ bool Recompiler::Recompile( | |||
|     case PPC_INST_VCMPEQFP: | ||||
|     case PPC_INST_VCMPEQFP128: | ||||
|         printSetFlushMode(true); | ||||
|         println("\t_mm_store_ps({}.f32, _mm_cmpeq_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_ps({}.f32, simde_mm_cmpeq_ps(simde_mm_load_ps({}.f32), simde_mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         if (strchr(insn.opcode->name, '.')) | ||||
|             println("\t{}.setFromMask(_mm_load_ps({}.f32), 0xF);", cr(6), v(insn.operands[0])); | ||||
|             println("\t{}.setFromMask(simde_mm_load_ps({}.f32), 0xF);", cr(6), v(insn.operands[0])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VCMPEQUB: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_cmpeq_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_cmpeq_epi8(simde_mm_load_si128((simde__m128i*){}.u8), simde_mm_load_si128((simde__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         if (strchr(insn.opcode->name, '.')) | ||||
|             println("\t{}.setFromMask(_mm_load_si128((__m128i*){}.u8), 0xFFFF);", cr(6), v(insn.operands[0])); | ||||
|             println("\t{}.setFromMask(simde_mm_load_si128((simde__m128i*){}.u8), 0xFFFF);", cr(6), v(insn.operands[0])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VCMPEQUW: | ||||
|     case PPC_INST_VCMPEQUW128: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_cmpeq_epi32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_cmpeq_epi32(simde_mm_load_si128((simde__m128i*){}.u32), simde_mm_load_si128((simde__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         if (strchr(insn.opcode->name, '.')) | ||||
|             println("\t{}.setFromMask(_mm_load_ps({}.f32), 0xF);", cr(6), v(insn.operands[0])); | ||||
|             println("\t{}.setFromMask(simde_mm_load_ps({}.f32), 0xF);", cr(6), v(insn.operands[0])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VCMPGEFP: | ||||
|     case PPC_INST_VCMPGEFP128: | ||||
|         printSetFlushMode(true); | ||||
|         println("\t_mm_store_ps({}.f32, _mm_cmpge_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_ps({}.f32, simde_mm_cmpge_ps(simde_mm_load_ps({}.f32), simde_mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         if (strchr(insn.opcode->name, '.')) | ||||
|             println("\t{}.setFromMask(_mm_load_ps({}.f32), 0xF);", cr(6), v(insn.operands[0])); | ||||
|             println("\t{}.setFromMask(simde_mm_load_ps({}.f32), 0xF);", cr(6), v(insn.operands[0])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VCMPGTFP: | ||||
|     case PPC_INST_VCMPGTFP128: | ||||
|         printSetFlushMode(true); | ||||
|         println("\t_mm_store_ps({}.f32, _mm_cmpgt_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_ps({}.f32, simde_mm_cmpgt_ps(simde_mm_load_ps({}.f32), simde_mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         if (strchr(insn.opcode->name, '.')) | ||||
|             println("\t{}.setFromMask(_mm_load_ps({}.f32), 0xF);", cr(6), v(insn.operands[0])); | ||||
|             println("\t{}.setFromMask(simde_mm_load_ps({}.f32), 0xF);", cr(6), v(insn.operands[0])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VCMPGTUB: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_cmpgt_epu8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_cmpgt_epu8(simde_mm_load_si128((simde__m128i*){}.u8), simde_mm_load_si128((simde__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VCMPGTUH: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_cmpgt_epu16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_cmpgt_epu16(simde_mm_load_si128((simde__m128i*){}.u16), simde_mm_load_si128((simde__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VEXPTEFP: | ||||
|  | @ -1898,87 +1899,87 @@ bool Recompiler::Recompile( | |||
|     case PPC_INST_VMADDFP: | ||||
|     case PPC_INST_VMADDFP128: | ||||
|         printSetFlushMode(true); | ||||
|         println("\t_mm_store_ps({}.f32, _mm_add_ps(_mm_mul_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]), v(insn.operands[3])); | ||||
|         println("\tsimde_mm_store_ps({}.f32, simde_mm_add_ps(simde_mm_mul_ps(simde_mm_load_ps({}.f32), simde_mm_load_ps({}.f32)), simde_mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]), v(insn.operands[3])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VMAXFP: | ||||
|     case PPC_INST_VMAXFP128: | ||||
|         printSetFlushMode(true); | ||||
|         println("\t_mm_store_ps({}.f32, _mm_max_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_ps({}.f32, simde_mm_max_ps(simde_mm_load_ps({}.f32), simde_mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VMAXSW: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u32, _mm_max_epi32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u32, simde_mm_max_epi32(simde_mm_load_si128((simde__m128i*){}.u32), simde_mm_load_si128((simde__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VMINFP: | ||||
|     case PPC_INST_VMINFP128: | ||||
|         printSetFlushMode(true); | ||||
|         println("\t_mm_store_ps({}.f32, _mm_min_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_ps({}.f32, simde_mm_min_ps(simde_mm_load_ps({}.f32), simde_mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VMRGHB: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_unpackhi_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_unpackhi_epi8(simde_mm_load_si128((simde__m128i*){}.u8), simde_mm_load_si128((simde__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VMRGHH: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u16, _mm_unpackhi_epi16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u16, simde_mm_unpackhi_epi16(simde_mm_load_si128((simde__m128i*){}.u16), simde_mm_load_si128((simde__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VMRGHW: | ||||
|     case PPC_INST_VMRGHW128: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u32, _mm_unpackhi_epi32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u32, simde_mm_unpackhi_epi32(simde_mm_load_si128((simde__m128i*){}.u32), simde_mm_load_si128((simde__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VMRGLB: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_unpacklo_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_unpacklo_epi8(simde_mm_load_si128((simde__m128i*){}.u8), simde_mm_load_si128((simde__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VMRGLH: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u16, _mm_unpacklo_epi16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u16, simde_mm_unpacklo_epi16(simde_mm_load_si128((simde__m128i*){}.u16), simde_mm_load_si128((simde__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VMRGLW: | ||||
|     case PPC_INST_VMRGLW128: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u32, _mm_unpacklo_epi32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u32, simde_mm_unpacklo_epi32(simde_mm_load_si128((simde__m128i*){}.u32), simde_mm_load_si128((simde__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VMSUM3FP128: | ||||
|         // NOTE: accounting for full vector reversal here. should dot product yzw instead of xyz
 | ||||
|         printSetFlushMode(true); | ||||
|         println("\t_mm_store_ps({}.f32, _mm_dp_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32), 0xEF));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_ps({}.f32, simde_mm_dp_ps(simde_mm_load_ps({}.f32), simde_mm_load_ps({}.f32), 0xEF));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VMSUM4FP128: | ||||
|         printSetFlushMode(true); | ||||
|         println("\t_mm_store_ps({}.f32, _mm_dp_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32), 0xFF));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_ps({}.f32, simde_mm_dp_ps(simde_mm_load_ps({}.f32), simde_mm_load_ps({}.f32), 0xFF));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VMULFP128: | ||||
|         printSetFlushMode(true); | ||||
|         println("\t_mm_store_ps({}.f32, _mm_mul_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_ps({}.f32, simde_mm_mul_ps(simde_mm_load_ps({}.f32), simde_mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VNMSUBFP: | ||||
|     case PPC_INST_VNMSUBFP128: | ||||
|         printSetFlushMode(true); | ||||
|         println("\t_mm_store_ps({}.f32, _mm_xor_ps(_mm_sub_ps(_mm_mul_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)), _mm_load_ps({}.f32)), _mm_castsi128_ps(_mm_set1_epi32(int(0x80000000)))));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]), v(insn.operands[3])); | ||||
|         println("\tsimde_mm_store_ps({}.f32, simde_mm_xor_ps(simde_mm_sub_ps(simde_mm_mul_ps(simde_mm_load_ps({}.f32), simde_mm_load_ps({}.f32)), simde_mm_load_ps({}.f32)), simde_mm_castsi128_ps(simde_mm_set1_epi32(int(0x80000000)))));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]), v(insn.operands[3])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VOR: | ||||
|     case PPC_INST_VOR128: | ||||
|         print("\t_mm_store_si128((__m128i*){}.u8, ", v(insn.operands[0])); | ||||
|         print("\tsimde_mm_store_si128((simde__m128i*){}.u8, ", v(insn.operands[0])); | ||||
| 
 | ||||
|         if (insn.operands[1] != insn.operands[2]) | ||||
|             println("_mm_or_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[1]), v(insn.operands[2])); | ||||
|             println("simde_mm_or_si128(simde_mm_load_si128((simde__m128i*){}.u8), simde_mm_load_si128((simde__m128i*){}.u8)));", v(insn.operands[1]), v(insn.operands[2])); | ||||
|         else | ||||
|             println("_mm_load_si128((__m128i*){}.u8));", v(insn.operands[1])); | ||||
|             println("simde_mm_load_si128((simde__m128i*){}.u8));", v(insn.operands[1])); | ||||
| 
 | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VPERM: | ||||
|     case PPC_INST_VPERM128: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_perm_epi8_(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]), v(insn.operands[3])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_perm_epi8_(simde_mm_load_si128((simde__m128i*){}.u8), simde_mm_load_si128((simde__m128i*){}.u8), simde_mm_load_si128((simde__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]), v(insn.operands[3])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VPERMWI128: | ||||
|  | @ -1989,7 +1990,7 @@ bool Recompiler::Recompile( | |||
|         uint32_t z = 3 - ((insn.operands[2] >> 4) & 0x3); | ||||
|         uint32_t w = 3 - ((insn.operands[2] >> 6) & 0x3); | ||||
|         uint32_t perm = x | (y << 2) | (z << 4) | (w << 6); | ||||
|         println("\t_mm_store_si128((__m128i*){}.u32, _mm_shuffle_epi32(_mm_load_si128((__m128i*){}.u32), 0x{:X}));", v(insn.operands[0]), v(insn.operands[1]), perm); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u32, simde_mm_shuffle_epi32(simde_mm_load_si128((simde__m128i*){}.u32), 0x{:X}));", v(insn.operands[0]), v(insn.operands[1]), perm); | ||||
|         break; | ||||
|     } | ||||
| 
 | ||||
|  | @ -2000,7 +2001,7 @@ bool Recompiler::Recompile( | |||
|         switch (insn.operands[2]) | ||||
|         { | ||||
|         case 0: // D3D color
 | ||||
|             if (insn.operands[3] != 1 || insn.operands[4] != 3) | ||||
|             if (insn.operands[3] != 1) | ||||
|                 fmt::println("Unexpected D3D color pack instruction at {:X}", base); | ||||
| 
 | ||||
|             for (size_t i = 0; i < 4; i++) | ||||
|  | @ -2010,7 +2011,29 @@ bool Recompiler::Recompile( | |||
|                 println("\t{}.f32[{}] = {}.f32[{}] < 3.0f ? 3.0f : ({}.f32[{}] > {}.f32[{}] ? {}.f32[{}] : {}.f32[{}]);", vTemp(), i, v(insn.operands[1]), i, v(insn.operands[1]), i, vTemp(), i, vTemp(), i, v(insn.operands[1]), i); | ||||
|                 println("\t{}.u32 {}= uint32_t({}.u8[{}]) << {};", temp(), i == 0 ? "" : "|", vTemp(), i * 4, indices[i] * 8); | ||||
|             } | ||||
|             println("\t{}.u32[3] = {}.u32;", v(insn.operands[0]), temp()); | ||||
|             println("\t{}.u32[{}] = {}.u32;", v(insn.operands[0]), insn.operands[4], temp()); | ||||
|             break; | ||||
| 
 | ||||
|         case 5: // float16_4
 | ||||
|             if (insn.operands[3] != 2 || insn.operands[4] > 2) | ||||
|                 fmt::println("Unexpected float16_4 pack instruction at {:X}", base); | ||||
| 
 | ||||
|             for (size_t i = 0; i < 4; i++) | ||||
|             { | ||||
|         		// Strip sign from source
 | ||||
|         		println("\t{}.u32 = ({}.u32[{}]&0x7FFFFFFF);", temp(), v(insn.operands[1]), i); | ||||
|         		// If |source| is > 65504, clamp output to 0x7FFF, else save 8 exponent bits 
 | ||||
|         		println("\t{0}.u8[0] = ({1}.f32 != {1}.f32) || ({1}.f32 > 65504.0f) ? 0xFF : (({2}.u32[{3}]&0x7f800000)>>23);", vTemp(), temp(), v(insn.operands[1]), i); | ||||
|         		// If 8 exponent bits were saved, it can only be 0x8E at most
 | ||||
|         		// If saved, save first 10 bits of mantissa
 | ||||
|         		println("\t{}.u16 = {}.u8[0] != 0xFF ? (({}.u32[{}]&0x7FE000)>>13) : 0x0;", temp(), vTemp(), v(insn.operands[1]), i); | ||||
|         		// If saved and > 127-15, exponent is converted from 8 to 5-bit by subtracting 0x70
 | ||||
|         		// If saved but not > 127-15, clamp exponent at 0, add 0x400 to mantissa and shift right by (0x71-exponent)
 | ||||
|         		// If right shift is greater than 31 bits, manually clamp mantissa to 0 or else the output of the shift will be wrong
 | ||||
|         		println("\t{0}.u16[{1}] = {2}.u8[0] != 0xFF ? ({2}.u8[0] > 0x70 ? ((({2}.u8[0]-0x70)<<10)+{3}.u16) : (0x71-{2}.u8[0] > 31 ? 0x0 : ((0x400+{3}.u16)>>(0x71-{2}.u8[0])))) : 0x7FFF;", v(insn.operands[0]), i+(2*insn.operands[4]), vTemp(), temp()); | ||||
|         		// Add back original sign
 | ||||
|         		println("\t{}.u16[{}] |= (({}.u32[{}]&0x80000000)>>16);", v(insn.operands[0]), i+(2*insn.operands[4]), v(insn.operands[1]), i); | ||||
|             } | ||||
|             break; | ||||
| 
 | ||||
|         default: | ||||
|  | @ -2021,38 +2044,38 @@ bool Recompiler::Recompile( | |||
| 
 | ||||
|     case PPC_INST_VPKSHUS: | ||||
|     case PPC_INST_VPKSHUS128: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_packus_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_packus_epi16(simde_mm_load_si128((simde__m128i*){}.s16), simde_mm_load_si128((simde__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VREFP: | ||||
|     case PPC_INST_VREFP128: | ||||
|         // TODO: see if we can use rcp safely
 | ||||
|         printSetFlushMode(true); | ||||
|         println("\t_mm_store_ps({}.f32, _mm_div_ps(_mm_set1_ps(1), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1])); | ||||
|         println("\tsimde_mm_store_ps({}.f32, simde_mm_div_ps(simde_mm_set1_ps(1), simde_mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VRFIM: | ||||
|     case PPC_INST_VRFIM128: | ||||
|         printSetFlushMode(true); | ||||
|         println("\t_mm_store_ps({}.f32, _mm_round_ps(_mm_load_ps({}.f32), _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC));", v(insn.operands[0]), v(insn.operands[1])); | ||||
|         println("\tsimde_mm_store_ps({}.f32, simde_mm_round_ps(simde_mm_load_ps({}.f32), SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_NO_EXC));", v(insn.operands[0]), v(insn.operands[1])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VRFIN: | ||||
|     case PPC_INST_VRFIN128: | ||||
|         printSetFlushMode(true); | ||||
|         println("\t_mm_store_ps({}.f32, _mm_round_ps(_mm_load_ps({}.f32), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));", v(insn.operands[0]), v(insn.operands[1])); | ||||
|         println("\tsimde_mm_store_ps({}.f32, simde_mm_round_ps(simde_mm_load_ps({}.f32), SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_NO_EXC));", v(insn.operands[0]), v(insn.operands[1])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VRFIZ: | ||||
|     case PPC_INST_VRFIZ128: | ||||
|         printSetFlushMode(true); | ||||
|         println("\t_mm_store_ps({}.f32, _mm_round_ps(_mm_load_ps({}.f32), _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC));", v(insn.operands[0]), v(insn.operands[1])); | ||||
|         println("\tsimde_mm_store_ps({}.f32, simde_mm_round_ps(simde_mm_load_ps({}.f32), SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_NO_EXC));", v(insn.operands[0]), v(insn.operands[1])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VRLIMI128: | ||||
|     { | ||||
|         constexpr size_t shuffles[] = { _MM_SHUFFLE(3, 2, 1, 0), _MM_SHUFFLE(2, 1, 0, 3), _MM_SHUFFLE(1, 0, 3, 2), _MM_SHUFFLE(0, 3, 2, 1) }; | ||||
|         println("\t_mm_store_ps({}.f32, _mm_blend_ps(_mm_load_ps({}.f32), _mm_permute_ps(_mm_load_ps({}.f32), {}), {}));", v(insn.operands[0]), v(insn.operands[0]), v(insn.operands[1]), shuffles[insn.operands[3]], insn.operands[2]); | ||||
|         constexpr size_t shuffles[] = { SIMDE_MM_SHUFFLE(3, 2, 1, 0), SIMDE_MM_SHUFFLE(2, 1, 0, 3), SIMDE_MM_SHUFFLE(1, 0, 3, 2), SIMDE_MM_SHUFFLE(0, 3, 2, 1) }; | ||||
|         println("\tsimde_mm_store_ps({}.f32, simde_mm_blend_ps(simde_mm_load_ps({}.f32), simde_mm_permute_ps(simde_mm_load_ps({}.f32), {}), {}));", v(insn.operands[0]), v(insn.operands[0]), v(insn.operands[1]), shuffles[insn.operands[3]], insn.operands[2]); | ||||
|         break; | ||||
|     } | ||||
| 
 | ||||
|  | @ -2061,11 +2084,11 @@ bool Recompiler::Recompile( | |||
|         // TODO: see if we can use rsqrt safely
 | ||||
|         // TODO: we can detect if the input is from a dot product and apply logic only on one value
 | ||||
|         printSetFlushMode(true); | ||||
|         println("\t_mm_store_ps({}.f32, _mm_div_ps(_mm_set1_ps(1), _mm_sqrt_ps(_mm_load_ps({}.f32))));", v(insn.operands[0]), v(insn.operands[1])); | ||||
|         println("\tsimde_mm_store_ps({}.f32, simde_mm_div_ps(simde_mm_set1_ps(1), simde_mm_sqrt_ps(simde_mm_load_ps({}.f32))));", v(insn.operands[0]), v(insn.operands[1])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VSEL: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_or_si128(_mm_andnot_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)), _mm_and_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8))));", v(insn.operands[0]), v(insn.operands[3]), v(insn.operands[1]), v(insn.operands[3]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_or_si128(simde_mm_andnot_si128(simde_mm_load_si128((simde__m128i*){}.u8), simde_mm_load_si128((simde__m128i*){}.u8)), simde_mm_and_si128(simde_mm_load_si128((simde__m128i*){}.u8), simde_mm_load_si128((simde__m128i*){}.u8))));", v(insn.operands[0]), v(insn.operands[3]), v(insn.operands[1]), v(insn.operands[3]), v(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VSLB: | ||||
|  | @ -2076,7 +2099,7 @@ bool Recompiler::Recompile( | |||
| 
 | ||||
|     case PPC_INST_VSLDOI: | ||||
|     case PPC_INST_VSLDOI128: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_alignr_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8), {}));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]), 16 - insn.operands[3]); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_alignr_epi8(simde_mm_load_si128((simde__m128i*){}.u8), simde_mm_load_si128((simde__m128i*){}.u8), {}));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]), 16 - insn.operands[3]); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VSLW: | ||||
|  | @ -2090,7 +2113,7 @@ bool Recompiler::Recompile( | |||
|     { | ||||
|         // NOTE: accounting for full vector reversal here
 | ||||
|         uint32_t perm = 15 - insn.operands[2]; | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_shuffle_epi8(_mm_load_si128((__m128i*){}.u8), _mm_set1_epi8(char(0x{:X}))));", v(insn.operands[0]), v(insn.operands[1]), perm); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_shuffle_epi8(simde_mm_load_si128((simde__m128i*){}.u8), simde_mm_set1_epi8(char(0x{:X}))));", v(insn.operands[0]), v(insn.operands[1]), perm); | ||||
|         break; | ||||
|     } | ||||
| 
 | ||||
|  | @ -2099,17 +2122,17 @@ bool Recompiler::Recompile( | |||
|         // NOTE: accounting for full vector reversal here
 | ||||
|         uint32_t perm = 7 - insn.operands[2]; | ||||
|         perm = (perm * 2) | ((perm * 2 + 1) << 8); | ||||
|         println("\t_mm_store_si128((__m128i*){}.u16, _mm_shuffle_epi8(_mm_load_si128((__m128i*){}.u16), _mm_set1_epi16(short(0x{:X}))));", v(insn.operands[0]), v(insn.operands[1]), perm); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u16, simde_mm_shuffle_epi8(simde_mm_load_si128((simde__m128i*){}.u16), simde_mm_set1_epi16(short(0x{:X}))));", v(insn.operands[0]), v(insn.operands[1]), perm); | ||||
|         break; | ||||
|     } | ||||
| 
 | ||||
|     case PPC_INST_VSPLTISB: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_set1_epi8(char(0x{:X})));", v(insn.operands[0]), insn.operands[1]); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_set1_epi8(char(0x{:X})));", v(insn.operands[0]), insn.operands[1]); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VSPLTISW: | ||||
|     case PPC_INST_VSPLTISW128: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u32, _mm_set1_epi32(int(0x{:X})));", v(insn.operands[0]), insn.operands[1]); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u32, simde_mm_set1_epi32(int(0x{:X})));", v(insn.operands[0]), insn.operands[1]); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VSPLTW: | ||||
|  | @ -2118,12 +2141,12 @@ bool Recompiler::Recompile( | |||
|         // NOTE: accounting for full vector reversal here
 | ||||
|         uint32_t perm = 3 - insn.operands[2]; | ||||
|         perm |= (perm << 2) | (perm << 4) | (perm << 6); | ||||
|         println("\t_mm_store_si128((__m128i*){}.u32, _mm_shuffle_epi32(_mm_load_si128((__m128i*){}.u32), 0x{:X}));", v(insn.operands[0]), v(insn.operands[1]), perm); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u32, simde_mm_shuffle_epi32(simde_mm_load_si128((simde__m128i*){}.u32), 0x{:X}));", v(insn.operands[0]), v(insn.operands[1]), perm); | ||||
|         break; | ||||
|     } | ||||
| 
 | ||||
|     case PPC_INST_VSR: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_vsr(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_vsr(simde_mm_load_si128((simde__m128i*){}.u8), simde_mm_load_si128((simde__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VSRAW: | ||||
|  | @ -2143,7 +2166,7 @@ bool Recompiler::Recompile( | |||
|     case PPC_INST_VSUBFP: | ||||
|     case PPC_INST_VSUBFP128: | ||||
|         printSetFlushMode(true); | ||||
|         println("\t_mm_store_ps({}.f32, _mm_sub_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_ps({}.f32, simde_mm_sub_ps(simde_mm_load_ps({}.f32), simde_mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VSUBSWS: | ||||
|  | @ -2156,11 +2179,11 @@ bool Recompiler::Recompile( | |||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VSUBUBS: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_subs_epu8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_subs_epu8(simde_mm_load_si128((simde__m128i*){}.u8), simde_mm_load_si128((simde__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VSUBUHM: | ||||
|         println("\t_mm_store_si128((__m128i*){}.u8, _mm_sub_epi16(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.u8, simde_mm_sub_epi16(simde_mm_load_si128((simde__m128i*){}.u8), simde_mm_load_si128((simde__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VUPKD3D128: | ||||
|  | @ -2197,32 +2220,32 @@ bool Recompiler::Recompile( | |||
| 
 | ||||
|     case PPC_INST_VUPKHSB: | ||||
|     case PPC_INST_VUPKHSB128: | ||||
|         println("\t_mm_store_si128((__m128i*){}.s16, _mm_cvtepi8_epi16(_mm_unpackhi_epi64(_mm_load_si128((__m128i*){}.s8), _mm_load_si128((__m128i*){}.s8))));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[1])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.s16, simde_mm_cvtepi8_epi16(simde_mm_unpackhi_epi64(simde_mm_load_si128((simde__m128i*){}.s8), simde_mm_load_si128((simde__m128i*){}.s8))));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[1])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VUPKHSH: | ||||
|     case PPC_INST_VUPKHSH128: | ||||
|         println("\t_mm_store_si128((__m128i*){}.s32, _mm_cvtepi16_epi32(_mm_unpackhi_epi64(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16))));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[1])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.s32, simde_mm_cvtepi16_epi32(simde_mm_unpackhi_epi64(simde_mm_load_si128((simde__m128i*){}.s16), simde_mm_load_si128((simde__m128i*){}.s16))));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[1])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VUPKLSB: | ||||
|     case PPC_INST_VUPKLSB128: | ||||
|         println("\t_mm_store_si128((__m128i*){}.s32, _mm_cvtepi8_epi16(_mm_load_si128((__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[1])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.s32, simde_mm_cvtepi8_epi16(simde_mm_load_si128((simde__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[1])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VUPKLSH: | ||||
|     case PPC_INST_VUPKLSH128: | ||||
|         println("\t_mm_store_si128((__m128i*){}.s32, _mm_cvtepi16_epi32(_mm_load_si128((__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[1])); | ||||
|         println("\tsimde_mm_store_si128((simde__m128i*){}.s32, simde_mm_cvtepi16_epi32(simde_mm_load_si128((simde__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[1])); | ||||
|         break; | ||||
| 
 | ||||
|     case PPC_INST_VXOR: | ||||
|     case PPC_INST_VXOR128: | ||||
|         print("\t_mm_store_si128((__m128i*){}.u8, ", v(insn.operands[0])); | ||||
|         print("\tsimde_mm_store_si128((simde__m128i*){}.u8, ", v(insn.operands[0])); | ||||
| 
 | ||||
|         if (insn.operands[1] != insn.operands[2]) | ||||
|             println("_mm_xor_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[1]), v(insn.operands[2])); | ||||
|             println("simde_mm_xor_si128(simde_mm_load_si128((simde__m128i*){}.u8), simde_mm_load_si128((simde__m128i*){}.u8)));", v(insn.operands[1]), v(insn.operands[2])); | ||||
|         else | ||||
|             println("_mm_setzero_si128());"); | ||||
|             println("simde_mm_setzero_si128());"); | ||||
| 
 | ||||
|         break; | ||||
| 
 | ||||
|  |  | |||
|  | @ -17,8 +17,9 @@ target_compile_definitions(XenonUtils | |||
| ) | ||||
| 
 | ||||
| target_include_directories(XenonUtils  | ||||
|     PUBLIC  | ||||
|     PUBLIC | ||||
|         . | ||||
|         "${THIRDPARTY_ROOT}/simde" | ||||
|     PRIVATE | ||||
|         "${THIRDPARTY_ROOT}/libmspack/libmspack/mspack" | ||||
|         "${THIRDPARTY_ROOT}/tiny-AES-c" | ||||
|  |  | |||
|  | @ -12,13 +12,13 @@ | |||
| #include <cstdlib> | ||||
| #include <cstring> | ||||
| 
 | ||||
| #include <x86intrin.h> | ||||
| #include <x86/avx.h> | ||||
| #include <x86/sse.h> | ||||
| #include <x86/sse4.1.h> | ||||
| 
 | ||||
| #ifdef _WIN32 | ||||
| #include <intrin.h> | ||||
| #else | ||||
| #include <xmmintrin.h> | ||||
| #include <smmintrin.h> | ||||
| // SSE3 constants are missing from simde
 | ||||
| #ifndef _MM_DENORMALS_ZERO_MASK | ||||
| #define _MM_DENORMALS_ZERO_MASK 0x0040 | ||||
| #endif | ||||
| 
 | ||||
| #define PPC_JOIN(x, y) x##y | ||||
|  | @ -29,7 +29,7 @@ | |||
| #define PPC_EXTERN_FUNC(x) extern PPC_FUNC(x) | ||||
| #define PPC_WEAK_FUNC(x) __attribute__((weak,noinline)) PPC_FUNC(x) | ||||
| 
 | ||||
| #define PPC_FUNC_PROLOGUE() __builtin_assume(((size_t)base & 0xFFFFFFFF) == 0) | ||||
| #define PPC_FUNC_PROLOGUE() __builtin_assume(((size_t)base & 0x1F) == 0) | ||||
| 
 | ||||
| #ifndef PPC_LOAD_U8 | ||||
| #define PPC_LOAD_U8(x) *(volatile uint8_t*)(base + (x)) | ||||
|  | @ -123,21 +123,18 @@ struct PPCFuncMapping | |||
| 
 | ||||
| extern PPCFuncMapping PPCFuncMappings[]; | ||||
| 
 | ||||
| struct PPCRegister | ||||
| union PPCRegister | ||||
| { | ||||
|     union | ||||
|     { | ||||
|         int8_t s8; | ||||
|         uint8_t u8; | ||||
|         int16_t s16; | ||||
|         uint16_t u16; | ||||
|         int32_t s32; | ||||
|         uint32_t u32; | ||||
|         int64_t s64; | ||||
|         uint64_t u64; | ||||
|         float f32; | ||||
|         double f64; | ||||
|     }; | ||||
|     int8_t s8; | ||||
|     uint8_t u8; | ||||
|     int16_t s16; | ||||
|     uint16_t u16; | ||||
|     int32_t s32; | ||||
|     uint32_t u32; | ||||
|     int64_t s64; | ||||
|     uint64_t u64; | ||||
|     float f32; | ||||
|     double f64; | ||||
| }; | ||||
| 
 | ||||
| struct PPCXERRegister | ||||
|  | @ -175,18 +172,18 @@ struct PPCCRRegister | |||
|         eq = !un && (left == right); | ||||
|     } | ||||
| 
 | ||||
|     inline void setFromMask(__m128 mask, int imm) noexcept | ||||
|     inline void setFromMask(simde__m128 mask, int imm) noexcept | ||||
|     { | ||||
|         int m = _mm_movemask_ps(mask); | ||||
|         int m = simde_mm_movemask_ps(mask); | ||||
|         lt = m == imm; // all equal
 | ||||
|         gt = 0; | ||||
|         eq = m == 0; // none equal
 | ||||
|         so = 0; | ||||
|     } | ||||
| 
 | ||||
|     inline void setFromMask(__m128i mask, int imm) noexcept | ||||
|     inline void setFromMask(simde__m128i mask, int imm) noexcept | ||||
|     { | ||||
|         int m = _mm_movemask_epi8(mask); | ||||
|         int m = simde_mm_movemask_epi8(mask); | ||||
|         lt = m == imm; // all equal
 | ||||
|         gt = 0; | ||||
|         eq = m == 0; // none equal
 | ||||
|  | @ -194,21 +191,18 @@ struct PPCCRRegister | |||
|     } | ||||
| }; | ||||
| 
 | ||||
| struct alignas(0x10) PPCVRegister | ||||
| union alignas(0x10) PPCVRegister | ||||
| { | ||||
|     union | ||||
|     { | ||||
|         int8_t s8[16]; | ||||
|         uint8_t u8[16]; | ||||
|         int16_t s16[8]; | ||||
|         uint16_t u16[8]; | ||||
|         int32_t s32[4]; | ||||
|         uint32_t u32[4]; | ||||
|         int64_t s64[2]; | ||||
|         uint64_t u64[2]; | ||||
|         float f32[4]; | ||||
|         double f64[2]; | ||||
|     }; | ||||
|     int8_t s8[16]; | ||||
|     uint8_t u8[16]; | ||||
|     int16_t s16[8]; | ||||
|     uint16_t u16[8]; | ||||
|     int32_t s32[4]; | ||||
|     uint32_t u32[4]; | ||||
|     int64_t s64[2]; | ||||
|     uint64_t u64[2]; | ||||
|     float f32[4]; | ||||
|     double f64[2]; | ||||
| }; | ||||
| 
 | ||||
| #define PPC_ROUND_NEAREST 0x00 | ||||
|  | @ -221,34 +215,71 @@ struct PPCFPSCRRegister | |||
| { | ||||
|     uint32_t csr; | ||||
| 
 | ||||
|     static constexpr size_t GuestToHost[] = { _MM_ROUND_NEAREST, _MM_ROUND_TOWARD_ZERO, _MM_ROUND_UP, _MM_ROUND_DOWN }; | ||||
|     static constexpr size_t HostToGuest[] = { PPC_ROUND_NEAREST, PPC_ROUND_DOWN, PPC_ROUND_UP, PPC_ROUND_TOWARD_ZERO }; | ||||
| 
 | ||||
|     // simde does not handle denormal flags, so we need to implement per-arch.
 | ||||
| #if defined(__x86_64__) || defined(_M_X64) | ||||
|     static constexpr size_t RoundShift = 13; | ||||
|     static constexpr size_t RoundMask = SIMDE_MM_ROUND_MASK; | ||||
|     static constexpr size_t FlushMask = SIMDE_MM_FLUSH_ZERO_MASK | _MM_DENORMALS_ZERO_MASK; | ||||
|     static constexpr size_t GuestToHost[] = { SIMDE_MM_ROUND_NEAREST, SIMDE_MM_ROUND_TOWARD_ZERO, SIMDE_MM_ROUND_UP, SIMDE_MM_ROUND_DOWN }; | ||||
| 
 | ||||
|     inline uint32_t getcsr() noexcept | ||||
|     { | ||||
|         return simde_mm_getcsr(); | ||||
|     } | ||||
| 
 | ||||
|     inline void setcsr(uint32_t csr) noexcept | ||||
|     { | ||||
|         simde_mm_setcsr(csr); | ||||
|     } | ||||
| #elif defined(__aarch64__) || defined(_M_ARM64) | ||||
|     // RMode
 | ||||
|     static constexpr size_t RoundShift = 22; | ||||
|     static constexpr size_t RoundMask = 3 << RoundShift; | ||||
|     // FZ and FZ16
 | ||||
|     static constexpr size_t FlushMask = (1 << 19) | (1 << 24); | ||||
|     // Nearest, Zero, +Infinity, -Infinity
 | ||||
|     static constexpr size_t GuestToHost[] = { 0 << RoundShift, 3 << RoundShift, 1 << RoundShift, 2 << RoundShift }; | ||||
| 
 | ||||
|     inline uint32_t getcsr() noexcept | ||||
|     { | ||||
|         uint64_t csr; | ||||
|         __asm__ __volatile__("mrs %0, fpcr" : "=r"(csr)); | ||||
|         return csr; | ||||
|     } | ||||
| 
 | ||||
|     inline void setcsr(uint32_t csr) noexcept | ||||
|     { | ||||
|         __asm__ __volatile__("msr fpcr, %0" : : "r"(csr)); | ||||
|     } | ||||
| #else | ||||
| #   error "Missing implementation for FPSCR." | ||||
| #endif | ||||
| 
 | ||||
|     inline uint32_t loadFromHost() noexcept | ||||
|     { | ||||
|         csr = _mm_getcsr(); | ||||
|         return HostToGuest[(csr & _MM_ROUND_MASK) >> 13]; | ||||
|         csr = getcsr(); | ||||
|         return HostToGuest[(csr & RoundMask) >> RoundShift]; | ||||
|     } | ||||
|          | ||||
|     inline void storeFromGuest(uint32_t value) noexcept | ||||
|     { | ||||
|         csr &= ~_MM_ROUND_MASK; | ||||
|         csr &= ~RoundMask; | ||||
|         csr |= GuestToHost[value & PPC_ROUND_MASK]; | ||||
|         _mm_setcsr(csr); | ||||
|         setcsr(csr); | ||||
|     } | ||||
| 
 | ||||
|     static constexpr size_t FlushMask = _MM_FLUSH_ZERO_MASK | _MM_DENORMALS_ZERO_MASK; | ||||
| 
 | ||||
|     inline void enableFlushModeUnconditional() noexcept | ||||
|     { | ||||
|         csr |= FlushMask; | ||||
|         _mm_setcsr(csr); | ||||
|         setcsr(csr); | ||||
|     } | ||||
| 
 | ||||
|     inline void disableFlushModeUnconditional() noexcept | ||||
|     { | ||||
|         csr &= ~FlushMask; | ||||
|         _mm_setcsr(csr); | ||||
|         setcsr(csr); | ||||
|     } | ||||
| 
 | ||||
|     inline void enableFlushMode() noexcept | ||||
|  | @ -256,7 +287,7 @@ struct PPCFPSCRRegister | |||
|         if ((csr & FlushMask) != FlushMask) [[unlikely]] | ||||
|         { | ||||
|             csr |= FlushMask; | ||||
|             _mm_setcsr(csr); | ||||
|             setcsr(csr); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|  | @ -265,12 +296,12 @@ struct PPCFPSCRRegister | |||
|         if ((csr & FlushMask) != 0) [[unlikely]] | ||||
|         { | ||||
|             csr &= ~FlushMask; | ||||
|             _mm_setcsr(csr); | ||||
|             setcsr(csr); | ||||
|         } | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| struct PPCContext | ||||
| struct alignas(0x40) PPCContext | ||||
| { | ||||
|     PPCRegister r3; | ||||
| #ifndef PPC_CONFIG_NON_ARGUMENT_AS_LOCAL | ||||
|  | @ -593,68 +624,80 @@ inline uint8_t VectorShiftTableR[] = | |||
|     0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, | ||||
| }; | ||||
| 
 | ||||
| inline __m128i _mm_adds_epu32(__m128i a, __m128i b)  | ||||
| inline simde__m128i simde_mm_adds_epu32(simde__m128i a, simde__m128i b) | ||||
| { | ||||
|     return _mm_add_epi32(a, _mm_min_epu32(_mm_xor_si128(a, _mm_cmpeq_epi32(a, a)), b)); | ||||
|     return simde_mm_add_epi32(a, simde_mm_min_epu32(simde_mm_xor_si128(a, simde_mm_cmpeq_epi32(a, a)), b)); | ||||
| } | ||||
| 
 | ||||
| inline __m128i _mm_avg_epi8(__m128i a, __m128i b) | ||||
| inline simde__m128i simde_mm_avg_epi8(simde__m128i a, simde__m128i b) | ||||
| { | ||||
|     __m128i c = _mm_set1_epi8(char(128)); | ||||
|     return _mm_xor_si128(c, _mm_avg_epu8(_mm_xor_si128(c, a), _mm_xor_si128(c, b))); | ||||
|     simde__m128i c = simde_mm_set1_epi8(char(128)); | ||||
|     return simde_mm_xor_si128(c, simde_mm_avg_epu8(simde_mm_xor_si128(c, a), simde_mm_xor_si128(c, b))); | ||||
| } | ||||
| 
 | ||||
| inline __m128i _mm_avg_epi16(__m128i a, __m128i b) | ||||
| inline simde__m128i simde_mm_avg_epi16(simde__m128i a, simde__m128i b) | ||||
| { | ||||
|     __m128i c = _mm_set1_epi16(short(32768)); | ||||
|     return _mm_xor_si128(c, _mm_avg_epu16(_mm_xor_si128(c, a), _mm_xor_si128(c, b))); | ||||
|     simde__m128i c = simde_mm_set1_epi16(short(32768)); | ||||
|     return simde_mm_xor_si128(c, simde_mm_avg_epu16(simde_mm_xor_si128(c, a), simde_mm_xor_si128(c, b))); | ||||
| } | ||||
| 
 | ||||
| inline __m128 _mm_cvtepu32_ps_(__m128i src1) | ||||
| inline simde__m128 simde_mm_cvtepu32_ps_(simde__m128i src1) | ||||
| { | ||||
|     __m128i xmm1 = _mm_add_epi32(src1, _mm_set1_epi32(127)); | ||||
|     __m128i xmm0 = _mm_slli_epi32(src1, 31 - 8); | ||||
|     xmm0 = _mm_srli_epi32(xmm0, 31); | ||||
|     xmm0 = _mm_add_epi32(xmm0, xmm1); | ||||
|     xmm0 = _mm_srai_epi32(xmm0, 8); | ||||
|     xmm0 = _mm_add_epi32(xmm0, _mm_set1_epi32(0x4F800000)); | ||||
|     __m128 xmm2 = _mm_cvtepi32_ps(src1); | ||||
|     return _mm_blendv_ps(xmm2, _mm_castsi128_ps(xmm0), _mm_castsi128_ps(src1)); | ||||
|     simde__m128i xmm1 = simde_mm_add_epi32(src1, simde_mm_set1_epi32(127)); | ||||
|     simde__m128i xmm0 = simde_mm_slli_epi32(src1, 31 - 8); | ||||
|     xmm0 = simde_mm_srli_epi32(xmm0, 31); | ||||
|     xmm0 = simde_mm_add_epi32(xmm0, xmm1); | ||||
|     xmm0 = simde_mm_srai_epi32(xmm0, 8); | ||||
|     xmm0 = simde_mm_add_epi32(xmm0, simde_mm_set1_epi32(0x4F800000)); | ||||
|     simde__m128 xmm2 = simde_mm_cvtepi32_ps(src1); | ||||
|     return simde_mm_blendv_ps(xmm2, simde_mm_castsi128_ps(xmm0), simde_mm_castsi128_ps(src1)); | ||||
| } | ||||
| 
 | ||||
| inline __m128i _mm_perm_epi8_(__m128i a, __m128i b, __m128i c) | ||||
| inline simde__m128i simde_mm_perm_epi8_(simde__m128i a, simde__m128i b, simde__m128i c) | ||||
| { | ||||
|     __m128i d = _mm_set1_epi8(0xF); | ||||
|     __m128i e = _mm_sub_epi8(d, _mm_and_si128(c, d)); | ||||
|     return _mm_blendv_epi8(_mm_shuffle_epi8(a, e), _mm_shuffle_epi8(b, e), _mm_slli_epi32(c, 3)); | ||||
|     simde__m128i d = simde_mm_set1_epi8(0xF); | ||||
|     simde__m128i e = simde_mm_sub_epi8(d, simde_mm_and_si128(c, d)); | ||||
|     return simde_mm_blendv_epi8(simde_mm_shuffle_epi8(a, e), simde_mm_shuffle_epi8(b, e), simde_mm_slli_epi32(c, 3)); | ||||
| } | ||||
| 
 | ||||
| inline __m128i _mm_cmpgt_epu8(__m128i a, __m128i b) | ||||
| inline simde__m128i simde_mm_cmpgt_epu8(simde__m128i a, simde__m128i b) | ||||
| { | ||||
|     __m128i c = _mm_set1_epi8(char(128)); | ||||
|     return _mm_cmpgt_epi8(_mm_xor_si128(a, c), _mm_xor_si128(b, c)); | ||||
|     simde__m128i c = simde_mm_set1_epi8(char(128)); | ||||
|     return simde_mm_cmpgt_epi8(simde_mm_xor_si128(a, c), simde_mm_xor_si128(b, c)); | ||||
| } | ||||
| 
 | ||||
| inline __m128i _mm_cmpgt_epu16(__m128i a, __m128i b) | ||||
| inline simde__m128i simde_mm_cmpgt_epu16(simde__m128i a, simde__m128i b) | ||||
| { | ||||
|     __m128i c = _mm_set1_epi16(short(32768)); | ||||
|     return _mm_cmpgt_epi16(_mm_xor_si128(a, c), _mm_xor_si128(b, c)); | ||||
|     simde__m128i c = simde_mm_set1_epi16(short(32768)); | ||||
|     return simde_mm_cmpgt_epi16(simde_mm_xor_si128(a, c), simde_mm_xor_si128(b, c)); | ||||
| } | ||||
| 
 | ||||
| inline __m128i _mm_vctsxs(__m128 src1) | ||||
| inline simde__m128i simde_mm_vctsxs(simde__m128 src1) | ||||
| { | ||||
|     __m128 xmm2 = _mm_cmpunord_ps(src1, src1); | ||||
|     __m128i xmm0 = _mm_cvttps_epi32(src1); | ||||
|     __m128i xmm1 = _mm_cmpeq_epi32(xmm0, _mm_set1_epi32(INT_MIN)); | ||||
|     xmm1 = _mm_andnot_si128(_mm_castps_si128(src1), xmm1); | ||||
|     __m128 dest = _mm_blendv_ps(_mm_castsi128_ps(xmm0), _mm_castsi128_ps(_mm_set1_epi32(INT_MAX)), _mm_castsi128_ps(xmm1)); | ||||
|     return _mm_andnot_si128(_mm_castps_si128(xmm2), _mm_castps_si128(dest)); | ||||
|     simde__m128 xmm2 = simde_mm_cmpunord_ps(src1, src1); | ||||
|     simde__m128i xmm0 = simde_mm_cvttps_epi32(src1); | ||||
|     simde__m128i xmm1 = simde_mm_cmpeq_epi32(xmm0, simde_mm_set1_epi32(INT_MIN)); | ||||
|     xmm1 = simde_mm_andnot_si128(simde_mm_castps_si128(src1), xmm1); | ||||
|     simde__m128 dest = simde_mm_blendv_ps(simde_mm_castsi128_ps(xmm0), simde_mm_castsi128_ps(simde_mm_set1_epi32(INT_MAX)), simde_mm_castsi128_ps(xmm1)); | ||||
|     return simde_mm_andnot_si128(simde_mm_castps_si128(xmm2), simde_mm_castps_si128(dest)); | ||||
| } | ||||
| 
 | ||||
| inline __m128i _mm_vsr(__m128i a, __m128i b) | ||||
| inline simde__m128i simde_mm_vsr(simde__m128i a, simde__m128i b) | ||||
| { | ||||
|     b = _mm_srli_epi64(_mm_slli_epi64(b, 61), 61); | ||||
|     return _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(_mm_srl_epi64(a, b)), _mm_castsi128_ps(_mm_srl_epi64(_mm_srli_si128(a, 4), b)), 0x10)); | ||||
|     b = simde_mm_srli_epi64(simde_mm_slli_epi64(b, 61), 61); | ||||
|     return simde_mm_castps_si128(simde_mm_insert_ps(simde_mm_castsi128_ps(simde_mm_srl_epi64(a, b)), simde_mm_castsi128_ps(simde_mm_srl_epi64(simde_mm_srli_si128(a, 4), b)), 0x10)); | ||||
| } | ||||
| 
 | ||||
| #if defined(__aarch64__) || defined(_M_ARM64) | ||||
| inline uint64_t __rdtsc() | ||||
| { | ||||
|     uint64_t ret; | ||||
|     asm volatile("mrs %0, cntvct_el0\n\t" | ||||
|                  : "=r"(ret)::"memory"); | ||||
|     return ret; | ||||
| } | ||||
| #elif !defined(__x86_64__) && !defined(_M_X64) | ||||
| #   error "Missing implementation for __rdtsc()" | ||||
| #endif | ||||
| 
 | ||||
| #endif | ||||
|  |  | |||
|  | @ -5,6 +5,8 @@ | |||
| #include <vector> | ||||
| #include <unordered_map> | ||||
| #include <aes.hpp> | ||||
| #include <TinySHA1.hpp> | ||||
| #include <xex_patcher.h> | ||||
| 
 | ||||
| #define STRINGIFY(X) #X | ||||
| #define XE_EXPORT(MODULE, ORDINAL, NAME, TYPE) { (ORDINAL), "__imp__" STRINGIFY(NAME) } | ||||
|  | @ -135,7 +137,7 @@ Image Xex2LoadImage(const uint8_t* data, size_t dataSize) | |||
|     // Decompress image
 | ||||
|     if (fileFormatInfo != nullptr) | ||||
|     { | ||||
|         assert(fileFormatInfo->compressionType <= XEX_COMPRESSION_BASIC); | ||||
|         assert(fileFormatInfo->compressionType <= XEX_COMPRESSION_NORMAL); | ||||
| 
 | ||||
|         std::unique_ptr<uint8_t[]> decryptedData; | ||||
|         const uint8_t* srcData = nullptr; | ||||
|  | @ -192,6 +194,67 @@ Image Xex2LoadImage(const uint8_t* data, size_t dataSize) | |||
|                 destData += blocks[i].zeroSize; | ||||
|             } | ||||
|         } | ||||
|         else if (fileFormatInfo->compressionType == XEX_COMPRESSION_NORMAL) | ||||
|         { | ||||
|             result = std::make_unique<uint8_t[]>(imageSize); | ||||
|             auto* destData = result.get(); | ||||
| 
 | ||||
|             const Xex2CompressedBlockInfo* blocks = &((const Xex2FileNormalCompressionInfo*)(fileFormatInfo + 1))->firstBlock; | ||||
|             const uint32_t headerSize = header->headerSize.get(); | ||||
| 
 | ||||
|             const uint32_t exeLength = dataSize - headerSize; | ||||
|             const uint8_t* exeBuffer = srcData; | ||||
| 
 | ||||
|             auto compressBuffer = std::make_unique<uint8_t[]>(exeLength); | ||||
|             const uint8_t* p = NULL; | ||||
|             uint8_t* d = NULL; | ||||
|             sha1::SHA1 s; | ||||
| 
 | ||||
|             p = exeBuffer; | ||||
|             d = compressBuffer.get(); | ||||
| 
 | ||||
|             uint8_t blockCalcedDigest[0x14]; | ||||
|             while (blocks->blockSize)  | ||||
|             { | ||||
|                 const uint8_t* pNext = p + blocks->blockSize; | ||||
|                 const auto* nextBlock = (const Xex2CompressedBlockInfo*)p; | ||||
| 
 | ||||
|                 s.reset(); | ||||
|                 s.processBytes(p, blocks->blockSize); | ||||
|                 s.finalize(blockCalcedDigest); | ||||
| 
 | ||||
|                 if (memcmp(blockCalcedDigest, blocks->blockHash, 0x14) != 0) | ||||
|                     return {}; | ||||
| 
 | ||||
|                 p += 4; | ||||
|                 p += 20; | ||||
| 
 | ||||
|                 while (true)  | ||||
|                 { | ||||
|                     const size_t chunkSize = (p[0] << 8) | p[1]; | ||||
|                     p += 2; | ||||
| 
 | ||||
|                     if (!chunkSize) | ||||
|                         break; | ||||
| 
 | ||||
|                     memcpy(d, p, chunkSize); | ||||
|                     p += chunkSize; | ||||
|                     d += chunkSize; | ||||
|                 } | ||||
| 
 | ||||
|                 p = pNext; | ||||
|                 blocks = nextBlock; | ||||
|             } | ||||
| 
 | ||||
|             int resultCode = 0; | ||||
|             uint32_t uncompressedSize = security->imageSize; | ||||
|             uint8_t* buffer = destData; | ||||
| 
 | ||||
|             resultCode = lzxDecompress(compressBuffer.get(), d - compressBuffer.get(), buffer, uncompressedSize, ((const Xex2FileNormalCompressionInfo*)(fileFormatInfo + 1))->windowSize, nullptr, 0); | ||||
| 
 | ||||
|             if (resultCode) | ||||
|                 return {}; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     image.data = std::move(result); | ||||
|  | @ -201,8 +264,17 @@ Image Xex2LoadImage(const uint8_t* data, size_t dataSize) | |||
|     const auto* dosHeader = reinterpret_cast<IMAGE_DOS_HEADER*>(image.data.get()); | ||||
|     const auto* ntHeaders = reinterpret_cast<IMAGE_NT_HEADERS32*>(image.data.get() + dosHeader->e_lfanew); | ||||
| 
 | ||||
|     image.base = ntHeaders->OptionalHeader.ImageBase; | ||||
|     image.entry_point = image.base + ntHeaders->OptionalHeader.AddressOfEntryPoint; | ||||
|     image.base = security->loadAddress; | ||||
|     const void* xex2BaseAddressPtr = getOptHeaderPtr(data, XEX_HEADER_IMAGE_BASE_ADDRESS); | ||||
|     if (xex2BaseAddressPtr != nullptr) | ||||
|     { | ||||
|         image.base = *reinterpret_cast<const be<uint32_t>*>(xex2BaseAddressPtr); | ||||
|     } | ||||
|     const void* xex2EntryPointPtr = getOptHeaderPtr(data, XEX_HEADER_ENTRY_POINT); | ||||
|     if (xex2EntryPointPtr != nullptr) | ||||
|     { | ||||
|         image.entry_point = *reinterpret_cast<const be<uint32_t>*>(xex2EntryPointPtr); | ||||
|     } | ||||
| 
 | ||||
|     const auto numSections = ntHeaders->FileHeader.NumberOfSections; | ||||
|     const auto* sections = reinterpret_cast<const IMAGE_SECTION_HEADER*>(ntHeaders + 1); | ||||
|  | @ -227,10 +299,13 @@ Image Xex2LoadImage(const uint8_t* data, size_t dataSize) | |||
|         std::vector<std::string_view> stringTable; | ||||
|         auto* pStrTable = reinterpret_cast<const char*>(imports + 1); | ||||
| 
 | ||||
|         size_t paddedStringOffset = 0; | ||||
|         for (size_t i = 0; i < imports->numImports; i++) | ||||
|         { | ||||
|             stringTable.emplace_back(pStrTable); | ||||
|             pStrTable += strlen(pStrTable) + 1; | ||||
|             stringTable.emplace_back(pStrTable + paddedStringOffset); | ||||
|              | ||||
|             // pad the offset to the next multiple of 4
 | ||||
|             paddedStringOffset += ((stringTable.back().length() + 1) + 3) & ~3; | ||||
|         } | ||||
| 
 | ||||
|         auto* library = (Xex2ImportLibrary*)(((char*)imports) + sizeof(Xex2ImportHeader) + imports->sizeOfStringTable); | ||||
|  |  | |||
|  | @ -245,13 +245,17 @@ inline const void* getOptHeaderPtr(const uint8_t* moduleBytes, uint32_t headerKe | |||
|         const Xex2OptHeader& optHeader = ((const Xex2OptHeader*)(xex2Header + 1))[i]; | ||||
|         if (optHeader.key == headerKey) | ||||
|         { | ||||
|             if ((headerKey & 0xFF) == 0) | ||||
|             if((headerKey & 0xFF) == 0) | ||||
|             { | ||||
|                 return &optHeader.value; | ||||
|                 return reinterpret_cast<const uint32_t *>(&optHeader.value); | ||||
|             } | ||||
|             else if ((headerKey & 0xFF) == 1) | ||||
|             { | ||||
|                 return reinterpret_cast<const void *>(&optHeader.value); | ||||
|             } | ||||
|             else | ||||
|             { | ||||
|                 return &moduleBytes[optHeader.offset]; | ||||
|                 return reinterpret_cast<const void *>(reinterpret_cast<uintptr_t>(moduleBytes) + optHeader.offset); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  |  | |||
|  | @ -141,7 +141,7 @@ inline bool bitScanForward(uint64_t v, uint32_t *outFirstSetIndex) | |||
| } | ||||
| #endif | ||||
| 
 | ||||
| static int lzxDecompress(const void *lzxData, size_t lzxLength, void *dst, size_t dstLength, uint32_t windowSize, void *windowData, size_t windowDataLength) | ||||
| int lzxDecompress(const void *lzxData, size_t lzxLength, void *dst, size_t dstLength, uint32_t windowSize, void *windowData, size_t windowDataLength) | ||||
| { | ||||
|     int resultCode = 1; | ||||
|     uint32_t windowBits; | ||||
|  | @ -403,7 +403,63 @@ XexPatcher::Result XexPatcher::apply(const uint8_t* xexBytes, size_t xexBytesSiz | |||
|             memmove(outDataCursor, srcDataCursor, blocks[i].dataSize); | ||||
|         } | ||||
|     } | ||||
|     else if (fileFormatInfo->compressionType == XEX_COMPRESSION_NORMAL || fileFormatInfo->compressionType == XEX_COMPRESSION_DELTA) | ||||
|     else if (fileFormatInfo->compressionType == XEX_COMPRESSION_NORMAL) | ||||
|     { | ||||
|         const Xex2CompressedBlockInfo* blocks = &((const Xex2FileNormalCompressionInfo*)(fileFormatInfo + 1))->firstBlock; | ||||
|         const uint32_t exeLength = xexBytesSize - xexHeader->headerSize.get(); | ||||
|         const uint8_t* exeBuffer = &outBytes[headerTargetSize]; | ||||
| 
 | ||||
|         auto compressBuffer = std::make_unique<uint8_t[]>(exeLength); | ||||
|         const uint8_t* p = NULL; | ||||
|         uint8_t* d = NULL; | ||||
|         sha1::SHA1 s; | ||||
| 
 | ||||
|         p = exeBuffer; | ||||
|         d = compressBuffer.get(); | ||||
| 
 | ||||
|         uint8_t blockCalcedDigest[0x14]; | ||||
|         while (blocks->blockSize)  | ||||
|         { | ||||
|             const uint8_t* pNext = p + blocks->blockSize; | ||||
|             const auto* nextBlock = (const Xex2CompressedBlockInfo*)p; | ||||
| 
 | ||||
|             s.reset(); | ||||
|             s.processBytes(p, blocks->blockSize); | ||||
|             s.finalize(blockCalcedDigest); | ||||
| 
 | ||||
|             if (memcmp(blockCalcedDigest, blocks->blockHash, 0x14) != 0) | ||||
|                 return Result::PatchFailed; | ||||
| 
 | ||||
|             p += 4; | ||||
|             p += 20; | ||||
| 
 | ||||
|             while (true)  | ||||
|             { | ||||
|                 const size_t chunkSize = (p[0] << 8) | p[1]; | ||||
|                 p += 2; | ||||
| 
 | ||||
|                 if (!chunkSize) | ||||
|                     break; | ||||
| 
 | ||||
|                 memcpy(d, p, chunkSize); | ||||
|                 p += chunkSize; | ||||
|                 d += chunkSize; | ||||
|             } | ||||
| 
 | ||||
|             p = pNext; | ||||
|             blocks = nextBlock; | ||||
|         } | ||||
| 
 | ||||
|         int resultCode = 0; | ||||
|         uint32_t uncompressedSize = originalSecurityInfo->imageSize; | ||||
|         uint8_t* buffer = outBytes.data() + newXexHeaderSize; | ||||
| 
 | ||||
|         resultCode = lzxDecompress(compressBuffer.get(), d - compressBuffer.get(), buffer, uncompressedSize, ((const Xex2FileNormalCompressionInfo*)(fileFormatInfo + 1))->windowSize, nullptr, 0); | ||||
| 
 | ||||
|         if (resultCode) | ||||
|             return Result::PatchFailed; | ||||
|     } | ||||
|     else if (fileFormatInfo->compressionType == XEX_COMPRESSION_DELTA) | ||||
|     { | ||||
|         return Result::XexFileUnsupported; | ||||
|     } | ||||
|  |  | |||
|  | @ -16,6 +16,8 @@ | |||
| #include <span> | ||||
| #include <vector> | ||||
| 
 | ||||
| extern int lzxDecompress(const void* lzxData, size_t lzxLength, void* dst, size_t dstLength, uint32_t windowSize, void* windowData, size_t windowDataLength); | ||||
| 
 | ||||
| struct XexPatcher | ||||
| { | ||||
|     enum class Result { | ||||
|  |  | |||
							
								
								
									
										2
									
								
								thirdparty/disasm/ppc-dis.c
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								thirdparty/disasm/ppc-dis.c
									
										
									
									
										vendored
									
									
								
							|  | @ -840,7 +840,7 @@ const struct powerpc_operand powerpc_operands[] = | |||
|                            { 8, 0, insert_vperm, extract_vperm, 0 }, | ||||
|                            | ||||
|                           #define VD3D0 VPERM128 + 1 | ||||
|                            { 3, 18, NULL, NULL, 0 }, | ||||
|                            { 7, 18, NULL, NULL, 0 }, | ||||
|                            | ||||
|                           #define VD3D1 VD3D0 + 1 | ||||
|                            { 3, 16, NULL, NULL, 0 }, | ||||
|  |  | |||
							
								
								
									
										1
									
								
								thirdparty/simde
									
										
									
									
										vendored
									
									
										Submodule
									
								
							
							
						
						
									
										1
									
								
								thirdparty/simde
									
										
									
									
										vendored
									
									
										Submodule
									
								
							|  | @ -0,0 +1 @@ | |||
| Subproject commit a532a12ca9bbdc5e6547eb602e6256b71a5377d4 | ||||
		Loading…
	
	Add table
		
		Reference in a new issue