chiark / gitweb /
pcre3 (2:8.38-3.1) unstable; urgency=medium
[pcre3.git] / sljit / sljitNativePPC_common.c
1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
28 {
29         return "PowerPC" SLJIT_CPUINFO;
30 }
31
/* Length of an instruction word.
   Both for ppc-32 and ppc-64. */
typedef sljit_ui sljit_ins;

/* Stack frame layout selector. It is enabled for 32-bit PowerPC builds on AIX
   and for every 64-bit PowerPC build. When enabled, sljit_emit_enter and
   sljit_emit_return keep the saved link register at 2 * sizeof(sljit_sw) from
   the stack pointer instead of at sizeof(sljit_sw). */
#if ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && (defined _AIX)) \
        || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define SLJIT_PPC_STACK_FRAME_V2 1
#endif

/* NOTE(review): presumably this header declares _sync_cache_range(), which
   ppc_cache_flush calls on AIX - confirm. */
#ifdef _AIX
#include <sys/cache.h>
#endif

/* Little-endian builds reserve a dedicated register for call targets
   (see TMP_CALL_REG) and restrict jump shortening for calls
   (see detect_jump_type). */
#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
#define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1
#endif
48
/* Flushes the caches for the instruction words in the range [from, to).

   - On AIX the whole range is handed to _sync_cache_range() in one call.
   - With GCC-style inline assembly the range is walked one instruction word
     at a time, using the POWER sequence (clf/dcs, then ics) or the PowerPC
     sequence (dcbf/sync/icbi, then isync) depending on the _ARCH_* macros.
   - Any other compiler/platform combination is rejected at compile time. */
static void ppc_cache_flush(sljit_ins *from, sljit_ins *to)
{
#ifdef _AIX
        /* The length argument is the byte distance between the two pointers. */
        _sync_cache_range((caddr_t)from, (int)((size_t)to - (size_t)from));
#elif defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM)
#       if defined(_ARCH_PWR) || defined(_ARCH_PWR2)
        /* Cache flush for POWER architecture. */
        while (from < to) {
                __asm__ volatile (
                        "clf 0, %0\n"
                        "dcs\n"
                        : : "r"(from)
                );
                from++;
        }
        /* Issued once, after every word has been processed. */
        __asm__ volatile ( "ics" );
#       elif defined(_ARCH_COM) && !defined(_ARCH_PPC)
#       error "Cache flush is not implemented for PowerPC/POWER common mode."
#       else
        /* Cache flush for PowerPC architecture. */
        while (from < to) {
                __asm__ volatile (
                        "dcbf 0, %0\n"
                        "sync\n"
                        "icbi 0, %0\n"
                        : : "r"(from)
                );
                from++;
        }
        /* Issued once, after every word has been processed. */
        __asm__ volatile ( "isync" );
#       endif
#       ifdef __xlc__
#       warning "This file may fail to compile if -qfuncsect is used"
#       endif
#elif defined(__xlc__)
#error "Please enable GCC syntax for inline assembly statements with -qasm=gcc"
#else
#error "This platform requires a cache flush implementation."
#endif /* _AIX */
}
89
/* Indices of the backend-private registers. They follow the
   SLJIT_NUMBER_OF_REGISTERS public register indices and, like those, are
   translated to machine register numbers through reg_map. */
#define TMP_REG1        (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2        (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3        (SLJIT_NUMBER_OF_REGISTERS + 4)
#define TMP_ZERO        (SLJIT_NUMBER_OF_REGISTERS + 5)

/* Register holding a call target: a dedicated reg_map slot when
   SLJIT_PASS_ENTRY_ADDR_TO_CALL is enabled, otherwise an alias of TMP_REG2. */
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
#define TMP_CALL_REG    (SLJIT_NUMBER_OF_REGISTERS + 6)
#else
#define TMP_CALL_REG    TMP_REG2
#endif

/* Floating point temporaries. Unlike the integer registers these values are
   placed into instructions directly (see FD/FS/FA/FB/FC), without a map. */
#define TMP_FREG1       (0)
#define TMP_FREG2       (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)

/* Maps an sljit register index to the machine register number that the
   D/S/A/B/C macros place into an instruction. Index 0 maps to register 0. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
        0, 3, 4, 5, 6, 7, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 8, 9, 10, 31, 12
};
107
108 /* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
110 /* --------------------------------------------------------------------- */
/* Integer register fields: the argument is an sljit register index, which is
   translated through reg_map and shifted to the field position
   (bit 21, 16, 11 or 6). D and S share the same position. */
#define D(d)            (reg_map[d] << 21)
#define S(s)            (reg_map[s] << 21)
#define A(a)            (reg_map[a] << 16)
#define B(b)            (reg_map[b] << 11)
#define C(c)            (reg_map[c] << 6)
/* Floating point register fields: same bit positions as above, but the
   argument is used as the register number directly (no reg_map lookup). */
#define FD(fd)          ((fd) << 21)
#define FS(fs)          ((fs) << 21)
#define FA(fa)          ((fa) << 16)
#define FB(fb)          ((fb) << 11)
#define FC(fc)          ((fc) << 6)
/* Keeps the low 16 bits of an immediate operand. */
#define IMM(imm)        ((imm) & 0xffff)
/* Raw (unmapped) value placed at bit 21. */
#define CRD(d)          ((d) << 21)
123
/* Instruction bit sections.
   OE and Rc flag (see ALT_SET_FLAGS): keeps ALT_SET_FLAGS at its own bit
   position and also copies it, shifted right by 10, to bit 0.
   The argument is parenthesized so that compound expressions such as
   OERC(a | b) are masked as a whole. */
#define OERC(flags)     ((((flags) & ALT_SET_FLAGS) >> 10) | ((flags) & ALT_SET_FLAGS))
/* Rc flag (see ALT_SET_FLAGS): ALT_SET_FLAGS shifted right by 10 (bit 0). */
#define RC(flags)       (((flags) & ALT_SET_FLAGS) >> 10)
/* Opcode placed at bit 26. The shift is done on the unsigned instruction
   type: opcodes >= 32 would otherwise shift a signed int into its sign bit,
   which is undefined behaviour in C. */
#define HI(opcode)      ((sljit_ins)(opcode) << 26)
/* Secondary opcode placed at bit 1, leaving bit 0 free. */
#define LO(opcode)      ((sljit_ins)(opcode) << 1)
131
/* Instruction templates. HI() supplies the opcode placed at bit 26 and LO()
   the secondary opcode placed at bit 1; a few entries OR in additional fixed
   bits. Register and immediate fields are OR-ed in by the emitters. */
#define ADD             (HI(31) | LO(266))
#define ADDC            (HI(31) | LO(10))
#define ADDE            (HI(31) | LO(138))
#define ADDI            (HI(14))
#define ADDIC           (HI(13))
#define ADDIS           (HI(15))
#define ADDME           (HI(31) | LO(234))
#define AND             (HI(31) | LO(28))
#define ANDI            (HI(28))
#define ANDIS           (HI(29))
#define Bx              (HI(18))
#define BCx             (HI(16))
#define BCCTR           (HI(19) | LO(528) | (3 << 11))
#define BLR             (HI(19) | LO(16) | (0x14 << 21))
#define CNTLZD          (HI(31) | LO(58))
#define CNTLZW          (HI(31) | LO(26))
#define CMP             (HI(31) | LO(0))
#define CMPI            (HI(11))
#define CMPL            (HI(31) | LO(32))
#define CMPLI           (HI(10))
#define CROR            (HI(19) | LO(449))
#define DIVD            (HI(31) | LO(489))
#define DIVDU           (HI(31) | LO(457))
#define DIVW            (HI(31) | LO(491))
#define DIVWU           (HI(31) | LO(459))
#define EXTSB           (HI(31) | LO(954))
#define EXTSH           (HI(31) | LO(922))
#define EXTSW           (HI(31) | LO(986))
#define FABS            (HI(63) | LO(264))
#define FADD            (HI(63) | LO(21))
#define FADDS           (HI(59) | LO(21))
#define FCFID           (HI(63) | LO(846))
#define FCMPU           (HI(63) | LO(0))
#define FCTIDZ          (HI(63) | LO(815))
#define FCTIWZ          (HI(63) | LO(15))
#define FDIV            (HI(63) | LO(18))
#define FDIVS           (HI(59) | LO(18))
#define FMR             (HI(63) | LO(72))
#define FMUL            (HI(63) | LO(25))
#define FMULS           (HI(59) | LO(25))
#define FNEG            (HI(63) | LO(40))
#define FRSP            (HI(63) | LO(12))
#define FSUB            (HI(63) | LO(20))
#define FSUBS           (HI(59) | LO(20))
#define LD              (HI(58) | 0)
#define LWZ             (HI(32))
#define MFCR            (HI(31) | LO(19))
#define MFLR            (HI(31) | LO(339) | 0x80000)
#define MFXER           (HI(31) | LO(339) | 0x10000)
#define MTCTR           (HI(31) | LO(467) | 0x90000)
#define MTLR            (HI(31) | LO(467) | 0x80000)
#define MTXER           (HI(31) | LO(467) | 0x10000)
#define MULHD           (HI(31) | LO(73))
#define MULHDU          (HI(31) | LO(9))
#define MULHW           (HI(31) | LO(75))
#define MULHWU          (HI(31) | LO(11))
#define MULLD           (HI(31) | LO(233))
#define MULLI           (HI(7))
#define MULLW           (HI(31) | LO(235))
#define NEG             (HI(31) | LO(104))
/* NOP shares its template with ORI. */
#define NOP             (HI(24))
#define NOR             (HI(31) | LO(124))
#define OR              (HI(31) | LO(444))
#define ORI             (HI(24))
#define ORIS            (HI(25))
#define RLDICL          (HI(30))
#define RLWINM          (HI(21))
#define SLD             (HI(31) | LO(27))
#define SLW             (HI(31) | LO(24))
#define SRAD            (HI(31) | LO(794))
/* The secondary opcode of SRADI is placed one bit higher than the others. */
#define SRADI           (HI(31) | LO(413 << 1))
#define SRAW            (HI(31) | LO(792))
#define SRAWI           (HI(31) | LO(824))
#define SRD             (HI(31) | LO(539))
#define SRW             (HI(31) | LO(536))
#define STD             (HI(62) | 0)
#define STDU            (HI(62) | 1)
#define STDUX           (HI(31) | LO(181))
#define STFIWX          (HI(31) | LO(983))
#define STW             (HI(36))
#define STWU            (HI(37))
#define STWUX           (HI(31) | LO(183))
#define SUBF            (HI(31) | LO(40))
#define SUBFC           (HI(31) | LO(8))
#define SUBFE           (HI(31) | LO(136))
#define SUBFIC          (HI(8))
#define XOR             (HI(31) | LO(316))
#define XORI            (HI(26))
#define XORIS           (HI(27))

/* Limits of the 16 bit signed and unsigned immediate operands. */
#define SIMM_MAX        (0x7fff)
#define SIMM_MIN        (-0x8000)
#define UIMM_MAX        (0xffff)
225
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
/* Fills in a function context.

   func_ptr - optional; when non-NULL it receives the address of context.
   context  - structure to initialize.
   addr     - entry address to store; when zero, the first word read through
              func is stored instead.
   func     - treated as an array of sljit_sw words: word 0 is the fallback
              entry address, words 1 and 2 are copied to r2 and r11. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func)
{
        sljit_sw* descriptor;

        if (func_ptr)
                *func_ptr = (void*)context;

        descriptor = (sljit_sw*)func;
        if (addr)
                context->addr = addr;
        else
                context->addr = descriptor[0];
        context->r2 = descriptor[1];
        context->r11 = descriptor[2];
}
#endif
238
239 static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins)
240 {
241         sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
242         FAIL_IF(!ptr);
243         *ptr = ins;
244         compiler->size++;
245         return SLJIT_SUCCESS;
246 }
247
248 static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
249 {
250         sljit_sw diff;
251         sljit_uw target_addr;
252         sljit_sw extra_jump_flags;
253
254 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
255         if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL))
256                 return 0;
257 #else
258         if (jump->flags & SLJIT_REWRITABLE_JUMP)
259                 return 0;
260 #endif
261
262         if (jump->flags & JUMP_ADDR)
263                 target_addr = jump->u.target;
264         else {
265                 SLJIT_ASSERT(jump->flags & JUMP_LABEL);
266                 target_addr = (sljit_uw)(code + jump->u.label->size);
267         }
268
269 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
270         if (jump->flags & IS_CALL)
271                 goto keep_address;
272 #endif
273
274         diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr)) & ~0x3l;
275
276         extra_jump_flags = 0;
277         if (jump->flags & IS_COND) {
278                 if (diff <= 0x7fff && diff >= -0x8000) {
279                         jump->flags |= PATCH_B;
280                         return 1;
281                 }
282                 if (target_addr <= 0xffff) {
283                         jump->flags |= PATCH_B | PATCH_ABS_B;
284                         return 1;
285                 }
286                 extra_jump_flags = REMOVE_COND;
287
288                 diff -= sizeof(sljit_ins);
289         }
290
291         if (diff <= 0x01ffffff && diff >= -0x02000000) {
292                 jump->flags |= PATCH_B | extra_jump_flags;
293                 return 1;
294         }
295         if (target_addr <= 0x03ffffff) {
296                 jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags;
297                 return 1;
298         }
299
300 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
301 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
302 keep_address:
303 #endif
304         if (target_addr <= 0x7fffffff) {
305                 jump->flags |= PATCH_ABS32;
306                 return 1;
307         }
308         if (target_addr <= 0x7fffffffffffl) {
309                 jump->flags |= PATCH_ABS48;
310                 return 1;
311         }
312 #endif
313
314         return 0;
315 }
316
/* Produces the final executable code from the instruction words collected in
   the compiler's buffer fragments.

   Pass 1 copies the words into a freshly allocated executable area, records
   the final addresses of labels, jumps and constants, and shortens jumps that
   detect_jump_type accepts. Pass 2 patches every jump with its destination.
   Finally the error field is set to SLJIT_ERR_COMPILED, the executable size
   is recorded and the caches are flushed.

   Returns the start of the generated code, or - when SLJIT_INDIRECT_CALL is
   enabled - the function context that is placed right after the code. The
   early exits are taken through CHECK_ERROR_PTR, CHECK_PTR and
   PTR_FAIL_WITH_EXEC_IF. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
        struct sljit_memory_fragment *buf;
        sljit_ins *code;
        sljit_ins *code_ptr;
        sljit_ins *buf_ptr;
        sljit_ins *buf_end;
        sljit_uw word_count;
        sljit_uw addr;

        struct sljit_label *label;
        struct sljit_jump *jump;
        struct sljit_const *const_;

        CHECK_ERROR_PTR();
        CHECK_PTR(check_sljit_generate_code(compiler));
        reverse_buf(compiler);

        /* Reserve room for the function context stored after the code. The
           64 bit variant adds one more word when the word count is odd. */
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
        compiler->size += (compiler->size & 0x1) + (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
#else
        compiler->size += (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
#endif
#endif
        code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
        PTR_FAIL_WITH_EXEC_IF(code);
        buf = compiler->buf;

        /* Pass 1: copy the instruction words. word_count counts the words read
           from the buffers, code_ptr is the write position, which falls
           behind whenever a jump is shortened. */
        code_ptr = code;
        word_count = 0;
        label = compiler->labels;
        jump = compiler->jumps;
        const_ = compiler->consts;
        do {
                buf_ptr = (sljit_ins*)buf->memory;
                buf_end = buf_ptr + (buf->used_size >> 2);
                do {
                        *code_ptr = *buf_ptr++;
                        SLJIT_ASSERT(!label || label->size >= word_count);
                        SLJIT_ASSERT(!jump || jump->addr >= word_count);
                        SLJIT_ASSERT(!const_ || const_->addr >= word_count);
                        /* These structures are ordered by their address. */
                        if (label && label->size == word_count) {
                                /* Just recording the address. */
                                label->addr = (sljit_uw)code_ptr;
                                label->size = code_ptr - code;
                                label = label->next;
                        }
                        if (jump && jump->addr == word_count) {
                                /* The jump address is moved 3 (32 bit) or 6 (64 bit) words
                                   before the current word; presumably the start of the
                                   address load sequence that precedes the branch. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
                                jump->addr = (sljit_uw)(code_ptr - 3);
#else
                                jump->addr = (sljit_uw)(code_ptr - 6);
#endif
                                if (detect_jump_type(jump, code_ptr, code)) {
                                        /* A shorter form was selected: move the kept words down
                                           and rewind code_ptr over the dropped ones. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
                                        code_ptr[-3] = code_ptr[0];
                                        code_ptr -= 3;
#else
                                        if (jump->flags & PATCH_ABS32) {
                                                code_ptr -= 3;
                                                code_ptr[-1] = code_ptr[2];
                                                code_ptr[0] = code_ptr[3];
                                        }
                                        else if (jump->flags & PATCH_ABS48) {
                                                code_ptr--;
                                                code_ptr[-1] = code_ptr[0];
                                                code_ptr[0] = code_ptr[1];
                                                /* rldicr rX,rX,32,31 -> rX,rX,16,47 */
                                                SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6);
                                                code_ptr[-3] ^= 0x8422;
                                                /* oris -> ori */
                                                code_ptr[-2] ^= 0x4000000;
                                        }
                                        else {
                                                code_ptr[-6] = code_ptr[0];
                                                code_ptr -= 6;
                                        }
#endif
                                        if (jump->flags & REMOVE_COND) {
                                                /* Two words are written: a BCx built from the
                                                   original word with one condition bit toggled
                                                   and a fixed displacement, followed by a Bx.
                                                   The jump is tracked at the Bx and is treated
                                                   as unconditional from here on. */
                                                code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001);
                                                code_ptr++;
                                                jump->addr += sizeof(sljit_ins);
                                                code_ptr[0] = Bx;
                                                jump->flags -= IS_COND;
                                        }
                                }
                                jump = jump->next;
                        }
                        if (const_ && const_->addr == word_count) {
                                const_->addr = (sljit_uw)code_ptr;
                                const_ = const_->next;
                        }
                        code_ptr ++;
                        word_count ++;
                } while (buf_ptr < buf_end);

                buf = buf->next;
        } while (buf);

        /* A label may point just past the last instruction. */
        if (label && label->size == word_count) {
                label->addr = (sljit_uw)code_ptr;
                label->size = code_ptr - code;
                label = label->next;
        }

        SLJIT_ASSERT(!label);
        SLJIT_ASSERT(!jump);
        SLJIT_ASSERT(!const_);
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
        SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)));
#else
        SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
#endif

        /* Pass 2: write the destination of every jump into the code. */
        jump = compiler->jumps;
        while (jump) {
                do {
                        addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
                        buf_ptr = (sljit_ins*)jump->addr;
                        if (jump->flags & PATCH_B) {
                                /* Direct branch: relative displacement, or absolute
                                   address (0x2 is OR-ed in) with PATCH_ABS_B. */
                                if (jump->flags & IS_COND) {
                                        if (!(jump->flags & PATCH_ABS_B)) {
                                                addr = addr - jump->addr;
                                                SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000);
                                                *buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001);
                                        }
                                        else {
                                                SLJIT_ASSERT(addr <= 0xffff);
                                                *buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001);
                                        }
                                }
                                else {
                                        if (!(jump->flags & PATCH_ABS_B)) {
                                                addr = addr - jump->addr;
                                                SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000);
                                                *buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1);
                                        }
                                        else {
                                                SLJIT_ASSERT(addr <= 0x03ffffff);
                                                *buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1);
                                        }
                                }
                                break;
                        }
                        /* Set the fields of immediate loads. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
                        buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
                        buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
#else
                        if (jump->flags & PATCH_ABS32) {
                                SLJIT_ASSERT(addr <= 0x7fffffff);
                                buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
                                buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
                                break;
                        }
                        if (jump->flags & PATCH_ABS48) {
                                SLJIT_ASSERT(addr <= 0x7fffffffffff);
                                buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff);
                                buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff);
                                buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff);
                                break;
                        }
                        /* Full 64 bit address: four 16 bit pieces; word 2 of the
                           sequence carries no immediate field to patch. */
                        buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff);
                        buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff);
                        buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff);
                        buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff);
#endif
                } while (0);
                jump = jump->next;
        }

        compiler->error = SLJIT_ERR_COMPILED;
        compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
        SLJIT_CACHE_FLUSH(code, code_ptr);

        /* With indirect calls the function context is stored after the code
           (after skipping one word on 64 bit when code_ptr is not 8 byte
           aligned) and its address is returned instead of the code start. */
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
        if (((sljit_sw)code_ptr) & 0x4)
                code_ptr++;
        sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
        return code_ptr;
#else
        sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
        return code_ptr;
#endif
#else
        return code;
#endif
}
508
509 /* --------------------------------------------------------------------- */
510 /*  Entry, exit                                                          */
511 /* --------------------------------------------------------------------- */
512
/* inp_flags: */

/* Creates an index in data_transfer_insts array. */
#define LOAD_DATA       0x01
#define INDEXED         0x02
#define WRITE_BACK      0x04
/* Data size selector (two bits: 0x08 and 0x10). */
#define WORD_DATA       0x00
#define BYTE_DATA       0x08
#define HALF_DATA       0x10
#define INT_DATA        0x18
#define SIGNED_DATA     0x20
/* Separates integer and floating point registers: masked flag values up to
   GPR_REG select an integer register (see INST_CODE_AND_DST). */
#define GPR_REG         0x3f
#define DOUBLE_DATA     0x40

/* Mask covering all the memory access bits above. */
#define MEM_MASK        0x7f

/* Other inp_flags. */

#define ARG_TEST        0x000100
/* Integer operation and set flags -> requires exts on 64 bit systems. */
#define ALT_SIGN_EXT    0x000200
/* This flag affects the RC() and OERC() macros. */
#define ALT_SET_FLAGS   0x000400
#define ALT_KEEP_CACHE  0x000800
#define ALT_FORM1       0x010000
#define ALT_FORM2       0x020000
#define ALT_FORM3       0x040000
#define ALT_FORM4       0x080000
#define ALT_FORM5       0x100000
#define ALT_FORM6       0x200000

/* Source and destination is register. */
#define REG_DEST        0x000001
#define REG1_SOURCE     0x000002
#define REG2_SOURCE     0x000004
/* getput_arg_fast returned true. */
#define FAST_DEST       0x000008
/* Multiple instructions are required. */
#define SLOW_DEST       0x000010
/* The following flags, defined above, belong to this group as well:
ALT_SIGN_EXT            0x000200
ALT_SET_FLAGS           0x000400
ALT_FORM1               0x010000
...
ALT_FORM6               0x200000 */
559
560 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
561 #include "sljitNativePPC_32.c"
562 #else
563 #include "sljitNativePPC_64.c"
564 #endif
565
/* Word sized store/load templates used by the function prologue and epilogue
   below: STW/LWZ on 32 bit, STD/LD on 64 bit. Both are undefined again after
   sljit_emit_return. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
#define STACK_STORE     STW
#define STACK_LOAD      LWZ
#else
#define STACK_STORE     STD
#define STACK_LOAD      LD
#endif
573
574 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
575         sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
576         sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
577 {
578         sljit_si i, tmp, offs;
579
580         CHECK_ERROR();
581         CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
582         set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
583
584         FAIL_IF(push_inst(compiler, MFLR | D(0)));
585         offs = -(sljit_si)(sizeof(sljit_sw));
586         FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));
587
588         tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
589         for (i = SLJIT_S0; i >= tmp; i--) {
590                 offs -= (sljit_si)(sizeof(sljit_sw));
591                 FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
592         }
593
594         for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
595                 offs -= (sljit_si)(sizeof(sljit_sw));
596                 FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
597         }
598
599         SLJIT_ASSERT(offs == -(sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1));
600
601 #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
602         FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
603 #else
604         FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
605 #endif
606
607         FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0));
608         if (args >= 1)
609                 FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(SLJIT_S0) | B(SLJIT_R0)));
610         if (args >= 2)
611                 FAIL_IF(push_inst(compiler, OR | S(SLJIT_R1) | A(SLJIT_S1) | B(SLJIT_R1)));
612         if (args >= 3)
613                 FAIL_IF(push_inst(compiler, OR | S(SLJIT_R2) | A(SLJIT_S2) | B(SLJIT_R2)));
614
615         local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
616         local_size = (local_size + 15) & ~0xf;
617         compiler->local_size = local_size;
618
619 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
620         if (local_size <= SIMM_MAX)
621                 FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
622         else {
623                 FAIL_IF(load_immediate(compiler, 0, -local_size));
624                 FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
625         }
626 #else
627         if (local_size <= SIMM_MAX)
628                 FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
629         else {
630                 FAIL_IF(load_immediate(compiler, 0, -local_size));
631                 FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
632         }
633 #endif
634
635         return SLJIT_SUCCESS;
636 }
637
638 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler,
639         sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
640         sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
641 {
642         CHECK_ERROR();
643         CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
644         set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
645
646         local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
647         compiler->local_size = (local_size + 15) & ~0xf;
648         return SLJIT_SUCCESS;
649 }
650
651 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
652 {
653         sljit_si i, tmp, offs;
654
655         CHECK_ERROR();
656         CHECK(check_sljit_emit_return(compiler, op, src, srcw));
657
658         FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
659
660         if (compiler->local_size <= SIMM_MAX)
661                 FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_SP) | A(SLJIT_SP) | IMM(compiler->local_size)));
662         else {
663                 FAIL_IF(load_immediate(compiler, 0, compiler->local_size));
664                 FAIL_IF(push_inst(compiler, ADD | D(SLJIT_SP) | A(SLJIT_SP) | B(0)));
665         }
666
667 #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
668         FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
669 #else
670         FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
671 #endif
672
673         offs = -(sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1);
674
675         tmp = compiler->scratches;
676         for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
677                 FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
678                 offs += (sljit_si)(sizeof(sljit_sw));
679         }
680
681         tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
682         for (i = tmp; i <= SLJIT_S0; i++) {
683                 FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
684                 offs += (sljit_si)(sizeof(sljit_sw));
685         }
686
687         FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));
688         SLJIT_ASSERT(offs == -(sljit_sw)(sizeof(sljit_sw)));
689
690         FAIL_IF(push_inst(compiler, MTLR | S(0)));
691         FAIL_IF(push_inst(compiler, BLR));
692
693         return SLJIT_SUCCESS;
694 }
695
696 #undef STACK_STORE
697 #undef STACK_LOAD
698
699 /* --------------------------------------------------------------------- */
700 /*  Operators                                                            */
701 /* --------------------------------------------------------------------- */
702
/* i/x - immediate/indexed form
   n/w - no write-back / write-back (1 bit)
   s/l - store/load (1 bit)
   u/s - unsigned/signed (1 bit)
   w/b/h/i - word/byte/half/int allowed (2 bit)
   It contains 32 items, but not all are different. */
709
710 /* 64 bit only: [reg+imm] must be aligned to 4 bytes.
       Marker bit stored in data_transfer_insts entries. It is not part of the
       encoded instruction: the 64-bit INST_CODE_AND_DST below masks it off. */
711 #define INT_ALIGNED     0x10000
712 /* 64-bit only: there is no lwau instruction.
       Marker bit for the one table entry (signed int load with write-back,
       immediate form) that has no direct encoding. Also masked off by the
       64-bit INST_CODE_AND_DST. */
713 #define UPDATE_REQ      0x20000
714
    /* ARCH_32_64(a, b) expands to its first argument on ppc-32 and to its
       second argument on ppc-64.
       INST_CODE_AND_DST(inst, flags, reg) combines an opcode template with the
       data register field: D(reg) when (flags & MEM_MASK) <= GPR_REG, FD(reg)
       otherwise. The ppc-64 variant additionally strips the INT_ALIGNED and
       UPDATE_REQ marker bits from inst. */
715 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
716 #define ARCH_32_64(a, b)        a
717 #define INST_CODE_AND_DST(inst, flags, reg) \
718         ((inst) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
719 #else
720 #define ARCH_32_64(a, b)        b
721 #define INST_CODE_AND_DST(inst, flags, reg) \
722         (((inst) & ~(INT_ALIGNED | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
723 #endif
724
/* Opcode templates for the load/store instructions.
   The letters in front of each entry are, in order: sign (u/s), data size
   (w/b/h/i), write-back (n/w), form (i = [reg+imm], x = [reg+reg]) and
   direction (s = store, l = load). The users in this file index the table
   with (flags & MEM_MASK), OR-ing INDEXED into the flags to select the
   x form. The last 8 entries are the floating point loads/stores, which
   have no write-back variants here.
   On ppc-64 some entries carry the INT_ALIGNED / UPDATE_REQ marker bits;
   INST_CODE_AND_DST removes them before the instruction is emitted. */
725 static SLJIT_CONST sljit_ins data_transfer_insts[64 + 8] = {
726
727 /* -------- Unsigned -------- */
728
729 /* Word. */
730
731 /* u w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
732 /* u w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
733 /* u w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
734 /* u w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
735
736 /* u w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
737 /* u w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
738 /* u w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
739 /* u w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
740
741 /* Byte. */
742
743 /* u b n i s */ HI(38) /* stb */, 
744 /* u b n i l */ HI(34) /* lbz */,
745 /* u b n x s */ HI(31) | LO(215) /* stbx */,
746 /* u b n x l */ HI(31) | LO(87) /* lbzx */,
747
748 /* u b w i s */ HI(39) /* stbu */,
749 /* u b w i l */ HI(35) /* lbzu */,
750 /* u b w x s */ HI(31) | LO(247) /* stbux */,
751 /* u b w x l */ HI(31) | LO(119) /* lbzux */,
752
753 /* Half. */
754
755 /* u h n i s */ HI(44) /* sth */,
756 /* u h n i l */ HI(40) /* lhz */,
757 /* u h n x s */ HI(31) | LO(407) /* sthx */,
758 /* u h n x l */ HI(31) | LO(279) /* lhzx */,
759
760 /* u h w i s */ HI(45) /* sthu */,
761 /* u h w i l */ HI(41) /* lhzu */,
762 /* u h w x s */ HI(31) | LO(439) /* sthux */,
763 /* u h w x l */ HI(31) | LO(311) /* lhzux */,
764
765 /* Int. */
766
767 /* u i n i s */ HI(36) /* stw */,
768 /* u i n i l */ HI(32) /* lwz */,
769 /* u i n x s */ HI(31) | LO(151) /* stwx */,
770 /* u i n x l */ HI(31) | LO(23) /* lwzx */,
771
772 /* u i w i s */ HI(37) /* stwu */,
773 /* u i w i l */ HI(33) /* lwzu */,
774 /* u i w x s */ HI(31) | LO(183) /* stwux */,
775 /* u i w x l */ HI(31) | LO(55) /* lwzux */,
776
777 /* -------- Signed -------- */
778
779 /* Word. */
780
781 /* s w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
782 /* s w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
783 /* s w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
784 /* s w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
785
786 /* s w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
787 /* s w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
788 /* s w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
789 /* s w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
790
791 /* Byte. The loads below are the zero extending ones (same opcodes as in
       the unsigned section); EXTS_REQ marks that a separate sign extension
       is needed - NOTE(review): presumably done by the caller, confirm. */
792
793 /* s b n i s */ HI(38) /* stb */,
794 /* s b n i l */ HI(34) /* lbz */ /* EXTS_REQ */,
795 /* s b n x s */ HI(31) | LO(215) /* stbx */,
796 /* s b n x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */,
797
798 /* s b w i s */ HI(39) /* stbu */,
799 /* s b w i l */ HI(35) /* lbzu */ /* EXTS_REQ */,
800 /* s b w x s */ HI(31) | LO(247) /* stbux */,
801 /* s b w x l */ HI(31) | LO(119) /* lbzux */ /* EXTS_REQ */,
802
803 /* Half. */
804
805 /* s h n i s */ HI(44) /* sth */,
806 /* s h n i l */ HI(42) /* lha */,
807 /* s h n x s */ HI(31) | LO(407) /* sthx */,
808 /* s h n x l */ HI(31) | LO(343) /* lhax */,
809
810 /* s h w i s */ HI(45) /* sthu */,
811 /* s h w i l */ HI(43) /* lhau */,
812 /* s h w x s */ HI(31) | LO(439) /* sthux */,
813 /* s h w x l */ HI(31) | LO(375) /* lhaux */,
814
815 /* Int. */
816
817 /* s i n i s */ HI(36) /* stw */,
818 /* s i n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x2 /* lwa */),
819 /* s i n x s */ HI(31) | LO(151) /* stwx */,
820 /* s i n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */),
821
822 /* s i w i s */ HI(37) /* stwu */,
    /* The ppc-64 variant of the next entry is tagged UPDATE_REQ because the
       immediate form of lwa has no write-back version. */
823 /* s i w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | UPDATE_REQ | 0x2 /* lwa */),
824 /* s i w x s */ HI(31) | LO(183) /* stwux */,
825 /* s i w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */),
826
827 /* -------- Floating point -------- */
828
    /* Double precision. */
829 /* d   n i s */ HI(54) /* stfd */,
830 /* d   n i l */ HI(50) /* lfd */,
831 /* d   n x s */ HI(31) | LO(727) /* stfdx */,
832 /* d   n x l */ HI(31) | LO(599) /* lfdx */,
833
    /* Single precision. */
834 /* s   n i s */ HI(52) /* stfs */,
835 /* s   n i l */ HI(48) /* lfs */,
836 /* s   n x s */ HI(31) | LO(663) /* stfsx */,
837 /* s   n x l */ HI(31) | LO(535) /* lfsx */,
838
839 };
840
841 #undef ARCH_32_64
842
843 /* Simple cases, (no caching is required). */
844 static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw)
845 {
846         sljit_ins inst;
847
848         /* Should work when (arg & REG_MASK) == 0. */
849         SLJIT_COMPILE_ASSERT(A(0) == 0, a0_must_be_0);
850         SLJIT_ASSERT(arg & SLJIT_MEM);
851
852         if (arg & OFFS_REG_MASK) {
853                 if (argw & 0x3)
854                         return 0;
855                 if (inp_flags & ARG_TEST)
856                         return 1;
857
858                 inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
859                 SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
860                 FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(OFFS_REG(arg))));
861                 return -1;
862         }
863
864         if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
865                 inp_flags &= ~WRITE_BACK;
866
867 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
868         inst = data_transfer_insts[inp_flags & MEM_MASK];
869         SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));
870
871         if (argw > SIMM_MAX || argw < SIMM_MIN || ((inst & INT_ALIGNED) && (argw & 0x3)) || (inst & UPDATE_REQ))
872                 return 0;
873         if (inp_flags & ARG_TEST)
874                 return 1;
875 #endif
876
877 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
878         if (argw > SIMM_MAX || argw < SIMM_MIN)
879                 return 0;
880         if (inp_flags & ARG_TEST)
881                 return 1;
882
883         inst = data_transfer_insts[inp_flags & MEM_MASK];
884         SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
885 #endif
886
887         FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | IMM(argw)));
888         return -1;
889 }
890
891 /* See getput_arg below.
892    Note: can_cache is called only for binary operators. Those operator always
893    uses word arguments without write back. */
894 static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
895 {
896         sljit_sw high_short, next_high_short;
897 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
898         sljit_sw diff;
899 #endif
900
901         SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
902
903         if (arg & OFFS_REG_MASK)
904                 return ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && (argw & 0x3) == (next_argw & 0x3));
905
906         if (next_arg & OFFS_REG_MASK)
907                 return 0;
908
909 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
910         high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
911         next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
912         return high_short == next_high_short;
913 #else
914         if (argw <= 0x7fffffffl && argw >= -0x80000000l) {
915                 high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
916                 next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
917                 if (high_short == next_high_short)
918                         return 1;
919         }
920
921         diff = argw - next_argw;
922         if (!(arg & REG_MASK))
923                 return diff <= SIMM_MAX && diff >= SIMM_MIN;
924
925         if (arg == next_arg && diff <= SIMM_MAX && diff >= SIMM_MIN)
926                 return 1;
927
928         return 0;
929 #endif
930 }
931
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
/* Used by getput_arg when the cached address in TMP_REG3 is reused with a
   displacement `imm` (an lvalue) for an INT_ALIGNED opcode: such opcodes
   need a displacement which is a multiple of 4, so the low two bits of
   `imm` are moved into TMP_REG3 (and into compiler->cache_argw) and are
   cleared in `imm`.
   Relies on `compiler` and `inst` being in scope; returns from the calling
   function through FAIL_IF when the instruction cannot be pushed.
   Wrapped in do { } while (0) so that it behaves as a single statement,
   and every use of the argument is parenthesized. */
#define ADJUST_CACHED_IMM(imm) \
	do { \
		if ((inst & INT_ALIGNED) && ((imm) & 0x3)) { \
			/* Adjust cached value. Fortunately this is really a rare case */ \
			compiler->cache_argw += (imm) & 0x3; \
			FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | ((imm) & 0x3))); \
			(imm) &= ~0x3; \
		} \
	} while (0)
#endif
941
942 /* Emit the necessary instructions. See can_cache above.
       Handles the memory operands which getput_arg_fast rejected, i.e. those
       needing more than one instruction.
         inp_flags          - selects the data_transfer_insts entry (plus
                              LOAD_DATA / WRITE_BACK)
         reg                - register which is loaded or stored
         arg, argw          - the memory operand
         next_arg, next_argw - the memory operand processed after this one; it
                              is only inspected to decide whether the computed
                              address part should be kept in TMP_REG3
       The content of TMP_REG3 is tracked in compiler->cache_arg and
       compiler->cache_argw.
       Returns the result of the last push_inst, or leaves early through
       FAIL_IF when an earlier instruction cannot be emitted. */
943 static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
944 {
945         sljit_si tmp_r;
946         sljit_ins inst;
947         sljit_sw high_short, next_high_short;
948 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
949         sljit_sw diff;
950 #endif
951
952         SLJIT_ASSERT(arg & SLJIT_MEM);
953
            /* Scratch register for the address computation: a load into a
               general purpose register may use its own destination register,
               everything else uses TMP_REG1. */
954         tmp_r = ((inp_flags & LOAD_DATA) && ((inp_flags) & MEM_MASK) <= GPR_REG) ? reg : TMP_REG1;
955         /* Special case for "mov reg, [reg, ... ]". */
956         if ((arg & REG_MASK) == tmp_r)
957                 tmp_r = TMP_REG1;
958
            /* [base + (index << shift)]: the shifted index is computed into
               tmp_r (or reused from / stored into TMP_REG3), then the indexed
               form of the instruction is emitted. */
959         if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
960                 argw &= 0x3;
961                 /* Otherwise getput_arg_fast would capture it. */
962                 SLJIT_ASSERT(argw);
963
964                 if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg && argw == compiler->cache_argw)
965                         tmp_r = TMP_REG3;
966                 else {
                            /* Keep the shifted index in TMP_REG3 when the next
                               operand uses the same index register and shift. */
967                         if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) {
968                                 compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
969                                 compiler->cache_argw = argw;
970                                 tmp_r = TMP_REG3;
971                         }
972 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
973                         FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_r) | (argw << 11) | ((31 - argw) << 1)));
974 #else
975                         FAIL_IF(push_inst(compiler, RLDI(tmp_r, OFFS_REG(arg), argw, 63 - argw, 1)));
976 #endif
977                 }
978                 inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
979                 SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
980                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
981         }
982
            /* Write-back is dropped when there is no base register. */
983         if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
984                 inp_flags &= ~WRITE_BACK;
985
986         inst = data_transfer_insts[inp_flags & MEM_MASK];
987         SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));
988
            /* The block below adds the upper part of the offset with ADDIS and
               passes the whole argw to IMM() of the load/store. It is
               unconditional on ppc-32. On ppc-64 it is used only when the
               offset is in the range checked here, is suitably aligned for
               INT_ALIGNED opcodes, and the opcode is not tagged UPDATE_REQ.
               The 0x7fff7fff upper bound keeps high_short within the 32 bit
               range asserted below. */
989 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
990         if (argw <= 0x7fff7fffl && argw >= -0x80000000l
991                         && (!(inst & INT_ALIGNED) || !(argw & 0x3)) && !(inst & UPDATE_REQ)) {
992 #endif
993
994                 arg &= REG_MASK;
                    /* Upper 16 bits, incremented when bit 15 of argw is set. */
995                 high_short = (sljit_si)(argw + ((argw & 0x8000) << 1)) & ~0xffff;
996                 /* The getput_arg_fast should handle this otherwise. */
997 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
998                 SLJIT_ASSERT(high_short && high_short <= 0x7fffffffl && high_short >= -0x80000000l);
999 #else
1000                 SLJIT_ASSERT(high_short && !(inst & (INT_ALIGNED | UPDATE_REQ)));
1001 #endif
1002
1003                 if (inp_flags & WRITE_BACK) {
                             /* The base register itself receives the upper
                                part. If it is also the data register, the data
                                is copied to tmp_r first (OR with identical
                                sources). */
1004                         if (arg == reg) {
1005                                 FAIL_IF(push_inst(compiler, OR | S(reg) | A(tmp_r) | B(reg)));
1006                                 reg = tmp_r;
1007                         }
1008                         tmp_r = arg;
1009                         FAIL_IF(push_inst(compiler, ADDIS | D(arg) | A(arg) | IMM(high_short >> 16)));
1010                 }
1011                 else if (compiler->cache_arg != (SLJIT_MEM | arg) || high_short != compiler->cache_argw) {
                             /* Not cached yet: compute base + upper part, into
                                TMP_REG3 when the next operand has the same
                                upper part. NOTE(review): only the offsets of
                                the two operands are compared here, not their
                                base registers - confirm this is intended. */
1012                         if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK)) {
1013                                 next_high_short = (sljit_si)(next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
1014                                 if (high_short == next_high_short) {
1015                                         compiler->cache_arg = SLJIT_MEM | arg;
1016                                         compiler->cache_argw = high_short;
1017                                         tmp_r = TMP_REG3;
1018                                 }
1019                         }
1020                         FAIL_IF(push_inst(compiler, ADDIS | D(tmp_r) | A(arg & REG_MASK) | IMM(high_short >> 16)));
1021                 }
1022                 else
1023                         tmp_r = TMP_REG3;
1024
1025                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r) | IMM(argw));
1026
1027 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1028         }
1029
1030         /* Everything else is PPC-64 only. */
             /* Absolute address (no base register). */
1031         if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
                     /* TMP_REG3 holds a cached immediate which is close
                        enough: use it as the base with a displacement. */
1032                 diff = argw - compiler->cache_argw;
1033                 if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1034                         ADJUST_CACHED_IMM(diff);
1035                         return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
1036                 }
1037
                     /* Load the address, into TMP_REG3 when the next operand
                        is within a 16 bit signed distance. */
1038                 diff = argw - next_argw;
1039                 if ((next_arg & SLJIT_MEM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1040                         SLJIT_ASSERT(inp_flags & LOAD_DATA);
1041
1042                         compiler->cache_arg = SLJIT_IMM;
1043                         compiler->cache_argw = argw;
1044                         tmp_r = TMP_REG3;
1045                 }
1046
1047                 FAIL_IF(load_immediate(compiler, tmp_r, argw));
1048                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r));
1049         }
1050
             /* TMP_REG3 holds the address of the same operand with a nearby
                offset: access relative to it. */
1051         diff = argw - compiler->cache_argw;
1052         if (compiler->cache_arg == arg && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1053                 SLJIT_ASSERT(!(inp_flags & WRITE_BACK) && !(inst & UPDATE_REQ));
1054                 ADJUST_CACHED_IMM(diff);
1055                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
1056         }
1057
             /* TMP_REG3 holds a nearby immediate: adjust it to argw when it
                differs and use it as the index register. */
1058         if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1059                 inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
1060                 SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
1061                 if (compiler->cache_argw != argw) {
1062                         FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | IMM(diff)));
1063                         compiler->cache_argw = argw;
1064                 }
1065                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
1066         }
1067
             /* The next operand has the same offset: load the offset into
                TMP_REG3, record it as a cached immediate and use the indexed
                form. */
1068         if (argw == next_argw && (next_arg & SLJIT_MEM)) {
1069                 SLJIT_ASSERT(inp_flags & LOAD_DATA);
1070                 FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1071
1072                 compiler->cache_arg = SLJIT_IMM;
1073                 compiler->cache_argw = argw;
1074
1075                 inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
1076                 SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
1077                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
1078         }
1079
             /* The next operand is the same one with a nearby offset: cache
                the full address (base + offset) in TMP_REG3. */
1080         diff = argw - next_argw;
1081         if (arg == next_arg && !(inp_flags & WRITE_BACK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1082                 SLJIT_ASSERT(inp_flags & LOAD_DATA);
1083                 FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1084                 FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | A(TMP_REG3) | B(arg & REG_MASK)));
1085
1086                 compiler->cache_arg = arg;
1087                 compiler->cache_argw = argw;
1088
1089                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3));
1090         }
1091
             /* General case: load the offset (into TMP_REG3 when the next
                operand has a nearby offset, into tmp_r otherwise) and use the
                indexed form. */
1092         if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1093                 SLJIT_ASSERT(inp_flags & LOAD_DATA);
1094                 FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1095
1096                 compiler->cache_arg = SLJIT_IMM;
1097                 compiler->cache_argw = argw;
1098                 tmp_r = TMP_REG3;
1099         }
1100         else
1101                 FAIL_IF(load_immediate(compiler, tmp_r, argw));
1102
1103         /* Get the indexed version instead of the normal one. */
1104         inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
1105         SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
1106         return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
1107 #endif
1108 }
1109
1110 static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
1111 {
1112         if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
1113                 return compiler->error;
1114         return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
1115 }
1116
/* Common driver for the one and two operand operations: brings src1 and src2
   into registers, emits the operation through emit_single_op and writes the
   result back when dst is a memory operand.
     op          - operation passed on to emit_single_op
     input_flags - data type / load-store flags used for the memory accesses;
                   only its ALT_* bits are forwarded to emit_single_op.
                   ALT_KEEP_CACHE keeps the TMP_REG3 cache state of the
                   previous call.
     dst, dstw / src1, src1w / src2, src2w - the operands
   Within this function a register value of 0 means "operand not processed
   yet". */
1117 static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si input_flags,
1118         sljit_si dst, sljit_sw dstw,
1119         sljit_si src1, sljit_sw src1w,
1120         sljit_si src2, sljit_sw src2w)
1121 {
1122         /* arg1 goes to TMP_REG1 or src reg
1123            arg2 goes to TMP_REG2, imm or src reg
1124            TMP_REG3 can be used for caching
1125            result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
1126         sljit_si dst_r;
1127         sljit_si src1_r;
1128         sljit_si src2_r;
1129         sljit_si sugg_src2_r = TMP_REG2;
1130         sljit_si flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_SIGN_EXT | ALT_SET_FLAGS);
1131
1132         if (!(input_flags & ALT_KEEP_CACHE)) {
1133                 compiler->cache_arg = 0;
1134                 compiler->cache_argw = 0;
1135         }
1136
1137         /* Destination check. */
1138         if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
                     /* A move with an unused result emits nothing unless its
                        source is a memory operand. */
1139                 if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM))
1140                         return SLJIT_SUCCESS;
1141                 dst_r = TMP_REG2;
1142         }
1143         else if (FAST_IS_REG(dst)) {
1144                 dst_r = dst;
1145                 flags |= REG_DEST;
                     /* For moves src2 is loaded directly into the destination. */
1146                 if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
1147                         sugg_src2_r = dst_r;
1148         }
1149         else {
1150                 SLJIT_ASSERT(dst & SLJIT_MEM);
                     /* ARG_TEST: only ask whether the store is a single
                        instruction, nothing is emitted yet. */
1151                 if (getput_arg_fast(compiler, input_flags | ARG_TEST, TMP_REG2, dst, dstw)) {
1152                         flags |= FAST_DEST;
1153                         dst_r = TMP_REG2;
1154                 }
1155                 else {
1156                         flags |= SLOW_DEST;
1157                         dst_r = 0;
1158                 }
1159         }
1160
1161         /* Source 1. */
1162         if (FAST_IS_REG(src1)) {
1163                 src1_r = src1;
1164                 flags |= REG1_SOURCE;
1165         }
1166         else if (src1 & SLJIT_IMM) {
1167                 FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
1168                 src1_r = TMP_REG1;
1169         }
1170         else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
                     /* Non-zero result: the load was emitted or failed. */
1171                 FAIL_IF(compiler->error);
1172                 src1_r = TMP_REG1;
1173         }
1174         else
1175                 src1_r = 0;
1176
1177         /* Source 2. */
1178         if (FAST_IS_REG(src2)) {
1179                 src2_r = src2;
1180                 flags |= REG2_SOURCE;
                     /* Register to memory move: the source register is stored
                        directly. */
1181                 if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
1182                         dst_r = src2_r;
1183         }
1184         else if (src2 & SLJIT_IMM) {
1185                 FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
1186                 src2_r = sugg_src2_r;
1187         }
1188         else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) {
1189                 FAIL_IF(compiler->error);
1190                 src2_r = sugg_src2_r;
1191         }
1192         else
1193                 src2_r = 0;
1194
1195         /* src1_r, src2_r and dst_r can be zero (=unprocessed).
1196            All arguments are complex addressing modes, and it is a binary operator. */
1197         if (src1_r == 0 && src2_r == 0 && dst_r == 0) {
                     /* Load src1 last when it can share its address
                        computation with dst but not with src2. */
1198                 if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
1199                         FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
1200                         FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
1201                 }
1202                 else {
1203                         FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
1204                         FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
1205                 }
1206                 src1_r = TMP_REG1;
1207                 src2_r = TMP_REG2;
1208         }
             /* Two unprocessed operands: the first one is loaded with the
                second one passed as the "next" operand for caching. */
1209         else if (src1_r == 0 && src2_r == 0) {
1210                 FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
1211                 src1_r = TMP_REG1;
1212         }
1213         else if (src1_r == 0 && dst_r == 0) {
1214                 FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
1215                 src1_r = TMP_REG1;
1216         }
1217         else if (src2_r == 0 && dst_r == 0) {
1218                 FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
1219                 src2_r = sugg_src2_r;
1220         }
1221
1222         if (dst_r == 0)
1223                 dst_r = TMP_REG2;
1224
             /* Operands still unprocessed are loaded without a "next" operand. */
1225         if (src1_r == 0) {
1226                 FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
1227                 src1_r = TMP_REG1;
1228         }
1229
1230         if (src2_r == 0) {
1231                 FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
1232                 src2_r = sugg_src2_r;
1233         }
1234
1235         FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
1236
             /* Store the result when dst is a memory operand.
                NOTE(review): getput_arg_fast returns -1 after emitting, so the
                FAIL_IF around it always leaves the function here; presumably
                FAIL_IF returns compiler->error, which is the success code when
                nothing failed - confirm against the FAIL_IF definition. */
1237         if (flags & (FAST_DEST | SLOW_DEST)) {
1238                 if (flags & FAST_DEST)
1239                         FAIL_IF(getput_arg_fast(compiler, input_flags, dst_r, dst, dstw));
1240                 else
1241                         FAIL_IF(getput_arg(compiler, input_flags, dst_r, dst, dstw, 0, 0));
1242         }
1243         return SLJIT_SUCCESS;
1244 }
1245
1246 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
1247 {
1248 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1249         sljit_si int_op = op & SLJIT_INT_OP;
1250 #endif
1251
1252         CHECK_ERROR();
1253         CHECK(check_sljit_emit_op0(compiler, op));
1254
1255         op = GET_OPCODE(op);
1256         switch (op) {
1257         case SLJIT_BREAKPOINT:
1258         case SLJIT_NOP:
1259                 return push_inst(compiler, NOP);
1260         case SLJIT_LUMUL:
1261         case SLJIT_LSMUL:
1262                 FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
1263 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1264                 FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
1265                 return push_inst(compiler, (op == SLJIT_LUMUL ? MULHDU : MULHD) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
1266 #else
1267                 FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
1268                 return push_inst(compiler, (op == SLJIT_LUMUL ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
1269 #endif
1270         case SLJIT_UDIVMOD:
1271         case SLJIT_SDIVMOD:
1272                 FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
1273 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1274                 FAIL_IF(push_inst(compiler, (int_op ? (op == SLJIT_UDIVMOD ? DIVWU : DIVW) : (op == SLJIT_UDIVMOD ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)));
1275                 FAIL_IF(push_inst(compiler, (int_op ? MULLW : MULLD) | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
1276 #else
1277                 FAIL_IF(push_inst(compiler, (op == SLJIT_UDIVMOD ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)));
1278                 FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
1279 #endif
1280                 return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1));
1281         case SLJIT_UDIVI:
1282         case SLJIT_SDIVI:
1283 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1284                 return push_inst(compiler, (int_op ? (op == SLJIT_UDIVI ? DIVWU : DIVW) : (op == SLJIT_UDIVI ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1));
1285 #else
1286                 return push_inst(compiler, (op == SLJIT_UDIVI ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1));
1287 #endif
1288         }
1289
1290         return SLJIT_SUCCESS;
1291 }
1292
1293 #define EMIT_MOV(type, type_flags, type_cast) \
1294         emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw)
1295
1296 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
1297         sljit_si dst, sljit_sw dstw,
1298         sljit_si src, sljit_sw srcw)
1299 {
1300         sljit_si flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
1301         sljit_si op_flags = GET_ALL_FLAGS(op);
1302
1303         CHECK_ERROR();
1304         CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1305         ADJUST_LOCAL_OFFSET(dst, dstw);
1306         ADJUST_LOCAL_OFFSET(src, srcw);
1307
1308         op = GET_OPCODE(op);
1309         if ((src & SLJIT_IMM) && srcw == 0)
1310                 src = TMP_ZERO;
1311
1312         if (op_flags & SLJIT_SET_O)
1313                 FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
1314
1315         if (op_flags & SLJIT_INT_OP) {
1316                 if (op < SLJIT_NOT) {
1317                         if (FAST_IS_REG(src) && src == dst) {
1318                                 if (!TYPE_CAST_NEEDED(op))
1319                                         return SLJIT_SUCCESS;
1320                         }
1321 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1322                         if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
1323                                 op = SLJIT_MOV_UI;
1324                         if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
1325                                 op = SLJIT_MOVU_UI;
1326                         if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
1327                                 op = SLJIT_MOV_SI;
1328                         if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
1329                                 op = SLJIT_MOVU_SI;
1330 #endif
1331                 }
1332 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1333                 else {
1334                         /* Most operations expect sign extended arguments. */
1335                         flags |= INT_DATA | SIGNED_DATA;
1336                         if (src & SLJIT_IMM)
1337                                 srcw = (sljit_si)srcw;
1338                 }
1339 #endif
1340         }
1341
1342         switch (op) {
1343         case SLJIT_MOV:
1344         case SLJIT_MOV_P:
1345 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
1346         case SLJIT_MOV_UI:
1347         case SLJIT_MOV_SI:
1348 #endif
1349                 return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
1350
1351 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1352         case SLJIT_MOV_UI:
1353                 return EMIT_MOV(SLJIT_MOV_UI, INT_DATA, (sljit_ui));
1354
1355         case SLJIT_MOV_SI:
1356                 return EMIT_MOV(SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, (sljit_si));
1357 #endif
1358
1359         case SLJIT_MOV_UB:
1360                 return EMIT_MOV(SLJIT_MOV_UB, BYTE_DATA, (sljit_ub));
1361
1362         case SLJIT_MOV_SB:
1363                 return EMIT_MOV(SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA, (sljit_sb));
1364
1365         case SLJIT_MOV_UH:
1366                 return EMIT_MOV(SLJIT_MOV_UH, HALF_DATA, (sljit_uh));
1367
1368         case SLJIT_MOV_SH:
1369                 return EMIT_MOV(SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA, (sljit_sh));
1370
1371         case SLJIT_MOVU:
1372         case SLJIT_MOVU_P:
1373 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
1374         case SLJIT_MOVU_UI:
1375         case SLJIT_MOVU_SI:
1376 #endif
1377                 return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
1378
1379 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1380         case SLJIT_MOVU_UI:
1381                 return EMIT_MOV(SLJIT_MOV_UI, INT_DATA | WRITE_BACK, (sljit_ui));
1382
1383         case SLJIT_MOVU_SI:
1384                 return EMIT_MOV(SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, (sljit_si));
1385 #endif
1386
1387         case SLJIT_MOVU_UB:
1388                 return EMIT_MOV(SLJIT_MOV_UB, BYTE_DATA | WRITE_BACK, (sljit_ub));
1389
1390         case SLJIT_MOVU_SB:
1391                 return EMIT_MOV(SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA | WRITE_BACK, (sljit_sb));
1392
1393         case SLJIT_MOVU_UH:
1394                 return EMIT_MOV(SLJIT_MOV_UH, HALF_DATA | WRITE_BACK, (sljit_uh));
1395
1396         case SLJIT_MOVU_SH:
1397                 return EMIT_MOV(SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA | WRITE_BACK, (sljit_sh));
1398
1399         case SLJIT_NOT:
1400                 return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);
1401
1402         case SLJIT_NEG:
1403                 return emit_op(compiler, SLJIT_NEG, flags, dst, dstw, TMP_REG1, 0, src, srcw);
1404
1405         case SLJIT_CLZ:
1406 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1407                 return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_INT_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw);
1408 #else
1409                 return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw);
1410 #endif
1411         }
1412
1413         return SLJIT_SUCCESS;
1414 }
1415
1416 #undef EMIT_MOV
1417
/* Immediate classification predicates used while selecting instruction forms.
   Every macro is true only when src is an immediate (SLJIT_IMM set) and srcw
   satisfies the listed constraint. The bit masks are built in sljit_sw width:
   a plain ~0xffff0000 or ~0xffffffff is evaluated as a 32 bit unsigned int,
   so on 64 bit targets the upper half of srcw would escape the test. */

/* srcw is inside the [SIMM_MIN, SIMM_MAX] range. */
#define TEST_SL_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN)

/* Only the lowest 16 bits of srcw may be set. */
#define TEST_UL_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & ~0xffff))

/* The lowest 16 bits of srcw are zero (and, on 64 bit targets,
   srcw fits into the signed 32 bit range). */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define TEST_SH_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l)
#else
#define TEST_SH_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & 0xffff))
#endif

/* Only bits 16..31 of srcw may be set. */
#define TEST_UH_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & ~(sljit_sw)0xffff0000))

/* 64 bit targets: srcw is inside [-0x80000000, 0x7fff7fff].
   32 bit targets: any immediate is accepted. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define TEST_ADD_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l)
#else
#define TEST_ADD_IMM(src, srcw) \
	((src) & SLJIT_IMM)
#endif

/* 64 bit targets: only the lowest 32 bits of srcw may be set.
   32 bit targets: any immediate is accepted. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define TEST_UI_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & ~(sljit_sw)0xffffffff))
#else
#define TEST_UI_IMM(src, srcw) \
	((src) & SLJIT_IMM)
#endif
1450
1451 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
1452         sljit_si dst, sljit_sw dstw,
1453         sljit_si src1, sljit_sw src1w,
1454         sljit_si src2, sljit_sw src2w)
1455 {
1456         sljit_si flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
1457
1458         CHECK_ERROR();
1459         CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
1460         ADJUST_LOCAL_OFFSET(dst, dstw);
1461         ADJUST_LOCAL_OFFSET(src1, src1w);
1462         ADJUST_LOCAL_OFFSET(src2, src2w);
1463
1464         if ((src1 & SLJIT_IMM) && src1w == 0)
1465                 src1 = TMP_ZERO;
1466         if ((src2 & SLJIT_IMM) && src2w == 0)
1467                 src2 = TMP_ZERO;
1468
1469 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1470         if (op & SLJIT_INT_OP) {
1471                 /* Most operations expect sign extended arguments. */
1472                 flags |= INT_DATA | SIGNED_DATA;
1473                 if (src1 & SLJIT_IMM)
1474                         src1w = (sljit_si)(src1w);
1475                 if (src2 & SLJIT_IMM)
1476                         src2w = (sljit_si)(src2w);
1477                 if (GET_FLAGS(op))
1478                         flags |= ALT_SIGN_EXT;
1479         }
1480 #endif
1481         if (op & SLJIT_SET_O)
1482                 FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
1483         if (src2 == TMP_REG2)
1484                 flags |= ALT_KEEP_CACHE;
1485
1486         switch (GET_OPCODE(op)) {
1487         case SLJIT_ADD:
1488                 if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
1489                         if (TEST_SL_IMM(src2, src2w)) {
1490                                 compiler->imm = src2w & 0xffff;
1491                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1492                         }
1493                         if (TEST_SL_IMM(src1, src1w)) {
1494                                 compiler->imm = src1w & 0xffff;
1495                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
1496                         }
1497                         if (TEST_SH_IMM(src2, src2w)) {
1498                                 compiler->imm = (src2w >> 16) & 0xffff;
1499                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1500                         }
1501                         if (TEST_SH_IMM(src1, src1w)) {
1502                                 compiler->imm = (src1w >> 16) & 0xffff;
1503                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
1504                         }
1505                         /* Range between -1 and -32768 is covered above. */
1506                         if (TEST_ADD_IMM(src2, src2w)) {
1507                                 compiler->imm = src2w & 0xffffffff;
1508                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
1509                         }
1510                         if (TEST_ADD_IMM(src1, src1w)) {
1511                                 compiler->imm = src1w & 0xffffffff;
1512                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
1513                         }
1514                 }
1515                 if (!(GET_FLAGS(op) & (SLJIT_SET_E | SLJIT_SET_O))) {
1516                         if (TEST_SL_IMM(src2, src2w)) {
1517                                 compiler->imm = src2w & 0xffff;
1518                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1519                         }
1520                         if (TEST_SL_IMM(src1, src1w)) {
1521                                 compiler->imm = src1w & 0xffff;
1522                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
1523                         }
1524                 }
1525                 return emit_op(compiler, SLJIT_ADD, flags, dst, dstw, src1, src1w, src2, src2w);
1526
1527         case SLJIT_ADDC:
1528                 return emit_op(compiler, SLJIT_ADDC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
1529
1530         case SLJIT_SUB:
1531                 if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
1532                         if (TEST_SL_IMM(src2, -src2w)) {
1533                                 compiler->imm = (-src2w) & 0xffff;
1534                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1535                         }
1536                         if (TEST_SL_IMM(src1, src1w)) {
1537                                 compiler->imm = src1w & 0xffff;
1538                                 return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
1539                         }
1540                         if (TEST_SH_IMM(src2, -src2w)) {
1541                                 compiler->imm = ((-src2w) >> 16) & 0xffff;
1542                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1543                         }
1544                         /* Range between -1 and -32768 is covered above. */
1545                         if (TEST_ADD_IMM(src2, -src2w)) {
1546                                 compiler->imm = -src2w & 0xffffffff;
1547                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
1548                         }
1549                 }
1550                 if (dst == SLJIT_UNUSED && (op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S)) && !(op & (SLJIT_SET_O | SLJIT_SET_C))) {
1551                         if (!(op & SLJIT_SET_U)) {
1552                                 /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */
1553                                 if (TEST_SL_IMM(src2, src2w)) {
1554                                         compiler->imm = src2w & 0xffff;
1555                                         return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1556                                 }
1557                                 if (GET_FLAGS(op) == SLJIT_SET_E && TEST_SL_IMM(src1, src1w)) {
1558                                         compiler->imm = src1w & 0xffff;
1559                                         return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
1560                                 }
1561                         }
1562                         if (!(op & (SLJIT_SET_E | SLJIT_SET_S))) {
1563                                 /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */
1564                                 if (TEST_UL_IMM(src2, src2w)) {
1565                                         compiler->imm = src2w & 0xffff;
1566                                         return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1567                                 }
1568                                 return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
1569                         }
1570                         if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= 0x7fff) {
1571                                 compiler->imm = src2w;
1572                                 return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1573                         }
1574                         return emit_op(compiler, SLJIT_SUB, flags | ((op & SLJIT_SET_U) ? ALT_FORM4 : 0) | ((op & (SLJIT_SET_E | SLJIT_SET_S)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
1575                 }
1576                 if (!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))) {
1577                         if (TEST_SL_IMM(src2, -src2w)) {
1578                                 compiler->imm = (-src2w) & 0xffff;
1579                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1580                         }
1581                 }
1582                 /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */
1583                 return emit_op(compiler, SLJIT_SUB, flags | (!(op & SLJIT_SET_U) ? 0 : ALT_FORM6), dst, dstw, src1, src1w, src2, src2w);
1584
1585         case SLJIT_SUBC:
1586                 return emit_op(compiler, SLJIT_SUBC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
1587
1588         case SLJIT_MUL:
1589 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1590                 if (op & SLJIT_INT_OP)
1591                         flags |= ALT_FORM2;
1592 #endif
1593                 if (!GET_FLAGS(op)) {
1594                         if (TEST_SL_IMM(src2, src2w)) {
1595                                 compiler->imm = src2w & 0xffff;
1596                                 return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1597                         }
1598                         if (TEST_SL_IMM(src1, src1w)) {
1599                                 compiler->imm = src1w & 0xffff;
1600                                 return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
1601                         }
1602                 }
1603                 return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w);
1604
1605         case SLJIT_AND:
1606         case SLJIT_OR:
1607         case SLJIT_XOR:
1608                 /* Commutative unsigned operations. */
1609                 if (!GET_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) {
1610                         if (TEST_UL_IMM(src2, src2w)) {
1611                                 compiler->imm = src2w;
1612                                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1613                         }
1614                         if (TEST_UL_IMM(src1, src1w)) {
1615                                 compiler->imm = src1w;
1616                                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
1617                         }
1618                         if (TEST_UH_IMM(src2, src2w)) {
1619                                 compiler->imm = (src2w >> 16) & 0xffff;
1620                                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1621                         }
1622                         if (TEST_UH_IMM(src1, src1w)) {
1623                                 compiler->imm = (src1w >> 16) & 0xffff;
1624                                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
1625                         }
1626                 }
1627                 if (!GET_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) {
1628                         if (TEST_UI_IMM(src2, src2w)) {
1629                                 compiler->imm = src2w;
1630                                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1631                         }
1632                         if (TEST_UI_IMM(src1, src1w)) {
1633                                 compiler->imm = src1w;
1634                                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
1635                         }
1636                 }
1637                 return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
1638
1639         case SLJIT_ASHR:
1640                 if (op & SLJIT_KEEP_FLAGS)
1641                         flags |= ALT_FORM3;
1642                 /* Fall through. */
1643         case SLJIT_SHL:
1644         case SLJIT_LSHR:
1645 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1646                 if (op & SLJIT_INT_OP)
1647                         flags |= ALT_FORM2;
1648 #endif
1649                 if (src2 & SLJIT_IMM) {
1650                         compiler->imm = src2w;
1651                         return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1652                 }
1653                 return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
1654         }
1655
1656         return SLJIT_SUCCESS;
1657 }
1658
1659 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
1660 {
1661         CHECK_REG_INDEX(check_sljit_get_register_index(reg));
1662         return reg_map[reg];
1663 }
1664
1665 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
1666 {
1667         CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
1668         return reg;
1669 }
1670
1671 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
1672         void *instruction, sljit_si size)
1673 {
1674         CHECK_ERROR();
1675         CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
1676
1677         return push_inst(compiler, *(sljit_ins*)instruction);
1678 }
1679
1680 /* --------------------------------------------------------------------- */
1681 /*  Floating point operators                                             */
1682 /* --------------------------------------------------------------------- */
1683
1684 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
1685 {
1686 #ifdef SLJIT_IS_FPU_AVAILABLE
1687         return SLJIT_IS_FPU_AVAILABLE;
1688 #else
1689         /* Available by default. */
1690         return 1;
1691 #endif
1692 }
1693
/* FLOAT_DATA: DOUBLE_DATA extended with the SLJIT_SINGLE_OP bit of op moved
   down by six bit positions.
   SELECT_FOP: yields single when SLJIT_SINGLE_OP is set in op, double otherwise.
   All macro arguments are parenthesized, so compound expressions can be
   passed safely. */
#define FLOAT_DATA(op) (DOUBLE_DATA | (((op) & SLJIT_SINGLE_OP) >> 6))
#define SELECT_FOP(op, single, double) (((op) & SLJIT_SINGLE_OP) ? (single) : (double))

/* Offsets (relative to SLJIT_SP) of the stack area used to move values
   between integer and floating point registers. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define FLOAT_TMP_MEM_OFFSET (6 * sizeof(sljit_sw))
#else
#define FLOAT_TMP_MEM_OFFSET (2 * sizeof(sljit_sw))

/* Positions of the two machine words of the temporary area;
   which one is the low word depends on the byte order. */
#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
#define FLOAT_TMP_MEM_OFFSET_LOW (2 * sizeof(sljit_sw))
#define FLOAT_TMP_MEM_OFFSET_HI (3 * sizeof(sljit_sw))
#else
#define FLOAT_TMP_MEM_OFFSET_LOW (3 * sizeof(sljit_sw))
#define FLOAT_TMP_MEM_OFFSET_HI (2 * sizeof(sljit_sw))
#endif

#endif /* SLJIT_CONFIG_PPC_64 */
1711
1712 static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
1713         sljit_si dst, sljit_sw dstw,
1714         sljit_si src, sljit_sw srcw)
1715 {
1716         if (src & SLJIT_MEM) {
1717                 /* We can ignore the temporary data store on the stack from caching point of view. */
1718                 FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
1719                 src = TMP_FREG1;
1720         }
1721
1722 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1723         op = GET_OPCODE(op);
1724         FAIL_IF(push_inst(compiler, (op == SLJIT_CONVI_FROMD ? FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src)));
1725
1726         if (dst == SLJIT_UNUSED)
1727                 return SLJIT_SUCCESS;
1728
1729         if (op == SLJIT_CONVW_FROMD) {
1730                 if (FAST_IS_REG(dst)) {
1731                         FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0));
1732                         return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
1733                 }
1734                 return emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
1735         }
1736
1737 #else
1738         FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src)));
1739
1740         if (dst == SLJIT_UNUSED)
1741                 return SLJIT_SUCCESS;
1742 #endif
1743
1744         if (FAST_IS_REG(dst)) {
1745                 FAIL_IF(load_immediate(compiler, TMP_REG1, FLOAT_TMP_MEM_OFFSET));
1746                 FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_SP) | B(TMP_REG1)));
1747                 return emit_op_mem2(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
1748         }
1749
1750         SLJIT_ASSERT(dst & SLJIT_MEM);
1751
1752         if (dst & OFFS_REG_MASK) {
1753                 dstw &= 0x3;
1754                 if (dstw) {
1755 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
1756                         FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | (dstw << 11) | ((31 - dstw) << 1)));
1757 #else
1758                         FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(dst), dstw, 63 - dstw, 1)));
1759 #endif
1760                         dstw = TMP_REG1;
1761                 }
1762                 else
1763                         dstw = OFFS_REG(dst);
1764         }
1765         else {
1766                 if ((dst & REG_MASK) && !dstw) {
1767                         dstw = dst & REG_MASK;
1768                         dst = 0;
1769                 }
1770                 else {
1771                         /* This works regardless we have SLJIT_MEM1 or SLJIT_MEM0. */
1772                         FAIL_IF(load_immediate(compiler, TMP_REG1, dstw));
1773                         dstw = TMP_REG1;
1774                 }
1775         }
1776
1777         return push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(dst & REG_MASK) | B(dstw));
1778 }
1779
1780 static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
1781         sljit_si dst, sljit_sw dstw,
1782         sljit_si src, sljit_sw srcw)
1783 {
1784 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1785
1786         sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1787
1788         if (src & SLJIT_IMM) {
1789                 if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
1790                         srcw = (sljit_si)srcw;
1791                 FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
1792                 src = TMP_REG1;
1793         }
1794         else if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) {
1795                 if (FAST_IS_REG(src))
1796                         FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1)));
1797                 else
1798                         FAIL_IF(emit_op_mem2(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
1799                 src = TMP_REG1;
1800         }
1801
1802         if (FAST_IS_REG(src)) {
1803                 FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
1804                 FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, dst, dstw));
1805         }
1806         else
1807                 FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
1808
1809         FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1)));
1810
1811         if (dst & SLJIT_MEM)
1812                 return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
1813         if (op & SLJIT_SINGLE_OP)
1814                 return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
1815         return SLJIT_SUCCESS;
1816
1817 #else
1818
1819         sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1820         sljit_si invert_sign = 1;
1821
1822         if (src & SLJIT_IMM) {
1823                 FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ 0x80000000));
1824                 src = TMP_REG1;
1825                 invert_sign = 0;
1826         }
1827         else if (!FAST_IS_REG(src)) {
1828                 FAIL_IF(emit_op_mem2(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
1829                 src = TMP_REG1;
1830         }
1831
1832         /* First, a special double floating point value is constructed: (2^53 + (input xor (2^31)))
1833            The double precision format has exactly 53 bit precision, so the lower 32 bit represents
1834            the lower 32 bit of such value. The result of xor 2^31 is the same as adding 0x80000000
1835            to the input, which shifts it into the 0 - 0xffffffff range. To get the converted floating
1836            point value, we need to substract 2^53 + 2^31 from the constructed value. */
1837         FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330));
1838         if (invert_sign)
1839                 FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000));
1840         FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
1841         FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI));
1842         FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000));
1843         FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
1844         FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
1845         FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
1846
1847         FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2)));
1848
1849         if (dst & SLJIT_MEM)
1850                 return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
1851         if (op & SLJIT_SINGLE_OP)
1852                 return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
1853         return SLJIT_SUCCESS;
1854
1855 #endif
1856 }
1857
1858 static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
1859         sljit_si src1, sljit_sw src1w,
1860         sljit_si src2, sljit_sw src2w)
1861 {
1862         if (src1 & SLJIT_MEM) {
1863                 FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
1864                 src1 = TMP_FREG1;
1865         }
1866
1867         if (src2 & SLJIT_MEM) {
1868                 FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
1869                 src2 = TMP_FREG2;
1870         }
1871
1872         return push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2));
1873 }
1874
1875 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
1876         sljit_si dst, sljit_sw dstw,
1877         sljit_si src, sljit_sw srcw)
1878 {
1879         sljit_si dst_r;
1880
1881         CHECK_ERROR();
1882         compiler->cache_arg = 0;
1883         compiler->cache_argw = 0;
1884
1885         SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error);
1886         SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
1887
1888         if (GET_OPCODE(op) == SLJIT_CONVD_FROMS)
1889                 op ^= SLJIT_SINGLE_OP;
1890
1891         dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1892
1893         if (src & SLJIT_MEM) {
1894                 FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
1895                 src = dst_r;
1896         }
1897
1898         switch (GET_OPCODE(op)) {
1899         case SLJIT_CONVD_FROMS:
1900                 op ^= SLJIT_SINGLE_OP;
1901                 if (op & SLJIT_SINGLE_OP) {
1902                         FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(src)));
1903                         break;
1904                 }
1905                 /* Fall through. */
1906         case SLJIT_DMOV:
1907                 if (src != dst_r) {
1908                         if (dst_r != TMP_FREG1)
1909                                 FAIL_IF(push_inst(compiler, FMR | FD(dst_r) | FB(src)));
1910                         else
1911                                 dst_r = src;
1912                 }
1913                 break;
1914         case SLJIT_DNEG:
1915                 FAIL_IF(push_inst(compiler, FNEG | FD(dst_r) | FB(src)));
1916                 break;
1917         case SLJIT_DABS:
1918                 FAIL_IF(push_inst(compiler, FABS | FD(dst_r) | FB(src)));
1919                 break;
1920         }
1921
1922         if (dst & SLJIT_MEM)
1923                 FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0));
1924         return SLJIT_SUCCESS;
1925 }
1926
1927 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
1928         sljit_si dst, sljit_sw dstw,
1929         sljit_si src1, sljit_sw src1w,
1930         sljit_si src2, sljit_sw src2w)
1931 {
1932         sljit_si dst_r, flags = 0;
1933
1934         CHECK_ERROR();
1935         CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
1936         ADJUST_LOCAL_OFFSET(dst, dstw);
1937         ADJUST_LOCAL_OFFSET(src1, src1w);
1938         ADJUST_LOCAL_OFFSET(src2, src2w);
1939
1940         compiler->cache_arg = 0;
1941         compiler->cache_argw = 0;
1942
1943         dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;
1944
1945         if (src1 & SLJIT_MEM) {
1946                 if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
1947                         FAIL_IF(compiler->error);
1948                         src1 = TMP_FREG1;
1949                 } else
1950                         flags |= ALT_FORM1;
1951         }
1952
1953         if (src2 & SLJIT_MEM) {
1954                 if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
1955                         FAIL_IF(compiler->error);
1956                         src2 = TMP_FREG2;
1957                 } else
1958                         flags |= ALT_FORM2;
1959         }
1960
1961         if ((flags & (ALT_FORM1 | ALT_FORM2)) == (ALT_FORM1 | ALT_FORM2)) {
1962                 if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
1963                         FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
1964                         FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
1965                 }
1966                 else {
1967                         FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
1968                         FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
1969                 }
1970         }
1971         else if (flags & ALT_FORM1)
1972                 FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
1973         else if (flags & ALT_FORM2)
1974                 FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
1975
1976         if (flags & ALT_FORM1)
1977                 src1 = TMP_FREG1;
1978         if (flags & ALT_FORM2)
1979                 src2 = TMP_FREG2;
1980
1981         switch (GET_OPCODE(op)) {
1982         case SLJIT_DADD:
1983                 FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_r) | FA(src1) | FB(src2)));
1984                 break;
1985
1986         case SLJIT_DSUB:
1987                 FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_r) | FA(src1) | FB(src2)));
1988                 break;
1989
1990         case SLJIT_DMUL:
1991                 FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_r) | FA(src1) | FC(src2) /* FMUL use FC as src2 */));
1992                 break;
1993
1994         case SLJIT_DDIV:
1995                 FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_r) | FA(src1) | FB(src2)));
1996                 break;
1997         }
1998
1999         if (dst_r == TMP_FREG2)
2000                 FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
2001
2002         return SLJIT_SUCCESS;
2003 }
2004
2005 #undef FLOAT_DATA
2006 #undef SELECT_FOP
2007
2008 /* --------------------------------------------------------------------- */
2009 /*  Other instructions                                                   */
2010 /* --------------------------------------------------------------------- */
2011
2012 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
2013 {
2014         CHECK_ERROR();
2015         CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
2016         ADJUST_LOCAL_OFFSET(dst, dstw);
2017
2018         /* For UNUSED dst. Uncommon, but possible. */
2019         if (dst == SLJIT_UNUSED)
2020                 return SLJIT_SUCCESS;
2021
2022         if (FAST_IS_REG(dst))
2023                 return push_inst(compiler, MFLR | D(dst));
2024
2025         /* Memory. */
2026         FAIL_IF(push_inst(compiler, MFLR | D(TMP_REG2)));
2027         return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
2028 }
2029
2030 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
2031 {
2032         CHECK_ERROR();
2033         CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
2034         ADJUST_LOCAL_OFFSET(src, srcw);
2035
2036         if (FAST_IS_REG(src))
2037                 FAIL_IF(push_inst(compiler, MTLR | S(src)));
2038         else {
2039                 if (src & SLJIT_MEM)
2040                         FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
2041                 else if (src & SLJIT_IMM)
2042                         FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
2043                 FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2)));
2044         }
2045         return push_inst(compiler, BLR);
2046 }
2047
2048 /* --------------------------------------------------------------------- */
2049 /*  Conditional instructions                                             */
2050 /* --------------------------------------------------------------------- */
2051
2052 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2053 {
2054         struct sljit_label *label;
2055
2056         CHECK_ERROR_PTR();
2057         CHECK_PTR(check_sljit_emit_label(compiler));
2058
2059         if (compiler->last_label && compiler->last_label->size == compiler->size)
2060                 return compiler->last_label;
2061
2062         label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2063         PTR_FAIL_IF(!label);
2064         set_label(label, compiler);
2065         return label;
2066 }
2067
2068 static sljit_ins get_bo_bi_flags(sljit_si type)
2069 {
2070         switch (type) {
2071         case SLJIT_EQUAL:
2072                 return (12 << 21) | (2 << 16);
2073
2074         case SLJIT_NOT_EQUAL:
2075                 return (4 << 21) | (2 << 16);
2076
2077         case SLJIT_LESS:
2078         case SLJIT_D_LESS:
2079                 return (12 << 21) | ((4 + 0) << 16);
2080
2081         case SLJIT_GREATER_EQUAL:
2082         case SLJIT_D_GREATER_EQUAL:
2083                 return (4 << 21) | ((4 + 0) << 16);
2084
2085         case SLJIT_GREATER:
2086         case SLJIT_D_GREATER:
2087                 return (12 << 21) | ((4 + 1) << 16);
2088
2089         case SLJIT_LESS_EQUAL:
2090         case SLJIT_D_LESS_EQUAL:
2091                 return (4 << 21) | ((4 + 1) << 16);
2092
2093         case SLJIT_SIG_LESS:
2094                 return (12 << 21) | (0 << 16);
2095
2096         case SLJIT_SIG_GREATER_EQUAL:
2097                 return (4 << 21) | (0 << 16);
2098
2099         case SLJIT_SIG_GREATER:
2100                 return (12 << 21) | (1 << 16);
2101
2102         case SLJIT_SIG_LESS_EQUAL:
2103                 return (4 << 21) | (1 << 16);
2104
2105         case SLJIT_OVERFLOW:
2106         case SLJIT_MUL_OVERFLOW:
2107                 return (12 << 21) | (3 << 16);
2108
2109         case SLJIT_NOT_OVERFLOW:
2110         case SLJIT_MUL_NOT_OVERFLOW:
2111                 return (4 << 21) | (3 << 16);
2112
2113         case SLJIT_D_EQUAL:
2114                 return (12 << 21) | ((4 + 2) << 16);
2115
2116         case SLJIT_D_NOT_EQUAL:
2117                 return (4 << 21) | ((4 + 2) << 16);
2118
2119         case SLJIT_D_UNORDERED:
2120                 return (12 << 21) | ((4 + 3) << 16);
2121
2122         case SLJIT_D_ORDERED:
2123                 return (4 << 21) | ((4 + 3) << 16);
2124
2125         default:
2126                 SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3);
2127                 return (20 << 21);
2128         }
2129 }
2130
2131 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
2132 {
2133         struct sljit_jump *jump;
2134         sljit_ins bo_bi_flags;
2135
2136         CHECK_ERROR_PTR();
2137         CHECK_PTR(check_sljit_emit_jump(compiler, type));
2138
2139         bo_bi_flags = get_bo_bi_flags(type & 0xff);
2140         if (!bo_bi_flags)
2141                 return NULL;
2142
2143         jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2144         PTR_FAIL_IF(!jump);
2145         set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2146         type &= 0xff;
2147
2148         /* In PPC, we don't need to touch the arguments. */
2149         if (type < SLJIT_JUMP)
2150                 jump->flags |= IS_COND;
2151 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
2152         if (type >= SLJIT_CALL0)
2153                 jump->flags |= IS_CALL;
2154 #endif
2155
2156         PTR_FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
2157         PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_CALL_REG)));
2158         jump->addr = compiler->size;
2159         PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0)));
2160         return jump;
2161 }
2162
2163 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
2164 {
2165         struct sljit_jump *jump = NULL;
2166         sljit_si src_r;
2167
2168         CHECK_ERROR();
2169         CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2170         ADJUST_LOCAL_OFFSET(src, srcw);
2171
2172         if (FAST_IS_REG(src)) {
2173 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
2174                 if (type >= SLJIT_CALL0) {
2175                         FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src)));
2176                         src_r = TMP_CALL_REG;
2177                 }
2178                 else
2179                         src_r = src;
2180 #else
2181                 src_r = src;
2182 #endif
2183         } else if (src & SLJIT_IMM) {
2184                 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2185                 FAIL_IF(!jump);
2186                 set_jump(jump, compiler, JUMP_ADDR);
2187                 jump->u.target = srcw;
2188 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
2189                 if (type >= SLJIT_CALL0)
2190                         jump->flags |= IS_CALL;
2191 #endif
2192                 FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
2193                 src_r = TMP_CALL_REG;
2194         }
2195         else {
2196                 FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw));
2197                 src_r = TMP_CALL_REG;
2198         }
2199
2200         FAIL_IF(push_inst(compiler, MTCTR | S(src_r)));
2201         if (jump)
2202                 jump->addr = compiler->size;
2203         return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0));
2204 }
2205
/* Get a bit from CR, all other bits are zeroed.
   MFCR copies the condition register into dst, then RLWINM rotates it left
   by (1 + bit) with MB = ME = 31, so only the selected bit survives, in the
   least significant position of dst.
   Both macros need a `compiler` variable in scope and go through FAIL_IF,
   which is expected to leave the enclosing function when push_inst fails.
   They are wrapped in do { } while (0) so that each behaves as exactly one
   statement (safe under an unbraced if / else); callers supply the ';'. */
#define GET_CR_BIT(bit, dst) \
	do { \
		FAIL_IF(push_inst(compiler, MFCR | D(dst))); \
		FAIL_IF(push_inst(compiler, RLWINM | S(dst) | A(dst) | ((1 + (bit)) << 11) | (31 << 6) | (31 << 1))); \
	} while (0)

/* Flips the lowest bit of dst (XORI with 1); used to negate a bit obtained
   with GET_CR_BIT. */
#define INVERT_BIT(dst) \
	do { \
		FAIL_IF(push_inst(compiler, XORI | S(dst) | A(dst) | 0x1)); \
	} while (0)
2213
2214 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
2215         sljit_si dst, sljit_sw dstw,
2216         sljit_si src, sljit_sw srcw,
2217         sljit_si type)
2218 {
2219         sljit_si reg, input_flags;
2220         sljit_si flags = GET_ALL_FLAGS(op);
2221         sljit_sw original_dstw = dstw;
2222
2223         CHECK_ERROR();
2224         CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
2225         ADJUST_LOCAL_OFFSET(dst, dstw);
2226
2227         if (dst == SLJIT_UNUSED)
2228                 return SLJIT_SUCCESS;
2229
2230         op = GET_OPCODE(op);
2231         reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
2232
2233         compiler->cache_arg = 0;
2234         compiler->cache_argw = 0;
2235         if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
2236                 ADJUST_LOCAL_OFFSET(src, srcw);
2237 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
2238                 input_flags = (flags & SLJIT_INT_OP) ? INT_DATA : WORD_DATA;
2239 #else
2240                 input_flags = WORD_DATA;
2241 #endif
2242                 FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
2243                 src = TMP_REG1;
2244                 srcw = 0;
2245         }
2246
2247         switch (type & 0xff) {
2248         case SLJIT_EQUAL:
2249                 GET_CR_BIT(2, reg);
2250                 break;
2251
2252         case SLJIT_NOT_EQUAL:
2253                 GET_CR_BIT(2, reg);
2254                 INVERT_BIT(reg);
2255                 break;
2256
2257         case SLJIT_LESS:
2258         case SLJIT_D_LESS:
2259                 GET_CR_BIT(4 + 0, reg);
2260                 break;
2261
2262         case SLJIT_GREATER_EQUAL:
2263         case SLJIT_D_GREATER_EQUAL:
2264                 GET_CR_BIT(4 + 0, reg);
2265                 INVERT_BIT(reg);
2266                 break;
2267
2268         case SLJIT_GREATER:
2269         case SLJIT_D_GREATER:
2270                 GET_CR_BIT(4 + 1, reg);
2271                 break;
2272
2273         case SLJIT_LESS_EQUAL:
2274         case SLJIT_D_LESS_EQUAL:
2275                 GET_CR_BIT(4 + 1, reg);
2276                 INVERT_BIT(reg);
2277                 break;
2278
2279         case SLJIT_SIG_LESS:
2280                 GET_CR_BIT(0, reg);
2281                 break;
2282
2283         case SLJIT_SIG_GREATER_EQUAL:
2284                 GET_CR_BIT(0, reg);
2285                 INVERT_BIT(reg);
2286                 break;
2287
2288         case SLJIT_SIG_GREATER:
2289                 GET_CR_BIT(1, reg);
2290                 break;
2291
2292         case SLJIT_SIG_LESS_EQUAL:
2293                 GET_CR_BIT(1, reg);
2294                 INVERT_BIT(reg);
2295                 break;
2296
2297         case SLJIT_OVERFLOW:
2298         case SLJIT_MUL_OVERFLOW:
2299                 GET_CR_BIT(3, reg);
2300                 break;
2301
2302         case SLJIT_NOT_OVERFLOW:
2303         case SLJIT_MUL_NOT_OVERFLOW:
2304                 GET_CR_BIT(3, reg);
2305                 INVERT_BIT(reg);
2306                 break;
2307
2308         case SLJIT_D_EQUAL:
2309                 GET_CR_BIT(4 + 2, reg);
2310                 break;
2311
2312         case SLJIT_D_NOT_EQUAL:
2313                 GET_CR_BIT(4 + 2, reg);
2314                 INVERT_BIT(reg);
2315                 break;
2316
2317         case SLJIT_D_UNORDERED:
2318                 GET_CR_BIT(4 + 3, reg);
2319                 break;
2320
2321         case SLJIT_D_ORDERED:
2322                 GET_CR_BIT(4 + 3, reg);
2323                 INVERT_BIT(reg);
2324                 break;
2325
2326         default:
2327                 SLJIT_ASSERT_STOP();
2328                 break;
2329         }
2330
2331         if (op < SLJIT_ADD) {
2332 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
2333                 if (op == SLJIT_MOV)
2334                         input_flags = WORD_DATA;
2335                 else {
2336                         op = SLJIT_MOV_UI;
2337                         input_flags = INT_DATA;
2338                 }
2339 #else
2340                 op = SLJIT_MOV;
2341                 input_flags = WORD_DATA;
2342 #endif
2343                 if (reg != TMP_REG2)
2344                         return SLJIT_SUCCESS;
2345                 return emit_op(compiler, op, input_flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
2346         }
2347
2348 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2349                 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2350         compiler->skip_checks = 1;
2351 #endif
2352         return sljit_emit_op2(compiler, op | flags, dst, original_dstw, src, srcw, TMP_REG2, 0);
2353 }
2354
2355 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
2356 {
2357         struct sljit_const *const_;
2358         sljit_si reg;
2359
2360         CHECK_ERROR_PTR();
2361         CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
2362         ADJUST_LOCAL_OFFSET(dst, dstw);
2363
2364         const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2365         PTR_FAIL_IF(!const_);
2366         set_const(const_, compiler);
2367
2368         reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
2369
2370         PTR_FAIL_IF(emit_const(compiler, reg, init_value));
2371
2372         if (dst & SLJIT_MEM)
2373                 PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
2374         return const_;
2375 }