chiark / gitweb /
changes to file patched by no_jit_ppc64-el.patch
[pcre3.git] / sljit / sljitNativePPC_common.c
1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
28 {
29         return "PowerPC" SLJIT_CPUINFO;
30 }
31
32 /* Length of an instruction word.
33    Both for ppc-32 and ppc-64. */
34 typedef sljit_ui sljit_ins;
35
/* SLJIT_PPC_STACK_FRAME_V2 is enabled for 32 bit builds on AIX and for every
   64 bit build. It selects the link register save slot and the frame size
   used by sljit_emit_enter, sljit_set_context and sljit_emit_return. */
36 #if ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && (defined _AIX)) \
37         || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
38 #define SLJIT_PPC_STACK_FRAME_V2 1
39 #endif
40
/* Presumably declares _sync_cache_range(), which ppc_cache_flush() calls
   on AIX - TODO confirm against the AIX headers. */
41 #ifdef _AIX
42 #include <sys/cache.h>
43 #endif
44
/* Little endian builds set SLJIT_PASS_ENTRY_ADDR_TO_CALL. When it is set,
   TMP_CALL_REG gets its own reg_map entry and detect_jump_type() does not
   turn calls into relative branches. */
45 #if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
46 #define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1
47 #endif
48
/* Flushes the caches for the instruction words in [from, to).
   The implementation is selected at compile time:
     - AIX: a single _sync_cache_range() call covering the byte length
       of the range.
     - GCC style inline assembly, POWER (_ARCH_PWR or _ARCH_PWR2):
       clf + dcs for every instruction word, followed by one ics.
     - GCC style inline assembly, PowerPC: dcbf + sync + icbi for every
       instruction word, followed by one isync.
   Every other compiler / mode combination stops the build with #error. */
49 static void ppc_cache_flush(sljit_ins *from, sljit_ins *to)
50 {
51 #ifdef _AIX
        /* The length argument is the distance between the two pointers in bytes. */
52         _sync_cache_range((caddr_t)from, (int)((size_t)to - (size_t)from));
53 #elif defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM)
54 #       if defined(_ARCH_PWR) || defined(_ARCH_PWR2)
55         /* Cache flush for POWER architecture. */
        /* The loop advances by one instruction word (sljit_ins) per iteration. */
56         while (from < to) {
57                 __asm__ volatile (
58                         "clf 0, %0\n"
59                         "dcs\n"
60                         : : "r"(from)
61                 );
62                 from++;
63         }
64         __asm__ volatile ( "ics" );
65 #       elif defined(_ARCH_COM) && !defined(_ARCH_PPC)
66 #       error "Cache flush is not implemented for PowerPC/POWER common mode."
67 #       else
68         /* Cache flush for PowerPC architecture. */
        /* The loop advances by one instruction word (sljit_ins) per iteration. */
69         while (from < to) {
70                 __asm__ volatile (
71                         "dcbf 0, %0\n"
72                         "sync\n"
73                         "icbi 0, %0\n"
74                         : : "r"(from)
75                 );
76                 from++;
77         }
78         __asm__ volatile ( "isync" );
79 #       endif
80 #       ifdef __xlc__
81 #       warning "This file may fail to compile if -qfuncsect is used"
82 #       endif
83 #elif defined(__xlc__)
84 #error "Please enable GCC syntax for inline assembly statements with -qasm=gcc"
85 #else
86 #error "This platform requires a cache flush implementation."
87 #endif /* _AIX */
88 }
89
/* Indexes of the temporary registers. They follow the SLJIT_NO_REGISTERS
   public register indexes inside reg_map. */
90 #define TMP_REG1        (SLJIT_NO_REGISTERS + 1)
91 #define TMP_REG2        (SLJIT_NO_REGISTERS + 2)
92 #define TMP_REG3        (SLJIT_NO_REGISTERS + 3)
93 #define TMP_ZERO        (SLJIT_NO_REGISTERS + 4)
94
/* The register used for calls has a dedicated reg_map entry only when
   SLJIT_PASS_ENTRY_ADDR_TO_CALL is enabled; otherwise it aliases TMP_REG2. */
95 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
96 #define TMP_CALL_REG    (SLJIT_NO_REGISTERS + 5)
97 #else
98 #define TMP_CALL_REG    TMP_REG2
99 #endif
100
/* Floating point temporaries. These are used directly by FD/FA/FB/FC,
   which do not go through reg_map. */
101 #define TMP_FREG1       (0)
102 #define TMP_FREG2       (SLJIT_FLOAT_REG6 + 1)
103
/* Translates a register index into the value that D/S/A/B/C place into the
   register fields of an instruction. Index 0 maps to 0, and the last entry
   belongs to index SLJIT_NO_REGISTERS + 5 (TMP_CALL_REG when it is a
   separate register). */
104 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = {
105         0, 3, 4, 5, 6, 7, 30, 29, 28, 27, 26, 1, 8, 9, 10, 31, 12
106 };
107
108 /* --------------------------------------------------------------------- */
109 /*  Instruction forms                                                    */
110 /* --------------------------------------------------------------------- */
/* D/S/A/B/C take a register index, translate it through reg_map and shift
   the result to bit 21, 21, 16, 11 or 6 of the instruction word. */
111 #define D(d)            (reg_map[d] << 21)
112 #define S(s)            (reg_map[s] << 21)
113 #define A(a)            (reg_map[a] << 16)
114 #define B(b)            (reg_map[b] << 11)
115 #define C(c)            (reg_map[c] << 6)
/* FD/FA/FB/FC place their argument at the same bit positions, but without
   the reg_map translation. */
116 #define FD(fd)          ((fd) << 21)
117 #define FA(fa)          ((fa) << 16)
118 #define FB(fb)          ((fb) << 11)
119 #define FC(fc)          ((fc) << 6)
/* Keeps the low 16 bits of an immediate value. */
120 #define IMM(imm)        ((imm) & 0xffff)
/* Places the argument at bit 21 without the reg_map translation. */
121 #define CRD(d)          ((d) << 21)
122
/* Instruction bit sections. */

/* OE and Rc flag (see ALT_SET_FLAGS): the ALT_SET_FLAGS bit is kept at its
   own position and is also copied, shifted right by 10, into bit 0.
   The argument is parenthesized, so any expression can be passed. */
#define OERC(flags)     ((((flags) & ALT_SET_FLAGS) >> 10) | ((flags) & ALT_SET_FLAGS))
/* Rc flag (see ALT_SET_FLAGS). */
#define RC(flags)       (((flags) & ALT_SET_FLAGS) >> 10)
/* Opcode fields. The shift is done on the unsigned sljit_ins type: opcodes
   of 32 and above (e.g. HI(63)) do not fit into a signed int after the
   shift, and a signed overflow is undefined behaviour in C. */
#define HI(opcode)      ((sljit_ins)(opcode) << 26)
#define LO(opcode)      ((sljit_ins)(opcode) << 1)
130
/* Instruction templates. Each one is composed of a HI() field, an optional
   LO() field and, for a few instructions, additional fixed bits. The
   register and immediate fields are or-ed in by the callers. */
131 #define ADD             (HI(31) | LO(266))
132 #define ADDC            (HI(31) | LO(10))
133 #define ADDE            (HI(31) | LO(138))
134 #define ADDI            (HI(14))
135 #define ADDIC           (HI(13))
136 #define ADDIS           (HI(15))
137 #define ADDME           (HI(31) | LO(234))
138 #define AND             (HI(31) | LO(28))
139 #define ANDI            (HI(28))
140 #define ANDIS           (HI(29))
141 #define Bx              (HI(18))
142 #define BCx             (HI(16))
143 #define BCCTR           (HI(19) | LO(528) | (3 << 11))
144 #define BLR             (HI(19) | LO(16) | (0x14 << 21))
145 #define CNTLZD          (HI(31) | LO(58))
146 #define CNTLZW          (HI(31) | LO(26))
147 #define CMP             (HI(31) | LO(0))
148 #define CMPI            (HI(11))
149 #define CMPL            (HI(31) | LO(32))
150 #define CMPLI           (HI(10))
151 #define CROR            (HI(19) | LO(449))
152 #define DIVD            (HI(31) | LO(489))
153 #define DIVDU           (HI(31) | LO(457))
154 #define DIVW            (HI(31) | LO(491))
155 #define DIVWU           (HI(31) | LO(459))
156 #define EXTSB           (HI(31) | LO(954))
157 #define EXTSH           (HI(31) | LO(922))
158 #define EXTSW           (HI(31) | LO(986))
159 #define FABS            (HI(63) | LO(264))
160 #define FADD            (HI(63) | LO(21))
161 #define FADDS           (HI(59) | LO(21))
162 #define FCMPU           (HI(63) | LO(0))
163 #define FDIV            (HI(63) | LO(18))
164 #define FDIVS           (HI(59) | LO(18))
165 #define FMR             (HI(63) | LO(72))
166 #define FMUL            (HI(63) | LO(25))
167 #define FMULS           (HI(59) | LO(25))
168 #define FNEG            (HI(63) | LO(40))
169 #define FSUB            (HI(63) | LO(20))
170 #define FSUBS           (HI(59) | LO(20))
171 #define LD              (HI(58) | 0)
172 #define LWZ             (HI(32))
173 #define MFCR            (HI(31) | LO(19))
174 #define MFLR            (HI(31) | LO(339) | 0x80000)
175 #define MFXER           (HI(31) | LO(339) | 0x10000)
176 #define MTCTR           (HI(31) | LO(467) | 0x90000)
177 #define MTLR            (HI(31) | LO(467) | 0x80000)
178 #define MTXER           (HI(31) | LO(467) | 0x10000)
179 #define MULHD           (HI(31) | LO(73))
180 #define MULHDU          (HI(31) | LO(9))
181 #define MULHW           (HI(31) | LO(75))
182 #define MULHWU          (HI(31) | LO(11))
183 #define MULLD           (HI(31) | LO(233))
184 #define MULLI           (HI(7))
185 #define MULLW           (HI(31) | LO(235))
186 #define NEG             (HI(31) | LO(104))
/* NOP and ORI (below) share the same template value. */
187 #define NOP             (HI(24))
188 #define NOR             (HI(31) | LO(124))
189 #define OR              (HI(31) | LO(444))
190 #define ORI             (HI(24))
191 #define ORIS            (HI(25))
192 #define RLDICL          (HI(30))
193 #define RLWINM          (HI(21))
194 #define SLD             (HI(31) | LO(27))
195 #define SLW             (HI(31) | LO(24))
196 #define SRAD            (HI(31) | LO(794))
197 #define SRADI           (HI(31) | LO(413 << 1))
198 #define SRAW            (HI(31) | LO(792))
199 #define SRAWI           (HI(31) | LO(824))
200 #define SRD             (HI(31) | LO(539))
201 #define SRW             (HI(31) | LO(536))
202 #define STD             (HI(62) | 0)
203 #define STDU            (HI(62) | 1)
204 #define STDUX           (HI(31) | LO(181))
205 #define STW             (HI(36))
206 #define STWU            (HI(37))
207 #define STWUX           (HI(31) | LO(183))
208 #define SUBF            (HI(31) | LO(40))
209 #define SUBFC           (HI(31) | LO(8))
210 #define SUBFE           (HI(31) | LO(136))
211 #define SUBFIC          (HI(8))
212 #define XOR             (HI(31) | LO(316))
213 #define XORI            (HI(26))
214 #define XORIS           (HI(27))
215
/* Limits of the signed and unsigned 16 bit immediate values. */
216 #define SIMM_MAX        (0x7fff)
217 #define SIMM_MIN        (-0x8000)
218 #define UIMM_MAX        (0xffff)
219
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
/* Fills a function context from an existing one.
   func_ptr: when not NULL, receives the address of the context.
   context:  the context to be filled.
   addr:     stored as the entry address; when it is zero, the first word
             read through func is used instead.
   func:     source of the r2 (second word) and r11 (third word) values. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func)
{
        sljit_sw* descriptor = (sljit_sw*)func;

        if (func_ptr)
                *func_ptr = (void*)context;

        if (addr)
                context->addr = addr;
        else
                context->addr = descriptor[0];

        context->r2 = descriptor[1];
        context->r11 = descriptor[2];
}
#endif
232
233 static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins)
234 {
235         sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
236         FAIL_IF(!ptr);
237         *ptr = ins;
238         compiler->size++;
239         return SLJIT_SUCCESS;
240 }
241
242 static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
243 {
244         sljit_sw diff;
245         sljit_uw target_addr;
246         sljit_sw extra_jump_flags;
247
248 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
249         if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL))
250                 return 0;
251 #else
252         if (jump->flags & SLJIT_REWRITABLE_JUMP)
253                 return 0;
254 #endif
255
256         if (jump->flags & JUMP_ADDR)
257                 target_addr = jump->u.target;
258         else {
259                 SLJIT_ASSERT(jump->flags & JUMP_LABEL);
260                 target_addr = (sljit_uw)(code + jump->u.label->size);
261         }
262
263 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
264         if (jump->flags & IS_CALL)
265                 goto keep_address;
266 #endif
267
268         diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr)) & ~0x3l;
269
270         extra_jump_flags = 0;
271         if (jump->flags & IS_COND) {
272                 if (diff <= 0x7fff && diff >= -0x8000) {
273                         jump->flags |= PATCH_B;
274                         return 1;
275                 }
276                 if (target_addr <= 0xffff) {
277                         jump->flags |= PATCH_B | PATCH_ABS_B;
278                         return 1;
279                 }
280                 extra_jump_flags = REMOVE_COND;
281
282                 diff -= sizeof(sljit_ins);
283         }
284
285         if (diff <= 0x01ffffff && diff >= -0x02000000) {
286                 jump->flags |= PATCH_B | extra_jump_flags;
287                 return 1;
288         }
289         if (target_addr <= 0x03ffffff) {
290                 jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags;
291                 return 1;
292         }
293
294 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
295 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
296 keep_address:
297 #endif
298         if (target_addr <= 0x7fffffff) {
299                 jump->flags |= PATCH_ABS32;
300                 return 1;
301         }
302         if (target_addr <= 0x7fffffffffffl) {
303                 jump->flags |= PATCH_ABS48;
304                 return 1;
305         }
306 #endif
307
308         return 0;
309 }
310
/* Builds the final machine code from the instruction words collected in the
   buffer list of the compiler.

   The first pass copies every instruction word into memory obtained from
   SLJIT_MALLOC_EXEC, records the final address of each label, jump and
   constant, and removes the unused part of a jump sequence whenever
   detect_jump_type() selects a shorter form. The second pass walks the jump
   list again and writes either the branch displacement or the immediate
   fields of the address load.

   On completion compiler->error is set to SLJIT_ERR_COMPILED and
   compiler->executable_size to the size of the code in bytes. The return
   value is the start of the code or, when SLJIT_INDIRECT_CALL is enabled,
   the address of the struct sljit_function_context stored after the code.
   NOTE(review): PTR_FAIL_WITH_EXEC_IF presumably returns early when the
   allocation fails - confirm against its definition. */
311 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
312 {
313         struct sljit_memory_fragment *buf;
314         sljit_ins *code;
315         sljit_ins *code_ptr;
316         sljit_ins *buf_ptr;
317         sljit_ins *buf_end;
318         sljit_uw word_count;
319         sljit_uw addr;
320
321         struct sljit_label *label;
322         struct sljit_jump *jump;
323         struct sljit_const *const_;
324
325         CHECK_ERROR_PTR();
326         check_sljit_generate_code(compiler);
327         reverse_buf(compiler);
328
            /* Reserve room for the function context after the code. The 64 bit
               variant adds one more word when the instruction count is odd. */
329 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
330 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
331         compiler->size += (compiler->size & 0x1) + (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
332 #else
333         compiler->size += (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
334 #endif
335 #endif
336         code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
337         PTR_FAIL_WITH_EXEC_IF(code);
338         buf = compiler->buf;
339
            /* First pass: code_ptr is the write position in the final code,
               word_count is the index of the current word in the input. */
340         code_ptr = code;
341         word_count = 0;
342         label = compiler->labels;
343         jump = compiler->jumps;
344         const_ = compiler->consts;
345         do {
346                 buf_ptr = (sljit_ins*)buf->memory;
                    /* used_size is shifted right by 2 to get a word count. */
347                 buf_end = buf_ptr + (buf->used_size >> 2);
348                 do {
349                         *code_ptr = *buf_ptr++;
350                         SLJIT_ASSERT(!label || label->size >= word_count);
351                         SLJIT_ASSERT(!jump || jump->addr >= word_count);
352                         SLJIT_ASSERT(!const_ || const_->addr >= word_count);
353                         /* These structures are ordered by their address. */
354                         if (label && label->size == word_count) {
355                                 /* Just recording the address. */
356                                 label->addr = (sljit_uw)code_ptr;
357                                 label->size = code_ptr - code;
358                                 label = label->next;
359                         }
360                         if (jump && jump->addr == word_count) {
                                    /* jump->addr now points 3 (32 bit) or 6 (64 bit)
                                       words before the current word; presumably the
                                       start of the address load sequence - confirm
                                       against the jump emitter. */
361 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
362                                 jump->addr = (sljit_uw)(code_ptr - 3);
363 #else
364                                 jump->addr = (sljit_uw)(code_ptr - 6);
365 #endif
366                                 if (detect_jump_type(jump, code_ptr, code)) {
                                            /* A shorter form was selected: move the
                                               remaining word(s) back and rewind
                                               code_ptr over the dropped words. */
367 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
368                                         code_ptr[-3] = code_ptr[0];
369                                         code_ptr -= 3;
370 #else
371                                         if (jump->flags & PATCH_ABS32) {
372                                                 code_ptr -= 3;
373                                                 code_ptr[-1] = code_ptr[2];
374                                                 code_ptr[0] = code_ptr[3];
375                                         }
376                                         else if (jump->flags & PATCH_ABS48) {
377                                                 code_ptr--;
378                                                 code_ptr[-1] = code_ptr[0];
379                                                 code_ptr[0] = code_ptr[1];
380                                                 /* rldicr rX,rX,32,31 -> rX,rX,16,47 */
381                                                 SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6);
382                                                 code_ptr[-3] ^= 0x8422;
383                                                 /* oris -> ori */
384                                                 code_ptr[-2] ^= 0x4000000;
385                                         }
386                                         else {
387                                                 code_ptr[-6] = code_ptr[0];
388                                                 code_ptr -= 6;
389                                         }
390 #endif
391                                         if (jump->flags & REMOVE_COND) {
                                                    /* The conditional branch is rewritten
                                                       (one condition bit is flipped) and is
                                                       followed by an unconditional branch.
                                                       jump->addr is moved to that second
                                                       branch, which is then patched as a
                                                       non-conditional jump. */
392                                                 code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001);
393                                                 code_ptr++;
394                                                 jump->addr += sizeof(sljit_ins);
395                                                 code_ptr[0] = Bx;
396                                                 jump->flags -= IS_COND;
397                                         }
398                                 }
399                                 jump = jump->next;
400                         }
401                         if (const_ && const_->addr == word_count) {
402                                 const_->addr = (sljit_uw)code_ptr;
403                                 const_ = const_->next;
404                         }
405                         code_ptr ++;
406                         word_count ++;
407                 } while (buf_ptr < buf_end);
408
409                 buf = buf->next;
410         } while (buf);
411
            /* A label may be placed right after the last instruction. */
412         if (label && label->size == word_count) {
413                 label->addr = (sljit_uw)code_ptr;
414                 label->size = code_ptr - code;
415                 label = label->next;
416         }
417
418         SLJIT_ASSERT(!label);
419         SLJIT_ASSERT(!jump);
420         SLJIT_ASSERT(!const_);
421 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
422         SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)));
423 #else
424         SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
425 #endif
426
            /* Second pass: every label address is known, so the targets can be
               written. The do { } while (0) wrapper lets break skip the rest
               of the patching for the current jump. */
427         jump = compiler->jumps;
428         while (jump) {
429                 do {
430                         addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
431                         buf_ptr = (sljit_ins*)jump->addr;
432                         if (jump->flags & PATCH_B) {
                                    /* Branch forms: relative branches store the distance
                                       from the branch instruction, absolute ones store
                                       the address and set the 0x2 bit. */
433                                 if (jump->flags & IS_COND) {
434                                         if (!(jump->flags & PATCH_ABS_B)) {
435                                                 addr = addr - jump->addr;
436                                                 SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000);
437                                                 *buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001);
438                                         }
439                                         else {
440                                                 SLJIT_ASSERT(addr <= 0xffff);
441                                                 *buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001);
442                                         }
443                                 }
444                                 else {
445                                         if (!(jump->flags & PATCH_ABS_B)) {
446                                                 addr = addr - jump->addr;
447                                                 SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000);
448                                                 *buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1);
449                                         }
450                                         else {
451                                                 SLJIT_ASSERT(addr <= 0x03ffffff);
452                                                 *buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1);
453                                         }
454                                 }
455                                 break;
456                         }
457                         /* Set the fields of immediate loads. */
                            /* The address is written in 16 bit pieces into the low
                               half of the instruction words. */
458 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
459                         buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
460                         buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
461 #else
462                         if (jump->flags & PATCH_ABS32) {
463                                 SLJIT_ASSERT(addr <= 0x7fffffff);
464                                 buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
465                                 buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
466                                 break;
467                         }
468                         if (jump->flags & PATCH_ABS48) {
469                                 SLJIT_ASSERT(addr <= 0x7fffffffffff);
470                                 buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff);
471                                 buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff);
472                                 buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff);
473                                 break;
474                         }
                            /* Full 64 bit load: words 0, 1, 3 and 4 carry the pieces
                               of the address, word 2 has no immediate to patch. */
475                         buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff);
476                         buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff);
477                         buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff);
478                         buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff);
479 #endif
480                 } while (0);
481                 jump = jump->next;
482         }
483
484         compiler->error = SLJIT_ERR_COMPILED;
485         compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
486         SLJIT_CACHE_FLUSH(code, code_ptr);
487
            /* With indirect calls the function context is stored after the
               code and its address is returned instead of the code address.
               The 64 bit variant first skips one word when code_ptr is not
               8 byte aligned. */
488 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
489 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
490         if (((sljit_sw)code_ptr) & 0x4)
491                 code_ptr++;
492         sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
493         return code_ptr;
494 #else
495         sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
496         return code_ptr;
497 #endif
498 #else
499         return code;
500 #endif
501 }
502
503 /* --------------------------------------------------------------------- */
504 /*  Entry, exit                                                          */
505 /* --------------------------------------------------------------------- */
506
507 /* inp_flags: */
508
509 /* Creates an index in data_transfer_insts array. */
510 #define LOAD_DATA       0x01
511 #define INDEXED         0x02
512 #define WRITE_BACK      0x04
513 #define WORD_DATA       0x00
514 #define BYTE_DATA       0x08
515 #define HALF_DATA       0x10
516 #define INT_DATA        0x18
517 #define SIGNED_DATA     0x20
518 /* Separates integer and floating point registers */
    /* INST_CODE_AND_DST encodes the register with D() when the masked flags
       are not greater than GPR_REG, and with FD() otherwise. */
519 #define GPR_REG         0x3f
520 #define DOUBLE_DATA     0x40
521
    /* Mask of all the bits above that take part in the table index. */
522 #define MEM_MASK        0x7f
523
524 /* Other inp_flags. */
525
526 #define ARG_TEST        0x000100
527 /* Integer operation and set flags -> requires exts on 64 bit systems. */
528 #define ALT_SIGN_EXT    0x000200
529 /* This flag affects the RC() and OERC() macros. */
530 #define ALT_SET_FLAGS   0x000400
531 #define ALT_KEEP_CACHE  0x000800
532 #define ALT_FORM1       0x010000
533 #define ALT_FORM2       0x020000
534 #define ALT_FORM3       0x040000
535 #define ALT_FORM4       0x080000
536 #define ALT_FORM5       0x100000
537 #define ALT_FORM6       0x200000
538
539 /* Source and destination is register. */
540 #define REG_DEST        0x000001
541 #define REG1_SOURCE     0x000002
542 #define REG2_SOURCE     0x000004
543 /* getput_arg_fast returned true. */
544 #define FAST_DEST       0x000008
545 /* Multiple instructions are required. */
546 #define SLOW_DEST       0x000010
    /* Reminder of the flag values defined above that share this flag word. */
547 /*
548 ALT_SIGN_EXT            0x000200
549 ALT_SET_FLAGS           0x000400
550 ALT_FORM1               0x010000
551 ...
552 ALT_FORM6               0x200000 */
553
554 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
555 #include "sljitNativePPC_32.c"
556 #else
557 #include "sljitNativePPC_64.c"
558 #endif
559
/* Word sized store / load instructions used by the function entry and exit
   code: stw / lwz on 32 bit, std / ld on 64 bit builds. Both macros are
   removed again (#undef) after sljit_emit_return. */
560 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
561 #define STACK_STORE     STW
562 #define STACK_LOAD      LWZ
563 #else
564 #define STACK_STORE     STD
565 #define STACK_LOAD      LD
566 #endif
567
568 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
569 {
570         CHECK_ERROR();
571         check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);
572
573         compiler->scratches = scratches;
574         compiler->saveds = saveds;
575 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
576         compiler->logical_local_size = local_size;
577 #endif
578
579         FAIL_IF(push_inst(compiler, MFLR | D(0)));
580         FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_LOCALS_REG) | IMM(-(sljit_si)(sizeof(sljit_sw))) ));
581         if (saveds >= 1)
582                 FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG1) | A(SLJIT_LOCALS_REG) | IMM(-2 * (sljit_si)(sizeof(sljit_sw))) ));
583         if (saveds >= 2)
584                 FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG2) | A(SLJIT_LOCALS_REG) | IMM(-3 * (sljit_si)(sizeof(sljit_sw))) ));
585         if (saveds >= 3)
586                 FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG3) | A(SLJIT_LOCALS_REG) | IMM(-4 * (sljit_si)(sizeof(sljit_sw))) ));
587         if (saveds >= 4)
588                 FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_EREG1) | A(SLJIT_LOCALS_REG) | IMM(-5 * (sljit_si)(sizeof(sljit_sw))) ));
589         if (saveds >= 5)
590                 FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_EREG2) | A(SLJIT_LOCALS_REG) | IMM(-6 * (sljit_si)(sizeof(sljit_sw))) ));
591 #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
592         FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_LOCALS_REG) | IMM(2 * sizeof(sljit_sw)) ));
593 #else
594         FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_LOCALS_REG) | IMM(sizeof(sljit_sw)) ));
595 #endif
596
597         FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0));
598         if (args >= 1)
599                 FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(SLJIT_SAVED_REG1) | B(SLJIT_SCRATCH_REG1)));
600         if (args >= 2)
601                 FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG2) | A(SLJIT_SAVED_REG2) | B(SLJIT_SCRATCH_REG2)));
602         if (args >= 3)
603                 FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG3) | A(SLJIT_SAVED_REG3) | B(SLJIT_SCRATCH_REG3)));
604
605 #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
606         compiler->local_size = (1 + saveds + 6 + 8) * sizeof(sljit_sw) + local_size;
607 #else
608         compiler->local_size = (1 + saveds + 2) * sizeof(sljit_sw) + local_size;
609 #endif
610         compiler->local_size = (compiler->local_size + 15) & ~0xf;
611
612 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
613         if (compiler->local_size <= SIMM_MAX)
614                 FAIL_IF(push_inst(compiler, STWU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-compiler->local_size)));
615         else {
616                 FAIL_IF(load_immediate(compiler, 0, -compiler->local_size));
617                 FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0)));
618         }
619 #else
620         if (compiler->local_size <= SIMM_MAX)
621                 FAIL_IF(push_inst(compiler, STDU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-compiler->local_size)));
622         else {
623                 FAIL_IF(load_immediate(compiler, 0, -compiler->local_size));
624                 FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0)));
625         }
626 #endif
627
628         return SLJIT_SUCCESS;
629 }
630
631 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
632 {
633         CHECK_ERROR_VOID();
634         check_sljit_set_context(compiler, args, scratches, saveds, local_size);
635
636         compiler->scratches = scratches;
637         compiler->saveds = saveds;
638 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
639         compiler->logical_local_size = local_size;
640 #endif
641
642 #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
643         compiler->local_size = (1 + saveds + 6 + 8) * sizeof(sljit_sw) + local_size;
644 #else
645         compiler->local_size = (1 + saveds + 2) * sizeof(sljit_sw) + local_size;
646 #endif
647         compiler->local_size = (compiler->local_size + 15) & ~0xf;
648 }
649
650 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
651 {
652         CHECK_ERROR();
653         check_sljit_emit_return(compiler, op, src, srcw);
654
655         FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
656
657         if (compiler->local_size <= SIMM_MAX)
658                 FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(compiler->local_size)));
659         else {
660                 FAIL_IF(load_immediate(compiler, 0, compiler->local_size));
661                 FAIL_IF(push_inst(compiler, ADD | D(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0)));
662         }
663
664 #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
665         FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_LOCALS_REG) | IMM(2 * sizeof(sljit_sw))));
666 #else
667         FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_LOCALS_REG) | IMM(sizeof(sljit_sw))));
668 #endif
669         if (compiler->saveds >= 5)
670                 FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_EREG2) | A(SLJIT_LOCALS_REG) | IMM(-6 * (sljit_si)(sizeof(sljit_sw))) ));
671         if (compiler->saveds >= 4)
672                 FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_EREG1) | A(SLJIT_LOCALS_REG) | IMM(-5 * (sljit_si)(sizeof(sljit_sw))) ));
673         if (compiler->saveds >= 3)
674                 FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG3) | A(SLJIT_LOCALS_REG) | IMM(-4 * (sljit_si)(sizeof(sljit_sw))) ));
675         if (compiler->saveds >= 2)
676                 FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG2) | A(SLJIT_LOCALS_REG) | IMM(-3 * (sljit_si)(sizeof(sljit_sw))) ));
677         if (compiler->saveds >= 1)
678                 FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG1) | A(SLJIT_LOCALS_REG) | IMM(-2 * (sljit_si)(sizeof(sljit_sw))) ));
679         FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_LOCALS_REG) | IMM(-(sljit_si)(sizeof(sljit_sw))) ));
680
681         FAIL_IF(push_inst(compiler, MTLR | S(0)));
682         FAIL_IF(push_inst(compiler, BLR));
683
684         return SLJIT_SUCCESS;
685 }
686
/* STACK_STORE and STACK_LOAD are only meant for the entry / exit code above. */
687 #undef STACK_STORE
688 #undef STACK_LOAD
689
690 /* --------------------------------------------------------------------- */
691 /*  Operators                                                            */
692 /* --------------------------------------------------------------------- */
693
694 /* i/x - immediate/indexed form
695    n/w - no write-back / write-back (1 bit)
696    s/l - store/load (1 bit)
697    u/s - unsigned/signed (1 bit)
698    w/b/h/i - word/byte/half/int allowed (2 bit)
699    It contains 64 integer items (followed by 8 floating point ones), but not all are different. */
700
701 /* 64 bit only: [reg+imm] must be aligned to 4 bytes. */
702 #define INT_ALIGNED     0x10000
703 /* 64-bit only: there is no lwau instruction. */
704 #define UPDATE_REQ      0x20000
705
/* ARCH_32_64(a, b) expands to its first argument on PPC-32 builds and to the
   second one otherwise.
   INST_CODE_AND_DST(inst, flags, reg) adds the data register field to an
   instruction word: D(reg) when ((flags) & MEM_MASK) <= GPR_REG, FD(reg)
   otherwise. The non PPC-32 variant first strips the INT_ALIGNED and
   UPDATE_REQ marker bits from inst. */
706 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
707 #define ARCH_32_64(a, b)        a
708 #define INST_CODE_AND_DST(inst, flags, reg) \
709         ((inst) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
710 #else
711 #define ARCH_32_64(a, b)        b
712 #define INST_CODE_AND_DST(inst, flags, reg) \
713         (((inst) & ~(INT_ALIGNED | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
714 #endif
715
/* Load/store opcode table. It is indexed with (flags & MEM_MASK), optionally
   after OR-ing INDEXED into the flags to select the register-indexed form.
   The letters in front of every entry spell out the flag combination it
   belongs to (see the legend above the table).
   The first 64 entries are the integer forms, the last 8 the floating point
   ones. On PPC-64 some entries additionally carry the INT_ALIGNED and
   UPDATE_REQ marker bits; INST_CODE_AND_DST removes them before the
   instruction is emitted. */
716 static SLJIT_CONST sljit_ins data_transfer_insts[64 + 8] = {
717
718 /* -------- Unsigned -------- */
719
720 /* Word. */
721
722 /* u w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
723 /* u w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
724 /* u w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
725 /* u w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
726
727 /* u w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
728 /* u w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
729 /* u w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
730 /* u w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
731
732 /* Byte. */
733
734 /* u b n i s */ HI(38) /* stb */, 
735 /* u b n i l */ HI(34) /* lbz */,
736 /* u b n x s */ HI(31) | LO(215) /* stbx */,
737 /* u b n x l */ HI(31) | LO(87) /* lbzx */,
738
739 /* u b w i s */ HI(39) /* stbu */,
740 /* u b w i l */ HI(35) /* lbzu */,
741 /* u b w x s */ HI(31) | LO(247) /* stbux */,
742 /* u b w x l */ HI(31) | LO(119) /* lbzux */,
743
744 /* Half. */
745
746 /* u h n i s */ HI(44) /* sth */,
747 /* u h n i l */ HI(40) /* lhz */,
748 /* u h n x s */ HI(31) | LO(407) /* sthx */,
749 /* u h n x l */ HI(31) | LO(279) /* lhzx */,
750
751 /* u h w i s */ HI(45) /* sthu */,
752 /* u h w i l */ HI(41) /* lhzu */,
753 /* u h w x s */ HI(31) | LO(439) /* sthux */,
754 /* u h w x l */ HI(31) | LO(311) /* lhzux */,
755
756 /* Int. */
757
758 /* u i n i s */ HI(36) /* stw */,
759 /* u i n i l */ HI(32) /* lwz */,
760 /* u i n x s */ HI(31) | LO(151) /* stwx */,
761 /* u i n x l */ HI(31) | LO(23) /* lwzx */,
762
763 /* u i w i s */ HI(37) /* stwu */,
764 /* u i w i l */ HI(33) /* lwzu */,
765 /* u i w x s */ HI(31) | LO(183) /* stwux */,
766 /* u i w x l */ HI(31) | LO(55) /* lwzux */,
767
768 /* -------- Signed -------- */
769
770 /* Word. */
771
772 /* s w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
773 /* s w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
774 /* s w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
775 /* s w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
776
777 /* s w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
778 /* s w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
779 /* s w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
780 /* s w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
781
782 /* Byte. */
783
784 /* s b n i s */ HI(38) /* stb */,
785 /* s b n i l */ HI(34) /* lbz */ /* EXTS_REQ */,
786 /* s b n x s */ HI(31) | LO(215) /* stbx */,
787 /* s b n x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */,
788
789 /* s b w i s */ HI(39) /* stbu */,
790 /* s b w i l */ HI(35) /* lbzu */ /* EXTS_REQ */,
791 /* s b w x s */ HI(31) | LO(247) /* stbux */,
792 /* s b w x l */ HI(31) | LO(119) /* lbzux */ /* EXTS_REQ */,
793
794 /* Half. */
795
796 /* s h n i s */ HI(44) /* sth */,
797 /* s h n i l */ HI(42) /* lha */,
798 /* s h n x s */ HI(31) | LO(407) /* sthx */,
799 /* s h n x l */ HI(31) | LO(343) /* lhax */,
800
801 /* s h w i s */ HI(45) /* sthu */,
802 /* s h w i l */ HI(43) /* lhau */,
803 /* s h w x s */ HI(31) | LO(439) /* sthux */,
804 /* s h w x l */ HI(31) | LO(375) /* lhaux */,
805
806 /* Int. */
807
808 /* s i n i s */ HI(36) /* stw */,
809 /* s i n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x2 /* lwa */),
810 /* s i n x s */ HI(31) | LO(151) /* stwx */,
811 /* s i n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */),
812
813 /* s i w i s */ HI(37) /* stwu */,
814 /* s i w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | UPDATE_REQ | 0x2 /* lwa */),
815 /* s i w x s */ HI(31) | LO(183) /* stwux */,
816 /* s i w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */),
817
818 /* -------- Floating point: double precision, then single precision -------- */
819
820 /* d   n i s */ HI(54) /* stfd */,
821 /* d   n i l */ HI(50) /* lfd */,
822 /* d   n x s */ HI(31) | LO(727) /* stfdx */,
823 /* d   n x l */ HI(31) | LO(599) /* lfdx */,
824
825 /* s   n i s */ HI(52) /* stfs */,
826 /* s   n i l */ HI(48) /* lfs */,
827 /* s   n x s */ HI(31) | LO(663) /* stfsx */,
828 /* s   n x l */ HI(31) | LO(535) /* lfsx */,
829
830 };
831
832 #undef ARCH_32_64
833
834 /* Simple cases, (no caching is required). */
/* Tries to encode the memory operand (arg, argw) as one load/store
   instruction with reg as the data register.
   Return value, as visible from the code below:
      0 - the operand cannot be handled here, nothing was emitted
      1 - ARG_TEST is set in inp_flags and the operand can be handled,
          nothing was emitted
     -1 - the instruction was pushed
   A failing push_inst leaves through FAIL_IF (presumably returning the error
   code - FAIL_IF is defined outside of this section). */
835 static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw)
836 {
837         sljit_ins inst;
838
839         /* Should work when (arg & REG_MASK) == 0. */
840         SLJIT_COMPILE_ASSERT(A(0) == 0, a0_must_be_0);
841         SLJIT_ASSERT(arg & SLJIT_MEM);
842
            /* Base register + offset register form. getput_arg uses (argw & 0x3)
               as a shift amount for the offset register, so only a zero shift
               is handled here. */
843         if (arg & OFFS_REG_MASK) {
844                 if (argw & 0x3)
845                         return 0;
846                 if (inp_flags & ARG_TEST)
847                         return 1;
848
849                 inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
850                 SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
851                 FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(OFFS_REG(arg))));
852                 return -1;
853         }
854
            /* Without a base register the write-back request is dropped. */
855         if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
856                 inp_flags &= ~WRITE_BACK;
857
858 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
859         inst = data_transfer_insts[inp_flags & MEM_MASK];
860         SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));
861
            /* The displacement has to fit into SIMM_MIN..SIMM_MAX. INT_ALIGNED
               entries also need its low two bits to be zero, and UPDATE_REQ
               entries are never emitted from here. */
862         if (argw > SIMM_MAX || argw < SIMM_MIN || ((inst & INT_ALIGNED) && (argw & 0x3)) || (inst & UPDATE_REQ))
863                 return 0;
864         if (inp_flags & ARG_TEST)
865                 return 1;
866 #endif
867
868 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
869         if (argw > SIMM_MAX || argw < SIMM_MIN)
870                 return 0;
871         if (inp_flags & ARG_TEST)
872                 return 1;
873
874         inst = data_transfer_insts[inp_flags & MEM_MASK];
875         SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
876 #endif
877
878         FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | IMM(argw)));
879         return -1;
880 }
881
882 /* See getput_arg below.
883    Note: can_cache is called only for binary operators. Those operators always
884    use word arguments without write back.
       Returns non-zero when (arg, argw) and (next_arg, next_argw) satisfy one of
       the pairing conditions checked below, and 0 otherwise. Both operands are
       asserted to be memory operands. */
885 static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
886 {
887         sljit_sw high_short, next_high_short;
888 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
889         sljit_sw diff;
890 #endif
891
892         SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
893
            /* Indexed form: same offset register and same low two bits of argw. */
894         if (arg & OFFS_REG_MASK)
895                 return ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && (argw & 0x3) == (next_argw & 0x3));
896
897         if (next_arg & OFFS_REG_MASK)
898                 return 0;
899
900 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
            /* Compare the upper parts of the two offsets. 0x10000 is added first
               when bit 15 is set, i.e. when the low 16 bits are negative if read
               as a signed halfword. */
901         high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
902         next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
903         return high_short == next_high_short;
904 #else
            /* The upper part comparison is only tried when argw fits into a
               signed 32 bit value. */
905         if (argw <= 0x7fffffffl && argw >= -0x80000000l) {
906                 high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
907                 next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
908                 if (high_short == next_high_short)
909                         return 1;
910         }
911
            /* Otherwise the distance of the two offsets must fit into
               SIMM_MIN..SIMM_MAX. With a base register the two operands must
               also be identical in their arg part. */
912         diff = argw - next_argw;
913         if (!(arg & REG_MASK))
914                 return diff <= SIMM_MAX && diff >= SIMM_MIN;
915
916         if (arg == next_arg && diff <= SIMM_MAX && diff >= SIMM_MIN)
917                 return 1;
918
919         return 0;
920 #endif
921 }
922
923 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
/* ADJUST_CACHED_IMM(imm): when the selected instruction is marked INT_ALIGNED
   and imm has any of its low two bits set, those bits are added to TMP_REG3
   (with an ADDI) and to compiler->cache_argw, and then cleared in imm.
   The macro reads the local variables `compiler` and `inst` of the enclosing
   function, assigns to imm (which therefore must be an lvalue) and may leave
   the function through FAIL_IF. */
924 #define ADJUST_CACHED_IMM(imm) \
925         if ((inst & INT_ALIGNED) && (imm & 0x3)) { \
926                 /* Adjust cached value. Fortunately this is really a rare case */ \
927                 compiler->cache_argw += imm & 0x3; \
928                 FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | (imm & 0x3))); \
929                 imm &= ~0x3; \
930         }
931 #endif
932
933 /* Emit the necessary instructions. See can_cache above. */
/* Emits the memory access for the operand (arg, argw) with reg as the data
   register, for the cases getput_arg_fast does not take.
   next_arg / next_argw describe another operand the caller is going to access
   afterwards (emit_op passes 0, 0 when there is none). When the two operands
   are related closely enough, the computed address part is placed into
   TMP_REG3 and remembered in compiler->cache_arg / compiler->cache_argw.
   Returns the result of the last push_inst, or leaves early through
   FAIL_IF. */
934 static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
935 {
936         sljit_si tmp_r;
937         sljit_ins inst;
938         sljit_sw high_short, next_high_short;
939 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
940         sljit_sw diff;
941 #endif
942
943         SLJIT_ASSERT(arg & SLJIT_MEM);
944
            /* A load into a general purpose register may use that register as
               address scratch; every other access uses TMP_REG1. */
945         tmp_r = ((inp_flags & LOAD_DATA) && ((inp_flags) & MEM_MASK) <= GPR_REG) ? reg : TMP_REG1;
946         /* Special case for "mov reg, [reg, ... ]". */
947         if ((arg & REG_MASK) == tmp_r)
948                 tmp_r = TMP_REG1;
949
950         if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
951                 argw &= 0x3;
952                 /* Otherwise getput_arg_fast would capture it. */
953                 SLJIT_ASSERT(argw);
954
                    /* Reuse TMP_REG3 when it already holds this offset register
                       shifted by the same amount. */
955                 if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg && argw == compiler->cache_argw)
956                         tmp_r = TMP_REG3;
957                 else {
                            /* The next operand uses the same shifted offset: compute it
                               into TMP_REG3 and record it in the cache. */
958                         if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) {
959                                 compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
960                                 compiler->cache_argw = argw;
961                                 tmp_r = TMP_REG3;
962                         }
                            /* Rotate-and-mask form that shifts the offset register left
                               by argw bits into tmp_r. */
963 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
964                         FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_r) | (argw << 11) | ((31 - argw) << 1)));
965 #else
966                         FAIL_IF(push_inst(compiler, RLDI(tmp_r, OFFS_REG(arg), argw, 63 - argw, 1)));
967 #endif
968                 }
969                 inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
970                 SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
971                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
972         }
973
            /* Without a base register the write-back request is dropped. */
974         if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
975                 inp_flags &= ~WRITE_BACK;
976
977         inst = data_transfer_insts[inp_flags & MEM_MASK];
978         SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));
979
            /* ADDIS + displacement path: ADDIS adds the upper part of the
               offset, the load/store itself supplies the lower 16 bits. On
               PPC-64 this path is limited to the offsets and instructions
               accepted by the condition below; on PPC-32 it is the only path. */
980 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
981         if (argw <= 0x7fff7fffl && argw >= -0x80000000l
982                         && (!(inst & INT_ALIGNED) || !(argw & 0x3)) && !(inst & UPDATE_REQ)) {
983 #endif
984
985                 arg &= REG_MASK;
986                 high_short = (sljit_si)(argw + ((argw & 0x8000) << 1)) & ~0xffff;
987                 /* The getput_arg_fast should handle this otherwise. */
988 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
989                 SLJIT_ASSERT(high_short && high_short <= 0x7fffffffl && high_short >= -0x80000000l);
990 #else
991                 SLJIT_ASSERT(high_short && !(inst & (INT_ALIGNED | UPDATE_REQ)));
992 #endif
993
                    /* With write-back the base register itself receives the upper
                       part. When it is also the data register, the data is copied
                       to tmp_r first and that copy is transferred instead. */
994                 if (inp_flags & WRITE_BACK) {
995                         if (arg == reg) {
996                                 FAIL_IF(push_inst(compiler, OR | S(reg) | A(tmp_r) | B(reg)));
997                                 reg = tmp_r;
998                         }
999                         tmp_r = arg;
1000                         FAIL_IF(push_inst(compiler, ADDIS | D(arg) | A(arg) | IMM(high_short >> 16)));
1001                 }
1002                 else if (compiler->cache_arg != arg || high_short != compiler->cache_argw) {
                             /* The next operand shares the same upper part: compute
                                it into TMP_REG3 and record it in the cache. */
1003                         if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK)) {
1004                                 next_high_short = (sljit_si)(next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
1005                                 if (high_short == next_high_short) {
1006                                         compiler->cache_arg = SLJIT_IMM | arg;
1007                                         compiler->cache_argw = next_high_short;
1008                                         tmp_r = TMP_REG3;
1009                                 }
1010                         }
1011                         FAIL_IF(push_inst(compiler, ADDIS | D(tmp_r) | A(arg & REG_MASK) | IMM(high_short >> 16)));
1012                 }
1013                 else
1014                         tmp_r = TMP_REG3;
1015
1016                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r) | IMM(argw));
1017
1018 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1019         }
1020
1021         /* Everything else is PPC-64 only. */
             /* Absolute address (no base register). */
1022         if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
                     /* The cached immediate in TMP_REG3 is close enough: address
                        relative to it. */
1023                 diff = argw - compiler->cache_argw;
1024                 if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1025                         ADJUST_CACHED_IMM(diff);
1026                         return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
1027                 }
1028
                     /* The next operand is close enough: load the address into
                        TMP_REG3 and record it in the cache. */
1029                 diff = argw - next_argw;
1030                 if ((next_arg & SLJIT_MEM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1031                         SLJIT_ASSERT(inp_flags & LOAD_DATA);
1032
1033                         compiler->cache_arg = SLJIT_IMM;
1034                         compiler->cache_argw = argw;
1035                         tmp_r = TMP_REG3;
1036                 }
1037
1038                 FAIL_IF(load_immediate(compiler, tmp_r, argw));
1039                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r));
1040         }
1041
             /* TMP_REG3 holds an address computed for the same arg: use it with
                the remaining distance as displacement. */
1042         diff = argw - compiler->cache_argw;
1043         if (compiler->cache_arg == arg && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1044                 SLJIT_ASSERT(!(inp_flags & WRITE_BACK) && !(inst & UPDATE_REQ));
1045                 ADJUST_CACHED_IMM(diff);
1046                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
1047         }
1048
             /* TMP_REG3 holds a nearby immediate: adjust it to argw when needed
                and use it as index register. */
1049         if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1050                 inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
1051                 SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
1052                 if (compiler->cache_argw != argw) {
1053                         FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | IMM(diff)));
1054                         compiler->cache_argw = argw;
1055                 }
1056                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
1057         }
1058
             /* The next operand uses the very same offset: load it into TMP_REG3
                as an immediate and record it in the cache. */
1059         if (argw == next_argw && (next_arg & SLJIT_MEM)) {
1060                 SLJIT_ASSERT(inp_flags & LOAD_DATA);
1061                 FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1062
1063                 compiler->cache_arg = SLJIT_IMM;
1064                 compiler->cache_argw = argw;
1065
1066                 inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
1067                 SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
1068                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
1069         }
1070
             /* The next operand has the same arg and a nearby offset: compute the
                full address (offset + base register) into TMP_REG3 and record
                it in the cache. */
1071         diff = argw - next_argw;
1072         if (arg == next_arg && !(inp_flags & WRITE_BACK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1073                 SLJIT_ASSERT(inp_flags & LOAD_DATA);
1074                 FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1075                 FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | A(TMP_REG3) | B(arg & REG_MASK)));
1076
1077                 compiler->cache_arg = arg;
1078                 compiler->cache_argw = argw;
1079
1080                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3));
1081         }
1082
             /* The next operand has a nearby offset (any base register): load the
                offset into TMP_REG3 as a cached immediate. Otherwise load it
                into tmp_r without touching the cache. */
1083         if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1084                 SLJIT_ASSERT(inp_flags & LOAD_DATA);
1085                 FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1086
1087                 compiler->cache_arg = SLJIT_IMM;
1088                 compiler->cache_argw = argw;
1089                 tmp_r = TMP_REG3;
1090         }
1091         else
1092                 FAIL_IF(load_immediate(compiler, tmp_r, argw));
1093
1094         /* Get the indexed version instead of the normal one. */
1095         inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
1096         SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
1097         return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
1098 #endif
1099 }
1100
/* Emits the memory access (arg1, arg1w) with reg as the data register.
   getput_arg_fast is tried first; any non-zero result from it means the
   operand was dealt with there, and compiler->error is returned. Otherwise
   getput_arg is used, with (arg2, arg2w) passed as the next operand. */
1101 static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
1102 {
1103         if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
1104                 return compiler->error;
1105         return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
1106 }
1107
/* Shared operand handling for an operation with up to two sources and one
   destination: brings src1 and src2 into registers, emits the operation with
   emit_single_op and, when dst is a memory operand, stores the result.
   input_flags carries the ALT_* bits that are forwarded to emit_single_op as
   well as the data transfer bits that are handed to getput_arg_fast and
   getput_arg. Unless ALT_KEEP_CACHE is set, compiler->cache_arg and
   compiler->cache_argw are reset on entry.
   Returns SLJIT_SUCCESS, or leaves early through FAIL_IF. */
1108 static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si input_flags,
1109         sljit_si dst, sljit_sw dstw,
1110         sljit_si src1, sljit_sw src1w,
1111         sljit_si src2, sljit_sw src2w)
1112 {
1113         /* arg1 goes to TMP_REG1 or src reg
1114            arg2 goes to TMP_REG2, imm or src reg
1115            TMP_REG3 can be used for caching
1116            result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
1117         sljit_si dst_r;
1118         sljit_si src1_r;
1119         sljit_si src2_r;
1120         sljit_si sugg_src2_r = TMP_REG2;
1121         sljit_si flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_SIGN_EXT | ALT_SET_FLAGS);
1122
1123         if (!(input_flags & ALT_KEEP_CACHE)) {
1124                 compiler->cache_arg = 0;
1125                 compiler->cache_argw = 0;
1126         }
1127
1128         /* Destination check. */
1129         if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
                     /* A move whose result is unused and whose source is not a
                        memory operand emits nothing. */
1130                 if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM))
1131                         return SLJIT_SUCCESS;
1132                 dst_r = TMP_REG2;
1133         }
1134         else if (FAST_IS_REG(dst)) {
1135                 dst_r = dst;
1136                 flags |= REG_DEST;
                     /* For moves the source is brought directly into the
                        destination register. */
1137                 if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
1138                         sugg_src2_r = dst_r;
1139         }
1140         else {
1141                 SLJIT_ASSERT(dst & SLJIT_MEM);
                     /* ARG_TEST only asks whether the fast path can store to dst;
                        dst_r == 0 marks a destination that needs getput_arg. */
1142                 if (getput_arg_fast(compiler, input_flags | ARG_TEST, TMP_REG2, dst, dstw)) {
1143                         flags |= FAST_DEST;
1144                         dst_r = TMP_REG2;
1145                 }
1146                 else {
1147                         flags |= SLOW_DEST;
1148                         dst_r = 0;
1149                 }
1150         }
1151
1152         /* Source 1. */
1153         if (FAST_IS_REG(src1)) {
1154                 src1_r = src1;
1155                 flags |= REG1_SOURCE;
1156         }
1157         else if (src1 & SLJIT_IMM) {
1158                 FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
1159                 src1_r = TMP_REG1;
1160         }
1161         else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
1162                 FAIL_IF(compiler->error);
1163                 src1_r = TMP_REG1;
1164         }
1165         else
1166                 src1_r = 0;
1167
1168         /* Source 2. */
1169         if (FAST_IS_REG(src2)) {
1170                 src2_r = src2;
1171                 flags |= REG2_SOURCE;
1172                 if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
1173                         dst_r = src2_r;
1174         }
1175         else if (src2 & SLJIT_IMM) {
1176                 FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
1177                 src2_r = sugg_src2_r;
1178         }
1179         else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) {
1180                 FAIL_IF(compiler->error);
1181                 src2_r = sugg_src2_r;
1182         }
1183         else
1184                 src2_r = 0;
1185
1186         /* src1_r, src2_r and dst_r can be zero (=unprocessed).
1187            All arguments are complex addressing modes, and it is a binary operator. */
1188         if (src1_r == 0 && src2_r == 0 && dst_r == 0) {
                     /* Choose the load order so that the operand pair accepted by
                        can_cache is accessed back to back. */
1189                 if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
1190                         FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
1191                         FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
1192                 }
1193                 else {
1194                         FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
1195                         FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
1196                 }
1197                 src1_r = TMP_REG1;
1198                 src2_r = TMP_REG2;
1199         }
             /* Two unprocessed operands: load the first one and pass the second
                one as the next operand. */
1200         else if (src1_r == 0 && src2_r == 0) {
1201                 FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
1202                 src1_r = TMP_REG1;
1203         }
1204         else if (src1_r == 0 && dst_r == 0) {
1205                 FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
1206                 src1_r = TMP_REG1;
1207         }
1208         else if (src2_r == 0 && dst_r == 0) {
1209                 FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
1210                 src2_r = sugg_src2_r;
1211         }
1212
1213         if (dst_r == 0)
1214                 dst_r = TMP_REG2;
1215
             /* Whatever is still unprocessed is loaded without a next operand. */
1216         if (src1_r == 0) {
1217                 FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
1218                 src1_r = TMP_REG1;
1219         }
1220
1221         if (src2_r == 0) {
1222                 FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
1223                 src2_r = sugg_src2_r;
1224         }
1225
1226         FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
1227
             /* Memory destination: write the result back. LOAD_DATA is not set
                here, so the store variant of the instruction is selected. */
1228         if (flags & (FAST_DEST | SLOW_DEST)) {
1229                 if (flags & FAST_DEST)
1230                         FAIL_IF(getput_arg_fast(compiler, input_flags, dst_r, dst, dstw));
1231                 else
1232                         FAIL_IF(getput_arg(compiler, input_flags, dst_r, dst, dstw, 0, 0));
1233         }
1234         return SLJIT_SUCCESS;
1235 }
1236
/* Emits an operation that has no explicit operands.
   SLJIT_BREAKPOINT, SLJIT_NOP: a single NOP is pushed.
   SLJIT_UMUL, SLJIT_SMUL: SLJIT_SCRATCH_REG1 is multiplied by
     SLJIT_SCRATCH_REG2; the low part of the product is written to
     SLJIT_SCRATCH_REG1 and the high part to SLJIT_SCRATCH_REG2.
   SLJIT_UDIV, SLJIT_SDIV: SLJIT_SCRATCH_REG1 is divided by
     SLJIT_SCRATCH_REG2; the quotient is written to SLJIT_SCRATCH_REG1 and
     the remainder (dividend - quotient * divisor) to SLJIT_SCRATCH_REG2.
     On PPC-64 the SLJIT_INT_OP bit of op selects the word sized divide and
     multiply instructions.
   Any other opcode emits nothing.
   Returns the result of the last push_inst, or SLJIT_SUCCESS. */
1237 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
1238 {
1239 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1240         sljit_si int_op = op & SLJIT_INT_OP;
1241 #endif
1242
1243         CHECK_ERROR();
1244         check_sljit_emit_op0(compiler, op);
1245
1246         op = GET_OPCODE(op);
1247         switch (op) {
1248         case SLJIT_BREAKPOINT:
1249         case SLJIT_NOP:
1250                 return push_inst(compiler, NOP);
1251         case SLJIT_UMUL:
1252         case SLJIT_SMUL:
                     /* Keep the first factor in TMP_REG1, since SLJIT_SCRATCH_REG1 is
                        overwritten by the low part before the high part is computed. */
1253                 FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG1)));
1254 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1255                 FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)));
1256                 return push_inst(compiler, (op == SLJIT_UMUL ? MULHDU : MULHD) | D(SLJIT_SCRATCH_REG2) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2));
1257 #else
1258                 FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)));
1259                 return push_inst(compiler, (op == SLJIT_UMUL ? MULHWU : MULHW) | D(SLJIT_SCRATCH_REG2) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2));
1260 #endif
1261         case SLJIT_UDIV:
1262         case SLJIT_SDIV:
                     /* TMP_REG1 = dividend. Then: quotient into SLJIT_SCRATCH_REG1,
                        quotient * divisor into SLJIT_SCRATCH_REG2, and finally
                        SLJIT_SCRATCH_REG2 = TMP_REG1 - SLJIT_SCRATCH_REG2. */
1263                 FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG1)));
1264 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1265                 if (int_op) {
1266                         FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)));
1267                         FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2)));
1268                 } else {
1269                         FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVDU : DIVD) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)));
1270                         FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2)));
1271                 }
1272                 return push_inst(compiler, SUBF | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG2) | B(TMP_REG1));
1273 #else
1274                 FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)));
1275                 FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2)));
1276                 return push_inst(compiler, SUBF | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG2) | B(TMP_REG1));
1277 #endif
1278         }
1279
1280         return SLJIT_SUCCESS;
1281 }
1282
/* EMIT_MOV(type, type_flags, type_cast): helper for sljit_emit_op1. Calls
   emit_op for a typed move. When src is an immediate the opcode becomes
   SLJIT_MOV and srcw is converted with type_cast; otherwise `type` and the
   unmodified srcw are used. It reads compiler, flags, dst, dstw, src and srcw
   from the enclosing function. */
1283 #define EMIT_MOV(type, type_flags, type_cast) \
1284         emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw)
1285
/* Emits a single source operation: the SLJIT_MOV* / SLJIT_MOVU* family,
   SLJIT_NOT, SLJIT_NEG and SLJIT_CLZ. The source is always passed to emit_op
   as its second source operand; TMP_REG1 is passed as the (unused) first one.
   The SLJIT_MOVU* variants are emitted like the matching SLJIT_MOV* ones with
   WRITE_BACK added to the flags.
   Returns the result of emit_op. An opcode that is not listed in the switch
   emits nothing and yields SLJIT_SUCCESS. */
1286 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
1287         sljit_si dst, sljit_sw dstw,
1288         sljit_si src, sljit_sw srcw)
1289 {
1290         sljit_si flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
1291         sljit_si op_flags = GET_ALL_FLAGS(op);
1292
1293         CHECK_ERROR();
1294         check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
1295         ADJUST_LOCAL_OFFSET(dst, dstw);
1296         ADJUST_LOCAL_OFFSET(src, srcw);
1297
1298         op = GET_OPCODE(op);
             /* An immediate 0 source is replaced with the TMP_ZERO register. */
1299         if ((src & SLJIT_IMM) && srcw == 0)
1300                 src = TMP_ZERO;
1301
             /* XER is written from TMP_ZERO before an operation that has to
                report overflow (presumably to clear stale overflow state). */
1302         if (op_flags & SLJIT_SET_O)
1303                 FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
1304
1305         if (op_flags & SLJIT_INT_OP) {
1306                 if (op < SLJIT_NOT) {
                             /* Register to itself move that needs no type conversion:
                                nothing to emit. */
1307                         if (FAST_IS_REG(src) && src == dst) {
1308                                 if (!TYPE_CAST_NEEDED(op))
1309                                         return SLJIT_SUCCESS;
1310                         }
1311 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
                             /* With SLJIT_INT_OP, signed int moves from memory are turned
                                into unsigned ones, and unsigned int moves of an immediate
                                into signed ones. */
1312                         if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
1313                                 op = SLJIT_MOV_UI;
1314                         if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
1315                                 op = SLJIT_MOVU_UI;
1316                         if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
1317                                 op = SLJIT_MOV_SI;
1318                         if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
1319                                 op = SLJIT_MOVU_SI;
1320 #endif
1321                 }
1322 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1323                 else {
1324                         /* Most operations expect sign extended arguments. */
1325                         flags |= INT_DATA | SIGNED_DATA;
1326                         if (src & SLJIT_IMM)
1327                                 srcw = (sljit_si)srcw;
1328                 }
1329 #endif
1330         }
1331
1332         switch (op) {
1333         case SLJIT_MOV:
1334         case SLJIT_MOV_P:
             /* On PPC-32 the int sized moves are plain word moves. */
1335 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
1336         case SLJIT_MOV_UI:
1337         case SLJIT_MOV_SI:
1338 #endif
1339                 return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
1340
1341 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1342         case SLJIT_MOV_UI:
1343                 return EMIT_MOV(SLJIT_MOV_UI, INT_DATA, (sljit_ui));
1344
1345         case SLJIT_MOV_SI:
1346                 return EMIT_MOV(SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, (sljit_si));
1347 #endif
1348
1349         case SLJIT_MOV_UB:
1350                 return EMIT_MOV(SLJIT_MOV_UB, BYTE_DATA, (sljit_ub));
1351
1352         case SLJIT_MOV_SB:
1353                 return EMIT_MOV(SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA, (sljit_sb));
1354
1355         case SLJIT_MOV_UH:
1356                 return EMIT_MOV(SLJIT_MOV_UH, HALF_DATA, (sljit_uh));
1357
1358         case SLJIT_MOV_SH:
1359                 return EMIT_MOV(SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA, (sljit_sh));
1360
1361         case SLJIT_MOVU:
1362         case SLJIT_MOVU_P:
1363 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
1364         case SLJIT_MOVU_UI:
1365         case SLJIT_MOVU_SI:
1366 #endif
1367                 return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
1368
1369 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1370         case SLJIT_MOVU_UI:
1371                 return EMIT_MOV(SLJIT_MOV_UI, INT_DATA | WRITE_BACK, (sljit_ui));
1372
1373         case SLJIT_MOVU_SI:
1374                 return EMIT_MOV(SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, (sljit_si));
1375 #endif
1376
1377         case SLJIT_MOVU_UB:
1378                 return EMIT_MOV(SLJIT_MOV_UB, BYTE_DATA | WRITE_BACK, (sljit_ub));
1379
1380         case SLJIT_MOVU_SB:
1381                 return EMIT_MOV(SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA | WRITE_BACK, (sljit_sb));
1382
1383         case SLJIT_MOVU_UH:
1384                 return EMIT_MOV(SLJIT_MOV_UH, HALF_DATA | WRITE_BACK, (sljit_uh));
1385
1386         case SLJIT_MOVU_SH:
1387                 return EMIT_MOV(SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA | WRITE_BACK, (sljit_sh));
1388
1389         case SLJIT_NOT:
1390                 return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);
1391
1392         case SLJIT_NEG:
1393                 return emit_op(compiler, SLJIT_NEG, flags, dst, dstw, TMP_REG1, 0, src, srcw);
1394
1395         case SLJIT_CLZ:
             /* On PPC-64, ALT_FORM1 is added when SLJIT_INT_OP is set. */
1396 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1397                 return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_INT_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw);
1398 #else
1399                 return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw);
1400 #endif
1401         }
1402
1403         return SLJIT_SUCCESS;
1404 }
1405
1406 #undef EMIT_MOV
1407
/* Immediate classification helpers for sljit_emit_op2.
   Each macro is true only when 'src' has the SLJIT_IMM bit set and the
   value 'srcw' satisfies the range / bit pattern listed below.

   The masks in TEST_UH_IMM and TEST_UI_IMM are converted to sljit_sw
   before they are complemented. Without the conversion the constants
   0xffff0000 and 0xffffffff have a 32 bit unsigned type, the complement
   is computed in 32 bits and then zero extended, so on 64 bit targets
   the upper 32 bits of 'srcw' were never checked (TEST_UI_IMM accepted
   every immediate). */

/* srcw lies in [SIMM_MIN, SIMM_MAX]. */
#define TEST_SL_IMM(src, srcw) \
        (((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN)

/* No bit set outside the low 16 bits. */
#define TEST_UL_IMM(src, srcw) \
        (((src) & SLJIT_IMM) && !((srcw) & ~0xffff))

/* Low 16 bits are zero (and, on 64 bit, the value fits into signed 32 bits). */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define TEST_SH_IMM(src, srcw) \
        (((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l)
#else
#define TEST_SH_IMM(src, srcw) \
        (((src) & SLJIT_IMM) && !((srcw) & 0xffff))
#endif

/* No bit set outside bits 16..31. */
#define TEST_UH_IMM(src, srcw) \
        (((src) & SLJIT_IMM) && !((srcw) & ~(sljit_sw)0xffff0000))

/* Any immediate on 32 bit; on 64 bit the value must lie in [-0x80000000, 0x7fff7fff]. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define TEST_ADD_IMM(src, srcw) \
        (((src) & SLJIT_IMM) && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l)
#else
#define TEST_ADD_IMM(src, srcw) \
        ((src) & SLJIT_IMM)
#endif

/* Any immediate on 32 bit; on 64 bit no bit may be set above bit 31. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define TEST_UI_IMM(src, srcw) \
        (((src) & SLJIT_IMM) && !((srcw) & ~(sljit_sw)0xffffffff))
#else
#define TEST_UI_IMM(src, srcw) \
        ((src) & SLJIT_IMM)
#endif
1440
/* Emits a two operand integer operation: the opcode is extracted from 'op'
   with GET_OPCODE, the operands are (src1, src1w) and (src2, src2w) and the
   result goes to (dst, dstw).

   Apart from an optional MTXER (when SLJIT_SET_O is requested) no instruction
   is pushed here directly. The function only selects an instruction form:
   it calls emit_op with a combination of ALT_* flags, and when an immediate
   operand passes one of the TEST_*_IMM checks the value is stored in
   compiler->imm and TMP_REG2 is passed to emit_op in place of that operand.

   Returns the value returned by emit_op (or by a failing push_inst), or
   SLJIT_SUCCESS when the opcode matches none of the cases below. */
1441 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
1442         sljit_si dst, sljit_sw dstw,
1443         sljit_si src1, sljit_sw src1w,
1444         sljit_si src2, sljit_sw src2w)
1445 {
1446         sljit_si flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
1447
1448         CHECK_ERROR();
1449         check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
1450         ADJUST_LOCAL_OFFSET(dst, dstw);
1451         ADJUST_LOCAL_OFFSET(src1, src1w);
1452         ADJUST_LOCAL_OFFSET(src2, src2w);
1453
             /* A zero immediate is replaced by TMP_ZERO. */
1454         if ((src1 & SLJIT_IMM) && src1w == 0)
1455                 src1 = TMP_ZERO;
1456         if ((src2 & SLJIT_IMM) && src2w == 0)
1457                 src2 = TMP_ZERO;
1458
1459 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1460         if (op & SLJIT_INT_OP) {
1461                 /* Most operations expect sign extended arguments. */
1462                 flags |= INT_DATA | SIGNED_DATA;
1463                 if (src1 & SLJIT_IMM)
1464                         src1w = (sljit_si)(src1w);
1465                 if (src2 & SLJIT_IMM)
1466                         src2w = (sljit_si)(src2w);
1467                 if (GET_FLAGS(op))
1468                         flags |= ALT_SIGN_EXT;
1469         }
1470 #endif
             /* When the overflow flag is requested, XER is written from TMP_ZERO first. */
1471         if (op & SLJIT_SET_O)
1472                 FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
1473         if (src2 == TMP_REG2)
1474                 flags |= ALT_KEEP_CACHE;
1475
1476         switch (GET_OPCODE(op)) {
1477         case SLJIT_ADD:
                     /* Immediate forms ALT_FORM1/2/4 are tried only when no flag is requested. */
1478                 if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
1479                         if (TEST_SL_IMM(src2, src2w)) {
1480                                 compiler->imm = src2w & 0xffff;
1481                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1482                         }
1483                         if (TEST_SL_IMM(src1, src1w)) {
1484                                 compiler->imm = src1w & 0xffff;
1485                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
1486                         }
1487                         if (TEST_SH_IMM(src2, src2w)) {
1488                                 compiler->imm = (src2w >> 16) & 0xffff;
1489                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1490                         }
1491                         if (TEST_SH_IMM(src1, src1w)) {
1492                                 compiler->imm = (src1w >> 16) & 0xffff;
1493                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
1494                         }
1495                         /* Range between -1 and -32768 is covered above. */
1496                         if (TEST_ADD_IMM(src2, src2w)) {
1497                                 compiler->imm = src2w & 0xffffffff;
1498                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
1499                         }
1500                         if (TEST_ADD_IMM(src1, src1w)) {
1501                                 compiler->imm = src1w & 0xffffffff;
1502                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
1503                         }
1504                 }
                     /* ALT_FORM3 is used for 16 bit signed immediates when neither E nor O is requested. */
1505                 if (!(GET_FLAGS(op) & (SLJIT_SET_E | SLJIT_SET_O))) {
1506                         if (TEST_SL_IMM(src2, src2w)) {
1507                                 compiler->imm = src2w & 0xffff;
1508                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1509                         }
1510                         if (TEST_SL_IMM(src1, src1w)) {
1511                                 compiler->imm = src1w & 0xffff;
1512                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
1513                         }
1514                 }
1515                 return emit_op(compiler, SLJIT_ADD, flags, dst, dstw, src1, src1w, src2, src2w);
1516
1517         case SLJIT_ADDC:
1518                 return emit_op(compiler, SLJIT_ADDC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
1519
1520         case SLJIT_SUB:
                     /* Without flags, subtracting an immediate is emitted as SLJIT_ADD of the
                        negated value whenever the negated value passes the immediate tests. */
1521                 if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
1522                         if (TEST_SL_IMM(src2, -src2w)) {
1523                                 compiler->imm = (-src2w) & 0xffff;
1524                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1525                         }
1526                         if (TEST_SL_IMM(src1, src1w)) {
1527                                 compiler->imm = src1w & 0xffff;
1528                                 return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
1529                         }
1530                         if (TEST_SH_IMM(src2, -src2w)) {
1531                                 compiler->imm = ((-src2w) >> 16) & 0xffff;
1532                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1533                         }
1534                         /* Range between -1 and -32768 is covered above. */
1535                         if (TEST_ADD_IMM(src2, -src2w)) {
1536                                 compiler->imm = -src2w & 0xffffffff;
1537                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
1538                         }
1539                 }
                     /* The result is unused and only E / U / S flags are requested (no O, no C):
                        the flag-only forms ALT_FORM2 .. ALT_FORM5 are selected here. */
1540                 if (dst == SLJIT_UNUSED && (op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S)) && !(op & (SLJIT_SET_O | SLJIT_SET_C))) {
1541                         if (!(op & SLJIT_SET_U)) {
1542                                 /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */
1543                                 if (TEST_SL_IMM(src2, src2w)) {
1544                                         compiler->imm = src2w & 0xffff;
1545                                         return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1546                                 }
1547                                 if (GET_FLAGS(op) == SLJIT_SET_E && TEST_SL_IMM(src1, src1w)) {
1548                                         compiler->imm = src1w & 0xffff;
1549                                         return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
1550                                 }
1551                         }
1552                         if (!(op & (SLJIT_SET_E | SLJIT_SET_S))) {
1553                                 /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */
1554                                 if (TEST_UL_IMM(src2, src2w)) {
1555                                         compiler->imm = src2w & 0xffff;
1556                                         return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1557                                 }
1558                                 return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
1559                         }
1560                         if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= 0x7fff) {
1561                                 compiler->imm = src2w;
1562                                 return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1563                         }
1564                         return emit_op(compiler, SLJIT_SUB, flags | ((op & SLJIT_SET_U) ? ALT_FORM4 : 0) | ((op & (SLJIT_SET_E | SLJIT_SET_S)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
1565                 }
1566                 if (!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))) {
1567                         if (TEST_SL_IMM(src2, -src2w)) {
1568                                 compiler->imm = (-src2w) & 0xffff;
1569                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1570                         }
1571                 }
1572                 /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */
1573                 return emit_op(compiler, SLJIT_SUB, flags | (!(op & SLJIT_SET_U) ? 0 : ALT_FORM6), dst, dstw, src1, src1w, src2, src2w);
1574
1575         case SLJIT_SUBC:
1576                 return emit_op(compiler, SLJIT_SUBC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
1577
1578         case SLJIT_MUL:
1579 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1580                 if (op & SLJIT_INT_OP)
1581                         flags |= ALT_FORM2;
1582 #endif
1583                 if (!GET_FLAGS(op)) {
1584                         if (TEST_SL_IMM(src2, src2w)) {
1585                                 compiler->imm = src2w & 0xffff;
1586                                 return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1587                         }
1588                         if (TEST_SL_IMM(src1, src1w)) {
1589                                 compiler->imm = src1w & 0xffff;
1590                                 return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
1591                         }
1592                 }
1593                 return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w);
1594
1595         case SLJIT_AND:
1596         case SLJIT_OR:
1597         case SLJIT_XOR:
1598                 /* Commutative unsigned operations. */
                     /* 16 bit immediate forms: allowed for AND even when flags are requested,
                        for OR / XOR only without flags. */
1599                 if (!GET_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) {
1600                         if (TEST_UL_IMM(src2, src2w)) {
1601                                 compiler->imm = src2w;
1602                                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1603                         }
1604                         if (TEST_UL_IMM(src1, src1w)) {
1605                                 compiler->imm = src1w;
1606                                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
1607                         }
1608                         if (TEST_UH_IMM(src2, src2w)) {
1609                                 compiler->imm = (src2w >> 16) & 0xffff;
1610                                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1611                         }
1612                         if (TEST_UH_IMM(src1, src1w)) {
1613                                 compiler->imm = (src1w >> 16) & 0xffff;
1614                                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
1615                         }
1616                 }
1617                 if (!GET_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) {
1618                         if (TEST_UI_IMM(src2, src2w)) {
1619                                 compiler->imm = src2w;
1620                                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1621                         }
1622                         if (TEST_UI_IMM(src1, src1w)) {
1623                                 compiler->imm = src1w;
1624                                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
1625                         }
1626                 }
1627                 return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
1628
1629         case SLJIT_ASHR:
1630                 if (op & SLJIT_KEEP_FLAGS)
1631                         flags |= ALT_FORM3;
1632                 /* Fall through. */
1633         case SLJIT_SHL:
1634         case SLJIT_LSHR:
1635 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1636                 if (op & SLJIT_INT_OP)
1637                         flags |= ALT_FORM2;
1638 #endif
                     /* An immediate shift amount is passed through compiler->imm (ALT_FORM1). */
1639                 if (src2 & SLJIT_IMM) {
1640                         compiler->imm = src2w;
1641                         return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1642                 }
1643                 return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
1644         }
1645
1646         return SLJIT_SUCCESS;
1647 }
1648
1649 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
1650 {
1651         check_sljit_get_register_index(reg);
1652         return reg_map[reg];
1653 }
1654
1655 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
1656 {
1657         check_sljit_get_float_register_index(reg);
1658         return reg;
1659 }
1660
1661 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
1662         void *instruction, sljit_si size)
1663 {
1664         CHECK_ERROR();
1665         check_sljit_emit_op_custom(compiler, instruction, size);
1666         SLJIT_ASSERT(size == 4);
1667
1668         return push_inst(compiler, *(sljit_ins*)instruction);
1669 }
1670
1671 /* --------------------------------------------------------------------- */
1672 /*  Floating point operators                                             */
1673 /* --------------------------------------------------------------------- */
1674
1675 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
1676 {
1677 #ifdef SLJIT_IS_FPU_AVAILABLE
1678         return SLJIT_IS_FPU_AVAILABLE;
1679 #else
1680         /* Available by default. */
1681         return 1;
1682 #endif
1683 }
1684
/* Data flags of a float memory access: DOUBLE_DATA combined with the
   SLJIT_SINGLE_OP bit of 'op' shifted right by 6. */
#define FLOAT_DATA(op) (DOUBLE_DATA | (((op) & SLJIT_SINGLE_OP) >> 6))
/* Selects 'single_ins' when 'op' has SLJIT_SINGLE_OP set, 'double_ins' otherwise.
   All arguments are parenthesized so that compound expressions are evaluated as a unit. */
#define SELECT_FOP(op, single_ins, double_ins) (((op) & SLJIT_SINGLE_OP) ? (single_ins) : (double_ins))
1687
/* Emits a single operand floating point operation.

   SLJIT_CMPD: both operands are brought into registers (memory operands
   are loaded into TMP_FREG1 / TMP_FREG2) and an FCMPU with CRD(4) is pushed.

   SLJIT_MOVD / SLJIT_NEGD / SLJIT_ABSD: a memory source is first loaded into
   the destination register (TMP_FREG1 when dst is not a register), then FMR,
   FNEG or FABS is pushed. When dst is not a register the value is finally
   stored with emit_op_mem2.

   Returns SLJIT_SUCCESS, or the error code of the first failing emitter. */
1688 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
1689         sljit_si dst, sljit_sw dstw,
1690         sljit_si src, sljit_sw srcw)
1691 {
1692         sljit_si dst_fr;
1693
1694         CHECK_ERROR();
1695         check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
             /* FLOAT_DATA relies on SLJIT_SINGLE_OP >> 6 producing the bit 0x4. */
1696         SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error);
1697
1698         compiler->cache_arg = 0;
1699         compiler->cache_argw = 0;
1700
1701         if (GET_OPCODE(op) == SLJIT_CMPD) {
1702                 if (dst & SLJIT_MEM) {
1703                         FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw));
1704                         dst = TMP_FREG1;
1705                 }
1706
1707                 if (src & SLJIT_MEM) {
1708                         FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0));
1709                         src = TMP_FREG2;
1710                 }
1711
1712                 return push_inst(compiler, FCMPU | CRD(4) | FA(dst) | FB(src));
1713         }
1714
             /* Work in dst itself when it is a register, otherwise in TMP_FREG1. */
1715         dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1716
1717         if (src & SLJIT_MEM) {
1718                 FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_fr, src, srcw, dst, dstw));
1719                 src = dst_fr;
1720         }
1721
1722         switch (GET_OPCODE(op)) {
1723                 case SLJIT_MOVD:
                             /* No register move is emitted when the value is already in place
                                or when it is going to be stored to memory below. */
1724                         if (src != dst_fr && dst_fr != TMP_FREG1)
1725                                 FAIL_IF(push_inst(compiler, FMR | FD(dst_fr) | FB(src)));
1726                         break;
1727                 case SLJIT_NEGD:
1728                         FAIL_IF(push_inst(compiler, FNEG | FD(dst_fr) | FB(src)));
1729                         break;
1730                 case SLJIT_ABSD:
1731                         FAIL_IF(push_inst(compiler, FABS | FD(dst_fr) | FB(src)));
1732                         break;
1733         }
1734
1735         if (dst_fr == TMP_FREG1) {
                     /* For a plain move the source register is stored directly. */
1736                 if (GET_OPCODE(op) == SLJIT_MOVD)
1737                         dst_fr = src;
1738                 FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_fr, dst, dstw, 0, 0));
1739         }
1740
1741         return SLJIT_SUCCESS;
1742 }
1743
/* Emits a two operand floating point operation (SLJIT_ADDD, SLJIT_SUBD,
   SLJIT_MULD or SLJIT_DIVD), choosing the single or double precision
   instruction with SELECT_FOP.

   Memory sources are loaded into TMP_FREG1 (src1) and TMP_FREG2 (src2):
   getput_arg_fast is tried first, and operands it does not handle are
   remembered in 'flags' (ALT_FORM1 for src1, ALT_FORM2 for src2) and loaded
   with getput_arg afterwards. When dst is not a register the result is
   computed in TMP_FREG2 and stored with emit_op_mem2.

   Returns SLJIT_SUCCESS, or the error code of the first failing emitter. */
1744 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
1745         sljit_si dst, sljit_sw dstw,
1746         sljit_si src1, sljit_sw src1w,
1747         sljit_si src2, sljit_sw src2w)
1748 {
1749         sljit_si dst_fr, flags = 0;
1750
1751         CHECK_ERROR();
1752         check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
1753
1754         compiler->cache_arg = 0;
1755         compiler->cache_argw = 0;
1756
1757         dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG2;
1758
1759         if (src1 & SLJIT_MEM) {
1760                 if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
1761                         FAIL_IF(compiler->error);
1762                         src1 = TMP_FREG1;
1763                 } else
1764                         flags |= ALT_FORM1;
1765         }
1766
1767         if (src2 & SLJIT_MEM) {
1768                 if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
1769                         FAIL_IF(compiler->error);
1770                         src2 = TMP_FREG2;
1771                 } else
1772                         flags |= ALT_FORM2;
1773         }
1774
             /* Both sources still need loading: the load order depends on the can_cache results. */
1775         if ((flags & (ALT_FORM1 | ALT_FORM2)) == (ALT_FORM1 | ALT_FORM2)) {
1776                 if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
1777                         FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
1778                         FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
1779                 }
1780                 else {
1781                         FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
1782                         FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
1783                 }
1784         }
1785         else if (flags & ALT_FORM1)
1786                 FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
1787         else if (flags & ALT_FORM2)
1788                 FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
1789
1790         if (flags & ALT_FORM1)
1791                 src1 = TMP_FREG1;
1792         if (flags & ALT_FORM2)
1793                 src2 = TMP_FREG2;
1794
1795         switch (GET_OPCODE(op)) {
1796         case SLJIT_ADDD:
1797                 FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_fr) | FA(src1) | FB(src2)));
1798                 break;
1799
1800         case SLJIT_SUBD:
1801                 FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_fr) | FA(src1) | FB(src2)));
1802                 break;
1803
1804         case SLJIT_MULD:
1805                 FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_fr) | FA(src1) | FC(src2) /* FMUL use FC as src2 */));
1806                 break;
1807
1808         case SLJIT_DIVD:
1809                 FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_fr) | FA(src1) | FB(src2)));
1810                 break;
1811         }
1812
             /* dst was not a register: store the result from TMP_FREG2. */
1813         if (dst_fr == TMP_FREG2)
1814                 FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
1815
1816         return SLJIT_SUCCESS;
1817 }
1818
1819 #undef FLOAT_DATA
1820 #undef SELECT_FOP
1821
1822 /* --------------------------------------------------------------------- */
1823 /*  Other instructions                                                   */
1824 /* --------------------------------------------------------------------- */
1825
1826 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
1827 {
1828         CHECK_ERROR();
1829         check_sljit_emit_fast_enter(compiler, dst, dstw);
1830         ADJUST_LOCAL_OFFSET(dst, dstw);
1831
1832         /* For UNUSED dst. Uncommon, but possible. */
1833         if (dst == SLJIT_UNUSED)
1834                 return SLJIT_SUCCESS;
1835
1836         if (FAST_IS_REG(dst))
1837                 return push_inst(compiler, MFLR | D(dst));
1838
1839         /* Memory. */
1840         FAIL_IF(push_inst(compiler, MFLR | D(TMP_REG2)));
1841         return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
1842 }
1843
1844 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
1845 {
1846         CHECK_ERROR();
1847         check_sljit_emit_fast_return(compiler, src, srcw);
1848         ADJUST_LOCAL_OFFSET(src, srcw);
1849
1850         if (FAST_IS_REG(src))
1851                 FAIL_IF(push_inst(compiler, MTLR | S(src)));
1852         else {
1853                 if (src & SLJIT_MEM)
1854                         FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
1855                 else if (src & SLJIT_IMM)
1856                         FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
1857                 FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2)));
1858         }
1859         return push_inst(compiler, BLR);
1860 }
1861
1862 /* --------------------------------------------------------------------- */
1863 /*  Conditional instructions                                             */
1864 /* --------------------------------------------------------------------- */
1865
1866 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
1867 {
1868         struct sljit_label *label;
1869
1870         CHECK_ERROR_PTR();
1871         check_sljit_emit_label(compiler);
1872
1873         if (compiler->last_label && compiler->last_label->size == compiler->size)
1874                 return compiler->last_label;
1875
1876         label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
1877         PTR_FAIL_IF(!label);
1878         set_label(label, compiler);
1879         return label;
1880 }
1881
1882 static sljit_ins get_bo_bi_flags(sljit_si type)
1883 {
1884         switch (type) {
1885         case SLJIT_C_EQUAL:
1886                 return (12 << 21) | (2 << 16);
1887
1888         case SLJIT_C_NOT_EQUAL:
1889                 return (4 << 21) | (2 << 16);
1890
1891         case SLJIT_C_LESS:
1892         case SLJIT_C_FLOAT_LESS:
1893                 return (12 << 21) | ((4 + 0) << 16);
1894
1895         case SLJIT_C_GREATER_EQUAL:
1896         case SLJIT_C_FLOAT_GREATER_EQUAL:
1897                 return (4 << 21) | ((4 + 0) << 16);
1898
1899         case SLJIT_C_GREATER:
1900         case SLJIT_C_FLOAT_GREATER:
1901                 return (12 << 21) | ((4 + 1) << 16);
1902
1903         case SLJIT_C_LESS_EQUAL:
1904         case SLJIT_C_FLOAT_LESS_EQUAL:
1905                 return (4 << 21) | ((4 + 1) << 16);
1906
1907         case SLJIT_C_SIG_LESS:
1908                 return (12 << 21) | (0 << 16);
1909
1910         case SLJIT_C_SIG_GREATER_EQUAL:
1911                 return (4 << 21) | (0 << 16);
1912
1913         case SLJIT_C_SIG_GREATER:
1914                 return (12 << 21) | (1 << 16);
1915
1916         case SLJIT_C_SIG_LESS_EQUAL:
1917                 return (4 << 21) | (1 << 16);
1918
1919         case SLJIT_C_OVERFLOW:
1920         case SLJIT_C_MUL_OVERFLOW:
1921                 return (12 << 21) | (3 << 16);
1922
1923         case SLJIT_C_NOT_OVERFLOW:
1924         case SLJIT_C_MUL_NOT_OVERFLOW:
1925                 return (4 << 21) | (3 << 16);
1926
1927         case SLJIT_C_FLOAT_EQUAL:
1928                 return (12 << 21) | ((4 + 2) << 16);
1929
1930         case SLJIT_C_FLOAT_NOT_EQUAL:
1931                 return (4 << 21) | ((4 + 2) << 16);
1932
1933         case SLJIT_C_FLOAT_UNORDERED:
1934                 return (12 << 21) | ((4 + 3) << 16);
1935
1936         case SLJIT_C_FLOAT_ORDERED:
1937                 return (4 << 21) | ((4 + 3) << 16);
1938
1939         default:
1940                 SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3);
1941                 return (20 << 21);
1942         }
1943 }
1944
1945 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
1946 {
1947         struct sljit_jump *jump;
1948         sljit_ins bo_bi_flags;
1949
1950         CHECK_ERROR_PTR();
1951         check_sljit_emit_jump(compiler, type);
1952
1953         bo_bi_flags = get_bo_bi_flags(type & 0xff);
1954         if (!bo_bi_flags)
1955                 return NULL;
1956
1957         jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
1958         PTR_FAIL_IF(!jump);
1959         set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
1960         type &= 0xff;
1961
1962         /* In PPC, we don't need to touch the arguments. */
1963         if (type < SLJIT_JUMP)
1964                 jump->flags |= IS_COND;
1965 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
1966         if (type >= SLJIT_CALL0)
1967                 jump->flags |= IS_CALL;
1968 #endif
1969
1970         PTR_FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
1971         PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_CALL_REG)));
1972         jump->addr = compiler->size;
1973         PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0)));
1974         return jump;
1975 }
1976
/* Emits an indirect jump or call to the address given by (src, srcw).

   The target is placed in a register (src itself, or TMP_CALL_REG), moved
   to CTR with MTCTR and followed by a BCCTR with the value 20 in the bit 21
   field. The lowest bit of the BCCTR is set when type is SLJIT_FAST_CALL or
   above.

   - register source: used as is; when SLJIT_PASS_ENTRY_ADDR_TO_CALL is
     enabled and type >= SLJIT_CALL0 it is first copied to TMP_CALL_REG.
   - immediate source: a jump record with the JUMP_ADDR flag and target srcw
     is created, and a placeholder constant (0) is emitted into TMP_CALL_REG.
   - anything else: loaded into TMP_CALL_REG with a word sized SLJIT_MOV.

   Returns the result of the final push_inst, or the first error code. */
1977 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
1978 {
1979         struct sljit_jump *jump = NULL;
1980         sljit_si src_r;
1981
1982         CHECK_ERROR();
1983         check_sljit_emit_ijump(compiler, type, src, srcw);
1984         ADJUST_LOCAL_OFFSET(src, srcw);
1985
1986         if (FAST_IS_REG(src)) {
1987 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
1988                 if (type >= SLJIT_CALL0) {
                             /* OR of src with itself: copies src into TMP_CALL_REG. */
1989                         FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src)));
1990                         src_r = TMP_CALL_REG;
1991                 }
1992                 else
1993                         src_r = src;
1994 #else
1995                 src_r = src;
1996 #endif
1997         } else if (src & SLJIT_IMM) {
1998                 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
1999                 FAIL_IF(!jump);
2000                 set_jump(jump, compiler, JUMP_ADDR);
2001                 jump->u.target = srcw;
2002 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
2003                 if (type >= SLJIT_CALL0)
2004                         jump->flags |= IS_CALL;
2005 #endif
2006                 FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
2007                 src_r = TMP_CALL_REG;
2008         }
2009         else {
2010                 FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw));
2011                 src_r = TMP_CALL_REG;
2012         }
2013
2014         FAIL_IF(push_inst(compiler, MTCTR | S(src_r)));
             /* Only set for immediate targets: remember where the branch instruction is. */
2015         if (jump)
2016                 jump->addr = compiler->size;
2017         return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0));
2018 }
2019
/* Get a bit from CR, all other bits are zeroed.
   Pushes MFCR into 'dst' followed by an RLWINM on 'dst' whose operand
   fields are (1 + bit) << 11, 31 << 6 and 31 << 1.

   Both macros expect a 'compiler' variable in scope and return from the
   enclosing function through FAIL_IF when push_inst fails. They are wrapped
   in do { } while (0) so that each use is exactly one statement, which keeps
   them safe inside an unbraced if / else. Write the terminating semicolon
   at the point of use. */
#define GET_CR_BIT(bit, dst) \
        do { \
                FAIL_IF(push_inst(compiler, MFCR | D(dst))); \
                FAIL_IF(push_inst(compiler, RLWINM | S(dst) | A(dst) | ((1 + (bit)) << 11) | (31 << 6) | (31 << 1))); \
        } while (0)

/* Flips the lowest bit of 'dst' by pushing an XORI with the immediate 0x1. */
#define INVERT_BIT(dst) \
        do { \
                FAIL_IF(push_inst(compiler, XORI | S(dst) | A(dst) | 0x1)); \
        } while (0)
2027
2028 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
2029         sljit_si dst, sljit_sw dstw,
2030         sljit_si src, sljit_sw srcw,
2031         sljit_si type)
2032 {
2033         sljit_si reg, input_flags;
2034         sljit_si flags = GET_ALL_FLAGS(op);
2035         sljit_sw original_dstw = dstw;
2036
2037         CHECK_ERROR();
2038         check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);
2039         ADJUST_LOCAL_OFFSET(dst, dstw);
2040
2041         if (dst == SLJIT_UNUSED)
2042                 return SLJIT_SUCCESS;
2043
2044         op = GET_OPCODE(op);
2045         reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
2046
2047         compiler->cache_arg = 0;
2048         compiler->cache_argw = 0;
2049         if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
2050                 ADJUST_LOCAL_OFFSET(src, srcw);
2051 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
2052                 input_flags = (flags & SLJIT_INT_OP) ? INT_DATA : WORD_DATA;
2053 #else
2054                 input_flags = WORD_DATA;
2055 #endif
2056                 FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
2057                 src = TMP_REG1;
2058                 srcw = 0;
2059         }
2060
2061         switch (type) {
2062         case SLJIT_C_EQUAL:
2063                 GET_CR_BIT(2, reg);
2064                 break;
2065
2066         case SLJIT_C_NOT_EQUAL:
2067                 GET_CR_BIT(2, reg);
2068                 INVERT_BIT(reg);
2069                 break;
2070
2071         case SLJIT_C_LESS:
2072         case SLJIT_C_FLOAT_LESS:
2073                 GET_CR_BIT(4 + 0, reg);
2074                 break;
2075
2076         case SLJIT_C_GREATER_EQUAL:
2077         case SLJIT_C_FLOAT_GREATER_EQUAL:
2078                 GET_CR_BIT(4 + 0, reg);
2079                 INVERT_BIT(reg);
2080                 break;
2081
2082         case SLJIT_C_GREATER:
2083         case SLJIT_C_FLOAT_GREATER:
2084                 GET_CR_BIT(4 + 1, reg);
2085                 break;
2086
2087         case SLJIT_C_LESS_EQUAL:
2088         case SLJIT_C_FLOAT_LESS_EQUAL:
2089                 GET_CR_BIT(4 + 1, reg);
2090                 INVERT_BIT(reg);
2091                 break;
2092
2093         case SLJIT_C_SIG_LESS:
2094                 GET_CR_BIT(0, reg);
2095                 break;
2096
2097         case SLJIT_C_SIG_GREATER_EQUAL:
2098                 GET_CR_BIT(0, reg);
2099                 INVERT_BIT(reg);
2100                 break;
2101
2102         case SLJIT_C_SIG_GREATER:
2103                 GET_CR_BIT(1, reg);
2104                 break;
2105
2106         case SLJIT_C_SIG_LESS_EQUAL:
2107                 GET_CR_BIT(1, reg);
2108                 INVERT_BIT(reg);
2109                 break;
2110
2111         case SLJIT_C_OVERFLOW:
2112         case SLJIT_C_MUL_OVERFLOW:
2113                 GET_CR_BIT(3, reg);
2114                 break;
2115
2116         case SLJIT_C_NOT_OVERFLOW:
2117         case SLJIT_C_MUL_NOT_OVERFLOW:
2118                 GET_CR_BIT(3, reg);
2119                 INVERT_BIT(reg);
2120                 break;
2121
2122         case SLJIT_C_FLOAT_EQUAL:
2123                 GET_CR_BIT(4 + 2, reg);
2124                 break;
2125
2126         case SLJIT_C_FLOAT_NOT_EQUAL:
2127                 GET_CR_BIT(4 + 2, reg);
2128                 INVERT_BIT(reg);
2129                 break;
2130
2131         case SLJIT_C_FLOAT_UNORDERED:
2132                 GET_CR_BIT(4 + 3, reg);
2133                 break;
2134
2135         case SLJIT_C_FLOAT_ORDERED:
2136                 GET_CR_BIT(4 + 3, reg);
2137                 INVERT_BIT(reg);
2138                 break;
2139
2140         default:
2141                 SLJIT_ASSERT_STOP();
2142                 break;
2143         }
2144
2145         if (op < SLJIT_ADD) {
2146 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
2147                 if (op == SLJIT_MOV)
2148                         input_flags = WORD_DATA;
2149                 else {
2150                         op = SLJIT_MOV_UI;
2151                         input_flags = INT_DATA;
2152                 }
2153 #else
2154                 op = SLJIT_MOV;
2155                 input_flags = WORD_DATA;
2156 #endif
2157                 if (reg != TMP_REG2)
2158                         return SLJIT_SUCCESS;
2159                 return emit_op(compiler, op, input_flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
2160         }
2161
2162 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
2163         compiler->skip_checks = 1;
2164 #endif
2165         return sljit_emit_op2(compiler, op | flags, dst, original_dstw, src, srcw, TMP_REG2, 0);
2166 }
2167
2168 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
2169 {
2170         struct sljit_const *const_;
2171         sljit_si reg;
2172
2173         CHECK_ERROR_PTR();
2174         check_sljit_emit_const(compiler, dst, dstw, init_value);
2175         ADJUST_LOCAL_OFFSET(dst, dstw);
2176
2177         const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2178         PTR_FAIL_IF(!const_);
2179         set_const(const_, compiler);
2180
2181         reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
2182
2183         PTR_FAIL_IF(emit_const(compiler, reg, init_value));
2184
2185         if (dst & SLJIT_MEM)
2186                 PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
2187         return const_;
2188 }