chiark / gitweb /
Commit upstream pcre-8.39.tar.bz2
[pcre3.git] / sljit / sljitNativePPC_common.c
1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28 {
29         return "PowerPC" SLJIT_CPUINFO;
30 }
31
32 /* Length of an instruction word.
33    Both for ppc-32 and ppc-64. */
34 typedef sljit_u32 sljit_ins;
35
/* SLJIT_PPC_STACK_FRAME_V2 is set for 32-bit AIX builds and for every 64-bit
   build. In this file it selects where sljit_emit_enter / sljit_emit_return
   keep the saved link register: at 2 * sizeof(sljit_sw) from the stack
   pointer when set, at sizeof(sljit_sw) otherwise. */
36 #if ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && (defined _AIX)) \
37         || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
38 #define SLJIT_PPC_STACK_FRAME_V2 1
39 #endif
40
/* AIX only: system header needed by the AIX branch of ppc_cache_flush
   (presumably the one declaring _sync_cache_range; verify on an AIX host). */
41 #ifdef _AIX
42 #include <sys/cache.h>
43 #endif
44
/* Little endian builds set SLJIT_PASS_ENTRY_ADDR_TO_CALL. Within this file
   the setting gives TMP_CALL_REG a register of its own and makes
   detect_jump_type keep an absolute address form for calls. */
45 #if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
46 #define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1
47 #endif
48
49 #if (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL)
50
/* Cache maintenance for the instruction words in [from, to).
   The implementation is picked at compile time:
     - _AIX: a single call to _sync_cache_range() with the byte length of
       the range.
     - GCC-style inline assembly, _ARCH_PWR / _ARCH_PWR2: "clf" and "dcs"
       for each word, followed by one "ics".
     - GCC-style inline assembly, otherwise: "dcbf", "sync" and "icbi" for
       each word, followed by one "isync".
   POWER/PowerPC common mode, xlc without GCC asm syntax, and unknown
   compilers are rejected with #error.
   The loops advance `from` by one sljit_ins per iteration, so the
   per-address instructions are issued once for every instruction word. */
51 static void ppc_cache_flush(sljit_ins *from, sljit_ins *to)
52 {
53 #ifdef _AIX
        /* The length argument is the pointer difference in bytes, narrowed to int. */
54         _sync_cache_range((caddr_t)from, (int)((size_t)to - (size_t)from));
55 #elif defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM)
56 #       if defined(_ARCH_PWR) || defined(_ARCH_PWR2)
57         /* Cache flush for POWER architecture. */
58         while (from < to) {
59                 __asm__ volatile (
60                         "clf 0, %0\n"
61                         "dcs\n"
62                         : : "r"(from)
63                 );
64                 from++;
65         }
66         __asm__ volatile ( "ics" );
67 #       elif defined(_ARCH_COM) && !defined(_ARCH_PPC)
68 #       error "Cache flush is not implemented for PowerPC/POWER common mode."
69 #       else
70         /* Cache flush for PowerPC architecture. */
71         while (from < to) {
72                 __asm__ volatile (
73                         "dcbf 0, %0\n"
74                         "sync\n"
75                         "icbi 0, %0\n"
76                         : : "r"(from)
77                 );
78                 from++;
79         }
80         __asm__ volatile ( "isync" );
81 #       endif
82 #       ifdef __xlc__
83 #       warning "This file may fail to compile if -qfuncsect is used"
84 #       endif
85 #elif defined(__xlc__)
86 #error "Please enable GCC syntax for inline assembly statements with -qasm=gcc"
87 #else
88 #error "This platform requires a cache flush implementation."
89 #endif /* _AIX */
90 }
91
92 #endif /* (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) */
93
/* Indices of the back end's private temporaries. They are placed after the
   public sljit registers and are translated through reg_map like any other
   register index. sljit_emit_enter saves the old value of TMP_ZERO and then
   sets the register with an ADDI whose immediate is 0. */
94 #define TMP_REG1        (SLJIT_NUMBER_OF_REGISTERS + 2)
95 #define TMP_REG2        (SLJIT_NUMBER_OF_REGISTERS + 3)
96 #define TMP_REG3        (SLJIT_NUMBER_OF_REGISTERS + 4)
97 #define TMP_ZERO        (SLJIT_NUMBER_OF_REGISTERS + 5)
98
/* TMP_CALL_REG gets a dedicated slot only when SLJIT_PASS_ENTRY_ADDR_TO_CALL
   is set; otherwise it is an alias of TMP_REG2. */
99 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
100 #define TMP_CALL_REG    (SLJIT_NUMBER_OF_REGISTERS + 6)
101 #else
102 #define TMP_CALL_REG    TMP_REG2
103 #endif
104
/* Floating point temporaries. Floating point register indices are used
   directly by FD()/FS()/FA()/FB()/FC(); they do not pass through reg_map. */
105 #define TMP_FREG1       (0)
106 #define TMP_FREG2       (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
107
/* Translates an sljit register index into the machine register number that
   D(), S(), A(), B() and C() shift into an instruction word. Index 0 maps to
   machine register 0, which the prologue and epilogue use as a scratch.
   NOTE(review): 29 initializers are listed for SLJIT_NUMBER_OF_REGISTERS + 7
   entries, which presumes SLJIT_NUMBER_OF_REGISTERS is 22 - confirm against
   the configuration header. */
108 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
109         0, 3, 4, 5, 6, 7, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 8, 9, 10, 31, 12
110 };
111
112 /* --------------------------------------------------------------------- */
113 /*  Instruction forms                                                    */
114 /* --------------------------------------------------------------------- */
/* Register field builders. D/S/A/B/C take an sljit register index, map it
   through reg_map and shift the result to bit 21, 21, 16, 11 or 6. D and S
   produce the same bits; the two names only document whether the field is
   used as a destination or as a source. */
115 #define D(d)            (reg_map[d] << 21)
116 #define S(s)            (reg_map[s] << 21)
117 #define A(a)            (reg_map[a] << 16)
118 #define B(b)            (reg_map[b] << 11)
119 #define C(c)            (reg_map[c] << 6)
/* Floating point variants: same bit positions, but the argument is shifted
   as is, without a reg_map lookup. */
120 #define FD(fd)          ((fd) << 21)
121 #define FS(fs)          ((fs) << 21)
122 #define FA(fa)          ((fa) << 16)
123 #define FB(fb)          ((fb) << 11)
124 #define FC(fc)          ((fc) << 6)
/* Keeps the low 16 bits of an immediate, so negative values can be passed. */
125 #define IMM(imm)        ((imm) & 0xffff)
/* Raw value shifted to bit 21, no reg_map lookup. */
126 #define CRD(d)          ((d) << 21)
127
128 /* Instruction bit sections.
129    OE and Rc flag (see ALT_SET_FLAGS). */
/* ALT_SET_FLAGS is 0x400: the flag itself lands on bit 10 and its copy
   shifted right by 10 lands on bit 0. */
130 #define OERC(flags)     (((flags & ALT_SET_FLAGS) >> 10) | (flags & ALT_SET_FLAGS))
131 /* Rc flag (see ALT_SET_FLAGS). */
132 #define RC(flags)       ((flags & ALT_SET_FLAGS) >> 10)
/* HI() places a value at bit 26 and LO() at bit 1; the opcode table below
   builds every instruction template from these two. */
133 #define HI(opcode)      ((opcode) << 26)
134 #define LO(opcode)      ((opcode) << 1)
135
/* Instruction templates. Each macro is an instruction word with only its
   opcode bits set: HI(x) is the value placed at bit 26 and LO(y) the value
   placed at bit 1. Callers OR in register fields (D/S/A/B/C, FD/FS/...),
   immediates (IMM) and flag bits (RC/OERC) to form a complete instruction.
   A few entries carry extra fixed bits (BCCTR, BLR, the MFxx/MTxx special
   register moves, STDU). */
136 #define ADD             (HI(31) | LO(266))
137 #define ADDC            (HI(31) | LO(10))
138 #define ADDE            (HI(31) | LO(138))
139 #define ADDI            (HI(14))
140 #define ADDIC           (HI(13))
141 #define ADDIS           (HI(15))
142 #define ADDME           (HI(31) | LO(234))
143 #define AND             (HI(31) | LO(28))
144 #define ANDI            (HI(28))
145 #define ANDIS           (HI(29))
146 #define Bx              (HI(18))
147 #define BCx             (HI(16))
148 #define BCCTR           (HI(19) | LO(528) | (3 << 11))
149 #define BLR             (HI(19) | LO(16) | (0x14 << 21))
150 #define CNTLZD          (HI(31) | LO(58))
151 #define CNTLZW          (HI(31) | LO(26))
152 #define CMP             (HI(31) | LO(0))
153 #define CMPI            (HI(11))
154 #define CMPL            (HI(31) | LO(32))
155 #define CMPLI           (HI(10))
156 #define CROR            (HI(19) | LO(449))
157 #define DIVD            (HI(31) | LO(489))
158 #define DIVDU           (HI(31) | LO(457))
159 #define DIVW            (HI(31) | LO(491))
160 #define DIVWU           (HI(31) | LO(459))
161 #define EXTSB           (HI(31) | LO(954))
162 #define EXTSH           (HI(31) | LO(922))
163 #define EXTSW           (HI(31) | LO(986))
164 #define FABS            (HI(63) | LO(264))
165 #define FADD            (HI(63) | LO(21))
166 #define FADDS           (HI(59) | LO(21))
167 #define FCFID           (HI(63) | LO(846))
168 #define FCMPU           (HI(63) | LO(0))
169 #define FCTIDZ          (HI(63) | LO(815))
170 #define FCTIWZ          (HI(63) | LO(15))
171 #define FDIV            (HI(63) | LO(18))
172 #define FDIVS           (HI(59) | LO(18))
173 #define FMR             (HI(63) | LO(72))
174 #define FMUL            (HI(63) | LO(25))
175 #define FMULS           (HI(59) | LO(25))
176 #define FNEG            (HI(63) | LO(40))
177 #define FRSP            (HI(63) | LO(12))
178 #define FSUB            (HI(63) | LO(20))
179 #define FSUBS           (HI(59) | LO(20))
180 #define LD              (HI(58) | 0)
181 #define LWZ             (HI(32))
182 #define MFCR            (HI(31) | LO(19))
183 #define MFLR            (HI(31) | LO(339) | 0x80000)
184 #define MFXER           (HI(31) | LO(339) | 0x10000)
185 #define MTCTR           (HI(31) | LO(467) | 0x90000)
186 #define MTLR            (HI(31) | LO(467) | 0x80000)
187 #define MTXER           (HI(31) | LO(467) | 0x10000)
188 #define MULHD           (HI(31) | LO(73))
189 #define MULHDU          (HI(31) | LO(9))
190 #define MULHW           (HI(31) | LO(75))
191 #define MULHWU          (HI(31) | LO(11))
192 #define MULLD           (HI(31) | LO(233))
193 #define MULLI           (HI(7))
194 #define MULLW           (HI(31) | LO(235))
195 #define NEG             (HI(31) | LO(104))
/* NOP has the same bits as ORI with all other fields left zero. */
196 #define NOP             (HI(24))
197 #define NOR             (HI(31) | LO(124))
198 #define OR              (HI(31) | LO(444))
199 #define ORI             (HI(24))
200 #define ORIS            (HI(25))
201 #define RLDICL          (HI(30))
202 #define RLWINM          (HI(21))
203 #define SLD             (HI(31) | LO(27))
204 #define SLW             (HI(31) | LO(24))
205 #define SRAD            (HI(31) | LO(794))
/* The only entry whose LO() argument is shifted once more: 413 ends up at
   bit 2 instead of bit 1. */
206 #define SRADI           (HI(31) | LO(413 << 1))
207 #define SRAW            (HI(31) | LO(792))
208 #define SRAWI           (HI(31) | LO(824))
209 #define SRD             (HI(31) | LO(539))
210 #define SRW             (HI(31) | LO(536))
211 #define STD             (HI(62) | 0)
212 #define STDU            (HI(62) | 1)
213 #define STDUX           (HI(31) | LO(181))
214 #define STFIWX          (HI(31) | LO(983))
215 #define STW             (HI(36))
216 #define STWU            (HI(37))
217 #define STWUX           (HI(31) | LO(183))
218 #define SUBF            (HI(31) | LO(40))
219 #define SUBFC           (HI(31) | LO(8))
220 #define SUBFE           (HI(31) | LO(136))
221 #define SUBFIC          (HI(8))
222 #define XOR             (HI(31) | LO(316))
223 #define XORI            (HI(26))
224 #define XORIS           (HI(27))
225
/* Limits of 16-bit signed and unsigned immediates. */
226 #define SIMM_MAX        (0x7fff)
227 #define SIMM_MIN        (-0x8000)
228 #define UIMM_MAX        (0xffff)
229
230 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
231 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func)
232 {
233         sljit_sw* ptrs;
234         if (func_ptr)
235                 *func_ptr = (void*)context;
236         ptrs = (sljit_sw*)func;
237         context->addr = addr ? addr : ptrs[0];
238         context->r2 = ptrs[1];
239         context->r11 = ptrs[2];
240 }
241 #endif
242
243 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
244 {
245         sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
246         FAIL_IF(!ptr);
247         *ptr = ins;
248         compiler->size++;
249         return SLJIT_SUCCESS;
250 }
251
/* Decides whether a jump can use a shorter encoding than the full
   address-load sequence it was emitted with.
     jump     - the jump being examined; on success one or more PATCH_* flags
                (and possibly REMOVE_COND) are added to jump->flags.
     code_ptr - location of the jump's branch word in the output buffer.
     code     - start of the output buffer; label targets are computed as
                code + label->size.
   Returns 1 when a shorter form was selected (the caller then compacts the
   instruction sequence), 0 when the jump must keep its full form. */
252 static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
253 {
254         sljit_sw diff;
255         sljit_uw target_addr;
256         sljit_sw extra_jump_flags;
257
        /* Rewritable jumps are never shortened. On 32-bit builds with
           SLJIT_PASS_ENTRY_ADDR_TO_CALL the same holds for calls. */
258 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
259         if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL))
260                 return 0;
261 #else
262         if (jump->flags & SLJIT_REWRITABLE_JUMP)
263                 return 0;
264 #endif
265
266         if (jump->flags & JUMP_ADDR)
267                 target_addr = jump->u.target;
268         else {
269                 SLJIT_ASSERT(jump->flags & JUMP_LABEL);
270                 target_addr = (sljit_uw)(code + jump->u.label->size);
271         }
272
        /* 64-bit calls with SLJIT_PASS_ENTRY_ADDR_TO_CALL skip every branch
           form and only consider the shorter absolute address loads. */
273 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
274         if (jump->flags & IS_CALL)
275                 goto keep_address;
276 #endif
277
        /* Byte distance from the branch word to the target, low two bits cleared. */
278         diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr)) & ~0x3l;
279
280         extra_jump_flags = 0;
281         if (jump->flags & IS_COND) {
                /* Conditional branch: the distance must fit in 16 signed bits... */
282                 if (diff <= 0x7fff && diff >= -0x8000) {
283                         jump->flags |= PATCH_B;
284                         return 1;
285                 }
                /* ...or the absolute target must fit in 16 bits. */
286                 if (target_addr <= 0xffff) {
287                         jump->flags |= PATCH_B | PATCH_ABS_B;
288                         return 1;
289                 }
                /* Otherwise try the unconditional forms below with REMOVE_COND
                   set. The caller then emits one extra word in front of the
                   branch, hence the distance is reduced by one instruction. */
290                 extra_jump_flags = REMOVE_COND;
291
292                 diff -= sizeof(sljit_ins);
293         }
294
        /* Unconditional branch: 26-bit signed relative range. */
295         if (diff <= 0x01ffffff && diff >= -0x02000000) {
296                 jump->flags |= PATCH_B | extra_jump_flags;
297                 return 1;
298         }
        /* Unconditional branch to an absolute target that fits in 26 bits. */
299         if (target_addr <= 0x03ffffff) {
300                 jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags;
301                 return 1;
302         }
303
        /* 64-bit only: no branch form fits, but the address load itself can
           be shortened when the target fits in 31 or 47 bits. */
304 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
305 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
306 keep_address:
307 #endif
308         if (target_addr <= 0x7fffffff) {
309                 jump->flags |= PATCH_ABS32;
310                 return 1;
311         }
312         if (target_addr <= 0x7fffffffffffl) {
313                 jump->flags |= PATCH_ABS48;
314                 return 1;
315         }
316 #endif
317
318         return 0;
319 }
320
/* Turns the instruction words collected in the compiler's buffers into
   executable code.
   Steps, in order:
     1. Allocate executable memory for compiler->size instruction words
        (plus room for a function context when SLJIT_INDIRECT_CALL is set).
     2. Copy the words buffer by buffer. While copying, record the final
        address of every label, jump and constant, and shorten jump
        sequences for which detect_jump_type() returns nonzero.
     3. Walk the jump list again and write the now known target addresses
        into the branch or immediate-load instructions.
     4. Set compiler->error to SLJIT_ERR_COMPILED, record the code size in
        bytes, and flush the caches for the generated range.
   Returns the start of the code, or with SLJIT_INDIRECT_CALL the address of
   the function context placed after the code. Failure paths leave through
   CHECK_ERROR_PTR, CHECK_PTR and PTR_FAIL_WITH_EXEC_IF. */
321 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
322 {
323         struct sljit_memory_fragment *buf;
324         sljit_ins *code;
325         sljit_ins *code_ptr;
326         sljit_ins *buf_ptr;
327         sljit_ins *buf_end;
328         sljit_uw word_count;
329         sljit_uw addr;
330
331         struct sljit_label *label;
332         struct sljit_jump *jump;
333         struct sljit_const *const_;
334
335         CHECK_ERROR_PTR();
336         CHECK_PTR(check_sljit_generate_code(compiler));
337         reverse_buf(compiler);
338
        /* Reserve space behind the code for the function context. On 64-bit
           one more word is reserved when the word count is odd; the context
           is later placed on an address with bit 2 clear. */
339 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
340 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
341         compiler->size += (compiler->size & 0x1) + (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
342 #else
343         compiler->size += (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
344 #endif
345 #endif
346         code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
347         PTR_FAIL_WITH_EXEC_IF(code);
348         buf = compiler->buf;
349
        /* code_ptr is the write position in the output; word_count is the
           index of the word being read from the (unshortened) input. */
350         code_ptr = code;
351         word_count = 0;
352         label = compiler->labels;
353         jump = compiler->jumps;
354         const_ = compiler->consts;
355         do {
356                 buf_ptr = (sljit_ins*)buf->memory;
                /* used_size is in bytes; an instruction word is 4 bytes. */
357                 buf_end = buf_ptr + (buf->used_size >> 2);
358                 do {
359                         *code_ptr = *buf_ptr++;
360                         SLJIT_ASSERT(!label || label->size >= word_count);
361                         SLJIT_ASSERT(!jump || jump->addr >= word_count);
362                         SLJIT_ASSERT(!const_ || const_->addr >= word_count);
363                         /* These structures are ordered by their address. */
364                         if (label && label->size == word_count) {
365                                 /* Just recording the address. */
366                                 label->addr = (sljit_uw)code_ptr;
                                /* From here on label->size is the word offset in
                                   the output; detect_jump_type relies on this. */
367                                 label->size = code_ptr - code;
368                                 label = label->next;
369                         }
370                         if (jump && jump->addr == word_count) {
                                /* The word just copied is the branch itself. The
                                   address-load sequence precedes it: 3 words on
                                   32-bit, 6 words on 64-bit. jump->addr is set to
                                   the first word of that sequence. */
371 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
372                                 jump->addr = (sljit_uw)(code_ptr - 3);
373 #else
374                                 jump->addr = (sljit_uw)(code_ptr - 6);
375 #endif
376                                 if (detect_jump_type(jump, code_ptr, code)) {
377 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
                                        /* The branch word replaces the whole sequence. */
378                                         code_ptr[-3] = code_ptr[0];
379                                         code_ptr -= 3;
380 #else
381                                         if (jump->flags & PATCH_ABS32) {
                                                /* Drop three words: the last two words
                                                   of the old sequence move down next to
                                                   the words that are kept. */
382                                                 code_ptr -= 3;
383                                                 code_ptr[-1] = code_ptr[2];
384                                                 code_ptr[0] = code_ptr[3];
385                                         }
386                                         else if (jump->flags & PATCH_ABS48) {
                                                /* Drop one word and adjust the two
                                                   instructions before the moved pair. */
387                                                 code_ptr--;
388                                                 code_ptr[-1] = code_ptr[0];
389                                                 code_ptr[0] = code_ptr[1];
390                                                 /* rldicr rX,rX,32,31 -> rX,rX,16,47 */
391                                                 SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6);
392                                                 code_ptr[-3] ^= 0x8422;
393                                                 /* oris -> ori */
394                                                 code_ptr[-2] ^= 0x4000000;
395                                         }
396                                         else {
                                                /* PATCH_B: the branch word replaces the
                                                   whole six word sequence. */
397                                                 code_ptr[-6] = code_ptr[0];
398                                                 code_ptr -= 6;
399                                         }
400 #endif
401                                         if (jump->flags & REMOVE_COND) {
                                                /* The conditional branch is too far away:
                                                   emit a BCx with displacement 2 << 2 that
                                                   keeps the condition fields of the original
                                                   word with bit (8 << 21) toggled, followed
                                                   by an unconditional Bx which is patched
                                                   later. jump->addr moves on to that Bx.
                                                   NOTE(review): toggling (8 << 21) presumably
                                                   inverts the branch condition - confirm
                                                   against the ISA manual. */
402                                                 code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001);
403                                                 code_ptr++;
404                                                 jump->addr += sizeof(sljit_ins);
405                                                 code_ptr[0] = Bx;
406                                                 jump->flags -= IS_COND;
407                                         }
408                                 }
409                                 jump = jump->next;
410                         }
411                         if (const_ && const_->addr == word_count) {
412                                 const_->addr = (sljit_uw)code_ptr;
413                                 const_ = const_->next;
414                         }
415                         code_ptr ++;
416                         word_count ++;
417                 } while (buf_ptr < buf_end);
418
419                 buf = buf->next;
420         } while (buf);
421
        /* A label may sit right behind the last instruction. */
422         if (label && label->size == word_count) {
423                 label->addr = (sljit_uw)code_ptr;
424                 label->size = code_ptr - code;
425                 label = label->next;
426         }
427
428         SLJIT_ASSERT(!label);
429         SLJIT_ASSERT(!jump);
430         SLJIT_ASSERT(!const_);
431 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
432         SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)));
433 #else
434         SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
435 #endif
436
        /* Second pass: all label addresses are final, write the targets. The
           do { } while (0) wrapper only exists so that `break` can end the
           handling of one jump early. */
437         jump = compiler->jumps;
438         while (jump) {
439                 do {
440                         addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
441                         buf_ptr = (sljit_ins*)jump->addr;
442                         if (jump->flags & PATCH_B) {
443                                 if (jump->flags & IS_COND) {
                                        /* Conditional branch: 16-bit displacement or
                                           absolute address; condition fields and the
                                           lowest bit of the old word are kept. */
444                                         if (!(jump->flags & PATCH_ABS_B)) {
445                                                 addr = addr - jump->addr;
446                                                 SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000);
447                                                 *buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001);
448                                         }
449                                         else {
450                                                 SLJIT_ASSERT(addr <= 0xffff);
                                                /* 0x2 marks the absolute form. */
451                                                 *buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001);
452                                         }
453                                 }
454                                 else {
                                        /* Unconditional branch: 26-bit displacement or
                                           absolute address; only the lowest bit of the
                                           old word is kept. */
455                                         if (!(jump->flags & PATCH_ABS_B)) {
456                                                 addr = addr - jump->addr;
457                                                 SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000);
458                                                 *buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1);
459                                         }
460                                         else {
461                                                 SLJIT_ASSERT(addr <= 0x03ffffff);
462                                                 *buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1);
463                                         }
464                                 }
465                                 break;
466                         }
467                         /* Set the fields of immediate loads. */
                        /* Each patched word receives one 16-bit slice of the
                           address in its low half. On 64-bit, word 2 of the
                           longer sequences carries no address bits and is
                           skipped. */
468 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
469                         buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
470                         buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
471 #else
472                         if (jump->flags & PATCH_ABS32) {
473                                 SLJIT_ASSERT(addr <= 0x7fffffff);
474                                 buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
475                                 buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
476                                 break;
477                         }
478                         if (jump->flags & PATCH_ABS48) {
479                                 SLJIT_ASSERT(addr <= 0x7fffffffffff);
480                                 buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff);
481                                 buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff);
482                                 buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff);
483                                 break;
484                         }
485                         buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff);
486                         buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff);
487                         buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff);
488                         buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff);
489 #endif
490                 } while (0);
491                 jump = jump->next;
492         }
493
494         compiler->error = SLJIT_ERR_COMPILED;
495         compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
496         SLJIT_CACHE_FLUSH(code, code_ptr);
497
        /* With SLJIT_INDIRECT_CALL the caller receives a function context
           stored right behind the code instead of the code address. Its addr
           field is the start of the code; r2 and r11 are copied from the
           words found at the address of sljit_generate_code itself. */
498 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
499 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
                /* Skip one word when bit 2 of the address is set. */
500         if (((sljit_sw)code_ptr) & 0x4)
501                 code_ptr++;
502         sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
503         return code_ptr;
504 #else
505         sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
506         return code_ptr;
507 #endif
508 #else
509         return code;
510 #endif
511 }
512
513 /* --------------------------------------------------------------------- */
514 /*  Entry, exit                                                          */
515 /* --------------------------------------------------------------------- */
516
517 /* inp_flags: */
518
519 /* Creates an index in data_transfer_insts array. */
/* The low bits select one memory access form: load vs. store, indexed vs.
   immediate addressing, write-back, the operand size (two bits) and
   signedness. */
520 #define LOAD_DATA       0x01
521 #define INDEXED         0x02
522 #define WRITE_BACK      0x04
523 #define WORD_DATA       0x00
524 #define BYTE_DATA       0x08
525 #define HALF_DATA       0x10
526 #define INT_DATA        0x18
527 #define SIGNED_DATA     0x20
528 /* Separates integer and floating point registers */
/* INST_CODE_AND_DST treats (flags & MEM_MASK) <= GPR_REG as an integer
   register access and anything above it as a floating point one. */
529 #define GPR_REG         0x3f
530 #define DOUBLE_DATA     0x40
531
/* Mask covering all of the table index bits above. */
532 #define MEM_MASK        0x7f
533
534 /* Other inp_flags. */
535
536 #define ARG_TEST        0x000100
537 /* Integer operation and set flags -> requires exts on 64 bit systems. */
538 #define ALT_SIGN_EXT    0x000200
539 /* This flag affects the RC() and OERC() macros. */
540 #define ALT_SET_FLAGS   0x000400
541 #define ALT_KEEP_CACHE  0x000800
542 #define ALT_FORM1       0x010000
543 #define ALT_FORM2       0x020000
544 #define ALT_FORM3       0x040000
545 #define ALT_FORM4       0x080000
546 #define ALT_FORM5       0x100000
547 #define ALT_FORM6       0x200000
548
549 /* Source and destination is register. */
550 #define REG_DEST        0x000001
551 #define REG1_SOURCE     0x000002
552 #define REG2_SOURCE     0x000004
553 /* getput_arg_fast returned true. */
554 #define FAST_DEST       0x000008
555 /* Multiple instructions are required. */
556 #define SLOW_DEST       0x000010
/* The values above reuse the numeric range of the memory form bits
   (LOAD_DATA, INDEXED, ...). The list below repeats the remaining flags
   that are defined earlier in this section. */
557 /*
558 ALT_SIGN_EXT            0x000200
559 ALT_SET_FLAGS           0x000400
560 ALT_FORM1               0x010000
561 ...
562 ALT_FORM6               0x200000 */
563
/* Pull in the word-size specific part of the back end as source text.
   NOTE(review): helpers used below but not defined in this file, such as
   load_immediate(), presumably come from here - confirm. */
564 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
565 #include "sljitNativePPC_32.c"
566 #else
567 #include "sljitNativePPC_64.c"
568 #endif
569
/* Word sized store and load templates for the prologue and epilogue:
   STW/LWZ on 32-bit, STD/LD on 64-bit. Both macros are #undef'd again
   after sljit_emit_return. */
570 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
571 #define STACK_STORE     STW
572 #define STACK_LOAD      LWZ
573 #else
574 #define STACK_STORE     STD
575 #define STACK_LOAD      LD
576 #endif
577
578 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
579         sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
580         sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
581 {
582         sljit_s32 i, tmp, offs;
583
584         CHECK_ERROR();
585         CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
586         set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
587
588         FAIL_IF(push_inst(compiler, MFLR | D(0)));
589         offs = -(sljit_s32)(sizeof(sljit_sw));
590         FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));
591
592         tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
593         for (i = SLJIT_S0; i >= tmp; i--) {
594                 offs -= (sljit_s32)(sizeof(sljit_sw));
595                 FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
596         }
597
598         for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
599                 offs -= (sljit_s32)(sizeof(sljit_sw));
600                 FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
601         }
602
603         SLJIT_ASSERT(offs == -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1));
604
605 #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
606         FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
607 #else
608         FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
609 #endif
610
611         FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0));
612         if (args >= 1)
613                 FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(SLJIT_S0) | B(SLJIT_R0)));
614         if (args >= 2)
615                 FAIL_IF(push_inst(compiler, OR | S(SLJIT_R1) | A(SLJIT_S1) | B(SLJIT_R1)));
616         if (args >= 3)
617                 FAIL_IF(push_inst(compiler, OR | S(SLJIT_R2) | A(SLJIT_S2) | B(SLJIT_R2)));
618
619         local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
620         local_size = (local_size + 15) & ~0xf;
621         compiler->local_size = local_size;
622
623 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
624         if (local_size <= SIMM_MAX)
625                 FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
626         else {
627                 FAIL_IF(load_immediate(compiler, 0, -local_size));
628                 FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
629         }
630 #else
631         if (local_size <= SIMM_MAX)
632                 FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
633         else {
634                 FAIL_IF(load_immediate(compiler, 0, -local_size));
635                 FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
636         }
637 #endif
638
639         return SLJIT_SUCCESS;
640 }
641
642 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
643         sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
644         sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
645 {
646         CHECK_ERROR();
647         CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
648         set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
649
650         local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
651         compiler->local_size = (local_size + 15) & ~0xf;
652         return SLJIT_SUCCESS;
653 }
654
655 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
656 {
657         sljit_s32 i, tmp, offs;
658
659         CHECK_ERROR();
660         CHECK(check_sljit_emit_return(compiler, op, src, srcw));
661
662         FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
663
664         if (compiler->local_size <= SIMM_MAX)
665                 FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_SP) | A(SLJIT_SP) | IMM(compiler->local_size)));
666         else {
667                 FAIL_IF(load_immediate(compiler, 0, compiler->local_size));
668                 FAIL_IF(push_inst(compiler, ADD | D(SLJIT_SP) | A(SLJIT_SP) | B(0)));
669         }
670
671 #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
672         FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
673 #else
674         FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
675 #endif
676
677         offs = -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1);
678
679         tmp = compiler->scratches;
680         for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
681                 FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
682                 offs += (sljit_s32)(sizeof(sljit_sw));
683         }
684
685         tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
686         for (i = tmp; i <= SLJIT_S0; i++) {
687                 FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
688                 offs += (sljit_s32)(sizeof(sljit_sw));
689         }
690
691         FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));
692         SLJIT_ASSERT(offs == -(sljit_sw)(sizeof(sljit_sw)));
693
694         FAIL_IF(push_inst(compiler, MTLR | S(0)));
695         FAIL_IF(push_inst(compiler, BLR));
696
697         return SLJIT_SUCCESS;
698 }
699
700 #undef STACK_STORE
701 #undef STACK_LOAD
702
703 /* --------------------------------------------------------------------- */
704 /*  Operators                                                            */
705 /* --------------------------------------------------------------------- */
706
707 /* i/x - immediate/indexed form (1 bit)
708    n/w - no write-back / write-back (1 bit)
709    s/l - store/load (1 bit)
710    u/s - unsigned/signed (1 bit)
711    w/b/h/i - word/byte/half/int allowed (2 bit)
712    It contains 64 items (followed by 8 floating point items), but not all are different. */
713
/* Marker bits or-ed into some entries of data_transfer_insts. They are not
   part of the instruction encoding: the ppc-64 INST_CODE_AND_DST masks them
   out before the instruction word is built. */

714 /* 64 bit only: the offset of the [reg+imm] form must be aligned to 4 bytes
       (getput_arg_fast refuses such an entry when argw & 0x3 is non-zero). */
715 #define INT_ALIGNED     0x10000
716 /* 64-bit only: there is no lwau instruction, so an entry carrying this bit
       is never emitted directly by getput_arg_fast. */
717 #define UPDATE_REQ      0x20000
718
/* ARCH_32_64(a, b) expands to a on ppc-32 and to b on ppc-64.
   INST_CODE_AND_DST(inst, flags, reg) combines an opcode with its data
   register: the register is encoded with D() when (flags & MEM_MASK) is
   at most GPR_REG and with FD() otherwise. The ppc-64 variant also strips
   the INT_ALIGNED and UPDATE_REQ marker bits from inst. */
719 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
720 #define ARCH_32_64(a, b)        a
721 #define INST_CODE_AND_DST(inst, flags, reg) \
722         ((inst) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
723 #else
724 #define ARCH_32_64(a, b)        b
725 #define INST_CODE_AND_DST(inst, flags, reg) \
726         (((inst) & ~(INT_ALIGNED | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
727 #endif
728
/* Opcodes of the load / store instructions. The table is indexed with
   (flags & MEM_MASK); or-ing INDEXED into the flags selects the register
   indexed (x) variant of an entry. The label in front of each entry reads:
   signedness (u/s), data size (w/b/h/i), write-back (n/w), form (i/x) and
   direction (s/l). On ppc-64 some entries carry the INT_ALIGNED and
   UPDATE_REQ marker bits. */
729 static const sljit_ins data_transfer_insts[64 + 8] = {
730
731 /* -------- Unsigned -------- */
732
733 /* Word. */
734
735 /* u w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
736 /* u w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
737 /* u w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
738 /* u w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
739
740 /* u w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
741 /* u w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
742 /* u w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
743 /* u w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
744
745 /* Byte. */
746
747 /* u b n i s */ HI(38) /* stb */, 
748 /* u b n i l */ HI(34) /* lbz */,
749 /* u b n x s */ HI(31) | LO(215) /* stbx */,
750 /* u b n x l */ HI(31) | LO(87) /* lbzx */,
751
752 /* u b w i s */ HI(39) /* stbu */,
753 /* u b w i l */ HI(35) /* lbzu */,
754 /* u b w x s */ HI(31) | LO(247) /* stbux */,
755 /* u b w x l */ HI(31) | LO(119) /* lbzux */,
756
757 /* Half. */
758
759 /* u h n i s */ HI(44) /* sth */,
760 /* u h n i l */ HI(40) /* lhz */,
761 /* u h n x s */ HI(31) | LO(407) /* sthx */,
762 /* u h n x l */ HI(31) | LO(279) /* lhzx */,
763
764 /* u h w i s */ HI(45) /* sthu */,
765 /* u h w i l */ HI(41) /* lhzu */,
766 /* u h w x s */ HI(31) | LO(439) /* sthux */,
767 /* u h w x l */ HI(31) | LO(311) /* lhzux */,
768
769 /* Int. */
770
771 /* u i n i s */ HI(36) /* stw */,
772 /* u i n i l */ HI(32) /* lwz */,
773 /* u i n x s */ HI(31) | LO(151) /* stwx */,
774 /* u i n x l */ HI(31) | LO(23) /* lwzx */,
775
776 /* u i w i s */ HI(37) /* stwu */,
777 /* u i w i l */ HI(33) /* lwzu */,
778 /* u i w x s */ HI(31) | LO(183) /* stwux */,
779 /* u i w x l */ HI(31) | LO(55) /* lwzux */,
780
781 /* -------- Signed -------- */
782
783 /* Word. */
784
785 /* s w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
786 /* s w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
787 /* s w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
788 /* s w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
789
790 /* s w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
791 /* s w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
792 /* s w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
793 /* s w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
794
795 /* Byte. */
796
797 /* s b n i s */ HI(38) /* stb */,
798 /* s b n i l */ HI(34) /* lbz */ /* EXTS_REQ */,
799 /* s b n x s */ HI(31) | LO(215) /* stbx */,
800 /* s b n x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */,
801
802 /* s b w i s */ HI(39) /* stbu */,
803 /* s b w i l */ HI(35) /* lbzu */ /* EXTS_REQ */,
804 /* s b w x s */ HI(31) | LO(247) /* stbux */,
805 /* s b w x l */ HI(31) | LO(119) /* lbzux */ /* EXTS_REQ */,
806
807 /* Half. */
808
809 /* s h n i s */ HI(44) /* sth */,
810 /* s h n i l */ HI(42) /* lha */,
811 /* s h n x s */ HI(31) | LO(407) /* sthx */,
812 /* s h n x l */ HI(31) | LO(343) /* lhax */,
813
814 /* s h w i s */ HI(45) /* sthu */,
815 /* s h w i l */ HI(43) /* lhau */,
816 /* s h w x s */ HI(31) | LO(439) /* sthux */,
817 /* s h w x l */ HI(31) | LO(375) /* lhaux */,
818
819 /* Int. */
820
821 /* s i n i s */ HI(36) /* stw */,
822 /* s i n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x2 /* lwa */),
823 /* s i n x s */ HI(31) | LO(151) /* stwx */,
824 /* s i n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */),
825
826 /* s i w i s */ HI(37) /* stwu */,
827 /* s i w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | UPDATE_REQ | 0x2 /* lwa */),
828 /* s i w x s */ HI(31) | LO(183) /* stwux */,
829 /* s i w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */),
830
831 /* -------- Double -------- */
832
    /* Floating point entries: the first group (d) is the double precision
       one, the second group (s) the single precision one. Only the
       no write-back forms are present. */
833 /* d   n i s */ HI(54) /* stfd */,
834 /* d   n i l */ HI(50) /* lfd */,
835 /* d   n x s */ HI(31) | LO(727) /* stfdx */,
836 /* d   n x l */ HI(31) | LO(599) /* lfdx */,
837
838 /* s   n i s */ HI(52) /* stfs */,
839 /* s   n i l */ HI(48) /* lfs */,
840 /* s   n x s */ HI(31) | LO(663) /* stfsx */,
841 /* s   n x l */ HI(31) | LO(535) /* lfsx */,
842
843 };
844
845 #undef ARCH_32_64
846
847 /* Simple cases, (no caching is required). */
/* Tries to access the memory operand (arg, argw) with a single instruction.
   Result:
      0 - not possible here, nothing was emitted;
      1 - possible, but inp_flags contains ARG_TEST, so nothing was emitted;
     -1 - the instruction was handed to push_inst. */
848 static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
849 {
850         sljit_ins inst;
851
852         /* Should work when (arg & REG_MASK) == 0. */
853         SLJIT_COMPILE_ASSERT(A(0) == 0, a0_must_be_0);
854         SLJIT_ASSERT(arg & SLJIT_MEM);
855
            /* Operand with an offset register: handled here only when the two
               low bits of argw are zero, using the indexed instruction form. */
856         if (arg & OFFS_REG_MASK) {
857                 if (argw & 0x3)
858                         return 0;
859                 if (inp_flags & ARG_TEST)
860                         return 1;
861
862                 inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
863                 SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
864                 FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(OFFS_REG(arg))));
865                 return -1;
866         }
867
            /* Without a base register the write-back request is dropped. */
868         if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
869                 inp_flags &= ~WRITE_BACK;
870
871 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
872         inst = data_transfer_insts[inp_flags & MEM_MASK];
873         SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));
874
            /* Give up when the offset is outside [SIMM_MIN, SIMM_MAX], when an
               INT_ALIGNED opcode gets a misaligned offset, or when the table
               entry is marked UPDATE_REQ. */
875         if (argw > SIMM_MAX || argw < SIMM_MIN || ((inst & INT_ALIGNED) && (argw & 0x3)) || (inst & UPDATE_REQ))
876                 return 0;
877         if (inp_flags & ARG_TEST)
878                 return 1;
879 #endif
880
881 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
            /* On ppc-32 only the offset range has to be checked. */
882         if (argw > SIMM_MAX || argw < SIMM_MIN)
883                 return 0;
884         if (inp_flags & ARG_TEST)
885                 return 1;
886
887         inst = data_transfer_insts[inp_flags & MEM_MASK];
888         SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
889 #endif
890
            /* [base + imm] form; A(0) == 0 covers the case of no base register. */
891         FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | IMM(argw)));
892         return -1;
893 }
894
895 /* See getput_arg below.
896    Note: can_cache is called only for binary operators. Those operator always
897    uses word arguments without write back. */
898 static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
899 {
900         sljit_sw high_short, next_high_short;
901 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
902         sljit_sw diff;
903 #endif
904
905         SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
906
907         if (arg & OFFS_REG_MASK)
908                 return ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && (argw & 0x3) == (next_argw & 0x3));
909
910         if (next_arg & OFFS_REG_MASK)
911                 return 0;
912
913 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
914         high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
915         next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
916         return high_short == next_high_short;
917 #else
918         if (argw <= 0x7fffffffl && argw >= -0x80000000l) {
919                 high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
920                 next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
921                 if (high_short == next_high_short)
922                         return 1;
923         }
924
925         diff = argw - next_argw;
926         if (!(arg & REG_MASK))
927                 return diff <= SIMM_MAX && diff >= SIMM_MIN;
928
929         if (arg == next_arg && diff <= SIMM_MAX && diff >= SIMM_MIN)
930                 return 1;
931
932         return 0;
933 #endif
934 }
935
/* ppc-64 helper for getput_arg. It expects `compiler` and `inst` to be in
   scope and imm to be a modifiable variable. When inst is marked
   INT_ALIGNED and imm has one of its two low bits set, those bits are
   added both to TMP_REG3 and to compiler->cache_argw and are then removed
   from imm, which leaves imm a multiple of 4. */
936 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
937 #define ADJUST_CACHED_IMM(imm) \
938         if ((inst & INT_ALIGNED) && (imm & 0x3)) { \
939                 /* Adjust cached value. Fortunately this is really a rare case */ \
940                 compiler->cache_argw += imm & 0x3; \
941                 FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | (imm & 0x3))); \
942                 imm &= ~0x3; \
943         }
944 #endif
945
946 /* Emit the necessary instructions. See can_cache above. */
/* General memory operand access for the cases getput_arg_fast does not
   take. The access may need several instructions. A partially computed
   address can be kept in TMP_REG3; compiler->cache_arg and
   compiler->cache_argw describe what TMP_REG3 currently holds.
   (next_arg, next_argw) describes the operand that is accessed next and
   is only used to decide whether caching in TMP_REG3 is worthwhile. */
947 static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
948 {
949         sljit_s32 tmp_r;
950         sljit_ins inst;
951         sljit_sw high_short, next_high_short;
952 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
953         sljit_sw diff;
954 #endif
955
956         SLJIT_ASSERT(arg & SLJIT_MEM);
957
            /* Scratch register for the address: a load into a general purpose
               register may use its own target, everything else uses TMP_REG1. */
958         tmp_r = ((inp_flags & LOAD_DATA) && ((inp_flags) & MEM_MASK) <= GPR_REG) ? reg : TMP_REG1;
959         /* Special case for "mov reg, [reg, ... ]". */
960         if ((arg & REG_MASK) == tmp_r)
961                 tmp_r = TMP_REG1;
962
            /* Offset register with a non-zero shift: the shifted offset is put
               into a register (TMP_REG3 when it is or becomes the cached
               value) and the indexed instruction form is used. */
963         if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
964                 argw &= 0x3;
965                 /* Otherwise getput_arg_fast would capture it. */
966                 SLJIT_ASSERT(argw);
967
968                 if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg && argw == compiler->cache_argw)
969                         tmp_r = TMP_REG3;
970                 else {
971                         if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) {
972                                 compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
973                                 compiler->cache_argw = argw;
974                                 tmp_r = TMP_REG3;
975                         }
976 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
977                         FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_r) | (argw << 11) | ((31 - argw) << 1)));
978 #else
979                         FAIL_IF(push_inst(compiler, RLDI(tmp_r, OFFS_REG(arg), argw, 63 - argw, 1)));
980 #endif
981                 }
982                 inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
983                 SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
984                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
985         }
986
            /* Without a base register the write-back request is dropped. */
987         if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
988                 inp_flags &= ~WRITE_BACK;
989
990         inst = data_transfer_insts[inp_flags & MEM_MASK];
991         SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));
992
            /* Offsets handled as ADDIS (upper part) followed by the access with
               the remaining immediate. On ppc-32 this covers every offset; on
               ppc-64 only offsets in the range tested below that also satisfy
               the INT_ALIGNED / UPDATE_REQ constraints of the opcode. */
993 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
994         if (argw <= 0x7fff7fffl && argw >= -0x80000000l
995                         && (!(inst & INT_ALIGNED) || !(argw & 0x3)) && !(inst & UPDATE_REQ)) {
996 #endif
997
998                 arg &= REG_MASK;
                    /* Upper part of the offset; bit 15 of argw is carried into it
                       (NOTE(review): presumably because IMM(argw) is consumed as a
                       sign extended 16 bit value - confirm against IMM). */
999                 high_short = (sljit_s32)(argw + ((argw & 0x8000) << 1)) & ~0xffff;
1000                 /* The getput_arg_fast should handle this otherwise. */
1001 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1002                 SLJIT_ASSERT(high_short && high_short <= 0x7fffffffl && high_short >= -0x80000000l);
1003 #else
1004                 SLJIT_ASSERT(high_short && !(inst & (INT_ALIGNED | UPDATE_REQ)));
1005 #endif
1006
                     /* Write-back: the base register itself receives the upper
                        part. If it is also the data register, the data is first
                        copied to tmp_r and that copy is transferred. */
1007                 if (inp_flags & WRITE_BACK) {
1008                         if (arg == reg) {
1009                                 FAIL_IF(push_inst(compiler, OR | S(reg) | A(tmp_r) | B(reg)));
1010                                 reg = tmp_r;
1011                         }
1012                         tmp_r = arg;
1013                         FAIL_IF(push_inst(compiler, ADDIS | D(arg) | A(arg) | IMM(high_short >> 16)));
1014                 }
                     /* Nothing usable is cached: compute base + upper part, into
                        TMP_REG3 if the next operand shares the same upper part. */
1015                 else if (compiler->cache_arg != (SLJIT_MEM | arg) || high_short != compiler->cache_argw) {
1016                         if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK)) {
1017                                 next_high_short = (sljit_s32)(next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
1018                                 if (high_short == next_high_short) {
1019                                         compiler->cache_arg = SLJIT_MEM | arg;
1020                                         compiler->cache_argw = high_short;
1021                                         tmp_r = TMP_REG3;
1022                                 }
1023                         }
1024                         FAIL_IF(push_inst(compiler, ADDIS | D(tmp_r) | A(arg & REG_MASK) | IMM(high_short >> 16)));
1025                 }
                     /* TMP_REG3 already holds base + upper part. */
1026                 else
1027                         tmp_r = TMP_REG3;
1028
1029                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r) | IMM(argw));
1030
1031 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1032         }
1033
1034         /* Everything else is PPC-64 only. */
             /* No base register: argw is the whole address. */
1035         if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
                     /* An immediate cached in TMP_REG3 is close enough: address
                        the operand relative to it. */
1036                 diff = argw - compiler->cache_argw;
1037                 if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1038                         ADJUST_CACHED_IMM(diff);
1039                         return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
1040                 }
1041
                     /* Load argw, into TMP_REG3 (recorded as cached immediate) when
                        the next memory operand is within immediate distance. */
1042                 diff = argw - next_argw;
1043                 if ((next_arg & SLJIT_MEM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1044                         SLJIT_ASSERT(inp_flags & LOAD_DATA);
1045
1046                         compiler->cache_arg = SLJIT_IMM;
1047                         compiler->cache_argw = argw;
1048                         tmp_r = TMP_REG3;
1049                 }
1050
1051                 FAIL_IF(load_immediate(compiler, tmp_r, argw));
1052                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r));
1053         }
1054
             /* Base register with an offset that does not fit the cases above.
                First choice: TMP_REG3 is recorded as caching an address for this
                same operand and argw is within immediate distance of it. */
1055         diff = argw - compiler->cache_argw;
1056         if (compiler->cache_arg == arg && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1057                 SLJIT_ASSERT(!(inp_flags & WRITE_BACK) && !(inst & UPDATE_REQ));
1058                 ADJUST_CACHED_IMM(diff);
1059                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
1060         }
1061
             /* Second choice: TMP_REG3 holds a nearby immediate. It is adjusted
                to argw if needed and used as index register. */
1062         if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1063                 inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
1064                 SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
1065                 if (compiler->cache_argw != argw) {
1066                         FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | IMM(diff)));
1067                         compiler->cache_argw = argw;
1068                 }
1069                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
1070         }
1071
             /* The next memory operand uses the same offset: keep the offset in
                TMP_REG3 as cached immediate and use the indexed form. */
1072         if (argw == next_argw && (next_arg & SLJIT_MEM)) {
1073                 SLJIT_ASSERT(inp_flags & LOAD_DATA);
1074                 FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1075
1076                 compiler->cache_arg = SLJIT_IMM;
1077                 compiler->cache_argw = argw;
1078
1079                 inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
1080                 SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
1081                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
1082         }
1083
             /* The next operand has the same base and a nearby offset: cache the
                complete address base + argw in TMP_REG3. */
1084         diff = argw - next_argw;
1085         if (arg == next_arg && !(inp_flags & WRITE_BACK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1086                 SLJIT_ASSERT(inp_flags & LOAD_DATA);
1087                 FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1088                 FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | A(TMP_REG3) | B(arg & REG_MASK)));
1089
1090                 compiler->cache_arg = arg;
1091                 compiler->cache_argw = argw;
1092
1093                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3));
1094         }
1095
             /* Fallback: load the offset (cached in TMP_REG3 if the next operand
                is a plain memory operand with a nearby offset) and use it as
                index register. */
1096         if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1097                 SLJIT_ASSERT(inp_flags & LOAD_DATA);
1098                 FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1099
1100                 compiler->cache_arg = SLJIT_IMM;
1101                 compiler->cache_argw = argw;
1102                 tmp_r = TMP_REG3;
1103         }
1104         else
1105                 FAIL_IF(load_immediate(compiler, tmp_r, argw));
1106
1107         /* Get the indexed version instead of the normal one. */
1108         inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
1109         SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
1110         return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
1111 #endif
1112 }
1113
1114 static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
1115 {
1116         if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
1117                 return compiler->error;
1118         return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
1119 }
1120
/* Common emitter for one operation with a destination and two sources.
   Operands that are not plain registers are first brought into registers:
   immediates with load_immediate, memory operands with getput_arg_fast or,
   failing that, getput_arg. Then emit_single_op is called and, for a memory
   destination, the result is stored. While the operands are processed, a
   value of 0 in dst_r / src1_r / src2_r marks an operand that still needs
   the getput_arg path. Returns SLJIT_SUCCESS unless one of the FAIL_IF
   checks returns earlier. */
1121 static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 input_flags,
1122         sljit_s32 dst, sljit_sw dstw,
1123         sljit_s32 src1, sljit_sw src1w,
1124         sljit_s32 src2, sljit_sw src2w)
1125 {
1126         /* arg1 goes to TMP_REG1 or src reg
1127            arg2 goes to TMP_REG2, imm or src reg
1128            TMP_REG3 can be used for caching
1129            result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
1130         sljit_s32 dst_r;
1131         sljit_s32 src1_r;
1132         sljit_s32 src2_r;
1133         sljit_s32 sugg_src2_r = TMP_REG2;
             /* Only the ALT_* bits are passed on to emit_single_op; the
                REG_DEST / FAST_DEST / SLOW_DEST / REGn_SOURCE bits are added below. */
1134         sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_SIGN_EXT | ALT_SET_FLAGS);
1135
             /* The TMP_REG3 cache is reset unless the caller asks to keep it. */
1136         if (!(input_flags & ALT_KEEP_CACHE)) {
1137                 compiler->cache_arg = 0;
1138                 compiler->cache_argw = 0;
1139         }
1140
1141         /* Destination check. */
1142         if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
                     /* A move without destination is dropped, unless its source
                        is a memory operand. */
1143                 if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM))
1144                         return SLJIT_SUCCESS;
1145                 dst_r = TMP_REG2;
1146         }
1147         else if (FAST_IS_REG(dst)) {
1148                 dst_r = dst;
1149                 flags |= REG_DEST;
                     /* For moves the second source is loaded straight into the
                        destination register. */
1150                 if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
1151                         sugg_src2_r = dst_r;
1152         }
1153         else {
1154                 SLJIT_ASSERT(dst & SLJIT_MEM);
                     /* ARG_TEST: only ask whether the fast path applies, the
                        store itself is emitted after the operation. */
1155                 if (getput_arg_fast(compiler, input_flags | ARG_TEST, TMP_REG2, dst, dstw)) {
1156                         flags |= FAST_DEST;
1157                         dst_r = TMP_REG2;
1158                 }
1159                 else {
1160                         flags |= SLOW_DEST;
1161                         dst_r = 0;
1162                 }
1163         }
1164
1165         /* Source 1. */
1166         if (FAST_IS_REG(src1)) {
1167                 src1_r = src1;
1168                 flags |= REG1_SOURCE;
1169         }
1170         else if (src1 & SLJIT_IMM) {
1171                 FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
1172                 src1_r = TMP_REG1;
1173         }
1174         else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
1175                 FAIL_IF(compiler->error);
1176                 src1_r = TMP_REG1;
1177         }
1178         else
1179                 src1_r = 0;
1180
1181         /* Source 2. */
1182         if (FAST_IS_REG(src2)) {
1183                 src2_r = src2;
1184                 flags |= REG2_SOURCE;
                     /* Register source of a move into a non-register destination:
                        the source register itself is used as the result register. */
1185                 if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
1186                         dst_r = src2_r;
1187         }
1188         else if (src2 & SLJIT_IMM) {
1189                 FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
1190                 src2_r = sugg_src2_r;
1191         }
1192         else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) {
1193                 FAIL_IF(compiler->error);
1194                 src2_r = sugg_src2_r;
1195         }
1196         else
1197                 src2_r = 0;
1198
1199         /* src1_r, src2_r and dst_r can be zero (=unprocessed).
1200            All arguments are complex addressing modes, and it is a binary operator. */
1201         if (src1_r == 0 && src2_r == 0 && dst_r == 0) {
                     /* Choose the load order so that the operand processed right
                        after another one can reuse its cached address. */
1202                 if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
1203                         FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
1204                         FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
1205                 }
1206                 else {
1207                         FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
1208                         FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
1209                 }
1210                 src1_r = TMP_REG1;
1211                 src2_r = TMP_REG2;
1212         }
             /* Two unprocessed operands: the first one is loaded with the other
                one announced as the next operand. */
1213         else if (src1_r == 0 && src2_r == 0) {
1214                 FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
1215                 src1_r = TMP_REG1;
1216         }
1217         else if (src1_r == 0 && dst_r == 0) {
1218                 FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
1219                 src1_r = TMP_REG1;
1220         }
1221         else if (src2_r == 0 && dst_r == 0) {
1222                 FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
1223                 src2_r = sugg_src2_r;
1224         }
1225
             /* A slow memory destination gets its result in TMP_REG2. */
1226         if (dst_r == 0)
1227                 dst_r = TMP_REG2;
1228
             /* Sources that are still unprocessed are loaded with no next
                operand to cache for. */
1229         if (src1_r == 0) {
1230                 FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
1231                 src1_r = TMP_REG1;
1232         }
1233
1234         if (src2_r == 0) {
1235                 FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
1236                 src2_r = sugg_src2_r;
1237         }
1238
1239         FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
1240
             /* Store the result when the destination is a memory operand. */
1241         if (flags & (FAST_DEST | SLOW_DEST)) {
1242                 if (flags & FAST_DEST)
1243                         FAIL_IF(getput_arg_fast(compiler, input_flags, dst_r, dst, dstw));
1244                 else
1245                         FAIL_IF(getput_arg(compiler, input_flags, dst_r, dst, dstw, 0, 0));
1246         }
1247         return SLJIT_SUCCESS;
1248 }
1249
1250 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1251 {
1252 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1253         sljit_s32 int_op = op & SLJIT_I32_OP;
1254 #endif
1255
1256         CHECK_ERROR();
1257         CHECK(check_sljit_emit_op0(compiler, op));
1258
1259         op = GET_OPCODE(op);
1260         switch (op) {
1261         case SLJIT_BREAKPOINT:
1262         case SLJIT_NOP:
1263                 return push_inst(compiler, NOP);
1264         case SLJIT_LMUL_UW:
1265         case SLJIT_LMUL_SW:
1266                 FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
1267 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1268                 FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
1269                 return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHDU : MULHD) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
1270 #else
1271                 FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
1272                 return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
1273 #endif
1274         case SLJIT_DIVMOD_UW:
1275         case SLJIT_DIVMOD_SW:
1276                 FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
1277 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1278                 FAIL_IF(push_inst(compiler, (int_op ? (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) : (op == SLJIT_DIVMOD_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)));
1279                 FAIL_IF(push_inst(compiler, (int_op ? MULLW : MULLD) | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
1280 #else
1281                 FAIL_IF(push_inst(compiler, (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)));
1282                 FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
1283 #endif
1284                 return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1));
1285         case SLJIT_DIV_UW:
1286         case SLJIT_DIV_SW:
1287 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1288                 return push_inst(compiler, (int_op ? (op == SLJIT_DIV_UW ? DIVWU : DIVW) : (op == SLJIT_DIV_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1));
1289 #else
1290                 return push_inst(compiler, (op == SLJIT_DIV_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1));
1291 #endif
1292         }
1293
1294         return SLJIT_SUCCESS;
1295 }
1296
/* Helper of sljit_emit_op1 for the typed moves. It relies on compiler,
   flags, dst, dstw, src and srcw being in scope where it is expanded.
   For an immediate source the operation becomes a plain SLJIT_MOV and the
   constant is converted with type_cast; otherwise the move of the given
   type is emitted with srcw unchanged. type_flags are or-ed into flags. */
1297 #define EMIT_MOV(type, type_flags, type_cast) \
1298         emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw)
1299
/* Emits an operation with one source and one destination: the MOV / MOVU
   family, NOT, NEG and CLZ. The work is delegated to emit_op, where the
   operand is passed as the second source and TMP_REG1 stands in for the
   first one. Opcodes not listed in the switch emit nothing and
   SLJIT_SUCCESS is returned. */
1300 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1301         sljit_s32 dst, sljit_sw dstw,
1302         sljit_s32 src, sljit_sw srcw)
1303 {
1304         sljit_s32 flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
1305         sljit_s32 op_flags = GET_ALL_FLAGS(op);
1306
1307         CHECK_ERROR();
1308         CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1309         ADJUST_LOCAL_OFFSET(dst, dstw);
1310         ADJUST_LOCAL_OFFSET(src, srcw);
1311
1312         op = GET_OPCODE(op);
             /* An immediate zero is replaced by TMP_ZERO
                (NOTE(review): presumably a register that always holds 0 - confirm). */
1313         if ((src & SLJIT_IMM) && srcw == 0)
1314                 src = TMP_ZERO;
1315
             /* When overflow tracking is requested, XER is first written from
                TMP_ZERO. */
1316         if (op_flags & SLJIT_SET_O)
1317                 FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
1318
1319         if (op_flags & SLJIT_I32_OP) {
1320                 if (op < SLJIT_NOT) {
                             /* A register moved onto itself needs no code unless
                                the move also converts the value. */
1321                         if (FAST_IS_REG(src) && src == dst) {
1322                                 if (!TYPE_CAST_NEEDED(op))
1323                                         return SLJIT_SUCCESS;
1324                         }
1325 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
                             /* For 32 bit moves, memory sources are switched to the
                                unsigned variant and immediates to the signed one. */
1326                         if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM))
1327                                 op = SLJIT_MOV_U32;
1328                         if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM))
1329                                 op = SLJIT_MOVU_U32;
1330                         if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM))
1331                                 op = SLJIT_MOV_S32;
1332                         if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM))
1333                                 op = SLJIT_MOVU_S32;
1334 #endif
1335                 }
1336 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1337                 else {
1338                         /* Most operations expect sign extended arguments. */
1339                         flags |= INT_DATA | SIGNED_DATA;
1340                         if (src & SLJIT_IMM)
1341                                 srcw = (sljit_s32)srcw;
1342                 }
1343 #endif
1344         }
1345
1346         switch (op) {
             /* Word sized moves; on ppc-32 the 32 bit moves are word sized too. */
1347         case SLJIT_MOV:
1348         case SLJIT_MOV_P:
1349 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
1350         case SLJIT_MOV_U32:
1351         case SLJIT_MOV_S32:
1352 #endif
1353                 return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
1354
1355 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1356         case SLJIT_MOV_U32:
1357                 return EMIT_MOV(SLJIT_MOV_U32, INT_DATA, (sljit_u32));
1358
1359         case SLJIT_MOV_S32:
1360                 return EMIT_MOV(SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, (sljit_s32));
1361 #endif
1362
1363         case SLJIT_MOV_U8:
1364                 return EMIT_MOV(SLJIT_MOV_U8, BYTE_DATA, (sljit_u8));
1365
1366         case SLJIT_MOV_S8:
1367                 return EMIT_MOV(SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA, (sljit_s8));
1368
1369         case SLJIT_MOV_U16:
1370                 return EMIT_MOV(SLJIT_MOV_U16, HALF_DATA, (sljit_u16));
1371
1372         case SLJIT_MOV_S16:
1373                 return EMIT_MOV(SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, (sljit_s16));
1374
             /* The MOVU variants map to the same operations as the MOV ones,
                with WRITE_BACK added to the flags. */
1375         case SLJIT_MOVU:
1376         case SLJIT_MOVU_P:
1377 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
1378         case SLJIT_MOVU_U32:
1379         case SLJIT_MOVU_S32:
1380 #endif
1381                 return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
1382
1383 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1384         case SLJIT_MOVU_U32:
1385                 return EMIT_MOV(SLJIT_MOV_U32, INT_DATA | WRITE_BACK, (sljit_u32));
1386
1387         case SLJIT_MOVU_S32:
1388                 return EMIT_MOV(SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s32));
1389 #endif
1390
1391         case SLJIT_MOVU_U8:
1392                 return EMIT_MOV(SLJIT_MOV_U8, BYTE_DATA | WRITE_BACK, (sljit_u8));
1393
1394         case SLJIT_MOVU_S8:
1395                 return EMIT_MOV(SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s8));
1396
1397         case SLJIT_MOVU_U16:
1398                 return EMIT_MOV(SLJIT_MOV_U16, HALF_DATA | WRITE_BACK, (sljit_u16));
1399
1400         case SLJIT_MOVU_S16:
1401                 return EMIT_MOV(SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s16));
1402
1403         case SLJIT_NOT:
1404                 return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);
1405
1406         case SLJIT_NEG:
1407                 return emit_op(compiler, SLJIT_NEG, flags, dst, dstw, TMP_REG1, 0, src, srcw);
1408
1409         case SLJIT_CLZ:
1410 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
                     /* ALT_FORM1 is set for the 32 bit variant of the operation. */
1411                 return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_I32_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw);
1412 #else
1413                 return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw);
1414 #endif
1415         }
1416
1417         return SLJIT_SUCCESS;
1418 }
1419
1420 #undef EMIT_MOV
1421 /* Immediate operand classification. src is the operand descriptor, srcw its sljit_sw value.
        TEST_SL_IMM: immediate inside the [SIMM_MIN, SIMM_MAX] range. */
1422 #define TEST_SL_IMM(src, srcw) \
1423         (((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN)
1424 /* Immediate with no bit set above the low 16 bits. */
1425 #define TEST_UL_IMM(src, srcw) \
1426         (((src) & SLJIT_IMM) && !((srcw) & ~0xffff))
1427 /* Immediate whose low 16 bits are clear (and which fits a signed 32 bit value on 64 bit targets). */
1428 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1429 #define TEST_SH_IMM(src, srcw) \
1430         (((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l)
1431 #else
1432 #define TEST_SH_IMM(src, srcw) \
1433         (((src) & SLJIT_IMM) && !((srcw) & 0xffff))
1434 #endif
1435 /* Immediate with only bits 16-31 set. The mask is widened to sljit_sw before it is inverted:
        0xffff0000 alone is an unsigned int, so its complement would not cover the upper
        32 bits of a 64 bit srcw. */
1436 #define TEST_UH_IMM(src, srcw) \
1437         (((src) & SLJIT_IMM) && !((srcw) & ~(sljit_sw)0xffff0000))
1438 /* Immediate accepted by the 32 bit ADD form (ALT_FORM4); any immediate on 32 bit targets. */
1439 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1440 #define TEST_ADD_IMM(src, srcw) \
1441         (((src) & SLJIT_IMM) && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l)
1442 #else
1443 #define TEST_ADD_IMM(src, srcw) \
1444         ((src) & SLJIT_IMM)
1445 #endif
1446 /* Immediate that fits in an unsigned 32 bit value; any immediate on 32 bit targets.
        The cast keeps the inverted mask 64 bits wide (~0xffffffff as unsigned int is 0). */
1447 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1448 #define TEST_UI_IMM(src, srcw) \
1449         (((src) & SLJIT_IMM) && !((srcw) & ~(sljit_sw)0xffffffff))
1450 #else
1451 #define TEST_UI_IMM(src, srcw) \
1452         ((src) & SLJIT_IMM)
1453 #endif
1454 /* Emits a two operand integer operation: dst = src1 <op> src2.
        op carries the opcode (GET_OPCODE) together with the flag requests (GET_FLAGS,
        SLJIT_SET_*, SLJIT_KEEP_FLAGS) and SLJIT_I32_OP. Each case tries to encode an
        immediate operand directly (the value is passed through compiler->imm and the
        chosen encoding through an ALT_FORMx flag) before using the generic emit_op form.
        When SLJIT_SET_O is requested, MTXER with TMP_ZERO is emitted first.
        Returns the result of emit_op, or SLJIT_SUCCESS for an opcode not handled here.
        CHECK_ERROR / CHECK / FAIL_IF are defined elsewhere; they presumably return
        early on failure. */
1455 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
1456         sljit_s32 dst, sljit_sw dstw,
1457         sljit_s32 src1, sljit_sw src1w,
1458         sljit_s32 src2, sljit_sw src2w)
1459 {
1460         sljit_s32 flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
1461
1462         CHECK_ERROR();
1463         CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
1464         ADJUST_LOCAL_OFFSET(dst, dstw);
1465         ADJUST_LOCAL_OFFSET(src1, src1w);
1466         ADJUST_LOCAL_OFFSET(src2, src2w);
1467         /* Zero immediates are replaced by the TMP_ZERO register operand. */
1468         if ((src1 & SLJIT_IMM) && src1w == 0)
1469                 src1 = TMP_ZERO;
1470         if ((src2 & SLJIT_IMM) && src2w == 0)
1471                 src2 = TMP_ZERO;
1472
1473 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1474         if (op & SLJIT_I32_OP) {
1475                 /* Most operations expect sign extended arguments. */
1476                 flags |= INT_DATA | SIGNED_DATA;
1477                 if (src1 & SLJIT_IMM)
1478                         src1w = (sljit_s32)(src1w);
1479                 if (src2 & SLJIT_IMM)
1480                         src2w = (sljit_s32)(src2w);
1481                 if (GET_FLAGS(op))
1482                         flags |= ALT_SIGN_EXT;
1483         }
1484 #endif
1485         if (op & SLJIT_SET_O)
1486                 FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
1487         if (src2 == TMP_REG2)
1488                 flags |= ALT_KEEP_CACHE;
1489         /* Per opcode: pick an immediate encoding when one applies, else use the generic form. */
1490         switch (GET_OPCODE(op)) {
1491         case SLJIT_ADD:
1492                 if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
1493                         if (TEST_SL_IMM(src2, src2w)) {
1494                                 compiler->imm = src2w & 0xffff;
1495                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1496                         }
1497                         if (TEST_SL_IMM(src1, src1w)) {
1498                                 compiler->imm = src1w & 0xffff;
1499                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
1500                         }
1501                         if (TEST_SH_IMM(src2, src2w)) {
1502                                 compiler->imm = (src2w >> 16) & 0xffff;
1503                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1504                         }
1505                         if (TEST_SH_IMM(src1, src1w)) {
1506                                 compiler->imm = (src1w >> 16) & 0xffff;
1507                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
1508                         }
1509                         /* Range between -1 and -32768 is covered above. */
1510                         if (TEST_ADD_IMM(src2, src2w)) {
1511                                 compiler->imm = src2w & 0xffffffff;
1512                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
1513                         }
1514                         if (TEST_ADD_IMM(src1, src1w)) {
1515                                 compiler->imm = src1w & 0xffffffff;
1516                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
1517                         }
1518                 }
1519                 if (!(GET_FLAGS(op) & (SLJIT_SET_E | SLJIT_SET_O))) {
1520                         if (TEST_SL_IMM(src2, src2w)) {
1521                                 compiler->imm = src2w & 0xffff;
1522                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1523                         }
1524                         if (TEST_SL_IMM(src1, src1w)) {
1525                                 compiler->imm = src1w & 0xffff;
1526                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
1527                         }
1528                 }
1529                 return emit_op(compiler, SLJIT_ADD, flags, dst, dstw, src1, src1w, src2, src2w);
1530         /* ALT_FORM1 is requested when SLJIT_KEEP_FLAGS is set. */
1531         case SLJIT_ADDC:
1532                 return emit_op(compiler, SLJIT_ADDC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
1533         /* Subtracting an immediate is emitted as SLJIT_ADD of its negation when the negated value fits. */
1534         case SLJIT_SUB:
1535                 if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
1536                         if (TEST_SL_IMM(src2, -src2w)) {
1537                                 compiler->imm = (-src2w) & 0xffff;
1538                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1539                         }
1540                         if (TEST_SL_IMM(src1, src1w)) {
1541                                 compiler->imm = src1w & 0xffff;
1542                                 return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
1543                         }
1544                         if (TEST_SH_IMM(src2, -src2w)) {
1545                                 compiler->imm = ((-src2w) >> 16) & 0xffff;
1546                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1547                         }
1548                         /* Range between -1 and -32768 is covered above. */
1549                         if (TEST_ADD_IMM(src2, -src2w)) {
1550                                 compiler->imm = -src2w & 0xffffffff;
1551                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
1552                         }
1553                 }
1554                 if (dst == SLJIT_UNUSED && (op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S)) && !(op & (SLJIT_SET_O | SLJIT_SET_C))) {
1555                         if (!(op & SLJIT_SET_U)) {
1556                                 /* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
1557                                 if (TEST_SL_IMM(src2, src2w)) {
1558                                         compiler->imm = src2w & 0xffff;
1559                                         return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1560                                 }
1561                                 if (GET_FLAGS(op) == SLJIT_SET_E && TEST_SL_IMM(src1, src1w)) {
1562                                         compiler->imm = src1w & 0xffff;
1563                                         return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
1564                                 }
1565                         }
1566                         if (!(op & (SLJIT_SET_E | SLJIT_SET_S))) {
1567                                 /* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
1568                                 if (TEST_UL_IMM(src2, src2w)) {
1569                                         compiler->imm = src2w & 0xffff;
1570                                         return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1571                                 }
1572                                 return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
1573                         }
1574                         if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= 0x7fff) {
1575                                 compiler->imm = src2w;
1576                                 return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1577                         }
1578                         return emit_op(compiler, SLJIT_SUB, flags | ((op & SLJIT_SET_U) ? ALT_FORM4 : 0) | ((op & (SLJIT_SET_E | SLJIT_SET_S)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
1579                 }
1580                 if (!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))) {
1581                         if (TEST_SL_IMM(src2, -src2w)) {
1582                                 compiler->imm = (-src2w) & 0xffff;
1583                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1584                         }
1585                 }
1586                 /* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
1587                 return emit_op(compiler, SLJIT_SUB, flags | (!(op & SLJIT_SET_U) ? 0 : ALT_FORM6), dst, dstw, src1, src1w, src2, src2w);
1588         /* ALT_FORM1 is requested when SLJIT_KEEP_FLAGS is set. */
1589         case SLJIT_SUBC:
1590                 return emit_op(compiler, SLJIT_SUBC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
1591         /* ALT_FORM2 marks a SLJIT_I32_OP multiply on 64 bit targets; ALT_FORM1 an immediate operand. */
1592         case SLJIT_MUL:
1593 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1594                 if (op & SLJIT_I32_OP)
1595                         flags |= ALT_FORM2;
1596 #endif
1597                 if (!GET_FLAGS(op)) {
1598                         if (TEST_SL_IMM(src2, src2w)) {
1599                                 compiler->imm = src2w & 0xffff;
1600                                 return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1601                         }
1602                         if (TEST_SL_IMM(src1, src1w)) {
1603                                 compiler->imm = src1w & 0xffff;
1604                                 return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
1605                         }
1606                 }
1607                 return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w);
1608         /* 16 bit immediate forms are also used for SLJIT_AND when flags are requested; the 32 bit form (ALT_FORM3) never is. */
1609         case SLJIT_AND:
1610         case SLJIT_OR:
1611         case SLJIT_XOR:
1612                 /* Commutative unsigned operations. */
1613                 if (!GET_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) {
1614                         if (TEST_UL_IMM(src2, src2w)) {
1615                                 compiler->imm = src2w;
1616                                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1617                         }
1618                         if (TEST_UL_IMM(src1, src1w)) {
1619                                 compiler->imm = src1w;
1620                                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
1621                         }
1622                         if (TEST_UH_IMM(src2, src2w)) {
1623                                 compiler->imm = (src2w >> 16) & 0xffff;
1624                                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1625                         }
1626                         if (TEST_UH_IMM(src1, src1w)) {
1627                                 compiler->imm = (src1w >> 16) & 0xffff;
1628                                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
1629                         }
1630                 }
1631                 if (!GET_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) {
1632                         if (TEST_UI_IMM(src2, src2w)) {
1633                                 compiler->imm = src2w;
1634                                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1635                         }
1636                         if (TEST_UI_IMM(src1, src1w)) {
1637                                 compiler->imm = src1w;
1638                                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
1639                         }
1640                 }
1641                 return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
1642         /* Shifts: ALT_FORM1 marks an immediate shift amount, ALT_FORM2 a SLJIT_I32_OP shift on 64 bit targets. */
1643         case SLJIT_ASHR:
1644                 if (op & SLJIT_KEEP_FLAGS)
1645                         flags |= ALT_FORM3;
1646                 /* Fall through. */
1647         case SLJIT_SHL:
1648         case SLJIT_LSHR:
1649 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1650                 if (op & SLJIT_I32_OP)
1651                         flags |= ALT_FORM2;
1652 #endif
1653                 if (src2 & SLJIT_IMM) {
1654                         compiler->imm = src2w;
1655                         return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1656                 }
1657                 return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
1658         }
1659
1660         return SLJIT_SUCCESS;
1661 }
1662 /* Returns the reg_map entry for reg, after the CHECK_REG_INDEX validation (macro defined elsewhere). */
1663 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
1664 {
1665         CHECK_REG_INDEX(check_sljit_get_register_index(reg));
1666         return reg_map[reg];
1667 }
1668 /* Returns reg unchanged after the CHECK_REG_INDEX validation: float registers are not remapped here. */
1669 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
1670 {
1671         CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
1672         return reg;
1673 }
1674 /* Appends one caller supplied instruction word, read from instruction, to the code stream. */
1675 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
1676         void *instruction, sljit_s32 size)
1677 {
1678         const sljit_ins *ins_ptr = (const sljit_ins*)instruction;
1679         CHECK_ERROR();
1680         CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
1681         return push_inst(compiler, ins_ptr[0]);
1682 }
1683
1684 /* --------------------------------------------------------------------- */
1685 /*  Floating point operators                                             */
1686 /* --------------------------------------------------------------------- */
1687 /* Returns SLJIT_IS_FPU_AVAILABLE when that macro is defined, 1 otherwise. */
1688 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
1689 {
1690 #ifndef SLJIT_IS_FPU_AVAILABLE
1691         /* No override was configured, so the FPU is reported as present. */
1692         return 1;
1693 #else
1694         return SLJIT_IS_FPU_AVAILABLE;
1695 #endif
1696 }
1697 /* FLOAT_DATA: DOUBLE_DATA with the SLJIT_F32_OP bit of op shifted right by 6 added in
        (sljit_emit_fop1 asserts SLJIT_F32_OP == 0x100, so this is bit 0x4).
        SELECT_FOP: picks the single or the double variant depending on SLJIT_F32_OP. */
1698 #define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 6))
1699 #define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? single : double)
1700 /* Offset from SLJIT_SP of the scratch slot used to move values between integer and float registers. */
1701 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1702 #define FLOAT_TMP_MEM_OFFSET (6 * sizeof(sljit_sw))
1703 #else
1704 #define FLOAT_TMP_MEM_OFFSET (2 * sizeof(sljit_sw))
1705 /* 32 bit targets only: offsets of the low and high word of that slot, chosen by endianness. */
1706 #if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
1707 #define FLOAT_TMP_MEM_OFFSET_LOW (2 * sizeof(sljit_sw))
1708 #define FLOAT_TMP_MEM_OFFSET_HI (3 * sizeof(sljit_sw))
1709 #else
1710 #define FLOAT_TMP_MEM_OFFSET_LOW (3 * sizeof(sljit_sw))
1711 #define FLOAT_TMP_MEM_OFFSET_HI (2 * sizeof(sljit_sw))
1712 #endif
1713
1714 #endif /* SLJIT_CONFIG_PPC_64 */
1715 /* Emits a floating point to integer conversion.
        A memory source is first loaded into TMP_FREG1. The conversion instruction (FCTIWZ,
        or FCTIDZ on 64 bit targets when op is not SLJIT_CONV_S32_FROM_F64) leaves the
        integer in TMP_FREG1, which is then moved to dst: through the stack scratch slot
        for a register destination, or with a direct store for a memory destination.
        Nothing is stored when dst is SLJIT_UNUSED. */
1716 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
1717         sljit_s32 dst, sljit_sw dstw,
1718         sljit_s32 src, sljit_sw srcw)
1719 {
1720         if (src & SLJIT_MEM) {
1721                 /* We can ignore the temporary data store on the stack from caching point of view. */
1722                 FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
1723                 src = TMP_FREG1;
1724         }
1725
1726 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1727         op = GET_OPCODE(op);
1728         FAIL_IF(push_inst(compiler, (op == SLJIT_CONV_S32_FROM_F64 ? FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src)));
1729
1730         if (dst == SLJIT_UNUSED)
1731                 return SLJIT_SUCCESS;
1732         /* Word sized result: a register gets it through the stack scratch slot, memory gets a DOUBLE_DATA store. */
1733         if (op == SLJIT_CONV_SW_FROM_F64) {
1734                 if (FAST_IS_REG(dst)) {
1735                         FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0));
1736                         return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
1737                 }
1738                 return emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
1739         }
1740
1741 #else
1742         FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src)));
1743
1744         if (dst == SLJIT_UNUSED)
1745                 return SLJIT_SUCCESS;
1746 #endif
1747         /* 32 bit result into a register: STFIWX writes it to the stack scratch slot, which is then loaded. */
1748         if (FAST_IS_REG(dst)) {
1749                 FAIL_IF(load_immediate(compiler, TMP_REG1, FLOAT_TMP_MEM_OFFSET));
1750                 FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_SP) | B(TMP_REG1)));
1751                 return emit_op_mem2(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
1752         }
1753
1754         SLJIT_ASSERT(dst & SLJIT_MEM);
1755         /* STFIWX is emitted with a base (A) and an index (B) register, so the address is reduced to that form. */
1756         if (dst & OFFS_REG_MASK) {
1757                 dstw &= 0x3;
1758                 if (dstw) {
1759 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
1760                         FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | (dstw << 11) | ((31 - dstw) << 1)));
1761 #else
1762                         FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(dst), dstw, 63 - dstw, 1)));
1763 #endif
1764                         dstw = TMP_REG1;
1765                 }
1766                 else
1767                         dstw = OFFS_REG(dst);
1768         }
1769         else {
1770                 if ((dst & REG_MASK) && !dstw) {
1771                         dstw = dst & REG_MASK;
1772                         dst = 0;
1773                 }
1774                 else {
1775                         /* This works regardless we have SLJIT_MEM1 or SLJIT_MEM0. */
1776                         FAIL_IF(load_immediate(compiler, TMP_REG1, dstw));
1777                         dstw = TMP_REG1;
1778                 }
1779         }
1780         /* From here on dstw holds the index register number, not an offset. */
1781         return push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(dst & REG_MASK) | B(dstw));
1782 }
1783 /* Emits an integer to floating point conversion.
        64 bit targets: the integer (sign extended first for SLJIT_CONV_F64_FROM_S32) is
        loaded into TMP_FREG1, through the stack scratch slot when it lives in a register,
        and converted with FCFID.
        32 bit targets: the value is built from a constructed double and an FSUB, as
        described in the comment inside the function.
        The result is stored for a memory dst, and rounded with FRSP for a register dst
        when SLJIT_F32_OP is set. */
1784 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
1785         sljit_s32 dst, sljit_sw dstw,
1786         sljit_s32 src, sljit_sw srcw)
1787 {
1788 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1789
1790         sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1791         /* Immediates and 32 bit sources are first brought into TMP_REG1 (sign extended for S32). */
1792         if (src & SLJIT_IMM) {
1793                 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
1794                         srcw = (sljit_s32)srcw;
1795                 FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
1796                 src = TMP_REG1;
1797         }
1798         else if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) {
1799                 if (FAST_IS_REG(src))
1800                         FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1)));
1801                 else
1802                         FAIL_IF(emit_op_mem2(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
1803                 src = TMP_REG1;
1804         }
1805         /* FCFID takes its operand from TMP_FREG1, so a register source goes through the stack scratch slot. */
1806         if (FAST_IS_REG(src)) {
1807                 FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
1808                 FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, dst, dstw));
1809         }
1810         else
1811                 FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
1812
1813         FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1)));
1814
1815         if (dst & SLJIT_MEM)
1816                 return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
1817         if (op & SLJIT_F32_OP)
1818                 return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
1819         return SLJIT_SUCCESS;
1820
1821 #else
1822
1823         sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1824         sljit_s32 invert_sign = 1;
1825         /* An immediate has its sign bit flipped at compile time, so no XORIS is needed for it. */
1826         if (src & SLJIT_IMM) {
1827                 FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ 0x80000000));
1828                 src = TMP_REG1;
1829                 invert_sign = 0;
1830         }
1831         else if (!FAST_IS_REG(src)) {
1832                 FAIL_IF(emit_op_mem2(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
1833                 src = TMP_REG1;
1834         }
1835
1836         /* First, a special double floating point value is constructed: (2^53 + (input xor (2^31)))
1837            The double precision format has exactly 53 bit precision, so the lower 32 bit represents
1838            the lower 32 bit of such value. The result of xor 2^31 is the same as adding 0x80000000
1839            to the input, which shifts it into the 0 - 0xffffffff range. To get the converted floating
1840            point value, we need to subtract 2^53 + 2^31 from the constructed value. */
1841         FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330));
1842         if (invert_sign)
1843                 FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000));
1844         FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
1845         FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI));
1846         FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000));
1847         FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
1848         FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
1849         FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
1850         /* TMP_FREG1 holds the constructed value and TMP_FREG2 the constant to remove from it. */
1851         FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2)));
1852
1853         if (dst & SLJIT_MEM)
1854                 return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
1855         if (op & SLJIT_F32_OP)
1856                 return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
1857         return SLJIT_SUCCESS;
1858
1859 #endif
1860 }
1861 /* Emits a floating point compare: memory operands are loaded into TMP_FREG1 / TMP_FREG2,
        then FCMPU is emitted with CRD(4) as its result field. */
1862 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
1863         sljit_s32 src1, sljit_sw src1w,
1864         sljit_s32 src2, sljit_sw src2w)
1865 {
1866         if (src1 & SLJIT_MEM) {
1867                 FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
1868                 src1 = TMP_FREG1;
1869         }
1870
1871         if (src2 & SLJIT_MEM) {
1872                 FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
1873                 src2 = TMP_FREG2;
1874         }
1875
1876         return push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2));
1877 }
1878 /* Emits a single operand floating point operation (move, precision conversion, negate, abs).
        SELECT_FOP1_OPERATION_WITH_CHECKS is defined elsewhere; it presumably validates the
        arguments and dispatches the conversion and compare opcodes to the helpers above
        (TODO confirm). A memory source is loaded into dst_r first, and a memory destination
        is written from dst_r at the end. For a plain move to memory no register move is
        emitted: dst_r is simply redirected to the source register. */
1879 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
1880         sljit_s32 dst, sljit_sw dstw,
1881         sljit_s32 src, sljit_sw srcw)
1882 {
1883         sljit_s32 dst_r;
1884
1885         CHECK_ERROR();
1886         compiler->cache_arg = 0;
1887         compiler->cache_argw = 0;
1888
1889         SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error);
1890         SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
1891         /* For SLJIT_CONV_F64_FROM_F32 the SLJIT_F32_OP bit is flipped while the source is loaded, and flipped back in the switch. */
1892         if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)
1893                 op ^= SLJIT_F32_OP;
1894
1895         dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1896
1897         if (src & SLJIT_MEM) {
1898                 FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
1899                 src = dst_r;
1900         }
1901
1902         switch (GET_OPCODE(op)) {
1903         case SLJIT_CONV_F64_FROM_F32:
1904                 op ^= SLJIT_F32_OP;
1905                 if (op & SLJIT_F32_OP) {
1906                         FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(src)));
1907                         break;
1908                 }
1909                 /* Fall through. */
1910         case SLJIT_MOV_F64:
1911                 if (src != dst_r) {
1912                         if (dst_r != TMP_FREG1)
1913                                 FAIL_IF(push_inst(compiler, FMR | FD(dst_r) | FB(src)));
1914                         else
1915                                 dst_r = src;
1916                 }
1917                 break;
1918         case SLJIT_NEG_F64:
1919                 FAIL_IF(push_inst(compiler, FNEG | FD(dst_r) | FB(src)));
1920                 break;
1921         case SLJIT_ABS_F64:
1922                 FAIL_IF(push_inst(compiler, FABS | FD(dst_r) | FB(src)));
1923                 break;
1924         }
1925
1926         if (dst & SLJIT_MEM)
1927                 FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0));
1928         return SLJIT_SUCCESS;
1929 }
1930 /* Emits a two operand floating point operation (add, sub, mul, div): dst = src1 <op> src2.
        Memory operands are loaded into TMP_FREG1 (src1) and TMP_FREG2 (src2). The result
        is computed into dst when it is a register, otherwise into TMP_FREG2 and then
        stored. SELECT_FOP chooses the single or double precision instruction. */
1931 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
1932         sljit_s32 dst, sljit_sw dstw,
1933         sljit_s32 src1, sljit_sw src1w,
1934         sljit_s32 src2, sljit_sw src2w)
1935 {
1936         sljit_s32 dst_r, flags = 0;
1937
1938         CHECK_ERROR();
1939         CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
1940         ADJUST_LOCAL_OFFSET(dst, dstw);
1941         ADJUST_LOCAL_OFFSET(src1, src1w);
1942         ADJUST_LOCAL_OFFSET(src2, src2w);
1943
1944         compiler->cache_arg = 0;
1945         compiler->cache_argw = 0;
1946
1947         dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;
1948         /* Try the fast load for each memory operand; ALT_FORM1 / ALT_FORM2 record that src1 / src2 still has to be loaded. */
1949         if (src1 & SLJIT_MEM) {
1950                 if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
1951                         FAIL_IF(compiler->error);
1952                         src1 = TMP_FREG1;
1953                 } else
1954                         flags |= ALT_FORM1;
1955         }
1956
1957         if (src2 & SLJIT_MEM) {
1958                 if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
1959                         FAIL_IF(compiler->error);
1960                         src2 = TMP_FREG2;
1961                 } else
1962                         flags |= ALT_FORM2;
1963         }
1964         /* Load the remaining operands. The last two getput_arg arguments name the access that follows (presumably a caching hint - TODO confirm). */
1965         if ((flags & (ALT_FORM1 | ALT_FORM2)) == (ALT_FORM1 | ALT_FORM2)) {
1966                 if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
1967                         FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
1968                         FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
1969                 }
1970                 else {
1971                         FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
1972                         FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
1973                 }
1974         }
1975         else if (flags & ALT_FORM1)
1976                 FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
1977         else if (flags & ALT_FORM2)
1978                 FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
1979
1980         if (flags & ALT_FORM1)
1981                 src1 = TMP_FREG1;
1982         if (flags & ALT_FORM2)
1983                 src2 = TMP_FREG2;
1984
1985         switch (GET_OPCODE(op)) {
1986         case SLJIT_ADD_F64:
1987                 FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_r) | FA(src1) | FB(src2)));
1988                 break;
1989
1990         case SLJIT_SUB_F64:
1991                 FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_r) | FA(src1) | FB(src2)));
1992                 break;
1993
1994         case SLJIT_MUL_F64:
1995                 FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_r) | FA(src1) | FC(src2) /* FMUL use FC as src2 */));
1996                 break;
1997
1998         case SLJIT_DIV_F64:
1999                 FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_r) | FA(src1) | FB(src2)));
2000                 break;
2001         }
2002         /* The result was computed into TMP_FREG2: write it to dst. */
2003         if (dst_r == TMP_FREG2)
2004                 FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
2005
2006         return SLJIT_SUCCESS;
2007 }
2008
2009 #undef FLOAT_DATA
2010 #undef SELECT_FOP
2011
2012 /* --------------------------------------------------------------------- */
2013 /*  Other instructions                                                   */
2014 /* --------------------------------------------------------------------- */
2015 /* Emits the entry of a fast call: the link register (MFLR) is saved into dst. */
2016 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
2017 {
2018         CHECK_ERROR();
2019         CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
2020         ADJUST_LOCAL_OFFSET(dst, dstw);
2021
2022         if (dst != SLJIT_UNUSED) {
2023                 /* A register destination receives the link register directly. */
2024                 if (FAST_IS_REG(dst))
2025                         return push_inst(compiler, MFLR | D(dst));
2026                 /* Memory destination: read it into TMP_REG2, then store that register. */
2027                 FAIL_IF(push_inst(compiler, MFLR | D(TMP_REG2)));
2028                 return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
2029         }
2030
2031         /* An unused destination is uncommon but allowed; nothing is emitted. */
2032         return SLJIT_SUCCESS;
2033 }
2033 /* Emits the return of a fast call: src is moved into the link register (MTLR), then BLR is emitted. */
2034 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
2035 {
2036         sljit_s32 link_src = TMP_REG2;
2037
2038         CHECK_ERROR();
2039         CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
2040         ADJUST_LOCAL_OFFSET(src, srcw);
2041
2042         if (FAST_IS_REG(src))
2043                 link_src = src;
2044         else if (src & SLJIT_MEM)
2045                 FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
2046         else if (src & SLJIT_IMM)
2047                 FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
2048         FAIL_IF(push_inst(compiler, MTLR | S(link_src)));
2049         return push_inst(compiler, BLR);
2050 }
2051
2052 /* --------------------------------------------------------------------- */
2053 /*  Conditional instructions                                             */
2054 /* --------------------------------------------------------------------- */
2055
2056 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2057 {
2058         struct sljit_label *label;
2059
2060         CHECK_ERROR_PTR();
2061         CHECK_PTR(check_sljit_emit_label(compiler));
2062
2063         if (compiler->last_label && compiler->last_label->size == compiler->size)
2064                 return compiler->last_label;
2065
2066         label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2067         PTR_FAIL_IF(!label);
2068         set_label(label, compiler);
2069         return label;
2070 }
2071
2072 static sljit_ins get_bo_bi_flags(sljit_s32 type)
2073 {
2074         switch (type) {
2075         case SLJIT_EQUAL:
2076                 return (12 << 21) | (2 << 16);
2077
2078         case SLJIT_NOT_EQUAL:
2079                 return (4 << 21) | (2 << 16);
2080
2081         case SLJIT_LESS:
2082         case SLJIT_LESS_F64:
2083                 return (12 << 21) | ((4 + 0) << 16);
2084
2085         case SLJIT_GREATER_EQUAL:
2086         case SLJIT_GREATER_EQUAL_F64:
2087                 return (4 << 21) | ((4 + 0) << 16);
2088
2089         case SLJIT_GREATER:
2090         case SLJIT_GREATER_F64:
2091                 return (12 << 21) | ((4 + 1) << 16);
2092
2093         case SLJIT_LESS_EQUAL:
2094         case SLJIT_LESS_EQUAL_F64:
2095                 return (4 << 21) | ((4 + 1) << 16);
2096
2097         case SLJIT_SIG_LESS:
2098                 return (12 << 21) | (0 << 16);
2099
2100         case SLJIT_SIG_GREATER_EQUAL:
2101                 return (4 << 21) | (0 << 16);
2102
2103         case SLJIT_SIG_GREATER:
2104                 return (12 << 21) | (1 << 16);
2105
2106         case SLJIT_SIG_LESS_EQUAL:
2107                 return (4 << 21) | (1 << 16);
2108
2109         case SLJIT_OVERFLOW:
2110         case SLJIT_MUL_OVERFLOW:
2111                 return (12 << 21) | (3 << 16);
2112
2113         case SLJIT_NOT_OVERFLOW:
2114         case SLJIT_MUL_NOT_OVERFLOW:
2115                 return (4 << 21) | (3 << 16);
2116
2117         case SLJIT_EQUAL_F64:
2118                 return (12 << 21) | ((4 + 2) << 16);
2119
2120         case SLJIT_NOT_EQUAL_F64:
2121                 return (4 << 21) | ((4 + 2) << 16);
2122
2123         case SLJIT_UNORDERED_F64:
2124                 return (12 << 21) | ((4 + 3) << 16);
2125
2126         case SLJIT_ORDERED_F64:
2127                 return (4 << 21) | ((4 + 3) << 16);
2128
2129         default:
2130                 SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3);
2131                 return (20 << 21);
2132         }
2133 }
2134
2135 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2136 {
2137         struct sljit_jump *jump;
2138         sljit_ins bo_bi_flags;
2139
2140         CHECK_ERROR_PTR();
2141         CHECK_PTR(check_sljit_emit_jump(compiler, type));
2142
2143         bo_bi_flags = get_bo_bi_flags(type & 0xff);
2144         if (!bo_bi_flags)
2145                 return NULL;
2146
2147         jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2148         PTR_FAIL_IF(!jump);
2149         set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2150         type &= 0xff;
2151
2152         /* In PPC, we don't need to touch the arguments. */
2153         if (type < SLJIT_JUMP)
2154                 jump->flags |= IS_COND;
2155 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
2156         if (type >= SLJIT_CALL0)
2157                 jump->flags |= IS_CALL;
2158 #endif
2159
2160         PTR_FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
2161         PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_CALL_REG)));
2162         jump->addr = compiler->size;
2163         PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0)));
2164         return jump;
2165 }
2166
2167 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
2168 {
2169         struct sljit_jump *jump = NULL;
2170         sljit_s32 src_r;
2171
2172         CHECK_ERROR();
2173         CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2174         ADJUST_LOCAL_OFFSET(src, srcw);
2175
2176         if (FAST_IS_REG(src)) {
2177 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
2178                 if (type >= SLJIT_CALL0) {
2179                         FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src)));
2180                         src_r = TMP_CALL_REG;
2181                 }
2182                 else
2183                         src_r = src;
2184 #else
2185                 src_r = src;
2186 #endif
2187         } else if (src & SLJIT_IMM) {
2188                 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2189                 FAIL_IF(!jump);
2190                 set_jump(jump, compiler, JUMP_ADDR);
2191                 jump->u.target = srcw;
2192 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
2193                 if (type >= SLJIT_CALL0)
2194                         jump->flags |= IS_CALL;
2195 #endif
2196                 FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
2197                 src_r = TMP_CALL_REG;
2198         }
2199         else {
2200                 FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw));
2201                 src_r = TMP_CALL_REG;
2202         }
2203
2204         FAIL_IF(push_inst(compiler, MTCTR | S(src_r)));
2205         if (jump)
2206                 jump->addr = compiler->size;
2207         return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0));
2208 }
2209
/* Get a bit from CR, all other bits are zeroed.

   MFCR copies the condition register into dst, then RLWINM rotates
   it left by (1 + bit) and keeps only the lowest bit (mask begin and
   mask end are both 31), so dst ends up as 0 or 1.

   Both macros expect a variable named 'compiler' in scope and return
   from the enclosing function on failure (through FAIL_IF). They are
   wrapped in do { } while (0) so that each use behaves as a single
   statement, e.g. as the body of an unbraced if / else. Terminate
   every use with a semicolon. */
#define GET_CR_BIT(bit, dst) \
	do { \
		FAIL_IF(push_inst(compiler, MFCR | D(dst))); \
		FAIL_IF(push_inst(compiler, RLWINM | S(dst) | A(dst) | ((1 + (bit)) << 11) | (31 << 6) | (31 << 1))); \
	} while (0)

/* Flips the lowest bit of dst (XORI with 1). */
#define INVERT_BIT(dst) \
	do { \
		FAIL_IF(push_inst(compiler, XORI | S(dst) | A(dst) | 0x1)); \
	} while (0)
2217
2218 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
2219         sljit_s32 dst, sljit_sw dstw,
2220         sljit_s32 src, sljit_sw srcw,
2221         sljit_s32 type)
2222 {
2223         sljit_s32 reg, input_flags;
2224         sljit_s32 flags = GET_ALL_FLAGS(op);
2225         sljit_sw original_dstw = dstw;
2226
2227         CHECK_ERROR();
2228         CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
2229         ADJUST_LOCAL_OFFSET(dst, dstw);
2230
2231         if (dst == SLJIT_UNUSED)
2232                 return SLJIT_SUCCESS;
2233
2234         op = GET_OPCODE(op);
2235         reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
2236
2237         compiler->cache_arg = 0;
2238         compiler->cache_argw = 0;
2239         if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
2240                 ADJUST_LOCAL_OFFSET(src, srcw);
2241 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
2242                 input_flags = (flags & SLJIT_I32_OP) ? INT_DATA : WORD_DATA;
2243 #else
2244                 input_flags = WORD_DATA;
2245 #endif
2246                 FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
2247                 src = TMP_REG1;
2248                 srcw = 0;
2249         }
2250
2251         switch (type & 0xff) {
2252         case SLJIT_EQUAL:
2253                 GET_CR_BIT(2, reg);
2254                 break;
2255
2256         case SLJIT_NOT_EQUAL:
2257                 GET_CR_BIT(2, reg);
2258                 INVERT_BIT(reg);
2259                 break;
2260
2261         case SLJIT_LESS:
2262         case SLJIT_LESS_F64:
2263                 GET_CR_BIT(4 + 0, reg);
2264                 break;
2265
2266         case SLJIT_GREATER_EQUAL:
2267         case SLJIT_GREATER_EQUAL_F64:
2268                 GET_CR_BIT(4 + 0, reg);
2269                 INVERT_BIT(reg);
2270                 break;
2271
2272         case SLJIT_GREATER:
2273         case SLJIT_GREATER_F64:
2274                 GET_CR_BIT(4 + 1, reg);
2275                 break;
2276
2277         case SLJIT_LESS_EQUAL:
2278         case SLJIT_LESS_EQUAL_F64:
2279                 GET_CR_BIT(4 + 1, reg);
2280                 INVERT_BIT(reg);
2281                 break;
2282
2283         case SLJIT_SIG_LESS:
2284                 GET_CR_BIT(0, reg);
2285                 break;
2286
2287         case SLJIT_SIG_GREATER_EQUAL:
2288                 GET_CR_BIT(0, reg);
2289                 INVERT_BIT(reg);
2290                 break;
2291
2292         case SLJIT_SIG_GREATER:
2293                 GET_CR_BIT(1, reg);
2294                 break;
2295
2296         case SLJIT_SIG_LESS_EQUAL:
2297                 GET_CR_BIT(1, reg);
2298                 INVERT_BIT(reg);
2299                 break;
2300
2301         case SLJIT_OVERFLOW:
2302         case SLJIT_MUL_OVERFLOW:
2303                 GET_CR_BIT(3, reg);
2304                 break;
2305
2306         case SLJIT_NOT_OVERFLOW:
2307         case SLJIT_MUL_NOT_OVERFLOW:
2308                 GET_CR_BIT(3, reg);
2309                 INVERT_BIT(reg);
2310                 break;
2311
2312         case SLJIT_EQUAL_F64:
2313                 GET_CR_BIT(4 + 2, reg);
2314                 break;
2315
2316         case SLJIT_NOT_EQUAL_F64:
2317                 GET_CR_BIT(4 + 2, reg);
2318                 INVERT_BIT(reg);
2319                 break;
2320
2321         case SLJIT_UNORDERED_F64:
2322                 GET_CR_BIT(4 + 3, reg);
2323                 break;
2324
2325         case SLJIT_ORDERED_F64:
2326                 GET_CR_BIT(4 + 3, reg);
2327                 INVERT_BIT(reg);
2328                 break;
2329
2330         default:
2331                 SLJIT_ASSERT_STOP();
2332                 break;
2333         }
2334
2335         if (op < SLJIT_ADD) {
2336 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
2337                 if (op == SLJIT_MOV)
2338                         input_flags = WORD_DATA;
2339                 else {
2340                         op = SLJIT_MOV_U32;
2341                         input_flags = INT_DATA;
2342                 }
2343 #else
2344                 op = SLJIT_MOV;
2345                 input_flags = WORD_DATA;
2346 #endif
2347                 if (reg != TMP_REG2)
2348                         return SLJIT_SUCCESS;
2349                 return emit_op(compiler, op, input_flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
2350         }
2351
2352 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2353                 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2354         compiler->skip_checks = 1;
2355 #endif
2356         return sljit_emit_op2(compiler, op | flags, dst, original_dstw, src, srcw, TMP_REG2, 0);
2357 }
2358
2359 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
2360 {
2361         struct sljit_const *const_;
2362         sljit_s32 reg;
2363
2364         CHECK_ERROR_PTR();
2365         CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
2366         ADJUST_LOCAL_OFFSET(dst, dstw);
2367
2368         const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2369         PTR_FAIL_IF(!const_);
2370         set_const(const_, compiler);
2371
2372         reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
2373
2374         PTR_FAIL_IF(emit_const(compiler, reg, init_value));
2375
2376         if (dst & SLJIT_MEM)
2377                 PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
2378         return const_;
2379 }