chiark / gitweb /
Commit upstream pcre-8.39.tar.bz2
[pcre3.git] / sljit / sljitNativeARM_64.c
1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28 {
29         return "ARM-64" SLJIT_CPUINFO;
30 }
31
32 /* Length of an instruction word */
33 typedef sljit_u32 sljit_ins;
34
35 #define TMP_ZERO        (0)
36
37 #define TMP_REG1        (SLJIT_NUMBER_OF_REGISTERS + 2)
38 #define TMP_REG2        (SLJIT_NUMBER_OF_REGISTERS + 3)
39 #define TMP_REG3        (SLJIT_NUMBER_OF_REGISTERS + 4)
40 #define TMP_LR          (SLJIT_NUMBER_OF_REGISTERS + 5)
41 #define TMP_SP          (SLJIT_NUMBER_OF_REGISTERS + 6)
42
43 #define TMP_FREG1       (0)
44 #define TMP_FREG2       (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
45
46 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = {
47   31, 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 11, 30, 31
48 };
49
50 #define W_OP (1 << 31)
51 #define RD(rd) (reg_map[rd])
52 #define RT(rt) (reg_map[rt])
53 #define RN(rn) (reg_map[rn] << 5)
54 #define RT2(rt2) (reg_map[rt2] << 10)
55 #define RM(rm) (reg_map[rm] << 16)
56 #define VD(vd) (vd)
57 #define VT(vt) (vt)
58 #define VN(vn) ((vn) << 5)
59 #define VM(vm) ((vm) << 16)
60
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

/* Base encodings of the instructions emitted by this file. Register and
   immediate fields are OR-ed into these values at the point of use
   (see the RD / RN / RM style macros), and individual bits are toggled
   there with XOR when a variant of the instruction is required. */
#define ADC 0x9a000000
#define ADD 0x8b000000
#define ADDI 0x91000000
#define AND 0x8a000000
#define ANDI 0x92000000
#define ASRV 0x9ac02800
#define B 0x14000000
#define B_CC 0x54000000
#define BL 0x94000000
#define BLR 0xd63f0000
#define BR 0xd61f0000
#define BRK 0xd4200000
#define CBZ 0xb4000000
#define CLZ 0xdac01000
#define CSINC 0x9a800400
#define EOR 0xca000000
#define EORI 0xd2000000
#define FABS 0x1e60c000
#define FADD 0x1e602800
#define FCMP 0x1e602000
#define FCVT 0x1e224000
#define FCVTZS 0x9e780000
#define FDIV 0x1e601800
#define FMOV 0x1e604000
#define FMUL 0x1e600800
#define FNEG 0x1e614000
#define FSUB 0x1e603800
#define LDRI 0xf9400000
#define LDP 0xa9400000
#define LDP_PST 0xa8c00000
#define LSLV 0x9ac02000
#define LSRV 0x9ac02400
#define MADD 0x9b000000
#define MOVK 0xf2800000
#define MOVN 0x92800000
#define MOVZ 0xd2800000
#define NOP 0xd503201f
#define ORN 0xaa200000
#define ORR 0xaa000000
#define ORRI 0xb2000000
#define RET 0xd65f0000
#define SBC 0xda000000
#define SBFM 0x93000000
#define SCVTF 0x9e620000
#define SDIV 0x9ac00c00
#define SMADDL 0x9b200000
#define SMULH 0x9b403c00
#define STP 0xa9000000
#define STP_PRE 0xa9800000
#define STRI 0xf9000000
#define STR_FI 0x3d000000
#define STR_FR 0x3c206800
#define STUR_FI 0x3c000000
#define SUB 0xcb000000
#define SUBI 0xd1000000
#define SUBS 0xeb000000
#define UBFM 0xd3000000
#define UDIV 0x9ac00800
#define UMULH 0x9bc03c00
124
125 /* dest_reg is the absolute name of the register
126    Useful for reordering instructions in the delay slot. */
127 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
128 {
129         sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
130         FAIL_IF(!ptr);
131         *ptr = ins;
132         compiler->size++;
133         return SLJIT_SUCCESS;
134 }
135
136 static SLJIT_INLINE sljit_s32 emit_imm64_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm)
137 {
138         FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5)));
139         FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 16) & 0xffff) << 5) | (1 << 21)));
140         FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 32) & 0xffff) << 5) | (2 << 21)));
141         return push_inst(compiler, MOVK | RD(dst) | ((imm >> 48) << 5) | (3 << 21));
142 }
143
144 static SLJIT_INLINE void modify_imm64_const(sljit_ins* inst, sljit_uw new_imm)
145 {
146         sljit_s32 dst = inst[0] & 0x1f;
147         SLJIT_ASSERT((inst[0] & 0xffe00000) == MOVZ && (inst[1] & 0xffe00000) == (MOVK | (1 << 21)));
148         inst[0] = MOVZ | dst | ((new_imm & 0xffff) << 5);
149         inst[1] = MOVK | dst | (((new_imm >> 16) & 0xffff) << 5) | (1 << 21);
150         inst[2] = MOVK | dst | (((new_imm >> 32) & 0xffff) << 5) | (2 << 21);
151         inst[3] = MOVK | dst | ((new_imm >> 48) << 5) | (3 << 21);
152 }
153
154 static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
155 {
156         sljit_sw diff;
157         sljit_uw target_addr;
158
159         if (jump->flags & SLJIT_REWRITABLE_JUMP) {
160                 jump->flags |= PATCH_ABS64;
161                 return 0;
162         }
163
164         if (jump->flags & JUMP_ADDR)
165                 target_addr = jump->u.target;
166         else {
167                 SLJIT_ASSERT(jump->flags & JUMP_LABEL);
168                 target_addr = (sljit_uw)(code + jump->u.label->size);
169         }
170         diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4);
171
172         if (jump->flags & IS_COND) {
173                 diff += sizeof(sljit_ins);
174                 if (diff <= 0xfffff && diff >= -0x100000) {
175                         code_ptr[-5] ^= (jump->flags & IS_CBZ) ? (0x1 << 24) : 0x1;
176                         jump->addr -= sizeof(sljit_ins);
177                         jump->flags |= PATCH_COND;
178                         return 5;
179                 }
180                 diff -= sizeof(sljit_ins);
181         }
182
183         if (diff <= 0x7ffffff && diff >= -0x8000000) {
184                 jump->flags |= PATCH_B;
185                 return 4;
186         }
187
188         if (target_addr <= 0xffffffffl) {
189                 if (jump->flags & IS_COND)
190                         code_ptr[-5] -= (2 << 5);
191                 code_ptr[-2] = code_ptr[0];
192                 return 2;
193         }
194         if (target_addr <= 0xffffffffffffl) {
195                 if (jump->flags & IS_COND)
196                         code_ptr[-5] -= (1 << 5);
197                 jump->flags |= PATCH_ABS48;
198                 code_ptr[-1] = code_ptr[0];
199                 return 1;
200         }
201
202         jump->flags |= PATCH_ABS64;
203         return 0;
204 }
205
206 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
207 {
208         struct sljit_memory_fragment *buf;
209         sljit_ins *code;
210         sljit_ins *code_ptr;
211         sljit_ins *buf_ptr;
212         sljit_ins *buf_end;
213         sljit_uw word_count;
214         sljit_uw addr;
215         sljit_s32 dst;
216
217         struct sljit_label *label;
218         struct sljit_jump *jump;
219         struct sljit_const *const_;
220
221         CHECK_ERROR_PTR();
222         CHECK_PTR(check_sljit_generate_code(compiler));
223         reverse_buf(compiler);
224
225         code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
226         PTR_FAIL_WITH_EXEC_IF(code);
227         buf = compiler->buf;
228
229         code_ptr = code;
230         word_count = 0;
231         label = compiler->labels;
232         jump = compiler->jumps;
233         const_ = compiler->consts;
234
235         do {
236                 buf_ptr = (sljit_ins*)buf->memory;
237                 buf_end = buf_ptr + (buf->used_size >> 2);
238                 do {
239                         *code_ptr = *buf_ptr++;
240                         /* These structures are ordered by their address. */
241                         SLJIT_ASSERT(!label || label->size >= word_count);
242                         SLJIT_ASSERT(!jump || jump->addr >= word_count);
243                         SLJIT_ASSERT(!const_ || const_->addr >= word_count);
244                         if (label && label->size == word_count) {
245                                 label->addr = (sljit_uw)code_ptr;
246                                 label->size = code_ptr - code;
247                                 label = label->next;
248                         }
249                         if (jump && jump->addr == word_count) {
250                                         jump->addr = (sljit_uw)(code_ptr - 4);
251                                         code_ptr -= detect_jump_type(jump, code_ptr, code);
252                                         jump = jump->next;
253                         }
254                         if (const_ && const_->addr == word_count) {
255                                 const_->addr = (sljit_uw)code_ptr;
256                                 const_ = const_->next;
257                         }
258                         code_ptr ++;
259                         word_count ++;
260                 } while (buf_ptr < buf_end);
261
262                 buf = buf->next;
263         } while (buf);
264
265         if (label && label->size == word_count) {
266                 label->addr = (sljit_uw)code_ptr;
267                 label->size = code_ptr - code;
268                 label = label->next;
269         }
270
271         SLJIT_ASSERT(!label);
272         SLJIT_ASSERT(!jump);
273         SLJIT_ASSERT(!const_);
274         SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
275
276         jump = compiler->jumps;
277         while (jump) {
278                 do {
279                         addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
280                         buf_ptr = (sljit_ins*)jump->addr;
281                         if (jump->flags & PATCH_B) {
282                                 addr = (sljit_sw)(addr - jump->addr) >> 2;
283                                 SLJIT_ASSERT((sljit_sw)addr <= 0x1ffffff && (sljit_sw)addr >= -0x2000000);
284                                 buf_ptr[0] = ((jump->flags & IS_BL) ? BL : B) | (addr & 0x3ffffff);
285                                 if (jump->flags & IS_COND)
286                                         buf_ptr[-1] -= (4 << 5);
287                                 break;
288                         }
289                         if (jump->flags & PATCH_COND) {
290                                 addr = (sljit_sw)(addr - jump->addr) >> 2;
291                                 SLJIT_ASSERT((sljit_sw)addr <= 0x3ffff && (sljit_sw)addr >= -0x40000);
292                                 buf_ptr[0] = (buf_ptr[0] & ~0xffffe0) | ((addr & 0x7ffff) << 5);
293                                 break;
294                         }
295
296                         SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || addr <= 0xffffffffl);
297                         SLJIT_ASSERT((jump->flags & PATCH_ABS64) || addr <= 0xffffffffffffl);
298
299                         dst = buf_ptr[0] & 0x1f;
300                         buf_ptr[0] = MOVZ | dst | ((addr & 0xffff) << 5);
301                         buf_ptr[1] = MOVK | dst | (((addr >> 16) & 0xffff) << 5) | (1 << 21);
302                         if (jump->flags & (PATCH_ABS48 | PATCH_ABS64))
303                                 buf_ptr[2] = MOVK | dst | (((addr >> 32) & 0xffff) << 5) | (2 << 21);
304                         if (jump->flags & PATCH_ABS64)
305                                 buf_ptr[3] = MOVK | dst | (((addr >> 48) & 0xffff) << 5) | (3 << 21);
306                 } while (0);
307                 jump = jump->next;
308         }
309
310         compiler->error = SLJIT_ERR_COMPILED;
311         compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
312         SLJIT_CACHE_FLUSH(code, code_ptr);
313         return code;
314 }
315
316 /* --------------------------------------------------------------------- */
317 /*  Core code generator functions.                                       */
318 /* --------------------------------------------------------------------- */
319
320 #define COUNT_TRAILING_ZERO(value, result) \
321         result = 0; \
322         if (!(value & 0xffffffff)) { \
323                 result += 32; \
324                 value >>= 32; \
325         } \
326         if (!(value & 0xffff)) { \
327                 result += 16; \
328                 value >>= 16; \
329         } \
330         if (!(value & 0xff)) { \
331                 result += 8; \
332                 value >>= 8; \
333         } \
334         if (!(value & 0xf)) { \
335                 result += 4; \
336                 value >>= 4; \
337         } \
338         if (!(value & 0x3)) { \
339                 result += 2; \
340                 value >>= 2; \
341         } \
342         if (!(value & 0x1)) { \
343                 result += 1; \
344                 value >>= 1; \
345         }
346
347 #define LOGICAL_IMM_CHECK 0x100
348
349 static sljit_ins logical_imm(sljit_sw imm, sljit_s32 len)
350 {
351         sljit_s32 negated, ones, right;
352         sljit_uw mask, uimm;
353         sljit_ins ins;
354
355         if (len & LOGICAL_IMM_CHECK) {
356                 len &= ~LOGICAL_IMM_CHECK;
357                 if (len == 32 && (imm == 0 || imm == -1))
358                         return 0;
359                 if (len == 16 && ((sljit_s32)imm == 0 || (sljit_s32)imm == -1))
360                         return 0;
361         }
362
363         SLJIT_ASSERT((len == 32 && imm != 0 && imm != -1)
364                 || (len == 16 && (sljit_s32)imm != 0 && (sljit_s32)imm != -1));
365         uimm = (sljit_uw)imm;
366         while (1) {
367                 if (len <= 0) {
368                         SLJIT_ASSERT_STOP();
369                         return 0;
370                 }
371                 mask = ((sljit_uw)1 << len) - 1;
372                 if ((uimm & mask) != ((uimm >> len) & mask))
373                         break;
374                 len >>= 1;
375         }
376
377         len <<= 1;
378
379         negated = 0;
380         if (uimm & 0x1) {
381                 negated = 1;
382                 uimm = ~uimm;
383         }
384
385         if (len < 64)
386                 uimm &= ((sljit_uw)1 << len) - 1;
387
388         /* Unsigned right shift. */
389         COUNT_TRAILING_ZERO(uimm, right);
390
391         /* Signed shift. We also know that the highest bit is set. */
392         imm = (sljit_sw)~uimm;
393         SLJIT_ASSERT(imm < 0);
394
395         COUNT_TRAILING_ZERO(imm, ones);
396
397         if (~imm)
398                 return 0;
399
400         if (len == 64)
401                 ins = 1 << 22;
402         else
403                 ins = (0x3f - ((len << 1) - 1)) << 10;
404
405         if (negated)
406                 return ins | ((len - ones - 1) << 10) | ((len - ones - right) << 16);
407
408         return ins | ((ones - 1) << 10) | ((len - right) << 16);
409 }
410
411 #undef COUNT_TRAILING_ZERO
412
413 static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw simm)
414 {
415         sljit_uw imm = (sljit_uw)simm;
416         sljit_s32 i, zeros, ones, first;
417         sljit_ins bitmask;
418
419         if (imm <= 0xffff)
420                 return push_inst(compiler, MOVZ | RD(dst) | (imm << 5));
421
422         if (simm >= -0x10000 && simm < 0)
423                 return push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5));
424
425         if (imm <= 0xffffffffl) {
426                 if ((imm & 0xffff0000l) == 0xffff0000)
427                         return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff) << 5));
428                 if ((imm & 0xffff) == 0xffff)
429                         return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff0000l) >> (16 - 5)) | (1 << 21));
430                 bitmask = logical_imm(simm, 16);
431                 if (bitmask != 0)
432                         return push_inst(compiler, (ORRI ^ W_OP) | RD(dst) | RN(TMP_ZERO) | bitmask);
433         }
434         else {
435                 bitmask = logical_imm(simm, 32);
436                 if (bitmask != 0)
437                         return push_inst(compiler, ORRI | RD(dst) | RN(TMP_ZERO) | bitmask);
438         }
439
440         if (imm <= 0xffffffffl) {
441                 FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5)));
442                 return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21));
443         }
444
445         if (simm >= -0x100000000l && simm < 0) {
446                 FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5)));
447                 return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21));
448         }
449
450         /* A large amount of number can be constructed from ORR and MOVx,
451         but computing them is costly. We don't  */
452
453         zeros = 0;
454         ones = 0;
455         for (i = 4; i > 0; i--) {
456                 if ((simm & 0xffff) == 0)
457                         zeros++;
458                 if ((simm & 0xffff) == 0xffff)
459                         ones++;
460                 simm >>= 16;
461         }
462
463         simm = (sljit_sw)imm;
464         first = 1;
465         if (ones > zeros) {
466                 simm = ~simm;
467                 for (i = 0; i < 4; i++) {
468                         if (!(simm & 0xffff)) {
469                                 simm >>= 16;
470                                 continue;
471                         }
472                         if (first) {
473                                 first = 0;
474                                 FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((simm & 0xffff) << 5) | (i << 21)));
475                         }
476                         else
477                                 FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((~simm & 0xffff) << 5) | (i << 21)));
478                         simm >>= 16;
479                 }
480                 return SLJIT_SUCCESS;
481         }
482
483         for (i = 0; i < 4; i++) {
484                 if (!(simm & 0xffff)) {
485                         simm >>= 16;
486                         continue;
487                 }
488                 if (first) {
489                         first = 0;
490                         FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((simm & 0xffff) << 5) | (i << 21)));
491                 }
492                 else
493                         FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((simm & 0xffff) << 5) | (i << 21)));
494                 simm >>= 16;
495         }
496         return SLJIT_SUCCESS;
497 }
498
/* Modifier bits of the flags argument of emit_op_imm. The low 16 bits
   of flags (flags & 0xffff) hold the operation code. */
#define ARG1_IMM	0x0010000
#define ARG2_IMM	0x0020000
#define INT_OP		0x0040000
#define SET_FLAGS	0x0080000
#define UNUSED_RETURN	0x0100000
#define SLOW_DEST	0x0200000
#define SLOW_SRC1	0x0400000
#define SLOW_SRC2	0x0800000

/* When SET_FLAGS is present in flags, adds flag_bits to inv_bits and,
   if UNUSED_RETURN is present as well, redirects dst to TMP_ZERO.
   Expects the locals flags, inv_bits and dst to be in scope.
   The body is wrapped in do { } while (0) so that the macro acts as one
   statement and cannot capture an else that follows it. */
#define CHECK_FLAGS(flag_bits) \
	do { \
		if (flags & SET_FLAGS) { \
			inv_bits |= (flag_bits); \
			if (flags & UNUSED_RETURN) \
				dst = TMP_ZERO; \
		} \
	} while (0)
514
515 static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_sw arg1, sljit_sw arg2)
516 {
517         /* dst must be register, TMP_REG1
518            arg1 must be register, TMP_REG1, imm
519            arg2 must be register, TMP_REG2, imm */
520         sljit_ins inv_bits = (flags & INT_OP) ? (1 << 31) : 0;
521         sljit_ins inst_bits;
522         sljit_s32 op = (flags & 0xffff);
523         sljit_s32 reg;
524         sljit_sw imm, nimm;
525
526         if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) {
527                 /* Both are immediates. */
528                 flags &= ~ARG1_IMM;
529                 if (arg1 == 0 && op != SLJIT_ADD && op != SLJIT_SUB)
530                         arg1 = TMP_ZERO;
531                 else {
532                         FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
533                         arg1 = TMP_REG1;
534                 }
535         }
536
537         if (flags & (ARG1_IMM | ARG2_IMM)) {
538                 reg = (flags & ARG2_IMM) ? arg1 : arg2;
539                 imm = (flags & ARG2_IMM) ? arg2 : arg1;
540
541                 switch (op) {
542                 case SLJIT_MUL:
543                 case SLJIT_NEG:
544                 case SLJIT_CLZ:
545                 case SLJIT_ADDC:
546                 case SLJIT_SUBC:
547                         /* No form with immediate operand (except imm 0, which
548                         is represented by a ZERO register). */
549                         break;
550                 case SLJIT_MOV:
551                         SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1);
552                         return load_immediate(compiler, dst, imm);
553                 case SLJIT_NOT:
554                         SLJIT_ASSERT(flags & ARG2_IMM);
555                         FAIL_IF(load_immediate(compiler, dst, (flags & INT_OP) ? (~imm & 0xffffffff) : ~imm));
556                         goto set_flags;
557                 case SLJIT_SUB:
558                         if (flags & ARG1_IMM)
559                                 break;
560                         imm = -imm;
561                         /* Fall through. */
562                 case SLJIT_ADD:
563                         if (imm == 0) {
564                                 CHECK_FLAGS(1 << 29);
565                                 return push_inst(compiler, ((op == SLJIT_ADD ? ADDI : SUBI) ^ inv_bits) | RD(dst) | RN(reg));
566                         }
567                         if (imm > 0 && imm <= 0xfff) {
568                                 CHECK_FLAGS(1 << 29);
569                                 return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (imm << 10));
570                         }
571                         nimm = -imm;
572                         if (nimm > 0 && nimm <= 0xfff) {
573                                 CHECK_FLAGS(1 << 29);
574                                 return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (nimm << 10));
575                         }
576                         if (imm > 0 && imm <= 0xffffff && !(imm & 0xfff)) {
577                                 CHECK_FLAGS(1 << 29);
578                                 return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22));
579                         }
580                         if (nimm > 0 && nimm <= 0xffffff && !(nimm & 0xfff)) {
581                                 CHECK_FLAGS(1 << 29);
582                                 return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22));
583                         }
584                         if (imm > 0 && imm <= 0xffffff && !(flags & SET_FLAGS)) {
585                                 FAIL_IF(push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22)));
586                                 return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(dst) | ((imm & 0xfff) << 10));
587                         }
588                         if (nimm > 0 && nimm <= 0xffffff && !(flags & SET_FLAGS)) {
589                                 FAIL_IF(push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22)));
590                                 return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(dst) | ((nimm & 0xfff) << 10));
591                         }
592                         break;
593                 case SLJIT_AND:
594                         inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32));
595                         if (!inst_bits)
596                                 break;
597                         CHECK_FLAGS(3 << 29);
598                         return push_inst(compiler, (ANDI ^ inv_bits) | RD(dst) | RN(reg) | inst_bits);
599                 case SLJIT_OR:
600                 case SLJIT_XOR:
601                         inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32));
602                         if (!inst_bits)
603                                 break;
604                         if (op == SLJIT_OR)
605                                 inst_bits |= ORRI;
606                         else
607                                 inst_bits |= EORI;
608                         FAIL_IF(push_inst(compiler, (inst_bits ^ inv_bits) | RD(dst) | RN(reg)));
609                         goto set_flags;
610                 case SLJIT_SHL:
611                         if (flags & ARG1_IMM)
612                                 break;
613                         if (flags & INT_OP) {
614                                 imm &= 0x1f;
615                                 FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | ((-imm & 0x1f) << 16) | ((31 - imm) << 10)));
616                         }
617                         else {
618                                 imm &= 0x3f;
619                                 FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | ((-imm & 0x3f) << 16) | ((63 - imm) << 10)));
620                         }
621                         goto set_flags;
622                 case SLJIT_LSHR:
623                 case SLJIT_ASHR:
624                         if (flags & ARG1_IMM)
625                                 break;
626                         if (op == SLJIT_ASHR)
627                                 inv_bits |= 1 << 30;
628                         if (flags & INT_OP) {
629                                 imm &= 0x1f;
630                                 FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (imm << 16) | (31 << 10)));
631                         }
632                         else {
633                                 imm &= 0x3f;
634                                 FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | (imm << 16) | (63 << 10)));
635                         }
636                         goto set_flags;
637                 default:
638                         SLJIT_ASSERT_STOP();
639                         break;
640                 }
641
642                 if (flags & ARG2_IMM) {
643                         if (arg2 == 0)
644                                 arg2 = TMP_ZERO;
645                         else {
646                                 FAIL_IF(load_immediate(compiler, TMP_REG2, arg2));
647                                 arg2 = TMP_REG2;
648                         }
649                 }
650                 else {
651                         if (arg1 == 0)
652                                 arg1 = TMP_ZERO;
653                         else {
654                                 FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
655                                 arg1 = TMP_REG1;
656                         }
657                 }
658         }
659
660         /* Both arguments are registers. */
661         switch (op) {
662         case SLJIT_MOV:
663         case SLJIT_MOV_P:
664         case SLJIT_MOVU:
665         case SLJIT_MOVU_P:
666                 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
667                 if (dst == arg2)
668                         return SLJIT_SUCCESS;
669                 return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(arg2));
670         case SLJIT_MOV_U8:
671         case SLJIT_MOVU_U8:
672                 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
673                 return push_inst(compiler, (UBFM ^ (1 << 31)) | RD(dst) | RN(arg2) | (7 << 10));
674         case SLJIT_MOV_S8:
675         case SLJIT_MOVU_S8:
676                 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
677                 if (!(flags & INT_OP))
678                         inv_bits |= 1 << 22;
679                 return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10));
680         case SLJIT_MOV_U16:
681         case SLJIT_MOVU_U16:
682                 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
683                 return push_inst(compiler, (UBFM ^ (1 << 31)) | RD(dst) | RN(arg2) | (15 << 10));
684         case SLJIT_MOV_S16:
685         case SLJIT_MOVU_S16:
686                 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
687                 if (!(flags & INT_OP))
688                         inv_bits |= 1 << 22;
689                 return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10));
690         case SLJIT_MOV_U32:
691         case SLJIT_MOVU_U32:
692                 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
693                 if ((flags & INT_OP) && dst == arg2)
694                         return SLJIT_SUCCESS;
695                 return push_inst(compiler, (ORR ^ (1 << 31)) | RD(dst) | RN(TMP_ZERO) | RM(arg2));
696         case SLJIT_MOV_S32:
697         case SLJIT_MOVU_S32:
698                 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
699                 if ((flags & INT_OP) && dst == arg2)
700                         return SLJIT_SUCCESS;
701                 return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(arg2) | (31 << 10));
702         case SLJIT_NOT:
703                 SLJIT_ASSERT(arg1 == TMP_REG1);
704                 FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2)));
705                 goto set_flags;
706         case SLJIT_NEG:
707                 SLJIT_ASSERT(arg1 == TMP_REG1);
708                 if (flags & SET_FLAGS)
709                         inv_bits |= 1 << 29;
710                 return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2));
711         case SLJIT_CLZ:
712                 SLJIT_ASSERT(arg1 == TMP_REG1);
713                 FAIL_IF(push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2)));
714                 goto set_flags;
715         case SLJIT_ADD:
716                 CHECK_FLAGS(1 << 29);
717                 return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
718         case SLJIT_ADDC:
719                 CHECK_FLAGS(1 << 29);
720                 return push_inst(compiler, (ADC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
721         case SLJIT_SUB:
722                 CHECK_FLAGS(1 << 29);
723                 return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
724         case SLJIT_SUBC:
725                 CHECK_FLAGS(1 << 29);
726                 return push_inst(compiler, (SBC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
727         case SLJIT_MUL:
728                 if (!(flags & SET_FLAGS))
729                         return push_inst(compiler, (MADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO));
730                 if (flags & INT_OP) {
731                         FAIL_IF(push_inst(compiler, SMADDL | RD(dst) | RN(arg1) | RM(arg2) | (31 << 10)));
732                         FAIL_IF(push_inst(compiler, ADD | RD(TMP_LR) | RN(TMP_ZERO) | RM(dst) | (2 << 22) | (31 << 10)));
733                         return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10));
734                 }
735                 FAIL_IF(push_inst(compiler, SMULH | RD(TMP_LR) | RN(arg1) | RM(arg2)));
736                 FAIL_IF(push_inst(compiler, MADD | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO)));
737                 return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10));
738         case SLJIT_AND:
739                 CHECK_FLAGS(3 << 29);
740                 return push_inst(compiler, (AND ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
741         case SLJIT_OR:
742                 FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
743                 goto set_flags;
744         case SLJIT_XOR:
745                 FAIL_IF(push_inst(compiler, (EOR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
746                 goto set_flags;
747         case SLJIT_SHL:
748                 FAIL_IF(push_inst(compiler, (LSLV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
749                 goto set_flags;
750         case SLJIT_LSHR:
751                 FAIL_IF(push_inst(compiler, (LSRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
752                 goto set_flags;
753         case SLJIT_ASHR:
754                 FAIL_IF(push_inst(compiler, (ASRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
755                 goto set_flags;
756         }
757
758         SLJIT_ASSERT_STOP();
759         return SLJIT_SUCCESS;
760
761 set_flags:
762         if (flags & SET_FLAGS)
763                 return push_inst(compiler, (SUBS ^ inv_bits) | RD(TMP_ZERO) | RN(dst) | RM(TMP_ZERO));
764         return SLJIT_SUCCESS;
765 }
766
/* Memory access flags. The two low bits (STORE, SIGNED) are used
   as the index into the sljit_mem_* opcode tables below. */
#define STORE		(1 << 0)
#define SIGNED		(1 << 1)

/* UPDATE selects the write-back forms of the access helpers;
   ARG_TEST makes getput_arg_fast report encodability without
   emitting any instruction. */
#define UPDATE		(1 << 2)
#define ARG_TEST	(1 << 3)

/* Access width, stored from bit 8 upwards as a shift amount
   (0: byte, 1: half, 2: int, 3: word). */
#define BYTE_SIZE	(0 << 8)
#define HALF_SIZE	(1 << 8)
#define INT_SIZE	(2 << 8)
#define WORD_SIZE	(3 << 8)

/* Extracts the width shift amount from a flags value. */
#define MEM_SIZE_SHIFT(flags) ((flags) >> 8)
779
780 static const sljit_ins sljit_mem_imm[4] = {
781 /* u l */ 0x39400000 /* ldrb [reg,imm] */,
782 /* u s */ 0x39000000 /* strb [reg,imm] */,
783 /* s l */ 0x39800000 /* ldrsb [reg,imm] */,
784 /* s s */ 0x39000000 /* strb [reg,imm] */,
785 };
786
787 static const sljit_ins sljit_mem_simm[4] = {
788 /* u l */ 0x38400000 /* ldurb [reg,imm] */,
789 /* u s */ 0x38000000 /* sturb [reg,imm] */,
790 /* s l */ 0x38800000 /* ldursb [reg,imm] */,
791 /* s s */ 0x38000000 /* sturb [reg,imm] */,
792 };
793
794 static const sljit_ins sljit_mem_pre_simm[4] = {
795 /* u l */ 0x38400c00 /* ldrb [reg,imm]! */,
796 /* u s */ 0x38000c00 /* strb [reg,imm]! */,
797 /* s l */ 0x38800c00 /* ldrsb [reg,imm]! */,
798 /* s s */ 0x38000c00 /* strb [reg,imm]! */,
799 };
800
801 static const sljit_ins sljit_mem_reg[4] = {
802 /* u l */ 0x38606800 /* ldrb [reg,reg] */,
803 /* u s */ 0x38206800 /* strb [reg,reg] */,
804 /* s l */ 0x38a06800 /* ldrsb [reg,reg] */,
805 /* s s */ 0x38206800 /* strb [reg,reg] */,
806 };
807
808 /* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */
809 static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value)
810 {
811         if (value >= 0) {
812                 if (value <= 0xfff)
813                         return push_inst(compiler, ADDI | RD(dst) | RN(reg) | (value << 10));
814                 if (value <= 0xffffff && !(value & 0xfff))
815                         return push_inst(compiler, ADDI | (1 << 22) | RD(dst) | RN(reg) | (value >> 2));
816         }
817         else {
818                 value = -value;
819                 if (value <= 0xfff)
820                         return push_inst(compiler, SUBI | RD(dst) | RN(reg) | (value << 10));
821                 if (value <= 0xffffff && !(value & 0xfff))
822                         return push_inst(compiler, SUBI | (1 << 22) | RD(dst) | RN(reg) | (value >> 2));
823         }
824         return SLJIT_ERR_UNSUPPORTED;
825 }
826
827 /* Can perform an operation using at most 1 instruction. */
828 static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
829 {
830         sljit_u32 shift = MEM_SIZE_SHIFT(flags);
831
832         SLJIT_ASSERT(arg & SLJIT_MEM);
833
834         if (SLJIT_UNLIKELY(flags & UPDATE)) {
835                 if ((arg & REG_MASK) && !(arg & OFFS_REG_MASK) && argw <= 255 && argw >= -256) {
836                         if (SLJIT_UNLIKELY(flags & ARG_TEST))
837                                 return 1;
838
839                         arg &= REG_MASK;
840                         argw &= 0x1ff;
841                         FAIL_IF(push_inst(compiler, sljit_mem_pre_simm[flags & 0x3]
842                                 | (shift << 30) | RT(reg) | RN(arg) | (argw << 12)));
843                         return -1;
844                 }
845                 return 0;
846         }
847
848         if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
849                 argw &= 0x3;
850                 if (argw && argw != shift)
851                         return 0;
852
853                 if (SLJIT_UNLIKELY(flags & ARG_TEST))
854                         return 1;
855
856                 FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg)
857                         | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0)));
858                 return -1;
859         }
860
861         arg &= REG_MASK;
862         if (argw >= 0 && (argw >> shift) <= 0xfff && (argw & ((1 << shift) - 1)) == 0) {
863                 if (SLJIT_UNLIKELY(flags & ARG_TEST))
864                         return 1;
865
866                 FAIL_IF(push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30)
867                         | RT(reg) | RN(arg) | (argw << (10 - shift))));
868                 return -1;
869         }
870
871         if (argw > 255 || argw < -256)
872                 return 0;
873
874         if (SLJIT_UNLIKELY(flags & ARG_TEST))
875                 return 1;
876
877         FAIL_IF(push_inst(compiler, sljit_mem_simm[flags & 0x3] | (shift << 30)
878                 | RT(reg) | RN(arg) | ((argw & 0x1ff) << 12)));
879         return -1;
880 }
881
882 /* see getput_arg below.
883    Note: can_cache is called only for binary operators. Those
884    operators always uses word arguments without write back. */
885 static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
886 {
887         sljit_sw diff;
888         if ((arg & OFFS_REG_MASK) || !(next_arg & SLJIT_MEM))
889                 return 0;
890
891         if (!(arg & REG_MASK)) {
892                 diff = argw - next_argw;
893                 if (diff <= 0xfff && diff >= -0xfff)
894                         return 1;
895                 return 0;
896         }
897
898         if (argw == next_argw)
899                 return 1;
900
901         diff = argw - next_argw;
902         if (arg == next_arg && diff <= 0xfff && diff >= -0xfff)
903                 return 1;
904
905         return 0;
906 }
907
908 /* Emit the necessary instructions. See can_cache above. */
909 static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
910         sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
911 {
912         sljit_u32 shift = MEM_SIZE_SHIFT(flags);
913         sljit_s32 tmp_r, other_r;
914         sljit_sw diff;
915
916         SLJIT_ASSERT(arg & SLJIT_MEM);
917         if (!(next_arg & SLJIT_MEM)) {
918                 next_arg = 0;
919                 next_argw = 0;
920         }
921
922         tmp_r = (flags & STORE) ? TMP_REG3 : reg;
923
924         if (SLJIT_UNLIKELY((flags & UPDATE) && (arg & REG_MASK))) {
925                 /* Update only applies if a base register exists. */
926                 other_r = OFFS_REG(arg);
927                 if (!other_r) {
928                         other_r = arg & REG_MASK;
929                         if (other_r != reg && argw >= 0 && argw <= 0xffffff) {
930                                 if ((argw & 0xfff) != 0)
931                                         FAIL_IF(push_inst(compiler, ADDI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10)));
932                                 if (argw >> 12)
933                                         FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(other_r) | RN(other_r) | ((argw >> 12) << 10)));
934                                 return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(other_r));
935                         }
936                         else if (other_r != reg && argw < 0 && argw >= -0xffffff) {
937                                 argw = -argw;
938                                 if ((argw & 0xfff) != 0)
939                                         FAIL_IF(push_inst(compiler, SUBI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10)));
940                                 if (argw >> 12)
941                                         FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(other_r) | RN(other_r) | ((argw >> 12) << 10)));
942                                 return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(other_r));
943                         }
944
945                         if (compiler->cache_arg == SLJIT_MEM) {
946                                 if (argw == compiler->cache_argw) {
947                                         other_r = TMP_REG3;
948                                         argw = 0;
949                                 }
950                                 else if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
951                                         FAIL_IF(compiler->error);
952                                         compiler->cache_argw = argw;
953                                         other_r = TMP_REG3;
954                                         argw = 0;
955                                 }
956                         }
957
958                         if (argw) {
959                                 FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
960                                 compiler->cache_arg = SLJIT_MEM;
961                                 compiler->cache_argw = argw;
962                                 other_r = TMP_REG3;
963                                 argw = 0;
964                         }
965                 }
966
967                 /* No caching here. */
968                 arg &= REG_MASK;
969                 argw &= 0x3;
970                 if (!argw || argw == shift) {
971                         FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(other_r) | (argw ? (1 << 12) : 0)));
972                         return push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r) | (argw << 10));
973                 }
974                 if (arg != reg) {
975                         FAIL_IF(push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r) | (argw << 10)));
976                         return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg));
977                 }
978                 FAIL_IF(push_inst(compiler, ADD | RD(TMP_LR) | RN(arg) | RM(other_r) | (argw << 10)));
979                 FAIL_IF(push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(TMP_LR)));
980                 return push_inst(compiler, ORR | RD(arg) | RN(TMP_ZERO) | RM(TMP_LR));
981         }
982
983         if (arg & OFFS_REG_MASK) {
984                 other_r = OFFS_REG(arg);
985                 arg &= REG_MASK;
986                 FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RN(arg) | RM(other_r) | ((argw & 0x3) << 10)));
987                 return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(tmp_r));
988         }
989
990         if (compiler->cache_arg == arg) {
991                 diff = argw - compiler->cache_argw;
992                 if (diff <= 255 && diff >= -256)
993                         return push_inst(compiler, sljit_mem_simm[flags & 0x3] | (shift << 30)
994                                 | RT(reg) | RN(TMP_REG3) | ((diff & 0x1ff) << 12));
995                 if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, diff) != SLJIT_ERR_UNSUPPORTED) {
996                         FAIL_IF(compiler->error);
997                         return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg));
998                 }
999         }
1000
1001         if (argw >= 0 && argw <= 0xffffff && (argw & ((1 << shift) - 1)) == 0) {
1002                 FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_r) | RN(arg & REG_MASK) | ((argw >> 12) << 10)));
1003                 return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30)
1004                         | RT(reg) | RN(tmp_r) | ((argw & 0xfff) << (10 - shift)));
1005         }
1006
1007         diff = argw - next_argw;
1008         next_arg = (arg & REG_MASK) && (arg == next_arg) && diff <= 0xfff && diff >= -0xfff && diff != 0;
1009         arg &= REG_MASK;
1010
1011         if (arg && compiler->cache_arg == SLJIT_MEM) {
1012                 if (compiler->cache_argw == argw)
1013                         return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3));
1014                 if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
1015                         FAIL_IF(compiler->error);
1016                         compiler->cache_argw = argw;
1017                         return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3));
1018                 }
1019         }
1020
1021         compiler->cache_argw = argw;
1022         if (next_arg && emit_set_delta(compiler, TMP_REG3, arg, argw) != SLJIT_ERR_UNSUPPORTED) {
1023                 FAIL_IF(compiler->error);
1024                 compiler->cache_arg = SLJIT_MEM | arg;
1025                 arg = 0;
1026         }
1027         else {
1028                 FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1029                 compiler->cache_arg = SLJIT_MEM;
1030
1031                 if (next_arg) {
1032                         FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RN(TMP_REG3) | RM(arg)));
1033                         compiler->cache_arg = SLJIT_MEM | arg;
1034                         arg = 0;
1035                 }
1036         }
1037
1038         if (arg)
1039                 return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3));
1040         return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(TMP_REG3));
1041 }
1042
1043 static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
1044 {
1045         if (getput_arg_fast(compiler, flags, reg, arg, argw))
1046                 return compiler->error;
1047         compiler->cache_arg = 0;
1048         compiler->cache_argw = 0;
1049         return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
1050 }
1051
1052 static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
1053 {
1054         if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
1055                 return compiler->error;
1056         return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
1057 }
1058
1059 /* --------------------------------------------------------------------- */
1060 /*  Entry, exit                                                          */
1061 /* --------------------------------------------------------------------- */
1062
1063 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
1064         sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
1065         sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1066 {
1067         sljit_s32 i, tmp, offs, prev, saved_regs_size;
1068
1069         CHECK_ERROR();
1070         CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
1071         set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
1072
1073         saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0);
1074         local_size += saved_regs_size + SLJIT_LOCALS_OFFSET;
1075         local_size = (local_size + 15) & ~0xf;
1076         compiler->local_size = local_size;
1077
1078         if (local_size <= (63 * sizeof(sljit_sw))) {
1079                 FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
1080                         | RN(TMP_SP) | ((-(local_size >> 3) & 0x7f) << 15)));
1081                 FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
1082                 offs = (local_size - saved_regs_size) << (15 - 3);
1083         } else {
1084                 offs = 0 << 15;
1085                 if (saved_regs_size & 0x8) {
1086                         offs = 1 << 15;
1087                         saved_regs_size += sizeof(sljit_sw);
1088                 }
1089                 local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET;
1090                 if (saved_regs_size > 0)
1091                         FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
1092         }
1093
1094         tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
1095         prev = -1;
1096         for (i = SLJIT_S0; i >= tmp; i--) {
1097                 if (prev == -1) {
1098                         if (!(offs & (1 << 15))) {
1099                                 prev = i;
1100                                 continue;
1101                         }
1102                         FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
1103                         offs += 1 << 15;
1104                         continue;
1105                 }
1106                 FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
1107                 offs += 2 << 15;
1108                 prev = -1;
1109         }
1110
1111         for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
1112                 if (prev == -1) {
1113                         if (!(offs & (1 << 15))) {
1114                                 prev = i;
1115                                 continue;
1116                         }
1117                         FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
1118                         offs += 1 << 15;
1119                         continue;
1120                 }
1121                 FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
1122                 offs += 2 << 15;
1123                 prev = -1;
1124         }
1125
1126         SLJIT_ASSERT(prev == -1);
1127
1128         if (compiler->local_size > (63 * sizeof(sljit_sw))) {
1129                 /* The local_size is already adjusted by the saved registers. */
1130                 if (local_size > 0xfff) {
1131                         FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22)));
1132                         local_size &= 0xfff;
1133                 }
1134                 if (local_size)
1135                         FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
1136                 FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
1137                         | RN(TMP_SP) | ((-(16 >> 3) & 0x7f) << 15)));
1138                 FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
1139         }
1140
1141         if (args >= 1)
1142                 FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0) | RN(TMP_ZERO) | RM(SLJIT_R0)));
1143         if (args >= 2)
1144                 FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S1) | RN(TMP_ZERO) | RM(SLJIT_R1)));
1145         if (args >= 3)
1146                 FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2)));
1147
1148         return SLJIT_SUCCESS;
1149 }
1150
1151 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1152         sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
1153         sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1154 {
1155         CHECK_ERROR();
1156         CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
1157         set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
1158
1159         local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0) + SLJIT_LOCALS_OFFSET;
1160         local_size = (local_size + 15) & ~0xf;
1161         compiler->local_size = local_size;
1162         return SLJIT_SUCCESS;
1163 }
1164
1165 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
1166 {
1167         sljit_s32 local_size;
1168         sljit_s32 i, tmp, offs, prev, saved_regs_size;
1169
1170         CHECK_ERROR();
1171         CHECK(check_sljit_emit_return(compiler, op, src, srcw));
1172
1173         FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
1174
1175         local_size = compiler->local_size;
1176
1177         saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 0);
1178         if (local_size <= (63 * sizeof(sljit_sw)))
1179                 offs = (local_size - saved_regs_size) << (15 - 3);
1180         else {
1181                 FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
1182                         | RN(TMP_SP) | (((16 >> 3) & 0x7f) << 15)));
1183                 offs = 0 << 15;
1184                 if (saved_regs_size & 0x8) {
1185                         offs = 1 << 15;
1186                         saved_regs_size += sizeof(sljit_sw);
1187                 }
1188                 local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET;
1189                 if (local_size > 0xfff) {
1190                         FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22)));
1191                         local_size &= 0xfff;
1192                 }
1193                 if (local_size)
1194                         FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
1195         }
1196
1197         tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
1198         prev = -1;
1199         for (i = SLJIT_S0; i >= tmp; i--) {
1200                 if (prev == -1) {
1201                         if (!(offs & (1 << 15))) {
1202                                 prev = i;
1203                                 continue;
1204                         }
1205                         FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
1206                         offs += 1 << 15;
1207                         continue;
1208                 }
1209                 FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
1210                 offs += 2 << 15;
1211                 prev = -1;
1212         }
1213
1214         for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
1215                 if (prev == -1) {
1216                         if (!(offs & (1 << 15))) {
1217                                 prev = i;
1218                                 continue;
1219                         }
1220                         FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
1221                         offs += 1 << 15;
1222                         continue;
1223                 }
1224                 FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
1225                 offs += 2 << 15;
1226                 prev = -1;
1227         }
1228
1229         SLJIT_ASSERT(prev == -1);
1230
1231         if (compiler->local_size <= (63 * sizeof(sljit_sw))) {
1232                 FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
1233                         | RN(TMP_SP) | (((local_size >> 3) & 0x7f) << 15)));
1234         } else if (saved_regs_size > 0) {
1235                 FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
1236         }
1237
1238         FAIL_IF(push_inst(compiler, RET | RN(TMP_LR)));
1239         return SLJIT_SUCCESS;
1240 }
1241
1242 /* --------------------------------------------------------------------- */
1243 /*  Operators                                                            */
1244 /* --------------------------------------------------------------------- */
1245
1246 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1247 {
1248         sljit_ins inv_bits = (op & SLJIT_I32_OP) ? (1 << 31) : 0;
1249
1250         CHECK_ERROR();
1251         CHECK(check_sljit_emit_op0(compiler, op));
1252
1253         op = GET_OPCODE(op);
1254         switch (op) {
1255         case SLJIT_BREAKPOINT:
1256                 return push_inst(compiler, BRK);
1257         case SLJIT_NOP:
1258                 return push_inst(compiler, NOP);
1259         case SLJIT_LMUL_UW:
1260         case SLJIT_LMUL_SW:
1261                 FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0)));
1262                 FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO)));
1263                 return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULH : SMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1));
1264         case SLJIT_DIVMOD_UW:
1265         case SLJIT_DIVMOD_SW:
1266                 FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0)));
1267                 FAIL_IF(push_inst(compiler, ((op == SLJIT_DIVMOD_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1)));
1268                 FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO)));
1269                 return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1));
1270         case SLJIT_DIV_UW:
1271         case SLJIT_DIV_SW:
1272                 return push_inst(compiler, ((op == SLJIT_DIV_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1));
1273         }
1274
1275         return SLJIT_SUCCESS;
1276 }
1277
1278 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1279         sljit_s32 dst, sljit_sw dstw,
1280         sljit_s32 src, sljit_sw srcw)
1281 {
1282         sljit_s32 dst_r, flags, mem_flags;
1283         sljit_s32 op_flags = GET_ALL_FLAGS(op);
1284
1285         CHECK_ERROR();
1286         CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1287         ADJUST_LOCAL_OFFSET(dst, dstw);
1288         ADJUST_LOCAL_OFFSET(src, srcw);
1289
1290         compiler->cache_arg = 0;
1291         compiler->cache_argw = 0;
1292
1293         dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
1294
1295         op = GET_OPCODE(op);
1296         if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
1297                 switch (op) {
1298                 case SLJIT_MOV:
1299                 case SLJIT_MOV_P:
1300                         flags = WORD_SIZE;
1301                         break;
1302                 case SLJIT_MOV_U8:
1303                         flags = BYTE_SIZE;
1304                         if (src & SLJIT_IMM)
1305                                 srcw = (sljit_u8)srcw;
1306                         break;
1307                 case SLJIT_MOV_S8:
1308                         flags = BYTE_SIZE | SIGNED;
1309                         if (src & SLJIT_IMM)
1310                                 srcw = (sljit_s8)srcw;
1311                         break;
1312                 case SLJIT_MOV_U16:
1313                         flags = HALF_SIZE;
1314                         if (src & SLJIT_IMM)
1315                                 srcw = (sljit_u16)srcw;
1316                         break;
1317                 case SLJIT_MOV_S16:
1318                         flags = HALF_SIZE | SIGNED;
1319                         if (src & SLJIT_IMM)
1320                                 srcw = (sljit_s16)srcw;
1321                         break;
1322                 case SLJIT_MOV_U32:
1323                         flags = INT_SIZE;
1324                         if (src & SLJIT_IMM)
1325                                 srcw = (sljit_u32)srcw;
1326                         break;
1327                 case SLJIT_MOV_S32:
1328                         flags = INT_SIZE | SIGNED;
1329                         if (src & SLJIT_IMM)
1330                                 srcw = (sljit_s32)srcw;
1331                         break;
1332                 case SLJIT_MOVU:
1333                 case SLJIT_MOVU_P:
1334                         flags = WORD_SIZE | UPDATE;
1335                         break;
1336                 case SLJIT_MOVU_U8:
1337                         flags = BYTE_SIZE | UPDATE;
1338                         if (src & SLJIT_IMM)
1339                                 srcw = (sljit_u8)srcw;
1340                         break;
1341                 case SLJIT_MOVU_S8:
1342                         flags = BYTE_SIZE | SIGNED | UPDATE;
1343                         if (src & SLJIT_IMM)
1344                                 srcw = (sljit_s8)srcw;
1345                         break;
1346                 case SLJIT_MOVU_U16:
1347                         flags = HALF_SIZE | UPDATE;
1348                         if (src & SLJIT_IMM)
1349                                 srcw = (sljit_u16)srcw;
1350                         break;
1351                 case SLJIT_MOVU_S16:
1352                         flags = HALF_SIZE | SIGNED | UPDATE;
1353                         if (src & SLJIT_IMM)
1354                                 srcw = (sljit_s16)srcw;
1355                         break;
1356                 case SLJIT_MOVU_U32:
1357                         flags = INT_SIZE | UPDATE;
1358                         if (src & SLJIT_IMM)
1359                                 srcw = (sljit_u32)srcw;
1360                         break;
1361                 case SLJIT_MOVU_S32:
1362                         flags = INT_SIZE | SIGNED | UPDATE;
1363                         if (src & SLJIT_IMM)
1364                                 srcw = (sljit_s32)srcw;
1365                         break;
1366                 default:
1367                         SLJIT_ASSERT_STOP();
1368                         flags = 0;
1369                         break;
1370                 }
1371
1372                 if (src & SLJIT_IMM)
1373                         FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw));
1374                 else if (src & SLJIT_MEM) {
1375                         if (getput_arg_fast(compiler, flags, dst_r, src, srcw))
1376                                 FAIL_IF(compiler->error);
1377                         else
1378                                 FAIL_IF(getput_arg(compiler, flags, dst_r, src, srcw, dst, dstw));
1379                 } else {
1380                         if (dst_r != TMP_REG1)
1381                                 return emit_op_imm(compiler, op | ((op_flags & SLJIT_I32_OP) ? INT_OP : 0), dst_r, TMP_REG1, src);
1382                         dst_r = src;
1383                 }
1384
1385                 if (dst & SLJIT_MEM) {
1386                         if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw))
1387                                 return compiler->error;
1388                         else
1389                                 return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0);
1390                 }
1391                 return SLJIT_SUCCESS;
1392         }
1393
1394         flags = GET_FLAGS(op_flags) ? SET_FLAGS : 0;
1395         mem_flags = WORD_SIZE;
1396         if (op_flags & SLJIT_I32_OP) {
1397                 flags |= INT_OP;
1398                 mem_flags = INT_SIZE;
1399         }
1400
1401         if (dst == SLJIT_UNUSED)
1402                 flags |= UNUSED_RETURN;
1403
1404         if (src & SLJIT_MEM) {
1405                 if (getput_arg_fast(compiler, mem_flags, TMP_REG2, src, srcw))
1406                         FAIL_IF(compiler->error);
1407                 else
1408                         FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src, srcw, dst, dstw));
1409                 src = TMP_REG2;
1410         }
1411
1412         if (src & SLJIT_IMM) {
1413                 flags |= ARG2_IMM;
1414                 if (op_flags & SLJIT_I32_OP)
1415                         srcw = (sljit_s32)srcw;
1416         } else
1417                 srcw = src;
1418
1419         emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, srcw);
1420
1421         if (dst & SLJIT_MEM) {
1422                 if (getput_arg_fast(compiler, mem_flags | STORE, dst_r, dst, dstw))
1423                         return compiler->error;
1424                 else
1425                         return getput_arg(compiler, mem_flags | STORE, dst_r, dst, dstw, 0, 0);
1426         }
1427         return SLJIT_SUCCESS;
1428 }
1429
1430 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
1431         sljit_s32 dst, sljit_sw dstw,
1432         sljit_s32 src1, sljit_sw src1w,
1433         sljit_s32 src2, sljit_sw src2w)
1434 {
1435         sljit_s32 dst_r, flags, mem_flags;
1436
1437         CHECK_ERROR();
1438         CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
1439         ADJUST_LOCAL_OFFSET(dst, dstw);
1440         ADJUST_LOCAL_OFFSET(src1, src1w);
1441         ADJUST_LOCAL_OFFSET(src2, src2w);
1442
1443         compiler->cache_arg = 0;
1444         compiler->cache_argw = 0;
1445
1446         dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
1447         flags = GET_FLAGS(op) ? SET_FLAGS : 0;
1448         mem_flags = WORD_SIZE;
1449         if (op & SLJIT_I32_OP) {
1450                 flags |= INT_OP;
1451                 mem_flags = INT_SIZE;
1452         }
1453
1454         if (dst == SLJIT_UNUSED)
1455                 flags |= UNUSED_RETURN;
1456
1457         if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, mem_flags | STORE | ARG_TEST, TMP_REG1, dst, dstw))
1458                 flags |= SLOW_DEST;
1459
1460         if (src1 & SLJIT_MEM) {
1461                 if (getput_arg_fast(compiler, mem_flags, TMP_REG1, src1, src1w))
1462                         FAIL_IF(compiler->error);
1463                 else
1464                         flags |= SLOW_SRC1;
1465         }
1466         if (src2 & SLJIT_MEM) {
1467                 if (getput_arg_fast(compiler, mem_flags, TMP_REG2, src2, src2w))
1468                         FAIL_IF(compiler->error);
1469                 else
1470                         flags |= SLOW_SRC2;
1471         }
1472
1473         if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
1474                 if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
1475                         FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, src1, src1w));
1476                         FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, dst, dstw));
1477                 }
1478                 else {
1479                         FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, src2, src2w));
1480                         FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, dst, dstw));
1481                 }
1482         }
1483         else if (flags & SLOW_SRC1)
1484                 FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, dst, dstw));
1485         else if (flags & SLOW_SRC2)
1486                 FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, dst, dstw));
1487
1488         if (src1 & SLJIT_MEM)
1489                 src1 = TMP_REG1;
1490         if (src2 & SLJIT_MEM)
1491                 src2 = TMP_REG2;
1492
1493         if (src1 & SLJIT_IMM)
1494                 flags |= ARG1_IMM;
1495         else
1496                 src1w = src1;
1497         if (src2 & SLJIT_IMM)
1498                 flags |= ARG2_IMM;
1499         else
1500                 src2w = src2;
1501
1502         emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w);
1503
1504         if (dst & SLJIT_MEM) {
1505                 if (!(flags & SLOW_DEST)) {
1506                         getput_arg_fast(compiler, mem_flags | STORE, dst_r, dst, dstw);
1507                         return compiler->error;
1508                 }
1509                 return getput_arg(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0);
1510         }
1511
1512         return SLJIT_SUCCESS;
1513 }
1514
1515 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
1516 {
1517         CHECK_REG_INDEX(check_sljit_get_register_index(reg));
1518         return reg_map[reg];
1519 }
1520
1521 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
1522 {
1523         CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
1524         return reg;
1525 }
1526
1527 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
1528         void *instruction, sljit_s32 size)
1529 {
1530         CHECK_ERROR();
1531         CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
1532
1533         return push_inst(compiler, *(sljit_ins*)instruction);
1534 }
1535
1536 /* --------------------------------------------------------------------- */
1537 /*  Floating point operators                                             */
1538 /* --------------------------------------------------------------------- */
1539
1540 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
1541 {
1542 #ifdef SLJIT_IS_FPU_AVAILABLE
1543         return SLJIT_IS_FPU_AVAILABLE;
1544 #else
1545         /* Available by default. */
1546         return 1;
1547 #endif
1548 }
1549
1550 static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
1551 {
1552         sljit_u32 shift = MEM_SIZE_SHIFT(flags);
1553         sljit_ins ins_bits = (shift << 30);
1554         sljit_s32 other_r;
1555         sljit_sw diff;
1556
1557         SLJIT_ASSERT(arg & SLJIT_MEM);
1558
1559         if (!(flags & STORE))
1560                 ins_bits |= 1 << 22;
1561
1562         if (arg & OFFS_REG_MASK) {
1563                 argw &= 3;
1564                 if (!argw || argw == shift)
1565                         return push_inst(compiler, STR_FR | ins_bits | VT(reg)
1566                                 | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0));
1567                 other_r = OFFS_REG(arg);
1568                 arg &= REG_MASK;
1569                 FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg) | RM(other_r) | (argw << 10)));
1570                 arg = TMP_REG1;
1571                 argw = 0;
1572         }
1573
1574         arg &= REG_MASK;
1575         if (arg && argw >= 0 && ((argw >> shift) <= 0xfff) && (argw & ((1 << shift) - 1)) == 0)
1576                 return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(arg) | (argw << (10 - shift)));
1577
1578         if (arg && argw <= 255 && argw >= -256)
1579                 return push_inst(compiler, STUR_FI | ins_bits | VT(reg) | RN(arg) | ((argw & 0x1ff) << 12));
1580
1581         /* Slow cases */
1582         if (compiler->cache_arg == SLJIT_MEM && argw != compiler->cache_argw) {
1583                 diff = argw - compiler->cache_argw;
1584                 if (!arg && diff <= 255 && diff >= -256)
1585                         return push_inst(compiler, STUR_FI | ins_bits | VT(reg) | RN(TMP_REG3) | ((diff & 0x1ff) << 12));
1586                 if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
1587                         FAIL_IF(compiler->error);
1588                         compiler->cache_argw = argw;
1589                 }
1590         }
1591
1592         if (compiler->cache_arg != SLJIT_MEM || argw != compiler->cache_argw) {
1593                 compiler->cache_arg = SLJIT_MEM;
1594                 compiler->cache_argw = argw;
1595                 FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1596         }
1597
1598         if (arg & REG_MASK)
1599                 return push_inst(compiler, STR_FR | ins_bits | VT(reg) | RN(arg) | RM(TMP_REG3));
1600         return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(TMP_REG3));
1601 }
1602
1603 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
1604         sljit_s32 dst, sljit_sw dstw,
1605         sljit_s32 src, sljit_sw srcw)
1606 {
1607         sljit_s32 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
1608         sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;
1609
1610         if (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64)
1611                 inv_bits |= (1 << 31);
1612
1613         if (src & SLJIT_MEM) {
1614                 emit_fop_mem(compiler, (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw);
1615                 src = TMP_FREG1;
1616         }
1617
1618         FAIL_IF(push_inst(compiler, (FCVTZS ^ inv_bits) | RD(dst_r) | VN(src)));
1619
1620         if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
1621                 return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) ? INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw);
1622         return SLJIT_SUCCESS;
1623 }
1624
1625 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
1626         sljit_s32 dst, sljit_sw dstw,
1627         sljit_s32 src, sljit_sw srcw)
1628 {
1629         sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1630         sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;
1631
1632         if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
1633                 inv_bits |= (1 << 31);
1634
1635         if (src & SLJIT_MEM) {
1636                 emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw);
1637                 src = TMP_REG1;
1638         } else if (src & SLJIT_IMM) {
1639 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1640                 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
1641                         srcw = (sljit_s32)srcw;
1642 #endif
1643                 FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
1644                 src = TMP_REG1;
1645         }
1646
1647         FAIL_IF(push_inst(compiler, (SCVTF ^ inv_bits) | VD(dst_r) | RN(src)));
1648
1649         if (dst & SLJIT_MEM)
1650                 return emit_fop_mem(compiler, ((op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw);
1651         return SLJIT_SUCCESS;
1652 }
1653
1654 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
1655         sljit_s32 src1, sljit_sw src1w,
1656         sljit_s32 src2, sljit_sw src2w)
1657 {
1658         sljit_s32 mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE;
1659         sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;
1660
1661         if (src1 & SLJIT_MEM) {
1662                 emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w);
1663                 src1 = TMP_FREG1;
1664         }
1665
1666         if (src2 & SLJIT_MEM) {
1667                 emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w);
1668                 src2 = TMP_FREG2;
1669         }
1670
1671         return push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2));
1672 }
1673
1674 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
1675         sljit_s32 dst, sljit_sw dstw,
1676         sljit_s32 src, sljit_sw srcw)
1677 {
1678         sljit_s32 dst_r, mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE;
1679         sljit_ins inv_bits;
1680
1681         CHECK_ERROR();
1682         compiler->cache_arg = 0;
1683         compiler->cache_argw = 0;
1684
1685         SLJIT_COMPILE_ASSERT((INT_SIZE ^ 0x100) == WORD_SIZE, must_be_one_bit_difference);
1686         SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
1687
1688         inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;
1689         dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1690
1691         if (src & SLJIT_MEM) {
1692                 emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) ? (mem_flags ^ 0x100) : mem_flags, dst_r, src, srcw);
1693                 src = dst_r;
1694         }
1695
1696         switch (GET_OPCODE(op)) {
1697         case SLJIT_MOV_F64:
1698                 if (src != dst_r) {
1699                         if (dst_r != TMP_FREG1)
1700                                 FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src)));
1701                         else
1702                                 dst_r = src;
1703                 }
1704                 break;
1705         case SLJIT_NEG_F64:
1706                 FAIL_IF(push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(src)));
1707                 break;
1708         case SLJIT_ABS_F64:
1709                 FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src)));
1710                 break;
1711         case SLJIT_CONV_F64_FROM_F32:
1712                 FAIL_IF(push_inst(compiler, FCVT | ((op & SLJIT_F32_OP) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src)));
1713                 break;
1714         }
1715
1716         if (dst & SLJIT_MEM)
1717                 return emit_fop_mem(compiler, mem_flags | STORE, dst_r, dst, dstw);
1718         return SLJIT_SUCCESS;
1719 }
1720
1721 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
1722         sljit_s32 dst, sljit_sw dstw,
1723         sljit_s32 src1, sljit_sw src1w,
1724         sljit_s32 src2, sljit_sw src2w)
1725 {
1726         sljit_s32 dst_r, mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE;
1727         sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;
1728
1729         CHECK_ERROR();
1730         CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
1731         ADJUST_LOCAL_OFFSET(dst, dstw);
1732         ADJUST_LOCAL_OFFSET(src1, src1w);
1733         ADJUST_LOCAL_OFFSET(src2, src2w);
1734
1735         compiler->cache_arg = 0;
1736         compiler->cache_argw = 0;
1737
1738         dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1739         if (src1 & SLJIT_MEM) {
1740                 emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w);
1741                 src1 = TMP_FREG1;
1742         }
1743         if (src2 & SLJIT_MEM) {
1744                 emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w);
1745                 src2 = TMP_FREG2;
1746         }
1747
1748         switch (GET_OPCODE(op)) {
1749         case SLJIT_ADD_F64:
1750                 FAIL_IF(push_inst(compiler, (FADD ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
1751                 break;
1752         case SLJIT_SUB_F64:
1753                 FAIL_IF(push_inst(compiler, (FSUB ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
1754                 break;
1755         case SLJIT_MUL_F64:
1756                 FAIL_IF(push_inst(compiler, (FMUL ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
1757                 break;
1758         case SLJIT_DIV_F64:
1759                 FAIL_IF(push_inst(compiler, (FDIV ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
1760                 break;
1761         }
1762
1763         if (!(dst & SLJIT_MEM))
1764                 return SLJIT_SUCCESS;
1765         return emit_fop_mem(compiler, mem_flags | STORE, TMP_FREG1, dst, dstw);
1766 }
1767
1768 /* --------------------------------------------------------------------- */
1769 /*  Other instructions                                                   */
1770 /* --------------------------------------------------------------------- */
1771
1772 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
1773 {
1774         CHECK_ERROR();
1775         CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
1776         ADJUST_LOCAL_OFFSET(dst, dstw);
1777
1778         /* For UNUSED dst. Uncommon, but possible. */
1779         if (dst == SLJIT_UNUSED)
1780                 return SLJIT_SUCCESS;
1781
1782         if (FAST_IS_REG(dst))
1783                 return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(TMP_LR));
1784
1785         /* Memory. */
1786         return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_LR, dst, dstw);
1787 }
1788
1789 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
1790 {
1791         CHECK_ERROR();
1792         CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
1793         ADJUST_LOCAL_OFFSET(src, srcw);
1794
1795         if (FAST_IS_REG(src))
1796                 FAIL_IF(push_inst(compiler, ORR | RD(TMP_LR) | RN(TMP_ZERO) | RM(src)));
1797         else if (src & SLJIT_MEM)
1798                 FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw));
1799         else if (src & SLJIT_IMM)
1800                 FAIL_IF(load_immediate(compiler, TMP_LR, srcw));
1801
1802         return push_inst(compiler, RET | RN(TMP_LR));
1803 }
1804
1805 /* --------------------------------------------------------------------- */
1806 /*  Conditional instructions                                             */
1807 /* --------------------------------------------------------------------- */
1808
1809 static sljit_uw get_cc(sljit_s32 type)
1810 {
1811         switch (type) {
1812         case SLJIT_EQUAL:
1813         case SLJIT_MUL_NOT_OVERFLOW:
1814         case SLJIT_EQUAL_F64:
1815                 return 0x1;
1816
1817         case SLJIT_NOT_EQUAL:
1818         case SLJIT_MUL_OVERFLOW:
1819         case SLJIT_NOT_EQUAL_F64:
1820                 return 0x0;
1821
1822         case SLJIT_LESS:
1823         case SLJIT_LESS_F64:
1824                 return 0x2;
1825
1826         case SLJIT_GREATER_EQUAL:
1827         case SLJIT_GREATER_EQUAL_F64:
1828                 return 0x3;
1829
1830         case SLJIT_GREATER:
1831         case SLJIT_GREATER_F64:
1832                 return 0x9;
1833
1834         case SLJIT_LESS_EQUAL:
1835         case SLJIT_LESS_EQUAL_F64:
1836                 return 0x8;
1837
1838         case SLJIT_SIG_LESS:
1839                 return 0xa;
1840
1841         case SLJIT_SIG_GREATER_EQUAL:
1842                 return 0xb;
1843
1844         case SLJIT_SIG_GREATER:
1845                 return 0xd;
1846
1847         case SLJIT_SIG_LESS_EQUAL:
1848                 return 0xc;
1849
1850         case SLJIT_OVERFLOW:
1851         case SLJIT_UNORDERED_F64:
1852                 return 0x7;
1853
1854         case SLJIT_NOT_OVERFLOW:
1855         case SLJIT_ORDERED_F64:
1856                 return 0x6;
1857
1858         default:
1859                 SLJIT_ASSERT_STOP();
1860                 return 0xe;
1861         }
1862 }
1863
1864 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
1865 {
1866         struct sljit_label *label;
1867
1868         CHECK_ERROR_PTR();
1869         CHECK_PTR(check_sljit_emit_label(compiler));
1870
1871         if (compiler->last_label && compiler->last_label->size == compiler->size)
1872                 return compiler->last_label;
1873
1874         label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
1875         PTR_FAIL_IF(!label);
1876         set_label(label, compiler);
1877         return label;
1878 }
1879
1880 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
1881 {
1882         struct sljit_jump *jump;
1883
1884         CHECK_ERROR_PTR();
1885         CHECK_PTR(check_sljit_emit_jump(compiler, type));
1886
1887         jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
1888         PTR_FAIL_IF(!jump);
1889         set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
1890         type &= 0xff;
1891
1892         if (type < SLJIT_JUMP) {
1893                 jump->flags |= IS_COND;
1894                 PTR_FAIL_IF(push_inst(compiler, B_CC | (6 << 5) | get_cc(type)));
1895         }
1896         else if (type >= SLJIT_FAST_CALL)
1897                 jump->flags |= IS_BL;
1898
1899         PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0));
1900         jump->addr = compiler->size;
1901         PTR_FAIL_IF(push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1)));
1902
1903         return jump;
1904 }
1905
1906 static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compiler, sljit_s32 type,
1907         sljit_s32 src, sljit_sw srcw)
1908 {
1909         struct sljit_jump *jump;
1910         sljit_ins inv_bits = (type & SLJIT_I32_OP) ? (1 << 31) : 0;
1911
1912         SLJIT_ASSERT((type & 0xff) == SLJIT_EQUAL || (type & 0xff) == SLJIT_NOT_EQUAL);
1913         ADJUST_LOCAL_OFFSET(src, srcw);
1914
1915         jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
1916         PTR_FAIL_IF(!jump);
1917         set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
1918         jump->flags |= IS_CBZ | IS_COND;
1919
1920         if (src & SLJIT_MEM) {
1921                 PTR_FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src, srcw));
1922                 src = TMP_REG1;
1923         }
1924         else if (src & SLJIT_IMM) {
1925                 PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
1926                 src = TMP_REG1;
1927         }
1928         SLJIT_ASSERT(FAST_IS_REG(src));
1929
1930         if ((type & 0xff) == SLJIT_EQUAL)
1931                 inv_bits |= 1 << 24;
1932
1933         PTR_FAIL_IF(push_inst(compiler, (CBZ ^ inv_bits) | (6 << 5) | RT(src)));
1934         PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0));
1935         jump->addr = compiler->size;
1936         PTR_FAIL_IF(push_inst(compiler, BR | RN(TMP_REG1)));
1937         return jump;
1938 }
1939
1940 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
1941 {
1942         struct sljit_jump *jump;
1943
1944         CHECK_ERROR();
1945         CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
1946         ADJUST_LOCAL_OFFSET(src, srcw);
1947
1948         /* In ARM, we don't need to touch the arguments. */
1949         if (!(src & SLJIT_IMM)) {
1950                 if (src & SLJIT_MEM) {
1951                         FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw));
1952                         src = TMP_REG1;
1953                 }
1954                 return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(src));
1955         }
1956
1957         jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
1958         FAIL_IF(!jump);
1959         set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
1960         jump->u.target = srcw;
1961
1962         FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0));
1963         jump->addr = compiler->size;
1964         return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1));
1965 }
1966
1967 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
1968         sljit_s32 dst, sljit_sw dstw,
1969         sljit_s32 src, sljit_sw srcw,
1970         sljit_s32 type)
1971 {
1972         sljit_s32 dst_r, flags, mem_flags;
1973         sljit_ins cc;
1974
1975         CHECK_ERROR();
1976         CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
1977         ADJUST_LOCAL_OFFSET(dst, dstw);
1978         ADJUST_LOCAL_OFFSET(src, srcw);
1979
1980         if (dst == SLJIT_UNUSED)
1981                 return SLJIT_SUCCESS;
1982
1983         cc = get_cc(type & 0xff);
1984         dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1985
1986         if (GET_OPCODE(op) < SLJIT_ADD) {
1987                 FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(dst_r) | RN(TMP_ZERO) | RM(TMP_ZERO)));
1988                 if (dst_r != TMP_REG1)
1989                         return SLJIT_SUCCESS;
1990                 return emit_op_mem(compiler, (GET_OPCODE(op) == SLJIT_MOV ? WORD_SIZE : INT_SIZE) | STORE, TMP_REG1, dst, dstw);
1991         }
1992
1993         compiler->cache_arg = 0;
1994         compiler->cache_argw = 0;
1995         flags = GET_FLAGS(op) ? SET_FLAGS : 0;
1996         mem_flags = WORD_SIZE;
1997         if (op & SLJIT_I32_OP) {
1998                 flags |= INT_OP;
1999                 mem_flags = INT_SIZE;
2000         }
2001
2002         if (src & SLJIT_MEM) {
2003                 FAIL_IF(emit_op_mem2(compiler, mem_flags, TMP_REG1, src, srcw, dst, dstw));
2004                 src = TMP_REG1;
2005                 srcw = 0;
2006         } else if (src & SLJIT_IMM)
2007                 flags |= ARG1_IMM;
2008
2009         FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(TMP_ZERO)));
2010         emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src, TMP_REG2);
2011
2012         if (dst_r != TMP_REG1)
2013                 return SLJIT_SUCCESS;
2014         return emit_op_mem2(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0);
2015 }
2016
2017 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
2018 {
2019         struct sljit_const *const_;
2020         sljit_s32 dst_r;
2021
2022         CHECK_ERROR_PTR();
2023         CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
2024         ADJUST_LOCAL_OFFSET(dst, dstw);
2025
2026         const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2027         PTR_FAIL_IF(!const_);
2028         set_const(const_, compiler);
2029
2030         dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2031         PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, init_value));
2032
2033         if (dst & SLJIT_MEM)
2034                 PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw));
2035         return const_;
2036 }
2037
2038 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
2039 {
2040         sljit_ins* inst = (sljit_ins*)addr;
2041         modify_imm64_const(inst, new_addr);
2042         SLJIT_CACHE_FLUSH(inst, inst + 4);
2043 }
2044
2045 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
2046 {
2047         sljit_ins* inst = (sljit_ins*)addr;
2048         modify_imm64_const(inst, new_constant);
2049         SLJIT_CACHE_FLUSH(inst, inst + 4);
2050 }