chiark / gitweb /
pcre3 (1:8.30-5) unstable; urgency=low
[pcre3.git] / sljit / sljitNativeX86_common.c
1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name()
28 {
29         return "x86" SLJIT_CPUINFO;
30 }
31
32 /*
33    32b register indexes:
34      0 - EAX
35      1 - ECX
36      2 - EDX
37      3 - EBX
38      4 - none
39      5 - EBP
40      6 - ESI
41      7 - EDI
42 */
43
44 /*
45    64b register indexes:
46      0 - RAX
47      1 - RCX
48      2 - RDX
49      3 - RBX
50      4 - none
51      5 - RBP
52      6 - RSI
53      7 - RDI
54      8 - R8   - From now on REX prefix is required
55      9 - R9
56     10 - R10
57     11 - R11
58     12 - R12
59     13 - R13
60     14 - R14
61     15 - R15
62 */
63
64 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
65
66 /* Last register + 1. */
67 #define TMP_REGISTER    (SLJIT_NO_REGISTERS + 1)
68
   /* Indexed by SLJIT register number (up to TMP_REGISTER); each value is the
      x86 register index from the "32b register indexes" table above.
      NOTE(review): the 0 entries other than the first register presumably
      belong to the *_EREG registers that CHECK_EXTRA_REGS turns into stack
      slots - confirm against sljitLir.h. */
69 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
70   0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
71 };
72
   /* If operand p is one of SLJIT_TEMPORARY_EREG1..2 or SLJIT_SAVED_EREG1..2,
      rewrite it in place into a memory operand based on SLJIT_LOCALS_REG
      (the offset is stored into w, computed from compiler->temporaries_start
      or compiler->saveds_start), then execute the statement passed as "do".
      The macro expands to a bare if / else-if, evaluates p several times and
      needs a variable named "compiler" in scope. */
73 #define CHECK_EXTRA_REGS(p, w, do) \
74         if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
75                 w = compiler->temporaries_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_w); \
76                 p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
77                 do; \
78         } \
79         else if (p >= SLJIT_SAVED_EREG1 && p <= SLJIT_SAVED_EREG2) { \
80                 w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_w); \
81                 p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
82                 do; \
83         }
84
85 #else /* SLJIT_CONFIG_X86_32 */
86
87 /* Last register + 1. */
88 #define TMP_REGISTER    (SLJIT_NO_REGISTERS + 1)
   /* Two further scratch register numbers, available in this branch only. */
89 #define TMP_REG2        (SLJIT_NO_REGISTERS + 2)
90 #define TMP_REG3        (SLJIT_NO_REGISTERS + 3)
91
92 /* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
93    Note: avoid using r12 and r13 for memory addressing,
94    therefore r12 is better for SAVED_EREG than SAVED_REG. */
95 #ifndef _WIN64
96 /* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
   /* Indexed by SLJIT register number (up to TMP_REG3); each value is the
      register index from the "64b register indexes" table above. */
97 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
98   0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
99 };
100 /* low-map. reg_map & 0x7. */
101 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
102   0, 0, 6, 1, 0, 3,  3, 7,  6,  5,  4,  4, 2, 7, 1
103 };
104 #else
105 /* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
    /* Same layout as the non-Windows tables, with a different assignment. */
106 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
107   0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 12, 15, 10, 8, 9
108 };
109 /* low-map. reg_map & 0x7. */
110 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
111   0, 0, 2, 1, 3,  5,  3, 6, 7,  6,  4,  7, 2,  0, 1
112 };
113 #endif
114
    /* REX prefix bytes: 0x40 is the bare prefix, the low four bits are the
       W, R, X and B flags respectively. */
115 #define REX_W           0x48
116 #define REX_R           0x44
117 #define REX_X           0x42
118 #define REX_B           0x41
119 #define REX             0x40
120
    /* "Half word" integer types. NOTE(review): the code below treats them as
       32 bits wide (see IS_HALFWORD); this assumes a 32-bit int - confirm for
       every supported compiler. */
121 typedef unsigned int sljit_uhw;
122 typedef int sljit_hw;
123
    /* True when x does / does not fit into a signed 32-bit value. */
124 #define IS_HALFWORD(x)          ((x) <= 0x7fffffffll && (x) >= -0x80000000ll)
125 #define NOT_HALFWORD(x)         ((x) > 0x7fffffffll || (x) < -0x80000000ll)
126
    /* In this branch CHECK_EXTRA_REGS expands to nothing. */
127 #define CHECK_EXTRA_REGS(p, w, do)
128
129 #endif /* SLJIT_CONFIG_X86_32 */
130
131 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
    /* Scratch floating point register number: one past SLJIT_FLOAT_REG4. */
132 #define TMP_FREG        (SLJIT_FLOAT_REG4 + 1)
133 #endif
134
135 /* Size flags for emit_x86_instruction: */
    /* The callers in this file OR these flags into the small instruction
       length they pass as the second argument (e.g. "1 | EX86_SHIFT_INS"). */
136 #define EX86_BIN_INS            0x0010
137 #define EX86_SHIFT_INS          0x0020
138 #define EX86_REX                0x0040
139 #define EX86_NO_REXW            0x0080
140 #define EX86_BYTE_ARG           0x0100
141 #define EX86_HALF_ARG           0x0200
142 #define EX86_PREF_66            0x0400
143
144 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
145 #define EX86_PREF_F2            0x0800
146 #define EX86_SSE2               0x1000
147 #endif
148
    /* Store s as the length byte of a new record in the compiler buffer
       (through the local pointer "buf" or "code") and add s to
       compiler->size. Both macros evaluate s twice and advance the pointer
       past the length byte. */
149 #define INC_SIZE(s)                     (*buf++ = (s), compiler->size += (s))
150 #define INC_CSIZE(s)                    (*code++ = (s), compiler->size += (s))
151
    /* Single instruction emitters writing through the local pointer "buf".
       r is a hardware register index that is added to the opcode, so it has
       to be in the 0..7 range. */
152 #define PUSH_REG(r)                     (*buf++ = (0x50 + (r)))
153 #define POP_REG(r)                      (*buf++ = (0x58 + (r)))
154 #define RET()                           (*buf++ = (0xc3))
    /* ret imm16: n is stored as the low byte, the high byte is always 0. */
155 #define RETN(n)                         (*buf++ = (0xc2), *buf++ = n, *buf++ = 0)
156 /* r32, r/m32 */
    /* Opcode 0x8b followed by a ModRM byte built from mod, reg and rm. */
157 #define MOV_RM(mod, reg, rm)            (*buf++ = (0x8b), *buf++ = (mod) << 6 | (reg) << 3 | (rm))
158
159 static sljit_ub get_jump_code(int type)
160 {
161         switch (type) {
162         case SLJIT_C_EQUAL:
163         case SLJIT_C_FLOAT_EQUAL:
164                 return 0x84;
165
166         case SLJIT_C_NOT_EQUAL:
167         case SLJIT_C_FLOAT_NOT_EQUAL:
168                 return 0x85;
169
170         case SLJIT_C_LESS:
171         case SLJIT_C_FLOAT_LESS:
172                 return 0x82;
173
174         case SLJIT_C_GREATER_EQUAL:
175         case SLJIT_C_FLOAT_GREATER_EQUAL:
176                 return 0x83;
177
178         case SLJIT_C_GREATER:
179         case SLJIT_C_FLOAT_GREATER:
180                 return 0x87;
181
182         case SLJIT_C_LESS_EQUAL:
183         case SLJIT_C_FLOAT_LESS_EQUAL:
184                 return 0x86;
185
186         case SLJIT_C_SIG_LESS:
187                 return 0x8c;
188
189         case SLJIT_C_SIG_GREATER_EQUAL:
190                 return 0x8d;
191
192         case SLJIT_C_SIG_GREATER:
193                 return 0x8f;
194
195         case SLJIT_C_SIG_LESS_EQUAL:
196                 return 0x8e;
197
198         case SLJIT_C_OVERFLOW:
199         case SLJIT_C_MUL_OVERFLOW:
200                 return 0x80;
201
202         case SLJIT_C_NOT_OVERFLOW:
203         case SLJIT_C_MUL_NOT_OVERFLOW:
204                 return 0x81;
205
206         case SLJIT_C_FLOAT_NAN:
207                 return 0x8a;
208
209         case SLJIT_C_FLOAT_NOT_NAN:
210                 return 0x8b;
211         }
212         return 0;
213 }
214
/* Forward declarations of jump emitters that are used below but not defined
   in this part of the file. NOTE(review): they are presumably defined in the
   sljitNativeX86_32.c / sljitNativeX86_64.c file included further down -
   confirm. Judging by the call sites, both return the advanced code pointer. */
215 static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, int type);
216
217 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
218 static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_w addr, int type);
219 #endif
220
221 static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, int type)
222 {
223         int short_jump;
224         sljit_uw label_addr;
225
226         if (jump->flags & JUMP_LABEL)
227                 label_addr = (sljit_uw)(code + jump->u.label->size);
228         else
229                 label_addr = jump->u.target;
230         short_jump = (sljit_w)(label_addr - (jump->addr + 2)) >= -128 && (sljit_w)(label_addr - (jump->addr + 2)) <= 127;
231
232 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
233         if ((sljit_w)(label_addr - (jump->addr + 1)) > 0x7fffffffll || (sljit_w)(label_addr - (jump->addr + 1)) < -0x80000000ll)
234                 return generate_far_jump_code(jump, code_ptr, type);
235 #endif
236
237         if (type == SLJIT_JUMP) {
238                 if (short_jump)
239                         *code_ptr++ = 0xeb;
240                 else
241                         *code_ptr++ = 0xe9;
242                 jump->addr++;
243         }
244         else if (type >= SLJIT_FAST_CALL) {
245                 short_jump = 0;
246                 *code_ptr++ = 0xe8;
247                 jump->addr++;
248         }
249         else if (short_jump) {
250                 *code_ptr++ = get_jump_code(type) - 0x10;
251                 jump->addr++;
252         }
253         else {
254                 *code_ptr++ = 0x0f;
255                 *code_ptr++ = get_jump_code(type);
256                 jump->addr += 2;
257         }
258
259         if (short_jump) {
260                 jump->flags |= PATCH_MB;
261                 code_ptr += sizeof(sljit_b);
262         } else {
263                 jump->flags |= PATCH_MW;
264 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
265                 code_ptr += sizeof(sljit_w);
266 #else
267                 code_ptr += sizeof(sljit_hw);
268 #endif
269         }
270
271         return code_ptr;
272 }
273
274 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
275 {
276         struct sljit_memory_fragment *buf;
277         sljit_ub *code;
278         sljit_ub *code_ptr;
279         sljit_ub *buf_ptr;
280         sljit_ub *buf_end;
281         sljit_ub len;
282
283         struct sljit_label *label;
284         struct sljit_jump *jump;
285         struct sljit_const *const_;
286
287         CHECK_ERROR_PTR();
288         check_sljit_generate_code(compiler);
289         reverse_buf(compiler);
290
291         /* Second code generation pass. */
292         code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
293         PTR_FAIL_WITH_EXEC_IF(code);
294         buf = compiler->buf;
295
296         code_ptr = code;
297         label = compiler->labels;
298         jump = compiler->jumps;
299         const_ = compiler->consts;
300         do {
301                 buf_ptr = buf->memory;
302                 buf_end = buf_ptr + buf->used_size;
303                 do {
304                         len = *buf_ptr++;
305                         if (len > 0) {
306                                 /* The code is already generated. */
307                                 SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
308                                 code_ptr += len;
309                                 buf_ptr += len;
310                         }
311                         else {
312                                 if (*buf_ptr >= 4) {
313                                         jump->addr = (sljit_uw)code_ptr;
314                                         if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
315                                                 code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
316                                         else
317                                                 code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
318                                         jump = jump->next;
319                                 }
320                                 else if (*buf_ptr == 0) {
321                                         label->addr = (sljit_uw)code_ptr;
322                                         label->size = code_ptr - code;
323                                         label = label->next;
324                                 }
325                                 else if (*buf_ptr == 1) {
326                                         const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_w);
327                                         const_ = const_->next;
328                                 }
329                                 else {
330 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
331                                         *code_ptr++ = (*buf_ptr == 2) ? 0xe8 /* call */ : 0xe9 /* jmp */;
332                                         buf_ptr++;
333                                         *(sljit_w*)code_ptr = *(sljit_w*)buf_ptr - ((sljit_w)code_ptr + sizeof(sljit_w));
334                                         code_ptr += sizeof(sljit_w);
335                                         buf_ptr += sizeof(sljit_w) - 1;
336 #else
337                                         code_ptr = generate_fixed_jump(code_ptr, *(sljit_w*)(buf_ptr + 1), *buf_ptr);
338                                         buf_ptr += sizeof(sljit_w);
339 #endif
340                                 }
341                                 buf_ptr++;
342                         }
343                 } while (buf_ptr < buf_end);
344                 SLJIT_ASSERT(buf_ptr == buf_end);
345                 buf = buf->next;
346         } while (buf);
347
348         SLJIT_ASSERT(!label);
349         SLJIT_ASSERT(!jump);
350         SLJIT_ASSERT(!const_);
351
352         jump = compiler->jumps;
353         while (jump) {
354                 if (jump->flags & PATCH_MB) {
355                         SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) >= -128 && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) <= 127);
356                         *(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_b)));
357                 } else if (jump->flags & PATCH_MW) {
358                         if (jump->flags & JUMP_LABEL) {
359 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
360                                 *(sljit_w*)jump->addr = (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_w)));
361 #else
362                                 SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
363                                 *(sljit_hw*)jump->addr = (sljit_hw)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw)));
364 #endif
365                         }
366                         else {
367 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
368                                 *(sljit_w*)jump->addr = (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_w)));
369 #else
370                                 SLJIT_ASSERT((sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
371                                 *(sljit_hw*)jump->addr = (sljit_hw)(jump->u.target - (jump->addr + sizeof(sljit_hw)));
372 #endif
373                         }
374                 }
375 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
376                 else if (jump->flags & PATCH_MD)
377                         *(sljit_w*)jump->addr = jump->u.label->addr;
378 #endif
379
380                 jump = jump->next;
381         }
382
383         /* Maybe we waste some space because of short jumps. */
384         SLJIT_ASSERT(code_ptr <= code + compiler->size);
385         compiler->error = SLJIT_ERR_COMPILED;
386         compiler->executable_size = compiler->size;
387         return (void*)code;
388 }
389
390 /* --------------------------------------------------------------------- */
391 /*  Operators                                                            */
392 /* --------------------------------------------------------------------- */
393
/* Forward declarations of instruction emitters. emit_mov is defined further
   down in this file; the two binary emitters are not defined in the part of
   the file visible here. NOTE(review): judging by the names, "cum" / "non_cum"
   presumably stand for commutative / non-commutative operations - confirm. */
394 static int emit_cum_binary(struct sljit_compiler *compiler,
395         sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
396         int dst, sljit_w dstw,
397         int src1, sljit_w src1w,
398         int src2, sljit_w src2w);
399
400 static int emit_non_cum_binary(struct sljit_compiler *compiler,
401         sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
402         int dst, sljit_w dstw,
403         int src1, sljit_w src1w,
404         int src2, sljit_w src2w);
405
406 static int emit_mov(struct sljit_compiler *compiler,
407         int dst, sljit_w dstw,
408         int src, sljit_w srcw);
409
410 static SLJIT_INLINE int emit_save_flags(struct sljit_compiler *compiler)
411 {
412         sljit_ub *buf;
413
414 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
415         buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
416         FAIL_IF(!buf);
417         INC_SIZE(5);
418         *buf++ = 0x9c; /* pushfd */
419 #else
420         buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
421         FAIL_IF(!buf);
422         INC_SIZE(6);
423         *buf++ = 0x9c; /* pushfq */
424         *buf++ = 0x48;
425 #endif
426         *buf++ = 0x8d; /* lea esp/rsp, [esp/rsp + sizeof(sljit_w)] */
427         *buf++ = 0x64;
428         *buf++ = 0x24;
429         *buf++ = sizeof(sljit_w);
430         compiler->flags_saved = 1;
431         return SLJIT_SUCCESS;
432 }
433
434 static SLJIT_INLINE int emit_restore_flags(struct sljit_compiler *compiler, int keep_flags)
435 {
436         sljit_ub *buf;
437
438 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
439         buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
440         FAIL_IF(!buf);
441         INC_SIZE(5);
442 #else
443         buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
444         FAIL_IF(!buf);
445         INC_SIZE(6);
446         *buf++ = 0x48;
447 #endif
448         *buf++ = 0x8d; /* lea esp/rsp, [esp/rsp - sizeof(sljit_w)] */
449         *buf++ = 0x64;
450         *buf++ = 0x24;
451         *buf++ = (sljit_ub)-(int)sizeof(sljit_w);
452         *buf++ = 0x9d; /* popfd / popfq */
453         compiler->flags_saved = keep_flags;
454         return SLJIT_SUCCESS;
455 }
456
457 #ifdef _WIN32
458 #include <malloc.h>
459
    /* Windows-only helper: allocates local_size bytes with alloca and
       discards the result. As the comment inside says, the only purpose is to
       get the compiler's _chkstk stack probe executed for that size; the
       allocation is gone again when the function returns.
       NOTE(review): presumably called from generated code before a large
       stack frame is set up - confirm against the architecture-specific
       files. */
460 static void SLJIT_CALL sljit_touch_stack(sljit_w local_size)
461 {
462         /* Workaround for calling _chkstk. */
463         alloca(local_size);
464 }
465 #endif
466
467 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
468 #include "sljitNativeX86_32.c"
469 #else
470 #include "sljitNativeX86_64.c"
471 #endif
472
473 static int emit_mov(struct sljit_compiler *compiler,
474         int dst, sljit_w dstw,
475         int src, sljit_w srcw)
476 {
477         sljit_ub* code;
478
479         if (dst == SLJIT_UNUSED) {
480                 /* No destination, doesn't need to setup flags. */
481                 if (src & SLJIT_MEM) {
482                         code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
483                         FAIL_IF(!code);
484                         *code = 0x8b;
485                 }
486                 return SLJIT_SUCCESS;
487         }
488         if (src >= SLJIT_TEMPORARY_REG1 && src <= TMP_REGISTER) {
489                 code = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
490                 FAIL_IF(!code);
491                 *code = 0x89;
492                 return SLJIT_SUCCESS;
493         }
494         if (src & SLJIT_IMM) {
495                 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
496 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
497                         return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
498 #else
499                         if (!compiler->mode32) {
500                                 if (NOT_HALFWORD(srcw))
501                                         return emit_load_imm64(compiler, dst, srcw);
502                         }
503                         else
504                                 return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, 0xb8 + reg_lmap[dst], srcw);
505 #endif
506                 }
507 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
508                 if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
509                         FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
510                         code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
511                         FAIL_IF(!code);
512                         *code = 0x89;
513                         return SLJIT_SUCCESS;
514                 }
515 #endif
516                 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
517                 FAIL_IF(!code);
518                 *code = 0xc7;
519                 return SLJIT_SUCCESS;
520         }
521         if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
522                 code = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
523                 FAIL_IF(!code);
524                 *code = 0x8b;
525                 return SLJIT_SUCCESS;
526         }
527
528         /* Memory to memory move. Requires two instruction. */
529         code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
530         FAIL_IF(!code);
531         *code = 0x8b;
532         code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
533         FAIL_IF(!code);
534         *code = 0x89;
535         return SLJIT_SUCCESS;
536 }
537
/* Calls emit_mov and hands its result to FAIL_IF, which leaves the calling
   function when the move could not be emitted. The expansion already ends in
   a semicolon, so "EMIT_MOV(...);" yields an extra empty statement; take care
   when using it as the unbraced body of an if that has an else. */
538 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
539         FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
540
/* Emits an operation that has no explicit operands: a breakpoint (int3), a
   nop, or a multiply / divide that works on SLJIT_TEMPORARY_REG1 and
   SLJIT_TEMPORARY_REG2 through the one-operand 0xf7 instruction group.
   Any opcode not listed in the switch emits nothing.
   Returns SLJIT_SUCCESS, or leaves early through CHECK_ERROR / FAIL_IF /
   EMIT_MOV when an earlier error is pending or emitting fails. */
541 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
542 {
543         sljit_ub *buf;
544 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
545         int size;
546 #endif
547
548         CHECK_ERROR();
549         check_sljit_emit_op0(compiler, op);
550
551         switch (GET_OPCODE(op)) {
552         case SLJIT_BREAKPOINT:
553                 buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
554                 FAIL_IF(!buf);
555                 INC_SIZE(1);
                    /* int3 */
556                 *buf = 0xcc;
557                 break;
558         case SLJIT_NOP:
559                 buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
560                 FAIL_IF(!buf);
561                 INC_SIZE(1);
                    /* nop */
562                 *buf = 0x90;
563                 break;
564         case SLJIT_UMUL:
565         case SLJIT_SMUL:
566         case SLJIT_UDIV:
567         case SLJIT_SDIV:
                    /* These instructions overwrite the CPU flags. */
568                 compiler->flags_saved = 0;
569 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
                    /* The code below depends on this register assignment:
                       SLJIT_TEMPORARY_REG1 must be register 0 (rax), and index 2
                       (rdx) must be SLJIT_TEMPORARY_REG2 on Win64 but
                       TMP_REGISTER elsewhere. */
570 #ifdef _WIN64
571                 SLJIT_COMPILE_ASSERT(
572                         reg_map[SLJIT_TEMPORARY_REG1] == 0
573                         && reg_map[SLJIT_TEMPORARY_REG2] == 2
574                         && reg_map[TMP_REGISTER] > 7,
575                         invalid_register_assignment_for_div_mul);
576 #else
577                 SLJIT_COMPILE_ASSERT(
578                         reg_map[SLJIT_TEMPORARY_REG1] == 0
579                         && reg_map[SLJIT_TEMPORARY_REG2] < 7
580                         && reg_map[TMP_REGISTER] == 2,
581                         invalid_register_assignment_for_div_mul);
582 #endif
583                 compiler->mode32 = op & SLJIT_INT_OP;
584 #endif
585
586                 op = GET_OPCODE(op);
587                 if (op == SLJIT_UDIV) {
                            /* Unsigned divide: clear the register that holds the upper
                               half of the dividend (xor reg, reg). Where that register
                               is SLJIT_TEMPORARY_REG2, the divisor is first copied to
                               TMP_REGISTER. */
588 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
589                         EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
590                         buf = emit_x86_instruction(compiler, 1, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0);
591 #else
592                         buf = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
593 #endif
594                         FAIL_IF(!buf);
595                         *buf = 0x33;
596                 }
597
598                 if (op == SLJIT_SDIV) {
                            /* Signed divide: the upper half of the dividend is produced
                               by sign extension (opcode 0x99) instead of being cleared. */
599 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
600                         EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
601 #endif
602
603                         /* CDQ instruction */
604 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
605                         buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
606                         FAIL_IF(!buf);
607                         INC_SIZE(1);
608                         *buf = 0x99;
609 #else
610                         if (compiler->mode32) {
611                                 buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
612                                 FAIL_IF(!buf);
613                                 INC_SIZE(1);
614                                 *buf = 0x99;
615                         } else {
                                    /* 64-bit form: the same opcode with a REX.W prefix. */
616                                 buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
617                                 FAIL_IF(!buf);
618                                 INC_SIZE(2);
619                                 *buf++ = REX_W;
620                                 *buf = 0x99;
621                         }
622 #endif
623                 }
624
                    /* Emit 0xf7 with a register-direct ModRM byte (0xc0 | reg). The
                       operation is selected afterwards by OR-ing the ModRM reg field.
                       For divides the operand is the register that now holds the
                       divisor; for multiplies it is SLJIT_TEMPORARY_REG2. */
625 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
626                 buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
627                 FAIL_IF(!buf);
628                 INC_SIZE(2);
629                 *buf++ = 0xf7;
630                 *buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_map[TMP_REGISTER] : reg_map[SLJIT_TEMPORARY_REG2]);
631 #else
                    /* A prefix byte is needed for 64-bit operation (REX.W) and, on
                       Win64, for divides because TMP_REGISTER is above index 7 (REX.B). */
632 #ifdef _WIN64
633                 size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
634 #else
635                 size = (!compiler->mode32) ? 3 : 2;
636 #endif
637                 buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
638                 FAIL_IF(!buf);
639                 INC_SIZE(size);
640 #ifdef _WIN64
641                 if (!compiler->mode32)
642                         *buf++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
643                 else if (op >= SLJIT_UDIV)
644                         *buf++ = REX_B;
645                 *buf++ = 0xf7;
646                 *buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REGISTER] : reg_lmap[SLJIT_TEMPORARY_REG2]);
647 #else
648                 if (!compiler->mode32)
649                         *buf++ = REX_W;
650                 *buf++ = 0xf7;
651                 *buf = 0xc0 | reg_map[SLJIT_TEMPORARY_REG2];
652 #endif
653 #endif
                    /* ModRM reg field of the 0xf7 group: /4 mul, /5 imul, /6 div, /7 idiv. */
654                 switch (op) {
655                 case SLJIT_UMUL:
656                         *buf |= 4 << 3;
657                         break;
658                 case SLJIT_SMUL:
659                         *buf |= 5 << 3;
660                         break;
661                 case SLJIT_UDIV:
662                         *buf |= 6 << 3;
663                         break;
664                 case SLJIT_SDIV:
665                         *buf |= 7 << 3;
666                         break;
667                 }
                    /* Here TMP_REGISTER is index 2 (rdx), which received the second
                       half of the result; copy it to SLJIT_TEMPORARY_REG2. */
668 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
669                 EMIT_MOV(compiler, SLJIT_TEMPORARY_REG2, 0, TMP_REGISTER, 0);
670 #endif
671                 break;
672         }
673
674         return SLJIT_SUCCESS;
675 }
676
/* Appends a one byte record holding "prefix" to the compiler buffer. Needs
   the local variables "compiler" and "code" (the latter is overwritten) and
   leaves the enclosing function through FAIL_IF when no buffer space could
   be obtained. Despite the name, the callers visible here also use it to
   emit a complete one byte instruction (0x90 + register). */
677 #define ENCODE_PREFIX(prefix) \
678         do { \
679                 code = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
680                 FAIL_IF(!code); \
681                 INC_CSIZE(1); \
682                 *code = (prefix); \
683         } while (0)
684
685 static int emit_mov_byte(struct sljit_compiler *compiler, int sign,
686         int dst, sljit_w dstw,
687         int src, sljit_w srcw)
688 {
689         sljit_ub* code;
690         int dst_r;
691 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
692         int work_r;
693 #endif
694
695 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
696         compiler->mode32 = 0;
697 #endif
698
699         if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
700                 return SLJIT_SUCCESS; /* Empty instruction. */
701
702         if (src & SLJIT_IMM) {
703                 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
704 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
705                         return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
706 #else
707                         return emit_load_imm64(compiler, dst, srcw);
708 #endif
709                 }
710                 code = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
711                 FAIL_IF(!code);
712                 *code = 0xc6;
713                 return SLJIT_SUCCESS;
714         }
715
716         dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
717
718         if ((dst & SLJIT_MEM) && src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) {
719 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
720                 if (reg_map[src] >= 4) {
721                         SLJIT_ASSERT(dst_r == TMP_REGISTER);
722                         EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
723                 } else
724                         dst_r = src;
725 #else
726                 dst_r = src;
727 #endif
728         }
729 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
730         else if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS && reg_map[src] >= 4) {
731                 /* src, dst are registers. */
732                 SLJIT_ASSERT(dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER);
733                 if (reg_map[dst] < 4) {
734                         if (dst != src)
735                                 EMIT_MOV(compiler, dst, 0, src, 0);
736                         code = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
737                         FAIL_IF(!code);
738                         *code++ = 0x0f;
739                         *code = sign ? 0xbe : 0xb6;
740                 }
741                 else {
742                         if (dst != src)
743                                 EMIT_MOV(compiler, dst, 0, src, 0);
744                         if (sign) {
745                                 /* shl reg, 24 */
746                                 code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
747                                 FAIL_IF(!code);
748                                 *code |= 0x4 << 3;
749                                 code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
750                                 FAIL_IF(!code);
751                                 /* shr/sar reg, 24 */
752                                 *code |= 0x7 << 3;
753                         }
754                         else {
755                                 /* and dst, 0xff */
756                                 code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 255, dst, 0);
757                                 FAIL_IF(!code);
758                                 *(code + 1) |= 0x4 << 3;
759                         }
760                 }
761                 return SLJIT_SUCCESS;
762         }
763 #endif
764         else {
765                 /* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
766                 code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
767                 FAIL_IF(!code);
768                 *code++ = 0x0f;
769                 *code = sign ? 0xbe : 0xb6;
770         }
771
772         if (dst & SLJIT_MEM) {
773 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
774                 if (dst_r == TMP_REGISTER) {
775                         /* Find a non-used register, whose reg_map[src] < 4. */
776                         if ((dst & 0xf) == SLJIT_TEMPORARY_REG1) {
777                                 if ((dst & 0xf0) == (SLJIT_TEMPORARY_REG2 << 4))
778                                         work_r = SLJIT_TEMPORARY_REG3;
779                                 else
780                                         work_r = SLJIT_TEMPORARY_REG2;
781                         }
782                         else {
783                                 if ((dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
784                                         work_r = SLJIT_TEMPORARY_REG1;
785                                 else if ((dst & 0xf) == SLJIT_TEMPORARY_REG2)
786                                         work_r = SLJIT_TEMPORARY_REG3;
787                                 else
788                                         work_r = SLJIT_TEMPORARY_REG2;
789                         }
790
791                         if (work_r == SLJIT_TEMPORARY_REG1) {
792                                 ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]);
793                         }
794                         else {
795                                 code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
796                                 FAIL_IF(!code);
797                                 *code = 0x87;
798                         }
799
800                         code = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
801                         FAIL_IF(!code);
802                         *code = 0x88;
803
804                         if (work_r == SLJIT_TEMPORARY_REG1) {
805                                 ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]);
806                         }
807                         else {
808                                 code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
809                                 FAIL_IF(!code);
810                                 *code = 0x87;
811                         }
812                 }
813                 else {
814                         code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
815                         FAIL_IF(!code);
816                         *code = 0x88;
817                 }
818 #else
819                 code = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
820                 FAIL_IF(!code);
821                 *code = 0x88;
822 #endif
823         }
824
825         return SLJIT_SUCCESS;
826 }
827
/*
 * Emit a 16-bit ("half") move from src to dst.
 *
 *   sign       non-zero selects opcode 0x0f 0xbf (movsx r, r/m16) for the
 *              load, zero selects 0x0f 0xb7 (movzx r, r/m16).
 *   dst, dstw  destination operand and its word argument.
 *   src, srcw  source operand and its word argument (the immediate value
 *              when src has SLJIT_IMM set).
 *
 * Returns SLJIT_SUCCESS on the paths visible here; FAIL_IF is defined
 * elsewhere and presumably returns an error code when
 * emit_x86_instruction() yields NULL.
 */
828 static int emit_mov_half(struct sljit_compiler *compiler, int sign,
829         int dst, sljit_w dstw,
830         int src, sljit_w srcw)
831 {
832         sljit_ub* code;
833         int dst_r;
834
835 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
836         compiler->mode32 = 0;
837 #endif
838
            /* A discarded result only needs code when the source is a memory operand. */
839         if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
840                 return SLJIT_SUCCESS; /* Empty instruction. */
841
842         if (src & SLJIT_IMM) {
                    /* Register destination: load the immediate at full register width. */
843                 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
844 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
845                         return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
846 #else
847                         return emit_load_imm64(compiler, dst, srcw);
848 #endif
849                 }
                    /* Otherwise opcode 0xc7 (mov r/m, imm); EX86_HALF_ARG and EX86_PREF_66 are
                       defined elsewhere and presumably request a 16-bit immediate with the
                       0x66 operand-size prefix. */
850                 code = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
851                 FAIL_IF(!code);
852                 *code = 0xc7;
853                 return SLJIT_SUCCESS;
854         }
855
            /* Load into dst itself when it is a register, otherwise into TMP_REGISTER. */
856         dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
857
            /* Register-to-memory: no extending load is emitted, the store below reads src directly. */
858         if ((dst & SLJIT_MEM) && (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS))
859                 dst_r = src;
860         else {
861                 code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
862                 FAIL_IF(!code);
863                 *code++ = 0x0f;
864                 *code = sign ? 0xbf : 0xb7;
865         }
866
867         if (dst & SLJIT_MEM) {
                    /* Opcode 0x89 (mov r/m, r) emitted with the EX86_PREF_66 flag for the store. */
868                 code = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
869                 FAIL_IF(!code);
870                 *code = 0x89;
871         }
872
873         return SLJIT_SUCCESS;
874 }
875
/*
 * Emit a one-operand instruction from the 0xf7 opcode group.
 *
 *   un_index   value OR-ed (shifted left by 3) into the byte that follows
 *              the opcode, i.e. the /digit extension of the instruction.
 *              The callers visible in this file section pass 0x2 for
 *              SLJIT_NOT and 0x3 for SLJIT_NEG (F7 /2 = NOT, F7 /3 = NEG).
 *   dst, dstw  destination operand; SLJIT_UNUSED discards the result.
 *   src, srcw  source operand.
 *
 * Four cases are handled in order: unused destination, source identical to
 * destination (operate in place), register destination (copy then operate),
 * and any other destination (operate in TMP_REGISTER, then store).
 *
 * Returns SLJIT_SUCCESS on the paths visible here; EMIT_MOV and FAIL_IF are
 * macros defined elsewhere and presumably return early on failure.
 */
876 static int emit_unary(struct sljit_compiler *compiler, int un_index,
877         int dst, sljit_w dstw,
878         int src, sljit_w srcw)
879 {
880         sljit_ub* code;
881
            /* Result not wanted: still perform the operation, on a copy in TMP_REGISTER. */
882         if (dst == SLJIT_UNUSED) {
883                 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
884                 code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
885                 FAIL_IF(!code);
886                 *code++ = 0xf7;
887                 *code |= (un_index) << 3;
888                 return SLJIT_SUCCESS;
889         }
890         if (dst == src && dstw == srcw) {
891                 /* Same input and output */
892                 code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
893                 FAIL_IF(!code);
894                 *code++ = 0xf7;
895                 *code |= (un_index) << 3;
896                 return SLJIT_SUCCESS;
897         }
            /* Register destination: copy the source into it, then operate in place. */
898         if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
899                 EMIT_MOV(compiler, dst, 0, src, srcw);
900                 code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
901                 FAIL_IF(!code);
902                 *code++ = 0xf7;
903                 *code |= (un_index) << 3;
904                 return SLJIT_SUCCESS;
905         }
            /* Any other destination: compute in TMP_REGISTER and store the result afterwards. */
906         EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
907         code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
908         FAIL_IF(!code);
909         *code++ = 0xf7;
910         *code |= (un_index) << 3;
911         EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
912         return SLJIT_SUCCESS;
913 }
914
/*
 * Emit a bitwise NOT that is followed by a flag-setting instruction.
 *
 * Each path emits opcode 0xf7 with extension 0x2 (F7 /2 = NOT) and then
 * opcode 0x0b (OR r, r/m) with the same register as both operands. The x86
 * NOT instruction leaves the status flags untouched, whereas OR of a
 * register with itself keeps the value and sets the flags from it.
 * sljit_emit_op1() calls this function when SLJIT_SET_E is present in op.
 *
 *   dst, dstw  destination operand; SLJIT_UNUSED discards the result.
 *   src, srcw  source operand.
 *
 * Returns SLJIT_SUCCESS on the paths visible here; EMIT_MOV and FAIL_IF are
 * macros defined elsewhere and presumably return early on failure.
 */
915 static int emit_not_with_flags(struct sljit_compiler *compiler,
916         int dst, sljit_w dstw,
917         int src, sljit_w srcw)
918 {
919         sljit_ub* code;
920
            /* Result not wanted: compute in TMP_REGISTER purely for the flags. */
921         if (dst == SLJIT_UNUSED) {
922                 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
923                 code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
924                 FAIL_IF(!code);
925                 *code++ = 0xf7;
926                 *code |= 0x2 << 3;
927                 code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
928                 FAIL_IF(!code);
929                 *code = 0x0b;
930                 return SLJIT_SUCCESS;
931         }
            /* Register destination: copy, NOT in place, then OR the register with itself. */
932         if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
933                 EMIT_MOV(compiler, dst, 0, src, srcw);
934                 code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
935                 FAIL_IF(!code);
936                 *code++ = 0xf7;
937                 *code |= 0x2 << 3;
938                 code = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
939                 FAIL_IF(!code);
940                 *code = 0x0b;
941                 return SLJIT_SUCCESS;
942         }
            /* Any other destination: same sequence in TMP_REGISTER, then store the result. */
943         EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
944         code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
945         FAIL_IF(!code);
946         *code++ = 0xf7;
947         *code |= 0x2 << 3;
948         code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
949         FAIL_IF(!code);
950         *code = 0x0b;
951         EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
952         return SLJIT_SUCCESS;
953 }
954
/*
 * Emit code for SLJIT_CLZ (count leading zeros).
 *
 * The emitted sequence is:
 *   1. opcode 0x0f 0xbd (BSR) from src into TMP_REGISTER;
 *   2. load dst_r with 32 + 31 (64 + 63 for the full-width x86-64 case);
 *   3. opcode 0x0f 0x45 (CMOVNE) from TMP_REGISTER into dst_r, which
 *      replaces that constant with the BSR result when the source was
 *      non-zero (BSR sets the zero flag for a zero source);
 *   4. immediate-group instruction with extension 0x6 (XOR) by 31 (or 63),
 *      turning a bit index i into 31 - i and the constant 63 into 32
 *      (respectively 63 - i and 64).
 *
 *   op         only inspected on x86-64, where SLJIT_INT_OP selects the
 *              32-bit widths; SLJIT_UNUSED_ARG(op) covers the other build.
 *   dst, dstw  destination operand; SLJIT_UNUSED discards the result.
 *   src, srcw  source operand; an immediate is first moved to TMP_REGISTER.
 *
 * Returns SLJIT_SUCCESS on the paths visible here; EMIT_MOV and FAIL_IF are
 * macros defined elsewhere and presumably return early on failure.
 */
955 static int emit_clz(struct sljit_compiler *compiler, int op,
956         int dst, sljit_w dstw,
957         int src, sljit_w srcw)
958 {
959         sljit_ub* code;
960         int dst_r;
961
962         SLJIT_UNUSED_ARG(op);
963         if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
964                 /* Just set the zero flag. */
                    /* NOT (F7 /2) followed by a shift with extension 0x5 (SHR) by width - 1:
                       the value left in TMP_REGISTER is zero exactly when the top bit of the
                       source was set. */
965                 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
966                 code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
967                 FAIL_IF(!code);
968                 *code++ = 0xf7;
969                 *code |= 0x2 << 3;
970 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
971                 code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REGISTER, 0);
972 #else
973                 code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, TMP_REGISTER, 0);
974 #endif
975                 FAIL_IF(!code);
976                 *code |= 0x5 << 3;
977                 return SLJIT_SUCCESS;
978         }
979
            /* An immediate source is materialised in TMP_REGISTER before the BSR below. */
980         if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
981                 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
982                 src = TMP_REGISTER;
983                 srcw = 0;
984         }
985
            /* BSR TMP_REGISTER, src */
986         code = emit_x86_instruction(compiler, 2, TMP_REGISTER, 0, src, srcw);
987         FAIL_IF(!code);
988         *code++ = 0x0f;
989         *code = 0xbd;
990
991 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
992         if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER)
993                 dst_r = dst;
994         else {
995                 /* Find an unused temporary register. */
                    /* "Unused" means the register appears neither in the low four bits of dst
                       nor in bits 4-7 (presumably the base and index register fields). Its
                       current value is parked in the destination memory here and swapped
                       back by the exchange at the end of the function. */
996                 if ((dst & 0xf) != SLJIT_TEMPORARY_REG1 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
997                         dst_r = SLJIT_TEMPORARY_REG1;
998                 else if ((dst & 0xf) != SLJIT_TEMPORARY_REG2 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG2 << 4))
999                         dst_r = SLJIT_TEMPORARY_REG2;
1000                 else
1001                         dst_r = SLJIT_TEMPORARY_REG3;
1002                 EMIT_MOV(compiler, dst, dstw, dst_r, 0);
1003         }
1004         EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
1005 #else
             /* x86-64: TMP_REG2 serves as scratch. The constant is loaded with mode32
                cleared, after which mode32 is set again from the SLJIT_INT_OP bit of op. */
1006         dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REG2;
1007         compiler->mode32 = 0;
1008         EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
1009         compiler->mode32 = op & SLJIT_INT_OP;
1010 #endif
1011
             /* CMOVNE dst_r, TMP_REGISTER */
1012         code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REGISTER, 0);
1013         FAIL_IF(!code);
1014         *code++ = 0x0f;
1015         *code = 0x45;
1016
             /* XOR dst_r, 31 (or 63) */
1017 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1018         code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
1019 #else
1020         code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
1021 #endif
1022         FAIL_IF(!code);
1023         *(code + 1) |= 0x6 << 3;
1024
1025 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1026         if (dst & SLJIT_MEM) {
                     /* Opcode 0x87 (XCHG): stores the result and brings back the register
                        value saved in the destination earlier. */
1027                 code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
1028                 FAIL_IF(!code);
1029                 *code = 0x87;
1030         }
1031 #else
1032         if (dst & SLJIT_MEM)
1033                 EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
1034 #endif
1035         return SLJIT_SUCCESS;
1036 }
1037
/*
 * Public entry point for single-operand operations.
 *
 * Handles the move family (opcodes SLJIT_MOV .. SLJIT_MOVU_SI) and the
 * SLJIT_NOT, SLJIT_NEG and SLJIT_CLZ opcodes. Any other opcode falls out of
 * the final switch and returns SLJIT_SUCCESS without emitting code here.
 *
 *   op         opcode combined with option bits; SLJIT_INT_OP, SLJIT_SET_E
 *              and SLJIT_KEEP_FLAGS are the ones tested in this function.
 *   dst, dstw  destination operand and its word argument.
 *   src, srcw  source operand and its word argument (the immediate value
 *              when src has SLJIT_IMM set).
 *
 * Returns SLJIT_SUCCESS, or the result of the helper that a path returns
 * directly; CHECK_ERROR and FAIL_IF are macros defined elsewhere and
 * presumably return early on failure.
 */
1038 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op,
1039         int dst, sljit_w dstw,
1040         int src, sljit_w srcw)
1041 {
1042         sljit_ub* code;
1043         int update = 0;
1044 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1045         int dst_is_ereg = 0;
1046         int src_is_ereg = 0;
1047 #else
             /* On x86-64 src_is_ereg is a constant 0 so that the shared condition further
                down compiles unchanged; it is undefined again at the end of the function. */
1048         #define src_is_ereg 0
1049 #endif
1050
1051         CHECK_ERROR();
1052         check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
1053
1054 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1055         compiler->mode32 = op & SLJIT_INT_OP;
1056 #endif
             /* CHECK_EXTRA_REGS is defined elsewhere; as used here it may rewrite the
                operand and execute its third argument (setting the *_is_ereg flag). */
1057         CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
1058         CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
1059
1060         if (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI) {
1061                 op = GET_OPCODE(op);
1062 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1063                 compiler->mode32 = 0;
1064 #endif
1065
                     /* Every MOVU opcode sits exactly 7 above its plain MOV counterpart, so
                        subtracting 7 maps it to the matching case of the switch below. */
1066                 SLJIT_COMPILE_ASSERT(SLJIT_MOV + 7 == SLJIT_MOVU, movu_offset);
1067                 if (op >= SLJIT_MOVU) {
1068                         update = 1;
1069                         op -= 7;
1070                 }
1071
1072                 if (src & SLJIT_IMM) {
                             /* Narrow the immediate to the width and signedness of the move. */
1073                         switch (op) {
1074                         case SLJIT_MOV_UB:
1075                                 srcw = (unsigned char)srcw;
1076                                 break;
1077                         case SLJIT_MOV_SB:
1078                                 srcw = (signed char)srcw;
1079                                 break;
1080                         case SLJIT_MOV_UH:
1081                                 srcw = (unsigned short)srcw;
1082                                 break;
1083                         case SLJIT_MOV_SH:
1084                                 srcw = (signed short)srcw;
1085                                 break;
1086 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1087                         case SLJIT_MOV_UI:
1088                                 srcw = (unsigned int)srcw;
1089                                 break;
1090                         case SLJIT_MOV_SI:
1091                                 srcw = (signed int)srcw;
1092                                 break;
1093 #endif
1094                         }
1095 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                             /* With the immediate already narrowed, a plain move suffices. */
1096                         if (SLJIT_UNLIKELY(dst_is_ereg))
1097                                 return emit_mov(compiler, dst, dstw, src, srcw);
1098 #endif
1099                 }
1100
                     /* Updating form with a memory source that has a non-zero offset or a
                        non-zero value in bits 4-7: opcode 0x8d (LEA) writes the effective
                        address into the register held in the low four bits of src, and the
                        load then addresses memory through that register alone. */
1101                 if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & 0xf) && (srcw != 0 || (src & 0xf0) != 0)) {
1102                         code = emit_x86_instruction(compiler, 1, src & 0xf, 0, src, srcw);
1103                         FAIL_IF(!code);
1104                         *code = 0x8d;
1105                         src &= SLJIT_MEM | 0xf;
1106                         srcw = 0;
1107                 }
1108
1109 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                     /* For a destination flagged by CHECK_EXTRA_REGS, anything other than a
                        word-sized move from a non-memory source is routed through
                        TMP_REGISTER and written back after the switch. */
1110                 if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI) || (src & SLJIT_MEM))) {
1111                         SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG));
1112                         dst = TMP_REGISTER;
1113                 }
1114 #endif
1115
                     /* On x86-32 the UI/SI variants share the plain emit_mov() case. */
1116                 switch (op) {
1117                 case SLJIT_MOV:
1118 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1119                 case SLJIT_MOV_UI:
1120                 case SLJIT_MOV_SI:
1121 #endif
1122                         FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
1123                         break;
1124                 case SLJIT_MOV_UB:
1125                         FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw));
1126                         break;
1127                 case SLJIT_MOV_SB:
1128                         FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw));
1129                         break;
1130                 case SLJIT_MOV_UH:
1131                         FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw));
1132                         break;
1133                 case SLJIT_MOV_SH:
1134                         FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw));
1135                         break;
1136 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1137                 case SLJIT_MOV_UI:
1138                         FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned int)srcw : srcw));
1139                         break;
1140                 case SLJIT_MOV_SI:
1141                         FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed int)srcw : srcw));
1142                         break;
1143 #endif
1144                 }
1145
1146 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                     /* Write-back for the TMP_REGISTER detour set up before the switch. */
1147                 if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REGISTER)
1148                         return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REGISTER, 0);
1149 #endif
1150
                     /* Updating form with a memory destination: after the store, LEA (0x8d)
                        writes the effective address into the register held in the low four
                        bits of dst. */
1151                 if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & 0xf) && (dstw != 0 || (dst & 0xf0) != 0)) {
1152                         code = emit_x86_instruction(compiler, 1, dst & 0xf, 0, dst, dstw);
1153                         FAIL_IF(!code);
1154                         *code = 0x8d;
1155                 }
1156                 return SLJIT_SUCCESS;
1157         }
1158
             /* An operation that requests flags clears the flags_saved marker. */
1159         if (SLJIT_UNLIKELY(GET_FLAGS(op)))
1160                 compiler->flags_saved = 0;
1161
1162         switch (GET_OPCODE(op)) {
1163         case SLJIT_NOT:
                     /* With SLJIT_SET_E the flag-setting variant is used; otherwise F7 /2. */
1164                 if (SLJIT_UNLIKELY(op & SLJIT_SET_E))
1165                         return emit_not_with_flags(compiler, dst, dstw, src, srcw);
1166                 return emit_unary(compiler, 0x2, dst, dstw, src, srcw);
1167
1168         case SLJIT_NEG:
                     /* With SLJIT_KEEP_FLAGS, save the flags first unless already saved. */
1169                 if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
1170                         FAIL_IF(emit_save_flags(compiler));
1171                 return emit_unary(compiler, 0x3, dst, dstw, src, srcw);
1172
1173         case SLJIT_CLZ:
1174                 if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
1175                         FAIL_IF(emit_save_flags(compiler));
1176                 return emit_clz(compiler, op, dst, dstw, src, srcw);
1177         }
1178
1179         return SLJIT_SUCCESS;
1180
1181 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1182         #undef src_is_ereg
1183 #endif
1184 }
1185
/*
 * Helper macros for binary operations with an immediate operand.
 *
 * Both macros expand to statements that use the identifiers `compiler` and
 * (for BINARY_IMM) `code` from the enclosing function, and they go through
 * FAIL_IF, which is defined elsewhere and presumably returns from that
 * function on failure.
 *
 * BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw)
 *   Emits the immediate form and ORs _op_imm_ into the second byte returned
 *   by emit_x86_instruction(). On x86-64 this form is used only when
 *   IS_HALFWORD(immw) holds or compiler->mode32 is set; otherwise the
 *   immediate is loaded into TMP_REG2 with emit_load_imm64() and the
 *   instruction is emitted with opcode _op_mr_ and TMP_REG2 as the register
 *   operand. _op_mr_ is unused in the x86-32 variant.
 *
 * BINARY_EAX_IMM(_op_eax_imm_, immw)
 *   Emits opcode _op_eax_imm_ followed by the immediate through
 *   emit_do_imm32() (x86-64, with REX_W unless mode32 is set) or
 *   emit_do_imm() (x86-32).
 *
 * NOTE(review): BINARY_IMM is not wrapped in do { } while (0), and the
 * x86-32 variant expands to several statements. Every use visible in this
 * part of the file sits inside a braced block; keep it that way.
 */
1186 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1187
1188 #define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
1189         if (IS_HALFWORD(immw) || compiler->mode32) { \
1190                 code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1191                 FAIL_IF(!code); \
1192                 *(code + 1) |= (_op_imm_); \
1193         } \
1194         else { \
1195                 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
1196                 code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
1197                 FAIL_IF(!code); \
1198                 *code = (_op_mr_); \
1199         }
1200
1201 #define BINARY_EAX_IMM(_op_eax_imm_, immw) \
1202         FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (_op_eax_imm_), immw))
1203
1204 #else
1205
1206 #define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
1207         code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1208         FAIL_IF(!code); \
1209         *(code + 1) |= (_op_imm_);
1210
1211 #define BINARY_EAX_IMM(_op_eax_imm_, immw) \
1212         FAIL_IF(emit_do_imm(compiler, (_op_eax_imm_), immw))
1213
1214 #endif
1215
/*
 * Emit a two-operand instruction whose operands may be swapped (the file's
 * comments call these "cumulative" operations).
 *
 *   op_rm       opcode written when the first emit argument is a register
 *               and the second is src (register <- register/memory form).
 *   op_mr       opcode written when the register is the source and the
 *               destination operand comes second (memory <- register form).
 *   op_imm      value OR-ed into the instruction by BINARY_IMM for the
 *               immediate form.
 *   op_eax_imm  opcode passed to BINARY_EAX_IMM, used when dst is
 *               SLJIT_TEMPORARY_REG1 and the immediate lies outside
 *               -128..127 (on x86-64 additionally only when mode32 is set
 *               or IS_HALFWORD() accepts the immediate).
 *   dst, dstw   destination operand; SLJIT_UNUSED discards the result.
 *   src1/src2   the two source operands with their word arguments.
 *
 * Cases, in order: unused destination (compute in TMP_REGISTER), dst equal
 * to src1 (operate in place with src2), dst equal to src2 (operate in place
 * with src1, valid only because the operands may be swapped), and the
 * general case (copy src1, then apply src2).
 *
 * Returns SLJIT_SUCCESS on the paths visible here; EMIT_MOV, FAIL_IF and the
 * BINARY_* macros presumably return early on failure.
 */
1216 static int emit_cum_binary(struct sljit_compiler *compiler,
1217         sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
1218         int dst, sljit_w dstw,
1219         int src1, sljit_w src1w,
1220         int src2, sljit_w src2w)
1221 {
1222         sljit_ub* code;
1223
             /* Result not wanted: perform the operation on a copy of src1 in TMP_REGISTER. */
1224         if (dst == SLJIT_UNUSED) {
1225                 EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1226                 if (src2 & SLJIT_IMM) {
1227                         BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
1228                 }
1229                 else {
1230                         code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1231                         FAIL_IF(!code);
1232                         *code = op_rm;
1233                 }
1234                 return SLJIT_SUCCESS;
1235         }
1236
             /* dst is also the first source: operate on it in place. */
1237         if (dst == src1 && dstw == src1w) {
1238                 if (src2 & SLJIT_IMM) {
1239 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1240                         if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1241 #else
1242                         if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
1243 #endif
1244                                 BINARY_EAX_IMM(op_eax_imm, src2w);
1245                         }
1246                         else {
1247                                 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1248                         }
1249                 }
1250                 else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
1251                         code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1252                         FAIL_IF(!code);
1253                         *code = op_rm;
1254                 }
                     /* Note the upper bound: TMP_REGISTER is accepted as src2 here, unlike in
                        the mirrored src1 test further down. */
1255                 else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= TMP_REGISTER) {
1256                         /* Special exception for sljit_emit_cond_value. */
1257                         code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1258                         FAIL_IF(!code);
1259                         *code = op_mr;
1260                 }
1261                 else {
                             /* Neither operand is a register: stage src2 in TMP_REGISTER. */
1262                         EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
1263                         code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
1264                         FAIL_IF(!code);
1265                         *code = op_mr;
1266                 }
1267                 return SLJIT_SUCCESS;
1268         }
1269
1270         /* Only for cumulative operations. */
1271         if (dst == src2 && dstw == src2w) {
1272                 if (src1 & SLJIT_IMM) {
1273 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1274                         if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1275 #else
1276                         if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128)) {
1277 #endif
1278                                 BINARY_EAX_IMM(op_eax_imm, src1w);
1279                         }
1280                         else {
1281                                 BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
1282                         }
1283                 }
1284                 else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
1285                         code = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
1286                         FAIL_IF(!code);
1287                         *code = op_rm;
1288                 }
1289                 else if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
1290                         code = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
1291                         FAIL_IF(!code);
1292                         *code = op_mr;
1293                 }
1294                 else {
1295                         EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1296                         code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
1297                         FAIL_IF(!code);
1298                         *code = op_mr;
1299                 }
1300                 return SLJIT_SUCCESS;
1301         }
1302
1303         /* General version. */
1304         if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
1305                 EMIT_MOV(compiler, dst, 0, src1, src1w);
1306                 if (src2 & SLJIT_IMM) {
1307                         BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1308                 }
1309                 else {
1310                         code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1311                         FAIL_IF(!code);
1312                         *code = op_rm;
1313                 }
1314         }
1315         else {
1316                 /* This version requires less memory writing. */
1317                 EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1318                 if (src2 & SLJIT_IMM) {
1319                         BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
1320                 }
1321                 else {
1322                         code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1323                         FAIL_IF(!code);
1324                         *code = op_rm;
1325                 }
1326                 EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
1327         }
1328
1329         return SLJIT_SUCCESS;
1330 }
1331
/*
 * Emit a two-operand instruction whose operands must NOT be swapped.
 *
 * The structure mirrors emit_cum_binary() with two differences: there is no
 * "dst equals src2" in-place case, and the general register path is taken
 * only when dst differs from src2, because copying src1 into dst first
 * would otherwise overwrite src2 before it is read.
 *
 *   op_rm       opcode for the register <- register/memory form.
 *   op_mr       opcode for the memory <- register form.
 *   op_imm      value OR-ed into the instruction by BINARY_IMM for the
 *               immediate form.
 *   op_eax_imm  opcode passed to BINARY_EAX_IMM, used when dst is
 *               SLJIT_TEMPORARY_REG1 and the immediate lies outside
 *               -128..127 (on x86-64 additionally only when mode32 is set
 *               or IS_HALFWORD() accepts the immediate).
 *   dst, dstw   destination operand; SLJIT_UNUSED discards the result.
 *   src1/src2   first and second source operand with their word arguments.
 *
 * Returns SLJIT_SUCCESS on the paths visible here; EMIT_MOV, FAIL_IF and the
 * BINARY_* macros presumably return early on failure.
 */
1332 static int emit_non_cum_binary(struct sljit_compiler *compiler,
1333         sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
1334         int dst, sljit_w dstw,
1335         int src1, sljit_w src1w,
1336         int src2, sljit_w src2w)
1337 {
1338         sljit_ub* code;
1339
             /* Result not wanted: perform the operation on a copy of src1 in TMP_REGISTER. */
1340         if (dst == SLJIT_UNUSED) {
1341                 EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1342                 if (src2 & SLJIT_IMM) {
1343                         BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
1344                 }
1345                 else {
1346                         code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1347                         FAIL_IF(!code);
1348                         *code = op_rm;
1349                 }
1350                 return SLJIT_SUCCESS;
1351         }
1352
             /* dst is also the first source: operate on it in place. */
1353         if (dst == src1 && dstw == src1w) {
1354                 if (src2 & SLJIT_IMM) {
1355 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1356                         if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1357 #else
1358                         if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
1359 #endif
1360                                 BINARY_EAX_IMM(op_eax_imm, src2w);
1361                         }
1362                         else {
1363                                 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1364                         }
1365                 }
1366                 else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
1367                         code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1368                         FAIL_IF(!code);
1369                         *code = op_rm;
1370                 }
1371                 else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
1372                         code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1373                         FAIL_IF(!code);
1374                         *code = op_mr;
1375                 }
1376                 else {
                             /* Neither operand is a register: stage src2 in TMP_REGISTER. */
1377                         EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
1378                         code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
1379                         FAIL_IF(!code);
1380                         *code = op_mr;
1381                 }
1382                 return SLJIT_SUCCESS;
1383         }
1384
1385         /* General version. */
             /* The dst != src2 test keeps the copy of src1 from overwriting src2. */
1386         if ((dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) && dst != src2) {
1387                 EMIT_MOV(compiler, dst, 0, src1, src1w);
1388                 if (src2 & SLJIT_IMM) {
1389                         BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1390                 }
1391                 else {
1392                         code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1393                         FAIL_IF(!code);
1394                         *code = op_rm;
1395                 }
1396         }
1397         else {
1398                 /* This version requires less memory writing. */
1399                 EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1400                 if (src2 & SLJIT_IMM) {
1401                         BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
1402                 }
1403                 else {
1404                         code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1405                         FAIL_IF(!code);
1406                         *code = op_rm;
1407                 }
1408                 EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
1409         }
1410
1411         return SLJIT_SUCCESS;
1412 }
1413
1414 static int emit_mul(struct sljit_compiler *compiler,
1415         int dst, sljit_w dstw,
1416         int src1, sljit_w src1w,
1417         int src2, sljit_w src2w)
1418 {
1419         sljit_ub* code;
1420         int dst_r;
1421
1422         dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
1423
1424         /* Register destination. */
1425         if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1426                 code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1427                 FAIL_IF(!code);
1428                 *code++ = 0x0f;
1429                 *code = 0xaf;
1430         }
1431         else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1432                 code = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1433                 FAIL_IF(!code);
1434                 *code++ = 0x0f;
1435                 *code = 0xaf;
1436         }
1437         else if (src1 & SLJIT_IMM) {
1438                 if (src2 & SLJIT_IMM) {
1439                         EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1440                         src2 = dst_r;
1441                         src2w = 0;
1442                 }
1443
1444                 if (src1w <= 127 && src1w >= -128) {
1445                         code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1446                         FAIL_IF(!code);
1447                         *code = 0x6b;
1448                         code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1449                         FAIL_IF(!code);
1450                         INC_CSIZE(1);
1451                         *code = (sljit_b)src1w;
1452                 }
1453 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1454                 else {
1455                         code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1456                         FAIL_IF(!code);
1457                         *code = 0x69;
1458                         code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1459                         FAIL_IF(!code);
1460                         INC_CSIZE(4);
1461                         *(sljit_w*)code = src1w;
1462                 }
1463 #else
1464                 else if (IS_HALFWORD(src1w)) {
1465                         code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1466                         FAIL_IF(!code);
1467                         *code = 0x69;
1468                         code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1469                         FAIL_IF(!code);
1470                         INC_CSIZE(4);
1471                         *(sljit_hw*)code = (sljit_hw)src1w;
1472                 }
1473                 else {
1474                         EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1475                         if (dst_r != src2)
1476                                 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1477                         code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1478                         FAIL_IF(!code);
1479                         *code++ = 0x0f;
1480                         *code = 0xaf;
1481                 }
1482 #endif
1483         }
1484         else if (src2 & SLJIT_IMM) {
1485                 /* Note: src1 is NOT immediate. */
1486
1487                 if (src2w <= 127 && src2w >= -128) {
1488                         code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1489                         FAIL_IF(!code);
1490                         *code = 0x6b;
1491                         code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1492                         FAIL_IF(!code);
1493                         INC_CSIZE(1);
1494                         *code = (sljit_b)src2w;
1495                 }
1496 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1497                 else {
1498                         code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1499                         FAIL_IF(!code);
1500                         *code = 0x69;
1501                         code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1502                         FAIL_IF(!code);
1503                         INC_CSIZE(4);
1504                         *(sljit_w*)code = src2w;
1505                 }
1506 #else
1507                 else if (IS_HALFWORD(src2w)) {
1508                         code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1509                         FAIL_IF(!code);
1510                         *code = 0x69;
1511                         code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1512                         FAIL_IF(!code);
1513                         INC_CSIZE(4);
1514                         *(sljit_hw*)code = (sljit_hw)src2w;
1515                 }
1516                 else {
1517                         EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1518                         if (dst_r != src1)
1519                                 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1520                         code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1521                         FAIL_IF(!code);
1522                         *code++ = 0x0f;
1523                         *code = 0xaf;
1524                 }
1525 #endif
1526         }
1527         else {
1528                 /* Neither argument is immediate. */
1529                 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1530                         dst_r = TMP_REGISTER;
1531                 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1532                 code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1533                 FAIL_IF(!code);
1534                 *code++ = 0x0f;
1535                 *code = 0xaf;
1536         }
1537
1538         if (dst_r == TMP_REGISTER)
1539                 EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
1540
1541         return SLJIT_SUCCESS;
1542 }
1543
1544 static int emit_lea_binary(struct sljit_compiler *compiler,
1545         int dst, sljit_w dstw,
1546         int src1, sljit_w src1w,
1547         int src2, sljit_w src2w)
1548 {
1549         sljit_ub* code;
1550         int dst_r, done = 0;
1551
1552         /* These cases better be left to handled by normal way. */
1553         if (dst == src1 && dstw == src1w)
1554                 return SLJIT_ERR_UNSUPPORTED;
1555         if (dst == src2 && dstw == src2w)
1556                 return SLJIT_ERR_UNSUPPORTED;
1557
1558         dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
1559
1560         if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
1561                 if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
1562                         /* It is not possible to be both SLJIT_LOCALS_REG. */
1563                         if (src1 != SLJIT_LOCALS_REG || src2 != SLJIT_LOCALS_REG) {
1564                                 code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
1565                                 FAIL_IF(!code);
1566                                 *code = 0x8d;
1567                                 done = 1;
1568                         }
1569                 }
1570 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1571                 if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1572                         code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (int)src2w);
1573 #else
1574                 if (src2 & SLJIT_IMM) {
1575                         code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
1576 #endif
1577                         FAIL_IF(!code);
1578                         *code = 0x8d;
1579                         done = 1;
1580                 }
1581         }
1582         else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
1583 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1584                 if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1585                         code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (int)src1w);
1586 #else
1587                 if (src1 & SLJIT_IMM) {
1588                         code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
1589 #endif
1590                         FAIL_IF(!code);
1591                         *code = 0x8d;
1592                         done = 1;
1593                 }
1594         }
1595
1596         if (done) {
1597                 if (dst_r == TMP_REGISTER)
1598                         return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0);
1599                 return SLJIT_SUCCESS;
1600         }
1601         return SLJIT_ERR_UNSUPPORTED;
1602 }
1603
1604 static int emit_cmp_binary(struct sljit_compiler *compiler,
1605         int src1, sljit_w src1w,
1606         int src2, sljit_w src2w)
1607 {
1608         sljit_ub* code;
1609
1610 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1611         if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1612 #else
1613         if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1614 #endif
1615                 BINARY_EAX_IMM(0x3d, src2w);
1616                 return SLJIT_SUCCESS;
1617         }
1618
1619         if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
1620                 if (src2 & SLJIT_IMM) {
1621                         BINARY_IMM(0x7 << 3, 0x39, src2w, src1, 0);
1622                 }
1623                 else {
1624                         code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1625                         FAIL_IF(!code);
1626                         *code = 0x3b;
1627                 }
1628                 return SLJIT_SUCCESS;
1629         }
1630
1631         if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS && !(src1 & SLJIT_IMM)) {
1632                 code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1633                 FAIL_IF(!code);
1634                 *code = 0x39;
1635                 return SLJIT_SUCCESS;
1636         }
1637
1638         if (src2 & SLJIT_IMM) {
1639                 if (src1 & SLJIT_IMM) {
1640                         EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1641                         src1 = TMP_REGISTER;
1642                         src1w = 0;
1643                 }
1644                 BINARY_IMM(0x7 << 3, 0x39, src2w, src1, src1w);
1645         }
1646         else {
1647                 EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1648                 code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1649                 FAIL_IF(!code);
1650                 *code = 0x3b;
1651         }
1652         return SLJIT_SUCCESS;
1653 }
1654
1655 static int emit_test_binary(struct sljit_compiler *compiler,
1656         int src1, sljit_w src1w,
1657         int src2, sljit_w src2w)
1658 {
1659         sljit_ub* code;
1660
1661 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1662         if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1663 #else
1664         if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1665 #endif
1666                 BINARY_EAX_IMM(0xa9, src2w);
1667                 return SLJIT_SUCCESS;
1668         }
1669
1670 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1671         if (src2 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1672 #else
1673         if (src2 == SLJIT_TEMPORARY_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1674 #endif
1675                 BINARY_EAX_IMM(0xa9, src1w);
1676                 return SLJIT_SUCCESS;
1677         }
1678
1679         if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
1680                 if (src2 & SLJIT_IMM) {
1681 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1682                         if (IS_HALFWORD(src2w) || compiler->mode32) {
1683                                 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1684                                 FAIL_IF(!code);
1685                                 *code = 0xf7;
1686                         }
1687                         else {
1688                                 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1689                                 code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
1690                                 FAIL_IF(!code);
1691                                 *code = 0x85;
1692                         }
1693 #else
1694                         code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1695                         FAIL_IF(!code);
1696                         *code = 0xf7;
1697 #endif
1698                 }
1699                 else {
1700                         code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1701                         FAIL_IF(!code);
1702                         *code = 0x85;
1703                 }
1704                 return SLJIT_SUCCESS;
1705         }
1706
1707         if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
1708                 if (src1 & SLJIT_IMM) {
1709 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1710                         if (IS_HALFWORD(src1w) || compiler->mode32) {
1711                                 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
1712                                 FAIL_IF(!code);
1713                                 *code = 0xf7;
1714                         }
1715                         else {
1716                                 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1717                                 code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
1718                                 FAIL_IF(!code);
1719                                 *code = 0x85;
1720                         }
1721 #else
1722                         code = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
1723                         FAIL_IF(!code);
1724                         *code = 0xf7;
1725 #endif
1726                 }
1727                 else {
1728                         code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1729                         FAIL_IF(!code);
1730                         *code = 0x85;
1731                 }
1732                 return SLJIT_SUCCESS;
1733         }
1734
1735         EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1736         if (src2 & SLJIT_IMM) {
1737 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1738                 if (IS_HALFWORD(src2w) || compiler->mode32) {
1739                         code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
1740                         FAIL_IF(!code);
1741                         *code = 0xf7;
1742                 }
1743                 else {
1744                         FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1745                         code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REGISTER, 0);
1746                         FAIL_IF(!code);
1747                         *code = 0x85;
1748                 }
1749 #else
1750                 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
1751                 FAIL_IF(!code);
1752                 *code = 0xf7;
1753 #endif
1754         }
1755         else {
1756                 code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1757                 FAIL_IF(!code);
1758                 *code = 0x85;
1759         }
1760         return SLJIT_SUCCESS;
1761 }
1762
1763 static int emit_shift(struct sljit_compiler *compiler,
1764         sljit_ub mode,
1765         int dst, sljit_w dstw,
1766         int src1, sljit_w src1w,
1767         int src2, sljit_w src2w)
1768 {
1769         sljit_ub* code;
1770
1771         if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
1772                 if (dst == src1 && dstw == src1w) {
1773                         code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
1774                         FAIL_IF(!code);
1775                         *code |= mode;
1776                         return SLJIT_SUCCESS;
1777                 }
1778                 if (dst == SLJIT_UNUSED) {
1779                         EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1780                         code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
1781                         FAIL_IF(!code);
1782                         *code |= mode;
1783                         return SLJIT_SUCCESS;
1784                 }
1785                 if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
1786                         EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1787                         code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
1788                         FAIL_IF(!code);
1789                         *code |= mode;
1790                         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
1791                         return SLJIT_SUCCESS;
1792                 }
1793                 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
1794                         EMIT_MOV(compiler, dst, 0, src1, src1w);
1795                         code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
1796                         FAIL_IF(!code);
1797                         *code |= mode;
1798                         return SLJIT_SUCCESS;
1799                 }
1800
1801                 EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1802                 code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
1803                 FAIL_IF(!code);
1804                 *code |= mode;
1805                 EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
1806                 return SLJIT_SUCCESS;
1807         }
1808
1809         if (dst == SLJIT_PREF_SHIFT_REG) {
1810                 EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1811                 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
1812                 code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
1813                 FAIL_IF(!code);
1814                 *code |= mode;
1815                 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
1816         }
1817         else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
1818                 if (src1 != dst)
1819                         EMIT_MOV(compiler, dst, 0, src1, src1w);
1820                 EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_PREF_SHIFT_REG, 0);
1821                 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
1822                 code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
1823                 FAIL_IF(!code);
1824                 *code |= mode;
1825                 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
1826         }
1827         else {
1828                 /* This case is really difficult, since ecx itself may used for
1829                    addressing, and we must ensure to work even in that case. */
1830                 EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1831 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1832                 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
1833 #else
1834                 /* [esp - 4] is reserved for eflags. */
1835                 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)), SLJIT_PREF_SHIFT_REG, 0);
1836 #endif
1837                 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
1838                 code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
1839                 FAIL_IF(!code);
1840                 *code |= mode;
1841 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1842                 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
1843 #else
1844                 /* [esp - 4] is reserved for eflags. */
1845                 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)));
1846 #endif
1847                 EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
1848         }
1849
1850         return SLJIT_SUCCESS;
1851 }
1852
1853 static int emit_shift_with_flags(struct sljit_compiler *compiler,
1854         sljit_ub mode, int set_flags,
1855         int dst, sljit_w dstw,
1856         int src1, sljit_w src1w,
1857         int src2, sljit_w src2w)
1858 {
1859         /* The CPU does not set flags if the shift count is 0. */
1860         if (src2 & SLJIT_IMM) {
1861 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1862                 if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
1863                         return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
1864 #else
1865                 if ((src2w & 0x1f) != 0)
1866                         return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
1867 #endif
1868                 if (!set_flags)
1869                         return emit_mov(compiler, dst, dstw, src1, src1w);
1870                 /* OR dst, src, 0 */
1871                 return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d,
1872                         dst, dstw, src1, src1w, SLJIT_IMM, 0);
1873         }
1874
1875         if (!set_flags)
1876                 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
1877
1878         if (!(dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS))
1879                 FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
1880
1881         FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w));
1882
1883         if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS)
1884                 return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
1885         return SLJIT_SUCCESS;
1886 }
1887
1888 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int op,
1889         int dst, sljit_w dstw,
1890         int src1, sljit_w src1w,
1891         int src2, sljit_w src2w)
1892 {
1893         CHECK_ERROR();
1894         check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
1895
1896 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1897         compiler->mode32 = op & SLJIT_INT_OP;
1898 #endif
1899         CHECK_EXTRA_REGS(dst, dstw, (void)0);
1900         CHECK_EXTRA_REGS(src1, src1w, (void)0);
1901         CHECK_EXTRA_REGS(src2, src2w, (void)0);
1902
1903         if (GET_OPCODE(op) >= SLJIT_MUL) {
1904                 if (SLJIT_UNLIKELY(GET_FLAGS(op)))
1905                         compiler->flags_saved = 0;
1906                 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
1907                         FAIL_IF(emit_save_flags(compiler));
1908         }
1909
1910         switch (GET_OPCODE(op)) {
1911         case SLJIT_ADD:
1912                 if (!GET_FLAGS(op)) {
1913                         if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
1914                                 return compiler->error;
1915                 } 
1916                 else
1917                         compiler->flags_saved = 0;
1918                 if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
1919                         FAIL_IF(emit_save_flags(compiler));
1920                 return emit_cum_binary(compiler, 0x03, 0x01, 0x0 << 3, 0x05,
1921                         dst, dstw, src1, src1w, src2, src2w);
1922         case SLJIT_ADDC:
1923                 if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
1924                         FAIL_IF(emit_restore_flags(compiler, 1));
1925                 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
1926                         FAIL_IF(emit_save_flags(compiler));
1927                 if (SLJIT_UNLIKELY(GET_FLAGS(op)))
1928                         compiler->flags_saved = 0;
1929                 return emit_cum_binary(compiler, 0x13, 0x11, 0x2 << 3, 0x15,
1930                         dst, dstw, src1, src1w, src2, src2w);
1931         case SLJIT_SUB:
1932                 if (!GET_FLAGS(op)) {
1933                         if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
1934                                 return compiler->error;
1935                 }
1936                 else
1937                         compiler->flags_saved = 0;
1938                 if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
1939                         FAIL_IF(emit_save_flags(compiler));
1940                 if (dst == SLJIT_UNUSED)
1941                         return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
1942                 return emit_non_cum_binary(compiler, 0x2b, 0x29, 0x5 << 3, 0x2d,
1943                         dst, dstw, src1, src1w, src2, src2w);
1944         case SLJIT_SUBC:
1945                 if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
1946                         FAIL_IF(emit_restore_flags(compiler, 1));
1947                 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
1948                         FAIL_IF(emit_save_flags(compiler));
1949                 if (SLJIT_UNLIKELY(GET_FLAGS(op)))
1950                         compiler->flags_saved = 0;
1951                 return emit_non_cum_binary(compiler, 0x1b, 0x19, 0x3 << 3, 0x1d,
1952                         dst, dstw, src1, src1w, src2, src2w);
1953         case SLJIT_MUL:
1954                 return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
1955         case SLJIT_AND:
1956                 if (dst == SLJIT_UNUSED)
1957                         return emit_test_binary(compiler, src1, src1w, src2, src2w);
1958                 return emit_cum_binary(compiler, 0x23, 0x21, 0x4 << 3, 0x25,
1959                         dst, dstw, src1, src1w, src2, src2w);
1960         case SLJIT_OR:
1961                 return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d,
1962                         dst, dstw, src1, src1w, src2, src2w);
1963         case SLJIT_XOR:
1964                 return emit_cum_binary(compiler, 0x33, 0x31, 0x6 << 3, 0x35,
1965                         dst, dstw, src1, src1w, src2, src2w);
1966         case SLJIT_SHL:
1967                 return emit_shift_with_flags(compiler, 0x4 << 3, GET_FLAGS(op),
1968                         dst, dstw, src1, src1w, src2, src2w);
1969         case SLJIT_LSHR:
1970                 return emit_shift_with_flags(compiler, 0x5 << 3, GET_FLAGS(op),
1971                         dst, dstw, src1, src1w, src2, src2w);
1972         case SLJIT_ASHR:
1973                 return emit_shift_with_flags(compiler, 0x7 << 3, GET_FLAGS(op),
1974                         dst, dstw, src1, src1w, src2, src2w);
1975         }
1976
1977         return SLJIT_SUCCESS;
1978 }
1979
1980 SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index(int reg)
1981 {
1982         check_sljit_get_register_index(reg);
1983 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1984         if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2
1985                         || reg == SLJIT_SAVED_EREG1 || reg == SLJIT_SAVED_EREG2)
1986                 return -1;
1987 #endif
1988         return reg_map[reg];
1989 }
1990
1991 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compiler,
1992         void *instruction, int size)
1993 {
1994         sljit_ub *buf;
1995
1996         CHECK_ERROR();
1997         check_sljit_emit_op_custom(compiler, instruction, size);
1998         SLJIT_ASSERT(size > 0 && size < 16);
1999
2000         buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
2001         FAIL_IF(!buf);
2002         INC_SIZE(size);
2003         SLJIT_MEMMOVE(buf, instruction, size);
2004         return SLJIT_SUCCESS;
2005 }
2006
2007 /* --------------------------------------------------------------------- */
2008 /*  Floating point operators                                             */
2009 /* --------------------------------------------------------------------- */
2010
#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
/* Set by init_compiler() from bit 26 of the EDX value returned by CPUID leaf 1. */
static int sse2_available = 0;
#endif

#if (defined SLJIT_SSE2 && SLJIT_SSE2)

/* Alignment + 2 * 16 bytes. */
static sljit_i sse2_data[3 + 4 + 4];
/* 16 byte aligned view of sse2_data; holds two 16 byte constants. */
static sljit_i *sse2_buffer;

/* Prepares the aligned constants referenced by the SLJIT_FNEG and
   SLJIT_FABS code paths (sse2_buffer and sse2_buffer + 4) and, when
   SLJIT_SSE2_AUTO is enabled, queries CPUID to fill sse2_available. */
static void init_compiler()
{
#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
	int features = 0;
#endif
	sljit_i *neg_mask;
	sljit_i *abs_mask;

	/* Round the address of sse2_data up to the next multiple of 16. */
	sse2_buffer = (sljit_i*)(((sljit_uw)sse2_data + 15) & ~0xf);
	neg_mask = sse2_buffer;
	abs_mask = sse2_buffer + 4;

	/* Only the top bit of the low 64 bits is set. */
	neg_mask[0] = 0;
	neg_mask[1] = 0x80000000;
	/* Every bit of the low 64 bits except the top one is set. */
	abs_mask[0] = 0xffffffff;
	abs_mask[1] = 0x7fffffff;

#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
#ifdef __GNUC__
	/* AT&T syntax. */
	asm (
		"pushl %%ebx\n"
		"movl $0x1, %%eax\n"
		"cpuid\n"
		"popl %%ebx\n"
		"movl %%edx, %0\n"
		: "=g" (features)
		:
		: "%eax", "%ecx", "%edx"
	);
#elif defined(_MSC_VER) || defined(__BORLANDC__)
	/* Intel syntax. */
	__asm {
		mov eax, 1
		push ebx
		cpuid
		pop ebx
		mov features, edx
	}
#else
	#error "SLJIT_SSE2_AUTO is not implemented for this C compiler"
#endif
	sse2_available = (features >> 26) & 0x1;
#endif
}

#endif
2063
2064 SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void)
2065 {
2066         /* Always available. */
2067         return 1;
2068 }
2069
2070 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
2071
2072 static int emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
2073         int xmm1, int xmm2, sljit_w xmm2w)
2074 {
2075         sljit_ub *buf;
2076
2077         buf = emit_x86_instruction(compiler, 2 | EX86_PREF_F2 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2078         FAIL_IF(!buf);
2079         *buf++ = 0x0f;
2080         *buf = opcode;
2081         return SLJIT_SUCCESS;
2082 }
2083
2084 static int emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
2085         int xmm1, int xmm2, sljit_w xmm2w)
2086 {
2087         sljit_ub *buf;
2088
2089         buf = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2090         FAIL_IF(!buf);
2091         *buf++ = 0x0f;
2092         *buf = opcode;
2093         return SLJIT_SUCCESS;
2094 }
2095
2096 static SLJIT_INLINE int emit_sse2_load(struct sljit_compiler *compiler,
2097         int dst, int src, sljit_w srcw)
2098 {
2099         return emit_sse2(compiler, 0x10, dst, src, srcw);
2100 }
2101
2102 static SLJIT_INLINE int emit_sse2_store(struct sljit_compiler *compiler,
2103         int dst, sljit_w dstw, int src)
2104 {
2105         return emit_sse2(compiler, 0x11, src, dst, dstw);
2106 }
2107
2108 #if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
2109 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
2110 #else
2111 static int sljit_emit_sse2_fop1(struct sljit_compiler *compiler, int op,
2112 #endif
2113         int dst, sljit_w dstw,
2114         int src, sljit_w srcw)
2115 {
2116         int dst_r;
2117
2118         CHECK_ERROR();
2119         check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
2120
2121 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2122         compiler->mode32 = 1;
2123 #endif
2124
2125         if (GET_OPCODE(op) == SLJIT_FCMP) {
2126                 compiler->flags_saved = 0;
2127                 if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
2128                         dst_r = dst;
2129                 else {
2130                         dst_r = TMP_FREG;
2131                         FAIL_IF(emit_sse2_load(compiler, dst_r, dst, dstw));
2132                 }
2133                 return emit_sse2_logic(compiler, 0x2e, dst_r, src, srcw);
2134         }
2135
2136         if (op == SLJIT_FMOV) {
2137                 if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
2138                         return emit_sse2_load(compiler, dst, src, srcw);
2139                 if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4)
2140                         return emit_sse2_store(compiler, dst, dstw, src);
2141                 FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src, srcw));
2142                 return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
2143         }
2144
2145         if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
2146                 dst_r = dst;
2147                 if (dst != src)
2148                         FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
2149         }
2150         else {
2151                 dst_r = TMP_FREG;
2152                 FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
2153         }
2154
2155         switch (op) {
2156         case SLJIT_FNEG:
2157                 FAIL_IF(emit_sse2_logic(compiler, 0x57, dst_r, SLJIT_MEM0(), (sljit_w)sse2_buffer));
2158                 break;
2159
2160         case SLJIT_FABS:
2161                 FAIL_IF(emit_sse2_logic(compiler, 0x54, dst_r, SLJIT_MEM0(), (sljit_w)(sse2_buffer + 4)));
2162                 break;
2163         }
2164
2165         if (dst_r == TMP_FREG)
2166                 return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
2167         return SLJIT_SUCCESS;
2168 }
2169
2170 #if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
2171 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
2172 #else
2173 static int sljit_emit_sse2_fop2(struct sljit_compiler *compiler, int op,
2174 #endif
2175         int dst, sljit_w dstw,
2176         int src1, sljit_w src1w,
2177         int src2, sljit_w src2w)
2178 {
2179         int dst_r;
2180
2181         CHECK_ERROR();
2182         check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2183
2184 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2185         compiler->mode32 = 1;
2186 #endif
2187
2188         if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
2189                 dst_r = dst;
2190                 if (dst == src1)
2191                         ; /* Do nothing here. */
2192                 else if (dst == src2 && (op == SLJIT_FADD || op == SLJIT_FMUL)) {
2193                         /* Swap arguments. */
2194                         src2 = src1;
2195                         src2w = src1w;
2196                 }
2197                 else if (dst != src2)
2198                         FAIL_IF(emit_sse2_load(compiler, dst_r, src1, src1w));
2199                 else {
2200                         dst_r = TMP_FREG;
2201                         FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
2202                 }
2203         }
2204         else {
2205                 dst_r = TMP_FREG;
2206                 FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
2207         }
2208
2209         switch (op) {
2210         case SLJIT_FADD:
2211                 FAIL_IF(emit_sse2(compiler, 0x58, dst_r, src2, src2w));
2212                 break;
2213
2214         case SLJIT_FSUB:
2215                 FAIL_IF(emit_sse2(compiler, 0x5c, dst_r, src2, src2w));
2216                 break;
2217
2218         case SLJIT_FMUL:
2219                 FAIL_IF(emit_sse2(compiler, 0x59, dst_r, src2, src2w));
2220                 break;
2221
2222         case SLJIT_FDIV:
2223                 FAIL_IF(emit_sse2(compiler, 0x5e, dst_r, src2, src2w));
2224                 break;
2225         }
2226
2227         if (dst_r == TMP_FREG)
2228                 return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
2229         return SLJIT_SUCCESS;
2230 }
2231
2232 #endif
2233
2234 #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) || !(defined SLJIT_SSE2 && SLJIT_SSE2)
2235
2236 static int emit_fld(struct sljit_compiler *compiler,
2237         int src, sljit_w srcw)
2238 {
2239         sljit_ub *buf;
2240
2241         if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
2242                 buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
2243                 FAIL_IF(!buf);
2244                 INC_SIZE(2);
2245                 *buf++ = 0xd9;
2246                 *buf = 0xc0 + src - 1;
2247                 return SLJIT_SUCCESS;
2248         }
2249
2250         buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2251         FAIL_IF(!buf);
2252         *buf = 0xdd;
2253         return SLJIT_SUCCESS;
2254 }
2255
2256 static int emit_fop(struct sljit_compiler *compiler,
2257         sljit_ub st_arg, sljit_ub st_arg2,
2258         sljit_ub m64fp_arg, sljit_ub m64fp_arg2,
2259         int src, sljit_w srcw)
2260 {
2261         sljit_ub *buf;
2262
2263         if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
2264                 buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
2265                 FAIL_IF(!buf);
2266                 INC_SIZE(2);
2267                 *buf++ = st_arg;
2268                 *buf = st_arg2 + src;
2269                 return SLJIT_SUCCESS;
2270         }
2271
2272         buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2273         FAIL_IF(!buf);
2274         *buf++ = m64fp_arg;
2275         *buf |= m64fp_arg2;
2276         return SLJIT_SUCCESS;
2277 }
2278
2279 static int emit_fop_regs(struct sljit_compiler *compiler,
2280         sljit_ub st_arg, sljit_ub st_arg2,
2281         int src)
2282 {
2283         sljit_ub *buf;
2284
2285         buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
2286         FAIL_IF(!buf);
2287         INC_SIZE(2);
2288         *buf++ = st_arg;
2289         *buf = st_arg2 + src;
2290         return SLJIT_SUCCESS;
2291 }
2292
2293 #if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
2294 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
2295 #else
2296 static int sljit_emit_fpu_fop1(struct sljit_compiler *compiler, int op,
2297 #endif
2298         int dst, sljit_w dstw,
2299         int src, sljit_w srcw)
2300 {
2301 #if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2302         sljit_ub *buf;
2303 #endif
2304
2305         CHECK_ERROR();
2306         check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
2307
2308 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2309         compiler->mode32 = 1;
2310 #endif
2311
2312         if (GET_OPCODE(op) == SLJIT_FCMP) {
2313                 compiler->flags_saved = 0;
2314 #if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2315                 FAIL_IF(emit_fld(compiler, dst, dstw));
2316                 FAIL_IF(emit_fop(compiler, 0xd8, 0xd8, 0xdc, 0x3 << 3, src, srcw));
2317
2318                 /* Copy flags. */
2319                 EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
2320                 buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
2321                 FAIL_IF(!buf);
2322                 INC_SIZE(3);
2323                 *buf++ = 0xdf;
2324                 *buf++ = 0xe0;
2325                 /* Note: lahf is not supported on all x86-64 architectures. */
2326                 *buf++ = 0x9e;
2327                 EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
2328 #else
2329                 if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
2330                         FAIL_IF(emit_fld(compiler, dst, dstw));
2331                         FAIL_IF(emit_fop_regs(compiler, 0xdf, 0xe8, src));
2332                 } else {
2333                         FAIL_IF(emit_fld(compiler, src, srcw));
2334                         FAIL_IF(emit_fld(compiler, dst + ((dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) ? 1 : 0), dstw));
2335                         FAIL_IF(emit_fop_regs(compiler, 0xdf, 0xe8, src));
2336                         FAIL_IF(emit_fop_regs(compiler, 0xdd, 0xd8, 0));
2337                 }
2338 #endif
2339                 return SLJIT_SUCCESS;
2340         }
2341
2342         FAIL_IF(emit_fld(compiler, src, srcw));
2343
2344         switch (op) {
2345         case SLJIT_FNEG:
2346                 FAIL_IF(emit_fop_regs(compiler, 0xd9, 0xe0, 0));
2347                 break;
2348         case SLJIT_FABS:
2349                 FAIL_IF(emit_fop_regs(compiler, 0xd9, 0xe1, 0));
2350                 break;
2351         }
2352
2353         FAIL_IF(emit_fop(compiler, 0xdd, 0xd8, 0xdd, 0x3 << 3, dst, dstw));
2354
2355         return SLJIT_SUCCESS;
2356 }
2357
2358 #if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
2359 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
2360 #else
2361 static int sljit_emit_fpu_fop2(struct sljit_compiler *compiler, int op,
2362 #endif
2363         int dst, sljit_w dstw,
2364         int src1, sljit_w src1w,
2365         int src2, sljit_w src2w)
2366 {
2367         CHECK_ERROR();
2368         check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2369
2370 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2371         compiler->mode32 = 1;
2372 #endif
2373
2374         if (src1 >= SLJIT_FLOAT_REG1 && src1 <= SLJIT_FLOAT_REG4 && dst == src1) {
2375                 FAIL_IF(emit_fld(compiler, src2, src2w));
2376
2377                 switch (op) {
2378                 case SLJIT_FADD:
2379                         FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc0, src1));
2380                         break;
2381                 case SLJIT_FSUB:
2382                         FAIL_IF(emit_fop_regs(compiler, 0xde, 0xe8, src1));
2383                         break;
2384                 case SLJIT_FMUL:
2385                         FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc8, src1));
2386                         break;
2387                 case SLJIT_FDIV:
2388                         FAIL_IF(emit_fop_regs(compiler, 0xde, 0xf8, src1));
2389                         break;
2390                 }
2391                 return SLJIT_SUCCESS;
2392         }
2393
2394         FAIL_IF(emit_fld(compiler, src1, src1w));
2395
2396         if (src2 >= SLJIT_FLOAT_REG1 && src2 <= SLJIT_FLOAT_REG4 && dst == src2) {
2397                 switch (op) {
2398                 case SLJIT_FADD:
2399                         FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc0, src2));
2400                         break;
2401                 case SLJIT_FSUB:
2402                         FAIL_IF(emit_fop_regs(compiler, 0xde, 0xe0, src2));
2403                         break;
2404                 case SLJIT_FMUL:
2405                         FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc8, src2));
2406                         break;
2407                 case SLJIT_FDIV:
2408                         FAIL_IF(emit_fop_regs(compiler, 0xde, 0xf0, src2));
2409                         break;
2410                 }
2411                 return SLJIT_SUCCESS;
2412         }
2413
2414         switch (op) {
2415         case SLJIT_FADD:
2416                 FAIL_IF(emit_fop(compiler, 0xd8, 0xc0, 0xdc, 0x0 << 3, src2, src2w));
2417                 break;
2418         case SLJIT_FSUB:
2419                 FAIL_IF(emit_fop(compiler, 0xd8, 0xe0, 0xdc, 0x4 << 3, src2, src2w));
2420                 break;
2421         case SLJIT_FMUL:
2422                 FAIL_IF(emit_fop(compiler, 0xd8, 0xc8, 0xdc, 0x1 << 3, src2, src2w));
2423                 break;
2424         case SLJIT_FDIV:
2425                 FAIL_IF(emit_fop(compiler, 0xd8, 0xf0, 0xdc, 0x6 << 3, src2, src2w));
2426                 break;
2427         }
2428
2429         FAIL_IF(emit_fop(compiler, 0xdd, 0xd8, 0xdd, 0x3 << 3, dst, dstw));
2430
2431         return SLJIT_SUCCESS;
2432 }
2433 #endif
2434
2435 #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
2436
2437 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
2438         int dst, sljit_w dstw,
2439         int src, sljit_w srcw)
2440 {
2441         if (sse2_available)
2442                 return sljit_emit_sse2_fop1(compiler, op, dst, dstw, src, srcw);
2443         else
2444                 return sljit_emit_fpu_fop1(compiler, op, dst, dstw, src, srcw);
2445 }
2446
2447 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
2448         int dst, sljit_w dstw,
2449         int src1, sljit_w src1w,
2450         int src2, sljit_w src2w)
2451 {
2452         if (sse2_available)
2453                 return sljit_emit_sse2_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2454         else
2455                 return sljit_emit_fpu_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2456 }
2457
2458 #endif
2459
2460 /* --------------------------------------------------------------------- */
2461 /*  Conditional instructions                                             */
2462 /* --------------------------------------------------------------------- */
2463
2464 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2465 {
2466         sljit_ub *buf;
2467         struct sljit_label *label;
2468
2469         CHECK_ERROR_PTR();
2470         check_sljit_emit_label(compiler);
2471
2472         /* We should restore the flags before the label,
2473            since other taken jumps has their own flags as well. */
2474         if (SLJIT_UNLIKELY(compiler->flags_saved))
2475                 PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2476
2477         if (compiler->last_label && compiler->last_label->size == compiler->size)
2478                 return compiler->last_label;
2479
2480         label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2481         PTR_FAIL_IF(!label);
2482         set_label(label, compiler);
2483
2484         buf = (sljit_ub*)ensure_buf(compiler, 2);
2485         PTR_FAIL_IF(!buf);
2486
2487         *buf++ = 0;
2488         *buf++ = 0;
2489
2490         return label;
2491 }
2492
2493 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type)
2494 {
2495         sljit_ub *buf;
2496         struct sljit_jump *jump;
2497
2498         CHECK_ERROR_PTR();
2499         check_sljit_emit_jump(compiler, type);
2500
2501         if (SLJIT_UNLIKELY(compiler->flags_saved)) {
2502                 if ((type & 0xff) <= SLJIT_JUMP)
2503                         PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2504                 compiler->flags_saved = 0;
2505         }
2506
2507         jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2508         PTR_FAIL_IF_NULL(jump);
2509         set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2510         type &= 0xff;
2511
2512         if (type >= SLJIT_CALL1)
2513                 PTR_FAIL_IF(call_with_args(compiler, type));
2514
2515         /* Worst case size. */
2516 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2517         compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
2518 #else
2519         compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
2520 #endif
2521
2522         buf = (sljit_ub*)ensure_buf(compiler, 2);
2523         PTR_FAIL_IF_NULL(buf);
2524
2525         *buf++ = 0;
2526         *buf++ = type + 4;
2527         return jump;
2528 }
2529
2530 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw)
2531 {
2532         sljit_ub *code;
2533         struct sljit_jump *jump;
2534
2535         CHECK_ERROR();
2536         check_sljit_emit_ijump(compiler, type, src, srcw);
2537
2538         CHECK_EXTRA_REGS(src, srcw, (void)0);
2539         if (SLJIT_UNLIKELY(compiler->flags_saved)) {
2540                 if (type <= SLJIT_JUMP)
2541                         FAIL_IF(emit_restore_flags(compiler, 0));
2542                 compiler->flags_saved = 0;
2543         }
2544
2545         if (type >= SLJIT_CALL1) {
2546 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2547 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
2548                 if (src == SLJIT_TEMPORARY_REG3) {
2549                         EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
2550                         src = TMP_REGISTER;
2551                 }
2552                 if ((src & SLJIT_MEM) && (src & 0xf) == SLJIT_LOCALS_REG && type >= SLJIT_CALL3) {
2553                         if (src & 0xf0) {
2554                                 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
2555                                 src = TMP_REGISTER;
2556                         }
2557                         else
2558                                 srcw += sizeof(sljit_w);
2559                 }
2560 #else
2561                 if ((src & SLJIT_MEM) && (src & 0xf) == SLJIT_LOCALS_REG) {
2562                         if (src & 0xf0) {
2563                                 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
2564                                 src = TMP_REGISTER;
2565                         }
2566                         else
2567                                 srcw += sizeof(sljit_w) * (type - SLJIT_CALL0);
2568                 }
2569 #endif
2570 #endif
2571 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
2572                 if (src == SLJIT_TEMPORARY_REG3) {
2573                         EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
2574                         src = TMP_REGISTER;
2575                 }
2576 #endif
2577                 FAIL_IF(call_with_args(compiler, type));
2578         }
2579
2580         if (src == SLJIT_IMM) {
2581                 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2582                 FAIL_IF_NULL(jump);
2583                 set_jump(jump, compiler, JUMP_ADDR);
2584                 jump->u.target = srcw;
2585
2586                 /* Worst case size. */
2587 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2588                 compiler->size += 5;
2589 #else
2590                 compiler->size += 10 + 3;
2591 #endif
2592
2593                 code = (sljit_ub*)ensure_buf(compiler, 2);
2594                 FAIL_IF_NULL(code);
2595
2596                 *code++ = 0;
2597                 *code++ = type + 4;
2598         }
2599         else {
2600 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2601                 /* REX_W is not necessary (src is not immediate). */
2602                 compiler->mode32 = 1;
2603 #endif
2604                 code = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2605                 FAIL_IF(!code);
2606                 *code++ = 0xff;
2607                 *code |= (type >= SLJIT_FAST_CALL) ? (2 << 3) : (4 << 3);
2608         }
2609         return SLJIT_SUCCESS;
2610 }
2611
2612 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type)
2613 {
2614         sljit_ub *buf;
2615         sljit_ub cond_set = 0;
2616 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2617         int reg;
2618 #endif
2619
2620         CHECK_ERROR();
2621         check_sljit_emit_cond_value(compiler, op, dst, dstw, type);
2622
2623         if (dst == SLJIT_UNUSED)
2624                 return SLJIT_SUCCESS;
2625
2626         CHECK_EXTRA_REGS(dst, dstw, (void)0);
2627         if (SLJIT_UNLIKELY(compiler->flags_saved))
2628                 FAIL_IF(emit_restore_flags(compiler, 0));
2629
2630         switch (type) {
2631         case SLJIT_C_EQUAL:
2632         case SLJIT_C_FLOAT_EQUAL:
2633                 cond_set = 0x94;
2634                 break;
2635
2636         case SLJIT_C_NOT_EQUAL:
2637         case SLJIT_C_FLOAT_NOT_EQUAL:
2638                 cond_set = 0x95;
2639                 break;
2640
2641         case SLJIT_C_LESS:
2642         case SLJIT_C_FLOAT_LESS:
2643                 cond_set = 0x92;
2644                 break;
2645
2646         case SLJIT_C_GREATER_EQUAL:
2647         case SLJIT_C_FLOAT_GREATER_EQUAL:
2648                 cond_set = 0x93;
2649                 break;
2650
2651         case SLJIT_C_GREATER:
2652         case SLJIT_C_FLOAT_GREATER:
2653                 cond_set = 0x97;
2654                 break;
2655
2656         case SLJIT_C_LESS_EQUAL:
2657         case SLJIT_C_FLOAT_LESS_EQUAL:
2658                 cond_set = 0x96;
2659                 break;
2660
2661         case SLJIT_C_SIG_LESS:
2662                 cond_set = 0x9c;
2663                 break;
2664
2665         case SLJIT_C_SIG_GREATER_EQUAL:
2666                 cond_set = 0x9d;
2667                 break;
2668
2669         case SLJIT_C_SIG_GREATER:
2670                 cond_set = 0x9f;
2671                 break;
2672
2673         case SLJIT_C_SIG_LESS_EQUAL:
2674                 cond_set = 0x9e;
2675                 break;
2676
2677         case SLJIT_C_OVERFLOW:
2678         case SLJIT_C_MUL_OVERFLOW:
2679                 cond_set = 0x90;
2680                 break;
2681
2682         case SLJIT_C_NOT_OVERFLOW:
2683         case SLJIT_C_MUL_NOT_OVERFLOW:
2684                 cond_set = 0x91;
2685                 break;
2686
2687         case SLJIT_C_FLOAT_NAN:
2688                 cond_set = 0x9a;
2689                 break;
2690
2691         case SLJIT_C_FLOAT_NOT_NAN:
2692                 cond_set = 0x9b;
2693                 break;
2694         }
2695
2696 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2697         reg = (op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
2698
2699         buf = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
2700         FAIL_IF(!buf);
2701         INC_SIZE(4 + 4);
2702         /* Set low register to conditional flag. */
2703         *buf++ = (reg_map[reg] <= 7) ? 0x40 : REX_B;
2704         *buf++ = 0x0f;
2705         *buf++ = cond_set;
2706         *buf++ = 0xC0 | reg_lmap[reg];
2707         *buf++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
2708         *buf++ = 0x0f;
2709         *buf++ = 0xb6;
2710         *buf = 0xC0 | (reg_lmap[reg] << 3) | reg_lmap[reg];
2711
2712         if (reg == TMP_REGISTER) {
2713                 if (op == SLJIT_MOV) {
2714                         compiler->mode32 = 0;
2715                         EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
2716                 }
2717                 else {
2718 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
2719                         compiler->skip_checks = 1;
2720 #endif
2721                         return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
2722                 }
2723         }
2724 #else
2725         if (op == SLJIT_MOV) {
2726                 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
2727                         buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
2728                         FAIL_IF(!buf);
2729                         INC_SIZE(3 + 3);
2730                         /* Set low byte to conditional flag. */
2731                         *buf++ = 0x0f;
2732                         *buf++ = cond_set;
2733                         *buf++ = 0xC0 | reg_map[dst];
2734
2735                         *buf++ = 0x0f;
2736                         *buf++ = 0xb6;
2737                         *buf = 0xC0 | (reg_map[dst] << 3) | reg_map[dst];
2738                 }
2739                 else {
2740                         EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
2741
2742                         buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
2743                         FAIL_IF(!buf);
2744                         INC_SIZE(3 + 3);
2745                         /* Set al to conditional flag. */
2746                         *buf++ = 0x0f;
2747                         *buf++ = cond_set;
2748                         *buf++ = 0xC0;
2749
2750                         *buf++ = 0x0f;
2751                         *buf++ = 0xb6;
2752                         if (dst >= SLJIT_SAVED_REG1 && dst <= SLJIT_NO_REGISTERS)
2753                                 *buf = 0xC0 | (reg_map[dst] << 3);
2754                         else {
2755                                 *buf = 0xC0;
2756                                 EMIT_MOV(compiler, dst, dstw, SLJIT_TEMPORARY_REG1, 0);
2757                         }
2758
2759                         EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
2760                 }
2761         }
2762         else {
2763                 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
2764                         EMIT_MOV(compiler, TMP_REGISTER, 0, dst, 0);
2765                         buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
2766                         FAIL_IF(!buf);
2767                         INC_SIZE(3);
2768
2769                         *buf++ = 0x0f;
2770                         *buf++ = cond_set;
2771                         *buf++ = 0xC0 | reg_map[dst];
2772                 }
2773                 else {
2774                         EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
2775
2776                         buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3 + 1);
2777                         FAIL_IF(!buf);
2778                         INC_SIZE(3 + 3 + 1);
2779                         /* Set al to conditional flag. */
2780                         *buf++ = 0x0f;
2781                         *buf++ = cond_set;
2782                         *buf++ = 0xC0;
2783
2784                         *buf++ = 0x0f;
2785                         *buf++ = 0xb6;
2786                         *buf++ = 0xC0;
2787
2788                         *buf++ = 0x90 + reg_map[TMP_REGISTER];
2789                 }
2790 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
2791                 compiler->skip_checks = 1;
2792 #endif
2793                 return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
2794         }
2795 #endif
2796
2797         return SLJIT_SUCCESS;
2798 }
2799
2800 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value)
2801 {
2802         sljit_ub *buf;
2803         struct sljit_const *const_;
2804 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2805         int reg;
2806 #endif
2807
2808         CHECK_ERROR_PTR();
2809         check_sljit_emit_const(compiler, dst, dstw, init_value);
2810
2811         CHECK_EXTRA_REGS(dst, dstw, (void)0);
2812
2813         const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2814         PTR_FAIL_IF(!const_);
2815         set_const(const_, compiler);
2816
2817 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2818         compiler->mode32 = 0;
2819         reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
2820
2821         if (emit_load_imm64(compiler, reg, init_value))
2822                 return NULL;
2823 #else
2824         if (dst == SLJIT_UNUSED)
2825                 dst = TMP_REGISTER;
2826
2827         if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
2828                 return NULL;
2829 #endif
2830
2831         buf = (sljit_ub*)ensure_buf(compiler, 2);
2832         PTR_FAIL_IF(!buf);
2833
2834         *buf++ = 0;
2835         *buf++ = 1;
2836
2837 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2838         if (reg == TMP_REGISTER && dst != SLJIT_UNUSED)
2839                 if (emit_mov(compiler, dst, dstw, TMP_REGISTER, 0))
2840                         return NULL;
2841 #endif
2842
2843         return const_;
2844 }
2845
2846 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
2847 {
2848 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2849         *(sljit_w*)addr = new_addr - (addr + 4);
2850 #else
2851         *(sljit_uw*)addr = new_addr;
2852 #endif
2853 }
2854
2855 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_w new_constant)
2856 {
2857         *(sljit_w*)addr = new_constant;
2858 }