2 * Stack-less Just-In-Time compiler
4 * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
29 return "x86" SLJIT_CPUINFO;
54 8 - R8 - From now on REX prefix is required
64 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
66 /* Last register + 1. */
67 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
69 static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
70 0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5
73 #define CHECK_EXTRA_REGS(p, w, do) \
74 if (p >= SLJIT_R3 && p <= SLJIT_R6) { \
75 w = SLJIT_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \
76 p = SLJIT_MEM1(SLJIT_SP); \
80 #else /* SLJIT_CONFIG_X86_32 */
82 /* Last register + 1. */
83 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
84 #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
85 #define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
87 /* Note: r12 & 0x7 == 0b100, which is decoded as a SIB byte being present.
88 Note: avoid using r12 and r13 for memory addressing;
89 therefore r12 is better suited for SAVED_EREG than SAVED_REG. */
91 /* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
92 static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
93 0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
95 /* low-map. reg_map & 0x7. */
96 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
97 0, 0, 6, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 7, 1
100 /* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
101 static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
102 0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
104 /* low-map. reg_map & 0x7. */
105 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
106 0, 0, 2, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 2, 0, 1
117 #define HALFWORD_MAX 0x7fffffffl
118 #define HALFWORD_MIN -0x80000000l
120 #define HALFWORD_MAX 0x7fffffffll
121 #define HALFWORD_MIN -0x80000000ll
124 #define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
125 #define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
127 #define CHECK_EXTRA_REGS(p, w, do)
129 #endif /* SLJIT_CONFIG_X86_32 */
133 /* Size flags for emit_x86_instruction: */
134 #define EX86_BIN_INS 0x0010
135 #define EX86_SHIFT_INS 0x0020
136 #define EX86_REX 0x0040
137 #define EX86_NO_REXW 0x0080
138 #define EX86_BYTE_ARG 0x0100
139 #define EX86_HALF_ARG 0x0200
140 #define EX86_PREF_66 0x0400
141 #define EX86_PREF_F2 0x0800
142 #define EX86_PREF_F3 0x1000
143 #define EX86_SSE2_OP1 0x2000
144 #define EX86_SSE2_OP2 0x4000
145 #define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2)
147 /* --------------------------------------------------------------------- */
148 /* Instruction forms */
149 /* --------------------------------------------------------------------- */
151 #define ADD (/* BINARY */ 0 << 3)
152 #define ADD_EAX_i32 0x05
153 #define ADD_r_rm 0x03
154 #define ADD_rm_r 0x01
155 #define ADDSD_x_xm 0x58
156 #define ADC (/* BINARY */ 2 << 3)
157 #define ADC_EAX_i32 0x15
158 #define ADC_r_rm 0x13
159 #define ADC_rm_r 0x11
160 #define AND (/* BINARY */ 4 << 3)
161 #define AND_EAX_i32 0x25
162 #define AND_r_rm 0x23
163 #define AND_rm_r 0x21
164 #define ANDPD_x_xm 0x54
165 #define BSR_r_rm (/* GROUP_0F */ 0xbd)
166 #define CALL_i32 0xe8
167 #define CALL_rm (/* GROUP_FF */ 2 << 3)
169 #define CMOVNE_r_rm (/* GROUP_0F */ 0x45)
170 #define CMP (/* BINARY */ 7 << 3)
171 #define CMP_EAX_i32 0x3d
172 #define CMP_r_rm 0x3b
173 #define CMP_rm_r 0x39
174 #define CVTPD2PS_x_xm 0x5a
175 #define CVTSI2SD_x_rm 0x2a
176 #define CVTTSD2SI_r_xm 0x2c
177 #define DIV (/* GROUP_F7 */ 6 << 3)
178 #define DIVSD_x_xm 0x5e
180 #define IDIV (/* GROUP_F7 */ 7 << 3)
181 #define IMUL (/* GROUP_F7 */ 5 << 3)
182 #define IMUL_r_rm (/* GROUP_0F */ 0xaf)
183 #define IMUL_r_rm_i8 0x6b
184 #define IMUL_r_rm_i32 0x69
189 #define JMP_rm (/* GROUP_FF */ 4 << 3)
191 #define MOV_r_rm 0x8b
192 #define MOV_r_i32 0xb8
193 #define MOV_rm_r 0x89
194 #define MOV_rm_i32 0xc7
195 #define MOV_rm8_i8 0xc6
196 #define MOV_rm8_r8 0x88
197 #define MOVSD_x_xm 0x10
198 #define MOVSD_xm_x 0x11
199 #define MOVSXD_r_rm 0x63
200 #define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
201 #define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
202 #define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
203 #define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
204 #define MUL (/* GROUP_F7 */ 4 << 3)
205 #define MULSD_x_xm 0x59
206 #define NEG_rm (/* GROUP_F7 */ 3 << 3)
208 #define NOT_rm (/* GROUP_F7 */ 2 << 3)
209 #define OR (/* BINARY */ 1 << 3)
211 #define OR_EAX_i32 0x0d
213 #define OR_rm8_r8 0x08
217 #define PUSH_i32 0x68
219 #define PUSH_rm (/* GROUP_FF */ 6 << 3)
221 #define RET_near 0xc3
223 #define SBB (/* BINARY */ 3 << 3)
224 #define SBB_EAX_i32 0x1d
225 #define SBB_r_rm 0x1b
226 #define SBB_rm_r 0x19
227 #define SAR (/* SHIFT */ 7 << 3)
228 #define SHL (/* SHIFT */ 4 << 3)
229 #define SHR (/* SHIFT */ 5 << 3)
230 #define SUB (/* BINARY */ 5 << 3)
231 #define SUB_EAX_i32 0x2d
232 #define SUB_r_rm 0x2b
233 #define SUB_rm_r 0x29
234 #define SUBSD_x_xm 0x5c
235 #define TEST_EAX_i32 0xa9
236 #define TEST_rm_r 0x85
237 #define UCOMISD_x_xm 0x2e
238 #define UNPCKLPD_x_xm 0x14
239 #define XCHG_EAX_r 0x90
240 #define XCHG_r_rm 0x87
241 #define XOR (/* BINARY */ 6 << 3)
242 #define XOR_EAX_i32 0x35
243 #define XOR_r_rm 0x33
244 #define XOR_rm_r 0x31
245 #define XORPD_x_xm 0x57
247 #define GROUP_0F 0x0f
248 #define GROUP_F7 0xf7
249 #define GROUP_FF 0xff
250 #define GROUP_BINARY_81 0x81
251 #define GROUP_BINARY_83 0x83
252 #define GROUP_SHIFT_1 0xd1
253 #define GROUP_SHIFT_N 0xc1
254 #define GROUP_SHIFT_CL 0xd3
257 #define MOD_DISP8 0x40
259 #define INC_SIZE(s) (*inst++ = (s), compiler->size += (s))
261 #define PUSH_REG(r) (*inst++ = (PUSH_r + (r)))
262 #define POP_REG(r) (*inst++ = (POP_r + (r)))
263 #define RET() (*inst++ = (RET_near))
264 #define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
266 #define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
268 /* Multithreading does not affect these static variables, since they store
269 built-in CPU features. Therefore they can be overwritten by different threads
270 if they detect the CPU features at the same time. */
271 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
272 static sljit_si cpu_has_sse2 = -1;
274 static sljit_si cpu_has_cmov = -1;
277 #include <cmnintrin.h>
278 #elif defined(_MSC_VER) && _MSC_VER >= 1400
282 static void get_cpu_features(void)
286 #if defined(_MSC_VER) && _MSC_VER >= 1400
290 features = (sljit_ui)CPUInfo[3];
292 #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
297 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
298 /* On x86-32, there is no red zone, so this
299 should work (no need for a local variable). */
303 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
309 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
310 : "%eax", "%ecx", "%edx"
312 : "%rax", "%rbx", "%rcx", "%rdx"
316 #else /* _MSC_VER && _MSC_VER >= 1400 */
325 #endif /* _MSC_VER && _MSC_VER >= 1400 */
327 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
328 cpu_has_sse2 = (features >> 26) & 0x1;
330 cpu_has_cmov = (features >> 15) & 0x1;
333 static sljit_ub get_jump_code(sljit_si type)
338 return 0x84 /* je */;
340 case SLJIT_NOT_EQUAL:
341 case SLJIT_D_NOT_EQUAL:
342 return 0x85 /* jne */;
346 return 0x82 /* jc */;
348 case SLJIT_GREATER_EQUAL:
349 case SLJIT_D_GREATER_EQUAL:
350 return 0x83 /* jae */;
353 case SLJIT_D_GREATER:
354 return 0x87 /* jnbe */;
356 case SLJIT_LESS_EQUAL:
357 case SLJIT_D_LESS_EQUAL:
358 return 0x86 /* jbe */;
361 return 0x8c /* jl */;
363 case SLJIT_SIG_GREATER_EQUAL:
364 return 0x8d /* jnl */;
366 case SLJIT_SIG_GREATER:
367 return 0x8f /* jnle */;
369 case SLJIT_SIG_LESS_EQUAL:
370 return 0x8e /* jle */;
373 case SLJIT_MUL_OVERFLOW:
374 return 0x80 /* jo */;
376 case SLJIT_NOT_OVERFLOW:
377 case SLJIT_MUL_NOT_OVERFLOW:
378 return 0x81 /* jno */;
380 case SLJIT_D_UNORDERED:
381 return 0x8a /* jp */;
383 case SLJIT_D_ORDERED:
384 return 0x8b /* jpo */;
389 static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type);
391 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
392 static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type);
395 static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, sljit_si type)
400 if (jump->flags & JUMP_LABEL)
401 label_addr = (sljit_uw)(code + jump->u.label->size);
403 label_addr = jump->u.target;
404 short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
406 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
407 if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
408 return generate_far_jump_code(jump, code_ptr, type);
411 if (type == SLJIT_JUMP) {
413 *code_ptr++ = JMP_i8;
415 *code_ptr++ = JMP_i32;
418 else if (type >= SLJIT_FAST_CALL) {
420 *code_ptr++ = CALL_i32;
423 else if (short_jump) {
424 *code_ptr++ = get_jump_code(type) - 0x10;
428 *code_ptr++ = GROUP_0F;
429 *code_ptr++ = get_jump_code(type);
434 jump->flags |= PATCH_MB;
435 code_ptr += sizeof(sljit_sb);
437 jump->flags |= PATCH_MW;
438 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
439 code_ptr += sizeof(sljit_sw);
441 code_ptr += sizeof(sljit_si);
448 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
450 struct sljit_memory_fragment *buf;
457 struct sljit_label *label;
458 struct sljit_jump *jump;
459 struct sljit_const *const_;
462 CHECK_PTR(check_sljit_generate_code(compiler));
463 reverse_buf(compiler);
465 /* Second code generation pass. */
466 code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
467 PTR_FAIL_WITH_EXEC_IF(code);
471 label = compiler->labels;
472 jump = compiler->jumps;
473 const_ = compiler->consts;
475 buf_ptr = buf->memory;
476 buf_end = buf_ptr + buf->used_size;
480 /* The code is already generated. */
481 SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
487 jump->addr = (sljit_uw)code_ptr;
488 if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
489 code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
491 code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
494 else if (*buf_ptr == 0) {
495 label->addr = (sljit_uw)code_ptr;
496 label->size = code_ptr - code;
499 else if (*buf_ptr == 1) {
500 const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
501 const_ = const_->next;
504 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
505 *code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
507 *(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw));
508 code_ptr += sizeof(sljit_sw);
509 buf_ptr += sizeof(sljit_sw) - 1;
511 code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
512 buf_ptr += sizeof(sljit_sw);
517 } while (buf_ptr < buf_end);
518 SLJIT_ASSERT(buf_ptr == buf_end);
522 SLJIT_ASSERT(!label);
524 SLJIT_ASSERT(!const_);
526 jump = compiler->jumps;
528 if (jump->flags & PATCH_MB) {
529 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) <= 127);
530 *(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb)));
531 } else if (jump->flags & PATCH_MW) {
532 if (jump->flags & JUMP_LABEL) {
533 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
534 *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
536 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
537 *(sljit_si*)jump->addr = (sljit_si)(jump->u.label->addr - (jump->addr + sizeof(sljit_si)));
541 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
542 *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
544 SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
545 *(sljit_si*)jump->addr = (sljit_si)(jump->u.target - (jump->addr + sizeof(sljit_si)));
549 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
550 else if (jump->flags & PATCH_MD)
551 *(sljit_sw*)jump->addr = jump->u.label->addr;
557 /* Maybe we waste some space because of short jumps. */
558 SLJIT_ASSERT(code_ptr <= code + compiler->size);
559 compiler->error = SLJIT_ERR_COMPILED;
560 compiler->executable_size = code_ptr - code;
564 /* --------------------------------------------------------------------- */
566 /* --------------------------------------------------------------------- */
568 static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
569 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
570 sljit_si dst, sljit_sw dstw,
571 sljit_si src1, sljit_sw src1w,
572 sljit_si src2, sljit_sw src2w);
574 static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
575 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
576 sljit_si dst, sljit_sw dstw,
577 sljit_si src1, sljit_sw src1w,
578 sljit_si src2, sljit_sw src2w);
580 static sljit_si emit_mov(struct sljit_compiler *compiler,
581 sljit_si dst, sljit_sw dstw,
582 sljit_si src, sljit_sw srcw);
584 static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler)
588 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
589 inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
593 inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
598 *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
601 *inst++ = (sljit_ub)sizeof(sljit_sw);
603 compiler->flags_saved = 1;
604 return SLJIT_SUCCESS;
607 static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags)
611 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
612 inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
617 inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
623 *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
626 *inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw);
627 compiler->flags_saved = keep_flags;
628 return SLJIT_SUCCESS;
634 static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
636 /* Workaround for calling the internal _chkstk() function on Windows.
637 This function touches every 4k page belonging to the requested stack space,
638 whose size is passed in local_size. This is necessary on Windows, where
639 the stack can only grow in 4k steps. However, this function just burns
640 CPU cycles if the stack is already large enough; you cannot know that in
641 advance, so it must always be called. I think this is a bad design in
642 general even if it has some reasons. */
643 *(volatile sljit_si*)alloca(local_size) = 0;
648 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
649 #include "sljitNativeX86_32.c"
651 #include "sljitNativeX86_64.c"
654 static sljit_si emit_mov(struct sljit_compiler *compiler,
655 sljit_si dst, sljit_sw dstw,
656 sljit_si src, sljit_sw srcw)
660 if (dst == SLJIT_UNUSED) {
661 /* No destination, doesn't need to setup flags. */
662 if (src & SLJIT_MEM) {
663 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
667 return SLJIT_SUCCESS;
669 if (FAST_IS_REG(src)) {
670 inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
673 return SLJIT_SUCCESS;
675 if (src & SLJIT_IMM) {
676 if (FAST_IS_REG(dst)) {
677 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
678 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
680 if (!compiler->mode32) {
681 if (NOT_HALFWORD(srcw))
682 return emit_load_imm64(compiler, dst, srcw);
685 return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
688 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
689 if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
690 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
691 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
694 return SLJIT_SUCCESS;
697 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
700 return SLJIT_SUCCESS;
702 if (FAST_IS_REG(dst)) {
703 inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
706 return SLJIT_SUCCESS;
709 /* Memory to memory move. Requires two instructions. */
710 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
713 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
716 return SLJIT_SUCCESS;
719 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
720 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
722 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
725 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
730 CHECK(check_sljit_emit_op0(compiler, op));
732 switch (GET_OPCODE(op)) {
733 case SLJIT_BREAKPOINT:
734 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
740 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
751 compiler->flags_saved = 0;
752 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
754 SLJIT_COMPILE_ASSERT(
755 reg_map[SLJIT_R0] == 0
756 && reg_map[SLJIT_R1] == 2
757 && reg_map[TMP_REG1] > 7,
758 invalid_register_assignment_for_div_mul);
760 SLJIT_COMPILE_ASSERT(
761 reg_map[SLJIT_R0] == 0
762 && reg_map[SLJIT_R1] < 7
763 && reg_map[TMP_REG1] == 2,
764 invalid_register_assignment_for_div_mul);
766 compiler->mode32 = op & SLJIT_INT_OP;
768 SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments);
771 if ((op | 0x2) == SLJIT_UDIVI) {
772 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
773 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
774 inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
776 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
782 if ((op | 0x2) == SLJIT_SDIVI) {
783 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
784 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
787 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
788 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
793 if (compiler->mode32) {
794 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
799 inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
808 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
809 inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
813 *inst = MOD_REG | ((op >= SLJIT_UDIVMOD) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
816 size = (!compiler->mode32 || op >= SLJIT_UDIVMOD) ? 3 : 2;
818 size = (!compiler->mode32) ? 3 : 2;
820 inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
824 if (!compiler->mode32)
825 *inst++ = REX_W | ((op >= SLJIT_UDIVMOD) ? REX_B : 0);
826 else if (op >= SLJIT_UDIVMOD)
829 *inst = MOD_REG | ((op >= SLJIT_UDIVMOD) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
831 if (!compiler->mode32)
834 *inst = MOD_REG | reg_map[SLJIT_R1];
853 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
854 if (op <= SLJIT_SDIVMOD)
855 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
857 if (op >= SLJIT_UDIVI)
858 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
863 return SLJIT_SUCCESS;
866 #define ENCODE_PREFIX(prefix) \
868 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
874 static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign,
875 sljit_si dst, sljit_sw dstw,
876 sljit_si src, sljit_sw srcw)
880 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
884 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
885 compiler->mode32 = 0;
888 if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
889 return SLJIT_SUCCESS; /* Empty instruction. */
891 if (src & SLJIT_IMM) {
892 if (FAST_IS_REG(dst)) {
893 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
894 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
896 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
899 return SLJIT_SUCCESS;
902 inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
905 return SLJIT_SUCCESS;
908 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
910 if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
911 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
912 if (reg_map[src] >= 4) {
913 SLJIT_ASSERT(dst_r == TMP_REG1);
914 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
921 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
922 else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
923 /* src, dst are registers. */
924 SLJIT_ASSERT(SLOW_IS_REG(dst));
925 if (reg_map[dst] < 4) {
927 EMIT_MOV(compiler, dst, 0, src, 0);
928 inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
931 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
935 EMIT_MOV(compiler, dst, 0, src, 0);
938 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
942 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
947 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
952 return SLJIT_SUCCESS;
956 /* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
957 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
960 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
963 if (dst & SLJIT_MEM) {
964 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
965 if (dst_r == TMP_REG1) {
966 /* Find a non-used register, whose reg_map[src] < 4. */
967 if ((dst & REG_MASK) == SLJIT_R0) {
968 if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
974 if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
976 else if ((dst & REG_MASK) == SLJIT_R1)
982 if (work_r == SLJIT_R0) {
983 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
986 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
991 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
995 if (work_r == SLJIT_R0) {
996 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
999 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
1005 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
1010 inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
1016 return SLJIT_SUCCESS;
1019 static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign,
1020 sljit_si dst, sljit_sw dstw,
1021 sljit_si src, sljit_sw srcw)
1026 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1027 compiler->mode32 = 0;
1030 if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
1031 return SLJIT_SUCCESS; /* Empty instruction. */
1033 if (src & SLJIT_IMM) {
1034 if (FAST_IS_REG(dst)) {
1035 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1036 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
1038 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1041 return SLJIT_SUCCESS;
1044 inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
1047 return SLJIT_SUCCESS;
1050 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1052 if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
1055 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
1058 *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
1061 if (dst & SLJIT_MEM) {
1062 inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
1067 return SLJIT_SUCCESS;
1070 static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode,
1071 sljit_si dst, sljit_sw dstw,
1072 sljit_si src, sljit_sw srcw)
1076 if (dst == SLJIT_UNUSED) {
1077 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1078 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1082 return SLJIT_SUCCESS;
1084 if (dst == src && dstw == srcw) {
1085 /* Same input and output */
1086 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1090 return SLJIT_SUCCESS;
1092 if (FAST_IS_REG(dst)) {
1093 EMIT_MOV(compiler, dst, 0, src, srcw);
1094 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1098 return SLJIT_SUCCESS;
1100 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1101 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1105 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1106 return SLJIT_SUCCESS;
1109 static sljit_si emit_not_with_flags(struct sljit_compiler *compiler,
1110 sljit_si dst, sljit_sw dstw,
1111 sljit_si src, sljit_sw srcw)
1115 if (dst == SLJIT_UNUSED) {
1116 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1117 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1121 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1124 return SLJIT_SUCCESS;
1126 if (FAST_IS_REG(dst)) {
1127 EMIT_MOV(compiler, dst, 0, src, srcw);
1128 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1132 inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
1135 return SLJIT_SUCCESS;
1137 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1138 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1142 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1145 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1146 return SLJIT_SUCCESS;
1149 static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags,
1150 sljit_si dst, sljit_sw dstw,
1151 sljit_si src, sljit_sw srcw)
1156 SLJIT_UNUSED_ARG(op_flags);
1157 if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
1158 /* Just set the zero flag. */
1159 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1160 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1164 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1165 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
1167 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, TMP_REG1, 0);
1171 return SLJIT_SUCCESS;
1174 if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
1175 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
1180 inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
1185 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1186 if (FAST_IS_REG(dst))
1189 /* Find an unused temporary register. */
1190 if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
1192 else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
1196 EMIT_MOV(compiler, dst, dstw, dst_r, 0);
1198 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
1200 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
1201 compiler->mode32 = 0;
1202 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
1203 compiler->mode32 = op_flags & SLJIT_INT_OP;
1206 if (cpu_has_cmov == -1)
1210 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
1213 *inst = CMOVNE_r_rm;
1215 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1216 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1223 *inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
1225 inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
1231 *inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
1233 *inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
1237 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1238 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
1240 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
1245 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1246 if (dst & SLJIT_MEM) {
1247 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
1252 if (dst & SLJIT_MEM)
1253 EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
1255 return SLJIT_SUCCESS;
1258 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
1259 sljit_si dst, sljit_sw dstw,
1260 sljit_si src, sljit_sw srcw)
1263 sljit_si update = 0;
1264 sljit_si op_flags = GET_ALL_FLAGS(op);
1265 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1266 sljit_si dst_is_ereg = 0;
1267 sljit_si src_is_ereg = 0;
1269 # define src_is_ereg 0
1273 CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1274 ADJUST_LOCAL_OFFSET(dst, dstw);
1275 ADJUST_LOCAL_OFFSET(src, srcw);
1277 CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
1278 CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
1279 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1280 compiler->mode32 = op_flags & SLJIT_INT_OP;
1283 op = GET_OPCODE(op);
1284 if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
1285 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1286 compiler->mode32 = 0;
1289 if (op_flags & SLJIT_INT_OP) {
1290 if (FAST_IS_REG(src) && src == dst) {
1291 if (!TYPE_CAST_NEEDED(op))
1292 return SLJIT_SUCCESS;
1294 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1295 if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
1297 if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
1299 if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
1301 if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
1306 SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
1307 if (op >= SLJIT_MOVU) {
1312 if (src & SLJIT_IMM) {
1315 srcw = (sljit_ub)srcw;
1318 srcw = (sljit_sb)srcw;
1321 srcw = (sljit_uh)srcw;
1324 srcw = (sljit_sh)srcw;
1326 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1328 srcw = (sljit_ui)srcw;
1331 srcw = (sljit_si)srcw;
1335 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1336 if (SLJIT_UNLIKELY(dst_is_ereg))
1337 return emit_mov(compiler, dst, dstw, src, srcw);
1341 if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) {
1342 inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw);
1345 src &= SLJIT_MEM | 0xf;
1349 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1350 if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
1351 SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
1359 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1363 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
1366 FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
1369 FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
1372 FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
1375 FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
1377 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1379 FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
1382 FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
1387 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1388 if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
1389 return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
1392 if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
1393 inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw);
1397 return SLJIT_SUCCESS;
1400 if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
1401 compiler->flags_saved = 0;
1405 if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
1406 return emit_not_with_flags(compiler, dst, dstw, src, srcw);
1407 return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
1410 if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
1411 FAIL_IF(emit_save_flags(compiler));
1412 return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
1415 if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
1416 FAIL_IF(emit_save_flags(compiler));
1417 return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
1420 return SLJIT_SUCCESS;
1422 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1427 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* 64-bit variant: immediates that fit in 32 bits (or mode32) are encoded
   directly; wider immediates are first loaded into TMP_REG2. */
1429 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1430 if (IS_HALFWORD(immw) || compiler->mode32) { \
1431 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1433 *(inst + 1) |= (op_imm); \
1436 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
1437 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
/* 64-bit variant: short EAX/RAX-form encoding; REX_W added outside mode32. */
1442 #define BINARY_EAX_IMM(op_eax_imm, immw) \
1443 FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
/* 32-bit variant: every immediate can be encoded directly. */
1447 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1448 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1450 *(inst + 1) |= (op_imm);
/* 32-bit variant of the short EAX-form encoding. */
1452 #define BINARY_EAX_IMM(op_eax_imm, immw) \
1453 FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
/* Emits a commutative (cumulative) binary operation such as ADD/ADC/AND/OR/XOR.
   op_rm / op_mr / op_imm / op_eax_imm select the reg<-r/m, r/m<-reg,
   r/m<-imm and short EAX-immediate encodings of the same operation.
   Commutativity is exploited below: dst == src2 is handled like dst == src1. */
1457 static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
1458 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
1459 sljit_si dst, sljit_sw dstw,
1460 sljit_si src1, sljit_sw src1w,
1461 sljit_si src2, sljit_sw src2w)
/* Result unused: perform the operation on TMP_REG1 (only the flags matter). */
1465 if (dst == SLJIT_UNUSED) {
1466 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1467 if (src2 & SLJIT_IMM) {
1468 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1471 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1475 return SLJIT_SUCCESS;
/* In-place form: dst is the same operand as src1. */
1478 if (dst == src1 && dstw == src1w) {
1479 if (src2 & SLJIT_IMM) {
1480 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1481 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1483 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
/* dst is EAX/RAX and the immediate needs more than 8 bits: use the
   shorter accumulator-immediate encoding. */
1485 BINARY_EAX_IMM(op_eax_imm, src2w);
1488 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1491 else if (FAST_IS_REG(dst)) {
1492 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1496 else if (FAST_IS_REG(src2)) {
1497 /* Special exception for sljit_emit_op_flags. */
1498 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1503 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1504 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1508 return SLJIT_SUCCESS;
1511 /* Only for cumulative operations. */
1512 if (dst == src2 && dstw == src2w) {
1513 if (src1 & SLJIT_IMM) {
1514 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1515 if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1517 if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
1519 BINARY_EAX_IMM(op_eax_imm, src1w);
1522 BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
1525 else if (FAST_IS_REG(dst)) {
1526 inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
1530 else if (FAST_IS_REG(src1)) {
1531 inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
1536 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1537 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1541 return SLJIT_SUCCESS;
1544 /* General version. */
1545 if (FAST_IS_REG(dst)) {
1546 EMIT_MOV(compiler, dst, 0, src1, src1w);
1547 if (src2 & SLJIT_IMM) {
1548 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1551 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1557 /* This version requires less memory writing. */
1558 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1559 if (src2 & SLJIT_IMM) {
1560 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1563 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1567 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1570 return SLJIT_SUCCESS;
/* Emits a non-commutative binary operation such as SUB/SBB.
   Same encoding-selector parameters as emit_cum_binary, but there is no
   dst == src2 shortcut (the operand order matters here). */
1573 static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
1574 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
1575 sljit_si dst, sljit_sw dstw,
1576 sljit_si src1, sljit_sw src1w,
1577 sljit_si src2, sljit_sw src2w)
/* Result unused: perform the operation on TMP_REG1 (only the flags matter). */
1581 if (dst == SLJIT_UNUSED) {
1582 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1583 if (src2 & SLJIT_IMM) {
1584 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1587 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1591 return SLJIT_SUCCESS;
/* In-place form: dst is the same operand as src1. */
1594 if (dst == src1 && dstw == src1w) {
1595 if (src2 & SLJIT_IMM) {
1596 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1597 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1599 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
/* Shorter accumulator-immediate encoding for EAX/RAX destinations. */
1601 BINARY_EAX_IMM(op_eax_imm, src2w);
1604 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1607 else if (FAST_IS_REG(dst)) {
1608 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1612 else if (FAST_IS_REG(src2)) {
1613 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1618 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1619 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1623 return SLJIT_SUCCESS;
1626 /* General version. */
/* dst != src2 is required: copying src1 into dst first would otherwise
   destroy src2. */
1627 if (FAST_IS_REG(dst) && dst != src2) {
1628 EMIT_MOV(compiler, dst, 0, src1, src1w);
1629 if (src2 & SLJIT_IMM) {
1630 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1633 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1639 /* This version requires less memory writing. */
1640 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1641 if (src2 & SLJIT_IMM) {
1642 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1645 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1649 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1652 return SLJIT_SUCCESS;
/* Emits a signed multiply using the IMUL family: two-operand IMUL r,r/m
   when neither source is immediate, or the IMUL r,r/m,imm8 / imm32 forms
   when one source is a constant. Result lands in a register (TMP_REG1 when
   dst is not a register) and is stored to dst at the end if needed. */
1655 static sljit_si emit_mul(struct sljit_compiler *compiler,
1656 sljit_si dst, sljit_sw dstw,
1657 sljit_si src1, sljit_sw src1w,
1658 sljit_si src2, sljit_sw src2w)
1663 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1665 /* Register destination. */
1666 if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1667 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
/* Multiplication is commutative, so dst_r == src2 works the same way. */
1672 else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1673 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1678 else if (src1 & SLJIT_IMM) {
1679 if (src2 & SLJIT_IMM) {
/* Both operands immediate: materialize src2, then fall through to the
   immediate-times-register path below. */
1680 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
/* IMUL r, r/m, imm8 for byte-sized constants. */
1685 if (src1w <= 127 && src1w >= -128) {
1686 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1688 *inst = IMUL_r_rm_i8;
1689 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1692 *inst = (sljit_sb)src1w;
1694 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1696 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1698 *inst = IMUL_r_rm_i32;
1699 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1702 *(sljit_sw*)inst = src1w;
/* 64-bit: IMUL has no imm64 form; constants that fit in 32 bits use
   the imm32 encoding, anything wider goes through TMP_REG2. */
1705 else if (IS_HALFWORD(src1w)) {
1706 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1708 *inst = IMUL_r_rm_i32;
1709 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1712 *(sljit_si*)inst = (sljit_si)src1w;
1715 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1717 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1718 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1725 else if (src2 & SLJIT_IMM) {
1726 /* Note: src1 is NOT immediate. */
1728 if (src2w <= 127 && src2w >= -128) {
1729 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1731 *inst = IMUL_r_rm_i8;
1732 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1735 *inst = (sljit_sb)src2w;
1737 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1739 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1741 *inst = IMUL_r_rm_i32;
1742 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1745 *(sljit_sw*)inst = src2w;
1748 else if (IS_HALFWORD(src2w)) {
1749 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1751 *inst = IMUL_r_rm_i32;
1752 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1755 *(sljit_si*)inst = (sljit_si)src2w;
1758 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
1760 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1761 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1769 /* Neither argument is immediate. */
1770 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1772 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1773 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
/* Store the product when the destination is not a register. */
1779 if (dst_r == TMP_REG1)
1780 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1782 return SLJIT_SUCCESS;
/* Tries to emit an addition as a single LEA instruction (flags untouched).
   Returns SLJIT_ERR_UNSUPPORTED when the operand combination has no LEA
   form, in which case the caller falls back to the normal ADD path. */
1785 static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_flags,
1786 sljit_si dst, sljit_sw dstw,
1787 sljit_si src1, sljit_sw src1w,
1788 sljit_si src2, sljit_sw src2w)
1791 sljit_si dst_r, done = 0;
1793 /* These cases better be left to handled by normal way. */
1795 if (dst == src1 && dstw == src1w)
1796 return SLJIT_ERR_UNSUPPORTED;
1797 if (dst == src2 && dstw == src2w)
1798 return SLJIT_ERR_UNSUPPORTED;
1801 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1803 if (FAST_IS_REG(src1)) {
1804 if (FAST_IS_REG(src2)) {
/* reg + reg: LEA dst, [src1 + src2]. */
1805 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
1810 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* reg + imm: the displacement must fit in 32 bits on x86-64. */
1811 if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1812 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w);
1814 if (src2 & SLJIT_IMM) {
1815 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
/* Mirror of the above with the operands swapped (addition commutes). */
1822 else if (FAST_IS_REG(src2)) {
1823 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1824 if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1825 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w);
1827 if (src1 & SLJIT_IMM) {
1828 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
1837 if (dst_r == TMP_REG1)
1838 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
1839 return SLJIT_SUCCESS;
1841 return SLJIT_ERR_UNSUPPORTED;
/* Emits a CMP between src1 and src2; only the flags result is produced
   (no destination operand). */
1844 static sljit_si emit_cmp_binary(struct sljit_compiler *compiler,
1845 sljit_si src1, sljit_sw src1w,
1846 sljit_si src2, sljit_sw src2w)
1850 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* src1 is EAX/RAX and the immediate does not fit in 8 bits: use the
   shorter CMP EAX, imm32 encoding. */
1851 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1853 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1855 BINARY_EAX_IMM(CMP_EAX_i32, src2w);
1856 return SLJIT_SUCCESS;
1859 if (FAST_IS_REG(src1)) {
1860 if (src2 & SLJIT_IMM) {
1861 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
1864 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1868 return SLJIT_SUCCESS;
/* src2 in a register: CMP r/m, reg form works directly. */
1871 if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
1872 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1875 return SLJIT_SUCCESS;
1878 if (src2 & SLJIT_IMM) {
/* Both immediate: src1 must first be materialized in TMP_REG1. */
1879 if (src1 & SLJIT_IMM) {
1880 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1884 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
1887 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1888 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1892 return SLJIT_SUCCESS;
/* Emits a TEST (bitwise AND affecting only flags) between src1 and src2.
   TEST is commutative, so both operand orders are tried for the short
   encodings before falling back to a TMP_REG1 copy. */
1895 static sljit_si emit_test_binary(struct sljit_compiler *compiler,
1896 sljit_si src1, sljit_sw src1w,
1897 sljit_si src2, sljit_sw src2w)
1901 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* src1 is EAX/RAX and the immediate needs more than 8 bits: use the
   shorter TEST EAX, imm32 encoding. */
1902 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1904 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1906 BINARY_EAX_IMM(TEST_EAX_i32, src2w);
1907 return SLJIT_SUCCESS;
1910 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* Mirror case: src2 is EAX/RAX and src1 is the immediate. BUGFIX: the
   immediate flag must be tested on src1 here - the old (src2 & SLJIT_IMM)
   could never be true together with src2 == SLJIT_R0, so this fast path
   was dead code (compare the 32-bit variant directly below). */
1911 if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1913 if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1915 BINARY_EAX_IMM(TEST_EAX_i32, src1w);
1916 return SLJIT_SUCCESS;
1919 if (!(src1 & SLJIT_IMM)) {
1920 if (src2 & SLJIT_IMM) {
1921 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* 64-bit: TEST r/m, imm32 only when the constant fits; otherwise the
   constant is loaded into TMP_REG2 first. */
1922 if (IS_HALFWORD(src2w) || compiler->mode32) {
1923 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
1928 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1929 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, src1w);
1934 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
1938 return SLJIT_SUCCESS;
1940 else if (FAST_IS_REG(src1)) {
1941 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1944 return SLJIT_SUCCESS;
/* Same handling with the roles of src1 and src2 exchanged. */
1948 if (!(src2 & SLJIT_IMM)) {
1949 if (src1 & SLJIT_IMM) {
1950 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1951 if (IS_HALFWORD(src1w) || compiler->mode32) {
1952 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
1957 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1958 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, src2w);
1963 inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
1967 return SLJIT_SUCCESS;
1969 else if (FAST_IS_REG(src2)) {
1970 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1973 return SLJIT_SUCCESS;
/* General fallback: copy src1 into TMP_REG1, then TEST against src2. */
1977 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1978 if (src2 & SLJIT_IMM) {
1979 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1980 if (IS_HALFWORD(src2w) || compiler->mode32) {
1981 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1986 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1987 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
1992 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1998 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2002 return SLJIT_SUCCESS;
/* Emits a shift/rotate. x86 variable shifts require the count in CL
   (SLJIT_PREF_SHIFT_REG), so the code below shuffles operands so that
   either the count is an immediate or it ends up in ECX, preserving the
   previous ECX value when necessary. */
2005 static sljit_si emit_shift(struct sljit_compiler *compiler,
2007 sljit_si dst, sljit_sw dstw,
2008 sljit_si src1, sljit_sw src1w,
2009 sljit_si src2, sljit_sw src2w)
/* Easy cases: count is an immediate, or already in the CL register. */
2013 if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
2014 if (dst == src1 && dstw == src1w) {
2015 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
2018 return SLJIT_SUCCESS;
2020 if (dst == SLJIT_UNUSED) {
2021 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2022 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2025 return SLJIT_SUCCESS;
/* dst and count are both ECX: shift in TMP_REG1, then move back. */
2027 if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
2028 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2029 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2032 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2033 return SLJIT_SUCCESS;
2035 if (FAST_IS_REG(dst)) {
2036 EMIT_MOV(compiler, dst, 0, src1, src1w);
2037 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
2040 return SLJIT_SUCCESS;
2043 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2044 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2047 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2048 return SLJIT_SUCCESS;
/* Variable count that is not yet in ECX. */
2051 if (dst == SLJIT_PREF_SHIFT_REG) {
2052 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2053 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2054 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2057 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
/* dst is a register not aliased with ECX or the count operand: shift in
   place and restore the saved ECX afterwards. */
2059 else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
2061 EMIT_MOV(compiler, dst, 0, src1, src1w);
2062 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2063 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2064 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
2067 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2070 /* This case is really difficult, since ecx itself may used for
2071 addressing, and we must ensure to work even in that case. */
2072 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2073 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2074 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0)
2076 /* [esp+0] contains the flags. */
2077 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
2079 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2080 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2083 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2084 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
2086 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw));
2088 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2091 return SLJIT_SUCCESS;
/* Emits a shift while guaranteeing well-defined flag behavior: the CPU
   leaves flags untouched for a zero shift count, so a zero-count shift
   must be turned into an operation that does set them when needed. */
2094 static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler,
2095 sljit_ub mode, sljit_si set_flags,
2096 sljit_si dst, sljit_sw dstw,
2097 sljit_si src1, sljit_sw src1w,
2098 sljit_si src2, sljit_sw src2w)
2100 /* The CPU does not set flags if the shift count is 0. */
2101 if (src2 & SLJIT_IMM) {
2102 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2103 if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
2104 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2106 if ((src2w & 0x1f) != 0)
2107 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
/* Count is effectively zero: a plain move suffices when flags are not
   requested ... */
2110 return emit_mov(compiler, dst, dstw, src1, src1w);
2111 /* OR dst, src, 0 */
/* ... otherwise OR with 0 copies the value AND sets the flags. */
2112 return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2113 dst, dstw, src1, src1w, SLJIT_IMM, 0);
2117 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
/* Variable count with flags requested: compare around the shift so the
   flags reflect the result even for a zero count. */
2119 if (!FAST_IS_REG(dst))
2120 FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2122 FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w));
2124 if (FAST_IS_REG(dst))
2125 return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
2126 return SLJIT_SUCCESS;
/* Public entry point for two-operand operations (ADD/ADDC/SUB/SUBC/MUL/
   AND/OR/XOR/SHL/LSHR/ASHR). Validates arguments, normalizes operands,
   manages the saved-flags state, and dispatches to the specialized
   emitters above. */
2129 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
2130 sljit_si dst, sljit_sw dstw,
2131 sljit_si src1, sljit_sw src1w,
2132 sljit_si src2, sljit_sw src2w)
2135 CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2136 ADJUST_LOCAL_OFFSET(dst, dstw);
2137 ADJUST_LOCAL_OFFSET(src1, src1w);
2138 ADJUST_LOCAL_OFFSET(src2, src2w);
/* On x86-32 some virtual registers live in memory; remap them. */
2140 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2141 CHECK_EXTRA_REGS(src1, src1w, (void)0);
2142 CHECK_EXTRA_REGS(src2, src2w, (void)0);
2143 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2144 compiler->mode32 = op & SLJIT_INT_OP;
2147 if (GET_OPCODE(op) >= SLJIT_MUL) {
2148 if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2149 compiler->flags_saved = 0;
2150 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2151 FAIL_IF(emit_save_flags(compiler));
2154 switch (GET_OPCODE(op)) {
/* ADD: try the flag-free LEA form first when no flags are requested. */
2156 if (!GET_FLAGS(op)) {
2157 if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
2158 return compiler->error;
2161 compiler->flags_saved = 0;
2162 if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2163 FAIL_IF(emit_save_flags(compiler));
2164 return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
2165 dst, dstw, src1, src1w, src2, src2w);
/* ADDC: the carry flag must be live, so restore saved flags first. */
2167 if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
2168 FAIL_IF(emit_restore_flags(compiler, 1));
2169 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
2170 FAIL_IF(emit_save_flags(compiler));
2171 if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2172 compiler->flags_saved = 0;
2173 return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
2174 dst, dstw, src1, src1w, src2, src2w);
/* SUB: an immediate subtrahend can become LEA with a negated offset. */
2176 if (!GET_FLAGS(op)) {
2177 if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
2178 return compiler->error;
2181 compiler->flags_saved = 0;
2182 if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2183 FAIL_IF(emit_save_flags(compiler));
2184 if (dst == SLJIT_UNUSED)
2185 return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
2186 return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
2187 dst, dstw, src1, src1w, src2, src2w);
2189 if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
2190 FAIL_IF(emit_restore_flags(compiler, 1));
2191 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
2192 FAIL_IF(emit_save_flags(compiler));
2193 if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2194 compiler->flags_saved = 0;
2195 return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
2196 dst, dstw, src1, src1w, src2, src2w);
2198 return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
/* AND with unused destination degenerates to TEST. */
2200 if (dst == SLJIT_UNUSED)
2201 return emit_test_binary(compiler, src1, src1w, src2, src2w);
2202 return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
2203 dst, dstw, src1, src1w, src2, src2w);
2205 return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2206 dst, dstw, src1, src1w, src2, src2w);
2208 return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
2209 dst, dstw, src1, src1w, src2, src2w);
2211 return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
2212 dst, dstw, src1, src1w, src2, src2w);
2214 return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
2215 dst, dstw, src1, src1w, src2, src2w);
2217 return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
2218 dst, dstw, src1, src1w, src2, src2w);
2221 return SLJIT_SUCCESS;
/* Returns the hardware register number for a virtual SLJIT register.
   On x86-32, SLJIT_R3..R6 live in memory (see CHECK_EXTRA_REGS) and have
   no hardware index. */
2224 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
2226 CHECK_REG_INDEX(check_sljit_get_register_index(reg));
2227 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2228 if (reg >= SLJIT_R3 && reg <= SLJIT_R6)
2231 return reg_map[reg];
/* Returns the hardware index of a virtual floating-point register. */
2234 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
2236 CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
/* Copies `size` bytes of raw machine code supplied by the caller directly
   into the instruction stream. */
2240 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
2241 void *instruction, sljit_si size)
2246 CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2248 inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
2251 SLJIT_MEMMOVE(inst, instruction, size);
2252 return SLJIT_SUCCESS;
2255 /* --------------------------------------------------------------------- */
2256 /* Floating point operators */
2257 /* --------------------------------------------------------------------- */
2259 /* Alignment + 2 * 16 bytes. */
/* Backing store plus an aligned view: sse2_buffer points at the first
   16-byte-aligned address inside sse2_data. */
2260 static sljit_si sse2_data[3 + (4 + 4) * 2];
2261 static sljit_si *sse2_buffer;
/* One-time setup of SSE2 sign/abs bit-mask constants used by FNEG/FABS:
   sign-bit masks (0x80000000...) for negation, all-but-sign masks
   (0x7fffffff...) for absolute value. */
2263 static void init_compiler(void)
2265 sse2_buffer = (sljit_si*)(((sljit_uw)sse2_data + 15) & ~0xf);
2266 /* Single precision constants. */
2267 sse2_buffer[0] = 0x80000000;
2268 sse2_buffer[4] = 0x7fffffff;
2269 /* Double precision constants. */
2271 sse2_buffer[9] = 0x80000000;
2272 sse2_buffer[12] = 0xffffffff;
2273 sse2_buffer[13] = 0x7fffffff;
/* Reports whether floating-point code generation is available: either
   forced by SLJIT_IS_FPU_AVAILABLE, detected via CPUID (cached in
   cpu_has_sse2), or assumed when detection is disabled. */
2276 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
2278 #ifdef SLJIT_IS_FPU_AVAILABLE
2279 return SLJIT_IS_FPU_AVAILABLE;
2280 #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
2281 if (cpu_has_sse2 == -1)
2283 return cpu_has_sse2;
2284 #else /* SLJIT_DETECT_SSE2 */
2286 #endif /* SLJIT_DETECT_SSE2 */
/* Emits a scalar SSE2 arithmetic/move instruction (two-byte opcode) with
   the F3 (single) or F2 (double) mandatory prefix. */
2289 static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
2290 sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
2294 inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2298 return SLJIT_SUCCESS;
/* Emits a packed/logic SSE2 instruction, optionally with the 66 prefix
   (selects the packed-double form of e.g. XORPD/ANDPD/UCOMISD). */
2301 static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
2302 sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
2306 inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2310 return SLJIT_SUCCESS;
/* Loads a float/double from src into xmm register dst (MOVSS/MOVSD). */
2313 static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler,
2314 sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw)
2316 return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
/* Stores xmm register src to dst (MOVSS/MOVSD, memory form). */
2319 static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler,
2320 sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src)
2322 return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
/* Converts a float/double to an integer (truncating, CVTTSS2SI/CVTTSD2SI)
   and moves the result to dst. */
2325 static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
2326 sljit_si dst, sljit_sw dstw,
2327 sljit_si src, sljit_sw srcw)
2329 sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2332 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* Converting to a full machine word needs the 64-bit (REX.W) form. */
2333 if (GET_OPCODE(op) == SLJIT_CONVW_FROMD)
2334 compiler->mode32 = 0;
2337 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
2340 *inst = CVTTSD2SI_r_xm;
2342 if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
2343 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2344 return SLJIT_SUCCESS;
/* Converts an integer to a float/double (CVTSI2SS/CVTSI2SD). Immediates
   are first materialized in TMP_REG1. */
2347 static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
2348 sljit_si dst, sljit_sw dstw,
2349 sljit_si src, sljit_sw srcw)
2351 sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2354 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* Converting from a full machine word needs the 64-bit (REX.W) form. */
2355 if (GET_OPCODE(op) == SLJIT_CONVD_FROMW)
2356 compiler->mode32 = 0;
2359 if (src & SLJIT_IMM) {
2360 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* 32-bit integer source: truncate the immediate to 32 bits. */
2361 if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
2362 srcw = (sljit_si)srcw;
2364 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
2369 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
2372 *inst = CVTSI2SD_x_rm;
2374 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2375 compiler->mode32 = 1;
2377 if (dst_r == TMP_FREG)
2378 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2379 return SLJIT_SUCCESS;
/* Compares two floating-point values with UCOMISS/UCOMISD (sets EFLAGS).
   src1 must be in a register for the comparison; load it into TMP_FREG
   otherwise. */
2382 static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
2383 sljit_si src1, sljit_sw src1w,
2384 sljit_si src2, sljit_sw src2w)
2386 compiler->flags_saved = 0;
2387 if (!FAST_IS_REG(src1)) {
2388 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
2391 return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), src1, src2, src2w);
/* Public entry point for single-operand floating-point operations:
   moves, single<->double conversion, negation and absolute value. */
2394 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
2395 sljit_si dst, sljit_sw dstw,
2396 sljit_si src, sljit_sw srcw)
2400 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2401 compiler->mode32 = 1;
2405 SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
/* Plain move: at most one side may be memory. */
2407 if (GET_OPCODE(op) == SLJIT_DMOV) {
2408 if (FAST_IS_REG(dst))
2409 return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw);
2410 if (FAST_IS_REG(src))
2411 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, src);
2412 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src, srcw));
2413 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2416 if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) {
2417 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2418 if (FAST_IS_REG(src)) {
2419 /* We overwrite the high bits of source. From SLJIT point of view,
2420 this is not an issue.
2421 Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
2422 FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_SINGLE_OP, src, src, 0));
2425 FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_SINGLE_OP), TMP_FREG, src, srcw));
2429 FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_SINGLE_OP, dst_r, src, 0));
2430 if (dst_r == TMP_FREG)
2431 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2432 return SLJIT_SUCCESS;
2435 if (SLOW_IS_REG(dst)) {
2438 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
2442 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
2445 switch (GET_OPCODE(op)) {
/* Negation: XOR with a sign-bit mask from the sse2_buffer table. */
2447 FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8)));
/* Absolute value: AND with an all-but-sign-bit mask. */
2451 FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
2455 if (dst_r == TMP_FREG)
2456 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2457 return SLJIT_SUCCESS;
/* Public entry point for two-operand floating-point operations
   (ADD/SUB/MUL/DIV). Picks a working register (dst, or TMP_FREG when dst
   is memory), loads src1 into it, then applies the SSE2 op with src2. */
2460 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
2461 sljit_si dst, sljit_sw dstw,
2462 sljit_si src1, sljit_sw src1w,
2463 sljit_si src2, sljit_sw src2w)
2468 CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2469 ADJUST_LOCAL_OFFSET(dst, dstw);
2470 ADJUST_LOCAL_OFFSET(src1, src1w);
2471 ADJUST_LOCAL_OFFSET(src2, src2w);
2473 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2474 compiler->mode32 = 1;
2477 if (FAST_IS_REG(dst)) {
2480 ; /* Do nothing here. */
/* dst aliases src2: only commutative ops (ADD/MUL) may swap operands. */
2481 else if (dst == src2 && (op == SLJIT_DADD || op == SLJIT_DMUL)) {
2482 /* Swap arguments. */
2486 else if (dst != src2)
2487 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src1, src1w));
2490 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
2495 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
2498 switch (GET_OPCODE(op)) {
2500 FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2504 FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2508 FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2512 FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2516 if (dst_r == TMP_FREG)
2517 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2518 return SLJIT_SUCCESS;
2521 /* --------------------------------------------------------------------- */
2522 /* Conditional instructions */
2523 /* --------------------------------------------------------------------- */
/* Creates a label at the current position in the instruction stream.
   Returns the existing label when one was already emitted at this exact
   offset. Returns NULL on allocation failure. */
2525 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2528 struct sljit_label *label;
2531 CHECK_PTR(check_sljit_emit_label(compiler));
2533 /* We should restore the flags before the label,
2534 since other taken jumps has their own flags as well. */
2535 if (SLJIT_UNLIKELY(compiler->flags_saved))
2536 PTR_FAIL_IF(emit_restore_flags(compiler, 0));
/* Reuse the previous label if nothing was emitted since it. */
2538 if (compiler->last_label && compiler->last_label->size == compiler->size)
2539 return compiler->last_label;
2541 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2542 PTR_FAIL_IF(!label);
2543 set_label(label, compiler);
2545 inst = (sljit_ub*)ensure_buf(compiler, 2);
/* Emits a (conditional) jump or call whose target is patched later.
   Reserves worst-case instruction space; the real encoding is chosen at
   code-generation time. Returns NULL on allocation failure. */
2554 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
2557 struct sljit_jump *jump;
2560 CHECK_PTR(check_sljit_emit_jump(compiler, type));
/* Conditional jumps need the real flags; restore them first. */
2562 if (SLJIT_UNLIKELY(compiler->flags_saved)) {
2563 if ((type & 0xff) <= SLJIT_JUMP)
2564 PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2565 compiler->flags_saved = 0;
2568 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2569 PTR_FAIL_IF_NULL(jump);
2570 set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
/* Calls with arguments set up the argument registers/stack first. */
2573 if (type >= SLJIT_CALL1)
2574 PTR_FAIL_IF(call_with_args(compiler, type));
2576 /* Worst case size. */
2577 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2578 compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
2580 compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
2583 inst = (sljit_ub*)ensure_buf(compiler, 2);
2584 PTR_FAIL_IF_NULL(inst);
/* Emits an indirect jump or call whose target comes from a register,
   memory operand, or immediate (src, srcw). */
2591 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
2594 struct sljit_jump *jump;
2597 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2598 ADJUST_LOCAL_OFFSET(src, srcw);
2600 CHECK_EXTRA_REGS(src, srcw, (void)0);
/* Plain jumps may depend on the flags; restore any saved copy first. */
2602 if (SLJIT_UNLIKELY(compiler->flags_saved)) {
2603 if (type <= SLJIT_JUMP)
2604 FAIL_IF(emit_restore_flags(compiler, 0));
2605 compiler->flags_saved = 0;
2608 if (type >= SLJIT_CALL1) {
2609 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2610 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
/* The target register would be clobbered by argument setup below,
   so preserve it in TMP_REG1 first. */
2611 if (src == SLJIT_R2) {
2612 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
/* Stack-relative source: argument pushes below shift SP, so the
   displacement must be adjusted to keep pointing at the same slot. */
2615 if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
2616 srcw += sizeof(sljit_sw);
2619 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
2620 if (src == SLJIT_R2) {
2621 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
2625 FAIL_IF(call_with_args(compiler, type));
/* Immediate target: record a patchable jump resolved at code generation. */
2628 if (src == SLJIT_IMM) {
2629 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2631 set_jump(jump, compiler, JUMP_ADDR);
2632 jump->u.target = srcw;
2634 /* Worst case size. */
2635 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2636 compiler->size += 5;
2638 compiler->size += 10 + 3;
2641 inst = (sljit_ub*)ensure_buf(compiler, 2);
2648 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2649 /* REX_W is not necessary (src is not immediate). */
2650 compiler->mode32 = 1;
2652 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
/* Select the ModRM opcode extension: indirect CALL vs. indirect JMP. */
2655 *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
2657 return SLJIT_SUCCESS;
/* Materializes a condition flag as a 0/1 value in dst, optionally combining
   it with dst via the operation in 'op' (e.g. SLJIT_OR). The encoding paths
   differ between x86-64 (REX prefixes, all low bytes addressable) and x86-32
   (only eax/ecx/edx/ebx have addressable low bytes). */
2660 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
2661 sljit_si dst, sljit_sw dstw,
2662 sljit_si src, sljit_sw srcw,
2666 sljit_ub cond_set = 0;
2667 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2670 /* CHECK_EXTRA_REGS might overwrite these values. */
2671 sljit_si dst_save = dst;
2672 sljit_sw dstw_save = dstw;
2676 CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
2677 SLJIT_UNUSED_ARG(srcw);
2679 if (dst == SLJIT_UNUSED)
2680 return SLJIT_SUCCESS;
2682 ADJUST_LOCAL_OFFSET(dst, dstw);
2683 CHECK_EXTRA_REGS(dst, dstw, (void)0);
/* setcc reads the flags, so restore them if they were spilled. */
2684 if (SLJIT_UNLIKELY(compiler->flags_saved))
2685 FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));
2688 /* setcc = jcc + 0x10. */
2689 cond_set = get_jump_code(type) + 0x10;
2691 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* Fast path: OR the flag bit directly into dst (setcc tmp; or dst, tmp). */
2692 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
2693 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3);
2696 /* Set low register to conditional flag. */
2697 *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
2700 *inst++ = MOD_REG | reg_lmap[TMP_REG1];
2701 *inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
2702 *inst++ = OR_rm8_r8;
2703 *inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
2704 return SLJIT_SUCCESS;
/* General 64-bit path: setcc into a byte register, then zero-extend. */
2707 reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;
2709 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
2712 /* Set low register to conditional flag. */
2713 *inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
2716 *inst++ = MOD_REG | reg_lmap[reg];
2717 *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
2719 *inst++ = MOVZX_r_rm8;
2720 *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
2722 if (reg != TMP_REG1)
2723 return SLJIT_SUCCESS;
/* Plain move opcodes: store the 0/1 result into dst directly. */
2725 if (GET_OPCODE(op) < SLJIT_ADD) {
2726 compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
2727 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2729 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2730 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2731 compiler->skip_checks = 1;
/* Arithmetic opcodes: combine the 0/1 value with dst via op2. */
2733 return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0);
2734 #else /* SLJIT_CONFIG_X86_64 */
2735 if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
2736 if (reg_map[dst] <= 4) {
2737 /* Low byte is accessible. */
2738 inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
2741 /* Set low byte to conditional flag. */
2744 *inst++ = MOD_REG | reg_map[dst];
2747 *inst++ = MOVZX_r_rm8;
2748 *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
2749 return SLJIT_SUCCESS;
2752 /* Low byte is not accessible. */
2753 if (cpu_has_cmov == -1)
/* Use cmovcc when available: load 1 into TMP_REG1, 0 into dst, then
   conditionally move. */
2757 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
2758 /* a xor reg, reg operation would overwrite the flags. */
2759 EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
2761 inst = (sljit_ub*)ensure_buf(compiler, 1 + 3);
2766 /* cmovcc = setcc - 0x50. */
2767 *inst++ = cond_set - 0x50;
2768 *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
2769 return SLJIT_SUCCESS;
/* No cmov: temporarily swap eax into place with xchg, setcc into al,
   zero-extend into dst, then swap back. */
2772 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2774 INC_SIZE(1 + 3 + 3 + 1);
2775 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2776 /* Set al to conditional flag. */
2779 *inst++ = MOD_REG | 0 /* eax */;
2782 *inst++ = MOVZX_r_rm8;
2783 *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
2784 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2785 return SLJIT_SUCCESS;
/* 32-bit OR fast path: requires dst's low byte to be addressable. */
2788 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
2789 SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax);
2790 if (dst != SLJIT_R0) {
2791 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
2793 INC_SIZE(1 + 3 + 2 + 1);
2794 /* Set low register to conditional flag. */
2795 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2798 *inst++ = MOD_REG | 0 /* eax */;
2799 *inst++ = OR_rm8_r8;
2800 *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
2801 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
/* dst == eax: use ecx as the scratch instead, via xchg r/m. */
2804 inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
2806 INC_SIZE(2 + 3 + 2 + 2);
2807 /* Set low register to conditional flag. */
2808 *inst++ = XCHG_r_rm;
2809 *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2812 *inst++ = MOD_REG | 1 /* ecx */;
2813 *inst++ = OR_rm8_r8;
2814 *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
2815 *inst++ = XCHG_r_rm;
2816 *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2818 return SLJIT_SUCCESS;
2821 /* Set TMP_REG1 to the bit. */
2822 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2824 INC_SIZE(1 + 3 + 3 + 1);
2825 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2826 /* Set al to conditional flag. */
2829 *inst++ = MOD_REG | 0 /* eax */;
2832 *inst++ = MOVZX_r_rm8;
2833 *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
2835 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2837 if (GET_OPCODE(op) < SLJIT_ADD)
2838 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2840 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2841 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2842 compiler->skip_checks = 1;
/* Use the saved dst/dstw: CHECK_EXTRA_REGS may have rewritten them. */
2844 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
2845 #endif /* SLJIT_CONFIG_X86_64 */
/* Stores the address (SP + offset) of a local-storage slot into dst,
   using lea so the condition flags are preserved (SLJIT_KEEP_FLAGS). */
2848 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
2851 CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
2852 ADJUST_LOCAL_OFFSET(dst, dstw);
2854 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2856 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2857 compiler->mode32 = 0;
2860 ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
2862 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* Offsets that do not fit in 32 bits cannot be lea displacements:
   load them into TMP_REG1 first and lea with a register index. */
2863 if (NOT_HALFWORD(offset)) {
2864 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
2865 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
2866 SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
2867 return compiler->error;
2869 return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
/* Non-zero small offset: single lea. Zero offset: plain move of SP. */
2875 return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
2876 return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
/* Emits a load of init_value into dst whose immediate can be patched later
   through sljit_set_const. Returns the const record, or NULL on failure. */
2879 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
2882 struct sljit_const *const_;
2883 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2888 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
2889 ADJUST_LOCAL_OFFSET(dst, dstw);
2891 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2893 const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2894 PTR_FAIL_IF(!const_);
2895 set_const(const_, compiler);
2897 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2898 compiler->mode32 = 0;
/* Load the full 64-bit immediate into a register (dst itself when it is
   a register, TMP_REG1 otherwise so a memory store can follow). */
2899 reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2901 if (emit_load_imm64(compiler, reg, init_value))
2904 if (dst == SLJIT_UNUSED)
/* x86-32: a single mov with a 32-bit immediate is always patchable. */
2907 if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
2911 inst = (sljit_ub*)ensure_buf(compiler, 2);
2917 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* Memory destination on x86-64: store the loaded value from TMP_REG1. */
2918 if (dst & SLJIT_MEM)
2919 if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
/* Patches the target of an already-generated (rewritable) jump at runtime.
   addr points at the jump's operand: a rel32 displacement on x86-32
   (relative to the end of the 4-byte field), an absolute word on x86-64. */
2926 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
2928 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2929 *(sljit_sw*)addr = new_addr - (addr + 4);
2931 *(sljit_uw*)addr = new_addr;
/* Patches the immediate of a constant emitted by sljit_emit_const.
   addr is the in-code address of the immediate operand. */
2935 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
2937 *(sljit_sw*)addr = new_constant;
/* Reports whether SSE2 is usable. With runtime detection enabled, the
   cached cpu_has_sse2 flag is probed lazily (-1 means not yet detected);
   otherwise the answer is compile-time (elided #else branch in this view). */
2940 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_sse2_available(void)
2942 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
2943 if (cpu_has_sse2 == -1)
2945 return cpu_has_sse2;
/* Reports whether the CMOVcc instruction is available, detecting it
   lazily on first call (cpu_has_cmov == -1 means not yet probed). */
2951 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_cmov_available(void)
2953 if (cpu_has_cmov == -1)
2955 return cpu_has_cmov;
/* x86-specific extension: emits cmovcc dst_reg, src — dst_reg receives src
   only when the condition in 'type' holds. Caller must have verified
   sljit_x86_is_cmov_available(). dst_reg may carry SLJIT_INT_OP for a
   32-bit operation. */
2958 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_emit_cmov(struct sljit_compiler *compiler,
2961 sljit_si src, sljit_sw srcw)
2966 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2967 CHECK_ARGUMENT(sljit_x86_is_cmov_available());
2968 CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_INT_OP)));
2969 CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_D_ORDERED);
2970 CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_INT_OP));
2971 FUNCTION_CHECK_SRC(src, srcw);
2973 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
2974 if (SLJIT_UNLIKELY(!!compiler->verbose)) {
2975 fprintf(compiler->verbose, " x86_cmov%s %s%s, ",
2976 !(dst_reg & SLJIT_INT_OP) ? "" : ".i",
2977 JUMP_PREFIX(type), jump_names[type & 0xff]);
2978 sljit_verbose_reg(compiler, dst_reg & ~SLJIT_INT_OP);
2979 fprintf(compiler->verbose, ", ");
2980 sljit_verbose_param(compiler, src, srcw);
2981 fprintf(compiler->verbose, "\n");
2985 ADJUST_LOCAL_OFFSET(src, srcw);
2986 CHECK_EXTRA_REGS(src, srcw, (void)0);
2988 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* SLJIT_INT_OP selects the 32-bit form on x86-64. */
2989 compiler->mode32 = dst_reg & SLJIT_INT_OP;
2991 dst_reg &= ~SLJIT_INT_OP;
/* cmov has no immediate form: stage the immediate in TMP_REG1 first. */
2993 if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
2994 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
2999 inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
/* cmovcc opcode = long-form jcc opcode - 0x40 (0x0F 4x vs. 0x0F 8x). */
3002 *inst = get_jump_code(type & 0xff) - 0x40;
3003 return SLJIT_SUCCESS;