2 * Stack-less Just-In-Time compiler
4 * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* Returns the human-readable name of the target platform. SLJIT_CPUINFO
   must expand to string literal(s): it is concatenated with "x86". */
27 SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
29 return "x86" SLJIT_CPUINFO;
/* Register numbering note: hardware register 8 (R8) and above require
   a REX prefix byte in the instruction encoding. */
54 8 - R8 - From now on REX prefix is required
64 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
66 /* Last register + 1. */
67 #define TMP_REG1 (SLJIT_NO_REGISTERS + 1)
/* Maps SLJIT virtual registers to x86 machine register encodings.
   Index 0 is unused; 0 entries mark registers with no direct mapping. */
69 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
70 0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
/* On x86-32 the "extra" (E) registers have no machine register behind
   them: rewrite the operand (p, w) in place so it refers to the
   register's spill slot on the stack via SLJIT_LOCALS_REG, and run
   `do` so the caller can note the redirection. */
73 #define CHECK_EXTRA_REGS(p, w, do) \
74 if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
75 w = compiler->scratches_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_sw); \
76 p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
79 else if (p >= SLJIT_SAVED_EREG1 && p <= SLJIT_SAVED_EREG2) { \
80 w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_sw); \
81 p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
85 #else /* SLJIT_CONFIG_X86_32 */
87 /* Last register + 1. */
88 #define TMP_REG1 (SLJIT_NO_REGISTERS + 1)
89 #define TMP_REG2 (SLJIT_NO_REGISTERS + 2)
90 #define TMP_REG3 (SLJIT_NO_REGISTERS + 3)
92 /* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present"
93 Note: avoid using r12 and r13 for memory addressing
94 therefore r12 is better for SAVED_EREG than SAVED_REG. */
/* System V AMD64 argument registers. */
96 /* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
97 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
98 0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
100 /* low-map. reg_map & 0x7. */
101 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
102 0, 0, 6, 1, 0, 3, 3, 7, 6, 5, 4, 4, 2, 7, 1
/* Presumably the _WIN64 variant (Microsoft x64 calling convention);
   the surrounding #ifdef is elided in this listing — verify. */
105 /* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
106 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
107 0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 15, 4, 10, 8, 9
109 /* low-map. reg_map & 0x7. */
110 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
111 0, 0, 2, 1, 3, 5, 3, 6, 7, 6, 7, 4, 2, 0, 1
/* Range of a sign-extended 32-bit immediate on x86-64; values outside
   it must be materialized with a separate 64-bit constant load. */
122 #define HALFWORD_MAX 0x7fffffffl
123 #define HALFWORD_MIN -0x80000000l
125 #define HALFWORD_MAX 0x7fffffffll
126 #define HALFWORD_MIN -0x80000000ll
129 #define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
130 #define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
/* On x86-64 every SLJIT register maps to a machine register: no-op. */
132 #define CHECK_EXTRA_REGS(p, w, do)
134 #endif /* SLJIT_CONFIG_X86_32 */
136 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
/* Flags ORed into the first argument of emit_x86_instruction; they
   select operand sizes, REX handling, and legacy prefix bytes. */
140 /* Size flags for emit_x86_instruction: */
141 #define EX86_BIN_INS 0x0010
142 #define EX86_SHIFT_INS 0x0020
143 #define EX86_REX 0x0040
144 #define EX86_NO_REXW 0x0080
145 #define EX86_BYTE_ARG 0x0100
146 #define EX86_HALF_ARG 0x0200
/* 0x66 operand-size override prefix (16-bit operand). */
147 #define EX86_PREF_66 0x0400
/* SSE2-only flags; 0xF2/0xF3 are the scalar double/single prefixes. */
149 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
150 #define EX86_SSE2 0x0800
151 #define EX86_PREF_F2 0x1000
152 #define EX86_PREF_F3 0x2000
155 /* --------------------------------------------------------------------- */
156 /* Instruction forms */
157 /* --------------------------------------------------------------------- */
/* x86 opcode bytes and ModRM opcode extensions. Values written as
   (N << 3) are /N opcode extensions to be placed in the reg field of
   the ModRM byte (their group opcode byte is named in the comment);
   plain hex values are primary opcode bytes, and entries marked
   GROUP_0F require the 0x0f escape byte first. */
159 #define ADD (/* BINARY */ 0 << 3)
160 #define ADD_EAX_i32 0x05
161 #define ADD_r_rm 0x03
162 #define ADD_rm_r 0x01
163 #define ADDSD_x_xm 0x58
164 #define ADC (/* BINARY */ 2 << 3)
165 #define ADC_EAX_i32 0x15
166 #define ADC_r_rm 0x13
167 #define ADC_rm_r 0x11
168 #define AND (/* BINARY */ 4 << 3)
169 #define AND_EAX_i32 0x25
170 #define AND_r_rm 0x23
171 #define AND_rm_r 0x21
172 #define ANDPD_x_xm 0x54
173 #define BSR_r_rm (/* GROUP_0F */ 0xbd)
174 #define CALL_i32 0xe8
175 #define CALL_rm (/* GROUP_FF */ 2 << 3)
177 #define CMOVNE_r_rm (/* GROUP_0F */ 0x45)
178 #define CMP (/* BINARY */ 7 << 3)
179 #define CMP_EAX_i32 0x3d
180 #define CMP_r_rm 0x3b
181 #define CMP_rm_r 0x39
182 #define DIV (/* GROUP_F7 */ 6 << 3)
183 #define DIVSD_x_xm 0x5e
185 #define IDIV (/* GROUP_F7 */ 7 << 3)
186 #define IMUL (/* GROUP_F7 */ 5 << 3)
187 #define IMUL_r_rm (/* GROUP_0F */ 0xaf)
188 #define IMUL_r_rm_i8 0x6b
189 #define IMUL_r_rm_i32 0x69
193 #define JMP_rm (/* GROUP_FF */ 4 << 3)
195 #define MOV_r_rm 0x8b
196 #define MOV_r_i32 0xb8
197 #define MOV_rm_r 0x89
198 #define MOV_rm_i32 0xc7
199 #define MOV_rm8_i8 0xc6
200 #define MOV_rm8_r8 0x88
/* MOVSD opcodes need the 0xf2 prefix (EX86_PREF_F2). */
201 #define MOVSD_x_xm 0x10
202 #define MOVSD_xm_x 0x11
203 #define MOVSXD_r_rm 0x63
204 #define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
205 #define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
206 #define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
207 #define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
208 #define MUL (/* GROUP_F7 */ 4 << 3)
209 #define MULSD_x_xm 0x59
210 #define NEG_rm (/* GROUP_F7 */ 3 << 3)
212 #define NOT_rm (/* GROUP_F7 */ 2 << 3)
213 #define OR (/* BINARY */ 1 << 3)
215 #define OR_EAX_i32 0x0d
217 #define OR_rm8_r8 0x08
221 #define PUSH_i32 0x68
223 #define PUSH_rm (/* GROUP_FF */ 6 << 3)
225 #define RET_near 0xc3
227 #define SBB (/* BINARY */ 3 << 3)
228 #define SBB_EAX_i32 0x1d
229 #define SBB_r_rm 0x1b
230 #define SBB_rm_r 0x19
231 #define SAR (/* SHIFT */ 7 << 3)
232 #define SHL (/* SHIFT */ 4 << 3)
233 #define SHR (/* SHIFT */ 5 << 3)
234 #define SUB (/* BINARY */ 5 << 3)
235 #define SUB_EAX_i32 0x2d
236 #define SUB_r_rm 0x2b
237 #define SUB_rm_r 0x29
238 #define SUBSD_x_xm 0x5c
239 #define TEST_EAX_i32 0xa9
240 #define TEST_rm_r 0x85
241 #define UCOMISD_x_xm 0x2e
242 #define XCHG_EAX_r 0x90
243 #define XCHG_r_rm 0x87
244 #define XOR (/* BINARY */ 6 << 3)
245 #define XOR_EAX_i32 0x35
246 #define XOR_r_rm 0x33
247 #define XOR_rm_r 0x31
248 #define XORPD_x_xm 0x57
/* Group opcode bytes carrying the /N extensions defined above. */
250 #define GROUP_0F 0x0f
251 #define GROUP_F7 0xf7
252 #define GROUP_FF 0xff
253 #define GROUP_BINARY_81 0x81
254 #define GROUP_BINARY_83 0x83
255 #define GROUP_SHIFT_1 0xd1
256 #define GROUP_SHIFT_N 0xc1
257 #define GROUP_SHIFT_CL 0xd3
/* ModRM mod value: register + 8-bit displacement addressing. */
260 #define MOD_DISP8 0x40
/* Byte-emitter helpers: `inst` is the output cursor in all of these.
   INC_SIZE writes the instruction length as a bookkeeping byte and
   accounts for it in compiler->size. */
262 #define INC_SIZE(s) (*inst++ = (s), compiler->size += (s))
264 #define PUSH_REG(r) (*inst++ = (PUSH_r + (r)))
265 #define POP_REG(r) (*inst++ = (POP_r + (r)))
266 #define RET() (*inst++ = (RET_near))
/* ret imm16; the high immediate byte is always zero, so n must fit
   in 8 bits. */
267 #define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
/* mov r, r/m with an explicitly assembled ModRM byte. */
269 #define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
271 /* Multithreading does not affect these static variables, since they store
272 built-in CPU features. Therefore they can be overwritten by different threads
273 if they detect the CPU features at the same time. */
274 #if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
/* -1 means "not yet detected"; updated by get_cpu_features(). */
275 static sljit_si cpu_has_sse2 = -1;
277 static sljit_si cpu_has_cmov = -1;
279 #if defined(_MSC_VER) && _MSC_VER >= 1400
/* Executes CPUID (leaf 1) and caches the EDX feature bits. The MSVC
   path uses the __cpuid intrinsic; GCC-compatible compilers use
   inline assembly (bodies partly elided in this listing). */
283 static void get_cpu_features(void)
287 #if defined(_MSC_VER) && _MSC_VER >= 1400
/* CPUInfo[3] is EDX from the __cpuid intrinsic. */
291 features = (sljit_ui)CPUInfo[3];
293 #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
298 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
299 /* On x86-32, there is no red zone, so this
300 should work (no need for a local variable). */
304 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
310 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
/* Clobber lists for the inline-asm CPUID sequences. */
311 : "%eax", "%ecx", "%edx"
313 : "%rax", "%rbx", "%rcx", "%rdx"
317 #else /* _MSC_VER && _MSC_VER >= 1400 */
326 #endif /* _MSC_VER && _MSC_VER >= 1400 */
328 #if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
/* CPUID.01H:EDX bit 26 = SSE2, bit 15 = CMOV. */
329 cpu_has_sse2 = (features >> 26) & 0x1;
331 cpu_has_cmov = (features >> 15) & 0x1;
/* Maps an SLJIT condition code to the second byte of the two-byte
   (0x0f 0x8x) near conditional-jump opcode. Callers obtain the
   one-byte short-jump form by subtracting 0x10 from this value
   (see generate_near_jump_code). Several case labels and the switch
   framing are elided in this listing. */
334 static sljit_ub get_jump_code(sljit_si type)
338 case SLJIT_C_FLOAT_EQUAL:
339 return 0x84 /* je */;
341 case SLJIT_C_NOT_EQUAL:
342 case SLJIT_C_FLOAT_NOT_EQUAL:
343 return 0x85 /* jne */;
346 case SLJIT_C_FLOAT_LESS:
347 return 0x82 /* jc */;
349 case SLJIT_C_GREATER_EQUAL:
350 case SLJIT_C_FLOAT_GREATER_EQUAL:
351 return 0x83 /* jae */;
353 case SLJIT_C_GREATER:
354 case SLJIT_C_FLOAT_GREATER:
355 return 0x87 /* jnbe */;
357 case SLJIT_C_LESS_EQUAL:
358 case SLJIT_C_FLOAT_LESS_EQUAL:
359 return 0x86 /* jbe */;
/* Signed comparisons below use the sign/overflow-based conditions. */
361 case SLJIT_C_SIG_LESS:
362 return 0x8c /* jl */;
364 case SLJIT_C_SIG_GREATER_EQUAL:
365 return 0x8d /* jnl */;
367 case SLJIT_C_SIG_GREATER:
368 return 0x8f /* jnle */;
370 case SLJIT_C_SIG_LESS_EQUAL:
371 return 0x8e /* jle */;
373 case SLJIT_C_OVERFLOW:
374 case SLJIT_C_MUL_OVERFLOW:
375 return 0x80 /* jo */;
377 case SLJIT_C_NOT_OVERFLOW:
378 case SLJIT_C_MUL_NOT_OVERFLOW:
379 return 0x81 /* jno */;
/* Unordered float results set PF; test with jp/jnp. */
381 case SLJIT_C_FLOAT_UNORDERED:
382 return 0x8a /* jp */;
384 case SLJIT_C_FLOAT_ORDERED:
385 return 0x8b /* jpo */;
/* Far-jump fallback (target unreachable with a rel32 displacement). */
390 static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type);
392 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
393 static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type);
/* Emits a jump/call with a rel8 or rel32 displacement placeholder and
   tags the jump with PATCH_MB (byte) or PATCH_MW (word/int) so the
   final displacement is filled in during sljit_generate_code(). */
396 static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, sljit_si type)
401 if (jump->flags & JUMP_LABEL)
/* Label not yet placed: use its recorded size as the future offset. */
402 label_addr = (sljit_uw)(code + jump->u.label->size)
404 label_addr = jump->u.target;
/* rel8 reaches [-128, 127] measured from the end of a 2-byte jump. */
405 short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
407 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* Target outside the +/-2GB rel32 range: must use an indirect jump. */
408 if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
409 return generate_far_jump_code(jump, code_ptr, type);
412 if (type == SLJIT_JUMP) {
414 *code_ptr++ = JMP_i8;
416 *code_ptr++ = JMP_i32;
419 else if (type >= SLJIT_FAST_CALL) {
421 *code_ptr++ = CALL_i32;
424 else if (short_jump) {
/* Short jcc opcode = near jcc second byte - 0x10 (0x7x family). */
425 *code_ptr++ = get_jump_code(type) - 0x10;
429 *code_ptr++ = GROUP_0F;
430 *code_ptr++ = get_jump_code(type);
/* Reserve space for the displacement; patched in the final pass. */
435 jump->flags |= PATCH_MB;
436 code_ptr += sizeof(sljit_sb);
438 jump->flags |= PATCH_MW;
439 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
440 code_ptr += sizeof(sljit_sw);
442 code_ptr += sizeof(sljit_si);
/* Second (final) pass of code generation: copies the buffered
   instruction bytes into freshly allocated executable memory,
   resolves label/const addresses, and patches the jump displacements
   reserved by generate_near_jump_code(). Parts of the loop framing
   are elided in this listing. */
449 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
451 struct sljit_memory_fragment *buf;
458 struct sljit_label *label;
459 struct sljit_jump *jump;
460 struct sljit_const *const_;
463 check_sljit_generate_code(compiler);
464 reverse_buf(compiler);
466 /* Second code generation pass. */
467 code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
468 PTR_FAIL_WITH_EXEC_IF(code);
472 label = compiler->labels;
473 jump = compiler->jumps;
474 const_ = compiler->consts;
476 buf_ptr = buf->memory;
477 buf_end = buf_ptr + buf->used_size;
481 /* The code is already generated. */
482 SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
/* Marker bytes in the buffer identify jumps, labels and consts;
   *buf_ptr - 4 recovers the jump type encoded at emit time. */
488 jump->addr = (sljit_uw)code_ptr;
489 if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
490 code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
492 code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
495 else if (*buf_ptr == 0) {
496 label->addr = (sljit_uw)code_ptr;
497 label->size = code_ptr - code;
500 else if (*buf_ptr == 1) {
/* The constant's immediate was just emitted; record its address. */
501 const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
502 const_ = const_->next;
505 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
/* Fixed-target call/jump: emit the rel32 displacement directly. */
506 *code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
508 *(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw));
509 code_ptr += sizeof(sljit_sw);
510 buf_ptr += sizeof(sljit_sw) - 1;
512 code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
513 buf_ptr += sizeof(sljit_sw);
518 } while (buf_ptr < buf_end);
519 SLJIT_ASSERT(buf_ptr == buf_end);
/* All labels, jumps and constants must have been consumed. */
523 SLJIT_ASSERT(!label);
525 SLJIT_ASSERT(!const_);
/* Patch pass: fill in the displacements reserved earlier. */
527 jump = compiler->jumps;
529 if (jump->flags & PATCH_MB) {
530 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) <= 127);
531 *(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb)));
532 } else if (jump->flags & PATCH_MW) {
533 if (jump->flags & JUMP_LABEL) {
534 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
535 *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
537 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
538 *(sljit_si*)jump->addr = (sljit_si)(jump->u.label->addr - (jump->addr + sizeof(sljit_si)));
542 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
543 *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
545 SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
546 *(sljit_si*)jump->addr = (sljit_si)(jump->u.target - (jump->addr + sizeof(sljit_si)));
550 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* PATCH_MD: absolute 64-bit address (used by far jumps). */
551 else if (jump->flags & PATCH_MD)
552 *(sljit_sw*)jump->addr = jump->u.label->addr;
558 /* Maybe we waste some space because of short jumps. */
559 SLJIT_ASSERT(code_ptr <= code + compiler->size);
560 compiler->error = SLJIT_ERR_COMPILED;
561 compiler->executable_size = code_ptr - code;
565 /* --------------------------------------------------------------------- */
567 /* --------------------------------------------------------------------- */
/* Forward declarations for the shared binary-op emitters; the op_*
   parameters are the x86 opcode bytes selected by the caller. */
569 static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
570 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
571 sljit_si dst, sljit_sw dstw,
572 sljit_si src1, sljit_sw src1w,
573 sljit_si src2, sljit_sw src2w);
575 static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
576 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
577 sljit_si dst, sljit_sw dstw,
578 sljit_si src1, sljit_sw src1w,
579 sljit_si src2, sljit_sw src2w);
581 static sljit_si emit_mov(struct sljit_compiler *compiler,
582 sljit_si dst, sljit_sw dstw,
583 sljit_si src, sljit_sw srcw);
/* Saves the CPU flags to the stack so a later instruction sequence
   can run without destroying them; the LEA re-adjusts the stack
   pointer without touching the flags (several emitted bytes are
   elided in this listing). Sets compiler->flags_saved. */
585 static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler)
589 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
590 inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
594 inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
599 *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
602 *inst++ = (sljit_ub)sizeof(sljit_sw);
604 compiler->flags_saved = 1;
605 return SLJIT_SUCCESS;
/* Inverse of emit_save_flags: restores the flags saved on the stack.
   keep_flags becomes the new value of compiler->flags_saved. */
608 static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags)
612 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
613 inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
618 inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
624 *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
627 *inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw);
628 compiler->flags_saved = keep_flags;
629 return SLJIT_SUCCESS;
635 static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
637 /* Workaround for calling the internal _chkstk() function on Windows.
638 This function touches all 4k pages belonging to the requested stack space,
639 whose size is passed in local_size. This is necessary on Windows where
640 the stack can only grow in 4k steps. However, this function just burns
641 CPU cycles if the stack is large enough. However, you don't know it in
642 advance, so it must always be called. I think this is a bad design in
643 general even if it has some reasons. */
/* alloca() makes the compiler emit its stack-probe (_chkstk) sequence;
   the volatile store prevents the allocation from being optimized away. */
644 *(volatile sljit_si*)alloca(local_size) = 0;
649 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
650 #include "sljitNativeX86_32.c"
652 #include "sljitNativeX86_64.c"
/* Emits a word-sized move from (src, srcw) to (dst, dstw), handling
   register, immediate and memory operands; memory-to-memory goes
   through TMP_REG1. Opcode-byte assignments between calls are elided
   in this listing. Returns SLJIT_SUCCESS or an error code. */
655 static sljit_si emit_mov(struct sljit_compiler *compiler,
656 sljit_si dst, sljit_sw dstw,
657 sljit_si src, sljit_sw srcw)
661 if (dst == SLJIT_UNUSED) {
662 /* No destination, doesn't need to setup flags. */
/* Still perform the load (into TMP_REG1) so a faulting address
   behaves the same as with a real destination. */
663 if (src & SLJIT_MEM) {
664 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
668 return SLJIT_SUCCESS;
670 if (FAST_IS_REG(src)) {
671 inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
674 return SLJIT_SUCCESS;
676 if (src & SLJIT_IMM) {
677 if (FAST_IS_REG(dst)) {
678 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
679 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
681 if (!compiler->mode32) {
/* Immediate does not fit a sign-extended 32-bit: movabs. */
682 if (NOT_HALFWORD(srcw))
683 return emit_load_imm64(compiler, dst, srcw);
686 return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
689 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* mov [mem], imm64 does not exist: stage via TMP_REG2. */
690 if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
691 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
692 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
695 return SLJIT_SUCCESS;
698 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
701 return SLJIT_SUCCESS;
703 if (FAST_IS_REG(dst)) {
704 inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
707 return SLJIT_SUCCESS;
710 /* Memory to memory move. Requires two instructions. */
711 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
714 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
717 return SLJIT_SUCCESS;
/* Convenience wrapper that propagates emit_mov failures. */
720 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
721 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
/* Emits a zero-operand operation: breakpoint, nop, or the fixed-
   register multiply/divide family (SLJIT_UMUL/SMUL/UDIV/SDIV). The
   divide path must juggle the fixed eax/edx operands of div/idiv;
   several emitted opcode bytes are elided in this listing. */
723 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
726 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
731 check_sljit_emit_op0(compiler, op);
733 switch (GET_OPCODE(op)) {
734 case SLJIT_BREAKPOINT:
735 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
741 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
/* mul/div clobber the flags unconditionally. */
750 compiler->flags_saved = 0;
751 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* These assumptions about the register mapping keep the fixed
   eax/edx requirements of mul/div encodable below. */
753 SLJIT_COMPILE_ASSERT(
754 reg_map[SLJIT_SCRATCH_REG1] == 0
755 && reg_map[SLJIT_SCRATCH_REG2] == 2
756 && reg_map[TMP_REG1] > 7,
757 invalid_register_assignment_for_div_mul);
759 SLJIT_COMPILE_ASSERT(
760 reg_map[SLJIT_SCRATCH_REG1] == 0
761 && reg_map[SLJIT_SCRATCH_REG2] < 7
762 && reg_map[TMP_REG1] == 2,
763 invalid_register_assignment_for_div_mul);
765 compiler->mode32 = op & SLJIT_INT_OP;
769 if (op == SLJIT_UDIV) {
770 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
/* Preserve the divisor, then zero edx for the unsigned divide. */
771 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SCRATCH_REG2, 0);
772 inst = emit_x86_instruction(compiler, 1, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0);
774 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
780 if (op == SLJIT_SDIV) {
781 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
782 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SCRATCH_REG2, 0);
/* Sign-extend eax into edx (cdq/cqo; bytes elided). */
785 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
786 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
791 if (compiler->mode32) {
792 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
797 inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
806 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
807 inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
/* ModRM for the GROUP_F7 mul/div instruction; divides read their
   operand from the relocated divisor register. */
811 *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_map[TMP_REG1] : reg_map[SLJIT_SCRATCH_REG2]);
814 size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
816 size = (!compiler->mode32) ? 3 : 2;
818 inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
822 if (!compiler->mode32)
823 *inst++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
824 else if (op >= SLJIT_UDIV)
827 *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_SCRATCH_REG2]);
829 if (!compiler->mode32)
832 *inst = MOD_REG | reg_map[SLJIT_SCRATCH_REG2];
849 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
/* Move the remainder/high part back to the expected register. */
850 EMIT_MOV(compiler, SLJIT_SCRATCH_REG2, 0, TMP_REG1, 0);
855 return SLJIT_SUCCESS;
/* Emits a single prefix byte ahead of the next instruction. */
858 #define ENCODE_PREFIX(prefix) \
860 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
/* Emits an 8-bit move with sign (sign != 0) or zero extension into a
   word-sized destination. On x86-32 only registers with reg_map < 4
   (eax/ecx/edx/ebx) have byte encodings, which forces the register
   shuffling below; several opcode assignments are elided in this
   listing. */
866 static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign,
867 sljit_si dst, sljit_sw dstw,
868 sljit_si src, sljit_sw srcw)
872 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
876 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
877 compiler->mode32 = 0;
880 if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
881 return SLJIT_SUCCESS; /* Empty instruction. */
883 if (src & SLJIT_IMM) {
884 if (FAST_IS_REG(dst)) {
885 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
886 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
888 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
891 return SLJIT_SUCCESS;
894 inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
897 return SLJIT_SUCCESS;
900 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
902 if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
903 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
/* src has no byte form on x86-32: copy it to TMP_REG1 first. */
904 if (reg_map[src] >= 4) {
905 SLJIT_ASSERT(dst_r == TMP_REG1);
906 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
913 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
914 else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
915 /* src, dst are registers. */
916 SLJIT_ASSERT(SLOW_IS_REG(dst));
917 if (reg_map[dst] < 4) {
/* dst is byte-addressable: copy then extend in place. */
919 EMIT_MOV(compiler, dst, 0, src, 0);
920 inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
923 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
927 EMIT_MOV(compiler, dst, 0, src, 0);
/* Neither register is byte-addressable: emulate the extension
   with shifts (sign) or a mask (zero). */
930 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
934 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
939 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
944 return SLJIT_SUCCESS;
948 /* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
949 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
952 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
955 if (dst & SLJIT_MEM) {
956 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
957 if (dst_r == TMP_REG1) {
958 /* Find a non-used register, whose reg_map[src] < 4. */
959 if ((dst & REG_MASK) == SLJIT_SCRATCH_REG1) {
960 if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SCRATCH_REG2))
961 work_r = SLJIT_SCRATCH_REG3;
963 work_r = SLJIT_SCRATCH_REG2;
966 if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG1))
967 work_r = SLJIT_SCRATCH_REG1;
968 else if ((dst & REG_MASK) == SLJIT_SCRATCH_REG2)
969 work_r = SLJIT_SCRATCH_REG3;
971 work_r = SLJIT_SCRATCH_REG2;
/* xchg eax, TMP_REG1 is a single byte: cheapest way to free a
   byte-addressable register; swapped back afterwards. */
974 if (work_r == SLJIT_SCRATCH_REG1) {
975 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
978 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
983 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
987 if (work_r == SLJIT_SCRATCH_REG1) {
988 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
991 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
997 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
/* On x86-64 a REX prefix makes every register byte-addressable. */
1002 inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
1008 return SLJIT_SUCCESS;
/* Emits a 16-bit move with sign (sign != 0) or zero extension into a
   word-sized destination; 16-bit stores use the 0x66 operand-size
   prefix (EX86_PREF_66). Some opcode assignments are elided. */
1011 static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign,
1012 sljit_si dst, sljit_sw dstw,
1013 sljit_si src, sljit_sw srcw)
1018 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1019 compiler->mode32 = 0;
1022 if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
1023 return SLJIT_SUCCESS; /* Empty instruction. */
1025 if (src & SLJIT_IMM) {
1026 if (FAST_IS_REG(dst)) {
1027 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1028 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
1030 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1033 return SLJIT_SUCCESS;
1036 inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
1039 return SLJIT_SUCCESS;
1042 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1044 if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
/* movsx/movzx r, r/m16 performs the load and the extension. */
1047 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
1050 *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
1053 if (dst & SLJIT_MEM) {
1054 inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
1059 return SLJIT_SUCCESS;
/* Emits a GROUP_F7 unary operation (opcode is the /N ModRM extension,
   e.g. NOT_rm or NEG_rm) computing dst = op(src). Non-register
   operands are staged through TMP_REG1; the opcode-byte assignments
   following each emit call are elided in this listing. */
1062 static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode,
1063 sljit_si dst, sljit_sw dstw,
1064 sljit_si src, sljit_sw srcw)
/* No destination: still perform the operation so the flags are set. */
1068 if (dst == SLJIT_UNUSED) {
1069 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1070 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1074 return SLJIT_SUCCESS;
1076 if (dst == src && dstw == srcw) {
1077 /* Same input and output */
1078 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1082 return SLJIT_SUCCESS;
1084 if (FAST_IS_REG(dst)) {
1085 EMIT_MOV(compiler, dst, 0, src, srcw);
1086 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1090 return SLJIT_SUCCESS;
/* Memory destination: operate in TMP_REG1, then store. */
1092 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1093 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1097 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1098 return SLJIT_SUCCESS;
/* Emits NOT and then a second reg,reg instruction on the result so
   the CPU flags reflect it (the x86 NOT instruction itself does not
   modify any flags). Opcode assignments are elided in this listing. */
1101 static sljit_si emit_not_with_flags(struct sljit_compiler *compiler,
1102 sljit_si dst, sljit_sw dstw,
1103 sljit_si src, sljit_sw srcw)
1107 if (dst == SLJIT_UNUSED) {
1108 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1109 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
/* Flag-setting instruction on the inverted value. */
1113 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1116 return SLJIT_SUCCESS;
1118 if (FAST_IS_REG(dst)) {
1119 EMIT_MOV(compiler, dst, 0, src, srcw);
1120 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1124 inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
1127 return SLJIT_SUCCESS;
1129 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1130 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1134 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1137 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1138 return SLJIT_SUCCESS;
/* Emits a count-leading-zeros sequence: BSR finds the highest set
   bit, a CMOVNE (or a hand-encoded fallback when the CPU lacks CMOV)
   supplies the all-zero-input result, and a final immediate operation
   with 31/63 converts the bit index into the zero count. Opcode
   assignments are partly elided in this listing. */
1141 static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags,
1142 sljit_si dst, sljit_sw dstw,
1143 sljit_si src, sljit_sw srcw)
1148 SLJIT_UNUSED_ARG(op_flags);
1149 if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
1150 /* Just set the zero flag. */
1151 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1152 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1156 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1157 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
1159 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, TMP_REG1, 0);
1163 return SLJIT_SUCCESS;
/* BSR cannot take an immediate operand: materialize it first. */
1166 if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
1167 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
1172 inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
1177 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1178 if (FAST_IS_REG(dst))
1181 /* Find an unused temporary register. */
1182 if ((dst & REG_MASK) != SLJIT_SCRATCH_REG1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG1))
1183 dst_r = SLJIT_SCRATCH_REG1;
1184 else if ((dst & REG_MASK) != SLJIT_SCRATCH_REG2 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG2))
1185 dst_r = SLJIT_SCRATCH_REG2;
1187 dst_r = SLJIT_SCRATCH_REG3;
1188 EMIT_MOV(compiler, dst, dstw, dst_r, 0);
/* Preload the zero-input result (32 + 31 XORs down to 32). */
1190 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
1192 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
1193 compiler->mode32 = 0;
1194 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
1195 compiler->mode32 = op_flags & SLJIT_INT_OP;
/* Lazily detect CMOV support the first time it is needed. */
1198 if (cpu_has_cmov == -1)
1202 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
1205 *inst = CMOVNE_r_rm;
/* No CMOV available: hand-encode the equivalent sequence. */
1207 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1208 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1215 *inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
1217 inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
1223 *inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
1225 *inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
/* Convert the BSR bit index into the leading-zero count. */
1229 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1230 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
1232 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
1237 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1238 if (dst & SLJIT_MEM) {
1239 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
1244 if (dst & SLJIT_MEM)
1245 EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
1247 return SLJIT_SUCCESS;
/* Emits a one-operand operation: the MOV/MOVU family (with all the
   width/sign variants and pre/post pointer update for MOVU), NOT,
   NEG and CLZ. Dispatch framing and some branches are elided in this
   listing. */
1250 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
1251 sljit_si dst, sljit_sw dstw,
1252 sljit_si src, sljit_sw srcw)
1255 sljit_si update = 0;
1256 sljit_si op_flags = GET_ALL_FLAGS(op);
1257 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1258 sljit_si dst_is_ereg = 0;
1259 sljit_si src_is_ereg = 0;
/* x86-64 has no extra registers, so this is constant false there. */
1261 # define src_is_ereg 0
1265 check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
1266 ADJUST_LOCAL_OFFSET(dst, dstw);
1267 ADJUST_LOCAL_OFFSET(src, srcw);
/* Redirect extra registers to their stack slots (x86-32 only). */
1269 CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
1270 CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
1271 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1272 compiler->mode32 = op_flags & SLJIT_INT_OP;
1275 op = GET_OPCODE(op);
1276 if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
1277 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1278 compiler->mode32 = 0;
1281 if (op_flags & SLJIT_INT_OP) {
/* Same-register int move is a no-op unless a width cast is
   required by the opcode. */
1282 if (FAST_IS_REG(src) && src == dst) {
1283 if (!TYPE_CAST_NEEDED(op))
1284 return SLJIT_SUCCESS;
1286 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1287 if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
1289 if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
1291 if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
1293 if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
/* MOVU variants are the MOV opcodes offset by 8. */
1298 SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
1299 if (op >= SLJIT_MOVU) {
/* Pre-fold immediates to the operand width at compile time. */
1304 if (src & SLJIT_IMM) {
1307 srcw = (sljit_ub)srcw;
1310 srcw = (sljit_sb)srcw;
1313 srcw = (sljit_uh)srcw;
1316 srcw = (sljit_sh)srcw;
1318 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1320 srcw = (sljit_ui)srcw;
1323 srcw = (sljit_si)srcw;
1327 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1328 if (SLJIT_UNLIKELY(dst_is_ereg))
1329 return emit_mov(compiler, dst, dstw, src, srcw);
/* MOVU: update the base register of the source address first. */
1333 if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) {
1334 inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw);
1337 src &= SLJIT_MEM | 0xf;
1341 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
/* Extending moves cannot target a stack slot directly: stage the
   result in TMP_REG1 and store it afterwards (see below). */
1342 if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
1343 SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG));
1351 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1355 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
1358 FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
1361 FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
1364 FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
1367 FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
1369 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1371 FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
1374 FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
1379 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1380 if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
1381 return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REG1, 0);
/* MOVU: update the base register of the destination address. */
1384 if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
1385 inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw);
1389 return SLJIT_SUCCESS;
1392 if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
1393 compiler->flags_saved = 0;
1397 if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
/* NOT does not set flags on x86: use the flag-setting variant. */
1398 return emit_not_with_flags(compiler, dst, dstw, src, srcw);
1399 return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
1402 if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
1403 FAIL_IF(emit_save_flags(compiler));
1404 return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
1407 if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
1408 FAIL_IF(emit_save_flags(compiler));
1409 return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
1412 return SLJIT_SUCCESS;
1414 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1414 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1419 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* BINARY_IMM emits "op r/m, imm". On x86-64 an immediate that does not fit
   in 32 bits cannot be encoded directly, so it is first loaded into TMP_REG2
   and a register form is emitted instead. */
1421 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1422 if (IS_HALFWORD(immw) || compiler->mode32) { \
1423 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1425 *(inst + 1) |= (op_imm); \
1428 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
1429 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
1434 #define BINARY_EAX_IMM(op_eax_imm, immw) \
1435 FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
/* 32-bit variants: every sljit_sw immediate is encodable, no fallback. */
1439 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1440 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1442 *(inst + 1) |= (op_imm);
1444 #define BINARY_EAX_IMM(op_eax_imm, immw) \
1445 FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
/* Emits a commutative (cumulative) binary operation such as ADD/ADC/AND/OR/XOR.
   op_rm/op_mr/op_imm/op_eax_imm select the r<-r/m, r/m<-r, r/m<-imm and
   EAX<-imm32 encodings of the same instruction. Computes dst = src1 op src2;
   when dst is SLJIT_UNUSED the result goes to TMP_REG1 (flag effects only). */
1449 static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
1450 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
1451 sljit_si dst, sljit_sw dstw,
1452 sljit_si src1, sljit_sw src1w,
1453 sljit_si src2, sljit_sw src2w)
/* No real destination: evaluate into TMP_REG1 for the flags only. */
1457 if (dst == SLJIT_UNUSED) {
1458 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1459 if (src2 & SLJIT_IMM) {
1460 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1463 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1467 return SLJIT_SUCCESS;
/* In-place form (dst == src1); prefers the short EAX,imm32 encoding. */
1470 if (dst == src1 && dstw == src1w) {
1471 if (src2 & SLJIT_IMM) {
1472 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1473 if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1475 if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) {
1477 BINARY_EAX_IMM(op_eax_imm, src2w);
1480 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1483 else if (FAST_IS_REG(dst)) {
1484 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1488 else if (FAST_IS_REG(src2)) {
1489 /* Special exception for sljit_emit_op_flags. */
1490 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1495 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1496 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1500 return SLJIT_SUCCESS;
1503 /* Only for cumulative operations. */
/* Mirror of the dst == src1 case; valid because the operation commutes. */
1504 if (dst == src2 && dstw == src2w) {
1505 if (src1 & SLJIT_IMM) {
1506 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1507 if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1509 if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128)) {
1511 BINARY_EAX_IMM(op_eax_imm, src1w);
1514 BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
1517 else if (FAST_IS_REG(dst)) {
1518 inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
1522 else if (FAST_IS_REG(src1)) {
1523 inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
1528 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1529 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1533 return SLJIT_SUCCESS;
1536 /* General version. */
1537 if (FAST_IS_REG(dst)) {
1538 EMIT_MOV(compiler, dst, 0, src1, src1w);
1539 if (src2 & SLJIT_IMM) {
1540 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1543 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1549 /* This version requires less memory writing. */
1550 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1551 if (src2 & SLJIT_IMM) {
1552 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1555 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1559 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1562 return SLJIT_SUCCESS;
/* Emits a non-commutative binary operation such as SUB/SBB. Same opcode
   selection scheme as emit_cum_binary, but there is no dst == src2 fast
   path because operand order matters. */
1565 static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
1566 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
1567 sljit_si dst, sljit_sw dstw,
1568 sljit_si src1, sljit_sw src1w,
1569 sljit_si src2, sljit_sw src2w)
/* No real destination: evaluate into TMP_REG1 for the flags only. */
1573 if (dst == SLJIT_UNUSED) {
1574 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1575 if (src2 & SLJIT_IMM) {
1576 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1579 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1583 return SLJIT_SUCCESS;
/* In-place form (dst == src1); prefers the short EAX,imm32 encoding. */
1586 if (dst == src1 && dstw == src1w) {
1587 if (src2 & SLJIT_IMM) {
1588 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1589 if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1591 if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) {
1593 BINARY_EAX_IMM(op_eax_imm, src2w);
1596 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1599 else if (FAST_IS_REG(dst)) {
1600 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1604 else if (FAST_IS_REG(src2)) {
1605 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1610 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1611 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1615 return SLJIT_SUCCESS;
1618 /* General version. */
/* dst != src2 is required: writing dst first would clobber src2. */
1619 if (FAST_IS_REG(dst) && dst != src2) {
1620 EMIT_MOV(compiler, dst, 0, src1, src1w);
1621 if (src2 & SLJIT_IMM) {
1622 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1625 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1631 /* This version requires less memory writing. */
1632 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1633 if (src2 & SLJIT_IMM) {
1634 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1637 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1641 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1644 return SLJIT_SUCCESS;
/* Emits a signed multiply: dst = src1 * src2. Uses the two-byte IMUL r,r/m
   form when an operand already sits in the destination register, and the
   IMUL r, r/m, imm8/imm32 forms for immediates; the result is computed in
   dst (if a register) or TMP_REG1 and stored at the end. */
1647 static sljit_si emit_mul(struct sljit_compiler *compiler,
1648 sljit_si dst, sljit_sw dstw,
1649 sljit_si src1, sljit_sw src1w,
1650 sljit_si src2, sljit_sw src2w)
1655 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1657 /* Register destination. */
1658 if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1659 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1664 else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1665 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1670 else if (src1 & SLJIT_IMM) {
1671 if (src2 & SLJIT_IMM) {
1672 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1677 if (src1w <= 127 && src1w >= -128) {
1678 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1680 *inst = IMUL_r_rm_i8;
1681 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1684 *inst = (sljit_sb)src1w;
1686 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1688 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1690 *inst = IMUL_r_rm_i32;
1691 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1694 *(sljit_sw*)inst = src1w;
1697 else if (IS_HALFWORD(src1w)) {
1698 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1700 *inst = IMUL_r_rm_i32;
1701 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1704 *(sljit_si*)inst = (sljit_si)src1w;
1707 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1709 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1710 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1717 else if (src2 & SLJIT_IMM) {
1718 /* Note: src1 is NOT immediate. */
1720 if (src2w <= 127 && src2w >= -128) {
1721 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1723 *inst = IMUL_r_rm_i8;
1724 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1727 *inst = (sljit_sb)src2w;
1729 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1731 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1733 *inst = IMUL_r_rm_i32;
1734 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1737 *(sljit_sw*)inst = src2w;
1740 else if (IS_HALFWORD(src2w)) {
1741 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1743 *inst = IMUL_r_rm_i32;
1744 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1747 *(sljit_si*)inst = (sljit_si)src2w;
/* Fix: this branch handles the src2 immediate (src1 is NOT immediate, see
   the note above), so the wide immediate loaded into TMP_REG2 must be
   src2w, mirroring the src1-immediate branch which loads src1w. */
1750 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
1752 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1753 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1761 /* Neither argument is immediate. */
1762 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1764 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1765 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
/* Result was built in TMP_REG1; store it to the real destination. */
1771 if (dst_r == TMP_REG1)
1772 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1774 return SLJIT_SUCCESS;
/* Tries to implement an addition with LEA (which does not modify flags).
   Returns SLJIT_ERR_UNSUPPORTED when the operand combination cannot be
   expressed as an effective address, so the caller falls back to ADD. */
1777 static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_flags,
1778 sljit_si dst, sljit_sw dstw,
1779 sljit_si src1, sljit_sw src1w,
1780 sljit_si src2, sljit_sw src2w)
1783 sljit_si dst_r, done = 0;
1785 /* These cases better be left to handled by normal way. */
1787 if (dst == src1 && dstw == src1w)
1788 return SLJIT_ERR_UNSUPPORTED;
1789 if (dst == src2 && dstw == src2w)
1790 return SLJIT_ERR_UNSUPPORTED;
1793 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1795 if (FAST_IS_REG(src1)) {
/* reg + reg: LEA dst, [src1 + src2]. */
1796 if (FAST_IS_REG(src2)) {
1797 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
1802 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1803 if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1804 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w);
1806 if (src2 & SLJIT_IMM) {
1807 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
1814 else if (FAST_IS_REG(src2)) {
1815 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1816 if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1817 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w);
1819 if (src1 & SLJIT_IMM) {
1820 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
1829 if (dst_r == TMP_REG1)
1830 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
1831 return SLJIT_SUCCESS;
1833 return SLJIT_ERR_UNSUPPORTED;
/* Emits a CMP of src1 and src2 (subtraction for flags only, no result
   stored). Prefers the short CMP EAX, imm32 encoding when applicable. */
1836 static sljit_si emit_cmp_binary(struct sljit_compiler *compiler,
1837 sljit_si src1, sljit_sw src1w,
1838 sljit_si src2, sljit_sw src2w)
1842 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1843 if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1845 if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1847 BINARY_EAX_IMM(CMP_EAX_i32, src2w);
1848 return SLJIT_SUCCESS;
1851 if (FAST_IS_REG(src1)) {
1852 if (src2 & SLJIT_IMM) {
1853 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
1856 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1860 return SLJIT_SUCCESS;
/* src2 in a register, src1 not an immediate: use the CMP r/m, r form. */
1863 if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
1864 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1867 return SLJIT_SUCCESS;
1870 if (src2 & SLJIT_IMM) {
/* Both immediate: materialize src1 in TMP_REG1 first. */
1871 if (src1 & SLJIT_IMM) {
1872 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1876 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
1879 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1880 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1884 return SLJIT_SUCCESS;
/* Emits a TEST (non-destructive AND, flags only) of src1 and src2.
   Prefers the short TEST EAX, imm32 encoding when one operand is EAX and
   the other is an immediate outside the imm8 range. */
1887 static sljit_si emit_test_binary(struct sljit_compiler *compiler,
1888 sljit_si src1, sljit_sw src1w,
1889 sljit_si src2, sljit_sw src2w)
1893 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1894 if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1896 if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1898 BINARY_EAX_IMM(TEST_EAX_i32, src2w);
1899 return SLJIT_SUCCESS;
1902 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* Fix: the immediate flag must be tested on src1 here (src2 is the EAX
   register operand). The old "src2 & SLJIT_IMM" could never be true
   together with src2 == SLJIT_SCRATCH_REG1, leaving this fast path
   unreachable; the 32-bit variant below already tests src1. */
1903 if (src2 == SLJIT_SCRATCH_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1905 if (src2 == SLJIT_SCRATCH_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1907 BINARY_EAX_IMM(TEST_EAX_i32, src1w);
1908 return SLJIT_SUCCESS;
1911 if (FAST_IS_REG(src1)) {
1912 if (src2 & SLJIT_IMM) {
1913 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1914 if (IS_HALFWORD(src2w) || compiler->mode32) {
1915 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
/* Immediate does not fit in 32 bits: load it and use TEST r/m, r. */
1920 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1921 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
1926 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1932 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1936 return SLJIT_SUCCESS;
1939 if (FAST_IS_REG(src2)) {
1940 if (src1 & SLJIT_IMM) {
1941 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1942 if (IS_HALFWORD(src1w) || compiler->mode32) {
1943 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
1948 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1949 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
1954 inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
1960 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1964 return SLJIT_SUCCESS;
/* General case: copy src1 into TMP_REG1 and test against src2. */
1967 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1968 if (src2 & SLJIT_IMM) {
1969 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1970 if (IS_HALFWORD(src2w) || compiler->mode32) {
1971 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1976 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1977 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
1982 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1988 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1992 return SLJIT_SUCCESS;
/* Emits a shift (count either immediate or in ecx / SLJIT_PREF_SHIFT_REG).
   Hardware requires the variable count in cl, so the non-immediate cases
   carefully save/restore ecx around the shift. Statement order here is
   load-bearing; code is left untouched, comments only. */
1995 static sljit_si emit_shift(struct sljit_compiler *compiler,
1997 sljit_si dst, sljit_sw dstw,
1998 sljit_si src1, sljit_sw src1w,
1999 sljit_si src2, sljit_sw src2w)
/* Easy cases: count is an immediate or already in the shift register. */
2003 if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
2004 if (dst == src1 && dstw == src1w) {
2005 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
2008 return SLJIT_SUCCESS;
2010 if (dst == SLJIT_UNUSED) {
2011 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2012 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2015 return SLJIT_SUCCESS;
2017 if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
2018 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2019 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2022 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2023 return SLJIT_SUCCESS;
2025 if (FAST_IS_REG(dst)) {
2026 EMIT_MOV(compiler, dst, 0, src1, src1w);
2027 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
2030 return SLJIT_SUCCESS;
2033 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2034 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2037 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2038 return SLJIT_SUCCESS;
/* Variable count not in ecx: move it there, shifting in a temporary. */
2041 if (dst == SLJIT_PREF_SHIFT_REG) {
2042 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2043 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2044 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2047 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
/* dst is a register distinct from src2 and not addressed through ecx:
   shift in place, preserving ecx in TMP_REG1. */
2049 else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
2051 EMIT_MOV(compiler, dst, 0, src1, src1w);
2052 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2053 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2054 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
2057 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2060 /* This case is really difficult, since ecx itself may used for
2061 addressing, and we must ensure to work even in that case. */
2062 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2063 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2064 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
2066 /* [esp+0] contains the flags. */
2067 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
2069 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2070 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2073 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2074 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
2076 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw));
2078 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2081 return SLJIT_SUCCESS;
/* Like emit_shift, but guarantees the flags are set even for a zero shift
   count (the CPU leaves flags unchanged when the count is 0), emulating a
   flag-setting no-op with OR dst, 0 or an extra CMP when needed. */
2084 static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler,
2085 sljit_ub mode, sljit_si set_flags,
2086 sljit_si dst, sljit_sw dstw,
2087 sljit_si src1, sljit_sw src1w,
2088 sljit_si src2, sljit_sw src2w)
2090 /* The CPU does not set flags if the shift count is 0. */
2091 if (src2 & SLJIT_IMM) {
2092 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* Count masked to 6 bits (5 in 32-bit mode); non-zero counts shift
   normally and set flags by themselves. */
2093 if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
2094 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2096 if ((src2w & 0x1f) != 0)
2097 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2100 return emit_mov(compiler, dst, dstw, src1, src1w);
2101 /* OR dst, src, 0 */
2102 return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2103 dst, dstw, src1, src1w, SLJIT_IMM, 0);
2107 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
/* Non-register destination: compare before shifting so flags reflect the
   operand even when the runtime count turns out to be zero. */
2109 if (!FAST_IS_REG(dst))
2110 FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2112 FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w));
2114 if (FAST_IS_REG(dst))
2115 return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
2116 return SLJIT_SUCCESS;
/* Public entry point for two-operand operations: validates and normalizes
   the operands, manages the saved-flags state, then dispatches to the
   specialized emitters (LEA/ADD fast path, CMP/TEST for unused results,
   shift-with-flags wrappers, etc.). */
2119 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
2120 sljit_si dst, sljit_sw dstw,
2121 sljit_si src1, sljit_sw src1w,
2122 sljit_si src2, sljit_sw src2w)
2125 check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2126 ADJUST_LOCAL_OFFSET(dst, dstw);
2127 ADJUST_LOCAL_OFFSET(src1, src1w);
2128 ADJUST_LOCAL_OFFSET(src2, src2w);
2130 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2131 CHECK_EXTRA_REGS(src1, src1w, (void)0);
2132 CHECK_EXTRA_REGS(src2, src2w, (void)0);
2133 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2134 compiler->mode32 = op & SLJIT_INT_OP;
2137 if (GET_OPCODE(op) >= SLJIT_MUL) {
2138 if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2139 compiler->flags_saved = 0;
2140 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2141 FAIL_IF(emit_save_flags(compiler));
2144 switch (GET_OPCODE(op)) {
/* ADD: try the flag-preserving LEA form first when flags are not needed. */
2146 if (!GET_FLAGS(op)) {
2147 if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
2148 return compiler->error;
2151 compiler->flags_saved = 0;
2152 if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2153 FAIL_IF(emit_save_flags(compiler));
2154 return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
2155 dst, dstw, src1, src1w, src2, src2w);
/* ADDC: the carry flag must survive until the ADC instruction. */
2157 if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
2158 FAIL_IF(emit_restore_flags(compiler, 1));
2159 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
2160 FAIL_IF(emit_save_flags(compiler));
2161 if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2162 compiler->flags_saved = 0;
2163 return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
2164 dst, dstw, src1, src1w, src2, src2w);
/* SUB: LEA with negated immediate when flags are not needed; plain CMP
   when the result is discarded. */
2166 if (!GET_FLAGS(op)) {
2167 if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
2168 return compiler->error;
2171 compiler->flags_saved = 0;
2172 if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2173 FAIL_IF(emit_save_flags(compiler));
2174 if (dst == SLJIT_UNUSED)
2175 return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
2176 return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
2177 dst, dstw, src1, src1w, src2, src2w);
2179 if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
2180 FAIL_IF(emit_restore_flags(compiler, 1));
2181 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
2182 FAIL_IF(emit_save_flags(compiler));
2183 if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2184 compiler->flags_saved = 0;
2185 return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
2186 dst, dstw, src1, src1w, src2, src2w);
2188 return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
/* AND with a discarded result becomes TEST. */
2190 if (dst == SLJIT_UNUSED)
2191 return emit_test_binary(compiler, src1, src1w, src2, src2w);
2192 return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
2193 dst, dstw, src1, src1w, src2, src2w);
2195 return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2196 dst, dstw, src1, src1w, src2, src2w);
2198 return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
2199 dst, dstw, src1, src1w, src2, src2w);
2201 return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
2202 dst, dstw, src1, src1w, src2, src2w);
2204 return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
2205 dst, dstw, src1, src1w, src2, src2w);
2207 return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
2208 dst, dstw, src1, src1w, src2, src2w);
2211 return SLJIT_SUCCESS;
/* Maps an abstract sljit register to its hardware encoding via reg_map.
   On 32-bit x86 the "extra" registers live in memory, not in a machine
   register, so no index can be returned for them. */
2214 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
2216 check_sljit_get_register_index(reg);
2217 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2218 if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2
2219 || reg == SLJIT_SAVED_EREG1 || reg == SLJIT_SAVED_EREG2)
2222 return reg_map[reg];
/* Maps an abstract sljit float register to its hardware encoding. */
2225 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
2227 check_sljit_get_float_register_index(reg);
/* Copies a raw, caller-provided machine instruction (1..15 bytes) verbatim
   into the code buffer. */
2231 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
2232 void *instruction, sljit_si size)
2237 check_sljit_emit_op_custom(compiler, instruction, size);
2238 SLJIT_ASSERT(size > 0 && size < 16);
2240 inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
2243 SLJIT_MEMMOVE(inst, instruction, size);
2244 return SLJIT_SUCCESS;
2247 /* --------------------------------------------------------------------- */
2248 /* Floating point operators */
2249 /* --------------------------------------------------------------------- */
2251 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
2253 /* Alignment + 2 * 16 bytes. */
2254 static sljit_si sse2_data[3 + (4 + 4) * 2];
2255 static sljit_si *sse2_buffer;
/* One-time setup: aligns sse2_buffer to 16 bytes and fills it with the
   sign-bit and absolute-value masks used by SLJIT_NEGD/SLJIT_ABSD
   (XORPD/ANDPD operands for single and double precision). */
2257 static void init_compiler(void)
2259 sse2_buffer = (sljit_si*)(((sljit_uw)sse2_data + 15) & ~0xf);
2260 /* Single precision constants. */
2261 sse2_buffer[0] = 0x80000000;
2262 sse2_buffer[4] = 0x7fffffff;
2263 /* Double precision constants. */
2265 sse2_buffer[9] = 0x80000000;
2266 sse2_buffer[12] = 0xffffffff;
2267 sse2_buffer[13] = 0x7fffffff;
/* Reports FPU availability: a build-time override if defined, otherwise a
   (cached) runtime SSE2 detection, otherwise the compile-time assumption. */
2272 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
2274 #ifdef SLJIT_IS_FPU_AVAILABLE
2275 return SLJIT_IS_FPU_AVAILABLE;
2276 #elif (defined SLJIT_SSE2 && SLJIT_SSE2)
2277 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
2278 if (cpu_has_sse2 == -1)
2280 return cpu_has_sse2;
2281 #else /* SLJIT_DETECT_SSE2 */
2283 #endif /* SLJIT_DETECT_SSE2 */
2284 #else /* SLJIT_SSE2 */
2289 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
/* Emits a two-byte SSE2 scalar instruction (F3/F2 prefix selects single or
   double precision) with xmm1 as the register operand. */
2291 static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
2292 sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
2296 inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2300 return SLJIT_SUCCESS;
/* Emits a two-byte SSE2 logic/compare instruction (optional 66 prefix for
   the packed-double forms such as XORPD/ANDPD/UCOMISD). */
2303 static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
2304 sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
2308 inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2312 return SLJIT_SUCCESS;
/* Loads a float/double from src into xmm register dst (MOVSS/MOVSD). */
2315 static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler,
2316 sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw)
2318 return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
/* Stores xmm register src to dst/dstw (MOVSS/MOVSD memory form). */
2321 static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler,
2322 sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src)
2324 return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
/* Single-operand float ops using SSE2: CMPD (UCOMISD flag compare), MOVD
   (register/memory moves), NEGD (XORPD with the sign-bit mask) and ABSD
   (ANDPD with the abs mask), with TMP_FREG as a staging register. */
2327 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
2328 sljit_si dst, sljit_sw dstw,
2329 sljit_si src, sljit_sw srcw)
2334 check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
2336 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2337 compiler->mode32 = 1;
2340 if (GET_OPCODE(op) == SLJIT_CMPD) {
2341 compiler->flags_saved = 0;
2342 if (FAST_IS_REG(dst))
2346 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, dst, dstw));
2348 return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), dst_r, src, srcw);
2351 if (op == SLJIT_MOVD) {
2352 if (FAST_IS_REG(dst))
2353 return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw);
2354 if (FAST_IS_REG(src))
2355 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, src);
/* Memory-to-memory move: bounce through TMP_FREG. */
2356 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src, srcw));
2357 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2360 if (SLOW_IS_REG(dst)) {
2363 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
2367 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
2370 switch (GET_OPCODE(op)) {
/* NEGD: flip the sign bit with XORPD against the mask in sse2_buffer. */
2372 FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8)));
/* ABSD: clear the sign bit with ANDPD against the mask in sse2_buffer. */
2376 FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
2380 if (dst_r == TMP_FREG)
2381 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2382 return SLJIT_SUCCESS;
/* Two-operand float arithmetic using SSE2 (ADDSD/SUBSD/MULSD/DIVSD and the
   single-precision variants). Picks a destination xmm register, loads src1
   into it (swapping operands for the commutative ADD/MUL when dst == src2),
   applies the operation, and stores back from TMP_FREG if needed. */
2385 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
2386 sljit_si dst, sljit_sw dstw,
2387 sljit_si src1, sljit_sw src1w,
2388 sljit_si src2, sljit_sw src2w)
2393 check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2395 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2396 compiler->mode32 = 1;
2399 if (FAST_IS_REG(dst)) {
2402 ; /* Do nothing here. */
2403 else if (dst == src2 && (op == SLJIT_ADDD || op == SLJIT_MULD)) {
2404 /* Swap arguments. */
2408 else if (dst != src2)
2409 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src1, src1w));
/* dst == src2 for a non-commutative op: compute in TMP_FREG. */
2412 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
2417 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
2420 switch (GET_OPCODE(op)) {
2422 FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2426 FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2430 FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2434 FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2438 if (dst_r == TMP_FREG)
2439 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2440 return SLJIT_SUCCESS;
/* Non-SSE2 build: float ops are unsupported; report the error. */
2445 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
2446 sljit_si dst, sljit_sw dstw,
2447 sljit_si src, sljit_sw srcw)
2450 /* Should cause an assertion fail. */
2451 check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
2452 compiler->error = SLJIT_ERR_UNSUPPORTED;
2453 return SLJIT_ERR_UNSUPPORTED;
/* Non-SSE2 build: float ops are unsupported; report the error. */
2456 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
2457 sljit_si dst, sljit_sw dstw,
2458 sljit_si src1, sljit_sw src1w,
2459 sljit_si src2, sljit_sw src2w)
2462 /* Should cause an assertion fail. */
2463 check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2464 compiler->error = SLJIT_ERR_UNSUPPORTED;
2465 return SLJIT_ERR_UNSUPPORTED;
2470 /* --------------------------------------------------------------------- */
2471 /* Conditional instructions */
2472 /* --------------------------------------------------------------------- */
/* Creates a label at the current position. Restores any saved flags first
   (jumps into the label expect live flags), and reuses the previous label
   when nothing has been emitted since it. */
2474 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2477 struct sljit_label *label;
2480 check_sljit_emit_label(compiler);
2482 /* We should restore the flags before the label,
2483 since other taken jumps has their own flags as well. */
2484 if (SLJIT_UNLIKELY(compiler->flags_saved))
2485 PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2487 if (compiler->last_label && compiler->last_label->size == compiler->size)
2488 return compiler->last_label;
2490 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2491 PTR_FAIL_IF(!label);
2492 set_label(label, compiler);
2494 inst = (sljit_ub*)ensure_buf(compiler, 2);
/* Creates a (possibly conditional) jump or call. Conditional jumps keep the
   saved-flags state; calls first marshal their arguments. Only worst-case
   size is reserved here; the final encoding is chosen at code generation. */
2503 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
2506 struct sljit_jump *jump;
2509 check_sljit_emit_jump(compiler, type);
2511 if (SLJIT_UNLIKELY(compiler->flags_saved)) {
/* Unconditional jumps/calls do not consume flags: restore them now. */
2512 if ((type & 0xff) <= SLJIT_JUMP)
2513 PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2514 compiler->flags_saved = 0;
2517 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2518 PTR_FAIL_IF_NULL(jump);
2519 set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2522 if (type >= SLJIT_CALL1)
2523 PTR_FAIL_IF(call_with_args(compiler, type));
2525 /* Worst case size. */
2526 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2527 compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
2529 compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
2532 inst = (sljit_ub*)ensure_buf(compiler, 2);
2533 PTR_FAIL_IF_NULL(inst);
/* Emits an indirect jump or call to src/srcw. Immediate targets become
   rewritable jump records; register/memory targets use the CALL/JMP r/m
   encoding. Call types first move conflicting argument registers aside and
   marshal arguments via call_with_args. */
2540 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
2543 struct sljit_jump *jump;
2546 check_sljit_emit_ijump(compiler, type, src, srcw);
2547 ADJUST_LOCAL_OFFSET(src, srcw);
2549 CHECK_EXTRA_REGS(src, srcw, (void)0);
2551 if (SLJIT_UNLIKELY(compiler->flags_saved)) {
2552 if (type <= SLJIT_JUMP)
2553 FAIL_IF(emit_restore_flags(compiler, 0));
2554 compiler->flags_saved = 0;
2557 if (type >= SLJIT_CALL1) {
2558 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2559 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
/* The target register would be clobbered by argument setup: save it. */
2560 if (src == SLJIT_SCRATCH_REG3) {
2561 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
/* Stack-relative target: argument pushes shift the offset. */
2564 if (src == SLJIT_MEM1(SLJIT_LOCALS_REG) && type >= SLJIT_CALL3)
2565 srcw += sizeof(sljit_sw);
2568 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
2569 if (src == SLJIT_SCRATCH_REG3) {
2570 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
2574 FAIL_IF(call_with_args(compiler, type));
2577 if (src == SLJIT_IMM) {
2578 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2580 set_jump(jump, compiler, JUMP_ADDR);
2581 jump->u.target = srcw;
2583 /* Worst case size. */
2584 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2585 compiler->size += 5;
2587 compiler->size += 10 + 3;
2590 inst = (sljit_ub*)ensure_buf(compiler, 2);
2597 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2598 /* REX_W is not necessary (src is not immediate). */
2599 compiler->mode32 = 1;
2601 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2604 *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
2606 return SLJIT_SUCCESS;
/* Materialize a condition flag as a 0/1 value: dst = (condition of
   'type' holds), optionally combined with dst via 'op' (e.g. SLJIT_OR).
   Implemented with setcc + movzx, with special x86-32 fallbacks for
   registers whose low byte is not addressable.
   NOTE(review): elided listing — braces, #else/#endif lines and a few
   emitted opcode bytes are missing from this view. */
2609 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
2610 sljit_si dst, sljit_sw dstw,
2611 sljit_si src, sljit_sw srcw,
2615 sljit_ub cond_set = 0;
2616 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2619 /* CHECK_EXTRA_REGS might overwrite these values. */
2620 sljit_si dst_save = dst;
2621 sljit_sw dstw_save = dstw;
2625 check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);
/* Nothing to store: nothing to do. */
2627 if (dst == SLJIT_UNUSED)
2628 return SLJIT_SUCCESS;
2630 ADJUST_LOCAL_OFFSET(dst, dstw);
2631 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2632 if (SLJIT_UNLIKELY(compiler->flags_saved))
2633 FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));
2635 /* setcc = jcc + 0x10. */
2636 cond_set = get_jump_code(type) + 0x10;
2638 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* Fast path: dst |= cond directly (dst == src, flags not needed). */
2639 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
2640 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3);
2643 /* Set low register to conditional flag. */
2644 *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
2647 *inst++ = MOD_REG | reg_lmap[TMP_REG1];
2648 *inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
2649 *inst++ = OR_rm8_r8;
2650 *inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
2651 return SLJIT_SUCCESS;
/* General 64-bit path: setcc into a byte register, then movzx. */
2654 reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;
2656 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
2659 /* Set low register to conditional flag. */
2660 *inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
2663 *inst++ = MOD_REG | reg_lmap[reg];
2664 *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
2666 *inst++ = MOVZX_r_rm8;
2667 *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
/* Result already in the destination register. */
2669 if (reg != TMP_REG1)
2670 return SLJIT_SUCCESS;
2672 if (GET_OPCODE(op) < SLJIT_ADD) {
2673 compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
2674 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2676 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
2677 compiler->skip_checks = 1;
/* Combine the 0/1 bit with dst using the arithmetic op. */
2679 return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0);
2680 #else /* SLJIT_CONFIG_X86_64 */
2681 if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
/* On x86-32 only eax..ebx (reg_map <= 4) have addressable low bytes. */
2682 if (reg_map[dst] <= 4) {
2683 /* Low byte is accessible. */
2684 inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
2687 /* Set low byte to conditional flag. */
2690 *inst++ = MOD_REG | reg_map[dst];
2693 *inst++ = MOVZX_r_rm8;
2694 *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
2695 return SLJIT_SUCCESS;
2698 /* Low byte is not accessible. */
2699 if (cpu_has_cmov == -1)
/* cmov path: load 1, clear dst without touching flags, cmovcc. */
2703 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
2704 /* a xor reg, reg operation would overwrite the flags. */
2705 EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
2707 inst = (sljit_ub*)ensure_buf(compiler, 1 + 3);
2712 /* cmovcc = setcc - 0x50. */
2713 *inst++ = cond_set - 0x50;
2714 *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
2715 return SLJIT_SUCCESS;
/* No cmov: borrow al via xchg, setcc into al, movzx, restore eax. */
2718 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2720 INC_SIZE(1 + 3 + 3 + 1);
2721 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2722 /* Set al to conditional flag. */
2725 *inst++ = MOD_REG | 0 /* eax */;
2728 *inst++ = MOVZX_r_rm8;
2729 *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
2730 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2731 return SLJIT_SUCCESS;
/* x86-32 "dst |= cond" fast path for byte-addressable registers. */
2734 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
2735 SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG1] == 0, scratch_reg1_must_be_eax);
2736 if (dst != SLJIT_SCRATCH_REG1) {
2737 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
2739 INC_SIZE(1 + 3 + 2 + 1);
2740 /* Set low register to conditional flag. */
2741 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2744 *inst++ = MOD_REG | 0 /* eax */;
2745 *inst++ = OR_rm8_r8;
2746 *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
2747 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
/* dst is eax itself: swap TMP_REG1 with ecx instead. */
2750 inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
2752 INC_SIZE(2 + 3 + 2 + 2);
2753 /* Set low register to conditional flag. */
2754 *inst++ = XCHG_r_rm;
2755 *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2758 *inst++ = MOD_REG | 1 /* ecx */;
2759 *inst++ = OR_rm8_r8;
2760 *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
2761 *inst++ = XCHG_r_rm;
2762 *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2764 return SLJIT_SUCCESS;
2767 /* Set TMP_REG1 to the bit. */
2768 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2770 INC_SIZE(1 + 3 + 3 + 1);
2771 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2772 /* Set al to conditional flag. */
2775 *inst++ = MOD_REG | 0 /* eax */;
2778 *inst++ = MOVZX_r_rm8;
2779 *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
2781 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
/* Plain move ops: store the 0/1 result directly to dst. */
2783 if (GET_OPCODE(op) < SLJIT_ADD)
2784 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2786 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
2787 compiler->skip_checks = 1;
/* Use the saved dst/dstw: CHECK_EXTRA_REGS may have rewritten them. */
2789 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
2790 #endif /* SLJIT_CONFIG_X86_64 */
/* Compute dst = address of the local (stack) area plus 'offset',
   typically emitted as a single lea.  Returns SLJIT_SUCCESS or error.
   NOTE(review): elided listing — #else/#endif and brace lines are
   missing from this view. */
2793 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
2796 check_sljit_get_local_base(compiler, dst, dstw, offset);
2797 ADJUST_LOCAL_OFFSET(dst, dstw);
2799 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2801 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* Address arithmetic must be done in 64-bit mode. */
2802 compiler->mode32 = 0;
/* Translate 'offset' the same way a memory operand would be. */
2805 ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset);
2807 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* Offset does not fit a signed 32-bit immediate: load it into
   TMP_REG1 first, then lea dst, [LOCALS_REG + TMP_REG1]. */
2808 if (NOT_HALFWORD(offset)) {
2809 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
2810 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
2811 SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
2812 return compiler->error;
2814 return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REG1, 0);
/* Small offset: lea with an immediate; zero offset is a plain move. */
2820 return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset);
2821 return emit_mov(compiler, dst, dstw, SLJIT_LOCALS_REG, 0);
/* Emit a load of 'init_value' into dst whose immediate can later be
   patched with sljit_set_const.  Returns the const descriptor, or NULL
   on failure.
   NOTE(review): elided listing — braces, #else/#endif and some failure
   paths are missing from this view. */
2824 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
2827 struct sljit_const *const_;
2828 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2833 check_sljit_emit_const(compiler, dst, dstw, init_value);
2834 ADJUST_LOCAL_OFFSET(dst, dstw);
2836 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2838 const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2839 PTR_FAIL_IF(!const_);
2840 set_const(const_, compiler);
2842 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2843 compiler->mode32 = 0;
/* Load the patchable imm64 into a register (TMP_REG1 if dst is not
   a register), store to memory afterwards if required (line 2863+). */
2844 reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2846 if (emit_load_imm64(compiler, reg, init_value))
/* x86-32 path: a plain mov with a patchable imm32. */
2849 if (dst == SLJIT_UNUSED)
2852 if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
2856 inst = (sljit_ub*)ensure_buf(compiler, 2);
2862 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2863 if (dst & SLJIT_MEM)
2864 if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
/* Patch a previously emitted (rewritable) jump in generated code so it
   targets 'new_addr'.  'addr' points at the displacement/immediate field.
   NOTE(review): the #else/#endif lines between the two stores are
   elided from this view. */
2871 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
2873 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
/* x86-32 uses a rel32 jump: store target relative to the next
   instruction (the 4-byte field ends at addr + 4). */
2874 *(sljit_sw*)addr = new_addr - (addr + 4);
/* x86-64 path stores the absolute 64-bit target. */
2876 *(sljit_uw*)addr = new_addr;
/* Patch the immediate of a constant emitted by sljit_emit_const.
   'addr' is the address returned by sljit_get_const_addr. */
2880 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
2882 *(sljit_sw*)addr = new_constant;