chiark / gitweb /
Record pcre3 (2:8.38-1) in archive suite sid
[pcre3.git] / sljit / sljitNativeX86_64.c
1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 /* x86 64-bit arch dependent functions. */
28
29 static sljit_si emit_load_imm64(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm)
30 {
31         sljit_ub *inst;
32
33         inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw));
34         FAIL_IF(!inst);
35         INC_SIZE(2 + sizeof(sljit_sw));
36         *inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
37         *inst++ = MOV_r_i32 + (reg_map[reg] & 0x7);
38         *(sljit_sw*)inst = imm;
39         return SLJIT_SUCCESS;
40 }
41
42 static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type)
43 {
44         if (type < SLJIT_JUMP) {
45                 /* Invert type. */
46                 *code_ptr++ = get_jump_code(type ^ 0x1) - 0x10;
47                 *code_ptr++ = 10 + 3;
48         }
49
50         SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_first);
51         *code_ptr++ = REX_W | REX_B;
52         *code_ptr++ = MOV_r_i32 + 1;
53         jump->addr = (sljit_uw)code_ptr;
54
55         if (jump->flags & JUMP_LABEL)
56                 jump->flags |= PATCH_MD;
57         else
58                 *(sljit_sw*)code_ptr = jump->u.target;
59
60         code_ptr += sizeof(sljit_sw);
61         *code_ptr++ = REX_B;
62         *code_ptr++ = GROUP_FF;
63         *code_ptr++ = (type >= SLJIT_FAST_CALL) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1);
64
65         return code_ptr;
66 }
67
68 static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type)
69 {
70         sljit_sw delta = addr - ((sljit_sw)code_ptr + 1 + sizeof(sljit_si));
71
72         if (delta <= HALFWORD_MAX && delta >= HALFWORD_MIN) {
73                 *code_ptr++ = (type == 2) ? CALL_i32 : JMP_i32;
74                 *(sljit_sw*)code_ptr = delta;
75         }
76         else {
77                 SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_second);
78                 *code_ptr++ = REX_W | REX_B;
79                 *code_ptr++ = MOV_r_i32 + 1;
80                 *(sljit_sw*)code_ptr = addr;
81                 code_ptr += sizeof(sljit_sw);
82                 *code_ptr++ = REX_B;
83                 *code_ptr++ = GROUP_FF;
84                 *code_ptr++ = (type == 2) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1);
85         }
86
87         return code_ptr;
88 }
89
90 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
91         sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
92         sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
93 {
94         sljit_si i, tmp, size, saved_register_size;
95         sljit_ub *inst;
96
97         CHECK_ERROR();
98         CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
99         set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
100
101         compiler->flags_saved = 0;
102
103         /* Including the return address saved by the call instruction. */
104         saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
105
106         tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
107         for (i = SLJIT_S0; i >= tmp; i--) {
108                 size = reg_map[i] >= 8 ? 2 : 1;
109                 inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
110                 FAIL_IF(!inst);
111                 INC_SIZE(size);
112                 if (reg_map[i] >= 8)
113                         *inst++ = REX_B;
114                 PUSH_REG(reg_lmap[i]);
115         }
116
117         for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
118                 size = reg_map[i] >= 8 ? 2 : 1;
119                 inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
120                 FAIL_IF(!inst);
121                 INC_SIZE(size);
122                 if (reg_map[i] >= 8)
123                         *inst++ = REX_B;
124                 PUSH_REG(reg_lmap[i]);
125         }
126
127         if (args > 0) {
128                 size = args * 3;
129                 inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
130                 FAIL_IF(!inst);
131
132                 INC_SIZE(size);
133
134 #ifndef _WIN64
135                 if (args > 0) {
136                         *inst++ = REX_W;
137                         *inst++ = MOV_r_rm;
138                         *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
139                 }
140                 if (args > 1) {
141                         *inst++ = REX_W | REX_R;
142                         *inst++ = MOV_r_rm;
143                         *inst++ = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
144                 }
145                 if (args > 2) {
146                         *inst++ = REX_W | REX_R;
147                         *inst++ = MOV_r_rm;
148                         *inst++ = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
149                 }
150 #else
151                 if (args > 0) {
152                         *inst++ = REX_W;
153                         *inst++ = MOV_r_rm;
154                         *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
155                 }
156                 if (args > 1) {
157                         *inst++ = REX_W;
158                         *inst++ = MOV_r_rm;
159                         *inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
160                 }
161                 if (args > 2) {
162                         *inst++ = REX_W | REX_B;
163                         *inst++ = MOV_r_rm;
164                         *inst++ = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
165                 }
166 #endif
167         }
168
169         local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
170         compiler->local_size = local_size;
171
172 #ifdef _WIN64
173         if (local_size > 1024) {
174                 /* Allocate stack for the callback, which grows the stack. */
175                 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_si)));
176                 FAIL_IF(!inst);
177                 INC_SIZE(4 + (3 + sizeof(sljit_si)));
178                 *inst++ = REX_W;
179                 *inst++ = GROUP_BINARY_83;
180                 *inst++ = MOD_REG | SUB | 4;
181                 /* Allocated size for registers must be divisible by 8. */
182                 SLJIT_ASSERT(!(saved_register_size & 0x7));
183                 /* Aligned to 16 byte. */
184                 if (saved_register_size & 0x8) {
185                         *inst++ = 5 * sizeof(sljit_sw);
186                         local_size -= 5 * sizeof(sljit_sw);
187                 } else {
188                         *inst++ = 4 * sizeof(sljit_sw);
189                         local_size -= 4 * sizeof(sljit_sw);
190                 }
191                 /* Second instruction */
192                 SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] < 8, temporary_reg1_is_loreg);
193                 *inst++ = REX_W;
194                 *inst++ = MOV_rm_i32;
195                 *inst++ = MOD_REG | reg_lmap[SLJIT_R0];
196                 *(sljit_si*)inst = local_size;
197 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
198                         || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
199                 compiler->skip_checks = 1;
200 #endif
201                 FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
202         }
203 #endif
204
205         SLJIT_ASSERT(local_size > 0);
206         if (local_size <= 127) {
207                 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
208                 FAIL_IF(!inst);
209                 INC_SIZE(4);
210                 *inst++ = REX_W;
211                 *inst++ = GROUP_BINARY_83;
212                 *inst++ = MOD_REG | SUB | 4;
213                 *inst++ = local_size;
214         }
215         else {
216                 inst = (sljit_ub*)ensure_buf(compiler, 1 + 7);
217                 FAIL_IF(!inst);
218                 INC_SIZE(7);
219                 *inst++ = REX_W;
220                 *inst++ = GROUP_BINARY_81;
221                 *inst++ = MOD_REG | SUB | 4;
222                 *(sljit_si*)inst = local_size;
223                 inst += sizeof(sljit_si);
224         }
225
226 #ifdef _WIN64
227         /* Save xmm6 register: movaps [rsp + 0x20], xmm6 */
228         if (fscratches >= 6 || fsaveds >= 1) {
229                 inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
230                 FAIL_IF(!inst);
231                 INC_SIZE(5);
232                 *inst++ = GROUP_0F;
233                 *(sljit_si*)inst = 0x20247429;
234         }
235 #endif
236
237         return SLJIT_SUCCESS;
238 }
239
240 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler,
241         sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
242         sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
243 {
244         sljit_si saved_register_size;
245
246         CHECK_ERROR();
247         CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
248         set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
249
250         /* Including the return address saved by the call instruction. */
251         saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
252         compiler->local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
253         return SLJIT_SUCCESS;
254 }
255
256 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
257 {
258         sljit_si i, tmp, size;
259         sljit_ub *inst;
260
261         CHECK_ERROR();
262         CHECK(check_sljit_emit_return(compiler, op, src, srcw));
263
264         compiler->flags_saved = 0;
265         FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
266
267 #ifdef _WIN64
268         /* Restore xmm6 register: movaps xmm6, [rsp + 0x20] */
269         if (compiler->fscratches >= 6 || compiler->fsaveds >= 1) {
270                 inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
271                 FAIL_IF(!inst);
272                 INC_SIZE(5);
273                 *inst++ = GROUP_0F;
274                 *(sljit_si*)inst = 0x20247428;
275         }
276 #endif
277
278         SLJIT_ASSERT(compiler->local_size > 0);
279         if (compiler->local_size <= 127) {
280                 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
281                 FAIL_IF(!inst);
282                 INC_SIZE(4);
283                 *inst++ = REX_W;
284                 *inst++ = GROUP_BINARY_83;
285                 *inst++ = MOD_REG | ADD | 4;
286                 *inst = compiler->local_size;
287         }
288         else {
289                 inst = (sljit_ub*)ensure_buf(compiler, 1 + 7);
290                 FAIL_IF(!inst);
291                 INC_SIZE(7);
292                 *inst++ = REX_W;
293                 *inst++ = GROUP_BINARY_81;
294                 *inst++ = MOD_REG | ADD | 4;
295                 *(sljit_si*)inst = compiler->local_size;
296         }
297
298         tmp = compiler->scratches;
299         for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
300                 size = reg_map[i] >= 8 ? 2 : 1;
301                 inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
302                 FAIL_IF(!inst);
303                 INC_SIZE(size);
304                 if (reg_map[i] >= 8)
305                         *inst++ = REX_B;
306                 POP_REG(reg_lmap[i]);
307         }
308
309         tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
310         for (i = tmp; i <= SLJIT_S0; i++) {
311                 size = reg_map[i] >= 8 ? 2 : 1;
312                 inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
313                 FAIL_IF(!inst);
314                 INC_SIZE(size);
315                 if (reg_map[i] >= 8)
316                         *inst++ = REX_B;
317                 POP_REG(reg_lmap[i]);
318         }
319
320         inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
321         FAIL_IF(!inst);
322         INC_SIZE(1);
323         RET();
324         return SLJIT_SUCCESS;
325 }
326
327 /* --------------------------------------------------------------------- */
328 /*  Operators                                                            */
329 /* --------------------------------------------------------------------- */
330
331 static sljit_si emit_do_imm32(struct sljit_compiler *compiler, sljit_ub rex, sljit_ub opcode, sljit_sw imm)
332 {
333         sljit_ub *inst;
334         sljit_si length = 1 + (rex ? 1 : 0) + sizeof(sljit_si);
335
336         inst = (sljit_ub*)ensure_buf(compiler, 1 + length);
337         FAIL_IF(!inst);
338         INC_SIZE(length);
339         if (rex)
340                 *inst++ = rex;
341         *inst++ = opcode;
342         *(sljit_si*)inst = imm;
343         return SLJIT_SUCCESS;
344 }
345
/*
 * Reserves buffer space for one instruction and encodes everything around the
 * opcode: the optional 0xf2 / 0xf3 / 0x66 prefixes, the REX byte, the mod r/m
 * byte with the SIB byte and displacement of operand b, and the immediate of
 * operand a.
 *
 * size:    the low 4 bits are the number of opcode bytes to leave room for,
 *          the remaining bits are EX86_* flags.
 * a, imma: a register, or SLJIT_IMM together with the immediate value.
 * b, immb: a register or memory operand and its displacement. For a base +
 *          index memory operand immb is shifted into bits 6-7 of the SIB byte.
 *
 * Returns a pointer to the reserved opcode position. The group opcode of
 * EX86_BIN_INS with an immediate and of EX86_SHIFT_INS is written here; for
 * EX86_SHIFT_INS the returned pointer is one byte further. NULL is returned
 * when emit_load_imm64 fails; a failed buffer allocation is handled by
 * PTR_FAIL_IF.
 */
346 static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si size,
347         /* The register or immediate operand. */
348         sljit_si a, sljit_sw imma,
349         /* The general operand (not immediate). */
350         sljit_si b, sljit_sw immb)
351 {
352         sljit_ub *inst;
353         sljit_ub *buf_ptr;
354         sljit_ub rex = 0;
355         sljit_si flags = size & ~0xf;
356         sljit_si inst_size;
357
358         /* The immediate operand must be 32 bit. */
359         SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
360         /* Both cannot be switched on. */
361         SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
362         /* Size flags not allowed for typed instructions. */
363         SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
364         /* Both size flags cannot be switched on. */
365         SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
366         /* SSE2 and immediate is not possible. */
367         SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
            /* At most one of the 0xf2, 0xf3 and 0x66 prefixes may be requested. */
368         SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
369                 && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
370                 && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
371
            /* Keep only the opcode length; the flag bits were copied into flags above. */
372         size &= 0xf;
373         inst_size = size;
374
            /* First pass: compute the total instruction length and the REX bits. */
375         if (!compiler->mode32 && !(flags & EX86_NO_REXW))
376                 rex |= REX_W;
377         else if (flags & EX86_REX)
378                 rex |= REX;
379
380         if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
381                 inst_size++;
382         if (flags & EX86_PREF_66)
383                 inst_size++;
384
385         /* Calculate size of b. */
386         inst_size += 1; /* mod r/m byte. */
387         if (b & SLJIT_MEM) {
388                 if (!(b & OFFS_REG_MASK)) {
389                         if (NOT_HALFWORD(immb)) {
                                    /* The displacement does not fit into 32 bits: it is loaded into
                                       TMP_REG3, which becomes the offset register when a base
                                       register is present, or the base register otherwise. */
390                                 if (emit_load_imm64(compiler, TMP_REG3, immb))
391                                         return NULL;
392                                 immb = 0;
393                                 if (b & REG_MASK)
394                                         b |= TO_OFFS_REG(TMP_REG3);
395                                 else
396                                         b |= TMP_REG3;
397                         }
                            /* NOTE(review): a base whose low three register bits are 4 is given
                               SLJIT_SP as offset register, presumably because that r/m value
                               selects the SIB form - confirm against the encoding tables. */
398                         else if (reg_lmap[b & REG_MASK] == 4)
399                                 b |= TO_OFFS_REG(SLJIT_SP);
400                 }
401
402                 if ((b & REG_MASK) == SLJIT_UNUSED)
403                         inst_size += 1 + sizeof(sljit_si); /* SIB byte required to avoid RIP based addressing. */
404                 else {
405                         if (reg_map[b & REG_MASK] >= 8)
406                                 rex |= REX_B;
407
408                         if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) {
409                                 /* Immediate operand. */
410                                 if (immb <= 127 && immb >= -128)
411                                         inst_size += sizeof(sljit_sb);
412                                 else
413                                         inst_size += sizeof(sljit_si);
414                         }
                            /* A base whose low three register bits are 5 always gets a one byte
                               displacement, even when that displacement is zero. */
415                         else if (reg_lmap[b & REG_MASK] == 5)
416                                 inst_size += sizeof(sljit_sb);
417
418                         if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) {
419                                 inst_size += 1; /* SIB byte. */
420                                 if (reg_map[OFFS_REG(b)] >= 8)
421                                         rex |= REX_X;
422                         }
423                 }
424         }
425         else if (!(flags & EX86_SSE2_OP2) && reg_map[b] >= 8)
426                 rex |= REX_B;
427
428         if (a & SLJIT_IMM) {
429                 if (flags & EX86_BIN_INS) {
                            /* Binary operations use a one byte immediate when it fits into
                               a signed byte, four bytes otherwise. */
430                         if (imma <= 127 && imma >= -128) {
431                                 inst_size += 1;
432                                 flags |= EX86_BYTE_ARG;
433                         } else
434                                 inst_size += 4;
435                 }
436                 else if (flags & EX86_SHIFT_INS) {
                            /* The shift count is masked to the operand width; a shift by one
                               has no immediate byte. */
437                         imma &= compiler->mode32 ? 0x1f : 0x3f;
438                         if (imma != 1) {
439                                 inst_size ++;
440                                 flags |= EX86_BYTE_ARG;
441                         }
442                 } else if (flags & EX86_BYTE_ARG)
443                         inst_size++;
444                 else if (flags & EX86_HALF_ARG)
445                         inst_size += sizeof(short);
446                 else
447                         inst_size += sizeof(sljit_si);
448         }
449         else {
450                 SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
451                 /* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
452                 if (!(flags & EX86_SSE2_OP1) && reg_map[a] >= 8)
453                         rex |= REX_R;
454         }
455
456         if (rex)
457                 inst_size++;
458
459         inst = (sljit_ub*)ensure_buf(compiler, 1 + inst_size);
460         PTR_FAIL_IF(!inst);
461
            /* Second pass: write the bytes. */
462         /* Encoding the byte. */
463         INC_SIZE(inst_size);
464         if (flags & EX86_PREF_F2)
465                 *inst++ = 0xf2;
466         if (flags & EX86_PREF_F3)
467                 *inst++ = 0xf3;
468         if (flags & EX86_PREF_66)
469                 *inst++ = 0x66;
470         if (rex)
471                 *inst++ = rex;
            /* inst now points at the reserved opcode bytes, buf_ptr at the mod r/m byte after them. */
472         buf_ptr = inst + size;
473
474         /* Encode mod/rm byte. */
475         if (!(flags & EX86_SHIFT_INS)) {
476                 if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
477                         *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;
478
                    /* Bits 3-5 of the mod r/m byte hold operand a (zero for an immediate). */
479                 if ((a & SLJIT_IMM) || (a == 0))
480                         *buf_ptr = 0;
481                 else if (!(flags & EX86_SSE2_OP1))
482                         *buf_ptr = reg_lmap[a] << 3;
483                 else
484                         *buf_ptr = a << 3;
485         }
486         else {
                    /* Shift instructions: the group opcode depends on where the count comes from. */
487                 if (a & SLJIT_IMM) {
488                         if (imma == 1)
489                                 *inst = GROUP_SHIFT_1;
490                         else
491                                 *inst = GROUP_SHIFT_N;
492                 } else
493                         *inst = GROUP_SHIFT_CL;
494                 *buf_ptr = 0;
495         }
496
497         if (!(b & SLJIT_MEM))
498                 *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : b);
499         else if ((b & REG_MASK) != SLJIT_UNUSED) {
500                 if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
                            /* Base register with an optional displacement: 0x40 marks a one byte
                               displacement, 0x80 a four byte one. */
501                         if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
502                                 if (immb <= 127 && immb >= -128)
503                                         *buf_ptr |= 0x40;
504                                 else
505                                         *buf_ptr |= 0x80;
506                         }
507
508                         if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
509                                 *buf_ptr++ |= reg_lmap[b & REG_MASK];
510                         else {
511                                 *buf_ptr++ |= 0x04;
512                                 *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3);
513                         }
514
515                         if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
516                                 if (immb <= 127 && immb >= -128)
517                                         *buf_ptr++ = immb; /* 8 bit displacement. */
518                                 else {
519                                         *(sljit_si*)buf_ptr = immb; /* 32 bit displacement. */
520                                         buf_ptr += sizeof(sljit_si);
521                                 }
522                         }
523                 }
524                 else {
                            /* Base + offset register: immb goes into bits 6-7 of the SIB byte, and a
                               zero displacement byte is appended when the base's low bits are 5. */
525                         if (reg_lmap[b & REG_MASK] == 5)
526                                 *buf_ptr |= 0x40;
527                         *buf_ptr++ |= 0x04;
528                         *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6);
529                         if (reg_lmap[b & REG_MASK] == 5)
530                                 *buf_ptr++ = 0;
531                 }
532         }
533         else {
                    /* No base register: SIB form (0x04, then SIB byte 0x25) with a 32 bit displacement. */
534                 *buf_ptr++ |= 0x04;
535                 *buf_ptr++ = 0x25;
536                 *(sljit_si*)buf_ptr = immb; /* 32 bit displacement. */
537                 buf_ptr += sizeof(sljit_si);
538         }
539
            /* Append the immediate of operand a in the width selected above
               (nothing is written for a shift by one). */
540         if (a & SLJIT_IMM) {
541                 if (flags & EX86_BYTE_ARG)
542                         *buf_ptr = imma;
543                 else if (flags & EX86_HALF_ARG)
544                         *(short*)buf_ptr = imma;
545                 else if (!(flags & EX86_SHIFT_INS))
546                         *(sljit_si*)buf_ptr = imma;
547         }
548
549         return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
550 }
551
552 /* --------------------------------------------------------------------- */
553 /*  Call / return instructions                                           */
554 /* --------------------------------------------------------------------- */
555
556 static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, sljit_si type)
557 {
558         sljit_ub *inst;
559
560 #ifndef _WIN64
561         SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers);
562
563         inst = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
564         FAIL_IF(!inst);
565         INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
566         if (type >= SLJIT_CALL3) {
567                 *inst++ = REX_W;
568                 *inst++ = MOV_r_rm;
569                 *inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_R2];
570         }
571         *inst++ = REX_W;
572         *inst++ = MOV_r_rm;
573         *inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_R0];
574 #else
575         SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers);
576
577         inst = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
578         FAIL_IF(!inst);
579         INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
580         if (type >= SLJIT_CALL3) {
581                 *inst++ = REX_W | REX_R;
582                 *inst++ = MOV_r_rm;
583                 *inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_R2];
584         }
585         *inst++ = REX_W;
586         *inst++ = MOV_r_rm;
587         *inst++ = MOD_REG | (0x1 /* rcx */ << 3) | reg_lmap[SLJIT_R0];
588 #endif
589         return SLJIT_SUCCESS;
590 }
591
592 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
593 {
594         sljit_ub *inst;
595
596         CHECK_ERROR();
597         CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
598         ADJUST_LOCAL_OFFSET(dst, dstw);
599
600         /* For UNUSED dst. Uncommon, but possible. */
601         if (dst == SLJIT_UNUSED)
602                 dst = TMP_REG1;
603
604         if (FAST_IS_REG(dst)) {
605                 if (reg_map[dst] < 8) {
606                         inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
607                         FAIL_IF(!inst);
608                         INC_SIZE(1);
609                         POP_REG(reg_lmap[dst]);
610                         return SLJIT_SUCCESS;
611                 }
612
613                 inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
614                 FAIL_IF(!inst);
615                 INC_SIZE(2);
616                 *inst++ = REX_B;
617                 POP_REG(reg_lmap[dst]);
618                 return SLJIT_SUCCESS;
619         }
620
621         /* REX_W is not necessary (src is not immediate). */
622         compiler->mode32 = 1;
623         inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
624         FAIL_IF(!inst);
625         *inst++ = POP_rm;
626         return SLJIT_SUCCESS;
627 }
628
629 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
630 {
631         sljit_ub *inst;
632
633         CHECK_ERROR();
634         CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
635         ADJUST_LOCAL_OFFSET(src, srcw);
636
637         if ((src & SLJIT_IMM) && NOT_HALFWORD(srcw)) {
638                 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
639                 src = TMP_REG1;
640         }
641
642         if (FAST_IS_REG(src)) {
643                 if (reg_map[src] < 8) {
644                         inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 1);
645                         FAIL_IF(!inst);
646
647                         INC_SIZE(1 + 1);
648                         PUSH_REG(reg_lmap[src]);
649                 }
650                 else {
651                         inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 1);
652                         FAIL_IF(!inst);
653
654                         INC_SIZE(2 + 1);
655                         *inst++ = REX_B;
656                         PUSH_REG(reg_lmap[src]);
657                 }
658         }
659         else if (src & SLJIT_MEM) {
660                 /* REX_W is not necessary (src is not immediate). */
661                 compiler->mode32 = 1;
662                 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
663                 FAIL_IF(!inst);
664                 *inst++ = GROUP_FF;
665                 *inst |= PUSH_rm;
666
667                 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
668                 FAIL_IF(!inst);
669                 INC_SIZE(1);
670         }
671         else {
672                 SLJIT_ASSERT(IS_HALFWORD(srcw));
673                 /* SLJIT_IMM. */
674                 inst = (sljit_ub*)ensure_buf(compiler, 1 + 5 + 1);
675                 FAIL_IF(!inst);
676
677                 INC_SIZE(5 + 1);
678                 *inst++ = PUSH_i32;
679                 *(sljit_si*)inst = srcw;
680                 inst += sizeof(sljit_si);
681         }
682
683         RET();
684         return SLJIT_SUCCESS;
685 }
686
687
688 /* --------------------------------------------------------------------- */
689 /*  Extend input                                                         */
690 /* --------------------------------------------------------------------- */
691
692 static sljit_si emit_mov_int(struct sljit_compiler *compiler, sljit_si sign,
693         sljit_si dst, sljit_sw dstw,
694         sljit_si src, sljit_sw srcw)
695 {
696         sljit_ub* inst;
697         sljit_si dst_r;
698
699         compiler->mode32 = 0;
700
701         if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
702                 return SLJIT_SUCCESS; /* Empty instruction. */
703
704         if (src & SLJIT_IMM) {
705                 if (FAST_IS_REG(dst)) {
706                         if (sign || ((sljit_uw)srcw <= 0x7fffffff)) {
707                                 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_si)srcw, dst, dstw);
708                                 FAIL_IF(!inst);
709                                 *inst = MOV_rm_i32;
710                                 return SLJIT_SUCCESS;
711                         }
712                         return emit_load_imm64(compiler, dst, srcw);
713                 }
714                 compiler->mode32 = 1;
715                 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_si)srcw, dst, dstw);
716                 FAIL_IF(!inst);
717                 *inst = MOV_rm_i32;
718                 compiler->mode32 = 0;
719                 return SLJIT_SUCCESS;
720         }
721
722         dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
723
724         if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
725                 dst_r = src;
726         else {
727                 if (sign) {
728                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
729                         FAIL_IF(!inst);
730                         *inst++ = MOVSXD_r_rm;
731                 } else {
732                         compiler->mode32 = 1;
733                         FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw));
734                         compiler->mode32 = 0;
735                 }
736         }
737
738         if (dst & SLJIT_MEM) {
739                 compiler->mode32 = 1;
740                 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
741                 FAIL_IF(!inst);
742                 *inst = MOV_rm_r;
743                 compiler->mode32 = 0;
744         }
745
746         return SLJIT_SUCCESS;
747 }