chiark / gitweb /
pcre3 (2:8.35-7.1) unstable; urgency=medium
[pcre3.git] / sljit / sljitNativeX86_64.c
1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 /* x86 64-bit arch dependent functions. */
28
29 static sljit_si emit_load_imm64(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm)
30 {
31         sljit_ub *inst;
32
33         inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw));
34         FAIL_IF(!inst);
35         INC_SIZE(2 + sizeof(sljit_sw));
36         *inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
37         *inst++ = MOV_r_i32 + (reg_map[reg] & 0x7);
38         *(sljit_sw*)inst = imm;
39         return SLJIT_SUCCESS;
40 }
41
42 static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type)
43 {
44         if (type < SLJIT_JUMP) {
45                 /* Invert type. */
46                 *code_ptr++ = get_jump_code(type ^ 0x1) - 0x10;
47                 *code_ptr++ = 10 + 3;
48         }
49
50         SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_first);
51         *code_ptr++ = REX_W | REX_B;
52         *code_ptr++ = MOV_r_i32 + 1;
53         jump->addr = (sljit_uw)code_ptr;
54
55         if (jump->flags & JUMP_LABEL)
56                 jump->flags |= PATCH_MD;
57         else
58                 *(sljit_sw*)code_ptr = jump->u.target;
59
60         code_ptr += sizeof(sljit_sw);
61         *code_ptr++ = REX_B;
62         *code_ptr++ = GROUP_FF;
63         *code_ptr++ = (type >= SLJIT_FAST_CALL) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1);
64
65         return code_ptr;
66 }
67
68 static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type)
69 {
70         sljit_sw delta = addr - ((sljit_sw)code_ptr + 1 + sizeof(sljit_si));
71
72         if (delta <= HALFWORD_MAX && delta >= HALFWORD_MIN) {
73                 *code_ptr++ = (type == 2) ? CALL_i32 : JMP_i32;
74                 *(sljit_sw*)code_ptr = delta;
75         }
76         else {
77                 SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_second);
78                 *code_ptr++ = REX_W | REX_B;
79                 *code_ptr++ = MOV_r_i32 + 1;
80                 *(sljit_sw*)code_ptr = addr;
81                 code_ptr += sizeof(sljit_sw);
82                 *code_ptr++ = REX_B;
83                 *code_ptr++ = GROUP_FF;
84                 *code_ptr++ = (type == 2) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1);
85         }
86
87         return code_ptr;
88 }
89
/*
 * Emits the function prologue and records the frame layout in the compiler.
 *
 * Stores scratches / saveds in the compiler, pushes the saved registers,
 * copies up to three incoming arguments into SLJIT_SAVED_REG1..3 and then
 * reserves the local area by subtracting the 16 byte rounded local_size from
 * register 4 (the stack pointer encoding). The rounded value is kept in
 * compiler->local_size.
 *
 * On _WIN64 it additionally pushes SLJIT_TEMPORARY_EREG2 when scratches >= 5,
 * calls sljit_grow_stack for local areas above 1024 bytes and saves xmm6.
 *
 * Returns SLJIT_SUCCESS on the normal path; CHECK_ERROR / FAIL_IF (defined
 * outside this section) leave early on errors.
 */
90 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
91 {
92         sljit_si size, pushed_size;
93         sljit_ub *inst;
94
95         CHECK_ERROR();
96         check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);
97
98         compiler->scratches = scratches;
99         compiler->saveds = saveds;
100         compiler->flags_saved = 0;
101 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
102         compiler->logical_local_size = local_size;
103 #endif
104
            /* size counts the bytes emitted by the push / argument move sequence
               below: one byte per push, one more for every REX_B prefix, and
               three bytes per argument move. */
105         size = saveds;
106         /* Including the return address saved by the call instruction. */
107         pushed_size = (saveds + 1) * sizeof(sljit_sw);
108 #ifndef _WIN64
109         if (saveds >= 2)
110                 size += saveds - 1;
111 #else
112         if (saveds >= 4)
113                 size += saveds - 3;
114         if (scratches >= 5) {
115                 size += (5 - 4) * 2;
116                 pushed_size += sizeof(sljit_sw);
117         }
118 #endif
119         size += args * 3;
120         if (size > 0) {
121                 inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
122                 FAIL_IF(!inst);
123
124                 INC_SIZE(size);
                    /* Pushes go from the highest saved register down to
                       SLJIT_SAVED_REG1; sljit_emit_return pops in the opposite order. */
125                 if (saveds >= 5) {
126                         SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG2] >= 8, saved_ereg2_is_hireg);
127                         *inst++ = REX_B;
128                         PUSH_REG(reg_lmap[SLJIT_SAVED_EREG2]);
129                 }
130                 if (saveds >= 4) {
131                         SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG1] >= 8, saved_ereg1_is_hireg);
132                         *inst++ = REX_B;
133                         PUSH_REG(reg_lmap[SLJIT_SAVED_EREG1]);
134                 }
135                 if (saveds >= 3) {
136 #ifndef _WIN64
137                         SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] >= 8, saved_reg3_is_hireg);
138                         *inst++ = REX_B;
139 #else
140                         SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] < 8, saved_reg3_is_loreg);
141 #endif
142                         PUSH_REG(reg_lmap[SLJIT_SAVED_REG3]);
143                 }
144                 if (saveds >= 2) {
145 #ifndef _WIN64
146                         SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] >= 8, saved_reg2_is_hireg);
147                         *inst++ = REX_B;
148 #else
149                         SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] < 8, saved_reg2_is_loreg);
150 #endif
151                         PUSH_REG(reg_lmap[SLJIT_SAVED_REG2]);
152                 }
153                 if (saveds >= 1) {
154                         SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG1] < 8, saved_reg1_is_loreg);
155                         PUSH_REG(reg_lmap[SLJIT_SAVED_REG1]);
156                 }
157 #ifdef _WIN64
158                 if (scratches >= 5) {
159                         SLJIT_COMPILE_ASSERT(reg_map[SLJIT_TEMPORARY_EREG2] >= 8, temporary_ereg2_is_hireg);
160                         *inst++ = REX_B;
161                         PUSH_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]);
162                 }
163 #endif
164
                    /* Move the incoming arguments from the argument registers named
                       in the trailing comments into the saved registers. */
165 #ifndef _WIN64
166                 if (args > 0) {
167                         *inst++ = REX_W;
168                         *inst++ = MOV_r_rm;
169                         *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x7 /* rdi */;
170                 }
171                 if (args > 1) {
172                         *inst++ = REX_W | REX_R;
173                         *inst++ = MOV_r_rm;
174                         *inst++ = MOD_REG | (reg_lmap[SLJIT_SAVED_REG2] << 3) | 0x6 /* rsi */;
175                 }
176                 if (args > 2) {
177                         *inst++ = REX_W | REX_R;
178                         *inst++ = MOV_r_rm;
179                         *inst++ = MOD_REG | (reg_lmap[SLJIT_SAVED_REG3] << 3) | 0x2 /* rdx */;
180                 }
181 #else
182                 if (args > 0) {
183                         *inst++ = REX_W;
184                         *inst++ = MOV_r_rm;
185                         *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x1 /* rcx */;
186                 }
187                 if (args > 1) {
188                         *inst++ = REX_W;
189                         *inst++ = MOV_r_rm;
190                         *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG2] << 3) | 0x2 /* rdx */;
191                 }
192                 if (args > 2) {
193                         *inst++ = REX_W | REX_B;
194                         *inst++ = MOV_r_rm;
195                         *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG3] << 3) | 0x0 /* r8 */;
196                 }
197 #endif
198         }
199
            /* Round locals + FIXED_LOCALS_OFFSET + pushed bytes up to a multiple of
               16, then take the already pushed bytes back out. */
200         local_size = ((local_size + FIXED_LOCALS_OFFSET + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size;
201         compiler->local_size = local_size;
202 #ifdef _WIN64
203         if (local_size > 1024) {
204                 /* Allocate stack for the callback, which grows the stack. */
205                 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_si)));
206                 FAIL_IF(!inst);
207                 INC_SIZE(4 + (3 + sizeof(sljit_si)));
                    /* First instruction: subtract an 8 bit immediate (4 or 5 machine
                       words, selected by bit 3 of pushed_size) from register 4. */
208                 *inst++ = REX_W;
209                 *inst++ = GROUP_BINARY_83;
210                 *inst++ = MOD_REG | SUB | 4;
211                 /* Pushed size must be divisible by 8. */
212                 SLJIT_ASSERT(!(pushed_size & 0x7));
213                 if (pushed_size & 0x8) {
214                         *inst++ = 5 * sizeof(sljit_sw);
215                         local_size -= 5 * sizeof(sljit_sw);
216                 } else {
217                         *inst++ = 4 * sizeof(sljit_sw);
218                         local_size -= 4 * sizeof(sljit_sw);
219                 }
220                 /* Second instruction */
                    /* Loads the remaining local_size into SLJIT_SCRATCH_REG1 as a
                       32 bit immediate before sljit_grow_stack is called. */
221                 SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG1] < 8, temporary_reg1_is_loreg);
222                 *inst++ = REX_W;
223                 *inst++ = MOV_rm_i32;
224                 *inst++ = MOD_REG | reg_lmap[SLJIT_SCRATCH_REG1];
225                 *(sljit_si*)inst = local_size;
226 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
227                 compiler->skip_checks = 1;
228 #endif
229                 FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
230         }
231 #endif
232         SLJIT_ASSERT(local_size > 0);
            /* Reserve the local area: the GROUP_BINARY_83 form carries a one byte
               immediate, the GROUP_BINARY_81 form a sljit_si sized one. */
233         if (local_size <= 127) {
234                 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
235                 FAIL_IF(!inst);
236                 INC_SIZE(4);
237                 *inst++ = REX_W;
238                 *inst++ = GROUP_BINARY_83;
239                 *inst++ = MOD_REG | SUB | 4;
240                 *inst++ = local_size;
241         }
242         else {
243                 inst = (sljit_ub*)ensure_buf(compiler, 1 + 7);
244                 FAIL_IF(!inst);
245                 INC_SIZE(7);
246                 *inst++ = REX_W;
247                 *inst++ = GROUP_BINARY_81;
248                 *inst++ = MOD_REG | SUB | 4;
249                 *(sljit_si*)inst = local_size;
250                 inst += sizeof(sljit_si);
251         }
252 #ifdef _WIN64
253         /* Save xmm6 with MOVAPS instruction. */
254         inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
255         FAIL_IF(!inst);
256         INC_SIZE(5);
257         *inst++ = GROUP_0F;
            /* The four bytes following GROUP_0F, written as one little-endian
               word: 0x29 0x74 0x24 0x20. */
258         *(sljit_si*)inst = 0x20247429;
259 #endif
260
261         return SLJIT_SUCCESS;
262 }
263
264 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
265 {
266         sljit_si pushed_size;
267
268         CHECK_ERROR_VOID();
269         check_sljit_set_context(compiler, args, scratches, saveds, local_size);
270
271         compiler->scratches = scratches;
272         compiler->saveds = saveds;
273 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
274         compiler->logical_local_size = local_size;
275 #endif
276
277         /* Including the return address saved by the call instruction. */
278         pushed_size = (saveds + 1) * sizeof(sljit_sw);
279 #ifdef _WIN64
280         if (scratches >= 5)
281                 pushed_size += sizeof(sljit_sw);
282 #endif
283         compiler->local_size = ((local_size + FIXED_LOCALS_OFFSET + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size;
284 }
285
/*
 * Emits the function epilogue.
 *
 * emit_mov_before_return() is called first with (op, src, srcw). Then
 * compiler->local_size is added back to register 4 (the stack pointer
 * encoding), the saved registers are popped starting with SLJIT_SAVED_REG1
 * (the reverse of the push order used by sljit_emit_enter) and RET() is emitted.
 * On _WIN64, xmm6 is reloaded before the stack adjustment and
 * SLJIT_TEMPORARY_EREG2 is popped first when compiler->scratches >= 5.
 *
 * Returns SLJIT_SUCCESS on the normal path; CHECK_ERROR / FAIL_IF (defined
 * outside this section) leave early on errors.
 */
286 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
287 {
288         sljit_si size;
289         sljit_ub *inst;
290
291         CHECK_ERROR();
292         check_sljit_emit_return(compiler, op, src, srcw);
293
294         compiler->flags_saved = 0;
295         FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
296
297 #ifdef _WIN64
298         /* Restore xmm6 with MOVAPS instruction. */
299         inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
300         FAIL_IF(!inst);
301         INC_SIZE(5);
302         *inst++ = GROUP_0F;
            /* The four bytes following GROUP_0F, written as one little-endian
               word: 0x28 0x74 0x24 0x20. */
303         *(sljit_si*)inst = 0x20247428;
304 #endif
305         SLJIT_ASSERT(compiler->local_size > 0);
            /* Release the local area: one byte immediate form when it fits,
               sljit_si sized immediate otherwise. */
306         if (compiler->local_size <= 127) {
307                 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
308                 FAIL_IF(!inst);
309                 INC_SIZE(4);
310                 *inst++ = REX_W;
311                 *inst++ = GROUP_BINARY_83;
312                 *inst++ = MOD_REG | ADD | 4;
313                 *inst = compiler->local_size;
314         }
315         else {
316                 inst = (sljit_ub*)ensure_buf(compiler, 1 + 7);
317                 FAIL_IF(!inst);
318                 INC_SIZE(7);
319                 *inst++ = REX_W;
320                 *inst++ = GROUP_BINARY_81;
321                 *inst++ = MOD_REG | ADD | 4;
322                 *(sljit_si*)inst = compiler->local_size;
323         }
324
            /* Byte count of the pop sequence: one byte for RET(), one per pop,
               plus one for every REX_B prefix emitted below. */
325         size = 1 + compiler->saveds;
326 #ifndef _WIN64
327         if (compiler->saveds >= 2)
328                 size += compiler->saveds - 1;
329 #else
330         if (compiler->saveds >= 4)
331                 size += compiler->saveds - 3;
332         if (compiler->scratches >= 5)
333                 size += (5 - 4) * 2;
334 #endif
335         inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
336         FAIL_IF(!inst);
337
338         INC_SIZE(size);
339
340 #ifdef _WIN64
341         if (compiler->scratches >= 5) {
342                 *inst++ = REX_B;
343                 POP_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]);
344         }
345 #endif
346         if (compiler->saveds >= 1)
347                 POP_REG(reg_map[SLJIT_SAVED_REG1]);
348         if (compiler->saveds >= 2) {
349 #ifndef _WIN64
350                 *inst++ = REX_B;
351 #endif
352                 POP_REG(reg_lmap[SLJIT_SAVED_REG2]);
353         }
354         if (compiler->saveds >= 3) {
355 #ifndef _WIN64
356                 *inst++ = REX_B;
357 #endif
358                 POP_REG(reg_lmap[SLJIT_SAVED_REG3]);
359         }
360         if (compiler->saveds >= 4) {
361                 *inst++ = REX_B;
362                 POP_REG(reg_lmap[SLJIT_SAVED_EREG1]);
363         }
364         if (compiler->saveds >= 5) {
365                 *inst++ = REX_B;
366                 POP_REG(reg_lmap[SLJIT_SAVED_EREG2]);
367         }
368
369         RET();
370         return SLJIT_SUCCESS;
371 }
372
373 /* --------------------------------------------------------------------- */
374 /*  Operators                                                            */
375 /* --------------------------------------------------------------------- */
376
377 static sljit_si emit_do_imm32(struct sljit_compiler *compiler, sljit_ub rex, sljit_ub opcode, sljit_sw imm)
378 {
379         sljit_ub *inst;
380         sljit_si length = 1 + (rex ? 1 : 0) + sizeof(sljit_si);
381
382         inst = (sljit_ub*)ensure_buf(compiler, 1 + length);
383         FAIL_IF(!inst);
384         INC_SIZE(length);
385         if (rex)
386                 *inst++ = rex;
387         *inst++ = opcode;
388         *(sljit_si*)inst = imm;
389         return SLJIT_SUCCESS;
390 }
391
/*
 * Reserves buffer space for one instruction and encodes everything around
 * the opcode: the optional 0xf2 / 0xf3 / 0x66 prefixes, the REX prefix, the
 * mod r/m byte, the SIB byte, the displacement and the immediate.
 *
 * size:    the low 4 bits give the number of opcode bytes to leave room for;
 *          the remaining bits are EX86_* flags.
 * a, imma: register operand, or SLJIT_IMM with its value in imma.
 * b, immb: general operand (register or SLJIT_MEM form). immb is the
 *          displacement; when an offset register other than the
 *          SLJIT_LOCALS_REG marker is present it is or-ed in as immb << 6.
 *
 * For EX86_BIN_INS with an immediate and for EX86_SHIFT_INS the group opcode
 * byte is written here; in all other cases the caller fills in the opcode.
 *
 * Returns the opcode position (inst), or inst + 1 for EX86_SHIFT_INS.
 * Returns NULL when emit_load_imm64() fails; PTR_FAIL_IF (defined outside
 * this section) handles a failed ensure_buf().
 */
392 static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si size,
393         /* The register or immediate operand. */
394         sljit_si a, sljit_sw imma,
395         /* The general operand (not immediate). */
396         sljit_si b, sljit_sw immb)
397 {
398         sljit_ub *inst;
399         sljit_ub *buf_ptr;
400         sljit_ub rex = 0;
401         sljit_si flags = size & ~0xf;
402         sljit_si inst_size;
403
404         /* The immediate operand must be 32 bit. */
405         SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
406         /* Both cannot be switched on. */
407         SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
408         /* Size flags not allowed for typed instructions. */
409         SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
410         /* Both size flags cannot be switched on. */
411         SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
412 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
413         /* SSE2 and immediate is not possible. */
414         SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
415         SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
416                 && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
417                 && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
418 #endif
419
420         size &= 0xf;
421         inst_size = size;
422
            /* A displacement that is not a halfword is loaded into TMP_REG3, which
               then serves as the offset register (when b has a base register) or
               as the base register itself. */
423         if ((b & SLJIT_MEM) && !(b & OFFS_REG_MASK) && NOT_HALFWORD(immb)) {
424                 if (emit_load_imm64(compiler, TMP_REG3, immb))
425                         return NULL;
426                 immb = 0;
427                 if (b & REG_MASK)
428                         b |= TO_OFFS_REG(TMP_REG3);
429                 else
430                         b |= TMP_REG3;
431         }
432
433         if (!compiler->mode32 && !(flags & EX86_NO_REXW))
434                 rex |= REX_W;
435         else if (flags & EX86_REX)
436                 rex |= REX;
437
            /* First pass: compute the total instruction length and the REX bits. */
438 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
439         if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
440                 inst_size++;
441 #endif
442         if (flags & EX86_PREF_66)
443                 inst_size++;
444
445         /* Calculate size of b. */
446         inst_size += 1; /* mod r/m byte. */
447         if (b & SLJIT_MEM) {
448                 if ((b & REG_MASK) == SLJIT_UNUSED)
449                         inst_size += 1 + sizeof(sljit_si); /* SIB byte required to avoid RIP based addressing. */
450                 else {
451                         if (reg_map[b & REG_MASK] >= 8)
452                                 rex |= REX_B;
453                         if (immb != 0 && !(b & OFFS_REG_MASK)) {
454                                 /* Immediate operand. */
455                                 if (immb <= 127 && immb >= -128)
456                                         inst_size += sizeof(sljit_sb);
457                                 else
458                                         inst_size += sizeof(sljit_si);
459                         }
460                 }
461
                    /* SLJIT_LOCALS_REG as base without an offset register is marked
                       with itself as offset register, which selects the SIB form in
                       the encoding pass below. */
462                 if ((b & REG_MASK) == SLJIT_LOCALS_REG && !(b & OFFS_REG_MASK))
463                         b |= TO_OFFS_REG(SLJIT_LOCALS_REG);
464
465                 if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) {
466                         inst_size += 1; /* SIB byte. */
467                         if (reg_map[OFFS_REG(b)] >= 8)
468                                 rex |= REX_X;
469                 }
470         }
471 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
472         else if (!(flags & EX86_SSE2) && reg_map[b] >= 8)
473                 rex |= REX_B;
474 #else
475         else if (reg_map[b] >= 8)
476                 rex |= REX_B;
477 #endif
478
479         if (a & SLJIT_IMM) {
480                 if (flags & EX86_BIN_INS) {
                            /* Binary group instructions pick the one byte immediate
                               form automatically when the value fits. */
481                         if (imma <= 127 && imma >= -128) {
482                                 inst_size += 1;
483                                 flags |= EX86_BYTE_ARG;
484                         } else
485                                 inst_size += 4;
486                 }
487                 else if (flags & EX86_SHIFT_INS) {
                            /* The shift count is masked to the operand width; a count
                               of 1 needs no immediate byte. */
488                         imma &= compiler->mode32 ? 0x1f : 0x3f;
489                         if (imma != 1) {
490                                 inst_size ++;
491                                 flags |= EX86_BYTE_ARG;
492                         }
493                 } else if (flags & EX86_BYTE_ARG)
494                         inst_size++;
495                 else if (flags & EX86_HALF_ARG)
496                         inst_size += sizeof(short);
497                 else
498                         inst_size += sizeof(sljit_si);
499         }
500         else {
501                 SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
502                 /* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
503 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
504                 if (!(flags & EX86_SSE2) && reg_map[a] >= 8)
505                         rex |= REX_R;
506 #else
507                 if (reg_map[a] >= 8)
508                         rex |= REX_R;
509 #endif
510         }
511
512         if (rex)
513                 inst_size++;
514
515         inst = (sljit_ub*)ensure_buf(compiler, 1 + inst_size);
516         PTR_FAIL_IF(!inst);
517
            /* Second pass: write the bytes. */
518         /* Encoding the byte. */
519         INC_SIZE(inst_size);
520 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
521         if (flags & EX86_PREF_F2)
522                 *inst++ = 0xf2;
523         if (flags & EX86_PREF_F3)
524                 *inst++ = 0xf3;
525 #endif
526         if (flags & EX86_PREF_66)
527                 *inst++ = 0x66;
528         if (rex)
529                 *inst++ = rex;
            /* inst now points at the opcode slot; buf_ptr at the mod r/m byte that
               follows the size opcode bytes. */
530         buf_ptr = inst + size;
531
532         /* Encode mod/rm byte. */
533         if (!(flags & EX86_SHIFT_INS)) {
534                 if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
535                         *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;
536
537                 if ((a & SLJIT_IMM) || (a == 0))
538                         *buf_ptr = 0;
539 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
540                 else if (!(flags & EX86_SSE2))
541                         *buf_ptr = reg_lmap[a] << 3;
542                 else
543                         *buf_ptr = a << 3;
544 #else
545                 else
546                         *buf_ptr = reg_lmap[a] << 3;
547 #endif
548         }
549         else {
550                 if (a & SLJIT_IMM) {
551                         if (imma == 1)
552                                 *inst = GROUP_SHIFT_1;
553                         else
554                                 *inst = GROUP_SHIFT_N;
555                 } else
556                         *inst = GROUP_SHIFT_CL;
557                 *buf_ptr = 0;
558         }
559
560         if (!(b & SLJIT_MEM))
561 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
562                 *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2)) ? reg_lmap[b] : b);
563 #else
564                 *buf_ptr++ |= MOD_REG + reg_lmap[b];
565 #endif
566         else if ((b & REG_MASK) != SLJIT_UNUSED) {
567                 if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_LOCALS_REG)) {
                            /* Base register with optional displacement: 0x40 selects
                               the 8 bit, 0x80 the 32 bit displacement form. */
568                         if (immb != 0) {
569                                 if (immb <= 127 && immb >= -128)
570                                         *buf_ptr |= 0x40;
571                                 else
572                                         *buf_ptr |= 0x80;
573                         }
574
575                         if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
576                                 *buf_ptr++ |= reg_lmap[b & REG_MASK];
577                         else {
578                                 *buf_ptr++ |= 0x04;
579                                 *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3);
580                         }
581
582                         if (immb != 0) {
583                                 if (immb <= 127 && immb >= -128)
584                                         *buf_ptr++ = immb; /* 8 bit displacement. */
585                                 else {
586                                         *(sljit_si*)buf_ptr = immb; /* 32 bit displacement. */
587                                         buf_ptr += sizeof(sljit_si);
588                                 }
589                         }
590                 }
591                 else {
                            /* Base + offset register: immb is placed in the top two
                               bits of the SIB byte. */
592                         *buf_ptr++ |= 0x04;
593                         *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6);
594                 }
595         }
596         else {
                    /* No base register: mod r/m 0x04 with SIB byte 0x25 followed by
                       the 32 bit displacement. */
597                 *buf_ptr++ |= 0x04;
598                 *buf_ptr++ = 0x25;
599                 *(sljit_si*)buf_ptr = immb; /* 32 bit displacement. */
600                 buf_ptr += sizeof(sljit_si);
601         }
602
603         if (a & SLJIT_IMM) {
604                 if (flags & EX86_BYTE_ARG)
605                         *buf_ptr = imma;
606                 else if (flags & EX86_HALF_ARG)
607                         *(short*)buf_ptr = imma;
608                 else if (!(flags & EX86_SHIFT_INS))
609                         *(sljit_si*)buf_ptr = imma;
610         }
611
612         return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
613 }
614
615 /* --------------------------------------------------------------------- */
616 /*  Call / return instructions                                           */
617 /* --------------------------------------------------------------------- */
618
619 static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, sljit_si type)
620 {
621         sljit_ub *inst;
622
623 #ifndef _WIN64
624         SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG2] == 6 && reg_map[SLJIT_SCRATCH_REG1] < 8 && reg_map[SLJIT_SCRATCH_REG3] < 8, args_registers);
625
626         inst = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
627         FAIL_IF(!inst);
628         INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
629         if (type >= SLJIT_CALL3) {
630                 *inst++ = REX_W;
631                 *inst++ = MOV_r_rm;
632                 *inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_SCRATCH_REG3];
633         }
634         *inst++ = REX_W;
635         *inst++ = MOV_r_rm;
636         *inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_SCRATCH_REG1];
637 #else
638         SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG2] == 2 && reg_map[SLJIT_SCRATCH_REG1] < 8 && reg_map[SLJIT_SCRATCH_REG3] < 8, args_registers);
639
640         inst = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
641         FAIL_IF(!inst);
642         INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
643         if (type >= SLJIT_CALL3) {
644                 *inst++ = REX_W | REX_R;
645                 *inst++ = MOV_r_rm;
646                 *inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_SCRATCH_REG3];
647         }
648         *inst++ = REX_W;
649         *inst++ = MOV_r_rm;
650         *inst++ = MOD_REG | (0x1 /* rcx */ << 3) | reg_lmap[SLJIT_SCRATCH_REG1];
651 #endif
652         return SLJIT_SUCCESS;
653 }
654
655 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
656 {
657         sljit_ub *inst;
658
659         CHECK_ERROR();
660         check_sljit_emit_fast_enter(compiler, dst, dstw);
661         ADJUST_LOCAL_OFFSET(dst, dstw);
662
663         /* For UNUSED dst. Uncommon, but possible. */
664         if (dst == SLJIT_UNUSED)
665                 dst = TMP_REG1;
666
667         if (FAST_IS_REG(dst)) {
668                 if (reg_map[dst] < 8) {
669                         inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
670                         FAIL_IF(!inst);
671                         INC_SIZE(1);
672                         POP_REG(reg_lmap[dst]);
673                         return SLJIT_SUCCESS;
674                 }
675
676                 inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
677                 FAIL_IF(!inst);
678                 INC_SIZE(2);
679                 *inst++ = REX_B;
680                 POP_REG(reg_lmap[dst]);
681                 return SLJIT_SUCCESS;
682         }
683
684         /* REX_W is not necessary (src is not immediate). */
685         compiler->mode32 = 1;
686         inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
687         FAIL_IF(!inst);
688         *inst++ = POP_rm;
689         return SLJIT_SUCCESS;
690 }
691
692 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
693 {
694         sljit_ub *inst;
695
696         CHECK_ERROR();
697         check_sljit_emit_fast_return(compiler, src, srcw);
698         ADJUST_LOCAL_OFFSET(src, srcw);
699
700         if ((src & SLJIT_IMM) && NOT_HALFWORD(srcw)) {
701                 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
702                 src = TMP_REG1;
703         }
704
705         if (FAST_IS_REG(src)) {
706                 if (reg_map[src] < 8) {
707                         inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 1);
708                         FAIL_IF(!inst);
709
710                         INC_SIZE(1 + 1);
711                         PUSH_REG(reg_lmap[src]);
712                 }
713                 else {
714                         inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 1);
715                         FAIL_IF(!inst);
716
717                         INC_SIZE(2 + 1);
718                         *inst++ = REX_B;
719                         PUSH_REG(reg_lmap[src]);
720                 }
721         }
722         else if (src & SLJIT_MEM) {
723                 /* REX_W is not necessary (src is not immediate). */
724                 compiler->mode32 = 1;
725                 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
726                 FAIL_IF(!inst);
727                 *inst++ = GROUP_FF;
728                 *inst |= PUSH_rm;
729
730                 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
731                 FAIL_IF(!inst);
732                 INC_SIZE(1);
733         }
734         else {
735                 SLJIT_ASSERT(IS_HALFWORD(srcw));
736                 /* SLJIT_IMM. */
737                 inst = (sljit_ub*)ensure_buf(compiler, 1 + 5 + 1);
738                 FAIL_IF(!inst);
739
740                 INC_SIZE(5 + 1);
741                 *inst++ = PUSH_i32;
742                 *(sljit_si*)inst = srcw;
743                 inst += sizeof(sljit_si);
744         }
745
746         RET();
747         return SLJIT_SUCCESS;
748 }
749
750
751 /* --------------------------------------------------------------------- */
752 /*  Extend input                                                         */
753 /* --------------------------------------------------------------------- */
754
755 static sljit_si emit_mov_int(struct sljit_compiler *compiler, sljit_si sign,
756         sljit_si dst, sljit_sw dstw,
757         sljit_si src, sljit_sw srcw)
758 {
759         sljit_ub* inst;
760         sljit_si dst_r;
761
762         compiler->mode32 = 0;
763
764         if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
765                 return SLJIT_SUCCESS; /* Empty instruction. */
766
767         if (src & SLJIT_IMM) {
768                 if (FAST_IS_REG(dst)) {
769                         if (sign || ((sljit_uw)srcw <= 0x7fffffff)) {
770                                 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_si)srcw, dst, dstw);
771                                 FAIL_IF(!inst);
772                                 *inst = MOV_rm_i32;
773                                 return SLJIT_SUCCESS;
774                         }
775                         return emit_load_imm64(compiler, dst, srcw);
776                 }
777                 compiler->mode32 = 1;
778                 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_si)srcw, dst, dstw);
779                 FAIL_IF(!inst);
780                 *inst = MOV_rm_i32;
781                 compiler->mode32 = 0;
782                 return SLJIT_SUCCESS;
783         }
784
785         dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
786
787         if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
788                 dst_r = src;
789         else {
790                 if (sign) {
791                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
792                         FAIL_IF(!inst);
793                         *inst++ = MOVSXD_r_rm;
794                 } else {
795                         compiler->mode32 = 1;
796                         FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw));
797                         compiler->mode32 = 0;
798                 }
799         }
800
801         if (dst & SLJIT_MEM) {
802                 compiler->mode32 = 1;
803                 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
804                 FAIL_IF(!inst);
805                 *inst = MOV_rm_r;
806                 compiler->mode32 = 0;
807         }
808
809         return SLJIT_SUCCESS;
810 }