chiark / gitweb /
tidy up conflict
[pcre3.git] / sljit / sljitNativeTILEGX_64.c
1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright 2013-2013 Tilera Corporation(jiwang@tilera.com). All rights reserved.
5  *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without modification, are
8  * permitted provided that the following conditions are met:
9  *
10  *   1. Redistributions of source code must retain the above copyright notice, this list of
11  *      conditions and the following disclaimer.
12  *
13  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
14  *      of conditions and the following disclaimer in the documentation and/or other materials
15  *      provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
20  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
23  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
25  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27
28 /* TileGX architecture. */
29 /* Contributed by Tilera Corporation. */
30 #include "sljitNativeTILEGX-encoder.c"
31
/* Inclusive bounds of the signed immediate ranges tested by this back end. */
#define SIMM_8BIT_MAX (0x7f)
#define SIMM_8BIT_MIN (-0x80)
#define SIMM_16BIT_MAX (0x7fff)
#define SIMM_16BIT_MIN (-0x8000)
#define SIMM_17BIT_MAX (0xffff)
#define SIMM_17BIT_MIN (-0x10000)
/* An unsuffixed 0x80000000 has type unsigned int, so negating it yields
   +2147483648 instead of a negative bound. The L suffix makes the constant a
   (64-bit) long before the negation is applied. */
#define SIMM_32BIT_MIN (-0x80000000L)
#define SIMM_32BIT_MAX (0x7fffffff)
/* Lower bound of the 48-bit signed range: -(2^47). */
#define SIMM_48BIT_MIN (-0x800000000000L)
/* NOTE(review): this upper bound is 0x7fffffff0000 rather than 2^47 - 1; it is
   left unchanged here - confirm whether the narrower limit is intentional. */
#define SIMM_48BIT_MAX (0x7fffffff0000L)
/* Low 16 bits of an immediate. */
#define IMM16(imm) ((imm) & 0xffff)

#define UIMM_16BIT_MAX (0xffff)
45
/* Internal scratch registers. Their ids follow directly after the
   SLJIT_NO_REGISTERS ids and are used as indices into reg_map[]. */
#define TMP_REG1 (SLJIT_NO_REGISTERS + 1)
#define TMP_REG2 (SLJIT_NO_REGISTERS + 2)
#define TMP_REG3 (SLJIT_NO_REGISTERS + 3)
#define ADDR_TMP (SLJIT_NO_REGISTERS + 4)
/* Alias: PIC_ADDR_REG shares TMP_REG2. */
#define PIC_ADDR_REG TMP_REG2

/* Translation table from register id (array index) to machine register
   number. Entry 0 is 63, the same number as the ZERO define in this file. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
        63, 0, 1, 2, 3, 4, 30, 31, 32, 33, 34, 54, 5, 16, 6, 7
};

/* Machine register numbers written out as literals. The values repeat
   numbers that appear in reg_map[] above, so the two must be kept in sync.
   NOTE(review): the id -> entry pairing depends on SLJIT_NO_REGISTERS and the
   SLJIT_* register ids, which are defined outside this file - verify there. */
#define SLJIT_LOCALS_REG_mapped 54
#define TMP_REG1_mapped 5
#define TMP_REG2_mapped 16
#define TMP_REG3_mapped 6
#define ADDR_TMP_mapped 7
#define SLJIT_SAVED_REG1_mapped 30
#define SLJIT_SAVED_REG2_mapped 31
#define SLJIT_SAVED_REG3_mapped 32
#define SLJIT_SAVED_EREG1_mapped 33
#define SLJIT_SAVED_EREG2_mapped 34
66
/* Flags are kept in volatile registers. The values below are machine
   register numbers, one register per emulated flag. */
#define EQUAL_FLAG 8
/* And carry flag as well. */
#define ULESS_FLAG 9
#define UGREATER_FLAG 10
#define LESS_FLAG 11
#define GREATER_FLAG 12
#define OVERFLOW_FLAG 13

/* Further fixed machine register numbers. ZERO matches reg_map[0].
   NOTE(review): RA is presumably the return-address (link) register and
   TMP_EREG1/2 extra scratch registers - confirm against their users. */
#define ZERO 63
#define RA 55
#define TMP_EREG1 14
#define TMP_EREG2 15
80
/* Memory access descriptor bits. Bit 0 distinguishes load from store,
   bits 1-2 hold the access width (word/byte/half/int) and bit 3 marks a
   signed access; this is the same layout as the index comments of
   data_transfer_insts[]. */
#define LOAD_DATA 0x01
#define WORD_DATA 0x00
#define BYTE_DATA 0x02
#define HALF_DATA 0x04
#define INT_DATA 0x06
#define SIGNED_DATA 0x08
#define DOUBLE_DATA 0x10

/* Separates integer and floating point registers */
#define GPR_REG 0xf

/* Covers all of the descriptor bits above (0x01 .. 0x10). */
#define MEM_MASK 0x1f

/* Operation modifier bits; all of them lie above MEM_MASK. */
#define WRITE_BACK 0x00020
#define ARG_TEST 0x00040
#define ALT_KEEP_CACHE 0x00080
#define CUMULATIVE_OP 0x00100
#define LOGICAL_OP 0x00200
#define IMM_OP 0x00400
#define SRC2_IMM 0x00800

/* Operand placement bits. */
#define UNUSED_DEST 0x01000
#define REG_DEST 0x02000
#define REG1_SOURCE 0x04000
#define REG2_SOURCE 0x08000
#define SLOW_SRC1 0x10000
#define SLOW_SRC2 0x20000
#define SLOW_DEST 0x40000

/* Only these flags are set. UNUSED_DEST is not set when no flags should be set.
   The macro reads the variables `flags` and `op`, which must be in scope at
   the point of use.
 */
#define CHECK_FLAGS(list) (!(flags & UNUSED_DEST) || (op & GET_FLAGS(~(list))))
113
114 SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char *sljit_get_platform_name(void)
115 {
116         return "TileGX" SLJIT_CPUINFO;
117 }
118
/* Length of an instruction word */
typedef sljit_uw sljit_ins;

/* One instruction waiting in inst_buf[] to be packed into a bundle. */
struct jit_instr {
        /* Opcode table entry (element of tilegx_opcodes[]). */
        const struct tilegx_opcode* opcode; 
        /* Pipeline (issue slot) the instruction is currently assigned to. */
        tilegx_pipeline pipe;
        /* Bit masks with one bit per machine register read / written; the
           push_*_buffer() functions fill them and assign_pipes() uses them to
           detect dependencies inside a bundle. */
        unsigned long input_registers;
        unsigned long output_registers;
        /* Operand values, in the order expected by the opcode's operand list. */
        int operand_value[4];
        /* Source line of the emitting call site; only printed in debug builds. */
        int line;
};
130
/* Opcode Helper Macros */
/* Each macro below is a ready-made bit pattern that callers complete by
   OR-ing in operand fields (DEST_X1(), SRCA_X1(), ...). The *_X1 / *_X0
   instruction templates select X mode and fill the other issue slot with
   fnop. Every expansion is wrapped in parentheses so that it stays a single
   operand no matter which operator appears next to it at the point of use. */
#define TILEGX_X_MODE 0

#define X_MODE create_Mode(TILEGX_X_MODE)

#define FNOP_X0 \
        (create_Opcode_X0(RRR_0_OPCODE_X0) | \
        create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \
        create_UnaryOpcodeExtension_X0(FNOP_UNARY_OPCODE_X0))

#define FNOP_X1 \
        (create_Opcode_X1(RRR_0_OPCODE_X1) | \
        create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
        create_UnaryOpcodeExtension_X1(FNOP_UNARY_OPCODE_X1))

#define NOP \
        (create_Mode(TILEGX_X_MODE) | FNOP_X0 | FNOP_X1)

#define ANOP_X0 \
        (create_Opcode_X0(RRR_0_OPCODE_X0) | \
        create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \
        create_UnaryOpcodeExtension_X0(NOP_UNARY_OPCODE_X0))

#define BPT \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
        create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
        create_UnaryOpcodeExtension_X1(ILL_UNARY_OPCODE_X1) | \
        create_Dest_X1(0x1C) | create_SrcA_X1(0x25) | ANOP_X0)

#define ADD_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
        create_RRROpcodeExtension_X1(ADD_RRR_0_OPCODE_X1) | FNOP_X0)

#define ADDI_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
        create_Imm8OpcodeExtension_X1(ADDI_IMM8_OPCODE_X1) | FNOP_X0)

#define SUB_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
        create_RRROpcodeExtension_X1(SUB_RRR_0_OPCODE_X1) | FNOP_X0)

#define NOR_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
        create_RRROpcodeExtension_X1(NOR_RRR_0_OPCODE_X1) | FNOP_X0)

#define OR_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
        create_RRROpcodeExtension_X1(OR_RRR_0_OPCODE_X1) | FNOP_X0)

#define AND_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
        create_RRROpcodeExtension_X1(AND_RRR_0_OPCODE_X1) | FNOP_X0)

#define XOR_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
        create_RRROpcodeExtension_X1(XOR_RRR_0_OPCODE_X1) | FNOP_X0)

#define CMOVNEZ_X0 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \
        create_RRROpcodeExtension_X0(CMOVNEZ_RRR_0_OPCODE_X0) | FNOP_X1)

#define CMOVEQZ_X0 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \
        create_RRROpcodeExtension_X0(CMOVEQZ_RRR_0_OPCODE_X0) | FNOP_X1)

#define ADDLI_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(ADDLI_OPCODE_X1) | FNOP_X0)

#define V4INT_L_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
        create_RRROpcodeExtension_X1(V4INT_L_RRR_0_OPCODE_X1) | FNOP_X0)

#define BFEXTU_X0 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X0(BF_OPCODE_X0) | \
        create_BFOpcodeExtension_X0(BFEXTU_BF_OPCODE_X0) | FNOP_X1)

#define BFEXTS_X0 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X0(BF_OPCODE_X0) | \
        create_BFOpcodeExtension_X0(BFEXTS_BF_OPCODE_X0) | FNOP_X1)

#define SHL16INSLI_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHL16INSLI_OPCODE_X1) | FNOP_X0)

#define ST_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
        create_RRROpcodeExtension_X1(ST_RRR_0_OPCODE_X1) | create_Dest_X1(0x0) | FNOP_X0)

#define LD_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
        create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
        create_UnaryOpcodeExtension_X1(LD_UNARY_OPCODE_X1) | FNOP_X0)

#define JR_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
        create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
        create_UnaryOpcodeExtension_X1(JR_UNARY_OPCODE_X1) | FNOP_X0)

#define JALR_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
        create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
        create_UnaryOpcodeExtension_X1(JALR_UNARY_OPCODE_X1) | FNOP_X0)

#define CLZ_X0 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \
        create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \
        create_UnaryOpcodeExtension_X0(CNTLZ_UNARY_OPCODE_X0) | FNOP_X1)

#define CMPLTUI_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
        create_Imm8OpcodeExtension_X1(CMPLTUI_IMM8_OPCODE_X1) | FNOP_X0)

#define CMPLTU_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
        create_RRROpcodeExtension_X1(CMPLTU_RRR_0_OPCODE_X1) | FNOP_X0)

#define CMPLTS_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
        create_RRROpcodeExtension_X1(CMPLTS_RRR_0_OPCODE_X1) | FNOP_X0)

#define XORI_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
        create_Imm8OpcodeExtension_X1(XORI_IMM8_OPCODE_X1) | FNOP_X0)

#define ORI_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
        create_Imm8OpcodeExtension_X1(ORI_IMM8_OPCODE_X1) | FNOP_X0)

#define ANDI_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
        create_Imm8OpcodeExtension_X1(ANDI_IMM8_OPCODE_X1) | FNOP_X0)

#define SHLI_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \
        create_ShiftOpcodeExtension_X1(SHLI_SHIFT_OPCODE_X1) | FNOP_X0)

#define SHL_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
        create_RRROpcodeExtension_X1(SHL_RRR_0_OPCODE_X1) | FNOP_X0)

#define SHRSI_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \
        create_ShiftOpcodeExtension_X1(SHRSI_SHIFT_OPCODE_X1) | FNOP_X0)

#define SHRS_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
        create_RRROpcodeExtension_X1(SHRS_RRR_0_OPCODE_X1) | FNOP_X0)

#define SHRUI_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \
        create_ShiftOpcodeExtension_X1(SHRUI_SHIFT_OPCODE_X1) | FNOP_X0)

#define SHRU_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
        create_RRROpcodeExtension_X1(SHRU_RRR_0_OPCODE_X1) | FNOP_X0)

#define BEQZ_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(BRANCH_OPCODE_X1) | \
        create_BrType_X1(BEQZ_BRANCH_OPCODE_X1) | FNOP_X0)

#define BNEZ_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(BRANCH_OPCODE_X1) | \
        create_BrType_X1(BNEZ_BRANCH_OPCODE_X1) | FNOP_X0)

#define J_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(JUMP_OPCODE_X1) | \
        create_JumpOpcodeExtension_X1(J_JUMP_OPCODE_X1) | FNOP_X0)

#define JAL_X1 \
        (create_Mode(TILEGX_X_MODE) | create_Opcode_X1(JUMP_OPCODE_X1) | \
        create_JumpOpcodeExtension_X1(JAL_JUMP_OPCODE_X1) | FNOP_X0)
300
/* Short names for the operand field encoders (create_*). The results are
   OR-ed into one of the opcode templates, e.g. ADD_X1 | DEST_X1(d) | ... */
#define DEST_X0(x) create_Dest_X0(x)
#define SRCA_X0(x) create_SrcA_X0(x)
#define SRCB_X0(x) create_SrcB_X0(x)
#define DEST_X1(x) create_Dest_X1(x)
#define SRCA_X1(x) create_SrcA_X1(x)
#define SRCB_X1(x) create_SrcB_X1(x)
#define IMM16_X1(x) create_Imm16_X1(x)
#define IMM8_X1(x) create_Imm8_X1(x)
#define BFSTART_X0(x) create_BFStart_X0(x)
#define BFEND_X0(x) create_BFEnd_X0(x)
#define SHIFTIMM_X1(x) create_ShAmt_X1(x)
#define JOFF_X1(x) create_JumpOff_X1(x)
#define BOFF_X1(x) create_BrOff_X1(x)
314
/* Load/store mnemonic for every combination of signedness, access width and
   direction. The three letters in front of each entry read
   <u|s: unsigned/signed> <w|b|h|i: word/byte/half/int> <s|l: store/load>,
   and the entry order follows the bit values of SIGNED_DATA, the *_DATA width
   constants and LOAD_DATA.
   NOTE(review): the indexing expression lives outside this excerpt - confirm
   that callers index with those flag bits. */
static SLJIT_CONST tilegx_mnemonic data_transfer_insts[16] = {
        /* u w s */ TILEGX_OPC_ST   /* st */,
        /* u w l */ TILEGX_OPC_LD   /* ld */,
        /* u b s */ TILEGX_OPC_ST1  /* st1 */,
        /* u b l */ TILEGX_OPC_LD1U /* ld1u */,
        /* u h s */ TILEGX_OPC_ST2  /* st2 */,
        /* u h l */ TILEGX_OPC_LD2U /* ld2u */,
        /* u i s */ TILEGX_OPC_ST4  /* st4 */,
        /* u i l */ TILEGX_OPC_LD4U /* ld4u */,
        /* s w s */ TILEGX_OPC_ST   /* st */,
        /* s w l */ TILEGX_OPC_LD   /* ld */,
        /* s b s */ TILEGX_OPC_ST1  /* st1 */,
        /* s b l */ TILEGX_OPC_LD1S /* ld1s */,
        /* s h s */ TILEGX_OPC_ST2  /* st2 */,
        /* s h l */ TILEGX_OPC_LD2S /* ld2s */,
        /* s i s */ TILEGX_OPC_ST4  /* st4 */,
        /* s i l */ TILEGX_OPC_LD4S /* ld4s */,
};
333
334 #ifdef TILEGX_JIT_DEBUG
335 static sljit_si push_inst_debug(struct sljit_compiler *compiler, sljit_ins ins, int line)
336 {
337         sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
338         FAIL_IF(!ptr);
339         *ptr = ins;
340         compiler->size++;
341         printf("|%04d|S0|:\t\t", line);
342         print_insn_tilegx(ptr);
343         return SLJIT_SUCCESS;
344 }
345
346 static sljit_si push_inst_nodebug(struct sljit_compiler *compiler, sljit_ins ins)
347 {
348         sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
349         FAIL_IF(!ptr);
350         *ptr = ins;
351         compiler->size++;
352         return SLJIT_SUCCESS;
353 }
354
355 #define push_inst(a, b) push_inst_debug(a, b, __LINE__)
356 #else
357 static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins)
358 {
359         sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
360         FAIL_IF(!ptr);
361         *ptr = ins;
362         compiler->size++;
363         return SLJIT_SUCCESS;
364 }
365 #endif
366
/* Packs three per-slot values into one word: slot 0 in bits 0-7, slot 1 in
   bits 8-15 and slot 2 in bits 16-23. */
#define BUNDLE_FORMAT_MASK(p0, p1, p2) \
        ((p0) | ((p1) << 8) | ((p2) << 16))

/* Initializer for one struct Format: the pipe of each of the three slots,
   followed by the matching mask in which each slot's byte has exactly the
   bit of its pipe set. */
#define BUNDLE_FORMAT(p0, p1, p2) \
        { \
                { \
                        (tilegx_pipeline)(p0), \
                        (tilegx_pipeline)(p1), \
                        (tilegx_pipeline)(p2) \
                }, \
                BUNDLE_FORMAT_MASK(1 << (p0), 1 << (p1), (1 << (p2))) \
        }

/* Marker for "this slot is not used". */
#define NO_PIPELINE TILEGX_NUM_PIPELINE_ENCODINGS

/* True for pipes whose enum value does not exceed TILEGX_PIPELINE_X1. */
#define tilegx_is_x_pipeline(p) ((int)(p) <= (int)TILEGX_PIPELINE_X1)
383
/* Emission shorthands. All of them expect a variable named `compiler` in
   scope. PI() stores a finished instruction word directly; the others queue
   an instruction in the bundling buffer and pass the line of the call site
   along for the debug trace. */
#define PI(encoding) \
        push_inst(compiler, encoding)

/* Generic three- and two-operand forms. */
#define PB3(opcode, dst, srca, srcb) \
        push_3_buffer(compiler, opcode, dst, srca, srcb, __LINE__)

#define PB2(opcode, dst, src) \
        push_2_buffer(compiler, opcode, dst, src, __LINE__)

#define JR(reg) \
        push_jr_buffer(compiler, TILEGX_OPC_JR, reg, __LINE__)

/* Register-register arithmetic and logic. */
#define ADD(dst, srca, srcb)            PB3(TILEGX_OPC_ADD, dst, srca, srcb)
#define SUB(dst, srca, srcb)            PB3(TILEGX_OPC_SUB, dst, srca, srcb)
#define NOR(dst, srca, srcb)            PB3(TILEGX_OPC_NOR, dst, srca, srcb)
#define OR(dst, srca, srcb)             PB3(TILEGX_OPC_OR, dst, srca, srcb)
#define XOR(dst, srca, srcb)            PB3(TILEGX_OPC_XOR, dst, srca, srcb)
#define AND(dst, srca, srcb)            PB3(TILEGX_OPC_AND, dst, srca, srcb)

#define CLZ(dst, src)                   PB2(TILEGX_OPC_CLZ, dst, src)

/* Shifts and logic with an immediate operand. */
#define SHLI(dst, srca, srcb)           PB3(TILEGX_OPC_SHLI, dst, srca, srcb)
#define SHRUI(dst, srca, imm)           PB3(TILEGX_OPC_SHRUI, dst, srca, imm)
#define XORI(dst, srca, imm)            PB3(TILEGX_OPC_XORI, dst, srca, imm)
#define ORI(dst, srca, imm)             PB3(TILEGX_OPC_ORI, dst, srca, imm)

/* Comparisons and conditional moves. */
#define CMPLTU(dst, srca, srcb)         PB3(TILEGX_OPC_CMPLTU, dst, srca, srcb)
#define CMPLTS(dst, srca, srcb)         PB3(TILEGX_OPC_CMPLTS, dst, srca, srcb)
#define CMPLTUI(dst, srca, imm)         PB3(TILEGX_OPC_CMPLTUI, dst, srca, imm)
#define CMOVNEZ(dst, srca, srcb)        PB3(TILEGX_OPC_CMOVNEZ, dst, srca, srcb)
#define CMOVEQZ(dst, srca, srcb)        PB3(TILEGX_OPC_CMOVEQZ, dst, srca, srcb)

/* 16-bit immediate forms. */
#define ADDLI(dst, srca, srcb)          PB3(TILEGX_OPC_ADDLI, dst, srca, srcb)
#define SHL16INSLI(dst, srca, srcb)     PB3(TILEGX_OPC_SHL16INSLI, dst, srca, srcb)

/* Memory access. */
#define LD_ADD(dst, addr, adjust)       PB3(TILEGX_OPC_LD_ADD, dst, addr, adjust)
#define ST_ADD(src, addr, adjust)       PB3(TILEGX_OPC_ST_ADD, src, addr, adjust)
#define LD(dst, addr)                   PB2(TILEGX_OPC_LD, dst, addr)

/* Bit field extraction (four operands). */
#define BFEXTU(dst, src, start, end) \
        push_4_buffer(compiler, TILEGX_OPC_BFEXTU, dst, src, start, end, __LINE__)

#define BFEXTS(dst, src, start, end) \
        push_4_buffer(compiler, TILEGX_OPC_BFEXTS, dst, src, start, end, __LINE__)
464
/* "Solo" emitters: the instruction is encoded from its X1 template and
   stored right away through PI(), without going through the bundling buffer. */
#define ADD_SOLO(dest, srca, srcb) \
        PI(ADD_X1 | DEST_X1(dest) | SRCA_X1(srca) | SRCB_X1(srcb))

#define ADDI_SOLO(dest, srca, imm) \
        PI(ADDI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM8_X1(imm))

#define ADDLI_SOLO(dest, srca, imm) \
        PI(ADDLI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM16_X1(imm))

#define SHL16INSLI_SOLO(dest, srca, imm) \
        PI(SHL16INSLI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM16_X1(imm))

#define JALR_SOLO(reg) \
        PI(JALR_X1 | SRCA_X1(reg))

#define JR_SOLO(reg) \
        PI(JR_X1 | SRCA_X1(reg))
482
/* One legal assignment of bundle issue slots to pipes. */
struct Format {
        /* Mapping of bundle issue slot to assigned pipe. */
        tilegx_pipeline pipe[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE];

        /* Mask of pipes used by this bundle, laid out as produced by
           BUNDLE_FORMAT_MASK (one byte per slot). */
        unsigned int pipe_mask;
};
490
/* Every slot-to-pipe arrangement compute_format() may choose from. The table
   is scanned front to back and the first compatible entry wins, so the order
   of the entries is significant. */
const struct Format formats[] =
{
        /* In Y format we must always have something in Y2, since it has
         * no fnop, so this conveys that Y2 must always be used. */
        BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2, NO_PIPELINE),
        BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2, NO_PIPELINE),
        BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0, NO_PIPELINE),
        BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1, NO_PIPELINE),

        /* Y format has three instructions. */
        BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2),
        BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1),
        BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2),
        BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0),
        BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y1),
        BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y0),

        /* X format has only two instructions. */
        BUNDLE_FORMAT(TILEGX_PIPELINE_X0, TILEGX_PIPELINE_X1, NO_PIPELINE),
        BUNDLE_FORMAT(TILEGX_PIPELINE_X1, TILEGX_PIPELINE_X0, NO_PIPELINE)
};
512
513
/* Instructions queued for the bundle under construction, and how many of the
   entries are in use. Both objects are file-scope and shared by every
   function below.
   NOTE(review): as global state they are presumably not safe for concurrent
   compilation - confirm whether that matters for the callers. */
struct jit_instr inst_buf[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE];
unsigned long inst_buf_index;
516
517 tilegx_pipeline get_any_valid_pipe(const struct tilegx_opcode* opcode)
518 {
519         /* FIXME: tile: we could pregenerate this. */
520         int pipe;
521         for (pipe = 0; ((opcode->pipes & (1 << pipe)) == 0 && pipe < TILEGX_NUM_PIPELINE_ENCODINGS); pipe++)
522                 ;
523         return (tilegx_pipeline)(pipe);
524 }
525
526 void insert_nop(tilegx_mnemonic opc, int line)
527 {
528         const struct tilegx_opcode* opcode = NULL;
529
530         memmove(&inst_buf[1], &inst_buf[0], inst_buf_index * sizeof inst_buf[0]);
531
532         opcode = &tilegx_opcodes[opc];
533         inst_buf[0].opcode = opcode;
534         inst_buf[0].pipe = get_any_valid_pipe(opcode);
535         inst_buf[0].input_registers = 0;
536         inst_buf[0].output_registers = 0;
537         inst_buf[0].line = line;
538         ++inst_buf_index;
539 }
540
541 const struct Format* compute_format()
542 {
543         unsigned int compatible_pipes = BUNDLE_FORMAT_MASK(
544                 inst_buf[0].opcode->pipes,
545                 inst_buf[1].opcode->pipes,
546                 (inst_buf_index == 3 ? inst_buf[2].opcode->pipes : (1 << NO_PIPELINE)));
547
548         const struct Format* match = NULL;
549         const struct Format *b = NULL;
550         unsigned int i = 0;
551         for (i; i < sizeof formats / sizeof formats[0]; i++) {
552                 b = &formats[i];
553                 if ((b->pipe_mask & compatible_pipes) == b->pipe_mask) {
554                         match = b;
555                         break;
556                 }
557         }
558
559         return match;
560 }
561
562 sljit_si assign_pipes()
563 {
564         unsigned long output_registers = 0;
565         unsigned int i = 0;
566
567         if (inst_buf_index == 1) {
568                 tilegx_mnemonic opc = inst_buf[0].opcode->can_bundle
569                                         ? TILEGX_OPC_FNOP : TILEGX_OPC_NOP;
570                 insert_nop(opc, __LINE__);
571         }
572
573         const struct Format* match = compute_format();
574
575         if (match == NULL)
576                 return -1;
577
578         for (i = 0; i < inst_buf_index; i++) {
579
580                 if ((i > 0) && ((inst_buf[i].input_registers & output_registers) != 0))
581                         return -1;
582
583                 if ((i > 0) && ((inst_buf[i].output_registers & output_registers) != 0))
584                         return -1;
585
586                 /* Don't include Rzero in the match set, to avoid triggering
587                    needlessly on 'prefetch' instrs. */
588
589                 output_registers |= inst_buf[i].output_registers & 0xFFFFFFFFFFFFFFL;
590
591                 inst_buf[i].pipe = match->pipe[i];
592         }
593
594         /* If only 2 instrs, and in Y-mode, insert a nop. */
595         if (inst_buf_index == 2 && !tilegx_is_x_pipeline(match->pipe[0])) {
596                 insert_nop(TILEGX_OPC_FNOP, __LINE__);
597
598                 /* Select the yet unassigned pipe. */
599                 tilegx_pipeline pipe = (tilegx_pipeline)(((TILEGX_PIPELINE_Y0
600                                         + TILEGX_PIPELINE_Y1 + TILEGX_PIPELINE_Y2)
601                                         - (inst_buf[1].pipe + inst_buf[2].pipe)));
602
603                 inst_buf[0].pipe = pipe;
604         }
605
606         return 0;
607 }
608
609 tilegx_bundle_bits get_bundle_bit(struct jit_instr *inst)
610 {
611         int i, val;
612         const struct tilegx_opcode* opcode = inst->opcode;
613         tilegx_bundle_bits bits = opcode->fixed_bit_values[inst->pipe];
614
615         const struct tilegx_operand* operand = NULL;
616         for (i = 0; i < opcode->num_operands; i++) {
617                 operand = &tilegx_operands[opcode->operands[inst->pipe][i]];
618                 val = inst->operand_value[i];
619
620                 bits |= operand->insert(val);
621         }
622
623         return bits;
624 }
625
626 static sljit_si update_buffer(struct sljit_compiler *compiler)
627 {
628         int count;
629         int i;
630         int orig_index = inst_buf_index;
631         struct jit_instr inst0 = inst_buf[0];
632         struct jit_instr inst1 = inst_buf[1];
633         struct jit_instr inst2 = inst_buf[2];
634         tilegx_bundle_bits bits = 0;
635
636         /* If the bundle is valid as is, perform the encoding and return 1. */
637         if (assign_pipes() == 0) {
638                 for (i = 0; i < inst_buf_index; i++) {
639                         bits |= get_bundle_bit(inst_buf + i);
640 #ifdef TILEGX_JIT_DEBUG
641                         printf("|%04d", inst_buf[i].line);
642 #endif
643                 }
644 #ifdef TILEGX_JIT_DEBUG
645                 if (inst_buf_index == 3)
646                         printf("|M0|:\t");
647                 else
648                         printf("|M0|:\t\t");
649                 print_insn_tilegx(&bits);
650 #endif
651
652                 inst_buf_index = 0;
653
654 #ifdef TILEGX_JIT_DEBUG
655                 return push_inst_nodebug(compiler, bits);
656 #else
657                 return push_inst(compiler, bits);
658 #endif
659         }
660
661         /* If the bundle is invalid, split it in two. First encode the first two
662            (or possibly 1) instructions, and then the last, separately. Note that
663            assign_pipes may have re-ordered the instrs (by inserting no-ops in
664            lower slots) so we need to reset them. */
665
666         inst_buf_index = orig_index - 1;
667         inst_buf[0] = inst0;
668         inst_buf[1] = inst1;
669         inst_buf[2] = inst2;
670         if (assign_pipes() == 0) {
671                 for (i = 0; i < inst_buf_index; i++) {
672                         bits |= get_bundle_bit(inst_buf + i);
673 #ifdef TILEGX_JIT_DEBUG
674                         printf("|%04d", inst_buf[i].line);
675 #endif
676                 }
677
678 #ifdef TILEGX_JIT_DEBUG
679                 if (inst_buf_index == 3)
680                         printf("|M1|:\t");
681                 else
682                         printf("|M1|:\t\t");
683                 print_insn_tilegx(&bits);
684 #endif
685
686                 if ((orig_index - 1) == 2) {
687                         inst_buf[0] = inst2;
688                         inst_buf_index = 1;
689                 } else if ((orig_index - 1) == 1) {
690                         inst_buf[0] = inst1;
691                         inst_buf_index = 1;
692                 } else
693                         SLJIT_ASSERT_STOP();
694
695 #ifdef TILEGX_JIT_DEBUG
696                 return push_inst_nodebug(compiler, bits);
697 #else
698                 return push_inst(compiler, bits);
699 #endif
700         } else {
701                 /* We had 3 instrs of which the first 2 can't live in the same bundle.
702                    Split those two. Note that we don't try to then combine the second
703                    and third instr into a single bundle.  First instruction: */
704                 inst_buf_index = 1;
705                 inst_buf[0] = inst0;
706                 inst_buf[1] = inst1;
707                 inst_buf[2] = inst2;
708                 if (assign_pipes() == 0) {
709                         for (i = 0; i < inst_buf_index; i++) {
710                                 bits |= get_bundle_bit(inst_buf + i);
711 #ifdef TILEGX_JIT_DEBUG
712                                 printf("|%04d", inst_buf[i].line);
713 #endif
714                         }
715
716 #ifdef TILEGX_JIT_DEBUG
717                         if (inst_buf_index == 3)
718                                 printf("|M2|:\t");
719                         else
720                                 printf("|M2|:\t\t");
721                         print_insn_tilegx(&bits);
722 #endif
723
724                         inst_buf[0] = inst1;
725                         inst_buf[1] = inst2;
726                         inst_buf_index = orig_index - 1;
727 #ifdef TILEGX_JIT_DEBUG
728                         return push_inst_nodebug(compiler, bits);
729 #else
730                         return push_inst(compiler, bits);
731 #endif
732                 } else
733                         SLJIT_ASSERT_STOP();
734         }
735
736         SLJIT_ASSERT_STOP();
737 }
738
739 static sljit_si flush_buffer(struct sljit_compiler *compiler)
740 {
741         while (inst_buf_index != 0)
742                 update_buffer(compiler);
743 }
744
745 static sljit_si push_4_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int op3, int line)
746 {
747         if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
748                 FAIL_IF(update_buffer(compiler));
749
750         const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
751         inst_buf[inst_buf_index].opcode = opcode;
752         inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
753         inst_buf[inst_buf_index].operand_value[0] = op0;
754         inst_buf[inst_buf_index].operand_value[1] = op1;
755         inst_buf[inst_buf_index].operand_value[2] = op2;
756         inst_buf[inst_buf_index].operand_value[3] = op3;
757         inst_buf[inst_buf_index].input_registers = 1L << op1;
758         inst_buf[inst_buf_index].output_registers = 1L << op0;
759         inst_buf[inst_buf_index].line = line;
760         inst_buf_index++;
761
762         return SLJIT_SUCCESS;
763 }
764
765 static sljit_si push_3_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int line)
766 {
767         if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
768                 FAIL_IF(update_buffer(compiler));
769
770         const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
771         inst_buf[inst_buf_index].opcode = opcode;
772         inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
773         inst_buf[inst_buf_index].operand_value[0] = op0;
774         inst_buf[inst_buf_index].operand_value[1] = op1;
775         inst_buf[inst_buf_index].operand_value[2] = op2;
776         inst_buf[inst_buf_index].line = line;
777
778         switch (opc) {
779         case TILEGX_OPC_ST_ADD:
780                 inst_buf[inst_buf_index].input_registers = (1L << op0) | (1L << op1);
781                 inst_buf[inst_buf_index].output_registers = 1L << op0;
782                 break;
783         case TILEGX_OPC_LD_ADD:
784                 inst_buf[inst_buf_index].input_registers = 1L << op1;
785                 inst_buf[inst_buf_index].output_registers = (1L << op0) | (1L << op1);
786                 break;
787         case TILEGX_OPC_ADD:
788         case TILEGX_OPC_AND:
789         case TILEGX_OPC_SUB:
790         case TILEGX_OPC_OR:
791         case TILEGX_OPC_XOR:
792         case TILEGX_OPC_NOR:
793         case TILEGX_OPC_SHL:
794         case TILEGX_OPC_SHRU:
795         case TILEGX_OPC_SHRS:
796         case TILEGX_OPC_CMPLTU:
797         case TILEGX_OPC_CMPLTS:
798         case TILEGX_OPC_CMOVEQZ:
799         case TILEGX_OPC_CMOVNEZ:
800                 inst_buf[inst_buf_index].input_registers = (1L << op1) | (1L << op2);
801                 inst_buf[inst_buf_index].output_registers = 1L << op0;
802                 break;
803         case TILEGX_OPC_ADDLI:
804         case TILEGX_OPC_XORI:
805         case TILEGX_OPC_ORI:
806         case TILEGX_OPC_SHLI:
807         case TILEGX_OPC_SHRUI:
808         case TILEGX_OPC_SHRSI:
809         case TILEGX_OPC_SHL16INSLI:
810         case TILEGX_OPC_CMPLTUI:
811         case TILEGX_OPC_CMPLTSI:
812                 inst_buf[inst_buf_index].input_registers = 1L << op1;
813                 inst_buf[inst_buf_index].output_registers = 1L << op0;
814                 break;
815         default:
816                 printf("unrecoginzed opc: %s\n", opcode->name);
817                 SLJIT_ASSERT_STOP();
818         }
819
820         inst_buf_index++;
821
822         return SLJIT_SUCCESS;
823 }
824
825 static sljit_si push_2_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int line)
826 {
827         if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
828                 FAIL_IF(update_buffer(compiler));
829
830         const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
831         inst_buf[inst_buf_index].opcode = opcode;
832         inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
833         inst_buf[inst_buf_index].operand_value[0] = op0;
834         inst_buf[inst_buf_index].operand_value[1] = op1;
835         inst_buf[inst_buf_index].line = line;
836
837         switch (opc) {
838         case TILEGX_OPC_BEQZ:
839         case TILEGX_OPC_BNEZ:
840                 inst_buf[inst_buf_index].input_registers = 1L << op0;
841                 break;
842         case TILEGX_OPC_ST:
843         case TILEGX_OPC_ST1:
844         case TILEGX_OPC_ST2:
845         case TILEGX_OPC_ST4:
846                 inst_buf[inst_buf_index].input_registers = (1L << op0) | (1L << op1);
847                 inst_buf[inst_buf_index].output_registers = 0;
848                 break;
849         case TILEGX_OPC_CLZ:
850         case TILEGX_OPC_LD:
851         case TILEGX_OPC_LD1U:
852         case TILEGX_OPC_LD1S:
853         case TILEGX_OPC_LD2U:
854         case TILEGX_OPC_LD2S:
855         case TILEGX_OPC_LD4U:
856         case TILEGX_OPC_LD4S:
857                 inst_buf[inst_buf_index].input_registers = 1L << op1;
858                 inst_buf[inst_buf_index].output_registers = 1L << op0;
859                 break;
860         default:
861                 printf("unrecoginzed opc: %s\n", opcode->name);
862                 SLJIT_ASSERT_STOP();
863         }
864
865         inst_buf_index++;
866
867         return SLJIT_SUCCESS;
868 }
869
870 static sljit_si push_0_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int line)
871 {
872         if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
873                 FAIL_IF(update_buffer(compiler));
874
875         const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
876         inst_buf[inst_buf_index].opcode = opcode;
877         inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
878         inst_buf[inst_buf_index].input_registers = 0;
879         inst_buf[inst_buf_index].output_registers = 0;
880         inst_buf[inst_buf_index].line = line;
881         inst_buf_index++;
882
883         return SLJIT_SUCCESS;
884 }
885
886 static sljit_si push_jr_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int line)
887 {
888         if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
889                 FAIL_IF(update_buffer(compiler));
890
891         const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
892         inst_buf[inst_buf_index].opcode = opcode;
893         inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
894         inst_buf[inst_buf_index].operand_value[0] = op0;
895         inst_buf[inst_buf_index].input_registers = 1L << op0;
896         inst_buf[inst_buf_index].output_registers = 0;
897         inst_buf[inst_buf_index].line = line;
898         inst_buf_index++;
899  
900         return flush_buffer(compiler);
901 }
902
903 static SLJIT_INLINE sljit_ins * detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
904 {
905         sljit_sw diff;
906         sljit_uw target_addr;
907         sljit_ins *inst;
908         sljit_ins saved_inst;
909
910         if (jump->flags & SLJIT_REWRITABLE_JUMP)
911                 return code_ptr;
912
913         if (jump->flags & JUMP_ADDR)
914                 target_addr = jump->u.target;
915         else {
916                 SLJIT_ASSERT(jump->flags & JUMP_LABEL);
917                 target_addr = (sljit_uw)(code + jump->u.label->size);
918         }
919
920         inst = (sljit_ins *)jump->addr;
921         if (jump->flags & IS_COND)
922                 inst--;
923
924         diff = ((sljit_sw) target_addr - (sljit_sw) inst) >> 3;
925         if (diff <= SIMM_17BIT_MAX && diff >= SIMM_17BIT_MIN) {
926                 jump->flags |= PATCH_B;
927
928                 if (!(jump->flags & IS_COND)) {
929                         if (jump->flags & IS_JAL) {
930                                 jump->flags &= ~(PATCH_B);
931                                 jump->flags |= PATCH_J;
932                                 inst[0] = JAL_X1;
933
934 #ifdef TILEGX_JIT_DEBUG
935                                 printf("[runtime relocate]%04d:\t", __LINE__);
936                                 print_insn_tilegx(inst);
937 #endif
938                         } else {
939                                 inst[0] = BEQZ_X1 | SRCA_X1(ZERO);
940
941 #ifdef TILEGX_JIT_DEBUG
942                                 printf("[runtime relocate]%04d:\t", __LINE__);
943                                 print_insn_tilegx(inst);
944 #endif
945                         }
946
947                         return inst;
948                 }
949
950                 inst[0] = inst[0] ^ (0x7L << 55);
951
952 #ifdef TILEGX_JIT_DEBUG
953                 printf("[runtime relocate]%04d:\t", __LINE__);
954                 print_insn_tilegx(inst);
955 #endif
956                 jump->addr -= sizeof(sljit_ins);
957                 return inst;
958         }
959
960         if (jump->flags & IS_COND) {
961                 if ((target_addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)) {
962                         jump->flags |= PATCH_J;
963                         inst[0] = (inst[0] & ~(BOFF_X1(-1))) | BOFF_X1(2);
964                         inst[1] = J_X1;
965                         return inst + 1;
966                 }
967
968                 return code_ptr;
969         }
970
971         if ((target_addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)) {
972                 jump->flags |= PATCH_J;
973
974                 if (jump->flags & IS_JAL) {
975                         inst[0] = JAL_X1;
976
977 #ifdef TILEGX_JIT_DEBUG
978                         printf("[runtime relocate]%04d:\t", __LINE__);
979                         print_insn_tilegx(inst);
980 #endif
981
982                 } else {
983                         inst[0] = J_X1;
984
985 #ifdef TILEGX_JIT_DEBUG
986                         printf("[runtime relocate]%04d:\t", __LINE__);
987                         print_insn_tilegx(inst);
988 #endif
989                 }
990
991                 return inst;
992         }
993
994         return code_ptr;
995 }
996
997 SLJIT_API_FUNC_ATTRIBUTE void * sljit_generate_code(struct sljit_compiler *compiler)
998 {
999         struct sljit_memory_fragment *buf;
1000         sljit_ins *code;
1001         sljit_ins *code_ptr;
1002         sljit_ins *buf_ptr;
1003         sljit_ins *buf_end;
1004         sljit_uw word_count;
1005         sljit_uw addr;
1006
1007         struct sljit_label *label;
1008         struct sljit_jump *jump;
1009         struct sljit_const *const_;
1010
1011         CHECK_ERROR_PTR();
1012         check_sljit_generate_code(compiler);
1013         reverse_buf(compiler);
1014
1015         code = (sljit_ins *)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
1016         PTR_FAIL_WITH_EXEC_IF(code);
1017         buf = compiler->buf;
1018
1019         code_ptr = code;
1020         word_count = 0;
1021         label = compiler->labels;
1022         jump = compiler->jumps;
1023         const_ = compiler->consts;
1024         do {
1025                 buf_ptr = (sljit_ins *)buf->memory;
1026                 buf_end = buf_ptr + (buf->used_size >> 3);
1027                 do {
1028                         *code_ptr = *buf_ptr++;
1029                         SLJIT_ASSERT(!label || label->size >= word_count);
1030                         SLJIT_ASSERT(!jump || jump->addr >= word_count);
1031                         SLJIT_ASSERT(!const_ || const_->addr >= word_count);
1032                         /* These structures are ordered by their address. */
1033                         if (label && label->size == word_count) {
1034                                 /* Just recording the address. */
1035                                 label->addr = (sljit_uw) code_ptr;
1036                                 label->size = code_ptr - code;
1037                                 label = label->next;
1038                         }
1039
1040                         if (jump && jump->addr == word_count) {
1041                                 if (jump->flags & IS_JAL)
1042                                         jump->addr = (sljit_uw)(code_ptr - 4);
1043                                 else
1044                                         jump->addr = (sljit_uw)(code_ptr - 3);
1045
1046                                 code_ptr = detect_jump_type(jump, code_ptr, code);
1047                                 jump = jump->next;
1048                         }
1049
1050                         if (const_ && const_->addr == word_count) {
1051                                 /* Just recording the address. */
1052                                 const_->addr = (sljit_uw) code_ptr;
1053                                 const_ = const_->next;
1054                         }
1055
1056                         code_ptr++;
1057                         word_count++;
1058                 } while (buf_ptr < buf_end);
1059
1060                 buf = buf->next;
1061         } while (buf);
1062
1063         if (label && label->size == word_count) {
1064                 label->addr = (sljit_uw) code_ptr;
1065                 label->size = code_ptr - code;
1066                 label = label->next;
1067         }
1068
1069         SLJIT_ASSERT(!label);
1070         SLJIT_ASSERT(!jump);
1071         SLJIT_ASSERT(!const_);
1072         SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
1073
1074         jump = compiler->jumps;
1075         while (jump) {
1076                 do {
1077                         addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
1078                         buf_ptr = (sljit_ins *)jump->addr;
1079
1080                         if (jump->flags & PATCH_B) {
1081                                 addr = (sljit_sw)(addr - (jump->addr)) >> 3;
1082                                 SLJIT_ASSERT((sljit_sw) addr <= SIMM_17BIT_MAX && (sljit_sw) addr >= SIMM_17BIT_MIN);
1083                                 buf_ptr[0] = (buf_ptr[0] & ~(BOFF_X1(-1))) | BOFF_X1(addr);
1084
1085 #ifdef TILEGX_JIT_DEBUG
1086                                 printf("[runtime relocate]%04d:\t", __LINE__);
1087                                 print_insn_tilegx(buf_ptr);
1088 #endif
1089                                 break;
1090                         }
1091
1092                         if (jump->flags & PATCH_J) {
1093                                 SLJIT_ASSERT((addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL));
1094                                 addr = (sljit_sw)(addr - (jump->addr)) >> 3;
1095                                 buf_ptr[0] = (buf_ptr[0] & ~(JOFF_X1(-1))) | JOFF_X1(addr);
1096
1097 #ifdef TILEGX_JIT_DEBUG
1098                                 printf("[runtime relocate]%04d:\t", __LINE__);
1099                                 print_insn_tilegx(buf_ptr);
1100 #endif
1101                                 break;
1102                         }
1103
1104                         SLJIT_ASSERT(!(jump->flags & IS_JAL));
1105
1106                         /* Set the fields of immediate loads. */
1107                         buf_ptr[0] = (buf_ptr[0] & ~(0xFFFFL << 43)) | (((addr >> 32) & 0xFFFFL) << 43);
1108                         buf_ptr[1] = (buf_ptr[1] & ~(0xFFFFL << 43)) | (((addr >> 16) & 0xFFFFL) << 43);
1109                         buf_ptr[2] = (buf_ptr[2] & ~(0xFFFFL << 43)) | ((addr & 0xFFFFL) << 43);
1110                 } while (0);
1111
1112                 jump = jump->next;
1113         }
1114
1115         compiler->error = SLJIT_ERR_COMPILED;
1116         compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
1117         SLJIT_CACHE_FLUSH(code, code_ptr);
1118         return code;
1119 }
1120
1121 static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm)
1122 {
1123
1124         if (imm <= SIMM_16BIT_MAX && imm >= SIMM_16BIT_MIN)
1125                 return ADDLI(dst_ar, ZERO, imm);
1126
1127         if (imm <= SIMM_32BIT_MAX && imm >= SIMM_32BIT_MIN) {
1128                 FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 16));
1129                 return SHL16INSLI(dst_ar, dst_ar, imm);
1130         }
1131
1132         if (imm <= SIMM_48BIT_MAX && imm >= SIMM_48BIT_MIN) {
1133                 FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 32));
1134                 FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16));
1135                 return SHL16INSLI(dst_ar, dst_ar, imm);
1136         }
1137
1138         FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 48));
1139         FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 32));
1140         FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16));
1141         return SHL16INSLI(dst_ar, dst_ar, imm);
1142 }
1143
1144 static sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm, int flush)
1145 {
1146         /* Should *not* be optimized as load_immediate, as pcre relocation
1147            mechanism will match this fixed 4-instruction pattern. */
1148         if (flush) {
1149                 FAIL_IF(ADDLI_SOLO(dst_ar, ZERO, imm >> 32));
1150                 FAIL_IF(SHL16INSLI_SOLO(dst_ar, dst_ar, imm >> 16));
1151                 return SHL16INSLI_SOLO(dst_ar, dst_ar, imm);
1152         }
1153
1154         FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 32));
1155         FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16));
1156         return SHL16INSLI(dst_ar, dst_ar, imm);
1157 }
1158
1159 static sljit_si emit_const_64(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm, int flush)
1160 {
1161         /* Should *not* be optimized as load_immediate, as pcre relocation
1162            mechanism will match this fixed 4-instruction pattern. */
1163         if (flush) {
1164                 FAIL_IF(ADDLI_SOLO(reg_map[dst_ar], ZERO, imm >> 48));
1165                 FAIL_IF(SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm >> 32));
1166                 FAIL_IF(SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm >> 16));
1167                 return SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm);
1168         }
1169
1170         FAIL_IF(ADDLI(reg_map[dst_ar], ZERO, imm >> 48));
1171         FAIL_IF(SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm >> 32));
1172         FAIL_IF(SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm >> 16));
1173         return SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm);
1174 }
1175
1176 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
1177 {
1178         sljit_ins base;
1179         sljit_ins bundle = 0;
1180
1181         CHECK_ERROR();
1182         check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);
1183
1184         compiler->scratches = scratches;
1185         compiler->saveds = saveds;
1186 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
1187         compiler->logical_local_size = local_size;
1188 #endif
1189
1190         local_size += (saveds + 1) * sizeof(sljit_sw);
1191         local_size = (local_size + 7) & ~7;
1192         compiler->local_size = local_size;
1193
1194         if (local_size <= SIMM_16BIT_MAX) {
1195                 /* Frequent case. */
1196                 FAIL_IF(ADDLI(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, -local_size));
1197                 base = SLJIT_LOCALS_REG_mapped;
1198         } else {
1199                 FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, local_size));
1200                 FAIL_IF(ADD(TMP_REG2_mapped, SLJIT_LOCALS_REG_mapped, ZERO));
1201                 FAIL_IF(SUB(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped));
1202                 base = TMP_REG2_mapped;
1203                 local_size = 0;
1204         }
1205
1206         FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8));
1207         FAIL_IF(ST_ADD(ADDR_TMP_mapped, RA, -8));
1208
1209         if (saveds >= 1)
1210                 FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG1_mapped, -8));
1211
1212         if (saveds >= 2)
1213                 FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG2_mapped, -8));
1214
1215         if (saveds >= 3)
1216                 FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG3_mapped, -8));
1217
1218         if (saveds >= 4)
1219                 FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_EREG1_mapped, -8));
1220
1221         if (saveds >= 5)
1222                 FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_EREG2_mapped, -8));
1223
1224         if (args >= 1)
1225                 FAIL_IF(ADD(SLJIT_SAVED_REG1_mapped, 0, ZERO));
1226
1227         if (args >= 2)
1228                 FAIL_IF(ADD(SLJIT_SAVED_REG2_mapped, 1, ZERO));
1229
1230         if (args >= 3)
1231                 FAIL_IF(ADD(SLJIT_SAVED_REG3_mapped, 2, ZERO));
1232
1233         return SLJIT_SUCCESS;
1234 }
1235
1236 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
1237 {
1238         CHECK_ERROR_VOID();
1239         check_sljit_set_context(compiler, args, scratches, saveds, local_size);
1240
1241         compiler->scratches = scratches;
1242         compiler->saveds = saveds;
1243 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
1244         compiler->logical_local_size = local_size;
1245 #endif
1246
1247         local_size += (saveds + 1) * sizeof(sljit_sw);
1248         compiler->local_size = (local_size + 7) & ~7;
1249 }
1250
1251 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
1252 {
1253         sljit_si local_size;
1254         sljit_ins base;
1255         int addr_initialized = 0;
1256
1257         CHECK_ERROR();
1258         check_sljit_emit_return(compiler, op, src, srcw);
1259
1260         FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
1261
1262         local_size = compiler->local_size;
1263         if (local_size <= SIMM_16BIT_MAX)
1264                 base = SLJIT_LOCALS_REG_mapped;
1265         else {
1266                 FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, local_size));
1267                 FAIL_IF(ADD(TMP_REG1_mapped, SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped));
1268                 base = TMP_REG1_mapped;
1269                 local_size = 0;
1270         }
1271
1272         FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8));
1273         FAIL_IF(LD(RA, ADDR_TMP_mapped));
1274
1275         if (compiler->saveds >= 5) {
1276                 FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 48));
1277                 addr_initialized = 1;
1278
1279                 FAIL_IF(LD_ADD(SLJIT_SAVED_EREG2_mapped, ADDR_TMP_mapped, 8));
1280         }
1281
1282         if (compiler->saveds >= 4) {
1283                 if (addr_initialized == 0) {
1284                         FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 40));
1285                         addr_initialized = 1;
1286                 }
1287
1288                 FAIL_IF(LD_ADD(SLJIT_SAVED_EREG1_mapped, ADDR_TMP_mapped, 8));
1289         }
1290
1291         if (compiler->saveds >= 3) {
1292                 if (addr_initialized == 0) {
1293                         FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 32));
1294                         addr_initialized = 1;
1295                 }
1296
1297                 FAIL_IF(LD_ADD(SLJIT_SAVED_REG3_mapped, ADDR_TMP_mapped, 8));
1298         }
1299
1300         if (compiler->saveds >= 2) {
1301                 if (addr_initialized == 0) {
1302                         FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 24));
1303                         addr_initialized = 1;
1304                 }
1305
1306                 FAIL_IF(LD_ADD(SLJIT_SAVED_REG2_mapped, ADDR_TMP_mapped, 8));
1307         }
1308
1309         if (compiler->saveds >= 1) {
1310                 if (addr_initialized == 0) {
1311                         FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 16));
1312                         /* addr_initialized = 1; no need to initialize as it's the last one. */
1313                 }
1314
1315                 FAIL_IF(LD_ADD(SLJIT_SAVED_REG1_mapped, ADDR_TMP_mapped, 8));
1316         }
1317
1318         if (compiler->local_size <= SIMM_16BIT_MAX)
1319                 FAIL_IF(ADDLI(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, compiler->local_size));
1320         else
1321                 FAIL_IF(ADD(SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped, ZERO));
1322
1323         return JR(RA);
1324 }
1325
1326 /* reg_ar is an absoulute register! */
1327
1328 /* Can perform an operation using at most 1 instruction. */
1329 static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw)
1330 {
1331         SLJIT_ASSERT(arg & SLJIT_MEM);
1332
1333         if ((!(flags & WRITE_BACK) || !(arg & REG_MASK))
1334                         && !(arg & OFFS_REG_MASK) && argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) {
1335                 /* Works for both absoulte and relative addresses. */
1336                 if (SLJIT_UNLIKELY(flags & ARG_TEST))
1337                         return 1;
1338
1339                 FAIL_IF(ADDLI(ADDR_TMP_mapped, reg_map[arg & REG_MASK], argw));
1340
1341                 if (flags & LOAD_DATA)
1342                         FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, ADDR_TMP_mapped));
1343                 else
1344                         FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], ADDR_TMP_mapped, reg_ar));
1345
1346                 return -1;
1347         }
1348
1349         return 0;
1350 }
1351
1352 /* See getput_arg below.
1353    Note: can_cache is called only for binary operators. Those
1354    operators always uses word arguments without write back. */
1355 static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
1356 {
1357         SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
1358
1359         /* Simple operation except for updates. */
1360         if (arg & OFFS_REG_MASK) {
1361                 argw &= 0x3;
1362                 next_argw &= 0x3;
1363                 if (argw && argw == next_argw
1364                                 && (arg == next_arg || (arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK)))
1365                         return 1;
1366                 return 0;
1367         }
1368
1369         if (arg == next_arg) {
1370                 if (((next_argw - argw) <= SIMM_16BIT_MAX
1371                                 && (next_argw - argw) >= SIMM_16BIT_MIN))
1372                         return 1;
1373
1374                 return 0;
1375         }
1376
1377         return 0;
1378 }
1379
1380 /* Emit the necessary instructions. See can_cache above. */
1381 static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
1382 {
1383         sljit_si tmp_ar, base;
1384
1385         SLJIT_ASSERT(arg & SLJIT_MEM);
1386         if (!(next_arg & SLJIT_MEM)) {
1387                 next_arg = 0;
1388                 next_argw = 0;
1389         }
1390
1391         if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA))
1392                 tmp_ar = reg_ar;
1393         else
1394                 tmp_ar = TMP_REG1_mapped;
1395
1396         base = arg & REG_MASK;
1397
1398         if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
1399                 argw &= 0x3;
1400
1401                 if ((flags & WRITE_BACK) && reg_ar == reg_map[base]) {
1402                         SLJIT_ASSERT(!(flags & LOAD_DATA) && reg_map[TMP_REG1] != reg_ar);
1403                         FAIL_IF(ADD(TMP_REG1_mapped, reg_ar, ZERO));
1404                         reg_ar = TMP_REG1_mapped;
1405                 }
1406
1407                 /* Using the cache. */
1408                 if (argw == compiler->cache_argw) {
1409                         if (!(flags & WRITE_BACK)) {
1410                                 if (arg == compiler->cache_arg) {
1411                                         if (flags & LOAD_DATA)
1412                                                 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
1413                                         else
1414                                                 return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
1415                                 }
1416
1417                                 if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
1418                                         if (arg == next_arg && argw == (next_argw & 0x3)) {
1419                                                 compiler->cache_arg = arg;
1420                                                 compiler->cache_argw = argw;
1421                                                 FAIL_IF(ADD(TMP_REG3_mapped, reg_map[base], TMP_REG3_mapped));
1422                                                 if (flags & LOAD_DATA)
1423                                                         return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
1424                                                 else
1425                                                         return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
1426                                         }
1427
1428                                         FAIL_IF(ADD(tmp_ar, reg_map[base], TMP_REG3_mapped));
1429                                         if (flags & LOAD_DATA)
1430                                                 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar);
1431                                         else
1432                                                 return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar);
1433                                 }
1434                         } else {
1435                                 if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
1436                                         FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped));
1437                                         if (flags & LOAD_DATA)
1438                                                 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]);
1439                                         else
1440                                                 return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar);
1441                                 }
1442                         }
1443                 }
1444
1445                 if (SLJIT_UNLIKELY(argw)) {
1446                         compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
1447                         compiler->cache_argw = argw;
1448                         FAIL_IF(SHLI(TMP_REG3_mapped, reg_map[OFFS_REG(arg)], argw));
1449                 }
1450
1451                 if (!(flags & WRITE_BACK)) {
1452                         if (arg == next_arg && argw == (next_argw & 0x3)) {
1453                                 compiler->cache_arg = arg;
1454                                 compiler->cache_argw = argw;
1455                                 FAIL_IF(ADD(TMP_REG3_mapped, reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3]));
1456                                 tmp_ar = TMP_REG3_mapped;
1457                         } else
1458                                 FAIL_IF(ADD(tmp_ar, reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3]));
1459
1460                         if (flags & LOAD_DATA)
1461                                 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar);
1462                         else
1463                                 return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar);
1464                 }
1465
1466                 FAIL_IF(ADD(reg_map[base], reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3]));
1467
1468                 if (flags & LOAD_DATA)
1469                         return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]);
1470                 else
1471                         return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar);
1472         }
1473
1474         if (SLJIT_UNLIKELY(flags & WRITE_BACK) && base) {
1475                 /* Update only applies if a base register exists. */
1476                 if (reg_ar == reg_map[base]) {
1477                         SLJIT_ASSERT(!(flags & LOAD_DATA) && TMP_REG1_mapped != reg_ar);
1478                         if (argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) {
1479                                 FAIL_IF(ADDLI(ADDR_TMP_mapped, reg_map[base], argw));
1480                                 if (flags & LOAD_DATA)
1481                                         FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, ADDR_TMP_mapped));
1482                                 else
1483                                         FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], ADDR_TMP_mapped, reg_ar));
1484
1485                                 if (argw)
1486                                         return ADDLI(reg_map[base], reg_map[base], argw);
1487
1488                                 return SLJIT_SUCCESS;
1489                         }
1490
1491                         FAIL_IF(ADD(TMP_REG1_mapped, reg_ar, ZERO));
1492                         reg_ar = TMP_REG1_mapped;
1493                 }
1494
1495                 if (argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) {
1496                         if (argw)
1497                                 FAIL_IF(ADDLI(reg_map[base], reg_map[base], argw));
1498                 } else {
1499                         if (compiler->cache_arg == SLJIT_MEM
1500                                         && argw - compiler->cache_argw <= SIMM_16BIT_MAX
1501                                         && argw - compiler->cache_argw >= SIMM_16BIT_MIN) {
1502                                 if (argw != compiler->cache_argw) {
1503                                         FAIL_IF(ADD(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw));
1504                                         compiler->cache_argw = argw;
1505                                 }
1506
1507                                 FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped));
1508                         } else {
1509                                 compiler->cache_arg = SLJIT_MEM;
1510                                 compiler->cache_argw = argw;
1511                                 FAIL_IF(load_immediate(compiler, TMP_REG3_mapped, argw));
1512                                 FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped));
1513                         }
1514                 }
1515
1516                 if (flags & LOAD_DATA)
1517                         return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]);
1518                 else
1519                         return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar);
1520         }
1521
1522         if (compiler->cache_arg == arg
1523                         && argw - compiler->cache_argw <= SIMM_16BIT_MAX
1524                         && argw - compiler->cache_argw >= SIMM_16BIT_MIN) {
1525                 if (argw != compiler->cache_argw) {
1526                         FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw));
1527                         compiler->cache_argw = argw;
1528                 }
1529
1530                 if (flags & LOAD_DATA)
1531                         return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
1532                 else
1533                         return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
1534         }
1535
1536         if (compiler->cache_arg == SLJIT_MEM
1537                         && argw - compiler->cache_argw <= SIMM_16BIT_MAX
1538                         && argw - compiler->cache_argw >= SIMM_16BIT_MIN) {
1539                 if (argw != compiler->cache_argw)
1540                         FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw));
1541         } else {
1542                 compiler->cache_arg = SLJIT_MEM;
1543                 FAIL_IF(load_immediate(compiler, TMP_REG3_mapped, argw));
1544         }
1545
1546         compiler->cache_argw = argw;
1547
1548         if (!base) {
1549                 if (flags & LOAD_DATA)
1550                         return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
1551                 else
1552                         return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
1553         }
1554
1555         if (arg == next_arg
1556                         && next_argw - argw <= SIMM_16BIT_MAX
1557                         && next_argw - argw >= SIMM_16BIT_MIN) {
1558                 compiler->cache_arg = arg;
1559                 FAIL_IF(ADD(TMP_REG3_mapped, TMP_REG3_mapped, reg_map[base]));
1560                 if (flags & LOAD_DATA)
1561                         return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
1562                 else
1563                         return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
1564         }
1565
1566         FAIL_IF(ADD(tmp_ar, TMP_REG3_mapped, reg_map[base]));
1567
1568         if (flags & LOAD_DATA)
1569                 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar);
1570         else
1571                 return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar);
1572 }
1573
1574 static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw)
1575 {
1576         if (getput_arg_fast(compiler, flags, reg_ar, arg, argw))
1577                 return compiler->error;
1578
1579         compiler->cache_arg = 0;
1580         compiler->cache_argw = 0;
1581         return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0);
1582 }
1583
1584 static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
1585 {
1586         if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
1587                 return compiler->error;
1588         return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
1589 }
1590
1591 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
1592 {
1593         CHECK_ERROR();
1594         check_sljit_emit_fast_enter(compiler, dst, dstw);
1595         ADJUST_LOCAL_OFFSET(dst, dstw);
1596
1597         /* For UNUSED dst. Uncommon, but possible. */
1598         if (dst == SLJIT_UNUSED)
1599                 return SLJIT_SUCCESS;
1600
1601         if (FAST_IS_REG(dst))
1602                 return ADD(reg_map[dst], RA, ZERO);
1603
1604         /* Memory. */
1605         return emit_op_mem(compiler, WORD_DATA, RA, dst, dstw);
1606 }
1607
1608 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
1609 {
1610         CHECK_ERROR();
1611         check_sljit_emit_fast_return(compiler, src, srcw);
1612         ADJUST_LOCAL_OFFSET(src, srcw);
1613
1614         if (FAST_IS_REG(src))
1615                 FAIL_IF(ADD(RA, reg_map[src], ZERO));
1616
1617         else if (src & SLJIT_MEM)
1618                 FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RA, src, srcw));
1619
1620         else if (src & SLJIT_IMM)
1621                 FAIL_IF(load_immediate(compiler, RA, srcw));
1622
1623         return JR(RA);
1624 }
1625
1626 static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, sljit_si dst, sljit_si src1, sljit_sw src2)
1627 {
1628         sljit_si overflow_ra = 0;
1629
1630         switch (GET_OPCODE(op)) {
1631         case SLJIT_MOV:
1632         case SLJIT_MOV_P:
1633                 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
1634                 if (dst != src2)
1635                         return ADD(reg_map[dst], reg_map[src2], ZERO);
1636                 return SLJIT_SUCCESS;
1637
1638         case SLJIT_MOV_UI:
1639         case SLJIT_MOV_SI:
1640                 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
1641                 if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
1642                         if (op == SLJIT_MOV_SI)
1643                                 return BFEXTS(reg_map[dst], reg_map[src2], 0, 31);
1644
1645                 return BFEXTU(reg_map[dst], reg_map[src2], 0, 31);
1646                 } else if (dst != src2)
1647                         SLJIT_ASSERT_STOP();
1648
1649                 return SLJIT_SUCCESS;
1650
1651         case SLJIT_MOV_UB:
1652         case SLJIT_MOV_SB:
1653                 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
1654                 if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
1655                         if (op == SLJIT_MOV_SB)
1656                                 return BFEXTS(reg_map[dst], reg_map[src2], 0, 7);
1657
1658                         return BFEXTU(reg_map[dst], reg_map[src2], 0, 7);
1659                 } else if (dst != src2)
1660                         SLJIT_ASSERT_STOP();
1661
1662                 return SLJIT_SUCCESS;
1663
1664         case SLJIT_MOV_UH:
1665         case SLJIT_MOV_SH:
1666                 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
1667                 if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
1668                         if (op == SLJIT_MOV_SH)
1669                                 return BFEXTS(reg_map[dst], reg_map[src2], 0, 15);
1670
1671                         return BFEXTU(reg_map[dst], reg_map[src2], 0, 15);
1672                 } else if (dst != src2)
1673                         SLJIT_ASSERT_STOP();
1674
1675                 return SLJIT_SUCCESS;
1676
1677         case SLJIT_NOT:
1678                 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
1679                 if (op & SLJIT_SET_E)
1680                         FAIL_IF(NOR(EQUAL_FLAG, reg_map[src2], reg_map[src2]));
1681                 if (CHECK_FLAGS(SLJIT_SET_E))
1682                         FAIL_IF(NOR(reg_map[dst], reg_map[src2], reg_map[src2]));
1683
1684                 return SLJIT_SUCCESS;
1685
1686         case SLJIT_CLZ:
1687                 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
1688                 if (op & SLJIT_SET_E)
1689                         FAIL_IF(CLZ(EQUAL_FLAG, reg_map[src2]));
1690                 if (CHECK_FLAGS(SLJIT_SET_E))
1691                         FAIL_IF(CLZ(reg_map[dst], reg_map[src2]));
1692
1693                 return SLJIT_SUCCESS;
1694
1695         case SLJIT_ADD:
1696                 if (flags & SRC2_IMM) {
1697                         if (op & SLJIT_SET_O) {
1698                                 FAIL_IF(SHRUI(TMP_EREG1, reg_map[src1], 63));
1699                                 if (src2 < 0)
1700                                         FAIL_IF(XORI(TMP_EREG1, TMP_EREG1, 1));
1701                         }
1702
1703                         if (op & SLJIT_SET_E)
1704                                 FAIL_IF(ADDLI(EQUAL_FLAG, reg_map[src1], src2));
1705
1706                         if (op & SLJIT_SET_C) {
1707                                 if (src2 >= 0)
1708                                         FAIL_IF(ORI(ULESS_FLAG ,reg_map[src1], src2));
1709                                 else {
1710                                         FAIL_IF(ADDLI(ULESS_FLAG ,ZERO, src2));
1711                                         FAIL_IF(OR(ULESS_FLAG,reg_map[src1],ULESS_FLAG));
1712                                 }
1713                         }
1714
1715                         /* dst may be the same as src1 or src2. */
1716                         if (CHECK_FLAGS(SLJIT_SET_E))
1717                                 FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], src2));
1718
1719                         if (op & SLJIT_SET_O) {
1720                                 FAIL_IF(SHRUI(OVERFLOW_FLAG, reg_map[dst], 63));
1721
1722                                 if (src2 < 0)
1723                                         FAIL_IF(XORI(OVERFLOW_FLAG, OVERFLOW_FLAG, 1));
1724                         }
1725                 } else {
1726                         if (op & SLJIT_SET_O) {
1727                                 FAIL_IF(XOR(TMP_EREG1, reg_map[src1], reg_map[src2]));
1728                                 FAIL_IF(SHRUI(TMP_EREG1, TMP_EREG1, 63));
1729
1730                                 if (src1 != dst)
1731                                         overflow_ra = reg_map[src1];
1732                                 else if (src2 != dst)
1733                                         overflow_ra = reg_map[src2];
1734                                 else {
1735                                         /* Rare ocasion. */
1736                                         FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));
1737                                         overflow_ra = TMP_EREG2;
1738                                 }
1739                         }
1740
1741                         if (op & SLJIT_SET_E)
1742                                 FAIL_IF(ADD(EQUAL_FLAG ,reg_map[src1], reg_map[src2]));
1743
1744                         if (op & SLJIT_SET_C)
1745                                 FAIL_IF(OR(ULESS_FLAG,reg_map[src1], reg_map[src2]));
1746
1747                         /* dst may be the same as src1 or src2. */
1748                         if (CHECK_FLAGS(SLJIT_SET_E))
1749                                 FAIL_IF(ADD(reg_map[dst],reg_map[src1], reg_map[src2]));
1750
1751                         if (op & SLJIT_SET_O) {
1752                                 FAIL_IF(XOR(OVERFLOW_FLAG,reg_map[dst], overflow_ra));
1753                                 FAIL_IF(SHRUI(OVERFLOW_FLAG, OVERFLOW_FLAG, 63));
1754                         }
1755                 }
1756
1757                 /* a + b >= a | b (otherwise, the carry should be set to 1). */
1758                 if (op & SLJIT_SET_C)
1759                         FAIL_IF(CMPLTU(ULESS_FLAG ,reg_map[dst] ,ULESS_FLAG));
1760
1761                 if (op & SLJIT_SET_O)
1762                         return CMOVNEZ(OVERFLOW_FLAG, TMP_EREG1, ZERO);
1763
1764                 return SLJIT_SUCCESS;
1765
1766         case SLJIT_ADDC:
1767                 if (flags & SRC2_IMM) {
1768                         if (op & SLJIT_SET_C) {
1769                                 if (src2 >= 0)
1770                                         FAIL_IF(ORI(TMP_EREG1, reg_map[src1], src2));
1771                                 else {
1772                                         FAIL_IF(ADDLI(TMP_EREG1, ZERO, src2));
1773                                         FAIL_IF(OR(TMP_EREG1, reg_map[src1], TMP_EREG1));
1774                                 }
1775                         }
1776
1777                         FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], src2));
1778
1779                 } else {
1780                         if (op & SLJIT_SET_C)
1781                                 FAIL_IF(OR(TMP_EREG1, reg_map[src1], reg_map[src2]));
1782
1783                         /* dst may be the same as src1 or src2. */
1784                         FAIL_IF(ADD(reg_map[dst], reg_map[src1], reg_map[src2]));
1785                 }
1786
1787                 if (op & SLJIT_SET_C)
1788                         FAIL_IF(CMPLTU(TMP_EREG1, reg_map[dst], TMP_EREG1));
1789
1790                 FAIL_IF(ADD(reg_map[dst], reg_map[dst], ULESS_FLAG));
1791
1792                 if (!(op & SLJIT_SET_C))
1793                         return SLJIT_SUCCESS;
1794
1795                 /* Set TMP_EREG2 (dst == 0) && (ULESS_FLAG == 1). */
1796                 FAIL_IF(CMPLTUI(TMP_EREG2, reg_map[dst], 1));
1797                 FAIL_IF(AND(TMP_EREG2, TMP_EREG2, ULESS_FLAG));
1798                 /* Set carry flag. */
1799                 return OR(ULESS_FLAG, TMP_EREG2, TMP_EREG1);
1800
1801         case SLJIT_SUB:
1802                 if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_16BIT_MIN)) {
1803                         FAIL_IF(ADDLI(TMP_REG2_mapped, ZERO, src2));
1804                         src2 = TMP_REG2;
1805                         flags &= ~SRC2_IMM;
1806                 }
1807
1808                 if (flags & SRC2_IMM) {
1809                         if (op & SLJIT_SET_O) {
1810                                 FAIL_IF(SHRUI(TMP_EREG1,reg_map[src1], 63));
1811
1812                                 if (src2 < 0)
1813                                         FAIL_IF(XORI(TMP_EREG1, TMP_EREG1, 1));
1814
1815                                 if (src1 != dst)
1816                                         overflow_ra = reg_map[src1];
1817                                 else {
1818                                         /* Rare ocasion. */
1819                                         FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));
1820         
1821                                         overflow_ra = TMP_EREG2;
1822                                 }
1823                         }
1824
1825                         if (op & SLJIT_SET_E)
1826                                 FAIL_IF(ADDLI(EQUAL_FLAG, reg_map[src1], -src2));
1827
1828                         if (op & SLJIT_SET_C) {
1829                                 FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2));
1830                                 FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[src1], ADDR_TMP_mapped));
1831                         }
1832
1833                         /* dst may be the same as src1 or src2. */
1834                         if (CHECK_FLAGS(SLJIT_SET_E))
1835                                 FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], -src2));
1836
1837                 } else {
1838
1839                         if (op & SLJIT_SET_O) {
1840                                 FAIL_IF(XOR(TMP_EREG1, reg_map[src1], reg_map[src2]));
1841                                 FAIL_IF(SHRUI(TMP_EREG1, TMP_EREG1, 63));
1842
1843                                 if (src1 != dst)
1844                                         overflow_ra = reg_map[src1];
1845                                 else {
1846                                         /* Rare ocasion. */
1847                                         FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));
1848                                         overflow_ra = TMP_EREG2;
1849                                 }
1850                         }
1851
1852                         if (op & SLJIT_SET_E)
1853                                 FAIL_IF(SUB(EQUAL_FLAG, reg_map[src1], reg_map[src2]));
1854
1855                         if (op & (SLJIT_SET_U | SLJIT_SET_C))
1856                                 FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[src1], reg_map[src2]));
1857
1858                         if (op & SLJIT_SET_U)
1859                                 FAIL_IF(CMPLTU(UGREATER_FLAG, reg_map[src2], reg_map[src1]));
1860
1861                         if (op & SLJIT_SET_S) {
1862                                 FAIL_IF(CMPLTS(LESS_FLAG ,reg_map[src1] ,reg_map[src2]));
1863                                 FAIL_IF(CMPLTS(GREATER_FLAG ,reg_map[src2] ,reg_map[src1]));
1864                         }
1865
1866                         /* dst may be the same as src1 or src2. */
1867                         if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))
1868                                 FAIL_IF(SUB(reg_map[dst], reg_map[src1], reg_map[src2]));
1869                 }
1870
1871                 if (op & SLJIT_SET_O) {
1872                         FAIL_IF(XOR(OVERFLOW_FLAG, reg_map[dst], overflow_ra));
1873                         FAIL_IF(SHRUI(OVERFLOW_FLAG, OVERFLOW_FLAG, 63));
1874                         return CMOVEQZ(OVERFLOW_FLAG, TMP_EREG1, ZERO);
1875                 }
1876
1877                 return SLJIT_SUCCESS;
1878
1879         case SLJIT_SUBC:
1880                 if ((flags & SRC2_IMM) && src2 == SIMM_16BIT_MIN) {
1881                         FAIL_IF(ADDLI(TMP_REG2_mapped, ZERO, src2));
1882                         src2 = TMP_REG2;
1883                         flags &= ~SRC2_IMM;
1884                 }
1885
1886                 if (flags & SRC2_IMM) {
1887                         if (op & SLJIT_SET_C) {
1888                                 FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, -src2));
1889                                 FAIL_IF(CMPLTU(TMP_EREG1, reg_map[src1], ADDR_TMP_mapped));
1890                         }
1891
1892                         /* dst may be the same as src1 or src2. */
1893                         FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], -src2));
1894
1895                 } else {
1896                         if (op & SLJIT_SET_C)
1897                                 FAIL_IF(CMPLTU(TMP_EREG1, reg_map[src1], reg_map[src2]));
1898                                 /* dst may be the same as src1 or src2. */
1899                         FAIL_IF(SUB(reg_map[dst], reg_map[src1], reg_map[src2]));
1900                 }
1901
1902                 if (op & SLJIT_SET_C)
1903                         FAIL_IF(CMOVEQZ(TMP_EREG1, reg_map[dst], ULESS_FLAG));
1904
1905                 FAIL_IF(SUB(reg_map[dst], reg_map[dst], ULESS_FLAG));
1906
1907                 if (op & SLJIT_SET_C)
1908                         FAIL_IF(ADD(ULESS_FLAG, TMP_EREG1, ZERO));
1909
1910                 return SLJIT_SUCCESS;
1911
1912 #define EMIT_LOGICAL(op_imm, op_norm) \
1913         if (flags & SRC2_IMM) { \
1914                 FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2)); \
1915                 if (op & SLJIT_SET_E) \
1916                         FAIL_IF(push_3_buffer( \
1917                                 compiler, op_norm, EQUAL_FLAG, reg_map[src1], \
1918                                 ADDR_TMP_mapped, __LINE__)); \
1919                 if (CHECK_FLAGS(SLJIT_SET_E)) \
1920                         FAIL_IF(push_3_buffer( \
1921                                 compiler, op_norm, reg_map[dst], reg_map[src1], \
1922                                 ADDR_TMP_mapped, __LINE__)); \
1923         } else { \
1924                 if (op & SLJIT_SET_E) \
1925                         FAIL_IF(push_3_buffer( \
1926                                 compiler, op_norm, EQUAL_FLAG, reg_map[src1], \
1927                                 reg_map[src2], __LINE__)); \
1928                 if (CHECK_FLAGS(SLJIT_SET_E)) \
1929                         FAIL_IF(push_3_buffer( \
1930                                 compiler, op_norm, reg_map[dst], reg_map[src1], \
1931                                 reg_map[src2], __LINE__)); \
1932         }
1933
1934         case SLJIT_AND:
1935                 EMIT_LOGICAL(TILEGX_OPC_ANDI, TILEGX_OPC_AND);
1936                 return SLJIT_SUCCESS;
1937
1938         case SLJIT_OR:
1939                 EMIT_LOGICAL(TILEGX_OPC_ORI, TILEGX_OPC_OR);
1940                 return SLJIT_SUCCESS;
1941
1942         case SLJIT_XOR:
1943                 EMIT_LOGICAL(TILEGX_OPC_XORI, TILEGX_OPC_XOR);
1944                 return SLJIT_SUCCESS;
1945
1946 #define EMIT_SHIFT(op_imm, op_norm) \
1947         if (flags & SRC2_IMM) { \
1948                 if (op & SLJIT_SET_E) \
1949                         FAIL_IF(push_3_buffer( \
1950                                 compiler, op_imm, EQUAL_FLAG, reg_map[src1], \
1951                                 src2 & 0x3F, __LINE__)); \
1952                 if (CHECK_FLAGS(SLJIT_SET_E)) \
1953                         FAIL_IF(push_3_buffer( \
1954                                 compiler, op_imm, reg_map[dst], reg_map[src1], \
1955                                 src2 & 0x3F, __LINE__)); \
1956         } else { \
1957                 if (op & SLJIT_SET_E) \
1958                         FAIL_IF(push_3_buffer( \
1959                                 compiler, op_imm, reg_map[dst], reg_map[src1], \
1960                                 src2 & 0x3F, __LINE__)); \
1961                 if (CHECK_FLAGS(SLJIT_SET_E)) \
1962                         FAIL_IF(push_3_buffer( \
1963                                 compiler, op_norm, reg_map[dst], reg_map[src1], \
1964                                 reg_map[src2], __LINE__)); \
1965         }
1966
1967         case SLJIT_SHL:
1968                 EMIT_SHIFT(TILEGX_OPC_SHLI, TILEGX_OPC_SHL);
1969                 return SLJIT_SUCCESS;
1970
1971         case SLJIT_LSHR:
1972                 EMIT_SHIFT(TILEGX_OPC_SHRUI, TILEGX_OPC_SHRU);
1973                 return SLJIT_SUCCESS;
1974
1975         case SLJIT_ASHR:
1976                 EMIT_SHIFT(TILEGX_OPC_SHRSI, TILEGX_OPC_SHRS);
1977                 return SLJIT_SUCCESS;
1978         }
1979
1980         SLJIT_ASSERT_STOP();
1981         return SLJIT_SUCCESS;
1982 }
1983
1984 static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w)
1985 {
1986         /* arg1 goes to TMP_REG1 or src reg.
1987            arg2 goes to TMP_REG2, imm or src reg.
1988            TMP_REG3 can be used for caching.
1989            result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
1990         sljit_si dst_r = TMP_REG2;
1991         sljit_si src1_r;
1992         sljit_sw src2_r = 0;
1993         sljit_si sugg_src2_r = TMP_REG2;
1994
1995         if (!(flags & ALT_KEEP_CACHE)) {
1996                 compiler->cache_arg = 0;
1997                 compiler->cache_argw = 0;
1998         }
1999
2000         if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
2001                 if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM))
2002                         return SLJIT_SUCCESS;
2003                 if (GET_FLAGS(op))
2004                         flags |= UNUSED_DEST;
2005         } else if (FAST_IS_REG(dst)) {
2006                 dst_r = dst;
2007                 flags |= REG_DEST;
2008                 if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
2009                         sugg_src2_r = dst_r;
2010         } else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1_mapped, dst, dstw))
2011                 flags |= SLOW_DEST;
2012
2013         if (flags & IMM_OP) {
2014                 if ((src2 & SLJIT_IMM) && src2w) {
2015                         if ((!(flags & LOGICAL_OP)
2016                                         && (src2w <= SIMM_16BIT_MAX && src2w >= SIMM_16BIT_MIN))
2017                                         || ((flags & LOGICAL_OP) && !(src2w & ~UIMM_16BIT_MAX))) {
2018                                 flags |= SRC2_IMM;
2019                                 src2_r = src2w;
2020                         }
2021                 }
2022
2023                 if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) {
2024                         if ((!(flags & LOGICAL_OP)
2025                                         && (src1w <= SIMM_16BIT_MAX && src1w >= SIMM_16BIT_MIN))
2026                                         || ((flags & LOGICAL_OP) && !(src1w & ~UIMM_16BIT_MAX))) {
2027                                 flags |= SRC2_IMM;
2028                                 src2_r = src1w;
2029
2030                                 /* And swap arguments. */
2031                                 src1 = src2;
2032                                 src1w = src2w;
2033                                 src2 = SLJIT_IMM;
2034                                 /* src2w = src2_r unneeded. */
2035                         }
2036                 }
2037         }
2038
2039         /* Source 1. */
2040         if (FAST_IS_REG(src1)) {
2041                 src1_r = src1;
2042                 flags |= REG1_SOURCE;
2043         } else if (src1 & SLJIT_IMM) {
2044                 if (src1w) {
2045                         FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, src1w));
2046                         src1_r = TMP_REG1;
2047                 } else
2048                         src1_r = 0;
2049         } else {
2050                 if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w))
2051                         FAIL_IF(compiler->error);
2052                 else
2053                         flags |= SLOW_SRC1;
2054                 src1_r = TMP_REG1;
2055         }
2056
2057         /* Source 2. */
2058         if (FAST_IS_REG(src2)) {
2059                 src2_r = src2;
2060                 flags |= REG2_SOURCE;
2061                 if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
2062                         dst_r = src2_r;
2063         } else if (src2 & SLJIT_IMM) {
2064                 if (!(flags & SRC2_IMM)) {
2065                         if (src2w) {
2066                                 FAIL_IF(load_immediate(compiler, reg_map[sugg_src2_r], src2w));
2067                                 src2_r = sugg_src2_r;
2068                         } else {
2069                                 src2_r = 0;
2070                                 if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) && (dst & SLJIT_MEM))
2071                                         dst_r = 0;
2072                         }
2073                 }
2074         } else {
2075                 if (getput_arg_fast(compiler, flags | LOAD_DATA, reg_map[sugg_src2_r], src2, src2w))
2076                         FAIL_IF(compiler->error);
2077                 else
2078                         flags |= SLOW_SRC2;
2079                 src2_r = sugg_src2_r;
2080         }
2081
2082         if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
2083                 SLJIT_ASSERT(src2_r == TMP_REG2);
2084                 if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
2085                         FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2_mapped, src2, src2w, src1, src1w));
2086                         FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, dst, dstw));
2087                 } else {
2088                         FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, src2, src2w));
2089                         FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2_mapped, src2, src2w, dst, dstw));
2090                 }
2091         } else if (flags & SLOW_SRC1)
2092                 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, dst, dstw));
2093         else if (flags & SLOW_SRC2)
2094                 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, reg_map[sugg_src2_r], src2, src2w, dst, dstw));
2095
2096         FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
2097
2098         if (dst & SLJIT_MEM) {
2099                 if (!(flags & SLOW_DEST)) {
2100                         getput_arg_fast(compiler, flags, reg_map[dst_r], dst, dstw);
2101                         return compiler->error;
2102                 }
2103
2104                 return getput_arg(compiler, flags, reg_map[dst_r], dst, dstw, 0, 0);
2105         }
2106
2107         return SLJIT_SUCCESS;
2108 }
2109
2110 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw, sljit_si type)
2111 {
2112         sljit_si sugg_dst_ar, dst_ar;
2113         sljit_si flags = GET_ALL_FLAGS(op);
2114
2115         CHECK_ERROR();
2116         check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);
2117         ADJUST_LOCAL_OFFSET(dst, dstw);
2118
2119         if (dst == SLJIT_UNUSED)
2120                 return SLJIT_SUCCESS;
2121
2122         op = GET_OPCODE(op);
2123         sugg_dst_ar = reg_map[(op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2];
2124
2125         compiler->cache_arg = 0;
2126         compiler->cache_argw = 0;
2127         if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
2128                 ADJUST_LOCAL_OFFSET(src, srcw);
2129                 FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1_mapped, src, srcw, dst, dstw));
2130                 src = TMP_REG1;
2131                 srcw = 0;
2132         }
2133
2134         switch (type) {
2135         case SLJIT_C_EQUAL:
2136         case SLJIT_C_NOT_EQUAL:
2137                 FAIL_IF(CMPLTUI(sugg_dst_ar, EQUAL_FLAG, 1));
2138                 dst_ar = sugg_dst_ar;
2139                 break;
2140         case SLJIT_C_LESS:
2141         case SLJIT_C_GREATER_EQUAL:
2142         case SLJIT_C_FLOAT_LESS:
2143         case SLJIT_C_FLOAT_GREATER_EQUAL:
2144                 dst_ar = ULESS_FLAG;
2145                 break;
2146         case SLJIT_C_GREATER:
2147         case SLJIT_C_LESS_EQUAL:
2148         case SLJIT_C_FLOAT_GREATER:
2149         case SLJIT_C_FLOAT_LESS_EQUAL:
2150                 dst_ar = UGREATER_FLAG;
2151                 break;
2152         case SLJIT_C_SIG_LESS:
2153         case SLJIT_C_SIG_GREATER_EQUAL:
2154                 dst_ar = LESS_FLAG;
2155                 break;
2156         case SLJIT_C_SIG_GREATER:
2157         case SLJIT_C_SIG_LESS_EQUAL:
2158                 dst_ar = GREATER_FLAG;
2159                 break;
2160         case SLJIT_C_OVERFLOW:
2161         case SLJIT_C_NOT_OVERFLOW:
2162                 dst_ar = OVERFLOW_FLAG;
2163                 break;
2164         case SLJIT_C_MUL_OVERFLOW:
2165         case SLJIT_C_MUL_NOT_OVERFLOW:
2166                 FAIL_IF(CMPLTUI(sugg_dst_ar, OVERFLOW_FLAG, 1));
2167                 dst_ar = sugg_dst_ar;
2168                 type ^= 0x1; /* Flip type bit for the XORI below. */
2169                 break;
2170         case SLJIT_C_FLOAT_EQUAL:
2171         case SLJIT_C_FLOAT_NOT_EQUAL:
2172                 dst_ar = EQUAL_FLAG;
2173                 break;
2174
2175         default:
2176                 SLJIT_ASSERT_STOP();
2177                 dst_ar = sugg_dst_ar;
2178                 break;
2179         }
2180
2181         if (type & 0x1) {
2182                 FAIL_IF(XORI(sugg_dst_ar, dst_ar, 1));
2183                 dst_ar = sugg_dst_ar;
2184         }
2185
2186         if (op >= SLJIT_ADD) {
2187                 if (TMP_REG2_mapped != dst_ar)
2188                         FAIL_IF(ADD(TMP_REG2_mapped, dst_ar, ZERO));
2189                 return emit_op(compiler, op | flags, CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0);
2190         }
2191
2192         if (dst & SLJIT_MEM)
2193                 return emit_op_mem(compiler, WORD_DATA, dst_ar, dst, dstw);
2194
2195         if (sugg_dst_ar != dst_ar)
2196                 return ADD(sugg_dst_ar, dst_ar, ZERO);
2197
2198         return SLJIT_SUCCESS;
2199 }
2200
2201 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) {
2202         CHECK_ERROR();
2203         check_sljit_emit_op0(compiler, op);
2204
2205         op = GET_OPCODE(op);
2206         switch (op) {
2207         case SLJIT_NOP:
2208                 return push_0_buffer(compiler, TILEGX_OPC_FNOP, __LINE__);
2209
2210         case SLJIT_BREAKPOINT:
2211                 return PI(BPT);
2212
2213         case SLJIT_UMUL:
2214         case SLJIT_SMUL:
2215         case SLJIT_UDIV:
2216         case SLJIT_SDIV:
2217                 SLJIT_ASSERT_STOP();
2218         }
2219
2220         return SLJIT_SUCCESS;
2221 }
2222
2223 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw)
2224 {
2225         CHECK_ERROR();
2226         check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
2227         ADJUST_LOCAL_OFFSET(dst, dstw);
2228         ADJUST_LOCAL_OFFSET(src, srcw);
2229
2230         switch (GET_OPCODE(op)) {
2231         case SLJIT_MOV:
2232         case SLJIT_MOV_P:
2233                 return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
2234
2235         case SLJIT_MOV_UI:
2236                 return emit_op(compiler, SLJIT_MOV_UI, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
2237
2238         case SLJIT_MOV_SI:
2239                 return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
2240
2241         case SLJIT_MOV_UB:
2242                 return emit_op(compiler, SLJIT_MOV_UB, BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub) srcw : srcw);
2243
2244         case SLJIT_MOV_SB:
2245                 return emit_op(compiler, SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb) srcw : srcw);
2246
2247         case SLJIT_MOV_UH:
2248                 return emit_op(compiler, SLJIT_MOV_UH, HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh) srcw : srcw);
2249
2250         case SLJIT_MOV_SH:
2251                 return emit_op(compiler, SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh) srcw : srcw);
2252
2253         case SLJIT_MOVU:
2254         case SLJIT_MOVU_P:
2255                 return emit_op(compiler, SLJIT_MOV, WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
2256
2257         case SLJIT_MOVU_UI:
2258                 return emit_op(compiler, SLJIT_MOV_UI, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
2259
2260         case SLJIT_MOVU_SI:
2261                 return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
2262
2263         case SLJIT_MOVU_UB:
2264                 return emit_op(compiler, SLJIT_MOV_UB, BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub) srcw : srcw);
2265
2266         case SLJIT_MOVU_SB:
2267                 return emit_op(compiler, SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb) srcw : srcw);
2268
2269         case SLJIT_MOVU_UH:
2270                 return emit_op(compiler, SLJIT_MOV_UH, HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh) srcw : srcw);
2271
2272         case SLJIT_MOVU_SH:
2273                 return emit_op(compiler, SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh) srcw : srcw);
2274
2275         case SLJIT_NOT:
2276                 return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
2277
2278         case SLJIT_NEG:
2279                 return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw);
2280
2281         case SLJIT_CLZ:
2282                 return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
2283         }
2284
2285         return SLJIT_SUCCESS;
2286 }
2287
2288 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w)
2289 {
2290         CHECK_ERROR();
2291         check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2292         ADJUST_LOCAL_OFFSET(dst, dstw);
2293         ADJUST_LOCAL_OFFSET(src1, src1w);
2294         ADJUST_LOCAL_OFFSET(src2, src2w);
2295
2296         switch (GET_OPCODE(op)) {
2297         case SLJIT_ADD:
2298         case SLJIT_ADDC:
2299                 return emit_op(compiler, op, CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
2300
2301         case SLJIT_SUB:
2302         case SLJIT_SUBC:
2303                 return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, src2w);
2304
2305         case SLJIT_MUL:
2306                 return emit_op(compiler, op, CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w);
2307
2308         case SLJIT_AND:
2309         case SLJIT_OR:
2310         case SLJIT_XOR:
2311                 return emit_op(compiler, op, CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
2312
2313         case SLJIT_SHL:
2314         case SLJIT_LSHR:
2315         case SLJIT_ASHR:
2316                 if (src2 & SLJIT_IMM)
2317                         src2w &= 0x3f;
2318                 if (op & SLJIT_INT_OP)
2319                         src2w &= 0x1f;
2320
2321                 return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, src2w);
2322         }
2323
2324         return SLJIT_SUCCESS;
2325 }
2326
2327 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label * sljit_emit_label(struct sljit_compiler *compiler)
2328 {
2329         struct sljit_label *label;
2330
2331         flush_buffer(compiler);
2332
2333         CHECK_ERROR_PTR();
2334         check_sljit_emit_label(compiler);
2335
2336         if (compiler->last_label && compiler->last_label->size == compiler->size)
2337                 return compiler->last_label;
2338
2339         label = (struct sljit_label *)ensure_abuf(compiler, sizeof(struct sljit_label));
2340         PTR_FAIL_IF(!label);
2341         set_label(label, compiler);
2342         return label;
2343 }
2344
2345 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
2346 {
2347         sljit_si src_r = TMP_REG2;
2348         struct sljit_jump *jump = NULL;
2349
2350         flush_buffer(compiler);
2351
2352         CHECK_ERROR();
2353         check_sljit_emit_ijump(compiler, type, src, srcw);
2354         ADJUST_LOCAL_OFFSET(src, srcw);
2355
2356         if (FAST_IS_REG(src)) {
2357                 if (reg_map[src] != 0)
2358                         src_r = src;
2359                 else
2360                         FAIL_IF(ADD_SOLO(TMP_REG2_mapped, reg_map[src], ZERO));
2361         }
2362
2363         if (type >= SLJIT_CALL0) {
2364                 SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2);
2365                 if (src & (SLJIT_IMM | SLJIT_MEM)) {
2366                         if (src & SLJIT_IMM)
2367                                 FAIL_IF(emit_const(compiler, reg_map[PIC_ADDR_REG], srcw, 1));
2368                         else {
2369                                 SLJIT_ASSERT(src_r == TMP_REG2 && (src & SLJIT_MEM));
2370                                 FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
2371                         }
2372
2373                         FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_SCRATCH_REG1], ZERO));
2374
2375                         FAIL_IF(ADDI_SOLO(54, 54, -16));
2376
2377                         FAIL_IF(JALR_SOLO(reg_map[PIC_ADDR_REG]));
2378
2379                         return ADDI_SOLO(54, 54, 16);
2380                 }
2381
2382                 /* Register input. */
2383                 if (type >= SLJIT_CALL1)
2384                         FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_SCRATCH_REG1], ZERO));
2385
2386                 FAIL_IF(ADD_SOLO(reg_map[PIC_ADDR_REG], reg_map[src_r], ZERO));
2387
2388                 FAIL_IF(ADDI_SOLO(54, 54, -16));
2389
2390                 FAIL_IF(JALR_SOLO(reg_map[src_r]));
2391
2392                 return ADDI_SOLO(54, 54, 16);
2393         }
2394
2395         if (src & SLJIT_IMM) {
2396                 jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
2397                 FAIL_IF(!jump);
2398                 set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_JAL : 0));
2399                 jump->u.target = srcw;
2400                 FAIL_IF(emit_const(compiler, TMP_REG2_mapped, 0, 1));
2401
2402                 if (type >= SLJIT_FAST_CALL) {
2403                         FAIL_IF(ADD_SOLO(ZERO, ZERO, ZERO));
2404                         jump->addr = compiler->size;
2405                         FAIL_IF(JR_SOLO(reg_map[src_r]));
2406                 } else {
2407                         jump->addr = compiler->size;
2408                         FAIL_IF(JR_SOLO(reg_map[src_r]));
2409                 }
2410
2411                 return SLJIT_SUCCESS;
2412
2413         } else if (src & SLJIT_MEM)
2414                 FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
2415
2416         FAIL_IF(JR_SOLO(reg_map[src_r]));
2417
2418         if (jump)
2419                 jump->addr = compiler->size;
2420
2421         return SLJIT_SUCCESS;
2422 }
2423
/*
 * Branch-selection helpers.  Each one stores a conditional-branch encoding
 * testing register `src` in the variable `inst` and marks `flags` with
 * IS_COND; both variables must exist in the invoking scope.  BR_Z selects the
 * BEQZ encoding, BR_NZ the BNEZ encoding.
 *
 * The bodies are wrapped in do { } while (0) so that an invocation followed
 * by a semicolon is exactly one statement, even under an unbraced if/else.
 */
#define BR_Z(src) \
	do { \
		inst = BEQZ_X1 | SRCA_X1(src); \
		flags = IS_COND; \
	} while (0)

#define BR_NZ(src) \
	do { \
		inst = BNEZ_X1 | SRCA_X1(src); \
		flags = IS_COND; \
	} while (0)
2431
2432 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
2433 {
2434         struct sljit_jump *jump;
2435         sljit_ins inst;
2436         sljit_si flags = 0;
2437
2438         flush_buffer(compiler);
2439
2440         CHECK_ERROR_PTR();
2441         check_sljit_emit_jump(compiler, type);
2442
2443         jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
2444         PTR_FAIL_IF(!jump);
2445         set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2446         type &= 0xff;
2447
2448         switch (type) {
2449         case SLJIT_C_EQUAL:
2450         case SLJIT_C_FLOAT_NOT_EQUAL:
2451                 BR_NZ(EQUAL_FLAG);
2452                 break;
2453         case SLJIT_C_NOT_EQUAL:
2454         case SLJIT_C_FLOAT_EQUAL:
2455                 BR_Z(EQUAL_FLAG);
2456                 break;
2457         case SLJIT_C_LESS:
2458         case SLJIT_C_FLOAT_LESS:
2459                 BR_Z(ULESS_FLAG);
2460                 break;
2461         case SLJIT_C_GREATER_EQUAL:
2462         case SLJIT_C_FLOAT_GREATER_EQUAL:
2463                 BR_NZ(ULESS_FLAG);
2464                 break;
2465         case SLJIT_C_GREATER:
2466         case SLJIT_C_FLOAT_GREATER:
2467                 BR_Z(UGREATER_FLAG);
2468                 break;
2469         case SLJIT_C_LESS_EQUAL:
2470         case SLJIT_C_FLOAT_LESS_EQUAL:
2471                 BR_NZ(UGREATER_FLAG);
2472                 break;
2473         case SLJIT_C_SIG_LESS:
2474                 BR_Z(LESS_FLAG);
2475                 break;
2476         case SLJIT_C_SIG_GREATER_EQUAL:
2477                 BR_NZ(LESS_FLAG);
2478                 break;
2479         case SLJIT_C_SIG_GREATER:
2480                 BR_Z(GREATER_FLAG);
2481                 break;
2482         case SLJIT_C_SIG_LESS_EQUAL:
2483                 BR_NZ(GREATER_FLAG);
2484                 break;
2485         case SLJIT_C_OVERFLOW:
2486         case SLJIT_C_MUL_OVERFLOW:
2487                 BR_Z(OVERFLOW_FLAG);
2488                 break;
2489         case SLJIT_C_NOT_OVERFLOW:
2490         case SLJIT_C_MUL_NOT_OVERFLOW:
2491                 BR_NZ(OVERFLOW_FLAG);
2492                 break;
2493         default:
2494                 /* Not conditional branch. */
2495                 inst = 0;
2496                 break;
2497         }
2498
2499         jump->flags |= flags;
2500
2501         if (inst) {
2502                 inst = inst | ((type <= SLJIT_JUMP) ? BOFF_X1(5) : BOFF_X1(6));
2503                 PTR_FAIL_IF(PI(inst));
2504         }
2505
2506         PTR_FAIL_IF(emit_const(compiler, TMP_REG2_mapped, 0, 1));
2507         if (type <= SLJIT_JUMP) {
2508                 jump->addr = compiler->size;
2509                 PTR_FAIL_IF(JR_SOLO(TMP_REG2_mapped));
2510         } else {
2511                 SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2);
2512                 /* Cannot be optimized out if type is >= CALL0. */
2513                 jump->flags |= IS_JAL | (type >= SLJIT_CALL0 ? SLJIT_REWRITABLE_JUMP : 0);
2514                 PTR_FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_SCRATCH_REG1], ZERO));
2515                 jump->addr = compiler->size;
2516                 PTR_FAIL_IF(JALR_SOLO(TMP_REG2_mapped));
2517         }
2518
2519         return jump;
2520 }
2521
2522 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
2523 {
2524         return 0;
2525 }
2526
2527 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw)
2528 {
2529         SLJIT_ASSERT_STOP();
2530 }
2531
2532 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w)
2533 {
2534         SLJIT_ASSERT_STOP();
2535 }
2536
2537 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const * sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
2538 {
2539         struct sljit_const *const_;
2540         sljit_si reg;
2541
2542         flush_buffer(compiler);
2543
2544         CHECK_ERROR_PTR();
2545         check_sljit_emit_const(compiler, dst, dstw, init_value);
2546         ADJUST_LOCAL_OFFSET(dst, dstw);
2547
2548         const_ = (struct sljit_const *)ensure_abuf(compiler, sizeof(struct sljit_const));
2549         PTR_FAIL_IF(!const_);
2550         set_const(const_, compiler);
2551
2552         reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
2553
2554         PTR_FAIL_IF(emit_const_64(compiler, reg, init_value, 1));
2555
2556         if (dst & SLJIT_MEM)
2557                 PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
2558         return const_;
2559 }
2560
2561 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
2562 {
2563         sljit_ins *inst = (sljit_ins *)addr;
2564
2565         inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_addr >> 32) & 0xffff) << 43);
2566         inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_addr >> 16) & 0xffff) << 43);
2567         inst[2] = (inst[2] & ~(0xFFFFL << 43)) | ((new_addr & 0xffff) << 43);
2568         SLJIT_CACHE_FLUSH(inst, inst + 3);
2569 }
2570
2571 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
2572 {
2573         sljit_ins *inst = (sljit_ins *)addr;
2574
2575         inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_constant >> 48) & 0xFFFFL) << 43);
2576         inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_constant >> 32) & 0xFFFFL) << 43);
2577         inst[2] = (inst[2] & ~(0xFFFFL << 43)) | (((new_constant >> 16) & 0xFFFFL) << 43);
2578         inst[3] = (inst[3] & ~(0xFFFFL << 43)) | ((new_constant & 0xFFFFL) << 43);
2579         SLJIT_CACHE_FLUSH(inst, inst + 4);
2580 }