chiark / gitweb /
Record pcre3 (2:8.38-1) in archive suite sid
[pcre3.git] / sljit / sljitNativeARM_32.c
index 6747c4f61748b0adc9e20602da174a7ce1b097fa..5cd4c71a298f6f1891fb05291b1c8ada65cebc00 100644 (file)
@@ -36,13 +36,13 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
 }
 
 /* Last register + 1. */
-#define TMP_REG1       (SLJIT_NO_REGISTERS + 1)
-#define TMP_REG2       (SLJIT_NO_REGISTERS + 2)
-#define TMP_REG3       (SLJIT_NO_REGISTERS + 3)
-#define TMP_PC         (SLJIT_NO_REGISTERS + 4)
+#define TMP_REG1       (SLJIT_NUMBER_OF_REGISTERS + 2)
+#define TMP_REG2       (SLJIT_NUMBER_OF_REGISTERS + 3)
+#define TMP_REG3       (SLJIT_NUMBER_OF_REGISTERS + 4)
+#define TMP_PC         (SLJIT_NUMBER_OF_REGISTERS + 5)
 
 #define TMP_FREG1      (0)
-#define TMP_FREG2      (SLJIT_FLOAT_REG6 + 1)
+#define TMP_FREG2      (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
 
 /* In ARM instruction words.
    Cache lines are usually 32 byte aligned. */
@@ -55,8 +55,8 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
        (((max_diff) / (sljit_si)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1))
 
 /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
-static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
-       0, 0, 1, 2, 10, 11, 4, 5, 6, 7, 8, 13, 3, 12, 14, 15
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
+       0, 0, 1, 2, 11, 10, 9, 8, 7, 6, 5, 4, 13, 3, 12, 14, 15
 };
 
 #define RM(rm) (reg_map[rm])
@@ -102,8 +102,12 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
 #define VABS_F32       0xeeb00ac0
 #define VADD_F32       0xee300a00
 #define VCMP_F32       0xeeb40a40
+#define VCVT_F32_S32   0xeeb80ac0
+#define VCVT_F64_F32   0xeeb70ac0
+#define VCVT_S32_F32   0xeebd0ac0
 #define VDIV_F32       0xee800a00
 #define VMOV_F32       0xeeb00a40
+#define VMOV           0xee000a10
 #define VMRS           0xeef1fa10
 #define VMUL_F32       0xee200a00
 #define VNEG_F32       0xeeb10a40
@@ -311,11 +315,13 @@ struct future_patch {
        sljit_si value;
 };
 
-static SLJIT_INLINE sljit_si resolve_const_pool_index(struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr)
+static sljit_si resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr)
 {
        sljit_si value;
        struct future_patch *curr_patch, *prev_patch;
 
+       SLJIT_UNUSED_ARG(compiler);
+
        /* Using the values generated by patch_pc_relative_loads. */
        if (!*first_patch)
                value = (sljit_si)cpool_start_address[cpool_current_index];
@@ -333,7 +339,7 @@ static SLJIT_INLINE sljit_si resolve_const_pool_index(struct future_patch **firs
                                        prev_patch->next = curr_patch->next;
                                else
                                        *first_patch = curr_patch->next;
-                               SLJIT_FREE(curr_patch);
+                               SLJIT_FREE(curr_patch, compiler->allocator_data);
                                break;
                        }
                        prev_patch = curr_patch;
@@ -343,12 +349,12 @@ static SLJIT_INLINE sljit_si resolve_const_pool_index(struct future_patch **firs
 
        if (value >= 0) {
                if ((sljit_uw)value > cpool_current_index) {
-                       curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch));
+                       curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch), compiler->allocator_data);
                        if (!curr_patch) {
                                while (*first_patch) {
                                        curr_patch = *first_patch;
                                        *first_patch = (*first_patch)->next;
-                                       SLJIT_FREE(curr_patch);
+                                       SLJIT_FREE(curr_patch, compiler->allocator_data);
                                }
                                return SLJIT_ERR_ALLOC_FAILED;
                        }
@@ -570,7 +576,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
        struct sljit_const *const_;
 
        CHECK_ERROR_PTR();
-       check_sljit_generate_code(compiler);
+       CHECK_PTR(check_sljit_generate_code(compiler));
        reverse_buf(compiler);
 
        /* Second code generation pass. */
@@ -619,7 +625,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
                                        cpool_skip_alignment--;
                                }
                                else {
-                                       if (SLJIT_UNLIKELY(resolve_const_pool_index(&first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
+                                       if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
                                                SLJIT_FREE_EXEC(code);
                                                compiler->error = SLJIT_ERR_ALLOC_FAILED;
                                                return NULL;
@@ -709,7 +715,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
                buf_end = buf_ptr + compiler->cpool_fill;
                cpool_current_index = 0;
                while (buf_ptr < buf_end) {
-                       if (SLJIT_UNLIKELY(resolve_const_pool_index(&first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
+                       if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
                                SLJIT_FREE_EXEC(code);
                                compiler->error = SLJIT_ERR_ALLOC_FAILED;
                                return NULL;
@@ -810,9 +816,6 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 #define SIGNED_DATA    0x40
 #define LOAD_DATA      0x80
 
-#define EMIT_INSTRUCTION(inst) \
-       FAIL_IF(push_inst(compiler, (inst)))
-
 /* Condition: AL. */
 #define EMIT_DATA_PROCESS_INS(opcode, set_flags, dst, src1, src2) \
        (0xe0000000 | ((opcode) << 21) | (set_flags) | RD(dst) | RN(src1) | (src2))
@@ -822,111 +825,85 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si i
        sljit_si src1, sljit_sw src1w,
        sljit_si src2, sljit_sw src2w);
 
-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
 {
-       sljit_si size;
+       sljit_si size, i, tmp;
        sljit_uw push;
 
        CHECK_ERROR();
-       check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);
-
-       compiler->scratches = scratches;
-       compiler->saveds = saveds;
-#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
-       compiler->logical_local_size = local_size;
-#endif
+       CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
+       set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
 
        /* Push saved registers, temporary registers
           stmdb sp!, {..., lr} */
        push = PUSH | (1 << 14);
-       if (scratches >= 5)
-               push |= 1 << 11;
-       if (scratches >= 4)
-               push |= 1 << 10;
-       if (saveds >= 5)
-               push |= 1 << 8;
-       if (saveds >= 4)
-               push |= 1 << 7;
-       if (saveds >= 3)
-               push |= 1 << 6;
-       if (saveds >= 2)
-               push |= 1 << 5;
-       if (saveds >= 1)
-               push |= 1 << 4;
-       EMIT_INSTRUCTION(push);
+
+       tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
+       for (i = SLJIT_S0; i >= tmp; i--)
+               push |= 1 << reg_map[i];
+
+       for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
+               push |= 1 << reg_map[i];
+
+       FAIL_IF(push_inst(compiler, push));
 
        /* Stack must be aligned to 8 bytes: */
-       size = (1 + saveds) * sizeof(sljit_uw);
-       if (scratches >= 4)
-               size += (scratches - 3) * sizeof(sljit_uw);
-       local_size += size;
-       local_size = (local_size + 7) & ~7;
-       local_size -= size;
+       size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
+       local_size = ((size + local_size + 7) & ~7) - size;
        compiler->local_size = local_size;
        if (local_size > 0)
-               FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, local_size));
+               FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));
 
        if (args >= 1)
-               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG1, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG1)));
+               FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S0, SLJIT_UNUSED, RM(SLJIT_R0))));
        if (args >= 2)
-               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG2, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG2)));
+               FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S1, SLJIT_UNUSED, RM(SLJIT_R1))));
        if (args >= 3)
-               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG3, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG3)));
+               FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S2, SLJIT_UNUSED, RM(SLJIT_R2))));
 
        return SLJIT_SUCCESS;
 }
 
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
 {
        sljit_si size;
 
-       CHECK_ERROR_VOID();
-       check_sljit_set_context(compiler, args, scratches, saveds, local_size);
-
-       compiler->scratches = scratches;
-       compiler->saveds = saveds;
-#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
-       compiler->logical_local_size = local_size;
-#endif
+       CHECK_ERROR();
+       CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
+       set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
 
-       size = (1 + saveds) * sizeof(sljit_uw);
-       if (scratches >= 4)
-               size += (scratches - 3) * sizeof(sljit_uw);
-       local_size += size;
-       local_size = (local_size + 7) & ~7;
-       local_size -= size;
-       compiler->local_size = local_size;
+       size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
+       compiler->local_size = ((size + local_size + 7) & ~7) - size;
+       return SLJIT_SUCCESS;
 }
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
 {
+       sljit_si i, tmp;
        sljit_uw pop;
 
        CHECK_ERROR();
-       check_sljit_emit_return(compiler, op, src, srcw);
+       CHECK(check_sljit_emit_return(compiler, op, src, srcw));
 
        FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
 
        if (compiler->local_size > 0)
-               FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, compiler->local_size));
+               FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
 
-       pop = POP | (1 << 15);
        /* Push saved registers, temporary registers
           ldmia sp!, {..., pc} */
-       if (compiler->scratches >= 5)
-               pop |= 1 << 11;
-       if (compiler->scratches >= 4)
-               pop |= 1 << 10;
-       if (compiler->saveds >= 5)
-               pop |= 1 << 8;
-       if (compiler->saveds >= 4)
-               pop |= 1 << 7;
-       if (compiler->saveds >= 3)
-               pop |= 1 << 6;
-       if (compiler->saveds >= 2)
-               pop |= 1 << 5;
-       if (compiler->saveds >= 1)
-               pop |= 1 << 4;
+       pop = POP | (1 << 15);
+
+       tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
+       for (i = SLJIT_S0; i >= tmp; i--)
+               pop |= 1 << reg_map[i];
+
+       for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
+               pop |= 1 << reg_map[i];
 
        return push_inst(compiler, pop);
 }
@@ -1031,7 +1008,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj
 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
                        if (op == SLJIT_MOV_UB)
                                return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff));
-                       EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2]));
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2])));
                        return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_UB ? 0x20 : 0x40) | reg_map[dst]));
 #else
                        return push_inst(compiler, (op == SLJIT_MOV_UB ? UXTB : SXTB) | RD(dst) | RM(src2));
@@ -1050,7 +1027,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj
                SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
                if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) {
 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
-                       EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2]));
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2])));
                        return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_UH ? 0x20 : 0x40) | reg_map[dst]));
 #else
                        return push_inst(compiler, (op == SLJIT_MOV_UH ? UXTH : SXTH) | RD(dst) | RM(src2));
@@ -1303,8 +1280,8 @@ static sljit_si generate_int(struct sljit_compiler *compiler, sljit_si reg, slji
                        return 0;
        }
 
-       EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(positive ? MOV_DP : MVN_DP, 0, reg, SLJIT_UNUSED, imm1));
-       EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(positive ? ORR_DP : BIC_DP, 0, reg, reg, imm2));
+       FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? MOV_DP : MVN_DP, 0, reg, SLJIT_UNUSED, imm1)));
+       FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? ORR_DP : BIC_DP, 0, reg, reg, imm2)));
        return 1;
 }
 #endif
@@ -1320,16 +1297,12 @@ static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si reg, sl
 
        /* Create imm by 1 inst. */
        tmp = get_imm(imm);
-       if (tmp) {
-               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, tmp));
-               return SLJIT_SUCCESS;
-       }
+       if (tmp)
+               return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, tmp));
 
        tmp = get_imm(~imm);
-       if (tmp) {
-               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, tmp));
-               return SLJIT_SUCCESS;
-       }
+       if (tmp)
+               return push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, tmp));
 
 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
        /* Create imm by 2 inst. */
@@ -1369,14 +1342,14 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_fl
                if (imm) {
                        if (inp_flags & ARG_TEST)
                                return 1;
-                       EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, imm));
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, imm)));
                        return -1;
                }
                imm = get_imm(~argw);
                if (imm) {
                        if (inp_flags & ARG_TEST)
                                return 1;
-                       EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, imm));
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, imm)));
                        return -1;
                }
                return 0;
@@ -1394,8 +1367,8 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_fl
 
                if (inp_flags & ARG_TEST)
                        return 1;
-               EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK,
-                       RM(OFFS_REG(arg)) | (IS_TYPE1_TRANSFER(inp_flags) ? SRC2_IMM : 0) | ((argw & 0x3) << 7)));
+               FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK,
+                       RM(OFFS_REG(arg)) | (IS_TYPE1_TRANSFER(inp_flags) ? SRC2_IMM : 0) | ((argw & 0x3) << 7))));
                return -1;
        }
 
@@ -1403,13 +1376,13 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_fl
                if (argw >= 0 && argw <= 0xfff) {
                        if (inp_flags & ARG_TEST)
                                return 1;
-                       EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, argw));
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, argw)));
                        return -1;
                }
                if (argw < 0 && argw >= -0xfff) {
                        if (inp_flags & ARG_TEST)
                                return 1;
-                       EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, -argw));
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, -argw)));
                        return -1;
                }
        }
@@ -1417,14 +1390,14 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_fl
                if (argw >= 0 && argw <= 0xff) {
                        if (inp_flags & ARG_TEST)
                                return 1;
-                       EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw)));
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw))));
                        return -1;
                }
                if (argw < 0 && argw >= -0xff) {
                        if (inp_flags & ARG_TEST)
                                return 1;
                        argw = -argw;
-                       EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw)));
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw))));
                        return -1;
                }
        }
@@ -1477,7 +1450,7 @@ static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_
                        /* This can only happen for stores */ \
                        /* since ldr reg, [reg, ...]! has no meaning */ \
                        SLJIT_ASSERT(!(inp_flags & LOAD_DATA)); \
-                       EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(reg))); \
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(reg)))); \
                        reg = TMP_REG3; \
                } \
        }
@@ -1537,9 +1510,8 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags,
                SLJIT_ASSERT((argw & 0x3) && !(max_delta & 0xf00));
                if (inp_flags & WRITE_BACK)
                        tmp_r = arg & REG_MASK;
-               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7)));
-               EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, 0, reg, tmp_r, TYPE2_TRANSFER_IMM(0)));
-               return SLJIT_SUCCESS;
+               FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))));
+               return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, 0, reg, tmp_r, TYPE2_TRANSFER_IMM(0)));
        }
 
        imm = (sljit_uw)(argw - compiler->cache_argw);
@@ -1558,7 +1530,7 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags,
        imm = get_imm(argw & ~max_delta);
        if (imm) {
                TEST_WRITE_BACK();
-               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, imm));
+               FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, imm)));
                GETPUT_ARG_DATA_TRANSFER(1, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta);
                return SLJIT_SUCCESS;
        }
@@ -1567,15 +1539,14 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags,
        if (imm) {
                argw = -argw;
                TEST_WRITE_BACK();
-               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(SUB_DP, 0, tmp_r, arg & REG_MASK, imm));
+               FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, tmp_r, arg & REG_MASK, imm)));
                GETPUT_ARG_DATA_TRANSFER(0, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta);
                return SLJIT_SUCCESS;
        }
 
        if ((compiler->cache_arg & SLJIT_IMM) && compiler->cache_argw == argw) {
                TEST_WRITE_BACK();
-               EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
-               return SLJIT_SUCCESS;
+               return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
        }
 
        if (argw == next_argw && (next_arg & SLJIT_MEM)) {
@@ -1586,15 +1557,14 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags,
                compiler->cache_argw = argw;
 
                TEST_WRITE_BACK();
-               EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
-               return SLJIT_SUCCESS;
+               return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
        }
 
        imm = (sljit_uw)(argw - next_argw);
        if (arg == next_arg && !(inp_flags & WRITE_BACK) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) {
                SLJIT_ASSERT(inp_flags & LOAD_DATA);
                FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
-               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, TMP_REG3, reg_map[arg & REG_MASK]));
+               FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, TMP_REG3, reg_map[arg & REG_MASK])));
 
                compiler->cache_arg = arg;
                compiler->cache_argw = argw;
@@ -1610,8 +1580,7 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags,
        }
 
        FAIL_IF(load_immediate(compiler, tmp_r, argw));
-       EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, reg_map[tmp_r] | (max_delta & 0xf00 ? SRC2_IMM : 0)));
-       return SLJIT_SUCCESS;
+       return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, reg_map[tmp_r] | (max_delta & 0xf00 ? SRC2_IMM : 0)));
 }
 
 static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
@@ -1838,44 +1807,59 @@ extern int __aeabi_idivmod(int numerator, int denominator);
 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
 {
        CHECK_ERROR();
-       check_sljit_emit_op0(compiler, op);
+       CHECK(check_sljit_emit_op0(compiler, op));
 
        op = GET_OPCODE(op);
        switch (op) {
        case SLJIT_BREAKPOINT:
-               EMIT_INSTRUCTION(BKPT);
+               FAIL_IF(push_inst(compiler, BKPT));
                break;
        case SLJIT_NOP:
-               EMIT_INSTRUCTION(NOP);
+               FAIL_IF(push_inst(compiler, NOP));
                break;
-       case SLJIT_UMUL:
-       case SLJIT_SMUL:
+       case SLJIT_LUMUL:
+       case SLJIT_LSMUL:
 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
-               return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL)
-                       | (reg_map[SLJIT_SCRATCH_REG2] << 16)
-                       | (reg_map[SLJIT_SCRATCH_REG1] << 12)
-                       | (reg_map[SLJIT_SCRATCH_REG1] << 8)
-                       | reg_map[SLJIT_SCRATCH_REG2]);
+               return push_inst(compiler, (op == SLJIT_LUMUL ? UMULL : SMULL)
+                       | (reg_map[SLJIT_R1] << 16)
+                       | (reg_map[SLJIT_R0] << 12)
+                       | (reg_map[SLJIT_R0] << 8)
+                       | reg_map[SLJIT_R1]);
 #else
-               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG2)));
-               return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL)
-                       | (reg_map[SLJIT_SCRATCH_REG2] << 16)
-                       | (reg_map[SLJIT_SCRATCH_REG1] << 12)
-                       | (reg_map[SLJIT_SCRATCH_REG1] << 8)
+               FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_R1))));
+               return push_inst(compiler, (op == SLJIT_LUMUL ? UMULL : SMULL)
+                       | (reg_map[SLJIT_R1] << 16)
+                       | (reg_map[SLJIT_R0] << 12)
+                       | (reg_map[SLJIT_R0] << 8)
                        | reg_map[TMP_REG1]);
 #endif
-       case SLJIT_UDIV:
-       case SLJIT_SDIV:
-               if (compiler->scratches >= 3)
-                       EMIT_INSTRUCTION(0xe52d2008 /* str r2, [sp, #-8]! */);
+       case SLJIT_UDIVMOD:
+       case SLJIT_SDIVMOD:
+       case SLJIT_UDIVI:
+       case SLJIT_SDIVI:
+               SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments);
+               SLJIT_COMPILE_ASSERT(reg_map[2] == 1 && reg_map[3] == 2, bad_register_mapping);
+
+               if ((op >= SLJIT_UDIVI) && (compiler->scratches >= 3)) {
+                       FAIL_IF(push_inst(compiler, 0xe52d2008 /* str r2, [sp, #-8]! */));
+                       FAIL_IF(push_inst(compiler, 0xe58d1004 /* str r1, [sp, #4] */));
+               }
+               else if ((op >= SLJIT_UDIVI) || (compiler->scratches >= 3))
+                       FAIL_IF(push_inst(compiler, 0xe52d0008 | (op >= SLJIT_UDIVI ? 0x1000 : 0x2000) /* str r1/r2, [sp, #-8]! */));
+
 #if defined(__GNUC__)
                FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
-                       (op == SLJIT_UDIV ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
+                       ((op | 0x2) == SLJIT_UDIVI ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
 #else
 #error "Software divmod functions are needed"
 #endif
-               if (compiler->scratches >= 3)
-                       return push_inst(compiler, 0xe49d2008 /* ldr r2, [sp], #8 */);
+
+               if ((op >= SLJIT_UDIVI) && (compiler->scratches >= 3)) {
+                       FAIL_IF(push_inst(compiler, 0xe59d1004 /* ldr r1, [sp, #4] */));
+                       FAIL_IF(push_inst(compiler, 0xe49d2008 /* ldr r2, [sp], #8 */));
+               }
+               else if ((op >= SLJIT_UDIVI) || (compiler->scratches >= 3))
+                       return push_inst(compiler, 0xe49d0008 | (op >= SLJIT_UDIVI ? 0x1000 : 0x2000) /* ldr r1/r2, [sp], #8 */);
                return SLJIT_SUCCESS;
        }
 
@@ -1887,7 +1871,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler
        sljit_si src, sljit_sw srcw)
 {
        CHECK_ERROR();
-       check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
+       CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
        ADJUST_LOCAL_OFFSET(dst, dstw);
        ADJUST_LOCAL_OFFSET(src, srcw);
 
@@ -1932,7 +1916,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler
                return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);
 
        case SLJIT_NEG:
-#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+                       || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
                compiler->skip_checks = 1;
 #endif
                return sljit_emit_op2(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), dst, dstw, SLJIT_IMM, 0, src, srcw);
@@ -1950,7 +1935,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler
        sljit_si src2, sljit_sw src2w)
 {
        CHECK_ERROR();
-       check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+       CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
        ADJUST_LOCAL_OFFSET(dst, dstw);
        ADJUST_LOCAL_OFFSET(src1, src1w);
        ADJUST_LOCAL_OFFSET(src2, src2w);
@@ -1988,22 +1973,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
 {
-       check_sljit_get_register_index(reg);
+       CHECK_REG_INDEX(check_sljit_get_register_index(reg));
        return reg_map[reg];
 }
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
 {
-       check_sljit_get_float_register_index(reg);
-       return reg;
+       CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
+       return reg << 1;
 }
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
        void *instruction, sljit_si size)
 {
        CHECK_ERROR();
-       check_sljit_emit_op_custom(compiler, instruction, size);
-       SLJIT_ASSERT(size == 4);
+       CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
 
        return push_inst(compiler, *(sljit_uw*)instruction);
 }
@@ -2064,7 +2048,7 @@ static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sl
        SLJIT_ASSERT(arg & SLJIT_MEM);
 
        if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
-               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7)));
+               FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))));
                arg = SLJIT_MEM | TMP_REG1;
                argw = 0;
        }
@@ -2097,13 +2081,13 @@ static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sl
                }
                imm = get_imm(argw & ~0x3fc);
                if (imm) {
-                       EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, imm));
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, imm)));
                        return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, (argw & 0x3fc) >> 2));
                }
                imm = get_imm(-argw & ~0x3fc);
                if (imm) {
                        argw = -argw;
-                       EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG1, arg & REG_MASK, imm));
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG1, arg & REG_MASK, imm)));
                        return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG1, reg, (argw & 0x3fc) >> 2));
                }
        }
@@ -2112,7 +2096,7 @@ static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sl
        compiler->cache_argw = argw;
        if (arg & REG_MASK) {
                FAIL_IF(load_immediate(compiler, TMP_REG1, argw));
-               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, arg & REG_MASK, reg_map[TMP_REG1]));
+               FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, arg & REG_MASK, reg_map[TMP_REG1])));
        }
        else
                FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
@@ -2120,60 +2104,114 @@ static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sl
        return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, 0));
 }
 
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       if (src & SLJIT_MEM) {
+               FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src, srcw));
+               src = TMP_FREG1;
+       }
+
+       FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_SINGLE_OP, TMP_FREG1, src, 0)));
+
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       if (FAST_IS_REG(dst))
+               return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | (TMP_FREG1 << 16));
+
+       /* Store the integer value from a VFP register. */
+       return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+
+       if (FAST_IS_REG(src))
+               FAIL_IF(push_inst(compiler, VMOV | RD(src) | (TMP_FREG1 << 16)));
+       else if (src & SLJIT_MEM) {
+               /* Load the integer value into a VFP register. */
+               FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
+       }
+       else {
+               FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+               FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | (TMP_FREG1 << 16)));
+       }
+
+       FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_SINGLE_OP, dst_r, TMP_FREG1, 0)));
+
+       if (dst & SLJIT_MEM)
+               return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw);
+       return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       if (src1 & SLJIT_MEM) {
+               FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w));
+               src1 = TMP_FREG1;
+       }
+
+       if (src2 & SLJIT_MEM) {
+               FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w));
+               src2 = TMP_FREG2;
+       }
+
+       FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_SINGLE_OP, src1, src2, 0)));
+       return push_inst(compiler, VMRS);
+}
+
 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
        sljit_si dst, sljit_sw dstw,
        sljit_si src, sljit_sw srcw)
 {
-       sljit_si dst_fr;
+       sljit_si dst_r;
 
        CHECK_ERROR();
-       check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
-       SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error);
-
        compiler->cache_arg = 0;
        compiler->cache_argw = 0;
-       op ^= SLJIT_SINGLE_OP;
+       if (GET_OPCODE(op) != SLJIT_CONVD_FROMS)
+               op ^= SLJIT_SINGLE_OP;
 
-       if (GET_OPCODE(op) == SLJIT_CMPD) {
-               if (dst & SLJIT_MEM) {
-                       FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, dst, dstw));
-                       dst = TMP_FREG1;
-               }
-               if (src & SLJIT_MEM) {
-                       FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src, srcw));
-                       src = TMP_FREG2;
-               }
-               EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_SINGLE_OP, dst, src, 0));
-               EMIT_INSTRUCTION(VMRS);
-               return SLJIT_SUCCESS;
-       }
+       SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error);
+       SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
 
-       dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
 
        if (src & SLJIT_MEM) {
-               FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_fr, src, srcw));
-               src = dst_fr;
+               FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_r, src, srcw));
+               src = dst_r;
        }
 
        switch (GET_OPCODE(op)) {
-               case SLJIT_MOVD:
-                       if (src != dst_fr && dst_fr != TMP_FREG1)
-                               EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_SINGLE_OP, dst_fr, src, 0));
-                       break;
-               case SLJIT_NEGD:
-                       EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_SINGLE_OP, dst_fr, src, 0));
-                       break;
-               case SLJIT_ABSD:
-                       EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_SINGLE_OP, dst_fr, src, 0));
-                       break;
-       }
-
-       if (dst_fr == TMP_FREG1) {
-               if (GET_OPCODE(op) == SLJIT_MOVD)
-                       dst_fr = src;
-               FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), dst_fr, dst, dstw));
+       case SLJIT_DMOV:
+               if (src != dst_r) {
+                       if (dst_r != TMP_FREG1)
+                               FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0)));
+                       else
+                               dst_r = src;
+               }
+               break;
+       case SLJIT_DNEG:
+               FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0)));
+               break;
+       case SLJIT_DABS:
+               FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0)));
+               break;
+       case SLJIT_CONVD_FROMS:
+               FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0)));
+               op ^= SLJIT_SINGLE_OP;
+               break;
        }
 
+       if (dst & SLJIT_MEM)
+               return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), dst_r, dst, dstw);
        return SLJIT_SUCCESS;
 }
 
@@ -2182,16 +2220,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile
        sljit_si src1, sljit_sw src1w,
        sljit_si src2, sljit_sw src2w)
 {
-       sljit_si dst_fr;
+       sljit_si dst_r;
 
        CHECK_ERROR();
-       check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+       CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
 
        compiler->cache_arg = 0;
        compiler->cache_argw = 0;
        op ^= SLJIT_SINGLE_OP;
 
-       dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
 
        if (src2 & SLJIT_MEM) {
                FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w));
@@ -2204,24 +2245,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile
        }
 
        switch (GET_OPCODE(op)) {
-       case SLJIT_ADDD:
-               EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_SINGLE_OP, dst_fr, src2, src1));
+       case SLJIT_DADD:
+               FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1)));
                break;
 
-       case SLJIT_SUBD:
-               EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_SINGLE_OP, dst_fr, src2, src1));
+       case SLJIT_DSUB:
+               FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1)));
                break;
 
-       case SLJIT_MULD:
-               EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_SINGLE_OP, dst_fr, src2, src1));
+       case SLJIT_DMUL:
+               FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1)));
                break;
 
-       case SLJIT_DIVD:
-               EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_SINGLE_OP, dst_fr, src2, src1));
+       case SLJIT_DDIV:
+               FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1)));
                break;
        }
 
-       if (dst_fr == TMP_FREG1)
+       if (dst_r == TMP_FREG1)
                FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw));
 
        return SLJIT_SUCCESS;
@@ -2238,7 +2279,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile
 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
 {
        CHECK_ERROR();
-       check_sljit_emit_fast_enter(compiler, dst, dstw);
+       CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
        ADJUST_LOCAL_OFFSET(dst, dstw);
 
        /* For UNUSED dst. Uncommon, but possible. */
@@ -2252,7 +2293,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c
        if (getput_arg_fast(compiler, WORD_DATA, TMP_REG3, dst, dstw))
                return compiler->error;
        /* TMP_REG3 is used for caching. */
-       EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG2, SLJIT_UNUSED, RM(TMP_REG3)));
+       FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG2, SLJIT_UNUSED, RM(TMP_REG3))));
        compiler->cache_arg = 0;
        compiler->cache_argw = 0;
        return getput_arg(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0);
@@ -2261,11 +2302,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c
 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
 {
        CHECK_ERROR();
-       check_sljit_emit_fast_return(compiler, src, srcw);
+       CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
        ADJUST_LOCAL_OFFSET(src, srcw);
 
        if (FAST_IS_REG(src))
-               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(src)));
+               FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(src))));
        else if (src & SLJIT_MEM) {
                if (getput_arg_fast(compiler, WORD_DATA | LOAD_DATA, TMP_REG3, src, srcw))
                        FAIL_IF(compiler->error);
@@ -2273,7 +2314,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *
                        compiler->cache_arg = 0;
                        compiler->cache_argw = 0;
                        FAIL_IF(getput_arg(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, 0, 0));
-                       EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(TMP_REG2)));
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(TMP_REG2))));
                }
        }
        else if (src & SLJIT_IMM)
@@ -2288,53 +2329,54 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *
 static sljit_uw get_cc(sljit_si type)
 {
        switch (type) {
-       case SLJIT_C_EQUAL:
-       case SLJIT_C_MUL_NOT_OVERFLOW:
-       case SLJIT_C_FLOAT_EQUAL:
+       case SLJIT_EQUAL:
+       case SLJIT_MUL_NOT_OVERFLOW:
+       case SLJIT_D_EQUAL:
                return 0x00000000;
 
-       case SLJIT_C_NOT_EQUAL:
-       case SLJIT_C_MUL_OVERFLOW:
-       case SLJIT_C_FLOAT_NOT_EQUAL:
+       case SLJIT_NOT_EQUAL:
+       case SLJIT_MUL_OVERFLOW:
+       case SLJIT_D_NOT_EQUAL:
                return 0x10000000;
 
-       case SLJIT_C_LESS:
-       case SLJIT_C_FLOAT_LESS:
+       case SLJIT_LESS:
+       case SLJIT_D_LESS:
                return 0x30000000;
 
-       case SLJIT_C_GREATER_EQUAL:
-       case SLJIT_C_FLOAT_GREATER_EQUAL:
+       case SLJIT_GREATER_EQUAL:
+       case SLJIT_D_GREATER_EQUAL:
                return 0x20000000;
 
-       case SLJIT_C_GREATER:
-       case SLJIT_C_FLOAT_GREATER:
+       case SLJIT_GREATER:
+       case SLJIT_D_GREATER:
                return 0x80000000;
 
-       case SLJIT_C_LESS_EQUAL:
-       case SLJIT_C_FLOAT_LESS_EQUAL:
+       case SLJIT_LESS_EQUAL:
+       case SLJIT_D_LESS_EQUAL:
                return 0x90000000;
 
-       case SLJIT_C_SIG_LESS:
+       case SLJIT_SIG_LESS:
                return 0xb0000000;
 
-       case SLJIT_C_SIG_GREATER_EQUAL:
+       case SLJIT_SIG_GREATER_EQUAL:
                return 0xa0000000;
 
-       case SLJIT_C_SIG_GREATER:
+       case SLJIT_SIG_GREATER:
                return 0xc0000000;
 
-       case SLJIT_C_SIG_LESS_EQUAL:
+       case SLJIT_SIG_LESS_EQUAL:
                return 0xd0000000;
 
-       case SLJIT_C_OVERFLOW:
-       case SLJIT_C_FLOAT_UNORDERED:
+       case SLJIT_OVERFLOW:
+       case SLJIT_D_UNORDERED:
                return 0x60000000;
 
-       case SLJIT_C_NOT_OVERFLOW:
-       case SLJIT_C_FLOAT_ORDERED:
+       case SLJIT_NOT_OVERFLOW:
+       case SLJIT_D_ORDERED:
                return 0x70000000;
 
-       default: /* SLJIT_JUMP */
+       default:
+               SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3);
                return 0xe0000000;
        }
 }
@@ -2344,7 +2386,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
        struct sljit_label *label;
 
        CHECK_ERROR_PTR();
-       check_sljit_emit_label(compiler);
+       CHECK_PTR(check_sljit_emit_label(compiler));
 
        if (compiler->last_label && compiler->last_label->size == compiler->size)
                return compiler->last_label;
@@ -2360,7 +2402,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
        struct sljit_jump *jump;
 
        CHECK_ERROR_PTR();
-       check_sljit_emit_jump(compiler, type);
+       CHECK_PTR(check_sljit_emit_jump(compiler, type));
 
        jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
        PTR_FAIL_IF(!jump);
@@ -2401,7 +2443,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil
        struct sljit_jump *jump;
 
        CHECK_ERROR();
-       check_sljit_emit_ijump(compiler, type, src, srcw);
+       CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
        ADJUST_LOCAL_OFFSET(src, srcw);
 
        /* In ARM, we don't need to touch the arguments. */
@@ -2442,7 +2484,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com
        sljit_uw cc, ins;
 
        CHECK_ERROR();
-       check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);
+       CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
        ADJUST_LOCAL_OFFSET(dst, dstw);
        ADJUST_LOCAL_OFFSET(src, srcw);
 
@@ -2450,18 +2492,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com
                return SLJIT_SUCCESS;
 
        op = GET_OPCODE(op);
-       cc = get_cc(type);
+       cc = get_cc(type & 0xff);
        dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
 
        if (op < SLJIT_ADD) {
-               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 0));
-               EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc);
+               FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 0)));
+               FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc));
                return (dst_r == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS;
        }
 
        ins = (op == SLJIT_AND ? AND_DP : (op == SLJIT_OR ? ORR_DP : EOR_DP));
        if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) {
-               EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(ins, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc);
+               FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc));
                /* The condition must always be set, even if the ORR/EOR is not executed above. */
                return (flags & SLJIT_SET_E) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst))) : SLJIT_SUCCESS;
        }
@@ -2478,8 +2520,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com
                srcw = 0;
        }
 
-       EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 1) & ~COND_MASK) | cc);
-       EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000));
+       FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 1) & ~COND_MASK) | cc));
+       FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));
        if (dst_r == TMP_REG2)
                FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0));
 
@@ -2492,7 +2534,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
        sljit_si reg;
 
        CHECK_ERROR_PTR();
-       check_sljit_emit_const(compiler, dst, dstw, init_value);
+       CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
        ADJUST_LOCAL_OFFSET(dst, dstw);
 
        const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));