X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Foptabs.c;h=0d5cd73c3a2417de85df24dfbd5dab7c6ffc5dd0;hb=7dc5d28ee6bb7c1e9a6748c8d454d91816a6364c;hp=7901b95f6321535ff9f0a25e80376f805ca84a1a;hpb=8808bf16125e1bea5cd2e969d19a53b9618593f1;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/optabs.c b/gcc/optabs.c index 7901b95f632..0d5cd73c3a2 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -2052,11 +2052,11 @@ expand_binop (enum machine_mode mode, optab binoptab, rtx op0, rtx op1, { rtx temp = emit_move_insn (target, xtarget); - set_unique_reg_note (temp, - REG_EQUAL, - gen_rtx_fmt_ee (binoptab->code, mode, - copy_rtx (xop0), - copy_rtx (xop1))); + set_dst_reg_note (temp, REG_EQUAL, + gen_rtx_fmt_ee (binoptab->code, mode, + copy_rtx (xop0), + copy_rtx (xop1)), + target); } else target = xtarget; @@ -2104,11 +2104,12 @@ expand_binop (enum machine_mode mode, optab binoptab, rtx op0, rtx op1, if (optab_handler (mov_optab, mode) != CODE_FOR_nothing) { temp = emit_move_insn (target ? target : product, product); - set_unique_reg_note (temp, - REG_EQUAL, - gen_rtx_fmt_ee (MULT, mode, - copy_rtx (op0), - copy_rtx (op1))); + set_dst_reg_note (temp, + REG_EQUAL, + gen_rtx_fmt_ee (MULT, mode, + copy_rtx (op0), + copy_rtx (op1)), + target ? target : product); } return product; } @@ -2966,8 +2967,9 @@ expand_absneg_bit (enum rtx_code code, enum machine_mode mode, gen_lowpart (imode, target), 1, OPTAB_LIB_WIDEN); target = lowpart_subreg_maybe_copy (mode, temp, imode); - set_unique_reg_note (get_last_insn (), REG_EQUAL, - gen_rtx_fmt_e (code, mode, copy_rtx (op0))); + set_dst_reg_note (get_last_insn (), REG_EQUAL, + gen_rtx_fmt_e (code, mode, copy_rtx (op0)), + target); } return target; @@ -3899,8 +3901,7 @@ emit_libcall_block (rtx insns, rtx target, rtx result, rtx equiv) } last = emit_move_insn (target, result); - if (optab_handler (mov_optab, GET_MODE (target)) != CODE_FOR_nothing) - set_unique_reg_note (last, REG_EQUAL, copy_rtx (equiv)); + set_dst_reg_note (last, REG_EQUAL, copy_rtx (equiv), target); if (final_dest != target) emit_move_insn (final_dest, target); @@ -5213,11 +5214,10 @@ expand_fix (rtx to, rtx from, int unsignedp) { /* Make a place for a REG_NOTE and add it. */ insn = emit_move_insn (to, to); - set_unique_reg_note (insn, - REG_EQUAL, - gen_rtx_fmt_e (UNSIGNED_FIX, - GET_MODE (to), - copy_rtx (from))); + set_dst_reg_note (insn, REG_EQUAL, + gen_rtx_fmt_e (UNSIGNED_FIX, GET_MODE (to), + copy_rtx (from)), + to); } return; @@ -6586,6 +6586,57 @@ init_optabs (void) targetm.init_libfuncs (); } +/* A helper function for init_sync_libfuncs. Using the basename BASE, + install libfuncs into TAB for BASE_N for 1 <= N <= MAX. 
*/ + +static void +init_sync_libfuncs_1 (optab tab, const char *base, int max) +{ + enum machine_mode mode; + char buf[64]; + size_t len = strlen (base); + int i; + + gcc_assert (max <= 8); + gcc_assert (len + 3 < sizeof (buf)); + + memcpy (buf, base, len); + buf[len] = '_'; + buf[len + 1] = '0'; + buf[len + 2] = '\0'; + + mode = QImode; + for (i = 1; i <= max; i *= 2) + { + buf[len + 1] = '0' + i; + set_optab_libfunc (tab, mode, buf); + mode = GET_MODE_2XWIDER_MODE (mode); + } +} + +void +init_sync_libfuncs (int max) +{ + init_sync_libfuncs_1 (sync_compare_and_swap_optab, + "__sync_val_compare_and_swap", max); + init_sync_libfuncs_1 (sync_lock_test_and_set_optab, + "__sync_lock_test_and_set", max); + + init_sync_libfuncs_1 (sync_old_add_optab, "__sync_fetch_and_add", max); + init_sync_libfuncs_1 (sync_old_sub_optab, "__sync_fetch_and_sub", max); + init_sync_libfuncs_1 (sync_old_ior_optab, "__sync_fetch_and_or", max); + init_sync_libfuncs_1 (sync_old_and_optab, "__sync_fetch_and_and", max); + init_sync_libfuncs_1 (sync_old_xor_optab, "__sync_fetch_and_xor", max); + init_sync_libfuncs_1 (sync_old_nand_optab, "__sync_fetch_and_nand", max); + + init_sync_libfuncs_1 (sync_new_add_optab, "__sync_add_and_fetch", max); + init_sync_libfuncs_1 (sync_new_sub_optab, "__sync_sub_and_fetch", max); + init_sync_libfuncs_1 (sync_new_ior_optab, "__sync_or_and_fetch", max); + init_sync_libfuncs_1 (sync_new_and_optab, "__sync_and_and_fetch", max); + init_sync_libfuncs_1 (sync_new_xor_optab, "__sync_xor_and_fetch", max); + init_sync_libfuncs_1 (sync_new_nand_optab, "__sync_nand_and_fetch", max); +} + /* Print information about the current contents of the optabs on STDERR. */ @@ -6881,9 +6932,9 @@ can_vec_perm_for_code_p (enum tree_code code, enum machine_mode mode, break; case VEC_INTERLEAVE_HIGH_EXPR: - alt = nelt / 2; - /* FALLTHRU */ case VEC_INTERLEAVE_LOW_EXPR: + if ((BYTES_BIG_ENDIAN != 0) ^ (code == VEC_INTERLEAVE_HIGH_EXPR)) + alt = nelt / 2; for (i = 0; i < nelt / 2; ++i) { data[i * 2] = i + alt; @@ -6987,7 +7038,8 @@ expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target) } /* If the input is a constant, expand it specially. */ - if (CONSTANT_P (sel)) + gcc_assert (GET_MODE_CLASS (GET_MODE (sel)) == MODE_VECTOR_INT); + if (GET_CODE (sel) == CONST_VECTOR) { icode = direct_optab_handler (vec_perm_const_optab, mode); if (icode != CODE_FOR_nothing) @@ -7005,7 +7057,7 @@ expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target) { unsigned int j, this_e; - this_e = INTVAL (XVECEXP (sel, 0, i)); + this_e = INTVAL (CONST_VECTOR_ELT (sel, i)); this_e &= 2 * e - 1; this_e *= u; @@ -7165,24 +7217,46 @@ expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2, /* Return true if there is a compare_and_swap pattern. */ bool -can_compare_and_swap_p (enum machine_mode mode) +can_compare_and_swap_p (enum machine_mode mode, bool allow_libcall) { enum insn_code icode; - /* Check for __sync_compare_and_swap. */ - icode = direct_optab_handler (sync_compare_and_swap_optab, mode); - if (icode != CODE_FOR_nothing) - return true; - /* Check for __atomic_compare_and_swap. */ icode = direct_optab_handler (atomic_compare_and_swap_optab, mode); if (icode != CODE_FOR_nothing) - return true; + return true; + + /* Check for __sync_compare_and_swap. */ + icode = optab_handler (sync_compare_and_swap_optab, mode); + if (icode != CODE_FOR_nothing) + return true; + if (allow_libcall && optab_libfunc (sync_compare_and_swap_optab, mode)) + return true; /* No inline compare and swap. 
*/ return false; } +/* Return true if an atomic exchange can be performed. */ + +bool +can_atomic_exchange_p (enum machine_mode mode, bool allow_libcall) +{ + enum insn_code icode; + + /* Check for __atomic_exchange. */ + icode = direct_optab_handler (atomic_exchange_optab, mode); + if (icode != CODE_FOR_nothing) + return true; + + /* Don't check __sync_test_and_set, as on some platforms that + has reduced functionality. Targets that really do support + a proper exchange should simply be updated to the __atomics. */ + + return can_compare_and_swap_p (mode, allow_libcall); +} + + /* Helper function to find the MODE_CC set in a sync_compare_and_swap pattern. */ @@ -7252,21 +7326,15 @@ expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq) } -/* This function expands the atomic exchange operation: - atomically store VAL in MEM and return the previous value in MEM. - - MEMMODEL is the memory model variant to use. - TARGET is an optional place to stick the return value. - USE_TEST_AND_SET indicates whether __sync_lock_test_and_set should be used - as a fall back if the atomic_exchange pattern does not exist. */ - -rtx -expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model, - bool use_test_and_set) +/* This function tries to emit an atomic_exchange intruction. VAL is written + to *MEM using memory model MODEL. The previous contents of *MEM are returned, + using TARGET if possible. */ + +static rtx +maybe_emit_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model) { enum machine_mode mode = GET_MODE (mem); enum insn_code icode; - rtx last_insn; /* If the target supports the exchange directly, great. */ icode = direct_optab_handler (atomic_exchange_optab, mode); @@ -7283,46 +7351,79 @@ expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model, return ops[0].value; } - /* Legacy sync_lock_test_and_set works the same, but is only defined as an - acquire barrier. If the pattern exists, and the memory model is stronger - than acquire, add a release barrier before the instruction. - The barrier is not needed if sync_lock_test_and_set doesn't exist since - it will expand into a compare-and-swap loop. + return NULL_RTX; +} + +/* This function tries to implement an atomic exchange operation using + __sync_lock_test_and_set. VAL is written to *MEM using memory model MODEL. + The previous contents of *MEM are returned, using TARGET if possible. + Since this instructionn is an acquire barrier only, stronger memory + models may require additional barriers to be emitted. */ + +static rtx +maybe_emit_sync_lock_test_and_set (rtx target, rtx mem, rtx val, + enum memmodel model) +{ + enum machine_mode mode = GET_MODE (mem); + enum insn_code icode; + rtx last_insn = get_last_insn (); - Some targets have non-compliant test_and_sets, so it would be incorrect - to emit a test_and_set in place of an __atomic_exchange. The test_and_set - builtin shares this expander since exchange can always replace the - test_and_set. */ + icode = optab_handler (sync_lock_test_and_set_optab, mode); - if (use_test_and_set) + /* Legacy sync_lock_test_and_set is an acquire barrier. If the pattern + exists, and the memory model is stronger than acquire, add a release + barrier before the instruction. 
*/ + + if (model == MEMMODEL_SEQ_CST + || model == MEMMODEL_RELEASE + || model == MEMMODEL_ACQ_REL) + expand_mem_thread_fence (model); + + if (icode != CODE_FOR_nothing) { - icode = direct_optab_handler (sync_lock_test_and_set_optab, mode); - last_insn = get_last_insn (); - if ((icode != CODE_FOR_nothing) && (model == MEMMODEL_SEQ_CST || - model == MEMMODEL_RELEASE || - model == MEMMODEL_ACQ_REL)) - expand_builtin_mem_thread_fence (model); + struct expand_operand ops[3]; + create_output_operand (&ops[0], target, mode); + create_fixed_operand (&ops[1], mem); + /* VAL may have been promoted to a wider mode. Shrink it if so. */ + create_convert_operand_to (&ops[2], val, mode, true); + if (maybe_expand_insn (icode, 3, ops)) + return ops[0].value; + } - if (icode != CODE_FOR_nothing) + /* If an external test-and-set libcall is provided, use that instead of + any external compare-and-swap that we might get from the compare-and- + swap-loop expansion later. */ + if (!can_compare_and_swap_p (mode, false)) + { + rtx libfunc = optab_libfunc (sync_lock_test_and_set_optab, mode); + if (libfunc != NULL) { - struct expand_operand ops[3]; - - create_output_operand (&ops[0], target, mode); - create_fixed_operand (&ops[1], mem); - /* VAL may have been promoted to a wider mode. Shrink it if so. */ - create_convert_operand_to (&ops[2], val, mode, true); - if (maybe_expand_insn (icode, 3, ops)) - return ops[0].value; - } + rtx addr; - /* Remove any fence that was inserted since a compare and swap loop is - already a full memory barrier. */ - if (last_insn != get_last_insn ()) - delete_insns_since (last_insn); + addr = convert_memory_address (ptr_mode, XEXP (mem, 0)); + return emit_library_call_value (libfunc, NULL_RTX, LCT_NORMAL, + mode, 2, addr, ptr_mode, + val, mode); + } } - /* Otherwise, use a compare-and-swap loop for the exchange. */ - if (can_compare_and_swap_p (mode)) + /* If the test_and_set can't be emitted, eliminate any barrier that might + have been emitted. */ + delete_insns_since (last_insn); + return NULL_RTX; +} + +/* This function tries to implement an atomic exchange operation using a + compare_and_swap loop. VAL is written to *MEM. The previous contents of + *MEM are returned, using TARGET if possible. No memory model is required + since a compare_and_swap loop is seq-cst. */ + +static rtx +maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val) +{ + enum machine_mode mode = GET_MODE (mem); + + if (can_compare_and_swap_p (mode, true)) { if (!target || !register_operand (target, mode)) target = gen_reg_rtx (mode); @@ -7335,6 +7436,105 @@ expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model, return NULL_RTX; } +#ifndef HAVE_atomic_test_and_set +#define HAVE_atomic_test_and_set 0 +#define gen_atomic_test_and_set(x,y,z) (gcc_unreachable (), NULL_RTX) +#endif + +/* This function expands the legacy _sync_lock test_and_set operation which is + generally an atomic exchange. Some limited targets only allow the + constant 1 to be stored. This is an ACQUIRE operation. + + TARGET is an optional place to stick the return value. + MEM is where VAL is stored. */ + +rtx +expand_sync_lock_test_and_set (rtx target, rtx mem, rtx val) +{ + rtx ret; + + /* Try an atomic_exchange first. 
*/ + ret = maybe_emit_atomic_exchange (target, mem, val, MEMMODEL_ACQUIRE); + + if (!ret) + ret = maybe_emit_sync_lock_test_and_set (target, mem, val, + MEMMODEL_ACQUIRE); + if (!ret) + ret = maybe_emit_compare_and_swap_exchange_loop (target, mem, val); + + /* If there are no other options, try atomic_test_and_set if the value + being stored is 1. */ + if (!ret && val == const1_rtx && HAVE_atomic_test_and_set) + { + ret = gen_atomic_test_and_set (target, mem, GEN_INT (MEMMODEL_ACQUIRE)); + emit_insn (ret); + } + + return ret; +} + +/* This function expands the atomic test_and_set operation: + atomically store a boolean TRUE into MEM and return the previous value. + + MEMMODEL is the memory model variant to use. + TARGET is an optional place to stick the return value. */ + +rtx +expand_atomic_test_and_set (rtx target, rtx mem, enum memmodel model) +{ + enum machine_mode mode = GET_MODE (mem); + rtx ret = NULL_RTX; + + if (target == NULL_RTX) + target = gen_reg_rtx (mode); + + if (HAVE_atomic_test_and_set) + { + ret = gen_atomic_test_and_set (target, mem, GEN_INT (MEMMODEL_ACQUIRE)); + emit_insn (ret); + return ret; + } + + /* If there is no test and set, try exchange, then a compare_and_swap loop, + then __sync_test_and_set. */ + ret = maybe_emit_atomic_exchange (target, mem, const1_rtx, model); + + if (!ret) + ret = maybe_emit_compare_and_swap_exchange_loop (target, mem, const1_rtx); + + if (!ret) + ret = maybe_emit_sync_lock_test_and_set (target, mem, const1_rtx, model); + + if (ret) + return ret; + + /* Failing all else, assume a single threaded environment and simply perform + the operation. */ + emit_move_insn (target, mem); + emit_move_insn (mem, const1_rtx); + return target; +} + +/* This function expands the atomic exchange operation: + atomically store VAL in MEM and return the previous value in MEM. + + MEMMODEL is the memory model variant to use. + TARGET is an optional place to stick the return value. */ + +rtx +expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model) +{ + rtx ret; + + ret = maybe_emit_atomic_exchange (target, mem, val, model); + + /* Next try a compare-and-swap loop for the exchange. */ + if (!ret) + ret = maybe_emit_compare_and_swap_exchange_loop (target, mem, val); + + return ret; +} + /* This function expands the atomic compare exchange operation: *PTARGET_BOOL is an optional place to store the boolean success/failure. @@ -7356,7 +7556,8 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval, enum machine_mode mode = GET_MODE (mem); struct expand_operand ops[8]; enum insn_code icode; - rtx target_bool, target_oval; + rtx target_oval, target_bool = NULL_RTX; + rtx libfunc; /* Load expected into a register for the compare and swap. */ if (MEM_P (expected)) @@ -7400,7 +7601,7 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval, /* Otherwise fall back to the original __sync_val_compare_and_swap which is always seq-cst. */ - icode = direct_optab_handler (sync_compare_and_swap_optab, mode); + icode = optab_handler (sync_compare_and_swap_optab, mode); if (icode != CODE_FOR_nothing) { rtx cc_reg; @@ -7413,7 +7614,6 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval, return false; target_oval = ops[0].value; - target_bool = NULL_RTX; /* If the caller isn't interested in the boolean return value, skip the computation of it. 
*/ @@ -7424,17 +7624,37 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval, cc_reg = NULL_RTX; if (have_insn_for (COMPARE, CCmode)) note_stores (PATTERN (get_last_insn ()), find_cc_set, &cc_reg); + if (cc_reg) + { + target_bool = emit_store_flag_force (target_bool, EQ, cc_reg, + const0_rtx, VOIDmode, 0, 1); + goto success; + } + goto success_bool_from_val; + } - target_bool - = (cc_reg - ? emit_store_flag_force (target_bool, EQ, cc_reg, - const0_rtx, VOIDmode, 0, 1) - : emit_store_flag_force (target_bool, EQ, target_oval, - expected, VOIDmode, 1, 1)); - goto success; + /* Also check for library support for __sync_val_compare_and_swap. */ + libfunc = optab_libfunc (sync_compare_and_swap_optab, mode); + if (libfunc != NULL) + { + rtx addr = convert_memory_address (ptr_mode, XEXP (mem, 0)); + target_oval = emit_library_call_value (libfunc, NULL_RTX, LCT_NORMAL, + mode, 3, addr, ptr_mode, + expected, mode, desired, mode); + + /* Compute the boolean return value only if requested. */ + if (ptarget_bool) + goto success_bool_from_val; + else + goto success; } + + /* Failure. */ return false; + success_bool_from_val: + target_bool = emit_store_flag_force (target_bool, EQ, target_oval, + expected, VOIDmode, 1, 1); success: /* Make sure that the oval output winds up where the caller asked. */ if (ptarget_oval) @@ -7444,6 +7664,76 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval, return true; } +/* Generate asm volatile("" : : : "memory") as the memory barrier. */ + +static void +expand_asm_memory_barrier (void) +{ + rtx asm_op, clob; + + asm_op = gen_rtx_ASM_OPERANDS (VOIDmode, empty_string, empty_string, 0, + rtvec_alloc (0), rtvec_alloc (0), + rtvec_alloc (0), UNKNOWN_LOCATION); + MEM_VOLATILE_P (asm_op) = 1; + + clob = gen_rtx_SCRATCH (VOIDmode); + clob = gen_rtx_MEM (BLKmode, clob); + clob = gen_rtx_CLOBBER (VOIDmode, clob); + + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, asm_op, clob))); +} + +/* This routine will either emit the mem_thread_fence pattern or issue a + sync_synchronize to generate a fence for memory model MEMMODEL. */ + +#ifndef HAVE_mem_thread_fence +# define HAVE_mem_thread_fence 0 +# define gen_mem_thread_fence(x) (gcc_unreachable (), NULL_RTX) +#endif +#ifndef HAVE_memory_barrier +# define HAVE_memory_barrier 0 +# define gen_memory_barrier() (gcc_unreachable (), NULL_RTX) +#endif + +void +expand_mem_thread_fence (enum memmodel model) +{ + if (HAVE_mem_thread_fence) + emit_insn (gen_mem_thread_fence (GEN_INT (model))); + else if (model != MEMMODEL_RELAXED) + { + if (HAVE_memory_barrier) + emit_insn (gen_memory_barrier ()); + else if (synchronize_libfunc != NULL_RTX) + emit_library_call (synchronize_libfunc, LCT_NORMAL, VOIDmode, 0); + else + expand_asm_memory_barrier (); + } +} + +/* This routine will either emit the mem_signal_fence pattern or issue a + sync_synchronize to generate a fence for memory model MEMMODEL. */ + +#ifndef HAVE_mem_signal_fence +# define HAVE_mem_signal_fence 0 +# define gen_mem_signal_fence(x) (gcc_unreachable (), NULL_RTX) +#endif + +void +expand_mem_signal_fence (enum memmodel model) +{ + if (HAVE_mem_signal_fence) + emit_insn (gen_mem_signal_fence (GEN_INT (model))); + else if (model != MEMMODEL_RELAXED) + { + /* By default targets are coherent between a thread and the signal + handler running on the same thread. Thus this really becomes a + compiler barrier, in that stores must not be sunk past + (or raised above) a given point. 
*/ + expand_asm_memory_barrier (); + } +} + /* This function expands the atomic load operation: return the atomically loaded value in MEM. @@ -7486,13 +7776,13 @@ expand_atomic_load (rtx target, rtx mem, enum memmodel model) target = gen_reg_rtx (mode); /* Emit the appropriate barrier before the load. */ - expand_builtin_mem_thread_fence (model); + expand_mem_thread_fence (model); emit_move_insn (target, mem); /* For SEQ_CST, also emit a barrier after the load. */ if (model == MEMMODEL_SEQ_CST) - expand_builtin_mem_thread_fence (model); + expand_mem_thread_fence (model); return target; } @@ -7533,7 +7823,7 @@ expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release) { /* lock_release is only a release barrier. */ if (model == MEMMODEL_SEQ_CST) - expand_builtin_mem_thread_fence (model); + expand_mem_thread_fence (model); return const0_rtx; } } @@ -7544,7 +7834,9 @@ expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release) the result. If that doesn't work, don't do anything. */ if (GET_MODE_PRECISION(mode) > BITS_PER_WORD) { - rtx target = expand_atomic_exchange (NULL_RTX, mem, val, model, false); + rtx target = maybe_emit_atomic_exchange (NULL_RTX, mem, val, model); + if (!target) + target = maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val); if (target) return const0_rtx; else @@ -7553,13 +7845,13 @@ expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release) /* If there is no mem_store, default to a move with barriers */ if (model == MEMMODEL_SEQ_CST || model == MEMMODEL_RELEASE) - expand_builtin_mem_thread_fence (model); + expand_mem_thread_fence (model); emit_move_insn (mem, val); /* For SEQ_CST, also emit a barrier after the load. */ if (model == MEMMODEL_SEQ_CST) - expand_builtin_mem_thread_fence (model); + expand_mem_thread_fence (model); return const0_rtx; } @@ -7570,57 +7862,123 @@ expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release) struct atomic_op_functions { - struct direct_optab_d *mem_fetch_before; - struct direct_optab_d *mem_fetch_after; - struct direct_optab_d *mem_no_result; - struct direct_optab_d *fetch_before; - struct direct_optab_d *fetch_after; - struct direct_optab_d *no_result; + direct_optab mem_fetch_before; + direct_optab mem_fetch_after; + direct_optab mem_no_result; + optab fetch_before; + optab fetch_after; + direct_optab no_result; enum rtx_code reverse_code; }; -static const struct atomic_op_functions * -get_atomic_op_for_code (enum rtx_code code) -{ - static const struct atomic_op_functions add_op = { - atomic_fetch_add_optab, atomic_add_fetch_optab, atomic_add_optab, - sync_old_add_optab, sync_new_add_optab, sync_add_optab, MINUS - }, sub_op = { - atomic_fetch_sub_optab, atomic_sub_fetch_optab, atomic_sub_optab, - sync_old_sub_optab, sync_new_sub_optab, sync_sub_optab, PLUS - }, xor_op = { - atomic_fetch_xor_optab, atomic_xor_fetch_optab, atomic_xor_optab, - sync_old_xor_optab, sync_new_xor_optab, sync_xor_optab, XOR - }, and_op = { - atomic_fetch_and_optab, atomic_and_fetch_optab, atomic_and_optab, - sync_old_and_optab, sync_new_and_optab, sync_and_optab, UNKNOWN - }, nand_op = { - atomic_fetch_nand_optab, atomic_nand_fetch_optab, atomic_nand_optab, - sync_old_nand_optab, sync_new_nand_optab, sync_nand_optab, UNKNOWN - }, ior_op = { - atomic_fetch_or_optab, atomic_or_fetch_optab, atomic_or_optab, - sync_old_ior_optab, sync_new_ior_optab, sync_ior_optab, UNKNOWN - }; +/* Fill in structure pointed to by OP with the various optab entries for an + operation of 
type CODE. */ + +static void +get_atomic_op_for_code (struct atomic_op_functions *op, enum rtx_code code) +{ + gcc_assert (op!= NULL); + + /* If SWITCHABLE_TARGET is defined, then subtargets can be switched + in the source code during compilation, and the optab entries are not + computable until runtime. Fill in the values at runtime. */ switch (code) { case PLUS: - return &add_op; + op->mem_fetch_before = atomic_fetch_add_optab; + op->mem_fetch_after = atomic_add_fetch_optab; + op->mem_no_result = atomic_add_optab; + op->fetch_before = sync_old_add_optab; + op->fetch_after = sync_new_add_optab; + op->no_result = sync_add_optab; + op->reverse_code = MINUS; + break; case MINUS: - return &sub_op; + op->mem_fetch_before = atomic_fetch_sub_optab; + op->mem_fetch_after = atomic_sub_fetch_optab; + op->mem_no_result = atomic_sub_optab; + op->fetch_before = sync_old_sub_optab; + op->fetch_after = sync_new_sub_optab; + op->no_result = sync_sub_optab; + op->reverse_code = PLUS; + break; case XOR: - return &xor_op; + op->mem_fetch_before = atomic_fetch_xor_optab; + op->mem_fetch_after = atomic_xor_fetch_optab; + op->mem_no_result = atomic_xor_optab; + op->fetch_before = sync_old_xor_optab; + op->fetch_after = sync_new_xor_optab; + op->no_result = sync_xor_optab; + op->reverse_code = XOR; + break; case AND: - return &and_op; + op->mem_fetch_before = atomic_fetch_and_optab; + op->mem_fetch_after = atomic_and_fetch_optab; + op->mem_no_result = atomic_and_optab; + op->fetch_before = sync_old_and_optab; + op->fetch_after = sync_new_and_optab; + op->no_result = sync_and_optab; + op->reverse_code = UNKNOWN; + break; case IOR: - return &ior_op; + op->mem_fetch_before = atomic_fetch_or_optab; + op->mem_fetch_after = atomic_or_fetch_optab; + op->mem_no_result = atomic_or_optab; + op->fetch_before = sync_old_ior_optab; + op->fetch_after = sync_new_ior_optab; + op->no_result = sync_ior_optab; + op->reverse_code = UNKNOWN; + break; case NOT: - return &nand_op; + op->mem_fetch_before = atomic_fetch_nand_optab; + op->mem_fetch_after = atomic_nand_fetch_optab; + op->mem_no_result = atomic_nand_optab; + op->fetch_before = sync_old_nand_optab; + op->fetch_after = sync_new_nand_optab; + op->no_result = sync_nand_optab; + op->reverse_code = UNKNOWN; + break; default: gcc_unreachable (); } } +/* See if there is a more optimal way to implement the operation "*MEM CODE VAL" + using memory order MODEL. If AFTER is true the operation needs to return + the value of *MEM after the operation, otherwise the previous value. + TARGET is an optional place to place the result. The result is unused if + it is const0_rtx. + Return the result if there is a better sequence, otherwise NULL_RTX. */ + +static rtx +maybe_optimize_fetch_op (rtx target, rtx mem, rtx val, enum rtx_code code, + enum memmodel model, bool after) +{ + /* If the value is prefetched, or not used, it may be possible to replace + the sequence with a native exchange operation. */ + if (!after || target == const0_rtx) + { + /* fetch_and (&x, 0, m) can be replaced with exchange (&x, 0, m). */ + if (code == AND && val == const0_rtx) + { + if (target == const0_rtx) + target = gen_reg_rtx (GET_MODE (mem)); + return maybe_emit_atomic_exchange (target, mem, val, model); + } + + /* fetch_or (&x, -1, m) can be replaced with exchange (&x, -1, m). 
*/ + if (code == IOR && val == constm1_rtx) + { + if (target == const0_rtx) + target = gen_reg_rtx (GET_MODE (mem)); + return maybe_emit_atomic_exchange (target, mem, val, model); + } + } + + return NULL_RTX; +} + /* Try to emit an instruction for a specific operation varaition. OPTAB contains the OP functions. TARGET is an optional place to return the result. const0_rtx means unused. @@ -7635,7 +7993,6 @@ maybe_emit_op (const struct atomic_op_functions *optab, rtx target, rtx mem, rtx val, bool use_memmodel, enum memmodel model, bool after) { enum machine_mode mode = GET_MODE (mem); - struct direct_optab_d *this_optab; struct expand_operand ops[4]; enum insn_code icode; int op_counter = 0; @@ -7646,13 +8003,13 @@ maybe_emit_op (const struct atomic_op_functions *optab, rtx target, rtx mem, { if (use_memmodel) { - this_optab = optab->mem_no_result; + icode = direct_optab_handler (optab->mem_no_result, mode); create_integer_operand (&ops[2], model); num_ops = 3; } else { - this_optab = optab->no_result; + icode = direct_optab_handler (optab->no_result, mode); num_ops = 2; } } @@ -7661,19 +8018,19 @@ maybe_emit_op (const struct atomic_op_functions *optab, rtx target, rtx mem, { if (use_memmodel) { - this_optab = after ? optab->mem_fetch_after : optab->mem_fetch_before; + icode = direct_optab_handler (after ? optab->mem_fetch_after + : optab->mem_fetch_before, mode); create_integer_operand (&ops[3], model); - num_ops= 4; + num_ops = 4; } else { - this_optab = after ? optab->fetch_after : optab->fetch_before; + icode = optab_handler (after ? optab->fetch_after + : optab->fetch_before, mode); num_ops = 3; } create_output_operand (&ops[op_counter++], target, mode); } - - icode = direct_optab_handler (this_optab, mode); if (icode == CODE_FOR_nothing) return NULL_RTX; @@ -7682,7 +8039,7 @@ maybe_emit_op (const struct atomic_op_functions *optab, rtx target, rtx mem, create_convert_operand_to (&ops[op_counter++], val, mode, true); if (maybe_expand_insn (icode, num_ops, ops)) - return ((target == const0_rtx) ? const0_rtx : ops[0].value); + return (target == const0_rtx ? const0_rtx : ops[0].value); return NULL_RTX; } @@ -7701,22 +8058,27 @@ expand_atomic_fetch_op (rtx target, rtx mem, rtx val, enum rtx_code code, enum memmodel model, bool after) { enum machine_mode mode = GET_MODE (mem); - const struct atomic_op_functions *optab; + struct atomic_op_functions optab; rtx result; bool unused_result = (target == const0_rtx); - optab = get_atomic_op_for_code (code); + get_atomic_op_for_code (&optab, code); + + /* Check to see if there are any better instructions. */ + result = maybe_optimize_fetch_op (target, mem, val, code, model, after); + if (result) + return result; /* Check for the case where the result isn't used and try those patterns. */ if (unused_result) { /* Try the memory model variant first. */ - result = maybe_emit_op (optab, target, mem, val, true, model, true); + result = maybe_emit_op (&optab, target, mem, val, true, model, true); if (result) return result; /* Next try the old style withuot a memory model. */ - result = maybe_emit_op (optab, target, mem, val, false, model, true); + result = maybe_emit_op (&optab, target, mem, val, false, model, true); if (result) return result; @@ -7725,42 +8087,79 @@ expand_atomic_fetch_op (rtx target, rtx mem, rtx val, enum rtx_code code, } /* Try the __atomic version. 
*/ - result = maybe_emit_op (optab, target, mem, val, true, model, after); + result = maybe_emit_op (&optab, target, mem, val, true, model, after); if (result) return result; /* Try the older __sync version. */ - result = maybe_emit_op (optab, target, mem, val, false, model, after); + result = maybe_emit_op (&optab, target, mem, val, false, model, after); if (result) return result; /* If the fetch value can be calculated from the other variation of fetch, try that operation. */ - if (after || optab->reverse_code != UNKNOWN || target == const0_rtx) + if (after || unused_result || optab.reverse_code != UNKNOWN) { /* Try the __atomic version, then the older __sync version. */ - result = maybe_emit_op (optab, target, mem, val, true, model, !after); + result = maybe_emit_op (&optab, target, mem, val, true, model, !after); if (!result) - result = maybe_emit_op (optab, target, mem, val, false, model, !after); + result = maybe_emit_op (&optab, target, mem, val, false, model, !after); if (result) { /* If the result isn't used, no need to do compensation code. */ if (unused_result) - return target; + return result; /* Issue compensation code. Fetch_after == fetch_before OP val. Fetch_before == after REVERSE_OP val. */ if (!after) - code = optab->reverse_code; - result = expand_simple_binop (mode, code, result, val, NULL_RTX, true, - OPTAB_LIB_WIDEN); + code = optab.reverse_code; + if (code == NOT) + { + result = expand_simple_binop (mode, AND, result, val, NULL_RTX, + true, OPTAB_LIB_WIDEN); + result = expand_simple_unop (mode, NOT, result, target, true); + } + else + result = expand_simple_binop (mode, code, result, val, target, + true, OPTAB_LIB_WIDEN); + return result; + } + } + + /* Try the __sync libcalls only if we can't do compare-and-swap inline. */ + if (!can_compare_and_swap_p (mode, false)) + { + rtx libfunc; + bool fixup = false; + + libfunc = optab_libfunc (after ? optab.fetch_after + : optab.fetch_before, mode); + if (libfunc == NULL + && (after || unused_result || optab.reverse_code != UNKNOWN)) + { + fixup = true; + if (!after) + code = optab.reverse_code; + libfunc = optab_libfunc (after ? optab.fetch_before + : optab.fetch_after, mode); + } + if (libfunc != NULL) + { + rtx addr = convert_memory_address (ptr_mode, XEXP (mem, 0)); + result = emit_library_call_value (libfunc, NULL, LCT_NORMAL, mode, + 2, addr, ptr_mode, val, mode); + + if (!unused_result && fixup) + result = expand_simple_binop (mode, code, result, val, target, + true, OPTAB_LIB_WIDEN); return result; } } /* If nothing else has succeeded, default to a compare and swap loop. */ - if (can_compare_and_swap_p (mode)) + if (can_compare_and_swap_p (mode, true)) { rtx insn; rtx t0 = gen_reg_rtx (mode), t1; @@ -7843,24 +8242,31 @@ maybe_legitimize_operand_same_code (enum insn_code icode, unsigned int opno, return true; /* If the operand is a memory whose address has no side effects, - try forcing the address into a register. The check for side - effects is important because force_reg cannot handle things - like auto-modified addresses. */ - if (insn_data[(int) icode].operand[opno].allows_mem - && MEM_P (op->value) - && !side_effects_p (XEXP (op->value, 0))) - { - rtx addr, mem, last; - - last = get_last_insn (); - addr = force_reg (Pmode, XEXP (op->value, 0)); - mem = replace_equiv_address (op->value, addr); - if (insn_operand_matches (icode, opno, mem)) + try forcing the address into a non-virtual pseudo register. 
+ The check for side effects is important because copy_to_mode_reg + cannot handle things like auto-modified addresses. */ + if (insn_data[(int) icode].operand[opno].allows_mem && MEM_P (op->value)) + { + rtx addr, mem; + + mem = op->value; + addr = XEXP (mem, 0); + if (!(REG_P (addr) && REGNO (addr) > LAST_VIRTUAL_REGISTER) + && !side_effects_p (addr)) { - op->value = mem; - return true; + rtx last; + enum machine_mode mode; + + last = get_last_insn (); + mode = targetm.addr_space.address_mode (MEM_ADDR_SPACE (mem)); + mem = replace_equiv_address (mem, copy_to_mode_reg (mode, addr)); + if (insn_operand_matches (icode, opno, mem)) + { + op->value = mem; + return true; + } + delete_insns_since (last); } - delete_insns_since (last); } return false;
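
For a user-level reference to the expansion paths added above, the following stand-alone sketch exercises the builtins they service: __sync_lock_test_and_set and __sync_lock_release (cf. expand_sync_lock_test_and_set and the use_release path in expand_atomic_store above), the __atomic exchange and fetch-op forms, and the fence handled by expand_mem_thread_fence.  It is illustrative only and not part of the patch; the file name, identifiers, and build line are invented for the example, and it assumes a compiler that already provides the __atomic builtins this series targets (GCC 4.7-era or newer).

/* atomic-demo.c: illustrative companion to the patch, not part of it.
   Hypothetical build line:  gcc -O2 atomic-demo.c  */

#include <assert.h>
#include <stdio.h>

static int lock;              /* 0 = free, 1 = held.  */
static unsigned int counter;

static void
take_lock (void)
{
  /* Acquire-only exchange.  expand_sync_lock_test_and_set tries the
     atomic_exchange pattern, then sync_lock_test_and_set, then a
     compare-and-swap loop, then atomic_test_and_set (value 1 only).  */
  while (__sync_lock_test_and_set (&lock, 1))
    continue;
}

static void
drop_lock (void)
{
  /* Release-only store; see the use_release handling in
     expand_atomic_store.  */
  __sync_lock_release (&lock);
}

int
main (void)
{
  unsigned int old;

  take_lock ();
  counter++;
  drop_lock ();

  /* maybe_optimize_fetch_op: a fetch-and-AND with 0 (or fetch-and-OR
     with -1) returns the old value and leaves a known constant behind,
     so it may legitimately expand to the same sequence as an atomic
     exchange of that constant.  */
  counter = 5;
  old = __atomic_fetch_and (&counter, 0, __ATOMIC_SEQ_CST);
  assert (old == 5 && counter == 0);

  counter = 5;
  old = __atomic_exchange_n (&counter, 0, __ATOMIC_SEQ_CST);
  assert (old == 5 && counter == 0);

  /* expand_mem_thread_fence: a full barrier taken from the
     mem_thread_fence pattern, a memory_barrier pattern,
     __sync_synchronize, or a volatile asm with a "memory" clobber,
     whichever the target provides.  */
  __atomic_thread_fence (__ATOMIC_SEQ_CST);

  printf ("counter = %u, lock = %d\n", counter, lock);
  return 0;
}

On a single thread both assert pairs pass trivially; the point is only that the middle end may now emit identical code for the two forms, while the legacy __sync_lock_* builtins keep their acquire-only and release-only semantics even when routed through the new __atomic expanders.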