/* Expand the basic unary and binary arithmetic operations, for GNU compiler.
Copyright (C) 1987, 1988, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
- 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
- Free Software Foundation, Inc.
+ 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+ 2011, 2012 Free Software Foundation, Inc.
This file is part of GCC.
}
if (GET_RTX_CLASS (code) == RTX_UNARY)
- note = gen_rtx_fmt_e (code, GET_MODE (target), copy_rtx (op0));
+ switch (code)
+ {
+ case FFS:
+ case CLZ:
+ case CTZ:
+ case CLRSB:
+ case POPCOUNT:
+ case PARITY:
+ case BSWAP:
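+ /* For these operations the result mode may differ from the mode of
+ the operand, so express the note in the operand's mode and then
+ convert it; e.g. a POPCOUNT of a DImode operand whose result is
+ placed in an SImode TARGET is recorded as
+ (truncate:SI (popcount:DI ...)) rather than (popcount:SI ...). */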
+ if (GET_MODE (op0) != VOIDmode && GET_MODE (target) != GET_MODE (op0))
+ {
+ note = gen_rtx_fmt_e (code, GET_MODE (op0), copy_rtx (op0));
+ if (GET_MODE_SIZE (GET_MODE (op0))
+ > GET_MODE_SIZE (GET_MODE (target)))
+ note = simplify_gen_unary (TRUNCATE, GET_MODE (target),
+ note, GET_MODE (op0));
+ else
+ note = simplify_gen_unary (ZERO_EXTEND, GET_MODE (target),
+ note, GET_MODE (op0));
+ break;
+ }
+ /* FALLTHRU */
+ default:
+ note = gen_rtx_fmt_e (code, GET_MODE (target), copy_rtx (op0));
+ break;
+ }
else
note = gen_rtx_fmt_ee (code, GET_MODE (target), copy_rtx (op0), copy_rtx (op1));
return 1;
}
\f
+/* Given two input operands, OP0 and OP1, determine what the correct from_mode
+ for a widening operation would be. In most cases this is the mode of OP0,
+ but if OP0 is a constant its mode will be VOIDmode, which isn't useful. */
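+ /* For example, with TO_MODE == DImode, OP0 == (reg:SI x) and
+ OP1 == (const_int 5), the result is SImode: the VOIDmode constant
+ is ignored in favor of the register operand's mode. */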
+
+static enum machine_mode
+widened_mode (enum machine_mode to_mode, rtx op0, rtx op1)
+{
+ enum machine_mode m0 = GET_MODE (op0);
+ enum machine_mode m1 = GET_MODE (op1);
+ enum machine_mode result;
+
+ if (m0 == VOIDmode && m1 == VOIDmode)
+ return to_mode;
+ else if (m0 == VOIDmode || GET_MODE_SIZE (m0) < GET_MODE_SIZE (m1))
+ result = m1;
+ else
+ result = m0;
+
+ if (GET_MODE_SIZE (result) > GET_MODE_SIZE (to_mode))
+ return to_mode;
+
+ return result;
+}
+\f
+/* Find a widening optab even if it doesn't widen as much as we want.
+ E.g. if from_mode is HImode, and to_mode is DImode, and there is no
+ direct HI->DI insn, then return SI->DI, if that exists.
+ If PERMIT_NON_WIDENING is nonzero then this can also be used with
+ non-widening optabs. */
+
+enum insn_code
+find_widening_optab_handler_and_mode (optab op, enum machine_mode to_mode,
+ enum machine_mode from_mode,
+ int permit_non_widening,
+ enum machine_mode *found_mode)
+{
+ for (; (permit_non_widening || from_mode != to_mode)
+ && GET_MODE_SIZE (from_mode) <= GET_MODE_SIZE (to_mode)
+ && from_mode != VOIDmode;
+ from_mode = GET_MODE_WIDER_MODE (from_mode))
+ {
+ enum insn_code handler = widening_optab_handler (op, to_mode,
+ from_mode);
+
+ if (handler != CODE_FOR_nothing)
+ {
+ if (found_mode)
+ *found_mode = from_mode;
+ return handler;
+ }
+ }
+
+ return CODE_FOR_nothing;
+}
+\f
/* Widen OP to MODE and return the rtx for the widened operand. UNSIGNEDP
says whether OP is signed or unsigned. NO_EXTEND is nonzero if we need
not actually do a sign-extend or zero-extend, but can leave the
return TYPE_UNSIGNED (type) ?
vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
+ case VEC_WIDEN_LSHIFT_HI_EXPR:
+ return TYPE_UNSIGNED (type) ?
+ vec_widen_ushiftl_hi_optab : vec_widen_sshiftl_hi_optab;
+
+ case VEC_WIDEN_LSHIFT_LO_EXPR:
+ return TYPE_UNSIGNED (type) ?
+ vec_widen_ushiftl_lo_optab : vec_widen_sshiftl_lo_optab;
+
case VEC_UNPACK_HI_EXPR:
return TYPE_UNSIGNED (type) ?
vec_unpacku_hi_optab : vec_unpacks_hi_optab;
case ABS_EXPR:
return trapv ? absv_optab : abs_optab;
- case VEC_EXTRACT_EVEN_EXPR:
- return vec_extract_even_optab;
-
- case VEC_EXTRACT_ODD_EXPR:
- return vec_extract_odd_optab;
-
- case VEC_INTERLEAVE_HIGH_EXPR:
- return vec_interleave_high_optab;
-
- case VEC_INTERLEAVE_LOW_EXPR:
- return vec_interleave_low_optab;
-
default:
return NULL;
}
optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), optab_default);
if (ops->code == WIDEN_MULT_PLUS_EXPR
|| ops->code == WIDEN_MULT_MINUS_EXPR)
- icode = optab_handler (widen_pattern_optab,
- TYPE_MODE (TREE_TYPE (ops->op2)));
+ icode = find_widening_optab_handler (widen_pattern_optab,
+ TYPE_MODE (TREE_TYPE (ops->op2)),
+ tmode0, 0);
else
icode = optab_handler (widen_pattern_optab, tmode0);
gcc_assert (icode != CODE_FOR_nothing);
calculated at compile time. The arguments and return value are
otherwise the same as for expand_binop. */
-static rtx
+rtx
simplify_expand_binop (enum machine_mode mode, optab binoptab,
rtx op0, rtx op1, rtx target, int unsignedp,
enum optab_methods methods)
return eops[0].value;
}
+/* Create a new vector value in VMODE with all elements set to OP. The
+ mode of OP must be the element mode of VMODE. If OP is a constant,
+ then the return value will be a constant. */
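+/* For example, broadcasting (const_int 7) into V4SImode yields the
+ CONST_VECTOR {7, 7, 7, 7}, while broadcasting (reg:SI x) emits the
+ target's vec_init pattern into a fresh register, or returns NULL if
+ vec_init_optab is not implemented for VMODE. */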
+
+static rtx
+expand_vector_broadcast (enum machine_mode vmode, rtx op)
+{
+ enum insn_code icode;
+ rtvec vec;
+ rtx ret;
+ int i, n;
+
+ gcc_checking_assert (VECTOR_MODE_P (vmode));
+
+ n = GET_MODE_NUNITS (vmode);
+ vec = rtvec_alloc (n);
+ for (i = 0; i < n; ++i)
+ RTVEC_ELT (vec, i) = op;
+
+ if (CONSTANT_P (op))
+ return gen_rtx_CONST_VECTOR (vmode, vec);
+
+ /* ??? If the target doesn't have a vec_init, then we have no easy way
+ of performing this operation. Most of this sort of generic support
+ is hidden away in the vector lowering support in gimple. */
+ icode = optab_handler (vec_init_optab, vmode);
+ if (icode == CODE_FOR_nothing)
+ return NULL;
+
+ ret = gen_reg_rtx (vmode);
+ emit_insn (GEN_FCN (icode) (ret, gen_rtx_PARALLEL (vmode, vec)));
+
+ return ret;
+}
+
/* This subroutine of expand_doubleword_shift handles the cases in which
the effective shift value is >= BITS_PER_WORD. The arguments and return
value are the same as for the parent routine, except that SUPERWORD_OP1
|| binoptab == umul_highpart_optab);
}
-/* X is to be used in mode MODE as an operand to BINOPTAB. If we're
+/* X is to be used in mode MODE as operand OPN to BINOPTAB. If we're
optimizing, and if the operand is a constant that costs more than
1 instruction, force the constant into a register and return that
register. Return X otherwise. UNSIGNEDP says whether X is unsigned. */
static rtx
avoid_expensive_constant (enum machine_mode mode, optab binoptab,
- rtx x, bool unsignedp)
+ int opn, rtx x, bool unsignedp)
{
bool speed = optimize_insn_for_speed_p ();
if (mode != VOIDmode
&& optimize
&& CONSTANT_P (x)
- && rtx_cost (x, binoptab->code, speed) > rtx_cost (x, SET, speed))
+ && rtx_cost (x, binoptab->code, opn, speed) > set_src_cost (x, speed))
{
if (CONST_INT_P (x))
{
rtx target, int unsignedp, enum optab_methods methods,
rtx last)
{
- enum insn_code icode = optab_handler (binoptab, mode);
+ enum machine_mode from_mode = widened_mode (mode, op0, op1);
+ enum insn_code icode = find_widening_optab_handler (binoptab, mode,
+ from_mode, 1);
enum machine_mode xmode0 = insn_data[(int) icode].operand[1].mode;
enum machine_mode xmode1 = insn_data[(int) icode].operand[2].mode;
enum machine_mode mode0, mode1, tmp_mode;
}
/* If we are optimizing, force expensive constants into a register. */
- xop0 = avoid_expensive_constant (xmode0, binoptab, xop0, unsignedp);
+ xop0 = avoid_expensive_constant (xmode0, binoptab, 0, xop0, unsignedp);
if (!shift_optab_p (binoptab))
- xop1 = avoid_expensive_constant (xmode1, binoptab, xop1, unsignedp);
+ xop1 = avoid_expensive_constant (xmode1, binoptab, 1, xop1, unsignedp);
/* In case the insn wants input operands in modes different from
those of the actual operands, convert the operands. It would
/* If we can do it with a three-operand insn, do so. */
if (methods != OPTAB_MUST_WIDEN
- && optab_handler (binoptab, mode) != CODE_FOR_nothing)
+ && find_widening_optab_handler (binoptab, mode,
+ widened_mode (mode, op0, op1), 1)
+ != CODE_FOR_nothing)
{
temp = expand_binop_directly (mode, binoptab, op0, op1, target,
unsignedp, methods, last);
{
optab otheroptab = (binoptab == rotl_optab ? rotr_optab : rotl_optab);
rtx newop1;
- unsigned int bits = GET_MODE_BITSIZE (mode);
+ unsigned int bits = GET_MODE_PRECISION (mode);
if (CONST_INT_P (op1))
newop1 = GEN_INT (bits - INTVAL (op1));
takes operands of this mode and makes a wider mode. */
if (binoptab == smul_optab
- && GET_MODE_WIDER_MODE (mode) != VOIDmode
- && (optab_handler ((unsignedp ? umul_widen_optab : smul_widen_optab),
- GET_MODE_WIDER_MODE (mode))
+ && GET_MODE_2XWIDER_MODE (mode) != VOIDmode
+ && (widening_optab_handler ((unsignedp ? umul_widen_optab
+ : smul_widen_optab),
+ GET_MODE_2XWIDER_MODE (mode), mode)
!= CODE_FOR_nothing))
{
- temp = expand_binop (GET_MODE_WIDER_MODE (mode),
+ temp = expand_binop (GET_MODE_2XWIDER_MODE (mode),
unsignedp ? umul_widen_optab : smul_widen_optab,
op0, op1, NULL_RTX, unsignedp, OPTAB_DIRECT);
if (temp != 0)
{
if (GET_MODE_CLASS (mode) == MODE_INT
- && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
- GET_MODE_BITSIZE (GET_MODE (temp))))
+ && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (temp)))
return gen_lowpart (mode, temp);
else
return convert_to_mode (mode, temp, unsignedp);
}
}
+ /* If this is a vector shift by a scalar, see if we can do a vector
+ shift by a vector. If so, broadcast the scalar into a vector. */
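+ /* E.g. an ashl_optab shift of a V4SImode value by (const_int 3) can
+ be retried as a vashl_optab shift by the broadcast vector
+ {3, 3, 3, 3}. */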
+ if (mclass == MODE_VECTOR_INT)
+ {
+ optab otheroptab = NULL;
+
+ if (binoptab == ashl_optab)
+ otheroptab = vashl_optab;
+ else if (binoptab == ashr_optab)
+ otheroptab = vashr_optab;
+ else if (binoptab == lshr_optab)
+ otheroptab = vlshr_optab;
+ else if (binoptab == rotl_optab)
+ otheroptab = vrotl_optab;
+ else if (binoptab == rotr_optab)
+ otheroptab = vrotr_optab;
+
+ if (otheroptab && optab_handler (otheroptab, mode) != CODE_FOR_nothing)
+ {
+ rtx vop1 = expand_vector_broadcast (mode, op1);
+ if (vop1)
+ {
+ temp = expand_binop_directly (mode, otheroptab, op0, vop1,
+ target, unsignedp, methods, last);
+ if (temp)
+ return temp;
+ }
+ }
+ }
+
/* Look for a wider mode of the same class for which we think we
can open-code the operation. Check for a widening multiply at the
wider mode as well. */
if (optab_handler (binoptab, wider_mode) != CODE_FOR_nothing
|| (binoptab == smul_optab
&& GET_MODE_WIDER_MODE (wider_mode) != VOIDmode
- && (optab_handler ((unsignedp ? umul_widen_optab
- : smul_widen_optab),
- GET_MODE_WIDER_MODE (wider_mode))
+ && (find_widening_optab_handler ((unsignedp
+ ? umul_widen_optab
+ : smul_widen_optab),
+ GET_MODE_WIDER_MODE (wider_mode),
+ mode, 0)
!= CODE_FOR_nothing)))
{
rtx xop0 = op0, xop1 = op1;
&& mclass == MODE_INT)
{
no_extend = 1;
- xop0 = avoid_expensive_constant (mode, binoptab,
+ xop0 = avoid_expensive_constant (mode, binoptab, 0,
xop0, unsignedp);
if (binoptab != ashl_optab)
- xop1 = avoid_expensive_constant (mode, binoptab,
+ xop1 = avoid_expensive_constant (mode, binoptab, 1,
xop1, unsignedp);
}
if (temp)
{
if (mclass != MODE_INT
- || !TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
- GET_MODE_BITSIZE (wider_mode)))
+ || !TRULY_NOOP_TRUNCATION_MODES_P (mode, wider_mode))
{
if (target == 0)
target = gen_reg_rtx (mode);
&& mclass == MODE_INT
&& (CONST_INT_P (op1) || optimize_insn_for_speed_p ())
&& GET_MODE_SIZE (mode) == 2 * UNITS_PER_WORD
+ && GET_MODE_PRECISION (mode) == GET_MODE_BITSIZE (mode)
&& optab_handler (binoptab, word_mode) != CODE_FOR_nothing
&& optab_handler (ashl_optab, word_mode) != CODE_FOR_nothing
&& optab_handler (lshr_optab, word_mode) != CODE_FOR_nothing)
if ((binoptab == rotl_optab || binoptab == rotr_optab)
&& mclass == MODE_INT
&& CONST_INT_P (op1)
- && GET_MODE_SIZE (mode) == 2 * UNITS_PER_WORD
+ && GET_MODE_PRECISION (mode) == 2 * BITS_PER_WORD
&& optab_handler (ashl_optab, word_mode) != CODE_FOR_nothing
&& optab_handler (lshr_optab, word_mode) != CODE_FOR_nothing)
{
{
rtx temp = emit_move_insn (target, xtarget);
- set_unique_reg_note (temp,
- REG_EQUAL,
- gen_rtx_fmt_ee (binoptab->code, mode,
- copy_rtx (xop0),
- copy_rtx (xop1)));
+ set_dst_reg_note (temp, REG_EQUAL,
+ gen_rtx_fmt_ee (binoptab->code, mode,
+ copy_rtx (xop0),
+ copy_rtx (xop1)),
+ target);
}
else
target = xtarget;
&& optab_handler (add_optab, word_mode) != CODE_FOR_nothing)
{
rtx product = NULL_RTX;
-
- if (optab_handler (umul_widen_optab, mode) != CODE_FOR_nothing)
+ if (widening_optab_handler (umul_widen_optab, mode, word_mode)
+ != CODE_FOR_nothing)
{
product = expand_doubleword_mult (mode, op0, op1, target,
true, methods);
}
if (product == NULL_RTX
- && optab_handler (smul_widen_optab, mode) != CODE_FOR_nothing)
+ && widening_optab_handler (smul_widen_optab, mode, word_mode)
+ != CODE_FOR_nothing)
{
product = expand_doubleword_mult (mode, op0, op1, target,
false, methods);
if (optab_handler (mov_optab, mode) != CODE_FOR_nothing)
{
temp = emit_move_insn (target ? target : product, product);
- set_unique_reg_note (temp,
- REG_EQUAL,
- gen_rtx_fmt_ee (MULT, mode,
- copy_rtx (op0),
- copy_rtx (op1)));
+ set_dst_reg_note (temp,
+ REG_EQUAL,
+ gen_rtx_fmt_ee (MULT, mode,
+ copy_rtx (op0),
+ copy_rtx (op1)),
+ target ? target : product);
}
return product;
}
wider_mode != VOIDmode;
wider_mode = GET_MODE_WIDER_MODE (wider_mode))
{
- if (optab_handler (binoptab, wider_mode) != CODE_FOR_nothing
+ if (find_widening_optab_handler (binoptab, wider_mode, mode, 1)
+ != CODE_FOR_nothing
|| (methods == OPTAB_LIB
&& optab_libfunc (binoptab, wider_mode)))
{
if (temp)
{
if (mclass != MODE_INT
- || !TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
- GET_MODE_BITSIZE (wider_mode)))
+ || !TRULY_NOOP_TRUNCATION_MODES_P (mode, wider_mode))
{
if (target == 0)
target = gen_reg_rtx (mode);
rtx xop0 = op0, xop1 = op1;
/* If we are optimizing, force expensive constants into a register. */
- xop0 = avoid_expensive_constant (mode0, binoptab, xop0, unsignedp);
- xop1 = avoid_expensive_constant (mode1, binoptab, xop1, unsignedp);
+ xop0 = avoid_expensive_constant (mode0, binoptab, 0, xop0, unsignedp);
+ xop1 = avoid_expensive_constant (mode1, binoptab, 1, xop1, unsignedp);
create_fixed_operand (&ops[0], targ0);
create_convert_operand_from (&ops[1], op0, mode, unsignedp);
if (target == 0)
target = gen_reg_rtx (mode);
- xop0 = widen_operand (op0, wider_mode, mode, true, false);
- temp = expand_unop (wider_mode, unoptab, xop0, NULL_RTX, true);
+ xop0 = widen_operand (op0, wider_mode, mode,
+ unoptab != clrsb_optab, false);
+ temp = expand_unop (wider_mode, unoptab, xop0, NULL_RTX,
+ unoptab != clrsb_optab);
if (temp != 0)
temp = expand_binop (wider_mode, sub_optab, temp,
- GEN_INT (GET_MODE_BITSIZE (wider_mode)
- - GET_MODE_BITSIZE (mode)),
+ GEN_INT (GET_MODE_PRECISION (wider_mode)
+ - GET_MODE_PRECISION (mode)),
target, true, OPTAB_DIRECT);
if (temp == 0)
delete_insns_since (last);
x = widen_operand (op0, wider_mode, mode, true, true);
x = expand_unop (wider_mode, bswap_optab, x, NULL_RTX, true);
+ gcc_assert (GET_MODE_PRECISION (wider_mode) == GET_MODE_BITSIZE (wider_mode)
+ && GET_MODE_PRECISION (mode) == GET_MODE_BITSIZE (mode));
if (x != 0)
x = expand_shift (RSHIFT_EXPR, wider_mode, x,
GET_MODE_BITSIZE (wider_mode)
}
/* Try calculating ctz(x) as K - clz(x & -x) ,
- where K is GET_MODE_BITSIZE(mode) - 1.
+ where K is GET_MODE_PRECISION(mode) - 1.
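+ For example, in QImode with x == 0x28 (binary 00101000), x & -x
+ is 0x08, clz of that is 4, and K - 4 == 7 - 4 == 3 == ctz (x).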
Both __builtin_ctz and __builtin_clz are undefined at zero, so we
don't have to worry about what the hardware does in that case. (If
if (temp)
temp = expand_unop_direct (mode, clz_optab, temp, NULL_RTX, true);
if (temp)
- temp = expand_binop (mode, sub_optab, GEN_INT (GET_MODE_BITSIZE (mode) - 1),
+ temp = expand_binop (mode, sub_optab, GEN_INT (GET_MODE_PRECISION (mode) - 1),
temp, target,
true, OPTAB_DIRECT);
if (temp == 0)
if (CLZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
{
defined_at_zero = true;
- val = (GET_MODE_BITSIZE (mode) - 1) - val;
+ val = (GET_MODE_PRECISION (mode) - 1) - val;
}
}
else
gen_lowpart (imode, target), 1, OPTAB_LIB_WIDEN);
target = lowpart_subreg_maybe_copy (mode, temp, imode);
- set_unique_reg_note (get_last_insn (), REG_EQUAL,
- gen_rtx_fmt_e (code, mode, copy_rtx (op0)));
+ set_dst_reg_note (get_last_insn (), REG_EQUAL,
+ gen_rtx_fmt_e (code, mode, copy_rtx (op0)),
+ target);
}
return target;
if (temp)
{
if (mclass != MODE_INT
- || !TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
- GET_MODE_BITSIZE (wider_mode)))
+ || !TRULY_NOOP_TRUNCATION_MODES_P (mode, wider_mode))
{
if (target == 0)
target = gen_reg_rtx (mode);
unsignedp);
/* If we are generating clz using wider mode, adjust the
- result. */
- if (unoptab == clz_optab && temp != 0)
+ result. Similarly for clrsb. */
+ if ((unoptab == clz_optab || unoptab == clrsb_optab)
+ && temp != 0)
temp = expand_binop (wider_mode, sub_optab, temp,
- GEN_INT (GET_MODE_BITSIZE (wider_mode)
- - GET_MODE_BITSIZE (mode)),
+ GEN_INT (GET_MODE_PRECISION (wider_mode)
+ - GET_MODE_PRECISION (mode)),
target, true, OPTAB_DIRECT);
if (temp)
false) >= 2)
{
rtx extended = expand_shift (RSHIFT_EXPR, mode, op0,
- GET_MODE_BITSIZE (mode) - 1,
+ GET_MODE_PRECISION (mode) - 1,
NULL_RTX, 0);
temp = expand_binop (mode, xor_optab, extended, op0, target, 0,
false) >= 2)
{
rtx extended = expand_shift (RSHIFT_EXPR, mode, op0,
- GET_MODE_BITSIZE (mode) - 1,
+ GET_MODE_PRECISION (mode) - 1,
NULL_RTX, 0);
temp = expand_binop (mode, xor_optab, extended, op0, target, 0,
}
last = emit_move_insn (target, result);
- if (optab_handler (mov_optab, GET_MODE (target)) != CODE_FOR_nothing)
- set_unique_reg_note (last, REG_EQUAL, copy_rtx (equiv));
+ set_dst_reg_note (last, REG_EQUAL, copy_rtx (equiv), target);
if (final_dest != target)
emit_move_insn (final_dest, target);
/* If we are optimizing, force expensive constants into a register. */
if (CONSTANT_P (x) && optimize
- && (rtx_cost (x, COMPARE, optimize_insn_for_speed_p ())
+ && (rtx_cost (x, COMPARE, 0, optimize_insn_for_speed_p ())
> COSTS_N_INSNS (1)))
x = force_reg (mode, x);
if (CONSTANT_P (y) && optimize
- && (rtx_cost (y, COMPARE, optimize_insn_for_speed_p ())
+ && (rtx_cost (y, COMPARE, 1, optimize_insn_for_speed_p ())
> COSTS_N_INSNS (1)))
y = force_reg (mode, y);
result against 1 in the biased case, and zero in the unbiased
case. For unsigned comparisons always compare against 1 after
biasing the unbiased result by adding 1. This gives us a way to
- represent LTU. */
+ represent LTU.
+ The comparisons in the fixed-point helper library are always
+ biased. */
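+ /* E.g. a biased helper in the style of libgcc's __cmpdi2 returns 0, 1
+ or 2 for "less than", "equal" and "greater than", so the result is
+ compared against 1 with the original comparison code; an unbiased
+ helper returns -1, 0 or 1 and is compared against zero, or against 1
+ after adding 1 in the unsigned case. */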
x = result;
y = const1_rtx;
- if (!TARGET_LIB_INT_CMP_BIASED)
+ if (!TARGET_LIB_INT_CMP_BIASED && !ALL_FIXED_POINT_MODE_P (mode))
{
if (unsignedp)
x = plus_constant (result, 1);
return CODE_FOR_nothing;
}
-static enum insn_code
+enum insn_code
can_float_p (enum machine_mode fltmode, enum machine_mode fixmode,
int unsignedp)
{
tab = unsignedp ? ufloat_optab : sfloat_optab;
return convert_optab_handler (tab, fltmode, fixmode);
}
+
+/* Function supportable_convert_operation
+
+ Check whether an operation represented by the code CODE is a
+ convert operation that is supported by the target platform in
+ vector form (i.e., when operating on arguments of type VECTYPE_IN
+ producing a result of type VECTYPE_OUT).
+
+ Convert operations we currently support directly are FIX_TRUNC and FLOAT.
+ This function checks if these operations are supported
+ by the target platform either directly (via vector tree-codes), or via
+ target builtins.
+
+ Output:
+ - CODE1 is code of vector operation to be used when
+ vectorizing the operation, if available.
+ - DECL is decl of target builtin functions to be used
+ when vectorizing the operation, if available. In this case,
+ CODE1 is CALL_EXPR. */
+
+bool
+supportable_convert_operation (enum tree_code code,
+ tree vectype_out, tree vectype_in,
+ tree *decl, enum tree_code *code1)
+{
+ enum machine_mode m1, m2;
+ int truncp;
+
+ m1 = TYPE_MODE (vectype_out);
+ m2 = TYPE_MODE (vectype_in);
+
+ /* First check if we can do the conversion directly. */
+ if ((code == FIX_TRUNC_EXPR
+ && can_fix_p (m1, m2, TYPE_UNSIGNED (vectype_out), &truncp)
+ != CODE_FOR_nothing)
+ || (code == FLOAT_EXPR
+ && can_float_p (m1, m2, TYPE_UNSIGNED (vectype_in))
+ != CODE_FOR_nothing))
+ {
+ *code1 = code;
+ return true;
+ }
+
+ /* Now check for builtin. */
+ if (targetm.vectorize.builtin_conversion
+ && targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
+ {
+ *code1 = CALL_EXPR;
+ *decl = targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in);
+ return true;
+ }
+ return false;
+}
+
\f
/* Generate code to convert FROM to floating point
and store in TO. FROM must be fixed point and not VOIDmode.
int doing_unsigned = unsignedp;
if (fmode != GET_MODE (to)
- && significand_size (fmode) < GET_MODE_BITSIZE (GET_MODE (from)))
+ && significand_size (fmode) < GET_MODE_PRECISION (GET_MODE (from)))
continue;
icode = can_float_p (fmode, imode, unsignedp);
for (fmode = GET_MODE (to); fmode != VOIDmode;
fmode = GET_MODE_WIDER_MODE (fmode))
- if (GET_MODE_BITSIZE (GET_MODE (from)) < GET_MODE_BITSIZE (fmode)
+ if (GET_MODE_PRECISION (GET_MODE (from)) < GET_MODE_BITSIZE (fmode)
&& can_float_p (fmode, GET_MODE (from), 0) != CODE_FOR_nothing)
break;
/* Avoid double-rounding when TO is narrower than FROM. */
if ((significand_size (fmode) + 1)
- < GET_MODE_BITSIZE (GET_MODE (from)))
+ < GET_MODE_PRECISION (GET_MODE (from)))
{
rtx temp1;
rtx neglabel = gen_label_rtx ();
0, label);
- real_2expN (&offset, GET_MODE_BITSIZE (GET_MODE (from)), fmode);
+ real_2expN (&offset, GET_MODE_PRECISION (GET_MODE (from)), fmode);
temp = expand_binop (fmode, add_optab, target,
CONST_DOUBLE_FROM_REAL_VALUE (offset, fmode),
target, 0, OPTAB_LIB_WIDEN);
2^63. The subtraction of 2^63 should not generate any rounding as it
simply clears out that bit. The rest is trivial. */
- if (unsignedp && GET_MODE_BITSIZE (GET_MODE (to)) <= HOST_BITS_PER_WIDE_INT)
+ if (unsignedp && GET_MODE_PRECISION (GET_MODE (to)) <= HOST_BITS_PER_WIDE_INT)
for (fmode = GET_MODE (from); fmode != VOIDmode;
fmode = GET_MODE_WIDER_MODE (fmode))
if (CODE_FOR_nothing != can_fix_p (GET_MODE (to), fmode, 0, &must_trunc)
&& (!DECIMAL_FLOAT_MODE_P (fmode)
- || GET_MODE_BITSIZE (fmode) > GET_MODE_BITSIZE (GET_MODE (to))))
+ || GET_MODE_BITSIZE (fmode) > GET_MODE_PRECISION (GET_MODE (to))))
{
int bitsize;
REAL_VALUE_TYPE offset;
rtx limit, lab1, lab2, insn;
- bitsize = GET_MODE_BITSIZE (GET_MODE (to));
+ bitsize = GET_MODE_PRECISION (GET_MODE (to));
real_2expN (&offset, bitsize - 1, fmode);
limit = CONST_DOUBLE_FROM_REAL_VALUE (offset, fmode);
lab1 = gen_label_rtx ();
{
/* Make a place for a REG_NOTE and add it. */
insn = emit_move_insn (to, to);
- set_unique_reg_note (insn,
- REG_EQUAL,
- gen_rtx_fmt_e (UNSIGNED_FIX,
- GET_MODE (to),
- copy_rtx (from)));
+ set_dst_reg_note (insn, REG_EQUAL,
+ gen_rtx_fmt_e (UNSIGNED_FIX, GET_MODE (to),
+ copy_rtx (from)),
+ to);
}
return;
init_optab (usashl_optab, US_ASHIFT);
init_optab (ashr_optab, ASHIFTRT);
init_optab (lshr_optab, LSHIFTRT);
+ init_optabv (vashl_optab, ASHIFT);
+ init_optabv (vashr_optab, ASHIFTRT);
+ init_optabv (vlshr_optab, LSHIFTRT);
init_optab (rotl_optab, ROTATE);
init_optab (rotr_optab, ROTATERT);
init_optab (smin_optab, SMIN);
init_optab (udot_prod_optab, UNKNOWN);
init_optab (vec_extract_optab, UNKNOWN);
- init_optab (vec_extract_even_optab, UNKNOWN);
- init_optab (vec_extract_odd_optab, UNKNOWN);
- init_optab (vec_interleave_high_optab, UNKNOWN);
- init_optab (vec_interleave_low_optab, UNKNOWN);
init_optab (vec_set_optab, UNKNOWN);
init_optab (vec_init_optab, UNKNOWN);
init_optab (vec_shl_optab, UNKNOWN);
init_optab (vec_widen_umult_lo_optab, UNKNOWN);
init_optab (vec_widen_smult_hi_optab, UNKNOWN);
init_optab (vec_widen_smult_lo_optab, UNKNOWN);
+ init_optab (vec_widen_ushiftl_hi_optab, UNKNOWN);
+ init_optab (vec_widen_ushiftl_lo_optab, UNKNOWN);
+ init_optab (vec_widen_sshiftl_hi_optab, UNKNOWN);
+ init_optab (vec_widen_sshiftl_lo_optab, UNKNOWN);
init_optab (vec_unpacks_hi_optab, UNKNOWN);
init_optab (vec_unpacks_lo_optab, UNKNOWN);
init_optab (vec_unpacku_hi_optab, UNKNOWN);
targetm.init_libfuncs ();
}
+/* A helper function for init_sync_libfuncs. Using the basename BASE,
+ install libfuncs named BASE_N into TAB for each power-of-two N with
+ 1 <= N <= MAX. */
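+/* For example, init_sync_libfuncs_1 (sync_old_add_optab,
+ "__sync_fetch_and_add", 8) registers __sync_fetch_and_add_1 for QImode
+ up through __sync_fetch_and_add_8 for DImode, stepping N through the
+ powers of two 1, 2, 4 and 8. */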
+
+static void
+init_sync_libfuncs_1 (optab tab, const char *base, int max)
+{
+ enum machine_mode mode;
+ char buf[64];
+ size_t len = strlen (base);
+ int i;
+
+ gcc_assert (max <= 8);
+ gcc_assert (len + 3 < sizeof (buf));
+
+ memcpy (buf, base, len);
+ buf[len] = '_';
+ buf[len + 1] = '0';
+ buf[len + 2] = '\0';
+
+ mode = QImode;
+ for (i = 1; i <= max; i *= 2)
+ {
+ buf[len + 1] = '0' + i;
+ set_optab_libfunc (tab, mode, buf);
+ mode = GET_MODE_2XWIDER_MODE (mode);
+ }
+}
+
+void
+init_sync_libfuncs (int max)
+{
+ init_sync_libfuncs_1 (sync_compare_and_swap_optab,
+ "__sync_val_compare_and_swap", max);
+ init_sync_libfuncs_1 (sync_lock_test_and_set_optab,
+ "__sync_lock_test_and_set", max);
+
+ init_sync_libfuncs_1 (sync_old_add_optab, "__sync_fetch_and_add", max);
+ init_sync_libfuncs_1 (sync_old_sub_optab, "__sync_fetch_and_sub", max);
+ init_sync_libfuncs_1 (sync_old_ior_optab, "__sync_fetch_and_or", max);
+ init_sync_libfuncs_1 (sync_old_and_optab, "__sync_fetch_and_and", max);
+ init_sync_libfuncs_1 (sync_old_xor_optab, "__sync_fetch_and_xor", max);
+ init_sync_libfuncs_1 (sync_old_nand_optab, "__sync_fetch_and_nand", max);
+
+ init_sync_libfuncs_1 (sync_new_add_optab, "__sync_add_and_fetch", max);
+ init_sync_libfuncs_1 (sync_new_sub_optab, "__sync_sub_and_fetch", max);
+ init_sync_libfuncs_1 (sync_new_ior_optab, "__sync_or_and_fetch", max);
+ init_sync_libfuncs_1 (sync_new_and_optab, "__sync_and_and_fetch", max);
+ init_sync_libfuncs_1 (sync_new_xor_optab, "__sync_xor_and_fetch", max);
+ init_sync_libfuncs_1 (sync_new_nand_optab, "__sync_nand_and_fetch", max);
+}
+
/* Print information about the current contents of the optabs on
STDERR. */
return gen_rtx_fmt_ee (rcode, VOIDmode, ops[0].value, ops[1].value);
}
-/* Return insn code for TYPE, the type of a VEC_COND_EXPR. */
+/* Return true if VEC_PERM_EXPR can be expanded using SIMD extensions
+ of the CPU. SEL may be NULL, which stands for an unknown constant. */
+
+bool
+can_vec_perm_p (enum machine_mode mode, bool variable,
+ const unsigned char *sel)
+{
+ enum machine_mode qimode;
+
+ /* If the target doesn't implement a vector mode for the vector type,
+ then no operations are supported. */
+ if (!VECTOR_MODE_P (mode))
+ return false;
+
+ if (!variable)
+ {
+ if (direct_optab_handler (vec_perm_const_optab, mode) != CODE_FOR_nothing
+ && (sel == NULL
+ || targetm.vectorize.vec_perm_const_ok == NULL
+ || targetm.vectorize.vec_perm_const_ok (mode, sel)))
+ return true;
+ }
+
+ if (direct_optab_handler (vec_perm_optab, mode) != CODE_FOR_nothing)
+ return true;
+
+ /* We allow fallback to a QI vector mode, and adjust the mask. */
+ if (GET_MODE_INNER (mode) == QImode)
+ return false;
+ qimode = mode_for_vector (QImode, GET_MODE_SIZE (mode));
+ if (!VECTOR_MODE_P (qimode))
+ return false;
+
+ /* ??? For completeness, we ought to check the QImode version of
+ vec_perm_const_optab. But all users of this implicit lowering
+ feature implement the variable vec_perm_optab. */
+ if (direct_optab_handler (vec_perm_optab, qimode) == CODE_FOR_nothing)
+ return false;
+
+ /* In order to support the lowering of variable permutations,
+ we need to support shifts and adds. */
+ if (variable)
+ {
+ if (GET_MODE_UNIT_SIZE (mode) > 2
+ && optab_handler (ashl_optab, mode) == CODE_FOR_nothing
+ && optab_handler (vashl_optab, mode) == CODE_FOR_nothing)
+ return false;
+ if (optab_handler (add_optab, qimode) == CODE_FOR_nothing)
+ return false;
+ }
+
+ return true;
+}
+
+/* A subroutine of expand_vec_perm for expanding one vec_perm insn. */
+
+static rtx
+expand_vec_perm_1 (enum insn_code icode, rtx target,
+ rtx v0, rtx v1, rtx sel)
+{
+ enum machine_mode tmode = GET_MODE (target);
+ enum machine_mode smode = GET_MODE (sel);
+ struct expand_operand ops[4];
+
+ create_output_operand (&ops[0], target, tmode);
+ create_input_operand (&ops[3], sel, smode);
+
+ /* Make an effort to preserve v0 == v1. The target expander is able to
+ rely on this to determine if we're permuting a single input operand. */
+ if (rtx_equal_p (v0, v1))
+ {
+ if (!insn_operand_matches (icode, 1, v0))
+ v0 = force_reg (tmode, v0);
+ gcc_checking_assert (insn_operand_matches (icode, 1, v0));
+ gcc_checking_assert (insn_operand_matches (icode, 2, v0));
+
+ create_fixed_operand (&ops[1], v0);
+ create_fixed_operand (&ops[2], v0);
+ }
+ else
+ {
+ create_input_operand (&ops[1], v0, tmode);
+ create_input_operand (&ops[2], v1, tmode);
+ }
+
+ if (maybe_expand_insn (icode, 4, ops))
+ return ops[0].value;
+ return NULL_RTX;
+}
+
+/* Generate instructions for vec_perm optab given its mode
+ and three operands. */
+
+rtx
+expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
+{
+ enum insn_code icode;
+ enum machine_mode qimode;
+ unsigned int i, w, e, u;
+ rtx tmp, sel_qi = NULL;
+ rtvec vec;
+
+ if (!target || GET_MODE (target) != mode)
+ target = gen_reg_rtx (mode);
+
+ w = GET_MODE_SIZE (mode);
+ e = GET_MODE_NUNITS (mode);
+ u = GET_MODE_UNIT_SIZE (mode);
+
+ /* Set QIMODE to a different vector mode with byte elements.
+ If no such mode, or if MODE already has byte elements, use VOIDmode. */
+ qimode = VOIDmode;
+ if (GET_MODE_INNER (mode) != QImode)
+ {
+ qimode = mode_for_vector (QImode, w);
+ if (!VECTOR_MODE_P (qimode))
+ qimode = VOIDmode;
+ }
+
+ /* If the input is a constant, expand it specially. */
+ gcc_assert (GET_MODE_CLASS (GET_MODE (sel)) == MODE_VECTOR_INT);
+ if (GET_CODE (sel) == CONST_VECTOR)
+ {
+ icode = direct_optab_handler (vec_perm_const_optab, mode);
+ if (icode != CODE_FOR_nothing)
+ {
+ tmp = expand_vec_perm_1 (icode, target, v0, v1, sel);
+ if (tmp)
+ return tmp;
+ }
+
+ /* Fall back to a constant byte-based permutation. */
+ if (qimode != VOIDmode)
+ {
+ vec = rtvec_alloc (w);
+ for (i = 0; i < e; ++i)
+ {
+ unsigned int j, this_e;
+
+ this_e = INTVAL (CONST_VECTOR_ELT (sel, i));
+ this_e &= 2 * e - 1;
+ this_e *= u;
+
+ for (j = 0; j < u; ++j)
+ RTVEC_ELT (vec, i * u + j) = GEN_INT (this_e + j);
+ }
+ sel_qi = gen_rtx_CONST_VECTOR (qimode, vec);
+
+ icode = direct_optab_handler (vec_perm_const_optab, qimode);
+ if (icode != CODE_FOR_nothing)
+ {
+ tmp = expand_vec_perm_1 (icode, gen_lowpart (qimode, target),
+ gen_lowpart (qimode, v0),
+ gen_lowpart (qimode, v1), sel_qi);
+ if (tmp)
+ return gen_lowpart (mode, tmp);
+ }
+ }
+ }
+
+ /* Otherwise expand as a fully variable permutation. */
+ icode = direct_optab_handler (vec_perm_optab, mode);
+ if (icode != CODE_FOR_nothing)
+ {
+ tmp = expand_vec_perm_1 (icode, target, v0, v1, sel);
+ if (tmp)
+ return tmp;
+ }
+
+ /* As a special case to aid several targets, lower the element-based
+ permutation to a byte-based permutation and try again. */
+ if (qimode == VOIDmode)
+ return NULL_RTX;
+ icode = direct_optab_handler (vec_perm_optab, qimode);
+ if (icode == CODE_FOR_nothing)
+ return NULL_RTX;
+
+ if (sel_qi == NULL)
+ {
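+ /* E.g. for a V4SImode selector {1, 0, 3, 2} (u == 4), the three
+ steps below produce, in memory order, the QImode selector
+ {4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11}. */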
+ /* Multiply each element by its byte size. */
+ enum machine_mode selmode = GET_MODE (sel);
+ if (u == 2)
+ sel = expand_simple_binop (selmode, PLUS, sel, sel,
+ sel, 0, OPTAB_DIRECT);
+ else
+ sel = expand_simple_binop (selmode, ASHIFT, sel,
+ GEN_INT (exact_log2 (u)),
+ sel, 0, OPTAB_DIRECT);
+ gcc_assert (sel != NULL);
+
+ /* Broadcast the low byte of each element into each of its bytes. */
+ vec = rtvec_alloc (w);
+ for (i = 0; i < w; ++i)
+ {
+ int this_e = i / u * u;
+ if (BYTES_BIG_ENDIAN)
+ this_e += u - 1;
+ RTVEC_ELT (vec, i) = GEN_INT (this_e);
+ }
+ tmp = gen_rtx_CONST_VECTOR (qimode, vec);
+ sel = gen_lowpart (qimode, sel);
+ sel = expand_vec_perm (qimode, sel, sel, tmp, NULL);
+ gcc_assert (sel != NULL);
+
+ /* Add the byte offset to each byte element. */
+ /* Note that the definition of the indices here is memory ordering,
+ so there should be no difference between big and little endian. */
+ vec = rtvec_alloc (w);
+ for (i = 0; i < w; ++i)
+ RTVEC_ELT (vec, i) = GEN_INT (i % u);
+ tmp = gen_rtx_CONST_VECTOR (qimode, vec);
+ sel_qi = expand_simple_binop (qimode, PLUS, sel, tmp,
+ sel, 0, OPTAB_DIRECT);
+ gcc_assert (sel_qi != NULL);
+ }
+
+ tmp = expand_vec_perm_1 (icode, gen_lowpart (qimode, target),
+ gen_lowpart (qimode, v0),
+ gen_lowpart (qimode, v1), sel_qi);
+ if (tmp)
+ tmp = gen_lowpart (mode, tmp);
+ return tmp;
+}
+
+/* Return insn code for a conditional operator with a comparison in
+ mode CMODE, unsigned if UNS is true, resulting in a value of mode VMODE. */
static inline enum insn_code
-get_vcond_icode (tree type, enum machine_mode mode)
+get_vcond_icode (enum machine_mode vmode, enum machine_mode cmode, bool uns)
{
enum insn_code icode = CODE_FOR_nothing;
-
- if (TYPE_UNSIGNED (type))
- icode = direct_optab_handler (vcondu_optab, mode);
+ if (uns)
+ icode = convert_optab_handler (vcondu_optab, vmode, cmode);
else
- icode = direct_optab_handler (vcond_optab, mode);
+ icode = convert_optab_handler (vcond_optab, vmode, cmode);
return icode;
}
/* Return TRUE iff, appropriate vector insns are available
- for vector cond expr with type TYPE in VMODE mode. */
+ for vector cond expr with vector type VALUE_TYPE and a comparison
+ with operand vector types in CMP_OP_TYPE. */
bool
-expand_vec_cond_expr_p (tree type, enum machine_mode vmode)
-{
- if (get_vcond_icode (type, vmode) == CODE_FOR_nothing)
+expand_vec_cond_expr_p (tree value_type, tree cmp_op_type)
+{
+ enum machine_mode value_mode = TYPE_MODE (value_type);
+ enum machine_mode cmp_op_mode = TYPE_MODE (cmp_op_type);
+ if (GET_MODE_SIZE (value_mode) != GET_MODE_SIZE (cmp_op_mode)
+ || GET_MODE_NUNITS (value_mode) != GET_MODE_NUNITS (cmp_op_mode)
+ || get_vcond_icode (TYPE_MODE (value_type), TYPE_MODE (cmp_op_type),
+ TYPE_UNSIGNED (cmp_op_type)) == CODE_FOR_nothing)
return false;
return true;
}
enum insn_code icode;
rtx comparison, rtx_op1, rtx_op2;
enum machine_mode mode = TYPE_MODE (vec_cond_type);
- bool unsignedp = TYPE_UNSIGNED (vec_cond_type);
+ enum machine_mode cmp_op_mode;
+ bool unsignedp;
- icode = get_vcond_icode (vec_cond_type, mode);
+ gcc_assert (COMPARISON_CLASS_P (op0));
+
+ unsignedp = TYPE_UNSIGNED (TREE_TYPE (TREE_OPERAND (op0, 0)));
+ cmp_op_mode = TYPE_MODE (TREE_TYPE (TREE_OPERAND (op0, 0)));
+
+ gcc_assert (GET_MODE_SIZE (mode) == GET_MODE_SIZE (cmp_op_mode)
+ && GET_MODE_NUNITS (mode) == GET_MODE_NUNITS (cmp_op_mode));
+
+ icode = get_vcond_icode (mode, cmp_op_mode, unsignedp);
if (icode == CODE_FOR_nothing)
return 0;
}
\f
-/* This is an internal subroutine of the other compare_and_swap expanders.
- MEM, OLD_VAL and NEW_VAL are as you'd expect for a compare-and-swap
- operation. TARGET is an optional place to store the value result of
- the operation. ICODE is the particular instruction to expand. Return
- the result of the operation. */
+/* Return true if there is a compare_and_swap pattern. */
-static rtx
-expand_val_compare_and_swap_1 (rtx mem, rtx old_val, rtx new_val,
- rtx target, enum insn_code icode)
+bool
+can_compare_and_swap_p (enum machine_mode mode, bool allow_libcall)
{
- struct expand_operand ops[4];
- enum machine_mode mode = GET_MODE (mem);
+ enum insn_code icode;
- create_output_operand (&ops[0], target, mode);
- create_fixed_operand (&ops[1], mem);
- /* OLD_VAL and NEW_VAL may have been promoted to a wider mode.
- Shrink them if so. */
- create_convert_operand_to (&ops[2], old_val, mode, true);
- create_convert_operand_to (&ops[3], new_val, mode, true);
- if (maybe_expand_insn (icode, 4, ops))
- return ops[0].value;
- return NULL_RTX;
+ /* Check for __atomic_compare_and_swap. */
+ icode = direct_optab_handler (atomic_compare_and_swap_optab, mode);
+ if (icode != CODE_FOR_nothing)
+ return true;
+
+ /* Check for __sync_compare_and_swap. */
+ icode = optab_handler (sync_compare_and_swap_optab, mode);
+ if (icode != CODE_FOR_nothing)
+ return true;
+ if (allow_libcall && optab_libfunc (sync_compare_and_swap_optab, mode))
+ return true;
+
+ /* No inline compare and swap. */
+ return false;
}
-/* Expand a compare-and-swap operation and return its value. */
+/* Return true if an atomic exchange can be performed. */
-rtx
-expand_val_compare_and_swap (rtx mem, rtx old_val, rtx new_val, rtx target)
+bool
+can_atomic_exchange_p (enum machine_mode mode, bool allow_libcall)
{
- enum machine_mode mode = GET_MODE (mem);
- enum insn_code icode
- = direct_optab_handler (sync_compare_and_swap_optab, mode);
+ enum insn_code icode;
- if (icode == CODE_FOR_nothing)
- return NULL_RTX;
+ /* Check for __atomic_exchange. */
+ icode = direct_optab_handler (atomic_exchange_optab, mode);
+ if (icode != CODE_FOR_nothing)
+ return true;
+
+ /* Don't check __sync_lock_test_and_set, as on some platforms that
+ has reduced functionality. Targets that really do support
+ a proper exchange should simply be updated to the __atomics. */
- return expand_val_compare_and_swap_1 (mem, old_val, new_val, target, icode);
+ return can_compare_and_swap_p (mode, allow_libcall);
}
+
/* Helper function to find the MODE_CC set in a sync_compare_and_swap
pattern. */
}
}
-/* Expand a compare-and-swap operation and store true into the result if
- the operation was successful and false otherwise. Return the result.
- Unlike other routines, TARGET is not optional. */
-
-rtx
-expand_bool_compare_and_swap (rtx mem, rtx old_val, rtx new_val, rtx target)
-{
- enum machine_mode mode = GET_MODE (mem);
- enum insn_code icode;
- rtx subtarget, seq, cc_reg;
-
- /* If the target supports a compare-and-swap pattern that simultaneously
- sets some flag for success, then use it. Otherwise use the regular
- compare-and-swap and follow that immediately with a compare insn. */
- icode = direct_optab_handler (sync_compare_and_swap_optab, mode);
- if (icode == CODE_FOR_nothing)
- return NULL_RTX;
-
- do_pending_stack_adjust ();
- do
- {
- start_sequence ();
- subtarget = expand_val_compare_and_swap_1 (mem, old_val, new_val,
- NULL_RTX, icode);
- cc_reg = NULL_RTX;
- if (subtarget == NULL_RTX)
- {
- end_sequence ();
- return NULL_RTX;
- }
-
- if (have_insn_for (COMPARE, CCmode))
- note_stores (PATTERN (get_last_insn ()), find_cc_set, &cc_reg);
- seq = get_insns ();
- end_sequence ();
-
- /* We might be comparing against an old value. Try again. :-( */
- if (!cc_reg && MEM_P (old_val))
- {
- seq = NULL_RTX;
- old_val = force_reg (mode, old_val);
- }
- }
- while (!seq);
-
- emit_insn (seq);
- if (cc_reg)
- return emit_store_flag_force (target, EQ, cc_reg, const0_rtx, VOIDmode, 0, 1);
- else
- return emit_store_flag_force (target, EQ, subtarget, old_val, VOIDmode, 1, 1);
-}
-
/* This is a helper function for the other atomic operations. This function
emits a loop that contains SEQ that iterates until a compare-and-swap
operation at the end succeeds. MEM is the memory to be modified. SEQ is
expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
{
enum machine_mode mode = GET_MODE (mem);
- enum insn_code icode;
- rtx label, cmp_reg, subtarget, cc_reg;
+ rtx label, cmp_reg, success, oldval;
/* The loop we want to generate looks like
label:
old_reg = cmp_reg;
seq;
- cmp_reg = compare-and-swap(mem, old_reg, new_reg)
- if (cmp_reg != old_reg)
+ (success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
+ if (success)
goto label;
Note that we only do the plain load from memory once. Subsequent
if (seq)
emit_insn (seq);
- /* If the target supports a compare-and-swap pattern that simultaneously
- sets some flag for success, then use it. Otherwise use the regular
- compare-and-swap and follow that immediately with a compare insn. */
- icode = direct_optab_handler (sync_compare_and_swap_optab, mode);
- if (icode == CODE_FOR_nothing)
+ success = NULL_RTX;
+ oldval = cmp_reg;
+ if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
+ new_reg, false, MEMMODEL_SEQ_CST,
+ MEMMODEL_RELAXED))
return false;
- subtarget = expand_val_compare_and_swap_1 (mem, old_reg, new_reg,
- cmp_reg, icode);
- if (subtarget == NULL_RTX)
- return false;
+ if (oldval != cmp_reg)
+ emit_move_insn (cmp_reg, oldval);
- cc_reg = NULL_RTX;
- if (have_insn_for (COMPARE, CCmode))
- note_stores (PATTERN (get_last_insn ()), find_cc_set, &cc_reg);
- if (cc_reg)
- {
- cmp_reg = cc_reg;
- old_reg = const0_rtx;
- }
- else
+ /* ??? Mark this jump predicted not taken? */
+ emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
+ GET_MODE (success), 1, label);
+ return true;
+}
+
+
+/* This function tries to emit an atomic_exchange instruction. VAL is written
+ to *MEM using memory model MODEL. The previous contents of *MEM are returned,
+ using TARGET if possible. */
+
+static rtx
+maybe_emit_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model)
+{
+ enum machine_mode mode = GET_MODE (mem);
+ enum insn_code icode;
+
+ /* If the target supports the exchange directly, great. */
+ icode = direct_optab_handler (atomic_exchange_optab, mode);
+ if (icode != CODE_FOR_nothing)
{
- if (subtarget != cmp_reg)
- emit_move_insn (cmp_reg, subtarget);
+ struct expand_operand ops[4];
+
+ create_output_operand (&ops[0], target, mode);
+ create_fixed_operand (&ops[1], mem);
+ /* VAL may have been promoted to a wider mode. Shrink it if so. */
+ create_convert_operand_to (&ops[2], val, mode, true);
+ create_integer_operand (&ops[3], model);
+ if (maybe_expand_insn (icode, 4, ops))
+ return ops[0].value;
}
- /* ??? Mark this jump predicted not taken? */
- emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, const0_rtx, GET_MODE (cmp_reg), 1,
- label);
- return true;
+ return NULL_RTX;
}
-/* This function generates the atomic operation MEM CODE= VAL. In this
- case, we do not care about any resulting value. Returns NULL if we
- cannot generate the operation. */
+/* This function tries to implement an atomic exchange operation using
+ __sync_lock_test_and_set. VAL is written to *MEM using memory model MODEL.
+ The previous contents of *MEM are returned, using TARGET if possible.
+ Since this instruction is an acquire barrier only, stronger memory
+ models may require additional barriers to be emitted. */
-rtx
-expand_sync_operation (rtx mem, rtx val, enum rtx_code code)
+static rtx
+maybe_emit_sync_lock_test_and_set (rtx target, rtx mem, rtx val,
+ enum memmodel model)
{
enum machine_mode mode = GET_MODE (mem);
enum insn_code icode;
- rtx insn;
+ rtx last_insn = get_last_insn ();
- /* Look to see if the target supports the operation directly. */
- switch (code)
+ icode = optab_handler (sync_lock_test_and_set_optab, mode);
+
+ /* Legacy sync_lock_test_and_set is an acquire barrier. If the pattern
+ exists, and the memory model is stronger than acquire, add a release
+ barrier before the instruction. */
+
+ if (model == MEMMODEL_SEQ_CST
+ || model == MEMMODEL_RELEASE
+ || model == MEMMODEL_ACQ_REL)
+ expand_mem_thread_fence (model);
+
+ if (icode != CODE_FOR_nothing)
{
- case PLUS:
- icode = direct_optab_handler (sync_add_optab, mode);
- break;
- case IOR:
- icode = direct_optab_handler (sync_ior_optab, mode);
- break;
- case XOR:
- icode = direct_optab_handler (sync_xor_optab, mode);
- break;
- case AND:
- icode = direct_optab_handler (sync_and_optab, mode);
- break;
- case NOT:
- icode = direct_optab_handler (sync_nand_optab, mode);
- break;
+ struct expand_operand ops[3];
+ create_output_operand (&ops[0], target, mode);
+ create_fixed_operand (&ops[1], mem);
+ /* VAL may have been promoted to a wider mode. Shrink it if so. */
+ create_convert_operand_to (&ops[2], val, mode, true);
+ if (maybe_expand_insn (icode, 3, ops))
+ return ops[0].value;
+ }
- case MINUS:
- icode = direct_optab_handler (sync_sub_optab, mode);
- if (icode == CODE_FOR_nothing || CONST_INT_P (val))
+ /* If an external test-and-set libcall is provided, use that instead of
+ any external compare-and-swap that we might get from the compare-and-
+ swap-loop expansion later. */
+ if (!can_compare_and_swap_p (mode, false))
+ {
+ rtx libfunc = optab_libfunc (sync_lock_test_and_set_optab, mode);
+ if (libfunc != NULL)
{
- icode = direct_optab_handler (sync_add_optab, mode);
- if (icode != CODE_FOR_nothing)
- {
- val = expand_simple_unop (mode, NEG, val, NULL_RTX, 1);
- code = PLUS;
- }
+ rtx addr;
+
+ addr = convert_memory_address (ptr_mode, XEXP (mem, 0));
+ return emit_library_call_value (libfunc, NULL_RTX, LCT_NORMAL,
+ mode, 2, addr, ptr_mode,
+ val, mode);
}
- break;
+ }
- default:
- gcc_unreachable ();
+ /* If the test_and_set can't be emitted, eliminate any barrier that might
+ have been emitted. */
+ delete_insns_since (last_insn);
+ return NULL_RTX;
+}
+
+/* This function tries to implement an atomic exchange operation using a
+ compare_and_swap loop. VAL is written to *MEM. The previous contents of
+ *MEM are returned, using TARGET if possible. No memory model is required
+ since a compare_and_swap loop is seq-cst. */
+
+static rtx
+maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
+{
+ enum machine_mode mode = GET_MODE (mem);
+
+ if (can_compare_and_swap_p (mode, true))
+ {
+ if (!target || !register_operand (target, mode))
+ target = gen_reg_rtx (mode);
+ if (GET_MODE (val) != VOIDmode && GET_MODE (val) != mode)
+ val = convert_modes (mode, GET_MODE (val), val, 1);
+ if (expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
+ return target;
}
- /* Generate the direct operation, if present. */
- if (icode != CODE_FOR_nothing)
+ return NULL_RTX;
+}
+
+/* This function tries to implement an atomic test-and-set operation
+ using the atomic_test_and_set instruction pattern. A boolean value
+ is returned from the operation, using TARGET if possible. */
+
+#ifndef HAVE_atomic_test_and_set
+#define HAVE_atomic_test_and_set 0
+#define CODE_FOR_atomic_test_and_set CODE_FOR_nothing
+#endif
+
+static rtx
+maybe_emit_atomic_test_and_set (rtx target, rtx mem, enum memmodel model)
+{
+ enum machine_mode pat_bool_mode;
+ struct expand_operand ops[3];
+
+ if (!HAVE_atomic_test_and_set)
+ return NULL_RTX;
+
+ /* While we always get QImode from __atomic_test_and_set, we get
+ other memory modes from __sync_lock_test_and_set. Note that we
+ use no endian adjustment here. This matches the 4.6 behavior
+ in the Sparc backend. */
+ gcc_checking_assert
+ (insn_data[CODE_FOR_atomic_test_and_set].operand[1].mode == QImode);
+ if (GET_MODE (mem) != QImode)
+ mem = adjust_address_nv (mem, QImode, 0);
+
+ pat_bool_mode = insn_data[CODE_FOR_atomic_test_and_set].operand[0].mode;
+ create_output_operand (&ops[0], target, pat_bool_mode);
+ create_fixed_operand (&ops[1], mem);
+ create_integer_operand (&ops[2], model);
+
+ if (maybe_expand_insn (CODE_FOR_atomic_test_and_set, 3, ops))
+ return ops[0].value;
+ return NULL_RTX;
+}
+
+/* This function expands the legacy __sync_lock_test_and_set operation, which
+ is generally an atomic exchange. Some limited targets only allow the
+ constant 1 to be stored. This is an ACQUIRE operation.
+
+ TARGET is an optional place to stick the return value.
+ MEM is where VAL is stored. */
+
+rtx
+expand_sync_lock_test_and_set (rtx target, rtx mem, rtx val)
+{
+ rtx ret;
+
+ /* Try an atomic_exchange first. */
+ ret = maybe_emit_atomic_exchange (target, mem, val, MEMMODEL_ACQUIRE);
+ if (ret)
+ return ret;
+
+ ret = maybe_emit_sync_lock_test_and_set (target, mem, val, MEMMODEL_ACQUIRE);
+ if (ret)
+ return ret;
+
+ ret = maybe_emit_compare_and_swap_exchange_loop (target, mem, val);
+ if (ret)
+ return ret;
+
+ /* If there are no other options, try atomic_test_and_set if the value
+ being stored is 1. */
+ if (val == const1_rtx)
+ ret = maybe_emit_atomic_test_and_set (target, mem, MEMMODEL_ACQUIRE);
+
+ return ret;
+}
+
+/* This function expands the atomic test_and_set operation:
+ atomically store a boolean TRUE into MEM and return the previous value.
+
+ MEMMODEL is the memory model variant to use.
+ TARGET is an optional place to stick the return value. */
+
+rtx
+expand_atomic_test_and_set (rtx target, rtx mem, enum memmodel model)
+{
+ enum machine_mode mode = GET_MODE (mem);
+ rtx ret, trueval, subtarget;
+
+ ret = maybe_emit_atomic_test_and_set (target, mem, model);
+ if (ret)
+ return ret;
+
+ /* Be binary compatible with non-default settings of trueval, and different
+ cpu revisions. E.g. one revision may have atomic-test-and-set, but
+ another only has atomic-exchange. */
+ if (targetm.atomic_test_and_set_trueval == 1)
{
- struct expand_operand ops[2];
+ trueval = const1_rtx;
+ subtarget = target ? target : gen_reg_rtx (mode);
+ }
+ else
+ {
+ trueval = gen_int_mode (targetm.atomic_test_and_set_trueval, mode);
+ subtarget = gen_reg_rtx (mode);
+ }
- create_fixed_operand (&ops[0], mem);
- /* VAL may have been promoted to a wider mode. Shrink it if so. */
- create_convert_operand_to (&ops[1], val, mode, true);
- if (maybe_expand_insn (icode, 2, ops))
- return const0_rtx;
+ /* Try the atomic-exchange optab... */
+ ret = maybe_emit_atomic_exchange (subtarget, mem, trueval, model);
+
+ /* ... then an atomic-compare-and-swap loop ... */
+ if (!ret)
+ ret = maybe_emit_compare_and_swap_exchange_loop (subtarget, mem, trueval);
+
+ /* ... before trying the vaguely defined legacy lock_test_and_set. */
+ if (!ret)
+ ret = maybe_emit_sync_lock_test_and_set (subtarget, mem, trueval, model);
+
+ /* Recall that the legacy lock_test_and_set optab was allowed to do magic
+ things with the value 1. Thus we try again without trueval. */
+ if (!ret && targetm.atomic_test_and_set_trueval != 1)
+ ret = maybe_emit_sync_lock_test_and_set (subtarget, mem, const1_rtx, model);
+
+ /* Failing all else, assume a single threaded environment and simply
+ perform the operation. */
+ if (!ret)
+ {
+ emit_move_insn (subtarget, mem);
+ emit_move_insn (mem, trueval);
+ ret = subtarget;
}
- /* Failing that, generate a compare-and-swap loop in which we perform the
- operation with normal arithmetic instructions. */
- if (direct_optab_handler (sync_compare_and_swap_optab, mode)
- != CODE_FOR_nothing)
+ /* Recall that we have to return a boolean value; rectify if trueval
+ is not exactly one. */
+ if (targetm.atomic_test_and_set_trueval != 1)
+ ret = emit_store_flag_force (target, NE, ret, const0_rtx, mode, 0, 1);
+
+ return ret;
+}
+
+/* This function expands the atomic exchange operation:
+ atomically store VAL in MEM and return the previous value in MEM.
+
+ MEMMODEL is the memory model variant to use.
+ TARGET is an optional place to stick the return value. */
+
+rtx
+expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model)
+{
+ rtx ret;
+
+ ret = maybe_emit_atomic_exchange (target, mem, val, model);
+
+ /* Next try a compare-and-swap loop for the exchange. */
+ if (!ret)
+ ret = maybe_emit_compare_and_swap_exchange_loop (target, mem, val);
+
+ return ret;
+}
+
+/* This function expands the atomic compare exchange operation:
+
+ *PTARGET_BOOL is an optional place to store the boolean success/failure.
+ *PTARGET_OVAL is an optional place to store the old value from memory.
+ Both target parameters may be NULL to indicate that we do not care about
+ that return value. Both target parameters are updated on success to
+ the actual location of the corresponding result.
+
+ MEMMODEL is the memory model variant to use.
+
+ The return value of the function is true for success. */
+
+bool
+expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval,
+ rtx mem, rtx expected, rtx desired,
+ bool is_weak, enum memmodel succ_model,
+ enum memmodel fail_model)
+{
+ enum machine_mode mode = GET_MODE (mem);
+ struct expand_operand ops[8];
+ enum insn_code icode;
+ rtx target_oval, target_bool = NULL_RTX;
+ rtx libfunc;
+
+ /* Load expected into a register for the compare and swap. */
+ if (MEM_P (expected))
+ expected = copy_to_reg (expected);
+
+ /* Make sure we always have some place to put the return oldval.
+ Further, make sure that place is distinct from the input expected,
+ just in case we need that path down below. */
+ if (ptarget_oval == NULL
+ || (target_oval = *ptarget_oval) == NULL
+ || reg_overlap_mentioned_p (expected, target_oval))
+ target_oval = gen_reg_rtx (mode);
+
+ icode = direct_optab_handler (atomic_compare_and_swap_optab, mode);
+ if (icode != CODE_FOR_nothing)
{
- rtx t0 = gen_reg_rtx (mode), t1;
+ enum machine_mode bool_mode = insn_data[icode].operand[0].mode;
+
+ /* Make sure we always have a place for the bool operand. */
+ if (ptarget_bool == NULL
+ || (target_bool = *ptarget_bool) == NULL
+ || GET_MODE (target_bool) != bool_mode)
+ target_bool = gen_reg_rtx (bool_mode);
+
+ /* Emit the compare_and_swap. */
+ create_output_operand (&ops[0], target_bool, bool_mode);
+ create_output_operand (&ops[1], target_oval, mode);
+ create_fixed_operand (&ops[2], mem);
+ create_convert_operand_to (&ops[3], expected, mode, true);
+ create_convert_operand_to (&ops[4], desired, mode, true);
+ create_integer_operand (&ops[5], is_weak);
+ create_integer_operand (&ops[6], succ_model);
+ create_integer_operand (&ops[7], fail_model);
+ expand_insn (icode, 8, ops);
+
+ /* Return success/failure. */
+ target_bool = ops[0].value;
+ target_oval = ops[1].value;
+ goto success;
+ }
+
+ /* Otherwise fall back to the original __sync_val_compare_and_swap
+ which is always seq-cst. */
+ icode = optab_handler (sync_compare_and_swap_optab, mode);
+ if (icode != CODE_FOR_nothing)
+ {
+ rtx cc_reg;
- start_sequence ();
+ create_output_operand (&ops[0], target_oval, mode);
+ create_fixed_operand (&ops[1], mem);
+ create_convert_operand_to (&ops[2], expected, mode, true);
+ create_convert_operand_to (&ops[3], desired, mode, true);
+ if (!maybe_expand_insn (icode, 4, ops))
+ return false;
- t1 = t0;
- if (code == NOT)
+ target_oval = ops[0].value;
+
+ /* If the caller isn't interested in the boolean return value,
+ skip the computation of it. */
+ if (ptarget_bool == NULL)
+ goto success;
+
+ /* Otherwise, work out if the compare-and-swap succeeded. */
+ cc_reg = NULL_RTX;
+ if (have_insn_for (COMPARE, CCmode))
+ note_stores (PATTERN (get_last_insn ()), find_cc_set, &cc_reg);
+ if (cc_reg)
{
- t1 = expand_simple_binop (mode, AND, t1, val, NULL_RTX,
- true, OPTAB_LIB_WIDEN);
- t1 = expand_simple_unop (mode, code, t1, NULL_RTX, true);
+ target_bool = emit_store_flag_force (target_bool, EQ, cc_reg,
+ const0_rtx, VOIDmode, 0, 1);
+ goto success;
}
+ goto success_bool_from_val;
+ }
+
+ /* Also check for library support for __sync_val_compare_and_swap. */
+ libfunc = optab_libfunc (sync_compare_and_swap_optab, mode);
+ if (libfunc != NULL)
+ {
+ rtx addr = convert_memory_address (ptr_mode, XEXP (mem, 0));
+ target_oval = emit_library_call_value (libfunc, NULL_RTX, LCT_NORMAL,
+ mode, 3, addr, ptr_mode,
+ expected, mode, desired, mode);
+
+ /* Compute the boolean return value only if requested. */
+ if (ptarget_bool)
+ goto success_bool_from_val;
else
- t1 = expand_simple_binop (mode, code, t1, val, NULL_RTX,
- true, OPTAB_LIB_WIDEN);
- insn = get_insns ();
- end_sequence ();
+ goto success;
+ }
- if (t1 != NULL && expand_compare_and_swap_loop (mem, t0, t1, insn))
- return const0_rtx;
+ /* Failure. */
+ return false;
+
+ success_bool_from_val:
+ target_bool = emit_store_flag_force (target_bool, EQ, target_oval,
+ expected, VOIDmode, 1, 1);
+ success:
+ /* Make sure that the oval output winds up where the caller asked. */
+ if (ptarget_oval)
+ *ptarget_oval = target_oval;
+ if (ptarget_bool)
+ *ptarget_bool = target_bool;
+ return true;
+}
+
+/* Generate asm volatile("" : : : "memory") as the memory barrier. */
+
+static void
+expand_asm_memory_barrier (void)
+{
+ rtx asm_op, clob;
+
+ asm_op = gen_rtx_ASM_OPERANDS (VOIDmode, empty_string, empty_string, 0,
+ rtvec_alloc (0), rtvec_alloc (0),
+ rtvec_alloc (0), UNKNOWN_LOCATION);
+ MEM_VOLATILE_P (asm_op) = 1;
+
+ clob = gen_rtx_SCRATCH (VOIDmode);
+ clob = gen_rtx_MEM (BLKmode, clob);
+ clob = gen_rtx_CLOBBER (VOIDmode, clob);
+
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, asm_op, clob)));
+}
+
+/* This routine will either emit the mem_thread_fence pattern or issue a
+ sync_synchronize to generate a fence for memory model MEMMODEL. */
+
+#ifndef HAVE_mem_thread_fence
+# define HAVE_mem_thread_fence 0
+# define gen_mem_thread_fence(x) (gcc_unreachable (), NULL_RTX)
+#endif
+#ifndef HAVE_memory_barrier
+# define HAVE_memory_barrier 0
+# define gen_memory_barrier() (gcc_unreachable (), NULL_RTX)
+#endif
+
+void
+expand_mem_thread_fence (enum memmodel model)
+{
+ if (HAVE_mem_thread_fence)
+ emit_insn (gen_mem_thread_fence (GEN_INT (model)));
+ else if (model != MEMMODEL_RELAXED)
+ {
+ if (HAVE_memory_barrier)
+ emit_insn (gen_memory_barrier ());
+ else if (synchronize_libfunc != NULL_RTX)
+ emit_library_call (synchronize_libfunc, LCT_NORMAL, VOIDmode, 0);
+ else
+ expand_asm_memory_barrier ();
}
+}
- return NULL_RTX;
+/* This routine will either emit the mem_signal_fence pattern or issue a
+ sync_synchronize to generate a fence for memory model MEMMODEL. */
+
+#ifndef HAVE_mem_signal_fence
+# define HAVE_mem_signal_fence 0
+# define gen_mem_signal_fence(x) (gcc_unreachable (), NULL_RTX)
+#endif
+
+void
+expand_mem_signal_fence (enum memmodel model)
+{
+ if (HAVE_mem_signal_fence)
+ emit_insn (gen_mem_signal_fence (GEN_INT (model)));
+ else if (model != MEMMODEL_RELAXED)
+ {
+ /* By default targets are coherent between a thread and the signal
+ handler running on the same thread. Thus this really becomes a
+ compiler barrier, in that stores must not be sunk past
+ (or raised above) a given point. */
+ expand_asm_memory_barrier ();
+ }
}
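+
+/* For illustration (hypothetical source):
+
+ __atomic_signal_fence (__ATOMIC_SEQ_CST);
+
+ only needs to order the thread against a signal handler running on the
+ same thread, so the compiler barrier above suffices even when the target
+ provides no mem_signal_fence pattern. */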
-/* This function generates the atomic operation MEM CODE= VAL. In this
- case, we do care about the resulting value: if AFTER is true then
- return the value MEM holds after the operation, if AFTER is false
- then return the value MEM holds before the operation. TARGET is an
- optional place for the result value to be stored. */
+/* This function expands the atomic load operation:
+ return the atomically loaded value in MEM.
+
+ MEMMODEL is the memory model variant to use.
+ TARGET is an optional place to store the return value. */
rtx
-expand_sync_fetch_operation (rtx mem, rtx val, enum rtx_code code,
- bool after, rtx target)
+expand_atomic_load (rtx target, rtx mem, enum memmodel model)
{
enum machine_mode mode = GET_MODE (mem);
- enum insn_code old_code, new_code, icode;
- bool compensate;
- rtx insn;
+ enum insn_code icode;
+
+ /* If the target supports the load directly, great. */
+ icode = direct_optab_handler (atomic_load_optab, mode);
+ if (icode != CODE_FOR_nothing)
+ {
+ struct expand_operand ops[3];
+
+ create_output_operand (&ops[0], target, mode);
+ create_fixed_operand (&ops[1], mem);
+ create_integer_operand (&ops[2], model);
+ if (maybe_expand_insn (icode, 3, ops))
+ return ops[0].value;
+ }
+
+ /* If the size of the object is greater than word size on this target,
+ then we assume that a load will not be atomic. */
+ if (GET_MODE_PRECISION (mode) > BITS_PER_WORD)
+ {
+ /* Issue val = compare_and_swap (mem, 0, 0).
+ This may cause the occasional harmless store of 0 when the value is
+ already 0, but that appears to be permitted by the standard. */
+ if (expand_atomic_compare_and_swap (NULL, &target, mem, const0_rtx,
+ const0_rtx, false, model, model))
+ return target;
+ else
+ /* Otherwise there is no atomic load, leave the library call. */
+ return NULL_RTX;
+ }
+
+ /* Otherwise assume loads are atomic, and emit the proper barriers. */
+ if (!target || target == const0_rtx)
+ target = gen_reg_rtx (mode);
+
+ /* Emit the appropriate barrier before the load. */
+ expand_mem_thread_fence (model);
+
+ emit_move_insn (target, mem);
+
+ /* For SEQ_CST, also emit a barrier after the load. */
+ if (model == MEMMODEL_SEQ_CST)
+ expand_mem_thread_fence (model);
+
+ return target;
+}
+
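+/* For illustration (hypothetical source, assuming the usual lowering):
+
+ long v = __atomic_load_n (&x, __ATOMIC_SEQ_CST);
+
+ reaches the routine above with MODEL == MEMMODEL_SEQ_CST; on a target with
+ no atomic_load pattern and a word-sized X this expands to fence, plain
+ load, fence, per the fallback path above. */
+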
+/* This function expands the atomic store operation:
+ Atomically store VAL in MEM.
+ MEMMODEL is the memory model variant to use.
+ USE_RELEASE is true if __sync_lock_release can be used as a fallback.
+ The function returns const0_rtx if a pattern was emitted. */
+
+rtx
+expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release)
+{
+ enum machine_mode mode = GET_MODE (mem);
+ enum insn_code icode;
+ struct expand_operand ops[3];
+
+ /* If the target supports the store directly, great. */
+ icode = direct_optab_handler (atomic_store_optab, mode);
+ if (icode != CODE_FOR_nothing)
+ {
+ create_fixed_operand (&ops[0], mem);
+ create_input_operand (&ops[1], val, mode);
+ create_integer_operand (&ops[2], model);
+ if (maybe_expand_insn (icode, 3, ops))
+ return const0_rtx;
+ }
+
+ /* If using __sync_lock_release is a viable alternative, try it. */
+ if (use_release)
+ {
+ icode = direct_optab_handler (sync_lock_release_optab, mode);
+ if (icode != CODE_FOR_nothing)
+ {
+ create_fixed_operand (&ops[0], mem);
+ create_input_operand (&ops[1], const0_rtx, mode);
+ if (maybe_expand_insn (icode, 2, ops))
+ {
+ /* lock_release is only a release barrier. */
+ if (model == MEMMODEL_SEQ_CST)
+ expand_mem_thread_fence (model);
+ return const0_rtx;
+ }
+ }
+ }
+
+ /* If the size of the object is greater than word size on this target,
+ a plain store will not be atomic. Try an atomic exchange and throw away
+ the result. If that doesn't work, don't do anything. */
+ if (GET_MODE_PRECISION (mode) > BITS_PER_WORD)
+ {
+ rtx target = maybe_emit_atomic_exchange (NULL_RTX, mem, val, model);
+ if (!target)
+ target = maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val);
+ if (target)
+ return const0_rtx;
+ else
+ return NULL_RTX;
+ }
+
+ /* If there is no atomic store pattern, default to a plain move with barriers. */
+ if (model == MEMMODEL_SEQ_CST || model == MEMMODEL_RELEASE)
+ expand_mem_thread_fence (model);
+
+ emit_move_insn (mem, val);
+
+ /* For SEQ_CST, also emit a barrier after the store. */
+ if (model == MEMMODEL_SEQ_CST)
+ expand_mem_thread_fence (model);
+
+ return const0_rtx;
+}
+
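+/* For illustration (hypothetical source, assuming the usual lowering):
+
+ __atomic_store_n (&x, 0, __ATOMIC_RELEASE);
+
+ reaches the routine above with MODEL == MEMMODEL_RELEASE; without an
+ atomic_store pattern and with a word-sized X this expands to a release
+ fence followed by a plain store, per the fallback path above. */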
+
+/* Structure containing the pointers and values required to process the
+ various forms of the atomic_fetch_op and atomic_op_fetch builtins. */
+
+struct atomic_op_functions
+{
+ direct_optab mem_fetch_before;
+ direct_optab mem_fetch_after;
+ direct_optab mem_no_result;
+ optab fetch_before;
+ optab fetch_after;
+ direct_optab no_result;
+ enum rtx_code reverse_code;
+};
+
- /* Look to see if the target supports the operation directly. */
+/* Fill in the structure pointed to by OP with the various optab entries for
+ an operation of type CODE. */
+
+static void
+get_atomic_op_for_code (struct atomic_op_functions *op, enum rtx_code code)
+{
+ gcc_assert (op != NULL);
+
+ /* If SWITCHABLE_TARGET is defined, then subtargets can be switched
+ in the source code during compilation, and the optab entries are not
+ computable until runtime. Fill in the values at runtime. */
switch (code)
{
case PLUS:
- old_code = direct_optab_handler (sync_old_add_optab, mode);
- new_code = direct_optab_handler (sync_new_add_optab, mode);
+ op->mem_fetch_before = atomic_fetch_add_optab;
+ op->mem_fetch_after = atomic_add_fetch_optab;
+ op->mem_no_result = atomic_add_optab;
+ op->fetch_before = sync_old_add_optab;
+ op->fetch_after = sync_new_add_optab;
+ op->no_result = sync_add_optab;
+ op->reverse_code = MINUS;
break;
- case IOR:
- old_code = direct_optab_handler (sync_old_ior_optab, mode);
- new_code = direct_optab_handler (sync_new_ior_optab, mode);
+ case MINUS:
+ op->mem_fetch_before = atomic_fetch_sub_optab;
+ op->mem_fetch_after = atomic_sub_fetch_optab;
+ op->mem_no_result = atomic_sub_optab;
+ op->fetch_before = sync_old_sub_optab;
+ op->fetch_after = sync_new_sub_optab;
+ op->no_result = sync_sub_optab;
+ op->reverse_code = PLUS;
break;
case XOR:
- old_code = direct_optab_handler (sync_old_xor_optab, mode);
- new_code = direct_optab_handler (sync_new_xor_optab, mode);
+ op->mem_fetch_before = atomic_fetch_xor_optab;
+ op->mem_fetch_after = atomic_xor_fetch_optab;
+ op->mem_no_result = atomic_xor_optab;
+ op->fetch_before = sync_old_xor_optab;
+ op->fetch_after = sync_new_xor_optab;
+ op->no_result = sync_xor_optab;
+ op->reverse_code = XOR;
break;
case AND:
- old_code = direct_optab_handler (sync_old_and_optab, mode);
- new_code = direct_optab_handler (sync_new_and_optab, mode);
+ op->mem_fetch_before = atomic_fetch_and_optab;
+ op->mem_fetch_after = atomic_and_fetch_optab;
+ op->mem_no_result = atomic_and_optab;
+ op->fetch_before = sync_old_and_optab;
+ op->fetch_after = sync_new_and_optab;
+ op->no_result = sync_and_optab;
+ op->reverse_code = UNKNOWN;
+ break;
+ case IOR:
+ op->mem_fetch_before = atomic_fetch_or_optab;
+ op->mem_fetch_after = atomic_or_fetch_optab;
+ op->mem_no_result = atomic_or_optab;
+ op->fetch_before = sync_old_ior_optab;
+ op->fetch_after = sync_new_ior_optab;
+ op->no_result = sync_ior_optab;
+ op->reverse_code = UNKNOWN;
break;
case NOT:
- old_code = direct_optab_handler (sync_old_nand_optab, mode);
- new_code = direct_optab_handler (sync_new_nand_optab, mode);
+ op->mem_fetch_before = atomic_fetch_nand_optab;
+ op->mem_fetch_after = atomic_nand_fetch_optab;
+ op->mem_no_result = atomic_nand_optab;
+ op->fetch_before = sync_old_nand_optab;
+ op->fetch_after = sync_new_nand_optab;
+ op->no_result = sync_nand_optab;
+ op->reverse_code = UNKNOWN;
break;
+ default:
+ gcc_unreachable ();
+ }
+}
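+
+/* For example, for CODE == PLUS the routine above selects
+ atomic_fetch_add_optab, atomic_add_fetch_optab and atomic_add_optab for
+ the __atomic patterns, sync_old_add_optab, sync_new_add_optab and
+ sync_add_optab for the older __sync patterns, and records MINUS as the
+ reverse code, so a fetch_add result can be recovered from an add_fetch
+ pattern by subtracting VAL again. */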
- case MINUS:
- old_code = direct_optab_handler (sync_old_sub_optab, mode);
- new_code = direct_optab_handler (sync_new_sub_optab, mode);
- if ((old_code == CODE_FOR_nothing && new_code == CODE_FOR_nothing)
- || CONST_INT_P (val))
- {
- old_code = direct_optab_handler (sync_old_add_optab, mode);
- new_code = direct_optab_handler (sync_new_add_optab, mode);
- if (old_code != CODE_FOR_nothing || new_code != CODE_FOR_nothing)
- {
- val = expand_simple_unop (mode, NEG, val, NULL_RTX, 1);
- code = PLUS;
- }
+/* See if there is a better way to implement the operation "*MEM CODE VAL"
+ using memory order MODEL. If AFTER is true the operation needs to return
+ the value of *MEM after the operation, otherwise the previous value.
+ TARGET is an optional place to store the result; if it is const0_rtx the
+ result is unused.
+ Return the result if there is a better sequence, otherwise NULL_RTX. */
+
+static rtx
+maybe_optimize_fetch_op (rtx target, rtx mem, rtx val, enum rtx_code code,
+ enum memmodel model, bool after)
+{
+ /* If the value is prefetched, or not used, it may be possible to replace
+ the sequence with a native exchange operation. */
+ if (!after || target == const0_rtx)
+ {
+ /* fetch_and (&x, 0, m) can be replaced with exchange (&x, 0, m). */
+ if (code == AND && val == const0_rtx)
+ {
+ if (target == const0_rtx)
+ target = gen_reg_rtx (GET_MODE (mem));
+ return maybe_emit_atomic_exchange (target, mem, val, model);
}
- break;
- default:
- gcc_unreachable ();
+ /* fetch_or (&x, -1, m) can be replaced with exchange (&x, -1, m). */
+ if (code == IOR && val == constm1_rtx)
+ {
+ if (target == const0_rtx)
+ target = gen_reg_rtx (GET_MODE (mem));
+ return maybe_emit_atomic_exchange (target, mem, val, model);
+ }
}
- /* If the target does supports the proper new/old operation, great. But
- if we only support the opposite old/new operation, check to see if we
- can compensate. In the case in which the old value is supported, then
- we can always perform the operation again with normal arithmetic. In
- the case in which the new value is supported, then we can only handle
- this in the case the operation is reversible. */
- compensate = false;
- if (after)
+ return NULL_RTX;
+}
+
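+/* For illustration (hypothetical source): since an atomic exchange also
+ returns the previous value,
+
+ __atomic_fetch_and (&x, 0, __ATOMIC_SEQ_CST);
+
+ can be rewritten by the routine below as __atomic_exchange_n (&x, 0, ...),
+ and __atomic_fetch_or (&x, -1, ...) likewise as an exchange storing -1. */
+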
+/* Try to emit an instruction for a specific operation variation.
+ OPTAB contains the OP functions.
+ TARGET is an optional place to return the result. const0_rtx means unused.
+ MEM is the memory location to operate on.
+ VAL is the value to use in the operation.
+ USE_MEMMODEL is TRUE if the variation with a memory model should be tried.
+ MODEL is the memory model, if used.
+ AFTER is true if the returned result is the value after the operation. */
+
+static rtx
+maybe_emit_op (const struct atomic_op_functions *optab, rtx target, rtx mem,
+ rtx val, bool use_memmodel, enum memmodel model, bool after)
+{
+ enum machine_mode mode = GET_MODE (mem);
+ struct expand_operand ops[4];
+ enum insn_code icode;
+ int op_counter = 0;
+ int num_ops;
+
+ /* Check to see if there is a result returned. */
+ if (target == const0_rtx)
{
- icode = new_code;
- if (icode == CODE_FOR_nothing)
- {
- icode = old_code;
- if (icode != CODE_FOR_nothing)
- compensate = true;
+ if (use_memmodel)
+ {
+ icode = direct_optab_handler (optab->mem_no_result, mode);
+ create_integer_operand (&ops[2], model);
+ num_ops = 3;
+ }
+ else
+ {
+ icode = direct_optab_handler (optab->no_result, mode);
+ num_ops = 2;
}
}
+ /* Otherwise, we need to generate a result. */
else
{
- icode = old_code;
- if (icode == CODE_FOR_nothing
- && (code == PLUS || code == MINUS || code == XOR))
+ if (use_memmodel)
+ {
+ icode = direct_optab_handler (after ? optab->mem_fetch_after
+ : optab->mem_fetch_before, mode);
+ create_integer_operand (&ops[3], model);
+ num_ops = 4;
+ }
+ else
{
- icode = new_code;
- if (icode != CODE_FOR_nothing)
- compensate = true;
+ icode = optab_handler (after ? optab->fetch_after
+ : optab->fetch_before, mode);
+ num_ops = 3;
}
+ create_output_operand (&ops[op_counter++], target, mode);
}
+ if (icode == CODE_FOR_nothing)
+ return NULL_RTX;
- /* If we found something supported, great. */
- if (icode != CODE_FOR_nothing)
+ create_fixed_operand (&ops[op_counter++], mem);
+ /* VAL may have been promoted to a wider mode. Shrink it if so. */
+ create_convert_operand_to (&ops[op_counter++], val, mode, true);
+
+ if (maybe_expand_insn (icode, num_ops, ops))
+ return (target == const0_rtx ? const0_rtx : ops[0].value);
+
+ return NULL_RTX;
+}
+
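+/* As laid out above, the operands passed to the named patterns are, in
+ order: the output (when a result is produced), the memory location, the
+ value, and, for the __atomic variants, the memory model; e.g. an
+ atomic_fetch_add<mode> expander sees (result, mem, val, model) while the
+ older sync_old_add<mode> sees only (result, mem, val). */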
+
+/* This function expands an atomic fetch_OP or OP_fetch operation:
+ atomically fetch MEM, perform the operation with VAL and store the result
+ back to MEM.
+ TARGET is an optional place to store the return value; const0_rtx indicates
+ the result is unused.
+ CODE is the operation being performed (OP).
+ MEMMODEL is the memory model variant to use.
+ AFTER is true to return the result of the operation (OP_fetch);
+ AFTER is false to return the value before the operation (fetch_OP). */
+rtx
+expand_atomic_fetch_op (rtx target, rtx mem, rtx val, enum rtx_code code,
+ enum memmodel model, bool after)
+{
+ enum machine_mode mode = GET_MODE (mem);
+ struct atomic_op_functions optab;
+ rtx result;
+ bool unused_result = (target == const0_rtx);
+
+ get_atomic_op_for_code (&optab, code);
+
+ /* Check to see if there are any better instructions. */
+ result = maybe_optimize_fetch_op (target, mem, val, code, model, after);
+ if (result)
+ return result;
+
+ /* Check for the case where the result isn't used and try those patterns. */
+ if (unused_result)
{
- struct expand_operand ops[3];
+ /* Try the memory model variant first. */
+ result = maybe_emit_op (&optab, target, mem, val, true, model, true);
+ if (result)
+ return result;
- create_output_operand (&ops[0], target, mode);
- create_fixed_operand (&ops[1], mem);
- /* VAL may have been promoted to a wider mode. Shrink it if so. */
- create_convert_operand_to (&ops[2], val, mode, true);
- if (maybe_expand_insn (icode, 3, ops))
+ /* Next try the old-style patterns without a memory model. */
+ result = maybe_emit_op (&optab, target, mem, val, false, model, true);
+ if (result)
+ return result;
+
+ /* There is no no-result pattern, so try patterns with a result. */
+ target = NULL_RTX;
+ }
+
+ /* Try the __atomic version. */
+ result = maybe_emit_op (&optab, target, mem, val, true, model, after);
+ if (result)
+ return result;
+
+ /* Try the older __sync version. */
+ result = maybe_emit_op (&optab, target, mem, val, false, model, after);
+ if (result)
+ return result;
+
+ /* If the fetch value can be calculated from the other variation of fetch,
+ try that operation. */
+ if (after || unused_result || optab.reverse_code != UNKNOWN)
+ {
+ /* Try the __atomic version, then the older __sync version. */
+ result = maybe_emit_op (&optab, target, mem, val, true, model, !after);
+ if (!result)
+ result = maybe_emit_op (&optab, target, mem, val, false, model, !after);
+
+ if (result)
{
- target = ops[0].value;
- val = ops[2].value;
- /* If we need to compensate for using an operation with the
- wrong return value, do so now. */
- if (compensate)
+ /* If the result isn't used, no need to do compensation code. */
+ if (unused_result)
+ return result;
+
+ /* Issue compensation code. Fetch_after == fetch_before OP val.
+ Fetch_before == fetch_after REVERSE_OP val. */
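+ /* E.g. an add_fetch result is the fetch_add result plus VAL, and a
+ fetch_sub result is the sub_fetch result plus VAL (PLUS being the
+ recorded reverse code of MINUS). */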
+ if (!after)
+ code = optab.reverse_code;
+ if (code == NOT)
{
- if (!after)
- {
- if (code == PLUS)
- code = MINUS;
- else if (code == MINUS)
- code = PLUS;
- }
-
- if (code == NOT)
- {
- target = expand_simple_binop (mode, AND, target, val,
- NULL_RTX, true,
- OPTAB_LIB_WIDEN);
- target = expand_simple_unop (mode, code, target,
- NULL_RTX, true);
- }
- else
- target = expand_simple_binop (mode, code, target, val,
- NULL_RTX, true,
- OPTAB_LIB_WIDEN);
+ result = expand_simple_binop (mode, AND, result, val, NULL_RTX,
+ true, OPTAB_LIB_WIDEN);
+ result = expand_simple_unop (mode, NOT, result, target, true);
}
+ else
+ result = expand_simple_binop (mode, code, result, val, target,
+ true, OPTAB_LIB_WIDEN);
+ return result;
+ }
+ }
- return target;
+ /* Try the __sync libcalls only if we can't do compare-and-swap inline. */
+ if (!can_compare_and_swap_p (mode, false))
+ {
+ rtx libfunc;
+ bool fixup = false;
+
+ libfunc = optab_libfunc (after ? optab.fetch_after
+ : optab.fetch_before, mode);
+ if (libfunc == NULL
+ && (after || unused_result || optab.reverse_code != UNKNOWN))
+ {
+ fixup = true;
+ if (!after)
+ code = optab.reverse_code;
+ libfunc = optab_libfunc (after ? optab.fetch_before
+ : optab.fetch_after, mode);
+ }
+ if (libfunc != NULL)
+ {
+ rtx addr = convert_memory_address (ptr_mode, XEXP (mem, 0));
+ result = emit_library_call_value (libfunc, NULL, LCT_NORMAL, mode,
+ 2, addr, ptr_mode, val, mode);
+
+ if (!unused_result && fixup)
+ result = expand_simple_binop (mode, code, result, val, target,
+ true, OPTAB_LIB_WIDEN);
+ return result;
}
}
- /* Failing that, generate a compare-and-swap loop in which we perform the
- operation with normal arithmetic instructions. */
- if (direct_optab_handler (sync_compare_and_swap_optab, mode)
- != CODE_FOR_nothing)
+ /* If nothing else has succeeded, default to a compare and swap loop. */
+ if (can_compare_and_swap_p (mode, true))
{
+ rtx insn;
rtx t0 = gen_reg_rtx (mode), t1;
- if (!target || !register_operand (target, mode))
- target = gen_reg_rtx (mode);
-
start_sequence ();
- if (!after)
- emit_move_insn (target, t0);
+ /* If the result is used, get a register for it. */
+ if (!unused_result)
+ {
+ if (!target || !register_operand (target, mode))
+ target = gen_reg_rtx (mode);
+ /* If fetch_before, copy the value now. */
+ if (!after)
+ emit_move_insn (target, t0);
+ }
+ else
+ target = const0_rtx;
+
t1 = t0;
if (code == NOT)
- {
+ {
t1 = expand_simple_binop (mode, AND, t1, val, NULL_RTX,
true, OPTAB_LIB_WIDEN);
t1 = expand_simple_unop (mode, code, t1, NULL_RTX, true);
}
else
- t1 = expand_simple_binop (mode, code, t1, val, NULL_RTX,
- true, OPTAB_LIB_WIDEN);
- if (after)
- emit_move_insn (target, t1);
+ t1 = expand_simple_binop (mode, code, t1, val, NULL_RTX, true,
+ OPTAB_LIB_WIDEN);
+ /* For after, copy the value now. */
+ if (!unused_result && after)
+ emit_move_insn (target, t1);
insn = get_insns ();
end_sequence ();
if (t1 != NULL && expand_compare_and_swap_loop (mem, t0, t1, insn))
- return target;
- }
-
- return NULL_RTX;
-}
-
-/* This function expands a test-and-set operation. Ideally we atomically
- store VAL in MEM and return the previous value in MEM. Some targets
- may not support this operation and only support VAL with the constant 1;
- in this case while the return value will be 0/1, but the exact value
- stored in MEM is target defined. TARGET is an option place to stick
- the return value. */
-
-rtx
-expand_sync_lock_test_and_set (rtx mem, rtx val, rtx target)
-{
- enum machine_mode mode = GET_MODE (mem);
- enum insn_code icode;
-
- /* If the target supports the test-and-set directly, great. */
- icode = direct_optab_handler (sync_lock_test_and_set_optab, mode);
- if (icode != CODE_FOR_nothing)
- {
- struct expand_operand ops[3];
-
- create_output_operand (&ops[0], target, mode);
- create_fixed_operand (&ops[1], mem);
- /* VAL may have been promoted to a wider mode. Shrink it if so. */
- create_convert_operand_to (&ops[2], val, mode, true);
- if (maybe_expand_insn (icode, 3, ops))
- return ops[0].value;
- }
-
- /* Otherwise, use a compare-and-swap loop for the exchange. */
- if (direct_optab_handler (sync_compare_and_swap_optab, mode)
- != CODE_FOR_nothing)
- {
- if (!target || !register_operand (target, mode))
- target = gen_reg_rtx (mode);
- if (GET_MODE (val) != VOIDmode && GET_MODE (val) != mode)
- val = convert_modes (mode, GET_MODE (val), val, 1);
- if (expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
- return target;
+ return target;
}
return NULL_RTX;
return true;
/* If the operand is a memory whose address has no side effects,
- try forcing the address into a register. The check for side
- effects is important because force_reg cannot handle things
- like auto-modified addresses. */
- if (insn_data[(int) icode].operand[opno].allows_mem
- && MEM_P (op->value)
- && !side_effects_p (XEXP (op->value, 0)))
- {
- rtx addr, mem, last;
-
- last = get_last_insn ();
- addr = force_reg (Pmode, XEXP (op->value, 0));
- mem = replace_equiv_address (op->value, addr);
- if (insn_operand_matches (icode, opno, mem))
+ try forcing the address into a non-virtual pseudo register.
+ The check for side effects is important because copy_to_mode_reg
+ cannot handle things like auto-modified addresses. */
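+ /* An auto-modified address is, e.g., the POST_INC form a hypothetical
+ "*p++" operand would use; side_effects_p flags such addresses and we
+ leave them untouched rather than copying them into a register. */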
+ if (insn_data[(int) icode].operand[opno].allows_mem && MEM_P (op->value))
+ {
+ rtx addr, mem;
+
+ mem = op->value;
+ addr = XEXP (mem, 0);
+ if (!(REG_P (addr) && REGNO (addr) > LAST_VIRTUAL_REGISTER)
+ && !side_effects_p (addr))
{
- op->value = mem;
- return true;
+ rtx last;
+ enum machine_mode mode;
+
+ last = get_last_insn ();
+ mode = targetm.addr_space.address_mode (MEM_ADDR_SPACE (mem));
+ mem = replace_equiv_address (mem, copy_to_mode_reg (mode, addr));
+ if (insn_operand_matches (icode, opno, mem))
+ {
+ op->value = mem;
+ return true;
+ }
+ delete_insns_since (last);
}
- delete_insns_since (last);
}
return false;
case 6:
return GEN_FCN (icode) (ops[0].value, ops[1].value, ops[2].value,
ops[3].value, ops[4].value, ops[5].value);
+ case 7:
+ return GEN_FCN (icode) (ops[0].value, ops[1].value, ops[2].value,
+ ops[3].value, ops[4].value, ops[5].value,
+ ops[6].value);
+ case 8:
+ return GEN_FCN (icode) (ops[0].value, ops[1].value, ops[2].value,
+ ops[3].value, ops[4].value, ops[5].value,
+ ops[6].value, ops[7].value);
}
gcc_unreachable ();
}