struct move_by_pieces *);
static bool block_move_libcall_safe_for_call_parm (void);
static bool emit_block_move_via_movmem (rtx, rtx, rtx, unsigned);
-static rtx emit_block_move_via_libcall (rtx, rtx, rtx);
+static rtx emit_block_move_via_libcall (rtx, rtx, rtx, bool);
static tree emit_block_move_libcall_fn (int);
static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned);
static rtx clear_by_pieces_1 (void *, HOST_WIDE_INT, enum machine_mode);
static void store_by_pieces_2 (rtx (*) (rtx, ...), enum machine_mode,
struct store_by_pieces *);
static bool clear_storage_via_clrmem (rtx, rtx, unsigned);
-static rtx clear_storage_via_libcall (rtx, rtx);
+static rtx clear_storage_via_libcall (rtx, rtx, bool);
static tree clear_storage_libcall_fn (int);
static rtx compress_float_constant (rtx, rtx);
static rtx get_subtarget (rtx);
enum insn_code cmpstr_optab[NUM_MACHINE_MODES];
enum insn_code cmpmem_optab[NUM_MACHINE_MODES];
+/* Synchronization primitives. */
+enum insn_code sync_add_optab[NUM_MACHINE_MODES];
+enum insn_code sync_sub_optab[NUM_MACHINE_MODES];
+enum insn_code sync_ior_optab[NUM_MACHINE_MODES];
+enum insn_code sync_and_optab[NUM_MACHINE_MODES];
+enum insn_code sync_xor_optab[NUM_MACHINE_MODES];
+enum insn_code sync_nand_optab[NUM_MACHINE_MODES];
+enum insn_code sync_old_add_optab[NUM_MACHINE_MODES];
+enum insn_code sync_old_sub_optab[NUM_MACHINE_MODES];
+enum insn_code sync_old_ior_optab[NUM_MACHINE_MODES];
+enum insn_code sync_old_and_optab[NUM_MACHINE_MODES];
+enum insn_code sync_old_xor_optab[NUM_MACHINE_MODES];
+enum insn_code sync_old_nand_optab[NUM_MACHINE_MODES];
+enum insn_code sync_new_add_optab[NUM_MACHINE_MODES];
+enum insn_code sync_new_sub_optab[NUM_MACHINE_MODES];
+enum insn_code sync_new_ior_optab[NUM_MACHINE_MODES];
+enum insn_code sync_new_and_optab[NUM_MACHINE_MODES];
+enum insn_code sync_new_xor_optab[NUM_MACHINE_MODES];
+enum insn_code sync_new_nand_optab[NUM_MACHINE_MODES];
+enum insn_code sync_compare_and_swap[NUM_MACHINE_MODES];
+enum insn_code sync_compare_and_swap_cc[NUM_MACHINE_MODES];
+enum insn_code sync_lock_test_and_set[NUM_MACHINE_MODES];
+enum insn_code sync_lock_release[NUM_MACHINE_MODES];
+
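Each of these tables follows the long-standing optab convention in this file: one insn_code entry per machine mode, filled in from the target's named patterns and guarded by CODE_FOR_nothing at expansion time. A minimal sketch of the lookup idiom, assuming the target's sync-add pattern takes a memory operand and a value (the function name is hypothetical, not part of the patch; real operand counts vary by pattern):

    /* Hypothetical expander: consult the mode-indexed table and bail
       out when the target defines no pattern for this mode.  */
    static rtx
    expand_sync_add_sketch (rtx mem, rtx val, enum machine_mode mode)
    {
      enum insn_code icode = sync_add_optab[(int) mode];
      rtx insn;

      if (icode == CODE_FOR_nothing)
        return NULL_RTX;   /* Caller falls back to a libcall or loop.  */

      /* GEN_FCN dispatches to the target's named pattern.  */
      insn = GEN_FCN (icode) (mem, val);
      if (insn == NULL_RTX)
        return NULL_RTX;
      emit_insn (insn);
      return mem;
    }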
/* SLOW_UNALIGNED_ACCESS is nonzero if unaligned accesses are very slow. */
#ifndef SLOW_UNALIGNED_ACCESS
switch (method)
{
case BLOCK_OP_NORMAL:
+ case BLOCK_OP_TAILCALL:
may_use_call = true;
break;
else if (emit_block_move_via_movmem (x, y, size, align))
;
else if (may_use_call)
- retval = emit_block_move_via_libcall (x, y, size);
+ retval = emit_block_move_via_libcall (x, y, size,
+ method == BLOCK_OP_TAILCALL);
else
emit_block_move_via_loop (x, y, size, align);
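The dispatch above is unchanged except that BLOCK_OP_TAILCALL now also permits a library call, and additionally requests that the call be emitted in tail position. A hedged usage sketch (the caller below is hypothetical; only emit_block_move and the method values come from the patch):

    /* Expanding a memcpy whose result is returned directly: the
       libcall, if one is emitted, may become a tail call.  */
    static rtx
    expand_memcpy_tail_sketch (rtx dst, rtx src, rtx len)
    {
      return emit_block_move (dst, src, len, BLOCK_OP_TAILCALL);
    }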
Return the return value from memcpy, 0 otherwise. */
static rtx
-emit_block_move_via_libcall (rtx dst, rtx src, rtx size)
+emit_block_move_via_libcall (rtx dst, rtx src, rtx size, bool tailcall)
{
rtx dst_addr, src_addr;
tree call_expr, arg_list, fn, src_tree, dst_tree, size_tree;
call_expr = build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE (fn)), fn);
call_expr = build3 (CALL_EXPR, TREE_TYPE (TREE_TYPE (fn)),
call_expr, arg_list, NULL_TREE);
+ CALL_EXPR_TAILCALL (call_expr) = tailcall;
retval = expand_expr (call_expr, NULL_RTX, VOIDmode, 0);
its length in bytes. */
rtx
-clear_storage (rtx object, rtx size)
+clear_storage (rtx object, rtx size, enum block_op_methods method)
{
enum machine_mode mode = GET_MODE (object);
unsigned int align;
+ gcc_assert (method == BLOCK_OP_NORMAL || method == BLOCK_OP_TAILCALL);
+
/* If OBJECT is not BLKmode and SIZE is the same size as its mode,
just move a zero. Otherwise, do this a piece at a time. */
if (mode != BLKmode
else if (clear_storage_via_clrmem (object, size, align))
;
else
- return clear_storage_via_libcall (object, size);
+ return clear_storage_via_libcall (object, size,
+ method == BLOCK_OP_TAILCALL);
return NULL;
}
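clear_storage grows the same method parameter; the assert above restricts it to BLOCK_OP_NORMAL and BLOCK_OP_TAILCALL, since the other methods make no sense for a memset. Existing callers (see the hunks below) pass BLOCK_OP_NORMAL; a hypothetical tail-position caller would look like:

    /* Zero a BLKmode object; allow the memset libcall to be a tail
       call when nothing follows it.  */
    static rtx
    clear_object_tail_sketch (rtx object, rtx size)
    {
      return clear_storage (object, size, BLOCK_OP_TAILCALL);
    }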
Return the return value of memset, 0 otherwise. */
static rtx
-clear_storage_via_libcall (rtx object, rtx size)
+clear_storage_via_libcall (rtx object, rtx size, bool tailcall)
{
tree call_expr, arg_list, fn, object_tree, size_tree;
enum machine_mode size_mode;
call_expr = build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE (fn)), fn);
call_expr = build3 (CALL_EXPR, TREE_TYPE (TREE_TYPE (fn)),
call_expr, arg_list, NULL_TREE);
+ CALL_EXPR_TAILCALL (call_expr) = tailcall;
retval = expand_expr (call_expr, NULL_RTX, VOIDmode, 0);
the original object if it spans an even number of hard regs.
This special case is important for SCmode on 64-bit platforms
where the natural size of floating-point regs is 32-bit. */
- || (GET_CODE (cplx) == REG
+ || (REG_P (cplx)
&& REGNO (cplx) < FIRST_PSEUDO_REGISTER
&& hard_regno_nregs[REGNO (cplx)][cmode] % 2 == 0)
/* For MEMs we always try to make a "subreg", that is to adjust
the original object if it spans an even number of hard regs.
This special case is important for SCmode on 64-bit platforms
where the natural size of floating-point regs is 32-bit. */
- || (GET_CODE (cplx) == REG
+ || (REG_P (cplx)
&& REGNO (cplx) < FIRST_PSEUDO_REGISTER
&& hard_regno_nregs[REGNO (cplx)][cmode] % 2 == 0)
/* For MEMs we always try to make a "subreg", that is to adjust
}
else
{
- /* Note that we do want simplify_subreg's behaviour of validating
+ /* Note that we do want simplify_subreg's behavior of validating
that the new mode is ok for a hard register. If we were to use
simplify_gen_subreg, we would create the subreg, but would
probably run into the target not being able to implement it. */
if (push_operand (x, mode))
return emit_move_complex_push (mode, x, y);
- /* For memory to memory moves, optimial behaviour can be had with the
- existing block move logic. */
- if (MEM_P (x) && MEM_P (y))
- {
- emit_block_move (x, y, GEN_INT (GET_MODE_SIZE (mode)),
- BLOCK_OP_NO_LIBCALL);
- return get_last_insn ();
- }
-
/* See if we can coerce the target into moving both values at once. */
+ /* Move floating point as parts. */
+ if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
+ && mov_optab->handlers[GET_MODE_INNER (mode)].insn_code != CODE_FOR_nothing)
+ try_int = false;
/* Not possible if the values are inherently not adjacent. */
- if (GET_CODE (x) == CONCAT || GET_CODE (y) == CONCAT)
+ else if (GET_CODE (x) == CONCAT || GET_CODE (y) == CONCAT)
try_int = false;
/* Is possible if both are registers (or subregs of registers). */
else if (register_operand (x, mode) && register_operand (y, mode))
if (try_int)
{
- rtx ret = emit_move_via_integer (mode, x, y);
+ rtx ret;
+
+ /* For memory to memory moves, optimal behavior can be had with the
+ existing block move logic. */
+ if (MEM_P (x) && MEM_P (y))
+ {
+ emit_block_move (x, y, GEN_INT (GET_MODE_SIZE (mode)),
+ BLOCK_OP_NO_LIBCALL);
+ return get_last_insn ();
+ }
+
+ ret = emit_move_via_integer (mode, x, y);
if (ret)
return ret;
}
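One consequence of moving the MEM/MEM test inside the try_int arm: a complex-float copy between memory locations on a target with scalar float moves now sets try_int false up front and goes through the part-wise path instead of a block move. Illustrative source (assuming the copy expands as a single SCmode move):

    /* With the new ordering, the real and imaginary SFmode parts are
       moved separately rather than via the block-move machinery.  */
    void
    copy_sc (_Complex float *dst, const _Complex float *src)
    {
      *dst = *src;
    }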
rtx ypart = operand_subword (y, i, 1, mode);
/* If we can't get a part of Y, put Y into memory if it is a
- constant. Otherwise, force it into a register. If we still
- can't get a part of Y, abort. */
+ constant. Otherwise, force it into a register. Then we must
+ be able to get a part of Y. */
if (ypart == 0 && CONSTANT_P (y))
{
y = force_const_mem (mode, y);
int not_stack;
/* # bytes of start of argument
that we must make space for but need not store. */
- int offset = partial % (PARM_BOUNDARY / BITS_PER_WORD);
+ int offset = partial % (PARM_BOUNDARY / BITS_PER_UNIT);
int args_offset = INTVAL (args_so_far);
int skip;
offset = 0;
/* Now NOT_STACK gets the number of words that we don't need to
- allocate on the stack. */
+ allocate on the stack. Convert OFFSET to words too. */
not_stack = (partial - offset) / UNITS_PER_WORD;
+ offset /= UNITS_PER_WORD;
/* If the partial register-part of the arg counts in its stack size,
skip the part of stack space corresponding to the registers.
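The change of divisor from BITS_PER_WORD to BITS_PER_UNIT fixes the units: PARTIAL and OFFSET are byte counts here, so the parameter boundary must also be converted from bits to bytes. A worked example, assuming PARM_BOUNDARY == 64, BITS_PER_UNIT == 8, BITS_PER_WORD == 32, UNITS_PER_WORD == 4 and partial == 12 bytes passed in registers:

    old: offset = 12 % (64 / 32) = 0     /* wrong units: always 0 or 1 */
    new: offset = 12 % (64 / 8)  = 4     /* bytes past the boundary */

    not_stack = (12 - 4) / 4 = 2         /* whole words left in regs */
    offset    = 4 / 4        = 1         /* OFFSET now in words, too */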
emit_move_insn (str_rtx, result);
return true;
+ case BIT_IOR_EXPR:
+ case BIT_XOR_EXPR:
+ if (TREE_CODE (op1) != INTEGER_CST)
+ break;
+ value = expand_expr (op1, NULL_RTX, GET_MODE (str_rtx), 0);
+ value = convert_modes (GET_MODE (str_rtx),
+ TYPE_MODE (TREE_TYPE (op1)), value,
+ TYPE_UNSIGNED (TREE_TYPE (op1)));
+
+ /* We may be accessing data outside the field, which means
+ we can alias adjacent data. */
+ if (MEM_P (str_rtx))
+ {
+ str_rtx = shallow_copy_rtx (str_rtx);
+ set_mem_alias_set (str_rtx, 0);
+ set_mem_expr (str_rtx, 0);
+ }
+
+ binop = TREE_CODE (src) == BIT_IOR_EXPR ? ior_optab : xor_optab;
+ if (bitpos + bitsize != GET_MODE_BITSIZE (GET_MODE (str_rtx)))
+ {
+ rtx mask = GEN_INT (((unsigned HOST_WIDE_INT) 1 << bitsize)
+ - 1);
+ value = expand_and (GET_MODE (str_rtx), value, mask,
+ NULL_RTX);
+ }
+ value = expand_shift (LSHIFT_EXPR, GET_MODE (str_rtx), value,
+ build_int_cst (NULL_TREE, bitpos),
+ NULL_RTX, 1);
+ result = expand_binop (GET_MODE (str_rtx), binop, str_rtx,
+ value, str_rtx, 1, OPTAB_WIDEN);
+ if (result != str_rtx)
+ emit_move_insn (str_rtx, result);
+ return true;
+
default:
break;
}
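The new BIT_IOR_EXPR/BIT_XOR_EXPR cases turn a constant read-modify-write of a bit-field into a single IOR or XOR on the word containing it, masking and shifting the constant at compile time instead of extracting and reinserting the field. An illustrative source-level case (not from the patch):

    struct flags
    {
      unsigned int pad : 3;
      unsigned int f : 4;    /* bitpos == 3, bitsize == 4 */
    };

    void
    set_bit (struct flags *p)
    {
      /* Becomes one IOR with ((1 & 0xf) << 3) == 0x8 on the
         containing word, with no extract/insert pair.  */
      p->f |= 1;
    }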
{
enum machine_mode mode1;
HOST_WIDE_INT bitsize, bitpos;
- rtx orig_to_rtx;
tree offset;
int unsignedp;
int volatilep = 0;
/* If we are going to use store_bit_field and extract_bit_field,
make sure to_rtx will be safe for multiple use. */
- orig_to_rtx = to_rtx = expand_expr (tem, NULL_RTX, VOIDmode, 0);
+ to_rtx = expand_expr (tem, NULL_RTX, VOIDmode, 0);
if (offset != 0)
{
but TARGET is not valid memory reference, TEMP will differ
from TARGET although it is really the same location. */
&& !(alt_rtl && rtx_equal_p (alt_rtl, target))
- /* If there's nothing to copy, don't bother. Don't call expr_size
- unless necessary, because some front-ends (C++) expr_size-hook
- aborts on objects that are not supposed to be bit-copied or
- bit-initialized. */
+ /* If there's nothing to copy, don't bother. Don't call
+ expr_size unless necessary, because some front-ends (C++)
+ expr_size-hook must not be given objects that are not
+ supposed to be bit-copied or bit-initialized. */
&& expr_size (exp) != const0_rtx)
{
if (GET_MODE (temp) != GET_MODE (target)
}
if (size != const0_rtx)
- clear_storage (target, size);
+ clear_storage (target, size, BLOCK_OP_NORMAL);
if (label)
emit_label (label);
* how many scalar fields are set to non-constant values,
and place it in *P_NC_ELTS; and
* how many scalar fields in total are in CTOR,
- and place it in *P_ELT_COUNT. */
+ and place it in *P_ELT_COUNT.
+ * if a type is a union, and the initializer from the constructor
+ is not the largest element in the union, then set *p_must_clear. */
static void
categorize_ctor_elements_1 (tree ctor, HOST_WIDE_INT *p_nz_elts,
HOST_WIDE_INT *p_nc_elts,
- HOST_WIDE_INT *p_elt_count)
+ HOST_WIDE_INT *p_elt_count,
+ bool *p_must_clear)
{
HOST_WIDE_INT nz_elts, nc_elts, elt_count;
tree list;
{
case CONSTRUCTOR:
{
- HOST_WIDE_INT nz = 0, nc = 0, count = 0;
- categorize_ctor_elements_1 (value, &nz, &nc, &count);
+ HOST_WIDE_INT nz = 0, nc = 0, ic = 0;
+ categorize_ctor_elements_1 (value, &nz, &nc, &ic, p_must_clear);
nz_elts += mult * nz;
nc_elts += mult * nc;
- elt_count += mult * count;
+ elt_count += mult * ic;
}
break;
}
}
+ if (!*p_must_clear
+ && (TREE_CODE (TREE_TYPE (ctor)) == UNION_TYPE
+ || TREE_CODE (TREE_TYPE (ctor)) == QUAL_UNION_TYPE))
+ {
+ tree init_sub_type;
+ bool clear_this = true;
+
+ list = CONSTRUCTOR_ELTS (ctor);
+ if (list)
+ {
+ /* We don't expect more than one element of the union to be
+ initialized. Not sure what we should do otherwise... */
+ gcc_assert (TREE_CHAIN (list) == NULL);
+
+ init_sub_type = TREE_TYPE (TREE_VALUE (list));
+
+ /* ??? We could look at each element of the union, and find the
+ largest element. Which would avoid comparing the size of the
+ initialized element against any tail padding in the union.
+ Doesn't seem worth the effort... */
+ if (simple_cst_equal (TYPE_SIZE (TREE_TYPE (ctor)),
+ TYPE_SIZE (init_sub_type)) == 1)
+ {
+ /* And now we have to find out if the element itself is fully
+ constructed. E.g. for union { struct { int a, b; } s; } u
+ = { .s = { .a = 1 } }. */
+ if (elt_count == count_type_elements (init_sub_type))
+ clear_this = false;
+ }
+ }
+
+ *p_must_clear = clear_this;
+ }
+
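Concretely, *p_must_clear ends up set whenever the single initialized member does not account for every byte of the union, either because it is smaller than the union or because it is itself incompletely initialized. An example of the first kind (illustrative, complementing the case in the comment above):

    union u
    {
      char c;    /* 1 byte */
      int i;     /* 4 bytes: initializing 'c' leaves a tail to clear */
    };

    union u x = { .c = 1 };   /* simple_cst_equal fails; must clear */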
*p_nz_elts += nz_elts;
*p_nc_elts += nc_elts;
*p_elt_count += elt_count;
void
categorize_ctor_elements (tree ctor, HOST_WIDE_INT *p_nz_elts,
HOST_WIDE_INT *p_nc_elts,
- HOST_WIDE_INT *p_elt_count)
+ HOST_WIDE_INT *p_elt_count,
+ bool *p_must_clear)
{
*p_nz_elts = 0;
*p_nc_elts = 0;
*p_elt_count = 0;
- categorize_ctor_elements_1 (ctor, p_nz_elts, p_nc_elts, p_elt_count);
+ *p_must_clear = false;
+ categorize_ctor_elements_1 (ctor, p_nz_elts, p_nc_elts, p_elt_count,
+ p_must_clear);
}
/* Count the number of scalars in TYPE. Return -1 on overflow or
case VOID_TYPE:
case METHOD_TYPE:
- case FILE_TYPE:
case FUNCTION_TYPE:
case LANG_TYPE:
default:
{
HOST_WIDE_INT nz_elts, nc_elts, count, elts;
+ bool must_clear;
+
- categorize_ctor_elements (exp, &nz_elts, &nc_elts, &count);
+ categorize_ctor_elements (exp, &nz_elts, &nc_elts, &count, &must_clear);
+ if (must_clear)
+ return 1;
elts = count_type_elements (TREE_TYPE (exp));
return nz_elts < elts / 4;
&& ! CONSTRUCTOR_ELTS (exp))
/* If the constructor is empty, clear the union. */
{
- clear_storage (target, expr_size (exp));
+ clear_storage (target, expr_size (exp), BLOCK_OP_NORMAL);
cleared = 1;
}
|| ((HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (target))
== size)))
{
- clear_storage (target, GEN_INT (size));
+ clear_storage (target, GEN_INT (size), BLOCK_OP_NORMAL);
cleared = 1;
}
if (BYTES_BIG_ENDIAN)
value
- = fold (build2 (LSHIFT_EXPR, type, value,
- build_int_cst (NULL_TREE,
- BITS_PER_WORD - bitsize)));
+ = fold_build2 (LSHIFT_EXPR, type, value,
+ build_int_cst (NULL_TREE,
+ BITS_PER_WORD - bitsize));
bitsize = BITS_PER_WORD;
mode = word_mode;
}
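This and the following hunks migrate fold (buildN (...)) to fold_buildN, which folds while building and is intended to be equivalent while avoiding allocation of the unfolded node. For instance:

    /* Equivalent to fold (build2 (MINUS_EXPR, type, a, b)), without
       constructing the intermediate tree node first.  */
    tree d = fold_build2 (MINUS_EXPR, type, a, b);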
if (REG_P (target))
emit_move_insn (target, CONST0_RTX (GET_MODE (target)));
else
- clear_storage (target, GEN_INT (size));
+ clear_storage (target, GEN_INT (size), BLOCK_OP_NORMAL);
cleared = 1;
}
/* Assign value to element index. */
position
= convert (ssizetype,
- fold (build2 (MINUS_EXPR, TREE_TYPE (index),
- index, TYPE_MIN_VALUE (domain))));
+ fold_build2 (MINUS_EXPR, TREE_TYPE (index),
+ index, TYPE_MIN_VALUE (domain)));
position = size_binop (MULT_EXPR, position,
convert (ssizetype,
TYPE_SIZE_UNIT (elttype)));
if (minelt)
index = fold_convert (ssizetype,
- fold (build2 (MINUS_EXPR,
- TREE_TYPE (index),
- index,
- TYPE_MIN_VALUE (domain))));
+ fold_build2 (MINUS_EXPR,
+ TREE_TYPE (index),
+ index,
+ TYPE_MIN_VALUE (domain)));
position = size_binop (MULT_EXPR, index,
convert (ssizetype,
if (REG_P (target))
emit_move_insn (target, CONST0_RTX (GET_MODE (target)));
else
- clear_storage (target, GEN_INT (size));
+ clear_storage (target, GEN_INT (size), BLOCK_OP_NORMAL);
cleared = 1;
}
&& TREE_CODE (TYPE_SIZE (TREE_TYPE (exp))) == INTEGER_CST
&& compare_tree_int (TYPE_SIZE (TREE_TYPE (exp)), bitsize) != 0))
{
- rtx temp = expand_expr (exp, NULL_RTX, VOIDmode, 0);
+ rtx temp;
+
+ /* If EXP is a NOP_EXPR of precision less than its mode, then that
+ implies a mask operation. If the precision is the same size as
+ the field we're storing into, that mask is redundant. This is
+ particularly common with bit field assignments generated by the
+ C front end. */
+ if (TREE_CODE (exp) == NOP_EXPR)
+ {
+ tree type = TREE_TYPE (exp);
+ if (INTEGRAL_TYPE_P (type)
+ && TYPE_PRECISION (type) < GET_MODE_BITSIZE (TYPE_MODE (type))
+ && bitsize == TYPE_PRECISION (type))
+ {
+ type = TREE_TYPE (TREE_OPERAND (exp, 0));
+ if (INTEGRAL_TYPE_P (type) && TYPE_PRECISION (type) >= bitsize)
+ exp = TREE_OPERAND (exp, 0);
+ }
+ }
+
+ temp = expand_expr (exp, NULL_RTX, VOIDmode, 0);
/* If BITSIZE is narrower than the size of the type of EXP
we will be narrowing TEMP. Normally, what's wanted are the
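The stripped conversion is the one a C front end adds for any store to a bit-field narrower than its declared type; once bitsize equals the type's precision, the conversion's implied mask duplicates the masking store_field performs anyway. Illustrative source (hypothetical, not from the patch):

    struct s
    {
      unsigned int f : 8;
    };

    void
    store8 (struct s *p, unsigned int v)
    {
      /* V is converted to an 8-bit type before the store; since the
         field is also 8 bits, the mask is redundant and the NOP_EXPR
         is peeled off before expansion.  */
      p->f = v;
    }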
index, then convert to sizetype and multiply by the size of
the array element. */
if (! integer_zerop (low_bound))
- index = fold (build2 (MINUS_EXPR, TREE_TYPE (index),
- index, low_bound));
+ index = fold_build2 (MINUS_EXPR, TREE_TYPE (index),
+ index, low_bound);
offset = size_binop (PLUS_EXPR, offset,
size_binop (MULT_EXPR,
/* If the DECL isn't in memory, then the DECL wasn't properly
marked TREE_ADDRESSABLE, which will be either a front-end
or a tree optimizer bug. */
- gcc_assert (GET_CODE (result) == MEM);
+ gcc_assert (MEM_P (result));
result = XEXP (result, 0);
/* ??? Is this needed anymore? */
/* If op1 was placed in target, swap op0 and op1. */
if (target != op0 && target == op1)
{
- rtx tem = op0;
+ temp = op0;
op0 = op1;
- op1 = tem;
+ op1 = temp;
}
/* We generate better code and avoid problems with op1 mentioning
if (! CONSTANT_P (op1))
op1 = force_reg (mode, op1);
+#ifdef HAVE_conditional_move
+ /* Use a conditional move if possible. */
+ if (can_conditionally_move_p (mode))
+ {
+ enum rtx_code comparison_code;
+ rtx insn;
+
+ if (code == MAX_EXPR)
+ comparison_code = unsignedp ? GEU : GE;
+ else
+ comparison_code = unsignedp ? LEU : LE;
+
+ /* ??? Same problem as in expmed.c: emit_conditional_move
+ forces a stack adjustment via compare_from_rtx, and we
+ lose the stack adjustment if the sequence we are about
+ to create is discarded. */
+ do_pending_stack_adjust ();
+
+ start_sequence ();
+
+ /* Try to emit the conditional move. */
+ insn = emit_conditional_move (target, comparison_code,
+ op0, op1, mode,
+ op0, op1, mode,
+ unsignedp);
+
+ /* If we could do the conditional move, emit the sequence,
+ and return. */
+ if (insn)
+ {
+ rtx seq = get_insns ();
+ end_sequence ();
+ emit_insn (seq);
+ return target;
+ }
+
+ /* Otherwise discard the sequence and fall back to code with
+ branches. */
+ end_sequence ();
+ }
+#endif
if (target != op0)
emit_move_insn (target, op0);
- op0 = gen_label_rtx ();
+ temp = gen_label_rtx ();
/* If this mode is an integer too wide to compare properly,
compare word by word. Rely on cse to optimize constant cases. */
{
if (code == MAX_EXPR)
do_jump_by_parts_greater_rtx (mode, unsignedp, target, op1,
- NULL_RTX, op0);
+ NULL_RTX, temp);
else
do_jump_by_parts_greater_rtx (mode, unsignedp, op1, target,
- NULL_RTX, op0);
+ NULL_RTX, temp);
}
else
{
do_compare_rtx_and_jump (target, op1, code == MAX_EXPR ? GE : LE,
- unsignedp, mode, NULL_RTX, NULL_RTX, op0);
+ unsignedp, mode, NULL_RTX, NULL_RTX, temp);
}
emit_move_insn (target, op1);
- emit_label (op0);
+ emit_label (temp);
return target;
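With the HAVE_conditional_move path above, a MIN_EXPR or MAX_EXPR can expand branch-free on targets with conditional moves; the compare-and-branch code that follows it is now only a fallback. A minimal source-level case (assuming the ternary folds to MAX_EXPR, as it typically does):

    /* On e.g. x86 this can now expand to cmp + cmov instead of a
       compare, a branch and two moves.  */
    int
    imax (int a, int b)
    {
      return a > b ? a : b;
    }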
case BIT_NOT_EXPR:
op2 = expand_expr (oprnd2, NULL_RTX, VOIDmode, 0);
temp = expand_ternary_op (mode, this_optab, op0, op1, op2,
target, unsignedp);
- if (temp == 0)
- abort ();
+ gcc_assert (temp);
return temp;
}
if (! HAVE_tablejump)
return 0;
- index_expr = fold (build2 (MINUS_EXPR, index_type,
- convert (index_type, index_expr),
- convert (index_type, minval)));
+ index_expr = fold_build2 (MINUS_EXPR, index_type,
+ convert (index_type, index_expr),
+ convert (index_type, minval));
index = expand_expr (index_expr, NULL_RTX, VOIDmode, 0);
do_pending_stack_adjust ();