/* Subroutines used for code generation on IBM S/390 and zSeries
Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
- 2007 Free Software Foundation, Inc.
+ 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
Contributed by Hartmut Penner (hpenner@de.ibm.com) and
- Ulrich Weigand (uweigand@de.ibm.com).
+ Ulrich Weigand (uweigand@de.ibm.com) and
+ Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
This file is part of GCC.
#include "debug.h"
#include "langhooks.h"
#include "optabs.h"
-#include "tree-gimple.h"
+#include "gimple.h"
#include "df.h"
+#include "params.h"
/* Define the specific costs for a given cpu. */
-struct processor_costs
+struct processor_costs
{
/* multiplication */
const int m; /* cost of an M instruction. */
const struct processor_costs *s390_cost;
static const
-struct processor_costs z900_cost =
+struct processor_costs z900_cost =
{
COSTS_N_INSNS (5), /* M */
COSTS_N_INSNS (10), /* MGHI */
};
static const
-struct processor_costs z990_cost =
+struct processor_costs z990_cost =
{
COSTS_N_INSNS (4), /* M */
COSTS_N_INSNS (2), /* MGHI */
};
static const
-struct processor_costs z9_109_cost =
+struct processor_costs z9_109_cost =
{
COSTS_N_INSNS (4), /* M */
COSTS_N_INSNS (2), /* MGHI */
COSTS_N_INSNS (24), /* DSGR */
};
-extern int reload_completed;
+static const
+struct processor_costs z10_cost = /* Cost table that s390_cost points to when s390_tune is PROCESSOR_2097_Z10. */
+{
+ COSTS_N_INSNS (10), /* M */
+ COSTS_N_INSNS (10), /* MGHI */
+ COSTS_N_INSNS (10), /* MH */
+ COSTS_N_INSNS (10), /* MHI */
+ COSTS_N_INSNS (10), /* ML */
+ COSTS_N_INSNS (10), /* MR */
+ COSTS_N_INSNS (10), /* MS */
+ COSTS_N_INSNS (10), /* MSG */
+ COSTS_N_INSNS (10), /* MSGF */
+ COSTS_N_INSNS (10), /* MSGFR */
+ COSTS_N_INSNS (10), /* MSGR */
+ COSTS_N_INSNS (10), /* MSR */
+ COSTS_N_INSNS (1) , /* multiplication in DFmode */
+ COSTS_N_INSNS (50), /* MXBR */
+ COSTS_N_INSNS (120), /* SQXBR */
+ COSTS_N_INSNS (52), /* SQDBR */
+ COSTS_N_INSNS (38), /* SQEBR */
+ COSTS_N_INSNS (1), /* MADBR */
+ COSTS_N_INSNS (1), /* MAEBR */
+ COSTS_N_INSNS (111), /* DXBR */
+ COSTS_N_INSNS (39), /* DDBR */
+ COSTS_N_INSNS (32), /* DEBR */
+ COSTS_N_INSNS (160), /* DLGR */
+ COSTS_N_INSNS (71), /* DLR */
+ COSTS_N_INSNS (71), /* DR */
+ COSTS_N_INSNS (71), /* DSGFR */
+ COSTS_N_INSNS (71), /* DSGR */
+};
-/* Save information from a "cmpxx" operation until the branch or scc is
- emitted. */
-rtx s390_compare_op0, s390_compare_op1;
+extern int reload_completed;
-/* Save the result of a compare_and_swap until the branch or scc is
- emitted. */
-rtx s390_compare_emitted = NULL_RTX;
+/* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
+static rtx last_scheduled_insn;
/* Structure used to hold the components of a S/390 memory
address. A legitimate address on S/390 is of the general
/* Which cpu are we tuning for. */
enum processor_type s390_tune = PROCESSOR_max;
-enum processor_flags s390_tune_flags;
+int s390_tune_flags;
/* Which instruction set architecture to use. */
enum processor_type s390_arch;
-enum processor_flags s390_arch_flags;
+int s390_arch_flags;
HOST_WIDE_INT s390_warn_framesize = 0;
HOST_WIDE_INT s390_stack_size = 0;
HOST_WIDE_INT s390_stack_guard = 0;
-/* The following structure is embedded in the machine
+/* The following structure is embedded in the machine
specific part of struct function. */
-struct s390_frame_layout GTY (())
+struct GTY (()) s390_frame_layout
{
/* Offset within stack frame. */
HOST_WIDE_INT gprs_offset;
int last_save_gpr;
int last_restore_gpr;
- /* Bits standing for floating point registers. Set, if the
- respective register has to be saved. Starting with reg 16 (f0)
+ /* Bits standing for floating point registers. Set, if the
+ respective register has to be saved. Starting with reg 16 (f0)
at the rightmost bit.
Bit 15 - 8 7 6 5 4 3 2 1 0
fpr 15 - 8 7 5 3 1 6 4 2 0
/* Define the structure for the machine field in struct function. */
-struct machine_function GTY(())
+struct GTY(()) machine_function
{
struct s390_frame_layout frame_layout;
/* True if we may need to perform branch splitting. */
bool split_branches_pending_p;
- /* True during final stage of literal pool processing. */
- bool decomposed_literal_pool_addresses_ok_p;
-
/* Some local-dynamic TLS symbol name. */
const char *some_ld_name;
#define REGNO_PAIR_OK(REGNO, MODE) \
(HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
+/* That's the read ahead of the dynamic branch prediction unit in
+ bytes on a z10 CPU. */
+#define Z10_PREDICT_DISTANCE 384
+
static enum machine_mode
s390_libgcc_cmp_return_mode (void)
{
s390_scalar_mode_supported_p (enum machine_mode mode)
{
if (DECIMAL_FLOAT_MODE_P (mode))
- return true;
+ return default_decimal_float_supported_p ();
else
return default_scalar_mode_supported_p (mode);
}
case CCZ1mode:
if (m2 == CCZmode)
return m1;
-
+
return VOIDmode;
default:
if (INTVAL (op2) == 0)
return CCTmode;
- /* Selected bits all one: CC3.
+ /* Selected bits all one: CC3.
e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
if (INTVAL (op2) == INTVAL (op1))
return CCT3mode;
case GT:
/* The only overflow condition of NEG and ABS happens when
-INT_MAX is used as parameter, which stays negative. So
- we have an overflow from a positive value to a negative.
+ we have an overflow from a positive value to a negative.
Using CCAP mode the resulting cc can be used for comparisons. */
if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
&& GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
/* If constants are involved in an add instruction it is possible to use
the resulting cc for comparisons with zero. Knowing the sign of the
constant the overflow behavior gets predictable. e.g.:
- int a, b; if ((b = a + c) > 0)
+ int a, b; if ((b = a + c) > 0)
with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
&& GET_CODE (*op1) == CONST_INT
&& INTVAL (*op1) == 0xffff
&& SCALAR_INT_MODE_P (GET_MODE (*op0))
- && (nonzero_bits (*op0, GET_MODE (*op0))
+ && (nonzero_bits (*op0, GET_MODE (*op0))
& ~(unsigned HOST_WIDE_INT) 0xffff) == 0)
{
*op0 = gen_lowpart (HImode, *op0);
s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
{
enum machine_mode mode = s390_select_ccmode (code, op0, op1);
- rtx ret = NULL_RTX;
+ rtx cc;
/* Do not output a redundant compare instruction if a compare_and_swap
pattern already computed the result and the machine modes are compatible. */
- if (s390_compare_emitted
- && (s390_cc_modes_compatible (GET_MODE (s390_compare_emitted), mode)
- == GET_MODE (s390_compare_emitted)))
- ret = gen_rtx_fmt_ee (code, VOIDmode, s390_compare_emitted, const0_rtx);
+ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
+ {
+ gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
+ == GET_MODE (op0));
+ cc = op0;
+ }
else
{
- rtx cc = gen_rtx_REG (mode, CC_REGNUM);
-
+ cc = gen_rtx_REG (mode, CC_REGNUM);
emit_insn (gen_rtx_SET (VOIDmode, cc, gen_rtx_COMPARE (mode, op0, op1)));
- ret = gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
}
- s390_compare_emitted = NULL_RTX;
- return ret;
+
+ return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
}
-/* Emit a SImode compare and swap instruction setting MEM to NEW if OLD
+/* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
matches CMP.
Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
conditional branch testing the result. */
static rtx
-s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem, rtx cmp, rtx new)
+s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem, rtx cmp, rtx new_rtx)
{
- rtx ret;
-
- emit_insn (gen_sync_compare_and_swap_ccsi (old, mem, cmp, new));
- ret = gen_rtx_fmt_ee (code, VOIDmode, s390_compare_emitted, const0_rtx);
-
- s390_compare_emitted = NULL_RTX;
-
- return ret;
+ emit_insn (gen_sync_compare_and_swapsi (old, mem, cmp, new_rtx));
+ return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM), const0_rtx); /* Hands the CC register in CCZ1mode to s390_emit_compare as op0 to build the CODE condition against zero. */
}
/* Emit a jump instruction to TARGET. If COND is NULL_RTX, emit an
}
}
+
+/* Return branch condition mask to implement a compare and branch
+ specified by CODE. Return -1 for invalid comparisons.
+
+ The mask is built from the local constants CC0 (1 << 3), CC1 (1 << 2)
+ and CC2 (1 << 1): EQ selects CC0, LT/LTU select CC1 and GT/GTU
+ select CC2; NE, LE/LEU and GE/GEU return the matching unions.
+
+ NOTE(review): every other rtx code reaches gcc_unreachable () in the
+ default case, so the trailing "return -1" looks like a fallback only
+ -- confirm whether a caller can ever observe -1. */
+
+int
+s390_compare_and_branch_condition_mask (rtx code)
+{
+ const int CC0 = 1 << 3;
+ const int CC1 = 1 << 2;
+ const int CC2 = 1 << 1;
+
+ switch (GET_CODE (code))
+ {
+ case EQ:
+ return CC0;
+ case NE:
+ return CC1 | CC2;
+ case LT:
+ case LTU:
+ return CC1;
+ case GT:
+ case GTU:
+ return CC2;
+ case LE:
+ case LEU:
+ return CC0 | CC1;
+ case GE:
+ case GEU:
+ return CC0 | CC2;
+ default:
+ gcc_unreachable ();
+ }
+ return -1;
+}
+
/* If INV is false, return assembler mnemonic string to implement
a branch specified by CODE. If INV is true, return mnemonic
for the corresponding inverted branch. */
static const char *
s390_branch_condition_mnemonic (rtx code, int inv)
{
+ int mask;
+
static const char *const mnemonic[16] =
{
NULL, "o", "h", "nle",
"le", "nh", "no", NULL
};
- int mask = s390_branch_condition_mask (code);
+ if (GET_CODE (XEXP (code, 0)) == REG
+ && REGNO (XEXP (code, 0)) == CC_REGNUM
+ && XEXP (code, 1) == const0_rtx)
+ mask = s390_branch_condition_mask (code);
+ else
+ mask = s390_compare_and_branch_condition_mask (code);
+
gcc_assert (mask >= 0);
if (inv)
return part == -1 ? -1 : n_parts - 1 - part;
}
+/* Return true if IN contains a contiguous bitfield in the lower SIZE
+ bits and no other bits are set in IN. POS and LENGTH can be used
+ to obtain the start position and the length of the bitfield.
+
+ POS gives the position of the first bit of the bitfield counting
+ from the lowest order bit starting with zero. In order to use this
+ value for S/390 instructions this has to be converted to "bits big
+ endian" style.
+
+ Either of POS and LENGTH may be NULL if the caller does not need
+ that value; both are written only when true is returned. False is
+ returned when none of the lower SIZE bits of IN is set. */
+
+bool
+s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, int size,
+ int *pos, int *length)
+{
+ int tmp_pos = 0;
+ int tmp_length = 0;
+ int i;
+ unsigned HOST_WIDE_INT mask = 1ULL;
+ bool contiguous = false;
+
+ for (i = 0; i < size; mask <<= 1, i++) /* Scan upwards starting at bit 0. */
+ {
+ if (contiguous)
+ {
+ if (mask & in)
+ tmp_length++;
+ else
+ break; /* The first clear bit after the run ends the scan. */
+ }
+ else
+ {
+ if (mask & in)
+ {
+ contiguous = true;
+ tmp_length++;
+ }
+ else
+ tmp_pos++; /* Still counting clear bits below the run. */
+ }
+ }
+
+ if (!tmp_length)
+ return false;
+
+ /* Calculate a mask for all bits beyond the contiguous bits.
+ NOTE(review): the shift is presumably split into two steps so that
+ no single shift count reaches the full width of the type -- confirm. */
+ mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1));
+
+ if (mask & in)
+ return false;
+
+ if (tmp_length + tmp_pos - 1 > size) /* NOTE(review): the loop above adds
+ one to either tmp_pos or tmp_length per iteration and runs at most
+ SIZE times, so this condition looks unreachable -- confirm. */
+ return false;
+
+ if (length)
+ *length = tmp_length;
+
+ if (pos)
+ *pos = tmp_pos;
+
+ return true;
+}
+
/* Check whether we can (and want to) split a double-word
move in mode MODE from SRC to DST into two single-word
moves, moving the subword FIRST_SUBWORD first. */
/* This overlapping check is used by peepholes merging memory block operations.
Overlapping operations would otherwise be recognized by the S/390 hardware
- and would fall back to a slower implementation. Allowing overlapping
+ and would fall back to a slower implementation. Allowing overlapping
operations would lead to slow code but not to wrong code. Therefore we are
- somewhat optimistic if we cannot prove that the memory blocks are
+ somewhat optimistic if we cannot prove that the memory blocks are
overlapping.
That's why we return false here although this may accept operations on
overlapping memory areas. */
static struct machine_function *
s390_init_machine_status (void)
{
- return ggc_alloc_cleared (sizeof (struct machine_function));
+ return GGC_CNEW (struct machine_function); /* Allocates the machine_function record; presumably zero-initialized like the ggc_alloc_cleared call it replaces -- confirm. */
}
/* Change optimizations to be performed, depending on the
static bool
s390_handle_arch_option (const char *arg,
enum processor_type *type,
- enum processor_flags *flags)
+ int *flags)
{
static struct pta
{
const char *const name; /* processor name or nickname. */
const enum processor_type processor;
- const enum processor_flags flags;
+ const int flags; /* From enum processor_flags. */
}
const processor_alias_table[] =
{
| PF_LONG_DISPLACEMENT | PF_EXTIMM},
{"z9-ec", PROCESSOR_2094_Z9_109, PF_IEEE_FLOAT | PF_ZARCH
| PF_LONG_DISPLACEMENT | PF_EXTIMM | PF_DFP },
+ {"z10", PROCESSOR_2097_Z10, PF_IEEE_FLOAT | PF_ZARCH
+ | PF_LONG_DISPLACEMENT | PF_EXTIMM | PF_DFP | PF_Z10},
};
size_t i;
}
/* Set processor cost function. */
- if (s390_tune == PROCESSOR_2094_Z9_109)
- s390_cost = &z9_109_cost;
- else if (s390_tune == PROCESSOR_2084_Z990)
- s390_cost = &z990_cost;
- else
- s390_cost = &z900_cost;
-
+ switch (s390_tune)
+ {
+ case PROCESSOR_2084_Z990:
+ s390_cost = &z990_cost;
+ break;
+ case PROCESSOR_2094_Z9_109:
+ s390_cost = &z9_109_cost;
+ break;
+ case PROCESSOR_2097_Z10:
+ s390_cost = &z10_cost;
+ break;
+ default:
+ s390_cost = &z900_cost;
+ }
+
if (TARGET_BACKCHAIN && TARGET_PACKED_STACK && TARGET_HARD_FLOAT)
error ("-mbackchain -mpacked-stack -mhard-float are not supported "
"in combination");
error ("stack size must not be greater than 64k");
}
else if (s390_stack_guard)
- error ("-mstack-guard implies use of -mstack-size");
+ error ("-mstack-guard implies use of -mstack-size");
#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
target_flags |= MASK_LONG_DOUBLE_128;
#endif
+
+ if (s390_tune == PROCESSOR_2097_Z10)
+ {
+ if (!PARAM_SET_P (PARAM_MAX_UNROLLED_INSNS))
+ set_param_value ("max-unrolled-insns", 100);
+ if (!PARAM_SET_P (PARAM_MAX_UNROLL_TIMES))
+ set_param_value ("max-unroll-times", 32);
+ if (!PARAM_SET_P (PARAM_MAX_COMPLETELY_PEELED_INSNS))
+ set_param_value ("max-completely-peeled-insns", 800);
+ if (!PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
+ set_param_value ("max-completely-peel-times", 64);
+ }
+
+ set_param_value ("max-pending-list-length", 256);
}
/* Map for smallest class containing reg regno. */
if (!disp)
return true;
+ /* Without the long displacement facility we don't need to
+ distinguish between long and short displacement. */
+ if (!TARGET_LONG_DISPLACEMENT)
+ return true;
+
/* Integer displacement in range. */
if (GET_CODE (disp) == CONST_INT)
return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
{
case UNSPEC_LTREF:
if (!disp)
- disp = gen_rtx_UNSPEC (Pmode,
+ disp = gen_rtx_UNSPEC (Pmode,
gen_rtvec (1, XVECEXP (base, 0, 0)),
UNSPEC_LTREL_OFFSET);
else
return false;
}
- if (!REG_P (base)
- || (GET_MODE (base) != SImode
+ if (!REG_P (base)
+ || (GET_MODE (base) != SImode
&& GET_MODE (base) != Pmode))
return false;
{
case UNSPEC_LTREF:
if (!disp)
- disp = gen_rtx_UNSPEC (Pmode,
+ disp = gen_rtx_UNSPEC (Pmode,
gen_rtvec (1, XVECEXP (indx, 0, 0)),
UNSPEC_LTREL_OFFSET);
else
return false;
}
- if (!REG_P (indx)
+ if (!REG_P (indx)
|| (GET_MODE (indx) != SImode
&& GET_MODE (indx) != Pmode))
return false;
/* Validate displacement. */
if (!disp)
{
- /* If virtual registers are involved, the displacement will change later
- anyway as the virtual registers get eliminated. This could make a
- valid displacement invalid, but it is more likely to make an invalid
- displacement valid, because we sometimes access the register save area
+ /* If virtual registers are involved, the displacement will change later
+ anyway as the virtual registers get eliminated. This could make a
+ valid displacement invalid, but it is more likely to make an invalid
+ displacement valid, because we sometimes access the register save area
via negative offsets to one of those registers.
Thus we don't check the displacement for validity here. If after
elimination the displacement turns out to be invalid after all,
this is fixed up by reload in any case. */
- if (base != arg_pointer_rtx
- && indx != arg_pointer_rtx
- && base != return_address_pointer_rtx
+ if (base != arg_pointer_rtx
+ && indx != arg_pointer_rtx
+ && base != return_address_pointer_rtx
&& indx != return_address_pointer_rtx
- && base != frame_pointer_rtx
+ && base != frame_pointer_rtx
&& indx != frame_pointer_rtx
- && base != virtual_stack_vars_rtx
+ && base != virtual_stack_vars_rtx
&& indx != virtual_stack_vars_rtx)
if (!DISP_IN_RANGE (offset))
return false;
;
}
- /* Accept chunkified literal pool symbol references. */
- else if (cfun && cfun->machine
- && cfun->machine->decomposed_literal_pool_addresses_ok_p
- && GET_CODE (disp) == MINUS
- && GET_CODE (XEXP (disp, 0)) == LABEL_REF
- && GET_CODE (XEXP (disp, 1)) == LABEL_REF)
- {
- ;
- }
+ /* Accept pool label offsets. */
+ else if (GET_CODE (disp) == UNSPEC
+ && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
+ ;
/* Accept literal pool references. */
else if (GET_CODE (disp) == UNSPEC
}
-/* Evaluates constraint strings described by the regular expression
- ([A|B](Q|R|S|T))|U|W and returns 1 if OP is a valid operand for the
- constraint given in STR, or 0 else. */
+/* Return true if ADDR is of kind symbol_ref or symbol_ref + const_int
+ and return these parts in SYMREF and ADDEND. You can pass NULL in
+ SYMREF and/or ADDEND if you are not interested in these values.
+ An outer CONST wrapper around ADDR is looked through, and ADDEND is
+ set to zero when ADDR has no const_int part. Nothing is stored
+ when false is returned. */
-int
-s390_mem_constraint (const char *str, rtx op)
+static bool
+s390_symref_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
{
- struct s390_address addr;
- char c = str[0];
+ HOST_WIDE_INT tmpaddend = 0;
+
+ if (GET_CODE (addr) == CONST)
+ addr = XEXP (addr, 0);
- /* Check for offsettable variants of memory constraints. */
- if (c == 'A')
+ if (GET_CODE (addr) == PLUS)
{
- /* Only accept non-volatile MEMs. */
- if (!MEM_P (op) || MEM_VOLATILE_P (op))
- return 0;
+ if (GET_CODE (XEXP (addr, 0)) == SYMBOL_REF
+ && CONST_INT_P (XEXP (addr, 1)))
+ {
+ tmpaddend = INTVAL (XEXP (addr, 1));
+ addr = XEXP (addr, 0); /* From here on ADDR is the bare SYMBOL_REF. */
+ }
+ else
+ return false;
+ }
+ else
+ if (GET_CODE (addr) != SYMBOL_REF)
+ return false;
- if ((reload_completed || reload_in_progress)
- ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
- return 0;
+ if (symref)
+ *symref = addr;
+ if (addend)
+ *addend = tmpaddend;
- c = str[1];
- }
+ return true;
+}
+
+
+/* Return true if the address in OP is valid for constraint letter C
+ if wrapped in a MEM rtx. Set LIT_POOL_OK to true if literal
+ pool MEMs should be accepted. Only the Q, R, S, T constraint
+ letters are allowed for C. */
+
+static int
+s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
+{
+ struct s390_address addr;
+ bool decomposed = false;
- /* Check for non-literal-pool variants of memory constraints. */
- else if (c == 'B')
+ /* This check makes sure that no symbolic addresses (except literal
+ pool references) are accepted by the R or T constraints. */
+ if (s390_symref_operand_p (op, NULL, NULL))
{
- if (GET_CODE (op) != MEM)
+ if (!lit_pool_ok)
return 0;
- if (!s390_decompose_address (XEXP (op, 0), &addr))
+ if (!s390_decompose_address (op, &addr))
return 0;
- if (addr.literal_pool)
+ if (!addr.literal_pool)
return 0;
-
- c = str[1];
+ decomposed = true;
}
switch (c)
{
- case 'Q':
- if (GET_CODE (op) != MEM)
- return 0;
- if (!s390_decompose_address (XEXP (op, 0), &addr))
+ case 'Q': /* no index short displacement */
+ if (!decomposed && !s390_decompose_address (op, &addr))
return 0;
if (addr.indx)
return 0;
-
- if (TARGET_LONG_DISPLACEMENT)
- {
- if (!s390_short_displacement (addr.disp))
- return 0;
- }
- break;
-
- case 'R':
- if (GET_CODE (op) != MEM)
+ if (!s390_short_displacement (addr.disp))
return 0;
+ break;
+ case 'R': /* with index short displacement */
if (TARGET_LONG_DISPLACEMENT)
{
- if (!s390_decompose_address (XEXP (op, 0), &addr))
+ if (!decomposed && !s390_decompose_address (op, &addr))
return 0;
if (!s390_short_displacement (addr.disp))
return 0;
}
+ /* Any invalid address here will be fixed up by reload,
+ so accept it for the most generic constraint. */
break;
- case 'S':
+ case 'S': /* no index long displacement */
if (!TARGET_LONG_DISPLACEMENT)
return 0;
- if (GET_CODE (op) != MEM)
- return 0;
- if (!s390_decompose_address (XEXP (op, 0), &addr))
+ if (!decomposed && !s390_decompose_address (op, &addr))
return 0;
if (addr.indx)
return 0;
return 0;
break;
- case 'T':
+ case 'T': /* with index long displacement */
if (!TARGET_LONG_DISPLACEMENT)
return 0;
- if (GET_CODE (op) != MEM)
- return 0;
/* Any invalid address here will be fixed up by reload,
so accept it for the most generic constraint. */
- if (s390_decompose_address (XEXP (op, 0), &addr)
+ if ((decomposed || s390_decompose_address (op, &addr))
&& s390_short_displacement (addr.disp))
return 0;
break;
+ default:
+ return 0;
+ }
+ return 1;
+}
- case 'U':
- if (TARGET_LONG_DISPLACEMENT)
- {
- if (!s390_decompose_address (op, &addr))
- return 0;
- if (!s390_short_displacement (addr.disp))
- return 0;
- }
- break;
- case 'W':
- if (!TARGET_LONG_DISPLACEMENT)
+/* Evaluates constraint strings described by the regular expression
+ ([A|B|Z](Q|R|S|T))|U|W|Y and returns 1 if OP is a valid operand for
+ the constraint given in STR, or 0 else.
+
+ A plain Q, R, S or T letter is handled as well. For the A and B
+ prefixes and the plain letters OP has to be a MEM whose address is
+ checked; for U, W and the Z prefix OP itself is checked as the
+ address. Only the B prefix passes false for lit_pool_ok. */
+
+int
+s390_mem_constraint (const char *str, rtx op)
+{
+ char c = str[0];
+
+ switch (c)
+ {
+ case 'A':
+ /* Check for offsettable variants of memory constraints. */
+ if (!MEM_P (op) || MEM_VOLATILE_P (op))
return 0;
- /* Any invalid address here will be fixed up by reload,
- so accept it for the most generic constraint. */
- if (s390_decompose_address (op, &addr)
- && s390_short_displacement (addr.disp))
+ if ((reload_completed || reload_in_progress)
+ ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
return 0;
- break;
-
+ return s390_check_qrst_address (str[1], XEXP (op, 0), true);
+ case 'B':
+ /* Check for non-literal-pool variants of memory constraints. */
+ if (!MEM_P (op))
+ return 0;
+ return s390_check_qrst_address (str[1], XEXP (op, 0), false);
+ case 'Q':
+ case 'R':
+ case 'S':
+ case 'T':
+ if (GET_CODE (op) != MEM)
+ return 0;
+ return s390_check_qrst_address (c, XEXP (op, 0), true);
+ case 'U': /* OP is the address itself; accepted if it passes the Q or the R check. */
+ return (s390_check_qrst_address ('Q', op, true)
+ || s390_check_qrst_address ('R', op, true));
+ case 'W': /* OP is the address itself; accepted if it passes the S or the T check. */
+ return (s390_check_qrst_address ('S', op, true)
+ || s390_check_qrst_address ('T', op, true));
case 'Y':
/* Simply check for the basic form of a shift count. Reload will
take care of making sure we have a proper base register. */
if (!s390_decompose_shift_count (op, NULL, NULL))
return 0;
break;
-
+ case 'Z': /* The second letter selects the check; OP is used as the address directly. */
+ return s390_check_qrst_address (str[1], op, true);
default:
return 0;
}
-
return 1;
}
-
/* Evaluates constraint strings starting with letter O. Input
parameter C is the second letter following the "O" in the constraint
string. Returns 1 if VALUE meets the respective constraint and 0
/* Compute a (partial) cost for rtx X. Return true if the complete
cost has been computed, and false if subexpressions should be
- scanned. In either case, *TOTAL contains the cost result.
- CODE contains GET_CODE (x), OUTER_CODE contains the code
+ scanned. In either case, *TOTAL contains the cost result.
+ CODE contains GET_CODE (x), OUTER_CODE contains the code
of the superexpression of x. */
static bool
-s390_rtx_costs (rtx x, int code, int outer_code, int *total)
+s390_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{
*total = s390_cost->madbr;
else
*total = s390_cost->maebr;
- *total += rtx_cost (XEXP (XEXP (x, 0), 0), MULT)
- + rtx_cost (XEXP (XEXP (x, 0), 1), MULT)
- + rtx_cost (XEXP (x, 1), code);
+ *total += (rtx_cost (XEXP (XEXP (x, 0), 0), MULT, speed)
+ + rtx_cost (XEXP (XEXP (x, 0), 1), MULT, speed)
+ + rtx_cost (XEXP (x, 1), (enum rtx_code) code, speed));
return true; /* Do not do an additional recursive descent. */
}
*total = COSTS_N_INSNS (1);
return false;
- case MULT:
+ case MULT:
switch (GET_MODE (x))
{
case SImode:
/* Return the cost of an address rtx ADDR. */
static int
-s390_address_cost (rtx addr)
+s390_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
{
struct s390_address ad;
if (!s390_decompose_address (addr, &ad))
return false;
}
-/* Given an rtx OP being reloaded into a reg required to be in class CLASS,
+/* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
return the class of reg to actually use. */
enum reg_class
-s390_preferred_reload_class (rtx op, enum reg_class class)
+s390_preferred_reload_class (rtx op, enum reg_class rclass)
{
switch (GET_CODE (op))
{
case CONST_DOUBLE:
case CONST_INT:
if (legitimate_reload_constant_p (op))
- return class;
+ return rclass;
else
return NO_REGS;
case LABEL_REF:
case SYMBOL_REF:
case CONST:
- if (reg_class_subset_p (ADDR_REGS, class))
+ if (reg_class_subset_p (ADDR_REGS, rclass))
return ADDR_REGS;
else
return NO_REGS;
break;
}
- return class;
+ return rclass;
+}
+
+/* Return true if ADDR is SYMBOL_REF + addend with addend being a
+ multiple of ALIGNMENT and the SYMBOL_REF being naturally
+ aligned.
+
+ Return false if ADDR is not accepted by s390_symref_operand_p.
+ NOTE(review): the addend test masks with ALIGNMENT - 1, which
+ presumably assumes ALIGNMENT is a power of two -- confirm. */
+
+bool
+s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
+{
+ HOST_WIDE_INT addend;
+ rtx symref;
+
+ if (!s390_symref_operand_p (addr, &symref, &addend))
+ return false;
+
+ return (!SYMBOL_REF_NOT_NATURALLY_ALIGNED_P (symref)
+ && !(addend & (alignment - 1)));
+}
+
+/* ADDR is moved into REG using larl. If ADDR isn't a valid larl
+ operand (its addend is odd) SCRATCH is used to load the even part
+ of the address and one is added afterwards.
+
+ ADDR must be accepted by s390_symref_operand_p. For an odd addend
+ REG itself replaces SCRATCH when it is a hard register in
+ ADDR_REGS; the register finally used as scratch is asserted to be
+ a hard register in ADDR_REGS. */
+
+void
+s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
+{
+ HOST_WIDE_INT addend;
+ rtx symref;
+
+ if (!s390_symref_operand_p (addr, &symref, &addend))
+ gcc_unreachable ();
+
+ if (!(addend & 1))
+ /* Easy case. The addend is even so larl will do fine. */
+ emit_move_insn (reg, addr);
+ else
+ {
+ /* We can leave the scratch register untouched if the target
+ register is a valid base register. */
+ if (REGNO (reg) < FIRST_PSEUDO_REGISTER
+ && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
+ scratch = reg;
+
+ gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
+ gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
+
+ if (addend != 1) /* addend - 1 is the even part; for addend == 1 it is the bare symbol. */
+ emit_move_insn (scratch,
+ gen_rtx_CONST (Pmode,
+ gen_rtx_PLUS (Pmode, symref,
+ GEN_INT (addend - 1))));
+ else
+ emit_move_insn (scratch, symref);
+
+ /* Increment the address using la in order to avoid clobbering cc. */
+ emit_move_insn (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
+ }
+}
+
+/* Generate what is necessary to move between REG and MEM using
+ SCRATCH. The direction is given by TOMEM: if true REG is stored
+ to MEM, otherwise MEM is loaded into REG. The address of MEM is
+ first loaded into a register via s390_reload_larl_operand and the
+ move is then done through that register. */
+
+void
+s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
+{
+ /* Reload might have pulled a constant out of the literal pool.
+ Force it back in. */
+ if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
+ || GET_CODE (mem) == CONST)
+ mem = force_const_mem (GET_MODE (reg), mem);
+
+ gcc_assert (MEM_P (mem));
+
+ /* For a load from memory we can leave the scratch register
+ untouched if the target register is a valid base register. */
+ if (!tomem
+ && REGNO (reg) < FIRST_PSEUDO_REGISTER
+ && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
+ && GET_MODE (reg) == GET_MODE (scratch))
+ scratch = reg;
+
+ /* Load address into scratch register. Since we can't have a
+ secondary reload for a secondary reload we have to cover the case
+ where larl would need a secondary reload here as well. */
+ s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
+
+ /* Now we can use a standard load/store to do the move. */
+ if (tomem)
+ emit_move_insn (replace_equiv_address (mem, scratch), reg);
+ else
+ emit_move_insn (reg, replace_equiv_address (mem, scratch));
}
/* Inform reload about cases where moving X with a mode MODE to a register in
- CLASS requires an extra scratch or immediate register. Return the class
+ RCLASS requires an extra scratch or immediate register. Return the class
needed for the immediate register. */
static enum reg_class
-s390_secondary_reload (bool in_p, rtx x, enum reg_class class,
+s390_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
enum machine_mode mode, secondary_reload_info *sri)
{
/* Intermediate register needed. */
- if (reg_classes_intersect_p (CC_REGS, class))
+ if (reg_classes_intersect_p (CC_REGS, rclass))
return GENERAL_REGS;
+ if (TARGET_Z10)
+ {
+ /* On z10 several optimizer steps may generate larl operands with
+ an odd addend. */
+ if (in_p
+ && s390_symref_operand_p (x, NULL, NULL)
+ && mode == Pmode
+ && !s390_check_symref_alignment (x, 2))
+ sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
+ : CODE_FOR_reloadsi_larl_odd_addend_z10);
+
+ /* On z10 we need a scratch register when moving QI, TI or floating
+ point mode values from or to a memory location with a SYMBOL_REF
+ or if the symref addend of a SI or DI move is not aligned to the
+ width of the access. */
+ if (MEM_P (x)
+ && s390_symref_operand_p (XEXP (x, 0), NULL, NULL)
+ && (mode == QImode || mode == TImode || FLOAT_MODE_P (mode)
+ || (!TARGET_64BIT && mode == DImode)
+ || ((mode == HImode || mode == SImode || mode == DImode)
+ && (!s390_check_symref_alignment (XEXP (x, 0),
+ GET_MODE_SIZE (mode))))))
+ {
+#define __SECONDARY_RELOAD_CASE(M,m) \
+ case M##mode: \
+ if (TARGET_64BIT) \
+ sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
+ CODE_FOR_reload##m##di_tomem_z10; \
+ else \
+ sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
+ CODE_FOR_reload##m##si_tomem_z10; \
+ break;
+
+ switch (GET_MODE (x))
+ {
+ __SECONDARY_RELOAD_CASE (QI, qi);
+ __SECONDARY_RELOAD_CASE (HI, hi);
+ __SECONDARY_RELOAD_CASE (SI, si);
+ __SECONDARY_RELOAD_CASE (DI, di);
+ __SECONDARY_RELOAD_CASE (TI, ti);
+ __SECONDARY_RELOAD_CASE (SF, sf);
+ __SECONDARY_RELOAD_CASE (DF, df);
+ __SECONDARY_RELOAD_CASE (TF, tf);
+ __SECONDARY_RELOAD_CASE (SD, sd);
+ __SECONDARY_RELOAD_CASE (DD, dd);
+ __SECONDARY_RELOAD_CASE (TD, td);
+
+ default:
+ gcc_unreachable ();
+ }
+#undef __SECONDARY_RELOAD_CASE
+ }
+ }
+
/* We need a scratch register when loading a PLUS expression which
is not a legitimate operand of the LOAD ADDRESS instruction. */
if (in_p && s390_plus_operand (x, mode))
/* For GENERAL_REGS a displacement overflow is no problem if occurring
in a s_operand address since we may fallback to lm/stm. So we only
have to care about overflows in the b+i+d case. */
- if ((reg_classes_intersect_p (GENERAL_REGS, class)
+ if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
&& s390_class_max_nregs (GENERAL_REGS, mode) > 1
&& GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
/* For FP_REGS no lm/stm is available so this check is triggered
for displacement overflows in b+i+d and b+d like addresses. */
- || (reg_classes_intersect_p (FP_REGS, class)
+ || (reg_classes_intersect_p (FP_REGS, rclass)
&& s390_class_max_nregs (FP_REGS, mode) > 1))
{
if (in_p)
}
}
+ /* A scratch address register is needed when a symbolic constant is
+ copied to r0 compiling with -fPIC. In other cases the target
+ register might be used as temporary (see legitimize_pic_address). */
+ if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
+ sri->icode = (TARGET_64BIT ?
+ CODE_FOR_reloaddi_PIC_addr :
+ CODE_FOR_reloadsi_PIC_addr);
+
/* Either scratch or no register needed. */
return NO_REGS;
}
/* Return true if ADDR is a valid memory address.
STRICT specifies whether strict register checking applies. */
-bool
-legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
- rtx addr, int strict)
+static bool
+s390_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
{
struct s390_address ad;
+
+ if (TARGET_Z10
+ && larl_operand (addr, VOIDmode)
+ && (mode == VOIDmode
+ || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
+ return true;
+
if (!s390_decompose_address (addr, &ad))
return false;
}
else
{
- if (ad.base
+ if (ad.base
&& !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
|| REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
return false;
-
+
if (ad.indx
&& !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
|| REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
differentiate them from global data objects. The returned
address is the PIC reg + an unspec constant.
- GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
+ TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
reg also appears in the address. */
rtx
legitimize_pic_address (rtx orig, rtx reg)
{
rtx addr = orig;
- rtx new = orig;
+ rtx new_rtx = orig;
rtx base;
gcc_assert (!TLS_SYMBOLIC_CONST (addr));
addr = force_const_mem (Pmode, addr);
emit_move_insn (temp, addr);
- new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
+ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
if (reg != 0)
{
- s390_load_address (reg, new);
- new = reg;
+ s390_load_address (reg, new_rtx);
+ new_rtx = reg;
}
}
}
if (reload_in_progress || reload_completed)
df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
- new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
- new = gen_rtx_CONST (Pmode, new);
- new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
- new = gen_const_mem (Pmode, new);
- emit_move_insn (reg, new);
- new = reg;
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
+ new_rtx = gen_const_mem (Pmode, new_rtx);
+ emit_move_insn (reg, new_rtx);
+ new_rtx = reg;
}
else if (TARGET_CPU_ZARCH)
{
/* If the GOT offset might be >= 4k, we determine the position
of the GOT entry via a PC-relative LARL (@GOTENT). */
- rtx temp = gen_reg_rtx (Pmode);
+ rtx temp = reg ? reg : gen_reg_rtx (Pmode);
+
+ gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
+ || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
- new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
- new = gen_rtx_CONST (Pmode, new);
- emit_move_insn (temp, new);
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ emit_move_insn (temp, new_rtx);
- new = gen_const_mem (Pmode, temp);
- emit_move_insn (reg, new);
- new = reg;
+ new_rtx = gen_const_mem (Pmode, temp);
+ emit_move_insn (reg, new_rtx);
+ new_rtx = reg;
}
else
{
/* If the GOT offset might be >= 4k, we have to load it
from the literal pool (@GOT). */
- rtx temp = gen_reg_rtx (Pmode);
+ rtx temp = reg ? reg : gen_reg_rtx (Pmode);
+
+ gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
+ || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
if (reload_in_progress || reload_completed)
df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
addr = force_const_mem (Pmode, addr);
emit_move_insn (temp, addr);
- new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
- new = gen_const_mem (Pmode, new);
- emit_move_insn (reg, new);
- new = reg;
+ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
+ new_rtx = gen_const_mem (Pmode, new_rtx);
+ emit_move_insn (reg, new_rtx);
+ new_rtx = reg;
}
}
else
out of the literal pool, force them back in. */
case UNSPEC_GOTOFF:
case UNSPEC_PLTOFF:
- new = force_const_mem (Pmode, orig);
+ new_rtx = force_const_mem (Pmode, orig);
break;
/* @GOT is OK as is if small. */
case UNSPEC_GOT:
if (flag_pic == 2)
- new = force_const_mem (Pmode, orig);
+ new_rtx = force_const_mem (Pmode, orig);
break;
/* @GOTENT is OK as is. */
addr = force_const_mem (Pmode, addr);
emit_move_insn (temp, addr);
- new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
+ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
if (reg != 0)
{
- s390_load_address (reg, new);
- new = reg;
+ s390_load_address (reg, new_rtx);
+ new_rtx = reg;
}
}
break;
gcc_unreachable ();
}
}
- else
+ else
gcc_assert (GET_CODE (addr) == PLUS);
}
if (GET_CODE (addr) == PLUS)
}
emit_move_insn (temp, op0);
- new = gen_rtx_PLUS (Pmode, temp, op1);
+ new_rtx = gen_rtx_PLUS (Pmode, temp, op1);
if (reg != 0)
{
- s390_load_address (reg, new);
- new = reg;
+ s390_load_address (reg, new_rtx);
+ new_rtx = reg;
}
}
else
addr = force_const_mem (Pmode, addr);
emit_move_insn (temp, addr);
- new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
+ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
if (reg != 0)
{
- s390_load_address (reg, new);
- new = reg;
+ s390_load_address (reg, new_rtx);
+ new_rtx = reg;
}
}
}
{
gcc_assert (XVECLEN (op0, 0) == 1);
- new = force_const_mem (Pmode, orig);
+ new_rtx = force_const_mem (Pmode, orig);
}
/* Otherwise, compute the sum. */
else
{
base = legitimize_pic_address (XEXP (addr, 0), reg);
- new = legitimize_pic_address (XEXP (addr, 1),
+ new_rtx = legitimize_pic_address (XEXP (addr, 1),
base == reg ? NULL_RTX : reg);
- if (GET_CODE (new) == CONST_INT)
- new = plus_constant (base, INTVAL (new));
+ if (GET_CODE (new_rtx) == CONST_INT)
+ new_rtx = plus_constant (base, INTVAL (new_rtx));
else
{
- if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
+ if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
{
- base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
- new = XEXP (new, 1);
+ base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
+ new_rtx = XEXP (new_rtx, 1);
}
- new = gen_rtx_PLUS (Pmode, base, new);
+ new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
}
- if (GET_CODE (new) == CONST)
- new = XEXP (new, 0);
- new = force_operand (new, 0);
+ if (GET_CODE (new_rtx) == CONST)
+ new_rtx = XEXP (new_rtx, 0);
+ new_rtx = force_operand (new_rtx, 0);
}
}
}
- return new;
+ return new_rtx;
}
/* Load the thread pointer into a register. */
gen_rtx_REG (Pmode, RETURN_REGNUM));
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
- CONST_OR_PURE_CALL_P (insn) = 1;
+ RTL_CONST_CALL_P (insn) = 1;
}
/* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
static rtx
legitimize_tls_address (rtx addr, rtx reg)
{
- rtx new, tls_call, temp, base, r2, insn;
+ rtx new_rtx, tls_call, temp, base, r2, insn;
if (GET_CODE (addr) == SYMBOL_REF)
switch (tls_symbolic_operand (addr))
start_sequence ();
r2 = gen_rtx_REG (Pmode, 2);
tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
- new = gen_rtx_CONST (Pmode, tls_call);
- new = force_const_mem (Pmode, new);
- emit_move_insn (r2, new);
+ new_rtx = gen_rtx_CONST (Pmode, tls_call);
+ new_rtx = force_const_mem (Pmode, new_rtx);
+ emit_move_insn (r2, new_rtx);
s390_emit_tls_call_insn (r2, tls_call);
insn = get_insns ();
end_sequence ();
- new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
temp = gen_reg_rtx (Pmode);
- emit_libcall_block (insn, temp, r2, new);
+ emit_libcall_block (insn, temp, r2, new_rtx);
- new = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
+ new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
if (reg != 0)
{
- s390_load_address (reg, new);
- new = reg;
+ s390_load_address (reg, new_rtx);
+ new_rtx = reg;
}
break;
start_sequence ();
r2 = gen_rtx_REG (Pmode, 2);
tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
- new = gen_rtx_CONST (Pmode, tls_call);
- new = force_const_mem (Pmode, new);
- emit_move_insn (r2, new);
+ new_rtx = gen_rtx_CONST (Pmode, tls_call);
+ new_rtx = force_const_mem (Pmode, new_rtx);
+ emit_move_insn (r2, new_rtx);
s390_emit_tls_call_insn (r2, tls_call);
insn = get_insns ();
end_sequence ();
- new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
temp = gen_reg_rtx (Pmode);
- emit_libcall_block (insn, temp, r2, new);
+ emit_libcall_block (insn, temp, r2, new_rtx);
- new = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
+ new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
base = gen_reg_rtx (Pmode);
- s390_load_address (base, new);
+ s390_load_address (base, new_rtx);
- new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
- new = gen_rtx_CONST (Pmode, new);
- new = force_const_mem (Pmode, new);
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ new_rtx = force_const_mem (Pmode, new_rtx);
temp = gen_reg_rtx (Pmode);
- emit_move_insn (temp, new);
+ emit_move_insn (temp, new_rtx);
- new = gen_rtx_PLUS (Pmode, base, temp);
+ new_rtx = gen_rtx_PLUS (Pmode, base, temp);
if (reg != 0)
{
- s390_load_address (reg, new);
- new = reg;
+ s390_load_address (reg, new_rtx);
+ new_rtx = reg;
}
break;
if (reload_in_progress || reload_completed)
df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
- new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
- new = gen_rtx_CONST (Pmode, new);
- new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
- new = gen_const_mem (Pmode, new);
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
+ new_rtx = gen_const_mem (Pmode, new_rtx);
temp = gen_reg_rtx (Pmode);
- emit_move_insn (temp, new);
+ emit_move_insn (temp, new_rtx);
}
else if (TARGET_CPU_ZARCH)
{
/* If the GOT offset might be >= 4k, we determine the position
of the GOT entry via a PC-relative LARL. */
- new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
- new = gen_rtx_CONST (Pmode, new);
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
temp = gen_reg_rtx (Pmode);
- emit_move_insn (temp, new);
+ emit_move_insn (temp, new_rtx);
- new = gen_const_mem (Pmode, temp);
+ new_rtx = gen_const_mem (Pmode, temp);
temp = gen_reg_rtx (Pmode);
- emit_move_insn (temp, new);
+ emit_move_insn (temp, new_rtx);
}
else if (flag_pic)
{
if (reload_in_progress || reload_completed)
df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
- new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
- new = gen_rtx_CONST (Pmode, new);
- new = force_const_mem (Pmode, new);
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ new_rtx = force_const_mem (Pmode, new_rtx);
temp = gen_reg_rtx (Pmode);
- emit_move_insn (temp, new);
+ emit_move_insn (temp, new_rtx);
- new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
- new = gen_const_mem (Pmode, new);
+ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
+ new_rtx = gen_const_mem (Pmode, new_rtx);
- new = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new, addr), UNSPEC_TLS_LOAD);
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
temp = gen_reg_rtx (Pmode);
- emit_insn (gen_rtx_SET (Pmode, temp, new));
+ emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
}
else
{
/* In position-dependent code, load the absolute address of
the GOT entry from the literal pool. */
- new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
- new = gen_rtx_CONST (Pmode, new);
- new = force_const_mem (Pmode, new);
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ new_rtx = force_const_mem (Pmode, new_rtx);
temp = gen_reg_rtx (Pmode);
- emit_move_insn (temp, new);
+ emit_move_insn (temp, new_rtx);
- new = temp;
- new = gen_const_mem (Pmode, new);
- new = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new, addr), UNSPEC_TLS_LOAD);
+ new_rtx = temp;
+ new_rtx = gen_const_mem (Pmode, new_rtx);
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
temp = gen_reg_rtx (Pmode);
- emit_insn (gen_rtx_SET (Pmode, temp, new));
+ emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
}
- new = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
+ new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
if (reg != 0)
{
- s390_load_address (reg, new);
- new = reg;
+ s390_load_address (reg, new_rtx);
+ new_rtx = reg;
}
break;
case TLS_MODEL_LOCAL_EXEC:
- new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
- new = gen_rtx_CONST (Pmode, new);
- new = force_const_mem (Pmode, new);
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ new_rtx = force_const_mem (Pmode, new_rtx);
temp = gen_reg_rtx (Pmode);
- emit_move_insn (temp, new);
+ emit_move_insn (temp, new_rtx);
- new = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
+ new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
if (reg != 0)
{
- s390_load_address (reg, new);
- new = reg;
+ s390_load_address (reg, new_rtx);
+ new_rtx = reg;
}
break;
{
case UNSPEC_INDNTPOFF:
gcc_assert (TARGET_CPU_ZARCH);
- new = addr;
+ new_rtx = addr;
break;
default:
else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
&& GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
{
- new = XEXP (XEXP (addr, 0), 0);
- if (GET_CODE (new) != SYMBOL_REF)
- new = gen_rtx_CONST (Pmode, new);
+ new_rtx = XEXP (XEXP (addr, 0), 0);
+ if (GET_CODE (new_rtx) != SYMBOL_REF)
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
- new = legitimize_tls_address (new, reg);
- new = plus_constant (new, INTVAL (XEXP (XEXP (addr, 0), 1)));
- new = force_operand (new, 0);
+ new_rtx = legitimize_tls_address (new_rtx, reg);
+ new_rtx = plus_constant (new_rtx, INTVAL (XEXP (XEXP (addr, 0), 1)));
+ new_rtx = force_operand (new_rtx, 0);
}
else
gcc_unreachable (); /* for now ... */
- return new;
+ return new_rtx;
}
-/* Emit insns to move operands[1] into operands[0]. */
+/* Emit insns making the address in operands[1] valid for a standard
+ move to operands[0]. operands[1] is replaced by an address which
+ should be used instead of the former RTX to emit the move
+ pattern. */
void
emit_symbolic_move (rtx *operands)
When -fpic is used, special handling is needed for symbolic references.
See comments by legitimize_pic_address for details. */
-rtx
-legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
- enum machine_mode mode ATTRIBUTE_UNUSED)
+static rtx
+s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED)
{
rtx constant_term = const0_rtx;
{
x = legitimize_tls_address (x, 0);
- if (legitimate_address_p (mode, x, FALSE))
+ if (s390_legitimate_address_p (mode, x, FALSE))
return x;
}
else if (GET_CODE (x) == PLUS
- && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
+ && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
|| TLS_SYMBOLIC_CONST (XEXP (x, 1))))
{
return x;
|| SYMBOLIC_CONST (XEXP (x, 1)))))
x = legitimize_pic_address (x, 0);
- if (legitimate_address_p (mode, x, FALSE))
+ if (s390_legitimate_address_p (mode, x, FALSE))
return x;
}
MODE is the mode of the enclosing MEM. OPNUM is the operand number
and TYPE is the reload type of the current reload. */
-rtx
+rtx
legitimize_reload_address (rtx ad, enum machine_mode mode ATTRIBUTE_UNUSED,
int opnum, int type)
{
{
HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
- rtx cst, tem, new;
+ rtx cst, tem, new_rtx;
cst = GEN_INT (upper);
if (!legitimate_reload_constant_p (cst))
cst = force_const_mem (Pmode, cst);
tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
- new = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
+ new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
- BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
+ BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
opnum, (enum reload_type) type);
- return new;
+ return new_rtx;
}
return NULL_RTX;
dst = change_address (dst, VOIDmode, dst_addr);
src = change_address (src, VOIDmode, src_addr);
- temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1, 0);
+ temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
+ OPTAB_DIRECT);
if (temp != count)
emit_move_insn (count, temp);
- temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1, 0);
+ temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
+ OPTAB_DIRECT);
if (temp != blocks)
emit_move_insn (blocks, temp);
emit_label (loop_start_label);
+ if (TARGET_Z10
+ && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
+ {
+ rtx prefetch;
+
+ /* Issue a read prefetch for the +3 cache line. */
+ prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
+ const0_rtx, const0_rtx);
+ PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
+ emit_insn (prefetch);
+
+ /* Issue a write prefetch for the +3 cache line. */
+ prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
+ const1_rtx, const0_rtx);
+ PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
+ emit_insn (prefetch);
+ }
+
emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
s390_load_address (dst_addr,
gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
s390_load_address (src_addr,
gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
- temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1, 0);
+ temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
+ OPTAB_DIRECT);
if (temp != blocks)
emit_move_insn (blocks, temp);
return;
gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
-
+
if (GET_CODE (len) == CONST_INT && INTVAL (len) > 0 && INTVAL (len) <= 257)
{
if (val == const0_rtx && INTVAL (len) <= 256)
{
/* Initialize memory by storing the first byte. */
emit_move_insn (adjust_address (dst, QImode, 0), val);
-
+
if (INTVAL (len) > 1)
{
/* Initiate 1 byte overlap move.
rtx dstp1 = adjust_address (dst, VOIDmode, 1);
set_mem_size (dst, const1_rtx);
- emit_insn (gen_movmem_short (dstp1, dst,
+ emit_insn (gen_movmem_short (dstp1, dst,
GEN_INT (INTVAL (len) - 2)));
}
}
dst = change_address (dst, VOIDmode, dst_addr);
if (val == const0_rtx)
- temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1, 0);
+ temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
+ OPTAB_DIRECT);
else
{
dstp1 = adjust_address (dst, VOIDmode, 1);
/* Initialize memory by storing the first byte. */
emit_move_insn (adjust_address (dst, QImode, 0), val);
-
+
/* If count is 1 we are done. */
emit_cmp_and_jump_insns (count, const1_rtx,
EQ, NULL_RTX, mode, 1, end_label);
- temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1, 0);
+ temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1,
+ OPTAB_DIRECT);
}
if (temp != count)
emit_move_insn (count, temp);
- temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1, 0);
+ temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
+ OPTAB_DIRECT);
if (temp != blocks)
emit_move_insn (blocks, temp);
emit_label (loop_start_label);
+ if (TARGET_Z10
+ && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
+ {
+ /* Issue a write prefetch for the +4 cache line. */
+ rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
+ GEN_INT (1024)),
+ const1_rtx, const0_rtx);
+ emit_insn (prefetch);
+ PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
+ }
+
if (val == const0_rtx)
emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
else
s390_load_address (dst_addr,
gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
- temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1, 0);
+ temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
+ OPTAB_DIRECT);
if (temp != blocks)
emit_move_insn (blocks, temp);
op0 = change_address (op0, VOIDmode, addr0);
op1 = change_address (op1, VOIDmode, addr1);
- temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1, 0);
+ temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
+ OPTAB_DIRECT);
if (temp != count)
emit_move_insn (count, temp);
- temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1, 0);
+ temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
+ OPTAB_DIRECT);
if (temp != blocks)
emit_move_insn (blocks, temp);
emit_label (loop_start_label);
+ if (TARGET_Z10
+ && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
+ {
+ rtx prefetch;
+
+ /* Issue a read prefetch for the +2 cache line of operand 1. */
+ prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
+ const0_rtx, const0_rtx);
+ emit_insn (prefetch);
+ PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
+
+ /* Issue a read prefetch for the +2 cache line of operand 2. */
+ prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
+ const0_rtx, const0_rtx);
+ emit_insn (prefetch);
+ PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
+ }
+
emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
s390_load_address (addr1,
gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
- temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1, 0);
+ temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
+ OPTAB_DIRECT);
if (temp != blocks)
emit_move_insn (blocks, temp);
}
p = rtvec_alloc (2);
- RTVEC_ELT (p, 0) =
+ RTVEC_ELT (p, 0) =
gen_rtx_SET (VOIDmode, dst, op_res);
- RTVEC_ELT (p, 1) =
+ RTVEC_ELT (p, 1) =
gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
if (!register_operand (src, GET_MODE (dst)))
src = force_reg (GET_MODE (dst), src);
- op_res = gen_rtx_MINUS (GET_MODE (dst),
- gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
- gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
- gen_rtx_REG (cc_mode, CC_REGNUM),
+ op_res = gen_rtx_MINUS (GET_MODE (dst),
+ gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
+ gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
+ gen_rtx_REG (cc_mode, CC_REGNUM),
const0_rtx));
p = rtvec_alloc (2);
- RTVEC_ELT (p, 0) =
+ RTVEC_ELT (p, 0) =
gen_rtx_SET (VOIDmode, dst, op_res);
- RTVEC_ELT (p, 1) =
+ RTVEC_ELT (p, 1) =
gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
return false;
}
-/* Expand code for the insv template. Return true if successful, false else. */
+/* Expand code for the insv template. Return true if successful. */
-bool
+bool
s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
{
int bitsize = INTVAL (op1);
int bitpos = INTVAL (op2);
+ /* On z10 we can use the risbg instruction to implement insv. */
+ if (TARGET_Z10
+ && ((GET_MODE (dest) == DImode && GET_MODE (src) == DImode)
+ || (GET_MODE (dest) == SImode && GET_MODE (src) == SImode)))
+ {
+ rtx op;
+ rtx clobber;
+
+ op = gen_rtx_SET (GET_MODE(src),
+ gen_rtx_ZERO_EXTRACT (GET_MODE (dest), dest, op1, op2),
+ src);
+ clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
+
+ return true;
+ }
+
/* We need byte alignment. */
if (bitsize % BITS_PER_UNIT)
return false;
set_mem_size (dest, GEN_INT (size));
s390_expand_movmem (dest, src_mem, GEN_INT (size));
}
-
+
/* (set (ze (mem)) (reg)). */
else if (register_operand (src, word_mode))
{
int stcmh_width = bitsize - GET_MODE_BITSIZE (SImode);
int size = stcmh_width / BITS_PER_UNIT;
- emit_move_insn (adjust_address (dest, SImode, size),
+ emit_move_insn (adjust_address (dest, SImode, size),
gen_lowpart (SImode, src));
set_mem_size (dest, GEN_INT (size));
emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, GEN_INT
/* (set (ze (reg)) (const_int)). */
if (TARGET_ZARCH
- && register_operand (dest, word_mode)
+ && register_operand (dest, word_mode)
&& (bitpos % 16) == 0
&& (bitsize % 16) == 0
&& const_int_operand (src, VOIDmode))
putsize = GET_MODE_BITSIZE (putmode);
regpos -= putsize;
- emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
+ emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
GEN_INT (putsize),
- GEN_INT (regpos)),
+ GEN_INT (regpos)),
gen_int_mode (val, putmode));
val >>= putsize;
}
{
val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
NULL_RTX, 1, OPTAB_DIRECT);
- return expand_simple_binop (SImode, ASHIFT, val, count,
+ return expand_simple_binop (SImode, ASHIFT, val, count,
NULL_RTX, 1, OPTAB_DIRECT);
}
/* Structure to hold the initial parameters for a compare_and_swap operation
- in HImode and QImode. */
+ in HImode and QImode. */
struct alignment_context
{
- rtx memsi; /* SI aligned memory location. */
+ rtx memsi; /* SI aligned memory location. */
rtx shift; /* Bit offset with regard to lsb. */
rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
rtx modemaski; /* ~modemask */
ac->shift = expand_simple_binop (SImode, MULT, ac->shift, GEN_INT (BITS_PER_UNIT),
NULL_RTX, 1, OPTAB_DIRECT);
/* Calculate masks. */
- ac->modemask = expand_simple_binop (SImode, ASHIFT,
+ ac->modemask = expand_simple_binop (SImode, ASHIFT,
GEN_INT (GET_MODE_MASK (mode)), ac->shift,
NULL_RTX, 1, OPTAB_DIRECT);
ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask, NULL_RTX, 1);
}
/* Expand an atomic compare and swap operation for HImode and QImode. MEM is
- the memory location, CMP the old value to compare MEM with and NEW the value
+ the memory location, CMP the old value to compare MEM with and NEW_RTX the value
to set if CMP == MEM.
CMP is never in memory for compare_and_swap_cc because
expand_bool_compare_and_swap puts it into a register for later compare. */
void
-s390_expand_cs_hqi (enum machine_mode mode, rtx target, rtx mem, rtx cmp, rtx new)
+s390_expand_cs_hqi (enum machine_mode mode, rtx target, rtx mem, rtx cmp, rtx new_rtx)
{
struct alignment_context ac;
rtx cmpv, newv, val, resv, cc;
/* Shift the values to the correct bit positions. */
if (!(ac.aligned && MEM_P (cmp)))
cmp = s390_expand_mask_and_shift (cmp, mode, ac.shift);
- if (!(ac.aligned && MEM_P (new)))
- new = s390_expand_mask_and_shift (new, mode, ac.shift);
+ if (!(ac.aligned && MEM_P (new_rtx)))
+ new_rtx = s390_expand_mask_and_shift (new_rtx, mode, ac.shift);
/* Load full word. Subsequent loads are performed by CS. */
val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
/* Start CS loop. */
emit_label (csloop);
- /* val = "<mem>00..0<mem>"
+ /* val = "<mem>00..0<mem>"
* cmp = "00..0<cmp>00..0"
- * new = "00..0<new>00..0"
+ * new = "00..0<new>00..0"
*/
/* Patch cmp and new with val at correct position. */
else
cmpv = force_reg (SImode, expand_simple_binop (SImode, IOR, cmp, val,
NULL_RTX, 1, OPTAB_DIRECT));
- if (ac.aligned && MEM_P (new))
+ if (ac.aligned && MEM_P (new_rtx))
{
newv = force_reg (SImode, val);
- store_bit_field (newv, GET_MODE_BITSIZE (mode), 0, SImode, new);
+ store_bit_field (newv, GET_MODE_BITSIZE (mode), 0, SImode, new_rtx);
}
else
- newv = force_reg (SImode, expand_simple_binop (SImode, IOR, new, val,
+ newv = force_reg (SImode, expand_simple_binop (SImode, IOR, new_rtx, val,
NULL_RTX, 1, OPTAB_DIRECT));
/* Jump to end if we're done (likely?). */
cmpv, newv));
/* Check for changes outside mode. */
- resv = expand_simple_binop (SImode, AND, res, ac.modemaski,
+ resv = expand_simple_binop (SImode, AND, res, ac.modemaski,
NULL_RTX, 1, OPTAB_DIRECT);
- cc = s390_emit_compare (NE, resv, val);
+ cc = s390_emit_compare (NE, resv, val);
emit_move_insn (val, resv);
/* Loop internal if so. */
s390_emit_jump (csloop, cc);
emit_label (csend);
-
+
/* Return the correct part of the bitfield. */
- convert_move (target, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
+ convert_move (target, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
NULL_RTX, 1, OPTAB_DIRECT), 1);
}
{
struct alignment_context ac;
rtx cmp;
- rtx new = gen_reg_rtx (SImode);
+ rtx new_rtx = gen_reg_rtx (SImode);
rtx orig = gen_reg_rtx (SImode);
rtx csloop = gen_label_rtx ();
/* Start CS loop. */
emit_label (csloop);
- emit_move_insn (new, cmp);
+ emit_move_insn (new_rtx, cmp);
/* Patch new with val at correct position. */
switch (code)
{
case PLUS:
case MINUS:
- val = expand_simple_binop (SImode, code, new, orig,
+ val = expand_simple_binop (SImode, code, new_rtx, orig,
NULL_RTX, 1, OPTAB_DIRECT);
val = expand_simple_binop (SImode, AND, val, ac.modemask,
NULL_RTX, 1, OPTAB_DIRECT);
/* FALLTHRU */
- case SET:
+ case SET:
if (ac.aligned && MEM_P (val))
- store_bit_field (new, GET_MODE_BITSIZE (mode), 0, SImode, val);
+ store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0, SImode, val);
else
{
- new = expand_simple_binop (SImode, AND, new, ac.modemaski,
+ new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
NULL_RTX, 1, OPTAB_DIRECT);
- new = expand_simple_binop (SImode, IOR, new, val,
+ new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
NULL_RTX, 1, OPTAB_DIRECT);
}
break;
case AND:
case IOR:
case XOR:
- new = expand_simple_binop (SImode, code, new, val,
+ new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
NULL_RTX, 1, OPTAB_DIRECT);
break;
case MULT: /* NAND */
- new = expand_simple_binop (SImode, XOR, new, ac.modemask,
+ new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
NULL_RTX, 1, OPTAB_DIRECT);
- new = expand_simple_binop (SImode, AND, new, val,
+ new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
NULL_RTX, 1, OPTAB_DIRECT);
break;
default:
}
s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
- ac.memsi, cmp, new));
+ ac.memsi, cmp, new_rtx));
/* Return the correct part of the bitfield. */
if (target)
convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
- after ? new : cmp, ac.shift,
+ after ? new_rtx : cmp, ac.shift,
NULL_RTX, 1, OPTAB_DIRECT), 1);
}
static rtx
s390_delegitimize_address (rtx orig_x)
{
- rtx x = orig_x, y;
+ rtx x, y;
+ orig_x = delegitimize_mem_from_attrs (orig_x);
+ x = orig_x;
if (GET_CODE (x) != MEM)
return orig_x;
fprintf (file, "@INDNTPOFF");
return true;
}
-
+
+ if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_POOL_OFFSET:
+ x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
+ output_addr_const (file, x);
+ return true;
+ }
return false;
}
{
struct s390_address ad;
+ if (s390_symref_operand_p (addr, NULL, NULL))
+ {
+ gcc_assert (TARGET_Z10);
+ output_addr_const (file, addr);
+ return;
+ }
+
if (!s390_decompose_address (addr, &ad)
|| (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
|| (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
'C': print opcode suffix for branch condition.
'D': print opcode suffix for inverse branch condition.
+ 'E': print opcode suffix for branch on index instruction.
'J': print tls_load/tls_gdcall/tls_ldcall suffix
'G': print the size of the operand in bytes.
'O': print only the displacement of a memory reference.
'Y': print shift count operand.
'b': print integer X as if it's an unsigned byte.
+ 'c': print integer X as if it's a signed byte.
'x': print integer X as if it's an unsigned halfword.
'h': print integer X as if it's a signed halfword.
'i': print the first nonzero HImode part of X.
fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
return;
+ case 'E':
+ if (GET_CODE (x) == LE)
+ fprintf (file, "l");
+ else if (GET_CODE (x) == GT)
+ fprintf (file, "h");
+ else
+ gcc_unreachable ();
+ return;
+
case 'J':
if (GET_CODE (x) == SYMBOL_REF)
{
case CONST_INT:
if (code == 'b')
fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xff);
+ else if (code == 'c')
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, ((INTVAL (x) & 0xff) ^ 0x80) - 0x80);
else if (code == 'x')
fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
else if (code == 'h')
return 0;
}
+
/* A C statement (sans semicolon) to update the integer scheduling priority
INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
reduce the priority to execute INSN later. Do not define this macro if
A STD instruction should be scheduled earlier,
in order to use the bypass. */
+
static int
s390_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
{
return priority;
if (s390_tune != PROCESSOR_2084_Z990
- && s390_tune != PROCESSOR_2094_Z9_109)
+ && s390_tune != PROCESSOR_2094_Z9_109
+ && s390_tune != PROCESSOR_2097_Z10)
return priority;
switch (s390_safe_attr_type (insn))
return priority;
}
+
/* The number of instructions that can be issued per cycle. */
static int
s390_issue_rate (void)
{
- if (s390_tune == PROCESSOR_2084_Z990
- || s390_tune == PROCESSOR_2094_Z9_109)
- return 3;
- return 1;
+ switch (s390_tune) /* s390_tune holds one of the PROCESSOR_* values. */
+ {
+ case PROCESSOR_2084_Z990:
+ case PROCESSOR_2094_Z9_109:
+ return 3; /* Shared by z990 and z9-109. */
+ case PROCESSOR_2097_Z10:
+ return 2; /* z10 has its own rate. */
+ default:
+ return 1; /* Every other PROCESSOR_* value. */
+ }
}
static int
}
-/* Find an annotated literal pool symbol referenced in RTX X,
- and store it at REF. Will abort if X contains references to
+/* Find an annotated literal pool symbol referenced in RTX X,
+ and store it at REF. Will abort if X contains references to
more than one such pool symbol; multiple references to the same
symbol are allowed, however.
if (*ref == NULL_RTX)
*ref = sym;
- else
+ else
gcc_assert (*ref == sym);
return;
}
}
-/* Replace every reference to the annotated literal pool
+/* Replace every reference to the annotated literal pool
symbol REF in X by its base plus OFFSET. */
static void
}
}
+/* Return an rtx that represents the offset of X from the start of
+ pool POOL.
+
+ The returned expression has the shape
+ (const (unspec [X (label_ref POOL->label)] UNSPEC_POOL_OFFSET))
+ where the LABEL_REF, the UNSPEC and the CONST are all created in
+ the mode of the incoming X. */
+
+static rtx
+s390_pool_offset (struct constant_pool *pool, rtx x)
+{
+ rtx label;
+
+ label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
+ x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
+ UNSPEC_POOL_OFFSET); /* X now names the UNSPEC, which was built in
+ the incoming X's mode, so GET_MODE (x) below still yields that
+ mode. */
+ return gen_rtx_CONST (GET_MODE (x), x);
+}
+
/* Find constant VAL of mode MODE in the constant pool POOL.
Return an RTX describing the distance from the start of
the pool to the location of the new constant. */
enum machine_mode mode)
{
struct constant *c;
- rtx offset;
int i;
for (i = 0; i < NR_C_MODES; i++)
gcc_assert (c);
- offset = gen_rtx_MINUS (Pmode, gen_rtx_LABEL_REF (Pmode, c->label),
- gen_rtx_LABEL_REF (Pmode, pool->label));
- offset = gen_rtx_CONST (Pmode, offset);
- return offset;
+ return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
}
/* Check whether INSN is an execute. Return the label_ref to its
s390_find_execute (struct constant_pool *pool, rtx insn)
{
struct constant *c;
- rtx offset;
for (c = pool->execute; c != NULL; c = c->next)
if (INSN_UID (insn) == INSN_UID (c->value))
gcc_assert (c);
- offset = gen_rtx_MINUS (Pmode, gen_rtx_LABEL_REF (Pmode, c->label),
- gen_rtx_LABEL_REF (Pmode, pool->label));
- offset = gen_rtx_CONST (Pmode, offset);
- return offset;
+ return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
}
/* For an execute INSN, extract the execute target template. */
&& GET_CODE (XEXP (value, 0)) == UNSPEC
&& XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
&& XVECLEN (XEXP (value, 0), 0) == 1)
- {
- value = gen_rtx_MINUS (Pmode, XVECEXP (XEXP (value, 0), 0, 0),
- gen_rtx_LABEL_REF (VOIDmode, pool->label));
- value = gen_rtx_CONST (VOIDmode, value);
- }
+ value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
insn = emit_label_after (c->label, insn);
INSN_ADDRESSES_NEW (insn, -1);
for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
{
- rtx new_insn = gen_reload_base (cfun->machine->base_reg,
+ rtx new_insn = gen_reload_base (cfun->machine->base_reg,
curr_pool->label);
rtx insn = curr_pool->first_insn;
INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
struct constant_pool *pool = s390_find_pool (pool_list, insn);
if (pool)
{
- rtx new_insn = gen_reload_base (cfun->machine->base_reg,
+ rtx new_insn = gen_reload_base (cfun->machine->base_reg,
pool->label);
INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
}
}
}
-
/* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
void
case MODE_INT:
assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
+ mark_symbol_refs_as_used (exp);
break;
default:
}
-/* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
+/* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
clobbered hard regs in SETREG. */
static void
may use the eh registers, but the code which sets these registers is not
contained in that function. Hence s390_regs_ever_clobbered is not able to
deal with this automatically. */
- if (current_function_calls_eh_return || cfun->machine->has_landing_pad_p)
+ if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
- if (current_function_calls_eh_return
- || (cfun->machine->has_landing_pad_p
+ if (crtl->calls_eh_return
+ || (cfun->machine->has_landing_pad_p
&& df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
This flag is also set for the unwinding code in libgcc.
See expand_builtin_unwind_init. For regs_ever_live this is done by
reload. */
- if (current_function_has_nonlocal_label)
+ if (cfun->has_nonlocal_label)
for (i = 0; i < 16; i++)
if (!call_really_used_regs[i])
regs_ever_clobbered[i] = 1;
{
if (INSN_P (cur_insn))
note_stores (PATTERN (cur_insn),
- s390_reg_clobbered_rtx,
+ s390_reg_clobbered_rtx,
regs_ever_clobbered);
}
}
}
-/* Determine the frame area which actually has to be accessed
- in the function epilogue. The values are stored at the
+/* Determine the frame area which actually has to be accessed
+ in the function epilogue. The values are stored at the
given pointers AREA_BOTTOM (address of the lowest used stack
- address) and AREA_TOP (address of the first item which does
+ address) and AREA_TOP (address of the first item which does
not belong to the stack frame). */
static void
b = MIN (b, cfun_frame_layout.f4_offset + (i - 2) * 8);
t = MAX (t, cfun_frame_layout.f4_offset + (i - 1) * 8);
}
-
+
*area_bottom = b;
*area_top = t;
}
clobbered_regs[HARD_FRAME_POINTER_REGNUM] = 1;
if (flag_pic)
- clobbered_regs[PIC_OFFSET_TABLE_REGNUM]
+ clobbered_regs[PIC_OFFSET_TABLE_REGNUM]
|= df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
- clobbered_regs[BASE_REGNUM]
+ clobbered_regs[BASE_REGNUM]
|= (cfun->machine->base_reg
&& REGNO (cfun->machine->base_reg) == BASE_REGNUM);
|| TARGET_TPF_PROFILING
|| cfun->machine->split_branches_pending_p
|| cfun_frame_layout.save_return_addr_p
- || current_function_calls_eh_return
- || current_function_stdarg);
+ || crtl->calls_eh_return
+ || cfun->stdarg);
clobbered_regs[STACK_POINTER_REGNUM]
|= (!current_function_is_leaf
|| TARGET_TPF_PROFILING
|| cfun_save_high_fprs_p
|| get_frame_size () > 0
- || current_function_calls_alloca
- || current_function_stdarg);
+ || cfun->calls_alloca
+ || cfun->stdarg);
for (i = 6; i < 16; i++)
if (df_regs_ever_live_p (i) || clobbered_regs[i])
cfun_frame_layout.first_save_gpr_slot = i;
cfun_frame_layout.last_save_gpr_slot = j;
- for (i = cfun_frame_layout.first_save_gpr_slot;
- i < cfun_frame_layout.last_save_gpr_slot + 1;
+ for (i = cfun_frame_layout.first_save_gpr_slot;
+ i < cfun_frame_layout.last_save_gpr_slot + 1;
i++)
if (clobbered_regs[i])
break;
for (j = cfun_frame_layout.last_save_gpr_slot; j > i; j--)
if (clobbered_regs[j])
break;
-
+
if (i == cfun_frame_layout.last_save_gpr_slot + 1)
{
/* Nothing to save/restore. */
}
}
- if (current_function_stdarg)
+ if (cfun->stdarg)
{
/* Varargs functions need to save gprs 2 to 6. */
if (cfun->va_list_gpr_size
cfun_frame_layout.frame_size = get_frame_size ();
if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
fatal_error ("total size of local variables exceeds architecture limit");
-
+
if (!TARGET_PACKED_STACK)
{
cfun_frame_layout.backchain_offset = 0;
{
cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
- UNITS_PER_WORD);
- cfun_frame_layout.gprs_offset
- = (cfun_frame_layout.backchain_offset
+ cfun_frame_layout.gprs_offset
+ = (cfun_frame_layout.backchain_offset
- (STACK_POINTER_REGNUM - cfun_frame_layout.first_save_gpr_slot + 1)
* UNITS_PER_WORD);
-
+
if (TARGET_64BIT)
{
- cfun_frame_layout.f4_offset
+ cfun_frame_layout.f4_offset
= (cfun_frame_layout.gprs_offset
- 8 * (cfun_fpr_bit_p (2) + cfun_fpr_bit_p (3)));
-
- cfun_frame_layout.f0_offset
- = (cfun_frame_layout.f4_offset
+
+ cfun_frame_layout.f0_offset
+ = (cfun_frame_layout.f4_offset
- 8 * (cfun_fpr_bit_p (0) + cfun_fpr_bit_p (1)));
}
else
{
/* On 31 bit we have to care about alignment of the
floating point regs to provide fastest access. */
- cfun_frame_layout.f0_offset
- = ((cfun_frame_layout.gprs_offset
+ cfun_frame_layout.f0_offset
+ = ((cfun_frame_layout.gprs_offset
& ~(STACK_BOUNDARY / BITS_PER_UNIT - 1))
- 8 * (cfun_fpr_bit_p (0) + cfun_fpr_bit_p (1)));
-
- cfun_frame_layout.f4_offset
+
+ cfun_frame_layout.f4_offset
= (cfun_frame_layout.f0_offset
- 8 * (cfun_fpr_bit_p (2) + cfun_fpr_bit_p (3)));
}
}
else /* no backchain */
{
- cfun_frame_layout.f4_offset
+ cfun_frame_layout.f4_offset
= (STACK_POINTER_OFFSET
- 8 * (cfun_fpr_bit_p (2) + cfun_fpr_bit_p (3)));
-
- cfun_frame_layout.f0_offset
+
+ cfun_frame_layout.f0_offset
= (cfun_frame_layout.f4_offset
- 8 * (cfun_fpr_bit_p (0) + cfun_fpr_bit_p (1)));
-
- cfun_frame_layout.gprs_offset
+
+ cfun_frame_layout.gprs_offset
= cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
}
&& !TARGET_TPF_PROFILING
&& cfun_frame_layout.frame_size == 0
&& !cfun_save_high_fprs_p
- && !current_function_calls_alloca
- && !current_function_stdarg)
+ && !cfun->calls_alloca
+ && !cfun->stdarg)
return;
if (!TARGET_PACKED_STACK)
if (TARGET_BACKCHAIN)
cfun_frame_layout.frame_size += UNITS_PER_WORD;
- /* No alignment trouble here because f8-f15 are only saved under
+ /* No alignment trouble here because f8-f15 are only saved under
64 bit. */
cfun_frame_layout.f8_offset = (MIN (MIN (cfun_frame_layout.f0_offset,
cfun_frame_layout.f4_offset),
for (i = 0; i < 8; i++)
if (cfun_fpr_bit_p (i))
cfun_frame_layout.frame_size += 8;
-
+
cfun_frame_layout.frame_size += cfun_gprs_save_area_size;
-
+
/* If under 31 bit an odd number of gprs has to be saved we have to adjust
the frame size to sustain 8 byte alignment of stack frames. */
cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
/* Try to predict whether we'll need the base register. */
base_used = cfun->machine->split_branches_pending_p
- || current_function_uses_const_pool
+ || crtl->uses_const_pool
|| (!DISP_IN_RANGE (frame_size)
&& !CONST_OK_FOR_K (frame_size));
s390_register_info (clobbered_regs);
- df_set_regs_ever_live (BASE_REGNUM,
+ df_set_regs_ever_live (BASE_REGNUM,
clobbered_regs[BASE_REGNUM] ? true : false);
- df_set_regs_ever_live (RETURN_REGNUM,
+ df_set_regs_ever_live (RETURN_REGNUM,
clobbered_regs[RETURN_REGNUM] ? true : false);
- df_set_regs_ever_live (STACK_POINTER_REGNUM,
+ df_set_regs_ever_live (STACK_POINTER_REGNUM,
clobbered_regs[STACK_POINTER_REGNUM] ? true : false);
if (cfun->machine->base_reg)
case GENERAL_REGS:
if (REGNO_PAIR_OK (regno, mode))
{
- if (TARGET_64BIT
+ if (TARGET_64BIT
|| (mode != TFmode && mode != TCmode && mode != TDmode))
return true;
- }
+ }
break;
case CC_REGS:
if (GET_MODE_CLASS (mode) == MODE_CC)
default:
return false;
}
-
+
return false;
}
}
/* Maximum number of registers to represent a value of mode MODE
- in a register of class CLASS. */
+ in a register of class RCLASS. */
bool
-s390_class_max_nregs (enum reg_class class, enum machine_mode mode)
+s390_class_max_nregs (enum reg_class rclass, enum machine_mode mode)
{
- switch (class)
+ switch (rclass)
{
case FP_REGS:
if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
/* Return true if register FROM can be eliminated via register TO. */
-bool
-s390_can_eliminate (int from, int to)
+static bool
+s390_can_eliminate (const int from, const int to)
{
/* On zSeries machines, we have not marked the base register as fixed.
Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
/* Make sure we actually saved the return address. */
if (from == RETURN_ADDRESS_POINTER_REGNUM)
- if (!current_function_calls_eh_return
- && !current_function_stdarg
+ if (!crtl->calls_eh_return
+ && !cfun->stdarg
&& !cfun_frame_layout.save_return_addr_p)
return false;
switch (from)
{
case FRAME_POINTER_REGNUM:
- offset = (get_frame_size()
+ offset = (get_frame_size()
+ STACK_POINTER_OFFSET
+ crtl->outgoing_args_size);
break;
return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
}
+/* Return true if REGNO is a global register, but not one
+ of the special ones that need to be saved/restored in any case.
+
+ REGNO is used directly as an index into global_regs. The special
+ registers excluded here are STACK_POINTER_REGNUM, RETURN_REGNUM,
+ BASE_REGNUM and, only when flag_pic is set, PIC_OFFSET_TABLE_REGNUM;
+ for those the function returns false even if they are marked
+ global. */
+
+static inline bool
+global_not_special_regno_p (int regno)
+{
+ return (global_regs[regno]
+ /* These registers are special and need to be
+ restored in any case. */
+ && !(regno == STACK_POINTER_REGNUM
+ || regno == RETURN_REGNUM
+ || regno == BASE_REGNUM
+ || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
+}
+
/* Generate insn to save registers FIRST to LAST into
the register save area located at offset OFFSET
relative to register BASE. */
else
insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
- RTX_FRAME_RELATED_P (insn) = 1;
+ if (!global_not_special_regno_p (first))
+ RTX_FRAME_RELATED_P (insn) = 1;
return insn;
}
gen_rtx_REG (Pmode, first),
GEN_INT (last - first + 1));
- if (first <= 6 && current_function_stdarg)
+ if (first <= 6 && cfun->stdarg)
for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
{
rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
-
+
if (first + i <= 6)
set_mem_alias_set (mem, get_varargs_alias_set ());
}
set, even if it does not. Therefore we emit a new pattern
without those registers as REG_FRAME_RELATED_EXPR note. */
- if (first >= 6)
+ if (first >= 6 && !global_not_special_regno_p (first))
{
rtx pat = PATTERN (insn);
for (i = 0; i < XVECLEN (pat, 0); i++)
- if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
+ if (GET_CODE (XVECEXP (pat, 0, i)) == SET
+ && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
+ 0, i)))))
RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
RTX_FRAME_RELATED_P (insn) = 1;
}
else if (last >= 6)
{
- addr = plus_constant (base, offset + (6 - first) * UNITS_PER_WORD);
+ int start;
+
+ for (start = first >= 6 ? first : 6; start <= last; start++)
+ if (!global_not_special_regno_p (start))
+ break;
+
+ if (start > last)
+ return insn;
+
+ addr = plus_constant (base, offset + (start - first) * UNITS_PER_WORD);
note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
- gen_rtx_REG (Pmode, 6),
- GEN_INT (last - 6 + 1));
+ gen_rtx_REG (Pmode, start),
+ GEN_INT (last - start + 1));
note = PATTERN (note);
- REG_NOTES (insn) =
- gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
- note, REG_NOTES (insn));
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
for (i = 0; i < XVECLEN (note, 0); i++)
- if (GET_CODE (XVECEXP (note, 0, i)) == SET)
+ if (GET_CODE (XVECEXP (note, 0, i)) == SET
+ && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
+ 0, i)))))
RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
RTX_FRAME_RELATED_P (insn) = 1;
/* Choose best register to use for temp use within prologue.
See below for why TPF must use the register 1. */
- if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
- && !current_function_is_leaf
+ if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
+ && !current_function_is_leaf
&& !TARGET_TPF_PROFILING)
temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
else
/* Save call saved gprs. */
if (cfun_frame_layout.first_save_gpr != -1)
{
- insn = save_gprs (stack_pointer_rtx,
- cfun_frame_layout.gprs_offset +
- UNITS_PER_WORD * (cfun_frame_layout.first_save_gpr
+ insn = save_gprs (stack_pointer_rtx,
+ cfun_frame_layout.gprs_offset +
+ UNITS_PER_WORD * (cfun_frame_layout.first_save_gpr
- cfun_frame_layout.first_save_gpr_slot),
- cfun_frame_layout.first_save_gpr,
+ cfun_frame_layout.first_save_gpr,
cfun_frame_layout.last_save_gpr);
emit_insn (insn);
}
if (cfun_fpr_bit_p (i))
{
insn = save_fpr (stack_pointer_rtx, offset, i + 16);
-
+
RTX_FRAME_RELATED_P (insn) = 1;
offset -= 8;
}
if (offset >= cfun_frame_layout.f8_offset)
next_fpr = i + 16;
}
-
+
if (!TARGET_PACKED_STACK)
next_fpr = cfun_save_high_fprs_p ? 31 : 0;
if (cfun_frame_layout.frame_size > 0)
{
rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
+ rtx real_frame_off;
if (s390_stack_size)
{
}
else
{
- HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
- & ~(stack_guard - 1));
- rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
- GEN_INT (stack_check_mask));
- if (TARGET_64BIT)
- gen_cmpdi (t, const0_rtx);
+ /* stack_guard has to be smaller than s390_stack_size.
+ Otherwise we would emit an AND with zero which would
+ not match the test under mask pattern. */
+ if (stack_guard >= s390_stack_size)
+ {
+ warning (0, "frame size of function %qs is "
+ HOST_WIDE_INT_PRINT_DEC
+ " bytes which is more than half the stack size. "
+ "The dynamic check would not be reliable. "
+ "No check emitted for this function.",
+ current_function_name(),
+ cfun_frame_layout.frame_size);
+ }
else
- gen_cmpsi (t, const0_rtx);
-
- emit_insn (gen_conditional_trap (gen_rtx_EQ (CCmode,
- gen_rtx_REG (CCmode,
- CC_REGNUM),
- const0_rtx),
- const0_rtx));
+ {
+ HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
+ & ~(stack_guard - 1));
+
+ rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
+ GEN_INT (stack_check_mask));
+ if (TARGET_64BIT)
+ emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
+ t, const0_rtx),
+ t, const0_rtx, const0_rtx));
+ else
+ emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
+ t, const0_rtx),
+ t, const0_rtx, const0_rtx));
+ }
}
}
- if (s390_warn_framesize > 0
+ if (s390_warn_framesize > 0
&& cfun_frame_layout.frame_size >= s390_warn_framesize)
- warning (0, "frame size of %qs is " HOST_WIDE_INT_PRINT_DEC " bytes",
+ warning (0, "frame size of %qs is " HOST_WIDE_INT_PRINT_DEC " bytes",
current_function_name (), cfun_frame_layout.frame_size);
if (s390_warn_dynamicstack_p && cfun->calls_alloca)
if (DISP_IN_RANGE (INTVAL (frame_off)))
{
insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
- gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
frame_off));
insn = emit_insn (insn);
}
}
RTX_FRAME_RELATED_P (insn) = 1;
- REG_NOTES (insn) =
- gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
- gen_rtx_SET (VOIDmode, stack_pointer_rtx,
- gen_rtx_PLUS (Pmode, stack_pointer_rtx,
- GEN_INT (-cfun_frame_layout.frame_size))),
- REG_NOTES (insn));
+ real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ real_frame_off)));
/* Set backchain. */
if (TARGET_BACKCHAIN)
{
if (cfun_frame_layout.backchain_offset)
- addr = gen_rtx_MEM (Pmode,
- plus_constant (stack_pointer_rtx,
+ addr = gen_rtx_MEM (Pmode,
+ plus_constant (stack_pointer_rtx,
cfun_frame_layout.backchain_offset));
else
- addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
+ addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
set_mem_alias_set (addr, get_frame_alias_set ());
insn = emit_insn (gen_move_insn (addr, temp_reg));
}
if (TARGET_BACKCHAIN && flag_non_call_exceptions)
{
addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
- emit_insn (gen_rtx_CLOBBER (VOIDmode, addr));
+ emit_clobber (addr);
}
}
moved below the use of the stack slots. */
s390_emit_stack_tie ();
- insn = emit_insn (gen_add2_insn (temp_reg,
+ insn = emit_insn (gen_add2_insn (temp_reg,
GEN_INT (cfun_frame_layout.f8_offset)));
offset = 0;
cfun_frame_layout.frame_size
+ cfun_frame_layout.f8_offset
+ offset);
-
+
insn = save_fpr (temp_reg, offset, i);
offset += 8;
RTX_FRAME_RELATED_P (insn) = 1;
- REG_NOTES (insn) =
- gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
- gen_rtx_SET (VOIDmode,
- gen_rtx_MEM (DFmode, addr),
- gen_rtx_REG (DFmode, i)),
- REG_NOTES (insn));
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (DFmode, addr),
+ gen_rtx_REG (DFmode, i)));
}
}
void
s390_emit_epilogue (bool sibcall)
{
- rtx frame_pointer, return_reg;
+ rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
int area_bottom, area_top, offset = 0;
int next_offset;
rtvec p;
/* Check whether to use frame or stack pointer for restore. */
- frame_pointer = (frame_pointer_needed
+ frame_pointer = (frame_pointer_needed
? hard_frame_pointer_rtx : stack_pointer_rtx);
s390_frame_area (&area_bottom, &area_top);
}
else
{
- rtx insn, frame_off;
+ rtx insn, frame_off, cfa;
offset = area_bottom < 0 ? -area_bottom : 0;
frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
+ cfa = gen_rtx_SET (VOIDmode, frame_pointer,
+ gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
if (DISP_IN_RANGE (INTVAL (frame_off)))
{
insn = gen_rtx_SET (VOIDmode, frame_pointer,
insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
annotate_constant_pool_refs (&PATTERN (insn));
}
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
+ RTX_FRAME_RELATED_P (insn) = 1;
}
/* Restore call saved fprs. */
{
restore_fpr (frame_pointer,
offset + next_offset, i);
+ cfa_restores
+ = alloc_reg_note (REG_CFA_RESTORE,
+ gen_rtx_REG (DFmode, i), cfa_restores);
next_offset += 8;
}
}
}
-
+
}
else
{
{
restore_fpr (frame_pointer,
offset + next_offset, i);
+ cfa_restores
+ = alloc_reg_note (REG_CFA_RESTORE,
+ gen_rtx_REG (DFmode, i), cfa_restores);
next_offset += 8;
}
else if (!TARGET_PACKED_STACK)
next_offset += 8;
}
-
+
}
/* Return register. */
i <= cfun_frame_layout.last_restore_gpr;
i++)
{
- /* These registers are special and need to be
- restored in any case. */
- if (i == STACK_POINTER_REGNUM
- || i == RETURN_REGNUM
- || i == BASE_REGNUM
- || (flag_pic && i == (int)PIC_OFFSET_TABLE_REGNUM))
- continue;
-
- if (global_regs[i])
+ if (global_not_special_regno_p (i))
{
addr = plus_constant (frame_pointer,
- offset + cfun_frame_layout.gprs_offset
+ offset + cfun_frame_layout.gprs_offset
+ (i - cfun_frame_layout.first_save_gpr_slot)
* UNITS_PER_WORD);
addr = gen_rtx_MEM (Pmode, addr);
set_mem_alias_set (addr, get_frame_alias_set ());
emit_move_insn (addr, gen_rtx_REG (Pmode, i));
}
+ else
+ cfa_restores
+ = alloc_reg_note (REG_CFA_RESTORE,
+ gen_rtx_REG (Pmode, i), cfa_restores);
}
if (! sibcall)
addr = plus_constant (frame_pointer,
offset + cfun_frame_layout.gprs_offset
- + (RETURN_REGNUM
+ + (RETURN_REGNUM
- cfun_frame_layout.first_save_gpr_slot)
* UNITS_PER_WORD);
addr = gen_rtx_MEM (Pmode, addr);
insn = restore_gprs (frame_pointer,
offset + cfun_frame_layout.gprs_offset
- + (cfun_frame_layout.first_restore_gpr
+ + (cfun_frame_layout.first_restore_gpr
- cfun_frame_layout.first_save_gpr_slot)
* UNITS_PER_WORD,
cfun_frame_layout.first_restore_gpr,
cfun_frame_layout.last_restore_gpr);
- emit_insn (insn);
+ insn = emit_insn (insn);
+ REG_NOTES (insn) = cfa_restores;
+ add_reg_note (insn, REG_CFA_DEF_CFA,
+ plus_constant (stack_pointer_rtx, STACK_POINTER_OFFSET));
+ RTX_FRAME_RELATED_P (insn) = 1;
}
if (! sibcall)
return true;
}
+/* Function arguments and return values are promoted to word size.
+
+ If MODE is an integral mode narrower than UNITS_PER_WORD, return
+ Pmode; otherwise return MODE unchanged.
+
+ TYPE is the type of the value being promoted. It may be null (the
+ generic promote_function_mode hook documentation allows this for
+ libcall arguments), in which case only MODE is considered.
+ *PUNSIGNEDP is set to POINTERS_EXTEND_UNSIGNED when a pointer-typed
+ value is promoted and is left untouched in every other case.
+ FNTYPE and FOR_RETURN are ignored. */
+
+static enum machine_mode
+s390_promote_function_mode (const_tree type, enum machine_mode mode,
+ int *punsignedp,
+ const_tree fntype ATTRIBUTE_UNUSED,
+ int for_return ATTRIBUTE_UNUSED)
+{
+ if (INTEGRAL_MODE_P (mode)
+ && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
+ {
+ /* POINTER_TYPE_P inspects TYPE, so guard against a null TYPE
+ before looking at it. */
+ if (type && POINTER_TYPE_P (type))
+ *punsignedp = POINTERS_EXTEND_UNSIGNED;
+ return Pmode;
+ }
+
+ return mode;
+}
+
/* Define where to return a (scalar) value of type TYPE.
If TYPE is null, define where to return a (scalar)
value of mode MODE from a libcall. */
rtx
-s390_function_value (const_tree type, enum machine_mode mode)
+s390_function_value (const_tree type, const_tree fn, enum machine_mode mode)
{
if (type)
{
int unsignedp = TYPE_UNSIGNED (type);
- mode = promote_mode (type, TYPE_MODE (type), &unsignedp, 1);
+ mode = promote_function_mode (type, TYPE_MODE (type), &unsignedp, fn, 1);
}
gcc_assert (GET_MODE_CLASS (mode) == MODE_INT || SCALAR_FLOAT_MODE_P (mode));
record = lang_hooks.types.make_type (RECORD_TYPE);
type_decl =
- build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
+ build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__va_list_tag"), record);
- f_gpr = build_decl (FIELD_DECL, get_identifier ("__gpr"),
+ f_gpr = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__gpr"),
long_integer_type_node);
- f_fpr = build_decl (FIELD_DECL, get_identifier ("__fpr"),
+ f_fpr = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__fpr"),
long_integer_type_node);
- f_ovf = build_decl (FIELD_DECL, get_identifier ("__overflow_arg_area"),
+ f_ovf = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__overflow_arg_area"),
ptr_type_node);
- f_sav = build_decl (FIELD_DECL, get_identifier ("__reg_save_area"),
+ f_sav = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__reg_save_area"),
ptr_type_node);
va_list_gpr_counter_field = f_gpr;
if (cfun->va_list_gpr_size)
{
- t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr,
- build_int_cst (NULL_TREE, n_gpr));
+ t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
+ build_int_cst (NULL_TREE, n_gpr));
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
if (cfun->va_list_fpr_size)
{
- t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr,
+ t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
build_int_cst (NULL_TREE, n_fpr));
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), t, size_int (off));
- t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
+ t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (sav), t,
size_int (-RETURN_REGNUM * UNITS_PER_WORD));
-
- t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (sav), sav, t);
+
+ t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
} */
static tree
-s390_gimplify_va_arg (tree valist, tree type, tree *pre_p,
- tree *post_p ATTRIBUTE_UNUSED)
+s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
+ gimple_seq *post_p ATTRIBUTE_UNUSED)
{
tree f_gpr, f_fpr, f_ovf, f_sav;
tree gpr, fpr, ovf, sav, reg, t, u;
valist = build_va_arg_indirect_ref (valist);
gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
- ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
+ /* The tree for args* cannot be shared between gpr/fpr and ovf since
+ both appear on a lhs. */
+ valist = unshare_expr (valist);
+ ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
+
size = int_size_in_bytes (type);
if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
/* Pull the value out of the saved registers ... */
- lab_false = create_artificial_label ();
- lab_over = create_artificial_label ();
+ lab_false = create_artificial_label (UNKNOWN_LOCATION);
+ lab_over = create_artificial_label (UNKNOWN_LOCATION);
addr = create_tmp_var (ptr_type_node, "addr");
- DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
t = build2 (GT_EXPR, boolean_type_node, reg, t);
t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
gimplify_and_add (t, pre_p);
- t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav,
+ t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav,
size_int (sav_ofs));
- u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
+ u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
t = build2 (POINTER_PLUS_EXPR, ptr_type_node, t, fold_convert (sizetype, u));
- t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
- gimplify_and_add (t, pre_p);
+ gimplify_assign (addr, t, pre_p);
- t = build1 (GOTO_EXPR, void_type_node, lab_over);
- gimplify_and_add (t, pre_p);
+ gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
- t = build1 (LABEL_EXPR, void_type_node, lab_false);
- append_to_statement_list (t, pre_p);
+ gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
/* ... Otherwise out of the overflow area. */
t = ovf;
if (size < UNITS_PER_WORD)
- t = build2 (POINTER_PLUS_EXPR, ptr_type_node, t,
+ t = build2 (POINTER_PLUS_EXPR, ptr_type_node, t,
size_int (UNITS_PER_WORD - size));
gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
- u = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
- gimplify_and_add (u, pre_p);
+ gimplify_assign (addr, t, pre_p);
- t = build2 (POINTER_PLUS_EXPR, ptr_type_node, t,
+ t = build2 (POINTER_PLUS_EXPR, ptr_type_node, t,
size_int (size));
- t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, ovf, t);
- gimplify_and_add (t, pre_p);
+ gimplify_assign (ovf, t, pre_p);
- t = build1 (LABEL_EXPR, void_type_node, lab_over);
- append_to_statement_list (t, pre_p);
+ gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
/* Increment register save count. */
if (indirect_p)
{
- t = build_pointer_type (build_pointer_type (type));
+ t = build_pointer_type_for_mode (build_pointer_type (type),
+ ptr_mode, true);
addr = fold_convert (t, addr);
addr = build_va_arg_indirect_ref (addr);
}
else
{
- t = build_pointer_type (type);
+ t = build_pointer_type_for_mode (type, ptr_mode, true);
addr = fold_convert (t, addr);
}
S390_BUILTIN_max
};
-static unsigned int const code_for_builtin_64[S390_BUILTIN_max] = {
+static enum insn_code const code_for_builtin_64[S390_BUILTIN_max] = {
CODE_FOR_get_tp_64,
CODE_FOR_set_tp_64
};
-static unsigned int const code_for_builtin_31[S390_BUILTIN_max] = {
+static enum insn_code const code_for_builtin_31[S390_BUILTIN_max] = {
CODE_FOR_get_tp_31,
CODE_FOR_set_tp_31
};
{
#define MAX_ARGS 2
- unsigned int const *code_for_builtin =
+ enum insn_code const *code_for_builtin =
TARGET_64BIT ? code_for_builtin_64 : code_for_builtin_31;
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
insn_op = &insn_data[icode].operand[arity + nonvoid];
- op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, 0);
+ op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
if (!(*insn_op->predicate) (op[arity], insn_op->mode))
op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
On S/390, we use gpr 1 internally in the trampoline code;
gpr 0 is used to hold the static chain. */
-void
-s390_trampoline_template (FILE *file)
+static void
+s390_asm_trampoline_template (FILE *file)
{
rtx op[2];
op[0] = gen_rtx_REG (Pmode, 0);
FNADDR is an RTX for the address of the function's pure code.
CXT is an RTX for the static chain value for the function. */
-void
-s390_initialize_trampoline (rtx addr, rtx fnaddr, rtx cxt)
+static void
+s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
{
- emit_move_insn (gen_rtx_MEM (Pmode,
- memory_address (Pmode,
- plus_constant (addr, (TARGET_64BIT ? 16 : 8)))), cxt);
- emit_move_insn (gen_rtx_MEM (Pmode,
- memory_address (Pmode,
- plus_constant (addr, (TARGET_64BIT ? 24 : 12)))), fnaddr);
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); /* Operand 0 of FNDECL's
+ DECL_RTL is stored below as the function address. */
+ rtx mem;
+
+ emit_block_move (m_tramp, assemble_trampoline_template (),
+ GEN_INT (2*UNITS_PER_WORD), BLOCK_OP_NORMAL); /* Copy the first
+ 2*UNITS_PER_WORD bytes of the template into M_TRAMP. */
+
+ mem = adjust_address (m_tramp, Pmode, 2*UNITS_PER_WORD); /* Slot for CXT. */
+ emit_move_insn (mem, cxt);
+ mem = adjust_address (m_tramp, Pmode, 3*UNITS_PER_WORD); /* Slot for FNADDR. */
+ emit_move_insn (mem, fnaddr);
}
/* Output assembler code to FILE to increment profiler label # LABELNO
{
default_encode_section_info (decl, rtl, first);
- /* If a variable has a forced alignment to < 2 bytes, mark it with
- SYMBOL_FLAG_ALIGN1 to prevent it from being used as LARL operand. */
- if (TREE_CODE (decl) == VAR_DECL
- && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 16)
- SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
+ if (TREE_CODE (decl) == VAR_DECL)
+ {
+ /* If a variable has a forced alignment to < 2 bytes, mark it
+ with SYMBOL_FLAG_ALIGN1 to prevent it from being used as LARL
+ operand. */
+ if (DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 16)
+ SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
+ if (!DECL_SIZE (decl)
+ || !DECL_ALIGN (decl)
+ || !host_integerp (DECL_SIZE (decl), 0)
+ || (DECL_ALIGN (decl) <= 64
+ && DECL_ALIGN (decl) != tree_low_cst (DECL_SIZE (decl), 0)))
+ SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
+ }
+
+ /* Literal pool references don't have a decl so they are handled
+ differently here. We rely on the information in the MEM_ALIGN
+ entry to decide upon natural alignment. */
+ if (MEM_P (rtl)
+ && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
+ && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0))
+ && (MEM_ALIGN (rtl) == 0
+ || GET_MODE_BITSIZE (GET_MODE (rtl)) == 0
+ || MEM_ALIGN (rtl) < GET_MODE_BITSIZE (GET_MODE (rtl))))
+ SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
}
/* Output thunk to FILE that implements a C++ virtual function call (with
rtx op[10];
int nonlocal = 0;
+ /* Make sure unwind info is emitted for the thunk if needed. */
+ final_start_function (emit_barrier (), file, 1);
+
/* Operand 0 is the target function. */
op[0] = XEXP (DECL_RTL (function), 0);
if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
output_asm_insn (".long\t%3", op);
}
}
+ final_end_function ();
}
static bool
{
*p1 = CC_REGNUM;
*p2 = INVALID_REGNUM;
-
+
return true;
}
/* If all special registers are in fact used, there's nothing we
can do, so no point in walking the insn list. */
- if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
+ if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
&& cfun_frame_layout.last_save_gpr >= BASE_REGNUM
- && (TARGET_CPU_ZARCH
- || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
+ && (TARGET_CPU_ZARCH
+ || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
&& cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
return;
if (cfun_frame_layout.first_save_gpr != -1)
{
- new_insn = save_gprs (base,
+ new_insn = save_gprs (base,
off + (cfun_frame_layout.first_save_gpr
- - first) * UNITS_PER_WORD,
+ - first) * UNITS_PER_WORD,
cfun_frame_layout.first_save_gpr,
cfun_frame_layout.last_save_gpr);
new_insn = emit_insn_before (new_insn, insn);
if (cfun_frame_layout.first_restore_gpr != -1)
{
- new_insn = restore_gprs (base,
+ new_insn = restore_gprs (base,
off + (cfun_frame_layout.first_restore_gpr
- - first) * UNITS_PER_WORD,
+ - first) * UNITS_PER_WORD,
cfun_frame_layout.first_restore_gpr,
cfun_frame_layout.last_restore_gpr);
new_insn = emit_insn_before (new_insn, insn);
}
}
+/* On z10 the dynamic branch prediction must see the backward jump in
+ a window of 384 bytes. If not it falls back to the static
+ prediction. This function rearranges the loop backward branch in a
+ way which makes the static prediction always correct. The function
+ returns true if it added an instruction.
+
+ INSN is the jump insn to examine. Nothing is changed and false is
+ returned unless INSN is a single-set conditional jump (IF_THEN_ELSE
+ assigned to pc) whose target label is at least Z10_PREDICT_DISTANCE
+ before INSN according to INSN_ADDRESSES, and no jump or label is
+ found while walking backwards from INSN over that distance. */
+static bool
+s390_z10_fix_long_loop_prediction (rtx insn)
+{
+ rtx set = single_set (insn);
+ rtx code_label, label_ref, new_label;
+ rtx uncond_jump;
+ rtx cur_insn;
+ rtx tmp;
+ int distance;
+
+ /* This will exclude branch on count and branch on index patterns
+ since these are correctly statically predicted. */
+ if (!set
+ || SET_DEST (set) != pc_rtx
+ || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
+ return false;
+
+ /* The label may sit in either arm of the IF_THEN_ELSE; pick the arm
+ that is a LABEL_REF. */
+ label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
+ XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
+
+ gcc_assert (GET_CODE (label_ref) == LABEL_REF);
+
+ code_label = XEXP (label_ref, 0);
+
+ /* Give up if one of the addresses is unknown (-1) or if the target
+ label is closer to INSN than Z10_PREDICT_DISTANCE. */
+ if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
+ || INSN_ADDRESSES (INSN_UID (insn)) == -1
+ || (INSN_ADDRESSES (INSN_UID (insn))
+ - INSN_ADDRESSES (INSN_UID (code_label)) < Z10_PREDICT_DISTANCE))
+ return false;
+
+ /* Walk backwards from INSN summing up the insn lengths. Bail out if
+ the start of the insn chain, a jump or a label is reached before
+ Z10_PREDICT_DISTANCE - 6 has been covered. */
+ for (distance = 0, cur_insn = PREV_INSN (insn);
+ distance < Z10_PREDICT_DISTANCE - 6;
+ distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
+ if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
+ return false;
+
+ /* Emit an unconditional jump to the original target right after
+ INSN, followed by a fresh label. */
+ new_label = gen_label_rtx ();
+ uncond_jump = emit_jump_insn_after (
+ gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_LABEL_REF (VOIDmode, code_label)),
+ insn);
+ emit_label_after (new_label, uncond_jump);
+
+ /* Exchange the two arms of the IF_THEN_ELSE and invalidate the
+ cached insn code since the pattern has changed. */
+ tmp = XEXP (SET_SRC (set), 1);
+ XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
+ XEXP (SET_SRC (set), 2) = tmp;
+ INSN_CODE (insn) = -1;
+
+ /* INSN now branches to the new label placed behind the
+ unconditional jump; the unconditional jump takes over the
+ original target. */
+ XEXP (label_ref, 0) = new_label;
+ JUMP_LABEL (insn) = new_label;
+ JUMP_LABEL (uncond_jump) = code_label;
+
+ return true;
+}
+
+/* Returns 1 if INSN reads the value of REG for purposes not related
+ to addressing of memory, and 0 otherwise. REG must be a register
+ rtx; only the PATTERN of INSN is inspected. The result is nonzero
+ when REG is referenced in the pattern and reg_used_in_mem_p reports
+ no use of its register number inside a memory reference. */
+static int
+s390_non_addr_reg_read_p (rtx reg, rtx insn)
+{
+ return reg_referenced_p (reg, PATTERN (insn))
+ && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
+}
+
+/* Starting from INSN find_cond_jump looks downwards in the insn
+ stream for a single jump insn which is the last user of the
+ condition code set in INSN. Returns that jump insn, or NULL_RTX if
+ the search hits a label, a non-jump insn mentioning the CC
+ register, a jump which is not a conditional jump on the CC
+ register, or a jump without a REG_DEAD note for the CC register. */
+static rtx
+find_cond_jump (rtx insn)
+{
+ for (; insn; insn = NEXT_INSN (insn))
+ {
+ rtx ite, cc;
+
+ /* A label ends the search. */
+ if (LABEL_P (insn))
+ break;
+
+ /* Skip over non-jump insns unless they mention the CC register,
+ in which case the search fails. */
+ if (!JUMP_P (insn))
+ {
+ if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
+ break;
+ continue;
+ }
+
+ /* This will be triggered by a return. */
+ if (GET_CODE (PATTERN (insn)) != SET)
+ break;
+
+ gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
+ ite = SET_SRC (PATTERN (insn));
+
+ /* Only conditional jumps are of interest. */
+ if (GET_CODE (ite) != IF_THEN_ELSE)
+ break;
+
+ /* The first operand of the condition has to be the CC register. */
+ cc = XEXP (XEXP (ite, 0), 0);
+ if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
+ break;
+
+ /* Accept the jump only if the CC register dies here. The first
+ jump found ends the search either way. */
+ if (find_reg_note (insn, REG_DEAD, cc))
+ return insn;
+ break;
+ }
+
+ return NULL_RTX;
+}
+
+/* Swap the condition in COND and the operands in OP0 and OP1 so that
+ the semantics does not change. If NULL_RTX is passed as COND the
+ function tries to find the conditional jump starting with INSN.
+ If no such jump can be found nothing is changed at all. OP0 and
+ OP1 point to the operand slots to be exchanged; the code of COND is
+ replaced in place by its swapped counterpart. */
+static void
+s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx insn)
+{
+ rtx tmp = *op0;
+
+ if (cond == NULL_RTX)
+ {
+ /* Look for the jump consuming the condition code behind INSN and
+ reduce it to its single set. */
+ rtx jump = find_cond_jump (NEXT_INSN (insn));
+ jump = jump ? single_set (jump) : NULL_RTX;
+
+ if (jump == NULL_RTX)
+ return;
+
+ /* The condition is the first operand of the IF_THEN_ELSE which
+ is the source of the jump's set. */
+ cond = XEXP (XEXP (jump, 1), 0);
+ }
+
+ *op0 = *op1;
+ *op1 = tmp;
+ PUT_CODE (cond, swap_condition (GET_CODE (cond)));
+}
+
+/* On z10, instructions of the compare-and-branch family have the
+ property to access the register occurring as second operand with
+ its bits complemented. If such a compare is grouped with a second
+ instruction that accesses the same register non-complemented, and
+ if that register's value is delivered via a bypass, then the
+ pipeline recycles, thereby causing significant performance decline.
+ This function locates such situations and exchanges the two
+ operands of the compare. The function returns true whenever it
+ added an insn.
+
+ INSN is the insn to examine. Only PARALLEL patterns forming a
+ conditional jump and plain SETs of the CC register from a COMPARE
+ are handled, and only if both compare operands are registers of an
+ integer mode. */
+static bool
+s390_z10_optimize_cmp (rtx insn)
+{
+ rtx prev_insn, next_insn;
+ bool insn_added_p = false;
+ rtx cond, *op0, *op1;
+
+ if (GET_CODE (PATTERN (insn)) == PARALLEL)
+ {
+ /* Handle compare and branch and branch on count
+ instructions. */
+ rtx pattern = single_set (insn);
+
+ if (!pattern
+ || SET_DEST (pattern) != pc_rtx
+ || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
+ return false;
+
+ /* The condition is part of the insn itself, so the operands
+ are taken directly from it. */
+ cond = XEXP (SET_SRC (pattern), 0);
+ op0 = &XEXP (cond, 0);
+ op1 = &XEXP (cond, 1);
+ }
+ else if (GET_CODE (PATTERN (insn)) == SET)
+ {
+ rtx src, dest;
+
+ /* Handle normal compare instructions. */
+ src = SET_SRC (PATTERN (insn));
+ dest = SET_DEST (PATTERN (insn));
+
+ if (!REG_P (dest)
+ || !CC_REGNO_P (REGNO (dest))
+ || GET_CODE (src) != COMPARE)
+ return false;
+
+ /* s390_swap_cmp will try to find the conditional
+ jump when passing NULL_RTX as condition. */
+ cond = NULL_RTX;
+ op0 = &XEXP (src, 0);
+ op1 = &XEXP (src, 1);
+ }
+ else
+ return false;
+
+ /* Only register-register compares are of interest. */
+ if (!REG_P (*op0) || !REG_P (*op1))
+ return false;
+
+ if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
+ return false;
+
+ /* Swap the COMPARE arguments and its mask if there is a
+ conflicting access in the previous insn. */
+ prev_insn = prev_active_insn (insn);
+ if (prev_insn != NULL_RTX && INSN_P (prev_insn)
+ && reg_referenced_p (*op1, PATTERN (prev_insn)))
+ s390_swap_cmp (cond, op0, op1, insn);
+
+ /* Check if there is a conflict with the next insn. If there
+ was no conflict with the previous insn, then swap the
+ COMPARE arguments and its mask. If we already swapped
+ the operands, or if swapping them would cause a conflict
+ with the previous insn, issue a NOP after the COMPARE in
+ order to separate the two instructions. */
+ next_insn = next_active_insn (insn);
+ if (next_insn != NULL_RTX && INSN_P (next_insn)
+ && s390_non_addr_reg_read_p (*op1, next_insn))
+ {
+ if (prev_insn != NULL_RTX && INSN_P (prev_insn)
+ && s390_non_addr_reg_read_p (*op0, prev_insn))
+ {
+ /* The nop1 pattern is used if the second operand is register
+ 0, the nop pattern otherwise. */
+ if (REGNO (*op1) == 0)
+ emit_insn_after (gen_nop1 (), insn);
+ else
+ emit_insn_after (gen_nop (), insn);
+ insn_added_p = true;
+ }
+ else
+ s390_swap_cmp (cond, op0, op1, insn);
+ }
+ return insn_added_p;
+}
+
/* Perform machine-dependent processing. */
static void
machine_dependent_reorg might confuse insn length counts. */
split_all_insns_noflow ();
- /* From here on decomposed literal pool addresses must be accepted. */
- cfun->machine->decomposed_literal_pool_addresses_ok_p = true;
-
/* Install the main literal pool and the associated base
register load insns.
/* Try to optimize prologue and epilogue further. */
s390_optimize_prologue ();
+
+ /* Walk over the insns and do some z10 specific changes. */
+ if (s390_tune == PROCESSOR_2097_Z10)
+ {
+ rtx insn;
+ bool insn_added_p = false;
+
+ /* The insn lengths and addresses have to be up to date for the
+ following manipulations. */
+ shorten_branches (get_insns ());
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
+ continue;
+
+ if (JUMP_P (insn))
+ insn_added_p |= s390_z10_fix_long_loop_prediction (insn);
+
+ if (GET_CODE (PATTERN (insn)) == PARALLEL
+ || GET_CODE (PATTERN (insn)) == SET)
+ insn_added_p |= s390_z10_optimize_cmp (insn);
+ }
+
+ /* Adjust branches if we added new instructions. */
+ if (insn_added_p)
+ shorten_branches (get_insns ());
+ }
+}
+
+/* Return true if INSN is a fp load insn writing register REGNO.
+ INSN qualifies if its type attribute is TYPE_FLOADSF or
+ TYPE_FLOADDF and it is a single set loading register number REGNO
+ from a MEM. */
+static inline bool
+s390_fpload_toreg (rtx insn, unsigned int regno)
+{
+ rtx set;
+ enum attr_type flag = s390_safe_attr_type (insn);
+
+ /* Only SFmode and DFmode fp loads are considered. */
+ if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
+ return false;
+
+ set = single_set (insn);
+
+ if (set == NULL_RTX)
+ return false;
+
+ /* The insn must load a register from memory. */
+ if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
+ return false;
+
+ if (REGNO (SET_DEST (set)) != regno)
+ return false;
+
+ return true;
+}
+
+/* This value describes the distance to be avoided between an
+ arithmetic fp instruction and an fp load writing the same register.
+ Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 is
+ fine but the exact value has to be avoided. Otherwise the FP
+ pipeline will throw an exception causing a major penalty. */
+#define Z10_EARLYLOAD_DISTANCE 7
+
+/* Rearrange the ready list in order to avoid the situation described
+ for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
+ moved to the very end of the ready list.
+
+ READY is the ready list and *NREADY_P its number of entries. The
+ insn found Z10_EARLYLOAD_DISTANCE - 1 active insns before
+ last_scheduled_insn is examined; if it is a single set of a
+ floating point mode register and not itself an fp load, an fp load
+ in READY writing the same register is moved to index 0. */
+static void
+s390_z10_prevent_earlyload_conflicts (rtx *ready, int *nready_p)
+{
+ unsigned int regno;
+ int nready = *nready_p;
+ rtx tmp;
+ int i;
+ rtx insn;
+ rtx set;
+ enum attr_type flag;
+ int distance;
+
+ /* Skip DISTANCE - 1 active insns. Give up if a call or a jump is
+ among the insns visited. */
+ for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
+ distance > 0 && insn != NULL_RTX;
+ distance--, insn = prev_active_insn (insn))
+ if (CALL_P (insn) || JUMP_P (insn))
+ return;
+
+ /* There are not enough insns in front of last_scheduled_insn. */
+ if (insn == NULL_RTX)
+ return;
+
+ set = single_set (insn);
+
+ /* Only insns setting a register of a floating point mode matter. */
+ if (set == NULL_RTX || !REG_P (SET_DEST (set))
+ || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
+ return;
+
+ flag = s390_safe_attr_type (insn);
+
+ /* Nothing to do if the insn is itself an fp load. */
+ if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
+ return;
+
+ regno = REGNO (SET_DEST (set));
+ i = nready - 1;
+
+ /* Search from the last entry downwards for an fp load writing
+ REGNO. The loop stops at index 0 at the latest. */
+ while (!s390_fpload_toreg (ready[i], regno) && i > 0)
+ i--;
+
+ /* Index 0 means that either no such load was found above index 0
+ or the load already sits at index 0; nothing has to be moved. */
+ if (!i)
+ return;
+
+ /* Move the load to index 0, shifting entries 0 .. i-1 up by one.
+ NOTE(review): index 0 is presumably the entry the scheduler
+ issues last - confirm against the scheduler's ready list order. */
+ tmp = ready[i];
+ memmove (&ready[1], &ready[0], sizeof (rtx) * i);
+ ready[0] = tmp;
+}
+
+/* This function is called via hook TARGET_SCHED_REORDER before
+ issuing one insn from list READY which contains *NREADYP entries.
+ For target z10 it reorders load instructions to avoid early load
+ conflicts in the floating point pipeline. The reordering is only
+ attempted after reload has completed and when more than one insn
+ is ready. Returns the value of s390_issue_rate (). */
+static int
+s390_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
+ rtx *ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
+{
+ if (s390_tune == PROCESSOR_2097_Z10)
+ if (reload_completed && *nreadyp > 1)
+ s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
+
+ return s390_issue_rate ();
+}
+
+/* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
+ the scheduler has issued INSN. It stores the last issued insn into
+ last_scheduled_insn in order to make it available for
+ s390_sched_reorder. Returns MORE decremented by one, except for
+ insns whose pattern is a USE or a CLOBBER, for which MORE is
+ returned unchanged. */
+static int
+s390_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
+ int verbose ATTRIBUTE_UNUSED,
+ rtx insn, int more)
+{
+ last_scheduled_insn = insn;
+
+ /* USE and CLOBBER patterns do not reduce the remaining count. */
+ if (GET_CODE (PATTERN (insn)) != USE
+ && GET_CODE (PATTERN (insn)) != CLOBBER)
+ return more - 1;
+ else
+ return more;
}
+/* This function is installed as hook TARGET_SCHED_INIT. It resets
+ last_scheduled_insn to NULL_RTX; none of the hook's arguments are
+ used. */
+static void
+s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
+ int verbose ATTRIBUTE_UNUSED,
+ int max_ready ATTRIBUTE_UNUSED)
+{
+ last_scheduled_insn = NULL_RTX;
+}
/* Initialize GCC target structure. */
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
+
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY s390_return_in_memory
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
+#undef TARGET_SCHED_VARIABLE_ISSUE
+#define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
+#undef TARGET_SCHED_REORDER
+#define TARGET_SCHED_REORDER s390_sched_reorder
+#undef TARGET_SCHED_INIT
+#define TARGET_SCHED_INIT s390_sched_init
+
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
-#undef TARGET_PROMOTE_FUNCTION_ARGS
-#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true
-#undef TARGET_PROMOTE_FUNCTION_RETURN
-#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE s390_can_eliminate
+
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT s390_trampoline_init
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-s390.h"