COSTS_N_INSNS (71), /* DSGR */
};
+static const
+struct processor_costs z196_cost =
+{
+ COSTS_N_INSNS (7), /* M */
+ COSTS_N_INSNS (5), /* MGHI */
+ COSTS_N_INSNS (5), /* MH */
+ COSTS_N_INSNS (5), /* MHI */
+ COSTS_N_INSNS (7), /* ML */
+ COSTS_N_INSNS (7), /* MR */
+ COSTS_N_INSNS (6), /* MS */
+ COSTS_N_INSNS (8), /* MSG */
+ COSTS_N_INSNS (6), /* MSGF */
+ COSTS_N_INSNS (6), /* MSGFR */
+ COSTS_N_INSNS (8), /* MSGR */
+ COSTS_N_INSNS (6), /* MSR */
+ COSTS_N_INSNS (1), /* multiplication in DFmode */
+ COSTS_N_INSNS (40), /* MXBR B+40 */
+ COSTS_N_INSNS (100), /* SQXBR B+100 */
+ COSTS_N_INSNS (42), /* SQDBR B+42 */
+ COSTS_N_INSNS (28), /* SQEBR B+28 */
+ COSTS_N_INSNS (1), /* MADBR B */
+ COSTS_N_INSNS (1), /* MAEBR B */
+ COSTS_N_INSNS (101), /* DXBR B+101 */
+ COSTS_N_INSNS (29), /* DDBR */
+ COSTS_N_INSNS (22), /* DEBR */
+ COSTS_N_INSNS (160), /* DLGR cracked */
+ COSTS_N_INSNS (160), /* DLR cracked */
+ COSTS_N_INSNS (160), /* DR expanded */
+ COSTS_N_INSNS (160), /* DSGFR cracked */
+ COSTS_N_INSNS (160), /* DSGR cracked */
+};
+
extern int reload_completed;
/* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
(HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
/* That's the read ahead of the dynamic branch prediction unit in
- bytes on a z10 CPU. */
-#define Z10_PREDICT_DISTANCE 384
+ bytes on a z10 (or higher) CPU. */
+#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
static enum machine_mode
s390_libgcc_cmp_return_mode (void)
{"z9-ec", PROCESSOR_2094_Z9_109, PF_IEEE_FLOAT | PF_ZARCH
| PF_LONG_DISPLACEMENT | PF_EXTIMM | PF_DFP },
{"z10", PROCESSOR_2097_Z10, PF_IEEE_FLOAT | PF_ZARCH
- | PF_LONG_DISPLACEMENT | PF_EXTIMM | PF_DFP | PF_Z10},
+ | PF_LONG_DISPLACEMENT | PF_EXTIMM | PF_DFP | PF_Z10},
+ {"z196", PROCESSOR_2817_Z196, PF_IEEE_FLOAT | PF_ZARCH
+ | PF_LONG_DISPLACEMENT | PF_EXTIMM | PF_DFP | PF_Z10 | PF_Z196 },
};
size_t i;
break;
case PROCESSOR_2097_Z10:
s390_cost = &z10_cost;
+ break;
+ case PROCESSOR_2817_Z196:
+ s390_cost = &z196_cost;
break;
default:
s390_cost = &z900_cost;
target_flags |= MASK_LONG_DOUBLE_128;
#endif
- if (s390_tune == PROCESSOR_2097_Z10)
+ if (s390_tune == PROCESSOR_2097_Z10
+ || s390_tune == PROCESSOR_2817_Z196)
{
if (!PARAM_SET_P (PARAM_MAX_UNROLLED_INSNS))
set_param_value ("max-unrolled-insns", 100);
operand during and after reload. The difference to
legitimate_constant_p is that this function will not accept
a constant that would need to be forced to the literal pool
- before it can be used as operand. */
+ before it can be used as operand.
+ This function accepts all constants which can be loaded directly
+ into a GPR. */
bool
legitimate_reload_constant_p (rtx op)
return false;
}
+/* Returns true if the constant value OP is a legitimate fp operand
+ during and after reload.
+ This function accepts all constants which can be loaded directly
+ into an FPR. */
+
+static bool
+legitimate_reload_fp_constant_p (rtx op)
+{
+ /* Accept floating-point zero operands if the load zero instruction
+ can be used. */
+ if (TARGET_Z196
+ && GET_CODE (op) == CONST_DOUBLE
+ && s390_float_const_zero_p (op))
+ return true;
+
+ return false;
+}
+
/* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
return the class of reg to actually use. */
else if (reg_class_subset_p (ADDR_REGS, rclass)
&& legitimate_reload_constant_p (op))
return ADDR_REGS;
- else
- return NO_REGS;
+ else if (reg_class_subset_p (FP_REGS, rclass)
+ && legitimate_reload_fp_constant_p (op))
+ return FP_REGS;
+ return NO_REGS;
/* If a symbolic constant or a PLUS is reloaded,
it is most likely being used as an address, so
if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
return false;
+ /* Avoid LA instructions with index register on z196; it is
+ preferable to use regular add instructions when possible. */
+ if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
+ return false;
+
if (!TARGET_64BIT && !addr.pointer)
return false;
A STD instruction should be scheduled earlier,
in order to use the bypass. */
-
-
static int
s390_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
{
if (s390_tune != PROCESSOR_2084_Z990
&& s390_tune != PROCESSOR_2094_Z9_109
- && s390_tune != PROCESSOR_2097_Z10)
+ && s390_tune != PROCESSOR_2097_Z10
+ && s390_tune != PROCESSOR_2817_Z196)
return priority;
switch (s390_safe_attr_type (insn))
{
case PROCESSOR_2084_Z990:
case PROCESSOR_2094_Z9_109:
+ case PROCESSOR_2817_Z196:
return 3;
case PROCESSOR_2097_Z10:
return 2;
}
}
-/* On z10 the dynamic branch prediction must see the backward jump in
- a window of 384 bytes. If not it falls back to the static
- prediction. This function rearranges the loop backward branch in a
- way which makes the static prediction always correct. The function
- returns true if it added an instruction. */
+/* On z10 and later the dynamic branch prediction must see the
+ backward jump within a certain window. If not it falls back to
+ the static prediction. This function rearranges the loop backward
+ branch in a way which makes the static prediction always correct.
+ The function returns true if it added an instruction. */
static bool
-s390_z10_fix_long_loop_prediction (rtx insn)
+s390_fix_long_loop_prediction (rtx insn)
{
rtx set = single_set (insn);
rtx code_label, label_ref, new_label;
if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
|| INSN_ADDRESSES (INSN_UID (insn)) == -1
|| (INSN_ADDRESSES (INSN_UID (insn))
- - INSN_ADDRESSES (INSN_UID (code_label)) < Z10_PREDICT_DISTANCE))
+ - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
return false;
for (distance = 0, cur_insn = PREV_INSN (insn);
- distance < Z10_PREDICT_DISTANCE - 6;
+ distance < PREDICT_DISTANCE - 6;
distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
return false;
/* Try to optimize prologue and epilogue further. */
s390_optimize_prologue ();
- /* Walk over the insns and do some z10 specific changes. */
- if (s390_tune == PROCESSOR_2097_Z10)
+ /* Walk over the insns and do some >=z10 specific changes. */
+ if (s390_tune == PROCESSOR_2097_Z10
+ || s390_tune == PROCESSOR_2817_Z196)
{
rtx insn;
bool insn_added_p = false;
continue;
if (JUMP_P (insn))
- insn_added_p |= s390_z10_fix_long_loop_prediction (insn);
+ insn_added_p |= s390_fix_long_loop_prediction (insn);
- if (GET_CODE (PATTERN (insn)) == PARALLEL
- || GET_CODE (PATTERN (insn)) == SET)
+ if ((GET_CODE (PATTERN (insn)) == PARALLEL
+ || GET_CODE (PATTERN (insn)) == SET)
+ && s390_tune == PROCESSOR_2097_Z10)
insn_added_p |= s390_z10_optimize_cmp (insn);
}
}
/* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
- a new number struct loop *loop should be unrolled if tuned for the z10
- cpu. The loop is analyzed for memory accesses by calling check_dpu for
+ a new number struct loop *loop should be unrolled if tuned for cpus with
+ a built-in stride prefetcher.
+ The loop is analyzed for memory accesses by calling check_dpu for
each rtx of the loop. Depending on the loop_depth and the amount of
memory accesses a new number <=nunroll is returned to improve the
behaviour of the hardware prefetch unit. */
unsigned i;
unsigned mem_count = 0;
- /* Only z10 needs special handling. */
- if (s390_tune != PROCESSOR_2097_Z10)
+ if (s390_tune != PROCESSOR_2097_Z10 && s390_tune != PROCESSOR_2817_Z196)
return nunroll;
/* Count the number of memory references within the loop body. */