#define COSTS_N_BYTES(N) ((N) * 2)
static const
-struct processor_costs size_cost = { /* costs for tunning for size */
+struct processor_costs size_cost = { /* costs for tuning for size */
COSTS_N_BYTES (2), /* cost of an add instruction */
COSTS_N_BYTES (3), /* cost of a lea instruction */
COSTS_N_BYTES (2), /* variable shift costs */
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
-const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
+const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_fisttp = m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
with partial reg. dependencies used by Athlon/P4 based chips, it is better
to leave it off for generic32 for now. */
const int x86_partial_reg_stall = m_PPRO;
+const int x86_partial_flag_reg_stall = m_GENERIC;
const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
const int x86_use_mov0 = m_K6;
When we have only some of our vector isa extensions enabled, then there
are some modes for which vector_mode_supported_p is false. For these
modes, the generic vector support in gcc will choose some non-vector mode
- in order to implement the type. By computing the natural mode, we'll
+ in order to implement the type. By computing the natural mode, we'll
select the proper ABI location for the operand and not depend on whatever
the middle-end decides to do with these vector types. */
subclasses[0] = X86_64_SSE_CLASS;
if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
subclasses[0] = X86_64_INTEGER_CLASS;
-
+
for (i = 0; i < words; i++)
classes[i] = subclasses[i % num];
-
+
break;
}
case UNION_TYPE:
if (TREE_CODE (field) == FIELD_DECL)
{
int num;
+
+ if (TREE_TYPE (field) == error_mark_node)
+ continue;
+
num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
TREE_TYPE (field), subclasses,
bit_offset);
return 0;
default:
gcc_assert (VECTOR_MODE_P (mode));
-
+
if (bytes > 16)
return 0;
-
+
gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
-
+
if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
classes[0] = X86_64_INTEGERSI_CLASS;
else
tree type, int in_return, int nintregs, int nsseregs,
const int *intreg, int sse_regno)
{
+ /* The following variables hold the static issued_error state. */
+ static bool issued_sse_arg_error;
+ static bool issued_sse_ret_error;
+ static bool issued_x87_ret_error;
+
enum machine_mode tmpmode;
int bytes =
(mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
some less clueful developer tries to use floating-point anyway. */
if (needed_sseregs && !TARGET_SSE)
{
- static bool issued_error;
- if (!issued_error)
+ if (in_return)
{
- issued_error = true;
- if (in_return)
- error ("SSE register return with SSE disabled");
- else
- error ("SSE register argument with SSE disabled");
+ if (!issued_sse_ret_error)
+ {
+ error ("SSE register return with SSE disabled");
+ issued_sse_ret_error = true;
+ }
+ }
+ else if (!issued_sse_arg_error)
+ {
+ error ("SSE register argument with SSE disabled");
+ issued_sse_arg_error = true;
}
return NULL;
}
+ /* Likewise, error if the ABI requires us to return values in the
+ x87 registers and the user specified -mno-80387. */
+ if (!TARGET_80387 && in_return)
+ for (i = 0; i < n; i++)
+ if (class[i] == X86_64_X87_CLASS
+ || class[i] == X86_64_X87UP_CLASS
+ || class[i] == X86_64_COMPLEX_X87_CLASS)
+ {
+ if (!issued_x87_ret_error)
+ {
+ error ("x87 register return with x87 disabled");
+ issued_x87_ret_error = true;
+ }
+ return NULL;
+ }
+
/* First construct simple cases. Avoid SCmode, since we want to use
single register to pass this type. */
if (n == 1 && mode != SCmode)
case QUAL_UNION_TYPE:
{
tree field;
-
+
if (TYPE_BINFO (type))
{
tree binfo, base_binfo;
int i;
-
+
for (binfo = TYPE_BINFO (type), i = 0;
BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
if (contains_128bit_aligned_vector_p
if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
return true;
break;
-
+
default:
gcc_unreachable ();
}
gcc_assert (!TARGET_64BIT);
/* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
- we prevent this case when mmx is not available. */
- if ((VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8))
- return FIRST_MMX_REG;
+ we normally prevent this case when mmx is not available. However
+ some ABIs may require the result to be returned like DImode. */
+ if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
+ return TARGET_MMX ? FIRST_MMX_REG : 0;
/* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
- we prevent this case when sse is not available. */
+ we prevent this case when sse is not available. However some ABIs
+ may require the result to be returned like integer TImode. */
if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
- return FIRST_SSE_REG;
+ return TARGET_SSE ? FIRST_SSE_REG : 0;
/* Decimal floating point values can go in %eax, unlike other float modes. */
if (DECIMAL_FLOAT_MODE_P (mode))
XFmode);
}
+/* Return 1 if mode is a valid mode for sse. */
+static int
+standard_sse_mode_p (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case V16QImode:
+ case V8HImode:
+ case V4SImode:
+ case V2DImode:
+ case V4SFmode:
+ case V2DFmode:
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
*/
int
standard_sse_constant_p (rtx x)
{
- if (x == const0_rtx)
+ enum machine_mode mode = GET_MODE (x);
+
+ if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
return 1;
- return (x == CONST0_RTX (GET_MODE (x)));
+ if (vector_all_ones_operand (x, mode)
+ && standard_sse_mode_p (mode))
+ return TARGET_SSE2 ? 2 : -1;
+
+ return 0;
+}
+
+/* Return the opcode of the special instruction to be used to load
+ the constant X. */
+
+const char *
+standard_sse_constant_opcode (rtx insn, rtx x)
+{
+ switch (standard_sse_constant_p (x))
+ {
+ case 1:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "xorps\t%0, %0";
+ else if (get_attr_mode (insn) == MODE_V2DF)
+ return "xorpd\t%0, %0";
+ else
+ return "pxor\t%0, %0";
+ case 2:
+ return "pcmpeqd\t%0, %0";
+ }
+ gcc_unreachable ();
}
/* Returns 1 if OP contains a symbol reference */
if (from == ARG_POINTER_REGNUM)
return frame.stack_pointer_offset;
-
+
gcc_assert (from == FRAME_POINTER_REGNUM);
return frame.stack_pointer_offset - frame.frame_pointer_offset;
}
/* And here we cheat like madmen with the unwind info. We force the
cfa register back to sp+4, which is exactly what it was at the
start of the function. Re-pushing the return address results in
- the return at the same spot relative to the cfa, and thus is
+ the return at the same spot relative to the cfa, and thus is
correct wrt the unwind info. */
x = cfun->machine->force_align_arg_pointer;
x = gen_frame_mem (Pmode, plus_constant (x, -4));
{
if (pic_offset_table_rtx)
REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
+#if TARGET_MACHO
+ /* Mach-O doesn't support labels at the end of objects, so if
+ it looks like we might want one, insert a NOP. */
+ {
+ rtx insn = get_last_insn ();
+ while (insn
+ && NOTE_P (insn)
+ && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
+ insn = PREV_INSN (insn);
+ if (insn
+ && (LABEL_P (insn)
+ || (NOTE_P (insn)
+ && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
+ fputs ("\tnop\n", file);
+ }
+#endif
+
}
\f
/* Extract the parts of an RTL expression that is a valid memory address
{
rtx reg;
reason_rtx = base;
-
+
if (REG_P (base))
reg = base;
else if (GET_CODE (base) == SUBREG
goto is_legitimate_pic;
reason = "64bit address unspec";
goto report_error;
-
+
case UNSPEC_GOTPCREL:
gcc_assert (flag_pic);
goto is_legitimate_pic;
{
rtx x = ix86_tls_module_base ();
- base = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, base));
-
- set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
+ set_unique_reg_note (get_last_insn (), REG_EQUIV,
+ gen_rtx_MINUS (Pmode, x, tp));
}
off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
off = gen_rtx_CONST (Pmode, off);
dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
+
+ if (TARGET_GNU2_TLS)
+ {
+ dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
+
+ set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
+ }
+
break;
case TLS_MODEL_INITIAL_EXEC:
putc ('+', file);
output_pic_addr_const (file, XEXP (x, 1), code);
}
- else
+ else
{
gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
output_pic_addr_const (file, XEXP (x, 1), code);
/* In the name of slightly smaller debug output, and to cater to
general assembler lossage, recognize PIC+GOTOFF and turn it back
- into a direct symbol reference.
+ into a direct symbol reference.
On Darwin, this is necessary to avoid a crash, because Darwin
has a different PIC label for each routine but the DWARF debugging
if (! result)
return orig_x;
-
+
if (const_addend)
result = gen_rtx_PLUS (Pmode, result, const_addend);
if (reg_addend)
emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
slot = SLOT_CW_CEIL;
break;
-
+
case I387_CW_MASK_PM:
/* mask precision exception for nearbyint() */
emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
return "";
}
+/* Output code for x87 ffreep insn. The OPNO argument, which may only
+ have the values zero or one, indicates the ffreep insn's operand
+ from the OPERANDS array. */
+
+static const char *
+output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
+{
+ if (TARGET_USE_FFREEP)
+#if HAVE_AS_IX86_FFREEP
+ return opno ? "ffreep\t%y1" : "ffreep\t%y0";
+#else
+ switch (REGNO (operands[opno]))
+ {
+ case FIRST_STACK_REG + 0: return ".word\t0xc0df";
+ case FIRST_STACK_REG + 1: return ".word\t0xc1df";
+ case FIRST_STACK_REG + 2: return ".word\t0xc2df";
+ case FIRST_STACK_REG + 3: return ".word\t0xc3df";
+ case FIRST_STACK_REG + 4: return ".word\t0xc4df";
+ case FIRST_STACK_REG + 5: return ".word\t0xc5df";
+ case FIRST_STACK_REG + 6: return ".word\t0xc6df";
+ case FIRST_STACK_REG + 7: return ".word\t0xc7df";
+ }
+#endif
+
+ return opno ? "fstp\t%y1" : "fstp\t%y0";
+}
+
+
/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
should be used. UNORDERED_P is true when fucom should be used. */
if (stack_top_dies)
{
output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
- return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
+ return output_387_ffreep (operands, 1);
}
else
return "ftst\n\tfnstsw\t%0";
output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
else
output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
- return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
+ return output_387_ffreep (operands, 0);
}
else
{
#else
if (GET_CODE (op0) == MEM)
op1 = force_reg (Pmode, op1);
- else
+ else
op1 = legitimize_address (op1, op1, Pmode);
#endif /* TARGET_MACHO */
}
to handle some of them more efficiently. */
if ((reload_in_progress | reload_completed) == 0
&& register_operand (op0, mode)
- && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
+ && CONSTANT_P (op1)
+ && standard_sse_constant_p (op1) <= 0)
op1 = validize_mem (force_const_mem (mode, op1));
/* Make operand1 a register if it isn't already. */
emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
-/* Implement the movmisalign patterns for SSE. Non-SSE modes go
+/* Implement the movmisalign patterns for SSE. Non-SSE modes go
straight to ix86_expand_vector_move. */
void
{
/* The only non-offsetable memories we handle are pushes. */
int ok = push_operand (operand, VOIDmode);
-
+
gcc_assert (ok);
-
+
operand = copy_rtx (operand);
PUT_MODE (operand, Pmode);
parts[0] = parts[1] = parts[2] = operand;
default:
gcc_unreachable ();
}
-
+
if (GET_MODE (part[1][0]) == SImode)
part[1][0] = part[1][1];
}
ix86_expand_clear (low[0]);
ix86_expand_clear (high[0]);
emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
-
+
d = gen_lowpart (QImode, low[0]);
d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
s = gen_rtx_EQ (QImode, flags, const0_rtx);
if (GET_CODE (addr) == PARALLEL)
addr = XVECEXP (addr, 0, 0);
-
+
gcc_assert (GET_CODE (addr) == SET);
-
+
addr = SET_SRC (addr);
}
else
integer_type_node, NULL_TREE);
def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
ftype, IX86_BUILTIN_VEC_SET_V8HI);
-
+
ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
intHI_type_node,
integer_type_node, NULL_TREE);
instructions from inside the compiler, we can't allow the use of MMX
registers unless the user explicitly asks for it. So we do *not* define
vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
- we have builtins invoked by mmintrin.h that gives us license to emit
+ we have builtins invoked by mmintrin.h that gives us license to emit
these sorts of instructions. */
static rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
rtx result;
-
+
gcc_assert (reload_completed);
if (TARGET_RED_ZONE)
{
{
enum machine_mode mode = GET_MODE (x);
- /* We're only allowed to return a subclass of CLASS. Many of the
+ /* We're only allowed to return a subclass of CLASS. Many of the
following checks fail for NO_REGS, so eliminate that early. */
if (class == NO_REGS)
return NO_REGS;
if (!TARGET_SSE2)
return true;
- /* If the target says that inter-unit moves are more expensive
+ /* If the target says that inter-unit moves are more expensive
than moving through memory, then don't generate them. */
if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
return true;
return true;
/* ??? For the cost of one register reformat penalty, we could use
- the same instructions to move SFmode and DFmode data, but the
+ the same instructions to move SFmode and DFmode data, but the
relevant move patterns don't support those alternatives. */
if (mode == SFmode || mode == DFmode)
return true;
return true;
/* Vector registers do not support subreg with nonzero offsets, which
- are otherwise valid for integer registers. Since we can't see
+ are otherwise valid for integer registers. Since we can't see
whether we have a nonzero offset from here, prohibit all
nonparadoxical subregs changing size. */
if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
else if (VALID_FP_MODE_P (mode))
return 1;
/* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
- on to use that value in smaller contexts, this can easily force a
+ on to use that value in smaller contexts, this can easily force a
pseudo to be allocated to GENERAL_REGS. Since this is no worse than
supporting DImode, allow it. */
else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
return 0;
}
-/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
+/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
tieable integer mode. */
static bool
if (mode2 == DFmode)
return mode1 == SFmode;
- /* If MODE2 is only appropriate for an SSE register, then tie with
+ /* If MODE2 is only appropriate for an SSE register, then tie with
any other mode acceptable to SSE registers. */
if (GET_MODE_SIZE (mode2) >= 8
&& ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
{
case V2SImode:
case V2SFmode:
- if (!mmx_ok && !TARGET_SSE)
+ if (!mmx_ok)
return false;
/* FALLTHRU */
{
case V2SFmode:
case V2SImode:
- if (!mmx_ok && !TARGET_SSE)
+ if (!mmx_ok)
return false;
/* FALLTHRU */
if (!register_operand (op1, half_mode))
op1 = force_reg (half_mode, op1);
- emit_insn (gen_rtx_SET (VOIDmode, target,
+ emit_insn (gen_rtx_SET (VOIDmode, target,
gen_rtx_VEC_CONCAT (mode, op0, op1)));
}
else
}
}
-/* Initialize vector TARGET via VALS. Suppress the use of MMX
+/* Initialize vector TARGET via VALS. Suppress the use of MMX
instructions unless MMX_OK is true. */
void
if (REG_P (operands[1])
&& find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
{
- if (REGNO (operands[0]) == FIRST_STACK_REG
- && TARGET_USE_FFREEP)
- return "ffreep\t%y0";
+ if (REGNO (operands[0]) == FIRST_STACK_REG)
+ return output_387_ffreep (operands, 0);
return "fstp\t%y0";
}
if (STACK_TOP_P (operands[0]))