/* Subroutines used for code generation on IA-32.
- Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
- 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+ 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
Free Software Foundation, Inc.
This file is part of GCC.
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
+#include "debug.h"
+#include "dwarf2out.h"
static rtx legitimize_dllimport_symbol (rtx, bool);
m_AMD_MULTIPLE,
/* X86_TUNE_INTER_UNIT_MOVES */
- ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
+ ~(m_AMD_MULTIPLE | m_GENERIC),
/* X86_TUNE_INTER_UNIT_CONVERSIONS */
~(m_AMDFAM10),
\f
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
+static bool ix86_function_value_regno_p (const unsigned int);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
static enum calling_abi ix86_function_abi (const_tree);
\f
+#ifndef SUBTARGET32_DEFAULT_CPU
+#define SUBTARGET32_DEFAULT_CPU "i386"
+#endif
+
/* The svr4 ABI for the i386 says that records and unions are returned
in memory. */
#ifndef DEFAULT_PCC_STRUCT_RETURN
}
}
\f
-/* Return a string the documents the current -m options. The caller is
+/* Return a string that documents the current -m options. The caller is
responsible for freeing the string. */
static char *
{
{ "-m64", OPTION_MASK_ISA_64BIT },
{ "-mfma4", OPTION_MASK_ISA_FMA4 },
+ { "-mfma", OPTION_MASK_ISA_FMA },
{ "-mxop", OPTION_MASK_ISA_XOP },
{ "-mlwp", OPTION_MASK_ISA_LWP },
{ "-msse4a", OPTION_MASK_ISA_SSE4A },
if (isa && add_nl_p)
{
opts[num++][0] = isa_other;
- sprintf (isa_other, "(other isa: 0x%x)", isa);
+ sprintf (isa_other, "(other isa: %#x)", isa);
}
/* Add flag options. */
if (flags && add_nl_p)
{
opts[num++][0] = target_other;
- sprintf (target_other, "(other flags: 0x%x)", isa);
+ sprintf (target_other, "(other flags: %#x)", isa);
}
/* Add -fpmath= option. */
{
int i;
unsigned int ix86_arch_mask, ix86_tune_mask;
+ const bool ix86_tune_specified = (ix86_tune_string != NULL);
const char *prefix;
const char *suffix;
const char *sw;
|| !strcmp (ix86_tune_string, "generic64")))
;
else if (!strncmp (ix86_tune_string, "generic", 7))
- error ("bad value (%s) for %stune=%s %s",
+ error ("bad value (%s) for %stune=%s %s",
ix86_tune_string, prefix, suffix, sw);
+ else if (!strcmp (ix86_tune_string, "x86-64"))
+ warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
+ "%stune=k8%s or %stune=generic%s instead as appropriate.",
+ prefix, suffix, prefix, suffix, prefix, suffix);
}
else
{
ix86_tune_string = "generic32";
}
}
+
if (ix86_stringop_string)
{
if (!strcmp (ix86_stringop_string, "rep_byte"))
error ("bad value (%s) for %sstringop-strategy=%s %s",
ix86_stringop_string, prefix, suffix, sw);
}
- if (!strcmp (ix86_tune_string, "x86-64"))
- warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
- "%stune=k8%s or %stune=generic%s instead as appropriate.",
- prefix, suffix, prefix, suffix, prefix, suffix);
if (!ix86_arch_string)
- ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
+ ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
else
ix86_arch_specified = 1;
- if (!strcmp (ix86_arch_string, "generic"))
- error ("generic CPU can be used only for %stune=%s %s",
- prefix, suffix, sw);
- if (!strncmp (ix86_arch_string, "generic", 7))
- error ("bad value (%s) for %sarch=%s %s",
- ix86_arch_string, prefix, suffix, sw);
-
/* Validate -mabi= value. */
if (ix86_abi_string)
{
break;
}
- if (i == pta_size)
+ if (!strcmp (ix86_arch_string, "generic"))
+ error ("generic CPU can be used only for %stune=%s %s",
+ prefix, suffix, sw);
+ else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
error ("bad value (%s) for %sarch=%s %s",
ix86_arch_string, prefix, suffix, sw);
x86_prefetch_sse = true;
break;
}
- if (i == pta_size)
+
+ if (ix86_tune_specified && i == pta_size)
error ("bad value (%s) for %stune=%s %s",
ix86_tune_string, prefix, suffix, sw);
ix86_tls_dialect = TLS_DIALECT_GNU;
else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
ix86_tls_dialect = TLS_DIALECT_GNU2;
- else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
- ix86_tls_dialect = TLS_DIALECT_SUN;
else
error ("bad value (%s) for %stls-dialect=%s %s",
ix86_tls_dialect_string, prefix, suffix, sw);
return true;
}
-/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
- calling convention attributes;
+/* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
+ and "sseregparm" calling convention attributes;
arguments as in struct attribute_spec.handler. */
static tree
error ("fastcall and regparm attributes are not compatible");
}
+ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
+ {
+	  error ("regparm and thiscall attributes are not compatible");
+ }
+
cst = TREE_VALUE (args);
if (TREE_CODE (cst) != INTEGER_CST)
{
if (TARGET_64BIT)
{
/* Do not warn when emulating the MS ABI. */
- if (TREE_CODE (*node) != FUNCTION_TYPE
+ if ((TREE_CODE (*node) != FUNCTION_TYPE
+ && TREE_CODE (*node) != METHOD_TYPE)
|| ix86_function_type_abi (*node) != MS_ABI)
warning (OPT_Wattributes, "%qE attribute ignored",
name);
{
error ("fastcall and regparm attributes are not compatible");
}
+ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and thiscall attributes are not compatible");
+ }
}
/* Can combine stdcall with fastcall (redundant), regparm and
{
error ("stdcall and fastcall attributes are not compatible");
}
+ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("stdcall and thiscall attributes are not compatible");
+ }
}
/* Can combine cdecl with regparm and sseregparm. */
{
error ("fastcall and cdecl attributes are not compatible");
}
+ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("cdecl and thiscall attributes are not compatible");
+ }
+ }
+ else if (is_attribute_p ("thiscall", name))
+ {
+      if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
+	warning (OPT_Wattributes, "%qE attribute is used for non-class method",
+	         name);
+ if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("stdcall and thiscall attributes are not compatible");
+ }
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and thiscall attributes are not compatible");
+ }
+ if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("cdecl and thiscall attributes are not compatible");
+ }
}
/* Can combine sseregparm with all attributes. */
!= !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
return 0;
+ /* Check for mismatched thiscall types. */
+ if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
+ != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
+ return 0;
+
/* Check for mismatched return types (cdecl vs stdcall). */
if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
!= !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
return 2;
+ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
+ return 1;
+
/* Use register calling convention for local functions when possible. */
if (decl
&& TREE_CODE (decl) == FUNCTION_DECL
/* Stdcall and fastcall functions will pop the stack if not
variable args. */
if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
- || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
+ || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
+ || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
rtd = 1;
if (rtd && ! stdarg_p (funtype))
else look for regparm information. */
if (fntype)
{
- if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
+ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
+ {
+ cum->nregs = 1;
+ cum->fastcall = 1; /* Same first register as in fastcall. */
+ }
+ else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
{
cum->nregs = 2;
cum->fastcall = 1;
}
/* for V1xx modes, just use the base mode */
- if (VECTOR_MODE_P (mode) && mode != V1DImode
+ if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
&& GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
mode = GET_MODE_INNER (mode);
classes[0] = X86_64_SSE_CLASS;
classes[1] = X86_64_SSEUP_CLASS;
return 2;
+ case V1TImode:
case V1DImode:
case V2SFmode:
case V2SImode:
case V4HImode:
case V2SImode:
case V2SFmode:
+ case V1TImode:
case V1DImode:
if (!type || !AGGREGATE_TYPE_P (type))
{
case V4HImode:
case V2SImode:
case V2SFmode:
+ case V1TImode:
case V1DImode:
if (!type || !AGGREGATE_TYPE_P (type))
{
/* Return true if N is a possible register number of function value. */
-bool
-ix86_function_value_regno_p (int regno)
+static bool
+ix86_function_value_regno_p (const unsigned int regno)
{
switch (regno)
{
{
rtx save_area, mem;
rtx label;
- rtx label_ref;
rtx tmp_reg;
rtx nsse_reg;
alias_set_type set;
SSE saves. We need some preparation work to get this working. */
label = gen_label_rtx ();
- label_ref = gen_rtx_LABEL_REF (Pmode, label);
- /* Compute address to jump to :
- label - eax*4 + nnamed_sse_arguments*4 Or
- label - eax*5 + nnamed_sse_arguments*5 for AVX. */
- tmp_reg = gen_reg_rtx (Pmode);
nsse_reg = gen_reg_rtx (Pmode);
emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
- emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
- gen_rtx_MULT (Pmode, nsse_reg,
- GEN_INT (4))));
-
- /* vmovaps is one byte longer than movaps. */
- if (TARGET_AVX)
- emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
- gen_rtx_PLUS (Pmode, tmp_reg,
- nsse_reg)));
-
- if (cum->sse_regno)
- emit_move_insn
- (nsse_reg,
- gen_rtx_CONST (DImode,
- gen_rtx_PLUS (DImode,
- label_ref,
- GEN_INT (cum->sse_regno
- * (TARGET_AVX ? 5 : 4)))));
- else
- emit_move_insn (nsse_reg, label_ref);
- emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
/* Compute address of memory block we save into. We always use pointer
pointing 127 bytes after first byte to store - this is needed to keep
mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
MEM_NOTRAP_P (mem) = 1;
set_mem_alias_set (mem, set);
- set_mem_align (mem, BITS_PER_WORD);
+ set_mem_align (mem, 64);
/* And finally do the dirty job! */
emit_insn (gen_sse_prologue_save (mem, nsse_reg,
- GEN_INT (cum->sse_regno), label));
+ GEN_INT (cum->sse_regno), label,
+ gen_reg_rtx (Pmode)));
}
}
int indirect_p = 0;
tree ptrtype;
enum machine_mode nat_mode;
- int arg_boundary;
+ unsigned int arg_boundary;
/* Only 64bit target needs something special. */
if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
size_int (-align));
t = fold_convert (TREE_TYPE (ovf), t);
+ if (crtl->stack_alignment_needed < arg_boundary)
+ crtl->stack_alignment_needed = arg_boundary;
}
gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
gimplify_assign (addr, t, pre_p);
/* This function generates code for -fpic that loads %ebx with
the return address of the caller and then returns. */
-void
-ix86_file_end (void)
+static void
+ix86_code_end (void)
{
rtx xops[2];
int regno;
for (regno = 0; regno < 8; ++regno)
{
char name[32];
+ tree decl;
if (! ((pic_labels_used >> regno) & 1))
continue;
get_pc_thunk_name (name, regno);
+ decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
+ get_identifier (name),
+ build_function_type (void_type_node, void_list_node));
+ DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
+ NULL_TREE, void_type_node);
+ TREE_PUBLIC (decl) = 1;
+ TREE_STATIC (decl) = 1;
+
#if TARGET_MACHO
if (TARGET_MACHO)
{
assemble_name (asm_out_file, name);
fputs ("\n", asm_out_file);
ASM_OUTPUT_LABEL (asm_out_file, name);
+ DECL_WEAK (decl) = 1;
}
else
#endif
if (USE_HIDDEN_LINKONCE)
{
- tree decl;
-
- decl = build_decl (BUILTINS_LOCATION,
- FUNCTION_DECL, get_identifier (name),
- error_mark_node);
- TREE_PUBLIC (decl) = 1;
- TREE_STATIC (decl) = 1;
DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
(*targetm.asm_out.unique_section) (decl, 0);
ASM_OUTPUT_LABEL (asm_out_file, name);
}
+ DECL_INITIAL (decl) = make_node (BLOCK);
+ current_function_decl = decl;
+ init_function_start (decl);
+ first_function_block_is_cold = false;
+ /* Make sure unwind info is emitted for the thunk if needed. */
+ final_start_function (emit_barrier (), asm_out_file, 1);
+
xops[0] = gen_rtx_REG (Pmode, regno);
xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
output_asm_insn ("ret", xops);
+ final_end_function ();
+ init_insn_lengths ();
+ free_after_compilation (cfun);
+ set_cfun (NULL);
+ current_function_decl = NULL;
}
-
- if (NEED_INDICATE_EXEC_STACK)
- file_end_indicate_exec_stack ();
}
/* Emit code for the SET_GOT patterns. */
if (!flag_pic)
output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
else
- output_asm_insn ("call\t%a2", xops);
+ {
+ output_asm_insn ("call\t%a2", xops);
+#ifdef DWARF2_UNWIND_INFO
+ /* The call to next label acts as a push. */
+ if (dwarf2out_do_frame ())
+ {
+ rtx insn;
+ start_sequence ();
+ insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ GEN_INT (-4))));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ dwarf2out_frame_debug (insn, true);
+ end_sequence ();
+ }
+#endif
+ }
#if TARGET_MACHO
/* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
if (flag_pic)
- output_asm_insn ("pop%z0\t%0", xops);
+ {
+ output_asm_insn ("pop%z0\t%0", xops);
+#ifdef DWARF2_UNWIND_INFO
+ /* The pop is a pop and clobbers dest, but doesn't restore it
+ for unwind info purposes. */
+ if (dwarf2out_do_frame ())
+ {
+ rtx insn;
+ start_sequence ();
+ insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
+ dwarf2out_frame_debug (insn, true);
+ insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ GEN_INT (4))));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ dwarf2out_frame_debug (insn, true);
+ end_sequence ();
+ }
+#endif
+ }
}
else
{
get_pc_thunk_name (name, REGNO (dest));
pic_labels_used |= 1 << REGNO (dest);
+#ifdef DWARF2_UNWIND_INFO
+ /* Ensure all queued register saves are flushed before the
+ call. */
+ if (dwarf2out_do_frame ())
+ {
+ rtx insn;
+ start_sequence ();
+ insn = emit_barrier ();
+ end_sequence ();
+ dwarf2out_frame_debug (insn, false);
+ }
+#endif
xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
xops[2] = gen_rtx_MEM (QImode, xops[2]);
output_asm_insn ("call\t%X2", xops);
&& cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
{
int count = frame->nregs;
+ struct cgraph_node *node = cgraph_node (current_function_decl);
cfun->machine->use_fast_prologue_epilogue_nregs = count;
/* The fast prologue uses move instead of push to save registers. This
slow to use many of them. */
if (count)
count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
- if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
+ if (node->frequency < NODE_FREQUENCY_NORMAL
|| (flag_branch_probabilities
- && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
+ && node->frequency < NODE_FREQUENCY_HOT))
cfun->machine->use_fast_prologue_epilogue = false;
else
cfun->machine->use_fast_prologue_epilogue
passing. */
if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
&& !lookup_attribute ("fastcall",
+ TYPE_ATTRIBUTES (TREE_TYPE (decl)))
+ && !lookup_attribute ("thiscall",
TYPE_ATTRIBUTES (TREE_TYPE (decl))))
return CX_REG;
else
end_sequence ();
insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
- RTX_FRAME_RELATED_P (insn) = 1;
+ if (!optimize)
+ {
+ add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
return drap_vreg;
}
else
ix86_cfa_state->reg == stack_pointer_rtx);
else
{
- /* Only valid for Win32. */
rtx eax = gen_rtx_REG (Pmode, AX_REG);
bool eax_live;
rtx t;
- gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
-
if (cfun->machine->call_abi == MS_ABI)
eax_live = false;
else
rtx base_reg, index_reg;
HOST_WIDE_INT scale = 1;
rtx scale_rtx = NULL_RTX;
+ rtx tmp;
int retval = 1;
enum ix86_address_seg seg = SEG_DEFAULT;
scale_rtx = XEXP (op, 1);
break;
+ case ASHIFT:
+ if (index)
+ return 0;
+ index = XEXP (op, 0);
+ tmp = XEXP (op, 1);
+ if (!CONST_INT_P (tmp))
+ return 0;
+ scale = INTVAL (tmp);
+ if ((unsigned HOST_WIDE_INT) scale > 3)
+ return 0;
+ scale = 1 << scale;
+ break;
+
case UNSPEC:
if (XINT (op, 1) == UNSPEC_TP
&& TARGET_TLS_DIRECT_SEG_REFS
}
else if (GET_CODE (addr) == ASHIFT)
{
- rtx tmp;
-
/* We're called for lea too, which implements ashift on occasion. */
index = XEXP (addr, 0);
tmp = XEXP (addr, 1);
break;
case UNSPEC_GOTTPOFF:
/* FIXME: This might be @TPOFF in Sun ld too. */
- fputs ("@GOTTPOFF", file);
+ fputs ("@gottpoff", file);
break;
case UNSPEC_TPOFF:
- fputs ("@TPOFF", file);
+ fputs ("@tpoff", file);
break;
case UNSPEC_NTPOFF:
if (TARGET_64BIT)
- fputs ("@TPOFF", file);
+ fputs ("@tpoff", file);
else
- fputs ("@NTPOFF", file);
+ fputs ("@ntpoff", file);
break;
case UNSPEC_DTPOFF:
- fputs ("@DTPOFF", file);
+ fputs ("@dtpoff", file);
break;
case UNSPEC_GOTNTPOFF:
if (TARGET_64BIT)
fputs (ASSEMBLER_DIALECT == ASM_ATT ?
- "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
+ "@gottpoff(%rip)": "@gottpoff[rip]", file);
else
- fputs ("@GOTNTPOFF", file);
+ fputs ("@gotntpoff", file);
break;
case UNSPEC_INDNTPOFF:
- fputs ("@INDNTPOFF", file);
+ fputs ("@indntpoff", file);
break;
#if TARGET_MACHO
case UNSPEC_MACHOPIC_OFFSET:
{
fputs (ASM_LONG, file);
output_addr_const (file, x);
- fputs ("@DTPOFF", file);
+ fputs ("@dtpoff", file);
switch (size)
{
case 4:
ix86_delegitimize_address (rtx x)
{
rtx orig_x = delegitimize_mem_from_attrs (x);
+ /* addend is NULL or some rtx if x is something+GOTOFF where
+ something doesn't include the PIC register. */
+ rtx addend = NULL_RTX;
/* reg_addend is NULL or a multiple of some register. */
rtx reg_addend = NULL_RTX;
/* const_addend is NULL or a const_int. */
else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
reg_addend = XEXP (reg_addend, 0);
else
- return orig_x;
- if (!REG_P (reg_addend)
- && GET_CODE (reg_addend) != MULT
- && GET_CODE (reg_addend) != ASHIFT)
- return orig_x;
+ {
+ reg_addend = NULL_RTX;
+ addend = XEXP (x, 0);
+ }
}
else
- return orig_x;
+ addend = XEXP (x, 0);
x = XEXP (XEXP (x, 1), 0);
if (GET_CODE (x) == PLUS
}
if (GET_CODE (x) == UNSPEC
- && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
+ && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
|| (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
result = XVECEXP (x, 0, 0);
result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
if (reg_addend)
result = gen_rtx_PLUS (Pmode, reg_addend, result);
+ if (addend)
+ {
+ /* If the rest of original X doesn't involve the PIC register, add
+ addend and subtract pic_offset_table_rtx. This can happen e.g.
+ for code like:
+ leal (%ebx, %ecx, 4), %ecx
+ ...
+ movl foo@GOTOFF(%ecx), %edx
+ in which case we return (%ecx - %ebx) + foo. */
+ if (pic_offset_table_rtx)
+ result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
+ pic_offset_table_rtx),
+ result);
+ else
+ return orig_x;
+ }
return result;
}
return cfun->machine->some_ld_name;
for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
- if (INSN_P (insn)
+ if (NONDEBUG_INSN_P (insn)
&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
return cfun->machine->some_ld_name;
L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
C -- print opcode suffix for set/cmov insn.
c -- like C, but print reversed condition
- E,e -- likewise, but for compare-and-branch fused insn.
F,f -- likewise, but for floating-point.
O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
otherwise nothing
put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
return;
- case 'E':
- put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
- return;
-
- case 'e':
- put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
- return;
-
case 'H':
/* It doesn't actually matter what mode we use here, as we're
only going to use this for printing. */
if (ASSEMBLER_DIALECT == ASM_ATT)
putc ('$', file);
- fprintf (file, "0x%08lx", (long unsigned int) l);
+ fprintf (file, "%#08lx", (long unsigned int) l);
}
/* These float cases don't actually occur as immediate operands. */
case UNSPEC_GOTTPOFF:
output_addr_const (file, op);
/* FIXME: This might be @TPOFF in Sun ld. */
- fputs ("@GOTTPOFF", file);
+ fputs ("@gottpoff", file);
break;
case UNSPEC_TPOFF:
output_addr_const (file, op);
- fputs ("@TPOFF", file);
+ fputs ("@tpoff", file);
break;
case UNSPEC_NTPOFF:
output_addr_const (file, op);
if (TARGET_64BIT)
- fputs ("@TPOFF", file);
+ fputs ("@tpoff", file);
else
- fputs ("@NTPOFF", file);
+ fputs ("@ntpoff", file);
break;
case UNSPEC_DTPOFF:
output_addr_const (file, op);
- fputs ("@DTPOFF", file);
+ fputs ("@dtpoff", file);
break;
case UNSPEC_GOTNTPOFF:
output_addr_const (file, op);
if (TARGET_64BIT)
fputs (ASSEMBLER_DIALECT == ASM_ATT ?
- "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
+ "@gottpoff(%rip)" : "@gottpoff[rip]", file);
else
- fputs ("@GOTNTPOFF", file);
+ fputs ("@gotntpoff", file);
break;
case UNSPEC_INDNTPOFF:
output_addr_const (file, op);
- fputs ("@INDNTPOFF", file);
+ fputs ("@indntpoff", file);
break;
#if TARGET_MACHO
case UNSPEC_MACHOPIC_OFFSET:
rtx prev = PREV_INSN (insn);
while (prev && distance < LEA_SEARCH_THRESHOLD)
{
- if (INSN_P (prev))
+ if (NONDEBUG_INSN_P (prev))
{
distance++;
for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
&& prev != insn
&& distance < LEA_SEARCH_THRESHOLD)
{
- if (INSN_P (prev))
+ if (NONDEBUG_INSN_P (prev))
{
distance++;
for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
rtx next = NEXT_INSN (insn);
while (next && distance < LEA_SEARCH_THRESHOLD)
{
- if (INSN_P (next))
+ if (NONDEBUG_INSN_P (next))
{
distance++;
&& next != insn
&& distance < LEA_SEARCH_THRESHOLD)
{
- if (INSN_P (next))
+ if (NONDEBUG_INSN_P (next))
{
distance++;
enum rtx_code code = GET_CODE (operands[1]), compare_code;
rtx compare_seq, compare_op;
enum machine_mode mode = GET_MODE (operands[0]);
- bool sign_bit_compare_p = false;;
+ bool sign_bit_compare_p = false;
start_sequence ();
ix86_compare_op0 = XEXP (operands[1], 0);
if (!sign_bit_compare_p)
{
rtx flags;
- rtx (*insn)(rtx, rtx, rtx);
bool fpcmp = false;
compare_code = GET_CODE (compare_op);
tmp = gen_reg_rtx (mode);
if (mode == DImode)
- insn = gen_x86_movdicc_0_m1;
+ emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
else
- insn = gen_x86_movsicc_0_m1;
-
- emit_insn (insn (tmp, flags, compare_op));
+ emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
+ flags, compare_op));
}
else
{
/* XOP supports all of the comparisons on all vector int types. */
if (!TARGET_XOP)
{
- /* Canonicalize the comparison to EQ, GT, GTU. */
- switch (code)
- {
- case EQ:
- case GT:
- case GTU:
- break;
-
- case NE:
- case LE:
- case LEU:
- code = reverse_condition (code);
- negate = true;
- break;
-
- case GE:
- case GEU:
- code = reverse_condition (code);
- negate = true;
- /* FALLTHRU */
-
- case LT:
- case LTU:
- code = swap_condition (code);
- x = cop0, cop0 = cop1, cop1 = x;
- break;
-
- default:
- gcc_unreachable ();
- }
-
- /* Only SSE4.1/SSE4.2 supports V2DImode. */
- if (mode == V2DImode)
- {
+ /* Canonicalize the comparison to EQ, GT, GTU. */
switch (code)
{
case EQ:
- /* SSE4.1 supports EQ. */
- if (!TARGET_SSE4_1)
- return false;
- break;
-
case GT:
case GTU:
- /* SSE4.2 supports GT/GTU. */
- if (!TARGET_SSE4_2)
- return false;
+ break;
+
+ case NE:
+ case LE:
+ case LEU:
+ code = reverse_condition (code);
+ negate = true;
+ break;
+
+ case GE:
+ case GEU:
+ code = reverse_condition (code);
+ negate = true;
+ /* FALLTHRU */
+
+ case LT:
+ case LTU:
+ code = swap_condition (code);
+ x = cop0, cop0 = cop1, cop1 = x;
break;
default:
gcc_unreachable ();
}
- }
- /* Unsigned parallel compare is not supported by the hardware. Play some
- tricks to turn this into a signed comparison against 0. */
- if (code == GTU)
- {
- cop0 = force_reg (mode, cop0);
+ /* Only SSE4.1/SSE4.2 supports V2DImode. */
+ if (mode == V2DImode)
+ {
+ switch (code)
+ {
+ case EQ:
+ /* SSE4.1 supports EQ. */
+ if (!TARGET_SSE4_1)
+ return false;
+ break;
- switch (mode)
+ case GT:
+ case GTU:
+ /* SSE4.2 supports GT/GTU. */
+ if (!TARGET_SSE4_2)
+ return false;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ /* Unsigned parallel compare is not supported by the hardware.
+ Play some tricks to turn this into a signed comparison
+ against 0. */
+ if (code == GTU)
{
- case V4SImode:
- case V2DImode:
- {
- rtx t1, t2, mask;
-
- /* Perform a parallel modulo subtraction. */
- t1 = gen_reg_rtx (mode);
- emit_insn ((mode == V4SImode
- ? gen_subv4si3
- : gen_subv2di3) (t1, cop0, cop1));
-
- /* Extract the original sign bit of op0. */
- mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
- true, false);
- t2 = gen_reg_rtx (mode);
- emit_insn ((mode == V4SImode
- ? gen_andv4si3
- : gen_andv2di3) (t2, cop0, mask));
-
- /* XOR it back into the result of the subtraction. This results
- in the sign bit set iff we saw unsigned underflow. */
- x = gen_reg_rtx (mode);
- emit_insn ((mode == V4SImode
- ? gen_xorv4si3
- : gen_xorv2di3) (x, t1, t2));
-
- code = GT;
- }
- break;
+ cop0 = force_reg (mode, cop0);
+
+ switch (mode)
+ {
+ case V4SImode:
+ case V2DImode:
+ {
+ rtx t1, t2, mask;
+ rtx (*gen_sub3) (rtx, rtx, rtx);
+
+ /* Subtract (-(INT MAX) - 1) from both operands to make
+ them signed. */
+ mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
+ true, false);
+ gen_sub3 = (mode == V4SImode
+ ? gen_subv4si3 : gen_subv2di3);
+ t1 = gen_reg_rtx (mode);
+ emit_insn (gen_sub3 (t1, cop0, mask));
+
+ t2 = gen_reg_rtx (mode);
+ emit_insn (gen_sub3 (t2, cop1, mask));
+
+ cop0 = t1;
+ cop1 = t2;
+ code = GT;
+ }
+ break;
- case V16QImode:
- case V8HImode:
- /* Perform a parallel unsigned saturating subtraction. */
- x = gen_reg_rtx (mode);
- emit_insn (gen_rtx_SET (VOIDmode, x,
- gen_rtx_US_MINUS (mode, cop0, cop1)));
+ case V16QImode:
+ case V8HImode:
+ /* Perform a parallel unsigned saturating subtraction. */
+ x = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, x,
+ gen_rtx_US_MINUS (mode, cop0, cop1)));
- code = EQ;
- negate = !negate;
- break;
+ cop0 = x;
+ cop1 = CONST0_RTX (mode);
+ code = EQ;
+ negate = !negate;
+ break;
- default:
- gcc_unreachable ();
+ default:
+ gcc_unreachable ();
+ }
}
-
- cop0 = x;
- cop1 = CONST0_RTX (mode);
- }
}
x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
{
/* Shift higher 8 bytes to lower 8 bytes. */
src = gen_reg_rtx (imode);
- emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
- gen_lowpart (TImode, operands[1]),
- GEN_INT (64)));
+ emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
+ gen_lowpart (V1TImode, operands[1]),
+ GEN_INT (64)));
}
else
src = operands[1];
: gen_x86_64_shld) (high[0], low[0], operands[2]));
}
- emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
+ emit_insn ((mode == DImode
+ ? gen_ashlsi3
+ : gen_ashldi3) (low[0], low[0], operands[2]));
if (TARGET_CMOVE && scratch)
{
ix86_expand_clear (scratch);
emit_insn ((mode == DImode
- ? gen_x86_shift_adj_1
- : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
- scratch));
+ ? gen_x86_shiftsi_adj_1
+ : gen_x86_shiftdi_adj_1) (high[0], low[0], operands[2],
+ scratch));
}
else
emit_insn ((mode == DImode
- ? gen_x86_shift_adj_2
- : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
+ ? gen_x86_shiftsi_adj_2
+ : gen_x86_shiftdi_adj_2) (high[0], low[0], operands[2]));
}
void
: gen_ashrdi3) (scratch, scratch,
GEN_INT (single_width - 1)));
emit_insn ((mode == DImode
- ? gen_x86_shift_adj_1
- : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
- scratch));
+ ? gen_x86_shiftsi_adj_1
+ : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
+ scratch));
}
else
emit_insn ((mode == DImode
- ? gen_x86_shift_adj_3
- : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
+ ? gen_x86_shiftsi_adj_3
+ : gen_x86_shiftdi_adj_3) (low[0], high[0], operands[2]));
}
}
{
ix86_expand_clear (scratch);
emit_insn ((mode == DImode
- ? gen_x86_shift_adj_1
- : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
- scratch));
+ ? gen_x86_shiftsi_adj_1
+ : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
+ scratch));
}
else
emit_insn ((mode == DImode
- ? gen_x86_shift_adj_2
- : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
+ ? gen_x86_shiftsi_adj_2
+ : gen_x86_shiftdi_adj_2) (low[0], high[0], operands[2]));
}
}
}
/* x86-64 ABI requires arrays greater than 16 bytes to be aligned
- to 16byte boundary. */
- if (TARGET_64BIT)
+ to 16byte boundary. Exact wording is:
+
+ An array uses the same alignment as its elements, except that a local or
+ global array variable of length at least 16 bytes or
+ a C99 variable-length array variable always has alignment of at least 16 bytes.
+
+ This was added to allow use of aligned SSE instructions at arrays. This
+ rule is meant for static storage (where compiler can not do the analysis
+ by itself). We follow it for automatic variables only when convenient.
+ We fully control everything in the function compiled and functions from
+ other unit can not rely on the alignment.
+
+ Exclude va_list type. It is the common case of local array where
+ we can not benefit from the alignment. */
+ if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
+ && TARGET_SSE)
{
if (AGGREGATE_TYPE_P (type)
+ && (TYPE_MAIN_VARIANT (type)
+ != TYPE_MAIN_VARIANT (va_list_type_node))
&& TYPE_SIZE (type)
&& TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
&& (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
us with EAX for the static chain. */
regno = AX_REG;
}
+ else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
+ {
+ /* Thiscall functions use ecx for arguments, which leaves
+ us with EAX for the static chain. */
+ regno = AX_REG;
+ }
else if (ix86_function_regparm (fntype, fndecl) == 3)
{
/* For regparm 3, we have no free call-clobbered registers in
IX86_BUILTIN_VPERMILPS,
IX86_BUILTIN_VPERMILPD256,
IX86_BUILTIN_VPERMILPS256,
+ IX86_BUILTIN_VPERMIL2PD,
+ IX86_BUILTIN_VPERMIL2PS,
+ IX86_BUILTIN_VPERMIL2PD256,
+ IX86_BUILTIN_VPERMIL2PS256,
IX86_BUILTIN_VPERM2F128PD256,
IX86_BUILTIN_VPERM2F128PS256,
IX86_BUILTIN_VPERM2F128SI256,
IX86_BUILTIN_VPCOMTRUEQ,
/* LWP instructions. */
- IX86_BUILTIN_LLWPCB16,
- IX86_BUILTIN_LLWPCB32,
- IX86_BUILTIN_LLWPCB64,
- IX86_BUILTIN_SLWPCB16,
- IX86_BUILTIN_SLWPCB32,
- IX86_BUILTIN_SLWPCB64,
- IX86_BUILTIN_LWPVAL16,
+ IX86_BUILTIN_LLWPCB,
+ IX86_BUILTIN_SLWPCB,
IX86_BUILTIN_LWPVAL32,
IX86_BUILTIN_LWPVAL64,
- IX86_BUILTIN_LWPINS16,
IX86_BUILTIN_LWPINS32,
IX86_BUILTIN_LWPINS64,
+ IX86_BUILTIN_CLZS,
+
IX86_BUILTIN_MAX
};
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
- { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcbhi1, "__builtin_ia32_llwpcb16", IX86_BUILTIN_LLWPCB16, UNKNOWN, (int) VOID_FTYPE_VOID },
- { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcbsi1, "__builtin_ia32_llwpcb32", IX86_BUILTIN_LLWPCB32, UNKNOWN, (int) VOID_FTYPE_VOID },
- { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcbdi1, "__builtin_ia32_llwpcb64", IX86_BUILTIN_LLWPCB64, UNKNOWN, (int) VOID_FTYPE_VOID },
-
- { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcbhi1, "__builtin_ia32_slwpcb16", IX86_BUILTIN_SLWPCB16, UNKNOWN, (int) VOID_FTYPE_VOID },
- { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcbsi1, "__builtin_ia32_slwpcb32", IX86_BUILTIN_SLWPCB32, UNKNOWN, (int) VOID_FTYPE_VOID },
- { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcbdi1, "__builtin_ia32_slwpcb64", IX86_BUILTIN_SLWPCB64, UNKNOWN, (int) VOID_FTYPE_VOID },
-
- { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalhi3, "__builtin_ia32_lwpval16", IX86_BUILTIN_LWPVAL16, UNKNOWN, (int) VOID_FTYPE_USHORT_UINT_USHORT },
- { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
- { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
- { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinshi3, "__builtin_ia32_lwpins16", IX86_BUILTIN_LWPINS16, UNKNOWN, (int) UCHAR_FTYPE_USHORT_UINT_USHORT },
- { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
- { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
};
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
+
+ { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
};
/* FMA4 and XOP. */
+#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
+#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
+#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
+#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
+
};
/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
switch (m_type)
{
+ case MULTI_ARG_4_DF2_DI_I:
+ case MULTI_ARG_4_DF2_DI_I1:
+ case MULTI_ARG_4_SF2_SI_I:
+ case MULTI_ARG_4_SF2_SI_I1:
+ nargs = 4;
+ last_arg_constant = true;
+ break;
+
case MULTI_ARG_3_SF:
case MULTI_ARG_3_DF:
case MULTI_ARG_3_SF2:
pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
break;
+ case 4:
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
+ break;
+
default:
gcc_unreachable ();
}
case FLOAT_FTYPE_FLOAT:
case INT_FTYPE_INT:
case UINT64_FTYPE_INT:
+ case UINT16_FTYPE_UINT16:
case INT64_FTYPE_INT64:
case INT64_FTYPE_V4SF:
case INT64_FTYPE_V2DF:
break;
case V2DI_FTYPE_V2DI_INT_CONVERT:
nargs = 2;
- rmode = V2DImode;
+ rmode = V1TImode;
nargs_constant = 1;
break;
case V8HI_FTYPE_V8HI_INT:
nargs = 3;
nargs_constant = 2;
break;
+ case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
+ case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
+ case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
+ case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
+ nargs = 4;
+ nargs_constant = 1;
+ break;
case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
nargs = 4;
nargs_constant = 2;
case CODE_FOR_sse4_1_blendpd:
case CODE_FOR_avx_vpermilv2df:
+ case CODE_FOR_xop_vpermil2v2df3:
+ case CODE_FOR_xop_vpermil2v4sf3:
+ case CODE_FOR_xop_vpermil2v4df3:
+ case CODE_FOR_xop_vpermil2v8sf3:
error ("the last argument must be a 2-bit immediate");
return const0_rtx;
{
rtx op;
enum machine_mode mode;
- } args[2];
+ } args[3];
enum insn_code icode = d->icode;
bool last_arg_constant = false;
const struct insn_data *insn_p = &insn_data[icode];
case V4DF_FTYPE_PCV2DF:
case V4DF_FTYPE_PCDOUBLE:
case V2DF_FTYPE_PCDOUBLE:
+ case VOID_FTYPE_PVOID:
nargs = 1;
klass = load;
memory = 0;
/* Reserve memory operand for target. */
memory = ARRAY_SIZE (args);
break;
- case VOID_FTYPE_USHORT_UINT_USHORT:
case VOID_FTYPE_UINT_UINT_UINT:
case VOID_FTYPE_UINT64_UINT_UINT:
- case UCHAR_FTYPE_USHORT_UINT_USHORT:
case UCHAR_FTYPE_UINT_UINT_UINT:
case UCHAR_FTYPE_UINT64_UINT_UINT:
nargs = 3;
- klass = store;
- memory = 0;
+ klass = load;
+ memory = ARRAY_SIZE (args);
+ last_arg_constant = true;
break;
default:
gcc_unreachable ();
if (last_arg_constant && (i + 1) == nargs)
{
if (!match)
- switch (icode)
- {
- default:
+ {
+ if (icode == CODE_FOR_lwp_lwpvalsi3
+ || icode == CODE_FOR_lwp_lwpinssi3
+ || icode == CODE_FOR_lwp_lwpvaldi3
+ || icode == CODE_FOR_lwp_lwpinsdi3)
+ error ("the last argument must be a 32-bit immediate");
+ else
error ("the last argument must be an 8-bit immediate");
- return const0_rtx;
- }
+ return const0_rtx;
+ }
}
else
{
case 2:
pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
break;
+ case 3:
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
+ break;
default:
gcc_unreachable ();
}
return target;
}
+ case IX86_BUILTIN_LLWPCB:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+ icode = CODE_FOR_lwp_llwpcb;
+ if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
+ op0 = copy_to_mode_reg (Pmode, op0);
+ emit_insn (gen_lwp_llwpcb (op0));
+ return 0;
+
+ case IX86_BUILTIN_SLWPCB:
+ icode = CODE_FOR_lwp_slwpcb;
+ if (!target
+ || ! (*insn_data[icode].operand[0].predicate) (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+ emit_insn (gen_lwp_slwpcb (target));
+ return target;
+
default:
break;
}
if it is not available. */
static tree
-ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
+ix86_builtin_vectorized_function (tree fndecl, tree type_out,
tree type_in)
{
enum machine_mode in_mode, out_mode;
int in_n, out_n;
+ enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
if (TREE_CODE (type_out) != VECTOR_TYPE
- || TREE_CODE (type_in) != VECTOR_TYPE)
+ || TREE_CODE (type_in) != VECTOR_TYPE
+ || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
return NULL_TREE;
out_mode = TYPE_MODE (TREE_TYPE (type_out));
/* Returns a decl of a function that implements conversion of an integer vector
- into a floating-point vector, or vice-versa. TYPE is the type of the integer
- side of the conversion.
+ into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
+ are the types involved when converting according to CODE.
Return NULL_TREE if it is not available. */
static tree
-ix86_vectorize_builtin_conversion (unsigned int code, tree type)
+ix86_vectorize_builtin_conversion (unsigned int code,
+ tree dest_type, tree src_type)
{
- if (! (TARGET_SSE2 && TREE_CODE (type) == VECTOR_TYPE))
+ if (! TARGET_SSE2)
return NULL_TREE;
switch (code)
{
case FLOAT_EXPR:
- switch (TYPE_MODE (type))
+ switch (TYPE_MODE (src_type))
{
case V4SImode:
- return TYPE_UNSIGNED (type)
- ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
- : ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
+ switch (TYPE_MODE (dest_type))
+ {
+ case V4SFmode:
+ return (TYPE_UNSIGNED (src_type)
+ ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
+ : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
+ case V4DFmode:
+ return (TYPE_UNSIGNED (src_type)
+ ? NULL_TREE
+ : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
+ default:
+ return NULL_TREE;
+ }
+ break;
+ case V8SImode:
+ switch (TYPE_MODE (dest_type))
+ {
+ case V8SFmode:
+ return (TYPE_UNSIGNED (src_type)
+ ? NULL_TREE
+ : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
+ default:
+ return NULL_TREE;
+ }
+ break;
default:
return NULL_TREE;
}
case FIX_TRUNC_EXPR:
- switch (TYPE_MODE (type))
+ switch (TYPE_MODE (dest_type))
{
case V4SImode:
- return TYPE_UNSIGNED (type)
- ? NULL_TREE
- : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
+ switch (TYPE_MODE (src_type))
+ {
+ case V4SFmode:
+ return (TYPE_UNSIGNED (dest_type)
+ ? NULL_TREE
+ : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
+ case V4DFmode:
+ return (TYPE_UNSIGNED (dest_type)
+ ? NULL_TREE
+ : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
+ default:
+ return NULL_TREE;
+ }
+ break;
+
+ case V8SImode:
+ switch (TYPE_MODE (src_type))
+ {
+ case V8SFmode:
+ return (TYPE_UNSIGNED (dest_type)
+ ? NULL_TREE
+ : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
+ default:
+ return NULL_TREE;
+ }
+ break;
+
default:
return NULL_TREE;
}
+
default:
return NULL_TREE;
-
}
+
+ return NULL_TREE;
}
/* Returns a code for a target-specific builtin that implements
if (!CONST_INT_P (er))
return 0;
ei = INTVAL (er);
- if (ei >= 2 * nelt)
+ if (ei >= nelt)
return 0;
ipar[i] = ei;
}
fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
fprintf (file, ASM_LONG "%s\n", binder_name);
}
-
-void
-darwin_x86_file_end (void)
-{
- darwin_file_end ();
- ix86_file_end ();
-}
#endif /* TARGET_MACHO */
/* Order the registers for register allocator. */
if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
regno = aggr ? DX_REG : CX_REG;
+ else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
+ {
+ regno = CX_REG;
+ if (aggr)
+ return gen_rtx_MEM (SImode,
+ plus_constant (stack_pointer_rtx, 4));
+ }
else
{
regno = AX_REG;
*(*this + vcall_offset) should be added to THIS. */
static void
-x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
+x86_output_mi_thunk (FILE *file,
tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
HOST_WIDE_INT vcall_offset, tree function)
{
rtx this_param = x86_this_parameter (function);
rtx this_reg, tmp;
+ /* Make sure unwind info is emitted for the thunk if needed. */
+ final_start_function (emit_barrier (), file, 1);
+
/* If VCALL_OFFSET, we'll need THIS in a register. Might as well
pull it in now and let DELTA benefit. */
if (REG_P (this_param))
/* Adjust the this parameter by a fixed constant. */
if (delta)
{
- /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
- Exceptions: -128 encodes smaller than 128, so swap sign and op. */
- bool sub = delta < 0 || delta == 128;
- xops[0] = GEN_INT (sub ? -delta : delta);
+ xops[0] = GEN_INT (delta);
xops[1] = this_reg ? this_reg : this_param;
if (TARGET_64BIT)
{
xops[0] = tmp;
xops[1] = this_param;
}
- if (sub)
+ if (x86_maybe_negate_const_int (&xops[0], DImode))
output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
else
output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
}
- else if (sub)
+ else if (x86_maybe_negate_const_int (&xops[0], SImode))
output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
else
output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
{
int tmp_regno = CX_REG;
if (lookup_attribute ("fastcall",
- TYPE_ATTRIBUTES (TREE_TYPE (function))))
+ TYPE_ATTRIBUTES (TREE_TYPE (function)))
+ || lookup_attribute ("thiscall",
+ TYPE_ATTRIBUTES (TREE_TYPE (function))))
tmp_regno = AX_REG;
tmp = gen_rtx_REG (SImode, tmp_regno);
}
output_asm_insn ("jmp\t{*}%1", xops);
}
}
+ final_end_function ();
}
static void
if (TARGET_64BIT)
{
#ifndef NO_PROFILE_COUNTERS
- fprintf (file, "\tleaq\t" LPREFIX "P%d@(%%rip),%%r11\n", labelno);
+ fprintf (file, "\tleaq\t" LPREFIX "P%d(%%rip),%%r11\n", labelno);
#endif
if (DEFAULT_ABI == SYSV_ABI && flag_pic)
replace = true;
/* Empty functions get branch mispredict even when the jump destination
is not visible to us. */
- if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
+ if (!prev && !optimize_function_for_size_p (cfun))
replace = true;
}
if (replace)
extended_reg_mentioned_1, NULL);
}
+/* If profitable, negate (without causing overflow) integer constant
+ of mode MODE at location LOC. Return true in this case. */
+bool
+x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
+{
+ HOST_WIDE_INT val;
+
+ /* Only CONST_INT operands can be negated here. */
+ if (!CONST_INT_P (*loc))
+ return false;
+
+ switch (mode)
+ {
+ case DImode:
+ /* DImode x86_64 constants must fit in 32 bits. */
+ gcc_assert (x86_64_immediate_operand (*loc, mode));
+
+ /* The constant fits in 32 bits, so the overflow check below
+ can safely be performed in SImode. */
+ mode = SImode;
+ break;
+
+ case SImode:
+ case HImode:
+ case QImode:
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Avoid overflows: the most negative value of MODE (the lone
+ sign-bit value) has no in-range negation. */
+ if (mode_signbit_p (mode, *loc))
+ return false;
+
+ val = INTVAL (*loc);
+
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if ((val < 0 && val != -128)
+ || val == 128)
+ {
+ *loc = GEN_INT (-val);
+ return true;
+ }
+
+ return false;
+}
+
/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
optabs would emit if we didn't have TFmode patterns. */
insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
if (recog_memoized (insn) < 0)
{
+ rtx seq;
/* If that fails, force VAL into a register. */
+
+ start_sequence ();
XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
+ seq = get_insns ();
+ end_sequence ();
+ if (seq)
+ emit_insn_before (seq, insn);
+
ok = recog_memoized (insn) >= 0;
gcc_assert (ok);
}
emit_move_insn (operand0, res);
}
\f
-/* Validate whether a FMA4 instruction is valid or not.
- OPERANDS is the array of operands.
- NUM is the number of operands.
- USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
- NUM_MEMORY is the maximum number of memory operands to accept.
- NUM_MEMORY less than zero is a special case to allow an operand
- of an instruction to be memory operation.
- when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
-
-bool
-ix86_fma4_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
- bool uses_oc0, int num_memory, bool commutative)
-{
- int mem_mask;
- int mem_count;
- int i;
-
- /* Count the number of memory arguments */
- mem_mask = 0;
- mem_count = 0;
- for (i = 0; i < num; i++)
- {
- enum machine_mode mode = GET_MODE (operands[i]);
- if (register_operand (operands[i], mode))
- ;
-
- else if (memory_operand (operands[i], mode))
- {
- mem_mask |= (1 << i);
- mem_count++;
- }
-
- else
- {
- rtx pattern = PATTERN (insn);
-
- /* allow 0 for pcmov */
- if (GET_CODE (pattern) != SET
- || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
- || i < 2
- || operands[i] != CONST0_RTX (mode))
- return false;
- }
- }
-
- /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
- a memory operation. */
- if (num_memory < 0)
- {
- num_memory = -num_memory;
- if ((mem_mask & (1 << (num-1))) != 0)
- {
- mem_mask &= ~(1 << (num-1));
- mem_count--;
- }
- }
-
- /* If there were no memory operations, allow the insn */
- if (mem_mask == 0)
- return true;
-
- /* Do not allow the destination register to be a memory operand. */
- else if (mem_mask & (1 << 0))
- return false;
-
- /* If there are too many memory operations, disallow the instruction. While
- the hardware only allows 1 memory reference, before register allocation
- for some insns, we allow two memory operations sometimes in order to allow
- code like the following to be optimized:
-
- float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
-
- or similar cases that are vectorized into using the vfmaddss
- instruction. */
- else if (mem_count > num_memory)
- return false;
-
- /* Don't allow more than one memory operation if not optimizing. */
- else if (mem_count > 1 && !optimize)
- return false;
-
- else if (num == 4 && mem_count == 1)
- {
- /* formats (destination is the first argument), example vfmaddss:
- xmm1, xmm1, xmm2, xmm3/mem
- xmm1, xmm1, xmm2/mem, xmm3
- xmm1, xmm2, xmm3/mem, xmm1
- xmm1, xmm2/mem, xmm3, xmm1 */
- if (uses_oc0)
- return ((mem_mask == (1 << 1))
- || (mem_mask == (1 << 2))
- || (mem_mask == (1 << 3)));
-
- /* format, example vpmacsdd:
- xmm1, xmm2, xmm3/mem, xmm1 */
- if (commutative)
- return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
- else
- return (mem_mask == (1 << 2));
- }
-
- else if (num == 4 && num_memory == 2)
- {
- /* If there are two memory operations, we can load one of the memory ops
- into the destination register. This is for optimizing the
- multiply/add ops, which the combiner has optimized both the multiply
- and the add insns to have a memory operation. We have to be careful
- that the destination doesn't overlap with the inputs. */
- rtx op0 = operands[0];
-
- if (reg_mentioned_p (op0, operands[1])
- || reg_mentioned_p (op0, operands[2])
- || reg_mentioned_p (op0, operands[3]))
- return false;
-
- /* formats (destination is the first argument), example vfmaddss:
- xmm1, xmm1, xmm2, xmm3/mem
- xmm1, xmm1, xmm2/mem, xmm3
- xmm1, xmm2, xmm3/mem, xmm1
- xmm1, xmm2/mem, xmm3, xmm1
-
- For the oc0 case, we will load either operands[1] or operands[3] into
- operands[0], so any combination of 2 memory operands is ok. */
- if (uses_oc0)
- return true;
-
- /* format, example vpmacsdd:
- xmm1, xmm2, xmm3/mem, xmm1
-
- For the integer multiply/add instructions be more restrictive and
- require operands[2] and operands[3] to be the memory operands. */
- if (commutative)
- return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)));
- else
- return (mem_mask == ((1 << 2) | (1 << 3)));
- }
-
- else if (num == 3 && num_memory == 1)
- {
- /* formats, example vprotb:
- xmm1, xmm2, xmm3/mem
- xmm1, xmm2/mem, xmm3 */
- if (uses_oc0)
- return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
-
- /* format, example vpcomeq:
- xmm1, xmm2, xmm3/mem */
- else
- return (mem_mask == (1 << 2));
- }
-
- else
- gcc_unreachable ();
-
- return false;
-}
-
-
-/* Fixup an FMA4 instruction that has 2 memory input references into a form the
- hardware will allow by using the destination register to load one of the
- memory operations. Presently this is used by the multiply/add routines to
- allow 2 memory references. */
-
-void
-ix86_expand_fma4_multiple_memory (rtx operands[],
- int num,
- enum machine_mode mode)
-{
- rtx op0 = operands[0];
- if (num != 4
- || memory_operand (op0, mode)
- || reg_mentioned_p (op0, operands[1])
- || reg_mentioned_p (op0, operands[2])
- || reg_mentioned_p (op0, operands[3]))
- gcc_unreachable ();
-
- /* For 2 memory operands, pick either operands[1] or operands[3] to move into
- the destination register. */
- if (memory_operand (operands[1], mode))
- {
- emit_move_insn (op0, operands[1]);
- operands[1] = op0;
- }
- else if (memory_operand (operands[3], mode))
- {
- emit_move_insn (op0, operands[3]);
- operands[3] = op0;
- }
- else
- gcc_unreachable ();
-
- return;
-}
/* Table of valid machine attributes. */
static const struct attribute_spec ix86_attribute_table[] =
/* Fastcall attribute says callee is responsible for popping arguments
if they are not variable. */
{ "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
+ /* Thiscall attribute says callee is responsible for popping arguments
+ if they are not variable. */
+ { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
/* Cdecl attribute says the callee is a normal C declaration */
{ "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
/* Regparm attribute specifies how many integer arguments are to be
tree itype = TREE_TYPE (vec_type);
bool u = TYPE_UNSIGNED (itype);
enum machine_mode vmode = TYPE_MODE (vec_type);
- enum ix86_builtins fcode;
+ enum ix86_builtins fcode = fcode; /* Silence bogus warning. */
bool ok = TARGET_SSE2;
switch (vmode)
do_subreg:
vmode = V8HImode;
target = gen_lowpart (vmode, target);
- op0 = gen_lowpart (vmode, target);
- op1 = gen_lowpart (vmode, target);
+ op0 = gen_lowpart (vmode, op0);
+ op1 = gen_lowpart (vmode, op1);
break;
default:
}
/* This matches five different patterns with the different modes. */
- x = gen_rtx_VEC_MERGE (vmode, op0, op1, GEN_INT (mask));
+ x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
x = gen_rtx_SET (VOIDmode, target, x);
emit_insn (x);
input where SEL+CONCAT may not. */
if (d->op0 == d->op1)
{
- if (expand_vselect (d->target, d->op0, d->perm, nelt))
+ int mask = nelt - 1;
+
+ for (i = 0; i < nelt; i++)
+ perm2[i] = d->perm[i] & mask;
+
+ if (expand_vselect (d->target, d->op0, perm2, nelt))
return true;
/* There are plenty of patterns in sse.md that are written for
every other permutation operand. */
for (i = 0; i < nelt; i += 2)
{
- perm2[i] = d->perm[i];
- perm2[i+1] = d->perm[i+1] + nelt;
+ perm2[i] = d->perm[i] & mask;
+ perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
}
if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
return true;
/* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
if (nelt >= 4)
{
- memcpy (perm2, d->perm, nelt);
- for (i = 2; i < nelt; i += 4)
+ for (i = 0; i < nelt; i += 4)
{
- perm2[i+0] += nelt;
- perm2[i+1] += nelt;
+ perm2[i + 0] = d->perm[i + 0] & mask;
+ perm2[i + 1] = d->perm[i + 1] & mask;
+ perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
+ perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
}
if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
#define TARGET_DEFAULT_TARGET_FLAGS \
(TARGET_DEFAULT \
| TARGET_SUBTARGET_DEFAULT \
- | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
+ | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT \
+ | MASK_FUSED_MADD)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
+
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate
+#undef TARGET_ASM_CODE_END
+#define TARGET_ASM_CODE_END ix86_code_end
+
struct gcc_target targetm = TARGET_INITIALIZER;
\f
#include "gt-i386.h"