const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
const int x86_inter_unit_moves = ~(m_ATHLON_K8);
+const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
/* In case the average insn count for single function invocation is
lower than this constant, emit fast (but longer) prologue and
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
/* Define the structure for the machine field in struct function. */
-struct machine_function GTY(())
+
+struct stack_local_entry GTY(())
{
- rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
- const char *some_ld_name;
- int save_varrargs_registers;
- int accesses_prev_frame;
+ unsigned short mode;
+ unsigned short n;
+ rtx rtl;
+ struct stack_local_entry *next;
};
-#define ix86_stack_locals (cfun->machine->stack_locals)
-#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
-
/* Structure describing stack frame layout.
Stack grows downward:
enum fpmath_unit ix86_fpmath;
/* Which cpu are we scheduling for. */
-enum processor_type ix86_cpu;
+enum processor_type ix86_tune;
/* Which instruction set architecture to use. */
enum processor_type ix86_arch;
/* Strings to hold which cpu and instruction set architecture to use. */
-const char *ix86_cpu_string; /* for -mcpu=<xxx> */
+const char *ix86_tune_string; /* for -mtune=<xxx> */
const char *ix86_arch_string; /* for -march=<xxx> */
const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
const int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
enum x86_64_reg_class));
+
+/* Table of constants used by fldpi, fldln2, etc... */
+static REAL_VALUE_TYPE ext_80387_constants_table [5];
+static bool ext_80387_constants_init = 0;
+static void init_ext_80387_constants PARAMS ((void));
\f
/* Initialize the GCC target structure. */
#undef TARGET_ATTRIBUTE_TABLE
SUBTARGET_OVERRIDE_OPTIONS;
#endif
- if (!ix86_cpu_string && ix86_arch_string)
- ix86_cpu_string = ix86_arch_string;
- if (!ix86_cpu_string)
- ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
+ if (!ix86_tune_string && ix86_arch_string)
+ ix86_tune_string = ix86_arch_string;
+ if (!ix86_tune_string)
+ ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
if (!ix86_arch_string)
ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
{
ix86_arch = processor_alias_table[i].processor;
/* Default cpu tuning to the architecture. */
- ix86_cpu = ix86_arch;
+ ix86_tune = ix86_arch;
if (processor_alias_table[i].flags & PTA_MMX
&& !(target_flags_explicit & MASK_MMX))
target_flags |= MASK_MMX;
error ("bad value (%s) for -march= switch", ix86_arch_string);
for (i = 0; i < pta_size; i++)
- if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
+ if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
{
- ix86_cpu = processor_alias_table[i].processor;
+ ix86_tune = processor_alias_table[i].processor;
if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
error ("CPU you selected does not support x86-64 instruction set");
break;
if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
x86_prefetch_sse = true;
if (i == pta_size)
- error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
+ error ("bad value (%s) for -mtune= switch", ix86_tune_string);
if (optimize_size)
ix86_cost = &size_cost;
else
- ix86_cost = processor_target_table[ix86_cpu].cost;
- target_flags |= processor_target_table[ix86_cpu].target_enable;
- target_flags &= ~processor_target_table[ix86_cpu].target_disable;
+ ix86_cost = processor_target_table[ix86_tune].cost;
+ target_flags |= processor_target_table[ix86_tune].target_enable;
+ target_flags &= ~processor_target_table[ix86_tune].target_disable;
/* Arrange to set up i386_stack_locals for all functions. */
init_machine_status = ix86_init_machine_status;
/* Default align_* from the processor table. */
if (align_loops == 0)
{
- align_loops = processor_target_table[ix86_cpu].align_loop;
- align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
+ align_loops = processor_target_table[ix86_tune].align_loop;
+ align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
}
if (align_jumps == 0)
{
- align_jumps = processor_target_table[ix86_cpu].align_jump;
- align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
+ align_jumps = processor_target_table[ix86_tune].align_jump;
+ align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
}
if (align_functions == 0)
{
- align_functions = processor_target_table[ix86_cpu].align_func;
+ align_functions = processor_target_table[ix86_tune].align_func;
}
/* Validate -mpreferred-stack-boundary= value, or provide default.
}
/* Validate -mbranch-cost= value, or provide default. */
- ix86_branch_cost = processor_target_table[ix86_cpu].cost->branch_cost;
+ ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
if (ix86_branch_cost_string)
{
i = atoi (ix86_branch_cost_string);
if (x86_3dnow_a & (1 << ix86_arch))
target_flags |= MASK_3DNOW_A;
}
- if ((x86_accumulate_outgoing_args & CPUMASK)
+ if ((x86_accumulate_outgoing_args & TUNEMASK)
&& !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
&& !optimize_size)
target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
break;
case BLKmode:
+ if (bytes < 0)
+ break;
+ /* FALLTHRU */
case DImode:
case SImode:
case HImode:
case MULT:
/* Modern CPUs have same latency for HImode and SImode multiply,
but 386 and 486 do HImode multiply faster. */
- return ix86_cpu > PROCESSOR_I486;
+ return ix86_tune > PROCESSOR_I486;
case PLUS:
case AND:
case IOR:
return 1;
}
\f
+/* Initialize the table of extra 80387 mathematical constants. */
+
+static void
+init_ext_80387_constants ()
+{
+ static const char * cst[5] =
+ {
+ "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
+ "0.6931471805599453094286904741849753009", /* 1: fldln2 */
+ "1.4426950408889634073876517827983434472", /* 2: fldl2e */
+ "3.3219280948873623478083405569094566090", /* 3: fldl2t */
+ "3.1415926535897932385128089594061862044", /* 4: fldpi */
+ };
+ int i;
+
+ for (i = 0; i < 5; i++)
+ {
+ real_from_string (&ext_80387_constants_table[i], cst[i]);
+ /* Ensure each constant is rounded to XFmode precision. */
+ real_convert (&ext_80387_constants_table[i], XFmode,
+ &ext_80387_constants_table[i]);
+ }
+
+ ext_80387_constants_init = 1;
+}
+
/* Return true if the constant is something that can be loaded with
- a special instruction. Only handle 0.0 and 1.0; others are less
- worthwhile. */
+ a special instruction. */
int
standard_80387_constant_p (x)
{
if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
return -1;
- /* Note that on the 80387, other constants, such as pi, that we should support
- too. On some machines, these are much slower to load as standard constant,
- than to load from doubles in memory. */
+
if (x == CONST0_RTX (GET_MODE (x)))
return 1;
if (x == CONST1_RTX (GET_MODE (x)))
return 2;
+
+ /* For XFmode constants, try to find a special 80387 instruction on
+ those CPUs that benefit from them. */
+ if (GET_MODE (x) == XFmode
+ && x86_ext_80387_constants & TUNEMASK)
+ {
+ REAL_VALUE_TYPE r;
+ int i;
+
+ if (! ext_80387_constants_init)
+ init_ext_80387_constants ();
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+ for (i = 0; i < 5; i++)
+ if (real_identical (&r, &ext_80387_constants_table[i]))
+ return i + 3;
+ }
+
return 0;
}
+/* Return the opcode of the special instruction to be used to load
+ the constant X. */
+
+const char *
+standard_80387_constant_opcode (x)
+ rtx x;
+{
+ switch (standard_80387_constant_p (x))
+ {
+ case 1:
+ return "fldz";
+ case 2:
+ return "fld1";
+ case 3:
+ return "fldlg2";
+ case 4:
+ return "fldln2";
+ case 5:
+ return "fldl2e";
+ case 6:
+ return "fldl2t";
+ case 7:
+ return "fldpi";
+ }
+ abort ();
+}
+
+/* Return the CONST_DOUBLE representing the 80387 constant that is
+ loaded by the specified special instruction. The argument IDX
+ matches the return value from standard_80387_constant_p. */
+
+rtx
+standard_80387_constant_rtx (idx)
+ int idx;
+{
+ int i;
+
+ if (! ext_80387_constants_init)
+ init_ext_80387_constants ();
+
+ switch (idx)
+ {
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ i = idx - 3;
+ break;
+
+ default:
+ abort ();
+ }
+
+ return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i], XFmode);
+}
+
/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
*/
int
/* Special case: on K6, [%esi] makes the instruction vector decoded.
Avoid this by transforming to [%esi+0]. */
- if (ix86_cpu == PROCESSOR_K6 && !optimize_size
+ if (ix86_tune == PROCESSOR_K6 && !optimize_size
&& base && !index && !disp
&& REG_P (base)
&& REGNO_REG_CLASS (REGNO (base)) == SIREG)
enum machine_mode mode;
int n;
{
+ struct stack_local_entry *s;
+
if (n < 0 || n >= MAX_386_STACK_LOCALS)
abort ();
- if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
- ix86_stack_locals[(int) mode][n]
- = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
+ for (s = ix86_stack_locals; s; s = s->next)
+ if (s->mode == mode && s->n == n)
+ return s->rtl;
+
+ s = (struct stack_local_entry *)
+ ggc_alloc (sizeof (struct stack_local_entry));
+ s->n = n;
+ s->mode = mode;
+ s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
- return ix86_stack_locals[(int) mode][n];
+ s->next = ix86_stack_locals;
+ ix86_stack_locals = s;
+ return s->rtl;
}
/* Construct the SYMBOL_REF for the tls_get_addr function. */
static int
ix86_issue_rate ()
{
- switch (ix86_cpu)
+ switch (ix86_tune)
{
case PROCESSOR_PENTIUM:
case PROCESSOR_K6:
insn_type = get_attr_type (insn);
dep_insn_type = get_attr_type (dep_insn);
- switch (ix86_cpu)
+ switch (ix86_tune)
{
case PROCESSOR_PENTIUM:
/* Address Generation Interlock adds a cycle of latency. */
goto out;
}
- switch (ix86_cpu)
+ switch (ix86_tune)
{
default:
break;
int can_issue_more;
{
int i;
- switch (ix86_cpu)
+ switch (ix86_tune)
{
default:
return can_issue_more - 1;
static int
ia32_multipass_dfa_lookahead ()
{
- if (ix86_cpu == PROCESSOR_PENTIUM)
+ if (ix86_tune == PROCESSOR_PENTIUM)
return 2;
else
return 0;
\f
#define def_builtin(MASK, NAME, TYPE, CODE) \
do { \
- if ((MASK) & target_flags) \
+ if ((MASK) & target_flags \
+ && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
NULL, NULL_TREE); \
} while (0)
/* Used for builtins that are enabled both by -msse and -msse2. */
#define MASK_SSE1 (MASK_SSE | MASK_SSE2)
+#define MASK_SSE164 (MASK_SSE | MASK_SSE2 | MASK_64BIT)
+#define MASK_SSE264 (MASK_SSE2 | MASK_64BIT)
static const struct builtin_description bdesc_comi[] =
{
{ MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
{ MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
{ MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
{ MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
{ MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
{ MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
{ MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
{ MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
{ MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
{ MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
+ { MASK_SSE164, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
{ MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
{ MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
{ MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
{ MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
{ MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
- { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
{ MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
{ MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
{ MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
- { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
{ MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
{ MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
{ MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
{ MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
+ { MASK_SSE264, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
{ MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
{ MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
};
{ MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
{ MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
+ { MASK_SSE164, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
{ MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
{ MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
+ { MASK_SSE164, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
{ MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
{ MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
{ MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
{ MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
+ { MASK_SSE264, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
+ { MASK_SSE264, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
{ MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
{ MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
tree int_ftype_v4sf
= build_function_type_list (integer_type_node,
V4SF_type_node, NULL_TREE);
+ tree int64_ftype_v4sf
+ = build_function_type_list (long_long_integer_type_node,
+ V4SF_type_node, NULL_TREE);
tree int_ftype_v8qi
= build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
tree v4sf_ftype_v4sf_int
= build_function_type_list (V4SF_type_node,
V4SF_type_node, integer_type_node, NULL_TREE);
+ tree v4sf_ftype_v4sf_int64
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, long_long_integer_type_node,
+ NULL_TREE);
tree v4sf_ftype_v4sf_v2si
= build_function_type_list (V4SF_type_node,
V4SF_type_node, V2SI_type_node, NULL_TREE);
= build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
tree int_ftype_v2df
= build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
+ tree int64_ftype_v2df
+ = build_function_type_list (long_long_integer_type_node,
+ V2DF_type_node, NULL_TREE);
tree v2df_ftype_v2df_int
= build_function_type_list (V2DF_type_node,
V2DF_type_node, integer_type_node, NULL_TREE);
+ tree v2df_ftype_v2df_int64
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, long_long_integer_type_node,
+ NULL_TREE);
tree v4sf_ftype_v4sf_v2df
= build_function_type_list (V4SF_type_node,
V4SF_type_node, V2DF_type_node, NULL_TREE);
def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
+ def_builtin (MASK_SSE164, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
+ def_builtin (MASK_SSE164, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
+ def_builtin (MASK_SSE164, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
+ def_builtin (MASK_SSE264, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
+ def_builtin (MASK_SSE264, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
+ def_builtin (MASK_SSE264, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
case 1: /* 0.0 */
*total = 1;
break;
- case 2: /* 1.0 */
+ default: /* Other constants */
*total = 2;
break;
- default:
+ case 0:
+ case -1:
/* Start with (MEM (SYMBOL_REF)), since that's where
it'll probably end up. Add a penalty for size. */
*total = (COSTS_N_INSNS (1)