X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Fconfig%2Fi386%2Fi386.c;h=5afa63f0fecd1423e81084bab8ccd13985603a8c;hb=75ac84a79eeeac52f7fd36a8f39ed97f9f535db9;hp=5a1af7ac88425d809446d6917f731ba09f6c3141;hpb=dbddc6c40d60108eff1004ab455c8c371d7d5deb;p=pf3gnuchains%2Fgcc-fork.git

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 5a1af7ac884..5afa63f0fec 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -48,6 +48,7 @@ Boston, MA 02110-1301, USA.  */
 #include "langhooks.h"
 #include "cgraph.h"
 #include "tree-gimple.h"
+#include "dwarf2.h"
 
 #ifndef CHECK_STACK_LIMIT
 #define CHECK_STACK_LIMIT (-1)
@@ -812,6 +813,11 @@ unsigned int ix86_preferred_stack_boundary;
 /* Values 1-5: see jump.c */
 int ix86_branch_cost;
 
+/* Variables which are this size or smaller are put in the data/bss
+   or ldata/lbss sections.  */
+
+int ix86_section_threshold = 65536;
+
 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
 char internal_label_prefix[16];
 int internal_label_prefix_len;
@@ -891,7 +897,7 @@ static int ix86_function_regparm (tree, tree);
 const struct attribute_spec ix86_attribute_table[];
 static bool ix86_function_ok_for_sibcall (tree, tree);
 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
-static int ix86_value_regno (enum machine_mode, tree);
+static int ix86_value_regno (enum machine_mode, tree, tree);
 static bool contains_128bit_aligned_vector_p (tree);
 static rtx ix86_struct_value_rtx (tree, int);
 static bool ix86_ms_bitfield_layout_p (tree);
@@ -906,6 +912,7 @@ static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
 static void ix86_init_builtins (void);
 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
 static const char *ix86_mangle_fundamental_type (tree);
+static tree ix86_stack_protect_fail (void);
 
 /* This function is only used on Solaris.  */
 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
@@ -944,6 +951,12 @@ static const char * const x86_64_reg_class_name[] = {
 static REAL_VALUE_TYPE ext_80387_constants_table [5];
 static bool ext_80387_constants_init = 0;
 static void init_ext_80387_constants (void);
+static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
+static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
+static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
+static void x86_64_elf_select_section (tree decl, int reloc,
+                                       unsigned HOST_WIDE_INT align)
+                                      ATTRIBUTE_UNUSED;
 
 /* Initialize the GCC target structure.  */
 #undef TARGET_ATTRIBUTE_TABLE
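Note: the new ix86_section_threshold default of 65536 bytes is the cut-off the
medium code model uses to decide between the normal and the "large" data
sections.  A hedged illustration of the intended effect, assuming a translation
unit compiled with -mcmodel=medium on x86-64 (array names are invented):

    static char big_table[100000];        /* > 65536 bytes: placed in .lbss    */
    static char small_table[4096];        /* <= 65536 bytes: stays in .bss     */
    static int  big_init[32768] = { 1 };  /* 131072 bytes, initialized: .ldata */

The -mlarge-data-threshold= option parsed in override_options below adjusts
this cut-off.
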
@@ -964,6 +977,13 @@ static void init_ext_80387_constants (void);
 #undef TARGET_ASM_FUNCTION_EPILOGUE
 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
 
+#undef TARGET_ENCODE_SECTION_INFO
+#ifndef SUBTARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
+#else
+#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
+#endif
+
 #undef TARGET_ASM_OPEN_PAREN
 #define TARGET_ASM_OPEN_PAREN ""
 #undef TARGET_ASM_CLOSE_PAREN
@@ -1081,6 +1101,12 @@ static void init_ext_80387_constants (void);
 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
 
+#undef TARGET_STACK_PROTECT_FAIL
+#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE ix86_function_value
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 
@@ -1284,14 +1310,14 @@ override_options (void)
 	{
 	  if (!strcmp (ix86_cmodel_string, "small"))
 	    ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
+	  else if (!strcmp (ix86_cmodel_string, "medium"))
+	    ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
 	  else if (flag_pic)
 	    sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
 	  else if (!strcmp (ix86_cmodel_string, "32"))
 	    ix86_cmodel = CM_32;
 	  else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
 	    ix86_cmodel = CM_KERNEL;
-	  else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
-	    ix86_cmodel = CM_MEDIUM;
 	  else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
 	    ix86_cmodel = CM_LARGE;
 	  else
@@ -1495,6 +1521,14 @@ override_options (void)
       else
 	ix86_branch_cost = i;
     }
+  if (ix86_section_threshold_string)
+    {
+      i = atoi (ix86_section_threshold_string);
+      if (i < 0)
+	error ("-mlarge-data-threshold=%d is negative", i);
+      else
+	ix86_section_threshold = i;
+    }
 
   if (ix86_tls_dialect_string)
     {
@@ -1569,6 +1603,11 @@ override_options (void)
 	  target_flags |= MASK_NO_RED_ZONE;
     }
 
+  /* Accept -msseregparm only if at least SSE support is enabled.  */
+  if (TARGET_SSEREGPARM
+      && ! TARGET_SSE)
+    error ("-msseregparm used without SSE enabled");
+
   ix86_fpmath = TARGET_FPMATH_DEFAULT;
 
   if (ix86_fpmath_string != 0)
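Note: the new -msseregparm check guards the option form of the convention; the
per-function attribute is handled in ix86_function_sseregparm further down.  A
sketch of the attribute as a user would write it, assuming -m32 with SSE
enabled (the function name is invented):

    /* Pass and return float/double values in SSE registers instead of
       going through the 80387 stack; requires SSE, hence the check above.  */
    float __attribute__ ((sseregparm))
    scale (float x)
    {
      return x * 2.0f;
    }
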
@@ -1629,6 +1668,177 @@ override_options (void)
       flag_schedule_insns_after_reload = flag_schedule_insns = 0;
     }
 
+/* Switch to the appropriate section for output of DECL.
+   DECL is either a `VAR_DECL' node or a constant of some sort.
+   RELOC indicates whether forming the initial value of DECL requires
+   link-time relocations.  */
+
+static void
+x86_64_elf_select_section (tree decl, int reloc,
+			   unsigned HOST_WIDE_INT align)
+{
+  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
+      && ix86_in_large_data_p (decl))
+    {
+      const char *sname = NULL;
+      switch (categorize_decl_for_section (decl, reloc, flag_pic))
+	{
+	case SECCAT_DATA:
+	  sname = ".ldata";
+	  break;
+	case SECCAT_DATA_REL:
+	  sname = ".ldata.rel";
+	  break;
+	case SECCAT_DATA_REL_LOCAL:
+	  sname = ".ldata.rel.local";
+	  break;
+	case SECCAT_DATA_REL_RO:
+	  sname = ".ldata.rel.ro";
+	  break;
+	case SECCAT_DATA_REL_RO_LOCAL:
+	  sname = ".ldata.rel.ro.local";
+	  break;
+	case SECCAT_BSS:
+	  sname = ".lbss";
+	  break;
+	case SECCAT_RODATA:
+	case SECCAT_RODATA_MERGE_STR:
+	case SECCAT_RODATA_MERGE_STR_INIT:
+	case SECCAT_RODATA_MERGE_CONST:
+	  sname = ".lrodata";
+	  break;
+	case SECCAT_SRODATA:
+	case SECCAT_SDATA:
+	case SECCAT_SBSS:
+	  gcc_unreachable ();
+	case SECCAT_TEXT:
+	case SECCAT_TDATA:
+	case SECCAT_TBSS:
+	  /* We don't split these for medium model.  Place them into
+	     default sections and hope for the best.  */
+	  break;
+	}
+      if (sname)
+	{
+	  named_section (decl, sname, reloc);
+	  return;
+	}
+    }
+  default_elf_select_section (decl, reloc, align);
+}
+
+/* Build up a unique section name, expressed as a
+   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
+   RELOC indicates whether the initial value of EXP requires
+   link-time relocations.  */
+
+static void
+x86_64_elf_unique_section (tree decl, int reloc)
+{
+  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
+      && ix86_in_large_data_p (decl))
+    {
+      const char *prefix = NULL;
+      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
+      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
+
+      switch (categorize_decl_for_section (decl, reloc, flag_pic))
+	{
+	case SECCAT_DATA:
+	case SECCAT_DATA_REL:
+	case SECCAT_DATA_REL_LOCAL:
+	case SECCAT_DATA_REL_RO:
+	case SECCAT_DATA_REL_RO_LOCAL:
+	  prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
+	  break;
+	case SECCAT_BSS:
+	  prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
+	  break;
+	case SECCAT_RODATA:
+	case SECCAT_RODATA_MERGE_STR:
+	case SECCAT_RODATA_MERGE_STR_INIT:
+	case SECCAT_RODATA_MERGE_CONST:
+	  prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
+	  break;
+	case SECCAT_SRODATA:
+	case SECCAT_SDATA:
+	case SECCAT_SBSS:
+	  gcc_unreachable ();
+	case SECCAT_TEXT:
+	case SECCAT_TDATA:
+	case SECCAT_TBSS:
+	  /* We don't split these for medium model.  Place them into
+	     default sections and hope for the best.  */
+	  break;
+	}
+      if (prefix)
+	{
+	  const char *name;
+	  size_t nlen, plen;
+	  char *string;
+	  plen = strlen (prefix);
+
+	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+	  name = targetm.strip_name_encoding (name);
+	  nlen = strlen (name);
+
+	  string = alloca (nlen + plen + 1);
+	  memcpy (string, prefix, plen);
+	  memcpy (string + plen, name, nlen + 1);
+
+	  DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
+	  return;
+	}
+    }
+  default_unique_section (decl, reloc);
+}
+
+#ifdef COMMON_ASM_OP
+/* This says how to output assembler code to declare an
+   uninitialized external linkage data object.
+
+   For medium model x86-64 we need to use the .largecomm directive
+   for large objects.  */
+
+void
+x86_elf_aligned_common (FILE *file,
+			const char *name, unsigned HOST_WIDE_INT size,
+			int align)
+{
+  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
+      && size > (unsigned int)ix86_section_threshold)
+    fprintf (file, ".largecomm\t");
+  else
+    fprintf (file, "%s", COMMON_ASM_OP);
+  assemble_name (file, name);
+  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
+	   size, align / BITS_PER_UNIT);
+}
+
+/* Utility function for targets to use in implementing
+   ASM_OUTPUT_ALIGNED_BSS.  */
+
+void
+x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
+			const char *name, unsigned HOST_WIDE_INT size,
+			int align)
+{
+  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
+      && size > (unsigned int)ix86_section_threshold)
+    named_section (decl, ".lbss", 0);
+  else
+    bss_section ();
+  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
+#ifdef ASM_DECLARE_OBJECT_NAME
+  last_assemble_variable_decl = decl;
+  ASM_DECLARE_OBJECT_NAME (file, name, decl);
+#else
+  /* The standard thing is just to output a label for the object.  */
+  ASM_OUTPUT_LABEL (file, name);
+#endif /* ASM_DECLARE_OBJECT_NAME */
+  ASM_OUTPUT_SKIP (file, size ? size : 1);
+}
+#endif
+
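Note: what the two helpers above emit, sketched for a hypothetical
-mcmodel=medium -fcommon compile with the default threshold.  The names are
invented and the alignment operands are illustrative, not taken from real
assembler output:

    char big_blob[1048576];   /* common, > threshold:  .largecomm big_blob,1048576,32 */
    char tiny_blob[64];       /* common, <= threshold: .comm      tiny_blob,64,32     */
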
size : 1); +} +#endif + void optimization_options (int level, int size ATTRIBUTE_UNUSED) { @@ -1696,6 +1906,7 @@ static bool ix86_function_ok_for_sibcall (tree decl, tree exp) { tree func; + rtx a, b; /* If we are generating position-independent code, we cannot sibcall optimize any indirect call, or a direct call to a global function, @@ -1706,16 +1917,23 @@ ix86_function_ok_for_sibcall (tree decl, tree exp) if (decl) func = decl; else - func = NULL; + { + func = TREE_TYPE (TREE_OPERAND (exp, 0)); + if (POINTER_TYPE_P (func)) + func = TREE_TYPE (func); + } - /* If we are returning floats on the 80387 register stack, we cannot + /* Check that the return value locations are the same. Like + if we are returning floats on the 80387 register stack, we cannot make a sibcall from a function that doesn't return a float to a function that does or, conversely, from a function that does return a float to a function that doesn't; the necessary stack adjustment - would not be executed. */ - if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp), func)) - != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), - cfun->decl))) + would not be executed. This is also the place we notice + differences in the return value ABI. */ + a = ix86_function_value (TREE_TYPE (exp), func, false); + b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), + cfun->decl, false); + if (! rtx_equal_p (a, b)) return false; /* If this call is indirect, we'll need to be able to use a call-clobbered @@ -1943,8 +2161,9 @@ ix86_function_sseregparm (tree type, tree decl) { /* Use SSE registers to pass SFmode and DFmode arguments if requested by the sseregparm attribute. */ - if (type - && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))) + if (TARGET_SSEREGPARM + || (type + && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type)))) { if (!TARGET_SSE) { @@ -3179,7 +3398,8 @@ ix86_function_value_regno_p (int regno) If the precise function being called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0. */ rtx -ix86_function_value (tree valtype, tree func) +ix86_function_value (tree valtype, tree fntype_or_decl, + bool outgoing ATTRIBUTE_UNUSED) { enum machine_mode natmode = type_natural_mode (valtype); @@ -3195,7 +3415,15 @@ ix86_function_value (tree valtype, tree func) return ret; } else - return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode, func)); + { + tree fn = NULL_TREE, fntype; + if (fntype_or_decl + && DECL_P (fntype_or_decl)) + fn = fntype_or_decl; + fntype = fn ? TREE_TYPE (fn) : fntype_or_decl; + return gen_rtx_REG (TYPE_MODE (valtype), + ix86_value_regno (natmode, fn, fntype)); + } } /* Return false iff type is returned in memory. */ @@ -3311,13 +3539,13 @@ ix86_libcall_value (enum machine_mode mode) } } else - return gen_rtx_REG (mode, ix86_value_regno (mode, NULL)); + return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL)); } /* Given a mode, return the register to use for a return value. */ static int -ix86_value_regno (enum machine_mode mode, tree func) +ix86_value_regno (enum machine_mode mode, tree func, tree fntype) { gcc_assert (!TARGET_64BIT); @@ -3337,9 +3565,10 @@ ix86_value_regno (enum machine_mode mode, tree func) /* Floating point return values in %st(0), except for local functions when SSE math is enabled or for functions with sseregparm attribute. 
@@ -1943,8 +2161,9 @@ ix86_function_sseregparm (tree type, tree decl)
 {
   /* Use SSE registers to pass SFmode and DFmode arguments if requested
      by the sseregparm attribute.  */
-  if (type
-      && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type)))
+  if (TARGET_SSEREGPARM
+      || (type
+	  && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
     {
       if (!TARGET_SSE)
 	{
@@ -3179,7 +3398,8 @@ ix86_function_value_regno_p (int regno)
    If the precise function being called is known, FUNC is its FUNCTION_DECL;
    otherwise, FUNC is 0.  */
 rtx
-ix86_function_value (tree valtype, tree func)
+ix86_function_value (tree valtype, tree fntype_or_decl,
+		     bool outgoing ATTRIBUTE_UNUSED)
 {
   enum machine_mode natmode = type_natural_mode (valtype);
 
@@ -3195,7 +3415,15 @@ ix86_function_value (tree valtype, tree func)
       return ret;
     }
   else
-    return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode, func));
+    {
+      tree fn = NULL_TREE, fntype;
+      if (fntype_or_decl
+	  && DECL_P (fntype_or_decl))
+	fn = fntype_or_decl;
+      fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
+      return gen_rtx_REG (TYPE_MODE (valtype),
+			  ix86_value_regno (natmode, fn, fntype));
+    }
 }
 
 /* Return false iff type is returned in memory.  */
@@ -3311,13 +3539,13 @@ ix86_libcall_value (enum machine_mode mode)
 	}
     }
   else
-    return gen_rtx_REG (mode, ix86_value_regno (mode, NULL));
+    return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
 }
 
 /* Given a mode, return the register to use for a return value.  */
 
 static int
-ix86_value_regno (enum machine_mode mode, tree func)
+ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
 {
   gcc_assert (!TARGET_64BIT);
 
@@ -3337,9 +3565,10 @@ ix86_value_regno (enum machine_mode mode, tree func)
 
   /* Floating point return values in %st(0), except for local functions when
      SSE math is enabled or for functions with sseregparm attribute.  */
-  if (func && (mode == SFmode || mode == DFmode))
+  if ((func || fntype)
+      && (mode == SFmode || mode == DFmode))
     {
-      int sse_level = ix86_function_sseregparm (TREE_TYPE (func), func);
+      int sse_level = ix86_function_sseregparm (fntype, func);
       if ((sse_level >= 1 && mode == SFmode)
 	  || (sse_level == 2 && mode == DFmode))
 	return FIRST_SSE_REG;
@@ -4171,7 +4400,7 @@ output_set_got (rtx dest)
   if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
     output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
   else if (!TARGET_MACHO)
-    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
+    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
 
   return "";
 }
@@ -4613,7 +4842,10 @@ ix86_expand_prologue (void)
 
   if (pic_reg_used)
     {
-      insn = emit_insn (gen_set_got (pic_offset_table_rtx));
+      if (TARGET_64BIT)
+	insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
+      else
+	insn = emit_insn (gen_set_got (pic_offset_table_rtx));
 
       /* Even with accurate pre-reload life analysis, we can wind up
 	 deleting all references to the pic register after reload.
@@ -5142,6 +5374,8 @@ legitimate_constant_p (rtx x)
       if (GET_CODE (x) == UNSPEC)
 	switch (XINT (x, 1))
 	  {
+	  case UNSPEC_GOTOFF:
+	    return TARGET_64BIT;
 	  case UNSPEC_TPOFF:
 	  case UNSPEC_NTPOFF:
 	    return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
@@ -5201,11 +5435,16 @@ legitimate_pic_operand_p (rtx x)
     {
     case CONST:
       inner = XEXP (x, 0);
+      if (GET_CODE (inner) == PLUS
+	  && GET_CODE (XEXP (inner, 1)) == CONST_INT)
+	inner = XEXP (inner, 0);
 
       /* Only some unspecs are valid as "constants".  */
       if (GET_CODE (inner) == UNSPEC)
 	switch (XINT (inner, 1))
 	  {
+	  case UNSPEC_GOTOFF:
+	    return TARGET_64BIT;
 	  case UNSPEC_TPOFF:
 	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
 	  default:
@@ -5238,7 +5477,7 @@ legitimate_pic_address_disp_p (rtx disp)
       if (tls_symbolic_operand (disp, GET_MODE (disp)))
 	return 0;
       if (GET_CODE (disp) == SYMBOL_REF
-	  && ix86_cmodel == CM_SMALL_PIC
+	  && !SYMBOL_REF_FAR_ADDR_P (disp)
 	  && SYMBOL_REF_LOCAL_P (disp))
 	return 1;
       if (GET_CODE (disp) == LABEL_REF)
@@ -5253,7 +5492,7 @@ legitimate_pic_address_disp_p (rtx disp)
 	  if (tls_symbolic_operand (op0, GET_MODE (op0)))
 	    return 0;
 	  if (((GET_CODE (op0) == SYMBOL_REF
-		&& ix86_cmodel == CM_SMALL_PIC
+		&& !SYMBOL_REF_FAR_ADDR_P (op0)
 		&& SYMBOL_REF_LOCAL_P (op0))
 	       || GET_CODE (op0) == LABEL_REF)
 	      && GET_CODE (op1) == CONST_INT
@@ -5271,7 +5510,8 @@ legitimate_pic_address_disp_p (rtx disp)
   /* We are unsafe to allow PLUS expressions.  This limit allowed distance
      of GOT tables.  We should not need these anyway.  */
   if (GET_CODE (disp) != UNSPEC
-      || XINT (disp, 1) != UNSPEC_GOTPCREL)
+      || (XINT (disp, 1) != UNSPEC_GOTPCREL
+	  && XINT (disp, 1) != UNSPEC_GOTOFF))
     return 0;
 
   if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
@@ -5556,7 +5796,7 @@ legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
   return FALSE;
 }
 
-/* Return an unique alias set for the GOT.  */
+/* Return a unique alias set for the GOT.  */
 
 static HOST_WIDE_INT
 ix86_GOT_alias_set (void)
@@ -5601,6 +5841,40 @@ legitimize_pic_address (rtx orig, rtx reg)
 
   if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
     new = addr;
+  else if (TARGET_64BIT
+	   && ix86_cmodel != CM_SMALL_PIC
+	   && local_symbolic_operand (addr, Pmode))
+    {
+      rtx tmpreg;
+      /* This symbol may be referenced via a displacement from the PIC
+	 base address (@GOTOFF).  */
+
+      if (reload_in_progress)
+	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
+      if (GET_CODE (addr) == CONST)
+	addr = XEXP (addr, 0);
+      if (GET_CODE (addr) == PLUS)
+	{
+	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
+	  new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
+	}
+      else
+	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
+      new = gen_rtx_CONST (Pmode, new);
+      if (!reg)
+	tmpreg = gen_reg_rtx (Pmode);
+      else
+	tmpreg = reg;
+      emit_move_insn (tmpreg, new);
+
+      if (reg != 0)
+	{
+	  new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
+				     tmpreg, 1, OPTAB_DIRECT);
+	  new = reg;
+	}
+      else
+	new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
+    }
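Note: the shape of the RTL the new arm builds for a local symbol plus offset
under the medium PIC model, reconstructed from the calls above (the register
numbers are invented for the sketch):

    /* (set (reg:DI 80)                            ; tmpreg
            (const:DI
              (plus:DI (unspec:DI [(symbol_ref:DI "sym")] UNSPEC_GOTOFF)
                       (const_int 4))))
       (set (reg:DI 81)                            ; result
            (plus:DI (reg:DI 43) (reg:DI 80)))     ; PIC base + @GOTOFF  */
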
   else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
     {
       /* This symbol may be referenced via a displacement from the PIC
@@ -7169,7 +7443,7 @@ split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
 	}
     }
 }
 
-/* Split one or more TImode RTL references into pairs of SImode
+/* Split one or more TImode RTL references into pairs of DImode
    references.  The RTL can be REG, offsettable MEM, integer constant, or
    CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
    split and "num" is its length.  lo_half and hi_half are output arrays
@@ -7853,7 +8127,7 @@ ix86_expand_move (enum machine_mode mode, rtx operands[])
 #else
       if (GET_CODE (op0) == MEM)
 	op1 = force_reg (Pmode, op1);
-      else 
+      else
 	op1 = legitimize_address (op1, op1, Pmode);
 #endif /* TARGET_MACHO */
     }
@@ -8123,17 +8397,6 @@ ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
       && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
     src1 = force_reg (mode, src1);
 
-  /* If optimizing, copy to regs to improve CSE */
-  if (optimize && ! no_new_pseudos)
-    {
-      if (GET_CODE (dst) == MEM)
-	dst = gen_reg_rtx (mode);
-      if (GET_CODE (src1) == MEM)
-	src1 = force_reg (mode, src1);
-      if (GET_CODE (src2) == MEM)
-	src2 = force_reg (mode, src2);
-    }
-
   src1 = operands[1] = src1;
   src2 = operands[2] = src2;
   return dst;
@@ -8243,15 +8506,6 @@ ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
   if (MEM_P (src) && !matching_memory)
     src = force_reg (mode, src);
 
-  /* If optimizing, copy to regs to improve CSE.  */
-  if (optimize && ! no_new_pseudos)
-    {
-      if (GET_CODE (dst) == MEM)
-	dst = gen_reg_rtx (mode);
-      if (GET_CODE (src) == MEM)
-	src = force_reg (mode, src);
-    }
-
   /* Emit the instruction.  */
 
   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
@@ -8379,7 +8633,7 @@ ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
   matching_memory = false;
   if (MEM_P (dst))
     {
-      if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
+      if (rtx_equal_p (dst, src))
 	matching_memory = true;
       else
 	dst = gen_reg_rtx (mode);
@@ -9313,10 +9567,12 @@ ix86_expand_branch (enum rtx_code code, rtx label)
     case DImode:
       if (TARGET_64BIT)
 	goto simple;
+    case TImode:
       /* Expand DImode branch into multiple compare+branch.  */
       {
 	rtx lo[2], hi[2], label2;
 	enum rtx_code code1, code2, code3;
+	enum machine_mode submode;
 
 	if (CONSTANT_P (ix86_compare_op0)
 	    && ! CONSTANT_P (ix86_compare_op1))
 	  {
@@ -9325,8 +9581,18 @@ ix86_expand_branch (enum rtx_code code, rtx label)
 	    ix86_compare_op1 = tmp;
 	    code = swap_condition (code);
 	  }
-	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
-	split_di (&ix86_compare_op1, 1, lo+1, hi+1);
+	if (GET_MODE (ix86_compare_op0) == DImode)
+	  {
+	    split_di (&ix86_compare_op0, 1, lo+0, hi+0);
+	    split_di (&ix86_compare_op1, 1, lo+1, hi+1);
+	    submode = SImode;
+	  }
+	else
+	  {
+	    split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
+	    split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
+	    submode = DImode;
+	  }
 
 	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
 	   avoid two branches.  This costs one extra insn, so disable when
@@ -9340,15 +9606,15 @@ ix86_expand_branch (enum rtx_code code, rtx label)
 
 	    xor1 = hi[0];
 	    if (hi[1] != const0_rtx)
-	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
+	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
 				   NULL_RTX, 0, OPTAB_WIDEN);
 
 	    xor0 = lo[0];
 	    if (lo[1] != const0_rtx)
-	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
+	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
 				   NULL_RTX, 0, OPTAB_WIDEN);
 
-	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
+	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
 				NULL_RTX, 0, OPTAB_WIDEN);
 
 	    ix86_compare_op0 = tmp;
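Note: a scalar sketch of the xor/or trick used above for double-word equality,
with 32-bit halves as in the DImode case (the TImode path is identical with
64-bit halves).  One extra OR replaces a second conditional branch:

    static int
    eq_doubleword (unsigned long long a, unsigned long long b)
    {
      unsigned int hi = (unsigned int) (a >> 32) ^ (unsigned int) (b >> 32);
      unsigned int lo = (unsigned int) a ^ (unsigned int) b;
      return (hi | lo) == 0;   /* a single zero test, a single branch */
    }
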
@@ -9516,8 +9782,7 @@ ix86_expand_setcc (enum rtx_code code, rtx dest)
   rtx ret, tmp, tmpreg, equiv;
   rtx second_test, bypass_test;
 
-  if (GET_MODE (ix86_compare_op0) == DImode
-      && !TARGET_64BIT)
+  if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
     return 0; /* FAIL */
 
   gcc_assert (GET_MODE (dest) == QImode);
@@ -9572,7 +9837,7 @@ ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
 
   /* Do not handle DImode compares that go through the special path.
      Also we can't deal with FP compares yet.  This is possible to add.  */
-  if ((mode == DImode && !TARGET_64BIT))
+  if (mode == (TARGET_64BIT ? TImode : DImode))
     return false;
   if (FLOAT_MODE_P (mode))
     {
@@ -9712,7 +9977,7 @@ ix86_expand_int_movcc (rtx operands[])
 	 HImode insns, we'd be swallowed in word prefix ops.  */
 
       if ((mode != HImode || TARGET_FAST_PREFIX)
-	  && (mode != DImode || TARGET_64BIT)
+	  && (mode != (TARGET_64BIT ? TImode : DImode))
 	  && GET_CODE (operands[2]) == CONST_INT
 	  && GET_CODE (operands[3]) == CONST_INT)
 	{
@@ -10497,94 +10762,102 @@ ix86_expand_fp_vcond (rtx operands[])
 
 /* Expand a signed integral vector conditional move.  */
 
 bool
-ix86_expand_int_vcond (rtx operands[], bool unsignedp)
+ix86_expand_int_vcond (rtx operands[])
 {
   enum machine_mode mode = GET_MODE (operands[0]);
   enum rtx_code code = GET_CODE (operands[3]);
-  rtx cmp, x;
+  bool negate = false;
+  rtx x, cop0, cop1;
+
+  cop0 = operands[4];
+  cop1 = operands[5];
 
-  if (unsignedp)
-    code = signed_condition (code);
-  if (code == NE || code == LE || code == GE)
+  /* Canonicalize the comparison to EQ, GT, GTU.  */
+  switch (code)
     {
-      /* Inverse of a supported code.  */
-      x = operands[1];
-      operands[1] = operands[2];
-      operands[2] = x;
+    case EQ:
+    case GT:
+    case GTU:
+      break;
+
+    case NE:
+    case LE:
+    case LEU:
       code = reverse_condition (code);
-    }
-  if (code == LT)
-    {
-      /* Swap of a supported code.  */
-      x = operands[4];
-      operands[4] = operands[5];
-      operands[5] = x;
+      negate = true;
+      break;
+
+    case GE:
+    case GEU:
+      code = reverse_condition (code);
+      negate = true;
+      /* FALLTHRU */
+
+    case LT:
+    case LTU:
       code = swap_condition (code);
-    }
-  gcc_assert (code == EQ || code == GT);
+      x = cop0, cop0 = cop1, cop1 = x;
+      break;
 
-  /* Unlike floating-point, we can rely on the optimizers to have already
-     converted to MIN/MAX expressions, so we don't have to handle that.  */
+    default:
+      gcc_unreachable ();
+    }
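Note: a scalar analogue of the canonicalization above — every integer
comparison reduces to EQ/GT/GTU by swapping the comparison operands and/or
swapping the two value arms of the select (that is what the negate flag does):

    int lt (int a, int b) { return b > a;     }   /* LT: swap operands       */
    int le (int a, int b) { return !(a > b);  }   /* LE: reversed to GT, negated */
    int ne (int a, int b) { return !(a == b); }   /* NE: reversed to EQ, negated */
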
-  /* Unsigned GT is not directly supported.  We can zero-extend QI and
-     HImode elements to the next wider element size, use a signed compare,
-     then repack.  For three extra instructions, this is definitely a win.  */
-  if (code == GT && unsignedp)
+  /* Unsigned parallel compare is not supported by the hardware.  Play some
+     tricks to turn this into a signed comparison against 0.  */
+  if (code == GTU)
     {
-      rtx o0l, o0h, o1l, o1h, cl, ch, zero;
-      enum machine_mode wider;
-      rtx (*unpackl) (rtx, rtx, rtx);
-      rtx (*unpackh) (rtx, rtx, rtx);
-      rtx (*pack) (rtx, rtx, rtx);
-
       switch (mode)
 	{
-	case V16QImode:
-	  wider = V8HImode;
-	  unpackl = gen_sse2_punpcklbw;
-	  unpackh = gen_sse2_punpckhbw;
-	  pack = gen_sse2_packsswb;
+	case V4SImode:
+	  {
+	    rtx t1, t2, mask;
+
+	    /* Perform a parallel modulo subtraction.  */
+	    t1 = gen_reg_rtx (mode);
+	    emit_insn (gen_subv4si3 (t1, cop0, cop1));
+
+	    /* Extract the original sign bit of op0.  */
+	    mask = GEN_INT (-0x80000000);
+	    mask = gen_rtx_CONST_VECTOR (mode,
+					 gen_rtvec (4, mask, mask, mask, mask));
+	    mask = force_reg (mode, mask);
+	    t2 = gen_reg_rtx (mode);
+	    emit_insn (gen_andv4si3 (t2, cop0, mask));
+
+	    /* XOR it back into the result of the subtraction.  This results
+	       in the sign bit set iff we saw unsigned underflow.  */
+	    x = gen_reg_rtx (mode);
+	    emit_insn (gen_xorv4si3 (x, t1, t2));
+
+	    code = GT;
+	  }
 	  break;
+
+	case V16QImode:
 	case V8HImode:
-	  wider = V4SImode;
-	  unpackl = gen_sse2_punpcklwd;
-	  unpackh = gen_sse2_punpckhwd;
-	  pack = gen_sse2_packssdw;
+	  /* Perform a parallel unsigned saturating subtraction.  */
+	  x = gen_reg_rtx (mode);
+	  emit_insn (gen_rtx_SET (VOIDmode, x,
+				  gen_rtx_US_MINUS (mode, cop0, cop1)));
+
+	  code = EQ;
+	  negate = !negate;
 	  break;
+
 	default:
 	  gcc_unreachable ();
 	}
 
-      operands[4] = force_reg (mode, operands[4]);
-      operands[5] = force_reg (mode, operands[5]);
-
-      o0l = gen_reg_rtx (wider);
-      o0h = gen_reg_rtx (wider);
-      o1l = gen_reg_rtx (wider);
-      o1h = gen_reg_rtx (wider);
-      cl = gen_reg_rtx (wider);
-      ch = gen_reg_rtx (wider);
-      cmp = gen_reg_rtx (mode);
-      zero = force_reg (mode, CONST0_RTX (mode));
-
-      emit_insn (unpackl (gen_lowpart (mode, o0l), operands[4], zero));
-      emit_insn (unpackh (gen_lowpart (mode, o0h), operands[4], zero));
-      emit_insn (unpackl (gen_lowpart (mode, o1l), operands[5], zero));
-      emit_insn (unpackh (gen_lowpart (mode, o1h), operands[5], zero));
-
-      x = gen_rtx_GT (wider, o0l, o1l);
-      emit_insn (gen_rtx_SET (VOIDmode, cl, x));
-
-      x = gen_rtx_GT (wider, o0h, o1h);
-      emit_insn (gen_rtx_SET (VOIDmode, ch, x));
-
-      emit_insn (pack (cmp, cl, ch));
+      cop0 = x;
+      cop1 = CONST0_RTX (mode);
     }
-  else
-    cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
-			       operands[1], operands[2]);
 
-  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
+  x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
+			   operands[1+negate], operands[2-negate]);
+
+  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
+			 operands[2-negate]);
 
   return true;
 }
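Note: a one-lane scalar sketch of the V16QImode/V8HImode path above.  With
unsigned saturation, a - b is zero exactly when a <= b, so "a >u b" is the
negation of an equality test against zero — which the expander gets for free
by flipping negate, swapping the two value operands of the select:

    static unsigned char
    sat_sub_u8 (unsigned char a, unsigned char b)   /* one lane of psubusb */
    {
      return a > b ? (unsigned char) (a - b) : 0;
    }

    /* a >u b   <=>   !(sat_sub_u8 (a, b) == 0)  */
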
@@ -11053,68 +11326,81 @@ ix86_split_long_move (rtx operands[])
   return;
 }
 
-/* Helper function of ix86_split_ashldi used to generate an SImode
+/* Helper function of ix86_split_ashl used to generate an SImode/DImode
    left shift by a constant, either using a single shift or a sequence of
    add instructions.  */
 
 static void
-ix86_expand_ashlsi3_const (rtx operand, int count)
+ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
 {
   if (count == 1)
-    emit_insn (gen_addsi3 (operand, operand, operand));
+    {
+      emit_insn ((mode == DImode
+		  ? gen_addsi3
+		  : gen_adddi3) (operand, operand, operand));
+    }
   else if (!optimize_size
 	   && count * ix86_cost->add <= ix86_cost->shift_const)
     {
       int i;
       for (i=0; i<count; i++)
-	emit_insn (gen_addsi3 (operand, operand, operand));
+	{
+	  emit_insn ((mode == DImode
+		      ? gen_addsi3
+		      : gen_adddi3) (operand, operand, operand));
+	}
     }
   else
-    emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));
+    emit_insn ((mode == DImode
+		? gen_ashlsi3
+		: gen_ashldi3) (operand, operand, GEN_INT (count)));
 }
 
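Note: a scalar sketch, with 32-bit halves, of the constant-count doubleword
left shift that ix86_split_ashl below open-codes (the TImode variant is the
same with 64-bit halves; the count is assumed already masked, as in the code):

    static void
    shl64 (unsigned int *hi, unsigned int *lo, int count)
    {
      if (count >= 32)
        {
          *hi = *lo << (count - 32);   /* the low half moves up ...      */
          *lo = 0;                     /* ... and the low half is cleared */
        }
      else if (count > 0)
        {
          *hi = (*hi << count) | (*lo >> (32 - count));   /* like shld */
          *lo <<= count;
        }
    }
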
 void
-ix86_split_ashldi (rtx *operands, rtx scratch)
+ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
 {
   rtx low[2], high[2];
   int count;
+  const int single_width = mode == DImode ? 32 : 64;
 
   if (GET_CODE (operands[2]) == CONST_INT)
     {
-      split_di (operands, 2, low, high);
-      count = INTVAL (operands[2]) & 63;
+      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
+      count = INTVAL (operands[2]) & (single_width * 2 - 1);
 
-      if (count >= 32)
+      if (count >= single_width)
 	{
 	  emit_move_insn (high[0], low[1]);
 	  emit_move_insn (low[0], const0_rtx);
 
-	  if (count > 32)
-	    ix86_expand_ashlsi3_const (high[0], count - 32);
+	  if (count > single_width)
+	    ix86_expand_ashl_const (high[0], count - single_width, mode);
 	}
       else
 	{
 	  if (!rtx_equal_p (operands[0], operands[1]))
 	    emit_move_insn (operands[0], operands[1]);
-	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
-	  ix86_expand_ashlsi3_const (low[0], count);
+	  emit_insn ((mode == DImode
+		      ? gen_x86_shld_1
+		      : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
+	  ix86_expand_ashl_const (low[0], count, mode);
 	}
       return;
     }
 
-  split_di (operands, 1, low, high);
+  (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
 
   if (operands[1] == const1_rtx)
     {
-      /* Assuming we've chosen a QImode capable registers, then 1LL << N
-	 can be done with two 32-bit shifts, no branches, no cmoves.  */
+      /* Assuming we've chosen a QImode-capable register, then 1 << N
+	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
       if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
 	{
 	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
 
 	  ix86_expand_clear (low[0]);
 	  ix86_expand_clear (high[0]);
-	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
+	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
 
 	  d = gen_lowpart (QImode, low[0]);
 	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
@@ -11128,7 +11414,7 @@ ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
 	}
 
       /* Otherwise, we can get the same results by manually performing
-	 a bit extract operation on bit 5, and then performing the two
+	 a bit extract operation on bit 5/6, and then performing the two
 	 shifts.  The two methods of getting 0/1 into low/high are exactly
 	 the same size.  Avoiding the shift in the bit extract case helps
 	 pentium4 a bit; no one else seems to care much either way.  */
@@ -11137,29 +11423,39 @@ ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
 	  rtx x;
 
 	  if (TARGET_PARTIAL_REG_STALL && !optimize_size)
-	    x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
+	    x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
 	  else
-	    x = gen_lowpart (SImode, operands[2]);
+	    x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
 	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
 
-	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
-	  emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
+	  emit_insn ((mode == DImode
+		      ? gen_lshrsi3
+		      : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
+	  emit_insn ((mode == DImode
+		      ? gen_andsi3
+		      : gen_anddi3) (high[0], high[0], GEN_INT (1)));
 	  emit_move_insn (low[0], high[0]);
-	  emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
+	  emit_insn ((mode == DImode
+		      ? gen_xorsi3
+		      : gen_xordi3) (low[0], low[0], GEN_INT (1)));
 	}
 
-      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
-      emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
+      emit_insn ((mode == DImode
+		  ? gen_ashlsi3
+		  : gen_ashldi3) (low[0], low[0], operands[2]));
+      emit_insn ((mode == DImode
+		  ? gen_ashlsi3
+		  : gen_ashldi3) (high[0], high[0], operands[2]));
       return;
     }
 
   if (operands[1] == constm1_rtx)
     {
-      /* For -1LL << N, we can avoid the shld instruction, because we
-	 know that we're shifting 0...31 ones into a -1.  */
+      /* For -1 << N, we can avoid the shld instruction, because we
+	 know that we're shifting 0...31/63 ones into a -1.  */
       emit_move_insn (low[0], constm1_rtx);
       if (optimize_size)
-	emit_move_insn (high[0], low[0]); 
+	emit_move_insn (high[0], low[0]);
       else
 	emit_move_insn (high[0], constm1_rtx);
     }
@@ -11168,53 +11464,71 @@ ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
       if (!rtx_equal_p (operands[0], operands[1]))
 	emit_move_insn (operands[0], operands[1]);
 
-      split_di (operands, 1, low, high);
-      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
+      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
+      emit_insn ((mode == DImode
+		  ? gen_x86_shld_1
+		  : gen_x86_64_shld) (high[0], low[0], operands[2]));
     }
 
-  emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
+  emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
 
   if (TARGET_CMOVE && scratch)
     {
       ix86_expand_clear (scratch);
-      emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
+      emit_insn ((mode == DImode
+		  ? gen_x86_shift_adj_1
+		  : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
     }
   else
     emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
 }
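Note: a scalar rendering of the branchless 1 << N path above (32-bit halves):
bit 5 of the count says which half receives the 1, and the hardware's
masking of shift counts to 5 bits does the rest:

    static void
    one_shl64 (unsigned int *hi, unsigned int *lo, unsigned int n)  /* n in 0..63 */
    {
      *hi = (n >> 5) & 1;    /* 1 iff n >= 32 */
      *lo = *hi ^ 1;         /* the other half gets the 1 */
      *lo <<= (n & 31);      /* 32-bit shifts ignore the upper count bits */
      *hi <<= (n & 31);
    }
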
 
 void
-ix86_split_ashrdi (rtx *operands, rtx scratch)
+ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
 {
   rtx low[2], high[2];
   int count;
+  const int single_width = mode == DImode ? 32 : 64;
 
   if (GET_CODE (operands[2]) == CONST_INT)
     {
-      split_di (operands, 2, low, high);
-      count = INTVAL (operands[2]) & 63;
+      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
+      count = INTVAL (operands[2]) & (single_width * 2 - 1);
 
-      if (count == 63)
+      if (count == single_width * 2 - 1)
 	{
 	  emit_move_insn (high[0], high[1]);
-	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
+	  emit_insn ((mode == DImode
+		      ? gen_ashrsi3
+		      : gen_ashrdi3) (high[0], high[0],
+				      GEN_INT (single_width - 1)));
 	  emit_move_insn (low[0], high[0]);
 	}
-      else if (count >= 32)
+      else if (count >= single_width)
 	{
 	  emit_move_insn (low[0], high[1]);
 	  emit_move_insn (high[0], low[0]);
-	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
-	  if (count > 32)
-	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
+	  emit_insn ((mode == DImode
+		      ? gen_ashrsi3
+		      : gen_ashrdi3) (high[0], high[0],
+				      GEN_INT (single_width - 1)));
+	  if (count > single_width)
+	    emit_insn ((mode == DImode
			? gen_ashrsi3
			: gen_ashrdi3) (low[0], low[0],
					GEN_INT (count - single_width)));
 	}
       else
 	{
 	  if (!rtx_equal_p (operands[0], operands[1]))
 	    emit_move_insn (operands[0], operands[1]);
-	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
-	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
+	  emit_insn ((mode == DImode
+		      ? gen_x86_shrd_1
+		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
+	  emit_insn ((mode == DImode
+		      ? gen_ashrsi3
+		      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
 	}
     }
   else
@@ -11222,17 +11536,26 @@ ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
       if (!rtx_equal_p (operands[0], operands[1]))
 	emit_move_insn (operands[0], operands[1]);
 
-      split_di (operands, 1, low, high);
+      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
 
-      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
-      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
+      emit_insn ((mode == DImode
+		  ? gen_x86_shrd_1
+		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
+      emit_insn ((mode == DImode
+		  ? gen_ashrsi3
+		  : gen_ashrdi3) (high[0], high[0], operands[2]));
 
       if (TARGET_CMOVE && scratch)
 	{
 	  emit_move_insn (scratch, high[0]);
-	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
-	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
-					  scratch));
+	  emit_insn ((mode == DImode
+		      ? gen_ashrsi3
+		      : gen_ashrdi3) (scratch, scratch,
				      GEN_INT (single_width - 1)));
+	  emit_insn ((mode == DImode
+		      ? gen_x86_shift_adj_1
+		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
					       scratch));
 	}
       else
 	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
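Note: the arithmetic-shift analogue of the earlier sketch, again with 32-bit
halves.  Counts of 32 or more move the high half down and replicate its sign
bit, exactly as the constant-count branch above does with a shift by
single_width - 1 (this assumes arithmetic >> on signed int, as on x86):

    static void
    sar64 (int *hi, unsigned int *lo, int count)
    {
      if (count >= 32)
        {
          *lo = (unsigned int) (*hi >> (count - 32));
          *hi >>= 31;              /* fill the high half with sign bits */
        }
      else if (count > 0)
        {
          *lo = (*lo >> count) | ((unsigned int) *hi << (32 - count));  /* shrd */
          *hi >>= count;
        }
    }
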
@@ -11240,30 +11563,38 @@ ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
 }
 
 void
-ix86_split_lshrdi (rtx *operands, rtx scratch)
+ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
 {
   rtx low[2], high[2];
   int count;
+  const int single_width = mode == DImode ? 32 : 64;
 
   if (GET_CODE (operands[2]) == CONST_INT)
     {
-      split_di (operands, 2, low, high);
-      count = INTVAL (operands[2]) & 63;
+      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
+      count = INTVAL (operands[2]) & (single_width * 2 - 1);
 
-      if (count >= 32)
+      if (count >= single_width)
 	{
 	  emit_move_insn (low[0], high[1]);
 	  ix86_expand_clear (high[0]);
 
-	  if (count > 32)
-	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
+	  if (count > single_width)
+	    emit_insn ((mode == DImode
			? gen_lshrsi3
			: gen_lshrdi3) (low[0], low[0],
					GEN_INT (count - single_width)));
 	}
       else
 	{
 	  if (!rtx_equal_p (operands[0], operands[1]))
 	    emit_move_insn (operands[0], operands[1]);
-	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
-	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
+	  emit_insn ((mode == DImode
		      ? gen_x86_shrd_1
		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
+	  emit_insn ((mode == DImode
		      ? gen_lshrsi3
		      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
 	}
     }
   else
@@ -11271,17 +11602,23 @@ ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
       if (!rtx_equal_p (operands[0], operands[1]))
 	emit_move_insn (operands[0], operands[1]);
 
-      split_di (operands, 1, low, high);
+      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
 
-      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
-      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
+      emit_insn ((mode == DImode
		  ? gen_x86_shrd_1
		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
+      emit_insn ((mode == DImode
		  ? gen_lshrsi3
		  : gen_lshrdi3) (high[0], high[0], operands[2]));
 
       /* Heh.  By reversing the arguments, we can reuse this pattern.  */
       if (TARGET_CMOVE && scratch)
 	{
 	  ix86_expand_clear (scratch);
-	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
-					  scratch));
+	  emit_insn ((mode == DImode
		      ? gen_x86_shift_adj_1
		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
					       scratch));
 	}
       else
 	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
@@ -14647,7 +14984,7 @@ get_element_number (tree vec_type, tree arg)
 
   if (!host_integerp (arg, 1)
       || (elt = tree_low_cst (arg, 1), elt > max))
     {
-      error ("selector must be an integer constant in the range 0..%i", max);
+      error ("selector must be an integer constant in the range 0..%wi", max);
       return 0;
     }
 
@@ -17376,7 +17713,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
     }
 }
 
-/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binar
+/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
    pattern to reduce; DEST is the destination; IN is the input vector.  */
 
 void
@@ -17431,6 +17768,49 @@ ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
   return clobbers;
 }
 
+/* Return true if this goes in large data/bss.  */
+
+static bool
+ix86_in_large_data_p (tree exp)
+{
+  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
+    return false;
+
+  /* Functions are never large data.  */
+  if (TREE_CODE (exp) == FUNCTION_DECL)
+    return false;
+
+  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
+    {
+      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
+      if (strcmp (section, ".ldata") == 0
+	  || strcmp (section, ".lbss") == 0)
+	return true;
+      return false;
+    }
+  else
+    {
+      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
+
+      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
+      if (!size || size > ix86_section_threshold)
+	return true;
+    }
+
+  return false;
+}
+
+static void
+ix86_encode_section_info (tree decl, rtx rtl, int first)
+{
+  default_encode_section_info (decl, rtl, first);
+
+  if (TREE_CODE (decl) == VAR_DECL
+      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
+      && ix86_in_large_data_p (decl))
+    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
+}
+
 /* Worker function for REVERSE_CONDITION.  */
 
 enum rtx_code
@@ -17561,4 +17941,43 @@ ix86_mangle_fundamental_type (tree type)
     }
 }
 
+/* For 32-bit code we can save PIC register setup by using the hidden
+   __stack_chk_fail_local function instead of calling __stack_chk_fail
+   directly.  64-bit code doesn't need to set up any PIC register, so it
+   is better to call __stack_chk_fail directly.  */
+
+static tree
+ix86_stack_protect_fail (void)
+{
+  return TARGET_64BIT
+	 ? default_external_stack_protect_fail ()
+	 : default_hidden_stack_protect_fail ();
+}
+
+/* Select a format to encode pointers in exception handling data.  CODE
+   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
+   true if the symbol may be affected by dynamic relocations.
+
+   ??? All x86 object file formats are capable of representing this.
+   After all, the relocation needed is the same as for the call insn.
+   Whether or not a particular assembler allows us to enter such, I
+   guess we'll have to see.  */
+
+int
+asm_preferred_eh_data_format (int code, int global)
+{
+  if (flag_pic)
+    {
+      int type = DW_EH_PE_sdata8;
+      if (!TARGET_64BIT
+	  || ix86_cmodel == CM_SMALL_PIC
+	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
+	type = DW_EH_PE_sdata4;
+      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
+    }
+  if (ix86_cmodel == CM_SMALL
+      || (ix86_cmodel == CM_MEDIUM && code))
+    return DW_EH_PE_udata4;
+  return DW_EH_PE_absptr;
+}
+
 #include "gt-i386.h"
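Note: for reference, the DW_EH_PE_* values combined above (from gcc's
dwarf2.h) and a few representative results, assuming the logic as written:

    /* DW_EH_PE_absptr = 0x00   DW_EH_PE_udata4   = 0x03
       DW_EH_PE_sdata4 = 0x0b   DW_EH_PE_sdata8   = 0x0c
       DW_EH_PE_pcrel  = 0x10   DW_EH_PE_indirect = 0x80

       -m32 -fpic, data reference (code = 0, global = 1):
           DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4   (0x9b)
       -m64 -fpic -mcmodel=small, code label (code = 1, global = 0):
           DW_EH_PE_pcrel | DW_EH_PE_sdata4                       (0x1b)
       -m64 -mcmodel=small, non-PIC, any reference:
           DW_EH_PE_udata4                                        (0x03)  */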