X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;ds=sidebyside;f=gcc%2Fconfig%2Fi386%2Fi386.c;h=10aac6e4e7be706119f58762883ac31471c93d54;hb=88cbb88160f4da2536638767e87ddebf157e45dc;hp=6431d9bfa3225c70ff80b893ce7580647b94aa33;hpb=f94eb6dbd245e4f7413928e21ff3694a87cbb948;p=pf3gnuchains%2Fgcc-fork.git

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 6431d9bfa32..10aac6e4e7b 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1880,6 +1880,7 @@ static bool ext_80387_constants_init = 0;
 
 static struct machine_function * ix86_init_machine_status (void);
 static rtx ix86_function_value (const_tree, const_tree, bool);
+static bool ix86_function_value_regno_p (const unsigned int);
 static rtx ix86_static_chain (const_tree, bool);
 static int ix86_function_regparm (const_tree, const_tree);
 static void ix86_compute_frame_layout (struct ix86_frame *);
@@ -2512,7 +2513,7 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune,
   if (isa && add_nl_p)
     {
       opts[num++][0] = isa_other;
-      sprintf (isa_other, "(other isa: 0x%x)", isa);
+      sprintf (isa_other, "(other isa: %#x)", isa);
     }
 
   /* Add flag options.  */
@@ -2528,7 +2529,7 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune,
   if (flags && add_nl_p)
     {
       opts[num++][0] = target_other;
-      sprintf (target_other, "(other flags: 0x%x)", isa);
+      sprintf (target_other, "(other flags: %#x)", flags);
     }
 
   /* Add -fpmath= option.  */
@@ -6331,8 +6332,8 @@ ix86_function_arg_boundary (enum machine_mode mode, tree type)
 
 /* Return true if N is a possible register number of function value.  */
 
-bool
-ix86_function_value_regno_p (int regno)
+static bool
+ix86_function_value_regno_p (const unsigned int regno)
 {
   switch (regno)
     {
@@ -6790,7 +6791,6 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
 {
   rtx save_area, mem;
   rtx label;
-  rtx label_ref;
   rtx tmp_reg;
   rtx nsse_reg;
   alias_set_type set;
@@ -6841,35 +6841,9 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
          SSE saves.  We need some preparation work to get this working.  */
 
       label = gen_label_rtx ();
-      label_ref = gen_rtx_LABEL_REF (Pmode, label);
 
-      /* Compute address to jump to :
-         label - eax*4 + nnamed_sse_arguments*4 Or
-         label - eax*5 + nnamed_sse_arguments*5 for AVX.  */
-      tmp_reg = gen_reg_rtx (Pmode);
       nsse_reg = gen_reg_rtx (Pmode);
       emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
-      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
-                              gen_rtx_MULT (Pmode, nsse_reg,
-                                            GEN_INT (4))));
-
-      /* vmovaps is one byte longer than movaps.  */
-      if (TARGET_AVX)
-        emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
-                                gen_rtx_PLUS (Pmode, tmp_reg,
-                                              nsse_reg)));
-
-      if (cum->sse_regno)
-        emit_move_insn
-          (nsse_reg,
-           gen_rtx_CONST (DImode,
-                          gen_rtx_PLUS (DImode,
-                                        label_ref,
-                                        GEN_INT (cum->sse_regno
-                                                 * (TARGET_AVX ? 5 : 4)))));
-      else
-        emit_move_insn (nsse_reg, label_ref);
-      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
 
       /* Compute address of memory block we save into.  We always use pointer
          pointing 127 bytes after first byte to store - this is needed to keep
@@ -6882,11 +6856,12 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
       mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
       MEM_NOTRAP_P (mem) = 1;
       set_mem_alias_set (mem, set);
-      set_mem_align (mem, BITS_PER_WORD);
+      set_mem_align (mem, 64);
 
       /* And finally do the dirty job!  */
       emit_insn (gen_sse_prologue_save (mem, nsse_reg,
-                                        GEN_INT (cum->sse_regno), label));
+                                        GEN_INT (cum->sse_regno), label,
+                                        gen_reg_rtx (Pmode)));
     }
 }
 
@@ -7047,7 +7022,7 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
   int indirect_p = 0;
   tree ptrtype;
   enum machine_mode nat_mode;
-  int arg_boundary;
+  unsigned int arg_boundary;
 
   /* Only 64bit target needs something special.  */
   if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
@@ -7279,6 +7254,8 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
                   size_int (-align));
       t = fold_convert (TREE_TYPE (ovf), t);
+      if (crtl->stack_alignment_needed < arg_boundary)
+        crtl->stack_alignment_needed = arg_boundary;
     }
   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
   gimplify_assign (addr, t, pre_p);
@@ -8034,6 +8011,7 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
       && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
     {
       int count = frame->nregs;
+      struct cgraph_node *node = cgraph_node (current_function_decl);
       cfun->machine->use_fast_prologue_epilogue_nregs = count;
 
       /* The fast prologue uses move instead of push to save registers.  This
@@ -8048,9 +8026,9 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
          slow to use many of them.  */
       if (count)
         count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
-      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
+      if (node->frequency < NODE_FREQUENCY_NORMAL
           || (flag_branch_probabilities
-              && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
+              && node->frequency < NODE_FREQUENCY_HOT))
         cfun->machine->use_fast_prologue_epilogue = false;
       else
         cfun->machine->use_fast_prologue_epilogue
@@ -9381,6 +9359,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
   rtx base_reg, index_reg;
   HOST_WIDE_INT scale = 1;
   rtx scale_rtx = NULL_RTX;
+  rtx tmp;
   int retval = 1;
   enum ix86_address_seg seg = SEG_DEFAULT;
 
@@ -9416,6 +9395,19 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
           scale_rtx = XEXP (op, 1);
           break;
 
+        case ASHIFT:
+          if (index)
+            return 0;
+          index = XEXP (op, 0);
+          tmp = XEXP (op, 1);
+          if (!CONST_INT_P (tmp))
+            return 0;
+          scale = INTVAL (tmp);
+          if ((unsigned HOST_WIDE_INT) scale > 3)
+            return 0;
+          scale = 1 << scale;
+          break;
+
         case UNSPEC:
           if (XINT (op, 1) == UNSPEC_TP
               && TARGET_TLS_DIRECT_SEG_REFS
@@ -9456,8 +9448,6 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
     }
   else if (GET_CODE (addr) == ASHIFT)
     {
-      rtx tmp;
-
      /* We're called for lea too, which implements ashift on occasion.  */
       index = XEXP (addr, 0);
       tmp = XEXP (addr, 1);
@@ -11431,7 +11421,7 @@ get_some_local_dynamic_name (void)
     return cfun->machine->some_ld_name;
 
   for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
-    if (INSN_P (insn)
+    if (NONDEBUG_INSN_P (insn)
         && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
       return cfun->machine->some_ld_name;
 
@@ -13677,7 +13667,7 @@ distance_non_agu_define (unsigned int regno1, unsigned int regno2,
       rtx prev = PREV_INSN (insn);
       while (prev && distance < LEA_SEARCH_THRESHOLD)
         {
-          if (INSN_P (prev))
+          if (NONDEBUG_INSN_P (prev))
             {
               distance++;
               for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
@@ -13717,7 +13707,7 @@ distance_non_agu_define (unsigned int regno1, unsigned int regno2,
              && prev != insn
              && distance < LEA_SEARCH_THRESHOLD)
         {
-          if (INSN_P (prev))
+          if (NONDEBUG_INSN_P (prev))
             {
               distance++;
               for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
@@ -13763,7 +13753,7 @@ distance_agu_use (unsigned int regno0, rtx insn)
       rtx next = NEXT_INSN (insn);
      while (next && distance < LEA_SEARCH_THRESHOLD)
         {
-          if (INSN_P (next))
+          if (NONDEBUG_INSN_P (next))
             {
              distance++;
 
@@ -13812,7 +13802,7 @@ distance_agu_use (unsigned int regno0, rtx insn)
              && next != insn
              && distance < LEA_SEARCH_THRESHOLD)
         {
-          if (INSN_P (next))
+          if (NONDEBUG_INSN_P (next))
            {
              distance++;
 
@@ -17224,8 +17214,8 @@ ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
         }
       else
         emit_insn ((mode == DImode
-                    ? gen_x86_shift_adj_3
-                    : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
+                    ? gen_x86_shiftsi_adj_3
+                    : gen_x86_shiftdi_adj_3) (low[0], high[0], operands[2]));
     }
 }
 
@@ -20099,10 +20089,26 @@ ix86_local_alignment (tree exp, enum machine_mode mode,
     }
 
   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
-     to 16byte boundary.  */
-  if (TARGET_64BIT)
+     to 16byte boundary.  Exact wording is:
+
+     An array uses the same alignment as its elements, except that a local or
+     global array variable of length at least 16 bytes or
+     a C99 variable-length array variable always has alignment of at least 16 bytes.
+
+     This was added to allow use of aligned SSE instructions at arrays.  This
+     rule is meant for static storage (where compiler can not do the analysis
+     by itself).  We follow it for automatic variables only when convenient.
+     We fully control everything in the function compiled and functions from
+     other unit can not rely on the alignment.
+
+     Exclude va_list type.  It is the common case of local array where
+     we can not benefit from the alignment.  */
+  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
+      && TARGET_SSE)
     {
       if (AGGREGATE_TYPE_P (type)
+          && (TYPE_MAIN_VARIANT (type)
+              != TYPE_MAIN_VARIANT (va_list_type_node))
           && TYPE_SIZE (type)
           && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
           && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
@@ -26232,11 +26238,13 @@ x86_this_parameter (tree function)
 
       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
         regno = aggr ? DX_REG : CX_REG;
-      /* ???: To be verified.  It is not absolutely clear how aggregates
-         have to be treated for thiscall.  We assume that they are
-         identical to fastcall.  */
       else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
-        regno = aggr ? DX_REG : CX_REG;
+        {
+          regno = CX_REG;
+          if (aggr)
+            return gen_rtx_MEM (SImode,
+                                plus_constant (stack_pointer_rtx, 4));
+        }
       else
         {
           regno = AX_REG;
@@ -26700,7 +26708,7 @@ ix86_pad_returns (void)
             replace = true;
           /* Empty functions get branch mispredict even when the jump destination
              is not visible to us.  */
-          if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
+          if (!prev && !optimize_function_for_size_p (cfun))
             replace = true;
         }
       if (replace)
@@ -30649,6 +30657,9 @@ ix86_enum_va_list (int idx, const char **pname, tree *ptree)
 #undef TARGET_FUNCTION_VALUE
 #define TARGET_FUNCTION_VALUE ix86_function_value
 
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
+
 #undef TARGET_SECONDARY_RELOAD
 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
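
Side note, not part of the patch: the ASHIFT case added to ix86_decompose_address above accepts shift counts 0..3 only, because x86 addressing modes can scale an index register by 1, 2, 4 or 8. Below is a minimal standalone C sketch of that count-to-scale check; decode_ashift_scale is a hypothetical name used purely for illustration and is not a GCC function.

#include <stdbool.h>
#include <stdio.h>

/* Illustration of the check performed by the new ASHIFT case in
   ix86_decompose_address: an (index << count) address can be rewritten
   as index * scale only when count is 0..3, i.e. scale 1, 2, 4 or 8.  */
static bool
decode_ashift_scale (long count, long *scale)
{
  if (count < 0 || count > 3)
    return false;               /* e.g. (reg << 4) has no scaled-index form.  */
  *scale = 1L << count;         /* 0 -> 1, 1 -> 2, 2 -> 4, 3 -> 8.  */
  return true;
}

int
main (void)
{
  for (long count = 0; count <= 4; count++)
    {
      long scale;
      if (decode_ashift_scale (count, &scale))
        printf ("shift %ld -> scale %ld\n", count, scale);
      else
        printf ("shift %ld -> not representable as an address scale\n", count);
    }
  return 0;
}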