X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Fconfig%2Fi386%2Fi386.c;h=407e37c7067f11382461f03b703282ccc7a9d800;hb=f79785036c34a21e90b80d29507b630d6d1b662b;hp=f91410a2d6623532759f0b30ee3fd2ccf0562d29;hpb=1870b3a03c94040a9afddf30f8a0cc91e138b981;p=pf3gnuchains%2Fgcc-fork.git

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index f91410a2d66..407e37c7067 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -6790,6 +6790,7 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
 {
   rtx save_area, mem;
   rtx label;
+  rtx label_ref;
   rtx tmp_reg;
   rtx nsse_reg;
   alias_set_type set;
@@ -6840,9 +6841,35 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
          SSE saves.  We need some preparation work to get this working.  */
 
       label = gen_label_rtx ();
+      label_ref = gen_rtx_LABEL_REF (Pmode, label);
+      /* Compute address to jump to :
+         label - eax*4 + nnamed_sse_arguments*4 Or
+         label - eax*5 + nnamed_sse_arguments*5 for AVX.  */
+      tmp_reg = gen_reg_rtx (Pmode);
       nsse_reg = gen_reg_rtx (Pmode);
       emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
+      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
+                              gen_rtx_MULT (Pmode, nsse_reg,
+                                            GEN_INT (4))));
+
+      /* vmovaps is one byte longer than movaps.  */
+      if (TARGET_AVX)
+        emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
+                                gen_rtx_PLUS (Pmode, tmp_reg,
+                                              nsse_reg)));
+
+      if (cum->sse_regno)
+        emit_move_insn
+          (nsse_reg,
+           gen_rtx_CONST (DImode,
+                          gen_rtx_PLUS (DImode,
+                                        label_ref,
+                                        GEN_INT (cum->sse_regno
+                                                 * (TARGET_AVX ? 5 : 4)))));
+      else
+        emit_move_insn (nsse_reg, label_ref);
+      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
 
       /* Compute address of memory block we save into.  We always use pointer
          pointing 127 bytes after first byte to store - this is needed to keep
@@ -6855,12 +6882,11 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
       mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
       MEM_NOTRAP_P (mem) = 1;
       set_mem_alias_set (mem, set);
-      set_mem_align (mem, 64);
+      set_mem_align (mem, BITS_PER_WORD);
 
       /* And finally do the dirty job!  */
       emit_insn (gen_sse_prologue_save (mem, nsse_reg,
-                                        GEN_INT (cum->sse_regno), label,
-                                        gen_reg_rtx (Pmode)));
+                                        GEN_INT (cum->sse_regno), label));
     }
 }
 
@@ -7021,7 +7047,7 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
   int indirect_p = 0;
   tree ptrtype;
   enum machine_mode nat_mode;
-  unsigned int arg_boundary;
+  int arg_boundary;
 
   /* Only 64bit target needs something special.  */
   if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
@@ -7253,8 +7279,6 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
                   size_int (-align));
       t = fold_convert (TREE_TYPE (ovf), t);
-      if (crtl->stack_alignment_needed < arg_boundary)
-        crtl->stack_alignment_needed = arg_boundary;
     }
   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
   gimplify_assign (addr, t, pre_p);
@@ -9357,7 +9381,6 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
   rtx base_reg, index_reg;
   HOST_WIDE_INT scale = 1;
   rtx scale_rtx = NULL_RTX;
-  rtx tmp;
   int retval = 1;
   enum ix86_address_seg seg = SEG_DEFAULT;
 
@@ -9393,19 +9416,6 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
               scale_rtx = XEXP (op, 1);
               break;
 
-            case ASHIFT:
-              if (index)
-                return 0;
-              index = XEXP (op, 0);
-              tmp = XEXP (op, 1);
-              if (!CONST_INT_P (tmp))
-                return 0;
-              scale = INTVAL (tmp);
-              if ((unsigned HOST_WIDE_INT) scale > 3)
-                return 0;
-              scale = 1 << scale;
-              break;
-
             case UNSPEC:
               if (XINT (op, 1) == UNSPEC_TP
                   && TARGET_TLS_DIRECT_SEG_REFS
@@ -9446,6 +9456,8 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
     }
   else if (GET_CODE (addr) == ASHIFT)
     {
+      rtx tmp;
+
       /* We're called for lea too, which implements ashift on occasion.  */
       index = XEXP (addr, 0);
       tmp = XEXP (addr, 1);
@@ -17118,22 +17130,20 @@ ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
                                        : gen_x86_64_shld) (high[0], low[0], operands[2]));
         }
 
-      emit_insn ((mode == DImode
-                 ? gen_ashlsi3
-                 : gen_ashldi3) (low[0], low[0], operands[2]));
+      emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
 
       if (TARGET_CMOVE && scratch)
         {
          ix86_expand_clear (scratch);
          emit_insn ((mode == DImode
-                      ? gen_x86_shiftsi_adj_1
-                      : gen_x86_shiftdi_adj_1) (high[0], low[0], operands[2],
-                                                scratch));
+                      ? gen_x86_shift_adj_1
+                      : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
+                                                 scratch));
         }
       else
        emit_insn ((mode == DImode
-                    ? gen_x86_shiftsi_adj_2
-                    : gen_x86_shiftdi_adj_2) (high[0], low[0], operands[2]));
+                    ? gen_x86_shift_adj_2
+                    : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
     }
 }
 
 void
@@ -17206,14 +17216,14 @@ ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
                                      : gen_ashrdi3) (scratch, scratch,
                                                      GEN_INT (single_width - 1)));
          emit_insn ((mode == DImode
-                      ? gen_x86_shiftsi_adj_1
-                      : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
-                                                scratch));
+                      ? gen_x86_shift_adj_1
+                      : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
+                                                 scratch));
         }
       else
       emit_insn ((mode == DImode
-                    ? gen_x86_shiftsi_adj_3
-                    : gen_x86_shiftdi_adj_3) (low[0], high[0], operands[2]));
+                    ? gen_x86_shift_adj_3
+                    : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
     }
 }
 
@@ -17271,14 +17281,14 @@ ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
        {
          ix86_expand_clear (scratch);
          emit_insn ((mode == DImode
-                      ? gen_x86_shiftsi_adj_1
-                      : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
-                                                scratch));
+                      ? gen_x86_shift_adj_1
+                      : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
+                                                 scratch));
        }
       else
       emit_insn ((mode == DImode
-                    ? gen_x86_shiftsi_adj_2
-                    : gen_x86_shiftdi_adj_2) (low[0], high[0], operands[2]));
+                    ? gen_x86_shift_adj_2
+                    : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
     }
 }
 
@@ -20087,26 +20097,10 @@ ix86_local_alignment (tree exp, enum machine_mode mode,
     }
 
   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
-     to 16byte boundary.  Exact wording is:
-
-     An array uses the same alignment as its elements, except that a local or
-     global array variable of length at least 16 bytes or
-     a C99 variable-length array variable always has alignment of at least 16 bytes.
-
-     This was added to allow use of aligned SSE instructions at arrays.  This
-     rule is meant for static storage (where compiler can not do the analysis
-     by itself).  We follow it for automatic variables only when convenient.
-     We fully control everything in the function compiled and functions from
-     other unit can not rely on the alignment.
-
-     Exclude va_list type.  It is the common case of local array where
-     we can not benefit from the alignment.  */
-  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
-      && TARGET_SSE)
+     to 16byte boundary.  */
+  if (TARGET_64BIT)
     {
       if (AGGREGATE_TYPE_P (type)
-          && (TYPE_MAIN_VARIANT (type)
-              != TYPE_MAIN_VARIANT (va_list_type_node))
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
@@ -26236,13 +26230,11 @@ x86_this_parameter (tree function)
 
   if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
     regno = aggr ? DX_REG : CX_REG;
+  /* ???: To be verified. It is not absolutely clear how aggregates
+     have to be treated for thiscall.  We assume that they are
+     identical to fastcall.  */
   else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
-    {
-      regno = CX_REG;
-      if (aggr)
-        return gen_rtx_MEM (SImode,
-                            plus_constant (stack_pointer_rtx, 4));
-    }
+    regno = aggr ? DX_REG : CX_REG;
   else
     {
       regno = AX_REG;