From: hubicka Date: Sun, 18 Apr 2010 10:52:26 +0000 (+0000) Subject: * i386.md (UNSPEC_SSE_PROLOGUE_SAVE_LOW): New. X-Git-Url: http://git.sourceforge.jp/view?p=pf3gnuchains%2Fgcc-fork.git;a=commitdiff_plain;h=b9c956addd494a86ecfb98ef6e4cd547cc509fe3 * i386.md (UNSPEC_SSE_PROLOGUE_SAVE_LOW): New. (sse_prologue_save_insn expander): Use new pattern. (sse_prologue_save_insn1): New pattern and splitter. (sse_prologue_save_insn): Update to deal also with 64bit aligned blocks. * i386.c (setup_incoming_varargs_64): Do not compute jump destination here. (ix86_gimplify_va_arg): Update alignment needed. (ix86_local_alignment): Do not align all local arrays to 128bit. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@158483 138bc75d-0d04-0410-961f-82ee72b054a4 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index cad41b16d5a..8c4dac3aba6 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2010-04-18 Jan Hubicka + + * i386.md (UNSPEC_SSE_PROLOGUE_SAVE_LOW): New. + (sse_prologue_save_insn expander): Use new pattern. + (sse_prologue_save_insn1): New pattern and splitter. + (sse_prologue_save_insn): Update to deal also with 64bit aligned + blocks. + * i386.c (setup_incoming_varargs_64): Do not compute jump destination here. + (ix86_gimplify_va_arg): Update alignment needed. + (ix86_local_alignment): Do not align all local arrays + to 128bit. + 2010-04-17 Jan Hubicka * ipa-inline.c (cgraph_early_inlining): Handle flattening too. 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index d08a7ea1081..fbc15522673 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -85,6 +85,7 @@ (UNSPEC_SET_RIP 16) (UNSPEC_SET_GOT_OFFSET 17) (UNSPEC_MEMORY_BLOCKAGE 18) + (UNSPEC_SSE_PROLOGUE_SAVE_LOW 19) ; TLS support (UNSPEC_TP 20) @@ -18441,15 +18442,24 @@ (reg:DI XMM5_REG) (reg:DI XMM6_REG) (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE)) - (use (match_operand:DI 1 "register_operand" "")) + (clobber (match_operand:DI 1 "register_operand" "")) (use (match_operand:DI 2 "immediate_operand" "")) - (use (label_ref:DI (match_operand 3 "" "")))])] + (use (label_ref:DI (match_operand 3 "" ""))) + (clobber (match_operand:DI 4 "register_operand" "")) + (use (match_dup 1))])] "TARGET_64BIT" "") -(define_insn "*sse_prologue_save_insn" +;; Pre-reload version of prologue save. Until after prologue generation we don't know +;; what the size of save instruction will be. +;; Operand 0+operand 6 is the memory save area +;; Operand 1 is number of registers to save (will get overwritten to operand 5) +;; Operand 2 is number of non-vaargs SSE arguments +;; Operand 3 is label starting the save block +;; Operand 4 is used for temporary computation of jump address +(define_insn "*sse_prologue_save_insn1" [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R") - (match_operand:DI 4 "const_int_operand" "n"))) + (match_operand:DI 6 "const_int_operand" "n"))) (unspec:BLK [(reg:DI XMM0_REG) (reg:DI XMM1_REG) (reg:DI XMM2_REG) @@ -18458,9 +18468,98 @@ (reg:DI XMM5_REG) (reg:DI XMM6_REG) (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE)) + (clobber (match_operand:DI 1 "register_operand" "=r")) + (use (match_operand:DI 2 "const_int_operand" "i")) + (use (label_ref:DI (match_operand 3 "" "X"))) + (clobber (match_operand:DI 4 "register_operand" "=&r")) + (use (match_operand:DI 5 "register_operand" "1"))] + "TARGET_64BIT + && INTVAL (operands[6]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128 + && INTVAL 
(operands[6]) + INTVAL (operands[2]) * 16 >= -128" + "#" + [(set_attr "type" "other") + (set_attr "memory" "store") + (set_attr "mode" "DI")]) + +;; We know size of save instruction; expand the computation of jump address +;; in the jumptable. +(define_split + [(parallel [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(reg:DI XMM0_REG) + (reg:DI XMM1_REG) + (reg:DI XMM2_REG) + (reg:DI XMM3_REG) + (reg:DI XMM4_REG) + (reg:DI XMM5_REG) + (reg:DI XMM6_REG) + (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE)) + (clobber (match_operand:DI 1 "register_operand" "")) + (use (match_operand:DI 2 "const_int_operand" "")) + (use (match_operand 3 "" "")) + (clobber (match_operand:DI 4 "register_operand" "")) + (use (match_operand:DI 5 "register_operand" ""))])] + "reload_completed" + [(parallel [(set (match_dup 0) + (unspec:BLK [(reg:DI XMM0_REG) + (reg:DI XMM1_REG) + (reg:DI XMM2_REG) + (reg:DI XMM3_REG) + (reg:DI XMM4_REG) + (reg:DI XMM5_REG) + (reg:DI XMM6_REG) + (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE_LOW)) + (use (match_dup 1)) + (use (match_dup 2)) + (use (match_dup 3)) + (use (match_dup 5))])] +{ + /* Movaps is 4 bytes, AVX and movsd is 5 bytes. */ + int size = 4 + (TARGET_AVX || crtl->stack_alignment_needed < 128); + + /* Compute address to jump to: + label - eax*size + nnamed_sse_arguments*size. 
*/ + if (size == 5) + emit_insn (gen_rtx_SET (VOIDmode, operands[4], + gen_rtx_PLUS + (Pmode, + gen_rtx_MULT (Pmode, operands[1], + GEN_INT (4)), + operands[1]))); + else if (size == 4) + emit_insn (gen_rtx_SET (VOIDmode, operands[4], + gen_rtx_MULT (Pmode, operands[1], + GEN_INT (4)))); + else + gcc_unreachable (); + if (INTVAL (operands[2])) + emit_move_insn + (operands[1], + gen_rtx_CONST (DImode, + gen_rtx_PLUS (DImode, + operands[3], + GEN_INT (INTVAL (operands[2]) + * size)))); + else + emit_move_insn (operands[1], operands[3]); + emit_insn (gen_subdi3 (operands[1], operands[1], operands[4])); + operands[5] = GEN_INT (size); +}) + +(define_insn "sse_prologue_save_insn" + [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R") + (match_operand:DI 4 "const_int_operand" "n"))) + (unspec:BLK [(reg:DI XMM0_REG) + (reg:DI XMM1_REG) + (reg:DI XMM2_REG) + (reg:DI XMM3_REG) + (reg:DI XMM4_REG) + (reg:DI XMM5_REG) + (reg:DI XMM6_REG) + (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE_LOW)) (use (match_operand:DI 1 "register_operand" "r")) (use (match_operand:DI 2 "const_int_operand" "i")) - (use (label_ref:DI (match_operand 3 "" "X")))] + (use (label_ref:DI (match_operand 3 "" "X"))) + (use (match_operand:DI 5 "const_int_operand" "i"))] "TARGET_64BIT && INTVAL (operands[4]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128 && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128" @@ -18480,7 +18579,10 @@ PUT_MODE (operands[4], TImode); if (GET_CODE (XEXP (operands[0], 0)) != PLUS) output_asm_insn ("rex", operands); - output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands); + if (crtl->stack_alignment_needed < 128) + output_asm_insn ("%vmovsd\t{%5, %4|%4, %5}", operands); + else + output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands); } (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (operands[3])); @@ -18489,11 +18591,11 @@ [(set_attr "type" "other") (set_attr "length_immediate" "0") (set_attr "length_address" "0") + ;; 2 bytes for jump and 
operands[5] bytes for each save. (set (attr "length") - (if_then_else - (eq (symbol_ref "TARGET_AVX") (const_int 0)) - (const_string "34") - (const_string "42"))) + (plus (const_int 2) + (mult (symbol_ref ("INTVAL (operands[5])")) + (symbol_ref ("X86_64_SSE_REGPARM_MAX - INTVAL (operands[2])"))))) (set_attr "memory" "store") (set_attr "modrm" "0") (set_attr "prefix" "maybe_vex")