OSDN Git Service

Emit vzerouppers after reload.
authorhjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4>
Tue, 2 Nov 2010 18:08:44 +0000 (18:08 +0000)
committerhjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4>
Tue, 2 Nov 2010 18:08:44 +0000 (18:08 +0000)
gcc/

2010-11-02  Uros Bizjak  <ubizjak@gmail.com>
    H.J. Lu  <hongjiu.lu@intel.com>

* config/i386/i386-protos.h (ix86_split_call_vzeroupper): New.
(ix86_split_call_pop_vzeroupper): Likewise.

* config/i386/i386.c (move_or_delete_vzeroupper_2): Rewrite
the loop.
(ix86_expand_call): Use UNSPEC_CALL_NEEDS_VZEROUPPER.
(ix86_split_call_vzeroupper): New.
(ix86_split_call_pop_vzeroupper): Likewise.

* config/i386/i386.md (UNSPEC_CALL_NEEDS_VZEROUPPER): New.
(*call_pop_0_vzeroupper): Likewise.
(*call_pop_1_vzeroupper): Likewise.
(*sibcall_pop_1_vzeroupper): Likewise.
(*call_0_vzeroupper): Likewise.
(*call_1_vzeroupper): Likewise.
(*sibcall_1_vzeroupper): Likewise.
(*call_1_rex64_vzeroupper): Likewise.
(*call_1_rex64_ms_sysv_vzeroupper): New.
(*call_1_rex64_large_vzeroupper): Likewise.
(*sibcall_1_rex64_vzeroupper): Likewise.
(*call_value_pop_0_vzeroupper): New.
(*call_value_pop_1_vzeroupper): Likewise.
(*sibcall_value_pop_1_vzeroupper): Likewise.
(*call_value_0_vzeroupper): New.
(*call_value_0_rex64_vzeroupper): Use
(*call_value_0_rex64_ms_sysv_vzeroupper): Likewise.
(*call_value_1_vzeroupper): Likewise.
(*sibcall_value_1_vzeroupper): Likewise.
(*call_value_1_rex64_vzeroupper): Likewise.
(*call_value_1_rex64_ms_sysv_vzeroupper): Likewise.
(*call_value_1_rex64_large_vzeroupper): Likewise.
(*sibcall_value_1_rex64_vzeroupper): Likewise.

gcc/testsuite/

2010-11-02  H.J. Lu  <hongjiu.lu@intel.com>

* gcc.target/i386/avx-vzeroupper-15.c: New.
* gcc.target/i386/avx-vzeroupper-16.c: Likewise.
* gcc.target/i386/avx-vzeroupper-17.c: Likewise.
* gcc.target/i386/avx-vzeroupper-18.c: Likewise.

PR target/46253
* gcc.target/i386/pr46253.c: New.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@166208 138bc75d-0d04-0410-961f-82ee72b054a4

gcc/ChangeLog
gcc/config/i386/i386-protos.h
gcc/config/i386/i386.c
gcc/config/i386/i386.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/avx-vzeroupper-15.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx-vzeroupper-16.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx-vzeroupper-17.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx-vzeroupper-18.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr46253.c [new file with mode: 0644]

index 9c50ffe..a852af3 100644 (file)
@@ -1,3 +1,39 @@
+2010-11-02  Uros Bizjak  <ubizjak@gmail.com>
+           H.J. Lu  <hongjiu.lu@intel.com>
+
+       * config/i386/i386-protos.h (ix86_split_call_vzeroupper): New.
+       (ix86_split_call_pop_vzeroupper): Likewise.
+
+       * config/i386/i386.c (move_or_delete_vzeroupper_2): Rewrite
+       the loop.
+       (ix86_expand_call): Use UNSPEC_CALL_NEEDS_VZEROUPPER.
+       (ix86_split_call_vzeroupper): New.
+       (ix86_split_call_pop_vzeroupper): Likewise.
+
+       * config/i386/i386.md (UNSPEC_CALL_NEEDS_VZEROUPPER): New.
+       (*call_pop_0_vzeroupper): Likewise.
+       (*call_pop_1_vzeroupper): Likewise.
+       (*sibcall_pop_1_vzeroupper): Likewise.
+       (*call_0_vzeroupper): Likewise.
+       (*call_1_vzeroupper): Likewise.
+       (*sibcall_1_vzeroupper): Likewise.
+       (*call_1_rex64_vzeroupper): Likewise.
+       (*call_1_rex64_ms_sysv_vzeroupper): New.
+       (*call_1_rex64_large_vzeroupper): Likewise.
+       (*sibcall_1_rex64_vzeroupper): Likewise.
+       (*call_value_pop_0_vzeroupper): New.
+       (*call_value_pop_1_vzeroupper): Likewise.
+       (*sibcall_value_pop_1_vzeroupper): Likewise.
+       (*call_value_0_vzeroupper): New.
+       (*call_value_0_rex64_vzeroupper): Use
+       (*call_value_0_rex64_ms_sysv_vzeroupper): Likewise.
+       (*call_value_1_vzeroupper): Likewise.
+       (*sibcall_value_1_vzeroupper): Likewise.
+       (*call_value_1_rex64_vzeroupper): Likewise.
+       (*call_value_1_rex64_ms_sysv_vzeroupper): Likewise.
+       (*call_value_1_rex64_large_vzeroupper): Likewise.
+       (*sibcall_value_1_rex64_vzeroupper): Likewise.
+
 2010-11-02  Ian Lance Taylor  <iant@google.com>
 
        PR lto/46273
index 13b1394..c64135b 100644 (file)
@@ -119,6 +119,8 @@ extern void ix86_expand_sse_unpack (rtx[], bool, bool);
 extern void ix86_expand_sse4_unpack (rtx[], bool, bool);
 extern bool ix86_expand_int_addcc (rtx[]);
 extern rtx ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
+extern void ix86_split_call_vzeroupper (rtx, rtx);
+extern void ix86_split_call_pop_vzeroupper (rtx, rtx);
 extern void x86_initialize_trampoline (rtx, rtx, rtx);
 extern rtx ix86_zero_extend_to_Pmode (rtx);
 extern void ix86_split_long_move (rtx[]);
index bda8ed3..a5beb83 100644 (file)
@@ -108,163 +108,119 @@ check_avx256_stores (rtx dest, const_rtx set, void *data)
 static void
 move_or_delete_vzeroupper_2 (basic_block bb, bool upper_128bits_set)
 {
-  rtx curr_insn, next_insn, prev_insn, insn;
+  rtx insn;
+  rtx vzeroupper_insn = NULL_RTX;
+  rtx pat;
+  int avx256;
 
   if (dump_file)
     fprintf (dump_file, " BB [%i] entry: upper 128bits: %d\n",
             bb->index, upper_128bits_set);
 
-  for (curr_insn = BB_HEAD (bb);
-       curr_insn && curr_insn != NEXT_INSN (BB_END (bb));
-       curr_insn = next_insn)
+  insn = BB_HEAD (bb);
+  while (insn != BB_END (bb))
     {
-      int avx256;
+      insn = NEXT_INSN (insn);
 
-      next_insn = NEXT_INSN (curr_insn);
-
-      if (!NONDEBUG_INSN_P (curr_insn))
+      if (!NONDEBUG_INSN_P (insn))
        continue;
 
-      /* Search for vzeroupper.  */
-      insn = PATTERN (curr_insn);
-      if (GET_CODE (insn) == UNSPEC_VOLATILE
-         && XINT (insn, 1) == UNSPECV_VZEROUPPER)
+      /* Move vzeroupper before jump/call.  */
+      if (JUMP_P (insn) || CALL_P (insn))
+       {
+         if (!vzeroupper_insn)
+           continue;
+
+         if (PREV_INSN (insn) != vzeroupper_insn)
+           {
+             if (dump_file)
+               {
+                 fprintf (dump_file, "Move vzeroupper after:\n");
+                 print_rtl_single (dump_file, PREV_INSN (insn));
+                 fprintf (dump_file, "before:\n");
+                 print_rtl_single (dump_file, insn);
+               }
+             reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
+                                 PREV_INSN (insn));
+           }
+         vzeroupper_insn = NULL_RTX;
+         continue;
+       }
+
+      pat = PATTERN (insn);
+
+      /* Check insn for vzeroupper intrinsic.  */
+      if (GET_CODE (pat) == UNSPEC_VOLATILE
+         && XINT (pat, 1) == UNSPECV_VZEROUPPER)
        {
-         /* Found vzeroupper.  */
          if (dump_file)
            {
+             /* Found vzeroupper intrinsic.  */
              fprintf (dump_file, "Found vzeroupper:\n");
-             print_rtl_single (dump_file, curr_insn);
+             print_rtl_single (dump_file, insn);
            }
        }
       else
        {
-         /* Check vzeroall intrinsic.  */
-         if (GET_CODE (insn) == PARALLEL
-             && GET_CODE (XVECEXP (insn, 0, 0)) == UNSPEC_VOLATILE
-             && XINT (XVECEXP (insn, 0, 0), 1) == UNSPECV_VZEROALL)
-           upper_128bits_set = false;
-         else if (!upper_128bits_set)
+         /* Check insn for vzeroall intrinsic.  */
+         if (GET_CODE (pat) == PARALLEL
+             && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
+             && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
            {
-             /* Check if upper 128bits of AVX registers are used.  */
-             note_stores (insn, check_avx256_stores,
-                          &upper_128bits_set);
+             upper_128bits_set = false;
+
+             /* Delete pending vzeroupper insertion.  */
+             if (vzeroupper_insn)
+               {
+                 delete_insn (vzeroupper_insn);
+                 vzeroupper_insn = NULL_RTX;
+               }
            }
+         else if (!upper_128bits_set)
+           note_stores (pat, check_avx256_stores, &upper_128bits_set);
          continue;
        }
 
-      avx256 = INTVAL (XVECEXP (insn, 0, 0));
+      /* Process vzeroupper intrinsic.  */
+      avx256 = INTVAL (XVECEXP (pat, 0, 0));
 
       if (!upper_128bits_set)
        {
          /* Since the upper 128bits are cleared, callee must not pass
             256bit AVX register.  We only need to check if callee
             returns 256bit AVX register.  */
-         upper_128bits_set = avx256 == callee_return_avx256;
+         upper_128bits_set = (avx256 == callee_return_avx256);
 
-         /* Remove unnecessary vzeroupper since upper 128bits are
-            cleared.  */
+         /* Remove unnecessary vzeroupper since
+            upper 128bits are cleared.  */
          if (dump_file)
            {
              fprintf (dump_file, "Delete redundant vzeroupper:\n");
-             print_rtl_single (dump_file, curr_insn);
+             print_rtl_single (dump_file, insn);
            }
-         delete_insn (curr_insn);
-         continue;
+         delete_insn (insn);
        }
       else if (avx256 == callee_return_pass_avx256
               || avx256 == callee_pass_avx256)
        {
          /* Callee passes 256bit AVX register.  Check if callee
             returns 256bit AVX register.  */
-         upper_128bits_set = avx256 == callee_return_pass_avx256;
+         upper_128bits_set = (avx256 == callee_return_pass_avx256);
 
-         /* Must remove vzeroupper since callee passes 256bit AVX
-            register.  */
+         /* Must remove vzeroupper since
+            callee passes in 256bit AVX register.  */
          if (dump_file)
            {
              fprintf (dump_file, "Delete callee pass vzeroupper:\n");
-             print_rtl_single (dump_file, curr_insn);
-           }
-         delete_insn (curr_insn);
-         continue;
-       }
-
-      /* Find the jump after vzeroupper.  */
-      prev_insn = curr_insn;
-      if (avx256 == vzeroupper_intrinsic)
-       {
-         /* For vzeroupper intrinsic, check if there is another
-            vzeroupper.  */
-         insn = NEXT_INSN (curr_insn);
-         while (insn)
-           {
-             if (NONJUMP_INSN_P (insn)
-                 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
-                 && XINT (PATTERN (insn), 1) == UNSPECV_VZEROUPPER)
-               {
-                 if (dump_file)
-                   {
-                     fprintf (dump_file,
-                              "Delete redundant vzeroupper intrinsic:\n");
-                     print_rtl_single (dump_file, curr_insn);
-                   }
-                 delete_insn (curr_insn);
-                 insn = NULL;
-                 continue;
-               }
-
-             if (JUMP_P (insn) || CALL_P (insn))
-               break;
-             prev_insn = insn;
-             insn = NEXT_INSN (insn);
-             if (insn == NEXT_INSN (BB_END (bb)))
-               break;
+             print_rtl_single (dump_file, insn);
            }
-
-         /* Continue if redundant vzeroupper intrinsic is deleted.  */
-         if (!insn)
-           continue;
+         delete_insn (insn);
        }
       else
        {
-         /* Find the next jump/call.  */
-         insn = NEXT_INSN (curr_insn);
-         while (insn)
-           {
-             if (JUMP_P (insn) || CALL_P (insn))
-               break;
-             prev_insn = insn;
-             insn = NEXT_INSN (insn);
-             if (insn == NEXT_INSN (BB_END (bb)))
-               break;
-           }
-
-         if (!insn)
-           gcc_unreachable();
+         upper_128bits_set = false;
+         vzeroupper_insn = insn;
        }
-
-      /* Keep vzeroupper.  */
-      upper_128bits_set = false;
-
-      /* Also allow label as the next instruction.  */
-      if (insn == NEXT_INSN (BB_END (bb)) && !LABEL_P (insn))
-       gcc_unreachable();
-
-      /* Move vzeroupper before jump/call if neeeded.  */
-      if (curr_insn != prev_insn)
-       {
-         reorder_insns_nobb (curr_insn, curr_insn, prev_insn);
-         if (dump_file)
-           {
-             fprintf (dump_file, "Move vzeroupper after:\n");
-             print_rtl_single (dump_file, prev_insn);
-             fprintf (dump_file, "before:\n");
-             print_rtl_single (dump_file, insn);
-           }
-       }
-
-      next_insn = NEXT_INSN (insn);
     }
 
   BLOCK_INFO (bb)->upper_128bits_set = upper_128bits_set;
@@ -21565,10 +21521,12 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
                               + 2, vec));
     }
 
-  /* Emit vzeroupper if needed.  */
+  /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration.  */
   if (TARGET_VZEROUPPER && cfun->machine->use_avx256_p)
     {
+      rtx unspec;
       int avx256;
+
       cfun->machine->use_vzeroupper_p = 1;
       if (cfun->machine->callee_pass_avx256_p)
        {
@@ -21581,7 +21539,11 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
        avx256 = callee_return_avx256;
       else
        avx256 = call_no_avx256;
-      emit_insn (gen_avx_vzeroupper (GEN_INT (avx256))); 
+
+      unspec = gen_rtx_UNSPEC (VOIDmode,
+                              gen_rtvec (1, GEN_INT (avx256)),
+                              UNSPEC_CALL_NEEDS_VZEROUPPER);
+      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, unspec));
     }
 
   call = emit_call_insn (call);
@@ -21591,6 +21553,24 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
   return call;
 }
 
+void
+ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
+{
+  rtx call = XVECEXP (PATTERN (insn), 0, 0);
+  emit_insn (gen_avx_vzeroupper (vzeroupper));
+  emit_call_insn (call);
+}
+
+void
+ix86_split_call_pop_vzeroupper (rtx insn, rtx vzeroupper)
+{
+  rtx call = XVECEXP (PATTERN (insn), 0, 0);
+  rtx pop = XVECEXP (PATTERN (insn), 0, 1);
+  emit_insn (gen_avx_vzeroupper (vzeroupper));
+  emit_call_insn (gen_rtx_PARALLEL (VOIDmode,
+                                   gen_rtvec (2, call, pop)));
+}
+
 /* Output the assembly for a call instruction.  */
 
 const char *
index feaf781..278bd77 100644 (file)
   UNSPEC_LD_MPIC       ; load_macho_picbase
   UNSPEC_TRUNC_NOOP
   UNSPEC_DIV_ALREADY_SPLIT
+  UNSPEC_CALL_NEEDS_VZEROUPPER
 
   ;; For SSE/MMX support:
   UNSPEC_FIX_NOTRUNC
   DONE;
 })
 
+(define_insn_and_split "*call_pop_0_vzeroupper"
+  [(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" ""))
+        (match_operand:SI 1 "" ""))
+   (set (reg:SI SP_REG)
+       (plus:SI (reg:SI SP_REG)
+                (match_operand:SI 2 "immediate_operand" "")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+          UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_pop_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "call")])
+
 (define_insn "*call_pop_0"
   [(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" ""))
         (match_operand:SI 1 "" ""))
 }
   [(set_attr "type" "call")])
 
+(define_insn_and_split "*call_pop_1_vzeroupper"
+  [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm"))
+        (match_operand:SI 1 "" ""))
+   (set (reg:SI SP_REG)
+       (plus:SI (reg:SI SP_REG)
+                (match_operand:SI 2 "immediate_operand" "i")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+          UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_pop_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "call")])
+
 (define_insn "*call_pop_1"
   [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm"))
         (match_operand:SI 1 "" ""))
 }
   [(set_attr "type" "call")])
 
+(define_insn_and_split "*sibcall_pop_1_vzeroupper"
+  [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U"))
+        (match_operand:SI 1 "" ""))
+   (set (reg:SI SP_REG)
+       (plus:SI (reg:SI SP_REG)
+                (match_operand:SI 2 "immediate_operand" "i,i")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+          UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_pop_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "call")])
+
 (define_insn "*sibcall_pop_1"
   [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U"))
         (match_operand:SI 1 "" ""))
   DONE;
 })
 
+(define_insn_and_split "*call_0_vzeroupper"
+  [(call (mem:QI (match_operand 0 "constant_call_address_operand" ""))
+        (match_operand 1 "" ""))
+   (unspec [(match_operand 2 "const_int_operand" "")]
+          UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+  [(set_attr "type" "call")])
+
 (define_insn "*call_0"
   [(call (mem:QI (match_operand 0 "constant_call_address_operand" ""))
         (match_operand 1 "" ""))]
   { return ix86_output_call_insn (insn, operands[0], 0); }
   [(set_attr "type" "call")])
 
+(define_insn_and_split "*call_1_vzeroupper"
+  [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm"))
+        (match_operand 1 "" ""))
+   (unspec [(match_operand 2 "const_int_operand" "")]
+          UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+  [(set_attr "type" "call")])
+
 (define_insn "*call_1"
   [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm"))
         (match_operand 1 "" ""))]
   { return ix86_output_call_insn (insn, operands[0], 0); }
   [(set_attr "type" "call")])
 
+(define_insn_and_split "*sibcall_1_vzeroupper"
+  [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U"))
+        (match_operand 1 "" ""))
+   (unspec [(match_operand 2 "const_int_operand" "")]
+          UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+  [(set_attr "type" "call")])
+
 (define_insn "*sibcall_1"
   [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U"))
         (match_operand 1 "" ""))]
   { return ix86_output_call_insn (insn, operands[0], 0); }
   [(set_attr "type" "call")])
 
+(define_insn_and_split "*call_1_rex64_vzeroupper"
+  [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
+        (match_operand 1 "" ""))
+   (unspec [(match_operand 2 "const_int_operand" "")]
+          UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)
+   && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+  [(set_attr "type" "call")])
+
 (define_insn "*call_1_rex64"
   [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
         (match_operand 1 "" ""))]
   { return ix86_output_call_insn (insn, operands[0], 0); }
   [(set_attr "type" "call")])
 
+(define_insn_and_split "*call_1_rex64_ms_sysv_vzeroupper"
+  [(parallel
+    [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
+          (match_operand 1 "" ""))
+     (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+     (clobber (reg:TI XMM6_REG))
+     (clobber (reg:TI XMM7_REG))
+     (clobber (reg:TI XMM8_REG))
+     (clobber (reg:TI XMM9_REG))
+     (clobber (reg:TI XMM10_REG))
+     (clobber (reg:TI XMM11_REG))
+     (clobber (reg:TI XMM12_REG))
+     (clobber (reg:TI XMM13_REG))
+     (clobber (reg:TI XMM14_REG))
+     (clobber (reg:TI XMM15_REG))
+     (clobber (reg:DI SI_REG))
+     (clobber (reg:DI DI_REG))])
+   (unspec [(match_operand 2 "const_int_operand" "")]
+          UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+  [(set_attr "type" "call")])
+
 (define_insn "*call_1_rex64_ms_sysv"
   [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
         (match_operand 1 "" ""))
   { return ix86_output_call_insn (insn, operands[0], 0); }
   [(set_attr "type" "call")])
 
+(define_insn_and_split "*call_1_rex64_large_vzeroupper"
+  [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm"))
+        (match_operand 1 "" ""))
+   (unspec [(match_operand 2 "const_int_operand" "")]
+          UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+  [(set_attr "type" "call")])
+
 (define_insn "*call_1_rex64_large"
   [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm"))
         (match_operand 1 "" ""))]
   { return ix86_output_call_insn (insn, operands[0], 0); }
   [(set_attr "type" "call")])
 
+(define_insn_and_split "*sibcall_1_rex64_vzeroupper"
+  [(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "s,U"))
+        (match_operand 1 "" ""))
+   (unspec [(match_operand 2 "const_int_operand" "")]
+          UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+  [(set_attr "type" "call")])
+
 (define_insn "*sibcall_1_rex64"
   [(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "s,U"))
         (match_operand 1 "" ""))]
 ;; Call-value patterns last so that the wildcard operand does not
 ;; disrupt insn-recog's switch tables.
 
+(define_insn_and_split "*call_value_pop_0_vzeroupper"
+  [(set (match_operand 0 "" "")
+       (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
+             (match_operand:SI 2 "" "")))
+   (set (reg:SI SP_REG)
+       (plus:SI (reg:SI SP_REG)
+                (match_operand:SI 3 "immediate_operand" "")))
+   (unspec [(match_operand 4 "const_int_operand" "")]
+          UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_pop_vzeroupper (curr_insn, operands[4]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*call_value_pop_0"
   [(set (match_operand 0 "" "")
        (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
   { return ix86_output_call_insn (insn, operands[1], 1); }
   [(set_attr "type" "callv")])
 
+(define_insn_and_split "*call_value_pop_1_vzeroupper"
+  [(set (match_operand 0 "" "")
+       (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
+             (match_operand:SI 2 "" "")))
+   (set (reg:SI SP_REG)
+       (plus:SI (reg:SI SP_REG)
+                (match_operand:SI 3 "immediate_operand" "i")))
+   (unspec [(match_operand 4 "const_int_operand" "")]
+          UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_pop_vzeroupper (curr_insn, operands[4]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*call_value_pop_1"
   [(set (match_operand 0 "" "")
        (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
   { return ix86_output_call_insn (insn, operands[1], 1); }
   [(set_attr "type" "callv")])
 
+(define_insn_and_split "*sibcall_value_pop_1_vzeroupper"
+  [(set (match_operand 0 "" "")
+       (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U"))
+             (match_operand:SI 2 "" "")))
+   (set (reg:SI SP_REG)
+       (plus:SI (reg:SI SP_REG)
+                (match_operand:SI 3 "immediate_operand" "i,i")))
+   (unspec [(match_operand 4 "const_int_operand" "")]
+          UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_pop_vzeroupper (curr_insn, operands[4]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*sibcall_value_pop_1"
   [(set (match_operand 0 "" "")
        (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U"))
   { return ix86_output_call_insn (insn, operands[1], 1); }
   [(set_attr "type" "callv")])
 
+(define_insn_and_split "*call_value_0_vzeroupper"
+  [(set (match_operand 0 "" "")
+       (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
+             (match_operand:SI 2 "" "")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+          UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*call_value_0"
   [(set (match_operand 0 "" "")
        (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
   { return ix86_output_call_insn (insn, operands[1], 1); }
   [(set_attr "type" "callv")])
 
+(define_insn_and_split "*call_value_0_rex64_vzeroupper"
+  [(set (match_operand 0 "" "")
+       (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
+             (match_operand:DI 2 "const_int_operand" "")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+          UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*call_value_0_rex64"
   [(set (match_operand 0 "" "")
        (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
   { return ix86_output_call_insn (insn, operands[1], 1); }
   [(set_attr "type" "callv")])
 
+(define_insn_and_split "*call_value_0_rex64_ms_sysv_vzeroupper"
+  [(parallel
+    [(set (match_operand 0 "" "")
+         (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
+               (match_operand:DI 2 "const_int_operand" "")))
+     (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+     (clobber (reg:TI XMM6_REG))
+     (clobber (reg:TI XMM7_REG))
+     (clobber (reg:TI XMM8_REG))
+     (clobber (reg:TI XMM9_REG))
+     (clobber (reg:TI XMM10_REG))
+     (clobber (reg:TI XMM11_REG))
+     (clobber (reg:TI XMM12_REG))
+     (clobber (reg:TI XMM13_REG))
+     (clobber (reg:TI XMM14_REG))
+     (clobber (reg:TI XMM15_REG))
+     (clobber (reg:DI SI_REG))
+     (clobber (reg:DI DI_REG))])
+   (unspec [(match_operand 3 "const_int_operand" "")]
+          UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*call_value_0_rex64_ms_sysv"
   [(set (match_operand 0 "" "")
        (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
   { return ix86_output_call_insn (insn, operands[1], 1); }
   [(set_attr "type" "callv")])
 
+(define_insn_and_split "*call_value_1_vzeroupper"
+  [(set (match_operand 0 "" "")
+       (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
+             (match_operand:SI 2 "" "")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+          UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*call_value_1"
   [(set (match_operand 0 "" "")
        (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
   { return ix86_output_call_insn (insn, operands[1], 1); }
   [(set_attr "type" "callv")])
 
+(define_insn_and_split "*sibcall_value_1_vzeroupper"
+  [(set (match_operand 0 "" "")
+       (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U"))
+             (match_operand:SI 2 "" "")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+          UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*sibcall_value_1"
   [(set (match_operand 0 "" "")
        (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U"))
   { return ix86_output_call_insn (insn, operands[1], 1); }
   [(set_attr "type" "callv")])
 
+(define_insn_and_split "*call_value_1_rex64_vzeroupper"
+  [(set (match_operand 0 "" "")
+       (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
+             (match_operand:DI 2 "" "")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+          UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)
+   && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*call_value_1_rex64"
   [(set (match_operand 0 "" "")
        (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
   { return ix86_output_call_insn (insn, operands[1], 1); }
   [(set_attr "type" "callv")])
 
+(define_insn_and_split "*call_value_1_rex64_ms_sysv_vzeroupper"
+  [(parallel
+    [(set (match_operand 0 "" "")
+         (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
+               (match_operand:DI 2 "" "")))
+     (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+     (clobber (reg:TI XMM6_REG))
+     (clobber (reg:TI XMM7_REG))
+     (clobber (reg:TI XMM8_REG))
+     (clobber (reg:TI XMM9_REG))
+     (clobber (reg:TI XMM10_REG))
+     (clobber (reg:TI XMM11_REG))
+     (clobber (reg:TI XMM12_REG))
+     (clobber (reg:TI XMM13_REG))
+     (clobber (reg:TI XMM14_REG))
+     (clobber (reg:TI XMM15_REG))
+     (clobber (reg:DI SI_REG))
+     (clobber (reg:DI DI_REG))])
+   (unspec [(match_operand 3 "const_int_operand" "")]
+          UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*call_value_1_rex64_ms_sysv"
   [(set (match_operand 0 "" "")
        (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
   { return ix86_output_call_insn (insn, operands[1], 1); }
   [(set_attr "type" "callv")])
 
+(define_insn_and_split "*call_value_1_rex64_large_vzeroupper"
+  [(set (match_operand 0 "" "")
+       (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm"))
+             (match_operand:DI 2 "" "")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+          UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*call_value_1_rex64_large"
   [(set (match_operand 0 "" "")
        (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm"))
   { return ix86_output_call_insn (insn, operands[1], 1); }
   [(set_attr "type" "callv")])
 
+(define_insn_and_split "*sibcall_value_1_rex64_vzeroupper"
+  [(set (match_operand 0 "" "")
+       (call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "s,U"))
+             (match_operand:DI 2 "" "")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+          UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*sibcall_value_1_rex64"
   [(set (match_operand 0 "" "")
        (call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "s,U"))
index 2fcf2b2..c484117 100644 (file)
@@ -1,3 +1,13 @@
+2010-11-02  H.J. Lu  <hongjiu.lu@intel.com>
+
+       * gcc.target/i386/avx-vzeroupper-15.c: New.
+       * gcc.target/i386/avx-vzeroupper-16.c: Likewise.
+       * gcc.target/i386/avx-vzeroupper-17.c: Likewise.
+       * gcc.target/i386/avx-vzeroupper-18.c: Likewise.
+
+       PR target/46253
+       * gcc.target/i386/pr46253.c: New.
+
 2010-11-02  Steven G. Kargl  < kargl@gcc.gnu.org>
            Tobias Burnus  <burnus@net-b.de>
 
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-15.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-15.c
new file mode 100644 (file)
index 0000000..134a3dd
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -mavx -mtune=generic -dp" } */
+
+#include <immintrin.h>
+
+extern __m256 x, y;
+extern void (*bar) (void);
+
+void
+foo ()
+{
+  x = y;
+  bar ();
+}
+
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-16.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-16.c
new file mode 100644 (file)
index 0000000..3fb099d
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O0 -mavx -mabi=ms -mtune=generic -dp" } */
+
+typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__));
+
+extern __m256 x;
+
+extern __m256 __attribute__ ((sysv_abi))  bar (__m256);
+
+void
+foo (void)
+{
+  bar (x);
+}
+
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */
+/* { dg-final { scan-assembler-times "\\*call_value_0_rex64_ms_sysv" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-17.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-17.c
new file mode 100644 (file)
index 0000000..2f3cfd2
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O0 -mavx -mabi=ms -mtune=generic -dp" } */
+
+typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__));
+
+extern __m256 x;
+
+extern __m256 __attribute__ ((sysv_abi)) (*bar) (__m256);
+
+void
+foo (void)
+{
+  bar (x);
+}
+
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */
+/* { dg-final { scan-assembler-times "\\*call_value_1_rex64_ms_sysv" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-18.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-18.c
new file mode 100644 (file)
index 0000000..541f77d
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O0 -mavx -mabi=ms -mtune=generic -dp" } */
+
+typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__));
+
+extern __m256 x;
+
+extern void __attribute__ ((sysv_abi))  bar (__m256);
+
+void
+foo (void)
+{
+  bar (x);
+}
+
+/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
+/* { dg-final { scan-assembler-times "\\*call_1_rex64_ms_sysv" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr46253.c b/gcc/testsuite/gcc.target/i386/pr46253.c
new file mode 100644 (file)
index 0000000..406790a
--- /dev/null
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O -g -mf16c -mtune=generic -dp" } */
+
+typedef __m256i __attribute__ ((__vector_size__ (32)));
+
+__m256i bar (void);
+void foo (void)
+{
+  int i = 0;
+  bar ();
+  __builtin_ia32_vzeroupper ();
+  while (++i);
+}
+
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */