X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Fconfig%2Fi386%2Fsse.md;h=e9f6c3da8fbbe9c6af5a539103527ccfd8c57092;hb=33541f98a3ec41f15aa971e3fe350c546b1f1117;hp=cea13cbf88c9a6ebcc031f54d1d5cbad051cffa5;hpb=908dc1fce67d23202ee6adbd708dee239a6c9b62;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index cea13cbf88c..e9f6c3da8fb 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -18,23 +18,53 @@ ;; along with GCC; see the file COPYING3. If not see ;; . +;; All vector modes including V1TImode, used in move patterns. +(define_mode_iterator V16 + [(V32QI "TARGET_AVX") V16QI + (V16HI "TARGET_AVX") V8HI + (V8SI "TARGET_AVX") V4SI + (V4DI "TARGET_AVX") V2DI + V1TI + (V8SF "TARGET_AVX") V4SF + (V4DF "TARGET_AVX") V2DF]) + +;; All vector modes +(define_mode_iterator V + [(V32QI "TARGET_AVX") V16QI + (V16HI "TARGET_AVX") V8HI + (V8SI "TARGET_AVX") V4SI + (V4DI "TARGET_AVX") V2DI + (V8SF "TARGET_AVX") V4SF + (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) + +;; All 128bit vector modes +(define_mode_iterator V_128 + [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")]) + +;; All 256bit vector modes +(define_mode_iterator V_256 + [V32QI V16HI V8SI V4DI V8SF V4DF]) ;; All vector float modes (define_mode_iterator VF - [(V4SF "TARGET_SSE") (V2DF "TARGET_SSE2") - (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")]) + [(V8SF "TARGET_AVX") V4SF + (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) ;; All SFmode vector float modes (define_mode_iterator VF1 - [(V4SF "TARGET_SSE") (V8SF "TARGET_AVX")]) + [(V8SF "TARGET_AVX") V4SF]) ;; All DFmode vector float modes (define_mode_iterator VF2 - [(V2DF "TARGET_SSE2") (V4DF "TARGET_AVX")]) + [(V4DF "TARGET_AVX") V2DF]) ;; All 128bit vector float modes (define_mode_iterator VF_128 - [(V4SF "TARGET_SSE") (V2DF "TARGET_SSE2")]) + [V4SF (V2DF "TARGET_SSE2")]) + +;; All 256bit vector float modes +(define_mode_iterator VF_256 + [V8SF V4DF]) ;; All vector integer modes (define_mode_iterator VI @@ -43,6 
+73,14 @@ (V8SI "TARGET_AVX") V4SI (V4DI "TARGET_AVX") V2DI]) +;; All QImode vector integer modes +(define_mode_iterator VI1 + [(V32QI "TARGET_AVX") V16QI]) + +;; All DImode vector integer modes +(define_mode_iterator VI8 + [(V4DI "TARGET_AVX") V2DI]) + ;; All 128bit vector integer modes (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI]) @@ -53,128 +91,105 @@ (define_mode_iterator VI24_128 [V8HI V4SI]) (define_mode_iterator VI248_128 [V8HI V4SI V2DI]) +;; Int-float size matches +(define_mode_iterator VI4F_128 [V4SI V4SF]) +(define_mode_iterator VI8F_128 [V2DI V2DF]) +(define_mode_iterator VI4F_256 [V8SI V8SF]) +(define_mode_iterator VI8F_256 [V4DI V4DF]) -;; Instruction suffix for sign and zero extensions. -(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")]) +;; Mapping from float mode to required SSE level +(define_mode_attr sse + [(SF "sse") (DF "sse2") + (V4SF "sse") (V2DF "sse2") + (V8SF "avx") (V4DF "avx")]) -;; All 16-byte vector modes handled by SSE -(define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF]) -(define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF]) +(define_mode_attr sse2 + [(V16QI "sse2") (V32QI "avx") + (V2DI "sse2") (V4DI "avx")]) -;; All 32-byte vector modes handled by AVX -(define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF]) +(define_mode_attr sse3 + [(V16QI "sse3") (V32QI "avx")]) -;; All QI vector modes handled by AVX -(define_mode_iterator AVXMODEQI [V32QI V16QI]) +(define_mode_attr sse4_1 + [(V4SF "sse4_1") (V2DF "sse4_1") + (V8SF "avx") (V4DF "avx")]) -;; All DI vector modes handled by AVX -(define_mode_iterator AVXMODEDI [V4DI V2DI]) +(define_mode_attr avxsizesuffix + [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256") + (V16QI "") (V8HI "") (V4SI "") (V2DI "") + (V8SF "256") (V4DF "256") + (V4SF "") (V2DF "")]) -;; All vector modes handled by AVX -(define_mode_iterator AVXMODE - [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF]) -(define_mode_iterator 
AVXMODE16 - [V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF]) +;; SSE instruction mode +(define_mode_attr sseinsnmode + [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") + (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI") + (V8SF "V8SF") (V4DF "V4DF") + (V4SF "V4SF") (V2DF "V2DF")]) -;; Mix-n-match -(define_mode_iterator SSEMODE124 [V16QI V8HI V4SI]) -(define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI]) -(define_mode_iterator SSEMODEF2P [V4SF V2DF]) +;; Mapping of vector float modes to an integer mode of the same size +(define_mode_attr sseintvecmode + [(V8SF "V8SI") (V4DF "V4DI") + (V4SF "V4SI") (V2DF "V2DI")]) -(define_mode_iterator AVX256MODEF2P [V8SF V4DF]) -(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF]) -(define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF]) -(define_mode_iterator AVX256MODE4P [V4DI V4DF]) -(define_mode_iterator AVX256MODE8P [V8SI V8SF]) -(define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF]) -(define_mode_iterator AVXMODEFDP [V2DF V4DF]) -(define_mode_iterator AVXMODEFSP [V4SF V8SF]) +;; Mapping of vector modes to a vector mode of double size +(define_mode_attr ssedoublevecmode + [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI") + (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI") + (V8SF "V16SF") (V4DF "V8DF") + (V4SF "V8SF") (V2DF "V4DF")]) + +;; Mapping of vector modes to a vector mode of half size +(define_mode_attr ssehalfvecmode + [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI") + (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") + (V8SF "V4SF") (V4DF "V2DF") + (V4SF "V2SF")]) -(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF]) +;; Mapping of vector modes back to the scalar modes +(define_mode_attr ssescalarmode + [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") + (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") + (V8SF "SF") (V4DF "DF") + (V4SF "SF") (V2DF "DF")]) -;; Int-float size matches -(define_mode_iterator SSEMODE4S [V4SF V4SI]) 
-(define_mode_iterator SSEMODE2D [V2DF V2DI]) +;; Number of scalar elements in each vector type +(define_mode_attr ssescalarnum + [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4") + (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2") + (V8SF "8") (V4DF "4") + (V4SF "4") (V2DF "2")]) -;; Modes handled by vec_extract_even/odd pattern. -(define_mode_iterator SSEMODE_EO - [(V4SF "TARGET_SSE") - (V2DF "TARGET_SSE2") - (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2") - (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2") - (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")]) +;; SSE scalar suffix for vector modes +(define_mode_attr ssescalarmodesuffix + [(V8SF "ss") (V4DF "sd") + (V4SF "ss") (V2DF "sd") + (V8SI "ss") (V4DI "sd") + (V4SI "d")]) -;; Modes handled by storent patterns. -(define_mode_iterator STORENT_MODE - [(SF "TARGET_SSE4A") (DF "TARGET_SSE4A") - (SI "TARGET_SSE2") (V2DI "TARGET_SSE2") (V2DF "TARGET_SSE2") - (V4SF "TARGET_SSE") - (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")]) +;; Pack/unpack vector modes +(define_mode_attr sseunpackmode + [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")]) -;; Modes handled by vector extract patterns. 
-(define_mode_iterator VEC_EXTRACT_MODE - [(V2DI "TARGET_SSE") (V4SI "TARGET_SSE") - (V8HI "TARGET_SSE") (V16QI "TARGET_SSE") - (V2DF "TARGET_SSE") (V4SF "TARGET_SSE") - (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")]) +(define_mode_attr ssepackmode + [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")]) -;; Mapping from float mode to required SSE level -(define_mode_attr sse - [(SF "sse") (DF "sse2") - (V4SF "sse") (V2DF "sse2") - (V8SF "avx") (V4DF "avx")]) +;; Mapping of the max integer size for xop rotate immediate constraint +(define_mode_attr sserotatemax + [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")]) -;; Mapping from integer vector mode to mnemonic suffix -(define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")]) +;; Mapping of mode to cast intrinsic name +(define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")]) -;; Mapping of the insn mnemonic suffix -(define_mode_attr ssemodesuffix - [(SF "ss") (DF "sd") (V4SF "ps") (V2DF "pd") (V8SF "ps") (V4DF "pd") - (V8SI "ps") (V4DI "pd")]) -(define_mode_attr ssescalarmodesuffix - [(SF "ss") (DF "sd") (V4SF "ss") (V2DF "sd") (V8SF "ss") (V8SI "ss") - (V4DF "sd") (V4SI "d") (V4DI "sd")]) +;; Instruction suffix for sign and zero extensions. 
+(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")]) -;; Mapping of the max integer size for xop rotate immediate constraint -(define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")]) -;; Mapping of vector modes back to the scalar modes -(define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF") - (V16QI "QI") (V8HI "HI") - (V4SI "SI") (V2DI "DI")]) -;; Mapping of vector modes to a vector mode of double size -(define_mode_attr ssedoublesizemode - [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI") - (V8HI "V16HI") (V16QI "V32QI") - (V4DF "V8DF") (V8SF "V16SF") - (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")]) +;; Mix-n-match +(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF]) -;; Number of scalar elements in each vector type -(define_mode_attr ssescalarnum - [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2") - (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")]) - -;; Mapping for AVX -(define_mode_attr avxvecmode - [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI") - (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF") - (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")]) -(define_mode_attr avxhalfvecmode - [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI") - (V8SF "V4SF") (V4DF "V2DF") - (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")]) -(define_mode_attr avxscalarmode - [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF") - (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")]) -(define_mode_attr avxpermvecmode - [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")]) -(define_mode_attr avxmodesuffixp - [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si") - (V4DF "pd")]) -(define_mode_attr avxmodesuffix - [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "") - (V8SI "256") (V8SF "256") (V4DF "256")]) +(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF]) ;; Mapping of 
immediate bits for blend instructions (define_mode_attr blendbits @@ -188,19 +203,22 @@ ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; All of these patterns are enabled for SSE1 as well as SSE2. +;; This is essential for maintaining stable calling conventions. + (define_expand "mov" - [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "") - (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))] - "TARGET_AVX" + [(set (match_operand:V16 0 "nonimmediate_operand" "") + (match_operand:V16 1 "nonimmediate_operand" ""))] + "TARGET_SSE" { ix86_expand_vector_move (mode, operands); DONE; }) -(define_insn "*avx_mov_internal" - [(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m") - (match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))] - "TARGET_AVX +(define_insn "*mov_internal" + [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m") + (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))] + "TARGET_SSE && (register_operand (operands[0], mode) || register_operand (operands[1], mode))" { @@ -214,85 +232,51 @@ { case MODE_V8SF: case MODE_V4SF: - if (misaligned_operand (operands[0], mode) - || misaligned_operand (operands[1], mode)) + if (TARGET_AVX + && (misaligned_operand (operands[0], mode) + || misaligned_operand (operands[1], mode))) return "vmovups\t{%1, %0|%0, %1}"; else - return "vmovaps\t{%1, %0|%0, %1}"; + return "%vmovaps\t{%1, %0|%0, %1}"; + case MODE_V4DF: case MODE_V2DF: - if (misaligned_operand (operands[0], mode) - || misaligned_operand (operands[1], mode)) + if (TARGET_AVX + && (misaligned_operand (operands[0], mode) + || misaligned_operand (operands[1], mode))) return "vmovupd\t{%1, %0|%0, %1}"; else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "vmovaps\t{%1, %0|%0, %1}"; + return "%vmovaps\t{%1, %0|%0, %1}"; else - return "vmovapd\t{%1, %0|%0, %1}"; - default: - if (misaligned_operand (operands[0], mode) - || misaligned_operand (operands[1], mode)) + return 
"%vmovapd\t{%1, %0|%0, %1}"; + + case MODE_OI: + case MODE_TI: + if (TARGET_AVX + && (misaligned_operand (operands[0], mode) + || misaligned_operand (operands[1], mode))) return "vmovdqu\t{%1, %0|%0, %1}"; else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "vmovaps\t{%1, %0|%0, %1}"; + return "%vmovaps\t{%1, %0|%0, %1}"; else - return "vmovdqa\t{%1, %0|%0, %1}"; - } - default: - gcc_unreachable (); - } -} - [(set_attr "type" "sselog1,ssemov,ssemov") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - -;; All of these patterns are enabled for SSE1 as well as SSE2. -;; This is essential for maintaining stable calling conventions. - -(define_expand "mov" - [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "") - (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))] - "TARGET_SSE" -{ - ix86_expand_vector_move (mode, operands); - DONE; -}) + return "%vmovdqa\t{%1, %0|%0, %1}"; -(define_insn "*mov_internal" - [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m") - (match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))] - "TARGET_SSE - && (register_operand (operands[0], mode) - || register_operand (operands[1], mode))" -{ - switch (which_alternative) - { - case 0: - return standard_sse_constant_opcode (insn, operands[1]); - case 1: - case 2: - switch (get_attr_mode (insn)) - { - case MODE_V4SF: - return "movaps\t{%1, %0|%0, %1}"; - case MODE_V2DF: - if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "movaps\t{%1, %0|%0, %1}"; - else - return "movapd\t{%1, %0|%0, %1}"; default: - if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "movaps\t{%1, %0|%0, %1}"; - else - return "movdqa\t{%1, %0|%0, %1}"; + gcc_unreachable (); } default: gcc_unreachable (); } } [(set_attr "type" "sselog1,ssemov,ssemov") + (set_attr "prefix" "maybe_vex") (set (attr "mode") - (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0)) - (eq (symbol_ref "TARGET_SSE2") (const_int 0))) + (cond [(ne (symbol_ref "TARGET_AVX") 
(const_int 0)) + (const_string "") + (ior (ior + (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (eq (symbol_ref "TARGET_SSE2") (const_int 0))) (and (eq_attr "alternative" "2") (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") (const_int 0)))) @@ -304,6 +288,19 @@ ] (const_string "TI")))]) +(define_insn "sse2_movq128" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (vec_concat:V2DI + (vec_select:DI + (match_operand:V2DI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (const_int 0)))] + "TARGET_SSE2" + "%vmovq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "TI")]) + ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm. ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded ;; from memory, we'd prefer to load the memory directly into the %xmm @@ -366,15 +363,7 @@ }) (define_expand "push1" - [(match_operand:AVX256MODE 0 "register_operand" "")] - "TARGET_AVX" -{ - ix86_expand_push (mode, operands[0]); - DONE; -}) - -(define_expand "push1" - [(match_operand:SSEMODE16 0 "register_operand" "")] + [(match_operand:V16 0 "register_operand" "")] "TARGET_SSE" { ix86_expand_push (mode, operands[0]); @@ -382,168 +371,83 @@ }) (define_expand "movmisalign" - [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "") - (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))] - "TARGET_AVX" -{ - ix86_expand_vector_move_misalign (mode, operands); - DONE; -}) - -(define_expand "movmisalign" - [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "") - (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))] + [(set (match_operand:V16 0 "nonimmediate_operand" "") + (match_operand:V16 1 "nonimmediate_operand" ""))] "TARGET_SSE" { ix86_expand_vector_move_misalign (mode, operands); DONE; }) -(define_expand "avx_movu" - [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "") - (unspec:AVXMODEF2P - [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "")] 
+(define_expand "_movu" + [(set (match_operand:VF 0 "nonimmediate_operand" "") + (unspec:VF + [(match_operand:VF 1 "nonimmediate_operand" "")] UNSPEC_MOVU))] - "AVX_VEC_FLOAT_MODE_P (mode)" + "TARGET_SSE" { if (MEM_P (operands[0]) && MEM_P (operands[1])) operands[1] = force_reg (mode, operands[1]); }) -(define_insn "*avx_movu" - [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m") - (unspec:AVXMODEF2P - [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")] +(define_insn "*_movu" + [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m") + (unspec:VF + [(match_operand:VF 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVU))] - "AVX_VEC_FLOAT_MODE_P (mode) - && !(MEM_P (operands[0]) && MEM_P (operands[1]))" - "vmovu\t{%1, %0|%0, %1}" + "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "%vmovu\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "movu" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - -(define_insn "sse2_movq128" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (vec_concat:V2DI - (vec_select:DI - (match_operand:V2DI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0)])) - (const_int 0)))] - "TARGET_SSE2" - "%vmovq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") (set_attr "prefix" "maybe_vex") - (set_attr "mode" "TI")]) - -(define_expand "_movu" - [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "") - (unspec:SSEMODEF2P - [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")] - UNSPEC_MOVU))] - "SSE_VEC_FLOAT_MODE_P (mode)" -{ - if (MEM_P (operands[0]) && MEM_P (operands[1])) - operands[1] = force_reg (mode, operands[1]); -}) - -(define_insn "*_movu" - [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m") - (unspec:SSEMODEF2P - [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")] - UNSPEC_MOVU))] - "SSE_VEC_FLOAT_MODE_P (mode) - && !(MEM_P (operands[0]) && MEM_P (operands[1]))" - "movu\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "movu" "1") (set_attr 
"mode" "")]) -(define_expand "avx_movdqu" - [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "") - (unspec:AVXMODEQI - [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "")] - UNSPEC_MOVU))] - "TARGET_AVX" -{ - if (MEM_P (operands[0]) && MEM_P (operands[1])) - operands[1] = force_reg (mode, operands[1]); -}) - -(define_insn "*avx_movdqu" - [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m") - (unspec:AVXMODEQI - [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")] - UNSPEC_MOVU))] - "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" - "vmovdqu\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "movu" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - -(define_expand "sse2_movdqu" - [(set (match_operand:V16QI 0 "nonimmediate_operand" "") - (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "")] - UNSPEC_MOVU))] +(define_expand "_movdqu" + [(set (match_operand:VI1 0 "nonimmediate_operand" "") + (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")] + UNSPEC_MOVU))] "TARGET_SSE2" { if (MEM_P (operands[0]) && MEM_P (operands[1])) - operands[1] = force_reg (V16QImode, operands[1]); + operands[1] = force_reg (mode, operands[1]); }) -(define_insn "*sse2_movdqu" - [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") - (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")] - UNSPEC_MOVU))] +(define_insn "*_movdqu" + [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m") + (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")] + UNSPEC_MOVU))] "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" - "movdqu\t{%1, %0|%0, %1}" + "%vmovdqu\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "movu" "1") - (set_attr "prefix_data16" "1") - (set_attr "mode" "TI")]) - -(define_insn "avx_movnt" - [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m") - (unspec:AVXMODEF2P - [(match_operand:AVXMODEF2P 1 "register_operand" "x")] - UNSPEC_MOVNT))] - 
"AVX_VEC_FLOAT_MODE_P (mode)" - "vmovnt\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - -(define_insn "_movnt" - [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m") - (unspec:SSEMODEF2P - [(match_operand:SSEMODEF2P 1 "register_operand" "x")] - UNSPEC_MOVNT))] - "SSE_VEC_FLOAT_MODE_P (mode)" - "movnt\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "mode" "")]) - -(define_insn "avx_movnt" - [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m") - (unspec:AVXMODEDI - [(match_operand:AVXMODEDI 1 "register_operand" "x")] - UNSPEC_MOVNT))] - "TARGET_AVX" - "vmovntdq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) + (set (attr "prefix_data16") + (if_then_else + (ne (symbol_ref "TARGET_AVX") (const_int 0)) + (const_string "*") + (const_string "1"))) + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "")]) -(define_insn "sse2_movntv2di" - [(set (match_operand:V2DI 0 "memory_operand" "=m") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")] - UNSPEC_MOVNT))] - "TARGET_SSE2" - "movntdq\t{%1, %0|%0, %1}" +(define_insn "_lddqu" + [(set (match_operand:VI1 0 "register_operand" "=x") + (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")] + UNSPEC_LDDQU))] + "TARGET_SSE3" + "%vlddqu\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") - (set_attr "prefix_data16" "1") - (set_attr "mode" "TI")]) + (set_attr "movu" "1") + (set (attr "prefix_data16") + (if_then_else + (ne (symbol_ref "TARGET_AVX") (const_int 0)) + (const_string "*") + (const_string "0"))) + (set (attr "prefix_rep") + (if_then_else + (ne (symbol_ref "TARGET_AVX") (const_int 0)) + (const_string "*") + (const_string "1"))) + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "")]) (define_insn "sse2_movntsi" [(set (match_operand:SI 0 "memory_operand" "=m") @@ -555,39 +459,48 @@ (set_attr "prefix_data16" "0") (set_attr "mode" "V2DF")]) -(define_insn "avx_lddqu" - [(set 
(match_operand:AVXMODEQI 0 "register_operand" "=x") - (unspec:AVXMODEQI - [(match_operand:AVXMODEQI 1 "memory_operand" "m")] - UNSPEC_LDDQU))] - "TARGET_AVX" - "vlddqu\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "movu" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - -(define_insn "sse3_lddqu" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")] - UNSPEC_LDDQU))] - "TARGET_SSE3" - "lddqu\t{%1, %0|%0, %1}" +(define_insn "_movnt" + [(set (match_operand:VF 0 "memory_operand" "=m") + (unspec:VF [(match_operand:VF 1 "register_operand" "x")] + UNSPEC_MOVNT))] + "TARGET_SSE" + "%vmovnt\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") - (set_attr "movu" "1") - (set_attr "prefix_data16" "0") - (set_attr "prefix_rep" "1") - (set_attr "mode" "TI")]) + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "")]) + +(define_insn "_movnt" + [(set (match_operand:VI8 0 "memory_operand" "=m") + (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")] + UNSPEC_MOVNT))] + "TARGET_SSE2" + "%vmovntdq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set (attr "prefix_data16") + (if_then_else + (ne (symbol_ref "TARGET_AVX") (const_int 0)) + (const_string "*") + (const_string "1"))) + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "")]) ; Expand patterns for non-temporal stores. At the moment, only those ; that directly map to insns are defined; it would be possible to ; define patterns for other modes that would expand to several insns. +;; Modes handled by storent patterns. 
+(define_mode_iterator STORENT_MODE + [(SI "TARGET_SSE2") (SF "TARGET_SSE4A") (DF "TARGET_SSE4A") + (V2DI "TARGET_SSE2") + (V8SF "TARGET_AVX") V4SF + (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) + (define_expand "storent" [(set (match_operand:STORENT_MODE 0 "memory_operand" "") (unspec:STORENT_MODE [(match_operand:STORENT_MODE 1 "register_operand" "")] - UNSPEC_MOVNT))]) + UNSPEC_MOVNT))] + "TARGET_SSE") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; @@ -599,7 +512,7 @@ [(set (match_operand:VF 0 "register_operand" "") (absneg:VF (match_operand:VF 1 "register_operand" "")))] - "" + "TARGET_SSE" "ix86_expand_fp_absneg_operator (, mode, operands); DONE;") (define_insn_and_split "*absneg2" @@ -607,7 +520,7 @@ (match_operator:VF 3 "absneg_operator" [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")])) (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))] - "" + "TARGET_SSE" "#" "reload_completed" [(const_int 0)] @@ -645,7 +558,7 @@ (plusminus:VF (match_operand:VF 1 "nonimmediate_operand" "") (match_operand:VF 2 "nonimmediate_operand" "")))] - "" + "TARGET_SSE" "ix86_fixup_binary_operands_no_copy (, mode, operands);") (define_insn "*3" @@ -653,7 +566,7 @@ (plusminus:VF (match_operand:VF 1 "nonimmediate_operand" "0,x") (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] - "ix86_binary_operator_ok (, mode, operands)" + "TARGET_SSE && ix86_binary_operator_ok (, mode, operands)" "@ \t{%2, %0|%0, %2} v\t{%2, %1, %0|%0, %1, %2}" @@ -670,7 +583,7 @@ (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")) (match_dup 1) (const_int 1)))] - "" + "TARGET_SSE" "@ \t{%2, %0|%0, %2} v\t{%2, %1, %0|%0, %1, %2}" @@ -684,7 +597,7 @@ (mult:VF (match_operand:VF 1 "nonimmediate_operand" "") (match_operand:VF 2 "nonimmediate_operand" "")))] - "" + "TARGET_SSE" "ix86_fixup_binary_operands_no_copy (MULT, mode, operands);") (define_insn "*mul3" @@ -692,7 +605,7 @@ (mult:VF (match_operand:VF 1 "nonimmediate_operand" "%0,x") (match_operand:VF 2 
"nonimmediate_operand" "xm,xm")))] - "ix86_binary_operator_ok (MULT, mode, operands)" + "TARGET_SSE && ix86_binary_operator_ok (MULT, mode, operands)" "@ mul\t{%2, %0|%0, %2} vmul\t{%2, %1, %0|%0, %1, %2}" @@ -709,7 +622,7 @@ (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")) (match_dup 1) (const_int 1)))] - "" + "TARGET_SSE" "@ mul\t{%2, %0|%0, %2} vmul\t{%2, %1, %0|%0, %1, %2}" @@ -722,14 +635,14 @@ [(set (match_operand:VF2 0 "register_operand" "") (div:VF2 (match_operand:VF2 1 "register_operand" "") (match_operand:VF2 2 "nonimmediate_operand" "")))] - "" + "TARGET_SSE2" "ix86_fixup_binary_operands_no_copy (DIV, mode, operands);") (define_expand "div3" [(set (match_operand:VF1 0 "register_operand" "") (div:VF1 (match_operand:VF1 1 "register_operand" "") (match_operand:VF1 2 "nonimmediate_operand" "")))] - "" + "TARGET_SSE" { ix86_fixup_binary_operands_no_copy (DIV, mode, operands); @@ -747,7 +660,7 @@ (div:VF (match_operand:VF 1 "register_operand" "0,x") (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] - "" + "TARGET_SSE" "@ div\t{%2, %0|%0, %2} vdiv\t{%2, %1, %0|%0, %1, %2}" @@ -764,7 +677,7 @@ (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")) (match_dup 1) (const_int 1)))] - "" + "TARGET_SSE" "@ div\t{%2, %0|%0, %2} vdiv\t{%2, %1, %0|%0, %1, %2}" @@ -777,7 +690,7 @@ [(set (match_operand:VF1 0 "register_operand" "=x") (unspec:VF1 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))] - "" + "TARGET_SSE" "%vrcpps\t{%1, %0|%0, %1}" [(set_attr "type" "sse") (set_attr "atom_sse_attr" "rcp") @@ -803,12 +716,13 @@ (define_expand "sqrt2" [(set (match_operand:VF2 0 "register_operand" "") - (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]) + (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))] + "TARGET_SSE2") (define_expand "sqrt2" [(set (match_operand:VF1 0 "register_operand" "") (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))] - "" + "TARGET_SSE" { if (TARGET_SSE_MATH && TARGET_RECIP && 
!optimize_insn_for_size_p () && flag_finite_math_only && !flag_trapping_math @@ -822,7 +736,7 @@ (define_insn "_sqrt2" [(set (match_operand:VF 0 "register_operand" "=x") (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))] - "" + "TARGET_SSE" "%vsqrt\t{%1, %0|%0, %1}" [(set_attr "type" "sse") (set_attr "atom_sse_attr" "sqrt") @@ -836,7 +750,7 @@ (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm")) (match_operand:VF_128 2 "register_operand" "0,x") (const_int 1)))] - "" + "TARGET_SSE" "@ sqrt\t{%1, %0|%0, %1} vsqrt\t{%1, %2, %0|%0, %2, %1}" @@ -860,7 +774,7 @@ [(set (match_operand:VF1 0 "register_operand" "=x") (unspec:VF1 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))] - "" + "TARGET_SSE" "%vrsqrtps\t{%1, %0|%0, %1}" [(set_attr "type" "sse") (set_attr "prefix" "maybe_vex") @@ -891,7 +805,7 @@ (smaxmin:VF (match_operand:VF 1 "nonimmediate_operand" "") (match_operand:VF 2 "nonimmediate_operand" "")))] - "" + "TARGET_SSE" { if (!flag_finite_math_only) operands[1] = force_reg (mode, operands[1]); @@ -903,7 +817,7 @@ (smaxmin:VF (match_operand:VF 1 "nonimmediate_operand" "%0,x") (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] - "flag_finite_math_only + "TARGET_SSE && flag_finite_math_only && ix86_binary_operator_ok (, mode, operands)" "@ \t{%2, %0|%0, %2} @@ -918,7 +832,7 @@ (smaxmin:VF (match_operand:VF 1 "register_operand" "0,x") (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] - "!flag_finite_math_only" + "TARGET_SSE && !flag_finite_math_only" "@ \t{%2, %0|%0, %2} v\t{%2, %1, %0|%0, %1, %2}" @@ -935,7 +849,7 @@ (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")) (match_dup 1) (const_int 1)))] - "" + "TARGET_SSE" "@ \t{%2, %0|%0, %2} v\t{%2, %1, %0|%0, %1, %2}" @@ -956,10 +870,10 @@ [(match_operand:VF 1 "register_operand" "0,x") (match_operand:VF 2 "nonimmediate_operand" "xm,xm")] UNSPEC_IEEE_MIN))] - "" + "TARGET_SSE" "@ - vmin\t{%2, %1, %0|%0, %1, %2} - min\t{%2, %0|%0, %2}" + min\t{%2, %0|%0, %2} + vmin\t{%2, %1, 
%0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseadd") (set_attr "prefix" "orig,vex") @@ -971,10 +885,10 @@ [(match_operand:VF 1 "register_operand" "0,x") (match_operand:VF 2 "nonimmediate_operand" "xm,xm")] UNSPEC_IEEE_MAX))] - "" + "TARGET_SSE" "@ - vmax\t{%2, %1, %0|%0, %1, %2} - max\t{%2, %0|%0, %2}" + max\t{%2, %0|%0, %2} + vmax\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseadd") (set_attr "prefix" "orig,vex") @@ -1276,12 +1190,28 @@ (set_attr "prefix" "vex") (set_attr "mode" "")]) +(define_insn "*_maskcmp3_comm" + [(set (match_operand:VF 0 "register_operand" "=x,x") + (match_operator:VF 3 "sse_comparison_operator" + [(match_operand:VF 1 "register_operand" "%0,x") + (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))] + "TARGET_SSE + && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE" + "@ + cmp%D3\t{%2, %0|%0, %2} + vcmp%D3\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "")]) + (define_insn "_maskcmp3" [(set (match_operand:VF 0 "register_operand" "=x,x") (match_operator:VF 3 "sse_comparison_operator" [(match_operand:VF 1 "register_operand" "0,x") (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))] - "" + "TARGET_SSE" "@ cmp%D3\t{%2, %0|%0, %2} vcmp%D3\t{%2, %1, %0|%0, %1, %2}" @@ -1299,7 +1229,7 @@ (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")]) (match_dup 1) (const_int 1)))] - "" + "TARGET_SSE" "@ cmp%D3\t{%2, %0|%0, %2} vcmp%D3\t{%2, %1, %0|%0, %1, %2}" @@ -1319,7 +1249,7 @@ (match_operand: 1 "nonimmediate_operand" "xm") (parallel [(const_int 0)]))))] "SSE_FLOAT_MODE_P (mode)" - "%vcomis\t{%1, %0|%0, %1}" + "%vcomi\t{%1, %0|%0, %1}" [(set_attr "type" "ssecomi") (set_attr "prefix" "maybe_vex") (set_attr "prefix_rep" "0") @@ -1339,7 +1269,7 @@ (match_operand: 1 "nonimmediate_operand" "xm") (parallel [(const_int 0)]))))] "SSE_FLOAT_MODE_P (mode)" - 
"%vucomis\t{%1, %0|%0, %1}" + "%vucomi\t{%1, %0|%0, %1}" [(set_attr "type" "ssecomi") (set_attr "prefix" "maybe_vex") (set_attr "prefix_rep" "0") @@ -1357,7 +1287,7 @@ (match_operand:VF 5 "nonimmediate_operand" "")]) (match_operand:VF 1 "general_operand" "") (match_operand:VF 2 "general_operand" "")))] - "" + "TARGET_SSE" { bool ok = ix86_expand_fp_vcond (operands); gcc_assert (ok); @@ -1376,7 +1306,7 @@ (not:VF (match_operand:VF 1 "register_operand" "0,x")) (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] - "" + "TARGET_SSE" { static char buf[32]; const char *insn; @@ -1408,7 +1338,7 @@ (any_logic:VF (match_operand:VF 1 "nonimmediate_operand" "") (match_operand:VF 2 "nonimmediate_operand" "")))] - "" + "TARGET_SSE" "ix86_fixup_binary_operands_no_copy (, mode, operands);") (define_insn "*3" @@ -1416,7 +1346,7 @@ (any_logic:VF (match_operand:VF 1 "nonimmediate_operand" "%0,x") (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] - "ix86_binary_operator_ok (, mode, operands)" + "TARGET_SSE && ix86_binary_operator_ok (, mode, operands)" { static char buf[32]; const char *insn; @@ -1453,7 +1383,7 @@ (match_operand:VF 2 "nonimmediate_operand" ""))) (set (match_operand:VF 0 "register_operand" "") (ior:VF (match_dup 4) (match_dup 5)))] - "" + "TARGET_SSE" { operands[3] = ix86_build_signbit_mask (mode, 1, 0); @@ -1477,15 +1407,15 @@ static char buf[32]; const char *insn; const char *suffix - = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "s" : ""; + = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : ""; switch (which_alternative) { case 0: - insn = "andnp%s\t{%%2, %%0|%%0, %%2}"; + insn = "andn%s\t{%%2, %%0|%%0, %%2}"; break; case 1: - insn = "vandnp%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; + insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; break; default: gcc_unreachable (); @@ -1509,15 +1439,15 @@ static char buf[32]; const char *insn; const char *suffix - = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "s" : ""; + = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? 
"ps" : ""; switch (which_alternative) { case 0: - insn = "p%s\t{%%2, %%0|%%0, %%2}"; + insn = "%s\t{%%2, %%0|%%0, %%2}"; break; case 1: - insn = "vp%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; + insn = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; break; default: gcc_unreachable (); @@ -1563,8 +1493,7 @@ (match_operand:FMAMODE 1 "nonimmediate_operand") (match_operand:FMAMODE 2 "nonimmediate_operand") (match_operand:FMAMODE 3 "nonimmediate_operand")))] - "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH" - "") + "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH") (define_expand "fms4" [(set (match_operand:FMAMODE 0 "register_operand") @@ -1572,8 +1501,7 @@ (match_operand:FMAMODE 1 "nonimmediate_operand") (match_operand:FMAMODE 2 "nonimmediate_operand") (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))] - "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH" - "") + "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH") (define_expand "fnma4" [(set (match_operand:FMAMODE 0 "register_operand") @@ -1581,8 +1509,7 @@ (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand")) (match_operand:FMAMODE 2 "nonimmediate_operand") (match_operand:FMAMODE 3 "nonimmediate_operand")))] - "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH" - "") + "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH") (define_expand "fnms4" [(set (match_operand:FMAMODE 0 "register_operand") @@ -1590,8 +1517,7 @@ (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand")) (match_operand:FMAMODE 2 "nonimmediate_operand") (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))] - "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH" - "") + "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH") ;; The builtin for fma4intrin.h is not constrained by SSE math enabled. 
(define_expand "fma4i_fmadd_" @@ -1600,8 +1526,7 @@ (match_operand:FMAMODE 1 "nonimmediate_operand") (match_operand:FMAMODE 2 "nonimmediate_operand") (match_operand:FMAMODE 3 "nonimmediate_operand")))] - "TARGET_FMA || TARGET_FMA4" - "") + "TARGET_FMA || TARGET_FMA4") (define_insn "*fma4i_fmadd_" [(set (match_operand:FMAMODE 0 "register_operand" "=x,x") @@ -1655,12 +1580,12 @@ ;; entire destination register, with the high-order elements zeroed. (define_expand "fma4i_vmfmadd_" - [(set (match_operand:SSEMODEF2P 0 "register_operand") - (vec_merge:SSEMODEF2P - (fma:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand") - (match_operand:SSEMODEF2P 3 "nonimmediate_operand")) + [(set (match_operand:VF_128 0 "register_operand") + (vec_merge:VF_128 + (fma:VF_128 + (match_operand:VF_128 1 "nonimmediate_operand") + (match_operand:VF_128 2 "nonimmediate_operand") + (match_operand:VF_128 3 "nonimmediate_operand")) (match_dup 4) (const_int 1)))] "TARGET_FMA4" @@ -1669,13 +1594,13 @@ }) (define_insn "*fma4i_vmfmadd_" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") - (vec_merge:SSEMODEF2P - (fma:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m") - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) - (match_operand:SSEMODEF2P 4 "const0_operand" "") + [(set (match_operand:VF_128 0 "register_operand" "=x,x") + (vec_merge:VF_128 + (fma:VF_128 + (match_operand:VF_128 1 "nonimmediate_operand" "%x,x") + (match_operand:VF_128 2 "nonimmediate_operand" " x,m") + (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")) + (match_operand:VF_128 4 "const0_operand" "") (const_int 1)))] "TARGET_FMA4" "vfmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" @@ -1683,14 +1608,14 @@ (set_attr "mode" "")]) (define_insn "*fma4i_vmfmsub_" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") - (vec_merge:SSEMODEF2P - (fma:SSEMODEF2P - 
(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m") - (neg:SSEMODEF2P - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))) - (match_operand:SSEMODEF2P 4 "const0_operand" "") + [(set (match_operand:VF_128 0 "register_operand" "=x,x") + (vec_merge:VF_128 + (fma:VF_128 + (match_operand:VF_128 1 "nonimmediate_operand" "%x,x") + (match_operand:VF_128 2 "nonimmediate_operand" " x,m") + (neg:VF_128 + (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))) + (match_operand:VF_128 4 "const0_operand" "") (const_int 1)))] "TARGET_FMA4" "vfmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" @@ -1698,14 +1623,14 @@ (set_attr "mode" "")]) (define_insn "*fma4i_vmfnmadd_" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") - (vec_merge:SSEMODEF2P - (fma:SSEMODEF2P - (neg:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")) - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m") - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) - (match_operand:SSEMODEF2P 4 "const0_operand" "") + [(set (match_operand:VF_128 0 "register_operand" "=x,x") + (vec_merge:VF_128 + (fma:VF_128 + (neg:VF_128 + (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")) + (match_operand:VF_128 2 "nonimmediate_operand" " x,m") + (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")) + (match_operand:VF_128 4 "const0_operand" "") (const_int 1)))] "TARGET_FMA4" "vfnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" @@ -1713,15 +1638,15 @@ (set_attr "mode" "")]) (define_insn "*fma4i_vmfnmsub_" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") - (vec_merge:SSEMODEF2P - (fma:SSEMODEF2P - (neg:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")) - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m") - (neg:SSEMODEF2P - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))) - (match_operand:SSEMODEF2P 4 "const0_operand" "") + [(set (match_operand:VF_128 0 "register_operand" 
"=x,x") + (vec_merge:VF_128 + (fma:VF_128 + (neg:VF_128 + (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")) + (match_operand:VF_128 2 "nonimmediate_operand" " x,m") + (neg:VF_128 + (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))) + (match_operand:VF_128 4 "const0_operand" "") (const_int 1)))] "TARGET_FMA4" "vfnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" @@ -1744,37 +1669,36 @@ ;; But this doesn't seem useful in practice. (define_expand "fmaddsub_" - [(set (match_operand:AVXMODEF2P 0 "register_operand") - (unspec:AVXMODEF2P - [(match_operand:AVXMODEF2P 1 "nonimmediate_operand") - (match_operand:AVXMODEF2P 2 "nonimmediate_operand") - (match_operand:AVXMODEF2P 3 "nonimmediate_operand")] + [(set (match_operand:VF 0 "register_operand") + (unspec:VF + [(match_operand:VF 1 "nonimmediate_operand") + (match_operand:VF 2 "nonimmediate_operand") + (match_operand:VF 3 "nonimmediate_operand")] UNSPEC_FMADDSUB))] - "TARGET_FMA || TARGET_FMA4" - "") + "TARGET_FMA || TARGET_FMA4") (define_insn "*fma4_fmaddsub_" - [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x") - (unspec:AVXMODEF2P - [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x,x") - (match_operand:AVXMODEF2P 2 "nonimmediate_operand" " x,m") - (match_operand:AVXMODEF2P 3 "nonimmediate_operand" "xm,x")] + [(set (match_operand:VF 0 "register_operand" "=x,x") + (unspec:VF + [(match_operand:VF 1 "nonimmediate_operand" "%x,x") + (match_operand:VF 2 "nonimmediate_operand" " x,m") + (match_operand:VF 3 "nonimmediate_operand" "xm,x")] UNSPEC_FMADDSUB))] "TARGET_FMA4" - "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}" + "vfmaddsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) (define_insn "*fma4_fmsubadd_" - [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x") - (unspec:AVXMODEF2P - [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x,x") - (match_operand:AVXMODEF2P 2 "nonimmediate_operand" " x,m") - (neg:AVXMODEF2P - (match_operand:AVXMODEF2P 3 
"nonimmediate_operand" "xm,x"))] + [(set (match_operand:VF 0 "register_operand" "=x,x") + (unspec:VF + [(match_operand:VF 1 "nonimmediate_operand" "%x,x") + (match_operand:VF 2 "nonimmediate_operand" " x,m") + (neg:VF + (match_operand:VF 3 "nonimmediate_operand" "xm,x"))] UNSPEC_FMADDSUB))] "TARGET_FMA4" - "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}" + "vfmsubadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -1793,7 +1717,7 @@ "TARGET_FMA" "@ vfmadd132\t{%2, %3, %0|%0, %3, %2} - vfmadd312\t{%3, %2, %0|%0, %2, %3} + vfmadd213\t{%3, %2, %0|%0, %2, %3} vfmadd231\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -1808,12 +1732,12 @@ "TARGET_FMA" "@ vfmsub132\t{%2, %3, %0|%0, %3, %2} - vfmsub312\t{%3, %2, %0|%0, %2, %3} + vfmsub213\t{%3, %2, %0|%0, %2, %3} vfmsub231\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) -(define_insn "*fma_fmadd_" +(define_insn "*fma_fnmadd_" [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") (fma:FMAMODE (neg:FMAMODE @@ -1823,12 +1747,12 @@ "TARGET_FMA" "@ vfnmadd132\t{%2, %3, %0|%0, %3, %2} - vfnmadd312\t{%3, %2, %0|%0, %2, %3} + vfnmadd213\t{%3, %2, %0|%0, %2, %3} vfnmadd231\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) -(define_insn "*fma_fmsub_" +(define_insn "*fma_fnmsub_" [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") (fma:FMAMODE (neg:FMAMODE @@ -1839,17 +1763,17 @@ "TARGET_FMA" "@ vfnmsub132\t{%2, %3, %0|%0, %3, %2} - vfnmsub312\t{%3, %2, %0|%0, %2, %3} + vfnmsub213\t{%3, %2, %0|%0, %2, %3} vfnmsub231\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) (define_insn "*fma_fmaddsub_" - [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x,x") - (unspec:AVXMODEF2P - [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%0, 0,x") - (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm, x,xm") - (match_operand:AVXMODEF2P 3
"nonimmediate_operand" " x,xm,0")] + [(set (match_operand:VF 0 "register_operand" "=x,x,x") + (unspec:VF + [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x") + (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm") + (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")] UNSPEC_FMADDSUB))] "TARGET_FMA" "@ @@ -1860,12 +1784,12 @@ (set_attr "mode" "")]) (define_insn "*fma_fmsubadd_" - [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x,x") - (unspec:AVXMODEF2P - [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%0, 0,x") - (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm, x,xm") - (neg:AVXMODEF2P - (match_operand:AVXMODEF2P 3 "nonimmediate_operand" " x,xm,0"))] + [(set (match_operand:VF 0 "register_operand" "=x,x,x") + (unspec:VF + [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x") + (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm") + (neg:VF + (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))] UNSPEC_FMADDSUB))] "TARGET_FMA" "@ @@ -2722,7 +2646,7 @@ ix86_build_const_vector (V2DFmode, 1, x)); operands[5] = gen_reg_rtx (V4SImode); - + for (i = 6; i < 9; i++) operands[i] = gen_reg_rtx (V2DFmode); }) @@ -2847,7 +2771,7 @@ "TARGET_SSE" { rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands); - + emit_insn (gen_sse_movhlps (dst, operands[1], operands[2])); /* Fix up the destination if needed. */ @@ -2874,7 +2798,7 @@ movlps\t{%H2, %0|%0, %H2} vmovlps\t{%H2, %1, %0|%0, %1, %H2} %vmovhps\t{%2, %0|%0, %2}" - [(set_attr "isa" "noavx,avx,noavx,avx,base") + [(set_attr "isa" "noavx,avx,noavx,avx,*") (set_attr "type" "ssemov") (set_attr "prefix" "orig,vex,orig,vex,maybe_vex") (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")]) @@ -2892,7 +2816,7 @@ "TARGET_SSE" { rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands); - + emit_insn (gen_sse_movlhps (dst, operands[1], operands[2])); /* Fix up the destination if needed. 
*/ @@ -2919,7 +2843,7 @@ movhps\t{%2, %0|%0, %2} vmovhps\t{%2, %1, %0|%0, %1, %2} %vmovlps\t{%2, %H0|%H0, %2}" - [(set_attr "isa" "noavx,avx,noavx,avx,base") + [(set_attr "isa" "noavx,avx,noavx,avx,*") (set_attr "type" "ssemov") (set_attr "prefix" "orig,vex,orig,vex,maybe_vex") (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")]) @@ -3200,11 +3124,11 @@ }) (define_insn "sse_shufps_" - [(set (match_operand:SSEMODE4S 0 "register_operand" "=x,x") - (vec_select:SSEMODE4S - (vec_concat: - (match_operand:SSEMODE4S 1 "register_operand" "0,x") - (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm,xm")) + [(set (match_operand:VI4F_128 0 "register_operand" "=x,x") + (vec_select:VI4F_128 + (vec_concat: + (match_operand:VI4F_128 1 "register_operand" "0,x") + (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm")) (parallel [(match_operand 3 "const_0_to_3_operand" "") (match_operand 4 "const_0_to_3_operand" "") (match_operand 5 "const_4_to_7_operand" "") @@ -3258,7 +3182,7 @@ "TARGET_SSE" { rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands); - + emit_insn (gen_sse_loadhps (dst, operands[1], operands[2])); /* Fix up the destination if needed. */ @@ -3282,7 +3206,7 @@ movlhps\t{%2, %0|%0, %2} vmovlhps\t{%2, %1, %0|%0, %1, %2} %vmovlps\t{%2, %H0|%H0, %2}" - [(set_attr "isa" "noavx,avx,noavx,avx,base") + [(set_attr "isa" "noavx,avx,noavx,avx,*") (set_attr "type" "ssemov") (set_attr "prefix" "orig,vex,orig,vex,maybe_vex") (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")]) @@ -3311,7 +3235,7 @@ "TARGET_SSE" { rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands); - + emit_insn (gen_sse_loadlps (dst, operands[1], operands[2])); /* Fix up the destination if needed. 
*/ @@ -3335,7 +3259,7 @@ movlps\t{%2, %0|%0, %2} vmovlps\t{%2, %1, %0|%0, %1, %2} %vmovlps\t{%2, %0|%0, %2}" - [(set_attr "isa" "noavx,avx,noavx,avx,base") + [(set_attr "isa" "noavx,avx,noavx,avx,*") (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov") (set_attr "length_immediate" "1,1,*,*,*") (set_attr "prefix" "orig,vex,orig,vex,maybe_vex") @@ -3363,7 +3287,7 @@ "TARGET_SSE" { if (!TARGET_AVX) - operands[1] = force_reg (V4SFmode, operands[1]); + operands[1] = force_reg (SFmode, operands[1]); }) (define_insn "*vec_dupv4sf_avx" @@ -3406,7 +3330,7 @@ %vmovss\t{%1, %0|%0, %1} punpckldq\t{%2, %0|%0, %2} movd\t{%1, %0|%0, %1}" - [(set_attr "isa" "noavx,avx,noavx,avx,base,base,base") + [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*") (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov") (set_attr "prefix_data16" "*,*,1,*,*,*,*") (set_attr "prefix_extra" "*,*,1,1,*,*,*") @@ -3431,7 +3355,7 @@ [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov") (set_attr "mode" "V4SF,SF,DI,DI")]) -(define_insn "*vec_concatv4sf_sse" +(define_insn "*vec_concatv4sf" [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x") (vec_concat:V4SF (match_operand:V2SF 1 "register_operand" " 0,x,0,x") @@ -3448,7 +3372,7 @@ (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")]) (define_expand "vec_init" - [(match_operand:SSEMODE 0 "register_operand" "") + [(match_operand:V_128 0 "register_operand" "") (match_operand 1 "" "")] "TARGET_SSE" { @@ -3458,79 +3382,43 @@ ;; Avoid combining registers from different units in a single alternative, ;; see comment above inline_secondary_memory_needed function in i386.c -(define_insn "*vec_set_0_sse4_1" - [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" - "=x,x,x ,x,x,x ,x ,m,m,m") - (vec_merge:SSEMODE4S - (vec_duplicate:SSEMODE4S +(define_insn "vec_set_0" + [(set (match_operand:VI4F_128 0 "nonimmediate_operand" + "=Y4,Y2,Y2,x,x,x,Y4 ,x ,m,m ,m") + (vec_merge:VI4F_128 + (vec_duplicate:VI4F_128 (match_operand: 2 "general_operand" - " 
x,m,*r,x,x,*rm,*rm,x,*r,fF")) - (match_operand:SSEMODE4S 1 "vector_move_operand" - " C,C,C ,0,x,0 ,x ,0,0 ,0") + " Y4,m ,*r,m,x,x,*rm,*rm,x,fF,*r")) + (match_operand:VI4F_128 1 "vector_move_operand" + " C ,C ,C ,C,0,x,0 ,x ,0,0 ,0") (const_int 1)))] - "TARGET_SSE4_1" + "TARGET_SSE" "@ %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe} %vmov\t{%2, %0|%0, %2} %vmovd\t{%2, %0|%0, %2} movss\t{%2, %0|%0, %2} + movss\t{%2, %0|%0, %2} vmovss\t{%2, %1, %0|%0, %1, %2} pinsrd\t{$0, %2, %0|%0, %2, 0} vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0} # # #" - [(set_attr "isa" "base,base,base,noavx,avx,noavx,avx,base,base,base") - (set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov,sselog,sselog,*,*,*") - (set_attr "prefix_extra" "*,*,*,*,*,1,1,*,*,*") - (set_attr "length_immediate" "*,*,*,*,*,1,1,*,*,*") - (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,vex,orig,vex,*,*,*") - (set_attr "mode" "SF,,SI,SF,SF,TI,TI,*,*,*")]) - -;; Avoid combining registers from different units in a single alternative, -;; see comment above inline_secondary_memory_needed function in i386.c -(define_insn "*vec_set_0_sse2" - [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" - "=x,x ,x,m,m ,m") - (vec_merge:SSEMODE4S - (vec_duplicate:SSEMODE4S - (match_operand: 2 "general_operand" - " m,*r,x,x,*r,fF")) - (match_operand:SSEMODE4S 1 "vector_move_operand" - " C, C,0,0,0 ,0") - (const_int 1)))] - "TARGET_SSE2" - "@ - mov\t{%2, %0|%0, %2} - movd\t{%2, %0|%0, %2} - movss\t{%2, %0|%0, %2} - # - # - #" - [(set_attr "type" "ssemov") - (set_attr "mode" ",SI,SF,*,*,*")]) - -;; Avoid combining registers from different units in a single alternative, -;; see comment above inline_secondary_memory_needed function in i386.c -(define_insn "vec_set_0" - [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" - "=x,x,m,m ,m") - (vec_merge:SSEMODE4S - (vec_duplicate:SSEMODE4S - (match_operand: 2 "general_operand" - " m,x,x,*r,fF")) - (match_operand:SSEMODE4S 1 "vector_move_operand" - " C,0,0,0 ,0") - (const_int 1)))] - 
"TARGET_SSE" - "@ - movss\t{%2, %0|%0, %2} - movss\t{%2, %0|%0, %2} - # - # - #" - [(set_attr "type" "ssemov") - (set_attr "mode" "SF,SF,*,*,*")]) + [(set_attr "isa" "*,*,*,noavx,noavx,avx,noavx,avx,*,*,*") + (set (attr "type") + (cond [(eq_attr "alternative" "0,6,7") + (const_string "sselog") + (eq_attr "alternative" "9") + (const_string "fmov") + (eq_attr "alternative" "10") + (const_string "imov") + ] + (const_string "ssemov"))) + (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*") + (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*") + (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*") + (set_attr "mode" "SF,,SI,SF,SF,SF,TI,TI,*,*,*")]) ;; A subset is vec_setv4sf. (define_insn "*vec_setv4sf_sse4_1" @@ -3539,8 +3427,10 @@ (vec_duplicate:V4SF (match_operand:SF 2 "nonimmediate_operand" "xm,xm")) (match_operand:V4SF 1 "register_operand" "0,x") - (match_operand:SI 3 "const_pow2_1_to_8_operand" "n,n")))] - "TARGET_SSE4_1" + (match_operand:SI 3 "const_int_operand" "")))] + "TARGET_SSE4_1 + && ((unsigned) exact_log2 (INTVAL (operands[3])) + < GET_MODE_NUNITS (V4SFmode))" { operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4); switch (which_alternative) @@ -3568,9 +3458,24 @@ (match_operand:SI 3 "const_0_to_255_operand" "n,n")] UNSPEC_INSERTPS))] "TARGET_SSE4_1" - "@ - insertps\t{%3, %2, %0|%0, %2, %3} - vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}" +{ + if (MEM_P (operands[2])) + { + unsigned count_s = INTVAL (operands[3]) >> 6; + if (count_s) + operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f); + operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4); + } + switch (which_alternative) + { + case 0: + return "insertps\t{%3, %2, %0|%0, %2, %3}"; + case 1: + return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + default: + gcc_unreachable (); + } +} [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog") (set_attr "prefix_data16" "1,*") @@ -3580,9 +3485,9 @@ (set_attr "mode" "V4SF")]) (define_split - [(set 
(match_operand:SSEMODE4S 0 "memory_operand" "") - (vec_merge:SSEMODE4S - (vec_duplicate:SSEMODE4S + [(set (match_operand:VI4F_128 0 "memory_operand" "") + (vec_merge:VI4F_128 + (vec_duplicate:VI4F_128 (match_operand: 1 "nonmemory_operand" "")) (match_dup 0) (const_int 1)))] @@ -3595,7 +3500,7 @@ }) (define_expand "vec_set" - [(match_operand:SSEMODE 0 "register_operand" "") + [(match_operand:V_128 0 "register_operand" "") (match_operand: 1 "register_operand" "") (match_operand 2 "const_int_operand" "")] "TARGET_SSE" @@ -3625,8 +3530,8 @@ }) (define_expand "avx_vextractf128" - [(match_operand: 0 "nonimmediate_operand" "") - (match_operand:AVX256MODE 1 "register_operand" "") + [(match_operand: 0 "nonimmediate_operand" "") + (match_operand:V_256 1 "register_operand" "") (match_operand:SI 2 "const_0_to_1_operand" "")] "TARGET_AVX" { @@ -3649,9 +3554,9 @@ }) (define_insn_and_split "vec_extract_lo_" - [(set (match_operand: 0 "nonimmediate_operand" "=x,m") - (vec_select: - (match_operand:AVX256MODE4P 1 "nonimmediate_operand" "xm,x") + [(set (match_operand: 0 "nonimmediate_operand" "=x,m") + (vec_select: + (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x") (parallel [(const_int 0) (const_int 1)])))] "TARGET_AVX" "#" @@ -3660,17 +3565,17 @@ { rtx op1 = operands[1]; if (REG_P (op1)) - op1 = gen_rtx_REG (mode, REGNO (op1)); + op1 = gen_rtx_REG (mode, REGNO (op1)); else - op1 = gen_lowpart (mode, op1); + op1 = gen_lowpart (mode, op1); emit_move_insn (operands[0], op1); DONE; }) (define_insn "vec_extract_hi_" - [(set (match_operand: 0 "nonimmediate_operand" "=x,m") - (vec_select: - (match_operand:AVX256MODE4P 1 "register_operand" "x,x") + [(set (match_operand: 0 "nonimmediate_operand" "=x,m") + (vec_select: + (match_operand:VI8F_256 1 "register_operand" "x,x") (parallel [(const_int 2) (const_int 3)])))] "TARGET_AVX" "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" @@ -3682,9 +3587,9 @@ (set_attr "mode" "V8SF")]) (define_insn_and_split "vec_extract_lo_" - [(set (match_operand: 0 
"nonimmediate_operand" "=x,m") - (vec_select: - (match_operand:AVX256MODE8P 1 "nonimmediate_operand" "xm,x") + [(set (match_operand: 0 "nonimmediate_operand" "=x,m") + (vec_select: + (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])))] "TARGET_AVX" @@ -3694,17 +3599,17 @@ { rtx op1 = operands[1]; if (REG_P (op1)) - op1 = gen_rtx_REG (mode, REGNO (op1)); + op1 = gen_rtx_REG (mode, REGNO (op1)); else - op1 = gen_lowpart (mode, op1); + op1 = gen_lowpart (mode, op1); emit_move_insn (operands[0], op1); DONE; }) (define_insn "vec_extract_hi_" - [(set (match_operand: 0 "nonimmediate_operand" "=x,m") - (vec_select: - (match_operand:AVX256MODE8P 1 "register_operand" "x,x") + [(set (match_operand: 0 "nonimmediate_operand" "=x,m") + (vec_select: + (match_operand:VI4F_256 1 "register_operand" "x,x") (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)])))] "TARGET_AVX" @@ -3832,8 +3737,14 @@ DONE; }) +;; Modes handled by vec_extract patterns. 
+(define_mode_iterator VEC_EXTRACT_MODE + [V16QI V8HI V4SI V2DI + (V8SF "TARGET_AVX") V4SF + (V4DF "TARGET_AVX") V2DF]) + (define_expand "vec_extract" - [(match_operand: 0 "register_operand" "") + [(match_operand: 0 "register_operand" "") (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "") (match_operand 2 "const_int_operand" "")] "TARGET_SSE" @@ -3907,15 +3818,15 @@ operands[2] = force_reg (V2DFmode, operands[2]); }) -(define_insn "*sse3_interleave_highv2df" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m") +(define_insn "*vec_interleave_highv2df" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,Y3,x,x,m") (vec_select:V2DF (vec_concat:V4DF - (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x") - (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0")) + (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o ,o,o,x") + (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1 ,0,x,0")) (parallel [(const_int 1) (const_int 3)])))] - "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)" + "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)" "@ unpckhpd\t{%2, %0|%0, %2} vunpckhpd\t{%2, %1, %0|%0, %1, %2} @@ -3923,29 +3834,12 @@ movlpd\t{%H1, %0|%0, %H1} vmovlpd\t{%H1, %2, %0|%0, %2, %H1} %vmovhpd\t{%1, %0|%0, %1}" - [(set_attr "isa" "noavx,avx,base,noavx,avx,base") + [(set_attr "isa" "noavx,avx,*,noavx,avx,*") (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov") (set_attr "prefix_data16" "*,*,*,1,*,1") (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex") (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")]) -(define_insn "*sse2_interleave_highv2df" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") - (vec_select:V2DF - (vec_concat:V4DF - (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x") - (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0")) - (parallel [(const_int 1) - (const_int 3)])))] - "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)" - "@ - 
unpckhpd\t{%2, %0|%0, %2} - movlpd\t{%H1, %0|%0, %H1} - movhpd\t{%1, %0|%0, %1}" - [(set_attr "type" "sselog,ssemov,ssemov") - (set_attr "prefix_data16" "*,1,1") - (set_attr "mode" "V2DF,V1DF,V1DF")]) - ;; Recall that the 256-bit unpck insns only shuffle within their lanes. (define_expand "avx_movddup256" [(set (match_operand:V4DF 0 "register_operand" "") @@ -4026,15 +3920,15 @@ operands[1] = force_reg (V2DFmode, operands[1]); }) -(define_insn "*sse3_interleave_lowv2df" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o") +(define_insn "*vec_interleave_lowv2df" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,Y3,x,x,o") (vec_select:V2DF (vec_concat:V4DF - (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0") - (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x")) + (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m ,0,x,0") + (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1 ,m,m,x")) (parallel [(const_int 0) (const_int 2)])))] - "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)" + "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)" "@ unpcklpd\t{%2, %0|%0, %2} vunpcklpd\t{%2, %1, %0|%0, %1, %2} @@ -4042,29 +3936,12 @@ movhpd\t{%2, %0|%0, %2} vmovhpd\t{%2, %1, %0|%0, %1, %2} %vmovlpd\t{%2, %H0|%H0, %2}" - [(set_attr "isa" "noavx,avx,base,noavx,avx,base") + [(set_attr "isa" "noavx,avx,*,noavx,avx,*") (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov") (set_attr "prefix_data16" "*,*,*,1,*,1") (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex") (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")]) -(define_insn "*sse2_interleave_lowv2df" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o") - (vec_select:V2DF - (vec_concat:V4DF - (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0") - (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x")) - (parallel [(const_int 0) - (const_int 2)])))] - "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)" - "@ - 
unpcklpd\t{%2, %0|%0, %2} - movhpd\t{%2, %0|%0, %2} - movlpd\t{%2, %H0|%H0, %2}" - [(set_attr "type" "sselog,ssemov,ssemov") - (set_attr "prefix_data16" "*,1,1") - (set_attr "mode" "V2DF,V1DF,V1DF")]) - (define_split [(set (match_operand:V2DF 0 "memory_operand" "") (vec_select:V2DF @@ -4152,21 +4029,30 @@ DONE; }) +;; Modes handled by vec_extract_even/odd pattern. +(define_mode_iterator VEC_EXTRACT_EVENODD_MODE + [(V16QI "TARGET_SSE2") + (V8HI "TARGET_SSE2") + (V4SI "TARGET_SSE2") + (V2DI "TARGET_SSE2") + (V8SF "TARGET_AVX") V4SF + (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) + (define_expand "vec_extract_even" - [(match_operand:SSEMODE_EO 0 "register_operand" "") - (match_operand:SSEMODE_EO 1 "register_operand" "") - (match_operand:SSEMODE_EO 2 "register_operand" "")] - "" + [(match_operand:VEC_EXTRACT_EVENODD_MODE 0 "register_operand" "") + (match_operand:VEC_EXTRACT_EVENODD_MODE 1 "register_operand" "") + (match_operand:VEC_EXTRACT_EVENODD_MODE 2 "register_operand" "")] + "TARGET_SSE" { ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0); DONE; }) (define_expand "vec_extract_odd" - [(match_operand:SSEMODE_EO 0 "register_operand" "") - (match_operand:SSEMODE_EO 1 "register_operand" "") - (match_operand:SSEMODE_EO 2 "register_operand" "")] - "" + [(match_operand:VEC_EXTRACT_EVENODD_MODE 0 "register_operand" "") + (match_operand:VEC_EXTRACT_EVENODD_MODE 1 "register_operand" "") + (match_operand:VEC_EXTRACT_EVENODD_MODE 2 "register_operand" "")] + "TARGET_SSE" { ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1); DONE; @@ -4211,11 +4097,11 @@ (set_attr "mode" "TI")]) (define_insn "sse2_shufpd_" - [(set (match_operand:SSEMODE2D 0 "register_operand" "=x,x") - (vec_select:SSEMODE2D - (vec_concat: - (match_operand:SSEMODE2D 1 "register_operand" "0,x") - (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm,xm")) + [(set (match_operand:VI8F_128 0 "register_operand" "=x,x") + (vec_select:VI8F_128 + (vec_concat: + 
(match_operand:VI8F_128 1 "register_operand" "0,x") + (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm")) (parallel [(match_operand 3 "const_0_to_1_operand" "") (match_operand 4 "const_2_to_3_operand" "")])))] "TARGET_SSE2" @@ -4256,7 +4142,7 @@ # # #" - [(set_attr "isa" "base,noavx,avx,base,base,base") + [(set_attr "isa" "*,noavx,avx,*,*,*") (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov") (set (attr "prefix_data16") (if_then_else @@ -4276,6 +4162,20 @@ [(set (match_dup 0) (match_dup 1))] "operands[1] = adjust_address (operands[1], DFmode, 8);") +(define_insn "*vec_extractv2df_1_sse" + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x") + (vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o") + (parallel [(const_int 1)])))] + "!TARGET_SSE2 && TARGET_SSE + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + movhps\t{%1, %0|%0, %1} + movhlps\t{%1, %0|%0, %1} + movlps\t{%H1, %0|%0, %H1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V2SF,V4SF,V2SF")]) + ;; Avoid combining registers from different units in a single alternative, ;; see comment above inline_secondary_memory_needed function in i386.c (define_insn "sse2_storelpd" @@ -4312,6 +4212,20 @@ DONE; }) +(define_insn "*vec_extractv2df_0_sse" + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x") + (vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m") + (parallel [(const_int 0)])))] + "!TARGET_SSE2 && TARGET_SSE + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + movlps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1} + movlps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V2SF,V4SF,V2SF")]) + (define_expand "sse2_loadhpd_exp" [(set (match_operand:V2DF 0 "nonimmediate_operand" "") (vec_concat:V2DF @@ -4322,7 +4236,7 @@ "TARGET_SSE2" { rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands); - + emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2])); /* Fix up the destination if needed. 
*/ @@ -4336,30 +4250,28 @@ ;; see comment above inline_secondary_memory_needed function in i386.c (define_insn "sse2_loadhpd" [(set (match_operand:V2DF 0 "nonimmediate_operand" - "=x,x,x,x,x,o,o ,o") + "=x,x,x,x,o,o ,o") (vec_concat:V2DF (vec_select:DF (match_operand:V2DF 1 "nonimmediate_operand" - " 0,x,0,x,x,0,0 ,0") + " 0,x,0,x,0,0 ,0") (parallel [(const_int 0)])) (match_operand:DF 2 "nonimmediate_operand" - " m,m,x,x,0,x,*f,r")))] + " m,m,x,x,x,*f,r")))] "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "@ movhpd\t{%2, %0|%0, %2} vmovhpd\t{%2, %1, %0|%0, %1, %2} unpcklpd\t{%2, %0|%0, %2} vunpcklpd\t{%2, %1, %0|%0, %1, %2} - shufpd\t{$1, %1, %0|%0, %1, 1} # # #" - [(set_attr "isa" "noavx,avx,noavx,avx,noavx,base,base,base") - (set_attr "type" "ssemov,ssemov,sselog,sselog,sselog,ssemov,fmov,imov") - (set_attr "prefix_data16" "1,*,*,*,*,*,*,*") - (set_attr "length_immediate" "*,*,*,*,1,*,*,*") - (set_attr "prefix" "orig,vex,orig,vex,orig,*,*,*") - (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,V2DF,DF,DF,DF")]) + [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*") + (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov") + (set_attr "prefix_data16" "1,*,*,*,*,*,*") + (set_attr "prefix" "orig,vex,orig,vex,*,*,*") + (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")]) (define_split [(set (match_operand:V2DF 0 "memory_operand" "") @@ -4380,7 +4292,7 @@ "TARGET_SSE2" { rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands); - + emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2])); /* Fix up the destination if needed. 
*/ @@ -4415,8 +4327,16 @@ # # #" - [(set_attr "isa" "base,noavx,avx,noavx,avx,noavx,noavx,avx,base,base,base") - (set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov,fmov,imov") + [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*") + (set (attr "type") + (cond [(eq_attr "alternative" "5") + (const_string "sselog") + (eq_attr "alternative" "9") + (const_string "fmov") + (eq_attr "alternative" "10") + (const_string "imov") + ] + (const_string "ssemov"))) (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*") (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*") (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*") @@ -4431,36 +4351,6 @@ [(set (match_dup 0) (match_dup 1))] "operands[0] = adjust_address (operands[0], DFmode, 8);") -;; Not sure these two are ever used, but it doesn't hurt to have -;; them. -aoliva -(define_insn "*vec_extractv2df_1_sse" - [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x") - (vec_select:DF - (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o") - (parallel [(const_int 1)])))] - "!TARGET_SSE2 && TARGET_SSE - && !(MEM_P (operands[0]) && MEM_P (operands[1]))" - "@ - movhps\t{%1, %0|%0, %1} - movhlps\t{%1, %0|%0, %1} - movlps\t{%H1, %0|%0, %H1}" - [(set_attr "type" "ssemov") - (set_attr "mode" "V2SF,V4SF,V2SF")]) - -(define_insn "*vec_extractv2df_0_sse" - [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x") - (vec_select:DF - (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m") - (parallel [(const_int 0)])))] - "!TARGET_SSE2 && TARGET_SSE - && !(MEM_P (operands[0]) && MEM_P (operands[1]))" - "@ - movlps\t{%1, %0|%0, %1} - movaps\t{%1, %0|%0, %1} - movlps\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "mode" "V2SF,V4SF,V2SF")]) - (define_insn "sse2_movsd" [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o") (vec_merge:V2DF @@ -4478,8 +4368,12 @@ movhps\t{%H1, %0|%0, %H1} vmovhps\t{%H1, %2, %0|%0, %2, %H1} %vmovhps\t{%1, %H0|%H0, %1}" - 
[(set_attr "isa" "noavx,avx,noavx,avx,base,noavx,noavx,avx,base") - (set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov") + [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*") + (set (attr "type") + (if_then_else + (eq_attr "alternative" "5") + (const_string "sselog") + (const_string "ssemov"))) (set (attr "prefix_data16") (if_then_else (and (eq_attr "alternative" "2,4") @@ -4490,6 +4384,16 @@ (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex") (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")]) +(define_expand "vec_dupv2df" + [(set (match_operand:V2DF 0 "register_operand" "") + (vec_duplicate:V2DF + (match_operand:DF 1 "nonimmediate_operand" "")))] + "TARGET_SSE2" +{ + if (!TARGET_SSE3) + operands[1] = force_reg (DFmode, operands[1]); +}) + (define_insn "*vec_dupv2df_sse3" [(set (match_operand:V2DF 0 "register_operand" "=x") (vec_duplicate:V2DF @@ -4500,7 +4404,7 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "DF")]) -(define_insn "vec_dupv2df" +(define_insn "*vec_dupv2df" [(set (match_operand:V2DF 0 "register_operand" "=x") (vec_duplicate:V2DF (match_operand:DF 1 "register_operand" "0")))] @@ -4534,8 +4438,12 @@ %vmovsd\t{%1, %0|%0, %1} movlhps\t{%2, %0|%0, %2} movhps\t{%2, %0|%0, %2}" - [(set_attr "isa" "noavx,avx,noavx,avx,base,noavx,noavx") - (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov,ssemov,ssemov") + [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx") + (set (attr "type") + (if_then_else + (eq_attr "alternative" "0,1") + (const_string "sselog") + (const_string "ssemov"))) (set_attr "prefix_data16" "*,*,1,*,*,*,*") (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig") (set_attr "mode" "V2DF,V2DF,V1DF,V1DF,DF,V4SF,V2SF")]) @@ -4569,8 +4477,8 @@ (match_operand:VI_128 2 "nonimmediate_operand" "xm,xm")))] "TARGET_SSE2 && ix86_binary_operator_ok (, mode, operands)" "@ - p\t{%2, %0|%0, %2} - vp\t{%2, %1, %0|%0, %1, %2}" + p\t{%2, %0|%0, %2} + vp\t{%2, %1, %0|%0, %1, %2}" 
[(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") (set_attr "prefix_data16" "1,*") @@ -4592,8 +4500,8 @@ (match_operand:VI12_128 2 "nonimmediate_operand" "xm,xm")))] "TARGET_SSE2 && ix86_binary_operator_ok (, mode, operands)" "@ - p\t{%2, %0|%0, %2} - vp\t{%2, %1, %0|%0, %1, %2}" + p\t{%2, %0|%0, %2} + vp\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") (set_attr "prefix_data16" "1,*") @@ -5217,8 +5125,8 @@ (match_operand:SI 2 "nonmemory_operand" "xN,xN")))] "TARGET_SSE2" "@ - psra\t{%2, %0|%0, %2} - vpsra\t{%2, %1, %0|%0, %1, %2}" + psra\t{%2, %0|%0, %2} + vpsra\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseishft") (set (attr "length_immediate") @@ -5236,8 +5144,8 @@ (match_operand:SI 2 "nonmemory_operand" "xN,xN")))] "TARGET_SSE2" "@ - psrl\t{%2, %0|%0, %2} - vpsrl\t{%2, %1, %0|%0, %1, %2}" + psrl\t{%2, %0|%0, %2} + vpsrl\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseishft") (set (attr "length_immediate") @@ -5255,8 +5163,8 @@ (match_operand:SI 2 "nonmemory_operand" "xN,xN")))] "TARGET_SSE2" "@ - psll\t{%2, %0|%0, %2} - vpsll\t{%2, %1, %0|%0, %1, %2}" + psll\t{%2, %0|%0, %2} + vpsll\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseishft") (set (attr "length_immediate") @@ -5349,8 +5257,8 @@ (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))] "TARGET_SSE4_1 && ix86_binary_operator_ok (, mode, operands)" "@ - p\t{%2, %0|%0, %2} - vp\t{%2, %1, %0|%0, %1, %2}" + p\t{%2, %0|%0, %2} + vp\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") (set_attr "prefix_extra" "1,*") @@ -5478,8 +5386,8 @@ (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))] "TARGET_SSE4_1 && ix86_binary_operator_ok (, mode, operands)" "@ - p\t{%2, %0|%0, %2} - vp\t{%2, %1, %0|%0, %1, %2}" + p\t{%2, %0|%0, %2} + vp\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") (set_attr "prefix_extra" 
"1,*") @@ -5648,8 +5556,8 @@ "TARGET_SSE2 && !TARGET_XOP && ix86_binary_operator_ok (EQ, mode, operands)" "@ - pcmpeq\t{%2, %0|%0, %2} - vpcmpeq\t{%2, %1, %0|%0, %1, %2}" + pcmpeq\t{%2, %0|%0, %2} + vpcmpeq\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "ssecmp") (set_attr "prefix_data16" "1,*") @@ -5694,8 +5602,8 @@ (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))] "TARGET_SSE2 && !TARGET_XOP" "@ - pcmpgt\t{%2, %0|%0, %2} - vpcmpgt\t{%2, %1, %0|%0, %1, %2}" + pcmpgt\t{%2, %0|%0, %2} + vpcmpgt\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "ssecmp") (set_attr "prefix_data16" "1,*") @@ -5932,38 +5840,14 @@ ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(define_expand "vec_pack_trunc_v8hi" - [(match_operand:V16QI 0 "register_operand" "") - (match_operand:V8HI 1 "register_operand" "") - (match_operand:V8HI 2 "register_operand" "")] - "TARGET_SSE2" -{ - rtx op1 = gen_lowpart (V16QImode, operands[1]); - rtx op2 = gen_lowpart (V16QImode, operands[2]); - ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0); - DONE; -}) - -(define_expand "vec_pack_trunc_v4si" - [(match_operand:V8HI 0 "register_operand" "") - (match_operand:V4SI 1 "register_operand" "") - (match_operand:V4SI 2 "register_operand" "")] - "TARGET_SSE2" -{ - rtx op1 = gen_lowpart (V8HImode, operands[1]); - rtx op2 = gen_lowpart (V8HImode, operands[2]); - ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0); - DONE; -}) - -(define_expand "vec_pack_trunc_v2di" - [(match_operand:V4SI 0 "register_operand" "") - (match_operand:V2DI 1 "register_operand" "") - (match_operand:V2DI 2 "register_operand" "")] +(define_expand "vec_pack_trunc_" + [(match_operand: 0 "register_operand" "") + (match_operand:VI248_128 1 "register_operand" "") + (match_operand:VI248_128 2 "register_operand" "")] "TARGET_SSE2" { - rtx op1 = gen_lowpart (V4SImode, operands[1]); - rtx op2 = gen_lowpart (V4SImode, operands[2]); + rtx op1 = 
gen_lowpart (mode, operands[1]); + rtx op2 = gen_lowpart (mode, operands[2]); ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0); DONE; }) @@ -6098,172 +5982,115 @@ (const_int 2) (const_int 10) (const_int 3) (const_int 11)])))] "TARGET_SSE2" - "@ - punpcklwd\t{%2, %0|%0, %2} - vpunpcklwd\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") - (set_attr "type" "sselog") - (set_attr "prefix_data16" "1,*") - (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) - -(define_insn "vec_interleave_highv4si" - [(set (match_operand:V4SI 0 "register_operand" "=x,x") - (vec_select:V4SI - (vec_concat:V8SI - (match_operand:V4SI 1 "register_operand" "0,x") - (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")) - (parallel [(const_int 2) (const_int 6) - (const_int 3) (const_int 7)])))] - "TARGET_SSE2" - "@ - punpckhdq\t{%2, %0|%0, %2} - vpunpckhdq\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") - (set_attr "type" "sselog") - (set_attr "prefix_data16" "1,*") - (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) - -(define_insn "vec_interleave_lowv4si" - [(set (match_operand:V4SI 0 "register_operand" "=x,x") - (vec_select:V4SI - (vec_concat:V8SI - (match_operand:V4SI 1 "register_operand" "0,x") - (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")) - (parallel [(const_int 0) (const_int 4) - (const_int 1) (const_int 5)])))] - "TARGET_SSE2" - "@ - punpckldq\t{%2, %0|%0, %2} - vpunpckldq\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") - (set_attr "type" "sselog") - (set_attr "prefix_data16" "1,*") - (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) - -(define_insn "*sse4_1_pinsrb" - [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x") - (vec_merge:V16QI - (vec_duplicate:V16QI - (match_operand:QI 2 "nonimmediate_operand" "r,m,r,m")) - (match_operand:V16QI 1 "register_operand" "0,0,x,x") - (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n,n,n,n")))] - "TARGET_SSE4_1" -{ - operands[3] = GEN_INT (exact_log2 (INTVAL 
(operands[3]))); - - switch (which_alternative) - { - case 0: - return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}"; - case 1: - return "pinsrb\t{%3, %2, %0|%0, %2, %3}"; - case 2: - return "vpinsrb\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"; - case 3: - return "vpinsrb\t{%3, %2, %1, %0|%0, %1, %2, %3}"; - default: - gcc_unreachable (); - } -} - [(set_attr "isa" "noavx,noavx,avx,avx") - (set_attr "type" "sselog") - (set_attr "prefix_extra" "1") - (set_attr "length_immediate" "1") - (set_attr "prefix" "orig,orig,vex,vex") - (set_attr "mode" "TI")]) - -(define_insn "*sse2_pinsrw" - [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x") - (vec_merge:V8HI - (vec_duplicate:V8HI - (match_operand:HI 2 "nonimmediate_operand" "r,m,r,m")) - (match_operand:V8HI 1 "register_operand" "0,0,x,x") - (match_operand:SI 3 "const_pow2_1_to_128_operand" "n,n,n,n")))] - "TARGET_SSE2" -{ - operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); - - switch (which_alternative) - { - case 0: - return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}"; - case 1: - return "pinsrw\t{%3, %2, %0|%0, %2, %3}"; - case 2: - return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"; - case 3: - return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}"; - default: - gcc_unreachable (); - } -} - [(set_attr "isa" "noavx,noavx,avx,avx") + "@ + punpcklwd\t{%2, %0|%0, %2} + vpunpcklwd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog") - (set_attr "prefix_data16" "1,1,*,*") - (set_attr "prefix_extra" "*,*,1,1") - (set_attr "length_immediate" "1") - (set_attr "prefix" "orig,orig,vex,vex") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) -;; It must come before sse2_loadld since it is preferred. 
-(define_insn "*sse4_1_pinsrd" +(define_insn "vec_interleave_highv4si" [(set (match_operand:V4SI 0 "register_operand" "=x,x") - (vec_merge:V4SI - (vec_duplicate:V4SI - (match_operand:SI 2 "nonimmediate_operand" "rm,rm")) - (match_operand:V4SI 1 "register_operand" "0,x") - (match_operand:SI 3 "const_pow2_1_to_8_operand" "n,n")))] - "TARGET_SSE4_1" -{ - operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "0,x") + (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] + "TARGET_SSE2" + "@ + punpckhdq\t{%2, %0|%0, %2} + vpunpckhdq\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sselog") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "TI")]) - switch (which_alternative) - { - case 0: - return "pinsrd\t{%3, %2, %0|%0, %2, %3}"; - case 1: - return "vpinsrd\t{%3, %2, %1, %0|%0, %1, %2, %3}"; - default: - gcc_unreachable (); - } -} +(define_insn "vec_interleave_lowv4si" + [(set (match_operand:V4SI 0 "register_operand" "=x,x") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "0,x") + (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] + "TARGET_SSE2" + "@ + punpckldq\t{%2, %0|%0, %2} + vpunpckldq\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog") - (set_attr "prefix_extra" "1") - (set_attr "length_immediate" "1") + (set_attr "prefix_data16" "1,*") (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) -(define_insn "*sse4_1_pinsrq" - [(set (match_operand:V2DI 0 "register_operand" "=x,x") - (vec_merge:V2DI - (vec_duplicate:V2DI - (match_operand:DI 2 "nonimmediate_operand" "rm,rm")) - (match_operand:V2DI 1 "register_operand" "0,x") - (match_operand:SI 3 "const_pow2_1_to_2_operand" "n,n")))] - 
"TARGET_SSE4_1 && TARGET_64BIT" +;; Modes handled by pinsr patterns. +(define_mode_iterator PINSR_MODE + [(V16QI "TARGET_SSE4_1") V8HI + (V4SI "TARGET_SSE4_1") + (V2DI "TARGET_SSE4_1 && TARGET_64BIT")]) + +(define_mode_attr sse2p4_1 + [(V16QI "sse4_1") (V8HI "sse2") + (V4SI "sse4_1") (V2DI "sse4_1")]) + +;; sse4_1_pinsrd must come before sse2_loadld since it is preferred. +(define_insn "_pinsr" + [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x") + (vec_merge:PINSR_MODE + (vec_duplicate:PINSR_MODE + (match_operand: 2 "nonimmediate_operand" "r,m,r,m")) + (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x") + (match_operand:SI 3 "const_int_operand" "")))] + "TARGET_SSE2 + && ((unsigned) exact_log2 (INTVAL (operands[3])) + < GET_MODE_NUNITS (mode))" { operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); switch (which_alternative) { case 0: - return "pinsrq\t{%3, %2, %0|%0, %2, %3}"; + if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode)) + return "pinsr\t{%3, %k2, %0|%0, %k2, %3}"; + /* FALLTHRU */ case 1: - return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + return "pinsr\t{%3, %2, %0|%0, %2, %3}"; + case 2: + if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode)) + return "vpinsr\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"; + /* FALLTHRU */ + case 3: + return "vpinsr\t{%3, %2, %1, %0|%0, %1, %2, %3}"; default: gcc_unreachable (); } } - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx,avx") (set_attr "type" "sselog") - (set_attr "prefix_rex" "1,*") - (set_attr "prefix_extra" "1") + (set (attr "prefix_rex") + (if_then_else + (and (eq (symbol_ref "TARGET_AVX") (const_int 0)) + (eq (const_string "mode") (const_string "V2DImode"))) + (const_string "1") + (const_string "*"))) + (set (attr "prefix_data16") + (if_then_else + (and (eq (symbol_ref "TARGET_AVX") (const_int 0)) + (eq (const_string "mode") (const_string "V8HImode"))) + (const_string "1") + (const_string "*"))) + (set (attr "prefix_extra") + (if_then_else + (and (eq (symbol_ref 
"TARGET_AVX") (const_int 0)) + (eq (const_string "mode") (const_string "V8HImode"))) + (const_string "*") + (const_string "1"))) (set_attr "length_immediate" "1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,orig,vex,vex") (set_attr "mode" "TI")]) (define_insn "*sse4_1_pextrb_" @@ -6347,7 +6174,7 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) -;; It must come before *vec_extractv2di_1_sse since it is preferred. +;; It must come before *vec_extractv2di_1_rex64 since it is preferred. (define_insn "*sse4_1_pextrq" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") (vec_select:DI @@ -6516,7 +6343,7 @@ movss\t{%2, %0|%0, %2} movss\t{%2, %0|%0, %2} vmovss\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "base,base,noavx,noavx,avx") + [(set_attr "isa" "*,*,noavx,noavx,avx") (set_attr "type" "ssemov") (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex") (set_attr "mode" "TI,TI,V4SF,SF,SF")]) @@ -6567,9 +6394,8 @@ "@ # # - %vmov{q}\t{%1, %0|%0, %1}" + mov{q}\t{%1, %0|%0, %1}" [(set_attr "type" "*,*,imov") - (set_attr "prefix" "*,*,maybe_vex") (set_attr "mode" "*,*,DI")]) (define_insn "*sse2_storeq" @@ -6604,47 +6430,34 @@ psrldq\t{$8, %0|%0, 8} vpsrldq\t{$8, %1, %0|%0, %1, 8} %vmovq\t{%H1, %0|%0, %H1} - %vmov{q}\t{%H1, %0|%0, %H1}" - [(set_attr "isa" "base,noavx,avx,base,base") + mov{q}\t{%H1, %0|%0, %H1}" + [(set_attr "isa" "*,noavx,avx,*,*") (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov") (set_attr "length_immediate" "*,1,1,*,*") (set_attr "memory" "*,none,none,*,*") - (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,maybe_vex") + (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig") (set_attr "mode" "V2SF,TI,TI,TI,DI")]) -(define_insn "*vec_extractv2di_1_sse2" - [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x") +(define_insn "*vec_extractv2di_1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,Y2,Y2,Y2,x,x") (vec_select:DI - (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o") + (match_operand:V2DI 1 
"nonimmediate_operand" " x,0 ,Y2,o ,x,o") (parallel [(const_int 1)])))] - "!TARGET_64BIT - && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "!TARGET_64BIT && TARGET_SSE + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ %vmovhps\t{%1, %0|%0, %1} psrldq\t{$8, %0|%0, 8} vpsrldq\t{$8, %1, %0|%0, %1, 8} - %vmovq\t{%H1, %0|%0, %H1}" - [(set_attr "isa" "base,noavx,avx,base") - (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov") - (set_attr "length_immediate" "*,1,1,*") - (set_attr "memory" "*,none,none,*") - (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex") - (set_attr "mode" "V2SF,TI,TI,TI")]) - -;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva -(define_insn "*vec_extractv2di_1_sse" - [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x") - (vec_select:DI - (match_operand:V2DI 1 "nonimmediate_operand" " x,x,o") - (parallel [(const_int 1)])))] - "!TARGET_SSE2 && TARGET_SSE - && !(MEM_P (operands[0]) && MEM_P (operands[1]))" - "@ - movhps\t{%1, %0|%0, %1} + %vmovq\t{%H1, %0|%0, %H1} movhlps\t{%1, %0|%0, %1} movlps\t{%H1, %0|%0, %H1}" - [(set_attr "type" "ssemov") - (set_attr "mode" "V2SF,V4SF,V2SF")]) + [(set_attr "isa" "*,noavx,avx,*,noavx,noavx") + (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov") + (set_attr "length_immediate" "*,1,1,*,*,*") + (set_attr "memory" "*,none,none,*,*,*") + (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig") + (set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")]) (define_insn "*vec_dupv4si_avx" [(set (match_operand:V4SI 0 "register_operand" "=x,x") @@ -6681,7 +6494,7 @@ punpcklqdq\t%0, %0 vpunpcklqdq\t{%d1, %0|%0, %d1} %vmovddup\t{%1, %0|%0, %1}" - [(set_attr "isa" "noavx,avx,base") + [(set_attr "isa" "noavx,avx,*") (set_attr "type" "sselog1") (set_attr "prefix" "orig,vex,maybe_vex") (set_attr "mode" "TI,TI,DF")]) @@ -6704,14 +6517,14 @@ (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))] "TARGET_SSE4_1" "@ - pinsrd\t{$0x1, %2, %0|%0, %2, 0x1} - 
vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1} + pinsrd\t{$1, %2, %0|%0, %2, 1} + vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1} punpckldq\t{%2, %0|%0, %2} vpunpckldq\t{%2, %1, %0|%0, %1, %2} %vmovd\t{%1, %0|%0, %1} punpckldq\t{%2, %0|%0, %2} movd\t{%1, %0|%0, %1}" - [(set_attr "isa" "noavx,avx,noavx,avx,base,base,base") + [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*") (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov") (set_attr "prefix_extra" "1,1,*,*,*,*,*") (set_attr "length_immediate" "1,1,*,*,*,*,*") @@ -6749,53 +6562,49 @@ [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov") (set_attr "mode" "V4SF,V4SF,DI,DI")]) -(define_insn "*vec_concatv4si_1_avx" - [(set (match_operand:V4SI 0 "register_operand" "=x,x") - (vec_concat:V4SI - (match_operand:V2SI 1 "register_operand" " x,x") - (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))] - "TARGET_AVX" - "@ - vpunpcklqdq\t{%2, %1, %0|%0, %1, %2} - vmovhps\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sselog,ssemov") - (set_attr "prefix" "vex") - (set_attr "mode" "TI,V2SF")]) - -(define_insn "*vec_concatv4si_1" - [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x") +(define_insn "*vec_concatv4si" + [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x,x,x") (vec_concat:V4SI - (match_operand:V2SI 1 "register_operand" " 0 ,0,0") - (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))] + (match_operand:V2SI 1 "register_operand" " 0 ,x,0,0,x") + (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,x,m,m")))] "TARGET_SSE" "@ punpcklqdq\t{%2, %0|%0, %2} + vpunpcklqdq\t{%2, %1, %0|%0, %1, %2} movlhps\t{%2, %0|%0, %2} - movhps\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog,ssemov,ssemov") - (set_attr "mode" "TI,V4SF,V2SF")]) + movhps\t{%2, %0|%0, %2} + vmovhps\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx,noavx,noavx,avx") + (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov") + (set_attr "prefix" "orig,vex,orig,orig,vex") + (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")]) -(define_insn 
"*vec_concatv2di_rex64_sse4_1" +;; movd instead of movq is required to handle broken assemblers. +(define_insn "*vec_concatv2di_rex64" [(set (match_operand:V2DI 0 "register_operand" - "=x, x, x,Yi,!x,x,x,x,x") + "=Y4,x ,x ,Yi,!x,x,x,x,x") (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" - " 0, x,xm,r ,*y,0,x,0,x") + " 0 ,x ,xm,r ,*y,0,x,0,x") (match_operand:DI 2 "vector_move_operand" - "rm,rm, C,C ,C ,x,x,m,m")))] - "TARGET_64BIT && TARGET_SSE4_1" + " rm,rm,C ,C ,C ,x,x,m,m")))] + "TARGET_64BIT" "@ - pinsrq\t{$0x1, %2, %0|%0, %2, 0x1} - vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1} - %vmovq\t{%1, %0|%0, %1} + pinsrq\t{$1, %2, %0|%0, %2, 1} + vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1} %vmovq\t{%1, %0|%0, %1} + %vmovd\t{%1, %0|%0, %1} movq2dq\t{%1, %0|%0, %1} punpcklqdq\t{%2, %0|%0, %2} vpunpcklqdq\t{%2, %1, %0|%0, %1, %2} movhps\t{%2, %0|%0, %2} vmovhps\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx,base,base,base,noavx,avx,noavx,avx") - (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov,sselog,sselog,ssemov,ssemov") + [(set_attr "isa" "noavx,avx,*,*,*,noavx,avx,noavx,avx") + (set (attr "type") + (if_then_else + (eq_attr "alternative" "0,1,5,6") + (const_string "sselog") + (const_string "ssemov"))) (set (attr "prefix_rex") (if_then_else (and (eq_attr "alternative" "0,3") @@ -6807,23 +6616,6 @@ (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex") (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")]) -(define_insn "*vec_concatv2di_rex64_sse" - [(set (match_operand:V2DI 0 "register_operand" "=Y2,Yi,!Y2,Y2,x,x") - (vec_concat:V2DI - (match_operand:DI 1 "nonimmediate_operand" "Y2m,r ,*y ,0 ,0,0") - (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))] - "TARGET_64BIT && TARGET_SSE" - "@ - movq\t{%1, %0|%0, %1} - movq\t{%1, %0|%0, %1} - movq2dq\t{%1, %0|%0, %1} - punpcklqdq\t{%2, %0|%0, %2} - movlhps\t{%2, %0|%0, %2} - movhps\t{%2, %0|%0, %2}" - [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov") - (set_attr 
"prefix_rex" "*,1,*,*,*,*") - (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")]) - (define_insn "vec_concatv2di" [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x,x") (vec_concat:V2DI @@ -6838,154 +6630,34 @@ movlhps\t{%2, %0|%0, %2} movhps\t{%2, %0|%0, %2} vmovhps\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "base,base,noavx,avx,noavx,noavx,avx") + [(set_attr "isa" "*,*,noavx,avx,noavx,noavx,avx") (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov") (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex") (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")]) -(define_expand "vec_unpacku_hi_v16qi" - [(match_operand:V8HI 0 "register_operand" "") - (match_operand:V16QI 1 "register_operand" "")] - "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, true, true); - else - ix86_expand_sse_unpack (operands, true, true); - DONE; -}) - -(define_expand "vec_unpacks_hi_v16qi" - [(match_operand:V8HI 0 "register_operand" "") - (match_operand:V16QI 1 "register_operand" "")] - "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, false, true); - else - ix86_expand_sse_unpack (operands, false, true); - DONE; -}) - -(define_expand "vec_unpacku_lo_v16qi" - [(match_operand:V8HI 0 "register_operand" "") - (match_operand:V16QI 1 "register_operand" "")] - "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, true, false); - else - ix86_expand_sse_unpack (operands, true, false); - DONE; -}) - -(define_expand "vec_unpacks_lo_v16qi" - [(match_operand:V8HI 0 "register_operand" "") - (match_operand:V16QI 1 "register_operand" "")] - "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, false, false); - else - ix86_expand_sse_unpack (operands, false, false); - DONE; -}) - -(define_expand "vec_unpacku_hi_v8hi" - [(match_operand:V4SI 0 "register_operand" "") - (match_operand:V8HI 1 "register_operand" "")] - "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, true, 
true); - else - ix86_expand_sse_unpack (operands, true, true); - DONE; -}) - -(define_expand "vec_unpacks_hi_v8hi" - [(match_operand:V4SI 0 "register_operand" "") - (match_operand:V8HI 1 "register_operand" "")] - "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, false, true); - else - ix86_expand_sse_unpack (operands, false, true); - DONE; -}) - -(define_expand "vec_unpacku_lo_v8hi" - [(match_operand:V4SI 0 "register_operand" "") - (match_operand:V8HI 1 "register_operand" "")] - "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, true, false); - else - ix86_expand_sse_unpack (operands, true, false); - DONE; -}) - -(define_expand "vec_unpacks_lo_v8hi" - [(match_operand:V4SI 0 "register_operand" "") - (match_operand:V8HI 1 "register_operand" "")] - "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, false, false); - else - ix86_expand_sse_unpack (operands, false, false); - DONE; -}) - -(define_expand "vec_unpacku_hi_v4si" - [(match_operand:V2DI 0 "register_operand" "") - (match_operand:V4SI 1 "register_operand" "")] +(define_expand "vec_unpacks_lo_" + [(match_operand: 0 "register_operand" "") + (match_operand:VI124_128 1 "register_operand" "")] "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, true, true); - else - ix86_expand_sse_unpack (operands, true, true); - DONE; -}) + "ix86_expand_sse_unpack (operands, false, false); DONE;") -(define_expand "vec_unpacks_hi_v4si" - [(match_operand:V2DI 0 "register_operand" "") - (match_operand:V4SI 1 "register_operand" "")] +(define_expand "vec_unpacks_hi_" + [(match_operand: 0 "register_operand" "") + (match_operand:VI124_128 1 "register_operand" "")] "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, false, true); - else - ix86_expand_sse_unpack (operands, false, true); - DONE; -}) + "ix86_expand_sse_unpack (operands, false, true); DONE;") -(define_expand "vec_unpacku_lo_v4si" - [(match_operand:V2DI 0 
"register_operand" "") - (match_operand:V4SI 1 "register_operand" "")] +(define_expand "vec_unpacku_lo_" + [(match_operand: 0 "register_operand" "") + (match_operand:VI124_128 1 "register_operand" "")] "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, true, false); - else - ix86_expand_sse_unpack (operands, true, false); - DONE; -}) + "ix86_expand_sse_unpack (operands, true, false); DONE;") -(define_expand "vec_unpacks_lo_v4si" - [(match_operand:V2DI 0 "register_operand" "") - (match_operand:V4SI 1 "register_operand" "")] +(define_expand "vec_unpacku_hi_" + [(match_operand: 0 "register_operand" "") + (match_operand:VI124_128 1 "register_operand" "")] "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, false, false); - else - ix86_expand_sse_unpack (operands, false, false); - DONE; -}) + "ix86_expand_sse_unpack (operands, true, true); DONE;") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; @@ -7105,23 +6777,12 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) -(define_insn "avx_movmsk256" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI - [(match_operand:AVX256MODEF2P 1 "register_operand" "x")] - UNSPEC_MOVMSK))] - "AVX256_VEC_FLOAT_MODE_P (mode)" - "vmovmsk\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - -(define_insn "_movmsk" +(define_insn "_movmsk" [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI - [(match_operand:SSEMODEF2P 1 "register_operand" "x")] + [(match_operand:VF 1 "register_operand" "x")] UNSPEC_MOVMSK))] - "SSE_VEC_FLOAT_MODE_P (mode)" + "TARGET_SSE" "%vmovmsk\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "maybe_vex") @@ -7147,35 +6808,18 @@ "TARGET_SSE2") (define_insn "*sse2_maskmovdqu" - [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D")) - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") - (match_operand:V16QI 2 "register_operand" "x") - 
(mem:V16QI (match_dup 0))] - UNSPEC_MASKMOV))] - "TARGET_SSE2 && !TARGET_64BIT" - ;; @@@ check ordering of operands in intel/nonintel syntax - "%vmaskmovdqu\t{%2, %1|%1, %2}" - [(set_attr "type" "ssemov") - (set_attr "prefix_data16" "1") - ;; The implicit %rdi operand confuses default length_vex computation. - (set_attr "length_vex" "3") - (set_attr "prefix" "maybe_vex") - (set_attr "mode" "TI")]) - -(define_insn "*sse2_maskmovdqu_rex64" - [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D")) + [(set (mem:V16QI (match_operand:P 0 "register_operand" "D")) (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") (match_operand:V16QI 2 "register_operand" "x") (mem:V16QI (match_dup 0))] UNSPEC_MASKMOV))] - "TARGET_SSE2 && TARGET_64BIT" - ;; @@@ check ordering of operands in intel/nonintel syntax + "TARGET_SSE2" "%vmaskmovdqu\t{%2, %1|%1, %2}" [(set_attr "type" "ssemov") (set_attr "prefix_data16" "1") ;; The implicit %rdi operand confuses default length_vex computation. (set (attr "length_vex") - (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 
3 + 1 : 2 + 1"))) + (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))"))) (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) @@ -7616,7 +7260,7 @@ "@ phsubd\t{%2, %0|%0, %2} vphsubd\t{%2, %1, %0|%0, %1, %2}" - + [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") (set_attr "atom_unit" "complex") @@ -7939,15 +7583,15 @@ (set_attr "mode" "DI")]) (define_insn "ssse3_psign3" - [(set (match_operand:SSEMODE124 0 "register_operand" "=x,x") - (unspec:SSEMODE124 - [(match_operand:SSEMODE124 1 "register_operand" "0,x") - (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm,xm")] + [(set (match_operand:VI124_128 0 "register_operand" "=x,x") + (unspec:VI124_128 + [(match_operand:VI124_128 1 "register_operand" "0,x") + (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")] UNSPEC_PSIGN))] "TARGET_SSSE3" "@ - psign\t{%2, %0|%0, %2} - vpsign\t{%2, %1, %0|%0, %1, %2}" + psign\t{%2, %0|%0, %2} + vpsign\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog1") (set_attr "prefix_data16" "1,*") @@ -8016,11 +7660,11 @@ (set_attr "mode" "DI")]) (define_insn "abs2" - [(set (match_operand:SSEMODE124 0 "register_operand" "=x") - (abs:SSEMODE124 - (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))] + [(set (match_operand:VI124_128 0 "register_operand" "=x") + (abs:VI124_128 + (match_operand:VI124_128 1 "nonimmediate_operand" "xm")))] "TARGET_SSSE3" - "%vpabs\t{%1, %0|%0, %1}" + "%vpabs\t{%1, %0|%0, %1}" [(set_attr "type" "sselog1") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") @@ -8051,7 +7695,7 @@ [(match_operand:MODEF 1 "register_operand" "x")] UNSPEC_MOVNT))] "TARGET_SSE4A" - "movnts\t{%1, %0|%0, %1}" + "movnt\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "")]) @@ -8059,7 +7703,7 @@ [(set (match_operand: 0 "memory_operand" "=m") (unspec: [(vec_select: - (match_operand:SSEMODEF2P 1 "register_operand" "x") + (match_operand:VF_128 1 "register_operand" "x") (parallel [(const_int 0)]))] UNSPEC_MOVNT))] 
"TARGET_SSE4A" @@ -8070,8 +7714,8 @@ (define_insn "sse4a_extrqi" [(set (match_operand:V2DI 0 "register_operand" "=x") (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") - (match_operand 2 "const_int_operand" "") - (match_operand 3 "const_int_operand" "")] + (match_operand 2 "const_0_to_255_operand" "") + (match_operand 3 "const_0_to_255_operand" "")] UNSPEC_EXTRQI))] "TARGET_SSE4A" "extrq\t{%3, %2, %0|%0, %2, %3}" @@ -8095,8 +7739,8 @@ [(set (match_operand:V2DI 0 "register_operand" "=x") (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") (match_operand:V2DI 2 "register_operand" "x") - (match_operand 3 "const_int_operand" "") - (match_operand 4 "const_int_operand" "")] + (match_operand 3 "const_0_to_255_operand" "") + (match_operand 4 "const_0_to_255_operand" "")] UNSPEC_INSERTQI))] "TARGET_SSE4A" "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}" @@ -8124,91 +7768,60 @@ ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(define_insn "avx_blend" - [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") - (vec_merge:AVXMODEF2P - (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm") - (match_operand:AVXMODEF2P 1 "register_operand" "x") - (match_operand:SI 3 "const_0_to__operand" "n")))] - "TARGET_AVX" - "vblend\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemov") - (set_attr "prefix_extra" "1") - (set_attr "length_immediate" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - -(define_insn "avx_blendv" - [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") - (unspec:AVXMODEF2P - [(match_operand:AVXMODEF2P 1 "register_operand" "x") - (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm") - (match_operand:AVXMODEF2P 3 "register_operand" "x")] - UNSPEC_BLENDV))] - "TARGET_AVX" - "vblendv\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemov") - (set_attr "prefix_extra" "1") - (set_attr "length_immediate" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - -(define_insn "sse4_1_blend" - 
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (vec_merge:SSEMODEF2P - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm") - (match_operand:SSEMODEF2P 1 "register_operand" "0") - (match_operand:SI 3 "const_0_to__operand" "n")))] - "TARGET_SSE4_1" - "blend\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "ssemov") - (set_attr "prefix_data16" "1") - (set_attr "prefix_extra" "1") - (set_attr "length_immediate" "1") - (set_attr "mode" "")]) - -(define_insn "sse4_1_blendv" - [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x") - (unspec:SSEMODEF2P - [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0") - (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm") - (match_operand:SSEMODEF2P 3 "register_operand" "Yz")] - UNSPEC_BLENDV))] +(define_insn "_blend" + [(set (match_operand:VF 0 "register_operand" "=x,x") + (vec_merge:VF + (match_operand:VF 2 "nonimmediate_operand" "xm,xm") + (match_operand:VF 1 "register_operand" "0,x") + (match_operand:SI 3 "const_0_to__operand" "")))] "TARGET_SSE4_1" - "blendv\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "ssemov") - (set_attr "prefix_data16" "1") - (set_attr "prefix_extra" "1") - (set_attr "mode" "")]) - -(define_insn "avx_dp" - [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") - (unspec:AVXMODEF2P - [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x") - (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm") - (match_operand:SI 3 "const_0_to_255_operand" "n")] - UNSPEC_DP))] - "TARGET_AVX" - "vdp\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemul") - (set_attr "prefix" "vex") + "@ + blend\t{%3, %2, %0|%0, %2, %3} + vblend\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssemov") + (set_attr "length_immediate" "1") + (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "")]) + +(define_insn "_blendv" + [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x") + 
(unspec:VF + [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x") + (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm") + (match_operand:VF 3 "register_operand" "Yz,x")] + UNSPEC_BLENDV))] + "TARGET_SSE4_1" + "@ + blendv\t{%3, %2, %0|%0, %2, %3} + vblendv\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssemov") (set_attr "length_immediate" "1") - (set_attr "mode" "")]) + (set_attr "prefix_data16" "1,*") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "")]) -(define_insn "sse4_1_dp" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (unspec:SSEMODEF2P - [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm") - (match_operand:SI 3 "const_0_to_255_operand" "n")] +(define_insn "_dp" + [(set (match_operand:VF 0 "register_operand" "=x,x") + (unspec:VF + [(match_operand:VF 1 "nonimmediate_operand" "%0,x") + (match_operand:VF 2 "nonimmediate_operand" "xm,xm") + (match_operand:SI 3 "const_0_to_255_operand" "n,n")] UNSPEC_DP))] "TARGET_SSE4_1" - "dp\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "ssemul") - (set_attr "prefix_data16" "1") - (set_attr "prefix_extra" "1") + "@ + dp\t{%3, %2, %0|%0, %2, %3} + vdp\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssemul") (set_attr "length_immediate" "1") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,vex") (set_attr "mode" "")]) (define_insn "sse4_1_movntdqa" @@ -8222,111 +7835,73 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) -(define_insn "*avx_mpsadbw" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") - (match_operand:V16QI 2 "nonimmediate_operand" "xm") - (match_operand:SI 3 "const_0_to_255_operand" "n")] - UNSPEC_MPSADBW))] - "TARGET_AVX" - "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr 
"type" "sselog1") - (set_attr "prefix" "vex") - (set_attr "prefix_extra" "1") - (set_attr "length_immediate" "1") - (set_attr "mode" "TI")]) - (define_insn "sse4_1_mpsadbw" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm") - (match_operand:SI 3 "const_0_to_255_operand" "n")] + [(set (match_operand:V16QI 0 "register_operand" "=x,x") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,x") + (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm") + (match_operand:SI 3 "const_0_to_255_operand" "n,n")] UNSPEC_MPSADBW))] "TARGET_SSE4_1" - "mpsadbw\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "sselog1") - (set_attr "prefix_extra" "1") + "@ + mpsadbw\t{%3, %2, %0|%0, %2, %3} + vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sselog1") (set_attr "length_immediate" "1") - (set_attr "mode" "TI")]) - -(define_insn "*avx_packusdw" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (vec_concat:V8HI - (us_truncate:V4HI - (match_operand:V4SI 1 "register_operand" "x")) - (us_truncate:V4HI - (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))] - "TARGET_AVX" - "vpackusdw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_insn "sse4_1_packusdw" - [(set (match_operand:V8HI 0 "register_operand" "=x") + [(set (match_operand:V8HI 0 "register_operand" "=x,x") (vec_concat:V8HI (us_truncate:V4HI - (match_operand:V4SI 1 "register_operand" "0")) + (match_operand:V4SI 1 "register_operand" "0,x")) (us_truncate:V4HI - (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))] + (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))] "TARGET_SSE4_1" - "packusdw\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "prefix_extra" "1") - (set_attr "mode" "TI")]) - -(define_insn 
"*avx_pblendvb" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") - (match_operand:V16QI 2 "nonimmediate_operand" "xm") - (match_operand:V16QI 3 "register_operand" "x")] - UNSPEC_BLENDV))] - "TARGET_AVX" - "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemov") + "@ + packusdw\t{%2, %0|%0, %2} + vpackusdw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sselog") (set_attr "prefix_extra" "1") - (set_attr "length_immediate" "1") - (set_attr "prefix" "vex") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_insn "sse4_1_pblendvb" - [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x") - (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0") - (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm") - (match_operand:V16QI 3 "register_operand" "Yz")] - UNSPEC_BLENDV))] + [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x,x") + (unspec:V16QI + [(match_operand:V16QI 1 "reg_not_xmm0_operand_maybe_avx" "0,x") + (match_operand:V16QI 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm") + (match_operand:V16QI 3 "register_operand" "Yz,x")] + UNSPEC_BLENDV))] "TARGET_SSE4_1" - "pblendvb\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "ssemov") - (set_attr "prefix_extra" "1") - (set_attr "mode" "TI")]) - -(define_insn "*avx_pblendw" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (vec_merge:V8HI - (match_operand:V8HI 2 "nonimmediate_operand" "xm") - (match_operand:V8HI 1 "register_operand" "x") - (match_operand:SI 3 "const_0_to_255_operand" "n")))] - "TARGET_AVX" - "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemov") - (set_attr "prefix" "vex") + "@ + pblendvb\t{%3, %2, %0|%0, %2, %3} + vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssemov") (set_attr "prefix_extra" "1") - (set_attr "length_immediate" "1") + (set_attr "length_immediate" "*,1") + (set_attr 
"prefix" "orig,vex") (set_attr "mode" "TI")]) (define_insn "sse4_1_pblendw" - [(set (match_operand:V8HI 0 "register_operand" "=x") + [(set (match_operand:V8HI 0 "register_operand" "=x,x") (vec_merge:V8HI - (match_operand:V8HI 2 "nonimmediate_operand" "xm") - (match_operand:V8HI 1 "register_operand" "0") - (match_operand:SI 3 "const_0_to_255_operand" "n")))] + (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm") + (match_operand:V8HI 1 "register_operand" "0,x") + (match_operand:SI 3 "const_0_to_255_operand" "n,n")))] "TARGET_SSE4_1" - "pblendw\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "ssemov") + "@ + pblendw\t{%3, %2, %0|%0, %2, %3} + vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_insn "sse4_1_phminposuw" @@ -8436,10 +8011,10 @@ ;; ptestps/ptestpd are very similar to comiss and ucomiss when ;; setting FLAGS_REG. But it is not a really compare instruction. 
-(define_insn "avx_vtest" +(define_insn "avx_vtest" [(set (reg:CC FLAGS_REG) - (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x") - (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")] + (unspec:CC [(match_operand:VF 0 "register_operand" "x") + (match_operand:VF 1 "nonimmediate_operand" "xm")] UNSPEC_VTESTP))] "TARGET_AVX" "vtest\t{%1, %0|%0, %1}" @@ -8474,67 +8049,44 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) -(define_insn "avx_round256" - [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x") - (unspec:AVX256MODEF2P - [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm") - (match_operand:SI 2 "const_0_to_15_operand" "n")] - UNSPEC_ROUND))] - "TARGET_AVX" - "vround\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssecvt") - (set_attr "prefix_extra" "1") - (set_attr "length_immediate" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - -(define_insn "sse4_1_round" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (unspec:SSEMODEF2P - [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm") +(define_insn "_round" + [(set (match_operand:VF 0 "register_operand" "=x") + (unspec:VF + [(match_operand:VF 1 "nonimmediate_operand" "xm") (match_operand:SI 2 "const_0_to_15_operand" "n")] UNSPEC_ROUND))] "TARGET_ROUND" "%vround\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ssecvt") - (set_attr "prefix_data16" "1") + (set (attr "prefix_data16") + (if_then_else + (ne (symbol_ref "TARGET_AVX") (const_int 0)) + (const_string "*") + (const_string "1"))) (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "")]) -(define_insn "*avx_round" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (vec_merge:SSEMODEF2P - (unspec:SSEMODEF2P - [(match_operand:SSEMODEF2P 2 "register_operand" "x") - (match_operand:SI 3 "const_0_to_15_operand" "n")] - UNSPEC_ROUND) - (match_operand:SSEMODEF2P 1 "register_operand" "x") - (const_int 1)))] - "TARGET_AVX" - 
"vround\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssecvt") - (set_attr "prefix_extra" "1") - (set_attr "length_immediate" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - (define_insn "sse4_1_round" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (vec_merge:SSEMODEF2P - (unspec:SSEMODEF2P - [(match_operand:SSEMODEF2P 2 "register_operand" "x") - (match_operand:SI 3 "const_0_to_15_operand" "n")] + [(set (match_operand:VF_128 0 "register_operand" "=x,x") + (vec_merge:VF_128 + (unspec:VF_128 + [(match_operand:VF_128 2 "register_operand" "x,x") + (match_operand:SI 3 "const_0_to_15_operand" "n,n")] UNSPEC_ROUND) - (match_operand:SSEMODEF2P 1 "register_operand" "0") + (match_operand:VF_128 1 "register_operand" "0,x") (const_int 1)))] "TARGET_ROUND" - "round\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "ssecvt") - (set_attr "prefix_data16" "1") - (set_attr "prefix_extra" "1") + "@ + round\t{%3, %2, %0|%0, %2, %3} + vround\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssecvt") (set_attr "length_immediate" "1") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,vex") (set_attr "mode" "")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -9145,22 +8697,12 @@ (set_attr "mode" "TI")]) ;; XOP parallel XMM conditional moves -(define_insn "xop_pcmov_" - [(set (match_operand:SSEMODE 0 "register_operand" "=x,x") - (if_then_else:SSEMODE - (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m") - (match_operand:SSEMODE 1 "vector_move_operand" "x,x") - (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))] - "TARGET_XOP" - "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "sse4arg")]) - -(define_insn "xop_pcmov_256" - [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x") - (if_then_else:AVX256MODE - (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m") - (match_operand:AVX256MODE 1 "vector_move_operand" "x,x") - 
(match_operand:AVX256MODE 2 "vector_move_operand" "xm,x")))] +(define_insn "xop_pcmov_" + [(set (match_operand:V 0 "register_operand" "=x,x") + (if_then_else:V + (match_operand:V 3 "nonimmediate_operand" "x,m") + (match_operand:V 1 "vector_move_operand" "x,x") + (match_operand:V 2 "vector_move_operand" "xm,x")))] "TARGET_XOP" "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "sse4arg")]) @@ -9660,9 +9202,9 @@ ;; XOP packed rotate instructions (define_expand "rotl3" - [(set (match_operand:SSEMODE1248 0 "register_operand" "") - (rotate:SSEMODE1248 - (match_operand:SSEMODE1248 1 "nonimmediate_operand" "") + [(set (match_operand:VI_128 0 "register_operand" "") + (rotate:VI_128 + (match_operand:VI_128 1 "nonimmediate_operand" "") (match_operand:SI 2 "general_operand")))] "TARGET_XOP" { @@ -9691,9 +9233,9 @@ }) (define_expand "rotr3" - [(set (match_operand:SSEMODE1248 0 "register_operand" "") - (rotatert:SSEMODE1248 - (match_operand:SSEMODE1248 1 "nonimmediate_operand" "") + [(set (match_operand:VI_128 0 "register_operand" "") + (rotatert:VI_128 + (match_operand:VI_128 1 "nonimmediate_operand" "") (match_operand:SI 2 "general_operand")))] "TARGET_XOP" { @@ -9724,34 +9266,34 @@ }) (define_insn "xop_rotl3" - [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") - (rotate:SSEMODE1248 - (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm") + [(set (match_operand:VI_128 0 "register_operand" "=x") + (rotate:VI_128 + (match_operand:VI_128 1 "nonimmediate_operand" "xm") (match_operand:SI 2 "const_0_to__operand" "n")))] "TARGET_XOP" - "vprot\t{%2, %1, %0|%0, %1, %2}" + "vprot\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseishft") (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) (define_insn "xop_rotr3" - [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") - (rotatert:SSEMODE1248 - (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm") + [(set (match_operand:VI_128 0 "register_operand" "=x") + (rotatert:VI_128 + (match_operand:VI_128 1 
"nonimmediate_operand" "xm") (match_operand:SI 2 "const_0_to__operand" "n")))] "TARGET_XOP" { operands[3] = GEN_INT (( * 8) - INTVAL (operands[2])); - return \"vprot\t{%3, %1, %0|%0, %1, %3}\"; + return \"vprot\t{%3, %1, %0|%0, %1, %3}\"; } [(set_attr "type" "sseishft") (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) (define_expand "vrotr3" - [(match_operand:SSEMODE1248 0 "register_operand" "") - (match_operand:SSEMODE1248 1 "register_operand" "") - (match_operand:SSEMODE1248 2 "register_operand" "")] + [(match_operand:VI_128 0 "register_operand" "") + (match_operand:VI_128 1 "register_operand" "") + (match_operand:VI_128 2 "register_operand" "")] "TARGET_XOP" { rtx reg = gen_reg_rtx (mode); @@ -9761,9 +9303,9 @@ }) (define_expand "vrotl3" - [(match_operand:SSEMODE1248 0 "register_operand" "") - (match_operand:SSEMODE1248 1 "register_operand" "") - (match_operand:SSEMODE1248 2 "register_operand" "")] + [(match_operand:VI_128 0 "register_operand" "") + (match_operand:VI_128 1 "register_operand" "") + (match_operand:VI_128 2 "register_operand" "")] "TARGET_XOP" { emit_insn (gen_xop_vrotl3 (operands[0], operands[1], operands[2])); @@ -9771,19 +9313,19 @@ }) (define_insn "xop_vrotl3" - [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") - (if_then_else:SSEMODE1248 - (ge:SSEMODE1248 - (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m") + [(set (match_operand:VI_128 0 "register_operand" "=x,x") + (if_then_else:VI_128 + (ge:VI_128 + (match_operand:VI_128 2 "nonimmediate_operand" "x,m") (const_int 0)) - (rotate:SSEMODE1248 - (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x") + (rotate:VI_128 + (match_operand:VI_128 1 "nonimmediate_operand" "xm,x") (match_dup 2)) - (rotatert:SSEMODE1248 + (rotatert:VI_128 (match_dup 1) - (neg:SSEMODE1248 (match_dup 2)))))] + (neg:VI_128 (match_dup 2)))))] "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" - "vprot\t{%2, %1, %0|%0, %1, %2}" + "vprot\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" 
"sseishft") (set_attr "prefix_data16" "0") (set_attr "prefix_extra" "2") @@ -9792,9 +9334,9 @@ ;; XOP packed shift instructions. ;; FIXME: add V2DI back in (define_expand "vlshr3" - [(match_operand:SSEMODE124 0 "register_operand" "") - (match_operand:SSEMODE124 1 "register_operand" "") - (match_operand:SSEMODE124 2 "register_operand" "")] + [(match_operand:VI124_128 0 "register_operand" "") + (match_operand:VI124_128 1 "register_operand" "") + (match_operand:VI124_128 2 "register_operand" "")] "TARGET_XOP" { rtx neg = gen_reg_rtx (mode); @@ -9804,9 +9346,9 @@ }) (define_expand "vashr3" - [(match_operand:SSEMODE124 0 "register_operand" "") - (match_operand:SSEMODE124 1 "register_operand" "") - (match_operand:SSEMODE124 2 "register_operand" "")] + [(match_operand:VI124_128 0 "register_operand" "") + (match_operand:VI124_128 1 "register_operand" "") + (match_operand:VI124_128 2 "register_operand" "")] "TARGET_XOP" { rtx neg = gen_reg_rtx (mode); @@ -9816,9 +9358,9 @@ }) (define_expand "vashl3" - [(match_operand:SSEMODE124 0 "register_operand" "") - (match_operand:SSEMODE124 1 "register_operand" "") - (match_operand:SSEMODE124 2 "register_operand" "")] + [(match_operand:VI124_128 0 "register_operand" "") + (match_operand:VI124_128 1 "register_operand" "") + (match_operand:VI124_128 2 "register_operand" "")] "TARGET_XOP" { emit_insn (gen_xop_ashl3 (operands[0], operands[1], operands[2])); @@ -9826,38 +9368,38 @@ }) (define_insn "xop_ashl3" - [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") - (if_then_else:SSEMODE1248 - (ge:SSEMODE1248 - (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m") + [(set (match_operand:VI_128 0 "register_operand" "=x,x") + (if_then_else:VI_128 + (ge:VI_128 + (match_operand:VI_128 2 "nonimmediate_operand" "x,m") (const_int 0)) - (ashift:SSEMODE1248 - (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x") + (ashift:VI_128 + (match_operand:VI_128 1 "nonimmediate_operand" "xm,x") (match_dup 2)) - (ashiftrt:SSEMODE1248 + 
(ashiftrt:VI_128 (match_dup 1) - (neg:SSEMODE1248 (match_dup 2)))))] + (neg:VI_128 (match_dup 2)))))] "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" - "vpsha\t{%2, %1, %0|%0, %1, %2}" + "vpsha\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseishft") (set_attr "prefix_data16" "0") (set_attr "prefix_extra" "2") (set_attr "mode" "TI")]) (define_insn "xop_lshl3" - [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") - (if_then_else:SSEMODE1248 - (ge:SSEMODE1248 - (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m") + [(set (match_operand:VI_128 0 "register_operand" "=x,x") + (if_then_else:VI_128 + (ge:VI_128 + (match_operand:VI_128 2 "nonimmediate_operand" "x,m") (const_int 0)) - (ashift:SSEMODE1248 - (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x") + (ashift:VI_128 + (match_operand:VI_128 1 "nonimmediate_operand" "xm,x") (match_dup 2)) - (lshiftrt:SSEMODE1248 + (lshiftrt:VI_128 (match_dup 1) - (neg:SSEMODE1248 (match_dup 2)))))] + (neg:VI_128 (match_dup 2)))))] "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" - "vpshl\t{%2, %1, %0|%0, %1, %2}" + "vpshl\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseishft") (set_attr "prefix_data16" "0") (set_attr "prefix_extra" "2") @@ -9977,10 +9519,10 @@ ;; scalar insns (define_expand "xop_vmfrcz2" - [(set (match_operand:SSEMODEF2P 0 "register_operand") - (vec_merge:SSEMODEF2P - (unspec:SSEMODEF2P - [(match_operand:SSEMODEF2P 1 "nonimmediate_operand")] + [(set (match_operand:VF_128 0 "register_operand") + (vec_merge:VF_128 + (unspec:VF_128 + [(match_operand:VF_128 1 "nonimmediate_operand")] UNSPEC_FRCZ) (match_dup 3) (const_int 1)))] @@ -9990,12 +9532,12 @@ }) (define_insn "*xop_vmfrcz_" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (vec_merge:SSEMODEF2P - (unspec:SSEMODEF2P - [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")] + [(set (match_operand:VF_128 0 "register_operand" "=x") + (vec_merge:VF_128 + (unspec:VF_128 + [(match_operand:VF_128 1 
"nonimmediate_operand" "xm")] UNSPEC_FRCZ) - (match_operand:SSEMODEF2P 2 "const0_operand") + (match_operand:VF_128 2 "const0_operand") (const_int 1)))] "TARGET_XOP" "vfrcz\t{%1, %0|%0, %1}" @@ -10003,12 +9545,12 @@ (set_attr "mode" "")]) (define_insn "xop_maskcmp3" - [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") - (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator" - [(match_operand:SSEMODE1248 2 "register_operand" "x") - (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))] + [(set (match_operand:VI_128 0 "register_operand" "=x") + (match_operator:VI_128 1 "ix86_comparison_int_operator" + [(match_operand:VI_128 2 "register_operand" "x") + (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))] "TARGET_XOP" - "vpcom%Y1\t{%3, %2, %0|%0, %2, %3}" + "vpcom%Y1\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "sse4arg") (set_attr "prefix_data16" "0") (set_attr "prefix_rep" "0") @@ -10017,12 +9559,12 @@ (set_attr "mode" "TI")]) (define_insn "xop_maskcmp_uns3" - [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") - (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator" - [(match_operand:SSEMODE1248 2 "register_operand" "x") - (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))] + [(set (match_operand:VI_128 0 "register_operand" "=x") + (match_operator:VI_128 1 "ix86_comparison_uns_operator" + [(match_operand:VI_128 2 "register_operand" "x") + (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))] "TARGET_XOP" - "vpcom%Y1u\t{%3, %2, %0|%0, %2, %3}" + "vpcom%Y1u\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "ssecmp") (set_attr "prefix_data16" "0") (set_attr "prefix_rep" "0") @@ -10034,14 +9576,14 @@ ;; and pcomneu* not to be converted to the signed ones in case somebody needs ;; the exact instruction generated for the intrinsic. 
(define_insn "xop_maskcmp_uns23" - [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") - (unspec:SSEMODE1248 - [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator" - [(match_operand:SSEMODE1248 2 "register_operand" "x") - (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])] + [(set (match_operand:VI_128 0 "register_operand" "=x") + (unspec:VI_128 + [(match_operator:VI_128 1 "ix86_comparison_uns_operator" + [(match_operand:VI_128 2 "register_operand" "x") + (match_operand:VI_128 3 "nonimmediate_operand" "xm")])] UNSPEC_XOP_UNSIGNED_CMP))] "TARGET_XOP" - "vpcom%Y1u\t{%3, %2, %0|%0, %2, %3}" + "vpcom%Y1u\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "ssecmp") (set_attr "prefix_data16" "0") (set_attr "prefix_extra" "2") @@ -10051,17 +9593,17 @@ ;; Pcomtrue and pcomfalse support. These are useless instructions, but are ;; being added here to be complete. (define_insn "xop_pcom_tf3" - [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") - (unspec:SSEMODE1248 - [(match_operand:SSEMODE1248 1 "register_operand" "x") - (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm") + [(set (match_operand:VI_128 0 "register_operand" "=x") + (unspec:VI_128 + [(match_operand:VI_128 1 "register_operand" "x") + (match_operand:VI_128 2 "nonimmediate_operand" "xm") (match_operand:SI 3 "const_int_operand" "n")] UNSPEC_XOP_TRUEFALSE))] "TARGET_XOP" { return ((INTVAL (operands[3]) != 0) - ? "vpcomtrue\t{%2, %1, %0|%0, %1, %2}" - : "vpcomfalse\t{%2, %1, %0|%0, %1, %2}"); + ? 
"vpcomtrue\t{%2, %1, %0|%0, %1, %2}" + : "vpcomfalse\t{%2, %1, %0|%0, %1, %2}"); } [(set_attr "type" "ssecmp") (set_attr "prefix_data16" "0") @@ -10070,11 +9612,11 @@ (set_attr "mode" "TI")]) (define_insn "xop_vpermil23" - [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") - (unspec:AVXMODEF2P - [(match_operand:AVXMODEF2P 1 "register_operand" "x") - (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "%x") - (match_operand: 3 "nonimmediate_operand" "xm") + [(set (match_operand:VF 0 "register_operand" "=x") + (unspec:VF + [(match_operand:VF 1 "register_operand" "x") + (match_operand:VF 2 "nonimmediate_operand" "%x") + (match_operand: 3 "nonimmediate_operand" "xm") (match_operand:SI 4 "const_0_to_3_operand" "n")] UNSPEC_VPERMIL2))] "TARGET_XOP" @@ -10084,96 +9626,65 @@ (set_attr "mode" "")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(define_insn "*avx_aesenc" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x") - (match_operand:V2DI 2 "nonimmediate_operand" "xm")] - UNSPEC_AESENC))] - "TARGET_AES && TARGET_AVX" - "vaesenc\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sselog1") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "TI")]) (define_insn "aesenc" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") - (match_operand:V2DI 2 "nonimmediate_operand" "xm")] + [(set (match_operand:V2DI 0 "register_operand" "=x,x") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") + (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")] UNSPEC_AESENC))] "TARGET_AES" - "aesenc\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog1") - (set_attr "prefix_extra" "1") - (set_attr "mode" "TI")]) - -(define_insn "*avx_aesenclast" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x") - (match_operand:V2DI 2 "nonimmediate_operand" 
"xm")] - UNSPEC_AESENCLAST))] - "TARGET_AES && TARGET_AVX" - "vaesenclast\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sselog1") + "@ + aesenc\t{%2, %0|%0, %2} + vaesenc\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sselog1") (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_insn "aesenclast" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") - (match_operand:V2DI 2 "nonimmediate_operand" "xm")] + [(set (match_operand:V2DI 0 "register_operand" "=x,x") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") + (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")] UNSPEC_AESENCLAST))] "TARGET_AES" - "aesenclast\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog1") - (set_attr "prefix_extra" "1") - (set_attr "mode" "TI")]) - -(define_insn "*avx_aesdec" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x") - (match_operand:V2DI 2 "nonimmediate_operand" "xm")] - UNSPEC_AESDEC))] - "TARGET_AES && TARGET_AVX" - "vaesdec\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sselog1") + "@ + aesenclast\t{%2, %0|%0, %2} + vaesenclast\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sselog1") (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_insn "aesdec" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") - (match_operand:V2DI 2 "nonimmediate_operand" "xm")] + [(set (match_operand:V2DI 0 "register_operand" "=x,x") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") + (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")] UNSPEC_AESDEC))] "TARGET_AES" - "aesdec\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog1") - (set_attr "prefix_extra" "1") - (set_attr "mode" "TI")]) - 
-(define_insn "*avx_aesdeclast" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x") - (match_operand:V2DI 2 "nonimmediate_operand" "xm")] - UNSPEC_AESDECLAST))] - "TARGET_AES && TARGET_AVX" - "vaesdeclast\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sselog1") + "@ + aesdec\t{%2, %0|%0, %2} + vaesdec\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sselog1") (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_insn "aesdeclast" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") - (match_operand:V2DI 2 "nonimmediate_operand" "xm")] + [(set (match_operand:V2DI 0 "register_operand" "=x,x") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") + (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")] UNSPEC_AESDECLAST))] "TARGET_AES" - "aesdeclast\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog1") + "@ + aesdeclast\t{%2, %0|%0, %2} + vaesdeclast\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sselog1") (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_insn "aesimc" @@ -10200,31 +9711,21 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) -(define_insn "*vpclmulqdq" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x") - (match_operand:V2DI 2 "nonimmediate_operand" "xm") - (match_operand:SI 3 "const_0_to_255_operand" "n")] - UNSPEC_PCLMUL))] - "TARGET_PCLMUL && TARGET_AVX" - "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "sselog1") - (set_attr "prefix_extra" "1") - (set_attr "length_immediate" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "TI")]) - (define_insn "pclmulqdq" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (unspec:V2DI [(match_operand:V2DI 1 
"register_operand" "0") - (match_operand:V2DI 2 "nonimmediate_operand" "xm") - (match_operand:SI 3 "const_0_to_255_operand" "n")] + [(set (match_operand:V2DI 0 "register_operand" "=x,x") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") + (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm") + (match_operand:SI 3 "const_0_to_255_operand" "n,n")] UNSPEC_PCLMUL))] "TARGET_PCLMUL" - "pclmulqdq\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "sselog1") + "@ + pclmulqdq\t{%3, %2, %0|%0, %2, %3} + vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sselog1") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_expand "avx_vzeroall" @@ -10271,27 +9772,38 @@ (set_attr "prefix" "vex") (set_attr "mode" "OI")]) -(define_insn_and_split "vec_dup" - [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x") - (vec_duplicate:AVX256MODE24P - (match_operand: 1 "nonimmediate_operand" "m,?x")))] +;; Modes handled by AVX vec_dup patterns. 
+(define_mode_iterator AVX_VEC_DUP_MODE + [V8SI V8SF V4DI V4DF]) + +(define_insn "vec_dup" + [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x") + (vec_duplicate:AVX_VEC_DUP_MODE + (match_operand: 1 "nonimmediate_operand" "m,?x")))] "TARGET_AVX" "@ vbroadcast\t{%1, %0|%0, %1} #" - "&& reload_completed && REG_P (operands[1])" - [(set (match_dup 2) (vec_duplicate: (match_dup 1))) - (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))] - "operands[2] = gen_rtx_REG (mode, REGNO (operands[0]));" [(set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) +(define_split + [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "") + (vec_duplicate:AVX_VEC_DUP_MODE + (match_operand: 1 "register_operand" "")))] + "TARGET_AVX && reload_completed" + [(set (match_dup 2) + (vec_duplicate: (match_dup 1))) + (set (match_dup 0) + (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))] + "operands[2] = gen_rtx_REG (mode, REGNO (operands[0]));") + (define_insn "avx_vbroadcastf128_" - [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x") - (vec_concat:AVX256MODE - (match_operand: 1 "nonimmediate_operand" "m,0,?x") + [(set (match_operand:V_256 0 "register_operand" "=x,x,x") + (vec_concat:V_256 + (match_operand: 1 "nonimmediate_operand" "m,0,?x") (match_dup 1)))] "TARGET_AVX" "@ @@ -10336,15 +9848,15 @@ (set_attr "mode" "SF,SF,V4SF")]) (define_insn_and_split "*avx_vperm_broadcast_" - [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x,x,x") - (vec_select:AVX256MODEF2P - (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "m,o,?x") + [(set (match_operand:VF_256 0 "register_operand" "=x,x,x") + (vec_select:VF_256 + (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x") (match_parallel 2 "avx_vbroadcast_operand" [(match_operand 3 "const_int_operand" "C,n,n")])))] "TARGET_AVX" "#" "&& reload_completed" - [(set (match_dup 0) (vec_duplicate:AVX256MODEF2P (match_dup 1)))] + 
[(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))] { rtx op0 = operands[0], op1 = operands[1]; int elt = INTVAL (operands[3]); @@ -10367,14 +9879,14 @@ DONE; } - operands[1] = adjust_address_nv (op1, mode, - elt * GET_MODE_SIZE (mode)); + operands[1] = adjust_address_nv (op1, mode, + elt * GET_MODE_SIZE (mode)); }) (define_expand "avx_vpermil" - [(set (match_operand:AVXMODEFDP 0 "register_operand" "") - (vec_select:AVXMODEFDP - (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "") + [(set (match_operand:VF2 0 "register_operand" "") + (vec_select:VF2 + (match_operand:VF2 1 "nonimmediate_operand" "") (match_operand:SI 2 "const_0_to_255_operand" "")))] "TARGET_AVX" { @@ -10394,9 +9906,9 @@ }) (define_expand "avx_vpermil" - [(set (match_operand:AVXMODEFSP 0 "register_operand" "") - (vec_select:AVXMODEFSP - (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "") + [(set (match_operand:VF1 0 "register_operand" "") + (vec_select:VF1 + (match_operand:VF1 1 "nonimmediate_operand" "") (match_operand:SI 2 "const_0_to_255_operand" "")))] "TARGET_AVX" { @@ -10420,12 +9932,13 @@ }) (define_insn "*avx_vpermilp" - [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") - (vec_select:AVXMODEF2P - (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm") - (match_parallel 2 "avx_vpermilp__operand" + [(set (match_operand:VF 0 "register_operand" "=x") + (vec_select:VF + (match_operand:VF 1 "nonimmediate_operand" "xm") + (match_parallel 2 "" [(match_operand 3 "const_int_operand" "")])))] - "TARGET_AVX" + "TARGET_AVX + && avx_vpermilp_parallel (operands[2], mode)" { int mask = avx_vpermilp_parallel (operands[2], mode) - 1; operands[2] = GEN_INT (mask); @@ -10438,10 +9951,10 @@ (set_attr "mode" "")]) (define_insn "avx_vpermilvar3" - [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") - (unspec:AVXMODEF2P - [(match_operand:AVXMODEF2P 1 "register_operand" "x") - (match_operand: 2 "nonimmediate_operand" "xm")] + [(set (match_operand:VF 0 "register_operand" "=x") + 
(unspec:VF + [(match_operand:VF 1 "register_operand" "x") + (match_operand: 2 "nonimmediate_operand" "xm")] UNSPEC_VPERMIL))] "TARGET_AVX" "vpermil\t{%2, %1, %0|%0, %1, %2}" @@ -10473,7 +9986,7 @@ for (i = 0; i < nelt2; ++i) perm[i + nelt2] = GEN_INT (base + i); - t2 = gen_rtx_VEC_CONCAT (mode, + t2 = gen_rtx_VEC_CONCAT (mode, operands[1], operands[2]); t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm)); t2 = gen_rtx_VEC_SELECT (mode, t2, t1); @@ -10505,12 +10018,13 @@ (define_insn "*avx_vperm2f128_nozero" [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x") (vec_select:AVX256MODE2P - (vec_concat: + (vec_concat: (match_operand:AVX256MODE2P 1 "register_operand" "x") (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")) - (match_parallel 3 "avx_vperm2f128__operand" + (match_parallel 3 "" [(match_operand 4 "const_int_operand" "")])))] - "TARGET_AVX" + "TARGET_AVX + && avx_vperm2f128_parallel (operands[3], mode)" { int mask = avx_vperm2f128_parallel (operands[3], mode) - 1; operands[3] = GEN_INT (mask); @@ -10523,9 +10037,9 @@ (set_attr "mode" "V8SF")]) (define_expand "avx_vinsertf128" - [(match_operand:AVX256MODE 0 "register_operand" "") - (match_operand:AVX256MODE 1 "register_operand" "") - (match_operand: 2 "nonimmediate_operand" "") + [(match_operand:V_256 0 "register_operand" "") + (match_operand:V_256 1 "register_operand" "") + (match_operand: 2 "nonimmediate_operand" "") (match_operand:SI 3 "const_0_to_1_operand" "")] "TARGET_AVX" { @@ -10548,11 +10062,11 @@ }) (define_insn "vec_set_lo_" - [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x") - (vec_concat:AVX256MODE4P - (match_operand: 2 "nonimmediate_operand" "xm") - (vec_select: - (match_operand:AVX256MODE4P 1 "register_operand" "x") + [(set (match_operand:VI8F_256 0 "register_operand" "=x") + (vec_concat:VI8F_256 + (match_operand: 2 "nonimmediate_operand" "xm") + (vec_select: + (match_operand:VI8F_256 1 "register_operand" "x") (parallel [(const_int 2) (const_int 3)]))))] 
"TARGET_AVX" "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" @@ -10563,12 +10077,12 @@ (set_attr "mode" "V8SF")]) (define_insn "vec_set_hi_" - [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x") - (vec_concat:AVX256MODE4P - (vec_select: - (match_operand:AVX256MODE4P 1 "register_operand" "x") + [(set (match_operand:VI8F_256 0 "register_operand" "=x") + (vec_concat:VI8F_256 + (vec_select: + (match_operand:VI8F_256 1 "register_operand" "x") (parallel [(const_int 0) (const_int 1)])) - (match_operand: 2 "nonimmediate_operand" "xm")))] + (match_operand: 2 "nonimmediate_operand" "xm")))] "TARGET_AVX" "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" [(set_attr "type" "sselog") @@ -10578,11 +10092,11 @@ (set_attr "mode" "V8SF")]) (define_insn "vec_set_lo_" - [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x") - (vec_concat:AVX256MODE8P - (match_operand: 2 "nonimmediate_operand" "xm") - (vec_select: - (match_operand:AVX256MODE8P 1 "register_operand" "x") + [(set (match_operand:VI4F_256 0 "register_operand" "=x") + (vec_concat:VI4F_256 + (match_operand: 2 "nonimmediate_operand" "xm") + (vec_select: + (match_operand:VI4F_256 1 "register_operand" "x") (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))] "TARGET_AVX" @@ -10594,13 +10108,13 @@ (set_attr "mode" "V8SF")]) (define_insn "vec_set_hi_" - [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x") - (vec_concat:AVX256MODE8P - (vec_select: - (match_operand:AVX256MODE8P 1 "register_operand" "x") + [(set (match_operand:VI4F_256 0 "register_operand" "=x") + (vec_concat:VI4F_256 + (vec_select: + (match_operand:VI4F_256 1 "register_operand" "x") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])) - (match_operand: 2 "nonimmediate_operand" "xm")))] + (match_operand: 2 "nonimmediate_operand" "xm")))] "TARGET_AVX" "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" [(set_attr "type" "sselog") @@ -10689,55 +10203,61 @@ (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) 
-(define_insn "avx_maskload" - [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") - (unspec:AVXMODEF2P - [(match_operand:AVXMODEF2P 1 "memory_operand" "m") - (match_operand: 2 "register_operand" "x") +(define_expand "avx_maskload" + [(set (match_operand:VF 0 "register_operand" "") + (unspec:VF + [(match_operand: 2 "register_operand" "") + (match_operand:VF 1 "memory_operand" "") (match_dup 0)] - UNSPEC_MASKLOAD))] - "TARGET_AVX" - "vmaskmov\t{%1, %2, %0|%0, %2, %1}" - [(set_attr "type" "sselog1") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) + UNSPEC_MASKMOV))] + "TARGET_AVX") -(define_insn "avx_maskstore" - [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m") - (unspec:AVXMODEF2P - [(match_operand: 1 "register_operand" "x") - (match_operand:AVXMODEF2P 2 "register_operand" "x") +(define_expand "avx_maskstore" + [(set (match_operand:VF 0 "memory_operand" "") + (unspec:VF + [(match_operand: 1 "register_operand" "") + (match_operand:VF 2 "register_operand" "") (match_dup 0)] - UNSPEC_MASKSTORE))] - "TARGET_AVX" + UNSPEC_MASKMOV))] + "TARGET_AVX") + +(define_insn "*avx_maskmov" + [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m") + (unspec:VF + [(match_operand: 1 "register_operand" "x,x") + (match_operand:VF 2 "nonimmediate_operand" "m,x") + (match_dup 0)] + UNSPEC_MASKMOV))] + "TARGET_AVX + && (REG_P (operands[0]) == MEM_P (operands[2]))" "vmaskmov\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog1") (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "")]) -(define_insn_and_split "avx__" +(define_insn_and_split "avx__" [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m") (unspec:AVX256MODE2P - [(match_operand: 1 "nonimmediate_operand" "xm,x")] + [(match_operand: 1 "nonimmediate_operand" "xm,x")] UNSPEC_CAST))] "TARGET_AVX" "#" "&& reload_completed" [(const_int 0)] { + rtx op0 = operands[0]; rtx op1 = operands[1]; - if (REG_P (op1)) - op1 = gen_rtx_REG (mode, REGNO (op1)); + 
if (REG_P (op0)) + op0 = gen_rtx_REG (mode, REGNO (op0)); else - op1 = gen_lowpart (mode, op1); - emit_move_insn (operands[0], op1); + op1 = gen_rtx_REG (mode, REGNO (op1)); + emit_move_insn (op0, op1); DONE; }) (define_expand "vec_init" - [(match_operand:AVX256MODE 0 "register_operand" "") + [(match_operand:V_256 0 "register_operand" "") (match_operand 1 "" "")] "TARGET_AVX" { @@ -10746,10 +10266,10 @@ }) (define_insn "*vec_concat_avx" - [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x") - (vec_concat:AVX256MODE - (match_operand: 1 "register_operand" "x,x") - (match_operand: 2 "vector_move_operand" "xm,C")))] + [(set (match_operand:V_256 0 "register_operand" "=x,x") + (vec_concat:V_256 + (match_operand: 1 "register_operand" "x,x") + (match_operand: 2 "vector_move_operand" "xm,C")))] "TARGET_AVX" { switch (which_alternative) @@ -10774,7 +10294,7 @@ (set_attr "prefix_extra" "1,*") (set_attr "length_immediate" "1,*") (set_attr "prefix" "vex") - (set_attr "mode" "")]) + (set_attr "mode" "")]) (define_insn "vcvtph2ps" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -10813,7 +10333,7 @@ [(set (match_operand:V8HI 0 "register_operand" "") (vec_concat:V8HI (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "") - (match_operand:SI 2 "immediate_operand" "")] + (match_operand:SI 2 "const_0_to_255_operand" "")] UNSPEC_VCVTPS2PH) (match_dup 3)))] "TARGET_F16C" @@ -10823,7 +10343,7 @@ [(set (match_operand:V8HI 0 "register_operand" "=x") (vec_concat:V8HI (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x") - (match_operand:SI 2 "immediate_operand" "N")] + (match_operand:SI 2 "const_0_to_255_operand" "N")] UNSPEC_VCVTPS2PH) (match_operand:V4HI 3 "const0_operand" "")))] "TARGET_F16C" @@ -10835,7 +10355,7 @@ (define_insn "*vcvtps2ph_store" [(set (match_operand:V4HI 0 "memory_operand" "=m") (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x") - (match_operand:SI 2 "immediate_operand" "N")] + (match_operand:SI 2 "const_0_to_255_operand" "N")] 
UNSPEC_VCVTPS2PH))] "TARGET_F16C" "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}" @@ -10846,7 +10366,7 @@ (define_insn "vcvtps2ph256" [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm") (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x") - (match_operand:SI 2 "immediate_operand" "N")] + (match_operand:SI 2 "const_0_to_255_operand" "N")] UNSPEC_VCVTPS2PH))] "TARGET_F16C" "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"