From: hjl Date: Sun, 22 Oct 2006 17:40:21 +0000 (+0000) Subject: 2006-10-22 H.J. Lu X-Git-Url: http://git.sourceforge.jp/view?p=pf3gnuchains%2Fgcc-fork.git;a=commitdiff_plain;h=2b4894c5b58bccb32da6eac05e5947339422b278 2006-10-22 H.J. Lu * config.gcc (i[34567]86-*-*): Add tmmintrin.h to extra_headers. (x86_64-*-*): Likewise. * config/i386/i386.c (pta_flags): Add PTA_SSSE3. (override_options): Check SSSE3. (ix86_builtins): Add IX86_BUILTIN_PHADDW, IX86_BUILTIN_PHADDD, IX86_BUILTIN_PHADDSW, IX86_BUILTIN_PHSUBW, IX86_BUILTIN_PHSUBD, IX86_BUILTIN_PHSUBSW, IX86_BUILTIN_PMADDUBSW, IX86_BUILTIN_PMULHRSW, IX86_BUILTIN_PSHUFB, IX86_BUILTIN_PSIGNB, IX86_BUILTIN_PSIGNW, IX86_BUILTIN_PSIGND, IX86_BUILTIN_PALIGNR, IX86_BUILTIN_PABSB, IX86_BUILTIN_PABSW, IX86_BUILTIN_PABSD, IX86_BUILTIN_PHADDW128, IX86_BUILTIN_PHADDD128, IX86_BUILTIN_PHADDSW128, IX86_BUILTIN_PHSUBW128, IX86_BUILTIN_PHSUBD128, IX86_BUILTIN_PHSUBSW128, IX86_BUILTIN_PMADDUBSW128, IX86_BUILTIN_PMULHRSW128, IX86_BUILTIN_PSHUFB128, IX86_BUILTIN_PSIGNB128, IX86_BUILTIN_PSIGNW128, IX86_BUILTIN_PSIGND128, IX86_BUILTIN_PALIGNR128, IX86_BUILTIN_PABSB128, IX86_BUILTIN_PABSW128 and IX86_BUILTIN_PABSD128. (bdesc_2arg): Add SSSE3. (bdesc_1arg): Likewise. (ix86_init_mmx_sse_builtins): Support SSSE3. (ix86_expand_builtin): Likewise. * config/i386/i386.h (TARGET_CPU_CPP_BUILTINS): Likewise. * config/i386/i386.md (UNSPEC_PSHUFB): New. (UNSPEC_PSIGN): Likewise. (UNSPEC_PALIGNR): Likewise. Include mmx.md before sse.md. * config/i386/i386.opt: Add -mssse3. * config/i386/sse.md (ssse3_phaddwv8hi3): New pattern for SSSE3. (ssse3_phaddwv4hi3): Likewise. (ssse3_phadddv4si3): Likewise. (ssse3_phadddv2si3): Likewise. (ssse3_phaddswv8hi3): Likewise. (ssse3_phaddswv4hi3): Likewise. (ssse3_phsubwv8hi3): Likewise. (ssse3_phsubwv4hi3): Likewise. (ssse3_phsubdv4si3): Likewise. (ssse3_phsubdv2si3): Likewise. (ssse3_phsubswv8hi3): Likewise. (ssse3_phsubswv4hi3): Likewise. (ssse3_pmaddubswv8hi3): Likewise. (ssse3_pmaddubswv4hi3): Likewise. (ssse3_pmulhrswv8hi3): Likewise. (ssse3_pmulhrswv4hi3): Likewise. (ssse3_pshufbv16qi3): Likewise. (ssse3_pshufbv8qi3): Likewise. (ssse3_psign3): Likewise. (ssse3_psign3): Likewise. (ssse3_palignrti): Likewise. (ssse3_palignrdi): Likewise. (abs2): Likewise. (abs2): Likewise. * config/i386/tmmintrin.h: New file. * doc/extend.texi: Document SSSE3 built-in functions. * doc/invoke.texi: Document -mssse3/-mno-ssse3 switches. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@117958 138bc75d-0d04-0410-961f-82ee72b054a4 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index cbc5d7b1f70..235b0037757 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,69 @@ +2006-10-22 H.J. Lu + + * config.gcc (i[34567]86-*-*): Add tmmintrin.h to extra_headers. + (x86_64-*-*): Likewise. + + * config/i386/i386.c (pta_flags): Add PTA_SSSE3. + (override_options): Check SSSE3. 
+ (ix86_builtins): Add IX86_BUILTIN_PHADDW, IX86_BUILTIN_PHADDD, + IX86_BUILTIN_PHADDSW, IX86_BUILTIN_PHSUBW, IX86_BUILTIN_PHSUBD, + IX86_BUILTIN_PHSUBSW, IX86_BUILTIN_PMADDUBSW, + IX86_BUILTIN_PMULHRSW, IX86_BUILTIN_PSHUFB, + IX86_BUILTIN_PSIGNB, IX86_BUILTIN_PSIGNW, IX86_BUILTIN_PSIGND, + IX86_BUILTIN_PALIGNR, IX86_BUILTIN_PABSB, IX86_BUILTIN_PABSW, + IX86_BUILTIN_PABSD, IX86_BUILTIN_PHADDW128, + IX86_BUILTIN_PHADDD128, IX86_BUILTIN_PHADDSW128, + IX86_BUILTIN_PHSUBW128, IX86_BUILTIN_PHSUBD128, + IX86_BUILTIN_PHSUBSW128, IX86_BUILTIN_PMADDUBSW128, + IX86_BUILTIN_PMULHRSW128, IX86_BUILTIN_PSHUFB128, + IX86_BUILTIN_PSIGNB128, IX86_BUILTIN_PSIGNW128, + IX86_BUILTIN_PSIGND128, IX86_BUILTIN_PALIGNR128, + IX86_BUILTIN_PABSB128, IX86_BUILTIN_PABSW128 and + IX86_BUILTIN_PABSD128. + (bdesc_2arg): Add SSSE3. + (bdesc_1arg): Likewise. + (ix86_init_mmx_sse_builtins): Support SSSE3. + (ix86_expand_builtin): Likewise. + * config/i386/i386.h (TARGET_CPU_CPP_BUILTINS): Likewise. + + * config/i386/i386.md (UNSPEC_PSHUFB): New. + (UNSPEC_PSIGN): Likewise. + (UNSPEC_PALIGNR): Likewise. + Include mmx.md before sse.md. + + * config/i386/i386.opt: Add -mssse3. + + * config/i386/sse.md (ssse3_phaddwv8hi3): New pattern for SSSE3. + (ssse3_phaddwv4hi3): Likewise. + (ssse3_phadddv4si3): Likewise. + (ssse3_phadddv2si3): Likewise. + (ssse3_phaddswv8hi3): Likewise. + (ssse3_phaddswv4hi3): Likewise. + (ssse3_phsubwv8hi3): Likewise. + (ssse3_phsubwv4hi3): Likewise. + (ssse3_phsubdv4si3): Likewise. + (ssse3_phsubdv2si3): Likewise. + (ssse3_phsubswv8hi3): Likewise. + (ssse3_phsubswv4hi3): Likewise. + (ssse3_pmaddubswv8hi3): Likewise. + (ssse3_pmaddubswv4hi3): Likewise. + (ssse3_pmulhrswv8hi3): Likewise. + (ssse3_pmulhrswv4hi3): Likewise. + (ssse3_pshufbv16qi3): Likewise. + (ssse3_pshufbv8qi3): Likewise. + (ssse3_psign3): Likewise. + (ssse3_psign3): Likewise. + (ssse3_palignrti): Likewise. + (ssse3_palignrdi): Likewise. + (abs2): Likewise. + (abs2): Likewise. + + * config/i386/tmmintrin.h: New file. + + * doc/extend.texi: Document SSSE3 built-in functions. + + * doc/invoke.texi: Document -mssse3/-mno-ssse3 switches. 
+ 2006-10-22 Ira Rosen * tree-vect-transform.c (vectorizable_load): Use the type of the diff --git a/gcc/config.gcc b/gcc/config.gcc index 1e3c35887cd..7545c38f17a 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -268,11 +268,13 @@ xscale-*-*) ;; i[34567]86-*-*) cpu_type=i386 - extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h pmmintrin.h" + extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h + pmmintrin.h tmmintrin.h" ;; x86_64-*-*) cpu_type=i386 - extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h pmmintrin.h" + extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h + pmmintrin.h tmmintrin.h" need_64bit_hwint=yes ;; ia64-*-*) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 8b2c798077c..d72120c63d0 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1476,7 +1476,8 @@ override_options (void) PTA_PREFETCH_SSE = 16, PTA_3DNOW = 32, PTA_3DNOW_A = 64, - PTA_64BIT = 128 + PTA_64BIT = 128, + PTA_SSSE3 = 256 } flags; } const processor_alias_table[] = @@ -1684,6 +1685,9 @@ override_options (void) if (processor_alias_table[i].flags & PTA_SSE3 && !(target_flags_explicit & MASK_SSE3)) target_flags |= MASK_SSE3; + if (processor_alias_table[i].flags & PTA_SSSE3 + && !(target_flags_explicit & MASK_SSSE3)) + target_flags |= MASK_SSSE3; if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) x86_prefetch_sse = true; if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) @@ -1860,6 +1864,10 @@ override_options (void) if (!TARGET_80387) target_flags |= MASK_NO_FANCY_MATH_387; + /* Turn on SSE3 builtins for -mssse3. */ + if (TARGET_SSSE3) + target_flags |= MASK_SSE3; + /* Turn on SSE2 builtins for -msse3. */ if (TARGET_SSE3) target_flags |= MASK_SSE2; @@ -14473,6 +14481,41 @@ enum ix86_builtins IX86_BUILTIN_MONITOR, IX86_BUILTIN_MWAIT, + /* SSSE3. 
*/ + IX86_BUILTIN_PHADDW, + IX86_BUILTIN_PHADDD, + IX86_BUILTIN_PHADDSW, + IX86_BUILTIN_PHSUBW, + IX86_BUILTIN_PHSUBD, + IX86_BUILTIN_PHSUBSW, + IX86_BUILTIN_PMADDUBSW, + IX86_BUILTIN_PMULHRSW, + IX86_BUILTIN_PSHUFB, + IX86_BUILTIN_PSIGNB, + IX86_BUILTIN_PSIGNW, + IX86_BUILTIN_PSIGND, + IX86_BUILTIN_PALIGNR, + IX86_BUILTIN_PABSB, + IX86_BUILTIN_PABSW, + IX86_BUILTIN_PABSD, + + IX86_BUILTIN_PHADDW128, + IX86_BUILTIN_PHADDD128, + IX86_BUILTIN_PHADDSW128, + IX86_BUILTIN_PHSUBW128, + IX86_BUILTIN_PHSUBD128, + IX86_BUILTIN_PHSUBSW128, + IX86_BUILTIN_PMADDUBSW128, + IX86_BUILTIN_PMULHRSW128, + IX86_BUILTIN_PSHUFB128, + IX86_BUILTIN_PSIGNB128, + IX86_BUILTIN_PSIGNW128, + IX86_BUILTIN_PSIGND128, + IX86_BUILTIN_PALIGNR128, + IX86_BUILTIN_PABSB128, + IX86_BUILTIN_PABSW128, + IX86_BUILTIN_PABSD128, + IX86_BUILTIN_VEC_INIT_V2SI, IX86_BUILTIN_VEC_INIT_V4HI, IX86_BUILTIN_VEC_INIT_V8QI, @@ -14813,7 +14856,33 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 }, { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 }, { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 }, - { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 } + { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }, + + /* SSSE3 */ + { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, 
"__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 } }; static const struct builtin_description bdesc_1arg[] = @@ -14860,6 +14929,14 @@ static const struct builtin_description bdesc_1arg[] = /* SSE3 */ { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 }, { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 }, + + /* SSSE3 */ + { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 }, + { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 }, + { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 }, }; static void @@ -14994,6 +15071,16 @@ ix86_init_mmx_sse_builtins (void) /* Normal vector unops. */ tree v4sf_ftype_v4sf = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE); + tree v16qi_ftype_v16qi + = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE); + tree v8hi_ftype_v8hi + = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE); + tree v4si_ftype_v4si + = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE); + tree v8qi_ftype_v8qi + = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE); + tree v4hi_ftype_v4hi + = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE); /* Normal vector binops. */ tree v4sf_ftype_v4sf_v4sf @@ -15013,6 +15100,12 @@ ix86_init_mmx_sse_builtins (void) long_long_unsigned_type_node, long_long_unsigned_type_node, NULL_TREE); + tree di_ftype_di_di_int + = build_function_type_list (long_long_unsigned_type_node, + long_long_unsigned_type_node, + long_long_unsigned_type_node, + integer_type_node, NULL_TREE); + tree v2si_ftype_v2sf = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE); tree v2sf_ftype_v2si @@ -15114,6 +15207,9 @@ ix86_init_mmx_sse_builtins (void) tree v2di_ftype_v2di_int = build_function_type_list (V2DI_type_node, V2DI_type_node, integer_type_node, NULL_TREE); + tree v2di_ftype_v2di_v2di_int + = build_function_type_list (V2DI_type_node, V2DI_type_node, + V2DI_type_node, integer_type_node, NULL_TREE); tree v4si_ftype_v4si_int = build_function_type_list (V4SI_type_node, V4SI_type_node, integer_type_node, NULL_TREE); @@ -15236,6 +15332,50 @@ ix86_init_mmx_sse_builtins (void) def_builtin (d->mask, d->name, type, d->code); } + /* Add all builtins that are more or less simple operations on 1 operand. 
*/ + for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++) + { + enum machine_mode mode; + tree type; + + if (d->name == 0) + continue; + mode = insn_data[d->icode].operand[1].mode; + + switch (mode) + { + case V16QImode: + type = v16qi_ftype_v16qi; + break; + case V8HImode: + type = v8hi_ftype_v8hi; + break; + case V4SImode: + type = v4si_ftype_v4si; + break; + case V2DFmode: + type = v2df_ftype_v2df; + break; + case V4SFmode: + type = v4sf_ftype_v4sf; + break; + case V8QImode: + type = v8qi_ftype_v8qi; + break; + case V4HImode: + type = v4hi_ftype_v4hi; + break; + case V2SImode: + type = v2si_ftype_v2si; + break; + + default: + abort (); + } + + def_builtin (d->mask, d->name, type, d->code); + } + /* Add the remaining MMX insns with somewhat more complicated types. */ def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS); def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW); @@ -15435,6 +15575,12 @@ ix86_init_mmx_sse_builtins (void) def_builtin (MASK_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU); + /* SSSE3. */ + def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128", + v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128); + def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, + IX86_BUILTIN_PALIGNR); + /* Access to the vec_init patterns. */ ftype = build_function_type_list (V2SI_type_node, integer_type_node, integer_type_node, NULL_TREE); @@ -15925,7 +16071,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, tree arglist = TREE_OPERAND (exp, 1); tree arg0, arg1, arg2; rtx op0, op1, op2, pat; - enum machine_mode tmode, mode0, mode1, mode2; + enum machine_mode tmode, mode0, mode1, mode2, mode3; unsigned int fcode = DECL_FUNCTION_CODE (fndecl); switch (fcode) @@ -16297,6 +16443,52 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist, target, 1); + case IX86_BUILTIN_PALIGNR: + case IX86_BUILTIN_PALIGNR128: + if (fcode == IX86_BUILTIN_PALIGNR) + { + icode = CODE_FOR_ssse3_palignrdi; + mode = DImode; + } + else + { + icode = CODE_FOR_ssse3_palignrti; + mode = V2DImode; + } + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); + tmode = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + mode2 = insn_data[icode].operand[2].mode; + mode3 = insn_data[icode].operand[3].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) + { + op0 = copy_to_reg (op0); + op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0); + } + if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) + { + op1 = copy_to_reg (op1); + op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0); + } + if (! (*insn_data[icode].operand[3].predicate) (op2, mode3)) + { + error ("shift must be an immediate"); + return const0_rtx; + } + target = gen_reg_rtx (mode); + pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0), + op0, op1, op2); + if (! 
pat) + return 0; + emit_insn (pat); + return target; + case IX86_BUILTIN_VEC_INIT_V2SI: case IX86_BUILTIN_VEC_INIT_V4HI: case IX86_BUILTIN_VEC_INIT_V8QI: diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 4993a94f5ce..bebc91e77b9 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -409,6 +409,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); builtin_define ("__SSE2__"); \ if (TARGET_SSE3) \ builtin_define ("__SSE3__"); \ + if (TARGET_SSSE3) \ + builtin_define ("__SSSE3__"); \ if (TARGET_SSE_MATH && TARGET_SSE) \ builtin_define ("__SSE_MATH__"); \ if (TARGET_SSE_MATH && TARGET_SSE2) \ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 10b5eb871e4..049b86aa206 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -148,6 +148,11 @@ (UNSPEC_SP_TEST 101) (UNSPEC_SP_TLS_SET 102) (UNSPEC_SP_TLS_TEST 103) + + ; SSSE3 + (UNSPEC_PSHUFB 120) + (UNSPEC_PSIGN 121) + (UNSPEC_PALIGNR 122) ]) (define_constants @@ -20954,6 +20959,6 @@ } [(set_attr "type" "multi")]) -(include "sse.md") (include "mmx.md") +(include "sse.md") (include "sync.md") diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 2922f5d2fe0..aa249205e24 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -197,6 +197,10 @@ msse3 Target Report Mask(SSE3) Support MMX, SSE, SSE2 and SSE3 built-in functions and code generation +mssse3 +Target Report Mask(SSSE3) +Support MMX, SSE, SSE2, SSE3 and SSSE3 built-in functions and code generation + msseregparm Target RejectNegative Mask(SSEREGPARM) Use SSE register passing conventions for SF and DF mode diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 3264ccfcf3f..1424149c1ab 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -4000,3 +4000,578 @@ ;; zero extended to 64bit, we only need to set up 32bit registers. 
"monitor" [(set_attr "length" "3")]) + +;; SSSE3 +(define_insn "ssse3_phaddwv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_concat:V8HI + (vec_concat:V4HI + (vec_concat:V2HI + (plus:HI + (vec_select:HI + (match_operand:V8HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) + (plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) + (vec_concat:V4HI + (vec_concat:V2HI + (plus:HI + (vec_select:HI + (match_operand:V8HI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) + (plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] + "TARGET_SSSE3" + "phaddw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_phaddwv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_concat:V4HI + (vec_concat:V2HI + (plus:HI + (vec_select:HI + (match_operand:V4HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (plus:HI + (vec_select:HI + (match_operand:V4HI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] + "TARGET_SSSE3" + "phaddw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_phadddv4si3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_concat:V4SI + (vec_concat:V2SI + (plus:SI + (vec_select:SI + (match_operand:V4SI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) + (plus:SI + (vec_select:SI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2SI + (plus:SI + (vec_select:SI + (match_operand:V4SI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 1)]))) + (plus:SI + (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] + "TARGET_SSSE3" + "phaddd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_phadddv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_concat:V2SI + (plus:SI + (vec_select:SI + (match_operand:V2SI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) + (plus:SI + (vec_select:SI + (match_operand:V2SI 2 
"nonimmediate_operand" "ym") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))] + "TARGET_SSSE3" + "phaddd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_phaddswv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_concat:V8HI + (vec_concat:V4HI + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI + (match_operand:V8HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (ss_plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) + (ss_plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) + (vec_concat:V4HI + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI + (match_operand:V8HI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (ss_plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) + (ss_plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] + "TARGET_SSSE3" + "phaddsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_phaddswv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_concat:V4HI + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI + (match_operand:V4HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (ss_plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI + (match_operand:V4HI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (ss_plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] + "TARGET_SSSE3" + "phaddsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_phsubwv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_concat:V8HI + (vec_concat:V4HI + (vec_concat:V2HI + (minus:HI + (vec_select:HI + (match_operand:V8HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) + (minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) + (vec_concat:V4HI + (vec_concat:V2HI + (minus:HI + (vec_select:HI + (match_operand:V8HI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (minus:HI + (vec_select:HI (match_dup 2) 
(parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) + (minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] + "TARGET_SSSE3" + "phsubw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_phsubwv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_concat:V4HI + (vec_concat:V2HI + (minus:HI + (vec_select:HI + (match_operand:V4HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (minus:HI + (vec_select:HI + (match_operand:V4HI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] + "TARGET_SSSE3" + "phsubw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_phsubdv4si3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_concat:V4SI + (vec_concat:V2SI + (minus:SI + (vec_select:SI + (match_operand:V4SI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) + (minus:SI + (vec_select:SI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2SI + (minus:SI + (vec_select:SI + (match_operand:V4SI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 1)]))) + (minus:SI + (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] + "TARGET_SSSE3" + "phsubd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_phsubdv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_concat:V2SI + (minus:SI + (vec_select:SI + (match_operand:V2SI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) + (minus:SI + (vec_select:SI + (match_operand:V2SI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))] + "TARGET_SSSE3" + "phsubd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_phsubswv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_concat:V8HI + (vec_concat:V4HI + (vec_concat:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V8HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (ss_minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (ss_minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) + (ss_minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) + (vec_concat:V4HI + (vec_concat:V2HI + (ss_minus:HI + 
(vec_select:HI + (match_operand:V8HI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (ss_minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (ss_minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) + (ss_minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] + "TARGET_SSSE3" + "phsubsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_phsubswv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_concat:V4HI + (vec_concat:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V4HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (ss_minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V4HI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (ss_minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] + "TARGET_SSSE3" + "phsubsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_pmaddubswv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (ss_plus:V8HI + (mult:V8HI + (zero_extend:V8HI + (vec_select:V4QI + (match_operand:V16QI 1 "nonimmediate_operand" "%0") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6) + (const_int 8) + (const_int 10) + (const_int 12) + (const_int 14)]))) + (sign_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6) + (const_int 8) + (const_int 10) + (const_int 12) + (const_int 14)])))) + (mult:V8HI + (zero_extend:V8HI + (vec_select:V16QI (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7) + (const_int 9) + (const_int 11) + (const_int 13) + (const_int 15)]))) + (sign_extend:V8HI + (vec_select:V16QI (match_dup 2) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7) + (const_int 9) + (const_int 11) + (const_int 13) + (const_int 15)]))))))] + "TARGET_SSSE3" + "pmaddubsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_pmaddubswv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (ss_plus:V4HI + (mult:V4HI + (zero_extend:V4HI + (vec_select:V4QI + (match_operand:V8QI 1 "nonimmediate_operand" "%0") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)]))) + (sign_extend:V4HI + (vec_select:V4QI + (match_operand:V8QI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)])))) + (mult:V4HI + (zero_extend:V4HI + (vec_select:V8QI (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))) + (sign_extend:V4HI + (vec_select:V8QI (match_dup 2) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))))))] + "TARGET_SSSE3" + "pmaddubsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "DI")]) + +(define_insn 
"ssse3_pmulhrswv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (truncate:V8HI + (lshiftrt:V8SI + (plus:V8SI + (lshiftrt:V8SI + (mult:V8SI + (sign_extend:V8SI + (match_operand:V8HI 1 "nonimmediate_operand" "%0")) + (sign_extend:V8SI + (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) + (const_int 14)) + (const_vector:V8HI [(const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1)])) + (const_int 1))))] + "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)" + "pmulhrsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseimul") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_pmulhrswv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (plus:V4SI + (lshiftrt:V4SI + (mult:V4SI + (sign_extend:V4SI + (match_operand:V4HI 1 "nonimmediate_operand" "%0")) + (sign_extend:V4SI + (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (const_int 14)) + (const_vector:V4HI [(const_int 1) (const_int 1) + (const_int 1) (const_int 1)])) + (const_int 1))))] + "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)" + "pmulhrsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseimul") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_pshufbv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")] + UNSPEC_PSHUFB))] + "TARGET_SSSE3" + "pshufb\t{%2, %0|%0, %2}"; + [(set_attr "type" "sselog1") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_pshufbv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")] + UNSPEC_PSHUFB))] + "TARGET_SSSE3" + "pshufb\t{%2, %0|%0, %2}"; + [(set_attr "type" "sselog1") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_psign3" + [(set (match_operand:SSEMODE124 0 "register_operand" "=x") + (unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0") + (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")] + UNSPEC_PSIGN))] + "TARGET_SSSE3" + "psign\t{%2, %0|%0, %2}"; + [(set_attr "type" "sselog1") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_psign3" + [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + (unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0") + (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")] + UNSPEC_PSIGN))] + "TARGET_SSSE3" + "psign\t{%2, %0|%0, %2}"; + [(set_attr "type" "sselog1") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_palignrti" + [(set (match_operand:TI 0 "register_operand" "=x") + (unspec:TI [(match_operand:TI 1 "register_operand" "0") + (match_operand:TI 2 "nonimmediate_operand" "xm") + (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")] + UNSPEC_PALIGNR))] + "TARGET_SSSE3" +{ + operands[3] = GEN_INT (INTVAL (operands[3]) / 8); + return "palignr\t{%3, %2, %0|%0, %2, %3}"; +} + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_palignrdi" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "ym") + (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")] + UNSPEC_PALIGNR))] + "TARGET_SSSE3" +{ + operands[3] = GEN_INT (INTVAL (operands[3]) / 8); + return "palignr\t{%3, %2, %0|%0, %2, %3}"; +} + [(set_attr "type" "sseishft") + (set_attr "mode" "DI")]) + +(define_insn "abs2" + [(set 
(match_operand:SSEMODE124 0 "register_operand" "=x") + (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))] + "TARGET_SSSE3" + "pabs\t{%1, %0|%0, %1}"; + [(set_attr "type" "sselog1") + (set_attr "mode" "TI")]) + +(define_insn "abs2" + [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))] + "TARGET_SSSE3" + "pabs\t{%1, %0|%0, %1}"; + [(set_attr "type" "sselog1") + (set_attr "mode" "DI")]) diff --git a/gcc/config/i386/tmmintrin.h b/gcc/config/i386/tmmintrin.h new file mode 100644 index 00000000000..e1bedc56135 --- /dev/null +++ b/gcc/config/i386/tmmintrin.h @@ -0,0 +1,448 @@ +/* Copyright (C) 2006 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to + the Free Software Foundation, 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you include this header file into source + files compiled by GCC, this header file does not by itself cause + the resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 9.1. 
*/ + +#ifndef _TMMINTRIN_H_INCLUDED +#define _TMMINTRIN_H_INCLUDED + +#ifdef __SSSE3__ +#include + +static __inline __m128i __attribute__((__always_inline__)) +_mm_hadd_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phaddw128 ((__v8hi)__X, (__v8hi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_hadd_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phaddd128 ((__v4si)__X, (__v4si)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_hadds_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phaddsw128 ((__v8hi)__X, (__v8hi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_hadd_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phaddw ((__v4hi)__X, (__v4hi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_hadd_pi32 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phaddd ((__v2si)__X, (__v2si)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_hadds_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phaddsw ((__v4hi)__X, (__v4hi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_hsub_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phsubw128 ((__v8hi)__X, (__v8hi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_hsub_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phsubd128 ((__v4si)__X, (__v4si)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_hsubs_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phsubsw128 ((__v8hi)__X, (__v8hi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_hsub_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phsubw ((__v4hi)__X, (__v4hi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_hsub_pi32 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phsubd ((__v2si)__X, (__v2si)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_hsubs_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phsubsw ((__v4hi)__X, (__v4hi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_maddubs_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pmaddubsw128 ((__v16qi)__X, (__v16qi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_maddubs_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_pmaddubsw ((__v8qi)__X, (__v8qi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_mulhrs_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pmulhrsw128 ((__v8hi)__X, (__v8hi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_mulhrs_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_pmulhrsw ((__v4hi)__X, (__v4hi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_shuffle_epi8 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pshufb128 ((__v16qi)__X, (__v16qi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_shuffle_pi8 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_pshufb ((__v8qi)__X, (__v8qi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_sign_epi8 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_psignb128 ((__v16qi)__X, (__v16qi)__Y); +} + +static __inline __m128i 
__attribute__((__always_inline__)) +_mm_sign_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_psignw128 ((__v8hi)__X, (__v8hi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_sign_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_psignd128 ((__v4si)__X, (__v4si)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_sign_pi8 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_psignb ((__v8qi)__X, (__v8qi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_sign_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_psignw ((__v4hi)__X, (__v4hi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_sign_pi32 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y); +} + +#define _mm_alignr_epi8(__X, __Y, __N) \ + ((__m128i)__builtin_ia32_palignr128 ((__v2di) __X, (__v2di) __Y, (__N) * 8)) + +#define _mm_alignr_pi8(__X, __Y, __N) \ + ((__m64)__builtin_ia32_palignr ((long long) (__X), (long long) (__Y), (__N) * 8)) + +static __inline __m128i __attribute__((__always_inline__)) +_mm_abs_epi8 (__m128i __X) +{ + return (__m128i) __builtin_ia32_pabsb128 ((__v16qi)__X); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_abs_epi16 (__m128i __X) +{ + return (__m128i) __builtin_ia32_pabsw128 ((__v8hi)__X); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_abs_epi32 (__m128i __X) +{ + return (__m128i) __builtin_ia32_pabsd128 ((__v4si)__X); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_abs_pi8 (__m64 __X) +{ + return (__m64) __builtin_ia32_pabsb ((__v8qi)__X); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_abs_pi16 (__m64 __X) +{ + return (__m64) __builtin_ia32_pabsw ((__v4hi)__X); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_abs_pi32 (__m64 __X) +{ + return (__m64) __builtin_ia32_pabsd ((__v2si)__X); +} + +#endif /* __SSSE3__ */ + +#endif /* _TMMINTRIN_H_INCLUDED */ +/* Copyright (C) 2006 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to + the Free Software Foundation, 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you include this header file into source + files compiled by GCC, this header file does not by itself cause + the resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 9.1. 
*/ + +#ifndef _TMMINTRIN_H_INCLUDED +#define _TMMINTRIN_H_INCLUDED + +#ifdef __SSSE3__ +#include + +static __inline __m128i __attribute__((__always_inline__)) +_mm_hadd_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phaddw128 ((__v8hi)__X, (__v8hi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_hadd_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phaddd128 ((__v4si)__X, (__v4si)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_hadds_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phaddsw128 ((__v8hi)__X, (__v8hi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_hadd_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phaddw ((__v4hi)__X, (__v4hi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_hadd_pi32 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phaddd ((__v2si)__X, (__v2si)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_hadds_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phaddsw ((__v4hi)__X, (__v4hi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_hsub_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phsubw128 ((__v8hi)__X, (__v8hi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_hsub_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phsubd128 ((__v4si)__X, (__v4si)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_hsubs_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phsubsw128 ((__v8hi)__X, (__v8hi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_hsub_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phsubw ((__v4hi)__X, (__v4hi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_hsub_pi32 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phsubd ((__v2si)__X, (__v2si)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_hsubs_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phsubsw ((__v4hi)__X, (__v4hi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_maddubs_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pmaddubsw128 ((__v16qi)__X, (__v16qi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_maddubs_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_pmaddubsw ((__v8qi)__X, (__v8qi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_mulhrs_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pmulhrsw128 ((__v8hi)__X, (__v8hi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_mulhrs_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_pmulhrsw ((__v4hi)__X, (__v4hi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_shuffle_epi8 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pshufb128 ((__v16qi)__X, (__v16qi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_shuffle_pi8 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_pshufb ((__v8qi)__X, (__v8qi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_sign_epi8 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_psignb128 ((__v16qi)__X, (__v16qi)__Y); +} + +static __inline __m128i 
__attribute__((__always_inline__)) +_mm_sign_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_psignw128 ((__v8hi)__X, (__v8hi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_sign_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_psignd128 ((__v4si)__X, (__v4si)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_sign_pi8 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_psignb ((__v8qi)__X, (__v8qi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_sign_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_psignw ((__v4hi)__X, (__v4hi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_sign_pi32 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y); +} + +#define _mm_alignr_epi8(__X, __Y, __N) \ + ((__m128i)__builtin_ia32_palignr128 ((__v2di) __X, (__v2di) __Y, (__N) * 8)) + +#define _mm_alignr_pi8(__X, __Y, __N) \ + ((__m64)__builtin_ia32_palignr ((long long) (__X), (long long) (__Y), (__N) * 8)) + +static __inline __m128i __attribute__((__always_inline__)) +_mm_abs_epi8 (__m128i __X) +{ + return (__m128i) __builtin_ia32_pabsb128 ((__v16qi)__X); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_abs_epi16 (__m128i __X) +{ + return (__m128i) __builtin_ia32_pabsw128 ((__v8hi)__X); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_abs_epi32 (__m128i __X) +{ + return (__m128i) __builtin_ia32_pabsd128 ((__v4si)__X); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_abs_pi8 (__m64 __X) +{ + return (__m64) __builtin_ia32_pabsb ((__v8qi)__X); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_abs_pi16 (__m64 __X) +{ + return (__m64) __builtin_ia32_pabsw ((__v4hi)__X); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_abs_pi32 (__m64 __X) +{ + return (__m64) __builtin_ia32_pabsd ((__v2si)__X); +} + +#endif /* __SSSE3__ */ + +#endif /* _TMMINTRIN_H_INCLUDED */ diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 5df92a3c08f..a12879ce5ba 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -7169,6 +7169,52 @@ The following built-in functions are available when @option{-msse3} is used. Generates the @code{movddup} machine instruction as a load from memory. @end table +The following built-in functions are available when @option{-mssse3} is used. +All of them generate the machine instruction that is part of the name +with MMX registers. + +@smallexample +v2si __builtin_ia32_phaddd (v2si, v2si) +v4hi __builtin_ia32_phaddw (v4hi, v4hi) +v4hi __builtin_ia32_phaddsw (v4hi, v4hi) +v2si __builtin_ia32_phsubd (v2si, v2si) +v4hi __builtin_ia32_phsubw (v4hi, v4hi) +v4hi __builtin_ia32_phsubsw (v4hi, v4hi) +v8qi __builtin_ia32_pmaddubsw (v8qi, v8qi) +v4hi __builtin_ia32_pmulhrsw (v4hi, v4hi) +v8qi __builtin_ia32_pshufb (v8qi, v8qi) +v8qi __builtin_ia32_psignb (v8qi, v8qi) +v2si __builtin_ia32_psignd (v2si, v2si) +v4hi __builtin_ia32_psignw (v4hi, v4hi) +long long __builtin_ia32_palignr (long long, long long, int) +v8qi __builtin_ia32_pabsb (v8qi) +v2si __builtin_ia32_pabsd (v2si) +v4hi __builtin_ia32_pabsw (v4hi) +@end smallexample + +The following built-in functions are available when @option{-mssse3} is used. +All of them generate the machine instruction that is part of the name +with SSE registers. 
+ +@smallexample +v4si __builtin_ia32_phaddd128 (v4si, v4si) +v8hi __builtin_ia32_phaddw128 (v8hi, v8hi) +v8hi __builtin_ia32_phaddsw128 (v8hi, v8hi) +v4si __builtin_ia32_phsubd128 (v4si, v4si) +v8hi __builtin_ia32_phsubw128 (v8hi, v8hi) +v8hi __builtin_ia32_phsubsw128 (v8hi, v8hi) +v16qi __builtin_ia32_pmaddubsw128 (v16qi, v16qi) +v8hi __builtin_ia32_pmulhrsw128 (v8hi, v8hi) +v16qi __builtin_ia32_pshufb128 (v16qi, v16qi) +v16qi __builtin_ia32_psignb128 (v16qi, v16qi) +v4si __builtin_ia32_psignd128 (v4si, v4si) +v8hi __builtin_ia32_psignw128 (v8hi, v8hi) +v2di __builtin_ia32_palignr (v2di, v2di, int) +v16qi __builtin_ia32_pabsb128 (v16qi) +v4si __builtin_ia32_pabsd128 (v4si) +v8hi __builtin_ia32_pabsw128 (v8hi) +@end smallexample + The following built-in functions are available when @option{-m3dnow} is used. All of them generate the machine instruction that is part of the name. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 15666caae2d..a5209927e59 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -533,7 +533,7 @@ Objective-C and Objective-C++ Dialects}. -mno-fp-ret-in-387 -msoft-float -msvr3-shlib @gol -mno-wide-multiply -mrtd -malign-double @gol -mpreferred-stack-boundary=@var{num} @gol --mmmx -msse -msse2 -msse3 -m3dnow @gol +-mmmx -msse -msse2 -msse3 -mssse3 -m3dnow @gol -mthreads -mno-align-stringops -minline-all-stringops @gol -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol -m96bit-long-double -mregparm=@var{num} -msseregparm @gol @@ -9568,6 +9568,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @itemx -mno-sse2 @item -msse3 @itemx -mno-sse3 +@item -mssse3 +@itemx -mno-ssse3 @item -m3dnow @itemx -mno-3dnow @opindex mmmx @@ -9577,9 +9579,10 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @opindex m3dnow @opindex mno-3dnow These switches enable or disable the use of instructions in the MMX, -SSE, SSE2 or 3DNow! extended instruction sets. These extensions are -also available as built-in functions: see @ref{X86 Built-in Functions}, -for details of the functions enabled and disabled by these switches. +SSE, SSE2, SSE3, SSSE3 or 3DNow! extended instruction sets. +These extensions are also available as built-in functions: see +@ref{X86 Built-in Functions}, for details of the functions enabled and +disabled by these switches. To have SSE/SSE2 instructions generated automatically from floating-point code (as opposed to 387 instructions), see @option{-mfpmath=sse}.
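
For reference, here is a minimal usage sketch of the new interface; it is not part of the patch itself. It assumes a compiler built with this change and invoked with -mssse3, and the file contents and variable names are illustrative only.

#include <emmintrin.h>   /* _mm_loadu_si128 / _mm_storeu_si128 (SSE2).  */
#include <tmmintrin.h>   /* The new SSSE3 header added by this patch.  */
#include <stdio.h>

int
main (void)
{
  short a[8] = { 1, -2, 3, -4, 5, -6, 7, -8 };
  short b[8] = { 10, 20, 30, 40, 50, 60, 70, 80 };
  short r[8];
  __m128i x, y, h, p, q;
  int i;

  x = _mm_loadu_si128 ((__m128i *) a);
  y = _mm_loadu_si128 ((__m128i *) b);

  h = _mm_hadd_epi16 (x, y);      /* phaddw: sums of adjacent 16-bit elements.  */
  p = _mm_abs_epi16 (x);          /* pabsw: per-element absolute value.  */
  q = _mm_alignr_epi8 (h, p, 2);  /* palignr: bytes 2..17 of the concatenation
                                     h:p; the byte offset must be a
                                     compile-time constant.  */

  _mm_storeu_si128 ((__m128i *) r, q);
  for (i = 0; i < 8; i++)
    printf ("%d ", r[i]);
  printf ("\n");
  return 0;
}

With the patch applied, compiling this with -mssse3 defines __SSSE3__ and emits the phaddw, pabsw and palignr instructions; without -mssse3 the macro is not defined and tmmintrin.h provides no declarations.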
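
The ssse3_pmulhrswv8hi3 pattern above spells out pmulhrsw's rounding arithmetic directly in RTL: each 16-bit lane computes ((a * b >> 14) + 1) >> 1 on the full product. A scalar C model of one lane, for illustration only (the function name is invented here, not part of the patch):

#include <stdint.h>

static int16_t
pmulhrsw_element (int16_t a, int16_t b)
{
  int32_t prod = (int32_t) a * (int32_t) b;    /* Full 32-bit product.  */
  return (int16_t) (((prod >> 14) + 1) >> 1);  /* Round, keep bits 16..1.  */
}

For example, pmulhrsw_element (0x4000, 0x4000) yields 0x2000, i.e. 0.5 * 0.5 = 0.25 in Q15 fixed point, which is why this instruction is convenient for fixed-point filters.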
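
Likewise, the ssse3_pmaddubswv8hi3 pattern describes pmaddubsw as unsigned-by-signed byte products summed in adjacent pairs with signed saturation (the ss_plus in the RTL). A scalar model of one 16-bit output lane, again illustrative rather than code from the commit:

#include <stdint.h>

static int16_t
pmaddubsw_lane (uint8_t u0, int8_t s0, uint8_t u1, int8_t s1)
{
  /* First operand supplies unsigned bytes, second supplies signed bytes;
     the pair of products is added with signed saturation to 16 bits.  */
  int32_t sum = (int32_t) u0 * s0 + (int32_t) u1 * s1;
  if (sum > 32767)
    sum = 32767;
  if (sum < -32768)
    sum = -32768;
  return (int16_t) sum;
}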