+2012-10-15 Uros Bizjak <ubizjak@gmail.com>
+
+ Backport from mainline
+ 2012-10-15 Uros Bizjak <ubizjak@gmail.com>
+
+ * config/i386/sse.md (UNSPEC_MOVU): Remove.
+ (UNSPEC_LOADU): New.
+ (UNSPEC_STOREU): Ditto.
+ (<sse>_movu<ssemodesuffix><avxsizesuffix>): Split to ...
+ (<sse>_loadu<ssemodesuffix><avxsizesuffix>): ... this and ...
+ (<sse>_storeu<ssemodesuffix><avxsizesuffix>): ... this.
+ (<sse2>_movdqu<avxsizesuffix>): Split to ...
+ (<sse2>_loaddqu<avxsizesuffix>): ... this and ...
+ (<sse2>_storedqu<avxsizesuffix>): ... this.
+ (*sse4_2_pcmpestr_unaligned): Update.
+ (*sse4_2_pcmpistr_unaligned): Ditto.
+
+ * config/i386/i386.c (ix86_avx256_split_vector_move_misalign): Use
+ gen_avx_load{dqu,ups,upd}256 to load from unaligned memory and
+ gen_avx_store{dqu,ups,upd}256 to store to unaligned memory.
+ (ix86_expand_vector_move_misalign): Use gen_sse_loadups or
+ gen_sse2_load{dqu,upd} to load from unaligned memory and
+ gen_sse_storeups or gen_sse2_store{dqu,upd} to store to
+ unaligned memory.
+ (struct builtin_description bdesc_special_args) <IX86_BUILTIN_LOADUPS>:
+ Use CODE_FOR_sse_loadups.
+ <IX86_BUILTIN_LOADUPD>: Use CODE_FOR_sse2_loadupd.
+ <IX86_BUILTIN_LOADDQU>: Use CODE_FOR_sse2_loaddqu.
+ <IX86_BUILTIN_STOREUPS>: Use CODE_FOR_sse_storeups.
+ <IX86_BUILTIN_STOREUPD>: Use CODE_FOR_sse2_storeupd.
+ <IX86_BUILTIN_STOREDQU>: Use CODE_FOR_sse2_storedqu.
+ <IX86_BUILTIN_LOADUPS256>: Use CODE_FOR_avx_loadups256.
+ <IX86_BUILTIN_LOADUPD256>: Use CODE_FOR_avx_loadupd256.
+ <IX86_BUILTIN_LOADDQU256>: Use CODE_FOR_avx_loaddqu256.
+ <IX86_BUILTIN_STOREUPS256>: Use CODE_FOR_avx_storeups256.
+ <IX86_BUILTIN_STOREUPD256>: Use CODE_FOR_avx_storeupd256.
+ <IX86_BUILTIN_STOREDQU256>: Use CODE_FOR_avx_storedqu256.
+
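(Not part of the patch: a minimal sketch, assuming GCC's <xmmintrin.h> wrappers, of user code that reaches the 128-bit builtins retargeted below. _mm_loadu_ps is implemented with __builtin_ia32_loadups and _mm_storeu_ps with __builtin_ia32_storeups, so after this change they expand through the separate sse_loadups and sse_storeups patterns. The helper name is illustrative only.)

#include <xmmintrin.h>

/* Unaligned 4-float copy: the load goes through IX86_BUILTIN_LOADUPS
   (now CODE_FOR_sse_loadups), the store through IX86_BUILTIN_STOREUPS
   (now CODE_FOR_sse_storeups).  */
void
copy4f_unaligned (float *dst, const float *src)
{
  __m128 v = _mm_loadu_ps (src);
  _mm_storeu_ps (dst, v);
}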
2012-10-15 Steven Bosscher <steven@gcc.gnu.org>
Backport from trunk (r190222):
* config.gcc (*-*-openbsd4.[3-9]|*-*-openbsd[5-9]*): Set
default_use_cxa_atexit to yes.
-
+
2012-10-05 John David Anglin <dave.anglin@nrc-cnrc.gc.ca>
 * config/pa/pa.md: Adjust unnamed HImode add insn pattern.
Backported from mainline
2012-10-03 Andrew W. Nosenko <andrew.w.nosenko@gmail.com>
- * config/i386/driver-i386.c (host_detect_local_cpu): Fix logic
+ * config/i386/driver-i386.c (host_detect_local_cpu): Fix logic
in SSE and YMM state support check for -march=native.
2012-10-03 Alexandre Oliva <aoliva@redhat.com>
PR target/54703
* simplify-rtx.c (simplify_binary_operation_1): Perform
- (x - (x & y)) -> (x & ~y) optimization only for integral
- modes.
+ (x - (x & y)) -> (x & ~y) optimization only for integral modes.
2012-09-24 Eric Botcazou <ebotcazou@adacore.com>
Backport from mainline
2012-09-07 Andi Kleen <ak@linux.intel.com>
- * gcc/lto-streamer.h (res_pair): Add.
- (lto_file_decl_data): Replace resolutions with respairs.
- Add max_index.
- * gcc/lto/lto.c (lto_resolution_read): Remove max_index. Add rp.
- Initialize respairs.
- (lto_file_finalize): Set up resolutions vector lazily from respairs.
+ * gcc/lto-streamer.h (res_pair): Add.
+ (lto_file_decl_data): Replace resolutions with respairs.
+ Add max_index.
+ * gcc/lto/lto.c (lto_resolution_read): Remove max_index. Add rp.
+ Initialize respairs.
+ (lto_file_finalize): Set up resolutions vector lazily from respairs.
2012-09-14 Walter Lee <walt@tilera.com>
2012-09-12 Christian Bruel <christian.bruel@st.com>
- * config/sh/newlib.h (NO_IMPLICIT_EXTERN_C): Define.
+ * config/sh/newlib.h (NO_IMPLICIT_EXTERN_C): Define.
2012-09-12 Jakub Jelinek <jakub@redhat.com>
{
rtx m;
rtx (*extract) (rtx, rtx, rtx);
- rtx (*move_unaligned) (rtx, rtx);
+ rtx (*load_unaligned) (rtx, rtx);
+ rtx (*store_unaligned) (rtx, rtx);
enum machine_mode mode;
switch (GET_MODE (op0))
gcc_unreachable ();
case V32QImode:
extract = gen_avx_vextractf128v32qi;
- move_unaligned = gen_avx_movdqu256;
+ load_unaligned = gen_avx_loaddqu256;
+ store_unaligned = gen_avx_storedqu256;
mode = V16QImode;
break;
case V8SFmode:
extract = gen_avx_vextractf128v8sf;
- move_unaligned = gen_avx_movups256;
+ load_unaligned = gen_avx_loadups256;
+ store_unaligned = gen_avx_storeups256;
mode = V4SFmode;
break;
case V4DFmode:
extract = gen_avx_vextractf128v4df;
- move_unaligned = gen_avx_movupd256;
+ load_unaligned = gen_avx_loadupd256;
+ store_unaligned = gen_avx_storeupd256;
mode = V2DFmode;
break;
}
- if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
+ if (MEM_P (op1))
{
- rtx r = gen_reg_rtx (mode);
- m = adjust_address (op1, mode, 0);
- emit_move_insn (r, m);
- m = adjust_address (op1, mode, 16);
- r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
- emit_move_insn (op0, r);
+ if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
+ {
+ rtx r = gen_reg_rtx (mode);
+ m = adjust_address (op1, mode, 0);
+ emit_move_insn (r, m);
+ m = adjust_address (op1, mode, 16);
+ r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
+ emit_move_insn (op0, r);
+ }
+ else
+ emit_insn (load_unaligned (op0, op1));
}
- else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
+ else if (MEM_P (op0))
{
- m = adjust_address (op0, mode, 0);
- emit_insn (extract (m, op1, const0_rtx));
- m = adjust_address (op0, mode, 16);
- emit_insn (extract (m, op1, const1_rtx));
+ if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
+ {
+ m = adjust_address (op0, mode, 0);
+ emit_insn (extract (m, op1, const0_rtx));
+ m = adjust_address (op0, mode, 16);
+ emit_insn (extract (m, op1, const1_rtx));
+ }
+ else
+ emit_insn (store_unaligned (op0, op1));
}
else
- emit_insn (move_unaligned (op0, op1));
+ gcc_unreachable ();
}
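(Not part of the patch: a hedged source-level sketch of the two strategies the function above selects between for a 256-bit unaligned load, written with the standard AVX intrinsics from <immintrin.h>; compile with -mavx. The helper names are illustrative only.)

#include <immintrin.h>

/* Without TARGET_AVX256_SPLIT_UNALIGNED_LOAD: one full-width unaligned
   load, as emitted via gen_avx_loadups256.  */
static __m256
load8f_whole (const float *p)
{
  return _mm256_loadu_ps (p);
}

/* With the split flag set: two 16-byte halves combined, mirroring the
   adjust_address/VEC_CONCAT sequence above.  */
static __m256
load8f_split (const float *p)
{
  __m128 lo = _mm_loadu_ps (p);
  __m128 hi = _mm_loadu_ps (p + 4);
  return _mm256_insertf128_ps (_mm256_castps128_ps256 (lo), hi, 1);
}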
/* Implement the movmisalign patterns for SSE. Non-SSE modes go
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
rtx op0, op1, m;
+ rtx (*move_unaligned) (rtx, rtx);
op0 = operands[0];
op1 = operands[1];
/* If we're optimizing for size, movups is the smallest. */
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
{
+ if (MEM_P (op1))
+ move_unaligned = gen_sse_loadups;
+ else if (MEM_P (op0))
+ move_unaligned = gen_sse_storeups;
+ else
+ gcc_unreachable ();
+
op0 = gen_lowpart (V4SFmode, op0);
op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_sse_movups (op0, op1));
+ emit_insn (move_unaligned (op0, op1));
return;
}
+ if (MEM_P (op1))
+ move_unaligned = gen_sse2_loaddqu;
+ else if (MEM_P (op0))
+ move_unaligned = gen_sse2_storedqu;
+ else
+ gcc_unreachable ();
+
op0 = gen_lowpart (V16QImode, op0);
op1 = gen_lowpart (V16QImode, op1);
- emit_insn (gen_sse2_movdqu (op0, op1));
+ emit_insn (move_unaligned (op0, op1));
break;
case 32:
op0 = gen_lowpart (V32QImode, op0);
switch (mode)
{
case V4SFmode:
- emit_insn (gen_sse_movups (op0, op1));
+ if (MEM_P (op1))
+ move_unaligned = gen_sse_loadups;
+ else if (MEM_P (op0))
+ move_unaligned = gen_sse_storeups;
+ else
+ gcc_unreachable ();
+
+ emit_insn (move_unaligned (op0, op1));
break;
case V8SFmode:
ix86_avx256_split_vector_move_misalign (op0, op1);
case V2DFmode:
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
{
+ if (MEM_P (op1))
+ move_unaligned = gen_sse_loadups;
+ else if (MEM_P (op0))
+ move_unaligned = gen_sse_storeups;
+ else
+ gcc_unreachable ();
+
op0 = gen_lowpart (V4SFmode, op0);
op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_sse_movups (op0, op1));
+ emit_insn (move_unaligned (op0, op1));
return;
}
- emit_insn (gen_sse2_movupd (op0, op1));
+ if (MEM_P (op1))
+ move_unaligned = gen_sse2_loadupd;
+ else if (MEM_P (op0))
+ move_unaligned = gen_sse2_storeupd;
+ else
+ gcc_unreachable ();
+
+ emit_insn (move_unaligned (op0, op1));
break;
case V4DFmode:
ix86_avx256_split_vector_move_misalign (op0, op1);
{
op0 = gen_lowpart (V4SFmode, op0);
op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_sse_movups (op0, op1));
+ emit_insn (gen_sse_loadups (op0, op1));
return;
}
{
op0 = gen_lowpart (V16QImode, op0);
op1 = gen_lowpart (V16QImode, op1);
- emit_insn (gen_sse2_movdqu (op0, op1));
+ emit_insn (gen_sse2_loaddqu (op0, op1));
return;
}
{
op0 = gen_lowpart (V2DFmode, op0);
op1 = gen_lowpart (V2DFmode, op1);
- emit_insn (gen_sse2_movupd (op0, op1));
+ emit_insn (gen_sse2_loadupd (op0, op1));
return;
}
{
op0 = gen_lowpart (V4SFmode, op0);
op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_sse_movups (op0, op1));
+ emit_insn (gen_sse_loadups (op0, op1));
return;
}
{
op0 = gen_lowpart (V4SFmode, op0);
op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_sse_movups (op0, op1));
+ emit_insn (gen_sse_storeups (op0, op1));
return;
}
{
op0 = gen_lowpart (V16QImode, op0);
op1 = gen_lowpart (V16QImode, op1);
- emit_insn (gen_sse2_movdqu (op0, op1));
+ emit_insn (gen_sse2_storedqu (op0, op1));
return;
}
{
op0 = gen_lowpart (V2DFmode, op0);
op1 = gen_lowpart (V2DFmode, op1);
- emit_insn (gen_sse2_movupd (op0, op1));
+ emit_insn (gen_sse2_storeupd (op0, op1));
}
else
{
if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
{
op0 = gen_lowpart (V4SFmode, op0);
- emit_insn (gen_sse_movups (op0, op1));
+ emit_insn (gen_sse_storeups (op0, op1));
}
else
{
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* SSE */
- { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
- { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
/* SSE2 */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
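(Not part of the patch: a minimal usage sketch, assuming GCC's <immintrin.h> wrappers, of the 256-bit builtins whose insn codes change above. _mm256_loadu_pd is defined in terms of __builtin_ia32_loadupd256 and _mm256_storeu_pd in terms of __builtin_ia32_storeupd256; compile with -mavx. The function name is illustrative only.)

#include <immintrin.h>

/* Unaligned 4-double copy: the load now expands through
   CODE_FOR_avx_loadupd256, the store through CODE_FOR_avx_storeupd256.  */
void
copy4d_unaligned (double *dst, const double *src)
{
  __m256d v = _mm256_loadu_pd (src);
  _mm256_storeu_pd (dst, v);
}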