diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index e2675744b01..1ba43bb1e01 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -1,5 +1,5 @@
 ;; GCC machine description for i386 synchronization instructions.
-;; Copyright (C) 2005, 2006, 2007, 2008
+;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
 ;; Free Software Foundation, Inc.
 ;;
 ;; This file is part of GCC.
@@ -18,255 +18,522 @@
 ;; along with GCC; see the file COPYING3.  If not see
 ;; <http://www.gnu.org/licenses/>.
 
-(define_mode_iterator IMODE [QI HI SI (DI "TARGET_64BIT")])
-(define_mode_attr modesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])
-(define_mode_attr modeconstraint [(QI "q") (HI "r") (SI "r") (DI "r")])
-(define_mode_attr immconstraint [(QI "i") (HI "i") (SI "i") (DI "e")])
+(define_c_enum "unspec" [
+  UNSPEC_LFENCE
+  UNSPEC_SFENCE
+  UNSPEC_MFENCE
+  UNSPEC_MOVA			; For __atomic support
+  UNSPEC_LDA
+  UNSPEC_STA
+])
 
-(define_mode_iterator CASMODE [QI HI SI (DI "TARGET_64BIT || TARGET_CMPXCHG8B")
-			       (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
-(define_mode_iterator DCASMODE
-  [(DI "!TARGET_64BIT && TARGET_CMPXCHG8B && !flag_pic")
-   (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
-(define_mode_attr doublemodesuffix [(DI "8") (TI "16")])
-(define_mode_attr DCASHMODE [(DI "SI") (TI "DI")])
+(define_c_enum "unspecv" [
+  UNSPECV_CMPXCHG_1
+  UNSPECV_CMPXCHG_2
+  UNSPECV_CMPXCHG_3
+  UNSPECV_CMPXCHG_4
+  UNSPECV_XCHG
+  UNSPECV_LOCK
+])
 
-(define_expand "memory_barrier"
+(define_expand "sse2_lfence"
   [(set (match_dup 0)
-	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
-  ""
+	(unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
+  "TARGET_SSE2"
 {
   operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
   MEM_VOLATILE_P (operands[0]) = 1;
+})
 
-  if (!(TARGET_64BIT || TARGET_SSE2))
-    {
-      emit_insn (gen_memory_barrier_nosse (operands[0]));
-      DONE;
-    }
+(define_insn "*sse2_lfence"
+  [(set (match_operand:BLK 0 "" "")
+	(unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
+  "TARGET_SSE2"
+  "lfence"
+  [(set_attr "type" "sse")
+   (set_attr "length_address" "0")
+   (set_attr "atom_sse_attr" "lfence")
+   (set_attr "memory" "unknown")])
+
+(define_expand "sse_sfence"
+  [(set (match_dup 0)
+	(unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
+  "TARGET_SSE || TARGET_3DNOW_A"
+{
+  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+  MEM_VOLATILE_P (operands[0]) = 1;
 })
 
-(define_insn "memory_barrier_nosse"
+(define_insn "*sse_sfence"
+  [(set (match_operand:BLK 0 "" "")
+	(unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
+  "TARGET_SSE || TARGET_3DNOW_A"
+  "sfence"
+  [(set_attr "type" "sse")
+   (set_attr "length_address" "0")
+   (set_attr "atom_sse_attr" "fence")
+   (set_attr "memory" "unknown")])
+
+(define_expand "sse2_mfence"
+  [(set (match_dup 0)
+	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
+  "TARGET_SSE2"
+{
+  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+  MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "mfence_sse2"
+  [(set (match_operand:BLK 0 "" "")
+	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
+  "TARGET_64BIT || TARGET_SSE2"
+  "mfence"
+  [(set_attr "type" "sse")
+   (set_attr "length_address" "0")
+   (set_attr "atom_sse_attr" "fence")
+   (set_attr "memory" "unknown")])
+
+(define_insn "mfence_nosse"
   [(set (match_operand:BLK 0 "" "")
	(unspec:BLK 
[(match_dup 0)] UNSPEC_MFENCE)) (clobber (reg:CC FLAGS_REG))] "!(TARGET_64BIT || TARGET_SSE2)" - "lock{%;| }or{l}\t{$0, (%%esp)|DWORD PTR [esp], 0}" + "lock{%;} or{l}\t{$0, (%%esp)|DWORD PTR [esp], 0}" [(set_attr "memory" "unknown")]) -;; ??? It would be possible to use cmpxchg8b on pentium for DImode -;; changes. It's complicated because the insn uses ecx:ebx as the -;; new value; note that the registers are reversed from the order -;; that they'd be in with (reg:DI 2 ecx). Similarly for TImode -;; data in 64-bit mode. - -(define_expand "sync_compare_and_swap" - [(parallel - [(set (match_operand:CASMODE 0 "register_operand" "") - (match_operand:CASMODE 1 "memory_operand" "")) - (set (match_dup 1) - (unspec_volatile:CASMODE - [(match_dup 1) - (match_operand:CASMODE 2 "register_operand" "") - (match_operand:CASMODE 3 "register_operand" "")] - UNSPECV_CMPXCHG)) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_CMPXCHG" +(define_expand "mem_thread_fence" + [(match_operand:SI 0 "const_int_operand" "")] ;; model + "" { - if ((mode == DImode && !TARGET_64BIT) || mode == TImode) + /* Unless this is a SEQ_CST fence, the i386 memory model is strong + enough not to require barriers of any kind. */ + if (INTVAL (operands[0]) == MEMMODEL_SEQ_CST) { - enum machine_mode hmode = mode == DImode ? SImode : DImode; - rtx low = simplify_gen_subreg (hmode, operands[3], mode, 0); - rtx high = simplify_gen_subreg (hmode, operands[3], mode, - GET_MODE_SIZE (hmode)); - low = force_reg (hmode, low); - high = force_reg (hmode, high); - if (mode == DImode) - emit_insn (gen_sync_double_compare_and_swapdi - (operands[0], operands[1], operands[2], low, high)); - else if (mode == TImode) - emit_insn (gen_sync_double_compare_and_swapti - (operands[0], operands[1], operands[2], low, high)); + rtx (*mfence_insn)(rtx); + rtx mem; + + if (TARGET_64BIT || TARGET_SSE2) + mfence_insn = gen_mfence_sse2; else - gcc_unreachable (); - DONE; + mfence_insn = gen_mfence_nosse; + + mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (mem) = 1; + + emit_insn (mfence_insn (mem)); } + DONE; }) -(define_insn "*sync_compare_and_swap" - [(set (match_operand:IMODE 0 "register_operand" "=a") - (match_operand:IMODE 1 "memory_operand" "+m")) - (set (match_dup 1) - (unspec_volatile:IMODE - [(match_dup 1) - (match_operand:IMODE 2 "register_operand" "a") - (match_operand:IMODE 3 "register_operand" "")] - UNSPECV_CMPXCHG)) - (clobber (reg:CC FLAGS_REG))] - "TARGET_CMPXCHG" - "lock{%;| }cmpxchg{}\t{%3, %1|%1, %3}") +;; ??? From volume 3 section 8.1.1 Guaranteed Atomic Operations, +;; Only beginning at Pentium family processors do we get any guarantee of +;; atomicity in aligned 64-bit quantities. Beginning at P6, we get a +;; guarantee for 64-bit accesses that do not cross a cacheline boundary. +;; +;; Note that the TARGET_CMPXCHG8B test below is a stand-in for "Pentium". +;; +;; Importantly, *no* processor makes atomicity guarantees for larger +;; accesses. In particular, there's no way to perform an atomic TImode +;; move, despite the apparent applicability of MOVDQA et al. 
-(define_insn "sync_double_compare_and_swap" - [(set (match_operand:DCASMODE 0 "register_operand" "=A") - (match_operand:DCASMODE 1 "memory_operand" "+m")) - (set (match_dup 1) - (unspec_volatile:DCASMODE - [(match_dup 1) - (match_operand:DCASMODE 2 "register_operand" "A") - (match_operand: 3 "register_operand" "b") - (match_operand: 4 "register_operand" "c")] - UNSPECV_CMPXCHG)) - (clobber (reg:CC FLAGS_REG))] +(define_mode_iterator ATOMIC + [QI HI SI + (DI "TARGET_64BIT || (TARGET_CMPXCHG8B && (TARGET_80387 || TARGET_SSE))") + ]) + +(define_expand "atomic_load" + [(set (match_operand:ATOMIC 0 "register_operand" "") + (unspec:ATOMIC [(match_operand:ATOMIC 1 "memory_operand" "") + (match_operand:SI 2 "const_int_operand" "")] + UNSPEC_MOVA))] "" - "lock{%;| }cmpxchgb\t%1") - -;; Theoretically we'd like to use constraint "r" (any reg) for operand -;; 3, but that includes ecx. If operand 3 and 4 are the same (like when -;; the input is -1LL) GCC might chose to allocate operand 3 to ecx, like -;; operand 4. This breaks, as the xchg will move the PIC register contents -;; to %ecx then --> boom. Operands 3 and 4 really need to be different -;; registers, which in this case means operand 3 must not be ecx. -;; Instead of playing tricks with fake early clobbers or the like we -;; just enumerate all regs possible here, which (as this is !TARGET_64BIT) -;; are just esi and edi. -(define_insn "*sync_double_compare_and_swapdi_pic" - [(set (match_operand:DI 0 "register_operand" "=A") - (match_operand:DI 1 "memory_operand" "+m")) - (set (match_dup 1) - (unspec_volatile:DI - [(match_dup 1) - (match_operand:DI 2 "register_operand" "A") - (match_operand:SI 3 "register_operand" "SD") - (match_operand:SI 4 "register_operand" "c")] - UNSPECV_CMPXCHG)) - (clobber (reg:CC FLAGS_REG))] - "!TARGET_64BIT && TARGET_CMPXCHG8B && flag_pic" - "xchg{l}\t%%ebx, %3\;lock{%;| }cmpxchg8b\t%1\;xchg{l}\t%%ebx, %3") - -(define_expand "sync_compare_and_swap_cc" - [(parallel - [(set (match_operand:CASMODE 0 "register_operand" "") - (match_operand:CASMODE 1 "memory_operand" "")) - (set (match_dup 1) - (unspec_volatile:CASMODE - [(match_dup 1) - (match_operand:CASMODE 2 "register_operand" "") - (match_operand:CASMODE 3 "register_operand" "")] - UNSPECV_CMPXCHG)) - (set (match_dup 4) - (compare:CCZ - (unspec_volatile:CASMODE - [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPECV_CMPXCHG) - (match_dup 2)))])] - "TARGET_CMPXCHG" { - operands[4] = gen_rtx_REG (CCZmode, FLAGS_REG); - ix86_compare_op0 = operands[3]; - ix86_compare_op1 = NULL; - ix86_compare_emitted = operands[4]; - if ((mode == DImode && !TARGET_64BIT) || mode == TImode) + /* For DImode on 32-bit, we can use the FPU to perform the load. */ + if (mode == DImode && !TARGET_64BIT) + emit_insn (gen_atomic_loaddi_fpu + (operands[0], operands[1], + assign_386_stack_local (DImode, + (virtuals_instantiated + ? 
SLOT_TEMP : SLOT_VIRTUAL)))); + else + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +(define_insn_and_split "atomic_loaddi_fpu" + [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m,?r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m,m,m")] + UNSPEC_MOVA)) + (clobber (match_operand:DI 2 "memory_operand" "=X,X,m")) + (clobber (match_scratch:DF 3 "=X,xf,xf"))] + "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dst = operands[0], src = operands[1]; + rtx mem = operands[2], tmp = operands[3]; + + if (SSE_REG_P (dst)) + emit_move_insn (dst, src); + else + { + if (MEM_P (dst)) + mem = dst; + + if (FP_REG_P (tmp)) + { + emit_insn (gen_loaddi_via_fpu (tmp, src)); + emit_insn (gen_storedi_via_fpu (mem, tmp)); + } + else + { + adjust_reg_mode (tmp, DImode); + emit_move_insn (tmp, src); + emit_move_insn (mem, tmp); + } + + if (mem != dst) + emit_move_insn (dst, mem); + } + DONE; +}) + +(define_expand "atomic_store" + [(set (match_operand:ATOMIC 0 "memory_operand" "") + (unspec:ATOMIC [(match_operand:ATOMIC 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "")] + UNSPEC_MOVA))] + "" +{ + enum memmodel model = (enum memmodel) INTVAL (operands[2]); + + if (mode == DImode && !TARGET_64BIT) + { + /* For DImode on 32-bit, we can use the FPU to perform the store. */ + /* Note that while we could perform a cmpxchg8b loop, that turns + out to be significantly larger than this plus a barrier. */ + emit_insn (gen_atomic_storedi_fpu + (operands[0], operands[1], + assign_386_stack_local (DImode, + (virtuals_instantiated + ? SLOT_TEMP : SLOT_VIRTUAL)))); + } + else + { + /* For seq-cst stores, when we lack MFENCE, use XCHG. */ + if (model == MEMMODEL_SEQ_CST && !(TARGET_64BIT || TARGET_SSE2)) + { + emit_insn (gen_atomic_exchange (gen_reg_rtx (mode), + operands[0], operands[1], + operands[2])); + DONE; + } + + /* Otherwise use a normal store. */ + emit_move_insn (operands[0], operands[1]); + } + /* ... followed by an MFENCE, if required. */ + if (model == MEMMODEL_SEQ_CST) + emit_insn (gen_mem_thread_fence (operands[2])); + DONE; +}) + +(define_insn_and_split "atomic_storedi_fpu" + [(set (match_operand:DI 0 "memory_operand" "=m,m,m") + (unspec:DI [(match_operand:DI 1 "register_operand" "x,m,?r")] + UNSPEC_MOVA)) + (clobber (match_operand:DI 2 "memory_operand" "=X,X,m")) + (clobber (match_scratch:DF 3 "=X,xf,xf"))] + "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dst = operands[0], src = operands[1]; + rtx mem = operands[2], tmp = operands[3]; + + if (!SSE_REG_P (src)) { - enum machine_mode hmode = mode == DImode ? 
SImode : DImode; - rtx low = simplify_gen_subreg (hmode, operands[3], mode, 0); - rtx high = simplify_gen_subreg (hmode, operands[3], mode, - GET_MODE_SIZE (hmode)); - low = force_reg (hmode, low); - high = force_reg (hmode, high); - if (mode == DImode) - emit_insn (gen_sync_double_compare_and_swap_ccdi - (operands[0], operands[1], operands[2], low, high)); - else if (mode == TImode) - emit_insn (gen_sync_double_compare_and_swap_ccti - (operands[0], operands[1], operands[2], low, high)); + if (REG_P (src)) + { + emit_move_insn (mem, src); + src = mem; + } + + if (FP_REG_P (tmp)) + { + emit_insn (gen_loaddi_via_fpu (tmp, src)); + emit_insn (gen_storedi_via_fpu (dst, tmp)); + DONE; + } else - gcc_unreachable (); - DONE; + { + adjust_reg_mode (tmp, DImode); + emit_move_insn (tmp, mem); + src = tmp; + } } + emit_move_insn (dst, src); + DONE; }) -(define_insn "*sync_compare_and_swap_cc" - [(set (match_operand:IMODE 0 "register_operand" "=a") - (match_operand:IMODE 1 "memory_operand" "+m")) +;; ??? You'd think that we'd be able to perform this via FLOAT + FIX_TRUNC +;; operations. But the fix_trunc patterns want way more setup than we want +;; to provide. Note that the scratch is DFmode instead of XFmode in order +;; to make it easy to allocate a scratch in either SSE or FP_REGs above. + +(define_insn "loaddi_via_fpu" + [(set (match_operand:DF 0 "register_operand" "=f") + (unspec:DF [(match_operand:DI 1 "memory_operand" "m")] UNSPEC_LDA))] + "TARGET_80387" + "fild%Z1\t%1" + [(set_attr "type" "fmov") + (set_attr "mode" "DF") + (set_attr "fp_int_src" "true")]) + +(define_insn "storedi_via_fpu" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:DF 1 "register_operand" "f")] UNSPEC_STA))] + "TARGET_80387" +{ + gcc_assert (find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != NULL_RTX); + + return "fistp%Z0\t%0"; +} + [(set_attr "type" "fmov") + (set_attr "mode" "DI")]) + +(define_expand "atomic_compare_and_swap" + [(match_operand:QI 0 "register_operand" "") ;; bool success output + (match_operand:SWI124 1 "register_operand" "") ;; oldval output + (match_operand:SWI124 2 "memory_operand" "") ;; memory + (match_operand:SWI124 3 "register_operand" "") ;; expected input + (match_operand:SWI124 4 "register_operand" "") ;; newval input + (match_operand:SI 5 "const_int_operand" "") ;; is_weak + (match_operand:SI 6 "const_int_operand" "") ;; success model + (match_operand:SI 7 "const_int_operand" "")] ;; failure model + "TARGET_CMPXCHG" +{ + emit_insn (gen_atomic_compare_and_swap_single + (operands[1], operands[2], operands[3], operands[4])); + ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG), + const0_rtx); + DONE; +}) + +(define_mode_iterator CASMODE + [(DI "TARGET_64BIT || TARGET_CMPXCHG8B") + (TI "TARGET_64BIT && TARGET_CMPXCHG16B")]) +(define_mode_iterator DCASMODE + [(DI "!TARGET_64BIT && TARGET_CMPXCHG8B && !flag_pic") + (TI "TARGET_64BIT && TARGET_CMPXCHG16B")]) +(define_mode_attr doublemodesuffix [(DI "8") (TI "16")]) +(define_mode_attr DCASHMODE [(DI "SI") (TI "DI")]) + +(define_expand "atomic_compare_and_swap" + [(match_operand:QI 0 "register_operand" "") ;; bool success output + (match_operand:CASMODE 1 "register_operand" "") ;; oldval output + (match_operand:CASMODE 2 "memory_operand" "") ;; memory + (match_operand:CASMODE 3 "register_operand" "") ;; expected input + (match_operand:CASMODE 4 "register_operand" "") ;; newval input + (match_operand:SI 5 "const_int_operand" "") ;; is_weak + (match_operand:SI 6 "const_int_operand" "") ;; success model 
+ (match_operand:SI 7 "const_int_operand" "")] ;; failure model + "TARGET_CMPXCHG" +{ + if (mode == DImode && TARGET_64BIT) + { + emit_insn (gen_atomic_compare_and_swap_singledi + (operands[1], operands[2], operands[3], operands[4])); + } + else + { + enum machine_mode hmode = mode; + rtx lo_o, lo_e, lo_n, hi_o, hi_e, hi_n, mem; + + lo_o = operands[1]; + mem = operands[2]; + lo_e = operands[3]; + lo_n = operands[4]; + hi_o = gen_highpart (hmode, lo_o); + hi_e = gen_highpart (hmode, lo_e); + hi_n = gen_highpart (hmode, lo_n); + lo_o = gen_lowpart (hmode, lo_o); + lo_e = gen_lowpart (hmode, lo_e); + lo_n = gen_lowpart (hmode, lo_n); + + if (mode == DImode + && !TARGET_64BIT + && flag_pic + && !cmpxchg8b_pic_memory_operand (mem, DImode)) + mem = replace_equiv_address (mem, force_reg (Pmode, XEXP (mem, 0))); + + emit_insn (gen_atomic_compare_and_swap_double + (lo_o, hi_o, mem, lo_e, hi_e, lo_n, hi_n)); + } + ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG), + const0_rtx); + DONE; +}) + +(define_insn "atomic_compare_and_swap_single" + [(set (match_operand:SWI 0 "register_operand" "=a") + (unspec_volatile:SWI + [(match_operand:SWI 1 "memory_operand" "+m") + (match_operand:SWI 2 "register_operand" "0") + (match_operand:SWI 3 "register_operand" "")] + UNSPECV_CMPXCHG_1)) (set (match_dup 1) - (unspec_volatile:IMODE - [(match_dup 1) - (match_operand:IMODE 2 "register_operand" "a") - (match_operand:IMODE 3 "register_operand" "")] - UNSPECV_CMPXCHG)) + (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG_2)) (set (reg:CCZ FLAGS_REG) - (compare:CCZ - (unspec_volatile:IMODE - [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPECV_CMPXCHG) - (match_dup 2)))] + (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_3))] "TARGET_CMPXCHG" - "lock{%;| }cmpxchg{}\t{%3, %1|%1, %3}") + "lock{%;} cmpxchg{}\t{%3, %1|%1, %3}") -(define_insn "sync_double_compare_and_swap_cc" - [(set (match_operand:DCASMODE 0 "register_operand" "=A") - (match_operand:DCASMODE 1 "memory_operand" "+m")) - (set (match_dup 1) - (unspec_volatile:DCASMODE - [(match_dup 1) - (match_operand:DCASMODE 2 "register_operand" "A") - (match_operand: 3 "register_operand" "b") - (match_operand: 4 "register_operand" "c")] - UNSPECV_CMPXCHG)) +;; For double-word compare and swap, we are obliged to play tricks with +;; the input newval (op5:op6) because the Intel register numbering does +;; not match the gcc register numbering, so the pair must be CX:BX. +;; That said, in order to take advantage of possible lower-subreg opts, +;; treat all of the integral operands in the same way. +(define_insn "atomic_compare_and_swap_double" + [(set (match_operand: 0 "register_operand" "=a") + (unspec_volatile: + [(match_operand:DCASMODE 2 "memory_operand" "+m") + (match_operand: 3 "register_operand" "0") + (match_operand: 4 "register_operand" "1") + (match_operand: 5 "register_operand" "b") + (match_operand: 6 "register_operand" "c")] + UNSPECV_CMPXCHG_1)) + (set (match_operand: 1 "register_operand" "=d") + (unspec_volatile: [(const_int 0)] UNSPECV_CMPXCHG_2)) + (set (match_dup 2) + (unspec_volatile:DCASMODE [(const_int 0)] UNSPECV_CMPXCHG_3)) (set (reg:CCZ FLAGS_REG) - (compare:CCZ - (unspec_volatile:DCASMODE - [(match_dup 1) (match_dup 2) (match_dup 3) (match_dup 4)] - UNSPECV_CMPXCHG) - (match_dup 2)))] + (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_4))] "" - "lock{%;| }cmpxchgb\t%1") + "lock{%;} cmpxchgb\t%2") -;; See above for the explanation of using the constraint "SD" for -;; operand 3. 
-(define_insn "*sync_double_compare_and_swap_ccdi_pic" - [(set (match_operand:DI 0 "register_operand" "=A") - (match_operand:DI 1 "memory_operand" "+m")) - (set (match_dup 1) - (unspec_volatile:DI - [(match_dup 1) - (match_operand:DI 2 "register_operand" "A") - (match_operand:SI 3 "register_operand" "SD") - (match_operand:SI 4 "register_operand" "c")] - UNSPECV_CMPXCHG)) +;; Theoretically we'd like to use constraint "r" (any reg) for op5, +;; but that includes ecx. If op5 and op6 are the same (like when +;; the input is -1LL) GCC might chose to allocate op5 to ecx, like +;; op6. This breaks, as the xchg will move the PIC register contents +;; to %ecx then --> boom. Operands 5 and 6 really need to be different +;; registers, which in this case means op5 must not be ecx. Instead +;; of playing tricks with fake early clobbers or the like we just +;; enumerate all regs possible here, which (as this is !TARGET_64BIT) +;; are just esi and edi. +(define_insn "*atomic_compare_and_swap_doubledi_pic" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec_volatile:SI + [(match_operand:DI 2 "cmpxchg8b_pic_memory_operand" "+m") + (match_operand:SI 3 "register_operand" "0") + (match_operand:SI 4 "register_operand" "1") + (match_operand:SI 5 "register_operand" "SD") + (match_operand:SI 6 "register_operand" "c")] + UNSPECV_CMPXCHG_1)) + (set (match_operand:SI 1 "register_operand" "=d") + (unspec_volatile:SI [(const_int 0)] UNSPECV_CMPXCHG_2)) + (set (match_dup 2) + (unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG_3)) (set (reg:CCZ FLAGS_REG) - (compare:CCZ - (unspec_volatile:DI - [(match_dup 1) (match_dup 2) (match_dup 3) (match_dup 4)] - UNSPECV_CMPXCHG) - (match_dup 2)))] + (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_4))] "!TARGET_64BIT && TARGET_CMPXCHG8B && flag_pic" - "xchg{l}\t%%ebx, %3\;lock{%;| }cmpxchg8b\t%1\;xchg{l}\t%%ebx, %3") + "xchg{l}\t%%ebx, %5\;lock{%;} cmpxchg8b\t%2\;xchg{l}\t%%ebx, %5") -(define_insn "sync_old_add" - [(set (match_operand:IMODE 0 "register_operand" "=") - (unspec_volatile:IMODE - [(match_operand:IMODE 1 "memory_operand" "+m")] UNSPECV_XCHG)) +;; For operand 2 nonmemory_operand predicate is used instead of +;; register_operand to allow combiner to better optimize atomic +;; additions of constants. +(define_insn "atomic_fetch_add" + [(set (match_operand:SWI 0 "register_operand" "=") + (unspec_volatile:SWI + [(match_operand:SWI 1 "memory_operand" "+m") + (match_operand:SI 3 "const_int_operand" "")] ;; model + UNSPECV_XCHG)) (set (match_dup 1) - (plus:IMODE (match_dup 1) - (match_operand:IMODE 2 "register_operand" "0"))) + (plus:SWI (match_dup 1) + (match_operand:SWI 2 "nonmemory_operand" "0"))) (clobber (reg:CC FLAGS_REG))] "TARGET_XADD" - "lock{%;| }xadd{}\t{%0, %1|%1, %0}") + "lock{%;} xadd{}\t{%0, %1|%1, %0}") + +;; This peephole2 and following insn optimize +;; __sync_fetch_and_add (x, -N) == N into just lock {add,sub,inc,dec} +;; followed by testing of flags instead of lock xadd and comparisons. 
+(define_peephole2 + [(set (match_operand:SWI 0 "register_operand" "") + (match_operand:SWI 2 "const_int_operand" "")) + (parallel [(set (match_dup 0) + (unspec_volatile:SWI + [(match_operand:SWI 1 "memory_operand" "") + (match_operand:SI 4 "const_int_operand" "")] + UNSPECV_XCHG)) + (set (match_dup 1) + (plus:SWI (match_dup 1) + (match_dup 0))) + (clobber (reg:CC FLAGS_REG))]) + (set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_dup 0) + (match_operand:SWI 3 "const_int_operand" "")))] + "peep2_reg_dead_p (3, operands[0]) + && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) + == -(unsigned HOST_WIDE_INT) INTVAL (operands[3]) + && !reg_overlap_mentioned_p (operands[0], operands[1])" + [(parallel [(set (reg:CCZ FLAGS_REG) + (compare:CCZ + (unspec_volatile:SWI [(match_dup 1) (match_dup 4)] + UNSPECV_XCHG) + (match_dup 3))) + (set (match_dup 1) + (plus:SWI (match_dup 1) + (match_dup 2)))])]) + +(define_insn "*atomic_fetch_add_cmp" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (unspec_volatile:SWI + [(match_operand:SWI 0 "memory_operand" "+m") + (match_operand:SI 3 "const_int_operand" "")] + UNSPECV_XCHG) + (match_operand:SWI 2 "const_int_operand" "i"))) + (set (match_dup 0) + (plus:SWI (match_dup 0) + (match_operand:SWI 1 "const_int_operand" "i")))] + "(unsigned HOST_WIDE_INT) INTVAL (operands[1]) + == -(unsigned HOST_WIDE_INT) INTVAL (operands[2])" +{ + if (TARGET_USE_INCDEC) + { + if (operands[1] == const1_rtx) + return "lock{%;} inc{}\t%0"; + if (operands[1] == constm1_rtx) + return "lock{%;} dec{}\t%0"; + } + + if (x86_maybe_negate_const_int (&operands[1], mode)) + return "lock{%;} sub{}\t{%1, %0|%0, %1}"; + + return "lock{%;} add{}\t{%1, %0|%0, %1}"; +}) ;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space. -(define_insn "sync_lock_test_and_set" - [(set (match_operand:IMODE 0 "register_operand" "=") - (unspec_volatile:IMODE - [(match_operand:IMODE 1 "memory_operand" "+m")] UNSPECV_XCHG)) +;; In addition, it is always a full barrier, so we can ignore the memory model. 
+(define_insn "atomic_exchange" + [(set (match_operand:SWI 0 "register_operand" "=") ;; output + (unspec_volatile:SWI + [(match_operand:SWI 1 "memory_operand" "+m") ;; memory + (match_operand:SI 3 "const_int_operand" "")] ;; model + UNSPECV_XCHG)) (set (match_dup 1) - (match_operand:IMODE 2 "register_operand" "0"))] + (match_operand:SWI 2 "register_operand" "0"))] ;; input "" - "xchg{}\t{%1, %0|%0, %1}") + "xchg{}\t{%1, %0|%0, %1}") -(define_insn "sync_add" - [(set (match_operand:IMODE 0 "memory_operand" "+m") - (unspec_volatile:IMODE - [(plus:IMODE (match_dup 0) - (match_operand:IMODE 1 "nonmemory_operand" ""))] +(define_insn "atomic_add" + [(set (match_operand:SWI 0 "memory_operand" "+m") + (unspec_volatile:SWI + [(plus:SWI (match_dup 0) + (match_operand:SWI 1 "nonmemory_operand" "")) + (match_operand:SI 2 "const_int_operand" "")] ;; model UNSPECV_LOCK)) (clobber (reg:CC FLAGS_REG))] "" @@ -274,19 +541,23 @@ if (TARGET_USE_INCDEC) { if (operands[1] == const1_rtx) - return "lock{%;| }inc{}\t%0"; + return "lock{%;} inc{}\t%0"; if (operands[1] == constm1_rtx) - return "lock{%;| }dec{}\t%0"; + return "lock{%;} dec{}\t%0"; } - return "lock{%;| }add{}\t{%1, %0|%0, %1}"; + if (x86_maybe_negate_const_int (&operands[1], mode)) + return "lock{%;} sub{}\t{%1, %0|%0, %1}"; + + return "lock{%;} add{}\t{%1, %0|%0, %1}"; }) -(define_insn "sync_sub" - [(set (match_operand:IMODE 0 "memory_operand" "+m") - (unspec_volatile:IMODE - [(minus:IMODE (match_dup 0) - (match_operand:IMODE 1 "nonmemory_operand" ""))] +(define_insn "atomic_sub" + [(set (match_operand:SWI 0 "memory_operand" "+m") + (unspec_volatile:SWI + [(minus:SWI (match_dup 0) + (match_operand:SWI 1 "nonmemory_operand" "")) + (match_operand:SI 2 "const_int_operand" "")] ;; model UNSPECV_LOCK)) (clobber (reg:CC FLAGS_REG))] "" @@ -294,40 +565,24 @@ if (TARGET_USE_INCDEC) { if (operands[1] == const1_rtx) - return "lock{%;| }dec{}\t%0"; + return "lock{%;} dec{}\t%0"; if (operands[1] == constm1_rtx) - return "lock{%;| }inc{}\t%0"; + return "lock{%;} inc{}\t%0"; } - return "lock{%;| }sub{}\t{%1, %0|%0, %1}"; -}) + if (x86_maybe_negate_const_int (&operands[1], mode)) + return "lock{%;} add{}\t{%1, %0|%0, %1}"; -(define_insn "sync_ior" - [(set (match_operand:IMODE 0 "memory_operand" "+m") - (unspec_volatile:IMODE - [(ior:IMODE (match_dup 0) - (match_operand:IMODE 1 "nonmemory_operand" ""))] - UNSPECV_LOCK)) - (clobber (reg:CC FLAGS_REG))] - "" - "lock{%;| }or{}\t{%1, %0|%0, %1}") - -(define_insn "sync_and" - [(set (match_operand:IMODE 0 "memory_operand" "+m") - (unspec_volatile:IMODE - [(and:IMODE (match_dup 0) - (match_operand:IMODE 1 "nonmemory_operand" ""))] - UNSPECV_LOCK)) - (clobber (reg:CC FLAGS_REG))] - "" - "lock{%;| }and{}\t{%1, %0|%0, %1}") + return "lock{%;} sub{}\t{%1, %0|%0, %1}"; +}) -(define_insn "sync_xor" - [(set (match_operand:IMODE 0 "memory_operand" "+m") - (unspec_volatile:IMODE - [(xor:IMODE (match_dup 0) - (match_operand:IMODE 1 "nonmemory_operand" ""))] +(define_insn "atomic_" + [(set (match_operand:SWI 0 "memory_operand" "+m") + (unspec_volatile:SWI + [(any_logic:SWI (match_dup 0) + (match_operand:SWI 1 "nonmemory_operand" "")) + (match_operand:SI 2 "const_int_operand" "")] ;; model UNSPECV_LOCK)) (clobber (reg:CC FLAGS_REG))] "" - "lock{%;| }xor{}\t{%1, %0|%0, %1}") + "lock{%;} {}\t{%1, %0|%0, %1}")