;; GCC machine description for i386 synchronization instructions.
;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
;; Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

(define_c_enum "unspec" [
  UNSPEC_LFENCE
  UNSPEC_SFENCE
  UNSPEC_MFENCE
  UNSPEC_MOVA			; For __atomic support
  UNSPEC_LDA
  UNSPEC_STA
])

(define_c_enum "unspecv" [
  UNSPECV_CMPXCHG_1
  UNSPECV_CMPXCHG_2
  UNSPECV_CMPXCHG_3
  UNSPECV_CMPXCHG_4
  UNSPECV_XCHG
  UNSPECV_LOCK
])

(define_expand "sse2_lfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "*sse2_lfence"
  [(set (match_operand:BLK 0 "" "")
	(unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
  "lfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "lfence")
   (set_attr "memory" "unknown")])

(define_expand "sse_sfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "*sse_sfence"
  [(set (match_operand:BLK 0 "" "")
	(unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
  "sfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])

(define_expand "sse2_mfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_SSE2"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "mfence_sse2"
  [(set (match_operand:BLK 0 "" "")
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_64BIT || TARGET_SSE2"
  "mfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])

(define_insn "mfence_nosse"
  [(set (match_operand:BLK 0 "" "")
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))
   (clobber (reg:CC FLAGS_REG))]
  "!(TARGET_64BIT || TARGET_SSE2)"
  "lock{%;} or{l}\t{$0, (%%esp)|DWORD PTR [esp], 0}"
  [(set_attr "memory" "unknown")])

(define_expand "mem_thread_fence"
  [(match_operand:SI 0 "const_int_operand" "")]		;; model
  ""
{
  /* Unless this is a SEQ_CST fence, the i386 memory model is strong
     enough not to require barriers of any kind.  */
  if (INTVAL (operands[0]) == MEMMODEL_SEQ_CST)
    {
      rtx (*mfence_insn)(rtx);
      rtx mem;

      if (TARGET_64BIT || TARGET_SSE2)
	mfence_insn = gen_mfence_sse2;
      else
	mfence_insn = gen_mfence_nosse;

      mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
      MEM_VOLATILE_P (mem) = 1;

      emit_insn (mfence_insn (mem));
    }
  DONE;
})
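
;; As an illustrative sketch (not part of the original sources): a call to
;;   __atomic_thread_fence (__ATOMIC_SEQ_CST);
;; expands via this pattern to "mfence" on 64-bit or SSE2 targets, and to
;; "lock orl $0, (%esp)" elsewhere, whereas any weaker model, e.g.
;;   __atomic_thread_fence (__ATOMIC_ACQUIRE);
;; emits no instruction at all, since ordinary x86 loads and stores
;; already have acquire/release semantics.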

;; ??? From volume 3 section 8.1.1 Guaranteed Atomic Operations,
;; only beginning with Pentium family processors do we get any guarantee of
;; atomicity in aligned 64-bit quantities.  Beginning with P6, we get a
;; guarantee for 64-bit accesses that do not cross a cacheline boundary.
;;
;; Note that the TARGET_CMPXCHG8B test below is a stand-in for "Pentium".
;;
;; Importantly, *no* processor makes atomicity guarantees for larger
;; accesses.  In particular, there's no way to perform an atomic TImode
;; move, despite the apparent applicability of MOVDQA et al.

(define_mode_iterator ATOMIC
   [QI HI SI
    (DI "TARGET_64BIT || (TARGET_CMPXCHG8B && (TARGET_80387 || TARGET_SSE))")
   ])

(define_expand "atomic_load<mode>"
  [(set (match_operand:ATOMIC 0 "register_operand" "")
	(unspec:ATOMIC [(match_operand:ATOMIC 1 "memory_operand" "")
			(match_operand:SI 2 "const_int_operand" "")]
		       UNSPEC_MOVA))]
  ""
{
  /* For DImode on 32-bit, we can use the FPU to perform the load.  */
  if (<MODE>mode == DImode && !TARGET_64BIT)
    emit_insn (gen_atomic_loaddi_fpu
	       (operands[0], operands[1],
		assign_386_stack_local (DImode,
					(virtuals_instantiated
					 ? SLOT_TEMP : SLOT_VIRTUAL))));
  else
    emit_move_insn (operands[0], operands[1]);
  DONE;
})
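
;; Sketch of the intent (illustrative, not from the original file): on
;; 32-bit, a 64-bit atomic load such as
;;   long long v = __atomic_load_n (&x, __ATOMIC_RELAXED);
;; goes through atomic_loaddi_fpu below, bouncing the value through an
;; x87 or SSE register so the 64-bit memory read is a single instruction.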

(define_insn_and_split "atomic_loaddi_fpu"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m,?r")
	(unspec:DI [(match_operand:DI 1 "memory_operand" "m,m,m")]
		   UNSPEC_MOVA))
   (clobber (match_operand:DI 2 "memory_operand" "=X,X,m"))
   (clobber (match_scratch:DF 3 "=X,xf,xf"))]
  "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dst = operands[0], src = operands[1];
  rtx mem = operands[2], tmp = operands[3];

  if (SSE_REG_P (dst))
    emit_move_insn (dst, src);
  else
    {
      if (MEM_P (dst))
	mem = dst;

      if (FP_REG_P (tmp))
	{
	  emit_insn (gen_loaddi_via_fpu (tmp, src));
	  emit_insn (gen_storedi_via_fpu (mem, tmp));
	}
      else
	{
	  adjust_reg_mode (tmp, DImode);
	  emit_move_insn (tmp, src);
	  emit_move_insn (mem, tmp);
	}

      if (mem != dst)
	emit_move_insn (dst, mem);
    }
  DONE;
})

(define_expand "atomic_store<mode>"
  [(set (match_operand:ATOMIC 0 "memory_operand" "")
	(unspec:ATOMIC [(match_operand:ATOMIC 1 "register_operand" "")
			(match_operand:SI 2 "const_int_operand" "")]
		       UNSPEC_MOVA))]
  ""
{
  enum memmodel model = (enum memmodel) INTVAL (operands[2]);

  if (<MODE>mode == DImode && !TARGET_64BIT)
    {
      /* For DImode on 32-bit, we can use the FPU to perform the store.  */
      /* Note that while we could perform a cmpxchg8b loop, that turns
	 out to be significantly larger than this plus a barrier.  */
      emit_insn (gen_atomic_storedi_fpu
		 (operands[0], operands[1],
		  assign_386_stack_local (DImode,
					  (virtuals_instantiated
					   ? SLOT_TEMP : SLOT_VIRTUAL))));
    }
  else
    {
      /* For seq-cst stores, when we lack MFENCE, use XCHG.  */
      if (model == MEMMODEL_SEQ_CST && !(TARGET_64BIT || TARGET_SSE2))
	{
	  emit_insn (gen_atomic_exchange<mode> (gen_reg_rtx (<MODE>mode),
						operands[0], operands[1],
						operands[2]));
	  DONE;
	}

      /* Otherwise use a normal store.  */
      emit_move_insn (operands[0], operands[1]);
    }
  /* ... followed by an MFENCE, if required.  */
  if (model == MEMMODEL_SEQ_CST)
    emit_insn (gen_mem_thread_fence (operands[2]));
  DONE;
})
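
;; Illustrative example (an assumed mapping, not from the original file):
;;   __atomic_store_n (&x, v, __ATOMIC_SEQ_CST);
;; becomes "movl; mfence" when MFENCE is available, and an "xchgl" store
;; otherwise; relaxed and release stores are plain moves with no barrier.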

(define_insn_and_split "atomic_storedi_fpu"
  [(set (match_operand:DI 0 "memory_operand" "=m,m,m")
	(unspec:DI [(match_operand:DI 1 "register_operand" "x,m,?r")]
		   UNSPEC_MOVA))
   (clobber (match_operand:DI 2 "memory_operand" "=X,X,m"))
   (clobber (match_scratch:DF 3 "=X,xf,xf"))]
  "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dst = operands[0], src = operands[1];
  rtx mem = operands[2], tmp = operands[3];

  if (!SSE_REG_P (src))
    {
      if (REG_P (src))
	{
	  emit_move_insn (mem, src);
	  src = mem;
	}

      if (FP_REG_P (tmp))
	{
	  emit_insn (gen_loaddi_via_fpu (tmp, src));
	  emit_insn (gen_storedi_via_fpu (dst, tmp));
	  DONE;
	}
      else
	{
	  adjust_reg_mode (tmp, DImode);
	  emit_move_insn (tmp, mem);
	  src = tmp;
	}
    }
  emit_move_insn (dst, src);
  DONE;
})

;; ??? You'd think that we'd be able to perform this via FLOAT + FIX_TRUNC
;; operations.  But the fix_trunc patterns want way more setup than we want
;; to provide.  Note that the scratch is DFmode instead of XFmode in order
;; to make it easy to allocate a scratch in either SSE or FP_REGs above.

(define_insn "loaddi_via_fpu"
  [(set (match_operand:DF 0 "register_operand" "=f")
	(unspec:DF [(match_operand:DI 1 "memory_operand" "m")] UNSPEC_LDA))]
  "TARGET_80387"
  "fild%Z1\t%1"
  [(set_attr "type" "fmov")
   (set_attr "mode" "DF")
   (set_attr "fp_int_src" "true")])

(define_insn "storedi_via_fpu"
  [(set (match_operand:DI 0 "memory_operand" "=m")
	(unspec:DI [(match_operand:DF 1 "register_operand" "f")] UNSPEC_STA))]
  "TARGET_80387"
{
  gcc_assert (find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != NULL_RTX);

  return "fistp%Z0\t%0";
}
  [(set_attr "type" "fmov")
   (set_attr "mode" "DI")])
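
;; So an atomic DImode move through the FPU is, roughly (illustrative;
;; the %Z suffix should print the 64-bit "ll" suffix in AT&T syntax):
;;   fildll  src	// one 64-bit load, atomic on Pentium and later
;;   fistpll dst	// one 64-bit store, popping the FP stack
;; The REG_DEAD assertion above checks that the value really dies, so
;; the popping fistp leaves the FP stack balanced.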

(define_expand "atomic_compare_and_swap<mode>"
  [(match_operand:QI 0 "register_operand" "")		;; bool success output
   (match_operand:SWI124 1 "register_operand" "")	;; oldval output
   (match_operand:SWI124 2 "memory_operand" "")		;; memory
   (match_operand:SWI124 3 "register_operand" "")	;; expected input
   (match_operand:SWI124 4 "register_operand" "")	;; newval input
   (match_operand:SI 5 "const_int_operand" "")		;; is_weak
   (match_operand:SI 6 "const_int_operand" "")		;; success model
   (match_operand:SI 7 "const_int_operand" "")]		;; failure model
  "TARGET_CMPXCHG"
{
  emit_insn (gen_atomic_compare_and_swap_single<mode>
	     (operands[1], operands[2], operands[3], operands[4]));
  ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
		     const0_rtx);
  DONE;
})
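
;; For instance (a sketch, not part of the original sources):
;;   __atomic_compare_exchange_n (&x, &expected, newval, 0, ...);
;; becomes roughly "lock cmpxchg %newval, x" with the expected value in
;; %eax, followed by a "sete" to produce the boolean result.  The locked
;; instruction is a full barrier, so both the success and failure models
;; are satisfied, and is_weak can be ignored: cmpxchg never fails
;; spuriously.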

(define_mode_iterator CASMODE
  [(DI "TARGET_64BIT || TARGET_CMPXCHG8B")
   (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
(define_mode_iterator DCASMODE
  [(DI "!TARGET_64BIT && TARGET_CMPXCHG8B && !flag_pic")
   (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
(define_mode_attr doublemodesuffix [(DI "8") (TI "16")])
(define_mode_attr DCASHMODE [(DI "SI") (TI "DI")])

(define_expand "atomic_compare_and_swap<mode>"
  [(match_operand:QI 0 "register_operand" "")		;; bool success output
   (match_operand:CASMODE 1 "register_operand" "")	;; oldval output
   (match_operand:CASMODE 2 "memory_operand" "")	;; memory
   (match_operand:CASMODE 3 "register_operand" "")	;; expected input
   (match_operand:CASMODE 4 "register_operand" "")	;; newval input
   (match_operand:SI 5 "const_int_operand" "")		;; is_weak
   (match_operand:SI 6 "const_int_operand" "")		;; success model
   (match_operand:SI 7 "const_int_operand" "")]		;; failure model
  "TARGET_CMPXCHG"
{
  if (<MODE>mode == DImode && TARGET_64BIT)
    {
      emit_insn (gen_atomic_compare_and_swap_singledi
		 (operands[1], operands[2], operands[3], operands[4]));
    }
  else
    {
      enum machine_mode hmode = <DCASHMODE>mode;
      rtx lo_o, lo_e, lo_n, hi_o, hi_e, hi_n, mem;

      lo_o = operands[1];
      lo_e = operands[3];
      lo_n = operands[4];
      mem  = operands[2];
      hi_o = gen_highpart (hmode, lo_o);
      hi_e = gen_highpart (hmode, lo_e);
      hi_n = gen_highpart (hmode, lo_n);
      lo_o = gen_lowpart (hmode, lo_o);
      lo_e = gen_lowpart (hmode, lo_e);
      lo_n = gen_lowpart (hmode, lo_n);

      if (<MODE>mode == DImode
	  && !TARGET_64BIT
	  && flag_pic
	  && !cmpxchg8b_pic_memory_operand (mem, DImode))
	mem = replace_equiv_address (mem, force_reg (Pmode, XEXP (mem, 0)));

      emit_insn (gen_atomic_compare_and_swap_double<mode>
		 (lo_o, hi_o, mem, lo_e, hi_e, lo_n, hi_n));
    }
  ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
		     const0_rtx);
  DONE;
})

(define_insn "atomic_compare_and_swap_single<mode>"
  [(set (match_operand:SWI 0 "register_operand" "=a")
	(unspec_volatile:SWI
	  [(match_operand:SWI 1 "memory_operand" "+m")
	   (match_operand:SWI 2 "register_operand" "0")
	   (match_operand:SWI 3 "register_operand" "<r>")]
	  UNSPECV_CMPXCHG_1))
   (set (match_dup 1)
	(unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG_2))
   (set (reg:CCZ FLAGS_REG)
	(unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_3))]
  "TARGET_CMPXCHG"
  "lock{%;} cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}")

;; For double-word compare and swap, we are obliged to play tricks with
;; the input newval (op5:op6) because the Intel register numbering does
;; not match the gcc register numbering, so the pair must be CX:BX.
;; That said, in order to take advantage of possible lower-subreg opts,
;; treat all of the integral operands in the same way.
(define_insn "atomic_compare_and_swap_double<mode>"
  [(set (match_operand:<DCASHMODE> 0 "register_operand" "=a")
	(unspec_volatile:<DCASHMODE>
	  [(match_operand:DCASMODE 2 "memory_operand" "+m")
	   (match_operand:<DCASHMODE> 3 "register_operand" "0")
	   (match_operand:<DCASHMODE> 4 "register_operand" "1")
	   (match_operand:<DCASHMODE> 5 "register_operand" "b")
	   (match_operand:<DCASHMODE> 6 "register_operand" "c")]
	  UNSPECV_CMPXCHG_1))
   (set (match_operand:<DCASHMODE> 1 "register_operand" "=d")
	(unspec_volatile:<DCASHMODE> [(const_int 0)] UNSPECV_CMPXCHG_2))
   (set (match_dup 2)
	(unspec_volatile:DCASMODE [(const_int 0)] UNSPECV_CMPXCHG_3))
   (set (reg:CCZ FLAGS_REG)
	(unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_4))]
  ""
  "lock{%;} cmpxchg<doublemodesuffix>b\t%2")

;; Theoretically we'd like to use constraint "r" (any reg) for op5,
;; but that includes ecx.  If op5 and op6 are the same (like when
;; the input is -1LL) GCC might choose to allocate op5 to ecx, like
;; op6.  This breaks, as the xchg will move the PIC register contents
;; to %ecx then --> boom.  Operands 5 and 6 really need to be different
;; registers, which in this case means op5 must not be ecx.  Instead
;; of playing tricks with fake early clobbers or the like we just
;; enumerate all regs possible here, which (as this is !TARGET_64BIT)
;; are just esi and edi.
(define_insn "*atomic_compare_and_swap_doubledi_pic"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(unspec_volatile:SI
	  [(match_operand:DI 2 "cmpxchg8b_pic_memory_operand" "+m")
	   (match_operand:SI 3 "register_operand" "0")
	   (match_operand:SI 4 "register_operand" "1")
	   (match_operand:SI 5 "register_operand" "SD")
	   (match_operand:SI 6 "register_operand" "c")]
	  UNSPECV_CMPXCHG_1))
   (set (match_operand:SI 1 "register_operand" "=d")
	(unspec_volatile:SI [(const_int 0)] UNSPECV_CMPXCHG_2))
   (set (match_dup 2)
	(unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG_3))
   (set (reg:CCZ FLAGS_REG)
	(unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_4))]
  "!TARGET_64BIT && TARGET_CMPXCHG8B && flag_pic"
  "xchg{l}\t%%ebx, %5\;lock{%;} cmpxchg8b\t%2\;xchg{l}\t%%ebx, %5")

;; For operand 2, the nonmemory_operand predicate is used instead of
;; register_operand to allow the combiner to better optimize atomic
;; additions of constants.
(define_insn "atomic_fetch_add<mode>"
  [(set (match_operand:SWI 0 "register_operand" "=<r>")
	(unspec_volatile:SWI
	  [(match_operand:SWI 1 "memory_operand" "+m")
	   (match_operand:SI 3 "const_int_operand" "")]	;; model
	  UNSPECV_XCHG))
   (set (match_dup 1)
	(plus:SWI (match_dup 1)
		  (match_operand:SWI 2 "nonmemory_operand" "0")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_XADD"
  "lock{%;} xadd{<imodesuffix>}\t{%0, %1|%1, %0}")

;; This peephole2 and following insn optimize
;; __sync_fetch_and_add (x, -N) == N into just lock {add,sub,inc,dec}
;; followed by testing of flags instead of lock xadd and comparisons.
(define_peephole2
  [(set (match_operand:SWI 0 "register_operand" "")
	(match_operand:SWI 2 "const_int_operand" ""))
   (parallel [(set (match_dup 0)
		   (unspec_volatile:SWI
		     [(match_operand:SWI 1 "memory_operand" "")
		      (match_operand:SI 4 "const_int_operand" "")]
		     UNSPECV_XCHG))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 0)))
	      (clobber (reg:CC FLAGS_REG))])
   (set (reg:CCZ FLAGS_REG)
	(compare:CCZ (match_dup 0)
		     (match_operand:SWI 3 "const_int_operand" "")))]
  "peep2_reg_dead_p (3, operands[0])
   && (unsigned HOST_WIDE_INT) INTVAL (operands[2])
      == -(unsigned HOST_WIDE_INT) INTVAL (operands[3])
   && !reg_overlap_mentioned_p (operands[0], operands[1])"
  [(parallel [(set (reg:CCZ FLAGS_REG)
		   (compare:CCZ
		     (unspec_volatile:SWI [(match_dup 1) (match_dup 4)]
					  UNSPECV_XCHG)
		     (match_dup 3)))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 2)))])])

(define_insn "*atomic_fetch_add_cmp<mode>"
  [(set (reg:CCZ FLAGS_REG)
	(compare:CCZ (unspec_volatile:SWI
		       [(match_operand:SWI 0 "memory_operand" "+m")
			(match_operand:SI 3 "const_int_operand" "")]
		       UNSPECV_XCHG)
		     (match_operand:SWI 2 "const_int_operand" "i")))
   (set (match_dup 0)
	(plus:SWI (match_dup 0)
		  (match_operand:SWI 1 "const_int_operand" "i")))]
  "(unsigned HOST_WIDE_INT) INTVAL (operands[1])
   == -(unsigned HOST_WIDE_INT) INTVAL (operands[2])"
{
  if (TARGET_USE_INCDEC)
    {
      if (operands[1] == const1_rtx)
	return "lock{%;} inc{<imodesuffix>}\t%0";
      if (operands[1] == constm1_rtx)
	return "lock{%;} dec{<imodesuffix>}\t%0";
    }

  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
    return "lock{%;} sub{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}";
})

;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space.
;; In addition, it is always a full barrier, so we can ignore the memory model.
(define_insn "atomic_exchange<mode>"
  [(set (match_operand:SWI 0 "register_operand" "=<r>")	;; output
	(unspec_volatile:SWI
	  [(match_operand:SWI 1 "memory_operand" "+m")		;; memory
	   (match_operand:SI 3 "const_int_operand" "")]		;; model
	  UNSPECV_XCHG))
   (set (match_dup 1)
	(match_operand:SWI 2 "register_operand" "0"))]		;; input
  ""
  "xchg{<imodesuffix>}\t{%1, %0|%0, %1}")

(define_insn "atomic_add<mode>"
  [(set (match_operand:SWI 0 "memory_operand" "+m")
	(unspec_volatile:SWI
	  [(plus:SWI (match_dup 0)
		     (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
	   (match_operand:SI 2 "const_int_operand" "")]		;; model
	  UNSPECV_LOCK))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  if (TARGET_USE_INCDEC)
    {
      if (operands[1] == const1_rtx)
	return "lock{%;} inc{<imodesuffix>}\t%0";
      if (operands[1] == constm1_rtx)
	return "lock{%;} dec{<imodesuffix>}\t%0";
    }

  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
    return "lock{%;} sub{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}";
})

(define_insn "atomic_sub<mode>"
  [(set (match_operand:SWI 0 "memory_operand" "+m")
	(unspec_volatile:SWI
	  [(minus:SWI (match_dup 0)
		      (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
	   (match_operand:SI 2 "const_int_operand" "")]		;; model
	  UNSPECV_LOCK))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  if (TARGET_USE_INCDEC)
    {
      if (operands[1] == const1_rtx)
	return "lock{%;} dec{<imodesuffix>}\t%0";
      if (operands[1] == constm1_rtx)
	return "lock{%;} inc{<imodesuffix>}\t%0";
    }

  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
    return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "lock{%;} sub{<imodesuffix>}\t{%1, %0|%0, %1}";
})

(define_insn "atomic_<code><mode>"
  [(set (match_operand:SWI 0 "memory_operand" "+m")
	(unspec_volatile:SWI
	  [(any_logic:SWI (match_dup 0)
			  (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
	   (match_operand:SI 2 "const_int_operand" "")]		;; model
	  UNSPECV_LOCK))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "lock{%;} <logic>{<imodesuffix>}\t{%1, %0|%0, %1}")