X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Foptabs.c;h=0d5cd73c3a2417de85df24dfbd5dab7c6ffc5dd0;hb=7dc5d28ee6bb7c1e9a6748c8d454d91816a6364c;hp=7901b95f6321535ff9f0a25e80376f805ca84a1a;hpb=8808bf16125e1bea5cd2e969d19a53b9618593f1;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/optabs.c b/gcc/optabs.c index 7901b95f632..0d5cd73c3a2 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -2052,11 +2052,11 @@ expand_binop (enum machine_mode mode, optab binoptab, rtx op0, rtx op1, { rtx temp = emit_move_insn (target, xtarget); - set_unique_reg_note (temp, - REG_EQUAL, - gen_rtx_fmt_ee (binoptab->code, mode, - copy_rtx (xop0), - copy_rtx (xop1))); + set_dst_reg_note (temp, REG_EQUAL, + gen_rtx_fmt_ee (binoptab->code, mode, + copy_rtx (xop0), + copy_rtx (xop1)), + target); } else target = xtarget; @@ -2104,11 +2104,12 @@ expand_binop (enum machine_mode mode, optab binoptab, rtx op0, rtx op1, if (optab_handler (mov_optab, mode) != CODE_FOR_nothing) { temp = emit_move_insn (target ? target : product, product); - set_unique_reg_note (temp, - REG_EQUAL, - gen_rtx_fmt_ee (MULT, mode, - copy_rtx (op0), - copy_rtx (op1))); + set_dst_reg_note (temp, + REG_EQUAL, + gen_rtx_fmt_ee (MULT, mode, + copy_rtx (op0), + copy_rtx (op1)), + target ? target : product); } return product; } @@ -2966,8 +2967,9 @@ expand_absneg_bit (enum rtx_code code, enum machine_mode mode, gen_lowpart (imode, target), 1, OPTAB_LIB_WIDEN); target = lowpart_subreg_maybe_copy (mode, temp, imode); - set_unique_reg_note (get_last_insn (), REG_EQUAL, - gen_rtx_fmt_e (code, mode, copy_rtx (op0))); + set_dst_reg_note (get_last_insn (), REG_EQUAL, + gen_rtx_fmt_e (code, mode, copy_rtx (op0)), + target); } return target; @@ -3899,8 +3901,7 @@ emit_libcall_block (rtx insns, rtx target, rtx result, rtx equiv) } last = emit_move_insn (target, result); - if (optab_handler (mov_optab, GET_MODE (target)) != CODE_FOR_nothing) - set_unique_reg_note (last, REG_EQUAL, copy_rtx (equiv)); + set_dst_reg_note (last, REG_EQUAL, copy_rtx (equiv), target); if (final_dest != target) emit_move_insn (final_dest, target); @@ -5213,11 +5214,10 @@ expand_fix (rtx to, rtx from, int unsignedp) { /* Make a place for a REG_NOTE and add it. */ insn = emit_move_insn (to, to); - set_unique_reg_note (insn, - REG_EQUAL, - gen_rtx_fmt_e (UNSIGNED_FIX, - GET_MODE (to), - copy_rtx (from))); + set_dst_reg_note (insn, REG_EQUAL, + gen_rtx_fmt_e (UNSIGNED_FIX, GET_MODE (to), + copy_rtx (from)), + to); } return; @@ -6586,6 +6586,57 @@ init_optabs (void) targetm.init_libfuncs (); } +/* A helper function for init_sync_libfuncs. Using the basename BASE, + install libfuncs into TAB for BASE_N for 1 <= N <= MAX. 
*/ + +static void +init_sync_libfuncs_1 (optab tab, const char *base, int max) +{ + enum machine_mode mode; + char buf[64]; + size_t len = strlen (base); + int i; + + gcc_assert (max <= 8); + gcc_assert (len + 3 < sizeof (buf)); + + memcpy (buf, base, len); + buf[len] = '_'; + buf[len + 1] = '0'; + buf[len + 2] = '\0'; + + mode = QImode; + for (i = 1; i <= max; i *= 2) + { + buf[len + 1] = '0' + i; + set_optab_libfunc (tab, mode, buf); + mode = GET_MODE_2XWIDER_MODE (mode); + } +} + +void +init_sync_libfuncs (int max) +{ + init_sync_libfuncs_1 (sync_compare_and_swap_optab, + "__sync_val_compare_and_swap", max); + init_sync_libfuncs_1 (sync_lock_test_and_set_optab, + "__sync_lock_test_and_set", max); + + init_sync_libfuncs_1 (sync_old_add_optab, "__sync_fetch_and_add", max); + init_sync_libfuncs_1 (sync_old_sub_optab, "__sync_fetch_and_sub", max); + init_sync_libfuncs_1 (sync_old_ior_optab, "__sync_fetch_and_or", max); + init_sync_libfuncs_1 (sync_old_and_optab, "__sync_fetch_and_and", max); + init_sync_libfuncs_1 (sync_old_xor_optab, "__sync_fetch_and_xor", max); + init_sync_libfuncs_1 (sync_old_nand_optab, "__sync_fetch_and_nand", max); + + init_sync_libfuncs_1 (sync_new_add_optab, "__sync_add_and_fetch", max); + init_sync_libfuncs_1 (sync_new_sub_optab, "__sync_sub_and_fetch", max); + init_sync_libfuncs_1 (sync_new_ior_optab, "__sync_or_and_fetch", max); + init_sync_libfuncs_1 (sync_new_and_optab, "__sync_and_and_fetch", max); + init_sync_libfuncs_1 (sync_new_xor_optab, "__sync_xor_and_fetch", max); + init_sync_libfuncs_1 (sync_new_nand_optab, "__sync_nand_and_fetch", max); +} + /* Print information about the current contents of the optabs on STDERR. */ @@ -6881,9 +6932,9 @@ can_vec_perm_for_code_p (enum tree_code code, enum machine_mode mode, break; case VEC_INTERLEAVE_HIGH_EXPR: - alt = nelt / 2; - /* FALLTHRU */ case VEC_INTERLEAVE_LOW_EXPR: + if ((BYTES_BIG_ENDIAN != 0) ^ (code == VEC_INTERLEAVE_HIGH_EXPR)) + alt = nelt / 2; for (i = 0; i < nelt / 2; ++i) { data[i * 2] = i + alt; @@ -6987,7 +7038,8 @@ expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target) } /* If the input is a constant, expand it specially. */ - if (CONSTANT_P (sel)) + gcc_assert (GET_MODE_CLASS (GET_MODE (sel)) == MODE_VECTOR_INT); + if (GET_CODE (sel) == CONST_VECTOR) { icode = direct_optab_handler (vec_perm_const_optab, mode); if (icode != CODE_FOR_nothing) @@ -7005,7 +7057,7 @@ expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target) { unsigned int j, this_e; - this_e = INTVAL (XVECEXP (sel, 0, i)); + this_e = INTVAL (CONST_VECTOR_ELT (sel, i)); this_e &= 2 * e - 1; this_e *= u; @@ -7165,24 +7217,46 @@ expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2, /* Return true if there is a compare_and_swap pattern. */ bool -can_compare_and_swap_p (enum machine_mode mode) +can_compare_and_swap_p (enum machine_mode mode, bool allow_libcall) { enum insn_code icode; - /* Check for __sync_compare_and_swap. */ - icode = direct_optab_handler (sync_compare_and_swap_optab, mode); - if (icode != CODE_FOR_nothing) - return true; - /* Check for __atomic_compare_and_swap. */ icode = direct_optab_handler (atomic_compare_and_swap_optab, mode); if (icode != CODE_FOR_nothing) - return true; + return true; + + /* Check for __sync_compare_and_swap. */ + icode = optab_handler (sync_compare_and_swap_optab, mode); + if (icode != CODE_FOR_nothing) + return true; + if (allow_libcall && optab_libfunc (sync_compare_and_swap_optab, mode)) + return true; /* No inline compare and swap. 
*/ return false; } +/* Return true if an atomic exchange can be performed. */ + +bool +can_atomic_exchange_p (enum machine_mode mode, bool allow_libcall) +{ + enum insn_code icode; + + /* Check for __atomic_exchange. */ + icode = direct_optab_handler (atomic_exchange_optab, mode); + if (icode != CODE_FOR_nothing) + return true; + + /* Don't check __sync_test_and_set, as on some platforms that + has reduced functionality. Targets that really do support + a proper exchange should simply be updated to the __atomics. */ + + return can_compare_and_swap_p (mode, allow_libcall); +} + + /* Helper function to find the MODE_CC set in a sync_compare_and_swap pattern. */ @@ -7252,21 +7326,15 @@ expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq) } -/* This function expands the atomic exchange operation: - atomically store VAL in MEM and return the previous value in MEM. - - MEMMODEL is the memory model variant to use. - TARGET is an optional place to stick the return value. - USE_TEST_AND_SET indicates whether __sync_lock_test_and_set should be used - as a fall back if the atomic_exchange pattern does not exist. */ - -rtx -expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model, - bool use_test_and_set) +/* This function tries to emit an atomic_exchange intruction. VAL is written + to *MEM using memory model MODEL. The previous contents of *MEM are returned, + using TARGET if possible. */ + +static rtx +maybe_emit_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model) { enum machine_mode mode = GET_MODE (mem); enum insn_code icode; - rtx last_insn; /* If the target supports the exchange directly, great. */ icode = direct_optab_handler (atomic_exchange_optab, mode); @@ -7283,46 +7351,79 @@ expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model, return ops[0].value; } - /* Legacy sync_lock_test_and_set works the same, but is only defined as an - acquire barrier. If the pattern exists, and the memory model is stronger - than acquire, add a release barrier before the instruction. - The barrier is not needed if sync_lock_test_and_set doesn't exist since - it will expand into a compare-and-swap loop. + return NULL_RTX; +} + +/* This function tries to implement an atomic exchange operation using + __sync_lock_test_and_set. VAL is written to *MEM using memory model MODEL. + The previous contents of *MEM are returned, using TARGET if possible. + Since this instructionn is an acquire barrier only, stronger memory + models may require additional barriers to be emitted. */ + +static rtx +maybe_emit_sync_lock_test_and_set (rtx target, rtx mem, rtx val, + enum memmodel model) +{ + enum machine_mode mode = GET_MODE (mem); + enum insn_code icode; + rtx last_insn = get_last_insn (); - Some targets have non-compliant test_and_sets, so it would be incorrect - to emit a test_and_set in place of an __atomic_exchange. The test_and_set - builtin shares this expander since exchange can always replace the - test_and_set. */ + icode = optab_handler (sync_lock_test_and_set_optab, mode); - if (use_test_and_set) + /* Legacy sync_lock_test_and_set is an acquire barrier. If the pattern + exists, and the memory model is stronger than acquire, add a release + barrier before the instruction. 
*/ + + if (model == MEMMODEL_SEQ_CST + || model == MEMMODEL_RELEASE + || model == MEMMODEL_ACQ_REL) + expand_mem_thread_fence (model); + + if (icode != CODE_FOR_nothing) { - icode = direct_optab_handler (sync_lock_test_and_set_optab, mode); - last_insn = get_last_insn (); - if ((icode != CODE_FOR_nothing) && (model == MEMMODEL_SEQ_CST || - model == MEMMODEL_RELEASE || - model == MEMMODEL_ACQ_REL)) - expand_builtin_mem_thread_fence (model); + struct expand_operand ops[3]; + create_output_operand (&ops[0], target, mode); + create_fixed_operand (&ops[1], mem); + /* VAL may have been promoted to a wider mode. Shrink it if so. */ + create_convert_operand_to (&ops[2], val, mode, true); + if (maybe_expand_insn (icode, 3, ops)) + return ops[0].value; + } - if (icode != CODE_FOR_nothing) + /* If an external test-and-set libcall is provided, use that instead of + any external compare-and-swap that we might get from the compare-and- + swap-loop expansion later. */ + if (!can_compare_and_swap_p (mode, false)) + { + rtx libfunc = optab_libfunc (sync_lock_test_and_set_optab, mode); + if (libfunc != NULL) { - struct expand_operand ops[3]; - - create_output_operand (&ops[0], target, mode); - create_fixed_operand (&ops[1], mem); - /* VAL may have been promoted to a wider mode. Shrink it if so. */ - create_convert_operand_to (&ops[2], val, mode, true); - if (maybe_expand_insn (icode, 3, ops)) - return ops[0].value; - } + rtx addr; - /* Remove any fence that was inserted since a compare and swap loop is - already a full memory barrier. */ - if (last_insn != get_last_insn ()) - delete_insns_since (last_insn); + addr = convert_memory_address (ptr_mode, XEXP (mem, 0)); + return emit_library_call_value (libfunc, NULL_RTX, LCT_NORMAL, + mode, 2, addr, ptr_mode, + val, mode); + } } - /* Otherwise, use a compare-and-swap loop for the exchange. */ - if (can_compare_and_swap_p (mode)) + /* If the test_and_set can't be emitted, eliminate any barrier that might + have been emitted. */ + delete_insns_since (last_insn); + return NULL_RTX; +} + +/* This function tries to implement an atomic exchange operation using a + compare_and_swap loop. VAL is written to *MEM. The previous contents of + *MEM are returned, using TARGET if possible. No memory model is required + since a compare_and_swap loop is seq-cst. */ + +static rtx +maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val) +{ + enum machine_mode mode = GET_MODE (mem); + + if (can_compare_and_swap_p (mode, true)) { if (!target || !register_operand (target, mode)) target = gen_reg_rtx (mode); @@ -7335,6 +7436,105 @@ expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model, return NULL_RTX; } +#ifndef HAVE_atomic_test_and_set +#define HAVE_atomic_test_and_set 0 +#define gen_atomic_test_and_set(x,y,z) (gcc_unreachable (), NULL_RTX) +#endif + +/* This function expands the legacy _sync_lock test_and_set operation which is + generally an atomic exchange. Some limited targets only allow the + constant 1 to be stored. This is an ACQUIRE operation. + + TARGET is an optional place to stick the return value. + MEM is where VAL is stored. */ + +rtx +expand_sync_lock_test_and_set (rtx target, rtx mem, rtx val) +{ + rtx ret; + + /* Try an atomic_exchange first. 
*/ + ret = maybe_emit_atomic_exchange (target, mem, val, MEMMODEL_ACQUIRE); + + if (!ret) + ret = maybe_emit_sync_lock_test_and_set (target, mem, val, + MEMMODEL_ACQUIRE); + if (!ret) + ret = maybe_emit_compare_and_swap_exchange_loop (target, mem, val); + + /* If there are no other options, try atomic_test_and_set if the value + being stored is 1. */ + if (!ret && val == const1_rtx && HAVE_atomic_test_and_set) + { + ret = gen_atomic_test_and_set (target, mem, GEN_INT (MEMMODEL_ACQUIRE)); + emit_insn (ret); + } + + return ret; +} + +/* This function expands the atomic test_and_set operation: + atomically store a boolean TRUE into MEM and return the previous value. + + MEMMODEL is the memory model variant to use. + TARGET is an optional place to stick the return value. */ + +rtx +expand_atomic_test_and_set (rtx target, rtx mem, enum memmodel model) +{ + enum machine_mode mode = GET_MODE (mem); + rtx ret = NULL_RTX; + + if (target == NULL_RTX) + target = gen_reg_rtx (mode); + + if (HAVE_atomic_test_and_set) + { + ret = gen_atomic_test_and_set (target, mem, GEN_INT (MEMMODEL_ACQUIRE)); + emit_insn (ret); + return ret; + } + + /* If there is no test and set, try exchange, then a compare_and_swap loop, + then __sync_test_and_set. */ + ret = maybe_emit_atomic_exchange (target, mem, const1_rtx, model); + + if (!ret) + ret = maybe_emit_compare_and_swap_exchange_loop (target, mem, const1_rtx); + + if (!ret) + ret = maybe_emit_sync_lock_test_and_set (target, mem, const1_rtx, model); + + if (ret) + return ret; + + /* Failing all else, assume a single threaded environment and simply perform + the operation. */ + emit_move_insn (target, mem); + emit_move_insn (mem, const1_rtx); + return target; +} + +/* This function expands the atomic exchange operation: + atomically store VAL in MEM and return the previous value in MEM. + + MEMMODEL is the memory model variant to use. + TARGET is an optional place to stick the return value. */ + +rtx +expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model) +{ + rtx ret; + + ret = maybe_emit_atomic_exchange (target, mem, val, model); + + /* Next try a compare-and-swap loop for the exchange. */ + if (!ret) + ret = maybe_emit_compare_and_swap_exchange_loop (target, mem, val); + + return ret; +} + /* This function expands the atomic compare exchange operation: *PTARGET_BOOL is an optional place to store the boolean success/failure. @@ -7356,7 +7556,8 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval, enum machine_mode mode = GET_MODE (mem); struct expand_operand ops[8]; enum insn_code icode; - rtx target_bool, target_oval; + rtx target_oval, target_bool = NULL_RTX; + rtx libfunc; /* Load expected into a register for the compare and swap. */ if (MEM_P (expected)) @@ -7400,7 +7601,7 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval, /* Otherwise fall back to the original __sync_val_compare_and_swap which is always seq-cst. */ - icode = direct_optab_handler (sync_compare_and_swap_optab, mode); + icode = optab_handler (sync_compare_and_swap_optab, mode); if (icode != CODE_FOR_nothing) { rtx cc_reg; @@ -7413,7 +7614,6 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval, return false; target_oval = ops[0].value; - target_bool = NULL_RTX; /* If the caller isn't interested in the boolean return value, skip the computation of it. 
*/ @@ -7424,17 +7624,37 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval, cc_reg = NULL_RTX; if (have_insn_for (COMPARE, CCmode)) note_stores (PATTERN (get_last_insn ()), find_cc_set, &cc_reg); + if (cc_reg) + { + target_bool = emit_store_flag_force (target_bool, EQ, cc_reg, + const0_rtx, VOIDmode, 0, 1); + goto success; + } + goto success_bool_from_val; + } - target_bool - = (cc_reg - ? emit_store_flag_force (target_bool, EQ, cc_reg, - const0_rtx, VOIDmode, 0, 1) - : emit_store_flag_force (target_bool, EQ, target_oval, - expected, VOIDmode, 1, 1)); - goto success; + /* Also check for library support for __sync_val_compare_and_swap. */ + libfunc = optab_libfunc (sync_compare_and_swap_optab, mode); + if (libfunc != NULL) + { + rtx addr = convert_memory_address (ptr_mode, XEXP (mem, 0)); + target_oval = emit_library_call_value (libfunc, NULL_RTX, LCT_NORMAL, + mode, 3, addr, ptr_mode, + expected, mode, desired, mode); + + /* Compute the boolean return value only if requested. */ + if (ptarget_bool) + goto success_bool_from_val; + else + goto success; } + + /* Failure. */ return false; + success_bool_from_val: + target_bool = emit_store_flag_force (target_bool, EQ, target_oval, + expected, VOIDmode, 1, 1); success: /* Make sure that the oval output winds up where the caller asked. */ if (ptarget_oval) @@ -7444,6 +7664,76 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval, return true; } +/* Generate asm volatile("" : : : "memory") as the memory barrier. */ + +static void +expand_asm_memory_barrier (void) +{ + rtx asm_op, clob; + + asm_op = gen_rtx_ASM_OPERANDS (VOIDmode, empty_string, empty_string, 0, + rtvec_alloc (0), rtvec_alloc (0), + rtvec_alloc (0), UNKNOWN_LOCATION); + MEM_VOLATILE_P (asm_op) = 1; + + clob = gen_rtx_SCRATCH (VOIDmode); + clob = gen_rtx_MEM (BLKmode, clob); + clob = gen_rtx_CLOBBER (VOIDmode, clob); + + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, asm_op, clob))); +} + +/* This routine will either emit the mem_thread_fence pattern or issue a + sync_synchronize to generate a fence for memory model MEMMODEL. */ + +#ifndef HAVE_mem_thread_fence +# define HAVE_mem_thread_fence 0 +# define gen_mem_thread_fence(x) (gcc_unreachable (), NULL_RTX) +#endif +#ifndef HAVE_memory_barrier +# define HAVE_memory_barrier 0 +# define gen_memory_barrier() (gcc_unreachable (), NULL_RTX) +#endif + +void +expand_mem_thread_fence (enum memmodel model) +{ + if (HAVE_mem_thread_fence) + emit_insn (gen_mem_thread_fence (GEN_INT (model))); + else if (model != MEMMODEL_RELAXED) + { + if (HAVE_memory_barrier) + emit_insn (gen_memory_barrier ()); + else if (synchronize_libfunc != NULL_RTX) + emit_library_call (synchronize_libfunc, LCT_NORMAL, VOIDmode, 0); + else + expand_asm_memory_barrier (); + } +} + +/* This routine will either emit the mem_signal_fence pattern or issue a + sync_synchronize to generate a fence for memory model MEMMODEL. */ + +#ifndef HAVE_mem_signal_fence +# define HAVE_mem_signal_fence 0 +# define gen_mem_signal_fence(x) (gcc_unreachable (), NULL_RTX) +#endif + +void +expand_mem_signal_fence (enum memmodel model) +{ + if (HAVE_mem_signal_fence) + emit_insn (gen_mem_signal_fence (GEN_INT (model))); + else if (model != MEMMODEL_RELAXED) + { + /* By default targets are coherent between a thread and the signal + handler running on the same thread. Thus this really becomes a + compiler barrier, in that stores must not be sunk past + (or raised above) a given point. 
*/ + expand_asm_memory_barrier (); + } +} + /* This function expands the atomic load operation: return the atomically loaded value in MEM. @@ -7486,13 +7776,13 @@ expand_atomic_load (rtx target, rtx mem, enum memmodel model) target = gen_reg_rtx (mode); /* Emit the appropriate barrier before the load. */ - expand_builtin_mem_thread_fence (model); + expand_mem_thread_fence (model); emit_move_insn (target, mem); /* For SEQ_CST, also emit a barrier after the load. */ if (model == MEMMODEL_SEQ_CST) - expand_builtin_mem_thread_fence (model); + expand_mem_thread_fence (model); return target; } @@ -7533,7 +7823,7 @@ expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release) { /* lock_release is only a release barrier. */ if (model == MEMMODEL_SEQ_CST) - expand_builtin_mem_thread_fence (model); + expand_mem_thread_fence (model); return const0_rtx; } } @@ -7544,7 +7834,9 @@ expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release) the result. If that doesn't work, don't do anything. */ if (GET_MODE_PRECISION(mode) > BITS_PER_WORD) { - rtx target = expand_atomic_exchange (NULL_RTX, mem, val, model, false); + rtx target = maybe_emit_atomic_exchange (NULL_RTX, mem, val, model); + if (!target) + target = maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val); if (target) return const0_rtx; else @@ -7553,13 +7845,13 @@ expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release) /* If there is no mem_store, default to a move with barriers */ if (model == MEMMODEL_SEQ_CST || model == MEMMODEL_RELEASE) - expand_builtin_mem_thread_fence (model); + expand_mem_thread_fence (model); emit_move_insn (mem, val); /* For SEQ_CST, also emit a barrier after the load. */ if (model == MEMMODEL_SEQ_CST) - expand_builtin_mem_thread_fence (model); + expand_mem_thread_fence (model); return const0_rtx; } @@ -7570,57 +7862,123 @@ expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release) struct atomic_op_functions { - struct direct_optab_d *mem_fetch_before; - struct direct_optab_d *mem_fetch_after; - struct direct_optab_d *mem_no_result; - struct direct_optab_d *fetch_before; - struct direct_optab_d *fetch_after; - struct direct_optab_d *no_result; + direct_optab mem_fetch_before; + direct_optab mem_fetch_after; + direct_optab mem_no_result; + optab fetch_before; + optab fetch_after; + direct_optab no_result; enum rtx_code reverse_code; }; -static const struct atomic_op_functions * -get_atomic_op_for_code (enum rtx_code code) -{ - static const struct atomic_op_functions add_op = { - atomic_fetch_add_optab, atomic_add_fetch_optab, atomic_add_optab, - sync_old_add_optab, sync_new_add_optab, sync_add_optab, MINUS - }, sub_op = { - atomic_fetch_sub_optab, atomic_sub_fetch_optab, atomic_sub_optab, - sync_old_sub_optab, sync_new_sub_optab, sync_sub_optab, PLUS - }, xor_op = { - atomic_fetch_xor_optab, atomic_xor_fetch_optab, atomic_xor_optab, - sync_old_xor_optab, sync_new_xor_optab, sync_xor_optab, XOR - }, and_op = { - atomic_fetch_and_optab, atomic_and_fetch_optab, atomic_and_optab, - sync_old_and_optab, sync_new_and_optab, sync_and_optab, UNKNOWN - }, nand_op = { - atomic_fetch_nand_optab, atomic_nand_fetch_optab, atomic_nand_optab, - sync_old_nand_optab, sync_new_nand_optab, sync_nand_optab, UNKNOWN - }, ior_op = { - atomic_fetch_or_optab, atomic_or_fetch_optab, atomic_or_optab, - sync_old_ior_optab, sync_new_ior_optab, sync_ior_optab, UNKNOWN - }; +/* Fill in structure pointed to by OP with the various optab entries for an + operation of 
type CODE. */ + +static void +get_atomic_op_for_code (struct atomic_op_functions *op, enum rtx_code code) +{ + gcc_assert (op!= NULL); + + /* If SWITCHABLE_TARGET is defined, then subtargets can be switched + in the source code during compilation, and the optab entries are not + computable until runtime. Fill in the values at runtime. */ switch (code) { case PLUS: - return &add_op; + op->mem_fetch_before = atomic_fetch_add_optab; + op->mem_fetch_after = atomic_add_fetch_optab; + op->mem_no_result = atomic_add_optab; + op->fetch_before = sync_old_add_optab; + op->fetch_after = sync_new_add_optab; + op->no_result = sync_add_optab; + op->reverse_code = MINUS; + break; case MINUS: - return &sub_op; + op->mem_fetch_before = atomic_fetch_sub_optab; + op->mem_fetch_after = atomic_sub_fetch_optab; + op->mem_no_result = atomic_sub_optab; + op->fetch_before = sync_old_sub_optab; + op->fetch_after = sync_new_sub_optab; + op->no_result = sync_sub_optab; + op->reverse_code = PLUS; + break; case XOR: - return &xor_op; + op->mem_fetch_before = atomic_fetch_xor_optab; + op->mem_fetch_after = atomic_xor_fetch_optab; + op->mem_no_result = atomic_xor_optab; + op->fetch_before = sync_old_xor_optab; + op->fetch_after = sync_new_xor_optab; + op->no_result = sync_xor_optab; + op->reverse_code = XOR; + break; case AND: - return &and_op; + op->mem_fetch_before = atomic_fetch_and_optab; + op->mem_fetch_after = atomic_and_fetch_optab; + op->mem_no_result = atomic_and_optab; + op->fetch_before = sync_old_and_optab; + op->fetch_after = sync_new_and_optab; + op->no_result = sync_and_optab; + op->reverse_code = UNKNOWN; + break; case IOR: - return &ior_op; + op->mem_fetch_before = atomic_fetch_or_optab; + op->mem_fetch_after = atomic_or_fetch_optab; + op->mem_no_result = atomic_or_optab; + op->fetch_before = sync_old_ior_optab; + op->fetch_after = sync_new_ior_optab; + op->no_result = sync_ior_optab; + op->reverse_code = UNKNOWN; + break; case NOT: - return &nand_op; + op->mem_fetch_before = atomic_fetch_nand_optab; + op->mem_fetch_after = atomic_nand_fetch_optab; + op->mem_no_result = atomic_nand_optab; + op->fetch_before = sync_old_nand_optab; + op->fetch_after = sync_new_nand_optab; + op->no_result = sync_nand_optab; + op->reverse_code = UNKNOWN; + break; default: gcc_unreachable (); } } +/* See if there is a more optimal way to implement the operation "*MEM CODE VAL" + using memory order MODEL. If AFTER is true the operation needs to return + the value of *MEM after the operation, otherwise the previous value. + TARGET is an optional place to place the result. The result is unused if + it is const0_rtx. + Return the result if there is a better sequence, otherwise NULL_RTX. */ + +static rtx +maybe_optimize_fetch_op (rtx target, rtx mem, rtx val, enum rtx_code code, + enum memmodel model, bool after) +{ + /* If the value is prefetched, or not used, it may be possible to replace + the sequence with a native exchange operation. */ + if (!after || target == const0_rtx) + { + /* fetch_and (&x, 0, m) can be replaced with exchange (&x, 0, m). */ + if (code == AND && val == const0_rtx) + { + if (target == const0_rtx) + target = gen_reg_rtx (GET_MODE (mem)); + return maybe_emit_atomic_exchange (target, mem, val, model); + } + + /* fetch_or (&x, -1, m) can be replaced with exchange (&x, -1, m). 
*/ + if (code == IOR && val == constm1_rtx) + { + if (target == const0_rtx) + target = gen_reg_rtx (GET_MODE (mem)); + return maybe_emit_atomic_exchange (target, mem, val, model); + } + } + + return NULL_RTX; +} + /* Try to emit an instruction for a specific operation varaition. OPTAB contains the OP functions. TARGET is an optional place to return the result. const0_rtx means unused. @@ -7635,7 +7993,6 @@ maybe_emit_op (const struct atomic_op_functions *optab, rtx target, rtx mem, rtx val, bool use_memmodel, enum memmodel model, bool after) { enum machine_mode mode = GET_MODE (mem); - struct direct_optab_d *this_optab; struct expand_operand ops[4]; enum insn_code icode; int op_counter = 0; @@ -7646,13 +8003,13 @@ maybe_emit_op (const struct atomic_op_functions *optab, rtx target, rtx mem, { if (use_memmodel) { - this_optab = optab->mem_no_result; + icode = direct_optab_handler (optab->mem_no_result, mode); create_integer_operand (&ops[2], model); num_ops = 3; } else { - this_optab = optab->no_result; + icode = direct_optab_handler (optab->no_result, mode); num_ops = 2; } } @@ -7661,19 +8018,19 @@ maybe_emit_op (const struct atomic_op_functions *optab, rtx target, rtx mem, { if (use_memmodel) { - this_optab = after ? optab->mem_fetch_after : optab->mem_fetch_before; + icode = direct_optab_handler (after ? optab->mem_fetch_after + : optab->mem_fetch_before, mode); create_integer_operand (&ops[3], model); - num_ops= 4; + num_ops = 4; } else { - this_optab = after ? optab->fetch_after : optab->fetch_before; + icode = optab_handler (after ? optab->fetch_after + : optab->fetch_before, mode); num_ops = 3; } create_output_operand (&ops[op_counter++], target, mode); } - - icode = direct_optab_handler (this_optab, mode); if (icode == CODE_FOR_nothing) return NULL_RTX; @@ -7682,7 +8039,7 @@ maybe_emit_op (const struct atomic_op_functions *optab, rtx target, rtx mem, create_convert_operand_to (&ops[op_counter++], val, mode, true); if (maybe_expand_insn (icode, num_ops, ops)) - return ((target == const0_rtx) ? const0_rtx : ops[0].value); + return (target == const0_rtx ? const0_rtx : ops[0].value); return NULL_RTX; } @@ -7701,22 +8058,27 @@ expand_atomic_fetch_op (rtx target, rtx mem, rtx val, enum rtx_code code, enum memmodel model, bool after) { enum machine_mode mode = GET_MODE (mem); - const struct atomic_op_functions *optab; + struct atomic_op_functions optab; rtx result; bool unused_result = (target == const0_rtx); - optab = get_atomic_op_for_code (code); + get_atomic_op_for_code (&optab, code); + + /* Check to see if there are any better instructions. */ + result = maybe_optimize_fetch_op (target, mem, val, code, model, after); + if (result) + return result; /* Check for the case where the result isn't used and try those patterns. */ if (unused_result) { /* Try the memory model variant first. */ - result = maybe_emit_op (optab, target, mem, val, true, model, true); + result = maybe_emit_op (&optab, target, mem, val, true, model, true); if (result) return result; /* Next try the old style withuot a memory model. */ - result = maybe_emit_op (optab, target, mem, val, false, model, true); + result = maybe_emit_op (&optab, target, mem, val, false, model, true); if (result) return result; @@ -7725,42 +8087,79 @@ expand_atomic_fetch_op (rtx target, rtx mem, rtx val, enum rtx_code code, } /* Try the __atomic version. 
*/ - result = maybe_emit_op (optab, target, mem, val, true, model, after); + result = maybe_emit_op (&optab, target, mem, val, true, model, after); if (result) return result; /* Try the older __sync version. */ - result = maybe_emit_op (optab, target, mem, val, false, model, after); + result = maybe_emit_op (&optab, target, mem, val, false, model, after); if (result) return result; /* If the fetch value can be calculated from the other variation of fetch, try that operation. */ - if (after || optab->reverse_code != UNKNOWN || target == const0_rtx) + if (after || unused_result || optab.reverse_code != UNKNOWN) { /* Try the __atomic version, then the older __sync version. */ - result = maybe_emit_op (optab, target, mem, val, true, model, !after); + result = maybe_emit_op (&optab, target, mem, val, true, model, !after); if (!result) - result = maybe_emit_op (optab, target, mem, val, false, model, !after); + result = maybe_emit_op (&optab, target, mem, val, false, model, !after); if (result) { /* If the result isn't used, no need to do compensation code. */ if (unused_result) - return target; + return result; /* Issue compensation code. Fetch_after == fetch_before OP val. Fetch_before == after REVERSE_OP val. */ if (!after) - code = optab->reverse_code; - result = expand_simple_binop (mode, code, result, val, NULL_RTX, true, - OPTAB_LIB_WIDEN); + code = optab.reverse_code; + if (code == NOT) + { + result = expand_simple_binop (mode, AND, result, val, NULL_RTX, + true, OPTAB_LIB_WIDEN); + result = expand_simple_unop (mode, NOT, result, target, true); + } + else + result = expand_simple_binop (mode, code, result, val, target, + true, OPTAB_LIB_WIDEN); + return result; + } + } + + /* Try the __sync libcalls only if we can't do compare-and-swap inline. */ + if (!can_compare_and_swap_p (mode, false)) + { + rtx libfunc; + bool fixup = false; + + libfunc = optab_libfunc (after ? optab.fetch_after + : optab.fetch_before, mode); + if (libfunc == NULL + && (after || unused_result || optab.reverse_code != UNKNOWN)) + { + fixup = true; + if (!after) + code = optab.reverse_code; + libfunc = optab_libfunc (after ? optab.fetch_before + : optab.fetch_after, mode); + } + if (libfunc != NULL) + { + rtx addr = convert_memory_address (ptr_mode, XEXP (mem, 0)); + result = emit_library_call_value (libfunc, NULL, LCT_NORMAL, mode, + 2, addr, ptr_mode, val, mode); + + if (!unused_result && fixup) + result = expand_simple_binop (mode, code, result, val, target, + true, OPTAB_LIB_WIDEN); return result; } } /* If nothing else has succeeded, default to a compare and swap loop. */ - if (can_compare_and_swap_p (mode)) + if (can_compare_and_swap_p (mode, true)) { rtx insn; rtx t0 = gen_reg_rtx (mode), t1; @@ -7843,24 +8242,31 @@ maybe_legitimize_operand_same_code (enum insn_code icode, unsigned int opno, return true; /* If the operand is a memory whose address has no side effects, - try forcing the address into a register. The check for side - effects is important because force_reg cannot handle things - like auto-modified addresses. */ - if (insn_data[(int) icode].operand[opno].allows_mem - && MEM_P (op->value) - && !side_effects_p (XEXP (op->value, 0))) - { - rtx addr, mem, last; - - last = get_last_insn (); - addr = force_reg (Pmode, XEXP (op->value, 0)); - mem = replace_equiv_address (op->value, addr); - if (insn_operand_matches (icode, opno, mem)) + try forcing the address into a non-virtual pseudo register. 
+ The check for side effects is important because copy_to_mode_reg + cannot handle things like auto-modified addresses. */ + if (insn_data[(int) icode].operand[opno].allows_mem && MEM_P (op->value)) + { + rtx addr, mem; + + mem = op->value; + addr = XEXP (mem, 0); + if (!(REG_P (addr) && REGNO (addr) > LAST_VIRTUAL_REGISTER) + && !side_effects_p (addr)) { - op->value = mem; - return true; + rtx last; + enum machine_mode mode; + + last = get_last_insn (); + mode = targetm.addr_space.address_mode (MEM_ADDR_SPACE (mem)); + mem = replace_equiv_address (mem, copy_to_mode_reg (mode, addr)); + if (insn_operand_matches (icode, opno, mem)) + { + op->value = mem; + return true; + } + delete_insns_since (last); } - delete_insns_since (last); } return false;
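
For a user-level reference to the expansion paths added above, the following stand-alone sketch exercises the builtins they service: __sync_lock_test_and_set and __sync_lock_release (cf. expand_sync_lock_test_and_set and the use_release path in expand_atomic_store above), the __atomic exchange and fetch-op forms, and the fence handled by expand_mem_thread_fence.  It is illustrative only and not part of the patch; the file name, identifiers, and build line are invented for the example, and it assumes a compiler that already provides the __atomic builtins this series targets (GCC 4.7-era or newer).

/* atomic-demo.c: illustrative companion to the patch, not part of it.
   Hypothetical build line:  gcc -O2 atomic-demo.c  */

#include <assert.h>
#include <stdio.h>

static int lock;              /* 0 = free, 1 = held.  */
static unsigned int counter;

static void
take_lock (void)
{
  /* Acquire-only exchange.  expand_sync_lock_test_and_set tries the
     atomic_exchange pattern, then sync_lock_test_and_set, then a
     compare-and-swap loop, then atomic_test_and_set (value 1 only).  */
  while (__sync_lock_test_and_set (&lock, 1))
    continue;
}

static void
drop_lock (void)
{
  /* Release-only store; see the use_release handling in
     expand_atomic_store.  */
  __sync_lock_release (&lock);
}

int
main (void)
{
  unsigned int old;

  take_lock ();
  counter++;
  drop_lock ();

  /* maybe_optimize_fetch_op: a fetch-and-AND with 0 (or fetch-and-OR
     with -1) returns the old value and leaves a known constant behind,
     so it may legitimately expand to the same sequence as an atomic
     exchange of that constant.  */
  counter = 5;
  old = __atomic_fetch_and (&counter, 0, __ATOMIC_SEQ_CST);
  assert (old == 5 && counter == 0);

  counter = 5;
  old = __atomic_exchange_n (&counter, 0, __ATOMIC_SEQ_CST);
  assert (old == 5 && counter == 0);

  /* expand_mem_thread_fence: a full barrier taken from the
     mem_thread_fence pattern, a memory_barrier pattern,
     __sync_synchronize, or a volatile asm with a "memory" clobber,
     whichever the target provides.  */
  __atomic_thread_fence (__ATOMIC_SEQ_CST);

  printf ("counter = %u, lock = %d\n", counter, lock);
  return 0;
}

On a single thread both assert pairs pass trivially; the point is only that the middle end may now emit identical code for the two forms, while the legacy __sync_lock_* builtins keep their acquire-only and release-only semantics even when routed through the new __atomic expanders.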