+/* Copy back a local mark stack. */
+/* low and high are inclusive bounds. */
+void GC_return_mark_stack(mse * low, mse * high)
+{
+ mse * my_top;
+ mse * my_start;
+ size_t stack_size;
+
+ if (high < low) return;
+ stack_size = high - low + 1;
+ GC_acquire_mark_lock();
+ my_top = GC_mark_stack_top;
+ my_start = my_top + 1;
+ if (my_start - GC_mark_stack + stack_size > GC_mark_stack_size) {
+# ifdef CONDPRINT
+ if (GC_print_stats) {
+ GC_printf0("No room to copy back mark stack.");
+ }
+# endif
+ GC_mark_state = MS_INVALID;
+ GC_mark_stack_too_small = TRUE;
+ /* We drop the local mark stack. We'll fix things later. */
+ } else {
+ BCOPY(low, my_start, stack_size * sizeof(mse));
+ GC_ASSERT(GC_mark_stack_top = my_top);
+# if !defined(IA64) && !defined(HP_PA)
+ GC_memory_barrier();
+# endif
+ /* On IA64, the volatile write acts as a release barrier. */
+ GC_mark_stack_top = my_top + stack_size;
+ }
+ GC_release_mark_lock();
+ GC_notify_all_marker();
+}
+
+/* Mark from the local mark stack. */
+/* On return, the local mark stack is empty. */
+/* But this may be achieved by copying the */
+/* local mark stack back into the global one. */
+void GC_do_local_mark(mse *local_mark_stack, mse *local_top)
+{
+ unsigned n;
+# define N_LOCAL_ITERS 1
+
+# ifdef GC_ASSERTIONS
+ /* Make sure we don't hold mark lock. */
+ GC_acquire_mark_lock();
+ GC_release_mark_lock();
+# endif
+ for (;;) {
+ for (n = 0; n < N_LOCAL_ITERS; ++n) {
+ local_top = GC_mark_from(local_top, local_mark_stack,
+ local_mark_stack + LOCAL_MARK_STACK_SIZE);
+ if (local_top < local_mark_stack) return;
+ if (local_top - local_mark_stack >= LOCAL_MARK_STACK_SIZE/2) {
+ GC_return_mark_stack(local_mark_stack, local_top);
+ return;
+ }
+ }
+ if (GC_mark_stack_top < GC_first_nonempty &&
+ GC_active_count < GC_helper_count
+ && local_top > local_mark_stack + 1) {
+ /* Try to share the load, since the main stack is empty, */
+ /* and helper threads are waiting for a refill. */
+ /* The entries near the bottom of the stack are likely */
+ /* to require more work. Thus we return those, eventhough */
+ /* it's harder. */
+ mse * p;
+ mse * new_bottom = local_mark_stack
+ + (local_top - local_mark_stack)/2;
+ GC_ASSERT(new_bottom > local_mark_stack
+ && new_bottom < local_top);
+ GC_return_mark_stack(local_mark_stack, new_bottom - 1);
+ memmove(local_mark_stack, new_bottom,
+ (local_top - new_bottom + 1) * sizeof(mse));
+ local_top -= (new_bottom - local_mark_stack);
+ }
+ }
+}
+
+#define ENTRIES_TO_GET 5
+
+long GC_markers = 2; /* Normally changed by thread-library- */
+ /* -specific code. */
+
+/* Mark using the local mark stack until the global mark stack is empty */
+/* and there are no active workers. Update GC_first_nonempty to reflect */
+/* progress. */
+/* Caller does not hold mark lock. */
+/* Caller has already incremented GC_helper_count. We decrement it, */
+/* and maintain GC_active_count. */
+void GC_mark_local(mse *local_mark_stack, int id)
+{
+ mse * my_first_nonempty;
+
+ GC_acquire_mark_lock();
+ GC_active_count++;
+ my_first_nonempty = GC_first_nonempty;
+ GC_ASSERT(GC_first_nonempty >= GC_mark_stack &&
+ GC_first_nonempty <= GC_mark_stack_top + 1);
+# ifdef PRINTSTATS
+ GC_printf1("Starting mark helper %lu\n", (unsigned long)id);
+# endif
+ GC_release_mark_lock();
+ for (;;) {
+ size_t n_on_stack;
+ size_t n_to_get;
+ mse *next;
+ mse * my_top;
+ mse * local_top;
+ mse * global_first_nonempty = GC_first_nonempty;
+
+ GC_ASSERT(my_first_nonempty >= GC_mark_stack &&
+ my_first_nonempty <= GC_mark_stack_top + 1);
+ GC_ASSERT(global_first_nonempty >= GC_mark_stack &&
+ global_first_nonempty <= GC_mark_stack_top + 1);
+ if (my_first_nonempty < global_first_nonempty) {
+ my_first_nonempty = global_first_nonempty;
+ } else if (global_first_nonempty < my_first_nonempty) {
+ GC_compare_and_exchange((word *)(&GC_first_nonempty),
+ (word) global_first_nonempty,
+ (word) my_first_nonempty);
+ /* If this fails, we just go ahead, without updating */
+ /* GC_first_nonempty. */
+ }
+ /* Perhaps we should also update GC_first_nonempty, if it */
+ /* is less. But that would require using atomic updates. */
+ my_top = GC_mark_stack_top;
+ n_on_stack = my_top - my_first_nonempty + 1;
+ if (0 == n_on_stack) {
+ GC_acquire_mark_lock();
+ my_top = GC_mark_stack_top;
+ n_on_stack = my_top - my_first_nonempty + 1;
+ if (0 == n_on_stack) {
+ GC_active_count--;
+ GC_ASSERT(GC_active_count <= GC_helper_count);
+ /* Other markers may redeposit objects */
+ /* on the stack. */
+ if (0 == GC_active_count) GC_notify_all_marker();
+ while (GC_active_count > 0
+ && GC_first_nonempty > GC_mark_stack_top) {
+ /* We will be notified if either GC_active_count */
+ /* reaches zero, or if more objects are pushed on */
+ /* the global mark stack. */
+ GC_wait_marker();
+ }
+ if (GC_active_count == 0 &&
+ GC_first_nonempty > GC_mark_stack_top) {
+ GC_bool need_to_notify = FALSE;
+ /* The above conditions can't be falsified while we */
+ /* hold the mark lock, since neither */
+ /* GC_active_count nor GC_mark_stack_top can */
+ /* change. GC_first_nonempty can only be */
+ /* incremented asynchronously. Thus we know that */
+ /* both conditions actually held simultaneously. */
+ GC_helper_count--;
+ if (0 == GC_helper_count) need_to_notify = TRUE;
+# ifdef PRINTSTATS
+ GC_printf1(
+ "Finished mark helper %lu\n", (unsigned long)id);
+# endif
+ GC_release_mark_lock();
+ if (need_to_notify) GC_notify_all_marker();
+ return;
+ }
+ /* else there's something on the stack again, or */
+ /* another helper may push something. */
+ GC_active_count++;
+ GC_ASSERT(GC_active_count > 0);
+ GC_release_mark_lock();
+ continue;
+ } else {
+ GC_release_mark_lock();
+ }
+ }
+ n_to_get = ENTRIES_TO_GET;
+ if (n_on_stack < 2 * ENTRIES_TO_GET) n_to_get = 1;
+ local_top = GC_steal_mark_stack(my_first_nonempty, my_top,
+ local_mark_stack, n_to_get,
+ &my_first_nonempty);
+ GC_ASSERT(my_first_nonempty >= GC_mark_stack &&
+ my_first_nonempty <= GC_mark_stack_top + 1);
+ GC_do_local_mark(local_mark_stack, local_top);
+ }
+}
+
+/* Perform Parallel mark. */
+/* We hold the GC lock, not the mark lock. */
+/* Currently runs until the mark stack is */
+/* empty. */
+void GC_do_parallel_mark()
+{
+ mse local_mark_stack[LOCAL_MARK_STACK_SIZE];
+ mse * local_top;
+ mse * my_top;
+
+ GC_acquire_mark_lock();
+ GC_ASSERT(I_HOLD_LOCK());
+ /* This could be a GC_ASSERT, but it seems safer to keep it on */
+ /* all the time, especially since it's cheap. */
+ if (GC_help_wanted || GC_active_count != 0 || GC_helper_count != 0)
+ ABORT("Tried to start parallel mark in bad state");
+# ifdef PRINTSTATS
+ GC_printf1("Starting marking for mark phase number %lu\n",
+ (unsigned long)GC_mark_no);
+# endif
+ GC_first_nonempty = GC_mark_stack;
+ GC_active_count = 0;
+ GC_helper_count = 1;
+ GC_help_wanted = TRUE;
+ GC_release_mark_lock();
+ GC_notify_all_marker();
+ /* Wake up potential helpers. */
+ GC_mark_local(local_mark_stack, 0);
+ GC_acquire_mark_lock();
+ GC_help_wanted = FALSE;
+ /* Done; clean up. */
+ while (GC_helper_count > 0) GC_wait_marker();
+ /* GC_helper_count cannot be incremented while GC_help_wanted == FALSE */
+# ifdef PRINTSTATS
+ GC_printf1(
+ "Finished marking for mark phase number %lu\n",
+ (unsigned long)GC_mark_no);
+# endif
+ GC_mark_no++;
+ GC_release_mark_lock();
+ GC_notify_all_marker();
+}
+
+
+/* Try to help out the marker, if it's running. */
+/* We do not hold the GC lock, but the requestor does. */
+void GC_help_marker(word my_mark_no)
+{
+ mse local_mark_stack[LOCAL_MARK_STACK_SIZE];
+ unsigned my_id;
+ mse * my_first_nonempty;
+
+ if (!GC_parallel) return;
+ GC_acquire_mark_lock();
+ while (GC_mark_no < my_mark_no
+ || !GC_help_wanted && GC_mark_no == my_mark_no) {
+ GC_wait_marker();
+ }
+ my_id = GC_helper_count;
+ if (GC_mark_no != my_mark_no || my_id >= GC_markers) {
+ /* Second test is useful only if original threads can also */
+ /* act as helpers. Under Linux they can't. */
+ GC_release_mark_lock();
+ return;
+ }
+ GC_helper_count = my_id + 1;
+ GC_release_mark_lock();
+ GC_mark_local(local_mark_stack, my_id);
+ /* GC_mark_local decrements GC_helper_count. */
+}
+
+#endif /* PARALLEL_MARK */
+