// Garbage collector.
+#include <unistd.h>	// for getpagesize
+
#include "runtime.h"
#include "arch.h"
#include "malloc.h"
+#ifdef USING_SPLIT_STACK
+
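+// These are defined in libgcc's generic-morestack.c: they walk the
+// chain of split-stack segments, reporting each segment's base and
+// size in turn so the collector can scan every stack block.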
+extern void * __splitstack_find (void *, void *, size_t *, void **, void **,
+ void **);
+
+extern void * __splitstack_find_context (void *context[10], size_t *, void **,
+ void **, void **);
+
+#endif
+
enum {
Debug = 0,
PtrSize = sizeof(void*),
#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
+// Holding worldsema grants an M the right to try to stop the world.
+// The procedure is:
+//
+// runtime_semacquire(&runtime_worldsema);
+// m->gcing = 1;
+// runtime_stoptheworld();
+//
+// ... do stuff ...
+//
+// m->gcing = 0;
+// runtime_semrelease(&runtime_worldsema);
+// runtime_starttheworld();
+//
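+// (`m` here is the M performing the collection, i.e. runtime_m();
+// runtime_ReadMemStats below follows this pattern.)
+//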
+uint32 runtime_worldsema = 1;
+
// TODO: Make these per-M.
-static uint64 nlookup;
-static uint64 nsizelookup;
-static uint64 naddrlookup;
static uint64 nhandoff;
static int32 gctrace;
Finalizer fin[1];
};
-static bool finstarted;
-static pthread_mutex_t finqlock = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t finqcond = PTHREAD_COND_INITIALIZER;
+static G *fing;
static FinBlock *finq; // list of finalizers that are to be executed
static FinBlock *finc; // cache of free blocks
static FinBlock *allfin; // list of all blocks
// Otherwise consult span table to find beginning.
// (Manually inlined copy of MHeap_LookupMaybe.)
- nlookup++;
- naddrlookup++;
k = (uintptr)obj>>PageShift;
x = k;
if(sizeof(void*) == 8)
b = *--wp;
nobj--;
- // Figure out n = size of b. Start by loading bits for b.
- off = (uintptr*)b - (uintptr*)arena_start;
- bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
- shift = off % wordsPerBitmapWord;
- xbits = *bitp;
- bits = xbits >> shift;
-
- // Might be small; look for nearby block boundary.
- // A block boundary is marked by either bitBlockBoundary
- // or bitAllocated being set (see notes near their definition).
- enum {
- boundary = bitBlockBoundary|bitAllocated
- };
- // Look for a block boundary both after and before b
- // in the same bitmap word.
- //
- // A block boundary j words after b is indicated by
- // bits>>j & boundary
- // assuming shift+j < bitShift. (If shift+j >= bitShift then
- // we'll be bleeding other bit types like bitMarked into our test.)
- // Instead of inserting the conditional shift+j < bitShift into the loop,
- // we can let j range from 1 to bitShift as long as we first
- // apply a mask to keep only the bits corresponding
- // to shift+j < bitShift aka j < bitShift-shift.
- bits &= (boundary<<(bitShift-shift)) - boundary;
-
- // A block boundary j words before b is indicated by
- // xbits>>(shift-j) & boundary
- // (assuming shift >= j). There is no cleverness here
- // avoid the test, because when j gets too large the shift
- // turns negative, which is undefined in C.
-
- for(j=1; j<bitShift; j++) {
- if(((bits>>j)&boundary) != 0 || (shift>=j && ((xbits>>(shift-j))&boundary) != 0)) {
- n = j*PtrSize;
- goto scan;
- }
- }
-
- // Fall back to asking span about size class.
+ // Ask span about size class.
// (Manually inlined copy of MHeap_Lookup.)
- nlookup++;
- nsizelookup++;
x = (uintptr)b>>PageShift;
if(sizeof(void*) == 8)
x -= (uintptr)arena_start>>PageShift;
n = s->npages<<PageShift;
else
n = runtime_class_to_size[s->sizeclass];
- scan:;
}
}
return b1;
}
+// Scanstack calls scanblock on each of gp's stack segments.
+static void
+scanstack(void (*scanblock)(byte*, int64), G *gp)
+{
+#ifdef USING_SPLIT_STACK
+ M *mp;
+ void *sp;
+ size_t spsize;
+ void *next_segment;
+ void *next_sp;
+ void *initial_sp;
+
+ if(gp == runtime_g()) {
+ // Scanning our own stack.
+ sp = __splitstack_find(nil, nil, &spsize, &next_segment,
+ &next_sp, &initial_sp);
+ } else if((mp = gp->m) != nil && mp->helpgc) {
+ // gchelper's stack is in active use and has no interesting pointers.
+ return;
+ } else {
+ // Scanning another goroutine's stack.
+ // The goroutine is usually asleep (the world is stopped).
+
+ // The exception is that if the goroutine is about to enter or might
+ // have just exited a system call, it may be executing code such
+ // as schedlock and may have needed to start a new stack segment.
+ // Use the stack segment and stack pointer at the time of
+ // the system call instead, since that won't change underfoot.
+ if(gp->gcstack != nil) {
+ sp = gp->gcstack;
+ spsize = gp->gcstack_size;
+ next_segment = gp->gcnext_segment;
+ next_sp = gp->gcnext_sp;
+ initial_sp = gp->gcinitial_sp;
+ } else {
+ sp = __splitstack_find_context(&gp->stack_context[0],
+ &spsize, &next_segment,
+ &next_sp, &initial_sp);
+ }
+ }
+ if(sp != nil) {
+ scanblock(sp, spsize);
+ while((sp = __splitstack_find(next_segment, next_sp,
+ &spsize, &next_segment,
+ &next_sp, &initial_sp)) != nil)
+ scanblock(sp, spsize);
+ }
+#else
+ M *mp;
+ byte *bottom;
+ byte *top;
+
+ if(gp == runtime_g()) {
+ // Scanning our own stack.
+ bottom = (byte*)&gp;
+ } else if((mp = gp->m) != nil && mp->helpgc) {
+ // gchelper's stack is in active use and has no interesting pointers.
+ return;
+ } else {
+ // Scanning another goroutine's stack.
+ // The goroutine is usually asleep (the world is stopped).
+ bottom = (byte*)gp->gcnext_sp;
+ if(bottom == nil)
+ return;
+ }
+ top = (byte*)gp->gcinitial_sp + gp->gcstack_size;
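+ // The stack may grow in either direction, so order the
+ // bounds before scanning.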
+ if(top > bottom)
+ scanblock(bottom, top - bottom);
+ else
+ scanblock(top, bottom - top);
+#endif
+}
+
// Markfin calls scanblock on the blocks that have finalizers:
// the things pointed at cannot be freed until the finalizers have run.
static void
scanblock(v, size);
}
-struct root_list {
- struct root_list *next;
- struct root {
- void *decl;
- size_t size;
- } roots[];
-};
-
static struct root_list* roots;
void
mark(void (*scan)(byte*, int64))
{
struct root_list *pl;
+ G *gp;
FinBlock *fb;
+ // mark data+bss.
for(pl = roots; pl != nil; pl = pl->next) {
struct root* pr = &pl->roots[0];
while(1) {
}
}
- scan((byte*)&m0, sizeof m0);
- scan((byte*)&finq, sizeof finq);
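+ // mark the scheduler's global data structures.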
+ scan((byte*)&runtime_m0, sizeof runtime_m0);
+ scan((byte*)&runtime_g0, sizeof runtime_g0);
+ scan((byte*)&runtime_allg, sizeof runtime_allg);
+ scan((byte*)&runtime_allm, sizeof runtime_allm);
runtime_MProf_Mark(scan);
+ runtime_time_scan(scan);
// mark stacks
- __go_scanstacks(scan);
+ for(gp=runtime_allg; gp!=nil; gp=gp->alllink) {
+ switch(gp->status){
+ default:
+ runtime_printf("unexpected G.status %d\n", gp->status);
+ runtime_throw("mark - bad status");
+ case Gdead:
+ break;
+ case Grunning:
+ if(gp != runtime_g())
+ runtime_throw("mark - world not stopped");
+ scanstack(scan, gp);
+ break;
+ case Grunnable:
+ case Gsyscall:
+ case Gwaiting:
+ scanstack(scan, gp);
+ break;
+ }
+ }
// mark things pointed at by objects with finalizers
if(scan == debug_scanblock)
static void
sweep(void)
{
+ M *m;
MSpan *s;
int32 cl, n, npages;
uintptr size;
byte *p;
MCache *c;
byte *arena_start;
+ int64 now;
+ m = runtime_m();
arena_start = runtime_mheap.arena_start;
+ now = runtime_nanotime();
for(;;) {
s = work.spans;
if(!runtime_casp(&work.spans, s, s->allnext))
continue;
+ // Stamp newly unused spans. The scavenger will use that
+ // info to potentially give back some pages to the OS.
+ if(s->state == MSpanFree && s->unusedsince == 0)
+ s->unusedsince = now;
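+ // (A hypothetical scavenger pass might then do, roughly:
+ //	if(now - s->unusedsince > limit)
+ //		runtime_SysUnused((void*)(s->start<<PageShift), s->npages<<PageShift);
+ // where `limit` is an assumed idle threshold.)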
+
if(s->state != MSpanInUse)
continue;
}
}
-static pthread_mutex_t gcsema = PTHREAD_MUTEX_INITIALIZER;
-
void
runtime_gchelper(void)
{
// extra memory used).
static int32 gcpercent = -2;
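+// stealcache releases every M's per-thread MCache back to the
+// central free lists.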
+static void
+stealcache(void)
+{
+ M *m;
+
+ for(m=runtime_allm; m; m=m->alllink)
+ runtime_MCache_ReleaseAll(m->mcache);
+}
+
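+// cachestats flushes each M's cached allocation statistics into
+// the global mstats.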
+static void
+cachestats(void)
+{
+ M *m;
+ MCache *c;
+ uint32 i;
+ uint64 stacks_inuse;
+ uint64 stacks_sys;
+
+ stacks_inuse = 0;
+ stacks_sys = runtime_stacks_sys;
+ for(m=runtime_allm; m; m=m->alllink) {
+ runtime_purgecachedstats(m);
+ // stacks_inuse += m->stackalloc->inuse;
+ // stacks_sys += m->stackalloc->sys;
+ c = m->mcache;
+ for(i=0; i<nelem(c->local_by_size); i++) {
+ mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
+ c->local_by_size[i].nmalloc = 0;
+ mstats.by_size[i].nfree += c->local_by_size[i].nfree;
+ c->local_by_size[i].nfree = 0;
+ }
+ }
+ mstats.stacks_inuse = stacks_inuse;
+ mstats.stacks_sys = stacks_sys;
+}
+
void
-runtime_gc(int32 force __attribute__ ((unused)))
+runtime_gc(int32 force)
{
+ M *m;
int64 t0, t1, t2, t3;
uint64 heap0, heap1, obj0, obj1;
- char *p;
+ const byte *p;
bool extra;
+ // Make sure all registers are saved on stack so that
+ // scanstack sees them.
+ __builtin_unwind_init();
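+ // (__builtin_unwind_init spills the callee-saved registers into
+ // this frame, so a pointer held only in a register is still
+ // visible to the conservative scan.)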
+
// The gc is turned off (via enablegc) until
// the bootstrap has completed.
// Also, malloc gets called in the guts
// problems, don't bother trying to run gc
// while holding a lock. The next mallocgc
// without a lock will do the gc instead.
- if(!mstats.enablegc || m->locks > 0 /* || runtime_panicking */)
+ m = runtime_m();
+ if(!mstats.enablegc || m->locks > 0 || runtime_panicking)
return;
if(gcpercent == -2) { // first time through
p = runtime_getenv("GOGC");
if(p == nil || p[0] == '\0')
gcpercent = 100;
- else if(runtime_strcmp(p, "off") == 0)
+ else if(runtime_strcmp((const char*)p, "off") == 0)
gcpercent = -1;
else
gcpercent = runtime_atoi(p);
p = runtime_getenv("GOGCTRACE");
if(p != nil)
gctrace = runtime_atoi(p);
-
- runtime_initlock(&work.fmu);
- runtime_initlock(&work.emu);
- runtime_initlock(&work.markgate);
- runtime_initlock(&work.sweepgate);
- runtime_initlock(&work.Lock);
}
if(gcpercent < 0)
return;
- pthread_mutex_lock(&finqlock);
- pthread_mutex_lock(&gcsema);
+ runtime_semacquire(&runtime_worldsema);
if(!force && mstats.heap_alloc < mstats.next_gc) {
- pthread_mutex_unlock(&gcsema);
- pthread_mutex_unlock(&finqlock);
+ runtime_semrelease(&runtime_worldsema);
return;
}
t0 = runtime_nanotime();
- nlookup = 0;
- nsizelookup = 0;
- naddrlookup = 0;
nhandoff = 0;
m->gcing = 1;
runtime_stoptheworld();
- __go_cachestats();
+ cachestats();
heap0 = mstats.heap_alloc;
obj0 = mstats.nmalloc - mstats.nfree;
extra = false;
work.nproc = 1;
-#if 0
if(runtime_gomaxprocs > 1 && runtime_ncpu > 1) {
runtime_noteclear(&work.alldone);
work.nproc += runtime_helpgc(&extra);
}
-#endif
work.nwait = 0;
work.ndone = 0;
runtime_notesleep(&work.alldone);
t2 = runtime_nanotime();
- __go_stealcache();
- __go_cachestats();
+ stealcache();
+ cachestats();
- mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
+ mstats.next_gc = mstats.heap_alloc+(mstats.heap_alloc-runtime_stacks_sys)*gcpercent/100;
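+ // (e.g. with gcpercent==100, 8 MB live after collection and 1 MB
+ // of stack memory, the next collection triggers near 15 MB.)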
m->gcing = 0;
m->locks++; // disable gc during the mallocs in newproc
+ if(finq != nil) {
+ // kick off or wake up goroutine to run queued finalizers
+ if(fing == nil)
+ fing = __go_go(runfinq, nil);
+ else if(fingwait) {
+ fingwait = 0;
+ runtime_ready(fing);
+ }
+ }
+ m->locks--;
+ cachestats();
heap1 = mstats.heap_alloc;
obj1 = mstats.nmalloc - mstats.nfree;
t3 = runtime_nanotime();
+ mstats.last_gc = t3;
mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0;
mstats.pause_total_ns += t3 - t0;
mstats.numgc++;
runtime_printf("pause %llu\n", (unsigned long long)t3-t0);
if(gctrace) {
- runtime_printf("gc%d: %llu+%llu+%llu ms %llu -> %llu MB %llu -> %llu (%llu-%llu) objects %llu pointer lookups (%llu size, %llu addr) %llu handoff\n",
- mstats.numgc, (unsigned long long)(t1-t0)/1000000, (unsigned long long)(t2-t1)/1000000, (unsigned long long)(t3-t2)/1000000,
+ runtime_printf("gc%d(%d): %llu+%llu+%llu ms %llu -> %llu MB %llu -> %llu (%llu-%llu) objects %llu handoff\n",
+ mstats.numgc, work.nproc, (unsigned long long)(t1-t0)/1000000, (unsigned long long)(t2-t1)/1000000, (unsigned long long)(t3-t2)/1000000,
(unsigned long long)heap0>>20, (unsigned long long)heap1>>20, (unsigned long long)obj0, (unsigned long long)obj1,
- (unsigned long long)mstats.nmalloc, (unsigned long long)mstats.nfree,
- (unsigned long long)nlookup, (unsigned long long)nsizelookup, (unsigned long long)naddrlookup, (unsigned long long) nhandoff);
+ (unsigned long long)mstats.nmalloc, (unsigned long long)mstats.nfree,
+ (unsigned long long)nhandoff);
}
-
- pthread_mutex_unlock(&gcsema);
+
+ runtime_MProf_GC();
+ runtime_semrelease(&runtime_worldsema);
// If we could have used another helper proc, start one now,
// in the hope that it will be available next time.
// the maximum number of procs.
runtime_starttheworld(extra);
- // finqlock is still held.
- if(finq != nil) {
- // kick off or wake up goroutine to run queued finalizers
- if(!finstarted) {
- __go_go(runfinq, nil);
- finstarted = 1;
- }
- else if(fingwait) {
- fingwait = 0;
- pthread_cond_signal(&finqcond);
- }
- }
- m->locks--;
- pthread_mutex_unlock(&finqlock);
+ // give the queued finalizers, if any, a chance to run
+ if(finq != nil)
+ runtime_gosched();
if(gctrace > 1 && !force)
runtime_gc(1);
}
-void runtime_UpdateMemStats(void)
- __asm__("libgo_runtime.runtime.UpdateMemStats");
+void runtime_ReadMemStats(MStats *)
+ __asm__("libgo_runtime.runtime.ReadMemStats");
void
-runtime_UpdateMemStats(void)
+runtime_ReadMemStats(MStats *stats)
{
- // Have to acquire gcsema to stop the world,
+ M *m;
+
+ // Have to acquire worldsema to stop the world,
// because stoptheworld can only be used by
// one goroutine at a time, and there might be
// a pending garbage collection already calling it.
- pthread_mutex_lock(&gcsema);
+ runtime_semacquire(&runtime_worldsema);
+ m = runtime_m();
m->gcing = 1;
runtime_stoptheworld();
- __go_cachestats();
+ cachestats();
+ *stats = mstats;
m->gcing = 0;
- pthread_mutex_unlock(&gcsema);
+ runtime_semrelease(&runtime_worldsema);
runtime_starttheworld(false);
}
static void
-runfinq(void* dummy)
+runfinq(void* dummy __attribute__ ((unused)))
{
+ G* gp;
Finalizer *f;
FinBlock *fb, *next;
uint32 i;
- USED(dummy);
-
+ gp = runtime_g();
for(;;) {
- pthread_mutex_lock(&finqlock);
+ // There's no need for a lock in this section
+ // because it only conflicts with the garbage
+ // collector, and the garbage collector only
+ // runs when everyone else is stopped, and
+ // runfinq only stops at the gosched() or
+ // during the calls in the for loop.
fb = finq;
finq = nil;
if(fb == nil) {
fingwait = 1;
- pthread_cond_wait(&finqcond, &finqlock);
- pthread_mutex_unlock(&finqlock);
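+ // Park; the collector will runtime_ready(fing)
+ // when new finalizers are queued.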
+ gp->status = Gwaiting;
+ gp->waitreason = "finalizer wait";
+ runtime_gosched();
continue;
}
- pthread_mutex_unlock(&finqlock);
for(; fb; fb=next) {
next = fb->next;
for(i=0; i<(uint32)fb->cnt; i++) {
}
}
-#define runtime_singleproc 0
-
// mark the block at v of size n as allocated.
// If noptr is true, mark it as having no pointers.
void
void
runtime_MHeap_MapBits(MHeap *h)
{
+ size_t page_size;
+
// Caller has added extra mappings to the arena.
// Add extra mappings of bitmap words as needed.
// We allocate extra bitmap pieces in chunks of bitmapChunk.
if(h->bitmap_mapped >= n)
return;
+ page_size = getpagesize();
+ n = (n+page_size-1) & ~(page_size-1);
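+ // (e.g. n==5000 with a 4096-byte page rounds up to 8192.)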
+
runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped);
h->bitmap_mapped = n;
}
-
-void
-__go_enable_gc()
-{
- mstats.enablegc = 1;
-}