// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#ifdef USING_SPLIT_STACK

extern void * __splitstack_find (void *, void *, size_t *, void **, void **,
				 void **);

extern void * __splitstack_find_context (void *context[10], size_t *, void **,
					  void **, void **);
	PtrSize = sizeof(void*),
	DebugMark = 0,	// run second pass to check mark

	// Four bits per word (see #defines below).
	wordsPerBitmapWord = sizeof(void*)*8/4,
	bitShift = sizeof(void*)*8/4,
// Bits in per-word bitmap.
// #defines because enum might not be able to hold the values.
//
// Each word in the bitmap describes wordsPerBitmapWord words
// of heap memory. There are 4 bitmap bits dedicated to each heap word,
// so on a 64-bit system there is one bitmap word per 16 heap words.
// The bits in the word are packed together by type first, then by
// heap location, so each 64-bit bitmap word consists of, from top to bottom,
// the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
// then the 16 bitNoPointers/bitBlockBoundary bits, then the 16 bitAllocated bits.
// This layout makes it easier to iterate over the bits of a given type.
//
// The bitmap starts at mheap.arena_start and extends *backward* from
// there. On a 64-bit system the off'th word in the arena is tracked by
// the off/16+1'th word before mheap.arena_start. (On a 32-bit system,
// the only difference is that the divisor is 8.)
//
// To pull out the bits corresponding to a given pointer p, we use:
//
//	off = p - (uintptr*)mheap.arena_start;  // word offset
//	b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
//	shift = off % wordsPerBitmapWord
//	bits = *b >> shift;
//	/* then test bits & bitAllocated, bits & bitMarked, etc. */
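//
// For example (illustrative numbers, 64-bit system, so
// wordsPerBitmapWord == 16): for the word at offset off = 37 in the
// arena, b is the third bitmap word before mheap.arena_start
// (37/16 == 2, minus one more) and shift = 37%16 = 5, so the four
// bits describing that heap word sit at bit positions 5, 21, 37,
// and 53 of *b.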
#define bitAllocated		((uintptr)1<<(bitShift*0))
#define bitNoPointers		((uintptr)1<<(bitShift*1))	/* when bitAllocated is set */
#define bitMarked		((uintptr)1<<(bitShift*2))	/* when bitAllocated is set */
#define bitSpecial		((uintptr)1<<(bitShift*3))	/* when bitAllocated is set - has finalizer or being profiled */
#define bitBlockBoundary	((uintptr)1<<(bitShift*1))	/* when bitAllocated is NOT set */

#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
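
// Example tests against these masks, as used by scanblock and sweep
// below: (bits & (bitAllocated|bitMarked)) == bitAllocated selects
// blocks that are allocated but not yet marked, and
// (bits & (bitAllocated|bitBlockBoundary)) != 0 detects the start of
// a block whether it is allocated or free.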
// Holding worldsema grants an M the right to try to stop the world.
// The procedure for stopping the world is:
//
//	runtime_semacquire(&runtime_worldsema);
//	runtime_stoptheworld();
//
//	... do stuff ...
//
//	runtime_semrelease(&runtime_worldsema);
//	runtime_starttheworld();

uint32 runtime_worldsema = 1;
// TODO: Make these per-M.
static uint64 nhandoff;

typedef struct Workbuf Workbuf;

typedef struct Finalizer Finalizer;
	const struct __go_func_type *ft;

typedef struct FinBlock FinBlock;

static FinBlock *finq;	// list of finalizers that are to be executed
static FinBlock *finc;	// cache of free blocks
static FinBlock *allfin;	// list of all blocks
static int32 fingwait;

static void runfinq(void*);
static Workbuf* getempty(Workbuf*);
static Workbuf* getfull(Workbuf*);
static void putempty(Workbuf*);
static Workbuf* handoff(Workbuf*);

	volatile uint32 nwait;
	volatile uint32 ndone;
// scanblock scans a block of n bytes starting at pointer b for references
// to other objects, scanning any it finds recursively until there are no
// unscanned objects left. Instead of using an explicit recursion, it keeps
// a work list in the Workbuf* structures and loops in the main function
// body. Keeping an explicit work list is easier on the stack allocator and
// more efficient.
static void
scanblock(byte *b, int64 n)
	byte *obj, *arena_start, *arena_used, *p;
	uintptr size, *bitp, bits, shift, i, j, x, xbits, off, nobj, nproc;

	if((int64)(uintptr)n != n || n < 0) {
		// runtime_printf("scanblock %p %lld\n", b, (long long)n);
		runtime_throw("scanblock");

	// Memory arena parameters.
	arena_start = runtime_mheap.arena_start;
	arena_used = runtime_mheap.arena_used;

	wbuf = nil;	// current work buffer
	wp = nil;	// storage for next queued pointer (write pointer)
	nobj = 0;	// number of queued objects

	// Scanblock helpers pass b==nil.
	// The main proc needs to return to make more
	// calls to scanblock. But if work.nproc==1 then
	// might as well process blocks as soon as we
	// have them.
	keepworking = b == nil || work.nproc == 1;

	// Align b to a word boundary.
	off = (uintptr)b & (PtrSize-1);

		// Each iteration scans the block b of length n, queueing pointers in
		// the work buffer.
		runtime_printf("scanblock %p %lld\n", b, (long long) n);

		n >>= (2+PtrSize/8);	/* n /= PtrSize (4 or 8) */
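		// (On a 64-bit system PtrSize/8 == 1, so the shift is 3 and
		// n /= 8; on a 32-bit system PtrSize/8 == 0, so the shift is
		// 2 and n /= 4.)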
		for(i=0; i<(uintptr)n; i++) {

			// Words outside the arena cannot be pointers.
			if((byte*)obj < arena_start || (byte*)obj >= arena_used)

			// obj may be a pointer to a live object.
			// Try to find the beginning of the object.

			// Round down to word boundary.
			obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));

			// Find bits for this word.
			off = (uintptr*)obj - (uintptr*)arena_start;
			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
			shift = off % wordsPerBitmapWord;
			bits = xbits >> shift;

			// Pointing at the beginning of a block?
			if((bits & (bitAllocated|bitBlockBoundary)) != 0)

			// Pointing just past the beginning?
			// Scan backward a little to find a block boundary.
			for(j=shift; j-->0; ) {
				if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
					obj = (byte*)obj - (shift-j)*PtrSize;
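					// e.g. if shift == 5 and the boundary bit
					// is found at j == 2, obj moves back 3
					// words to the block's base.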
			// Otherwise consult span table to find beginning.
			// (Manually inlined copy of MHeap_LookupMaybe.)
			k = (uintptr)obj>>PageShift;
			x = k;
			if(sizeof(void*) == 8)
				x -= (uintptr)arena_start>>PageShift;
			s = runtime_mheap.map[x];
			if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
			p = (byte*)((uintptr)s->start<<PageShift);
			if(s->sizeclass == 0) {
				if((byte*)obj >= (byte*)s->limit)
				size = runtime_class_to_size[s->sizeclass];
				int32 i = ((byte*)obj - p)/size;

			// Now that we know the object header, reload bits.
			off = (uintptr*)obj - (uintptr*)arena_start;
			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
			shift = off % wordsPerBitmapWord;
			bits = xbits >> shift;

			// Now we have bits, bitp, and shift correct for
			// obj pointing at the base of the object.
			// Only care about allocated and not marked.
			if((bits & (bitAllocated|bitMarked)) != bitAllocated)
			*bitp |= bitMarked<<shift;

			if(x & (bitMarked<<shift))
			if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
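			// The cas is needed because other marking procs may be
			// updating other bit quadruples packed into this same
			// bitmap word; a plain store could lose their updates.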
			// If object has no pointers, don't need to scan further.
			if((bits & bitNoPointers) != 0)

			// If another proc wants a pointer, give it some.
			if(nobj > 4 && work.nwait > 0 && work.full == nil) {
				wbuf = handoff(wbuf);
				wp = (void**)(wbuf->obj + nobj);

			// If buffer is full, get a new one.
			if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
				wbuf = getempty(wbuf);
				wp = (void**)(wbuf->obj);

		// Done scanning [b, b+n). Prepare for the next iteration of
		// the loop by setting b and n to the parameters for the next block.

		// Fetch b from the work buffer.

		// Emptied our buffer: refill.
		wbuf = getfull(wbuf);
		wp = (void**)(wbuf->obj + wbuf->nobj);

		// Ask span about size class.
		// (Manually inlined copy of MHeap_Lookup.)
		x = (uintptr)b>>PageShift;
		if(sizeof(void*) == 8)
			x -= (uintptr)arena_start>>PageShift;
		s = runtime_mheap.map[x];
		if(s->sizeclass == 0)
			n = s->npages<<PageShift;
		else
			n = runtime_class_to_size[s->sizeclass];
// debug_scanblock is the debug copy of scanblock.
// It is simpler, slower, single-threaded, recursive,
// and uses bitSpecial as the mark bit.
static void
debug_scanblock(byte *b, int64 n)
	uintptr size, *bitp, bits, shift, i, xbits, off;

		runtime_throw("debug_scanblock without DebugMark");

	if((int64)(uintptr)n != n || n < 0) {
		// runtime_printf("debug_scanblock %p %D\n", b, n);
		runtime_throw("debug_scanblock");

	// Align b to a word boundary.
	off = (uintptr)b & (PtrSize-1);

	for(i=0; i<(uintptr)n; i++) {

		// Words outside the arena cannot be pointers.
		if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used)

		// Round down to word boundary.
		obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));

		// Consult span table to find beginning.
		s = runtime_MHeap_LookupMaybe(&runtime_mheap, obj);

		p = (byte*)((uintptr)s->start<<PageShift);
		if(s->sizeclass == 0) {
			size = (uintptr)s->npages<<PageShift;
			if((byte*)obj >= (byte*)s->limit)
			size = runtime_class_to_size[s->sizeclass];
			int32 i = ((byte*)obj - p)/size;

		// Now that we know the object header, reload bits.
		off = (uintptr*)obj - (uintptr*)runtime_mheap.arena_start;
		bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		bits = xbits >> shift;

		// Now we have bits, bitp, and shift correct for
		// obj pointing at the base of the object.
		// If not allocated or already marked, done.
		if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0)	// NOTE: bitSpecial not bitMarked
		*bitp |= bitSpecial<<shift;
		if(!(bits & bitMarked))
			runtime_printf("found unmarked block %p in %p\n", obj, vp+i);

		// If object has no pointers, don't need to scan further.
		if((bits & bitNoPointers) != 0)

		debug_scanblock(obj, size);
// Get an empty work buffer off the work.empty list,
// allocating new buffers as needed.
	if(work.nproc == 1) {
		// Put b on full list.

		// Grab from empty list if possible.
			work.empty = b->next;

	// Put b on full list.
		runtime_lock(&work.fmu);
		runtime_unlock(&work.fmu);

	// Grab from empty list if possible.
	runtime_lock(&work.emu);
		work.empty = b->next;
	runtime_unlock(&work.emu);

	if(work.nchunk < sizeof *b) {
		work.chunk = runtime_SysAlloc(work.nchunk);
	b = (Workbuf*)work.chunk;
	work.chunk += sizeof *b;
	work.nchunk -= sizeof *b;
	runtime_unlock(&work);

	if(work.nproc == 1) {
		b->next = work.empty;

	runtime_lock(&work.emu);
	b->next = work.empty;
	runtime_unlock(&work.emu);

// Get a full work buffer off the work.full list, or return nil.
	if(work.nproc == 1) {
		// Put b on empty list.
		b->next = work.empty;

		// Grab from full list if possible.
		// Since work.nproc==1, no one else is
		// going to give us work.

	// Grab buffer from full list if possible.
	runtime_lock(&work.fmu);
	if(work.full != nil) {
		work.full = b1->next;
		runtime_unlock(&work.fmu);
	runtime_unlock(&work.fmu);

	runtime_xadd(&work.nwait, +1);
		runtime_lock(&work.fmu);
		if(work.full != nil) {
			runtime_xadd(&work.nwait, -1);
			work.full = b1->next;
			runtime_unlock(&work.fmu);
		runtime_unlock(&work.fmu);

		if(work.nwait == work.nproc)
			return nil;
		runtime_procyield(20);

	// Make new buffer with half of b's pointers.
	runtime_memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);

	// Put b on full list - let first half of b get stolen.
	runtime_lock(&work.fmu);
	runtime_unlock(&work.fmu);
// Scanstack calls scanblock on each of gp's stack segments.
static void
scanstack(void (*scanblock)(byte*, int64), G *gp)
#ifdef USING_SPLIT_STACK
	if(gp == runtime_g()) {
		// Scanning our own stack.
		sp = __splitstack_find(nil, nil, &spsize, &next_segment,
				       &next_sp, &initial_sp);
	} else if((mp = gp->m) != nil && mp->helpgc) {
		// gchelper's stack is in active use and has no interesting pointers.
		return;
	} else {
		// Scanning another goroutine's stack.
		// The goroutine is usually asleep (the world is stopped).
		// The exception is that if the goroutine is about to enter or might
		// have just exited a system call, it may be executing code such
		// as schedlock and may have needed to start a new stack segment.
		// Use the stack segment and stack pointer at the time of
		// the system call instead, since that won't change underfoot.
		if(gp->gcstack != nil) {
			spsize = gp->gcstack_size;
			next_segment = gp->gcnext_segment;
			next_sp = gp->gcnext_sp;
			initial_sp = gp->gcinitial_sp;
			sp = __splitstack_find_context(&gp->stack_context[0],
						       &spsize, &next_segment,
						       &next_sp, &initial_sp);

	scanblock(sp, spsize);
	while((sp = __splitstack_find(next_segment, next_sp,
				      &spsize, &next_segment,
				      &next_sp, &initial_sp)) != nil)
		scanblock(sp, spsize);

	if(gp == runtime_g()) {
		// Scanning our own stack.
	} else if((mp = gp->m) != nil && mp->helpgc) {
		// gchelper's stack is in active use and has no interesting pointers.
		return;
	} else {
		// Scanning another goroutine's stack.
		// The goroutine is usually asleep (the world is stopped).
		bottom = (byte*)gp->gcnext_sp;

	top = (byte*)gp->gcinitial_sp + gp->gcstack_size;
	if(top > bottom)
		scanblock(bottom, top - bottom);
	else
		scanblock(top, bottom - top);
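	// (Whichever endpoint is lower is passed as the base, so the scan
	// covers the live range regardless of the direction in which this
	// stack grows.)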
// Markfin calls scanblock on the blocks that have finalizers:
// the things pointed at cannot be freed until the finalizers have run.
	if(!runtime_mlookup(v, (byte**)&v, &size, nil) || !runtime_blockspecial(v))
		runtime_throw("mark - finalizer inconsistency");

	// Do not mark the finalizer block itself. Just mark the things it points at.

static struct root_list* roots;

void
__go_register_gc_roots (struct root_list* r)
	// FIXME: This needs locking if multiple goroutines can call
	// dlopen simultaneously.

static void
debug_markfin(void *v)
	if(!runtime_mlookup(v, (byte**)&v, &size, nil))
		runtime_throw("debug_mark - finalizer inconsistency");
	debug_scanblock(v, size);
static void
mark(void (*scan)(byte*, int64))
	struct root_list *pl;

	for(pl = roots; pl != nil; pl = pl->next) {
		struct root* pr = &pl->roots[0];
			void *decl = pr->decl;
			scanblock(decl, pr->size);

	scan((byte*)&runtime_m0, sizeof runtime_m0);
	scan((byte*)&runtime_g0, sizeof runtime_g0);
	scan((byte*)&runtime_allg, sizeof runtime_allg);
	scan((byte*)&runtime_allm, sizeof runtime_allm);
	runtime_MProf_Mark(scan);
	runtime_time_scan(scan);

	for(gp=runtime_allg; gp!=nil; gp=gp->alllink) {
			runtime_printf("unexpected G.status %d\n", gp->status);
			runtime_throw("mark - bad status");
		if(gp != runtime_g())
			runtime_throw("mark - world not stopped");

	// mark things pointed at by objects with finalizers
	if(scan == debug_scanblock)
		runtime_walkfintab(debug_markfin, scan);
	else
		runtime_walkfintab(markfin, scan);

	for(fb=allfin; fb; fb=fb->alllink)
		scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]));

	// in multiproc mode, join in the queued work.
static bool
handlespecial(byte *p, uintptr size)
	const struct __go_func_type *ft;

	if(!runtime_getfinalizer(p, true, &fn, &ft)) {
		runtime_setblockspecial(p, false);
		runtime_MProf_Free(p, size);

	runtime_lock(&finlock);
	if(finq == nil || finq->cnt == finq->cap) {
		finc = runtime_SysAlloc(PageSize);
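		// The +1 below assumes FinBlock ends with an embedded
		// one-element Finalizer array (fin[1]), so one Finalizer
		// lives in the header and the rest fill out the page.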
		finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
		finc->alllink = allfin;

	f = &finq->fin[finq->cnt];

	runtime_unlock(&finlock);
// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
	arena_start = runtime_mheap.arena_start;
	now = runtime_nanotime();

		if(!runtime_casp(&work.spans, s, s->allnext))

		// Stamp newly unused spans. The scavenger will use that
		// info to potentially give back some pages to the OS.
		if(s->state == MSpanFree && s->unusedsince == 0)
			s->unusedsince = now;

		if(s->state != MSpanInUse)

		p = (byte*)(s->start << PageShift);

			size = s->npages<<PageShift;
			// Chunk full of small blocks.
			size = runtime_class_to_size[cl];
			npages = runtime_class_to_allocnpages[cl];
			n = (npages << PageShift) / size;
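			// (Illustrative: assuming 4K pages, i.e. PageShift == 12,
			// a one-page span of the 32-byte class yields
			// n = 4096/32 = 128 objects.)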
		// Sweep through n objects of given size starting at p.
		// This thread owns the span now, so it can manipulate
		// the block bitmap without atomic operations.
		for(; n > 0; n--, p += size) {
			uintptr off, *bitp, shift, bits;

			off = (uintptr*)p - (uintptr*)arena_start;
			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
			shift = off % wordsPerBitmapWord;

			if((bits & bitAllocated) == 0)

			if((bits & bitMarked) != 0) {
				if(!(bits & bitSpecial))
					runtime_printf("found spurious mark on %p\n", p);
				*bitp &= ~(bitSpecial<<shift);
				*bitp &= ~(bitMarked<<shift);

			// Special means it has a finalizer or is being profiled.
			// In DebugMark mode, the bit has been coopted so
			// we have to assume all blocks are special.
			if(DebugMark || (bits & bitSpecial) != 0) {
				if(handlespecial(p, size))

			// Mark freed; restore block boundary bit.
			*bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);

			if(s->sizeclass == 0) {
				// Free large span.
				runtime_unmarkspan(p, 1<<PageShift);
				*(uintptr*)p = 1;	// needs zeroing
				runtime_MHeap_Free(&runtime_mheap, s, 1);
			} else {
				// Free small object.
				if(size > sizeof(uintptr))
					((uintptr*)p)[1] = 1;	// mark as "needs to be zeroed"
				c->local_by_size[s->sizeclass].nfree++;
				runtime_MCache_Free(c, p, s->sizeclass, size);

			c->local_alloc -= size;
void
runtime_gchelper(void)
	// Wait until main proc is ready for mark help.
	runtime_lock(&work.markgate);
	runtime_unlock(&work.markgate);

	// Wait until main proc is ready for sweep help.
	runtime_lock(&work.sweepgate);
	runtime_unlock(&work.sweepgate);

	if(runtime_xadd(&work.ndone, +1) == work.nproc-1)
		runtime_notewakeup(&work.alldone);
// Initialized from $GOGC. GOGC=off means no gc.
//
// Next gc is after we've allocated an extra amount of
// memory proportional to the amount already in use.
// If gcpercent=100 and we're using 4M, we'll gc again
// when we get to 8M. This keeps the gc cost in linear
// proportion to the allocation cost. Adjusting gcpercent
// just changes the linear constant (and also the amount of
// extra memory used).
static int32 gcpercent = -2;
	for(m=runtime_allm; m; m=m->alllink)
		runtime_MCache_ReleaseAll(m->mcache);

	for(m=runtime_allm; m; m=m->alllink) {
		runtime_purgecachedstats(m);
		// stacks_inuse += m->stackalloc->inuse;
		// stacks_sys += m->stackalloc->sys;
		for(i=0; i<nelem(c->local_by_size); i++) {
			mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
			c->local_by_size[i].nmalloc = 0;
			mstats.by_size[i].nfree += c->local_by_size[i].nfree;
			c->local_by_size[i].nfree = 0;

	mstats.stacks_inuse = stacks_inuse;
	mstats.stacks_sys = stacks_sys;
void
runtime_gc(int32 force)
	int64 t0, t1, t2, t3;
	uint64 heap0, heap1, obj0, obj1;

	// Make sure all registers are saved on stack so that
	// scanstack sees them.
	__builtin_unwind_init();

	// The gc is turned off (via enablegc) until
	// the bootstrap has completed.
	// Also, malloc gets called in the guts
	// of a number of libraries that might be
	// holding locks. To avoid priority inversion
	// problems, don't bother trying to run gc
	// while holding a lock. The next mallocgc
	// without a lock will do the gc instead.
	if(!mstats.enablegc || m->locks > 0 || runtime_panicking)
		return;

	if(gcpercent == -2) {	// first time through
		p = runtime_getenv("GOGC");
		if(p == nil || p[0] == '\0')
			gcpercent = 100;
		else if(runtime_strcmp((const char*)p, "off") == 0)
			gcpercent = -1;
		else
			gcpercent = runtime_atoi(p);

		p = runtime_getenv("GOGCTRACE");
		if(p != nil)
			gctrace = runtime_atoi(p);

	runtime_semacquire(&runtime_worldsema);
	if(!force && mstats.heap_alloc < mstats.next_gc) {
		runtime_semrelease(&runtime_worldsema);
		return;

	t0 = runtime_nanotime();

	runtime_stoptheworld();

	heap0 = mstats.heap_alloc;
	obj0 = mstats.nmalloc - mstats.nfree;

	runtime_lock(&work.markgate);
	runtime_lock(&work.sweepgate);

	if(runtime_gomaxprocs > 1 && runtime_ncpu > 1) {
		runtime_noteclear(&work.alldone);
		work.nproc += runtime_helpgc(&extra);

	runtime_unlock(&work.markgate);	// let the helpers in

		mark(debug_scanblock);
	t1 = runtime_nanotime();

	work.spans = runtime_mheap.allspans;
	runtime_unlock(&work.sweepgate);	// let the helpers in

		runtime_notesleep(&work.alldone);
	t2 = runtime_nanotime();

	mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;

	m->locks++;	// disable gc during the mallocs in newproc

	// kick off or wake up goroutine to run queued finalizers
		fing = __go_go(runfinq, nil);
		runtime_ready(fing);

	heap1 = mstats.heap_alloc;
	obj1 = mstats.nmalloc - mstats.nfree;

	t3 = runtime_nanotime();
	mstats.last_gc = t3;
	mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0;
	mstats.pause_total_ns += t3 - t0;

		runtime_printf("pause %llu\n", (unsigned long long)(t3-t0));

		runtime_printf("gc%d(%d): %llu+%llu+%llu ms %llu -> %llu MB %llu -> %llu (%llu-%llu) objects %llu handoff\n",
			mstats.numgc, work.nproc, (unsigned long long)(t1-t0)/1000000, (unsigned long long)(t2-t1)/1000000, (unsigned long long)(t3-t2)/1000000,
			(unsigned long long)heap0>>20, (unsigned long long)heap1>>20, (unsigned long long)obj0, (unsigned long long)obj1,
			(unsigned long long)mstats.nmalloc, (unsigned long long)mstats.nfree,
			(unsigned long long)nhandoff);

	runtime_semrelease(&runtime_worldsema);

	// If we could have used another helper proc, start one now,
	// in the hope that it will be available next time.
	// It would have been even better to start it before the collection,
	// but doing so requires allocating memory, so it's tricky to
	// coordinate. This lazy approach works out in practice:
	// we don't mind if the first couple gc rounds don't have quite
	// the maximum number of procs.
	runtime_starttheworld(extra);

	// give the queued finalizers, if any, a chance to run

	if(gctrace > 1 && !force)
void runtime_ReadMemStats(MStats *)
	__asm__("libgo_runtime.runtime.ReadMemStats");

void
runtime_ReadMemStats(MStats *stats)
	// Have to acquire worldsema to stop the world,
	// because stoptheworld can only be used by
	// one goroutine at a time, and there might be
	// a pending garbage collection already calling it.
	runtime_semacquire(&runtime_worldsema);
	runtime_stoptheworld();

	runtime_semrelease(&runtime_worldsema);
	runtime_starttheworld(false);
static void
runfinq(void* dummy __attribute__ ((unused)))
	FinBlock *fb, *next;

		// There's no need for a lock in this section
		// because it only conflicts with the garbage
		// collector, and the garbage collector only
		// runs when everyone else is stopped, and
		// runfinq only stops at the gosched() or
		// during the calls in the for loop.
			gp->status = Gwaiting;
			gp->waitreason = "finalizer wait";
		for(; fb; fb=next) {
			for(i=0; i<(uint32)fb->cnt; i++) {
				params[0] = &f->arg;
				runtime_setblockspecial(f->arg, false);
				reflect_call(f->ft, (void*)f->fn, 0, 0, params, nil);

		runtime_gc(1);	// trigger another gc to clean up the finalized objects, if possible
// mark the block at v of size n as allocated.
// If noptr is true, mark it as having no pointers.
void
runtime_markallocated(void *v, uintptr n, bool noptr)
	uintptr *b, obits, bits, off, shift;

	// runtime_printf("markallocated %p+%p\n", v, n);

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("markallocated: bad pointer");

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;	// word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift);
	if(noptr)
		bits |= bitNoPointers<<shift;
	if(runtime_singleproc) {

	// more than one goroutine is potentially running: use atomic op
	if(runtime_casp((void**)b, (void*)obits, (void*)bits))
// mark the block at v of size n as freed.
void
runtime_markfreed(void *v, uintptr n)
	uintptr *b, obits, bits, off, shift;

	// runtime_printf("markfreed %p+%p\n", v, n);

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("markfreed: bad pointer");

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;	// word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
	if(runtime_singleproc) {

	// more than one goroutine is potentially running: use atomic op
	if(runtime_casp((void**)b, (void*)obits, (void*)bits))
// check that the block at v of size n is marked freed.
void
runtime_checkfreed(void *v, uintptr n)
	uintptr *b, bits, off, shift;

	if(!runtime_checking)
		return;

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		return;	// not allocated, so okay

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;	// word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	if((bits & bitAllocated) != 0) {
		runtime_printf("checkfreed %p+%p: off=%p have=%p\n",
			v, (void*)n, (void*)off, (void*)(bits & bitMask));
		runtime_throw("checkfreed: not freed");
// mark the span of memory at v as having n blocks of the given size.
// if leftover is true, there is left over space at the end of the span.
void
runtime_markspan(void *v, uintptr size, uintptr n, bool leftover)
	uintptr *b, off, shift;

	if((byte*)v+size*n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("markspan: bad pointer");

	if(leftover)	// mark a boundary just past end of last block too
		n++;
	for(; n-- > 0; p += size) {
		// Okay to use non-atomic ops here, because we control
		// the entire span, and each bitmap word has bits for only
		// one span, so no other goroutines are changing these
		// bitmap words.
		off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start;	// word offset
		b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		*b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
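		// (Illustrative: for size == 48, i.e. 6 words on a 64-bit
		// system, this sets a bitBlockBoundary every 6th heap word.)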
// unmark the span of memory at v of length n bytes.
void
runtime_unmarkspan(void *v, uintptr n)
	uintptr *p, *b, off;

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("unmarkspan: bad pointer");

	off = p - (uintptr*)runtime_mheap.arena_start;	// word offset
	if(off % wordsPerBitmapWord != 0)
		runtime_throw("unmarkspan: unaligned pointer");
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;

	if(n%wordsPerBitmapWord != 0)
		runtime_throw("unmarkspan: unaligned length");
	// Okay to use non-atomic ops here, because we control
	// the entire span, and each bitmap word has bits for only
	// one span, so no other goroutines are changing these
	// bitmap words.
	n /= wordsPerBitmapWord;
bool
runtime_blockspecial(void *v)
	uintptr *b, off, shift;

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	return (*b & (bitSpecial<<shift)) != 0;

void
runtime_setblockspecial(void *v, bool s)
	uintptr *b, off, shift, bits, obits;

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	if(s)
		bits = obits | (bitSpecial<<shift);
	else
		bits = obits & ~(bitSpecial<<shift);
	if(runtime_singleproc) {

	// more than one goroutine is potentially running: use atomic op
	if(runtime_casp((void**)b, (void*)obits, (void*)bits))
void
runtime_MHeap_MapBits(MHeap *h)
	// Caller has added extra mappings to the arena.
	// Add extra mappings of bitmap words as needed.
	// We allocate extra bitmap pieces in chunks of bitmapChunk.

	n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
	n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
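	// (Illustrative: the line above rounds n up to a multiple of
	// bitmapChunk, whose value is defined elsewhere in the runtime;
	// if it were 8192, n = 5000 would round up to 8192.)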
	if(h->bitmap_mapped >= n)
		return;

	runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped);
	h->bitmap_mapped = n;