// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#ifdef USING_SPLIT_STACK

extern void * __splitstack_find (void *, void *, size_t *, void **, void **,

extern void * __splitstack_find_context (void *context[10], size_t *, void **,

	PtrSize = sizeof(void*),
	DebugMark = 0,	// run second pass to check mark

	// Four bits per word (see #defines below).
	wordsPerBitmapWord = sizeof(void*)*8/4,
	bitShift = sizeof(void*)*8/4,

// Bits in per-word bitmap.
// #defines because enum might not be able to hold the values.
//
// Each word in the bitmap describes wordsPerBitmapWord words
// of heap memory. There are 4 bitmap bits dedicated to each heap word,
// so on a 64-bit system there is one bitmap word per 16 heap words.
// The bits in the word are packed together by type first, then by
// heap location, so each 64-bit bitmap word consists of, from top to bottom,
// the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
// then the 16 bitNoPointers/bitBlockBoundary bits, then the 16 bitAllocated bits.
// This layout makes it easier to iterate over the bits of a given type.
//
// The bitmap starts at mheap.arena_start and extends *backward* from
// there. On a 64-bit system the off'th word in the arena is tracked by
// the off/16+1'th word before mheap.arena_start. (On a 32-bit system,
// the only difference is that the divisor is 8.)
//
// To pull out the bits corresponding to a given pointer p, we use:
//
//	off = p - (uintptr*)mheap.arena_start;  // word offset
//	b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
//	shift = off % wordsPerBitmapWord;
//	bits = *b >> shift;
//	/* then test bits & bitAllocated, bits & bitMarked, etc. */
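//
// Illustrative worked example (not from the original comment), assuming
// a 64-bit system where wordsPerBitmapWord == 16: for the heap word at
// word offset off == 40,
//
//	b = (uintptr*)mheap.arena_start - 40/16 - 1;	/* 3 words before arena_start */
//	shift = 40 % 16;	/* == 8 */
//
// so that word's four bitmap bits sit at bit positions 8 (bitAllocated),
// 24 (bitNoPointers/bitBlockBoundary), 40 (bitMarked), and 56 (bitSpecial)
// of the bitmap word *b.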
#define bitAllocated		((uintptr)1<<(bitShift*0))
#define bitNoPointers		((uintptr)1<<(bitShift*1))	/* when bitAllocated is set */
#define bitMarked		((uintptr)1<<(bitShift*2))	/* when bitAllocated is set */
#define bitSpecial		((uintptr)1<<(bitShift*3))	/* when bitAllocated is set - has finalizer or being profiled */
#define bitBlockBoundary	((uintptr)1<<(bitShift*1))	/* when bitAllocated is NOT set */

#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
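// Note that bitNoPointers and bitBlockBoundary share a bit position;
// they are distinguished by whether bitAllocated is set. Because of
// that sharing, bitMask covers all four bitmap bits of a heap word.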
// TODO: Make these per-M.
static uint64 nlookup;
static uint64 nsizelookup;
static uint64 naddrlookup;
static uint64 nhandoff;

typedef struct Workbuf Workbuf;

typedef struct Finalizer Finalizer;
	const struct __go_func_type *ft;

typedef struct FinBlock FinBlock;

static FinBlock *finq;   // list of finalizers that are to be executed
static FinBlock *finc;   // cache of free blocks
static FinBlock *allfin; // list of all blocks
static int32 fingwait;

static void runfinq(void*);
static Workbuf* getempty(Workbuf*);
static Workbuf* getfull(Workbuf*);
static void putempty(Workbuf*);
static Workbuf* handoff(Workbuf*);

	volatile uint32 nwait;
	volatile uint32 ndone;
// scanblock scans a block of n bytes starting at pointer b for references
// to other objects, scanning any it finds recursively until there are no
// unscanned objects left. Instead of using an explicit recursion, it keeps
// a work list in the Workbuf* structures and loops in the main function
// body. Keeping an explicit work list is easier on the stack allocator and
// more efficient.
scanblock(byte *b, int64 n)
	byte *obj, *arena_start, *arena_used, *p;
	uintptr size, *bitp, bits, shift, i, j, x, xbits, off, nobj, nproc;

	if((int64)(uintptr)n != n || n < 0) {
		// runtime_printf("scanblock %p %lld\n", b, (long long)n);
		runtime_throw("scanblock");

	// Memory arena parameters.
	arena_start = runtime_mheap.arena_start;
	arena_used = runtime_mheap.arena_used;

	wbuf = nil;	// current work buffer
	wp = nil;	// storage for next queued pointer (write pointer)
	nobj = 0;	// number of queued objects

	// Scanblock helpers pass b==nil.
	// The main proc needs to return to make more
	// calls to scanblock. But if work.nproc==1 then
	// might as well process blocks as soon as we
	// have them.
	keepworking = b == nil || work.nproc == 1;
	// Align b to a word boundary.
	off = (uintptr)b & (PtrSize-1);

	// Each iteration scans the block b of length n, queueing pointers in
	// the work buffer.
		runtime_printf("scanblock %p %lld\n", b, (long long)n);

	n >>= (2+PtrSize/8);	/* n /= PtrSize (4 or 8) */
	for(i=0; i<(uintptr)n; i++) {
		// Words outside the arena cannot be pointers.
		if((byte*)obj < arena_start || (byte*)obj >= arena_used)

		// obj may be a pointer to a live object.
		// Try to find the beginning of the object.

		// Round down to word boundary.
		obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));

		// Find bits for this word.
		off = (uintptr*)obj - (uintptr*)arena_start;
		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		bits = xbits >> shift;

		// Pointing at the beginning of a block?
		if((bits & (bitAllocated|bitBlockBoundary)) != 0)

		// Pointing just past the beginning?
		// Scan backward a little to find a block boundary.
		for(j=shift; j-->0; ) {
			if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
				obj = (byte*)obj - (shift-j)*PtrSize;

		// Otherwise consult span table to find beginning.
		// (Manually inlined copy of MHeap_LookupMaybe.)
		k = (uintptr)obj>>PageShift;
		if(sizeof(void*) == 8)
			x -= (uintptr)arena_start>>PageShift;
		s = runtime_mheap.map[x];
		if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
		p = (byte*)((uintptr)s->start<<PageShift);
		if(s->sizeclass == 0) {
			if((byte*)obj >= (byte*)s->limit)
			size = runtime_class_to_size[s->sizeclass];
			int32 i = ((byte*)obj - p)/size;

		// Now that we know the object header, reload bits.
		off = (uintptr*)obj - (uintptr*)arena_start;
		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		bits = xbits >> shift;

		// Now we have bits, bitp, and shift correct for
		// obj pointing at the base of the object.
		// Only care about allocated and not marked.
		if((bits & (bitAllocated|bitMarked)) != bitAllocated)
		*bitp |= bitMarked<<shift;
			if(x & (bitMarked<<shift))
			if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))

		// If object has no pointers, don't need to scan further.
		if((bits & bitNoPointers) != 0)

		// If another proc wants a pointer, give it some.
		if(nobj > 4 && work.nwait > 0 && work.full == nil) {
			wbuf = handoff(wbuf);
			wp = (void**)(wbuf->obj + nobj);

		// If buffer is full, get a new one.
		if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
			wbuf = getempty(wbuf);
			wp = (void**)(wbuf->obj);

	// Done scanning [b, b+n). Prepare for the next iteration of
	// the loop by setting b and n to the parameters for the next block.

	// Fetch b from the work buffer.

		// Emptied our buffer: refill.
		wbuf = getfull(wbuf);
		wp = (void**)(wbuf->obj + wbuf->nobj);

	// Figure out n = size of b. Start by loading bits for b.
	off = (uintptr*)b - (uintptr*)arena_start;
	bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;
	bits = xbits >> shift;

	// Might be small; look for nearby block boundary.
	// A block boundary is marked by either bitBlockBoundary
	// or bitAllocated being set (see notes near their definition).
	boundary = bitBlockBoundary|bitAllocated;
	// Look for a block boundary both after and before b
	// in the same bitmap word.
	//
	// A block boundary j words after b is indicated by
	//	bits>>j & boundary
	// assuming shift+j < bitShift. (If shift+j >= bitShift then
	// we'll be bleeding other bit types like bitMarked into our test.)
	// Instead of inserting the conditional shift+j < bitShift into the loop,
	// we can let j range from 1 to bitShift as long as we first
	// apply a mask to keep only the bits corresponding
	// to shift+j < bitShift aka j < bitShift-shift.
	bits &= (boundary<<(bitShift-shift)) - boundary;
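	// Illustrative example (not in the original comment): on a 64-bit
	// system bitShift == 16, so with shift == 13 the mask is
	//	(boundary<<3) - boundary == 0x70007
	// which keeps only the bits for j < 3 in both the bitAllocated
	// and bitBlockBoundary bit groups.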
	// A block boundary j words before b is indicated by
	//	xbits>>(shift-j) & boundary
	// (assuming shift >= j). There is no cleverness here
	// to avoid the test, because when j gets too large the shift
	// turns negative, which is undefined in C.
	for(j=1; j<bitShift; j++) {
		if(((bits>>j)&boundary) != 0 || (shift>=j && ((xbits>>(shift-j))&boundary) != 0)) {

	// Fall back to asking span about size class.
	// (Manually inlined copy of MHeap_Lookup.)
	x = (uintptr)b>>PageShift;
	if(sizeof(void*) == 8)
		x -= (uintptr)arena_start>>PageShift;
	s = runtime_mheap.map[x];
	if(s->sizeclass == 0)
		n = s->npages<<PageShift;
	else
		n = runtime_class_to_size[s->sizeclass];
// debug_scanblock is the debug copy of scanblock.
// it is simpler, slower, single-threaded, recursive,
// and uses bitSpecial as the mark bit.
debug_scanblock(byte *b, int64 n)
	uintptr size, *bitp, bits, shift, i, xbits, off;

		runtime_throw("debug_scanblock without DebugMark");

	if((int64)(uintptr)n != n || n < 0) {
		// runtime_printf("debug_scanblock %p %D\n", b, n);
		runtime_throw("debug_scanblock");

	// Align b to a word boundary.
	off = (uintptr)b & (PtrSize-1);

	for(i=0; i<(uintptr)n; i++) {
		// Words outside the arena cannot be pointers.
		if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used)

		// Round down to word boundary.
		obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));

		// Consult span table to find beginning.
		s = runtime_MHeap_LookupMaybe(&runtime_mheap, obj);

		p = (byte*)((uintptr)s->start<<PageShift);
		if(s->sizeclass == 0) {
			size = (uintptr)s->npages<<PageShift;
			if((byte*)obj >= (byte*)s->limit)
			size = runtime_class_to_size[s->sizeclass];
			int32 i = ((byte*)obj - p)/size;

		// Now that we know the object header, reload bits.
		off = (uintptr*)obj - (uintptr*)runtime_mheap.arena_start;
		bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		bits = xbits >> shift;

		// Now we have bits, bitp, and shift correct for
		// obj pointing at the base of the object.
		// If not allocated or already marked, done.
		if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0)	// NOTE: bitSpecial not bitMarked
		*bitp |= bitSpecial<<shift;
		if(!(bits & bitMarked))
			runtime_printf("found unmarked block %p in %p\n", obj, vp+i);

		// If object has no pointers, don't need to scan further.
		if((bits & bitNoPointers) != 0)

		debug_scanblock(obj, size);
// Get an empty work buffer off the work.empty list,
// allocating new buffers as needed.
	if(work.nproc == 1) {
		// Put b on full list.

		// Grab from empty list if possible.
			work.empty = b->next;

		// Put b on full list.
			runtime_lock(&work.fmu);
			runtime_unlock(&work.fmu);

		// Grab from empty list if possible.
		runtime_lock(&work.emu);
			work.empty = b->next;
		runtime_unlock(&work.emu);

	if(work.nchunk < sizeof *b) {
		work.chunk = runtime_SysAlloc(work.nchunk);
	b = (Workbuf*)work.chunk;
	work.chunk += sizeof *b;
	work.nchunk -= sizeof *b;
	runtime_unlock(&work);

	if(work.nproc == 1) {
		b->next = work.empty;

	runtime_lock(&work.emu);
	b->next = work.empty;
	runtime_unlock(&work.emu);
// Get a full work buffer off the work.full list, or return nil.
	if(work.nproc == 1) {
		// Put b on empty list.
			b->next = work.empty;

		// Grab from full list if possible.
		// Since work.nproc==1, no one else is
		// going to give us work.

	// Grab buffer from full list if possible.
		runtime_lock(&work.fmu);
		if(work.full != nil) {
			work.full = b1->next;
			runtime_unlock(&work.fmu);
		runtime_unlock(&work.fmu);

	runtime_xadd(&work.nwait, +1);

		runtime_lock(&work.fmu);
		if(work.full != nil) {
			runtime_xadd(&work.nwait, -1);
			work.full = b1->next;
			runtime_unlock(&work.fmu);
		runtime_unlock(&work.fmu);

		if(work.nwait == work.nproc)
		runtime_procyield(20);

	// Make new buffer with half of b's pointers.
	runtime_memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);

	// Put b on full list - let first half of b get stolen.
	runtime_lock(&work.fmu);
	runtime_unlock(&work.fmu);
// Scanstack calls scanblock on each of gp's stack segments.
scanstack(void (*scanblock)(byte*, int64), G *gp)
#ifdef USING_SPLIT_STACK
	if(gp == runtime_g()) {
		// Scanning our own stack.
		sp = __splitstack_find(nil, nil, &spsize, &next_segment,
				       &next_sp, &initial_sp);
	} else if((mp = gp->m) != nil && mp->helpgc) {
		// gchelper's stack is in active use and has no interesting pointers.

		// Scanning another goroutine's stack.
		// The goroutine is usually asleep (the world is stopped).
		//
		// The exception is that if the goroutine is about to enter or might
		// have just exited a system call, it may be executing code such
		// as schedlock and may have needed to start a new stack segment.
		// Use the stack segment and stack pointer at the time of
		// the system call instead, since that won't change underfoot.
		if(gp->gcstack != nil) {
			spsize = gp->gcstack_size;
			next_segment = gp->gcnext_segment;
			next_sp = gp->gcnext_sp;
			initial_sp = gp->gcinitial_sp;
			sp = __splitstack_find_context(&gp->stack_context[0],
						       &spsize, &next_segment,
						       &next_sp, &initial_sp);
		scanblock(sp, spsize);
		while((sp = __splitstack_find(next_segment, next_sp,
					      &spsize, &next_segment,
					      &next_sp, &initial_sp)) != nil)
			scanblock(sp, spsize);
	if(gp == runtime_g()) {
		// Scanning our own stack.
	} else if((mp = gp->m) != nil && mp->helpgc) {
		// gchelper's stack is in active use and has no interesting pointers.

		// Scanning another goroutine's stack.
		// The goroutine is usually asleep (the world is stopped).
		bottom = (byte*)gp->gcnext_sp;

	top = (byte*)gp->gcinitial_sp + gp->gcstack_size;
	if(top > bottom)
		scanblock(bottom, top - bottom);
	else
		scanblock(top, bottom - top);
// Markfin calls scanblock on the blocks that have finalizers:
// the things pointed at cannot be freed until the finalizers have run.
	if(!runtime_mlookup(v, (byte**)&v, &size, nil) || !runtime_blockspecial(v))
		runtime_throw("mark - finalizer inconsistency");

	// do not mark the finalizer block itself. just mark the things it points at.

	struct root_list *next;

static struct root_list* roots;

__go_register_gc_roots (struct root_list* r)
	// FIXME: This needs locking if multiple goroutines can call
	// dlopen simultaneously.

debug_markfin(void *v)
	if(!runtime_mlookup(v, (byte**)&v, &size, nil))
		runtime_throw("debug_mark - finalizer inconsistency");
	debug_scanblock(v, size);
mark(void (*scan)(byte*, int64))
	struct root_list *pl;

	for(pl = roots; pl != nil; pl = pl->next) {
		struct root* pr = &pl->roots[0];
			void *decl = pr->decl;
			scanblock(decl, pr->size);

	scan((byte*)&runtime_m0, sizeof runtime_m0);
	scan((byte*)&runtime_g0, sizeof runtime_g0);
	scan((byte*)&runtime_allg, sizeof runtime_allg);
	scan((byte*)&runtime_allm, sizeof runtime_allm);
	runtime_MProf_Mark(scan);
	runtime_time_scan(scan);

	for(gp=runtime_allg; gp!=nil; gp=gp->alllink) {
			runtime_printf("unexpected G.status %d\n", gp->status);
			runtime_throw("mark - bad status");
			if(gp != runtime_g())
				runtime_throw("mark - world not stopped");

	// mark things pointed at by objects with finalizers
	if(scan == debug_scanblock)
		runtime_walkfintab(debug_markfin, scan);
	else
		runtime_walkfintab(markfin, scan);

	for(fb=allfin; fb; fb=fb->alllink)
		scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]));

	// in multiproc mode, join in the queued work.
handlespecial(byte *p, uintptr size)
	const struct __go_func_type *ft;

	if(!runtime_getfinalizer(p, true, &fn, &ft)) {
		runtime_setblockspecial(p, false);
		runtime_MProf_Free(p, size);

	runtime_lock(&finlock);
	if(finq == nil || finq->cnt == finq->cap) {
			finc = runtime_SysAlloc(PageSize);
			finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
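			// (Assumes FinBlock ends in a one-element Finalizer
			// array whose first slot is already counted in
			// sizeof(FinBlock); then cap is the number of
			// Finalizer slots that fit in one page.)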
			finc->alllink = allfin;

	f = &finq->fin[finq->cnt];

	runtime_unlock(&finlock);
// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
	arena_start = runtime_mheap.arena_start;

		if(!runtime_casp(&work.spans, s, s->allnext))
		if(s->state != MSpanInUse)
		p = (byte*)(s->start << PageShift);
			size = s->npages<<PageShift;
			// Chunk full of small blocks.
			size = runtime_class_to_size[cl];
			npages = runtime_class_to_allocnpages[cl];
			n = (npages << PageShift) / size;
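			// (n is the number of small objects the span holds.)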
		// Sweep through n objects of given size starting at p.
		// This thread owns the span now, so it can manipulate
		// the block bitmap without atomic operations.
		for(; n > 0; n--, p += size) {
			uintptr off, *bitp, shift, bits;

			off = (uintptr*)p - (uintptr*)arena_start;
			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
			shift = off % wordsPerBitmapWord;

			if((bits & bitAllocated) == 0)

			if((bits & bitMarked) != 0) {
				if(!(bits & bitSpecial))
					runtime_printf("found spurious mark on %p\n", p);
				*bitp &= ~(bitSpecial<<shift);
				*bitp &= ~(bitMarked<<shift);

			// Special means it has a finalizer or is being profiled.
			// In DebugMark mode, the bit has been coopted so
			// we have to assume all blocks are special.
			if(DebugMark || (bits & bitSpecial) != 0) {
				if(handlespecial(p, size))

			// Mark freed; restore block boundary bit.
			*bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);

			if(s->sizeclass == 0) {
				runtime_unmarkspan(p, 1<<PageShift);
				*(uintptr*)p = 1;	// needs zeroing
				runtime_MHeap_Free(&runtime_mheap, s, 1);
				// Free small object.
				if(size > sizeof(uintptr))
					((uintptr*)p)[1] = 1;	// mark as "needs to be zeroed"
				c->local_by_size[s->sizeclass].nfree++;
				runtime_MCache_Free(c, p, s->sizeclass, size);
			c->local_alloc -= size;
runtime_gchelper(void)
	// Wait until main proc is ready for mark help.
	runtime_lock(&work.markgate);
	runtime_unlock(&work.markgate);

	// Wait until main proc is ready for sweep help.
	runtime_lock(&work.sweepgate);
	runtime_unlock(&work.sweepgate);

	if(runtime_xadd(&work.ndone, +1) == work.nproc-1)
		runtime_notewakeup(&work.alldone);
// Semaphore, not Lock, so that the goroutine
// reschedules when there is contention rather
// than spinning.
static uint32 gcsema = 1;
// Initialized from $GOGC. GOGC=off means no gc.
//
// Next gc is after we've allocated an extra amount of
// memory proportional to the amount already in use.
// If gcpercent=100 and we're using 4M, we'll gc again
// when we get to 8M. This keeps the gc cost in linear
// proportion to the allocation cost. Adjusting gcpercent
// just changes the linear constant (and also the amount of
// extra memory used).
static int32 gcpercent = -2;
	for(m=runtime_allm; m; m=m->alllink)
		runtime_MCache_ReleaseAll(m->mcache);

	for(m=runtime_allm; m; m=m->alllink) {
		runtime_purgecachedstats(m);
		// stacks_inuse += m->stackalloc->inuse;
		// stacks_sys += m->stackalloc->sys;
		for(i=0; i<nelem(c->local_by_size); i++) {
			mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
			c->local_by_size[i].nmalloc = 0;
			mstats.by_size[i].nfree += c->local_by_size[i].nfree;
			c->local_by_size[i].nfree = 0;

	mstats.stacks_inuse = stacks_inuse;
	mstats.stacks_sys = stacks_sys;
runtime_gc(int32 force)
	int64 t0, t1, t2, t3;
	uint64 heap0, heap1, obj0, obj1;

	// The gc is turned off (via enablegc) until
	// the bootstrap has completed.
	// Also, malloc gets called in the guts
	// of a number of libraries that might be
	// holding locks. To avoid priority inversion
	// problems, don't bother trying to run gc
	// while holding a lock. The next mallocgc
	// without a lock will do the gc instead.
	if(!mstats.enablegc || m->locks > 0 || runtime_panicking)

	if(gcpercent == -2) {	// first time through
		p = runtime_getenv("GOGC");
		if(p == nil || p[0] == '\0')
		else if(runtime_strcmp((const char*)p, "off") == 0)
			gcpercent = runtime_atoi(p);

		p = runtime_getenv("GOGCTRACE");
			gctrace = runtime_atoi(p);

	runtime_semacquire(&gcsema);
	if(!force && mstats.heap_alloc < mstats.next_gc) {
		runtime_semrelease(&gcsema);

	t0 = runtime_nanotime();

	runtime_stoptheworld();

	heap0 = mstats.heap_alloc;
	obj0 = mstats.nmalloc - mstats.nfree;

	runtime_lock(&work.markgate);
	runtime_lock(&work.sweepgate);

	if(runtime_gomaxprocs > 1 && runtime_ncpu > 1) {
		runtime_noteclear(&work.alldone);
		work.nproc += runtime_helpgc(&extra);

	runtime_unlock(&work.markgate);	// let the helpers in
		mark(debug_scanblock);
	t1 = runtime_nanotime();

	work.spans = runtime_mheap.allspans;
	runtime_unlock(&work.sweepgate);	// let the helpers in
		runtime_notesleep(&work.alldone);
	t2 = runtime_nanotime();

	mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
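	// For example, with gcpercent == 100 the next collection triggers
	// once the live heap has doubled.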
	m->locks++;	// disable gc during the mallocs in newproc

	// kick off or wake up goroutine to run queued finalizers
		fing = __go_go(runfinq, nil);
		runtime_ready(fing);

	heap1 = mstats.heap_alloc;
	obj1 = mstats.nmalloc - mstats.nfree;

	t3 = runtime_nanotime();
	mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0;
	mstats.pause_total_ns += t3 - t0;
1085 runtime_printf("pause %llu\n", (unsigned long long)t3-t0);
1088 runtime_printf("gc%d: %llu+%llu+%llu ms %llu -> %llu MB %llu -> %llu (%llu-%llu) objects %llu pointer lookups (%llu size, %llu addr) %llu handoff\n",
1089 mstats.numgc, (unsigned long long)(t1-t0)/1000000, (unsigned long long)(t2-t1)/1000000, (unsigned long long)(t3-t2)/1000000,
1090 (unsigned long long)heap0>>20, (unsigned long long)heap1>>20, (unsigned long long)obj0, (unsigned long long)obj1,
1091 (unsigned long long)mstats.nmalloc, (unsigned long long)mstats.nfree,
1092 (unsigned long long)nlookup, (unsigned long long)nsizelookup, (unsigned long long)naddrlookup, (unsigned long long) nhandoff);
1095 runtime_semrelease(&gcsema);
1097 // If we could have used another helper proc, start one now,
1098 // in the hope that it will be available next time.
1099 // It would have been even better to start it before the collection,
1100 // but doing so requires allocating memory, so it's tricky to
1101 // coordinate. This lazy approach works out in practice:
1102 // we don't mind if the first couple gc rounds don't have quite
1103 // the maximum number of procs.
1104 runtime_starttheworld(extra);
1106 // give the queued finalizers, if any, a chance to run
1110 if(gctrace > 1 && !force)
void runtime_UpdateMemStats(void)
  __asm__("libgo_runtime.runtime.UpdateMemStats");

runtime_UpdateMemStats(void)
	// Have to acquire gcsema to stop the world,
	// because stoptheworld can only be used by
	// one goroutine at a time, and there might be
	// a pending garbage collection already calling it.
	runtime_semacquire(&gcsema);

	runtime_stoptheworld();

	runtime_semrelease(&gcsema);
	runtime_starttheworld(false);
runfinq(void* dummy __attribute__ ((unused)))
	FinBlock *fb, *next;

		// There's no need for a lock in this section
		// because it only conflicts with the garbage
		// collector, and the garbage collector only
		// runs when everyone else is stopped, and
		// runfinq only stops at the gosched() or
		// during the calls in the for loop.
			gp->status = Gwaiting;
			gp->waitreason = "finalizer wait";

		for(; fb; fb=next) {
			for(i=0; i<(uint32)fb->cnt; i++) {
				params[0] = &f->arg;
				runtime_setblockspecial(f->arg, false);
				reflect_call(f->ft, (void*)f->fn, 0, 0, params, nil);

		runtime_gc(1);	// trigger another gc to clean up the finalized objects, if possible
// mark the block at v of size n as allocated.
// If noptr is true, mark it as having no pointers.
runtime_markallocated(void *v, uintptr n, bool noptr)
	uintptr *b, obits, bits, off, shift;

		// runtime_printf("markallocated %p+%p\n", v, n);

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("markallocated: bad pointer");

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;	// word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

		bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift);
			bits |= bitNoPointers<<shift;
		if(runtime_singleproc) {

			// more than one goroutine is potentially running: use atomic op
			if(runtime_casp((void**)b, (void*)obits, (void*)bits))
// mark the block at v of size n as freed.
runtime_markfreed(void *v, uintptr n)
	uintptr *b, obits, bits, off, shift;

		// runtime_printf("markfreed %p+%p\n", v, n);

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("markfreed: bad pointer");
	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;	// word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

		bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
		if(runtime_singleproc) {

			// more than one goroutine is potentially running: use atomic op
			if(runtime_casp((void**)b, (void*)obits, (void*)bits))
// check that the block at v of size n is marked freed.
runtime_checkfreed(void *v, uintptr n)
	uintptr *b, bits, off, shift;

	if(!runtime_checking)

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		return;	// not allocated, so okay

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;	// word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	if((bits & bitAllocated) != 0) {
		runtime_printf("checkfreed %p+%p: off=%p have=%p\n",
			v, (void*)n, (void*)off, (void*)(bits & bitMask));
		runtime_throw("checkfreed: not freed");
// mark the span of memory at v as having n blocks of the given size.
// if leftover is true, there is left over space at the end of the span.
runtime_markspan(void *v, uintptr size, uintptr n, bool leftover)
	uintptr *b, off, shift;

	if((byte*)v+size*n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("markspan: bad pointer");

	if(leftover)	// mark a boundary just past end of last block too
	for(; n-- > 0; p += size) {
		// Okay to use non-atomic ops here, because we control
		// the entire span, and each bitmap word has bits for only
		// one span, so no other goroutines are changing these
		// bitmap words.
		off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start;	// word offset
		b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		*b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
// unmark the span of memory at v of length n bytes.
runtime_unmarkspan(void *v, uintptr n)
	uintptr *p, *b, off;

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("unmarkspan: bad pointer");

	off = p - (uintptr*)runtime_mheap.arena_start;	// word offset
	if(off % wordsPerBitmapWord != 0)
		runtime_throw("unmarkspan: unaligned pointer");
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;

	if(n%wordsPerBitmapWord != 0)
		runtime_throw("unmarkspan: unaligned length");
	// Okay to use non-atomic ops here, because we control
	// the entire span, and each bitmap word has bits for only
	// one span, so no other goroutines are changing these
	// bitmap words.
	n /= wordsPerBitmapWord;
runtime_blockspecial(void *v)
	uintptr *b, off, shift;

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	return (*b & (bitSpecial<<shift)) != 0;

runtime_setblockspecial(void *v, bool s)
	uintptr *b, off, shift, bits, obits;

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

			bits = obits | (bitSpecial<<shift);
			bits = obits & ~(bitSpecial<<shift);
		if(runtime_singleproc) {

			// more than one goroutine is potentially running: use atomic op
			if(runtime_casp((void**)b, (void*)obits, (void*)bits))
runtime_MHeap_MapBits(MHeap *h)
	// Caller has added extra mappings to the arena.
	// Add extra mappings of bitmap words as needed.
	// We allocate extra bitmap pieces in chunks of bitmapChunk.

	n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
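	// Round n up to a multiple of bitmapChunk; this relies on
	// bitmapChunk being a power of two.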
	n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
	if(h->bitmap_mapped >= n)

	runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped);
	h->bitmap_mapped = n;