// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

	PtrSize = sizeof(void*),
	// Four bits per word (see #defines below).
	wordsPerBitmapWord = sizeof(void*)*8/4,
	bitShift = sizeof(void*)*8/4,

// Bits in per-word bitmap.
// #defines because enum might not be able to hold the values.
//
// Each word in the bitmap describes wordsPerBitmapWord words
// of heap memory. There are 4 bitmap bits dedicated to each heap word,
// so on a 64-bit system there is one bitmap word per 16 heap words.
// The bits in the word are packed together by type first, then by
// heap location, so each 64-bit bitmap word consists of, from top to bottom,
// the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
// then the 16 bitNoPointers/bitBlockBoundary bits, then the 16 bitAllocated bits.
// This layout makes it easier to iterate over the bits of a given type.
//
// The bitmap starts at mheap.arena_start and extends *backward* from
// there. On a 64-bit system the off'th word in the arena is tracked by
// the off/16+1'th word before mheap.arena_start. (On a 32-bit system,
// the only difference is that the divisor is 8.)
//
// To pull out the bits corresponding to a given pointer p, we use:
//
//	off = p - (uintptr*)mheap.arena_start;  // word offset
//	b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
//	shift = off % wordsPerBitmapWord;
//	bits = *b >> shift;
//	/* then test bits & bitAllocated, bits & bitMarked, etc. */
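//
// For example (64-bit, so wordsPerBitmapWord==16): the heap word at word
// offset off==40 from arena_start is described by the bitmap word
//	b = (uintptr*)mheap.arena_start - 40/16 - 1;	// 3 words back
// at shift == 40%16 == 8, so its bitAllocated bit is bit 8 of *b and
// its bitMarked bit is bit 8+2*16 == 40.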
#define bitAllocated ((uintptr)1<<(bitShift*0))
#define bitNoPointers ((uintptr)1<<(bitShift*1)) /* when bitAllocated is set */
#define bitMarked ((uintptr)1<<(bitShift*2)) /* when bitAllocated is set */
#define bitSpecial ((uintptr)1<<(bitShift*3)) /* when bitAllocated is set - has finalizer or being profiled */
#define bitBlockBoundary ((uintptr)1<<(bitShift*1)) /* when bitAllocated is NOT set */

#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
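
// To illustrate the packing (64-bit, bitShift==16): for a heap word at
// shift s within its bitmap word, bitAllocated is bit s, the shared
// bitNoPointers/bitBlockBoundary bit is s+16, bitMarked is s+32 and
// bitSpecial is s+48; whether the s+16 bit means "no pointers" or
// "block boundary" depends on whether bitAllocated is set.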
static uint64 nlookup;
static uint64 nsizelookup;
static uint64 naddrlookup;

typedef struct Workbuf Workbuf;

static bool finstarted;
static pthread_mutex_t finqlock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t finqcond = PTHREAD_COND_INITIALIZER;
static Finalizer *finq;
static int32 fingwait;

static void runfinq(void*);
static Workbuf* getempty(Workbuf*);
static Workbuf* getfull(Workbuf*);

// scanblock scans a block of n bytes starting at pointer b for references
// to other objects, scanning any it finds recursively until there are no
// unscanned objects left. Instead of using an explicit recursion, it keeps
// a work list in the Workbuf* structures and loops in the main function
// body. Keeping an explicit work list is easier on the stack allocator and
// more efficient.
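//
// In outline (a sketch of the control flow below, eliding details):
//
//	for(;;) {
//		scan the n bytes at b, queueing each candidate pointer via *w++;
//		when the local buffer empties, refill it with getfull;
//		pop the next block into b and n, or stop when no work remains;
//	}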
scanblock(byte *b, int64 n)
	byte *obj, *arena_start, *p;
	uintptr size, *bitp, bits, shift, i, j, x, xbits, off;

	if((int64)(uintptr)n != n || n < 0) {
		// runtime_printf("scanblock %p %lld\n", b, (long long)n);
		runtime_throw("scanblock");

	// Memory arena parameters.
	arena_start = runtime_mheap.arena_start;

	wbuf = nil;  // current work buffer
	ew = nil;  // end of work buffer
	bw = nil;  // beginning of work buffer
	w = nil;  // current pointer into work buffer

	// Align b to a word boundary.
	off = (uintptr)b & (PtrSize-1);
		// Each iteration scans the block b of length n, queueing pointers in
		// the work buffer.
			runtime_printf("scanblock %p %lld\n", b, (long long)n);

		for(i=0; i<(uintptr)n; i++) {
			// Words outside the arena cannot be pointers.
			if((byte*)obj < arena_start || (byte*)obj >= runtime_mheap.arena_used)

			// obj may be a pointer to a live object.
			// Try to find the beginning of the object.

			// Round down to word boundary.
			obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));

			// Find bits for this word.
			off = (uintptr*)obj - (uintptr*)arena_start;
			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
			shift = off % wordsPerBitmapWord;
			bits = xbits >> shift;

			// Pointing at the beginning of a block?
			if((bits & (bitAllocated|bitBlockBoundary)) != 0)

			// Pointing just past the beginning?
			// Scan backward a little to find a block boundary.
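			// For example, if obj points two words past a block start
			// recorded in the same bitmap word, the loop below finds the
			// boundary bit at j==shift-2 and rewinds obj by 2*PtrSize.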
			for(j=shift; j-->0; ) {
				if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
					obj = (byte*)obj - (shift-j)*PtrSize;

			// Otherwise consult span table to find beginning.
			// (Manually inlined copy of MHeap_LookupMaybe.)
			k = (uintptr)obj>>PageShift;
			if(sizeof(void*) == 8)
				x -= (uintptr)arena_start>>PageShift;
			s = runtime_mheap.map[x];
			if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
			p = (byte*)((uintptr)s->start<<PageShift);
			if(s->sizeclass == 0) {
				if((byte*)obj >= (byte*)s->limit)
				size = runtime_class_to_size[s->sizeclass];
				int32 i = ((byte*)obj - p)/size;

			// Now that we know the object header, reload bits.
			off = (uintptr*)obj - (uintptr*)arena_start;
			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
			shift = off % wordsPerBitmapWord;
			bits = xbits >> shift;
			// Now we have bits, bitp, and shift correct for
			// obj pointing at the base of the object.
			// If not allocated or already marked, done.
			if((bits & bitAllocated) == 0 || (bits & bitMarked) != 0)
			*bitp |= bitMarked<<shift;

			// If object has no pointers, don't need to scan further.
			if((bits & bitNoPointers) != 0)

			// If buffer is full, get a new one.
				wbuf = getempty(wbuf);
				bw = (void**)wbuf->w;
				ew = bw + nelem(wbuf->w);

		// Done scanning [b, b+n). Prepare for the next iteration of
		// the loop by setting b and n to the parameters for the next block.

		// Fetch b from the work buffers.
			// Emptied our buffer: refill.
			wbuf = getfull(wbuf);
			bw = (void**)wbuf->w;
			ew = (void**)(wbuf->w + nelem(wbuf->w));
		// Figure out n = size of b. Start by loading bits for b.
		off = (uintptr*)b - (uintptr*)arena_start;
		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		bits = xbits >> shift;

		// Might be small; look for nearby block boundary.
		// A block boundary is marked by either bitBlockBoundary
		// or bitAllocated being set (see notes near their definition).
			boundary = bitBlockBoundary|bitAllocated

		// Look for a block boundary both after and before b
		// in the same bitmap word.
		//
		// A block boundary j words after b is indicated by
		//	bits>>j & boundary
		// assuming shift+j < bitShift. (If shift+j >= bitShift then
		// we'll be bleeding other bit types like bitMarked into our test.)
		// Instead of inserting the conditional shift+j < bitShift into the loop,
		// we can let j range from 1 to bitShift as long as we first
		// apply a mask to keep only the bits corresponding
		// to shift+j < bitShift aka j < bitShift-shift.
		bits &= (boundary<<(bitShift-shift)) - boundary;
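		// For example (64-bit, bitShift==16), with shift==13:
		// boundary == 0x10001, bitShift-shift == 3, and the mask is
		//	(0x10001<<3) - 0x10001 == 0x70007,
		// which keeps exactly the boundary-test bits for j < 3.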
		// A block boundary j words before b is indicated by
		//	xbits>>(shift-j) & boundary
		// (assuming shift >= j). There is no cleverness here to
		// avoid the test, because when j gets too large the shift
		// turns negative, which is undefined in C.

		for(j=1; j<bitShift; j++) {
			if(((bits>>j)&boundary) != 0 || (shift>=j && ((xbits>>(shift-j))&boundary) != 0)) {

		// Fall back to asking span about size class.
		// (Manually inlined copy of MHeap_Lookup.)
		x = (uintptr)b>>PageShift;
		if(sizeof(void*) == 8)
			x -= (uintptr)arena_start>>PageShift;
		s = runtime_mheap.map[x];
		if(s->sizeclass == 0)
			n = s->npages<<PageShift;
			n = runtime_class_to_size[s->sizeclass];
// Get an empty work buffer off the work.empty list,
// allocating new buffers as needed.
		work.empty = b->next;
		if(work.nchunk < sizeof *b) {
			work.chunk = runtime_SysAlloc(work.nchunk);
		b = (Workbuf*)work.chunk;
		work.chunk += sizeof *b;
		work.nchunk -= sizeof *b;

// Get a full work buffer off the work.full list, or return nil.
		b->next = work.empty;

// Scanstack calls scanblock on each of gp's stack segments.

	if(!runtime_mlookup(v, (byte**)&v, &size, nil) || !runtime_blockspecial(v))
		runtime_throw("mark - finalizer inconsistency");

	// do not mark the finalizer block itself. just mark the things it points at.

	struct root_list *next;

static struct root_list* roots;

__go_register_gc_roots (struct root_list* r)
	// FIXME: This needs locking if multiple goroutines can call
	// dlopen simultaneously.
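	// A sketch of a registration (hedged: normally the Go frontend
	// emits these lists; the inline roots[] array and its nil-decl
	// terminator are inferred from the mark loop below, and
	// some_global is a hypothetical root):
	//
	//	static struct root_list mylist = {
	//		nil,
	//		{ { &some_global, sizeof some_global }, { nil, 0 } }
	//	};
	//	__go_register_gc_roots(&mylist);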
	struct root_list *pl;

	for(pl = roots; pl != nil; pl = pl->next) {
		struct root* pr = &pl->roots[0];
			void *decl = pr->decl;
			scanblock(decl, pr->size);

	scanblock((byte*)&m0, sizeof m0);
	scanblock((byte*)&finq, sizeof finq);
	runtime_MProf_Mark(scanblock);

	__go_scanstacks(scanblock);

	// mark things pointed at by objects with finalizers
	runtime_walkfintab(markfin, scanblock);

// Sweep frees or calls finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.

	for(s = runtime_mheap.allspans; s != nil; s = s->allnext) {
		if(s->state != MSpanInUse)

		p = (byte*)(s->start << PageShift);
			size = s->npages<<PageShift;
			// Chunk full of small blocks.
			size = runtime_class_to_size[cl];
			npages = runtime_class_to_allocnpages[cl];
			n = (npages << PageShift) / size;
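			// For example, with PageShift==12 (4K pages) a one-page
			// span of 32-byte objects yields n == 4096/32 == 128
			// objects to sweep below.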
		// sweep through n objects of given size starting at p.
		for(; n > 0; n--, p += size) {
			uintptr off, *bitp, shift, bits;

			off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start;
			bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
			shift = off % wordsPerBitmapWord;

			if((bits & bitAllocated) == 0)

			if((bits & bitMarked) != 0) {
				*bitp &= ~(bitMarked<<shift);

			if((bits & bitSpecial) != 0) {
				// Special means it has a finalizer or is being profiled.
				f = runtime_getfinalizer(p, 1);
				runtime_MProf_Free(p, size);

			// Mark freed; restore block boundary bit.
			*bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);

			if(s->sizeclass == 0) {
				runtime_unmarkspan(p, 1<<PageShift);
				*(uintptr*)p = 1;  // needs zeroing
				runtime_MHeap_Free(&runtime_mheap, s, 1);
				// Free small object.
				if(size > sizeof(uintptr))
					((uintptr*)p)[1] = 1;  // mark as "needs to be zeroed"
				c->local_by_size[s->sizeclass].nfree++;
				runtime_MCache_Free(c, p, s->sizeclass, size);
			c->local_alloc -= size;
static pthread_mutex_t gcsema = PTHREAD_MUTEX_INITIALIZER;

// Initialized from $GOGC. GOGC=off means no gc.
//
// Next gc is after we've allocated an extra amount of
// memory proportional to the amount already in use.
// If gcpercent=100 and we're using 4M, we'll gc again
// when we get to 8M. This keeps the gc cost in linear
// proportion to the allocation cost. Adjusting gcpercent
// just changes the linear constant (and also the amount of
// extra memory used).
static int32 gcpercent = -2;

runtime_gc(int32 force __attribute__ ((unused)))
	int64 t0, t1, t2, t3;
	uint64 heap0, heap1, obj0, obj1;

	// The gc is turned off (via enablegc) until
	// the bootstrap has completed.
	// Also, malloc gets called in the guts
	// of a number of libraries that might be
	// holding locks. To avoid priority inversion
	// problems, don't bother trying to run gc
	// while holding a lock. The next mallocgc
	// without a lock will do the gc instead.
	if(!mstats.enablegc || m->locks > 0 /* || runtime_panicking */)
	if(gcpercent == -2) {  // first time through
		p = runtime_getenv("GOGC");
		if(p == nil || p[0] == '\0')
		else if(runtime_strcmp(p, "off") == 0)
			gcpercent = runtime_atoi(p);

		p = runtime_getenv("GOGCTRACE");
			gctrace = runtime_atoi(p);

	pthread_mutex_lock(&finqlock);
	pthread_mutex_lock(&gcsema);
	if(!force && mstats.heap_alloc < mstats.next_gc) {
		pthread_mutex_unlock(&gcsema);
		pthread_mutex_unlock(&finqlock);

	t0 = runtime_nanotime();

	runtime_stoptheworld();
	if(runtime_mheap.Lock.key != 0)
		runtime_throw("runtime_mheap locked during gc");

	heap0 = mstats.heap_alloc;
	obj0 = mstats.nmalloc - mstats.nfree;

	t1 = runtime_nanotime();
	t2 = runtime_nanotime();

	mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
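	// For example, with gcpercent==100 and 4 MB live this arms the next
	// collection at 8 MB; with gcpercent==50 it would be 6 MB.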
	m->locks++;  // disable gc during the mallocs in newproc

	heap1 = mstats.heap_alloc;
	obj1 = mstats.nmalloc - mstats.nfree;

	t3 = runtime_nanotime();
	mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0;
	mstats.pause_total_ns += t3 - t0;

		runtime_printf("pause %llu\n", (unsigned long long)(t3-t0));

		runtime_printf("gc%d: %llu+%llu+%llu ms %llu -> %llu MB %llu -> %llu (%llu-%llu) objects %llu pointer lookups (%llu size, %llu addr)\n",
			mstats.numgc, (unsigned long long)(t1-t0)/1000000, (unsigned long long)(t2-t1)/1000000, (unsigned long long)(t3-t2)/1000000,
			(unsigned long long)heap0>>20, (unsigned long long)heap1>>20, (unsigned long long)obj0, (unsigned long long)obj1,
			(unsigned long long)mstats.nmalloc, (unsigned long long)mstats.nfree,
			(unsigned long long)nlookup, (unsigned long long)nsizelookup, (unsigned long long)naddrlookup);

	pthread_mutex_unlock(&gcsema);
	runtime_starttheworld();

	// finqlock is still held.
		// kick off or wake up goroutine to run queued finalizers
			__go_go(runfinq, nil);
			pthread_cond_signal(&finqcond);
	pthread_mutex_unlock(&finqlock);

	if(gctrace > 1 && !force)
void runtime_UpdateMemStats(void)
  __asm__("libgo_runtime.runtime.UpdateMemStats");

runtime_UpdateMemStats(void)
	// Have to acquire gcsema to stop the world,
	// because stoptheworld can only be used by
	// one goroutine at a time, and there might be
	// a pending garbage collection already calling it.
	pthread_mutex_lock(&gcsema);
	runtime_stoptheworld();
	pthread_mutex_unlock(&gcsema);
	runtime_starttheworld();

		pthread_mutex_lock(&finqlock);
			pthread_cond_wait(&finqcond, &finqlock);
			pthread_mutex_unlock(&finqlock);
		pthread_mutex_unlock(&finqlock);
			reflect_call(f->ft, (void*)f->fn, 0, 0, params, nil);
		runtime_gc(1);  // trigger another gc to clean up the finalized objects, if possible

#define runtime_singleproc 0
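
// markallocated, markfreed and setblockspecial below share one update
// idiom; their enclosing retry loops are elided here, so this is a sketch
// of the shape (re-read the bitmap word, compute the new bits, and retry
// until a plain store or runtime_casp succeeds):
//
//	for(;;) {
//		obits = *b;
//		bits = ...;	/* updated copy of obits */
//		if(runtime_singleproc) {
//			*b = bits;
//			break;
//		} else {
//			// more than one goroutine: use atomic op
//			if(runtime_casp((void**)b, (void*)obits, (void*)bits))
//				break;
//		}
//	}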
// mark the block at v of size n as allocated.
// If noptr is true, mark it as having no pointers.
runtime_markallocated(void *v, uintptr n, bool noptr)
	uintptr *b, obits, bits, off, shift;

	// runtime_printf("markallocated %p+%p\n", v, n);

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("markallocated: bad pointer");

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;  // word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

		bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift);
			bits |= bitNoPointers<<shift;
		if(runtime_singleproc) {
			// more than one goroutine is potentially running: use atomic op
			if(runtime_casp((void**)b, (void*)obits, (void*)bits))
// mark the block at v of size n as freed.
runtime_markfreed(void *v, uintptr n)
	uintptr *b, obits, bits, off, shift;

	// runtime_printf("markfreed %p+%p\n", v, n);

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("markfreed: bad pointer");

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;  // word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

		bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
		if(runtime_singleproc) {
			// more than one goroutine is potentially running: use atomic op
			if(runtime_casp((void**)b, (void*)obits, (void*)bits))
// check that the block at v of size n is marked freed.
runtime_checkfreed(void *v, uintptr n)
	uintptr *b, bits, off, shift;

	if(!runtime_checking)

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		return;  // not allocated, so okay

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;  // word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	if((bits & bitAllocated) != 0) {
		runtime_printf("checkfreed %p+%p: off=%p have=%p\n",
			v, (void*)n, (void*)off, (void*)(bits & bitMask));
		runtime_throw("checkfreed: not freed");
// mark the span of memory at v as having n blocks of the given size.
// if leftover is true, there is left over space at the end of the span.
runtime_markspan(void *v, uintptr size, uintptr n, bool leftover)
	uintptr *b, off, shift;

	if((byte*)v+size*n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("markspan: bad pointer");

	if(leftover)  // mark a boundary just past end of last block too
	for(; n-- > 0; p += size) {
		// Okay to use non-atomic ops here, because we control
		// the entire span, and each bitmap word has bits for only
		// one span, so no other goroutines are changing these
		// bitmap words.
		off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start;  // word offset
		b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		*b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
// unmark the span of memory at v of length n bytes.
runtime_unmarkspan(void *v, uintptr n)
	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("unmarkspan: bad pointer");

	off = p - (uintptr*)runtime_mheap.arena_start;  // word offset
	if(off % wordsPerBitmapWord != 0)
		runtime_throw("unmarkspan: unaligned pointer");
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;

	if(n%wordsPerBitmapWord != 0)
		runtime_throw("unmarkspan: unaligned length");

	// Okay to use non-atomic ops here, because we control
	// the entire span, and each bitmap word has bits for only
	// one span, so no other goroutines are changing these
	// bitmap words.
	n /= wordsPerBitmapWord;
runtime_blockspecial(void *v)
	uintptr *b, off, shift;

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	return (*b & (bitSpecial<<shift)) != 0;

runtime_setblockspecial(void *v)
	uintptr *b, off, shift, bits, obits;

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

		bits = obits | (bitSpecial<<shift);
		if(runtime_singleproc) {
			// more than one goroutine is potentially running: use atomic op
			if(runtime_casp((void**)b, (void*)obits, (void*)bits))
runtime_MHeap_MapBits(MHeap *h)
	// Caller has added extra mappings to the arena.
	// Add extra mappings of bitmap words as needed.
	// We allocate extra bitmap pieces in chunks of bitmapChunk.

	n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
	n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
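	// The line above is the usual power-of-two round-up idiom:
	// assuming bitmapChunk is a power of two,
	//	(n+bitmapChunk-1) & ~(bitmapChunk-1)
	// rounds n up to the next multiple of bitmapChunk.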
	if(h->bitmap_mapped >= n)

	runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped);
	h->bitmap_mapped = n;