1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
13 #ifdef USING_SPLIT_STACK
15 extern void * __splitstack_find (void *, void *, size_t *, void **, void **,
18 extern void * __splitstack_find_context (void *context[10], size_t *, void **,
25 PtrSize = sizeof(void*),
26 DebugMark = 0, // run second pass to check mark
28 // Four bits per word (see #defines below).
29 wordsPerBitmapWord = sizeof(void*)*8/4,
30 bitShift = sizeof(void*)*8/4,
33 // Bits in per-word bitmap.
34 // #defines because enum might not be able to hold the values.
36 // Each word in the bitmap describes wordsPerBitmapWord words
37 // of heap memory. There are 4 bitmap bits dedicated to each heap word,
38 // so on a 64-bit system there is one bitmap word per 16 heap words.
39 // The bits in the word are packed together by type first, then by
40 // heap location, so each 64-bit bitmap word consists of, from top to bottom,
41 // the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
42 // then the 16 bitNoPointers/bitBlockBoundary bits, then the 16 bitAllocated bits.
43 // This layout makes it easier to iterate over the bits of a given type.
45 // The bitmap starts at mheap.arena_start and extends *backward* from
46 // there. On a 64-bit system the off'th word in the arena is tracked by
47 // the off/16+1'th word before mheap.arena_start. (On a 32-bit system,
48 // the only difference is that the divisor is 8.)
50 // To pull out the bits corresponding to a given pointer p, we use:
52 // off = p - (uintptr*)mheap.arena_start; // word offset
53 // b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
54 // shift = off % wordsPerBitmapWord
55 // bits = *b >> shift;
56 // /* then test bits & bitAllocated, bits & bitMarked, etc. */
58 #define bitAllocated ((uintptr)1<<(bitShift*0))
59 #define bitNoPointers ((uintptr)1<<(bitShift*1)) /* when bitAllocated is set */
60 #define bitMarked ((uintptr)1<<(bitShift*2)) /* when bitAllocated is set */
61 #define bitSpecial ((uintptr)1<<(bitShift*3)) /* when bitAllocated is set - has finalizer or being profiled */
62 #define bitBlockBoundary ((uintptr)1<<(bitShift*1)) /* when bitAllocated is NOT set */
64 #define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
66 // Holding worldsema grants an M the right to try to stop the world.
69 // runtime_semacquire(&runtime_worldsema);
71 // runtime_stoptheworld();
76 // runtime_semrelease(&runtime_worldsema);
77 // runtime_starttheworld();
79 uint32 runtime_worldsema = 1;
81 // TODO: Make these per-M.
82 static uint64 nhandoff;
86 typedef struct Workbuf Workbuf;
94 typedef struct Finalizer Finalizer;
99 const struct __go_func_type *ft;
102 typedef struct FinBlock FinBlock;
113 static FinBlock *finq; // list of finalizers that are to be executed
114 static FinBlock *finc; // cache of free blocks
115 static FinBlock *allfin; // list of all blocks
117 static int32 fingwait;
119 static void runfinq(void*);
120 static Workbuf* getempty(Workbuf*);
121 static Workbuf* getfull(Workbuf*);
122 static void putempty(Workbuf*);
123 static Workbuf* handoff(Workbuf*);
131 volatile uint32 nwait;
132 volatile uint32 ndone;
143 // scanblock scans a block of n bytes starting at pointer b for references
144 // to other objects, scanning any it finds recursively until there are no
145 // unscanned objects left. Instead of using an explicit recursion, it keeps
146 // a work list in the Workbuf* structures and loops in the main function
147 // body. Keeping an explicit work list is easier on the stack allocator and
// NOTE(review): this listing appears subsampled -- interior lines (braces,
// declarations, some statements) are missing throughout; verify any edit
// against the full source before applying.
150 scanblock(byte *b, int64 n)
152 byte *obj, *arena_start, *arena_used, *p;
154 uintptr size, *bitp, bits, shift, i, j, x, xbits, off, nobj, nproc;
// Reject lengths that do not survive the int64 -> uintptr round trip, or are negative.
161 if((int64)(uintptr)n != n || n < 0) {
162 // runtime_printf("scanblock %p %lld\n", b, (long long)n);
163 runtime_throw("scanblock");
166 // Memory arena parameters.
167 arena_start = runtime_mheap.arena_start;
168 arena_used = runtime_mheap.arena_used;
171 wbuf = nil; // current work buffer
172 wp = nil; // storage for next queued pointer (write pointer)
173 nobj = 0; // number of queued objects
175 // Scanblock helpers pass b==nil.
176 // The main proc needs to return to make more
177 // calls to scanblock. But if work.nproc==1 then
178 // might as well process blocks as soon as we
180 keepworking = b == nil || work.nproc == 1;
182 // Align b to a word boundary.
183 off = (uintptr)b & (PtrSize-1);
190 // Each iteration scans the block b of length n, queueing pointers in
193 runtime_printf("scanblock %p %lld\n", b, (long long) n);
// Convert byte length to word count; PtrSize is 4 or 8, so the shift is 2 or 3.
196 n >>= (2+PtrSize/8); /* n /= PtrSize (4 or 8) */
197 for(i=0; i<(uintptr)n; i++) {
200 // Words outside the arena cannot be pointers.
201 if((byte*)obj < arena_start || (byte*)obj >= arena_used)
204 // obj may be a pointer to a live object.
205 // Try to find the beginning of the object.
207 // Round down to word boundary.
208 obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
210 // Find bits for this word.
211 off = (uintptr*)obj - (uintptr*)arena_start;
212 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
213 shift = off % wordsPerBitmapWord;
215 bits = xbits >> shift;
217 // Pointing at the beginning of a block?
218 if((bits & (bitAllocated|bitBlockBoundary)) != 0)
221 // Pointing just past the beginning?
222 // Scan backward a little to find a block boundary.
223 for(j=shift; j-->0; ) {
224 if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
225 obj = (byte*)obj - (shift-j)*PtrSize;
232 // Otherwise consult span table to find beginning.
233 // (Manually inlined copy of MHeap_LookupMaybe.)
234 k = (uintptr)obj>>PageShift;
236 if(sizeof(void*) == 8)
237 x -= (uintptr)arena_start>>PageShift;
238 s = runtime_mheap.map[x];
239 if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
241 p = (byte*)((uintptr)s->start<<PageShift);
242 if(s->sizeclass == 0) {
245 if((byte*)obj >= (byte*)s->limit)
247 size = runtime_class_to_size[s->sizeclass];
248 int32 i = ((byte*)obj - p)/size;
252 // Now that we know the object header, reload bits.
253 off = (uintptr*)obj - (uintptr*)arena_start;
254 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
255 shift = off % wordsPerBitmapWord;
257 bits = xbits >> shift;
260 // Now we have bits, bitp, and shift correct for
261 // obj pointing at the base of the object.
262 // Only care about allocated and not marked.
263 if((bits & (bitAllocated|bitMarked)) != bitAllocated)
// Single-proc path: plain store suffices; the multi-proc path below retries with CAS.
266 *bitp |= bitMarked<<shift;
270 if(x & (bitMarked<<shift))
272 if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
277 // If object has no pointers, don't need to scan further.
278 if((bits & bitNoPointers) != 0)
281 // If another proc wants a pointer, give it some.
282 if(nobj > 4 && work.nwait > 0 && work.full == nil) {
284 wbuf = handoff(wbuf);
286 wp = (void**)(wbuf->obj + nobj);
289 // If buffer is full, get a new one.
290 if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
293 wbuf = getempty(wbuf);
294 wp = (void**)(wbuf->obj);
302 // Done scanning [b, b+n). Prepare for the next iteration of
303 // the loop by setting b and n to the parameters for the next block.
305 // Fetch b from the work buffer.
311 // Emptied our buffer: refill.
312 wbuf = getfull(wbuf);
316 wp = (void**)(wbuf->obj + wbuf->nobj);
321 // Ask span about size class.
322 // (Manually inlined copy of MHeap_Lookup.)
323 x = (uintptr)b>>PageShift;
324 if(sizeof(void*) == 8)
325 x -= (uintptr)arena_start>>PageShift;
326 s = runtime_mheap.map[x];
327 if(s->sizeclass == 0)
328 n = s->npages<<PageShift;
330 n = runtime_class_to_size[s->sizeclass];
334 // debug_scanblock is the debug copy of scanblock.
335 // it is simpler, slower, single-threaded, recursive,
336 // and uses bitSpecial as the mark bit.
338 debug_scanblock(byte *b, int64 n)
342 uintptr size, *bitp, bits, shift, i, xbits, off;
// Only valid when DebugMark is enabled (second verification pass).
346 runtime_throw("debug_scanblock without DebugMark");
348 if((int64)(uintptr)n != n || n < 0) {
349 //runtime_printf("debug_scanblock %p %D\n", b, n);
350 runtime_throw("debug_scanblock");
353 // Align b to a word boundary.
354 off = (uintptr)b & (PtrSize-1);
362 for(i=0; i<(uintptr)n; i++) {
365 // Words outside the arena cannot be pointers.
366 if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used)
369 // Round down to word boundary.
370 obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
372 // Consult span table to find beginning.
373 s = runtime_MHeap_LookupMaybe(&runtime_mheap, obj);
378 p = (byte*)((uintptr)s->start<<PageShift);
379 if(s->sizeclass == 0) {
381 size = (uintptr)s->npages<<PageShift;
383 if((byte*)obj >= (byte*)s->limit)
385 size = runtime_class_to_size[s->sizeclass];
386 int32 i = ((byte*)obj - p)/size;
390 // Now that we know the object header, reload bits.
391 off = (uintptr*)obj - (uintptr*)runtime_mheap.arena_start;
392 bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
393 shift = off % wordsPerBitmapWord;
395 bits = xbits >> shift;
397 // Now we have bits, bitp, and shift correct for
398 // obj pointing at the base of the object.
399 // If not allocated or already marked, done.
400 if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0) // NOTE: bitSpecial not bitMarked
402 *bitp |= bitSpecial<<shift;
// Cross-check against the real mark pass: every reachable block should be bitMarked.
403 if(!(bits & bitMarked))
404 runtime_printf("found unmarked block %p in %p\n", obj, vp+i);
406 // If object has no pointers, don't need to scan further.
407 if((bits & bitNoPointers) != 0)
// Recurse directly instead of using a work list (debug-only, so depth is acceptable).
410 debug_scanblock(obj, size);
414 // Get an empty work buffer off the work.empty list,
415 // allocating new buffers as needed.
// NOTE(review): the function signature is elided in this listing; presumably
// getempty(Workbuf *b) per the forward declaration above -- confirm.
419 if(work.nproc == 1) {
420 // Put b on full list.
425 // Grab from empty list if possible.
428 work.empty = b->next;
432 // Put b on full list.
// Multi-proc path: the full and empty lists are protected by fmu/emu locks.
434 runtime_lock(&work.fmu);
437 runtime_unlock(&work.fmu);
439 // Grab from empty list if possible.
440 runtime_lock(&work.emu);
443 work.empty = b->next;
444 runtime_unlock(&work.emu);
// No free buffer available: carve a new Workbuf out of the chunk allocator.
451 if(work.nchunk < sizeof *b) {
453 work.chunk = runtime_SysAlloc(work.nchunk);
455 b = (Workbuf*)work.chunk;
456 work.chunk += sizeof *b;
457 work.nchunk -= sizeof *b;
458 runtime_unlock(&work);
// putempty: return buffer b to the work.empty free list.
// NOTE(review): signature elided in this listing; presumably putempty(Workbuf *b).
// Single-proc needs no locking; otherwise emu guards the empty list.
471 if(work.nproc == 1) {
472 b->next = work.empty;
477 runtime_lock(&work.emu);
478 b->next = work.empty;
480 runtime_unlock(&work.emu);
483 // Get a full work buffer off the work.full list, or return nil.
// The incoming buffer b (now drained) is recycled onto the empty list.
490 if(work.nproc == 1) {
491 // Put b on empty list.
493 b->next = work.empty;
496 // Grab from full list if possible.
497 // Since work.nproc==1, no one else is
498 // going to give us work.
507 // Grab buffer from full list if possible.
512 runtime_lock(&work.fmu);
513 if(work.full != nil) {
515 work.full = b1->next;
516 runtime_unlock(&work.fmu);
519 runtime_unlock(&work.fmu);
// Nothing available: register as waiting, then poll for work from other procs.
522 runtime_xadd(&work.nwait, +1);
526 runtime_lock(&work.fmu);
527 if(work.full != nil) {
528 runtime_xadd(&work.nwait, -1);
530 work.full = b1->next;
531 runtime_unlock(&work.fmu);
534 runtime_unlock(&work.fmu);
// All procs waiting means the mark phase is globally done.
537 if(work.nwait == work.nproc)
540 runtime_procyield(20);
// handoff: split buffer b so an idle proc can steal work.
// NOTE(review): most of this function is elided in the listing; presumably
// handoff(Workbuf *b) per the forward declaration -- confirm against full source.
554 // Make new buffer with half of b's pointers.
559 runtime_memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
562 // Put b on full list - let first half of b get stolen.
563 runtime_lock(&work.fmu);
566 runtime_unlock(&work.fmu);
571 // Scanstack calls scanblock on each of gp's stack segments.
// Two implementations: split-stack (walk segments via libgcc's
// __splitstack_find) and contiguous-stack (scan [bottom, top)).
573 scanstack(void (*scanblock)(byte*, int64), G *gp)
575 #ifdef USING_SPLIT_STACK
583 if(gp == runtime_g()) {
584 // Scanning our own stack.
585 sp = __splitstack_find(nil, nil, &spsize, &next_segment,
586 &next_sp, &initial_sp);
587 } else if((mp = gp->m) != nil && mp->helpgc) {
588 // gchelper's stack is in active use and has no interesting pointers.
591 // Scanning another goroutine's stack.
592 // The goroutine is usually asleep (the world is stopped).
594 // The exception is that if the goroutine is about to enter or might
595 // have just exited a system call, it may be executing code such
596 // as schedlock and may have needed to start a new stack segment.
597 // Use the stack segment and stack pointer at the time of
598 // the system call instead, since that won't change underfoot.
599 if(gp->gcstack != nil) {
601 spsize = gp->gcstack_size;
602 next_segment = gp->gcnext_segment;
603 next_sp = gp->gcnext_sp;
604 initial_sp = gp->gcinitial_sp;
606 sp = __splitstack_find_context(&gp->stack_context[0],
607 &spsize, &next_segment,
608 &next_sp, &initial_sp);
// Scan the current segment, then iterate over the remaining segments.
612 scanblock(sp, spsize);
613 while((sp = __splitstack_find(next_segment, next_sp,
614 &spsize, &next_segment,
615 &next_sp, &initial_sp)) != nil)
616 scanblock(sp, spsize);
// Non-split-stack variant: a single contiguous region.
623 if(gp == runtime_g()) {
624 // Scanning our own stack.
626 } else if((mp = gp->m) != nil && mp->helpgc) {
627 // gchelper's stack is in active use and has no interesting pointers.
630 // Scanning another goroutine's stack.
631 // The goroutine is usually asleep (the world is stopped).
632 bottom = (byte*)gp->gcnext_sp;
636 top = (byte*)gp->gcinitial_sp + gp->gcstack_size;
// Handle both stack growth directions (elided condition chooses which).
638 scanblock(bottom, top - bottom);
640 scanblock(top, bottom - top);
644 // Markfin calls scanblock on the blocks that have finalizers:
645 // the things pointed at cannot be freed until the finalizers have run.
// Sanity check: v must be a known allocation with the special bit set.
652 if(!runtime_mlookup(v, (byte**)&v, &size, nil) || !runtime_blockspecial(v))
653 runtime_throw("mark - finalizer inconsistency");
655 // do not mark the finalizer block itself. just mark the things it points at.
// Head of the linked list of registered GC root lists (one per loaded object).
659 static struct root_list* roots;
// Called by compiled Go packages to register their global roots with the GC.
662 __go_register_gc_roots (struct root_list* r)
664 // FIXME: This needs locking if multiple goroutines can call
665 // dlopen simultaneously.
// debug_markfin is the DebugMark counterpart of markfin: verify the block
// is a valid allocation, then scan what it points at with debug_scanblock.
671 debug_markfin(void *v)
675 if(!runtime_mlookup(v, (byte**)&v, &size, nil))
676 runtime_throw("debug_mark - finalizer inconsistency");
677 debug_scanblock(v, size);
// mark scans all GC roots: registered package globals, runtime globals,
// goroutine stacks, and finalizer blocks, using the supplied scan function
// (scanblock or debug_scanblock).
682 mark(void (*scan)(byte*, int64))
684 struct root_list *pl;
// Walk every registered root list (see __go_register_gc_roots).
689 for(pl = roots; pl != nil; pl = pl->next) {
690 struct root* pr = &pl->roots[0];
692 void *decl = pr->decl;
695 scanblock(decl, pr->size);
// Runtime's own globals are roots too.
700 scan((byte*)&runtime_m0, sizeof runtime_m0);
701 scan((byte*)&runtime_g0, sizeof runtime_g0);
702 scan((byte*)&runtime_allg, sizeof runtime_allg);
703 scan((byte*)&runtime_allm, sizeof runtime_allm);
704 runtime_MProf_Mark(scan);
705 runtime_time_scan(scan);
// Scan each goroutine's stack; only certain statuses are legal here.
708 for(gp=runtime_allg; gp!=nil; gp=gp->alllink) {
711 runtime_printf("unexpected G.status %d\n", gp->status);
712 runtime_throw("mark - bad status");
716 if(gp != runtime_g())
717 runtime_throw("mark - world not stopped");
728 // mark things pointed at by objects with finalizers
729 if(scan == debug_scanblock)
730 runtime_walkfintab(debug_markfin, scan);
732 runtime_walkfintab(markfin, scan);
734 for(fb=allfin; fb; fb=fb->alllink)
735 scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]));
737 // in multiproc mode, join in the queued work.
// handlespecial processes an unreachable block whose special bit is set:
// if it has a finalizer, queue the finalizer onto finq (keeping the block
// alive); otherwise clear the special bit and record the free for profiling.
742 handlespecial(byte *p, uintptr size)
745 const struct __go_func_type *ft;
// No finalizer registered: it was only being profiled.
749 if(!runtime_getfinalizer(p, true, &fn, &ft)) {
750 runtime_setblockspecial(p, false);
751 runtime_MProf_Free(p, size);
755 runtime_lock(&finlock);
// Current queue block full (or absent): take one from the finc cache,
// allocating a fresh page-sized FinBlock if the cache is empty.
756 if(finq == nil || finq->cnt == finq->cap) {
758 finc = runtime_SysAlloc(PageSize);
759 finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
760 finc->alllink = allfin;
768 f = &finq->fin[finq->cnt];
773 runtime_unlock(&finlock);
777 // Sweep frees or collects finalizers for blocks not marked in the mark phase.
778 // It clears the mark bits in preparation for the next GC round.
// NOTE(review): the function signature and several statements are elided in
// this listing; spans are claimed from work.spans via CAS so multiple procs
// can sweep concurrently.
792 arena_start = runtime_mheap.arena_start;
793 now = runtime_nanotime();
// Claim the next span; retry if another proc got it first.
799 if(!runtime_casp(&work.spans, s, s->allnext))
802 // Stamp newly unused spans. The scavenger will use that
803 // info to potentially give back some pages to the OS.
804 if(s->state == MSpanFree && s->unusedsince == 0)
805 s->unusedsince = now;
807 if(s->state != MSpanInUse)
810 p = (byte*)(s->start << PageShift);
// sizeclass 0 = large object spanning whole pages; otherwise a span of
// n small objects of the class's fixed size.
813 size = s->npages<<PageShift;
816 // Chunk full of small blocks.
817 size = runtime_class_to_size[cl];
818 npages = runtime_class_to_allocnpages[cl];
819 n = (npages << PageShift) / size;
822 // Sweep through n objects of given size starting at p.
823 // This thread owns the span now, so it can manipulate
824 // the block bitmap without atomic operations.
825 for(; n > 0; n--, p += size) {
826 uintptr off, *bitp, shift, bits;
828 off = (uintptr*)p - (uintptr*)arena_start;
829 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
830 shift = off % wordsPerBitmapWord;
833 if((bits & bitAllocated) == 0)
// Marked (reachable): clear the mark bit and keep the object.
836 if((bits & bitMarked) != 0) {
838 if(!(bits & bitSpecial))
839 runtime_printf("found spurious mark on %p\n", p);
840 *bitp &= ~(bitSpecial<<shift);
842 *bitp &= ~(bitMarked<<shift);
846 // Special means it has a finalizer or is being profiled.
847 // In DebugMark mode, the bit has been coopted so
848 // we have to assume all blocks are special.
849 if(DebugMark || (bits & bitSpecial) != 0) {
850 if(handlespecial(p, size))
854 // Mark freed; restore block boundary bit.
855 *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
858 if(s->sizeclass == 0) {
// Free large object: whole span goes back to the heap.
860 runtime_unmarkspan(p, 1<<PageShift);
861 *(uintptr*)p = 1; // needs zeroing
862 runtime_MHeap_Free(&runtime_mheap, s, 1);
864 // Free small object.
865 if(size > sizeof(uintptr))
866 ((uintptr*)p)[1] = 1; // mark as "needs to be zeroed"
867 c->local_by_size[s->sizeclass].nfree++;
868 runtime_MCache_Free(c, p, s->sizeclass, size);
870 c->local_alloc -= size;
// runtime_gchelper runs on helper Ms: it waits at the markgate/sweepgate
// (lock/unlock pairs used as barriers) for the coordinating proc to open
// each phase, assists, then signals completion via work.ndone/alldone.
877 runtime_gchelper(void)
879 // Wait until main proc is ready for mark help.
880 runtime_lock(&work.markgate);
881 runtime_unlock(&work.markgate);
884 // Wait until main proc is ready for sweep help.
885 runtime_lock(&work.sweepgate);
886 runtime_unlock(&work.sweepgate);
// Last helper to finish wakes the coordinator.
889 if(runtime_xadd(&work.ndone, +1) == work.nproc-1)
890 runtime_notewakeup(&work.alldone);
893 // Initialized from $GOGC. GOGC=off means no gc.
895 // Next gc is after we've allocated an extra amount of
896 // memory proportional to the amount already in use.
897 // If gcpercent=100 and we're using 4M, we'll gc again
898 // when we get to 8M. This keeps the gc cost in linear
899 // proportion to the allocation cost. Adjusting gcpercent
900 // just changes the linear constant (and also the amount of
901 // extra memory used).
902 static int32 gcpercent = -2;
// Release per-M caches, then aggregate per-M / per-MCache statistics into
// the global mstats. NOTE(review): the enclosing function signatures are
// elided in this listing (two separate stat helpers appear fused here);
// confirm boundaries against the full source.
909 for(m=runtime_allm; m; m=m->alllink)
910 runtime_MCache_ReleaseAll(m->mcache);
923 stacks_sys = runtime_stacks_sys;
924 for(m=runtime_allm; m; m=m->alllink) {
925 runtime_purgecachedstats(m);
926 // stacks_inuse += m->stackalloc->inuse;
927 // stacks_sys += m->stackalloc->sys;
// Fold the per-size-class counters into mstats and reset the local copies.
929 for(i=0; i<nelem(c->local_by_size); i++) {
930 mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
931 c->local_by_size[i].nmalloc = 0;
932 mstats.by_size[i].nfree += c->local_by_size[i].nfree;
933 c->local_by_size[i].nfree = 0;
936 mstats.stacks_inuse = stacks_inuse;
937 mstats.stacks_sys = stacks_sys;
// runtime_gc performs a stop-the-world mark-sweep collection.
// force!=0 collects even if heap_alloc has not reached next_gc.
941 runtime_gc(int32 force)
944 int64 t0, t1, t2, t3;
945 uint64 heap0, heap1, obj0, obj1;
949 // Make sure all registers are saved on stack so that
950 // scanstack sees them.
951 __builtin_unwind_init();
953 // The gc is turned off (via enablegc) until
954 // the bootstrap has completed.
955 // Also, malloc gets called in the guts
956 // of a number of libraries that might be
957 // holding locks. To avoid priority inversion
958 // problems, don't bother trying to run gc
959 // while holding a lock. The next mallocgc
960 // without a lock will do the gc instead.
962 if(!mstats.enablegc || m->locks > 0 || runtime_panicking)
// Lazily parse $GOGC / $GOGCTRACE on first entry (gcpercent starts at -2).
965 if(gcpercent == -2) { // first time through
966 p = runtime_getenv("GOGC");
967 if(p == nil || p[0] == '\0')
969 else if(runtime_strcmp((const char*)p, "off") == 0)
972 gcpercent = runtime_atoi(p);
974 p = runtime_getenv("GOGCTRACE");
976 gctrace = runtime_atoi(p);
// Acquire the right to stop the world; re-check the trigger under it.
981 runtime_semacquire(&runtime_worldsema);
982 if(!force && mstats.heap_alloc < mstats.next_gc) {
983 runtime_semrelease(&runtime_worldsema);
987 t0 = runtime_nanotime();
991 runtime_stoptheworld();
994 heap0 = mstats.heap_alloc;
995 obj0 = mstats.nmalloc - mstats.nfree;
// Close both gates so helpers block until each phase is ready.
997 runtime_lock(&work.markgate);
998 runtime_lock(&work.sweepgate);
1002 if(runtime_gomaxprocs > 1 && runtime_ncpu > 1) {
1003 runtime_noteclear(&work.alldone);
1004 work.nproc += runtime_helpgc(&extra);
1009 runtime_unlock(&work.markgate); // let the helpers in
// DebugMark: run the slow verification mark pass as well.
1012 mark(debug_scanblock);
1013 t1 = runtime_nanotime();
1015 work.spans = runtime_mheap.allspans;
1016 runtime_unlock(&work.sweepgate); // let the helpers in
1019 runtime_notesleep(&work.alldone);
1020 t2 = runtime_nanotime();
// Set the next trigger proportionally to live heap (see gcpercent comment).
1025 mstats.next_gc = mstats.heap_alloc+(mstats.heap_alloc-runtime_stacks_sys)*gcpercent/100;
1028 m->locks++; // disable gc during the mallocs in newproc
1030 // kick off or wake up goroutine to run queued finalizers
1032 fing = __go_go(runfinq, nil);
1035 runtime_ready(fing);
1041 heap1 = mstats.heap_alloc;
1042 obj1 = mstats.nmalloc - mstats.nfree;
1044 t3 = runtime_nanotime();
1045 mstats.last_gc = t3;
1046 mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0;
1047 mstats.pause_total_ns += t3 - t0;
1050 runtime_printf("pause %llu\n", (unsigned long long)t3-t0);
// GOGCTRACE output: per-phase times, heap/object counts, handoffs.
1053 runtime_printf("gc%d(%d): %llu+%llu+%llu ms %llu -> %llu MB %llu -> %llu (%llu-%llu) objects %llu handoff\n",
1054 mstats.numgc, work.nproc, (unsigned long long)(t1-t0)/1000000, (unsigned long long)(t2-t1)/1000000, (unsigned long long)(t3-t2)/1000000,
1055 (unsigned long long)heap0>>20, (unsigned long long)heap1>>20, (unsigned long long)obj0, (unsigned long long)obj1,
1056 (unsigned long long) mstats.nmalloc, (unsigned long long)mstats.nfree,
1057 (unsigned long long) nhandoff);
1061 runtime_semrelease(&runtime_worldsema);
1063 // If we could have used another helper proc, start one now,
1064 // in the hope that it will be available next time.
1065 // It would have been even better to start it before the collection,
1066 // but doing so requires allocating memory, so it's tricky to
1067 // coordinate. This lazy approach works out in practice:
1068 // we don't mind if the first couple gc rounds don't have quite
1069 // the maximum number of procs.
1070 runtime_starttheworld(extra);
1072 // give the queued finalizers, if any, a chance to run
1076 if(gctrace > 1 && !force)
// Exported to Go as runtime.ReadMemStats via the __asm__ name.
1080 void runtime_ReadMemStats(MStats *)
1081 __asm__("runtime.ReadMemStats");
// Snapshot mstats into *stats with the world stopped so the numbers are consistent.
1084 runtime_ReadMemStats(MStats *stats)
1088 // Have to acquire worldsema to stop the world,
1089 // because stoptheworld can only be used by
1090 // one goroutine at a time, and there might be
1091 // a pending garbage collection already calling it.
1092 runtime_semacquire(&runtime_worldsema);
1095 runtime_stoptheworld();
1099 runtime_semrelease(&runtime_worldsema);
1100 runtime_starttheworld(false);
// runfinq is the body of the finalizer goroutine: it sleeps until the GC
// queues finalizers on finq, then runs each one via reflect_call.
1104 runfinq(void* dummy __attribute__ ((unused)))
1108 FinBlock *fb, *next;
1113 // There's no need for a lock in this section
1114 // because it only conflicts with the garbage
1115 // collector, and the garbage collector only
1116 // runs when everyone else is stopped, and
1117 // runfinq only stops at the gosched() or
1118 // during the calls in the for loop.
// Nothing queued: park until the GC readies us again.
1123 gp->status = Gwaiting;
1124 gp->waitreason = "finalizer wait";
1128 for(; fb; fb=next) {
1130 for(i=0; i<(uint32)fb->cnt; i++) {
// Clear the special bit before calling, so the object can be collected later.
1134 params[0] = &f->arg;
1135 runtime_setblockspecial(f->arg, false);
1136 reflect_call(f->ft, (void*)f->fn, 0, 0, params, nil);
1144 runtime_gc(1); // trigger another gc to clean up the finalized objects, if possible
1148 // mark the block at v of size n as allocated.
1149 // If noptr is true, mark it as having no pointers.
1151 runtime_markallocated(void *v, uintptr n, bool noptr)
1153 uintptr *b, obits, bits, off, shift;
1156 // runtime_printf("markallocated %p+%p\n", v, n);
// v..v+n must lie inside the arena's used region.
1158 if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
1159 runtime_throw("markallocated: bad pointer");
// Standard bitmap lookup: word offset, bitmap word (grows backward), bit shift.
1161 off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset
1162 b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
1163 shift = off % wordsPerBitmapWord;
1167 bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift);
1169 bits |= bitNoPointers<<shift;
// Plain store when single-proc; otherwise CAS-retry against concurrent updates.
1170 if(runtime_singleproc) {
1174 // more than one goroutine is potentially running: use atomic op
1175 if(runtime_casp((void**)b, (void*)obits, (void*)bits))
1181 // mark the block at v of size n as freed.
1183 runtime_markfreed(void *v, uintptr n)
1185 uintptr *b, obits, bits, off, shift;
1188 // runtime_printf("markfreed %p+%p\n", v, n);
// v..v+n must lie inside the arena's used region.
// Fixed copy-paste diagnostic: this is markfreed, not markallocated.
1190 if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
1191 runtime_throw("markfreed: bad pointer");
// Standard bitmap lookup: word offset, bitmap word (grows backward), bit shift.
1193 off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset
1194 b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
1195 shift = off % wordsPerBitmapWord;
// Clear all per-word bits, leaving only the block-boundary marker.
1199 bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
// Plain store when single-proc; otherwise CAS-retry against concurrent updates.
1200 if(runtime_singleproc) {
1204 // more than one goroutine is potentially running: use atomic op
1205 if(runtime_casp((void**)b, (void*)obits, (void*)bits))
1211 // check that the block at v of size n is marked freed.
// No-op unless runtime_checking is enabled; pointers outside the arena pass.
1213 runtime_checkfreed(void *v, uintptr n)
1215 uintptr *b, bits, off, shift;
1217 if(!runtime_checking)
1220 if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
1221 return; // not allocated, so okay
1223 off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset
1224 b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
1225 shift = off % wordsPerBitmapWord;
// An allocated bit here means something was freed without markfreed.
1228 if((bits & bitAllocated) != 0) {
1229 runtime_printf("checkfreed %p+%p: off=%p have=%p\n",
1230 v, (void*)n, (void*)off, (void*)(bits & bitMask));
1231 runtime_throw("checkfreed: not freed");
1235 // mark the span of memory at v as having n blocks of the given size.
1236 // if leftover is true, there is left over space at the end of the span.
1238 runtime_markspan(void *v, uintptr size, uintptr n, bool leftover)
1240 uintptr *b, off, shift;
1243 if((byte*)v+size*n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
1244 runtime_throw("markspan: bad pointer");
1247 if(leftover) // mark a boundary just past end of last block too
// Set a block-boundary bit at the start of each of the n blocks.
1249 for(; n-- > 0; p += size) {
1250 // Okay to use non-atomic ops here, because we control
1251 // the entire span, and each bitmap word has bits for only
1252 // one span, so no other goroutines are changing these
1254 off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start; // word offset
1255 b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
1256 shift = off % wordsPerBitmapWord;
1257 *b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
1261 // unmark the span of memory at v of length n bytes.
// Fixed copy-paste diagnostics: the first two throws said "markspan",
// inconsistent with the "unmarkspan: unaligned length" throw below.
1263 runtime_unmarkspan(void *v, uintptr n)
1265 uintptr *p, *b, off;
1267 if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
1268 runtime_throw("unmarkspan: bad pointer");
1271 off = p - (uintptr*)runtime_mheap.arena_start; // word offset
// v must be bitmap-word aligned so whole bitmap words can be cleared.
1272 if(off % wordsPerBitmapWord != 0)
1273 runtime_throw("unmarkspan: unaligned pointer");
1274 b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
1276 if(n%wordsPerBitmapWord != 0)
1277 runtime_throw("unmarkspan: unaligned length");
1278 // Okay to use non-atomic ops here, because we control
1279 // the entire span, and each bitmap word has bits for only
1280 // one span, so no other goroutines are changing these
// Convert word count to bitmap-word count before clearing.
1282 n /= wordsPerBitmapWord;
// runtime_blockspecial reports whether the block at v has its special bit
// set (finalizer attached or being profiled).
1288 runtime_blockspecial(void *v)
1290 uintptr *b, off, shift;
1295 off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
1296 b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
1297 shift = off % wordsPerBitmapWord;
1299 return (*b & (bitSpecial<<shift)) != 0;
// runtime_setblockspecial sets (s=true) or clears (s=false) the special
// bit for the block at v.
1303 runtime_setblockspecial(void *v, bool s)
1305 uintptr *b, off, shift, bits, obits;
1310 off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
1311 b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
1312 shift = off % wordsPerBitmapWord;
1317 bits = obits | (bitSpecial<<shift);
1319 bits = obits & ~(bitSpecial<<shift);
// Plain store when single-proc; otherwise CAS-retry against concurrent updates.
1320 if(runtime_singleproc) {
1324 // more than one goroutine is potentially running: use atomic op
1325 if(runtime_casp((void**)b, (void*)obits, (void*)bits))
// runtime_MHeap_MapBits extends the bitmap mapping to cover newly added
// arena memory. The bitmap sits immediately below arena_start and grows
// downward as the arena grows upward.
1332 runtime_MHeap_MapBits(MHeap *h)
1336 // Caller has added extra mappings to the arena.
1337 // Add extra mappings of bitmap words as needed.
1338 // We allocate extra bitmap pieces in chunks of bitmapChunk.
// Bitmap bytes needed for the current arena, rounded up to a chunk.
1344 n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
1345 n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
1346 if(h->bitmap_mapped >= n)
// Round to page size and map only the not-yet-mapped tail.
1349 page_size = getpagesize();
1350 n = (n+page_size-1) & ~(page_size-1);
1352 runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped);
1353 h->bitmap_mapped = n;