1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
13 #ifdef USING_SPLIT_STACK
15 extern void * __splitstack_find (void *, void *, size_t *, void **, void **,
18 extern void * __splitstack_find_context (void *context[10], size_t *, void **,
25 PtrSize = sizeof(void*),
26 DebugMark = 0, // run second pass to check mark
28 // Four bits per word (see #defines below).
29 wordsPerBitmapWord = sizeof(void*)*8/4,
30 bitShift = sizeof(void*)*8/4,
33 // Bits in per-word bitmap.
34 // #defines because enum might not be able to hold the values.
36 // Each word in the bitmap describes wordsPerBitmapWord words
37 // of heap memory. There are 4 bitmap bits dedicated to each heap word,
38 // so on a 64-bit system there is one bitmap word per 16 heap words.
39 // The bits in the word are packed together by type first, then by
40 // heap location, so each 64-bit bitmap word consists of, from top to bottom,
41 // the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
42 // then the 16 bitNoPointers/bitBlockBoundary bits, then the 16 bitAllocated bits.
43 // This layout makes it easier to iterate over the bits of a given type.
45 // The bitmap starts at mheap.arena_start and extends *backward* from
46 // there. On a 64-bit system the off'th word in the arena is tracked by
47 // the off/16+1'th word before mheap.arena_start. (On a 32-bit system,
48 // the only difference is that the divisor is 8.)
50 // To pull out the bits corresponding to a given pointer p, we use:
52 // off = p - (uintptr*)mheap.arena_start; // word offset
53 // b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
54 // shift = off % wordsPerBitmapWord
55 // bits = *b >> shift;
56 // /* then test bits & bitAllocated, bits & bitMarked, etc. */
58 #define bitAllocated ((uintptr)1<<(bitShift*0))
59 #define bitNoPointers ((uintptr)1<<(bitShift*1)) /* when bitAllocated is set */
60 #define bitMarked ((uintptr)1<<(bitShift*2)) /* when bitAllocated is set */
61 #define bitSpecial ((uintptr)1<<(bitShift*3)) /* when bitAllocated is set - has finalizer or being profiled */
62 #define bitBlockBoundary ((uintptr)1<<(bitShift*1)) /* when bitAllocated is NOT set */
64 #define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
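// For illustration only (not part of the original source): a minimal sketch of
// the bitmap lookup described in the comment above, using the same
// runtime_mheap arena fields and bit macros that appear throughout this file.
// The function name is hypothetical.
static uintptr
example_bits_for(void *p)
{
	uintptr off, shift, *b;

	off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start;	// word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;
	return *b >> shift;	// caller tests bits & bitAllocated, bits & bitMarked, etc.
}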
66 // Holding worldsema grants an M the right to try to stop the world.
69 // runtime_semacquire(&runtime_worldsema);
71 // runtime_stoptheworld();
76 // runtime_semrelease(&runtime_worldsema);
77 // runtime_starttheworld();
79 uint32 runtime_worldsema = 1;
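// For illustration only (not part of the original source): a hedged sketch of
// the worldsema protocol shown in the comment above, matching the call
// sequence used later in runtime_gc and runtime_ReadMemStats. The function
// name and the do_work callback are hypothetical.
static void
example_with_world_stopped(void (*do_work)(void))
{
	runtime_semacquire(&runtime_worldsema);
	runtime_stoptheworld();
	do_work();	// operate while all other goroutines are stopped
	runtime_semrelease(&runtime_worldsema);
	runtime_starttheworld(false);
}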
81 // TODO: Make these per-M.
82 static uint64 nhandoff;
86 typedef struct Workbuf Workbuf;
94 typedef struct Finalizer Finalizer;
99 const struct __go_func_type *ft;
102 typedef struct FinBlock FinBlock;
113 static FinBlock *finq; // list of finalizers that are to be executed
114 static FinBlock *finc; // cache of free blocks
115 static FinBlock *allfin; // list of all blocks
117 static int32 fingwait;
119 static void runfinq(void*);
120 static Workbuf* getempty(Workbuf*);
121 static Workbuf* getfull(Workbuf*);
122 static void putempty(Workbuf*);
123 static Workbuf* handoff(Workbuf*);
131 volatile uint32 nwait;
132 volatile uint32 ndone;
143 // scanblock scans a block of n bytes starting at pointer b for references
144 // to other objects, scanning any it finds recursively until there are no
145 // unscanned objects left. Instead of using an explicit recursion, it keeps
146 // a work list in the Workbuf* structures and loops in the main function
147 // body. Keeping an explicit work list is easier on the stack allocator and more efficient.
150 scanblock(byte *b, int64 n)
152 byte *obj, *arena_start, *arena_used, *p;
154 uintptr size, *bitp, bits, shift, i, j, x, xbits, off, nobj, nproc;
161 if((int64)(uintptr)n != n || n < 0) {
162 runtime_printf("scanblock %p %D\n", b, n);
163 runtime_throw("scanblock");
166 // Memory arena parameters.
167 arena_start = runtime_mheap.arena_start;
168 arena_used = runtime_mheap.arena_used;
171 wbuf = nil; // current work buffer
172 wp = nil; // storage for next queued pointer (write pointer)
173 nobj = 0; // number of queued objects
175 // Scanblock helpers pass b==nil.
176 // The main proc needs to return to make more
177 // calls to scanblock. But if work.nproc==1 then
178 // might as well process blocks as soon as we have them.
180 keepworking = b == nil || work.nproc == 1;
182 // Align b to a word boundary.
183 off = (uintptr)b & (PtrSize-1);
190 // Each iteration scans the block b of length n, queueing pointers in the work buffer.
193 runtime_printf("scanblock %p %D\n", b, n);
196 n >>= (2+PtrSize/8); /* n /= PtrSize (4 or 8) */
197 for(i=0; i<(uintptr)n; i++) {
200 // Words outside the arena cannot be pointers.
201 if((byte*)obj < arena_start || (byte*)obj >= arena_used)
204 // obj may be a pointer to a live object.
205 // Try to find the beginning of the object.
207 // Round down to word boundary.
208 obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
210 // Find bits for this word.
211 off = (uintptr*)obj - (uintptr*)arena_start;
212 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
213 shift = off % wordsPerBitmapWord;
215 bits = xbits >> shift;
217 // Pointing at the beginning of a block?
218 if((bits & (bitAllocated|bitBlockBoundary)) != 0)
221 // Pointing just past the beginning?
222 // Scan backward a little to find a block boundary.
223 for(j=shift; j-->0; ) {
224 if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
225 obj = (byte*)obj - (shift-j)*PtrSize;
232 // Otherwise consult span table to find beginning.
233 // (Manually inlined copy of MHeap_LookupMaybe.)
234 k = (uintptr)obj>>PageShift;
236 if(sizeof(void*) == 8)
237 x -= (uintptr)arena_start>>PageShift;
238 s = runtime_mheap.map[x];
239 if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
241 p = (byte*)((uintptr)s->start<<PageShift);
242 if(s->sizeclass == 0) {
245 if((byte*)obj >= (byte*)s->limit)
247 size = runtime_class_to_size[s->sizeclass];
248 int32 i = ((byte*)obj - p)/size;
252 // Now that we know the object header, reload bits.
253 off = (uintptr*)obj - (uintptr*)arena_start;
254 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
255 shift = off % wordsPerBitmapWord;
257 bits = xbits >> shift;
260 // Now we have bits, bitp, and shift correct for
261 // obj pointing at the base of the object.
262 // Only care about allocated and not marked.
263 if((bits & (bitAllocated|bitMarked)) != bitAllocated)
266 *bitp |= bitMarked<<shift;
270 if(x & (bitMarked<<shift))
272 if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
277 // If object has no pointers, don't need to scan further.
278 if((bits & bitNoPointers) != 0)
281 // If another proc wants a pointer, give it some.
282 if(nobj > 4 && work.nwait > 0 && work.full == nil) {
284 wbuf = handoff(wbuf);
286 wp = (void**)(wbuf->obj + nobj);
289 // If buffer is full, get a new one.
290 if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
293 wbuf = getempty(wbuf);
294 wp = (void**)(wbuf->obj);
302 // Done scanning [b, b+n). Prepare for the next iteration of
303 // the loop by setting b and n to the parameters for the next block.
305 // Fetch b from the work buffer.
311 // Emptied our buffer: refill.
312 wbuf = getfull(wbuf);
316 wp = (void**)(wbuf->obj + wbuf->nobj);
321 // Ask span about size class.
322 // (Manually inlined copy of MHeap_Lookup.)
323 x = (uintptr)b>>PageShift;
324 if(sizeof(void*) == 8)
325 x -= (uintptr)arena_start>>PageShift;
326 s = runtime_mheap.map[x];
327 if(s->sizeclass == 0)
328 n = s->npages<<PageShift;
330 n = runtime_class_to_size[s->sizeclass];
334 // debug_scanblock is the debug copy of scanblock.
335 // It is simpler, slower, single-threaded, recursive,
336 // and uses bitSpecial as the mark bit.
338 debug_scanblock(byte *b, int64 n)
342 uintptr size, *bitp, bits, shift, i, xbits, off;
346 runtime_throw("debug_scanblock without DebugMark");
348 if((int64)(uintptr)n != n || n < 0) {
349 runtime_printf("debug_scanblock %p %D\n", b, n);
350 runtime_throw("debug_scanblock");
353 // Align b to a word boundary.
354 off = (uintptr)b & (PtrSize-1);
362 for(i=0; i<(uintptr)n; i++) {
365 // Words outside the arena cannot be pointers.
366 if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used)
369 // Round down to word boundary.
370 obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
372 // Consult span table to find beginning.
373 s = runtime_MHeap_LookupMaybe(&runtime_mheap, obj);
378 p = (byte*)((uintptr)s->start<<PageShift);
379 if(s->sizeclass == 0) {
381 size = (uintptr)s->npages<<PageShift;
383 if((byte*)obj >= (byte*)s->limit)
385 size = runtime_class_to_size[s->sizeclass];
386 int32 i = ((byte*)obj - p)/size;
390 // Now that we know the object header, reload bits.
391 off = (uintptr*)obj - (uintptr*)runtime_mheap.arena_start;
392 bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
393 shift = off % wordsPerBitmapWord;
395 bits = xbits >> shift;
397 // Now we have bits, bitp, and shift correct for
398 // obj pointing at the base of the object.
399 // If not allocated or already marked, done.
400 if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0) // NOTE: bitSpecial not bitMarked
402 *bitp |= bitSpecial<<shift;
403 if(!(bits & bitMarked))
404 runtime_printf("found unmarked block %p in %p\n", obj, vp+i);
406 // If object has no pointers, don't need to scan further.
407 if((bits & bitNoPointers) != 0)
410 debug_scanblock(obj, size);
414 // Get an empty work buffer off the work.empty list,
415 // allocating new buffers as needed.
419 if(work.nproc == 1) {
420 // Put b on full list.
425 // Grab from empty list if possible.
428 work.empty = b->next;
432 // Put b on full list.
434 runtime_lock(&work.fmu);
437 runtime_unlock(&work.fmu);
439 // Grab from empty list if possible.
440 runtime_lock(&work.emu);
443 work.empty = b->next;
444 runtime_unlock(&work.emu);
451 if(work.nchunk < sizeof *b) {
453 work.chunk = runtime_SysAlloc(work.nchunk);
455 b = (Workbuf*)work.chunk;
456 work.chunk += sizeof *b;
457 work.nchunk -= sizeof *b;
458 runtime_unlock(&work);
471 if(work.nproc == 1) {
472 b->next = work.empty;
477 runtime_lock(&work.emu);
478 b->next = work.empty;
480 runtime_unlock(&work.emu);
483 // Get a full work buffer off the work.full list, or return nil.
490 if(work.nproc == 1) {
491 // Put b on empty list.
493 b->next = work.empty;
496 // Grab from full list if possible.
497 // Since work.nproc==1, no one else is
498 // going to give us work.
507 // Grab buffer from full list if possible.
512 runtime_lock(&work.fmu);
513 if(work.full != nil) {
515 work.full = b1->next;
516 runtime_unlock(&work.fmu);
519 runtime_unlock(&work.fmu);
522 runtime_xadd(&work.nwait, +1);
526 runtime_lock(&work.fmu);
527 if(work.full != nil) {
528 runtime_xadd(&work.nwait, -1);
530 work.full = b1->next;
531 runtime_unlock(&work.fmu);
534 runtime_unlock(&work.fmu);
537 if(work.nwait == work.nproc)
540 runtime_procyield(20);
554 // Make new buffer with half of b's pointers.
559 runtime_memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
562 // Put b on full list - let first half of b get stolen.
563 runtime_lock(&work.fmu);
566 runtime_unlock(&work.fmu);
571 // Scanstack calls scanblock on each of gp's stack segments.
573 scanstack(void (*scanblock)(byte*, int64), G *gp)
575 #ifdef USING_SPLIT_STACK
583 if(gp == runtime_g()) {
584 // Scanning our own stack.
585 sp = __splitstack_find(nil, nil, &spsize, &next_segment,
586 &next_sp, &initial_sp);
587 } else if((mp = gp->m) != nil && mp->helpgc) {
588 // gchelper's stack is in active use and has no interesting pointers.
591 // Scanning another goroutine's stack.
592 // The goroutine is usually asleep (the world is stopped).
594 // The exception is that if the goroutine is about to enter or might
595 // have just exited a system call, it may be executing code such
596 // as schedlock and may have needed to start a new stack segment.
597 // Use the stack segment and stack pointer at the time of
598 // the system call instead, since that won't change underfoot.
599 if(gp->gcstack != nil) {
601 spsize = gp->gcstack_size;
602 next_segment = gp->gcnext_segment;
603 next_sp = gp->gcnext_sp;
604 initial_sp = gp->gcinitial_sp;
606 sp = __splitstack_find_context(&gp->stack_context[0],
607 &spsize, &next_segment,
608 &next_sp, &initial_sp);
612 scanblock(sp, spsize);
613 while((sp = __splitstack_find(next_segment, next_sp,
614 &spsize, &next_segment,
615 &next_sp, &initial_sp)) != nil)
616 scanblock(sp, spsize);
623 if(gp == runtime_g()) {
624 // Scanning our own stack.
626 } else if((mp = gp->m) != nil && mp->helpgc) {
627 // gchelper's stack is in active use and has no interesting pointers.
630 // Scanning another goroutine's stack.
631 // The goroutine is usually asleep (the world is stopped).
632 bottom = (byte*)gp->gcnext_sp;
636 top = (byte*)gp->gcinitial_sp + gp->gcstack_size;
638 scanblock(bottom, top - bottom);
640 scanblock(top, bottom - top);
644 // Markfin calls scanblock on the blocks that have finalizers:
645 // the things pointed at cannot be freed until the finalizers have run.
652 if(!runtime_mlookup(v, (byte**)&v, &size, nil) || !runtime_blockspecial(v))
653 runtime_throw("mark - finalizer inconsistency");
655 // Do not mark the finalizer block itself. Just mark the things it points at.
659 static struct root_list* roots;
662 __go_register_gc_roots (struct root_list* r)
664 // FIXME: This needs locking if multiple goroutines can call
665 // dlopen simultaneously.
671 debug_markfin(void *v)
675 if(!runtime_mlookup(v, (byte**)&v, &size, nil))
676 runtime_throw("debug_mark - finalizer inconsistency");
677 debug_scanblock(v, size);
682 mark(void (*scan)(byte*, int64))
684 struct root_list *pl;
689 for(pl = roots; pl != nil; pl = pl->next) {
690 struct root* pr = &pl->roots[0];
692 void *decl = pr->decl;
695 scanblock(decl, pr->size);
700 scan((byte*)&runtime_m0, sizeof runtime_m0);
701 scan((byte*)&runtime_g0, sizeof runtime_g0);
702 scan((byte*)&runtime_allg, sizeof runtime_allg);
703 scan((byte*)&runtime_allm, sizeof runtime_allm);
704 runtime_MProf_Mark(scan);
705 runtime_time_scan(scan);
706 runtime_trampoline_scan(scan);
709 for(gp=runtime_allg; gp!=nil; gp=gp->alllink) {
712 runtime_printf("unexpected G.status %d\n", gp->status);
713 runtime_throw("mark - bad status");
717 if(gp != runtime_g())
718 runtime_throw("mark - world not stopped");
729 // mark things pointed at by objects with finalizers
730 if(scan == debug_scanblock)
731 runtime_walkfintab(debug_markfin, scan);
733 runtime_walkfintab(markfin, scan);
735 for(fb=allfin; fb; fb=fb->alllink)
736 scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]));
738 // in multiproc mode, join in the queued work.
743 handlespecial(byte *p, uintptr size)
746 const struct __go_func_type *ft;
750 if(!runtime_getfinalizer(p, true, &fn, &ft)) {
751 runtime_setblockspecial(p, false);
752 runtime_MProf_Free(p, size);
756 runtime_lock(&finlock);
757 if(finq == nil || finq->cnt == finq->cap) {
759 finc = runtime_SysAlloc(PageSize);
760 finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
761 finc->alllink = allfin;
769 f = &finq->fin[finq->cnt];
774 runtime_unlock(&finlock);
778 // Sweep frees or collects finalizers for blocks not marked in the mark phase.
779 // It clears the mark bits in preparation for the next GC round.
793 arena_start = runtime_mheap.arena_start;
794 now = runtime_nanotime();
800 if(!runtime_casp(&work.spans, s, s->allnext))
803 // Stamp newly unused spans. The scavenger will use that
804 // info to potentially give back some pages to the OS.
805 if(s->state == MSpanFree && s->unusedsince == 0)
806 s->unusedsince = now;
808 if(s->state != MSpanInUse)
811 p = (byte*)(s->start << PageShift);
814 size = s->npages<<PageShift;
817 // Chunk full of small blocks.
818 size = runtime_class_to_size[cl];
819 npages = runtime_class_to_allocnpages[cl];
820 n = (npages << PageShift) / size;
823 // Sweep through n objects of given size starting at p.
824 // This thread owns the span now, so it can manipulate
825 // the block bitmap without atomic operations.
826 for(; n > 0; n--, p += size) {
827 uintptr off, *bitp, shift, bits;
829 off = (uintptr*)p - (uintptr*)arena_start;
830 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
831 shift = off % wordsPerBitmapWord;
834 if((bits & bitAllocated) == 0)
837 if((bits & bitMarked) != 0) {
839 if(!(bits & bitSpecial))
840 runtime_printf("found spurious mark on %p\n", p);
841 *bitp &= ~(bitSpecial<<shift);
843 *bitp &= ~(bitMarked<<shift);
847 // Special means it has a finalizer or is being profiled.
848 // In DebugMark mode, the bit has been coopted so
849 // we have to assume all blocks are special.
850 if(DebugMark || (bits & bitSpecial) != 0) {
851 if(handlespecial(p, size))
855 // Mark freed; restore block boundary bit.
856 *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
859 if(s->sizeclass == 0) {
861 runtime_unmarkspan(p, 1<<PageShift);
862 *(uintptr*)p = 1; // needs zeroing
863 runtime_MHeap_Free(&runtime_mheap, s, 1);
865 // Free small object.
866 if(size > sizeof(uintptr))
867 ((uintptr*)p)[1] = 1; // mark as "needs to be zeroed"
868 c->local_by_size[s->sizeclass].nfree++;
869 runtime_MCache_Free(c, p, s->sizeclass, size);
871 c->local_alloc -= size;
878 runtime_gchelper(void)
880 // Wait until main proc is ready for mark help.
881 runtime_lock(&work.markgate);
882 runtime_unlock(&work.markgate);
885 // Wait until main proc is ready for sweep help.
886 runtime_lock(&work.sweepgate);
887 runtime_unlock(&work.sweepgate);
890 if(runtime_xadd(&work.ndone, +1) == work.nproc-1)
891 runtime_notewakeup(&work.alldone);
894 // Initialized from $GOGC. GOGC=off means no gc.
896 // Next gc is after we've allocated an extra amount of
897 // memory proportional to the amount already in use.
898 // If gcpercent=100 and we're using 4M, we'll gc again
899 // when we get to 8M. This keeps the gc cost in linear
900 // proportion to the allocation cost. Adjusting gcpercent
901 // just changes the linear constant (and also the amount of
902 // extra memory used).
903 static int32 gcpercent = -2;
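// For illustration only (not part of the original source): the pacing rule
// described above, written out as arithmetic. It mirrors the next_gc
// expression used later in runtime_gc; the helper name is hypothetical.
// With gcpercent=100, stacks_sys=0, and 4M allocated, the result is 8M.
static uint64
example_next_gc(uint64 heap_alloc, uint64 stacks_sys, int32 percent)
{
	return heap_alloc + (heap_alloc - stacks_sys)*percent/100;
}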
910 for(m=runtime_allm; m; m=m->alllink)
911 runtime_MCache_ReleaseAll(m->mcache);
924 stacks_sys = runtime_stacks_sys;
925 for(m=runtime_allm; m; m=m->alllink) {
926 runtime_purgecachedstats(m);
927 // stacks_inuse += m->stackalloc->inuse;
928 // stacks_sys += m->stackalloc->sys;
930 for(i=0; i<nelem(c->local_by_size); i++) {
931 mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
932 c->local_by_size[i].nmalloc = 0;
933 mstats.by_size[i].nfree += c->local_by_size[i].nfree;
934 c->local_by_size[i].nfree = 0;
937 mstats.stacks_inuse = stacks_inuse;
938 mstats.stacks_sys = stacks_sys;
942 runtime_gc(int32 force)
945 int64 t0, t1, t2, t3;
946 uint64 heap0, heap1, obj0, obj1;
950 // Make sure all registers are saved on stack so that
951 // scanstack sees them.
952 __builtin_unwind_init();
954 // The gc is turned off (via enablegc) until
955 // the bootstrap has completed.
956 // Also, malloc gets called in the guts
957 // of a number of libraries that might be
958 // holding locks. To avoid priority inversion
959 // problems, don't bother trying to run gc
960 // while holding a lock. The next mallocgc
961 // without a lock will do the gc instead.
963 if(!mstats.enablegc || m->locks > 0 || runtime_panicking)
966 if(gcpercent == -2) { // first time through
967 p = runtime_getenv("GOGC");
968 if(p == nil || p[0] == '\0')
970 else if(runtime_strcmp((const char*)p, "off") == 0)
973 gcpercent = runtime_atoi(p);
975 p = runtime_getenv("GOGCTRACE");
977 gctrace = runtime_atoi(p);
982 runtime_semacquire(&runtime_worldsema);
983 if(!force && mstats.heap_alloc < mstats.next_gc) {
984 runtime_semrelease(&runtime_worldsema);
988 t0 = runtime_nanotime();
992 runtime_stoptheworld();
995 heap0 = mstats.heap_alloc;
996 obj0 = mstats.nmalloc - mstats.nfree;
998 runtime_lock(&work.markgate);
999 runtime_lock(&work.sweepgate);
1003 if(runtime_gomaxprocs > 1 && runtime_ncpu > 1) {
1004 runtime_noteclear(&work.alldone);
1005 work.nproc += runtime_helpgc(&extra);
1010 runtime_unlock(&work.markgate); // let the helpers in
1013 mark(debug_scanblock);
1014 t1 = runtime_nanotime();
1016 work.spans = runtime_mheap.allspans;
1017 runtime_unlock(&work.sweepgate); // let the helpers in
1020 runtime_notesleep(&work.alldone);
1021 t2 = runtime_nanotime();
1026 mstats.next_gc = mstats.heap_alloc+(mstats.heap_alloc-runtime_stacks_sys)*gcpercent/100;
1029 m->locks++; // disable gc during the mallocs in newproc
1031 // kick off or wake up goroutine to run queued finalizers
1033 fing = __go_go(runfinq, nil);
1036 runtime_ready(fing);
1042 heap1 = mstats.heap_alloc;
1043 obj1 = mstats.nmalloc - mstats.nfree;
1045 t3 = runtime_nanotime();
1046 mstats.last_gc = t3;
1047 mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0;
1048 mstats.pause_total_ns += t3 - t0;
1051 runtime_printf("pause %D\n", t3-t0);
1054 runtime_printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects\n",
1055 mstats.numgc, work.nproc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000,
1056 heap0>>20, heap1>>20, obj0, obj1,
1057 mstats.nmalloc, mstats.nfree);
1061 runtime_semrelease(&runtime_worldsema);
1063 // If we could have used another helper proc, start one now,
1064 // in the hope that it will be available next time.
1065 // It would have been even better to start it before the collection,
1066 // but doing so requires allocating memory, so it's tricky to
1067 // coordinate. This lazy approach works out in practice:
1068 // we don't mind if the first couple gc rounds don't have quite
1069 // the maximum number of procs.
1070 runtime_starttheworld(extra);
1072 // give the queued finalizers, if any, a chance to run
1076 if(gctrace > 1 && !force)
1080 void runtime_ReadMemStats(MStats *)
1081 __asm__("runtime.ReadMemStats");
1084 runtime_ReadMemStats(MStats *stats)
1088 // Have to acquire worldsema to stop the world,
1089 // because stoptheworld can only be used by
1090 // one goroutine at a time, and there might be
1091 // a pending garbage collection already calling it.
1092 runtime_semacquire(&runtime_worldsema);
1095 runtime_stoptheworld();
1099 runtime_semrelease(&runtime_worldsema);
1100 runtime_starttheworld(false);
1104 runfinq(void* dummy __attribute__ ((unused)))
1108 FinBlock *fb, *next;
1113 // There's no need for a lock in this section
1114 // because it only conflicts with the garbage
1115 // collector, and the garbage collector only
1116 // runs when everyone else is stopped, and
1117 // runfinq only stops at the gosched() or
1118 // during the calls in the for loop.
1123 gp->status = Gwaiting;
1124 gp->waitreason = "finalizer wait";
1128 for(; fb; fb=next) {
1130 for(i=0; i<(uint32)fb->cnt; i++) {
1134 params[0] = &f->arg;
1135 reflect_call(f->ft, (void*)f->fn, 0, 0, params, nil);
1143 runtime_gc(1); // trigger another gc to clean up the finalized objects, if possible
1147 // mark the block at v of size n as allocated.
1148 // If noptr is true, mark it as having no pointers.
1150 runtime_markallocated(void *v, uintptr n, bool noptr)
1152 uintptr *b, obits, bits, off, shift;
1155 runtime_printf("markallocated %p+%p\n", v, n);
1157 if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
1158 runtime_throw("markallocated: bad pointer");
1160 off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset
1161 b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
1162 shift = off % wordsPerBitmapWord;
1166 bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift);
1168 bits |= bitNoPointers<<shift;
1169 if(runtime_singleproc) {
1173 // more than one goroutine is potentially running: use atomic op
1174 if(runtime_casp((void**)b, (void*)obits, (void*)bits))
1180 // mark the block at v of size n as freed.
1182 runtime_markfreed(void *v, uintptr n)
1184 uintptr *b, obits, bits, off, shift;
1187 runtime_printf("markallocated %p+%p\n", v, n);
1189 if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
1190 runtime_throw("markallocated: bad pointer");
1192 off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset
1193 b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
1194 shift = off % wordsPerBitmapWord;
1198 bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
1199 if(runtime_singleproc) {
1203 // more than one goroutine is potentially running: use atomic op
1204 if(runtime_casp((void**)b, (void*)obits, (void*)bits))
1210 // check that the block at v of size n is marked freed.
1212 runtime_checkfreed(void *v, uintptr n)
1214 uintptr *b, bits, off, shift;
1216 if(!runtime_checking)
1219 if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
1220 return; // not allocated, so okay
1222 off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset
1223 b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
1224 shift = off % wordsPerBitmapWord;
1227 if((bits & bitAllocated) != 0) {
1228 runtime_printf("checkfreed %p+%p: off=%p have=%p\n",
1229 v, n, off, bits & bitMask);
1230 runtime_throw("checkfreed: not freed");
1234 // mark the span of memory at v as having n blocks of the given size.
1235 // if leftover is true, there is left over space at the end of the span.
1237 runtime_markspan(void *v, uintptr size, uintptr n, bool leftover)
1239 uintptr *b, off, shift;
1242 if((byte*)v+size*n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
1243 runtime_throw("markspan: bad pointer");
1246 if(leftover) // mark a boundary just past end of last block too
1248 for(; n-- > 0; p += size) {
1249 // Okay to use non-atomic ops here, because we control
1250 // the entire span, and each bitmap word has bits for only
1251 // one span, so no other goroutines are changing these bitmap words.
1253 off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start; // word offset
1254 b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
1255 shift = off % wordsPerBitmapWord;
1256 *b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
1260 // unmark the span of memory at v of length n bytes.
1262 runtime_unmarkspan(void *v, uintptr n)
1264 uintptr *p, *b, off;
1266 if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
1267 runtime_throw("markspan: bad pointer");
1270 off = p - (uintptr*)runtime_mheap.arena_start; // word offset
1271 if(off % wordsPerBitmapWord != 0)
1272 runtime_throw("markspan: unaligned pointer");
1273 b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
1275 if(n%wordsPerBitmapWord != 0)
1276 runtime_throw("unmarkspan: unaligned length");
1277 // Okay to use non-atomic ops here, because we control
1278 // the entire span, and each bitmap word has bits for only
1279 // one span, so no other goroutines are changing these bitmap words.
1281 n /= wordsPerBitmapWord;
1287 runtime_blockspecial(void *v)
1289 uintptr *b, off, shift;
1294 off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
1295 b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
1296 shift = off % wordsPerBitmapWord;
1298 return (*b & (bitSpecial<<shift)) != 0;
1302 runtime_setblockspecial(void *v, bool s)
1304 uintptr *b, off, shift, bits, obits;
1309 off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
1310 b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
1311 shift = off % wordsPerBitmapWord;
1316 bits = obits | (bitSpecial<<shift);
1318 bits = obits & ~(bitSpecial<<shift);
1319 if(runtime_singleproc) {
1323 // more than one goroutine is potentially running: use atomic op
1324 if(runtime_casp((void**)b, (void*)obits, (void*)bits))
1331 runtime_MHeap_MapBits(MHeap *h)
1335 // Caller has added extra mappings to the arena.
1336 // Add extra mappings of bitmap words as needed.
1337 // We allocate extra bitmap pieces in chunks of bitmapChunk.
1343 n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
1344 n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
1345 if(h->bitmap_mapped >= n)
1348 page_size = getpagesize();
1349 n = (n+page_size-1) & ~(page_size-1);
1351 runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped);
1352 h->bitmap_mapped = n;
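// For illustration only (not part of the original source): the power-of-two
// round-up idiom used above for bitmapChunk and the page size, pulled out as
// a hypothetical helper. The "align" argument must be a power of two.
static uintptr
example_round_up(uintptr n, uintptr align)
{
	return (n + align - 1) & ~(align - 1);
}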