// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#ifdef USING_SPLIT_STACK

extern void * __splitstack_find (void *, void *, size_t *, void **, void **,
				 void **);

extern void * __splitstack_find_context (void *context[10], size_t *, void **,
					  void **, void **);

#endif
enum {
	PtrSize = sizeof(void*),
	DebugMark = 0,	// run second pass to check mark

	// Four bits per word (see #defines below).
	wordsPerBitmapWord = sizeof(void*)*8/4,
	bitShift = sizeof(void*)*8/4,
};
// Bits in per-word bitmap.
// #defines because enum might not be able to hold the values.
//
// Each word in the bitmap describes wordsPerBitmapWord words
// of heap memory. There are 4 bitmap bits dedicated to each heap word,
// so on a 64-bit system there is one bitmap word per 16 heap words.
// The bits in the word are packed together by type first, then by
// heap location, so each 64-bit bitmap word consists of, from top to bottom,
// the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
// then the 16 bitNoPointers/bitBlockBoundary bits, then the 16 bitAllocated bits.
// This layout makes it easier to iterate over the bits of a given type.
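//
// For example, a 64-bit bitmap word describing heap words 0..15 is laid
// out as follows (bit index = bitShift*type + heap word offset):
//
//	bits 63..48: bitSpecial bits for heap words 15..0
//	bits 47..32: bitMarked bits for heap words 15..0
//	bits 31..16: bitNoPointers/bitBlockBoundary bits for heap words 15..0
//	bits 15..0:  bitAllocated bits for heap words 15..0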
// The bitmap starts at mheap.arena_start and extends *backward* from
// there. On a 64-bit system the off'th word in the arena is tracked by
// the off/16+1'th word before mheap.arena_start. (On a 32-bit system,
// the only difference is that the divisor is 8.)
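//
// For example, on a 64-bit system the arena word at byte offset 128
// (off = 16) has its bits in the bitmap word 16/16+1 = 2 uintptr words
// before mheap.arena_start, at shift 0.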
//
// To pull out the bits corresponding to a given pointer p, we use:
//
//	off = p - (uintptr*)mheap.arena_start;  // word offset
//	b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
//	shift = off % wordsPerBitmapWord;
//	bits = *b >> shift;
//	/* then test bits & bitAllocated, bits & bitMarked, etc. */
#define bitAllocated		((uintptr)1<<(bitShift*0))
#define bitNoPointers		((uintptr)1<<(bitShift*1))	/* when bitAllocated is set */
#define bitMarked		((uintptr)1<<(bitShift*2))	/* when bitAllocated is set */
#define bitSpecial		((uintptr)1<<(bitShift*3))	/* when bitAllocated is set - has finalizer or being profiled */
#define bitBlockBoundary	((uintptr)1<<(bitShift*1))	/* when bitAllocated is NOT set */

#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
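// A minimal sketch (not part of the original file) of the lookup the
// comment above describes, written as a standalone helper with a
// hypothetical name; the same off/bitp/shift computation is open-coded
// throughout this file.
//
//	static uintptr
//	gc_bits_for_pointer(void *p)	// hypothetical helper
//	{
//		uintptr off, *b, shift;
//
//		off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start;	// word offset
//		b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
//		shift = off % wordsPerBitmapWord;
//		return (*b >> shift) & bitMask;	// caller tests bitAllocated, bitMarked, etc.
//	}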
// TODO: Make these per-M.
static uint64 nhandoff;

typedef struct Workbuf Workbuf;

typedef struct Finalizer Finalizer;
struct Finalizer
{
	void (*fn)(void*);
	void *arg;
	const struct __go_func_type *ft;
};

typedef struct FinBlock FinBlock;

static FinBlock *finq;		// list of finalizers that are to be executed
static FinBlock *finc;		// cache of free blocks
static FinBlock *allfin;	// list of all blocks
static int32 fingwait;
static void runfinq(void*);
static Workbuf* getempty(Workbuf*);
static Workbuf* getfull(Workbuf*);
static void putempty(Workbuf*);
static Workbuf* handoff(Workbuf*);

	volatile uint32 nwait;
	volatile uint32 ndone;
// scanblock scans a block of n bytes starting at pointer b for references
// to other objects, scanning any it finds recursively until there are no
// unscanned objects left. Instead of using an explicit recursion, it keeps
// a work list in the Workbuf* structures and loops in the main function
// body. Keeping an explicit work list is easier on the stack allocator and
// more efficient.
static void
scanblock(byte *b, int64 n)
{
	byte *obj, *arena_start, *arena_used, *p;
	void **vp, **wp;
	uintptr size, *bitp, bits, shift, i, j, x, xbits, off, nobj, nproc;
	MSpan *s;
	PageID k;
	Workbuf *wbuf;
	bool keepworking;
	if((int64)(uintptr)n != n || n < 0) {
		// runtime_printf("scanblock %p %lld\n", b, (long long)n);
		runtime_throw("scanblock");
	}
	// Memory arena parameters.
	arena_start = runtime_mheap.arena_start;
	arena_used = runtime_mheap.arena_used;

	wbuf = nil;	// current work buffer
	wp = nil;	// storage for next queued pointer (write pointer)
	nobj = 0;	// number of queued objects
	// Scanblock helpers pass b==nil.
	// The main proc needs to return to make more
	// calls to scanblock. But if work.nproc==1 then
	// might as well process blocks as soon as we
	// have them.
	keepworking = b == nil || work.nproc == 1;
	// Align b to a word boundary.
	off = (uintptr)b & (PtrSize-1);
	// Each iteration scans the block b of length n, queueing pointers in
	// the work buffer.
	runtime_printf("scanblock %p %lld\n", b, (long long)n);
	vp = (void**)b;
	n >>= (2+PtrSize/8);	/* n /= PtrSize (4 or 8) */
	for(i=0; i<(uintptr)n; i++) {
		obj = (byte*)vp[i];
		// Words outside the arena cannot be pointers.
		if((byte*)obj < arena_start || (byte*)obj >= arena_used)
			continue;
		// obj may be a pointer to a live object.
		// Try to find the beginning of the object.

		// Round down to word boundary.
		obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
		// Find bits for this word.
		off = (uintptr*)obj - (uintptr*)arena_start;
		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		xbits = *bitp;
		bits = xbits >> shift;
		// Pointing at the beginning of a block?
		if((bits & (bitAllocated|bitBlockBoundary)) != 0)
			goto found;
		// Pointing just past the beginning?
		// Scan backward a little to find a block boundary.
		for(j=shift; j-->0; ) {
			if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
				obj = (byte*)obj - (shift-j)*PtrSize;
				goto found;
			}
		}
		// Otherwise consult span table to find beginning.
		// (Manually inlined copy of MHeap_LookupMaybe.)
		k = (uintptr)obj>>PageShift;
		x = k;
		if(sizeof(void*) == 8)
			x -= (uintptr)arena_start>>PageShift;
		s = runtime_mheap.map[x];
		if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
			continue;
		p = (byte*)((uintptr)s->start<<PageShift);
		if(s->sizeclass == 0) {
			obj = p;
		} else {
			if((byte*)obj >= (byte*)s->limit)
				continue;
			size = runtime_class_to_size[s->sizeclass];
			int32 i = ((byte*)obj - p)/size;
			obj = p+i*size;
		}
	found:
		// Now that we know the object header, reload bits.
		off = (uintptr*)obj - (uintptr*)arena_start;
		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		xbits = *bitp;
		bits = xbits >> shift;
		// Now we have bits, bitp, and shift correct for
		// obj pointing at the base of the object.
		// Only care about allocated and not marked.
		if((bits & (bitAllocated|bitMarked)) != bitAllocated)
			continue;
		if(work.nproc == 1)
			*bitp |= bitMarked<<shift;
		else {
			for(;;) {
				x = *bitp;
				if(x & (bitMarked<<shift))
					goto continue_obj;
				if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
					break;
			}
		}
		// If object has no pointers, don't need to scan further.
		if((bits & bitNoPointers) != 0)
			continue;
		// If another proc wants a pointer, give it some.
		if(nobj > 4 && work.nwait > 0 && work.full == nil) {
			wbuf->nobj = nobj;
			wbuf = handoff(wbuf);
			nobj = wbuf->nobj;
			wp = (void**)(wbuf->obj + nobj);
		}
		// If buffer is full, get a new one.
		if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
			if(wbuf != nil)
				wbuf->nobj = nobj;
			wbuf = getempty(wbuf);
			wp = (void**)(wbuf->obj);
			nobj = 0;
		}

		*wp++ = obj;
		nobj++;
	continue_obj:;
	}
	// Done scanning [b, b+n). Prepare for the next iteration of
	// the loop by setting b and n to the parameters for the next block.

	// Fetch b from the work buffer.
	// Emptied our buffer: refill.
	wbuf = getfull(wbuf);
	if(wbuf == nil)
		return;
	nobj = wbuf->nobj;
	wp = (void**)(wbuf->obj + wbuf->nobj);
	// Ask span about size class.
	// (Manually inlined copy of MHeap_Lookup.)
	x = (uintptr)b>>PageShift;
	if(sizeof(void*) == 8)
		x -= (uintptr)arena_start>>PageShift;
	s = runtime_mheap.map[x];
	if(s->sizeclass == 0)
		n = s->npages<<PageShift;
	else
		n = runtime_class_to_size[s->sizeclass];
}
// debug_scanblock is the debug copy of scanblock.
// It is simpler, slower, single-threaded, recursive,
// and uses bitSpecial as the mark bit.
static void
debug_scanblock(byte *b, int64 n)
{
	byte *obj, *p;
	void **vp;
	uintptr size, *bitp, bits, shift, i, xbits, off;
	MSpan *s;
	if(!DebugMark)
		runtime_throw("debug_scanblock without DebugMark");

	if((int64)(uintptr)n != n || n < 0) {
		// runtime_printf("debug_scanblock %p %D\n", b, n);
		runtime_throw("debug_scanblock");
	}
	// Align b to a word boundary.
	off = (uintptr)b & (PtrSize-1);

	vp = (void**)b;
	n /= PtrSize;
	for(i=0; i<(uintptr)n; i++) {
		obj = (byte*)vp[i];
		// Words outside the arena cannot be pointers.
		if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used)
			continue;

		// Round down to word boundary.
		obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
		// Consult span table to find beginning.
		s = runtime_MHeap_LookupMaybe(&runtime_mheap, obj);
		if(s == nil)
			continue;

		p = (byte*)((uintptr)s->start<<PageShift);
		if(s->sizeclass == 0) {
			obj = p;
			size = (uintptr)s->npages<<PageShift;
		} else {
			if((byte*)obj >= (byte*)s->limit)
				continue;
			size = runtime_class_to_size[s->sizeclass];
			int32 i = ((byte*)obj - p)/size;
			obj = p+i*size;
		}
		// Now that we know the object header, reload bits.
		off = (uintptr*)obj - (uintptr*)runtime_mheap.arena_start;
		bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		xbits = *bitp;
		bits = xbits >> shift;
		// Now we have bits, bitp, and shift correct for
		// obj pointing at the base of the object.
		// If not allocated or already marked, done.
		if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0)	// NOTE: bitSpecial not bitMarked
			continue;
		*bitp |= bitSpecial<<shift;
		if(!(bits & bitMarked))
			runtime_printf("found unmarked block %p in %p\n", obj, vp+i);
		// If object has no pointers, don't need to scan further.
		if((bits & bitNoPointers) != 0)
			continue;

		debug_scanblock(obj, size);
	}
}
// Get an empty work buffer off the work.empty list,
// allocating new buffers as needed.
static Workbuf*
getempty(Workbuf *b)
{
	if(work.nproc == 1) {
		// Put b on full list.
		if(b != nil) {
			b->next = work.full;
			work.full = b;
		}
		// Grab from empty list if possible.
		b = work.empty;
		if(b != nil) {
			work.empty = b->next;
			return b;
		}
	} else {
		// Put b on full list.
		if(b != nil) {
			runtime_lock(&work.fmu);
			b->next = work.full;
			work.full = b;
			runtime_unlock(&work.fmu);
		}
		// Grab from empty list if possible.
		runtime_lock(&work.emu);
		b = work.empty;
		if(b != nil)
			work.empty = b->next;
		runtime_unlock(&work.emu);
		if(b != nil)
			return b;
	}
	// Need to allocate a new buffer from the chunk of raw memory.
	runtime_lock(&work);
	if(work.nchunk < sizeof *b) {
		work.nchunk = 1<<20;
		work.chunk = runtime_SysAlloc(work.nchunk);
	}
	b = (Workbuf*)work.chunk;
	work.chunk += sizeof *b;
	work.nchunk -= sizeof *b;
	runtime_unlock(&work);
	return b;
}
static void
putempty(Workbuf *b)
{
	if(work.nproc == 1) {
		b->next = work.empty;
		work.empty = b;
		return;
	}

	runtime_lock(&work.emu);
	b->next = work.empty;
	work.empty = b;
	runtime_unlock(&work.emu);
}
// Get a full work buffer off the work.full list, or return nil.
static Workbuf*
getfull(Workbuf *b)
{
	Workbuf *b1;

	if(work.nproc == 1) {
		// Put b on empty list.
		if(b != nil) {
			b->next = work.empty;
			work.empty = b;
		}
		// Grab from full list if possible.
		// Since work.nproc==1, no one else is
		// going to give us work.
		b = work.full;
		if(b != nil)
			work.full = b->next;
		return b;
	}
	// Grab buffer from full list if possible.
	runtime_lock(&work.fmu);
	if(work.full != nil) {
		b1 = work.full;
		work.full = b1->next;
		runtime_unlock(&work.fmu);
		return b1;
	}
	runtime_unlock(&work.fmu);
	// Nothing available. Wait for work, or give up if everyone is waiting.
	runtime_xadd(&work.nwait, +1);
	for(;;) {
		runtime_lock(&work.fmu);
		if(work.full != nil) {
			runtime_xadd(&work.nwait, -1);
			b1 = work.full;
			work.full = b1->next;
			runtime_unlock(&work.fmu);
			return b1;
		}
		runtime_unlock(&work.fmu);
		if(work.nwait == work.nproc)
			return nil;
		runtime_procyield(20);
	}
}
static Workbuf*
handoff(Workbuf *b)
{
	int32 n;
	Workbuf *b1;

	// Make new buffer with half of b's pointers.
	b1 = getempty(nil);
	n = b->nobj/2;
	b->nobj -= n;
	b1->nobj = n;
	runtime_memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
	nhandoff += n;

	// Put b on full list - let first half of b get stolen.
	runtime_lock(&work.fmu);
	b->next = work.full;
	work.full = b;
	runtime_unlock(&work.fmu);

	return b1;
}
// Scanstack calls scanblock on each of gp's stack segments.
static void
scanstack(void (*scanblock)(byte*, int64), G *gp)
{
#ifdef USING_SPLIT_STACK
	M *mp;
	void* sp;
	size_t spsize;
	void* next_segment;
	void* next_sp;
	void* initial_sp;

	if(gp == runtime_g()) {
		// Scanning our own stack.
		sp = __splitstack_find(nil, nil, &spsize, &next_segment,
				       &next_sp, &initial_sp);
	} else if((mp = gp->m) != nil && mp->helpgc) {
		// gchelper's stack is in active use and has no interesting pointers.
		return;
	} else {
		// Scanning another goroutine's stack.
		// The goroutine is usually asleep (the world is stopped).
		//
		// The exception is that if the goroutine is about to enter or might
		// have just exited a system call, it may be executing code such
		// as schedlock and may have needed to start a new stack segment.
		// Use the stack segment and stack pointer at the time of
		// the system call instead, since that won't change underfoot.
		if(gp->gcstack != nil) {
			sp = gp->gcstack;
			spsize = gp->gcstack_size;
			next_segment = gp->gcnext_segment;
			next_sp = gp->gcnext_sp;
			initial_sp = gp->gcinitial_sp;
		} else {
			sp = __splitstack_find_context(&gp->stack_context[0],
						       &spsize, &next_segment,
						       &next_sp, &initial_sp);
		}
	}
	scanblock(sp, spsize);
	while((sp = __splitstack_find(next_segment, next_sp,
				      &spsize, &next_segment,
				      &next_sp, &initial_sp)) != nil)
		scanblock(sp, spsize);
#else
	M *mp;
	byte* bottom;
	byte* top;

	if(gp == runtime_g()) {
		// Scanning our own stack.
		bottom = (byte*)&gp;
	} else if((mp = gp->m) != nil && mp->helpgc) {
		// gchelper's stack is in active use and has no interesting pointers.
		return;
	} else {
		// Scanning another goroutine's stack.
		// The goroutine is usually asleep (the world is stopped).
		bottom = (byte*)gp->gcnext_sp;
	}
	top = (byte*)gp->gcinitial_sp + gp->gcstack_size;
	if(top > bottom)
		scanblock(bottom, top - bottom);
	else
		scanblock(top, bottom - top);
#endif
}
// Markfin calls scanblock on the blocks that have finalizers:
// the things pointed at cannot be freed until the finalizers have run.
static void
markfin(void *v)
{
	uintptr size;

	size = 0;
	if(!runtime_mlookup(v, (byte**)&v, &size, nil) || !runtime_blockspecial(v))
		runtime_throw("mark - finalizer inconsistency");

	// Do not mark the finalizer block itself; just mark the things it points at.
	scanblock(v, size);
}
struct root_list {
	struct root_list *next;
	struct root {
		void *decl;
		size_t size;
	} roots[];
};

static struct root_list* roots;
void
__go_register_gc_roots (struct root_list* r)
{
	// FIXME: This needs locking if multiple goroutines can call
	// dlopen simultaneously.
	r->next = roots;
	roots = r;
}
static void
debug_markfin(void *v)
{
	uintptr size;

	if(!runtime_mlookup(v, (byte**)&v, &size, nil))
		runtime_throw("debug_mark - finalizer inconsistency");
	debug_scanblock(v, size);
}
static void
mark(void (*scan)(byte*, int64))
{
	struct root_list *pl;
	G *gp;
	FinBlock *fb;

	// mark data+bss.
	for(pl = roots; pl != nil; pl = pl->next) {
		struct root* pr = &pl->roots[0];
		for(;;) {
			void *decl = pr->decl;
			if(decl == nil)
				break;
			scanblock(decl, pr->size);
			pr++;
		}
	}
	scan((byte*)&runtime_m0, sizeof runtime_m0);
	scan((byte*)&runtime_g0, sizeof runtime_g0);
	scan((byte*)&runtime_allg, sizeof runtime_allg);
	scan((byte*)&runtime_allm, sizeof runtime_allm);
	runtime_MProf_Mark(scan);
	runtime_time_scan(scan);
	// mark stacks
	for(gp=runtime_allg; gp!=nil; gp=gp->alllink) {
		switch(gp->status){
		default:
			runtime_printf("unexpected G.status %d\n", gp->status);
			runtime_throw("mark - bad status");
		case Gdead:
			break;
		case Grunning:
			if(gp != runtime_g())
				runtime_throw("mark - world not stopped");
			scanstack(scan, gp);
			break;
		case Grunnable:
		case Gsyscall:
		case Gwaiting:
			scanstack(scan, gp);
			break;
		}
	}
	// mark things pointed at by objects with finalizers
	if(scan == debug_scanblock)
		runtime_walkfintab(debug_markfin, scan);
	else
		runtime_walkfintab(markfin, scan);

	for(fb=allfin; fb; fb=fb->alllink)
		scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]));
	// in multiproc mode, join in the queued work.
	scan(nil, 0);
}
static bool
handlespecial(byte *p, uintptr size)
{
	void (*fn)(void*);
	const struct __go_func_type *ft;
	FinBlock *block;
	Finalizer *f;

	if(!runtime_getfinalizer(p, true, &fn, &ft)) {
		runtime_setblockspecial(p, false);
		runtime_MProf_Free(p, size);
		return false;
	}
	runtime_lock(&finlock);
	if(finq == nil || finq->cnt == finq->cap) {
		if(finc == nil) {
			finc = runtime_SysAlloc(PageSize);
			finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
			finc->alllink = allfin;
			allfin = finc;
		}
		block = finc;
		finc = block->next;
		block->next = finq;
		finq = block;
	}
	f = &finq->fin[finq->cnt];
	finq->cnt++;
	f->fn = fn;
	f->arg = p;
	f->ft = ft;
	runtime_unlock(&finlock);
	return true;
}
// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
static void
sweep(void)
{
	MSpan *s;
	int32 cl, n, npages;
	uintptr size;
	byte *p, *arena_start;
	MCache *c;

	arena_start = runtime_mheap.arena_start;
	for(;;) {
		s = work.spans;
		if(s == nil)
			break;
		if(!runtime_casp(&work.spans, s, s->allnext))
			continue;

		if(s->state != MSpanInUse)
			continue;

		p = (byte*)(s->start << PageShift);
		cl = s->sizeclass;
		if(cl == 0) {
			size = s->npages<<PageShift;
			n = 1;
		} else {
			// Chunk full of small blocks.
			size = runtime_class_to_size[cl];
			npages = runtime_class_to_allocnpages[cl];
			n = (npages << PageShift) / size;
		}
		// Sweep through n objects of given size starting at p.
		// This thread owns the span now, so it can manipulate
		// the block bitmap without atomic operations.
		for(; n > 0; n--, p += size) {
			uintptr off, *bitp, shift, bits;

			off = (uintptr*)p - (uintptr*)arena_start;
			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
			shift = off % wordsPerBitmapWord;
			bits = *bitp>>shift;

			if((bits & bitAllocated) == 0)
				continue;
			if((bits & bitMarked) != 0) {
				if(DebugMark) {
					if(!(bits & bitSpecial))
						runtime_printf("found spurious mark on %p\n", p);
					*bitp &= ~(bitSpecial<<shift);
				}
				*bitp &= ~(bitMarked<<shift);
				continue;
			}
			// Special means it has a finalizer or is being profiled.
			// In DebugMark mode, the bit has been coopted so
			// we have to assume all blocks are special.
			if(DebugMark || (bits & bitSpecial) != 0) {
				if(handlespecial(p, size))
					continue;
			}
			// Mark freed; restore block boundary bit.
			*bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
			if(s->sizeclass == 0) {
				// Free large span.
				runtime_unmarkspan(p, 1<<PageShift);
				*(uintptr*)p = 1;	// needs zeroing
				runtime_MHeap_Free(&runtime_mheap, s, 1);
			} else {
				// Free small object.
				if(size > sizeof(uintptr))
					((uintptr*)p)[1] = 1;	// mark as "needs to be zeroed"
				c->local_by_size[s->sizeclass].nfree++;
				runtime_MCache_Free(c, p, s->sizeclass, size);
			}
			c->local_alloc -= size;
void
runtime_gchelper(void)
{
	// Wait until main proc is ready for mark help.
	runtime_lock(&work.markgate);
	runtime_unlock(&work.markgate);
	scanblock(nil, 0);

	// Wait until main proc is ready for sweep help.
	runtime_lock(&work.sweepgate);
	runtime_unlock(&work.sweepgate);
	sweep();

	if(runtime_xadd(&work.ndone, +1) == work.nproc-1)
		runtime_notewakeup(&work.alldone);
}
// Semaphore, not Lock, so that the goroutine
// reschedules when there is contention rather
// than spinning.
static uint32 gcsema = 1;
// Initialized from $GOGC. GOGC=off means no gc.
//
// Next gc is after we've allocated an extra amount of
// memory proportional to the amount already in use.
// If gcpercent=100 and we're using 4M, we'll gc again
// when we get to 8M. This keeps the gc cost in linear
// proportion to the allocation cost. Adjusting gcpercent
// just changes the linear constant (and also the amount of
// extra memory used).
static int32 gcpercent = -2;
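// For example, with gcpercent=100 and 4M of heap in use after a
// collection, the next collection triggers once heap_alloc reaches 8M;
// see the computation near the end of runtime_gc below:
//	mstats.next_gc = heap_alloc + heap_alloc*gcpercent/100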
static void
stealcache(void)
{
	M *m;

	for(m=runtime_allm; m; m=m->alllink)
		runtime_MCache_ReleaseAll(m->mcache);
}
static void
cachestats(void)
{
	for(m=runtime_allm; m; m=m->alllink) {
		runtime_purgecachedstats(m);
		// stacks_inuse += m->stackalloc->inuse;
		// stacks_sys += m->stackalloc->sys;
		c = m->mcache;
		for(i=0; i<nelem(c->local_by_size); i++) {
			mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
			c->local_by_size[i].nmalloc = 0;
			mstats.by_size[i].nfree += c->local_by_size[i].nfree;
			c->local_by_size[i].nfree = 0;
		}
	}
	mstats.stacks_inuse = stacks_inuse;
	mstats.stacks_sys = stacks_sys;
}
void
runtime_gc(int32 force)
{
	int64 t0, t1, t2, t3;
	uint64 heap0, heap1, obj0, obj1;
	const byte *p;
	bool extra;
	// Make sure all registers are saved on stack so that
	// scanstack sees them.
	__builtin_unwind_init();
	// The gc is turned off (via enablegc) until
	// the bootstrap has completed.
	// Also, malloc gets called in the guts
	// of a number of libraries that might be
	// holding locks. To avoid priority inversion
	// problems, don't bother trying to run gc
	// while holding a lock. The next mallocgc
	// without a lock will do the gc instead.
	if(!mstats.enablegc || m->locks > 0 || runtime_panicking)
		return;
	if(gcpercent == -2) {	// first time through
		p = runtime_getenv("GOGC");
		if(p == nil || p[0] == '\0')
			gcpercent = 100;
		else if(runtime_strcmp((const char*)p, "off") == 0)
			gcpercent = -1;
		else
			gcpercent = runtime_atoi(p);

		p = runtime_getenv("GOGCTRACE");
		if(p != nil)
			gctrace = runtime_atoi(p);
	}
	if(gcpercent < 0)
		return;
	runtime_semacquire(&gcsema);
	if(!force && mstats.heap_alloc < mstats.next_gc) {
		runtime_semrelease(&gcsema);
		return;
	}
	t0 = runtime_nanotime();

	m->gcing = 1;
	runtime_stoptheworld();

	heap0 = mstats.heap_alloc;
	obj0 = mstats.nmalloc - mstats.nfree;
	runtime_lock(&work.markgate);
	runtime_lock(&work.sweepgate);

	extra = false;
	work.nproc = 1;
	if(runtime_gomaxprocs > 1 && runtime_ncpu > 1) {
		runtime_noteclear(&work.alldone);
		work.nproc += runtime_helpgc(&extra);
	}
	work.nwait = 0;
	work.ndone = 0;

	runtime_unlock(&work.markgate);	// let the helpers in
	mark(scanblock);
	if(DebugMark)
		mark(debug_scanblock);
	t1 = runtime_nanotime();
	work.spans = runtime_mheap.allspans;
	runtime_unlock(&work.sweepgate);	// let the helpers in
	sweep();
	if(work.nproc > 1)
		runtime_notesleep(&work.alldone);
	t2 = runtime_nanotime();
	mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
	m->locks++;	// disable gc during the mallocs in newproc

	// kick off or wake up goroutine to run queued finalizers
	if(fing == nil)
		fing = __go_go(runfinq, nil);
	else if(fingwait) {
		fingwait = 0;
		runtime_ready(fing);
	}
	m->locks--;
	heap1 = mstats.heap_alloc;
	obj1 = mstats.nmalloc - mstats.nfree;

	t3 = runtime_nanotime();
	mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0;
	mstats.pause_total_ns += t3 - t0;
	mstats.numgc++;
	if(mstats.debuggc)
		runtime_printf("pause %llu\n", (unsigned long long)(t3 - t0));
	if(gctrace > 0) {
		runtime_printf("gc%d(%d): %llu+%llu+%llu ms %llu -> %llu MB %llu -> %llu (%llu-%llu) objects %llu handoff\n",
			mstats.numgc, work.nproc, (unsigned long long)(t1-t0)/1000000, (unsigned long long)(t2-t1)/1000000, (unsigned long long)(t3-t2)/1000000,
			(unsigned long long)(heap0>>20), (unsigned long long)(heap1>>20),
			(unsigned long long)obj0, (unsigned long long)obj1,
			(unsigned long long)mstats.nmalloc, (unsigned long long)mstats.nfree,
			(unsigned long long)nhandoff);
	}
	runtime_semrelease(&gcsema);
	// If we could have used another helper proc, start one now,
	// in the hope that it will be available next time.
	// It would have been even better to start it before the collection,
	// but doing so requires allocating memory, so it's tricky to
	// coordinate. This lazy approach works out in practice:
	// we don't mind if the first couple gc rounds don't have quite
	// the maximum number of procs.
	runtime_starttheworld(extra);
	// give the queued finalizers, if any, a chance to run
	runtime_gosched();

	if(gctrace > 1 && !force)
		runtime_gc(1);
}
void runtime_ReadMemStats(MStats *)
  __asm__("libgo_runtime.runtime.ReadMemStats");
void
runtime_ReadMemStats(MStats *stats)
{
	// Have to acquire gcsema to stop the world,
	// because stoptheworld can only be used by
	// one goroutine at a time, and there might be
	// a pending garbage collection already calling it.
	runtime_semacquire(&gcsema);
	m->gcing = 1;
	runtime_stoptheworld();
	cachestats();
	*stats = mstats;
	m->gcing = 0;
	runtime_semrelease(&gcsema);
	runtime_starttheworld(false);
}
static void
runfinq(void* dummy __attribute__ ((unused)))
{
	G *gp;
	Finalizer *f;
	FinBlock *fb, *next;
	uint32 i;

	gp = runtime_g();
	for(;;) {
		// There's no need for a lock in this section
		// because it only conflicts with the garbage
		// collector, and the garbage collector only
		// runs when everyone else is stopped, and
		// runfinq only stops at the gosched() or
		// during the calls in the for loop.
		fb = finq;
		finq = nil;
		if(fb == nil) {
			fingwait = 1;
			gp->status = Gwaiting;
			gp->waitreason = "finalizer wait";
			runtime_gosched();
			continue;
		}
		for(; fb; fb=next) {
			next = fb->next;
			for(i=0; i<(uint32)fb->cnt; i++) {
				void *params[1];

				f = &fb->fin[i];
				params[0] = &f->arg;
				runtime_setblockspecial(f->arg, false);
				reflect_call(f->ft, (void*)f->fn, 0, 0, params, nil);
		runtime_gc(1);	// trigger another gc to clean up the finalized objects, if possible
	}
}
// mark the block at v of size n as allocated.
// If noptr is true, mark it as having no pointers.
void
runtime_markallocated(void *v, uintptr n, bool noptr)
{
	uintptr *b, obits, bits, off, shift;
	// runtime_printf("markallocated %p+%p\n", v, n);

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("markallocated: bad pointer");
	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;	// word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	for(;;) {
		obits = *b;
		bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift);
		if(noptr)
			bits |= bitNoPointers<<shift;
		if(runtime_singleproc) {
			*b = bits;
			break;
		} else {
			// more than one goroutine is potentially running: use atomic op
			if(runtime_casp((void**)b, (void*)obits, (void*)bits))
				break;
		}
	}
}
// mark the block at v of size n as freed.
void
runtime_markfreed(void *v, uintptr n)
{
	uintptr *b, obits, bits, off, shift;
	// runtime_printf("markfreed %p+%p\n", v, n);

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("markfreed: bad pointer");
	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;	// word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	for(;;) {
		obits = *b;
		bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
		if(runtime_singleproc) {
			*b = bits;
			break;
		} else {
			// more than one goroutine is potentially running: use atomic op
			if(runtime_casp((void**)b, (void*)obits, (void*)bits))
				break;
		}
	}
}
// check that the block at v of size n is marked freed.
void
runtime_checkfreed(void *v, uintptr n)
{
	uintptr *b, bits, off, shift;

	if(!runtime_checking)
		return;

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		return;	// not allocated, so okay
	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;	// word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	bits = *b>>shift;
	if((bits & bitAllocated) != 0) {
		runtime_printf("checkfreed %p+%p: off=%p have=%p\n",
			v, (void*)n, (void*)off, (void*)(bits & bitMask));
		runtime_throw("checkfreed: not freed");
	}
}
// mark the span of memory at v as having n blocks of the given size.
// if leftover is true, there is left over space at the end of the span.
void
runtime_markspan(void *v, uintptr size, uintptr n, bool leftover)
{
	uintptr *b, off, shift;
	byte *p;

	if((byte*)v+size*n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("markspan: bad pointer");

	p = v;
	if(leftover)	// mark a boundary just past end of last block too
		n++;
	for(; n-- > 0; p += size) {
		// Okay to use non-atomic ops here, because we control
		// the entire span, and each bitmap word has bits for only
		// one span, so no other goroutines are changing these
		// bitmap words.
		off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start;	// word offset
		b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		*b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
	}
}
// unmark the span of memory at v of length n bytes.
void
runtime_unmarkspan(void *v, uintptr n)
{
	uintptr *p, *b, off;

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("unmarkspan: bad pointer");

	p = v;
	off = p - (uintptr*)runtime_mheap.arena_start;	// word offset
	if(off % wordsPerBitmapWord != 0)
		runtime_throw("unmarkspan: unaligned pointer");
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	n /= PtrSize;
	if(n%wordsPerBitmapWord != 0)
		runtime_throw("unmarkspan: unaligned length");
	// Okay to use non-atomic ops here, because we control
	// the entire span, and each bitmap word has bits for only
	// one span, so no other goroutines are changing these
	// bitmap words.
	n /= wordsPerBitmapWord;
	while(n-- > 0)
		*b-- = 0;
}
bool
runtime_blockspecial(void *v)
{
	uintptr *b, off, shift;

	if(DebugMark)
		return true;

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	return (*b & (bitSpecial<<shift)) != 0;
}
void
runtime_setblockspecial(void *v, bool s)
{
	uintptr *b, off, shift, bits, obits;

	if(DebugMark)
		return;

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	for(;;) {
		obits = *b;
		if(s)
			bits = obits | (bitSpecial<<shift);
		else
			bits = obits & ~(bitSpecial<<shift);
		if(runtime_singleproc) {
			*b = bits;
			break;
		} else {
			// more than one goroutine is potentially running: use atomic op
			if(runtime_casp((void**)b, (void*)obits, (void*)bits))
				break;
		}
	}
}
void
runtime_MHeap_MapBits(MHeap *h)
{
	// Caller has added extra mappings to the arena.
	// Add extra mappings of bitmap words as needed.
	// We allocate extra bitmap pieces in chunks of bitmapChunk.
	enum {
		bitmapChunk = 8192
	};
	uintptr n;

	n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
	n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
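	// For example, with bitmapChunk = 8192, n = 10000 rounds up to
	// 16384, so the bitmap mapping grows in 8K steps (the mask trick
	// assumes bitmapChunk is a power of two).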
	if(h->bitmap_mapped >= n)
		return;

	runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped);
	h->bitmap_mapped = n;
}