// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
	PtrSize = sizeof(void*),
	DebugMark = 0,	// run second pass to check mark

	// Four bits per word (see #defines below).
	wordsPerBitmapWord = sizeof(void*)*8/4,
	bitShift = sizeof(void*)*8/4,

// Bits in per-word bitmap.
// #defines because enum might not be able to hold the values.
//
// Each word in the bitmap describes wordsPerBitmapWord words
// of heap memory. There are 4 bitmap bits dedicated to each heap word,
// so on a 64-bit system there is one bitmap word per 16 heap words.
// The bits in the word are packed together by type first, then by
// heap location, so each 64-bit bitmap word consists of, from top to bottom,
// the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
// then the 16 bitNoPointers/bitBlockBoundary bits, then the 16 bitAllocated bits.
// This layout makes it easier to iterate over the bits of a given type.
//
// The bitmap starts at mheap.arena_start and extends *backward* from
// there. On a 64-bit system the off'th word in the arena is tracked by
// the off/16+1'th word before mheap.arena_start. (On a 32-bit system,
// the only difference is that the divisor is 8.)
//
// To pull out the bits corresponding to a given pointer p, we use:
//
//	off = p - (uintptr*)mheap.arena_start;  // word offset
//	b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
//	shift = off % wordsPerBitmapWord;
//	bits = *b >> shift;
//	/* then test bits & bitAllocated, bits & bitMarked, etc. */
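//
// For example, on a 64-bit system the heap word at off=37 is described
// by the bitmap word 37/16+1 = 3 words before arena_start, at
// shift = 37%16 = 5: its bitAllocated, bitNoPointers, bitMarked, and
// bitSpecial bits sit at bit positions 5, 21, 37, and 53 of that word.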
#define bitAllocated		((uintptr)1<<(bitShift*0))
#define bitNoPointers		((uintptr)1<<(bitShift*1))	/* when bitAllocated is set */
#define bitMarked		((uintptr)1<<(bitShift*2))	/* when bitAllocated is set */
#define bitSpecial		((uintptr)1<<(bitShift*3))	/* when bitAllocated is set - has finalizer or being profiled */
#define bitBlockBoundary	((uintptr)1<<(bitShift*1))	/* when bitAllocated is NOT set */

#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
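
// Illustrative sketch only: the lookup described in the comment above,
// written out as a compilable helper. The name bitsfor is hypothetical --
// the runtime manually inlines this pattern at each use below.
static uintptr bitsfor(void *p) __attribute__ ((unused));

static uintptr
bitsfor(void *p)
{
	uintptr off, shift, *b;

	off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start;	// word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;
	return *b >> shift;	// then test & bitAllocated, & bitMarked, etc.
}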
// TODO: Make these per-M.
static uint64 nlookup;
static uint64 nsizelookup;
static uint64 naddrlookup;
static uint64 nhandoff;
typedef struct Workbuf Workbuf;

typedef struct Finalizer Finalizer;
struct Finalizer
{
	void (*fn)(void*);
	void *arg;
	const struct __go_func_type *ft;
};

typedef struct FinBlock FinBlock;
struct FinBlock
{
	FinBlock *alllink;
	FinBlock *next;
	int32 cnt;
	int32 cap;
	Finalizer fin[1];
};

static bool finstarted;
static pthread_mutex_t finqlock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t finqcond = PTHREAD_COND_INITIALIZER;
static FinBlock *finq;		// list of finalizers that are to be executed
static FinBlock *finc;		// cache of free blocks
static FinBlock *allfin;	// list of all blocks
static int32 fingwait;

static void runfinq(void*);
static Workbuf* getempty(Workbuf*);
static Workbuf* getfull(Workbuf*);
static void putempty(Workbuf*);
static Workbuf* handoff(Workbuf*);

	volatile uint32 nwait;
	volatile uint32 ndone;
// scanblock scans a block of n bytes starting at pointer b for references
// to other objects, scanning any it finds recursively until there are no
// unscanned objects left. Instead of using an explicit recursion, it keeps
// a work list in the Workbuf* structures and loops in the main function
// body. Keeping an explicit work list is easier on the stack allocator and
// more efficient.
static void
scanblock(byte *b, int64 n)
{
	byte *obj, *arena_start, *arena_used, *p;
	void **wp;
	uintptr size, *bitp, bits, shift, i, j, x, xbits, off, nobj, nproc;
	Workbuf *wbuf;
	bool keepworking;

	if((int64)(uintptr)n != n || n < 0) {
		// runtime_printf("scanblock %p %lld\n", b, (long long)n);
		runtime_throw("scanblock");
	}

	// Memory arena parameters.
	arena_start = runtime_mheap.arena_start;
	arena_used = runtime_mheap.arena_used;

	nproc = work.nproc;

	wbuf = nil;	// current work buffer
	wp = nil;	// storage for next queued pointer (write pointer)
	nobj = 0;	// number of queued objects

	// Scanblock helpers pass b==nil.
	// The main proc needs to return to make more
	// calls to scanblock. But if work.nproc==1 then
	// might as well process blocks as soon as we
	// have them.
	keepworking = b == nil || work.nproc == 1;
	// Align b to a word boundary.
	off = (uintptr)b & (PtrSize-1);

	// Each iteration scans the block b of length n, queueing pointers in
	// the work buffer.
	runtime_printf("scanblock %p %lld\n", b, (long long) n);

	n >>= (2+PtrSize/8);	/* n /= PtrSize (4 or 8) */
	for(i=0; i<(uintptr)n; i++) {

		// Words outside the arena cannot be pointers.
		if((byte*)obj < arena_start || (byte*)obj >= arena_used)
			continue;

		// obj may be a pointer to a live object.
		// Try to find the beginning of the object.

		// Round down to word boundary.
		obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));

		// Find bits for this word.
		off = (uintptr*)obj - (uintptr*)arena_start;
		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		xbits = *bitp;
		bits = xbits >> shift;
		// Pointing at the beginning of a block?
		if((bits & (bitAllocated|bitBlockBoundary)) != 0)
			goto found;

		// Pointing just past the beginning?
		// Scan backward a little to find a block boundary.
		for(j=shift; j-->0; ) {
			if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
				obj = (byte*)obj - (shift-j)*PtrSize;
				goto found;
			}
		}

		// Otherwise consult span table to find beginning.
		// (Manually inlined copy of MHeap_LookupMaybe.)
		k = (uintptr)obj>>PageShift;
		x = k;
		if(sizeof(void*) == 8)
			x -= (uintptr)arena_start>>PageShift;
		s = runtime_mheap.map[x];
		if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
			continue;
		p = (byte*)((uintptr)s->start<<PageShift);
		if(s->sizeclass == 0) {
			obj = p;
		} else {
			if((byte*)obj >= (byte*)s->limit)
				continue;
			size = runtime_class_to_size[s->sizeclass];
			int32 i = ((byte*)obj - p)/size;
			obj = p+i*size;
		}
		// Now that we know the object header, reload bits.
		off = (uintptr*)obj - (uintptr*)arena_start;
		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		xbits = *bitp;
		bits = xbits >> shift;

	found:
		// Now we have bits, bitp, and shift correct for
		// obj pointing at the base of the object.
		// Only care about allocated and not marked.
		if((bits & (bitAllocated|bitMarked)) != bitAllocated)
			continue;

		// With one proc, set the mark bit directly; with more, set it
		// atomically, since other procs may race on the same bitmap word.
		if(nproc == 1)
			*bitp |= bitMarked<<shift;
		else {
			for(;;) {
				x = *bitp;
				if(x & (bitMarked<<shift))
					goto continue_obj;
				if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
					break;
			}
		}
		// If object has no pointers, don't need to scan further.
		if((bits & bitNoPointers) != 0)
			continue;

		// If another proc wants a pointer, give it some.
		if(nobj > 4 && work.nwait > 0 && work.full == nil) {
			wbuf->nobj = nobj;
			wbuf = handoff(wbuf);
			nobj = wbuf->nobj;
			wp = (void**)(wbuf->obj + nobj);
		}

		// If buffer is full, get a new one.
		if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
			if(wbuf != nil)
				wbuf->nobj = nobj;
			wbuf = getempty(wbuf);
			wp = (void**)(wbuf->obj);
			nobj = 0;
		}

		// Queue the object for scanning.
		*wp++ = obj;
		nobj++;
	continue_obj:;
	}
		// Done scanning [b, b+n). Prepare for the next iteration of
		// the loop by setting b and n to the parameters for the next block.

		// Fetch b from the work buffer.
		if(nobj == 0) {
			// Emptied our buffer: refill.
			wbuf = getfull(wbuf);
			if(wbuf == nil)
				return;
			nobj = wbuf->nobj;
			wp = (void**)(wbuf->obj + wbuf->nobj);
		}
		b = *--wp;
		nobj--;

		// Figure out n = size of b. Start by loading bits for b.
		off = (uintptr*)b - (uintptr*)arena_start;
		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		xbits = *bitp;
		bits = xbits >> shift;
		// Might be small; look for nearby block boundary.
		// A block boundary is marked by either bitBlockBoundary
		// or bitAllocated being set (see notes near their definition).
		boundary = bitBlockBoundary|bitAllocated;
		// Look for a block boundary both after and before b
		// in the same bitmap word.
		//
		// A block boundary j words after b is indicated by
		//	bits>>j & boundary
		// assuming shift+j < bitShift. (If shift+j >= bitShift then
		// we'll be bleeding other bit types like bitMarked into our test.)
		// Instead of inserting the conditional shift+j < bitShift into the loop,
		// we can let j range from 1 to bitShift as long as we first
		// apply a mask to keep only the bits corresponding
		// to shift+j < bitShift aka j < bitShift-shift.
		bits &= (boundary<<(bitShift-shift)) - boundary;
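		// For example, on 64-bit with shift=13: boundary = 0x10001, so
		// the mask is (0x10001<<3) - 0x10001 = 0x70007, keeping only the
		// copies of the boundary bits for j < bitShift-shift = 3.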
		// A block boundary j words before b is indicated by
		//	xbits>>(shift-j) & boundary
		// (assuming shift >= j). There is no cleverness here
		// to avoid the test, because when j gets too large the shift
		// turns negative, which is undefined in C.

		for(j=1; j<bitShift; j++) {
			if(((bits>>j)&boundary) != 0 || (shift>=j && ((xbits>>(shift-j))&boundary) != 0)) {
		// Fall back to asking span about size class.
		// (Manually inlined copy of MHeap_Lookup.)
		x = (uintptr)b>>PageShift;
		if(sizeof(void*) == 8)
			x -= (uintptr)arena_start>>PageShift;
		s = runtime_mheap.map[x];
		if(s->sizeclass == 0)
			n = s->npages<<PageShift;
		else
			n = runtime_class_to_size[s->sizeclass];
// debug_scanblock is the debug copy of scanblock.
// It is simpler, slower, single-threaded, recursive,
// and uses bitSpecial as the mark bit.
static void
debug_scanblock(byte *b, int64 n)
{
	byte *obj, *p;
	void **vp;
	uintptr size, *bitp, bits, shift, i, xbits, off;
	MSpan *s;

	if(!DebugMark)
		runtime_throw("debug_scanblock without DebugMark");

	if((int64)(uintptr)n != n || n < 0) {
		// runtime_printf("debug_scanblock %p %D\n", b, n);
		runtime_throw("debug_scanblock");
	}

	// Align b to a word boundary.
	off = (uintptr)b & (PtrSize-1);
	for(i=0; i<(uintptr)n; i++) {
		obj = vp[i];

		// Words outside the arena cannot be pointers.
		if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used)
			continue;

		// Round down to word boundary.
		obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));

		// Consult span table to find beginning.
		s = runtime_MHeap_LookupMaybe(&runtime_mheap, obj);
		if(s == nil)
			continue;

		p = (byte*)((uintptr)s->start<<PageShift);
		if(s->sizeclass == 0) {
			obj = p;
			size = (uintptr)s->npages<<PageShift;
		} else {
			if((byte*)obj >= (byte*)s->limit)
				continue;
			size = runtime_class_to_size[s->sizeclass];
			int32 i = ((byte*)obj - p)/size;
			obj = p+i*size;
		}
		// Now that we know the object header, reload bits.
		off = (uintptr*)obj - (uintptr*)runtime_mheap.arena_start;
		bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		xbits = *bitp;
		bits = xbits >> shift;

		// Now we have bits, bitp, and shift correct for
		// obj pointing at the base of the object.
		// If not allocated or already marked, done.
		if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0)	// NOTE: bitSpecial not bitMarked
			continue;
		*bitp |= bitSpecial<<shift;
		if(!(bits & bitMarked))
			runtime_printf("found unmarked block %p in %p\n", obj, vp+i);

		// If object has no pointers, don't need to scan further.
		if((bits & bitNoPointers) != 0)
			continue;

		debug_scanblock(obj, size);
	}
}
// Get an empty work buffer off the work.empty list,
// allocating new buffers as needed.
static Workbuf*
getempty(Workbuf *b)
{
	if(work.nproc == 1) {
		// Put b on full list.
		if(b != nil) {
			b->next = work.full;
			work.full = b;
		}
		// Grab from empty list if possible.
		b = work.empty;
		if(b != nil)
			work.empty = b->next;
	} else {
		// Put b on full list.
		if(b != nil) {
			runtime_lock(&work.fmu);
			b->next = work.full;
			work.full = b;
			runtime_unlock(&work.fmu);
		}
		// Grab from empty list if possible.
		runtime_lock(&work.emu);
		b = work.empty;
		if(b != nil)
			work.empty = b->next;
		runtime_unlock(&work.emu);
	}

	runtime_lock(&work);
	if(work.nchunk < sizeof *b) {

		work.chunk = runtime_SysAlloc(work.nchunk);
	}
	b = (Workbuf*)work.chunk;
	work.chunk += sizeof *b;
	work.nchunk -= sizeof *b;
	runtime_unlock(&work);
	if(work.nproc == 1) {
		b->next = work.empty;
		work.empty = b;
		return;
	}

	runtime_lock(&work.emu);
	b->next = work.empty;
	work.empty = b;
	runtime_unlock(&work.emu);
// Get a full work buffer off the work.full list, or return nil.
static Workbuf*
getfull(Workbuf *b)
{
	Workbuf *b1;

	if(work.nproc == 1) {
		// Put b on empty list.
		if(b != nil) {
			b->next = work.empty;
			work.empty = b;
		}
		// Grab from full list if possible.
		// Since work.nproc==1, no one else is
		// going to give us work.
		b = work.full;
		if(b != nil)
			work.full = b->next;
		return b;
	}

	// Grab buffer from full list if possible.
	runtime_lock(&work.fmu);
	if(work.full != nil) {
		b1 = work.full;
		work.full = b1->next;
		runtime_unlock(&work.fmu);
		return b1;
	}
	runtime_unlock(&work.fmu);
	// No work yet; advertise that we're waiting. When all procs
	// are waiting, marking is done and getfull returns nil.
	runtime_xadd(&work.nwait, +1);
	for(;;) {
		runtime_lock(&work.fmu);
		if(work.full != nil) {
			runtime_xadd(&work.nwait, -1);
			b1 = work.full;
			work.full = b1->next;
			runtime_unlock(&work.fmu);
			return b1;
		}
		runtime_unlock(&work.fmu);
		if(work.nwait == work.nproc)
			return nil;
		runtime_procyield(20);
	}
}
	// Make new buffer with half of b's pointers.
	b1 = getempty(nil);
	n = b->nobj/2;
	b->nobj -= n;
	b1->nobj = n;
	runtime_memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
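	// b1 keeps the second half for this proc; b, still holding the
	// first half, goes back on work.full for an idle proc to steal.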
	// Put b on full list - let first half of b get stolen.
	runtime_lock(&work.fmu);
	b->next = work.full;
	work.full = b;
	runtime_unlock(&work.fmu);
	return b1;
}
// Markfin calls scanblock on the blocks that have finalizers:
// the things pointed at cannot be freed until the finalizers have run.
static void
markfin(void *v)
{
	uintptr size;

	if(!runtime_mlookup(v, (byte**)&v, &size, nil) || !runtime_blockspecial(v))
		runtime_throw("mark - finalizer inconsistency");

	// do not mark the finalizer block itself. just mark the things it points at.
	scanblock(v, size);
}
	struct root_list *next;

static struct root_list* roots;

void
__go_register_gc_roots (struct root_list* r)
{
	// FIXME: This needs locking if multiple goroutines can call
	// dlopen simultaneously.
	r->next = roots;
	roots = r;
}
static void
debug_markfin(void *v)
{
	uintptr size;

	if(!runtime_mlookup(v, (byte**)&v, &size, nil))
		runtime_throw("debug_mark - finalizer inconsistency");
	debug_scanblock(v, size);
}
static void
mark(void (*scan)(byte*, int64))
{
	struct root_list *pl;
	FinBlock *fb;

	for(pl = roots; pl != nil; pl = pl->next) {
		struct root* pr = &pl->roots[0];
		for(;;) {
			void *decl = pr->decl;
			if(decl == nil)
				break;
			scanblock(decl, pr->size);
			pr++;
		}
	}

	scan((byte*)&runtime_m0, sizeof runtime_m0);
	scan((byte*)&runtime_g0, sizeof runtime_g0);
	scan((byte*)&finq, sizeof finq);
	runtime_MProf_Mark(scan);

	// mark stacks
	__go_scanstacks(scan);
	// mark things pointed at by objects with finalizers
	if(scan == debug_scanblock)
		runtime_walkfintab(debug_markfin, scan);
	else
		runtime_walkfintab(markfin, scan);

	for(fb=allfin; fb; fb=fb->alllink)
		scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]));
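	// Each Finalizer holds fn, arg, and the function type, so the scan
	// above keeps finalizer arguments live until the finalizers run.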
	// in multiproc mode, join in the queued work.
	scanblock(nil, 0);
}
static bool
handlespecial(byte *p, uintptr size)
{
	void (*fn)(void*);
	const struct __go_func_type *ft;
	Finalizer *f;

	if(!runtime_getfinalizer(p, true, &fn, &ft)) {
		runtime_setblockspecial(p, false);
		runtime_MProf_Free(p, size);
		return false;
	}
	runtime_lock(&finlock);
	if(finq == nil || finq->cnt == finq->cap) {
		if(finc == nil) {
			finc = runtime_SysAlloc(PageSize);
			finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
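			// The +1 counts the Finalizer slot embedded at the end of
			// FinBlock itself (the fin[1] trailing array above).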
			finc->alllink = allfin;
			allfin = finc;
		}
	}
	f = &finq->fin[finq->cnt];
	finq->cnt++;
	runtime_unlock(&finlock);
	return true;
}
// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.

	arena_start = runtime_mheap.arena_start;

	for(;;) {
		s = work.spans;
		if(s == nil)
			break;
		if(!runtime_casp(&work.spans, s, s->allnext))
			continue;

		if(s->state != MSpanInUse)
			continue;

		p = (byte*)(s->start << PageShift);
		cl = s->sizeclass;
		if(cl == 0) {
			size = s->npages<<PageShift;
			n = 1;
		} else {
			// Chunk full of small blocks.
			size = runtime_class_to_size[cl];
			npages = runtime_class_to_allocnpages[cl];
			n = (npages << PageShift) / size;
		}
		// Sweep through n objects of given size starting at p.
		// This thread owns the span now, so it can manipulate
		// the block bitmap without atomic operations.
		for(; n > 0; n--, p += size) {
			uintptr off, *bitp, shift, bits;

			off = (uintptr*)p - (uintptr*)arena_start;
			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
			shift = off % wordsPerBitmapWord;
			bits = *bitp>>shift;

			if((bits & bitAllocated) == 0)
				continue;

			if((bits & bitMarked) != 0) {
				if(DebugMark) {
					if(!(bits & bitSpecial))
						runtime_printf("found spurious mark on %p\n", p);
					*bitp &= ~(bitSpecial<<shift);
				}
				*bitp &= ~(bitMarked<<shift);
				continue;
			}
			// Special means it has a finalizer or is being profiled.
			// In DebugMark mode, the bit has been coopted so
			// we have to assume all blocks are special.
			if(DebugMark || (bits & bitSpecial) != 0) {
				if(handlespecial(p, size))
					continue;
			}

			// Mark freed; restore block boundary bit.
			*bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
			if(s->sizeclass == 0) {
				// Free large span.
				runtime_unmarkspan(p, 1<<PageShift);
				*(uintptr*)p = 1;	// needs zeroing
				runtime_MHeap_Free(&runtime_mheap, s, 1);
			} else {
				// Free small object.
				if(size > sizeof(uintptr))
					((uintptr*)p)[1] = 1;	// mark as "needs to be zeroed"
				c->local_by_size[s->sizeclass].nfree++;
				runtime_MCache_Free(c, p, s->sizeclass, size);
			}
			c->local_alloc -= size;
		}
	}
static pthread_mutex_t gcsema = PTHREAD_MUTEX_INITIALIZER;

void
runtime_gchelper(void)
{
	// Wait until main proc is ready for mark help.
	runtime_lock(&work.markgate);
	runtime_unlock(&work.markgate);
	scanblock(nil, 0);

	// Wait until main proc is ready for sweep help.
	runtime_lock(&work.sweepgate);
	runtime_unlock(&work.sweepgate);
	sweep();

	if(runtime_xadd(&work.ndone, +1) == work.nproc-1)
		runtime_notewakeup(&work.alldone);
}
// Initialized from $GOGC. GOGC=off means no gc.
//
// Next gc is after we've allocated an extra amount of
// memory proportional to the amount already in use.
// If gcpercent=100 and we're using 4M, we'll gc again
// when we get to 8M. This keeps the gc cost in linear
// proportion to the allocation cost. Adjusting gcpercent
// just changes the linear constant (and also the amount of
// extra memory used).
static int32 gcpercent = -2;
void
runtime_gc(int32 force __attribute__ ((unused)))
{
	int64 t0, t1, t2, t3;
	uint64 heap0, heap1, obj0, obj1;
	const byte *p;
	bool extra;

	// The gc is turned off (via enablegc) until
	// the bootstrap has completed.
	// Also, malloc gets called in the guts
	// of a number of libraries that might be
	// holding locks. To avoid priority inversion
	// problems, don't bother trying to run gc
	// while holding a lock. The next mallocgc
	// without a lock will do the gc instead.
	if(!mstats.enablegc || m->locks > 0 /* || runtime_panicking */)
		return;

	if(gcpercent == -2) {	// first time through
		p = runtime_getenv("GOGC");
		if(p == nil || p[0] == '\0')
			gcpercent = 100;
		else if(runtime_strcmp((const char*)p, "off") == 0)
			gcpercent = -1;
		else
			gcpercent = runtime_atoi(p);

		p = runtime_getenv("GOGCTRACE");
		if(p != nil)
			gctrace = runtime_atoi(p);
	}
	if(gcpercent < 0)
		return;
	pthread_mutex_lock(&finqlock);
	pthread_mutex_lock(&gcsema);
	if(!force && mstats.heap_alloc < mstats.next_gc) {
		pthread_mutex_unlock(&gcsema);
		pthread_mutex_unlock(&finqlock);
		return;
	}

	t0 = runtime_nanotime();
	nlookup = 0;
	nsizelookup = 0;
	naddrlookup = 0;
	nhandoff = 0;

	runtime_stoptheworld();

	heap0 = mstats.heap_alloc;
	obj0 = mstats.nmalloc - mstats.nfree;

	runtime_lock(&work.markgate);
	runtime_lock(&work.sweepgate);
	if(runtime_gomaxprocs > 1 && runtime_ncpu > 1) {
		runtime_noteclear(&work.alldone);
		work.nproc += runtime_helpgc(&extra);
	}

	runtime_unlock(&work.markgate);	// let the helpers in
	mark(scanblock);
	if(DebugMark)
		mark(debug_scanblock);
	t1 = runtime_nanotime();

	work.spans = runtime_mheap.allspans;
	runtime_unlock(&work.sweepgate);	// let the helpers in
	sweep();
	if(work.nproc > 1)
		runtime_notesleep(&work.alldone);
	t2 = runtime_nanotime();

	mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
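	// E.g. with the default gcpercent=100 and 4 MB of live heap,
	// next_gc becomes 8 MB, matching the comment above gcpercent.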
	m->locks++;	// disable gc during the mallocs in newproc

	heap1 = mstats.heap_alloc;
	obj1 = mstats.nmalloc - mstats.nfree;

	t3 = runtime_nanotime();
	mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0;
	mstats.pause_total_ns += t3 - t0;
	mstats.numgc++;
	if(mstats.debuggc)
		runtime_printf("pause %llu\n", (unsigned long long)(t3-t0));
934 runtime_printf("gc%d: %llu+%llu+%llu ms %llu -> %llu MB %llu -> %llu (%llu-%llu) objects %llu pointer lookups (%llu size, %llu addr) %llu handoff\n",
935 mstats.numgc, (unsigned long long)(t1-t0)/1000000, (unsigned long long)(t2-t1)/1000000, (unsigned long long)(t3-t2)/1000000,
936 (unsigned long long)heap0>>20, (unsigned long long)heap1>>20, (unsigned long long)obj0, (unsigned long long)obj1,
937 (unsigned long long)mstats.nmalloc, (unsigned long long)mstats.nfree,
938 (unsigned long long)nlookup, (unsigned long long)nsizelookup, (unsigned long long)naddrlookup, (unsigned long long) nhandoff);
	pthread_mutex_unlock(&gcsema);

	// If we could have used another helper proc, start one now,
	// in the hope that it will be available next time.
	// It would have been even better to start it before the collection,
	// but doing so requires allocating memory, so it's tricky to
	// coordinate. This lazy approach works out in practice:
	// we don't mind if the first couple gc rounds don't have quite
	// the maximum number of procs.
	runtime_starttheworld(extra);

	// finqlock is still held.
	if(finq != nil) {
		// kick off or wake up goroutine to run queued finalizers
		if(!finstarted) {
			__go_go(runfinq, nil);
			finstarted = 1;
		} else if(fingwait) {
			fingwait = 0;
			pthread_cond_signal(&finqcond);
		}
	}
	m->locks--;
	pthread_mutex_unlock(&finqlock);

	if(gctrace > 1 && !force)
		runtime_gc(1);
}
void runtime_UpdateMemStats(void)
	__asm__("libgo_runtime.runtime.UpdateMemStats");

void
runtime_UpdateMemStats(void)
{
	// Have to acquire gcsema to stop the world,
	// because stoptheworld can only be used by
	// one goroutine at a time, and there might be
	// a pending garbage collection already calling it.
	pthread_mutex_lock(&gcsema);
	runtime_stoptheworld();

	pthread_mutex_unlock(&gcsema);
	runtime_starttheworld(false);
}
	for(;;) {
		pthread_mutex_lock(&finqlock);
		fb = finq;
		finq = nil;
		if(fb == nil) {
			fingwait = 1;
			pthread_cond_wait(&finqcond, &finqlock);
			pthread_mutex_unlock(&finqlock);
			continue;
		}
		pthread_mutex_unlock(&finqlock);
		for(; fb; fb=next) {
			next = fb->next;
			for(i=0; i<(uint32)fb->cnt; i++) {
				void *params[1];

				f = &fb->fin[i];
				params[0] = &f->arg;
				runtime_setblockspecial(f->arg, false);
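				// reflect_call invokes f->fn with &f->arg as its single
				// argument, using the recorded __go_func_type f->ft to
				// describe the signature.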
				reflect_call(f->ft, (void*)f->fn, 0, 0, params, nil);
			}
		}
		runtime_gc(1);	// trigger another gc to clean up the finalized objects, if possible
	}
}
#define runtime_singleproc 0
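// With runtime_singleproc fixed at 0, the non-atomic fast paths guarded
// by it below are compiled out: libgo must assume goroutines may run on
// multiple OS threads at once.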
// mark the block at v of size n as allocated.
// If noptr is true, mark it as having no pointers.
void
runtime_markallocated(void *v, uintptr n, bool noptr)
{
	uintptr *b, obits, bits, off, shift;

	// runtime_printf("markallocated %p+%p\n", v, n);
	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("markallocated: bad pointer");

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;	// word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	for(;;) {
		obits = *b;
		bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift);
		if(noptr)
			bits |= bitNoPointers<<shift;
		if(runtime_singleproc) {
			*b = bits;
			break;
		} else {
			// more than one goroutine is potentially running: use atomic op
			if(runtime_casp((void**)b, (void*)obits, (void*)bits))
				break;
		}
	}
}
// mark the block at v of size n as freed.
void
runtime_markfreed(void *v, uintptr n)
{
	uintptr *b, obits, bits, off, shift;

	// runtime_printf("markfreed %p+%p\n", v, n);
	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("markfreed: bad pointer");

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;	// word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	for(;;) {
		obits = *b;
		bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
		if(runtime_singleproc) {
			*b = bits;
			break;
		} else {
			// more than one goroutine is potentially running: use atomic op
			if(runtime_casp((void**)b, (void*)obits, (void*)bits))
				break;
		}
	}
}
// check that the block at v of size n is marked freed.
void
runtime_checkfreed(void *v, uintptr n)
{
	uintptr *b, bits, off, shift;

	if(!runtime_checking)
		return;

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		return;	// not allocated, so okay

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;	// word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	bits = *b>>shift;
	if((bits & bitAllocated) != 0) {
		runtime_printf("checkfreed %p+%p: off=%p have=%p\n",
			v, (void*)n, (void*)off, (void*)(bits & bitMask));
		runtime_throw("checkfreed: not freed");
	}
}
// mark the span of memory at v as having n blocks of the given size.
// if leftover is true, there is left over space at the end of the span.
void
runtime_markspan(void *v, uintptr size, uintptr n, bool leftover)
{
	uintptr *b, off, shift;
	byte *p;

	if((byte*)v+size*n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("markspan: bad pointer");

	p = v;
	if(leftover)	// mark a boundary just past end of last block too
		n++;
	for(; n-- > 0; p += size) {
		// Okay to use non-atomic ops here, because we control
		// the entire span, and each bitmap word has bits for only
		// one span, so no other goroutines are changing these
		// bitmap words.
		off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start;	// word offset
		b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		*b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
	}
}
// unmark the span of memory at v of length n bytes.
void
runtime_unmarkspan(void *v, uintptr n)
{
	uintptr *p, *b, off;

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("unmarkspan: bad pointer");

	p = v;
	off = p - (uintptr*)runtime_mheap.arena_start;	// word offset
	if(off % wordsPerBitmapWord != 0)
		runtime_throw("unmarkspan: unaligned pointer");
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	n /= PtrSize;
	if(n%wordsPerBitmapWord != 0)
		runtime_throw("unmarkspan: unaligned length");
	// Okay to use non-atomic ops here, because we control
	// the entire span, and each bitmap word has bits for only
	// one span, so no other goroutines are changing these
	// bitmap words.
	n /= wordsPerBitmapWord;
	while(n-- > 0)
		*b-- = 0;
}
bool
runtime_blockspecial(void *v)
{
	uintptr *b, off, shift;

	if(DebugMark)
		return true;

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	return (*b & (bitSpecial<<shift)) != 0;
}
void
runtime_setblockspecial(void *v, bool s)
{
	uintptr *b, off, shift, bits, obits;

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	for(;;) {
		obits = *b;
		if(s)
			bits = obits | (bitSpecial<<shift);
		else
			bits = obits & ~(bitSpecial<<shift);
		if(runtime_singleproc) {
			*b = bits;
			break;
		} else {
			// more than one goroutine is potentially running: use atomic op
			if(runtime_casp((void**)b, (void*)obits, (void*)bits))
				break;
		}
	}
}
void
runtime_MHeap_MapBits(MHeap *h)
{
	// Caller has added extra mappings to the arena.
	// Add extra mappings of bitmap words as needed.
	// We allocate extra bitmap pieces in chunks of bitmapChunk.
	uintptr n;

	n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
	n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
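	// n is the bitmap size in bytes: 4 bitmap bits per heap word works
	// out to one bitmap byte per wordsPerBitmapWord heap bytes. The
	// rounding assumes bitmapChunk is a power of two.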
	if(h->bitmap_mapped >= n)
		return;

	runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped);
	h->bitmap_mapped = n;
}
	mstats.enablegc = 1;