1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Garbage collector.
6
7 #include "runtime.h"
8 #include "malloc.h"
9
10 enum {
11         Debug = 0,
12         UseCas = 1,
13         PtrSize = sizeof(void*),
14         
15         // Four bits per word (see #defines below).
16         wordsPerBitmapWord = sizeof(void*)*8/4,
17         bitShift = sizeof(void*)*8/4,
18 };
19
20 // Bits in per-word bitmap.
21 // #defines because enum might not be able to hold the values.
22 //
23 // Each word in the bitmap describes wordsPerBitmapWord words
24 // of heap memory.  There are 4 bitmap bits dedicated to each heap word,
25 // so on a 64-bit system there is one bitmap word per 16 heap words.
26 // The bits in the word are packed together by type first, then by
27 // heap location, so each 64-bit bitmap word consists of, from top to bottom,
28 // the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
29 // then the 16 bitNoPointers/bitBlockBoundary bits, then the 16 bitAllocated bits.
30 // This layout makes it easier to iterate over the bits of a given type.
31 //
32 // The bitmap starts at mheap.arena_start and extends *backward* from
33 // there.  On a 64-bit system the off'th word in the arena is tracked by
34 // the off/16+1'th word before mheap.arena_start.  (On a 32-bit system,
35 // the only difference is that the divisor is 8.)
36 //
37 // To pull out the bits corresponding to a given pointer p, we use:
38 //
39 //      off = p - (uintptr*)mheap.arena_start;  // word offset
40 //      b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
41 //      shift = off % wordsPerBitmapWord
42 //      bits = *b >> shift;
43 //      /* then test bits & bitAllocated, bits & bitMarked, etc. */
44 //
45 #define bitAllocated            ((uintptr)1<<(bitShift*0))
46 #define bitNoPointers           ((uintptr)1<<(bitShift*1))      /* when bitAllocated is set */
47 #define bitMarked               ((uintptr)1<<(bitShift*2))      /* when bitAllocated is set */
48 #define bitSpecial              ((uintptr)1<<(bitShift*3))      /* when bitAllocated is set - has finalizer or being profiled */
49 #define bitBlockBoundary        ((uintptr)1<<(bitShift*1))      /* when bitAllocated is NOT set */
50
51 #define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
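
// The helper below is not part of the original source; it is a minimal,
// illustrative sketch (the name is made up) of the bit lookup described in
// the comment above.  The collector proper inlines this calculation by hand
// wherever it is needed.
static uintptr example_bitmap_bits(void *p) __attribute__ ((unused));

static uintptr
example_bitmap_bits(void *p)
{
        uintptr off, shift, *b;

        off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start;  // word offset
        b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
        shift = off % wordsPerBitmapWord;
        return *b >> shift;  // caller tests bitAllocated, bitMarked, etc.
}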
52
53 static uint64 nlookup;
54 static uint64 nsizelookup;
55 static uint64 naddrlookup;
56 static int32 gctrace;
57
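// A Workbuf holds a batch of pointers still waiting to be scanned.  With
// 8-byte pointers a Workbuf is exactly 16 kB: 2046 work slots plus the
// next and nw header words.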
58 typedef struct Workbuf Workbuf;
59 struct Workbuf
60 {
61         Workbuf *next;
62         uintptr nw;
63         byte *w[2048-2];
64 };
65
66 static bool finstarted;
67 static pthread_mutex_t finqlock = PTHREAD_MUTEX_INITIALIZER;
68 static pthread_cond_t finqcond = PTHREAD_COND_INITIALIZER;
69 static Finalizer *finq;
70 static int32 fingwait;
71
72 static void runfinq(void*);
73 static Workbuf* getempty(Workbuf*);
74 static Workbuf* getfull(Workbuf*);
75
76 // scanblock scans a block of n bytes starting at pointer b for references
77 // to other objects, scanning any it finds recursively until there are no
78 // unscanned objects left.  Instead of using an explicit recursion, it keeps
79 // a work list in the Workbuf* structures and loops in the main function
80 // body.  Keeping an explicit work list is easier on the stack allocator and
81 // more efficient.
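// In outline (a simplified sketch, not the exact control flow used below):
//
//      for(;;) {
//              scan each word of [b, b+n);
//              for each word that points into the heap:
//                      find the start of the object it points into;
//                      if that object is allocated and not yet marked:
//                              mark it; if it may hold pointers, push it on the work list;
//              if the work list is empty, break;
//              pop an object; set b, n to its address and size;
//      }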
82 static void
83 scanblock(byte *b, int64 n)
84 {
85         byte *obj, *arena_start, *p;
86         void **vp;
87         uintptr size, *bitp, bits, shift, i, j, x, xbits, off;
88         MSpan *s;
89         PageID k;
90         void **bw, **w, **ew;
91         Workbuf *wbuf;
92
93         if((int64)(uintptr)n != n || n < 0) {
94                 // runtime_printf("scanblock %p %lld\n", b, (long long)n);
95                 runtime_throw("scanblock");
96         }
97
98         // Memory arena parameters.
99         arena_start = runtime_mheap.arena_start;
100         
101         wbuf = nil;  // current work buffer
102         ew = nil;  // end of work buffer
103         bw = nil;  // beginning of work buffer
104         w = nil;  // current pointer into work buffer
105
106         // Align b to a word boundary.
107         off = (uintptr)b & (PtrSize-1);
108         if(off != 0) {
109                 b += PtrSize - off;
110                 n -= PtrSize - off;
111         }
112
113         for(;;) {
114                 // Each iteration scans the block b of length n, queueing pointers in
115                 // the work buffer.
116                 if(Debug > 1)
117                         runtime_printf("scanblock %p %lld\n", b, (long long) n);
118
119                 vp = (void**)b;
120                 n /= PtrSize;
121                 for(i=0; i<(uintptr)n; i++) {
122                         obj = (byte*)vp[i];
123                         
124                         // Words outside the arena cannot be pointers.
125                         if((byte*)obj < arena_start || (byte*)obj >= runtime_mheap.arena_used)
126                                 continue;
127                         
128                         // obj may be a pointer to a live object.
129                         // Try to find the beginning of the object.
130                         
131                         // Round down to word boundary.
132                         obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
133
134                         // Find bits for this word.
135                         off = (uintptr*)obj - (uintptr*)arena_start;
136                         bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
137                         shift = off % wordsPerBitmapWord;
138                         xbits = *bitp;
139                         bits = xbits >> shift;
140
141                         // Pointing at the beginning of a block?
142                         if((bits & (bitAllocated|bitBlockBoundary)) != 0)
143                                 goto found;
144
145                         // Pointing just past the beginning?
146                         // Scan backward a little to find a block boundary.
147                         for(j=shift; j-->0; ) {
148                                 if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
149                                         obj = (byte*)obj - (shift-j)*PtrSize;
150                                         shift = j;
151                                         bits = xbits>>shift;
152                                         goto found;
153                                 }
154                         }
155
156                         // Otherwise consult span table to find beginning.
157                         // (Manually inlined copy of MHeap_LookupMaybe.)
158                         nlookup++;
159                         naddrlookup++;
160                         k = (uintptr)obj>>PageShift;
161                         x = k;
162                         if(sizeof(void*) == 8)
163                                 x -= (uintptr)arena_start>>PageShift;
164                         s = runtime_mheap.map[x];
165                         if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
166                                 continue;
167                         p =  (byte*)((uintptr)s->start<<PageShift);
168                         if(s->sizeclass == 0) {
169                                 obj = p;
170                         } else {
171                                 if((byte*)obj >= (byte*)s->limit)
172                                         continue;
173                                 size = runtime_class_to_size[s->sizeclass];
174                                 int32 i = ((byte*)obj - p)/size;
175                                 obj = p+i*size;
176                         }
177
178                         // Now that we know the object header, reload bits.
179                         off = (uintptr*)obj - (uintptr*)arena_start;
180                         bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
181                         shift = off % wordsPerBitmapWord;
182                         xbits = *bitp;
183                         bits = xbits >> shift;
184
185                 found:
186                         // Now we have bits, bitp, and shift correct for
187                         // obj pointing at the base of the object.
188                         // If not allocated or already marked, done.
189                         if((bits & bitAllocated) == 0 || (bits & bitMarked) != 0)
190                                 continue;
191                         *bitp |= bitMarked<<shift;
192
193                         // If object has no pointers, don't need to scan further.
194                         if((bits & bitNoPointers) != 0)
195                                 continue;
196
197                         // If buffer is full, get a new one.
198                         if(w >= ew) {
199                                 wbuf = getempty(wbuf);
200                                 bw = (void**)wbuf->w;
201                                 w = bw;
202                                 ew = bw + nelem(wbuf->w);
203                         }
204                         *w++ = obj;
205                 }
206                 
207                 // Done scanning [b, b+n).  Prepare for the next iteration of
208                 // the loop by setting b and n to the parameters for the next block.
209
210                 // Fetch b from the work buffers.
211                 if(w <= bw) {
212                         // Emptied our buffer: refill.
213                         wbuf = getfull(wbuf);
214                         if(wbuf == nil)
215                                 break;
216                         bw = (void**)wbuf->w;
217                         ew = (void**)(wbuf->w + nelem(wbuf->w));
218                         w = bw+wbuf->nw;
219                 }
220                 b = *--w;
221         
222                 // Figure out n = size of b.  Start by loading bits for b.
223                 off = (uintptr*)b - (uintptr*)arena_start;
224                 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
225                 shift = off % wordsPerBitmapWord;
226                 xbits = *bitp;
227                 bits = xbits >> shift;
228                 
229                 // Might be small; look for nearby block boundary.
230                 // A block boundary is marked by either bitBlockBoundary
231                 // or bitAllocated being set (see notes near their definition).
232                 enum {
233                         boundary = bitBlockBoundary|bitAllocated
234                 };
235                 // Look for a block boundary both after and before b
236                 // in the same bitmap word.
237                 //
238                 // A block boundary j words after b is indicated by
239                 //      bits>>j & boundary
240                 // assuming shift+j < bitShift.  (If shift+j >= bitShift then
241                 // we'll be bleeding other bit types like bitMarked into our test.)
242                 // Instead of inserting the conditional shift+j < bitShift into the loop,
243                 // we can let j range from 1 to bitShift as long as we first
244                 // apply a mask to keep only the bits corresponding
245                 // to shift+j < bitShift aka j < bitShift-shift.
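                // For example, on a 64-bit system bitShift is 16 and boundary has
                // bits 0 and 16 set.  With shift = 5, (boundary<<11) - boundary has
                // bits 0..10 and 16..26 set: exactly the bitAllocated and
                // bitBlockBoundary positions for j < bitShift-shift = 11.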
246                 bits &= (boundary<<(bitShift-shift)) - boundary;
247                 
248                 // A block boundary j words before b is indicated by
249                 //      xbits>>(shift-j) & boundary
250                 // (assuming shift >= j).  There is no cleverness here to
251                 // avoid the test, because when j gets too large the shift
252                 // turns negative, which is undefined in C.
253
254                 for(j=1; j<bitShift; j++) {
255                         if(((bits>>j)&boundary) != 0 || (shift>=j && ((xbits>>(shift-j))&boundary) != 0)) {
256                                 n = j*PtrSize;
257                                 goto scan;
258                         }
259                 }
260                 
261                 // Fall back to asking span about size class.
262                 // (Manually inlined copy of MHeap_Lookup.)
263                 nlookup++;
264                 nsizelookup++;
265                 x = (uintptr)b>>PageShift;
266                 if(sizeof(void*) == 8)
267                         x -= (uintptr)arena_start>>PageShift;
268                 s = runtime_mheap.map[x];
269                 if(s->sizeclass == 0)
270                         n = s->npages<<PageShift;
271                 else
272                         n = runtime_class_to_size[s->sizeclass];
273         scan:;
274         }
275 }
276
277 static struct {
278         Workbuf *full;
279         Workbuf *empty;
280         byte    *chunk;
281         uintptr nchunk;
282 } work;
283
284 // Get an empty work buffer off the work.empty list,
285 // allocating new buffers as needed.
286 static Workbuf*
287 getempty(Workbuf *b)
288 {
289         if(b != nil) {
290                 b->nw = nelem(b->w);
291                 b->next = work.full;
292                 work.full = b;
293         }
294         b = work.empty;
295         if(b != nil) {
296                 work.empty = b->next;
297                 return b;
298         }
299         
300         if(work.nchunk < sizeof *b) {
301                 work.nchunk = 1<<20;
302                 work.chunk = runtime_SysAlloc(work.nchunk);
303         }
304         b = (Workbuf*)work.chunk;
305         work.chunk += sizeof *b;
306         work.nchunk -= sizeof *b;
307         return b;
308 }
309
310 // Get a full work buffer off the work.full list, or return nil.
311 static Workbuf*
312 getfull(Workbuf *b)
313 {
314         if(b != nil) {
315                 b->nw = 0;
316                 b->next = work.empty;
317                 work.empty = b;
318         }
319         b = work.full;
320         if(b != nil)
321                 work.full = b->next;
322         return b;
323 }
324
325 // markfin scans the block at v (which has a finalizer) so that the objects it points at are marked.
326 static void
327 markfin(void *v)
328 {
329         uintptr size;
330
331         size = 0;
332         if(!runtime_mlookup(v, (byte**)&v, &size, nil) || !runtime_blockspecial(v))
333                 runtime_throw("mark - finalizer inconsistency");
334
335         // do not mark the finalizer block itself.  just mark the things it points at.
336         scanblock(v, size);
337 }
338
339 struct root_list {
340         struct root_list *next;
341         struct root {
342                 void *decl;
343                 size_t size;
344         } roots[];
345 };
346
347 static struct root_list* roots;
348
349 void
350 __go_register_gc_roots (struct root_list* r)
351 {
352         // FIXME: This needs locking if multiple goroutines can call
353         // dlopen simultaneously.
354         r->next = roots;
355         roots = r;
356 }
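
// Illustrative only (not part of the runtime and not necessarily the exact
// form the compiler emits): a caller registers a root_list whose roots
// array is terminated by an entry with a nil decl, roughly:
//
//      static int32 example_global;
//      static struct root_list example_roots = {
//              nil,
//              { { &example_global, sizeof example_global }, { nil, 0 } },
//      };
//      /* during package initialization: */
//      __go_register_gc_roots(&example_roots);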
357
358 // mark scans the root set: registered globals, m0, the finalizer queue, profiling data, the stacks, and data referenced by blocks with finalizers.
359 static void
360 mark(void)
361 {
362         struct root_list *pl;
363
364         for(pl = roots; pl != nil; pl = pl->next) {
365                 struct root* pr = &pl->roots[0];
366                 while(1) {
367                         void *decl = pr->decl;
368                         if(decl == nil)
369                                 break;
370                         scanblock(decl, pr->size);
371                         pr++;
372                 }
373         }
374
375         scanblock((byte*)&m0, sizeof m0);
376         scanblock((byte*)&finq, sizeof finq);
377         runtime_MProf_Mark(scanblock);
378
379         // mark stacks
380         __go_scanstacks(scanblock);
381
382         // mark things pointed at by objects with finalizers
383         runtime_walkfintab(markfin, scanblock);
384 }
385
386 // Sweep frees or calls finalizers for blocks not marked in the mark phase.
387 // It clears the mark bits in preparation for the next GC round.
388 static void
389 sweep(void)
390 {
391         MSpan *s;
392         int32 cl, n, npages;
393         uintptr size;
394         byte *p;
395         MCache *c;
396         Finalizer *f;
397
398         for(s = runtime_mheap.allspans; s != nil; s = s->allnext) {
399                 if(s->state != MSpanInUse)
400                         continue;
401
402                 p = (byte*)(s->start << PageShift);
403                 cl = s->sizeclass;
404                 if(cl == 0) {
405                         size = s->npages<<PageShift;
406                         n = 1;
407                 } else {
408                         // Chunk full of small blocks.
409                         size = runtime_class_to_size[cl];
410                         npages = runtime_class_to_allocnpages[cl];
411                         n = (npages << PageShift) / size;
412                 }
413         
414                 // sweep through n objects of given size starting at p.
415                 for(; n > 0; n--, p += size) {
416                         uintptr off, *bitp, shift, bits;
417
418                         off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start;
419                         bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
420                         shift = off % wordsPerBitmapWord;
421                         bits = *bitp>>shift;
422
423                         if((bits & bitAllocated) == 0)
424                                 continue;
425
426                         if((bits & bitMarked) != 0) {
427                                 *bitp &= ~(bitMarked<<shift);
428                                 continue;
429                         }
430
431                         if((bits & bitSpecial) != 0) {
432                                 // Special means it has a finalizer or is being profiled.
433                                 f = runtime_getfinalizer(p, 1);
434                                 if(f != nil) {
435                                         f->arg = p;
436                                         f->next = finq;
437                                         finq = f;
438                                         continue;
439                                 }
440                                 runtime_MProf_Free(p, size);
441                         }
442
443                         // Mark freed; restore block boundary bit.
444                         *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
445
446                         c = m->mcache;
447                         if(s->sizeclass == 0) {
448                                 // Free large span.
449                                 runtime_unmarkspan(p, 1<<PageShift);
450                                 *(uintptr*)p = 1;       // needs zeroing
451                                 runtime_MHeap_Free(&runtime_mheap, s, 1);
452                         } else {
453                                 // Free small object.
454                                 if(size > sizeof(uintptr))
455                                         ((uintptr*)p)[1] = 1;   // mark as "needs to be zeroed"
456                                 c->local_by_size[s->sizeclass].nfree++;
457                                 runtime_MCache_Free(c, p, s->sizeclass, size);
458                         }
459                         c->local_alloc -= size;
460                         c->local_nfree++;
461                 }
462         }
463 }
464
465 static pthread_mutex_t gcsema = PTHREAD_MUTEX_INITIALIZER;
466
467 // Initialized from $GOGC.  GOGC=off means no gc.
468 //
469 // Next gc is after we've allocated an extra amount of
470 // memory proportional to the amount already in use.
471 // If gcpercent=100 and we're using 4M, we'll gc again
472 // when we get to 8M.  This keeps the gc cost in linear
473 // proportion to the allocation cost.  Adjusting gcpercent
474 // just changes the linear constant (and also the amount of
475 // extra memory used).
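// gcpercent == -2 means the value has not yet been read from $GOGC;
// -1 (GOGC=off) disables collection entirely.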
476 static int32 gcpercent = -2;
477
478 void
479 runtime_gc(int32 force __attribute__ ((unused)))
480 {
481         int64 t0, t1, t2, t3;
482         uint64 heap0, heap1, obj0, obj1;
483         char *p;
484         Finalizer *fp;
485
486         // The gc is turned off (via enablegc) until
487         // the bootstrap has completed.
488         // Also, malloc gets called in the guts
489         // of a number of libraries that might be
490         // holding locks.  To avoid priority inversion
491         // problems, don't bother trying to run gc
492         // while holding a lock.  The next mallocgc
493         // without a lock will do the gc instead.
494         if(!mstats.enablegc || m->locks > 0 /* || runtime_panicking */)
495                 return;
496
497         if(gcpercent == -2) {   // first time through
498                 p = runtime_getenv("GOGC");
499                 if(p == nil || p[0] == '\0')
500                         gcpercent = 100;
501                 else if(runtime_strcmp(p, "off") == 0)
502                         gcpercent = -1;
503                 else
504                         gcpercent = runtime_atoi(p);
505                 
506                 p = runtime_getenv("GOGCTRACE");
507                 if(p != nil)
508                         gctrace = runtime_atoi(p);
509         }
510         if(gcpercent < 0)
511                 return;
512
513         pthread_mutex_lock(&finqlock);
514         pthread_mutex_lock(&gcsema);
515         if(!force && mstats.heap_alloc < mstats.next_gc) {
516                 pthread_mutex_unlock(&gcsema);
517                 pthread_mutex_unlock(&finqlock);
518                 return;
519         }
520
521         t0 = runtime_nanotime();
522         nlookup = 0;
523         nsizelookup = 0;
524         naddrlookup = 0;
525
526         m->gcing = 1;
527         runtime_stoptheworld();
528         if(runtime_mheap.Lock.key != 0)
529                 runtime_throw("runtime_mheap locked during gc");
530
531         __go_cachestats();
532         heap0 = mstats.heap_alloc;
533         obj0 = mstats.nmalloc - mstats.nfree;
534
535         mark();
536         t1 = runtime_nanotime();
537         sweep();
538         t2 = runtime_nanotime();
539         __go_stealcache();
540         __go_cachestats();
541
542         mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
543         m->gcing = 0;
544
545         m->locks++;     // disable gc during the mallocs in newproc
546
547         heap1 = mstats.heap_alloc;
548         obj1 = mstats.nmalloc - mstats.nfree;
549
550         t3 = runtime_nanotime();
551         mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0;
552         mstats.pause_total_ns += t3 - t0;
553         mstats.numgc++;
554         if(mstats.debuggc)
555                 runtime_printf("pause %llu\n", (unsigned long long)t3-t0);
556         
557         if(gctrace) {
558                 runtime_printf("gc%d: %llu+%llu+%llu ms %llu -> %llu MB %llu -> %llu (%llu-%llu) objects %llu pointer lookups (%llu size, %llu addr)\n",
559                         mstats.numgc, (unsigned long long)(t1-t0)/1000000, (unsigned long long)(t2-t1)/1000000, (unsigned long long)(t3-t2)/1000000,
560                         (unsigned long long)heap0>>20, (unsigned long long)heap1>>20, (unsigned long long)obj0, (unsigned long long)obj1,
561                         (unsigned long long)mstats.nmalloc, (unsigned long long)mstats.nfree,
562                         (unsigned long long)nlookup, (unsigned long long)nsizelookup, (unsigned long long)naddrlookup);
563         }
564
565         pthread_mutex_unlock(&gcsema);
566         runtime_starttheworld();
567
568         // finqlock is still held.
569         fp = finq;
570         if(fp != nil) {
571                 // kick off or wake up goroutine to run queued finalizers
572                 if(!finstarted) {
573                         __go_go(runfinq, nil);
574                         finstarted = 1;
575                 }
576                 else if(fingwait) {
577                         fingwait = 0;
578                         pthread_cond_signal(&finqcond);
579                 }
580         }
581         m->locks--;
582         pthread_mutex_unlock(&finqlock);
583
584         if(gctrace > 1 && !force)
585                 runtime_gc(1);
586 }
587
588 void runtime_UpdateMemStats(void)
589   __asm__("libgo_runtime.runtime.UpdateMemStats");
590
591 void
592 runtime_UpdateMemStats(void)
593 {
594         // Have to acquire gcsema to stop the world,
595         // because stoptheworld can only be used by
596         // one goroutine at a time, and there might be
597         // a pending garbage collection already calling it.
598         pthread_mutex_lock(&gcsema);
599         m->gcing = 1;
600         runtime_stoptheworld();
601         __go_cachestats();
602         m->gcing = 0;
603         pthread_mutex_unlock(&gcsema);
604         runtime_starttheworld();
605 }
606
607 static void
608 runfinq(void* dummy)
609 {
610         Finalizer *f, *next;
611
612         USED(dummy);
613
614         for(;;) {
615                 pthread_mutex_lock(&finqlock);
616                 f = finq;
617                 finq = nil;
618                 if(f == nil) {
619                         fingwait = 1;
620                         pthread_cond_wait(&finqcond, &finqlock);
621                         pthread_mutex_unlock(&finqlock);
622                         continue;
623                 }
624                 pthread_mutex_unlock(&finqlock);
625                 for(; f; f=next) {
626                         void *params[1];
627
628                         next = f->next;
629                         params[0] = &f->arg;
630                         reflect_call(f->ft, (void*)f->fn, 0, 0, params, nil);
631                         f->fn = nil;
632                         f->arg = nil;
633                         f->next = nil;
634                         runtime_free(f);
635                 }
636                 runtime_gc(1);  // trigger another gc to clean up the finalized objects, if possible
637         }
638 }
639
640 #define runtime_singleproc 0
641
642 // mark the block at v of size n as allocated.
643 // If noptr is true, mark it as having no pointers.
644 void
645 runtime_markallocated(void *v, uintptr n, bool noptr)
646 {
647         uintptr *b, obits, bits, off, shift;
648
649         // if(0)
650                 // runtime_printf("markallocated %p+%p\n", v, n);
651
652         if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
653                 runtime_throw("markallocated: bad pointer");
654
655         off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;  // word offset
656         b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
657         shift = off % wordsPerBitmapWord;
658
659         for(;;) {
660                 obits = *b;
661                 bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift);
662                 if(noptr)
663                         bits |= bitNoPointers<<shift;
664                 if(runtime_singleproc) {
665                         *b = bits;
666                         break;
667                 } else {
668                         // more than one goroutine is potentially running: use atomic op
669                         if(runtime_casp((void**)b, (void*)obits, (void*)bits))
670                                 break;
671                 }
672         }
673 }
674
675 // mark the block at v of size n as freed.
676 void
677 runtime_markfreed(void *v, uintptr n)
678 {
679         uintptr *b, obits, bits, off, shift;
680
681         // if(0)
682                 // runtime_printf("markfreed %p+%p\n", v, n);
683
684         if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
685                 runtime_throw("markfreed: bad pointer");
686
687         off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;  // word offset
688         b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
689         shift = off % wordsPerBitmapWord;
690
691         for(;;) {
692                 obits = *b;
693                 bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
694                 if(runtime_singleproc) {
695                         *b = bits;
696                         break;
697                 } else {
698                         // more than one goroutine is potentially running: use atomic op
699                         if(runtime_casp((void**)b, (void*)obits, (void*)bits))
700                                 break;
701                 }
702         }
703 }
704
705 // check that the block at v of size n is marked freed.
706 void
707 runtime_checkfreed(void *v, uintptr n)
708 {
709         uintptr *b, bits, off, shift;
710
711         if(!runtime_checking)
712                 return;
713
714         if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
715                 return; // not allocated, so okay
716
717         off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;  // word offset
718         b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
719         shift = off % wordsPerBitmapWord;
720
721         bits = *b>>shift;
722         if((bits & bitAllocated) != 0) {
723                 runtime_printf("checkfreed %p+%p: off=%p have=%p\n",
724                         v, (void*)n, (void*)off, (void*)(bits & bitMask));
725                 runtime_throw("checkfreed: not freed");
726         }
727 }
728
729 // mark the span of memory at v as having n blocks of the given size.
730 // if leftover is true, there is left over space at the end of the span.
731 void
732 runtime_markspan(void *v, uintptr size, uintptr n, bool leftover)
733 {
734         uintptr *b, off, shift;
735         byte *p;
736
737         if((byte*)v+size*n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
738                 runtime_throw("markspan: bad pointer");
739
740         p = v;
741         if(leftover)    // mark a boundary just past end of last block too
742                 n++;
743         for(; n-- > 0; p += size) {
744                 // Okay to use non-atomic ops here, because we control
745                 // the entire span, and each bitmap word has bits for only
746                 // one span, so no other goroutines are changing these
747                 // bitmap words.
748                 off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start;  // word offset
749                 b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
750                 shift = off % wordsPerBitmapWord;
751                 *b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
752         }
753 }
754
755 // unmark the span of memory at v of length n bytes.
756 void
757 runtime_unmarkspan(void *v, uintptr n)
758 {
759         uintptr *p, *b, off;
760
761         if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
762                 runtime_throw("unmarkspan: bad pointer");
763
764         p = v;
765         off = p - (uintptr*)runtime_mheap.arena_start;  // word offset
766         if(off % wordsPerBitmapWord != 0)
767                 runtime_throw("unmarkspan: unaligned pointer");
768         b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
769         n /= PtrSize;
770         if(n%wordsPerBitmapWord != 0)
771                 runtime_throw("unmarkspan: unaligned length");
772         // Okay to use non-atomic ops here, because we control
773         // the entire span, and each bitmap word has bits for only
774         // one span, so no other goroutines are changing these
775         // bitmap words.
776         n /= wordsPerBitmapWord;
777         while(n-- > 0)
778                 *b-- = 0;
779 }
780
781 bool
782 runtime_blockspecial(void *v)
783 {
784         uintptr *b, off, shift;
785
786         off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
787         b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
788         shift = off % wordsPerBitmapWord;
789
790         return (*b & (bitSpecial<<shift)) != 0;
791 }
792
793 void
794 runtime_setblockspecial(void *v)
795 {
796         uintptr *b, off, shift, bits, obits;
797
798         off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
799         b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
800         shift = off % wordsPerBitmapWord;
801
802         for(;;) {
803                 obits = *b;
804                 bits = obits | (bitSpecial<<shift);
805                 if(runtime_singleproc) {
806                         *b = bits;
807                         break;
808                 } else {
809                         // more than one goroutine is potentially running: use atomic op
810                         if(runtime_casp((void**)b, (void*)obits, (void*)bits))
811                                 break;
812                 }
813         }
814 }
815  
816 void
817 runtime_MHeap_MapBits(MHeap *h)
818 {
819         // Caller has added extra mappings to the arena.
820         // Add extra mappings of bitmap words as needed.
821         // We allocate extra bitmap pieces in chunks of bitmapChunk.
822         enum {
823                 bitmapChunk = 8192
824         };
825         uintptr n;
826         
827         n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
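        // Round n up to the next multiple of bitmapChunk (a power of two).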
828         n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
829         if(h->bitmap_mapped >= n)
830                 return;
831
832         runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped);
833         h->bitmap_mapped = n;
834 }
835
836 void
837 __go_enable_gc()
838 {
839   mstats.enablegc = 1;
840 }