OSDN Git Service

PR go/50654
[pf3gnuchains/gcc-fork.git] / libgo / runtime / proc.c
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include <limits.h>
6 #include <stdlib.h>
7 #include <pthread.h>
8 #include <unistd.h>
9
10 #include "config.h"
11 #include "runtime.h"
12 #include "arch.h"
13 #include "defs.h"
14 #include "malloc.h"
15 #include "go-defer.h"
16
#ifdef USING_SPLIT_STACK

/* FIXME: These routines are not declared in any header, so their
   prototypes are spelled out here.  */

extern void __splitstack_getcontext(void *context[10]);
extern void __splitstack_setcontext(void *context[10]);
extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
extern void *__splitstack_resetcontext(void *context[10], size_t *);
extern void *__splitstack_find(void *, void *, size_t *,
			       void **, void **, void **);
extern void __splitstack_block_signals(int *, int *);
extern void __splitstack_block_signals_context(void *context[10],
					       int *, int *);

#endif
38
// StackMin is a stack size in bytes.  When both the compiler and the
// linker support split stacks it is PTHREAD_STACK_MIN (or 8192 if the
// platform does not define that); otherwise it is 2MB.  The 2MB
// expression is parenthesized so the macro expands as a single value
// inside larger expressions such as `x % StackMin'.
#if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
# ifdef PTHREAD_STACK_MIN
#  define StackMin PTHREAD_STACK_MIN
# else
#  define StackMin 8192
# endif
#else
# define StackMin (2 * 1024 * 1024)
#endif
48
49 static void schedule(G*);
50
51 typedef struct Sched Sched;
52
53 M       runtime_m0;
54 G       runtime_g0;     // idle goroutine for m0
55
56 #ifdef __rtems__
57 #define __thread
58 #endif
59
60 static __thread G *g;
61 static __thread M *m;
62
// initcontext is called once per thread before any context switch;
// fixcontext is called on a saved context right before setcontext.
// Both are no-ops unless SETCONTEXT_CLOBBERS_TLS is defined.

#ifndef SETCONTEXT_CLOBBERS_TLS

static inline void
initcontext(void)
{
	// Nothing to record.
}

static inline void
fixcontext(ucontext_t *c __attribute__ ((unused)))
{
	// Nothing to repair.
}

#elif defined(__x86_64__) && defined(__sun__)

// x86_64 Solaris 10 and 11 have a bug: setcontext switches the %fs
// register to that of the thread which called getcontext.  The effect
// is that the address of all __thread variables changes.  This bug
// also affects pthread_self() and pthread_getspecific.  We work
// around it by clobbering the context field directly to keep %fs the
// same.

static __thread greg_t fs;

// Remember this thread's REG_FSBASE value.
static inline void
initcontext(void)
{
	ucontext_t c;

	getcontext(&c);
	fs = c.uc_mcontext.gregs[REG_FSBASE];
}

// Overwrite the saved REG_FSBASE in c with the current thread's value.
static inline void
fixcontext(ucontext_t* c)
{
	c->uc_mcontext.gregs[REG_FSBASE] = fs;
}

#else

# error unknown case for SETCONTEXT_CLOBBERS_TLS

#endif
110
111 // We can not always refer to the TLS variables directly.  The
112 // compiler will call tls_get_addr to get the address of the variable,
113 // and it may hold it in a register across a call to schedule.  When
114 // we get back from the call we may be running in a different thread,
115 // in which case the register now points to the TLS variable for a
116 // different thread.  We use non-inlinable functions to avoid this
117 // when necessary.
118
119 G* runtime_g(void) __attribute__ ((noinline, no_split_stack));
120
121 G*
122 runtime_g(void)
123 {
124         return g;
125 }
126
127 M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
128
129 M*
130 runtime_m(void)
131 {
132         return m;
133 }
134
135 int32   runtime_gcwaiting;
136
137 // Go scheduler
138 //
139 // The go scheduler's job is to match ready-to-run goroutines (`g's)
140 // with waiting-for-work schedulers (`m's).  If there are ready g's
141 // and no waiting m's, ready() will start a new m running in a new
142 // OS thread, so that all ready g's can run simultaneously, up to a limit.
143 // For now, m's never go away.
144 //
145 // By default, Go keeps only one kernel thread (m) running user code
146 // at a single time; other threads may be blocked in the operating system.
147 // Setting the environment variable $GOMAXPROCS or calling
148 // runtime.GOMAXPROCS() will change the number of user threads
149 // allowed to execute simultaneously.  $GOMAXPROCS is thus an
150 // approximation of the maximum number of cores to use.
151 //
152 // Even a program that can run without deadlock in a single process
153 // might use more m's if given the chance.  For example, the prime
154 // sieve will use as many m's as there are primes (up to runtime_sched.mmax),
155 // allowing different stages of the pipeline to execute in parallel.
156 // We could revisit this choice, only kicking off new m's for blocking
157 // system calls, but that would limit the amount of parallel computation
158 // that go would try to do.
159 //
160 // In general, one could imagine all sorts of refinements to the
161 // scheduler, but the goal now is just to get something working on
162 // Linux and OS X.
163
164 struct Sched {
165         Lock;
166
167         G *gfree;       // available g's (status == Gdead)
168         int32 goidgen;
169
170         G *ghead;       // g's waiting to run
171         G *gtail;
172         int32 gwait;    // number of g's waiting to run
173         int32 gcount;   // number of g's that are alive
174         int32 grunning; // number of g's running on cpu or in syscall
175
176         M *mhead;       // m's waiting for work
177         int32 mwait;    // number of m's waiting for work
178         int32 mcount;   // number of m's that have been created
179
180         volatile uint32 atomic; // atomic scheduling word (see below)
181
182         int32 profilehz;        // cpu profiling rate
183
184         bool init;  // running initialization
185         bool lockmain;  // init called runtime.LockOSThread
186
187         Note    stopped;        // one g can set waitstop and wait here for m's to stop
188 };
189
190 // The atomic word in sched is an atomic uint32 that
191 // holds these fields.
192 //
193 //      [15 bits] mcpu          number of m's executing on cpu
194 //      [15 bits] mcpumax       max number of m's allowed on cpu
195 //      [1 bit] waitstop        some g is waiting on stopped
196 //      [1 bit] gwaiting        gwait != 0
197 //
198 // These fields are the information needed by entersyscall
199 // and exitsyscall to decide whether to coordinate with the
200 // scheduler.  Packing them into a single machine word lets
201 // them use a fast path with a single atomic read/write and
202 // no lock/unlock.  This greatly reduces contention in
203 // syscall- or cgo-heavy multithreaded programs.
204 //
205 // Except for entersyscall and exitsyscall, the manipulations
206 // to these fields only happen while holding the schedlock,
207 // so the routines holding schedlock only need to worry about
208 // what entersyscall and exitsyscall do, not the other routines
209 // (which also use the schedlock).
210 //
211 // In particular, entersyscall and exitsyscall only read mcpumax,
212 // waitstop, and gwaiting.  They never write them.  Thus, writes to those
213 // fields can be done (holding schedlock) without fear of write conflicts.
214 // There may still be logic conflicts: for example, the set of waitstop must
215 // be conditioned on mcpu >= mcpumax or else the wait may be a
216 // spurious sleep.  The Promela model in proc.p verifies these accesses.
// Bit layout of the atomic scheduling word, lowest field first:
// mcpu, mcpumax (mcpuWidth bits each), then the waitstop and
// gwaiting flag bits.
enum {
	mcpuWidth = 15,
	mcpuMask = (1<<mcpuWidth) - 1,

	mcpuShift = 0,
	mcpumaxShift = mcpuShift + mcpuWidth,
	waitstopShift = mcpumaxShift + mcpuWidth,
	gwaitingShift = waitstopShift + 1,

	// GOMAXPROCS can be no larger than what fits in one of
	// the fields above.  A few of the highest values are
	// kept in reserve so that an accidental decrement below
	// zero can be detected.
	maxgomaxprocs = mcpuMask - 10,
};

// Field extractors for a value v read from the atomic word.
#define atomic_mcpu(v)		(((v) >> mcpuShift) & mcpuMask)
#define atomic_mcpumax(v)	(((v) >> mcpumaxShift) & mcpuMask)
#define atomic_waitstop(v)	(((v) >> waitstopShift) & 1)
#define atomic_gwaiting(v)	(((v) >> gwaitingShift) & 1)
237
238 Sched runtime_sched;
239 int32 runtime_gomaxprocs;
240 bool runtime_singleproc;
241
242 static bool canaddmcpu(void);
243
244 // An m that is waiting for notewakeup(&m->havenextg).  This may
245 // only be accessed while the scheduler lock is held.  This is used to
246 // minimize the number of times we call notewakeup while the scheduler
247 // lock is held, since the m will normally move quickly to lock the
248 // scheduler itself, producing lock contention.
249 static M* mwakeup;
250
251 // Scheduling helpers.  Sched must be locked.
252 static void gput(G*);   // put/get on ghead/gtail
253 static G* gget(void);
254 static void mput(M*);   // put/get on mhead
255 static M* mget(G*);
256 static void gfput(G*);  // put/get on gfree
257 static G* gfget(void);
258 static void matchmg(void);      // match m's to g's
259 static void readylocked(G*);    // ready, but sched is locked
260 static void mnextg(M*, G*);
261 static void mcommoninit(M*);
262
263 void
264 setmcpumax(uint32 n)
265 {
266         uint32 v, w;
267
268         for(;;) {
269                 v = runtime_sched.atomic;
270                 w = v;
271                 w &= ~(mcpuMask<<mcpumaxShift);
272                 w |= n<<mcpumaxShift;
273                 if(runtime_cas(&runtime_sched.atomic, v, w))
274                         break;
275         }
276 }
277
278 // First function run by a new goroutine.  This replaces gogocall.
279 static void
280 kickoff(void)
281 {
282         void (*fn)(void*);
283
284         fn = (void (*)(void*))(g->entry);
285         fn(g->param);
286         runtime_goexit();
287 }
288
289 // Switch context to a different goroutine.  This is like longjmp.
290 static void runtime_gogo(G*) __attribute__ ((noinline));
291 static void
292 runtime_gogo(G* newg)
293 {
294 #ifdef USING_SPLIT_STACK
295         __splitstack_setcontext(&newg->stack_context[0]);
296 #endif
297         g = newg;
298         newg->fromgogo = true;
299         fixcontext(&newg->context);
300         setcontext(&newg->context);
301         runtime_throw("gogo setcontext returned");
302 }
303
304 // Save context and call fn passing g as a parameter.  This is like
305 // setjmp.  Because getcontext always returns 0, unlike setjmp, we use
306 // g->fromgogo as a code.  It will be true if we got here via
307 // setcontext.  g == nil the first time this is called in a new m.
308 static void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
309 static void
310 runtime_mcall(void (*pfn)(G*))
311 {
312         M *mp;
313         G *gp;
314 #ifndef USING_SPLIT_STACK
315         int i;
316 #endif
317
318         // Ensure that all registers are on the stack for the garbage
319         // collector.
320         __builtin_unwind_init();
321
322         mp = m;
323         gp = g;
324         if(gp == mp->g0)
325                 runtime_throw("runtime: mcall called on m->g0 stack");
326
327         if(gp != nil) {
328
329 #ifdef USING_SPLIT_STACK
330                 __splitstack_getcontext(&g->stack_context[0]);
331 #else
332                 gp->gcnext_sp = &i;
333 #endif
334                 gp->fromgogo = false;
335                 getcontext(&gp->context);
336
337                 // When we return from getcontext, we may be running
338                 // in a new thread.  That means that m and g may have
339                 // changed.  They are global variables so we will
340                 // reload them, but the addresses of m and g may be
341                 // cached in our local stack frame, and those
342                 // addresses may be wrong.  Call functions to reload
343                 // the values for this thread.
344                 mp = runtime_m();
345                 gp = runtime_g();
346         }
347         if (gp == nil || !gp->fromgogo) {
348 #ifdef USING_SPLIT_STACK
349                 __splitstack_setcontext(&mp->g0->stack_context[0]);
350 #endif
351                 mp->g0->entry = (byte*)pfn;
352                 mp->g0->param = gp;
353
354                 // It's OK to set g directly here because this case
355                 // can not occur if we got here via a setcontext to
356                 // the getcontext call just above.
357                 g = mp->g0;
358
359                 fixcontext(&mp->g0->context);
360                 setcontext(&mp->g0->context);
361                 runtime_throw("runtime: mcall function returned");
362         }
363 }
364
365 // The bootstrap sequence is:
366 //
367 //      call osinit
368 //      call schedinit
369 //      make & queue new G
370 //      call runtime_mstart
371 //
372 // The new G calls runtime_main.
373 void
374 runtime_schedinit(void)
375 {
376         int32 n;
377         const byte *p;
378
379         m = &runtime_m0;
380         g = &runtime_g0;
381         m->g0 = g;
382         m->curg = g;
383         g->m = m;
384
385         initcontext();
386
387         m->nomemprof++;
388         runtime_mallocinit();
389         mcommoninit(m);
390
391         runtime_goargs();
392         runtime_goenvs();
393
394         // For debugging:
395         // Allocate internal symbol table representation now,
396         // so that we don't need to call malloc when we crash.
397         // runtime_findfunc(0);
398
399         runtime_gomaxprocs = 1;
400         p = runtime_getenv("GOMAXPROCS");
401         if(p != nil && (n = runtime_atoi(p)) != 0) {
402                 if(n > maxgomaxprocs)
403                         n = maxgomaxprocs;
404                 runtime_gomaxprocs = n;
405         }
406         setmcpumax(runtime_gomaxprocs);
407         runtime_singleproc = runtime_gomaxprocs == 1;
408
409         canaddmcpu();   // mcpu++ to account for bootstrap m
410         m->helpgc = 1;  // flag to tell schedule() to mcpu--
411         runtime_sched.grunning++;
412
413         // Can not enable GC until all roots are registered.
414         // mstats.enablegc = 1;
415         m->nomemprof--;
416 }
417
418 extern void main_init(void) __asm__ ("__go_init_main");
419 extern void main_main(void) __asm__ ("main.main");
420
421 // The main goroutine.
422 void
423 runtime_main(void)
424 {
425         // Lock the main goroutine onto this, the main OS thread,
426         // during initialization.  Most programs won't care, but a few
427         // do require certain calls to be made by the main thread.
428         // Those can arrange for main.main to run in the main thread
429         // by calling runtime.LockOSThread during initialization
430         // to preserve the lock.
431         runtime_LockOSThread();
432         runtime_sched.init = true;
433         main_init();
434         runtime_sched.init = false;
435         if(!runtime_sched.lockmain)
436                 runtime_UnlockOSThread();
437
438         // For gccgo we have to wait until after main is initialized
439         // to enable GC, because initializing main registers the GC
440         // roots.
441         mstats.enablegc = 1;
442
443         main_main();
444         runtime_exit(0);
445         for(;;)
446                 *(int32*)0 = 0;
447 }
448
449 // Lock the scheduler.
450 static void
451 schedlock(void)
452 {
453         runtime_lock(&runtime_sched);
454 }
455
456 // Unlock the scheduler.
457 static void
458 schedunlock(void)
459 {
460         M *m;
461
462         m = mwakeup;
463         mwakeup = nil;
464         runtime_unlock(&runtime_sched);
465         if(m != nil)
466                 runtime_notewakeup(&m->havenextg);
467 }
468
469 void
470 runtime_goexit(void)
471 {
472         g->status = Gmoribund;
473         runtime_gosched();
474 }
475
476 void
477 runtime_goroutineheader(G *g)
478 {
479         const char *status;
480
481         switch(g->status) {
482         case Gidle:
483                 status = "idle";
484                 break;
485         case Grunnable:
486                 status = "runnable";
487                 break;
488         case Grunning:
489                 status = "running";
490                 break;
491         case Gsyscall:
492                 status = "syscall";
493                 break;
494         case Gwaiting:
495                 if(g->waitreason)
496                         status = g->waitreason;
497                 else
498                         status = "waiting";
499                 break;
500         case Gmoribund:
501                 status = "moribund";
502                 break;
503         default:
504                 status = "???";
505                 break;
506         }
507         runtime_printf("goroutine %d [%s]:\n", g->goid, status);
508 }
509
510 void
511 runtime_tracebackothers(G *me)
512 {
513         G *g;
514
515         for(g = runtime_allg; g != nil; g = g->alllink) {
516                 if(g == me || g->status == Gdead)
517                         continue;
518                 runtime_printf("\n");
519                 runtime_goroutineheader(g);
520                 // runtime_traceback(g->sched.pc, g->sched.sp, 0, g);
521         }
522 }
523
524 // Mark this g as m's idle goroutine.
525 // This functionality might be used in environments where programs
526 // are limited to a single thread, to simulate a select-driven
527 // network server.  It is not exposed via the standard runtime API.
528 void
529 runtime_idlegoroutine(void)
530 {
531         if(g->idlem != nil)
532                 runtime_throw("g is already an idle goroutine");
533         g->idlem = m;
534 }
535
536 static void
537 mcommoninit(M *m)
538 {
539         // Add to runtime_allm so garbage collector doesn't free m
540         // when it is just in a register or thread-local storage.
541         m->alllink = runtime_allm;
542         // runtime_Cgocalls() iterates over allm w/o schedlock,
543         // so we need to publish it safely.
544         runtime_atomicstorep((void**)&runtime_allm, m);
545
546         m->id = runtime_sched.mcount++;
547         m->fastrand = 0x49f6428aUL + m->id + runtime_cputicks();
548
549         if(m->mcache == nil)
550                 m->mcache = runtime_allocmcache();
551 }
552
553 // Try to increment mcpu.  Report whether succeeded.
554 static bool
555 canaddmcpu(void)
556 {
557         uint32 v;
558
559         for(;;) {
560                 v = runtime_sched.atomic;
561                 if(atomic_mcpu(v) >= atomic_mcpumax(v))
562                         return 0;
563                 if(runtime_cas(&runtime_sched.atomic, v, v+(1<<mcpuShift)))
564                         return 1;
565         }
566 }
567
568 // Put on `g' queue.  Sched must be locked.
569 static void
570 gput(G *g)
571 {
572         M *m;
573
574         // If g is wired, hand it off directly.
575         if((m = g->lockedm) != nil && canaddmcpu()) {
576                 mnextg(m, g);
577                 return;
578         }
579
580         // If g is the idle goroutine for an m, hand it off.
581         if(g->idlem != nil) {
582                 if(g->idlem->idleg != nil) {
583                         runtime_printf("m%d idle out of sync: g%d g%d\n",
584                                 g->idlem->id,
585                                 g->idlem->idleg->goid, g->goid);
586                         runtime_throw("runtime: double idle");
587                 }
588                 g->idlem->idleg = g;
589                 return;
590         }
591
592         g->schedlink = nil;
593         if(runtime_sched.ghead == nil)
594                 runtime_sched.ghead = g;
595         else
596                 runtime_sched.gtail->schedlink = g;
597         runtime_sched.gtail = g;
598
599         // increment gwait.
600         // if it transitions to nonzero, set atomic gwaiting bit.
601         if(runtime_sched.gwait++ == 0)
602                 runtime_xadd(&runtime_sched.atomic, 1<<gwaitingShift);
603 }
604
605 // Report whether gget would return something.
606 static bool
607 haveg(void)
608 {
609         return runtime_sched.ghead != nil || m->idleg != nil;
610 }
611
612 // Get from `g' queue.  Sched must be locked.
613 static G*
614 gget(void)
615 {
616         G *g;
617
618         g = runtime_sched.ghead;
619         if(g){
620                 runtime_sched.ghead = g->schedlink;
621                 if(runtime_sched.ghead == nil)
622                         runtime_sched.gtail = nil;
623                 // decrement gwait.
624                 // if it transitions to zero, clear atomic gwaiting bit.
625                 if(--runtime_sched.gwait == 0)
626                         runtime_xadd(&runtime_sched.atomic, -1<<gwaitingShift);
627         } else if(m->idleg != nil) {
628                 g = m->idleg;
629                 m->idleg = nil;
630         }
631         return g;
632 }
633
634 // Put on `m' list.  Sched must be locked.
635 static void
636 mput(M *m)
637 {
638         m->schedlink = runtime_sched.mhead;
639         runtime_sched.mhead = m;
640         runtime_sched.mwait++;
641 }
642
643 // Get an `m' to run `g'.  Sched must be locked.
644 static M*
645 mget(G *g)
646 {
647         M *m;
648
649         // if g has its own m, use it.
650         if(g && (m = g->lockedm) != nil)
651                 return m;
652
653         // otherwise use general m pool.
654         if((m = runtime_sched.mhead) != nil){
655                 runtime_sched.mhead = m->schedlink;
656                 runtime_sched.mwait--;
657         }
658         return m;
659 }
660
661 // Mark g ready to run.
662 void
663 runtime_ready(G *g)
664 {
665         schedlock();
666         readylocked(g);
667         schedunlock();
668 }
669
670 // Mark g ready to run.  Sched is already locked.
671 // G might be running already and about to stop.
672 // The sched lock protects g->status from changing underfoot.
673 static void
674 readylocked(G *g)
675 {
676         if(g->m){
677                 // Running on another machine.
678                 // Ready it when it stops.
679                 g->readyonstop = 1;
680                 return;
681         }
682
683         // Mark runnable.
684         if(g->status == Grunnable || g->status == Grunning) {
685                 runtime_printf("goroutine %d has status %d\n", g->goid, g->status);
686                 runtime_throw("bad g->status in ready");
687         }
688         g->status = Grunnable;
689
690         gput(g);
691         matchmg();
692 }
693
694 // Same as readylocked but a different symbol so that
695 // debuggers can set a breakpoint here and catch all
696 // new goroutines.
697 static void
698 newprocreadylocked(G *g)
699 {
700         readylocked(g);
701 }
702
703 // Pass g to m for running.
704 // Caller has already incremented mcpu.
705 static void
706 mnextg(M *m, G *g)
707 {
708         runtime_sched.grunning++;
709         m->nextg = g;
710         if(m->waitnextg) {
711                 m->waitnextg = 0;
712                 if(mwakeup != nil)
713                         runtime_notewakeup(&mwakeup->havenextg);
714                 mwakeup = m;
715         }
716 }
717
718 // Get the next goroutine that m should run.
719 // Sched must be locked on entry, is unlocked on exit.
720 // Makes sure that at most $GOMAXPROCS g's are
721 // running on cpus (not in system calls) at any given time.
722 static G*
723 nextgandunlock(void)
724 {
725         G *gp;
726         uint32 v;
727
728 top:
729         if(atomic_mcpu(runtime_sched.atomic) >= maxgomaxprocs)
730                 runtime_throw("negative mcpu");
731
732         // If there is a g waiting as m->nextg, the mcpu++
733         // happened before it was passed to mnextg.
734         if(m->nextg != nil) {
735                 gp = m->nextg;
736                 m->nextg = nil;
737                 schedunlock();
738                 return gp;
739         }
740
741         if(m->lockedg != nil) {
742                 // We can only run one g, and it's not available.
743                 // Make sure some other cpu is running to handle
744                 // the ordinary run queue.
745                 if(runtime_sched.gwait != 0) {
746                         matchmg();
747                         // m->lockedg might have been on the queue.
748                         if(m->nextg != nil) {
749                                 gp = m->nextg;
750                                 m->nextg = nil;
751                                 schedunlock();
752                                 return gp;
753                         }
754                 }
755         } else {
756                 // Look for work on global queue.
757                 while(haveg() && canaddmcpu()) {
758                         gp = gget();
759                         if(gp == nil)
760                                 runtime_throw("gget inconsistency");
761
762                         if(gp->lockedm) {
763                                 mnextg(gp->lockedm, gp);
764                                 continue;
765                         }
766                         runtime_sched.grunning++;
767                         schedunlock();
768                         return gp;
769                 }
770
771                 // The while loop ended either because the g queue is empty
772                 // or because we have maxed out our m procs running go
773                 // code (mcpu >= mcpumax).  We need to check that
774                 // concurrent actions by entersyscall/exitsyscall cannot
775                 // invalidate the decision to end the loop.
776                 //
777                 // We hold the sched lock, so no one else is manipulating the
778                 // g queue or changing mcpumax.  Entersyscall can decrement
779                 // mcpu, but if does so when there is something on the g queue,
780                 // the gwait bit will be set, so entersyscall will take the slow path
781                 // and use the sched lock.  So it cannot invalidate our decision.
782                 //
783                 // Wait on global m queue.
784                 mput(m);
785         }
786
787         v = runtime_atomicload(&runtime_sched.atomic);
788         if(runtime_sched.grunning == 0)
789                 runtime_throw("all goroutines are asleep - deadlock!");
790         m->nextg = nil;
791         m->waitnextg = 1;
792         runtime_noteclear(&m->havenextg);
793
794         // Stoptheworld is waiting for all but its cpu to go to stop.
795         // Entersyscall might have decremented mcpu too, but if so
796         // it will see the waitstop and take the slow path.
797         // Exitsyscall never increments mcpu beyond mcpumax.
798         if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
799                 // set waitstop = 0 (known to be 1)
800                 runtime_xadd(&runtime_sched.atomic, -1<<waitstopShift);
801                 runtime_notewakeup(&runtime_sched.stopped);
802         }
803         schedunlock();
804
805         runtime_notesleep(&m->havenextg);
806         if(m->helpgc) {
807                 runtime_gchelper();
808                 m->helpgc = 0;
809                 runtime_lock(&runtime_sched);
810                 goto top;
811         }
812         if((gp = m->nextg) == nil)
813                 runtime_throw("bad m->nextg in nextgoroutine");
814         m->nextg = nil;
815         return gp;
816 }
817
818 int32
819 runtime_helpgc(bool *extra)
820 {
821         M *mp;
822         int32 n, max;
823
824         // Figure out how many CPUs to use.
825         // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
826         max = runtime_gomaxprocs;
827         if(max > runtime_ncpu)
828                 max = runtime_ncpu > 0 ? runtime_ncpu : 1;
829         if(max > MaxGcproc)
830                 max = MaxGcproc;
831
832         // We're going to use one CPU no matter what.
833         // Figure out the max number of additional CPUs.
834         max--;
835
836         runtime_lock(&runtime_sched);
837         n = 0;
838         while(n < max && (mp = mget(nil)) != nil) {
839                 n++;
840                 mp->helpgc = 1;
841                 mp->waitnextg = 0;
842                 runtime_notewakeup(&mp->havenextg);
843         }
844         runtime_unlock(&runtime_sched);
845         if(extra)
846                 *extra = n != max;
847         return n;
848 }
849
850 void
851 runtime_stoptheworld(void)
852 {
853         uint32 v;
854
855         schedlock();
856         runtime_gcwaiting = 1;
857
858         setmcpumax(1);
859
860         // while mcpu > 1
861         for(;;) {
862                 v = runtime_sched.atomic;
863                 if(atomic_mcpu(v) <= 1)
864                         break;
865
866                 // It would be unsafe for multiple threads to be using
867                 // the stopped note at once, but there is only
868                 // ever one thread doing garbage collection.
869                 runtime_noteclear(&runtime_sched.stopped);
870                 if(atomic_waitstop(v))
871                         runtime_throw("invalid waitstop");
872
873                 // atomic { waitstop = 1 }, predicated on mcpu <= 1 check above
874                 // still being true.
875                 if(!runtime_cas(&runtime_sched.atomic, v, v+(1<<waitstopShift)))
876                         continue;
877
878                 schedunlock();
879                 runtime_notesleep(&runtime_sched.stopped);
880                 schedlock();
881         }
882         runtime_singleproc = runtime_gomaxprocs == 1;
883         schedunlock();
884 }
885
886 void
887 runtime_starttheworld(bool extra)
888 {
889         M *m;
890
891         schedlock();
892         runtime_gcwaiting = 0;
893         setmcpumax(runtime_gomaxprocs);
894         matchmg();
895         if(extra && canaddmcpu()) {
896                 // Start a new m that will (we hope) be idle
897                 // and so available to help when the next
898                 // garbage collection happens.
899                 // canaddmcpu above did mcpu++
900                 // (necessary, because m will be doing various
901                 // initialization work so is definitely running),
902                 // but m is not running a specific goroutine,
903                 // so set the helpgc flag as a signal to m's
904                 // first schedule(nil) to mcpu-- and grunning--.
905                 m = runtime_newm();
906                 m->helpgc = 1;
907                 runtime_sched.grunning++;
908         }
909         schedunlock();
910 }
911
912 // Called to start an M.
913 void*
914 runtime_mstart(void* mp)
915 {
916         m = (M*)mp;
917         g = m->g0;
918
919         initcontext();
920
921         g->entry = nil;
922         g->param = nil;
923
924         // Record top of stack for use by mcall.
925         // Once we call schedule we're never coming back,
926         // so other calls can reuse this stack space.
927 #ifdef USING_SPLIT_STACK
928         __splitstack_getcontext(&g->stack_context[0]);
929 #else
930         g->gcinitial_sp = &mp;
931         // Setting gcstack_size to 0 is a marker meaning that gcinitial_sp
932         // is the top of the stack, not the bottom.
933         g->gcstack_size = 0;
934         g->gcnext_sp = &mp;
935 #endif
936         getcontext(&g->context);
937
938         if(g->entry != nil) {
939                 // Got here from mcall.
940                 void (*pfn)(G*) = (void (*)(G*))g->entry;
941                 G* gp = (G*)g->param;
942                 pfn(gp);
943                 *(int*)0x21 = 0x21;
944         }
945         runtime_minit();
946
947 #ifdef USING_SPLIT_STACK
948         {
949           int dont_block_signals = 0;
950           __splitstack_block_signals(&dont_block_signals, nil);
951         }
952 #endif
953
954         schedule(nil);
955         return nil;
956 }
957
958 typedef struct CgoThreadStart CgoThreadStart;
    // Bundle of an M, a G and a start function.
    // NOTE(review): nothing in the code visible here references this type;
    // presumably it describes a thread start request for cgo -- confirm
    // whether it is still needed.
959 struct CgoThreadStart
960 {
961         M *m;
962         G *g;
963         void (*fn)(void);
964 };
965
966 // Kick off new m's as needed (up to mcpumax).
967 // Sched is locked.
968 static void
969 matchmg(void)
970 {
971         G *gp;
972         M *mp;
973
974         if(m->mallocing || m->gcing)
975                 return;
976
977         while(haveg() && canaddmcpu()) {
978                 gp = gget();
979                 if(gp == nil)
980                         runtime_throw("gget inconsistency");
981
982                 // Find the m that will run gp.
983                 if((mp = mget(gp)) == nil)
984                         mp = runtime_newm();
985                 mnextg(mp, gp);
986         }
987 }
988
989 // Create a new m.  It will start off with a call to runtime_mstart.
990 M*
991 runtime_newm(void)
992 {
993         M *m;
994         pthread_attr_t attr;
995         pthread_t tid;
996
997         m = runtime_malloc(sizeof(M));
998         mcommoninit(m);
999         m->g0 = runtime_malg(-1, nil, nil);
1000
1001         if(pthread_attr_init(&attr) != 0)
1002                 runtime_throw("pthread_attr_init");
1003         if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
1004                 runtime_throw("pthread_attr_setdetachstate");
1005
1006 #ifndef PTHREAD_STACK_MIN
1007 #define PTHREAD_STACK_MIN 8192
1008 #endif
1009         if(pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0)
1010                 runtime_throw("pthread_attr_setstacksize");
1011
1012         if(pthread_create(&tid, &attr, runtime_mstart, m) != 0)
1013                 runtime_throw("pthread_create");
1014
1015         return m;
1016 }
1017
1018 // One round of scheduler: find a goroutine and run it.
1019 // The argument is the goroutine that was running before
1020 // schedule was called, or nil if this is the first call.
1021 // Never returns.
     // With gp == nil the calling m must either have helpgc set or already
     // have a nextg assigned; any other state is a fatal error.
1022 static void
1023 schedule(G *gp)
1024 {
1025         int32 hz;
1026         uint32 v;
1027
1028         schedlock();
1029         if(gp != nil) {
1030                 // Just finished running gp.
1031                 gp->m = nil;
1032                 runtime_sched.grunning--;
1033
1034                 // atomic { mcpu-- }
1035                 v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
                     // NOTE(review): v is unsigned, so presumably a count that
                     // dropped below zero shows up as a value above
                     // maxgomaxprocs -- hence the "negative" in the message.
1036                 if(atomic_mcpu(v) > maxgomaxprocs)
1037                         runtime_throw("negative mcpu in scheduler");
1038
1039                 switch(gp->status){
1040                 case Grunnable:
1041                 case Gdead:
1042                         // Shouldn't have been running!
1043                         runtime_throw("bad gp->status in sched");
1044                 case Grunning:
                             // Still wants to run: put it back on the run queue.
1045                         gp->status = Grunnable;
1046                         gput(gp);
1047                         break;
1048                 case Gmoribund:
                             // gp is finished: drop any m wiring, put it on the
                             // free list, and exit the program if it was the
                             // last goroutine counted in gcount.
1049                         gp->status = Gdead;
1050                         if(gp->lockedm) {
1051                                 gp->lockedm = nil;
1052                                 m->lockedg = nil;
1053                         }
1054                         gp->idlem = nil;
1055                         gfput(gp);
1056                         if(--runtime_sched.gcount == 0)
1057                                 runtime_exit(0);
1058                         break;
1059                 }
                     // readyonstop is set by runtime_exitsyscall to ask for gp to
                     // be made ready again once it has been taken off this m.
1060                 if(gp->readyonstop){
1061                         gp->readyonstop = 0;
1062                         readylocked(gp);
1063                 }
1064         } else if(m->helpgc) {
1065                 // Bootstrap m or new m started by starttheworld.
1066                 // atomic { mcpu-- }
1067                 v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
1068                 if(atomic_mcpu(v) > maxgomaxprocs)
1069                         runtime_throw("negative mcpu in scheduler");
1070                 // Compensate for increment in starttheworld().
1071                 runtime_sched.grunning--;
1072                 m->helpgc = 0;
1073         } else if(m->nextg != nil) {
1074                 // New m started by matchmg.
1075         } else {
1076                 runtime_throw("invalid m state in scheduler");
1077         }
1078
1079         // Find (or wait for) g to run.  Unlocks runtime_sched.
1080         gp = nextgandunlock();
             // Mark the chosen g as running on this m.
1081         gp->readyonstop = 0;
1082         gp->status = Grunning;
1083         m->curg = gp;
1084         gp->m = m;
1085
1086         // Check whether the profiler needs to be turned on or off.
1087         hz = runtime_sched.profilehz;
1088         if(m->profilehz != hz)
1089                 runtime_resetcpuprofiler(hz);
1090
             // Hand control to gp.  NOTE(review): presumably runtime_gogo does
             // not return, which is what makes schedule "never return".
1091         runtime_gogo(gp);
1092 }
1093
1094 // Enter scheduler.  If g->status is Grunning,
1095 // re-queues g and runs everyone else who is waiting
1096 // before running g again.  If g->status is Gmoribund,
1097 // kills off g.
     // It is a fatal error to call this while m->locks is non-zero or
     // while running on the m's g0.
1098 void
1099 runtime_gosched(void)
1100 {
1101         if(m->locks != 0)
1102                 runtime_throw("gosched holding locks");
1103         if(g == m->g0)
1104                 runtime_throw("gosched of g0");
             // NOTE(review): runtime_mcall presumably switches to g0 and calls
             // schedule with the current g as its argument (compare the
             // "Got here from mcall" path in runtime_mstart) -- confirm.
1105         runtime_mcall(schedule);
1106 }
1107
1108 // The goroutine g is about to enter a system call.
1109 // Record that it's not using the cpu anymore.
1110 // This is called only from the go syscall library and cgocall,
1111 // not from the low-level system calls used by the runtime.
1112 //
1113 // Entersyscall cannot split the stack: the runtime_gosave must
1114 // make g->sched refer to the caller's stack segment, because
1115 // entersyscall is going to return immediately after.
1116 // It's okay to call matchmg and notewakeup even after
1117 // decrementing mcpu, because we haven't released the
1118 // sched lock yet, so the garbage collector cannot be running.
     //
     // NOTE(review): the body below does not call runtime_gosave or touch
     // g->sched; it records the stack via __splitstack_find / gcnext_sp and
     // the registers via setjmp.  The paragraph above about runtime_gosave
     // looks stale -- confirm and reword.
1119
1120 void runtime_entersyscall(void) __attribute__ ((no_split_stack));
1121
1122 void
1123 runtime_entersyscall(void)
1124 {
1125         uint32 v;
1126
1127         // Leave SP around for gc and traceback.
1128 #ifdef USING_SPLIT_STACK
1129         g->gcstack = __splitstack_find(NULL, NULL, &g->gcstack_size,
1130                                        &g->gcnext_segment, &g->gcnext_sp,
1131                                        &g->gcinitial_sp);
1132 #else
             // The address of a local stands in for the current stack pointer.
1133         g->gcnext_sp = (byte *) &v;
1134 #endif
1135
1136         // Save the registers in the g structure so that any pointers
1137         // held in registers will be seen by the garbage collector.
1138         // We could use getcontext here, but setjmp is more efficient
1139         // because it doesn't need to save the signal mask.
1140         setjmp(g->gcregs);
1141
1142         g->status = Gsyscall;
1143
1144         // Fast path.
1145         // The slow path inside the schedlock/schedunlock will get
1146         // through without stopping if it does:
1147         //      mcpu--
1148         //      gwait not true
1149         //      waitstop && mcpu <= mcpumax not true
1150         // If we can do the same with a single atomic add,
1151         // then we can skip the locks.
1152         v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
1153         if(!atomic_gwaiting(v) && (!atomic_waitstop(v) || atomic_mcpu(v) > atomic_mcpumax(v)))
1154                 return;
1155
             // Slow path: mcpu has already been decremented above; re-read the
             // state under the sched lock and act on it.
1156         schedlock();
1157         v = runtime_atomicload(&runtime_sched.atomic);
1158         if(atomic_gwaiting(v)) {
                     // A g is waiting for a cpu: try to start it on some m,
                     // then reload the state, which matchmg may have changed.
1159                 matchmg();
1160                 v = runtime_atomicload(&runtime_sched.atomic);
1161         }
1162         if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
                     // Someone is waiting on runtime_sched.stopped and mcpu is
                     // now within the limit: clear waitstop and wake them.
1163                 runtime_xadd(&runtime_sched.atomic, -1<<waitstopShift);
1164                 runtime_notewakeup(&runtime_sched.stopped);
1165         }
1166
1167         schedunlock();
1168 }
1169
1170 // The goroutine g exited its system call.
1171 // Arrange for it to run on a cpu again.
1172 // This is called only from the go syscall library, not
1173 // from the low-level system calls used by the runtime.
     // Either returns immediately (fast path) or goes through the scheduler
     // and returns once this goroutine has been scheduled again.  In both
     // cases the stack and register information that runtime_entersyscall
     // left for the garbage collector is cleared before returning.
1174 void
1175 runtime_exitsyscall(void)
1176 {
1177         G *gp;
1178         uint32 v;
1179
1180         // Fast path.
1181         // If we can do the mcpu++ bookkeeping and
1182         // find that we still have mcpu <= mcpumax, then we can
1183         // start executing Go code immediately, without having to
1184         // schedlock/schedunlock.
1185         gp = g;
1186         v = runtime_xadd(&runtime_sched.atomic, (1<<mcpuShift));
             // The fast path is also skipped when this m's profiling rate is
             // out of date; schedule() is where the rate gets resynchronized.
1187         if(m->profilehz == runtime_sched.profilehz && atomic_mcpu(v) <= atomic_mcpumax(v)) {
1188                 // There's a cpu for us, so we can run.
1189                 gp->status = Grunning;
1190                 // Garbage collector isn't running (since we are),
1191                 // so okay to clear gcstack.
1192 #ifdef USING_SPLIT_STACK
1193                 gp->gcstack = nil;
1194 #endif
1195                 gp->gcnext_sp = nil;
1196                 runtime_memclr(gp->gcregs, sizeof gp->gcregs);
1197                 return;
1198         }
1199
1200         // Tell scheduler to put g back on the run queue:
1201         // mostly equivalent to g->status = Grunning,
1202         // but keeps the garbage collector from thinking
1203         // that g is running right now, which it's not.
1204         gp->readyonstop = 1;
1205
1206         // All the cpus are taken.
1207         // The scheduler will ready g and put this m to sleep.
1208         // When the scheduler takes g away from m,
1209         // it will undo the runtime_sched.mcpu++ above.
1210         runtime_gosched();
1211
1212         // Gosched returned, so we're allowed to run now.
1213         // Delete the gcstack information that we left for
1214         // the garbage collector during the system call.
1215         // Must wait until now because until gosched returns
1216         // we don't know for sure that the garbage collector
1217         // is not running.
1218 #ifdef USING_SPLIT_STACK
1219         gp->gcstack = nil;
1220 #endif
1221         gp->gcnext_sp = nil;
1222         runtime_memclr(gp->gcregs, sizeof gp->gcregs);
1223 }
1224
1225 // Allocate a new g, with a stack big enough for stacksize bytes.
1226 G*
1227 runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize)
1228 {
1229         G *newg;
1230
1231         newg = runtime_malloc(sizeof(G));
1232         if(stacksize >= 0) {
1233 #if USING_SPLIT_STACK
1234                 int dont_block_signals = 0;
1235
1236                 *ret_stack = __splitstack_makecontext(stacksize,
1237                                                       &newg->stack_context[0],
1238                                                       ret_stacksize);
1239                 __splitstack_block_signals_context(&newg->stack_context[0],
1240                                                    &dont_block_signals, nil);
1241 #else
1242                 *ret_stack = runtime_mallocgc(stacksize, FlagNoProfiling|FlagNoGC, 0, 0);
1243                 *ret_stacksize = stacksize;
1244                 newg->gcinitial_sp = *ret_stack;
1245                 newg->gcstack_size = stacksize;
1246 #endif
1247         }
1248         return newg;
1249 }
1250
1251 /* For runtime package testing.  */
1252
     // The __asm__ label gives this function the assembler name
     // libgo_runtime.runtime.entersyscall.
     // NOTE(review): presumably that is the symbol Go code in package
     // runtime uses for entersyscall -- confirm.
1253 void runtime_testing_entersyscall(void)
1254   __asm__("libgo_runtime.runtime.entersyscall");
1255
     // Thin wrapper: forwards to runtime_entersyscall.
1256 void
1257 runtime_testing_entersyscall()
1258 {
1259         runtime_entersyscall();
1260 }
1261
     // Testing hook, like the entersyscall wrapper before it.  The __asm__
     // label gives this function the assembler name
     // libgo_runtime.runtime.exitsyscall.
1262 void runtime_testing_exitsyscall(void)
1263   __asm__("libgo_runtime.runtime.exitsyscall");
1264
     // Thin wrapper: forwards to runtime_exitsyscall.
1265 void
1266 runtime_testing_exitsyscall()
1267 {
1268         runtime_exitsyscall();
1269 }
1270
     // Create a new goroutine whose entry is fn and whose param is arg, make
     // it ready to run, and return its G.  A G from the free list is reused
     // when one is available; otherwise a new one is allocated with a
     // StackMin stack and appended to the runtime_allg list.  The whole
     // operation runs under the sched lock.
1271 G*
1272 __go_go(void (*fn)(void*), void* arg)
1273 {
1274         byte *sp;
1275         size_t spsize;
1276         G * volatile newg;      // volatile to avoid longjmp warning
1277
1278         schedlock();
1279
1280         if((newg = gfget()) != nil){
                     // Reusing a dead g: recover the stack it already owns.
1281 #ifdef USING_SPLIT_STACK
1282                 int dont_block_signals = 0;
1283
1284                 sp = __splitstack_resetcontext(&newg->stack_context[0],
1285                                                &spsize);
1286                 __splitstack_block_signals_context(&newg->stack_context[0],
1287                                                    &dont_block_signals, nil);
1288 #else
1289                 sp = newg->gcinitial_sp;
1290                 spsize = newg->gcstack_size;
                     // A size of 0 is the marker runtime_mstart uses for a g0,
                     // which must never show up on the free list.
1291                 if(spsize == 0)
1292                         runtime_throw("bad spsize in __go_go");
1293                 newg->gcnext_sp = sp;
1294 #endif
1295         } else {
1296                 newg = runtime_malg(StackMin, &sp, &spsize);
                     // Append the new g to the list headed by runtime_allg.
1297                 if(runtime_lastg == nil)
1298                         runtime_allg = newg;
1299                 else
1300                         runtime_lastg->alllink = newg;
1301                 runtime_lastg = newg;
1302         }
1303         newg->status = Gwaiting;
1304         newg->waitreason = "new goroutine";
1305
1306         newg->entry = (byte*)fn;
1307         newg->param = arg;
             // Record the caller's return address as the creation pc.
1308         newg->gopc = (uintptr)__builtin_return_address(0);
1309
1310         runtime_sched.gcount++;
1311         runtime_sched.goidgen++;
1312         newg->goid = runtime_sched.goidgen;
1313
1314         if(sp == nil)
1315                 runtime_throw("nil g->stack0");
1316
             // Build a context that starts in kickoff on the new stack.
             // NOTE(review): kickoff is defined outside this view; presumably
             // it calls g->entry with g->param -- confirm.
1317         getcontext(&newg->context);
1318         newg->context.uc_stack.ss_sp = sp;
1319         newg->context.uc_stack.ss_size = spsize;
1320         makecontext(&newg->context, kickoff, 0);
1321
1322         newprocreadylocked(newg);
1323         schedunlock();
1324
1325         return newg;
1326 //printf(" goid=%d\n", newg->goid);
1327 }
1328
1329 // Put on gfree list.  Sched must be locked.
1330 static void
1331 gfput(G *g)
1332 {
1333         g->schedlink = runtime_sched.gfree;
1334         runtime_sched.gfree = g;
1335 }
1336
1337 // Get from gfree list.  Sched must be locked.
1338 static G*
1339 gfget(void)
1340 {
1341         G *g;
1342
1343         g = runtime_sched.gfree;
1344         if(g)
1345                 runtime_sched.gfree = g->schedlink;
1346         return g;
1347 }
1348
1349 // Run all deferred functions for the current goroutine.
1350 static void
1351 rundefer(void)
1352 {
1353         Defer *d;
1354
1355         while((d = g->defer) != nil) {
1356                 void (*pfn)(void*);
1357
1358                 pfn = d->__pfn;
1359                 d->__pfn = nil;
1360                 if (pfn != nil)
1361                         (*pfn)(d->__arg);
1362                 g->defer = d->__next;
1363                 runtime_free(d);
1364         }
1365 }
1366
     // The asm label gives this function the assembler name
     // libgo_runtime.runtime.Goexit.
1367 void runtime_Goexit (void) asm ("libgo_runtime.runtime.Goexit");
1368
     // Run the current goroutine's deferred functions, then call
     // runtime_goexit.
1369 void
1370 runtime_Goexit(void)
1371 {
1372         rundefer();
1373         runtime_goexit();
1374 }
1375
     // The asm label gives this function the assembler name
     // libgo_runtime.runtime.Gosched.
1376 void runtime_Gosched (void) asm ("libgo_runtime.runtime.Gosched");
1377
     // Thin wrapper: forwards to runtime_gosched.
1378 void
1379 runtime_Gosched(void)
1380 {
1381         runtime_gosched();
1382 }
1383
1384 // Implementation of runtime.GOMAXPROCS.
1385 // delete when scheduler is stronger
     // Sets runtime_gomaxprocs to n and returns the previous value.
     // n <= 0 keeps the current value (so the call is a pure query), and
     // n above maxgomaxprocs is clamped to maxgomaxprocs.
1386 int32
1387 runtime_gomaxprocsfunc(int32 n)
1388 {
1389         int32 ret;
1390         uint32 v;
1391
1392         schedlock();
1393         ret = runtime_gomaxprocs;
1394         if(n <= 0)
1395                 n = ret;
1396         if(n > maxgomaxprocs)
1397                 n = maxgomaxprocs;
1398         runtime_gomaxprocs = n;
1399         if(runtime_gomaxprocs > 1)
1400                 runtime_singleproc = false;
1401         if(runtime_gcwaiting != 0) {
                     // A collection is pending or running: mcpumax must be 1
                     // right now, and it is left alone here.  Only
                     // runtime_gomaxprocs records the new setting.
1402                 if(atomic_mcpumax(runtime_sched.atomic) != 1)
1403                         runtime_throw("invalid mcpumax during gc");
1404                 schedunlock();
1405                 return ret;
1406         }
1407
1408         setmcpumax(n);
1409
1410         // If there are now fewer allowed procs
1411         // than procs running, stop.
1412         v = runtime_atomicload(&runtime_sched.atomic);
1413         if((int32)atomic_mcpu(v) > n) {
                     // Release the lock first: runtime_gosched goes through
                     // schedule(), which takes the sched lock itself.
1414                 schedunlock();
1415                 runtime_gosched();
1416                 return ret;
1417         }
1418         // handle more procs
1419         matchmg();
1420         schedunlock();
1421         return ret;
1422 }
1423
1424 void
1425 runtime_LockOSThread(void)
1426 {
1427         if(m == &runtime_m0 && runtime_sched.init) {
1428                 runtime_sched.lockmain = true;
1429                 return;
1430         }
1431         m->lockedg = g;
1432         g->lockedm = m;
1433 }
1434
1435 void
1436 runtime_UnlockOSThread(void)
1437 {
1438         if(m == &runtime_m0 && runtime_sched.init) {
1439                 runtime_sched.lockmain = false;
1440                 return;
1441         }
1442         m->lockedg = nil;
1443         g->lockedm = nil;
1444 }
1445
1446 bool
1447 runtime_lockedOSThread(void)
1448 {
1449         return g->lockedm != nil && m->lockedg != nil;
1450 }
1451
1452 // for testing of callbacks
1453
     // The asm label gives this function the assembler name
     // libgo_runtime.runtime.golockedOSThread.
1454 _Bool runtime_golockedOSThread(void)
1455   asm("libgo_runtime.runtime.golockedOSThread");
1456
     // Thin wrapper: returns the result of runtime_lockedOSThread.
1457 _Bool
1458 runtime_golockedOSThread(void)
1459 {
1460         return runtime_lockedOSThread();
1461 }
1462
1463 // for testing of wire, unwire
1464 uint32
1465 runtime_mid()
1466 {
1467         return m->id;
1468 }
1469
     // The __asm__ label gives this function the assembler name
     // libgo_runtime.runtime.Goroutines.
1470 int32 runtime_Goroutines (void)
1471   __asm__ ("libgo_runtime.runtime.Goroutines");
1472
     // Returns runtime_sched.gcount, the scheduler's count of goroutines.
     // The value is read without taking the sched lock.
1473 int32
1474 runtime_Goroutines()
1475 {
1476         return runtime_sched.gcount;
1477 }
1478
     // Returns runtime_sched.mcount.  The value is read without taking
     // the sched lock.
1479 int32
1480 runtime_mcount(void)
1481 {
1482         return runtime_sched.mcount;
1483 }
1484
     // CPU profiler state.  The unnamed Lock member is what
     // runtime_lock(&prof) / runtime_unlock(&prof) operate on.
1485 static struct {
1486         Lock;
             // Profiling callback; nil when profiling is off.
1487         void (*fn)(uintptr*, int32);
             // Requested sampling rate; 0 when profiling is off.
1488         int32 hz;
             // Buffer for a traceback.  Only the commented-out code in
             // runtime_sigprof refers to it at the moment.
1489         uintptr pcbuf[100];
1490 } prof;
1491
1492 // Called if we receive a SIGPROF signal.
     // All four parameters are currently unused: the traceback call that
     // would consume them is commented out, so at present this function only
     // checks the profiler state and takes and releases prof's lock.
1493 void
1494 runtime_sigprof(uint8 *pc __attribute__ ((unused)),
1495                 uint8 *sp __attribute__ ((unused)),
1496                 uint8 *lr __attribute__ ((unused)),
1497                 G *gp __attribute__ ((unused)))
1498 {
1499         // int32 n;
1500
             // Quick check without the lock: nothing to do if profiling is off.
1501         if(prof.fn == nil || prof.hz == 0)
1502                 return;
1503
1504         runtime_lock(&prof);
             // Check again now that the lock is held; profiling may have been
             // switched off in between.
1505         if(prof.fn == nil) {
1506                 runtime_unlock(&prof);
1507                 return;
1508         }
1509         // n = runtime_gentraceback(pc, sp, lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf));
1510         // if(n > 0)
1511         //      prof.fn(prof.pcbuf, n);
1512         runtime_unlock(&prof);
1513 }
1514
1515 // Arrange to call fn with a traceback hz times a second.
1516 void
1517 runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
1518 {
1519         // Force sane arguments.
1520         if(hz < 0)
1521                 hz = 0;
1522         if(hz == 0)
1523                 fn = nil;
1524         if(fn == nil)
1525                 hz = 0;
1526
1527         // Stop profiler on this cpu so that it is safe to lock prof.
1528         // if a profiling signal came in while we had prof locked,
1529         // it would deadlock.
1530         runtime_resetcpuprofiler(0);
1531
1532         runtime_lock(&prof);
1533         prof.fn = fn;
1534         prof.hz = hz;
1535         runtime_unlock(&prof);
1536         runtime_lock(&runtime_sched);
1537         runtime_sched.profilehz = hz;
1538         runtime_unlock(&runtime_sched);
1539
1540         if(hz != 0)
1541                 runtime_resetcpuprofiler(hz);
1542 }