libgo/runtime/proc.c

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 #include <limits.h>
   6 #include <stdlib.h>
   7 #include <pthread.h>
   8 #include <unistd.h>
   9
  10 #include "config.h"
  11 #include "runtime.h"
  12 #include "arch.h"
  13 #include "defs.h"
  14 #include "malloc.h"
  15 #include "go-defer.h"
  16
  17 #ifdef USING_SPLIT_STACK
  18
  19 /* FIXME: These are not declared anywhere, so they are prototyped
        by hand here.  This file calls the getcontext/setcontext pair
        around goroutine switches (runtime_gogo, runtime_mcall,
        runtime_mstart).
        NOTE(review): presumably these are provided by the split-stack
        support library -- confirm.  */
  20
  21 extern void __splitstack_getcontext(void *context[10]);
  22
  23 extern void __splitstack_setcontext(void *context[10]);
  24
  25 extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
  26
  27 extern void * __splitstack_resetcontext(void *context[10], size_t *);
  28
  29 extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
  30                                void **);
  31
  32 #endif
  33
  // StackMin is a stack size in bytes.  With split stacks and linker
  // support it is PTHREAD_STACK_MIN (or 8192 if the platform does not
  // define that); otherwise it is a fixed 2MB.  Below, runtime_mstart
  // records it as g0's gcstack_size when split stacks are not in use.
  // The 2MB form is parenthesized so that the macro expands correctly
  // inside larger expressions such as `x % StackMin'.
  #if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
  # ifdef PTHREAD_STACK_MIN
  #  define StackMin PTHREAD_STACK_MIN
  # else
  #  define StackMin 8192
  # endif
  #else
  # define StackMin (2 * 1024 * 1024)
  #endif
  43
  44 static void schedule(G*);       // defined later in this file
  45 static M *startm(void);         // defined later in this file
  46
  47 typedef struct Sched Sched;
  48
     // The bootstrap m and its g0; runtime_schedinit installs them as
     // the calling thread's m and g.
  49 M       runtime_m0;
  50 G       runtime_g0;     // idle goroutine for m0
  51
     // On RTEMS the __thread keyword is defined away, so g and m below
     // become ordinary file-scope statics there.
  52 #ifdef __rtems__
  53 #define __thread
  54 #endif
  55
     // The current goroutine and current m of the running thread.
  56 static __thread G *g;
  57 static __thread M *m;
  58
  59 // We cannot always refer to the TLS variables directly.  The
  60 // compiler will call tls_get_addr to get the address of the variable,
  61 // and it may hold it in a register across a call to schedule.  When
  62 // we get back from the call we may be running in a different thread,
  63 // in which case the register now points to the TLS variable for a
  64 // different thread.  We use non-inlinable functions to avoid this
  65 // when necessary.
  66
     // runtime_g returns the calling thread's current g.
  67 G* runtime_g(void) __attribute__ ((noinline, no_split_stack));
  68
  69 G*
  70 runtime_g(void)
  71 {
  72         return g;
  73 }
  74
  75 M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
  76
     // runtime_m returns the calling thread's current m.  Like
     // runtime_g it is declared noinline, so the TLS variable is
     // re-read on every call.
  77 M*
  78 runtime_m(void)
  79 {
  80         return m;
  81 }
  82
  83 int32   runtime_gcwaiting;      // set to 1 by runtime_stoptheworld, back to 0 by runtime_starttheworld
  84
  85 // Go scheduler
  86 //
  87 // The go scheduler's job is to match ready-to-run goroutines (`g's)
  88 // with waiting-for-work schedulers (`m's).  If there are ready g's
  89 // and no waiting m's, ready() will start a new m running in a new
  90 // OS thread, so that all ready g's can run simultaneously, up to a limit.
  91 // For now, m's never go away.
  92 //
  93 // By default, Go keeps only one kernel thread (m) running user code
  94 // at a single time; other threads may be blocked in the operating system.
  95 // Setting the environment variable $GOMAXPROCS or calling
  96 // runtime.GOMAXPROCS() will change the number of user threads
  97 // allowed to execute simultaneously.  $GOMAXPROCS is thus an
  98 // approximation of the maximum number of cores to use.
  99 //
 100 // Even a program that can run without deadlock in a single process
 101 // might use more m's if given the chance.  For example, the prime
 102 // sieve will use as many m's as there are primes (up to mcpumax),
 103 // allowing different stages of the pipeline to execute in parallel.
 104 // We could revisit this choice, only kicking off new m's for blocking
 105 // system calls, but that would limit the amount of parallel computation
 106 // that go would try to do.
 107 //
 108 // In general, one could imagine all sorts of refinements to the
 109 // scheduler, but the goal now is just to get something working on
 110 // Linux and OS X.
 111
 112 struct Sched {
 113         Lock;           // embedded lock; taken with runtime_lock(&runtime_sched)
 114
 115         G *gfree;       // available g's (status == Gdead)
 116         int32 goidgen;  // NOTE(review): presumably the goroutine id generator -- confirm
 117
 118         G *ghead;       // g's waiting to run
 119         G *gtail;       // last g on that queue; gput appends here
 120         int32 gwait;    // number of g's waiting to run
 121         int32 gcount;   // number of g's that are alive
 122         int32 grunning; // number of g's running on cpu or in syscall
 123
 124         M *mhead;       // m's waiting for work
 125         int32 mwait;    // number of m's waiting for work
 126         int32 mcount;   // number of m's that have been created
 127
 128         volatile uint32 atomic; // atomic scheduling word (see below)
 129
 130         int32 profilehz;        // cpu profiling rate
 131
 132         Note    stopped;        // one g can set waitstop and wait here for m's to stop
 133 };
 134
 // runtime_sched.atomic is a single uint32 that packs four fields,
 // listed here from the low bits upward:
 //
 //      [15 bits] mcpu          number of m's executing on cpu
 //      [15 bits] mcpumax       max number of m's allowed on cpu
 //      [1 bit] waitstop        some g is waiting on stopped
 //      [1 bit] gwaiting        gwait != 0
 //
 // This is exactly the information entersyscall and exitsyscall need
 // in order to decide whether they must coordinate with the
 // scheduler.  Keeping it in one machine word gives them a fast path
 // made of a single atomic read/write with no lock/unlock, which
 // greatly reduces contention in syscall- or cgo-heavy multithreaded
 // programs.
 //
 // Apart from entersyscall and exitsyscall, every update to these
 // fields is made while holding the schedlock.  Code that holds the
 // schedlock therefore only has to consider what entersyscall and
 // exitsyscall might do concurrently; the other routines are
 // excluded by the lock.
 //
 // entersyscall and exitsyscall only ever read mcpumax, waitstop and
 // gwaiting; they never write them.  So a writer holding the
 // schedlock cannot hit a write conflict on those three fields.
 // Logic conflicts are still possible: for example, waitstop must
 // only be set when mcpu >= mcpumax, or the wait may turn into a
 // spurious sleep.  The Promela model in proc.p verifies these
 // accesses.
 enum {
         // Layout of the packed word.
         mcpuWidth = 15,
         mcpuShift = 0,
         mcpumaxShift = mcpuShift + mcpuWidth,
         waitstopShift = mcpumaxShift + mcpuWidth,
         gwaitingShift = waitstopShift + 1,
         mcpuMask = (1 << mcpuWidth) - 1,

         // GOMAXPROCS can be no larger than what fits in the
         // mcpu/mcpumax bit fields.  A few of the highest values
         // are kept in reserve so that an accidental decrement
         // below zero can be detected.
         maxgomaxprocs = mcpuMask - 10,
 };

 // Extract one field of the packed word v.
 #define atomic_bits(v, shift, mask)     (((v)>>(shift))&(mask))

 #define atomic_mcpu(v)          atomic_bits(v, mcpuShift, mcpuMask)
 #define atomic_mcpumax(v)       atomic_bits(v, mcpumaxShift, mcpuMask)
 #define atomic_waitstop(v)      atomic_bits(v, waitstopShift, 1)
 #define atomic_gwaiting(v)      atomic_bits(v, gwaitingShift, 1)
 182
 183 Sched runtime_sched;                // the global scheduler state
 184 int32 runtime_gomaxprocs;           // set in runtime_schedinit: 1 unless $GOMAXPROCS parses to a nonzero value
 185 bool runtime_singleproc;            // runtime_gomaxprocs == 1, as computed in runtime_schedinit and runtime_stoptheworld
 186
 187 static bool canaddmcpu(void);
 188
 189 // An m that is waiting for notewakeup(&m->havenextg).  This may
 190 // only be accessed while the scheduler lock is held.  This is used to
 191 // minimize the number of times we call notewakeup while the scheduler
 192 // lock is held, since the m will normally move quickly to lock the
 193 // scheduler itself, producing lock contention.
     // schedunlock performs the deferred notewakeup after it has
     // released the lock.
 194 static M* mwakeup;
 195
 196 // Scheduling helpers.  Sched must be locked.
 197 static void gput(G*);   // put/get on ghead/gtail
 198 static G* gget(void);
 199 static void mput(M*);   // put/get on mhead
 200 static M* mget(G*);
 201 static void gfput(G*);  // put/get on gfree
 202 static G* gfget(void);
 203 static void matchmg(void);      // match m's to g's
 204 static void readylocked(G*);    // ready, but sched is locked
 205 static void mnextg(M*, G*);
 206 static void mcommoninit(M*);
 207
 208 void
 209 setmcpumax(uint32 n)
 210 {
 211         uint32 v, w;
 212
 213         for(;;) {
 214                 v = runtime_sched.atomic;
 215                 w = v;
 216                 w &= ~(mcpuMask<<mcpumaxShift);
 217                 w |= n<<mcpumaxShift;
 218                 if(runtime_cas(&runtime_sched.atomic, v, w))
 219                         break;
 220         }
 221 }
 222
 223 // First function run by a new goroutine.  This replaces gogocall.
 224 static void
 225 kickoff(void)
 226 {
 227         void (*fn)(void*);
 228
 229         fn = (void (*)(void*))(g->entry);
 230         fn(g->param);
 231         runtime_goexit();
 232 }
 233
 234 // Switch context to a different goroutine.  This is like longjmp.
     // Before switching, newg becomes the thread's current g and
     // newg->fromgogo is set, so that the code resumed by setcontext
     // (see runtime_mcall) can tell it was reached through a switch.
 235 static void runtime_gogo(G*) __attribute__ ((noinline));
 236 static void
 237 runtime_gogo(G* newg)
 238 {
 239 #ifdef USING_SPLIT_STACK
 240         __splitstack_setcontext(&newg->stack_context[0]);
 241 #endif
 242         g = newg;
 243         newg->fromgogo = true;
 244         setcontext(&newg->context);
 245 }
 246
 247 // Save context and call fn passing g as a parameter.  This is like
 248 // setjmp.  Because getcontext always returns 0, unlike setjmp, we use
 249 // g->fromgogo as a code.  It will be true if we got here via
 250 // setcontext.  g == nil the first time this is called in a new m.
     // pfn is not called directly: it is stored in m->g0->entry with
     // the old g in m->g0->param, and control moves to g0's saved
     // context, where runtime_mstart picks them up and makes the call.
 251 static void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
 252 static void
 253 runtime_mcall(void (*pfn)(G*))
 254 {
 255 #ifndef USING_SPLIT_STACK
 256         int i;
 257 #endif
 258
 259         // Ensure that all registers are on the stack for the garbage
 260         // collector.
 261         __builtin_unwind_init();
 262
 263         if(g == m->g0)
 264                 runtime_throw("runtime: mcall called on m->g0 stack");
 265
 266         if(g != nil) {
 267
 268 #ifdef USING_SPLIT_STACK
 269                 __splitstack_getcontext(&g->stack_context[0]);
 270 #else
                     // Without split stacks, record the address of a
                     // local as g's current stack position.
 271                 g->gcnext_sp = &i;
 272 #endif
                     // Clear the flag before saving the context, so
                     // that a plain return from getcontext can be told
                     // apart from a resume through runtime_gogo, which
                     // sets the flag.
 273                 g->fromgogo = false;
 274                 getcontext(&g->context);
 275         }
             // First pass (or no g at all): switch to g0 and run pfn
             // there.  When g is later resumed by runtime_gogo,
             // fromgogo is true, this block is skipped and we return
             // to the caller.
 276         if (g == nil || !g->fromgogo) {
 277 #ifdef USING_SPLIT_STACK
 278                 __splitstack_setcontext(&m->g0->stack_context[0]);
 279 #endif
 280                 m->g0->entry = (byte*)pfn;
 281                 m->g0->param = g;
 282                 g = m->g0;
 283                 setcontext(&m->g0->context);
 284                 runtime_throw("runtime: mcall function returned");
 285         }
 286 }
 287
 288 // The bootstrap sequence is:
 289 //
 290 //      call osinit
 291 //      call schedinit
 292 //      make & queue new G
 293 //      call runtime_mstart
 294 //
 295 // The new G does:
 296 //
 297 //      call main_init_function
 298 //      call initdone
 299 //      call main_main
 300 void
 301 runtime_schedinit(void)
 302 {
 303         int32 n;
 304         const byte *p;
 305
 306         m = &runtime_m0;
 307         g = &runtime_g0;
 308         m->g0 = g;
 309         m->curg = g;
 310         g->m = m;
 311
 312         m->nomemprof++;
 313         runtime_mallocinit();
 314         mcommoninit(m);
 315
 316         runtime_goargs();
 317         runtime_goenvs();
 318
 319         // For debugging:
 320         // Allocate internal symbol table representation now,
 321         // so that we don't need to call malloc when we crash.
 322         // runtime_findfunc(0);
 323
 324         runtime_gomaxprocs = 1;
 325         p = runtime_getenv("GOMAXPROCS");
 326         if(p != nil && (n = runtime_atoi(p)) != 0) {
 327                 if(n > maxgomaxprocs)
 328                         n = maxgomaxprocs;
 329                 runtime_gomaxprocs = n;
 330         }
 331         setmcpumax(runtime_gomaxprocs);
 332         runtime_singleproc = runtime_gomaxprocs == 1;
 333
 334         canaddmcpu();   // mcpu++ to account for bootstrap m
 335         m->helpgc = 1;  // flag to tell schedule() to mcpu--
 336         runtime_sched.grunning++;
 337
 338         // Can not enable GC until all roots are registered.
 339         // mstats.enablegc = 1;
 340         m->nomemprof--;
 341 }
 342
 343 // Lock the scheduler: take the lock embedded in runtime_sched.
     // Pair with schedunlock, which also delivers any wakeup deferred
     // in mwakeup.
 344 static void
 345 schedlock(void)
 346 {
 347         runtime_lock(&runtime_sched);
 348 }
 349
 350 // Unlock the scheduler.
 351 static void
 352 schedunlock(void)
 353 {
 354         M *m;
 355
 356         m = mwakeup;
 357         mwakeup = nil;
 358         runtime_unlock(&runtime_sched);
 359         if(m != nil)
 360                 runtime_notewakeup(&m->havenextg);
 361 }
 362
 363 void
 364 runtime_goexit(void)
 365 {
             // Mark the current goroutine as exiting, then enter the
             // scheduler; schedule() turns a Gmoribund g into Gdead
             // and frees it.
 366         g->status = Gmoribund;
 367         runtime_gosched();
 368 }
 369
 370 void
 371 runtime_goroutineheader(G *g)
 372 {
 373         const char *status;
 374
 375         switch(g->status) {
 376         case Gidle:
 377                 status = "idle";
 378                 break;
 379         case Grunnable:
 380                 status = "runnable";
 381                 break;
 382         case Grunning:
 383                 status = "running";
 384                 break;
 385         case Gsyscall:
 386                 status = "syscall";
 387                 break;
 388         case Gwaiting:
 389                 if(g->waitreason)
 390                         status = g->waitreason;
 391                 else
 392                         status = "waiting";
 393                 break;
 394         case Gmoribund:
 395                 status = "moribund";
 396                 break;
 397         default:
 398                 status = "???";
 399                 break;
 400         }
 401         runtime_printf("goroutine %d [%s]:\n", g->goid, status);
 402 }
 403
 404 void
 405 runtime_tracebackothers(G *me)
 406 {
 407         G *g;
 408
 409         for(g = runtime_allg; g != nil; g = g->alllink) {
 410                 if(g == me || g->status == Gdead)
 411                         continue;
 412                 runtime_printf("\n");
 413                 runtime_goroutineheader(g);
 414                 // runtime_traceback(g->sched.pc, g->sched.sp, 0, g);
 415         }
 416 }
 417
 418 // Mark this g as m's idle goroutine.
 419 // This functionality might be used in environments where programs
 420 // are limited to a single thread, to simulate a select-driven
 421 // network server.  It is not exposed via the standard runtime API.
 422 void
 423 runtime_idlegoroutine(void)
 424 {
 425         if(g->idlem != nil)
 426                 runtime_throw("g is already an idle goroutine");
 427         g->idlem = m;
 428 }
 429
 430 static void
 431 mcommoninit(M *m)
 432 {
 433         // Add to runtime_allm so garbage collector doesn't free m
 434         // when it is just in a register or thread-local storage.
 435         m->alllink = runtime_allm;
 436         // runtime_Cgocalls() iterates over allm w/o schedlock,
 437         // so we need to publish it safely.
 438         runtime_atomicstorep((void**)&runtime_allm, m);
 439
 440         m->id = runtime_sched.mcount++;
 441         m->fastrand = 0x49f6428aUL + m->id;
 442
 443         if(m->mcache == nil)
 444                 m->mcache = runtime_allocmcache();
 445 }
 446
 447 // Try to increment mcpu.  Report whether succeeded.
 448 static bool
 449 canaddmcpu(void)
 450 {
 451         uint32 v;
 452
 453         for(;;) {
 454                 v = runtime_sched.atomic;
 455                 if(atomic_mcpu(v) >= atomic_mcpumax(v))
 456                         return 0;
 457                 if(runtime_cas(&runtime_sched.atomic, v, v+(1<<mcpuShift)))
 458                         return 1;
 459         }
 460 }
 461
 462 // Put on `g' queue.  Sched must be locked.
 463 static void
 464 gput(G *g)
 465 {
 466         M *m;
 467
 468         // If g is wired, hand it off directly.
 469         if((m = g->lockedm) != nil && canaddmcpu()) {
 470                 mnextg(m, g);
 471                 return;
 472         }
 473
 474         // If g is the idle goroutine for an m, hand it off.
 475         if(g->idlem != nil) {
 476                 if(g->idlem->idleg != nil) {
 477                         runtime_printf("m%d idle out of sync: g%d g%d\n",
 478                                 g->idlem->id,
 479                                 g->idlem->idleg->goid, g->goid);
 480                         runtime_throw("runtime: double idle");
 481                 }
 482                 g->idlem->idleg = g;
 483                 return;
 484         }
 485
 486         g->schedlink = nil;
 487         if(runtime_sched.ghead == nil)
 488                 runtime_sched.ghead = g;
 489         else
 490                 runtime_sched.gtail->schedlink = g;
 491         runtime_sched.gtail = g;
 492
 493         // increment gwait.
 494         // if it transitions to nonzero, set atomic gwaiting bit.
 495         if(runtime_sched.gwait++ == 0)
 496                 runtime_xadd(&runtime_sched.atomic, 1<<gwaitingShift);
 497 }
 498
 499 // Report whether gget would return something.
 500 static bool
 501 haveg(void)
 502 {
 503         return runtime_sched.ghead != nil || m->idleg != nil;
 504 }
 505
 506 // Get from `g' queue.  Sched must be locked.
 507 static G*
 508 gget(void)
 509 {
 510         G *g;
 511
 512         g = runtime_sched.ghead;
 513         if(g){
 514                 runtime_sched.ghead = g->schedlink;
 515                 if(runtime_sched.ghead == nil)
 516                         runtime_sched.gtail = nil;
 517                 // decrement gwait.
 518                 // if it transitions to zero, clear atomic gwaiting bit.
 519                 if(--runtime_sched.gwait == 0)
 520                         runtime_xadd(&runtime_sched.atomic, -1<<gwaitingShift);
 521         } else if(m->idleg != nil) {
 522                 g = m->idleg;
 523                 m->idleg = nil;
 524         }
 525         return g;
 526 }
 527
 528 // Put on `m' list.  Sched must be locked.
 529 static void
 530 mput(M *m)
 531 {
 532         m->schedlink = runtime_sched.mhead;
 533         runtime_sched.mhead = m;
 534         runtime_sched.mwait++;
 535 }
 536
 537 // Get an `m' to run `g'.  Sched must be locked.
 538 static M*
 539 mget(G *g)
 540 {
 541         M *m;
 542
 543         // if g has its own m, use it.
 544         if(g && (m = g->lockedm) != nil)
 545                 return m;
 546
 547         // otherwise use general m pool.
 548         if((m = runtime_sched.mhead) != nil){
 549                 runtime_sched.mhead = m->schedlink;
 550                 runtime_sched.mwait--;
 551         }
 552         return m;
 553 }
 554
 555 // Mark g ready to run.
     // Takes the scheduler lock around readylocked, which does the work.
 556 void
 557 runtime_ready(G *g)
 558 {
 559         schedlock();
 560         readylocked(g);
 561         schedunlock();
 562 }
 563
 564 // Mark g ready to run.  Sched is already locked.
 565 // G might be running already and about to stop.
 566 // The sched lock protects g->status from changing underfoot.
 567 static void
 568 readylocked(G *g)
 569 {
 570         if(g->m){
 571                 // Running on another machine.
 572                 // Ready it when it stops.
 573                 g->readyonstop = 1;
 574                 return;
 575         }
 576
 577         // Mark runnable.
 578         if(g->status == Grunnable || g->status == Grunning) {
 579                 runtime_printf("goroutine %d has status %d\n", g->goid, g->status);
 580                 runtime_throw("bad g->status in ready");
 581         }
 582         g->status = Grunnable;
 583
 584         gput(g);
 585         matchmg();
 586 }
 587
 588 // Same as readylocked but a different symbol so that
 589 // debuggers can set a breakpoint here and catch all
 590 // new goroutines.
     // Sched must already be locked, as for readylocked.
 591 static void
 592 newprocreadylocked(G *g)
 593 {
 594         readylocked(g);
 595 }
 596
 597 // Pass g to m for running.
 598 // Caller has already incremented mcpu.
 599 static void
 600 mnextg(M *m, G *g)
 601 {
 602         runtime_sched.grunning++;
 603         m->nextg = g;
 604         if(m->waitnextg) {
 605                 m->waitnextg = 0;
 606                 if(mwakeup != nil)
 607                         runtime_notewakeup(&mwakeup->havenextg);
 608                 mwakeup = m;
 609         }
 610 }
 611
 612 // Get the next goroutine that m should run.
 613 // Sched must be locked on entry, is unlocked on exit.
 614 // Makes sure that at most $GOMAXPROCS g's are
 615 // running on cpus (not in system calls) at any given time.
     // If nothing can be run right now, the m sleeps on m->havenextg
     // until another thread either hands it a g through mnextg or
     // wakes it as a GC helper (runtime_helpgc).
 616 static G*
 617 nextgandunlock(void)
 618 {
 619         G *gp;
 620         uint32 v;
 621
 622 top:
             // The highest mcpu values are reserved (see
             // maxgomaxprocs); seeing one means mcpu was decremented
             // below zero.
 623         if(atomic_mcpu(runtime_sched.atomic) >= maxgomaxprocs)
 624                 runtime_throw("negative mcpu");
 625
 626         // If there is a g waiting as m->nextg, the mcpu++
 627         // happened before it was passed to mnextg.
 628         if(m->nextg != nil) {
 629                 gp = m->nextg;
 630                 m->nextg = nil;
 631                 schedunlock();
 632                 return gp;
 633         }
 634
 635         if(m->lockedg != nil) {
 636                 // We can only run one g, and it's not available.
 637                 // Make sure some other cpu is running to handle
 638                 // the ordinary run queue.
 639                 if(runtime_sched.gwait != 0) {
 640                         matchmg();
 641                         // m->lockedg might have been on the queue.
 642                         if(m->nextg != nil) {
 643                                 gp = m->nextg;
 644                                 m->nextg = nil;
 645                                 schedunlock();
 646                                 return gp;
 647                         }
 648                 }
 649         } else {
 650                 // Look for work on global queue.
 651                 while(haveg() && canaddmcpu()) {
 652                         gp = gget();
 653                         if(gp == nil)
 654                                 runtime_throw("gget inconsistency");
 655
                             // A g wired to some m is passed to that m;
                             // keep looking for one this m may run.
 656                         if(gp->lockedm) {
 657                                 mnextg(gp->lockedm, gp);
 658                                 continue;
 659                         }
 660                         runtime_sched.grunning++;
 661                         schedunlock();
 662                         return gp;
 663                 }
 664
 665                 // The while loop ended either because the g queue is empty
 666                 // or because we have maxed out our m procs running go
 667                 // code (mcpu >= mcpumax).  We need to check that
 668                 // concurrent actions by entersyscall/exitsyscall cannot
 669                 // invalidate the decision to end the loop.
 670                 //
 671                 // We hold the sched lock, so no one else is manipulating the
 672                 // g queue or changing mcpumax.  Entersyscall can decrement
 673                 // mcpu, but if it does so when there is something on the g queue,
 674                 // the gwait bit will be set, so entersyscall will take the slow path
 675                 // and use the sched lock.  So it cannot invalidate our decision.
 676                 //
 677                 // Wait on global m queue.
 678                 mput(m);
 679         }
 680
             // Snapshot the atomic word for the waitstop check below.
 681         v = runtime_atomicload(&runtime_sched.atomic);
 682         if(runtime_sched.grunning == 0)
 683                 runtime_throw("all goroutines are asleep - deadlock!");
             // Prepare to sleep: mark this m as waiting for a g and
             // reset the note it will sleep on.
 684         m->nextg = nil;
 685         m->waitnextg = 1;
 686         runtime_noteclear(&m->havenextg);
 687
 688         // Stoptheworld is waiting for all but its cpu to go to stop.
 689         // Entersyscall might have decremented mcpu too, but if so
 690         // it will see the waitstop and take the slow path.
 691         // Exitsyscall never increments mcpu beyond mcpumax.
 692         if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
 693                 // set waitstop = 0 (known to be 1)
 694                 runtime_xadd(&runtime_sched.atomic, -1<<waitstopShift);
 695                 runtime_notewakeup(&runtime_sched.stopped);
 696         }
 697         schedunlock();
 698
 699         runtime_notesleep(&m->havenextg);
             // Woken by runtime_helpgc rather than handed a g: help the
             // collector, then retake the sched lock and look for work
             // again.
 700         if(m->helpgc) {
 701                 runtime_gchelper();
 702                 m->helpgc = 0;
 703                 runtime_lock(&runtime_sched);
 704                 goto top;
 705         }
 706         if((gp = m->nextg) == nil)
 707                 runtime_throw("bad m->nextg in nextgoroutine");
 708         m->nextg = nil;
 709         return gp;
 710 }
 711
 712 int32
 713 runtime_helpgc(bool *extra)
 714 {
 715         M *mp;
 716         int32 n, max;
 717
 718         // Figure out how many CPUs to use.
 719         // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
 720         max = runtime_gomaxprocs;
 721         if(max > runtime_ncpu)
 722                 max = runtime_ncpu > 0 ? runtime_ncpu : 1;
 723         if(max > MaxGcproc)
 724                 max = MaxGcproc;
 725
 726         // We're going to use one CPU no matter what.
 727         // Figure out the max number of additional CPUs.
 728         max--;
 729
 730         runtime_lock(&runtime_sched);
 731         n = 0;
 732         while(n < max && (mp = mget(nil)) != nil) {
 733                 n++;
 734                 mp->helpgc = 1;
 735                 mp->waitnextg = 0;
 736                 runtime_notewakeup(&mp->havenextg);
 737         }
 738         runtime_unlock(&runtime_sched);
 739         if(extra)
 740                 *extra = n != max;
 741         return n;
 742 }
 743
 744 void
 745 runtime_stoptheworld(void)
 746 {
             // Stop the world: set runtime_gcwaiting, cap mcpumax at 1,
             // and wait until mcpu has dropped to at most 1.
 747         uint32 v;
 748
 749         schedlock();
 750         runtime_gcwaiting = 1;
 751
 752         setmcpumax(1);
 753
 754         // while mcpu > 1
 755         for(;;) {
 756                 v = runtime_sched.atomic;
 757                 if(atomic_mcpu(v) <= 1)
 758                         break;
 759
 760                 // It would be unsafe for multiple threads to be using
 761                 // the stopped note at once, but there is only
 762                 // ever one thread doing garbage collection.
 763                 runtime_noteclear(&runtime_sched.stopped);
 764                 if(atomic_waitstop(v))
 765                         runtime_throw("invalid waitstop");
 766
 767                 // atomic { waitstop = 1 }.  The cas is attempted against
 768                 // the value v read above, for which mcpu > 1; if it fails, re-read and retry.
 769                 if(!runtime_cas(&runtime_sched.atomic, v, v+(1<<waitstopShift)))
 770                         continue;
 771
                     // Sleep with the lock released until an m going
                     // idle clears waitstop and wakes the stopped note
                     // (see nextgandunlock), then re-check mcpu.
 772                 schedunlock();
 773                 runtime_notesleep(&runtime_sched.stopped);
 774                 schedlock();
 775         }
 776         runtime_singleproc = runtime_gomaxprocs == 1;
 777         schedunlock();
 778 }
 779
 780 void
 781 runtime_starttheworld(bool extra)
 782 {
 783         M *m;
 784
 785         schedlock();
 786         runtime_gcwaiting = 0;
 787         setmcpumax(runtime_gomaxprocs);
 788         matchmg();
 789         if(extra && canaddmcpu()) {
 790                 // Start a new m that will (we hope) be idle
 791                 // and so available to help when the next
 792                 // garbage collection happens.
 793                 // canaddmcpu above did mcpu++
 794                 // (necessary, because m will be doing various
 795                 // initialization work so is definitely running),
 796                 // but m is not running a specific goroutine,
 797                 // so set the helpgc flag as a signal to m's
 798                 // first schedule(nil) to mcpu-- and grunning--.
 799                 m = startm();
 800                 m->helpgc = 1;
 801                 runtime_sched.grunning++;
 802         }
 803         schedunlock();
 804 }
 805
 806 // Called to start an M.
     // mp is the M to run on this thread; startm passes this function
     // to pthread_create.  On the first pass it falls through to
     // runtime_minit and schedule(nil).
 807 void*
 808 runtime_mstart(void* mp)
 809 {
 810         m = (M*)mp;
 811         g = m->g0;
 812
             // Cleared so that the first return from getcontext below
             // is not mistaken for an mcall.
 813         g->entry = nil;
 814         g->param = nil;
 815
 816         // Record top of stack for use by mcall.
 817         // Once we call schedule we're never coming back,
 818         // so other calls can reuse this stack space.
 819 #ifdef USING_SPLIT_STACK
 820         __splitstack_getcontext(&g->stack_context[0]);
 821 #else
 822         g->gcinitial_sp = &mp;
 823         g->gcstack_size = StackMin;
 824         g->gcnext_sp = &mp;
 825 #endif
             // runtime_mcall switches back to this saved context after
             // storing a function in g0->entry and the old g in
             // g0->param, so execution resumes here with entry != nil.
 826         getcontext(&g->context);
 827
 828         if(g->entry != nil) {
 829                 // Got here from mcall.
 830                 void (*pfn)(G*) = (void (*)(G*))g->entry;
 831                 G* gp = (G*)g->param;
 832                 pfn(gp);
                     // NOTE(review): presumably a deliberate crash if
                     // pfn ever returns -- confirm.
 833                 *(int*)0x21 = 0x21;
 834         }
 835         runtime_minit();
 836         schedule(nil);
 837         return nil;
 838 }
 839
 840 typedef struct CgoThreadStart CgoThreadStart;
     // An m, a g and a function pointer bundled together.
     // NOTE(review): nothing in the visible part of this file uses this
     // type; presumably it carries thread-start arguments for cgo --
     // confirm.
 841 struct CgoThreadStart
 842 {
 843         M *m;
 844         G *g;
 845         void (*fn)(void);
 846 };
 847
 848 // Kick off new m's as needed (up to mcpumax).
 849 // There are already `other' other cpus that will
 850 // start looking for goroutines shortly.
 851 // Sched is locked.
 852 static void
 853 matchmg(void)
 854 {
 855         G *gp;
 856         M *mp;
 857
 858         if(m->mallocing || m->gcing)
 859                 return;
 860
 861         while(haveg() && canaddmcpu()) {
 862                 gp = gget();
 863                 if(gp == nil)
 864                         runtime_throw("gget inconsistency");
 865
 866                 // Find the m that will run gp.
 867                 if((mp = mget(gp)) == nil)
 868                         mp = startm();
 869                 mnextg(mp, gp);
 870         }
 871 }
 872
 873 static M*
 874 startm(void)
 875 {
 876         M *m;
 877         pthread_attr_t attr;
 878         pthread_t tid;
 879
 880         m = runtime_malloc(sizeof(M));
 881         mcommoninit(m);
 882         m->g0 = runtime_malg(-1, nil, nil);
 883
 884         if(pthread_attr_init(&attr) != 0)
 885                 runtime_throw("pthread_attr_init");
 886         if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
 887                 runtime_throw("pthread_attr_setdetachstate");
 888
 889 #ifndef PTHREAD_STACK_MIN
 890 #define PTHREAD_STACK_MIN 8192
 891 #endif
 892         if(pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0)
 893                 runtime_throw("pthread_attr_setstacksize");
 894
 895         if(pthread_create(&tid, &attr, runtime_mstart, m) != 0)
 896                 runtime_throw("pthread_create");
 897
 898         return m;
 899 }
 900
 901 // One round of scheduler: find a goroutine and run it.
 902 // The argument is the goroutine that was running before
 903 // schedule was called, or nil if this is the first call.
 904 // Never returns.
     // Takes the sched lock itself; nextgandunlock releases it.
 905 static void
 906 schedule(G *gp)
 907 {
 908         int32 hz;
 909         uint32 v;
 910
 911         schedlock();
 912         if(gp != nil) {
 913                 // Just finished running gp.
 914                 gp->m = nil;
 915                 runtime_sched.grunning--;
 916
 917                 // atomic { mcpu-- }
 918                 v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
 919                 if(atomic_mcpu(v) > maxgomaxprocs)
 920                         runtime_throw("negative mcpu in scheduler");
 921
 922                 switch(gp->status){
 923                 case Grunnable:
 924                 case Gdead:
 925                         // Shouldn't have been running!
 926                         runtime_throw("bad gp->status in sched");
 927                 case Grunning:
                             // gp is still runnable: put it back on the
                             // queue.
 928                         gp->status = Grunnable;
 929                         gput(gp);
 930                         break;
 931                 case Gmoribund:
                             // gp is exiting (runtime_goexit sets this
                             // status): unwire it from this m, free it,
                             // and exit the program when it was the
                             // last live goroutine.
 932                         gp->status = Gdead;
 933                         if(gp->lockedm) {
 934                                 gp->lockedm = nil;
 935                                 m->lockedg = nil;
 936                         }
 937                         gp->idlem = nil;
 938                         gfput(gp);
 939                         if(--runtime_sched.gcount == 0)
 940                                 runtime_exit(0);
 941                         break;
 942                 }
                     // readylocked set this flag because gp was still
                     // attached to an m; now that gp has stopped, ready
                     // it for real.
 943                 if(gp->readyonstop){
 944                         gp->readyonstop = 0;
 945                         readylocked(gp);
 946                 }
 947         } else if(m->helpgc) {
 948                 // Bootstrap m or new m started by starttheworld.
 949                 // atomic { mcpu-- }
 950                 v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
 951                 if(atomic_mcpu(v) > maxgomaxprocs)
 952                         runtime_throw("negative mcpu in scheduler");
 953                 // Compensate for increment in starttheworld().
 954                 runtime_sched.grunning--;
 955                 m->helpgc = 0;
 956         } else if(m->nextg != nil) {
 957                 // New m started by matchmg.
 958         } else {
 959                 runtime_throw("invalid m state in scheduler");
 960         }
 961
 962         // Find (or wait for) g to run.  Unlocks runtime_sched.
 963         gp = nextgandunlock();
 964         gp->readyonstop = 0;
 965         gp->status = Grunning;
 966         m->curg = gp;
 967         gp->m = m;
 968
 969         // Check whether the profiler needs to be turned on or off.
 970         hz = runtime_sched.profilehz;
 971         if(m->profilehz != hz)
 972                 runtime_resetcpuprofiler(hz);
 973
             // Switch to gp; this does not come back here.
 974         runtime_gogo(gp);
 975 }
 976
 977 // Enter scheduler.  If g->status is Grunning,
 978 // re-queues g and runs everyone else who is waiting
 979 // before running g again.  If g->status is Gmoribund,
 980 // kills off g.
 981 void
 982 runtime_gosched(void)
 983 {
 984         if(m->locks != 0)
 985                 runtime_throw("gosched holding locks");
 986         if(g == m->g0)
 987                 runtime_throw("gosched of g0");
 988         runtime_mcall(schedule);
 989 }
 990
 991 // The goroutine g is about to enter a system call.
 992 // Record that it's not using the cpu anymore.
 993 // This is called only from the go syscall library and cgocall,
 994 // not from the low-level system calls used by the runtime.
 995 //
 996 // Entersyscall cannot split the stack: the runtime_gosave must
 997 // make g->sched refer to the caller's stack segment, because
 998 // entersyscall is going to return immediately after.
 999 // It's okay to call matchmg and notewakeup even after
1000 // decrementing mcpu, because we haven't released the
1001 // sched lock yet, so the garbage collector cannot be running.
1002
1003 void runtime_entersyscall(void) __attribute__ ((no_split_stack));
1004
1005 void
1006 runtime_entersyscall(void)
1007 {
1008         uint32 v;
1009
1010         // Leave SP around for gc and traceback.
1011 #ifdef USING_SPLIT_STACK
1012         g->gcstack = __splitstack_find(NULL, NULL, &g->gcstack_size,
1013                                        &g->gcnext_segment, &g->gcnext_sp,
1014                                        &g->gcinitial_sp);
1015 #else
1016         g->gcnext_sp = (byte *) &v;
1017 #endif
1018
1019         // Save the registers in the g structure so that any pointers
1020         // held in registers will be seen by the garbage collector.
1021         // We could use getcontext here, but setjmp is more efficient
1022         // because it doesn't need to save the signal mask.
1023         setjmp(g->gcregs);
1024
1025         g->status = Gsyscall;
1026
1027         // Fast path.
1028         // The slow path inside the schedlock/schedunlock will get
1029         // through without stopping if it does:
1030         //      mcpu--
1031         //      gwait not true
1032         //      waitstop && mcpu <= mcpumax not true
1033         // If we can do the same with a single atomic add,
1034         // then we can skip the locks.
1035         v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
1036         if(!atomic_gwaiting(v) && (!atomic_waitstop(v) || atomic_mcpu(v) > atomic_mcpumax(v)))
1037                 return;
1038
1039         schedlock();
1040         v = runtime_atomicload(&runtime_sched.atomic);
1041         if(atomic_gwaiting(v)) {
1042                 matchmg();
1043                 v = runtime_atomicload(&runtime_sched.atomic);
1044         }
1045         if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
1046                 runtime_xadd(&runtime_sched.atomic, -1<<waitstopShift);
1047                 runtime_notewakeup(&runtime_sched.stopped);
1048         }
1049
1050         schedunlock();
1051 }
1052
1053 // The goroutine g exited its system call.
1054 // Arrange for it to run on a cpu again.
1055 // This is called only from the go syscall library, not
1056 // from the low-level system calls used by the runtime.
1057 void
1058 runtime_exitsyscall(void)
1059 {
1060         G *gp;
1061         uint32 v;
1062
1063         // Fast path.
1064         // If we can do the mcpu++ bookkeeping and
1065         // find that we still have mcpu <= mcpumax, then we can
1066         // start executing Go code immediately, without having to
1067         // schedlock/schedunlock.
1068         gp = g;
1069         v = runtime_xadd(&runtime_sched.atomic, (1<<mcpuShift));
1070         if(m->profilehz == runtime_sched.profilehz && atomic_mcpu(v) <= atomic_mcpumax(v)) {
1071                 // There's a cpu for us, so we can run.
1072                 gp->status = Grunning;
1073                 // Garbage collector isn't running (since we are),
1074                 // so okay to clear gcstack.
1075 #ifdef USING_SPLIT_STACK
1076                 gp->gcstack = nil;
1077 #endif
1078                 gp->gcnext_sp = nil;
1079                 runtime_memclr(gp->gcregs, sizeof gp->gcregs);
1080                 return;
1081         }
1082
1083         // Tell scheduler to put g back on the run queue:
1084         // mostly equivalent to g->status = Grunning,
1085         // but keeps the garbage collector from thinking
1086         // that g is running right now, which it's not.
1087         gp->readyonstop = 1;
1088
1089         // All the cpus are taken.
1090         // The scheduler will ready g and put this m to sleep.
1091         // When the scheduler takes g away from m,
1092         // it will undo the runtime_sched.mcpu++ above.
1093         runtime_gosched();
1094
1095         // Gosched returned, so we're allowed to run now.
1096         // Delete the gcstack information that we left for
1097         // the garbage collector during the system call.
1098         // Must wait until now because until gosched returns
1099         // we don't know for sure that the garbage collector
1100         // is not running.
1101 #ifdef USING_SPLIT_STACK
1102         gp->gcstack = nil;
1103 #endif
1104         gp->gcnext_sp = nil;
1105         runtime_memclr(gp->gcregs, sizeof gp->gcregs);
1106 }
1107
1108 G*
1109 runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize)
1110 {
1111         G *newg;
1112
1113         newg = runtime_malloc(sizeof(G));
1114         if(stacksize >= 0) {
1115 #if USING_SPLIT_STACK
1116                 *ret_stack = __splitstack_makecontext(stacksize,
1117                                                       &newg->stack_context[0],
1118                                                       ret_stacksize);
1119 #else
1120                 *ret_stack = runtime_mallocgc(stacksize, FlagNoProfiling|FlagNoGC, 0, 0);
1121                 *ret_stacksize = stacksize;
1122                 newg->gcinitial_sp = *ret_stack;
1123                 newg->gcstack_size = stacksize;
1124 #endif
1125         }
1126         return newg;
1127 }
1128
1129 G*
1130 __go_go(void (*fn)(void*), void* arg)
1131 {
1132         byte *sp;
1133         size_t spsize;
1134         G * volatile newg;      // volatile to avoid longjmp warning
1135
1136         schedlock();
1137
1138         if((newg = gfget()) != nil){
1139 #ifdef USING_SPLIT_STACK
1140                 sp = __splitstack_resetcontext(&newg->stack_context[0],
1141                                                &spsize);
1142 #else
1143                 sp = newg->gcinitial_sp;
1144                 spsize = newg->gcstack_size;
1145                 newg->gcnext_sp = sp;
1146 #endif
1147         } else {
1148                 newg = runtime_malg(StackMin, &sp, &spsize);
1149                 if(runtime_lastg == nil)
1150                         runtime_allg = newg;
1151                 else
1152                         runtime_lastg->alllink = newg;
1153                 runtime_lastg = newg;
1154         }
1155         newg->status = Gwaiting;
1156         newg->waitreason = "new goroutine";
1157
1158         newg->entry = (byte*)fn;
1159         newg->param = arg;
1160         newg->gopc = (uintptr)__builtin_return_address(0);
1161
1162         runtime_sched.gcount++;
1163         runtime_sched.goidgen++;
1164         newg->goid = runtime_sched.goidgen;
1165
1166         if(sp == nil)
1167                 runtime_throw("nil g->stack0");
1168
1169         getcontext(&newg->context);
1170         newg->context.uc_stack.ss_sp = sp;
1171         newg->context.uc_stack.ss_size = spsize;
1172         makecontext(&newg->context, kickoff, 0);
1173
1174         newprocreadylocked(newg);
1175         schedunlock();
1176
1177         return newg;
1178 //printf(" goid=%d\n", newg->goid);
1179 }
1180
1181 // Put on gfree list.  Sched must be locked.
1182 static void
1183 gfput(G *g)
1184 {
1185         g->schedlink = runtime_sched.gfree;
1186         runtime_sched.gfree = g;
1187 }
1188
1189 // Get from gfree list.  Sched must be locked.
1190 static G*
1191 gfget(void)
1192 {
1193         G *g;
1194
1195         g = runtime_sched.gfree;
1196         if(g)
1197                 runtime_sched.gfree = g->schedlink;
1198         return g;
1199 }
1200
1201 // Run all deferred functions for the current goroutine.
1202 static void
1203 rundefer(void)
1204 {
1205         Defer *d;
1206
1207         while((d = g->defer) != nil) {
1208                 void (*pfn)(void*);
1209
1210                 pfn = d->__pfn;
1211                 d->__pfn = nil;
1212                 if (pfn != nil)
1213                         (*pfn)(d->__arg);
1214                 g->defer = d->__next;
1215                 runtime_free(d);
1216         }
1217 }
1218
// Exported under the assembler name libgo_runtime.runtime.Goexit.
// Runs the calling goroutine's deferred functions, then calls
// runtime_goexit.
//
// The label uses __asm__, like the other exported symbols in this
// file; the plain asm keyword is not available in strict ISO modes.
void runtime_Goexit (void) __asm__ ("libgo_runtime.runtime.Goexit");

void
runtime_Goexit(void)
{
        rundefer();
        runtime_goexit();
}
1227
// Exported under the assembler name libgo_runtime.runtime.Gosched.
// Thin wrapper that enters the scheduler through runtime_gosched.
//
// The label uses __asm__, like the other exported symbols in this
// file; the plain asm keyword is not available in strict ISO modes.
void runtime_Gosched (void) __asm__ ("libgo_runtime.runtime.Gosched");

void
runtime_Gosched(void)
{
        runtime_gosched();
}
1235
1236 void runtime_LockOSThread (void)
1237   __asm__ ("libgo_runtime.runtime.LockOSThread");
1238
1239 void
1240 runtime_LockOSThread(void)
1241 {
1242         m->lockedg = g;
1243         g->lockedm = m;
1244 }
1245
1246 // delete when scheduler is stronger
1247 int32
1248 runtime_gomaxprocsfunc(int32 n)
1249 {
1250         int32 ret;
1251         uint32 v;
1252
1253         schedlock();
1254         ret = runtime_gomaxprocs;
1255         if(n <= 0)
1256                 n = ret;
1257         if(n > maxgomaxprocs)
1258                 n = maxgomaxprocs;
1259         runtime_gomaxprocs = n;
1260         if(runtime_gomaxprocs > 1)
1261                 runtime_singleproc = false;
1262         if(runtime_gcwaiting != 0) {
1263                 if(atomic_mcpumax(runtime_sched.atomic) != 1)
1264                         runtime_throw("invalid mcpumax during gc");
1265                 schedunlock();
1266                 return ret;
1267         }
1268
1269         setmcpumax(n);
1270
1271         // If there are now fewer allowed procs
1272         // than procs running, stop.
1273         v = runtime_atomicload(&runtime_sched.atomic);
1274         if((int32)atomic_mcpu(v) > n) {
1275                 schedunlock();
1276                 runtime_gosched();
1277                 return ret;
1278         }
1279         // handle more procs
1280         matchmg();
1281         schedunlock();
1282         return ret;
1283 }
1284
1285 void runtime_UnlockOSThread (void)
1286   __asm__ ("libgo_runtime.runtime.UnlockOSThread");
1287
1288 void
1289 runtime_UnlockOSThread(void)
1290 {
1291         m->lockedg = nil;
1292         g->lockedm = nil;
1293 }
1294
1295 bool
1296 runtime_lockedOSThread(void)
1297 {
1298         return g->lockedm != nil && m->lockedg != nil;
1299 }
1300
1301 // for testing of wire, unwire
1302 uint32
1303 runtime_mid()
1304 {
1305         return m->id;
1306 }
1307
1308 int32 runtime_Goroutines (void)
1309   __asm__ ("libgo_runtime.runtime.Goroutines");
1310
1311 int32
1312 runtime_Goroutines()
1313 {
1314         return runtime_sched.gcount;
1315 }
1316
1317 int32
1318 runtime_mcount(void)
1319 {
1320         return runtime_sched.mcount;
1321 }
1322
// CPU profiler state, shared by runtime_sigprof and
// runtime_setcpuprofilerate.
static struct {
        // Unnamed lock member: both users pass &prof to
        // runtime_lock/runtime_unlock.
        Lock;
        // Profiling callback; runtime_setcpuprofilerate stores nil
        // here whenever the rate is 0.
        void (*fn)(uintptr*, int32);
        // Profiling rate; 0 means profiling is off.
        int32 hz;
        // PC buffer; currently referenced only by the commented-out
        // traceback code in runtime_sigprof.
        uintptr pcbuf[100];
} prof;
1329
1330 void
1331 runtime_sigprof(uint8 *pc __attribute__ ((unused)),
1332                 uint8 *sp __attribute__ ((unused)),
1333                 uint8 *lr __attribute__ ((unused)),
1334                 G *gp __attribute__ ((unused)))
1335 {
1336         // int32 n;
1337
1338         if(prof.fn == nil || prof.hz == 0)
1339                 return;
1340
1341         runtime_lock(&prof);
1342         if(prof.fn == nil) {
1343                 runtime_unlock(&prof);
1344                 return;
1345         }
1346         // n = runtime_gentraceback(pc, sp, lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf));
1347         // if(n > 0)
1348         //      prof.fn(prof.pcbuf, n);
1349         runtime_unlock(&prof);
1350 }
1351
1352 void
1353 runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
1354 {
1355         // Force sane arguments.
1356         if(hz < 0)
1357                 hz = 0;
1358         if(hz == 0)
1359                 fn = nil;
1360         if(fn == nil)
1361                 hz = 0;
1362
1363         // Stop profiler on this cpu so that it is safe to lock prof.
1364         // if a profiling signal came in while we had prof locked,
1365         // it would deadlock.
1366         runtime_resetcpuprofiler(0);
1367
1368         runtime_lock(&prof);
1369         prof.fn = fn;
1370         prof.hz = hz;
1371         runtime_unlock(&prof);
1372         runtime_lock(&runtime_sched);
1373         runtime_sched.profilehz = hz;
1374         runtime_unlock(&runtime_sched);
1375
1376         if(hz != 0)
1377                 runtime_resetcpuprofiler(hz);
1378 }