libgo/runtime/proc.c

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 #include <limits.h>
   6 #include <stdlib.h>
   7 #include <pthread.h>
   8 #include <unistd.h>
   9
  10 #include "config.h"
  11 #include "runtime.h"
  12 #include "arch.h"
  13 #include "defs.h"
  14 #include "malloc.h"
  15 #include "go-defer.h"
  16
  17 #ifdef USING_SPLIT_STACK
  18
  19 /* FIXME: These are not declared anywhere.  */
  20
  21 extern void __splitstack_getcontext(void *context[10]);
  22
  23 extern void __splitstack_setcontext(void *context[10]);
  24
  25 extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
  26
  27 extern void * __splitstack_resetcontext(void *context[10], size_t *);
  28
  29 extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
  30                                void **);
  31
  32 #endif
  33
  34 #if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
  35 # ifdef PTHREAD_STACK_MIN
  36 #  define StackMin PTHREAD_STACK_MIN
  37 # else
  38 #  define StackMin 8192
  39 # endif
  40 #else
  41 # define StackMin 2 * 1024 * 1024
  42 #endif
  43
  44 static void schedule(G*);
  45 static M *startm(void);
  46
  47 typedef struct Sched Sched;
  48
  49 M       runtime_m0;
  50 G       runtime_g0;     // idle goroutine for m0
  51
  52 #ifdef __rtems__
  53 #define __thread
  54 #endif
  55
  56 static __thread G *g;
  57 static __thread M *m;
  58
  59 // We can not always refer to the TLS variables directly.  The
  60 // compiler will call tls_get_addr to get the address of the variable,
  61 // and it may hold it in a register across a call to schedule.  When
  62 // we get back from the call we may be running in a different thread,
  63 // in which case the register now points to the TLS variable for a
  64 // different thread.  We use non-inlinable functions to avoid this
  65 // when necessary.
  66
  67 G* runtime_g(void) __attribute__ ((noinline, no_split_stack));
  68
  69 G*
  70 runtime_g(void)
  71 {
  72         return g;
  73 }
  74
  75 M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
  76
  77 M*
  78 runtime_m(void)
  79 {
  80         return m;
  81 }
  82
// Set to 1 by runtime_stoptheworld and back to 0 by
// runtime_starttheworld.
int32   runtime_gcwaiting;
  84
  85 // Go scheduler
  86 //
  87 // The go scheduler's job is to match ready-to-run goroutines (`g's)
  88 // with waiting-for-work schedulers (`m's).  If there are ready g's
  89 // and no waiting m's, ready() will start a new m running in a new
  90 // OS thread, so that all ready g's can run simultaneously, up to a limit.
  91 // For now, m's never go away.
  92 //
  93 // By default, Go keeps only one kernel thread (m) running user code
  94 // at a single time; other threads may be blocked in the operating system.
  95 // Setting the environment variable $GOMAXPROCS or calling
  96 // runtime.GOMAXPROCS() will change the number of user threads
  97 // allowed to execute simultaneously.  $GOMAXPROCS is thus an
  98 // approximation of the maximum number of cores to use.
  99 //
 100 // Even a program that can run without deadlock in a single process
 101 // might use more m's if given the chance.  For example, the prime
 102 // sieve will use as many m's as there are primes (up to runtime_sched.mmax),
 103 // allowing different stages of the pipeline to execute in parallel.
 104 // We could revisit this choice, only kicking off new m's for blocking
 105 // system calls, but that would limit the amount of parallel computation
 106 // that go would try to do.
 107 //
 108 // In general, one could imagine all sorts of refinements to the
 109 // scheduler, but the goal now is just to get something working on
 110 // Linux and OS X.
 111
 112 struct Sched {
 113         Lock;
 114
 115         G *gfree;       // available g's (status == Gdead)
 116         int32 goidgen;
 117
 118         G *ghead;       // g's waiting to run
 119         G *gtail;
 120         int32 gwait;    // number of g's waiting to run
 121         int32 gcount;   // number of g's that are alive
 122         int32 grunning; // number of g's running on cpu or in syscall
 123
 124         M *mhead;       // m's waiting for work
 125         int32 mwait;    // number of m's waiting for work
 126         int32 mcount;   // number of m's that have been created
 127
 128         volatile uint32 atomic; // atomic scheduling word (see below)
 129
 130         int32 profilehz;        // cpu profiling rate
 131
 132         bool init;  // running initialization
 133         bool lockmain;  // init called runtime.LockOSThread
 134
 135         Note    stopped;        // one g can set waitstop and wait here for m's to stop
 136 };
 137
 138 // The atomic word in sched is an atomic uint32 that
 139 // holds these fields.
 140 //
 141 //      [15 bits] mcpu          number of m's executing on cpu
 142 //      [15 bits] mcpumax       max number of m's allowed on cpu
 143 //      [1 bit] waitstop        some g is waiting on stopped
 144 //      [1 bit] gwaiting        gwait != 0
 145 //
 146 // These fields are the information needed by entersyscall
 147 // and exitsyscall to decide whether to coordinate with the
 148 // scheduler.  Packing them into a single machine word lets
 149 // them use a fast path with a single atomic read/write and
 150 // no lock/unlock.  This greatly reduces contention in
 151 // syscall- or cgo-heavy multithreaded programs.
 152 //
 153 // Except for entersyscall and exitsyscall, the manipulations
 154 // to these fields only happen while holding the schedlock,
 155 // so the routines holding schedlock only need to worry about
 156 // what entersyscall and exitsyscall do, not the other routines
 157 // (which also use the schedlock).
 158 //
 159 // In particular, entersyscall and exitsyscall only read mcpumax,
 160 // waitstop, and gwaiting.  They never write them.  Thus, writes to those
 161 // fields can be done (holding schedlock) without fear of write conflicts.
 162 // There may still be logic conflicts: for example, the set of waitstop must
 163 // be conditioned on mcpu >= mcpumax or else the wait may be a
 164 // spurious sleep.  The Promela model in proc.p verifies these accesses.
 165 enum {
 166         mcpuWidth = 15,
 167         mcpuMask = (1<<mcpuWidth) - 1,
 168         mcpuShift = 0,
 169         mcpumaxShift = mcpuShift + mcpuWidth,
 170         waitstopShift = mcpumaxShift + mcpuWidth,
 171         gwaitingShift = waitstopShift+1,
 172
 173         // The max value of GOMAXPROCS is constrained
 174         // by the max value we can store in the bit fields
 175         // of the atomic word.  Reserve a few high values
 176         // so that we can detect accidental decrement
 177         // beyond zero.
 178         maxgomaxprocs = mcpuMask - 10,
 179 };
 180
 181 #define atomic_mcpu(v)          (((v)>>mcpuShift)&mcpuMask)
 182 #define atomic_mcpumax(v)       (((v)>>mcpumaxShift)&mcpuMask)
 183 #define atomic_waitstop(v)      (((v)>>waitstopShift)&1)
 184 #define atomic_gwaiting(v)      (((v)>>gwaitingShift)&1)
 185
 186 Sched runtime_sched;
 187 int32 runtime_gomaxprocs;
 188 bool runtime_singleproc;
 189
 190 static bool canaddmcpu(void);
 191
 192 // An m that is waiting for notewakeup(&m->havenextg).  This may
 193 // only be accessed while the scheduler lock is held.  This is used to
 194 // minimize the number of times we call notewakeup while the scheduler
 195 // lock is held, since the m will normally move quickly to lock the
 196 // scheduler itself, producing lock contention.
 197 static M* mwakeup;
 198
 199 // Scheduling helpers.  Sched must be locked.
 200 static void gput(G*);   // put/get on ghead/gtail
 201 static G* gget(void);
 202 static void mput(M*);   // put/get on mhead
 203 static M* mget(G*);
 204 static void gfput(G*);  // put/get on gfree
 205 static G* gfget(void);
 206 static void matchmg(void);      // match m's to g's
 207 static void readylocked(G*);    // ready, but sched is locked
 208 static void mnextg(M*, G*);
 209 static void mcommoninit(M*);
 210
 211 void
 212 setmcpumax(uint32 n)
 213 {
 214         uint32 v, w;
 215
 216         for(;;) {
 217                 v = runtime_sched.atomic;
 218                 w = v;
 219                 w &= ~(mcpuMask<<mcpumaxShift);
 220                 w |= n<<mcpumaxShift;
 221                 if(runtime_cas(&runtime_sched.atomic, v, w))
 222                         break;
 223         }
 224 }
 225
 226 // First function run by a new goroutine.  This replaces gogocall.
 227 static void
 228 kickoff(void)
 229 {
 230         void (*fn)(void*);
 231
 232         fn = (void (*)(void*))(g->entry);
 233         fn(g->param);
 234         runtime_goexit();
 235 }
 236
// Switch context to a different goroutine.  This is like longjmp.
// newg becomes the current g of this thread, and newg->fromgogo is
// set so that the code following the getcontext call that saved
// newg->context (see runtime_mcall) can tell that it was reached via
// setcontext.
static void runtime_gogo(G*) __attribute__ ((noinline));
static void
runtime_gogo(G* newg)
{
#ifdef USING_SPLIT_STACK
        // Pass newg's saved stack_context to the split-stack support
        // before switching.
        __splitstack_setcontext(&newg->stack_context[0]);
#endif
        g = newg;
        newg->fromgogo = true;
        setcontext(&newg->context);
}
 249
// Save context and call pfn passing g as a parameter.  This is like
// setjmp.  Because getcontext always returns 0, unlike setjmp, we use
// g->fromgogo as a code.  It will be true if we got here via
// setcontext.  g == nil the first time this is called in a new m.
//
// pfn is not called directly here: it is stored in m->g0->entry, with
// the current g in m->g0->param, and control is transferred to
// m->g0's saved context.  The code in runtime_mstart that follows its
// getcontext call then invokes the stored function with the stored g.
static void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
static void
runtime_mcall(void (*pfn)(G*))
{
#ifndef USING_SPLIT_STACK
        // Only the address of i is used; it is recorded in
        // g->gcnext_sp below.
        int i;
#endif

        // Ensure that all registers are on the stack for the garbage
        // collector.
        __builtin_unwind_init();

        if(g == m->g0)
                runtime_throw("runtime: mcall called on m->g0 stack");

        if(g != nil) {

#ifdef USING_SPLIT_STACK
                __splitstack_getcontext(&g->stack_context[0]);
#else
                g->gcnext_sp = &i;
#endif
                // Clear the flag before saving the context; runtime_gogo
                // sets it to true before resuming this context.
                g->fromgogo = false;
                getcontext(&g->context);
        }
        // First return from getcontext (or no g at all): switch to
        // m->g0.  When resumed via runtime_gogo, fromgogo is true and
        // we simply return to the caller.
        if (g == nil || !g->fromgogo) {
#ifdef USING_SPLIT_STACK
                __splitstack_setcontext(&m->g0->stack_context[0]);
#endif
                m->g0->entry = (byte*)pfn;
                m->g0->param = g;
                g = m->g0;
                setcontext(&m->g0->context);
                runtime_throw("runtime: mcall function returned");
        }
}
 290
 291 // The bootstrap sequence is:
 292 //
 293 //      call osinit
 294 //      call schedinit
 295 //      make & queue new G
 296 //      call runtime_mstart
 297 //
 298 // The new G calls runtime_main.
 299 void
 300 runtime_schedinit(void)
 301 {
 302         int32 n;
 303         const byte *p;
 304
 305         m = &runtime_m0;
 306         g = &runtime_g0;
 307         m->g0 = g;
 308         m->curg = g;
 309         g->m = m;
 310
 311         m->nomemprof++;
 312         runtime_mallocinit();
 313         mcommoninit(m);
 314
 315         runtime_goargs();
 316         runtime_goenvs();
 317
 318         // For debugging:
 319         // Allocate internal symbol table representation now,
 320         // so that we don't need to call malloc when we crash.
 321         // runtime_findfunc(0);
 322
 323         runtime_gomaxprocs = 1;
 324         p = runtime_getenv("GOMAXPROCS");
 325         if(p != nil && (n = runtime_atoi(p)) != 0) {
 326                 if(n > maxgomaxprocs)
 327                         n = maxgomaxprocs;
 328                 runtime_gomaxprocs = n;
 329         }
 330         setmcpumax(runtime_gomaxprocs);
 331         runtime_singleproc = runtime_gomaxprocs == 1;
 332
 333         canaddmcpu();   // mcpu++ to account for bootstrap m
 334         m->helpgc = 1;  // flag to tell schedule() to mcpu--
 335         runtime_sched.grunning++;
 336
 337         // Can not enable GC until all roots are registered.
 338         // mstats.enablegc = 1;
 339         m->nomemprof--;
 340 }
 341
 342 extern void main_init(void) __asm__ ("__go_init_main");
 343 extern void main_main(void) __asm__ ("main.main");
 344
 345 // The main goroutine.
 346 void
 347 runtime_main(void)
 348 {
 349         // Lock the main goroutine onto this, the main OS thread,
 350         // during initialization.  Most programs won't care, but a few
 351         // do require certain calls to be made by the main thread.
 352         // Those can arrange for main.main to run in the main thread
 353         // by calling runtime.LockOSThread during initialization
 354         // to preserve the lock.
 355         runtime_LockOSThread();
 356         runtime_sched.init = true;
 357         main_init();
 358         runtime_sched.init = false;
 359         if(!runtime_sched.lockmain)
 360                 runtime_UnlockOSThread();
 361
 362         // For gccgo we have to wait until after main is initialized
 363         // to enable GC, because initializing main registers the GC
 364         // roots.
 365         mstats.enablegc = 1;
 366
 367         main_main();
 368         runtime_exit(0);
 369         for(;;)
 370                 *(int32*)0 = 0;
 371 }
 372
 373 // Lock the scheduler.
 374 static void
 375 schedlock(void)
 376 {
 377         runtime_lock(&runtime_sched);
 378 }
 379
 380 // Unlock the scheduler.
 381 static void
 382 schedunlock(void)
 383 {
 384         M *m;
 385
 386         m = mwakeup;
 387         mwakeup = nil;
 388         runtime_unlock(&runtime_sched);
 389         if(m != nil)
 390                 runtime_notewakeup(&m->havenextg);
 391 }
 392
 393 void
 394 runtime_goexit(void)
 395 {
 396         g->status = Gmoribund;
 397         runtime_gosched();
 398 }
 399
 400 void
 401 runtime_goroutineheader(G *g)
 402 {
 403         const char *status;
 404
 405         switch(g->status) {
 406         case Gidle:
 407                 status = "idle";
 408                 break;
 409         case Grunnable:
 410                 status = "runnable";
 411                 break;
 412         case Grunning:
 413                 status = "running";
 414                 break;
 415         case Gsyscall:
 416                 status = "syscall";
 417                 break;
 418         case Gwaiting:
 419                 if(g->waitreason)
 420                         status = g->waitreason;
 421                 else
 422                         status = "waiting";
 423                 break;
 424         case Gmoribund:
 425                 status = "moribund";
 426                 break;
 427         default:
 428                 status = "???";
 429                 break;
 430         }
 431         runtime_printf("goroutine %d [%s]:\n", g->goid, status);
 432 }
 433
 434 void
 435 runtime_tracebackothers(G *me)
 436 {
 437         G *g;
 438
 439         for(g = runtime_allg; g != nil; g = g->alllink) {
 440                 if(g == me || g->status == Gdead)
 441                         continue;
 442                 runtime_printf("\n");
 443                 runtime_goroutineheader(g);
 444                 // runtime_traceback(g->sched.pc, g->sched.sp, 0, g);
 445         }
 446 }
 447
 448 // Mark this g as m's idle goroutine.
 449 // This functionality might be used in environments where programs
 450 // are limited to a single thread, to simulate a select-driven
 451 // network server.  It is not exposed via the standard runtime API.
 452 void
 453 runtime_idlegoroutine(void)
 454 {
 455         if(g->idlem != nil)
 456                 runtime_throw("g is already an idle goroutine");
 457         g->idlem = m;
 458 }
 459
 460 static void
 461 mcommoninit(M *m)
 462 {
 463         // Add to runtime_allm so garbage collector doesn't free m
 464         // when it is just in a register or thread-local storage.
 465         m->alllink = runtime_allm;
 466         // runtime_Cgocalls() iterates over allm w/o schedlock,
 467         // so we need to publish it safely.
 468         runtime_atomicstorep((void**)&runtime_allm, m);
 469
 470         m->id = runtime_sched.mcount++;
 471         m->fastrand = 0x49f6428aUL + m->id;
 472
 473         if(m->mcache == nil)
 474                 m->mcache = runtime_allocmcache();
 475 }
 476
 477 // Try to increment mcpu.  Report whether succeeded.
 478 static bool
 479 canaddmcpu(void)
 480 {
 481         uint32 v;
 482
 483         for(;;) {
 484                 v = runtime_sched.atomic;
 485                 if(atomic_mcpu(v) >= atomic_mcpumax(v))
 486                         return 0;
 487                 if(runtime_cas(&runtime_sched.atomic, v, v+(1<<mcpuShift)))
 488                         return 1;
 489         }
 490 }
 491
 492 // Put on `g' queue.  Sched must be locked.
 493 static void
 494 gput(G *g)
 495 {
 496         M *m;
 497
 498         // If g is wired, hand it off directly.
 499         if((m = g->lockedm) != nil && canaddmcpu()) {
 500                 mnextg(m, g);
 501                 return;
 502         }
 503
 504         // If g is the idle goroutine for an m, hand it off.
 505         if(g->idlem != nil) {
 506                 if(g->idlem->idleg != nil) {
 507                         runtime_printf("m%d idle out of sync: g%d g%d\n",
 508                                 g->idlem->id,
 509                                 g->idlem->idleg->goid, g->goid);
 510                         runtime_throw("runtime: double idle");
 511                 }
 512                 g->idlem->idleg = g;
 513                 return;
 514         }
 515
 516         g->schedlink = nil;
 517         if(runtime_sched.ghead == nil)
 518                 runtime_sched.ghead = g;
 519         else
 520                 runtime_sched.gtail->schedlink = g;
 521         runtime_sched.gtail = g;
 522
 523         // increment gwait.
 524         // if it transitions to nonzero, set atomic gwaiting bit.
 525         if(runtime_sched.gwait++ == 0)
 526                 runtime_xadd(&runtime_sched.atomic, 1<<gwaitingShift);
 527 }
 528
 529 // Report whether gget would return something.
 530 static bool
 531 haveg(void)
 532 {
 533         return runtime_sched.ghead != nil || m->idleg != nil;
 534 }
 535
 536 // Get from `g' queue.  Sched must be locked.
 537 static G*
 538 gget(void)
 539 {
 540         G *g;
 541
 542         g = runtime_sched.ghead;
 543         if(g){
 544                 runtime_sched.ghead = g->schedlink;
 545                 if(runtime_sched.ghead == nil)
 546                         runtime_sched.gtail = nil;
 547                 // decrement gwait.
 548                 // if it transitions to zero, clear atomic gwaiting bit.
 549                 if(--runtime_sched.gwait == 0)
 550                         runtime_xadd(&runtime_sched.atomic, -1<<gwaitingShift);
 551         } else if(m->idleg != nil) {
 552                 g = m->idleg;
 553                 m->idleg = nil;
 554         }
 555         return g;
 556 }
 557
 558 // Put on `m' list.  Sched must be locked.
 559 static void
 560 mput(M *m)
 561 {
 562         m->schedlink = runtime_sched.mhead;
 563         runtime_sched.mhead = m;
 564         runtime_sched.mwait++;
 565 }
 566
 567 // Get an `m' to run `g'.  Sched must be locked.
 568 static M*
 569 mget(G *g)
 570 {
 571         M *m;
 572
 573         // if g has its own m, use it.
 574         if(g && (m = g->lockedm) != nil)
 575                 return m;
 576
 577         // otherwise use general m pool.
 578         if((m = runtime_sched.mhead) != nil){
 579                 runtime_sched.mhead = m->schedlink;
 580                 runtime_sched.mwait--;
 581         }
 582         return m;
 583 }
 584
 585 // Mark g ready to run.
 586 void
 587 runtime_ready(G *g)
 588 {
 589         schedlock();
 590         readylocked(g);
 591         schedunlock();
 592 }
 593
 594 // Mark g ready to run.  Sched is already locked.
 595 // G might be running already and about to stop.
 596 // The sched lock protects g->status from changing underfoot.
 597 static void
 598 readylocked(G *g)
 599 {
 600         if(g->m){
 601                 // Running on another machine.
 602                 // Ready it when it stops.
 603                 g->readyonstop = 1;
 604                 return;
 605         }
 606
 607         // Mark runnable.
 608         if(g->status == Grunnable || g->status == Grunning) {
 609                 runtime_printf("goroutine %d has status %d\n", g->goid, g->status);
 610                 runtime_throw("bad g->status in ready");
 611         }
 612         g->status = Grunnable;
 613
 614         gput(g);
 615         matchmg();
 616 }
 617
 618 // Same as readylocked but a different symbol so that
 619 // debuggers can set a breakpoint here and catch all
 620 // new goroutines.
 621 static void
 622 newprocreadylocked(G *g)
 623 {
 624         readylocked(g);
 625 }
 626
 627 // Pass g to m for running.
 628 // Caller has already incremented mcpu.
 629 static void
 630 mnextg(M *m, G *g)
 631 {
 632         runtime_sched.grunning++;
 633         m->nextg = g;
 634         if(m->waitnextg) {
 635                 m->waitnextg = 0;
 636                 if(mwakeup != nil)
 637                         runtime_notewakeup(&mwakeup->havenextg);
 638                 mwakeup = m;
 639         }
 640 }
 641
// Get the next goroutine that m should run.
// Sched must be locked on entry, is unlocked on exit.
// Makes sure that at most $GOMAXPROCS g's are
// running on cpus (not in system calls) at any given time.
// If no g is available, the m sleeps on m->havenextg until it is
// handed one or is asked to help the garbage collector.
static G*
nextgandunlock(void)
{
        G *gp;
        uint32 v;

top:
        // maxgomaxprocs leaves the top values of the mcpu field unused,
        // so a value this large means mcpu was decremented below zero.
        if(atomic_mcpu(runtime_sched.atomic) >= maxgomaxprocs)
                runtime_throw("negative mcpu");

        // If there is a g waiting as m->nextg, the mcpu++
        // happened before it was passed to mnextg.
        if(m->nextg != nil) {
                gp = m->nextg;
                m->nextg = nil;
                schedunlock();
                return gp;
        }

        if(m->lockedg != nil) {
                // We can only run one g, and it's not available.
                // Make sure some other cpu is running to handle
                // the ordinary run queue.
                if(runtime_sched.gwait != 0) {
                        matchmg();
                        // m->lockedg might have been on the queue.
                        if(m->nextg != nil) {
                                gp = m->nextg;
                                m->nextg = nil;
                                schedunlock();
                                return gp;
                        }
                }
        } else {
                // Look for work on global queue.
                while(haveg() && canaddmcpu()) {
                        gp = gget();
                        if(gp == nil)
                                runtime_throw("gget inconsistency");

                        // A g wired to some m is passed to that m;
                        // keep looking for one this m can run.
                        if(gp->lockedm) {
                                mnextg(gp->lockedm, gp);
                                continue;
                        }
                        runtime_sched.grunning++;
                        schedunlock();
                        return gp;
                }

                // The while loop ended either because the g queue is empty
                // or because we have maxed out our m procs running go
                // code (mcpu >= mcpumax).  We need to check that
                // concurrent actions by entersyscall/exitsyscall cannot
                // invalidate the decision to end the loop.
                //
                // We hold the sched lock, so no one else is manipulating the
                // g queue or changing mcpumax.  Entersyscall can decrement
                // mcpu, but if does so when there is something on the g queue,
                // the gwait bit will be set, so entersyscall will take the slow path
                // and use the sched lock.  So it cannot invalidate our decision.
                //
                // Wait on global m queue.
                mput(m);
        }

        // Snapshot the atomic word for the waitstop check below.
        v = runtime_atomicload(&runtime_sched.atomic);
        if(runtime_sched.grunning == 0)
                runtime_throw("all goroutines are asleep - deadlock!");
        // Prepare to sleep until mnextg or runtime_helpgc wakes us.
        m->nextg = nil;
        m->waitnextg = 1;
        runtime_noteclear(&m->havenextg);

        // Stoptheworld is waiting for all but its cpu to go to stop.
        // Entersyscall might have decremented mcpu too, but if so
        // it will see the waitstop and take the slow path.
        // Exitsyscall never increments mcpu beyond mcpumax.
        if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
                // set waitstop = 0 (known to be 1)
                runtime_xadd(&runtime_sched.atomic, -1<<waitstopShift);
                runtime_notewakeup(&runtime_sched.stopped);
        }
        schedunlock();

        runtime_notesleep(&m->havenextg);
        // Woken by runtime_helpgc: help the collector, then retake the
        // scheduler lock and look for work again.
        if(m->helpgc) {
                runtime_gchelper();
                m->helpgc = 0;
                runtime_lock(&runtime_sched);
                goto top;
        }
        // Otherwise we were woken because a g was stored in m->nextg.
        // The scheduler lock is not held on this path.
        if((gp = m->nextg) == nil)
                runtime_throw("bad m->nextg in nextgoroutine");
        m->nextg = nil;
        return gp;
}
 741
 742 int32
 743 runtime_helpgc(bool *extra)
 744 {
 745         M *mp;
 746         int32 n, max;
 747
 748         // Figure out how many CPUs to use.
 749         // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
 750         max = runtime_gomaxprocs;
 751         if(max > runtime_ncpu)
 752                 max = runtime_ncpu > 0 ? runtime_ncpu : 1;
 753         if(max > MaxGcproc)
 754                 max = MaxGcproc;
 755
 756         // We're going to use one CPU no matter what.
 757         // Figure out the max number of additional CPUs.
 758         max--;
 759
 760         runtime_lock(&runtime_sched);
 761         n = 0;
 762         while(n < max && (mp = mget(nil)) != nil) {
 763                 n++;
 764                 mp->helpgc = 1;
 765                 mp->waitnextg = 0;
 766                 runtime_notewakeup(&mp->havenextg);
 767         }
 768         runtime_unlock(&runtime_sched);
 769         if(extra)
 770                 *extra = n != max;
 771         return n;
 772 }
 773
 774 void
 775 runtime_stoptheworld(void)
 776 {
 777         uint32 v;
 778
 779         schedlock();
 780         runtime_gcwaiting = 1;
 781
 782         setmcpumax(1);
 783
 784         // while mcpu > 1
 785         for(;;) {
 786                 v = runtime_sched.atomic;
 787                 if(atomic_mcpu(v) <= 1)
 788                         break;
 789
 790                 // It would be unsafe for multiple threads to be using
 791                 // the stopped note at once, but there is only
 792                 // ever one thread doing garbage collection.
 793                 runtime_noteclear(&runtime_sched.stopped);
 794                 if(atomic_waitstop(v))
 795                         runtime_throw("invalid waitstop");
 796
 797                 // atomic { waitstop = 1 }, predicated on mcpu <= 1 check above
 798                 // still being true.
 799                 if(!runtime_cas(&runtime_sched.atomic, v, v+(1<<waitstopShift)))
 800                         continue;
 801
 802                 schedunlock();
 803                 runtime_notesleep(&runtime_sched.stopped);
 804                 schedlock();
 805         }
 806         runtime_singleproc = runtime_gomaxprocs == 1;
 807         schedunlock();
 808 }
 809
 810 void
 811 runtime_starttheworld(bool extra)
 812 {
 813         M *m;
 814
 815         schedlock();
 816         runtime_gcwaiting = 0;
 817         setmcpumax(runtime_gomaxprocs);
 818         matchmg();
 819         if(extra && canaddmcpu()) {
 820                 // Start a new m that will (we hope) be idle
 821                 // and so available to help when the next
 822                 // garbage collection happens.
 823                 // canaddmcpu above did mcpu++
 824                 // (necessary, because m will be doing various
 825                 // initialization work so is definitely running),
 826                 // but m is not running a specific goroutine,
 827                 // so set the helpgc flag as a signal to m's
 828                 // first schedule(nil) to mcpu-- and grunning--.
 829                 m = startm();
 830                 m->helpgc = 1;
 831                 runtime_sched.grunning++;
 832         }
 833         schedunlock();
 834 }
 835
// Called to start an M.
// mp is the M that this thread will run.  The void* parameter and
// return type match what pthread_create expects; startm passes this
// function as the thread start routine.
void*
runtime_mstart(void* mp)
{
        m = (M*)mp;
        g = m->g0;

        // Cleared so that the check after getcontext below can tell a
        // first pass from a later re-entry through runtime_mcall.
        g->entry = nil;
        g->param = nil;

        // Record top of stack for use by mcall.
        // Once we call schedule we're never coming back,
        // so other calls can reuse this stack space.
#ifdef USING_SPLIT_STACK
        __splitstack_getcontext(&g->stack_context[0]);
#else
        g->gcinitial_sp = &mp;
        g->gcstack_size = StackMin;
        g->gcnext_sp = &mp;
#endif
        // runtime_mcall resumes here with setcontext after storing a
        // function in g->entry and its argument in g->param.
        getcontext(&g->context);

        if(g->entry != nil) {
                // Got here from mcall.
                void (*pfn)(G*) = (void (*)(G*))g->entry;
                G* gp = (G*)g->param;
                pfn(gp);
                // NOTE(review): presumably a deliberate crash in case
                // pfn returns -- confirm.
                *(int*)0x21 = 0x21;
        }
        runtime_minit();
        schedule(nil);
        return nil;
}
 869
 870 typedef struct CgoThreadStart CgoThreadStart;
 871 struct CgoThreadStart
 872 {
 873         M *m;
 874         G *g;
 875         void (*fn)(void);
 876 };
 877
 878 // Kick off new m's as needed (up to mcpumax).
 879 // There are already `other' other cpus that will
 880 // start looking for goroutines shortly.
 881 // Sched is locked.
 882 static void
 883 matchmg(void)
 884 {
 885         G *gp;
 886         M *mp;
 887
 888         if(m->mallocing || m->gcing)
 889                 return;
 890
 891         while(haveg() && canaddmcpu()) {
 892                 gp = gget();
 893                 if(gp == nil)
 894                         runtime_throw("gget inconsistency");
 895
 896                 // Find the m that will run gp.
 897                 if((mp = mget(gp)) == nil)
 898                         mp = startm();
 899                 mnextg(mp, gp);
 900         }
 901 }
 902
 903 static M*
 904 startm(void)
 905 {
 906         M *m;
 907         pthread_attr_t attr;
 908         pthread_t tid;
 909
 910         m = runtime_malloc(sizeof(M));
 911         mcommoninit(m);
 912         m->g0 = runtime_malg(-1, nil, nil);
 913
 914         if(pthread_attr_init(&attr) != 0)
 915                 runtime_throw("pthread_attr_init");
 916         if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
 917                 runtime_throw("pthread_attr_setdetachstate");
 918
 919 #ifndef PTHREAD_STACK_MIN
 920 #define PTHREAD_STACK_MIN 8192
 921 #endif
 922         if(pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0)
 923                 runtime_throw("pthread_attr_setstacksize");
 924
 925         if(pthread_create(&tid, &attr, runtime_mstart, m) != 0)
 926                 runtime_throw("pthread_create");
 927
 928         return m;
 929 }
 930
 931 // One round of scheduler: find a goroutine and run it.
 932 // The argument is the goroutine that was running before
 933 // schedule was called, or nil if this is the first call.
 934 // Never returns.
 935 static void
 936 schedule(G *gp)
 937 {
 938         int32 hz;
 939         uint32 v;
 940
 941         schedlock();
 942         if(gp != nil) {
 943                 // Just finished running gp.
 944                 gp->m = nil;
 945                 runtime_sched.grunning--;
 946
 947                 // atomic { mcpu-- }
 948                 v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
 949                 if(atomic_mcpu(v) > maxgomaxprocs)
 950                         runtime_throw("negative mcpu in scheduler");
 951
 952                 switch(gp->status){
 953                 case Grunnable:
 954                 case Gdead:
 955                         // Shouldn't have been running!
 956                         runtime_throw("bad gp->status in sched");
 957                 case Grunning:
 958                         gp->status = Grunnable;
 959                         gput(gp);
 960                         break;
 961                 case Gmoribund:
 962                         gp->status = Gdead;
 963                         if(gp->lockedm) {
 964                                 gp->lockedm = nil;
 965                                 m->lockedg = nil;
 966                         }
 967                         gp->idlem = nil;
 968                         gfput(gp);
 969                         if(--runtime_sched.gcount == 0)
 970                                 runtime_exit(0);
 971                         break;
 972                 }
 973                 if(gp->readyonstop){
 974                         gp->readyonstop = 0;
 975                         readylocked(gp);
 976                 }
 977         } else if(m->helpgc) {
 978                 // Bootstrap m or new m started by starttheworld.
 979                 // atomic { mcpu-- }
 980                 v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
 981                 if(atomic_mcpu(v) > maxgomaxprocs)
 982                         runtime_throw("negative mcpu in scheduler");
 983                 // Compensate for increment in starttheworld().
 984                 runtime_sched.grunning--;
 985                 m->helpgc = 0;
 986         } else if(m->nextg != nil) {
 987                 // New m started by matchmg.
 988         } else {
 989                 runtime_throw("invalid m state in scheduler");
 990         }
 991
 992         // Find (or wait for) g to run.  Unlocks runtime_sched.
 993         gp = nextgandunlock();
 994         gp->readyonstop = 0;
 995         gp->status = Grunning;
 996         m->curg = gp;
 997         gp->m = m;
 998
 999         // Check whether the profiler needs to be turned on or off.
1000         hz = runtime_sched.profilehz;
1001         if(m->profilehz != hz)
1002                 runtime_resetcpuprofiler(hz);
1003
1004         runtime_gogo(gp);
1005 }
1006
1007 // Enter scheduler.  If g->status is Grunning,
1008 // re-queues g and runs everyone else who is waiting
1009 // before running g again.  If g->status is Gmoribund,
1010 // kills off g.
1011 void
1012 runtime_gosched(void)
1013 {
1014         if(m->locks != 0)
1015                 runtime_throw("gosched holding locks");
1016         if(g == m->g0)
1017                 runtime_throw("gosched of g0");
1018         runtime_mcall(schedule);
1019 }
1020
1021 // The goroutine g is about to enter a system call.
1022 // Record that it's not using the cpu anymore.
1023 // This is called only from the go syscall library and cgocall,
1024 // not from the low-level system calls used by the runtime.
1025 //
1026 // Entersyscall cannot split the stack: the runtime_gosave must
1027 // make g->sched refer to the caller's stack segment, because
1028 // entersyscall is going to return immediately after.
1029 // It's okay to call matchmg and notewakeup even after
1030 // decrementing mcpu, because we haven't released the
1031 // sched lock yet, so the garbage collector cannot be running.
1032
1033 void runtime_entersyscall(void) __attribute__ ((no_split_stack));
1034
1035 void
1036 runtime_entersyscall(void)
1037 {
1038         uint32 v;
1039
1040         // Leave SP around for gc and traceback.
1041 #ifdef USING_SPLIT_STACK
1042         g->gcstack = __splitstack_find(NULL, NULL, &g->gcstack_size,
1043                                        &g->gcnext_segment, &g->gcnext_sp,
1044                                        &g->gcinitial_sp);
1045 #else
1046         g->gcnext_sp = (byte *) &v;
1047 #endif
1048
1049         // Save the registers in the g structure so that any pointers
1050         // held in registers will be seen by the garbage collector.
1051         // We could use getcontext here, but setjmp is more efficient
1052         // because it doesn't need to save the signal mask.
1053         setjmp(g->gcregs);
1054
1055         g->status = Gsyscall;
1056
1057         // Fast path.
1058         // The slow path inside the schedlock/schedunlock will get
1059         // through without stopping if it does:
1060         //      mcpu--
1061         //      gwait not true
1062         //      waitstop && mcpu <= mcpumax not true
1063         // If we can do the same with a single atomic add,
1064         // then we can skip the locks.
1065         v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
1066         if(!atomic_gwaiting(v) && (!atomic_waitstop(v) || atomic_mcpu(v) > atomic_mcpumax(v)))
1067                 return;
1068
1069         schedlock();
1070         v = runtime_atomicload(&runtime_sched.atomic);
1071         if(atomic_gwaiting(v)) {
1072                 matchmg();
1073                 v = runtime_atomicload(&runtime_sched.atomic);
1074         }
1075         if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
1076                 runtime_xadd(&runtime_sched.atomic, -1<<waitstopShift);
1077                 runtime_notewakeup(&runtime_sched.stopped);
1078         }
1079
1080         schedunlock();
1081 }
1082
1083 // The goroutine g exited its system call.
1084 // Arrange for it to run on a cpu again.
1085 // This is called only from the go syscall library, not
1086 // from the low-level system calls used by the runtime.
1087 void
1088 runtime_exitsyscall(void)
1089 {
1090         G *gp;
1091         uint32 v;
1092
1093         // Fast path.
1094         // If we can do the mcpu++ bookkeeping and
1095         // find that we still have mcpu <= mcpumax, then we can
1096         // start executing Go code immediately, without having to
1097         // schedlock/schedunlock.
1098         gp = g;
1099         v = runtime_xadd(&runtime_sched.atomic, (1<<mcpuShift));
1100         if(m->profilehz == runtime_sched.profilehz && atomic_mcpu(v) <= atomic_mcpumax(v)) {
1101                 // There's a cpu for us, so we can run.
1102                 gp->status = Grunning;
1103                 // Garbage collector isn't running (since we are),
1104                 // so okay to clear gcstack.
1105 #ifdef USING_SPLIT_STACK
1106                 gp->gcstack = nil;
1107 #endif
1108                 gp->gcnext_sp = nil;
1109                 runtime_memclr(gp->gcregs, sizeof gp->gcregs);
1110                 return;
1111         }
1112
1113         // Tell scheduler to put g back on the run queue:
1114         // mostly equivalent to g->status = Grunning,
1115         // but keeps the garbage collector from thinking
1116         // that g is running right now, which it's not.
1117         gp->readyonstop = 1;
1118
1119         // All the cpus are taken.
1120         // The scheduler will ready g and put this m to sleep.
1121         // When the scheduler takes g away from m,
1122         // it will undo the runtime_sched.mcpu++ above.
1123         runtime_gosched();
1124
1125         // Gosched returned, so we're allowed to run now.
1126         // Delete the gcstack information that we left for
1127         // the garbage collector during the system call.
1128         // Must wait until now because until gosched returns
1129         // we don't know for sure that the garbage collector
1130         // is not running.
1131 #ifdef USING_SPLIT_STACK
1132         gp->gcstack = nil;
1133 #endif
1134         gp->gcnext_sp = nil;
1135         runtime_memclr(gp->gcregs, sizeof gp->gcregs);
1136 }
1137
1138 G*
1139 runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize)
1140 {
1141         G *newg;
1142
1143         newg = runtime_malloc(sizeof(G));
1144         if(stacksize >= 0) {
1145 #if USING_SPLIT_STACK
1146                 *ret_stack = __splitstack_makecontext(stacksize,
1147                                                       &newg->stack_context[0],
1148                                                       ret_stacksize);
1149 #else
1150                 *ret_stack = runtime_mallocgc(stacksize, FlagNoProfiling|FlagNoGC, 0, 0);
1151                 *ret_stacksize = stacksize;
1152                 newg->gcinitial_sp = *ret_stack;
1153                 newg->gcstack_size = stacksize;
1154 #endif
1155         }
1156         return newg;
1157 }
1158
/* For runtime package testing.  */

// Forwarder to runtime_entersyscall, emitted under the assembler
// name libgo_runtime.runtime.entersyscall.
void runtime_testing_entersyscall(void)
  __asm__("libgo_runtime.runtime.entersyscall");

void
runtime_testing_entersyscall(void)
{
	runtime_entersyscall();
}
1169
// Forwarder to runtime_exitsyscall, emitted under the assembler
// name libgo_runtime.runtime.exitsyscall.
void runtime_testing_exitsyscall(void)
  __asm__("libgo_runtime.runtime.exitsyscall");

void
runtime_testing_exitsyscall(void)
{
	runtime_exitsyscall();
}
1178
1179 G*
1180 __go_go(void (*fn)(void*), void* arg)
1181 {
1182         byte *sp;
1183         size_t spsize;
1184         G * volatile newg;      // volatile to avoid longjmp warning
1185
1186         schedlock();
1187
1188         if((newg = gfget()) != nil){
1189 #ifdef USING_SPLIT_STACK
1190                 sp = __splitstack_resetcontext(&newg->stack_context[0],
1191                                                &spsize);
1192 #else
1193                 sp = newg->gcinitial_sp;
1194                 spsize = newg->gcstack_size;
1195                 newg->gcnext_sp = sp;
1196 #endif
1197         } else {
1198                 newg = runtime_malg(StackMin, &sp, &spsize);
1199                 if(runtime_lastg == nil)
1200                         runtime_allg = newg;
1201                 else
1202                         runtime_lastg->alllink = newg;
1203                 runtime_lastg = newg;
1204         }
1205         newg->status = Gwaiting;
1206         newg->waitreason = "new goroutine";
1207
1208         newg->entry = (byte*)fn;
1209         newg->param = arg;
1210         newg->gopc = (uintptr)__builtin_return_address(0);
1211
1212         runtime_sched.gcount++;
1213         runtime_sched.goidgen++;
1214         newg->goid = runtime_sched.goidgen;
1215
1216         if(sp == nil)
1217                 runtime_throw("nil g->stack0");
1218
1219         getcontext(&newg->context);
1220         newg->context.uc_stack.ss_sp = sp;
1221         newg->context.uc_stack.ss_size = spsize;
1222         makecontext(&newg->context, kickoff, 0);
1223
1224         newprocreadylocked(newg);
1225         schedunlock();
1226
1227         return newg;
1228 //printf(" goid=%d\n", newg->goid);
1229 }
1230
1231 // Put on gfree list.  Sched must be locked.
1232 static void
1233 gfput(G *g)
1234 {
1235         g->schedlink = runtime_sched.gfree;
1236         runtime_sched.gfree = g;
1237 }
1238
1239 // Get from gfree list.  Sched must be locked.
1240 static G*
1241 gfget(void)
1242 {
1243         G *g;
1244
1245         g = runtime_sched.gfree;
1246         if(g)
1247                 runtime_sched.gfree = g->schedlink;
1248         return g;
1249 }
1250
1251 // Run all deferred functions for the current goroutine.
1252 static void
1253 rundefer(void)
1254 {
1255         Defer *d;
1256
1257         while((d = g->defer) != nil) {
1258                 void (*pfn)(void*);
1259
1260                 pfn = d->__pfn;
1261                 d->__pfn = nil;
1262                 if (pfn != nil)
1263                         (*pfn)(d->__arg);
1264                 g->defer = d->__next;
1265                 runtime_free(d);
1266         }
1267 }
1268
// Emitted under the assembler name libgo_runtime.runtime.Goexit.
// Runs the current goroutine's deferred calls, then calls
// runtime_goexit.
void runtime_Goexit (void) __asm__ ("libgo_runtime.runtime.Goexit");

void
runtime_Goexit(void)
{
	rundefer();
	runtime_goexit();
}
1277
// Emitted under the assembler name libgo_runtime.runtime.Gosched.
// Simply forwards to runtime_gosched.
void runtime_Gosched (void) __asm__ ("libgo_runtime.runtime.Gosched");

void
runtime_Gosched(void)
{
	runtime_gosched();
}
1285
1286 // delete when scheduler is stronger
1287 int32
1288 runtime_gomaxprocsfunc(int32 n)
1289 {
1290         int32 ret;
1291         uint32 v;
1292
1293         schedlock();
1294         ret = runtime_gomaxprocs;
1295         if(n <= 0)
1296                 n = ret;
1297         if(n > maxgomaxprocs)
1298                 n = maxgomaxprocs;
1299         runtime_gomaxprocs = n;
1300         if(runtime_gomaxprocs > 1)
1301                 runtime_singleproc = false;
1302         if(runtime_gcwaiting != 0) {
1303                 if(atomic_mcpumax(runtime_sched.atomic) != 1)
1304                         runtime_throw("invalid mcpumax during gc");
1305                 schedunlock();
1306                 return ret;
1307         }
1308
1309         setmcpumax(n);
1310
1311         // If there are now fewer allowed procs
1312         // than procs running, stop.
1313         v = runtime_atomicload(&runtime_sched.atomic);
1314         if((int32)atomic_mcpu(v) > n) {
1315                 schedunlock();
1316                 runtime_gosched();
1317                 return ret;
1318         }
1319         // handle more procs
1320         matchmg();
1321         schedunlock();
1322         return ret;
1323 }
1324
1325 void
1326 runtime_LockOSThread(void)
1327 {
1328         if(m == &runtime_m0 && runtime_sched.init) {
1329                 runtime_sched.lockmain = true;
1330                 return;
1331         }
1332         m->lockedg = g;
1333         g->lockedm = m;
1334 }
1335
1336 void
1337 runtime_UnlockOSThread(void)
1338 {
1339         if(m == &runtime_m0 && runtime_sched.init) {
1340                 runtime_sched.lockmain = false;
1341                 return;
1342         }
1343         m->lockedg = nil;
1344         g->lockedm = nil;
1345 }
1346
1347 bool
1348 runtime_lockedOSThread(void)
1349 {
1350         return g->lockedm != nil && m->lockedg != nil;
1351 }
1352
// for testing of callbacks
//
// Forwarder to runtime_lockedOSThread, emitted under the assembler
// name libgo_runtime.runtime.golockedOSThread.

_Bool runtime_golockedOSThread(void)
  __asm__("libgo_runtime.runtime.golockedOSThread");

_Bool
runtime_golockedOSThread(void)
{
	_Bool locked;

	locked = runtime_lockedOSThread();
	return locked;
}
1363
1364 // for testing of wire, unwire
1365 uint32
1366 runtime_mid()
1367 {
1368         return m->id;
1369 }
1370
1371 int32 runtime_Goroutines (void)
1372   __asm__ ("libgo_runtime.runtime.Goroutines");
1373
1374 int32
1375 runtime_Goroutines()
1376 {
1377         return runtime_sched.gcount;
1378 }
1379
1380 int32
1381 runtime_mcount(void)
1382 {
1383         return runtime_sched.mcount;
1384 }
1385
// State of the cpu profiler.  fn and hz are written by
// runtime_setcpuprofilerate and inspected by runtime_sigprof.
static struct {
	// Embedded lock: prof itself is what gets passed to
	// runtime_lock and runtime_unlock.
	Lock;
	// Callback receiving a pc buffer and a count; nil when
	// profiling is off.
	void (*fn)(uintptr*, int32);
	// Profiling rate; 0 when profiling is off.
	int32 hz;
	// Buffer for collected pcs.  At present it is referenced only
	// by the commented-out traceback code in runtime_sigprof.
	uintptr pcbuf[100];
} prof;
1392
1393 void
1394 runtime_sigprof(uint8 *pc __attribute__ ((unused)),
1395                 uint8 *sp __attribute__ ((unused)),
1396                 uint8 *lr __attribute__ ((unused)),
1397                 G *gp __attribute__ ((unused)))
1398 {
1399         // int32 n;
1400
1401         if(prof.fn == nil || prof.hz == 0)
1402                 return;
1403
1404         runtime_lock(&prof);
1405         if(prof.fn == nil) {
1406                 runtime_unlock(&prof);
1407                 return;
1408         }
1409         // n = runtime_gentraceback(pc, sp, lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf));
1410         // if(n > 0)
1411         //      prof.fn(prof.pcbuf, n);
1412         runtime_unlock(&prof);
1413 }
1414
1415 void
1416 runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
1417 {
1418         // Force sane arguments.
1419         if(hz < 0)
1420                 hz = 0;
1421         if(hz == 0)
1422                 fn = nil;
1423         if(fn == nil)
1424                 hz = 0;
1425
1426         // Stop profiler on this cpu so that it is safe to lock prof.
1427         // if a profiling signal came in while we had prof locked,
1428         // it would deadlock.
1429         runtime_resetcpuprofiler(0);
1430
1431         runtime_lock(&prof);
1432         prof.fn = fn;
1433         prof.hz = hz;
1434         runtime_unlock(&prof);
1435         runtime_lock(&runtime_sched);
1436         runtime_sched.profilehz = hz;
1437         runtime_unlock(&runtime_sched);
1438
1439         if(hz != 0)
1440                 runtime_resetcpuprofiler(hz);
1441 }