libgo/runtime/proc.c

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 #include <limits.h>
   6 #include <stdlib.h>
   7 #include <pthread.h>
   8 #include <unistd.h>
   9
  10 #include "config.h"
  11 #include "runtime.h"
  12 #include "arch.h"
  13 #include "defs.h"
  14 #include "malloc.h"
  15 #include "go-defer.h"
  16
  17 #ifdef USING_SPLIT_STACK
  18
  19 /* FIXME: These are not declared anywhere.  */
  20
  21 extern void __splitstack_getcontext(void *context[10]);
  22
  23 extern void __splitstack_setcontext(void *context[10]);
  24
  25 extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
  26
  27 extern void * __splitstack_resetcontext(void *context[10], size_t *);
  28
  29 extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
  30                                void **);
  31
  32 extern void __splitstack_block_signals (int *, int *);
  33
  34 extern void __splitstack_block_signals_context (void *context[10], int *,
  35                                                 int *);
  36
  37 #endif
  38
  39 #if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
  40 # ifdef PTHREAD_STACK_MIN
  41 #  define StackMin PTHREAD_STACK_MIN
  42 # else
  43 #  define StackMin 8192
  44 # endif
  45 #else
  46 # define StackMin 2 * 1024 * 1024
  47 #endif
  48
  49 uintptr runtime_stacks_sys;
  50
  51 static void schedule(G*);
  52
  53 typedef struct Sched Sched;
  54
  55 M       runtime_m0;
  56 G       runtime_g0;     // idle goroutine for m0
  57
  58 #ifdef __rtems__
  59 #define __thread
  60 #endif
  61
  62 static __thread G *g;
  63 static __thread M *m;
  64
  65 #ifndef SETCONTEXT_CLOBBERS_TLS
  66
  67 static inline void
  68 initcontext(void)
  69 {
  70 }
  71
  72 static inline void
  73 fixcontext(ucontext_t *c __attribute__ ((unused)))
  74 {
  75 }
  76
  77 # else
  78
  79 # if defined(__x86_64__) && defined(__sun__)
  80
  81 // x86_64 Solaris 10 and 11 have a bug: setcontext switches the %fs
  82 // register to that of the thread which called getcontext.  The effect
  83 // is that the address of all __thread variables changes.  This bug
  84 // also affects pthread_self() and pthread_getspecific.  We work
  85 // around it by clobbering the context field directly to keep %fs the
  86 // same.
  87
  88 static __thread greg_t fs;
  89
  90 static inline void
  91 initcontext(void)
  92 {
  93         ucontext_t c;
  94
  95         getcontext(&c);
  96         fs = c.uc_mcontext.gregs[REG_FSBASE];
  97 }
  98
  99 static inline void
 100 fixcontext(ucontext_t* c)
 101 {
 102         c->uc_mcontext.gregs[REG_FSBASE] = fs;
 103 }
 104
 105 # else
 106
 107 #  error unknown case for SETCONTEXT_CLOBBERS_TLS
 108
 109 # endif
 110
 111 #endif
 112
 113 // We can not always refer to the TLS variables directly.  The
 114 // compiler will call tls_get_addr to get the address of the variable,
 115 // and it may hold it in a register across a call to schedule.  When
 116 // we get back from the call we may be running in a different thread,
 117 // in which case the register now points to the TLS variable for a
 118 // different thread.  We use non-inlinable functions to avoid this
 119 // when necessary.
 120
 121 G* runtime_g(void) __attribute__ ((noinline, no_split_stack));
 122
 123 G*
 124 runtime_g(void)
 125 {
 126         return g;
 127 }
 128
 129 M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
 130
 131 M*
 132 runtime_m(void)
 133 {
 134         return m;
 135 }
 136
 137 int32   runtime_gcwaiting;
 138
 139 // Go scheduler
 140 //
 141 // The go scheduler's job is to match ready-to-run goroutines (`g's)
 142 // with waiting-for-work schedulers (`m's).  If there are ready g's
 143 // and no waiting m's, ready() will start a new m running in a new
 144 // OS thread, so that all ready g's can run simultaneously, up to a limit.
 145 // For now, m's never go away.
 146 //
 147 // By default, Go keeps only one kernel thread (m) running user code
 148 // at a single time; other threads may be blocked in the operating system.
 149 // Setting the environment variable $GOMAXPROCS or calling
 150 // runtime.GOMAXPROCS() will change the number of user threads
 151 // allowed to execute simultaneously.  $GOMAXPROCS is thus an
 152 // approximation of the maximum number of cores to use.
 153 //
 154 // Even a program that can run without deadlock in a single process
 155 // might use more m's if given the chance.  For example, the prime
 156 // sieve will use as many m's as there are primes (up to runtime_sched.mmax),
 157 // allowing different stages of the pipeline to execute in parallel.
 158 // We could revisit this choice, only kicking off new m's for blocking
 159 // system calls, but that would limit the amount of parallel computation
 160 // that go would try to do.
 161 //
 162 // In general, one could imagine all sorts of refinements to the
 163 // scheduler, but the goal now is just to get something working on
 164 // Linux and OS X.
 165
 166 struct Sched {
 167         Lock;
 168
 169         G *gfree;       // available g's (status == Gdead)
 170         int32 goidgen;
 171
 172         G *ghead;       // g's waiting to run
 173         G *gtail;
 174         int32 gwait;    // number of g's waiting to run
 175         int32 gcount;   // number of g's that are alive
 176         int32 grunning; // number of g's running on cpu or in syscall
 177
 178         M *mhead;       // m's waiting for work
 179         int32 mwait;    // number of m's waiting for work
 180         int32 mcount;   // number of m's that have been created
 181
 182         volatile uint32 atomic; // atomic scheduling word (see below)
 183
 184         int32 profilehz;        // cpu profiling rate
 185
 186         bool init;  // running initialization
 187         bool lockmain;  // init called runtime.LockOSThread
 188
 189         Note    stopped;        // one g can set waitstop and wait here for m's to stop
 190 };
 191
 192 // The atomic word in sched is an atomic uint32 that
 193 // holds these fields.
 194 //
 195 //      [15 bits] mcpu          number of m's executing on cpu
 196 //      [15 bits] mcpumax       max number of m's allowed on cpu
 197 //      [1 bit] waitstop        some g is waiting on stopped
 198 //      [1 bit] gwaiting        gwait != 0
 199 //
 200 // These fields are the information needed by entersyscall
 201 // and exitsyscall to decide whether to coordinate with the
 202 // scheduler.  Packing them into a single machine word lets
 203 // them use a fast path with a single atomic read/write and
 204 // no lock/unlock.  This greatly reduces contention in
 205 // syscall- or cgo-heavy multithreaded programs.
 206 //
 207 // Except for entersyscall and exitsyscall, the manipulations
 208 // to these fields only happen while holding the schedlock,
 209 // so the routines holding schedlock only need to worry about
 210 // what entersyscall and exitsyscall do, not the other routines
 211 // (which also use the schedlock).
 212 //
 213 // In particular, entersyscall and exitsyscall only read mcpumax,
 214 // waitstop, and gwaiting.  They never write them.  Thus, writes to those
 215 // fields can be done (holding schedlock) without fear of write conflicts.
 216 // There may still be logic conflicts: for example, the set of waitstop must
 217 // be conditioned on mcpu >= mcpumax or else the wait may be a
 218 // spurious sleep.  The Promela model in proc.p verifies these accesses.
 219 enum {
 220         mcpuWidth = 15,
 221         mcpuMask = (1<<mcpuWidth) - 1,
 222         mcpuShift = 0,
 223         mcpumaxShift = mcpuShift + mcpuWidth,
 224         waitstopShift = mcpumaxShift + mcpuWidth,
 225         gwaitingShift = waitstopShift+1,
 226
 227         // The max value of GOMAXPROCS is constrained
 228         // by the max value we can store in the bit fields
 229         // of the atomic word.  Reserve a few high values
 230         // so that we can detect accidental decrement
 231         // beyond zero.
 232         maxgomaxprocs = mcpuMask - 10,
 233 };
 234
 235 #define atomic_mcpu(v)          (((v)>>mcpuShift)&mcpuMask)
 236 #define atomic_mcpumax(v)       (((v)>>mcpumaxShift)&mcpuMask)
 237 #define atomic_waitstop(v)      (((v)>>waitstopShift)&1)
 238 #define atomic_gwaiting(v)      (((v)>>gwaitingShift)&1)
 239
 240 Sched runtime_sched;
 241 int32 runtime_gomaxprocs;
 242 bool runtime_singleproc;
 243
 244 static bool canaddmcpu(void);
 245
 246 // An m that is waiting for notewakeup(&m->havenextg).  This may
 247 // only be accessed while the scheduler lock is held.  This is used to
 248 // minimize the number of times we call notewakeup while the scheduler
 249 // lock is held, since the m will normally move quickly to lock the
 250 // scheduler itself, producing lock contention.
 251 static M* mwakeup;
 252
 253 // Scheduling helpers.  Sched must be locked.
 254 static void gput(G*);   // put/get on ghead/gtail
 255 static G* gget(void);
 256 static void mput(M*);   // put/get on mhead
 257 static M* mget(G*);
 258 static void gfput(G*);  // put/get on gfree
 259 static G* gfget(void);
 260 static void matchmg(void);      // match m's to g's
 261 static void readylocked(G*);    // ready, but sched is locked
 262 static void mnextg(M*, G*);
 263 static void mcommoninit(M*);
 264
 265 void
 266 setmcpumax(uint32 n)
 267 {
 268         uint32 v, w;
 269
 270         for(;;) {
 271                 v = runtime_sched.atomic;
 272                 w = v;
 273                 w &= ~(mcpuMask<<mcpumaxShift);
 274                 w |= n<<mcpumaxShift;
 275                 if(runtime_cas(&runtime_sched.atomic, v, w))
 276                         break;
 277         }
 278 }
 279
 280 // First function run by a new goroutine.  This replaces gogocall.
 281 static void
 282 kickoff(void)
 283 {
 284         void (*fn)(void*);
 285
 286         fn = (void (*)(void*))(g->entry);
 287         fn(g->param);
 288         runtime_goexit();
 289 }
 290
 291 // Switch context to a different goroutine.  This is like longjmp.
 292 static void runtime_gogo(G*) __attribute__ ((noinline));
 293 static void
 294 runtime_gogo(G* newg)
 295 {
 296 #ifdef USING_SPLIT_STACK
 297         __splitstack_setcontext(&newg->stack_context[0]);
 298 #endif
 299         g = newg;
 300         newg->fromgogo = true;
 301         fixcontext(&newg->context);
 302         setcontext(&newg->context);
 303         runtime_throw("gogo setcontext returned");
 304 }
 305
 306 // Save context and call fn passing g as a parameter.  This is like
 307 // setjmp.  Because getcontext always returns 0, unlike setjmp, we use
 308 // g->fromgogo as a code.  It will be true if we got here via
 309 // setcontext.  g == nil the first time this is called in a new m.
 310 static void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
 311 static void
 312 runtime_mcall(void (*pfn)(G*))
 313 {
 314         M *mp;
 315         G *gp;
 316 #ifndef USING_SPLIT_STACK
 317         int i;
 318 #endif
 319
 320         // Ensure that all registers are on the stack for the garbage
 321         // collector.
 322         __builtin_unwind_init();
 323
 324         mp = m;
 325         gp = g;
 326         if(gp == mp->g0)
 327                 runtime_throw("runtime: mcall called on m->g0 stack");
 328
 329         if(gp != nil) {
 330
 331 #ifdef USING_SPLIT_STACK
 332                 __splitstack_getcontext(&g->stack_context[0]);
 333 #else
 334                 gp->gcnext_sp = &i;
 335 #endif
 336                 gp->fromgogo = false;
 337                 getcontext(&gp->context);
 338
 339                 // When we return from getcontext, we may be running
 340                 // in a new thread.  That means that m and g may have
 341                 // changed.  They are global variables so we will
 342                 // reload them, but the addresses of m and g may be
 343                 // cached in our local stack frame, and those
 344                 // addresses may be wrong.  Call functions to reload
 345                 // the values for this thread.
 346                 mp = runtime_m();
 347                 gp = runtime_g();
 348         }
 349         if (gp == nil || !gp->fromgogo) {
 350 #ifdef USING_SPLIT_STACK
 351                 __splitstack_setcontext(&mp->g0->stack_context[0]);
 352 #endif
 353                 mp->g0->entry = (byte*)pfn;
 354                 mp->g0->param = gp;
 355
 356                 // It's OK to set g directly here because this case
 357                 // can not occur if we got here via a setcontext to
 358                 // the getcontext call just above.
 359                 g = mp->g0;
 360
 361                 fixcontext(&mp->g0->context);
 362                 setcontext(&mp->g0->context);
 363                 runtime_throw("runtime: mcall function returned");
 364         }
 365 }
 366
// Keep track of the scavenger's goroutine for deadlock detection.
 368 static G *scvg;
 369
 370 // The bootstrap sequence is:
 371 //
 372 //      call osinit
 373 //      call schedinit
 374 //      make & queue new G
 375 //      call runtime_mstart
 376 //
 377 // The new G calls runtime_main.
 378 void
 379 runtime_schedinit(void)
 380 {
 381         int32 n;
 382         const byte *p;
 383
 384         m = &runtime_m0;
 385         g = &runtime_g0;
 386         m->g0 = g;
 387         m->curg = g;
 388         g->m = m;
 389
 390         initcontext();
 391
 392         m->nomemprof++;
 393         runtime_mallocinit();
 394         mcommoninit(m);
 395
 396         runtime_goargs();
 397         runtime_goenvs();
 398
 399         // For debugging:
 400         // Allocate internal symbol table representation now,
 401         // so that we don't need to call malloc when we crash.
 402         // runtime_findfunc(0);
 403
 404         runtime_gomaxprocs = 1;
 405         p = runtime_getenv("GOMAXPROCS");
 406         if(p != nil && (n = runtime_atoi(p)) != 0) {
 407                 if(n > maxgomaxprocs)
 408                         n = maxgomaxprocs;
 409                 runtime_gomaxprocs = n;
 410         }
 411         // wait for the main goroutine to start before taking
 412         // GOMAXPROCS into account.
 413         setmcpumax(1);
 414         runtime_singleproc = runtime_gomaxprocs == 1;
 415
 416         canaddmcpu();   // mcpu++ to account for bootstrap m
 417         m->helpgc = 1;  // flag to tell schedule() to mcpu--
 418         runtime_sched.grunning++;
 419
 420         // Can not enable GC until all roots are registered.
 421         // mstats.enablegc = 1;
 422         m->nomemprof--;
 423 }
 424
 425 extern void main_init(void) __asm__ ("__go_init_main");
 426 extern void main_main(void) __asm__ ("main.main");
 427
 428 // The main goroutine.
 429 void
 430 runtime_main(void)
 431 {
 432         // Lock the main goroutine onto this, the main OS thread,
 433         // during initialization.  Most programs won't care, but a few
 434         // do require certain calls to be made by the main thread.
 435         // Those can arrange for main.main to run in the main thread
 436         // by calling runtime.LockOSThread during initialization
 437         // to preserve the lock.
 438         runtime_LockOSThread();
 439         // From now on, newgoroutines may use non-main threads.
 440         setmcpumax(runtime_gomaxprocs);
 441         runtime_sched.init = true;
 442         scvg = __go_go(runtime_MHeap_Scavenger, nil);
 443         main_init();
 444         runtime_sched.init = false;
 445         if(!runtime_sched.lockmain)
 446                 runtime_UnlockOSThread();
 447
 448         // For gccgo we have to wait until after main is initialized
 449         // to enable GC, because initializing main registers the GC
 450         // roots.
 451         mstats.enablegc = 1;
 452
 453         // The deadlock detection has false negatives.
 454         // Let scvg start up, to eliminate the false negative
 455         // for the trivial program func main() { select{} }.
 456         runtime_gosched();
 457
 458         main_main();
 459         runtime_exit(0);
 460         for(;;)
 461                 *(int32*)0 = 0;
 462 }
 463
 464 // Lock the scheduler.
 465 static void
 466 schedlock(void)
 467 {
 468         runtime_lock(&runtime_sched);
 469 }
 470
 471 // Unlock the scheduler.
 472 static void
 473 schedunlock(void)
 474 {
 475         M *m;
 476
 477         m = mwakeup;
 478         mwakeup = nil;
 479         runtime_unlock(&runtime_sched);
 480         if(m != nil)
 481                 runtime_notewakeup(&m->havenextg);
 482 }
 483
 484 void
 485 runtime_goexit(void)
 486 {
 487         g->status = Gmoribund;
 488         runtime_gosched();
 489 }
 490
 491 void
 492 runtime_goroutineheader(G *g)
 493 {
 494         const char *status;
 495
 496         switch(g->status) {
 497         case Gidle:
 498                 status = "idle";
 499                 break;
 500         case Grunnable:
 501                 status = "runnable";
 502                 break;
 503         case Grunning:
 504                 status = "running";
 505                 break;
 506         case Gsyscall:
 507                 status = "syscall";
 508                 break;
 509         case Gwaiting:
 510                 if(g->waitreason)
 511                         status = g->waitreason;
 512                 else
 513                         status = "waiting";
 514                 break;
 515         case Gmoribund:
 516                 status = "moribund";
 517                 break;
 518         default:
 519                 status = "???";
 520                 break;
 521         }
 522         runtime_printf("goroutine %d [%s]:\n", g->goid, status);
 523 }
 524
 525 void
 526 runtime_tracebackothers(G *me)
 527 {
 528         G *g;
 529
 530         for(g = runtime_allg; g != nil; g = g->alllink) {
 531                 if(g == me || g->status == Gdead)
 532                         continue;
 533                 runtime_printf("\n");
 534                 runtime_goroutineheader(g);
 535                 // runtime_traceback(g->sched.pc, g->sched.sp, 0, g);
 536         }
 537 }
 538
 539 // Mark this g as m's idle goroutine.
 540 // This functionality might be used in environments where programs
 541 // are limited to a single thread, to simulate a select-driven
 542 // network server.  It is not exposed via the standard runtime API.
 543 void
 544 runtime_idlegoroutine(void)
 545 {
 546         if(g->idlem != nil)
 547                 runtime_throw("g is already an idle goroutine");
 548         g->idlem = m;
 549 }
 550
 551 static void
 552 mcommoninit(M *m)
 553 {
 554         m->id = runtime_sched.mcount++;
 555         m->fastrand = 0x49f6428aUL + m->id + runtime_cputicks();
 556
 557         if(m->mcache == nil)
 558                 m->mcache = runtime_allocmcache();
 559
 560         runtime_callers(1, m->createstack, nelem(m->createstack));
 561
 562         // Add to runtime_allm so garbage collector doesn't free m
 563         // when it is just in a register or thread-local storage.
 564         m->alllink = runtime_allm;
 565         // runtime_NumCgoCall() iterates over allm w/o schedlock,
 566         // so we need to publish it safely.
 567         runtime_atomicstorep(&runtime_allm, m);
 568 }
 569
 570 // Try to increment mcpu.  Report whether succeeded.
 571 static bool
 572 canaddmcpu(void)
 573 {
 574         uint32 v;
 575
 576         for(;;) {
 577                 v = runtime_sched.atomic;
 578                 if(atomic_mcpu(v) >= atomic_mcpumax(v))
 579                         return 0;
 580                 if(runtime_cas(&runtime_sched.atomic, v, v+(1<<mcpuShift)))
 581                         return 1;
 582         }
 583 }
 584
 585 // Put on `g' queue.  Sched must be locked.
 586 static void
 587 gput(G *g)
 588 {
 589         M *m;
 590
 591         // If g is wired, hand it off directly.
 592         if((m = g->lockedm) != nil && canaddmcpu()) {
 593                 mnextg(m, g);
 594                 return;
 595         }
 596
 597         // If g is the idle goroutine for an m, hand it off.
 598         if(g->idlem != nil) {
 599                 if(g->idlem->idleg != nil) {
 600                         runtime_printf("m%d idle out of sync: g%d g%d\n",
 601                                 g->idlem->id,
 602                                 g->idlem->idleg->goid, g->goid);
 603                         runtime_throw("runtime: double idle");
 604                 }
 605                 g->idlem->idleg = g;
 606                 return;
 607         }
 608
 609         g->schedlink = nil;
 610         if(runtime_sched.ghead == nil)
 611                 runtime_sched.ghead = g;
 612         else
 613                 runtime_sched.gtail->schedlink = g;
 614         runtime_sched.gtail = g;
 615
 616         // increment gwait.
 617         // if it transitions to nonzero, set atomic gwaiting bit.
 618         if(runtime_sched.gwait++ == 0)
 619                 runtime_xadd(&runtime_sched.atomic, 1<<gwaitingShift);
 620 }
 621
 622 // Report whether gget would return something.
 623 static bool
 624 haveg(void)
 625 {
 626         return runtime_sched.ghead != nil || m->idleg != nil;
 627 }
 628
 629 // Get from `g' queue.  Sched must be locked.
 630 static G*
 631 gget(void)
 632 {
 633         G *g;
 634
 635         g = runtime_sched.ghead;
 636         if(g){
 637                 runtime_sched.ghead = g->schedlink;
 638                 if(runtime_sched.ghead == nil)
 639                         runtime_sched.gtail = nil;
 640                 // decrement gwait.
 641                 // if it transitions to zero, clear atomic gwaiting bit.
 642                 if(--runtime_sched.gwait == 0)
 643                         runtime_xadd(&runtime_sched.atomic, -1<<gwaitingShift);
 644         } else if(m->idleg != nil) {
 645                 g = m->idleg;
 646                 m->idleg = nil;
 647         }
 648         return g;
 649 }
 650
 651 // Put on `m' list.  Sched must be locked.
 652 static void
 653 mput(M *m)
 654 {
 655         m->schedlink = runtime_sched.mhead;
 656         runtime_sched.mhead = m;
 657         runtime_sched.mwait++;
 658 }
 659
 660 // Get an `m' to run `g'.  Sched must be locked.
 661 static M*
 662 mget(G *g)
 663 {
 664         M *m;
 665
 666         // if g has its own m, use it.
 667         if(g && (m = g->lockedm) != nil)
 668                 return m;
 669
 670         // otherwise use general m pool.
 671         if((m = runtime_sched.mhead) != nil){
 672                 runtime_sched.mhead = m->schedlink;
 673                 runtime_sched.mwait--;
 674         }
 675         return m;
 676 }
 677
 678 // Mark g ready to run.
 679 void
 680 runtime_ready(G *g)
 681 {
 682         schedlock();
 683         readylocked(g);
 684         schedunlock();
 685 }
 686
 687 // Mark g ready to run.  Sched is already locked.
 688 // G might be running already and about to stop.
 689 // The sched lock protects g->status from changing underfoot.
 690 static void
 691 readylocked(G *g)
 692 {
 693         if(g->m){
 694                 // Running on another machine.
 695                 // Ready it when it stops.
 696                 g->readyonstop = 1;
 697                 return;
 698         }
 699
 700         // Mark runnable.
 701         if(g->status == Grunnable || g->status == Grunning) {
 702                 runtime_printf("goroutine %d has status %d\n", g->goid, g->status);
 703                 runtime_throw("bad g->status in ready");
 704         }
 705         g->status = Grunnable;
 706
 707         gput(g);
 708         matchmg();
 709 }
 710
 711 // Same as readylocked but a different symbol so that
 712 // debuggers can set a breakpoint here and catch all
 713 // new goroutines.
 714 static void
 715 newprocreadylocked(G *g)
 716 {
 717         readylocked(g);
 718 }
 719
 720 // Pass g to m for running.
 721 // Caller has already incremented mcpu.
 722 static void
 723 mnextg(M *m, G *g)
 724 {
 725         runtime_sched.grunning++;
 726         m->nextg = g;
 727         if(m->waitnextg) {
 728                 m->waitnextg = 0;
 729                 if(mwakeup != nil)
 730                         runtime_notewakeup(&mwakeup->havenextg);
 731                 mwakeup = m;
 732         }
 733 }
 734
 735 // Get the next goroutine that m should run.
 736 // Sched must be locked on entry, is unlocked on exit.
 737 // Makes sure that at most $GOMAXPROCS g's are
 738 // running on cpus (not in system calls) at any given time.
 739 static G*
 740 nextgandunlock(void)
 741 {
 742         G *gp;
 743         uint32 v;
 744
 745 top:
 746         if(atomic_mcpu(runtime_sched.atomic) >= maxgomaxprocs)
 747                 runtime_throw("negative mcpu");
 748
 749         // If there is a g waiting as m->nextg, the mcpu++
 750         // happened before it was passed to mnextg.
 751         if(m->nextg != nil) {
 752                 gp = m->nextg;
 753                 m->nextg = nil;
 754                 schedunlock();
 755                 return gp;
 756         }
 757
 758         if(m->lockedg != nil) {
 759                 // We can only run one g, and it's not available.
 760                 // Make sure some other cpu is running to handle
 761                 // the ordinary run queue.
 762                 if(runtime_sched.gwait != 0) {
 763                         matchmg();
 764                         // m->lockedg might have been on the queue.
 765                         if(m->nextg != nil) {
 766                                 gp = m->nextg;
 767                                 m->nextg = nil;
 768                                 schedunlock();
 769                                 return gp;
 770                         }
 771                 }
 772         } else {
 773                 // Look for work on global queue.
 774                 while(haveg() && canaddmcpu()) {
 775                         gp = gget();
 776                         if(gp == nil)
 777                                 runtime_throw("gget inconsistency");
 778
 779                         if(gp->lockedm) {
 780                                 mnextg(gp->lockedm, gp);
 781                                 continue;
 782                         }
 783                         runtime_sched.grunning++;
 784                         schedunlock();
 785                         return gp;
 786                 }
 787
 788                 // The while loop ended either because the g queue is empty
 789                 // or because we have maxed out our m procs running go
 790                 // code (mcpu >= mcpumax).  We need to check that
 791                 // concurrent actions by entersyscall/exitsyscall cannot
 792                 // invalidate the decision to end the loop.
 793                 //
 794                 // We hold the sched lock, so no one else is manipulating the
 795                 // g queue or changing mcpumax.  Entersyscall can decrement
 796                 // mcpu, but if does so when there is something on the g queue,
 797                 // the gwait bit will be set, so entersyscall will take the slow path
 798                 // and use the sched lock.  So it cannot invalidate our decision.
 799                 //
 800                 // Wait on global m queue.
 801                 mput(m);
 802         }
 803
 804         // Look for deadlock situation.
 805         // There is a race with the scavenger that causes false negatives:
 806         // if the scavenger is just starting, then we have
 807         //      scvg != nil && grunning == 0 && gwait == 0
 808         // and we do not detect a deadlock.  It is possible that we should
 809         // add that case to the if statement here, but it is too close to Go 1
 810         // to make such a subtle change.  Instead, we work around the
 811         // false negative in trivial programs by calling runtime.gosched
 812         // from the main goroutine just before main.main.
 813         // See runtime_main above.
 814         //
 815         // On a related note, it is also possible that the scvg == nil case is
 816         // wrong and should include gwait, but that does not happen in
 817         // standard Go programs, which all start the scavenger.
 818         //
 819         if((scvg == nil && runtime_sched.grunning == 0) ||
 820            (scvg != nil && runtime_sched.grunning == 1 && runtime_sched.gwait == 0 &&
 821             (scvg->status == Grunning || scvg->status == Gsyscall))) {
 822                 runtime_throw("all goroutines are asleep - deadlock!");
 823         }
 824
 825         m->nextg = nil;
 826         m->waitnextg = 1;
 827         runtime_noteclear(&m->havenextg);
 828
 829         // Stoptheworld is waiting for all but its cpu to go to stop.
 830         // Entersyscall might have decremented mcpu too, but if so
 831         // it will see the waitstop and take the slow path.
 832         // Exitsyscall never increments mcpu beyond mcpumax.
 833         v = runtime_atomicload(&runtime_sched.atomic);
 834         if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
 835                 // set waitstop = 0 (known to be 1)
 836                 runtime_xadd(&runtime_sched.atomic, -1<<waitstopShift);
 837                 runtime_notewakeup(&runtime_sched.stopped);
 838         }
 839         schedunlock();
 840
 841         runtime_notesleep(&m->havenextg);
 842         if(m->helpgc) {
 843                 runtime_gchelper();
 844                 m->helpgc = 0;
 845                 runtime_lock(&runtime_sched);
 846                 goto top;
 847         }
 848         if((gp = m->nextg) == nil)
 849                 runtime_throw("bad m->nextg in nextgoroutine");
 850         m->nextg = nil;
 851         return gp;
 852 }
 853
 854 int32
 855 runtime_helpgc(bool *extra)
 856 {
 857         M *mp;
 858         int32 n, max;
 859
 860         // Figure out how many CPUs to use.
 861         // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
 862         max = runtime_gomaxprocs;
 863         if(max > runtime_ncpu)
 864                 max = runtime_ncpu > 0 ? runtime_ncpu : 1;
 865         if(max > MaxGcproc)
 866                 max = MaxGcproc;
 867
 868         // We're going to use one CPU no matter what.
 869         // Figure out the max number of additional CPUs.
 870         max--;
 871
 872         runtime_lock(&runtime_sched);
 873         n = 0;
 874         while(n < max && (mp = mget(nil)) != nil) {
 875                 n++;
 876                 mp->helpgc = 1;
 877                 mp->waitnextg = 0;
 878                 runtime_notewakeup(&mp->havenextg);
 879         }
 880         runtime_unlock(&runtime_sched);
 881         if(extra)
 882                 *extra = n != max;
 883         return n;
 884 }
 885
// Stop the world: set runtime_gcwaiting, lower mcpumax to 1, and then
// wait until the mcpu count stored in runtime_sched.atomic has dropped
// to at most 1.  The scheduler lock is released again before returning.
void
runtime_stoptheworld(void)
{
        uint32 v;

        schedlock();
        runtime_gcwaiting = 1;

        setmcpumax(1);

        // while mcpu > 1
        for(;;) {
                v = runtime_sched.atomic;
                if(atomic_mcpu(v) <= 1)
                        break;

                // It would be unsafe for multiple threads to be using
                // the stopped note at once, but there is only
                // ever one thread doing garbage collection.
                runtime_noteclear(&runtime_sched.stopped);
                if(atomic_waitstop(v))
                        runtime_throw("invalid waitstop");

                // atomic { waitstop = 1 }, predicated on the scheduler
                // word still being equal to v, i.e. on the mcpu > 1
                // observation above still being true.  If the word
                // changed in the meantime, reload it and try again.
                if(!runtime_cas(&runtime_sched.atomic, v, v+(1<<waitstopShift)))
                        continue;

                // Sleep without holding the scheduler lock; the stopped
                // note is woken by whoever clears waitstop.
                schedunlock();
                runtime_notesleep(&runtime_sched.stopped);
                schedlock();
        }
        runtime_singleproc = runtime_gomaxprocs == 1;
        schedunlock();
}
 921
 922 void
 923 runtime_starttheworld(bool extra)
 924 {
 925         M *m;
 926
 927         schedlock();
 928         runtime_gcwaiting = 0;
 929         setmcpumax(runtime_gomaxprocs);
 930         matchmg();
 931         if(extra && canaddmcpu()) {
 932                 // Start a new m that will (we hope) be idle
 933                 // and so available to help when the next
 934                 // garbage collection happens.
 935                 // canaddmcpu above did mcpu++
 936                 // (necessary, because m will be doing various
 937                 // initialization work so is definitely running),
 938                 // but m is not running a specific goroutine,
 939                 // so set the helpgc flag as a signal to m's
 940                 // first schedule(nil) to mcpu-- and grunning--.
 941                 m = runtime_newm();
 942                 m->helpgc = 1;
 943                 runtime_sched.grunning++;
 944         }
 945         schedunlock();
 946 }
 947
// Called to start an M.
// mp is the M to run on this thread; runtime_newm passes this function
// to pthread_create as the thread entry point.  Binds the thread-local
// m and g to mp and mp->g0, records the stack top, and enters the
// scheduler.
void*
runtime_mstart(void* mp)
{
        m = (M*)mp;
        g = m->g0;

        initcontext();

        // Cleared so that the g->entry test after getcontext below is
        // false on the first pass through this function.
        g->entry = nil;
        g->param = nil;

        // Record top of stack for use by mcall.
        // Once we call schedule we're never coming back,
        // so other calls can reuse this stack space.
#ifdef USING_SPLIT_STACK
        __splitstack_getcontext(&g->stack_context[0]);
#else
        g->gcinitial_sp = &mp;
        // Setting gcstack_size to 0 is a marker meaning that gcinitial_sp
        // is the top of the stack, not the bottom.
        g->gcstack_size = 0;
        g->gcnext_sp = &mp;
#endif
        getcontext(&g->context);

        if(g->entry != nil) {
                // Got here from mcall.
                // Run the requested function, passing g->param as its G.
                void (*pfn)(G*) = (void (*)(G*))g->entry;
                G* gp = (G*)g->param;
                pfn(gp);
                // NOTE(review): presumably a deliberate crash, since
                // pfn is not expected to return -- confirm.
                *(int*)0x21 = 0x21;
        }
        runtime_minit();

#ifdef USING_SPLIT_STACK
        {
          int dont_block_signals = 0;
          __splitstack_block_signals(&dont_block_signals, nil);
        }
#endif

        // Install signal handlers; after minit so that minit can
        // prepare the thread to be able to handle the signals.
        if(m == &runtime_m0)
                runtime_initsig();

        schedule(nil);
        return nil;
}
 998
// Bundle of an M, a G and a function pointer.
// NOTE(review): not referenced in the visible part of this file;
// presumably the arguments for starting a thread via cgo -- confirm.
typedef struct CgoThreadStart CgoThreadStart;
struct CgoThreadStart
{
        M *m;
        G *g;
        void (*fn)(void);
};
1006
1007 // Kick off new m's as needed (up to mcpumax).
1008 // Sched is locked.
1009 static void
1010 matchmg(void)
1011 {
1012         G *gp;
1013         M *mp;
1014
1015         if(m->mallocing || m->gcing)
1016                 return;
1017
1018         while(haveg() && canaddmcpu()) {
1019                 gp = gget();
1020                 if(gp == nil)
1021                         runtime_throw("gget inconsistency");
1022
1023                 // Find the m that will run gp.
1024                 if((mp = mget(gp)) == nil)
1025                         mp = runtime_newm();
1026                 mnextg(mp, gp);
1027         }
1028 }
1029
1030 // Create a new m.  It will start off with a call to runtime_mstart.
1031 M*
1032 runtime_newm(void)
1033 {
1034         M *m;
1035         pthread_attr_t attr;
1036         pthread_t tid;
1037
1038         m = runtime_malloc(sizeof(M));
1039         mcommoninit(m);
1040         m->g0 = runtime_malg(-1, nil, nil);
1041
1042         if(pthread_attr_init(&attr) != 0)
1043                 runtime_throw("pthread_attr_init");
1044         if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
1045                 runtime_throw("pthread_attr_setdetachstate");
1046
1047 #ifndef PTHREAD_STACK_MIN
1048 #define PTHREAD_STACK_MIN 8192
1049 #endif
1050         if(pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0)
1051                 runtime_throw("pthread_attr_setstacksize");
1052
1053         if(pthread_create(&tid, &attr, runtime_mstart, m) != 0)
1054                 runtime_throw("pthread_create");
1055
1056         return m;
1057 }
1058
// One round of scheduler: find a goroutine and run it.
// The argument is the goroutine that was running before
// schedule was called, or nil if this is the first call.
// Never returns.
static void
schedule(G *gp)
{
        int32 hz;
        uint32 v;

        schedlock();
        if(gp != nil) {
                // Just finished running gp.
                gp->m = nil;
                runtime_sched.grunning--;

                // atomic { mcpu-- }
                v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
                // The mcpu field is unsigned, so a decrement below zero
                // shows up as a value above maxgomaxprocs.
                if(atomic_mcpu(v) > maxgomaxprocs)
                        runtime_throw("negative mcpu in scheduler");

                switch(gp->status){
                case Grunnable:
                case Gdead:
                        // Shouldn't have been running!
                        runtime_throw("bad gp->status in sched");
                case Grunning:
                        // Still runnable: put it back on the run queue.
                        gp->status = Grunnable;
                        gput(gp);
                        break;
                case Gmoribund:
                        // Goroutine is finished: drop any thread lock,
                        // wipe its saved context and recycle the G on
                        // the free list.
                        gp->status = Gdead;
                        if(gp->lockedm) {
                                gp->lockedm = nil;
                                m->lockedg = nil;
                        }
                        gp->idlem = nil;
                        runtime_memclr(&gp->context, sizeof gp->context);
                        gfput(gp);
                        // The last goroutine is gone: exit the program.
                        if(--runtime_sched.gcount == 0)
                                runtime_exit(0);
                        break;
                }
                // Set by runtime_exitsyscall when no cpu was available:
                // make gp ready now that it has been taken off this m.
                if(gp->readyonstop){
                        gp->readyonstop = 0;
                        readylocked(gp);
                }
        } else if(m->helpgc) {
                // Bootstrap m or new m started by starttheworld.
                // atomic { mcpu-- }
                v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
                if(atomic_mcpu(v) > maxgomaxprocs)
                        runtime_throw("negative mcpu in scheduler");
                // Compensate for increment in starttheworld().
                runtime_sched.grunning--;
                m->helpgc = 0;
        } else if(m->nextg != nil) {
                // New m started by matchmg.
        } else {
                runtime_throw("invalid m state in scheduler");
        }

        // Find (or wait for) g to run.  Unlocks runtime_sched.
        gp = nextgandunlock();
        gp->readyonstop = 0;
        gp->status = Grunning;
        m->curg = gp;
        gp->m = m;

        // Check whether the profiler needs to be turned on or off.
        hz = runtime_sched.profilehz;
        if(m->profilehz != hz)
                runtime_resetcpuprofiler(hz);

        // Switch to gp.
        runtime_gogo(gp);
}
1135
1136 // Enter scheduler.  If g->status is Grunning,
1137 // re-queues g and runs everyone else who is waiting
1138 // before running g again.  If g->status is Gmoribund,
1139 // kills off g.
1140 void
1141 runtime_gosched(void)
1142 {
1143         if(m->locks != 0)
1144                 runtime_throw("gosched holding locks");
1145         if(g == m->g0)
1146                 runtime_throw("gosched of g0");
1147         runtime_mcall(schedule);
1148 }
1149
// The goroutine g is about to enter a system call.
// Record that it's not using the cpu anymore.
// This is called only from the go syscall library and cgocall,
// not from the low-level system calls used by the runtime.
//
// Entersyscall cannot split the stack: the stack information
// recorded below must refer to the caller's stack segment, because
// entersyscall is going to return immediately after.
// It's okay to call matchmg and notewakeup even after
// decrementing mcpu, because we haven't released the
// sched lock yet, so the garbage collector cannot be running.

void runtime_entersyscall(void) __attribute__ ((no_split_stack));

void
runtime_entersyscall(void)
{
        uint32 v;

        // Profiling is switched off for the duration of the system
        // call; runtime_exitsyscall's fast path switches it back on.
        if(m->profilehz > 0)
                runtime_setprof(false);

        // Leave SP around for gc and traceback.
#ifdef USING_SPLIT_STACK
        g->gcstack = __splitstack_find(NULL, NULL, &g->gcstack_size,
                                       &g->gcnext_segment, &g->gcnext_sp,
                                       &g->gcinitial_sp);
#else
        // The address of a local stands in for the current stack pointer.
        g->gcnext_sp = (byte *) &v;
#endif

        // Save the registers in the g structure so that any pointers
        // held in registers will be seen by the garbage collector.
        // We could use getcontext here, but setjmp is more efficient
        // because it doesn't need to save the signal mask.
        setjmp(g->gcregs);

        g->status = Gsyscall;

        // Fast path.
        // The slow path inside the schedlock/schedunlock will get
        // through without stopping if it does:
        //      mcpu--
        //      gwait not true
        //      waitstop && mcpu <= mcpumax not true
        // If we can do the same with a single atomic add,
        // then we can skip the locks.
        v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
        if(!atomic_gwaiting(v) && (!atomic_waitstop(v) || atomic_mcpu(v) > atomic_mcpumax(v)))
                return;

        // Slow path: redo the checks under the scheduler lock.
        schedlock();
        v = runtime_atomicload(&runtime_sched.atomic);
        if(atomic_gwaiting(v)) {
                // Goroutines are waiting: let matchmg hand them to m's,
                // then reload the state it may have changed.
                matchmg();
                v = runtime_atomicload(&runtime_sched.atomic);
        }
        if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
                // runtime_stoptheworld is sleeping on the stopped note
                // and mcpu is now within mcpumax: clear waitstop and
                // wake it.
                runtime_xadd(&runtime_sched.atomic, -1<<waitstopShift);
                runtime_notewakeup(&runtime_sched.stopped);
        }

        schedunlock();
}
1214
// The goroutine g exited its system call.
// Arrange for it to run on a cpu again.
// This is called only from the go syscall library, not
// from the low-level system calls used by the runtime.
void
runtime_exitsyscall(void)
{
        G *gp;
        uint32 v;

        // Fast path.
        // If we can do the mcpu++ bookkeeping and
        // find that we still have mcpu <= mcpumax, then we can
        // start executing Go code immediately, without having to
        // schedlock/schedunlock.
        // The fast path is also skipped when this m's profiling rate
        // differs from the scheduler's, so that schedule() gets the
        // chance to reset the profiler.
        gp = g;
        v = runtime_xadd(&runtime_sched.atomic, (1<<mcpuShift));
        if(m->profilehz == runtime_sched.profilehz && atomic_mcpu(v) <= atomic_mcpumax(v)) {
                // There's a cpu for us, so we can run.
                gp->status = Grunning;
                // Garbage collector isn't running (since we are),
                // so okay to clear gcstack.
#ifdef USING_SPLIT_STACK
                gp->gcstack = nil;
#endif
                gp->gcnext_sp = nil;
                runtime_memclr(gp->gcregs, sizeof gp->gcregs);

                // Undo the runtime_setprof(false) done on syscall entry.
                if(m->profilehz > 0)
                        runtime_setprof(true);
                return;
        }

        // Tell scheduler to put g back on the run queue:
        // mostly equivalent to g->status = Grunning,
        // but keeps the garbage collector from thinking
        // that g is running right now, which it's not.
        gp->readyonstop = 1;

        // All the cpus are taken.
        // The scheduler will ready g and put this m to sleep.
        // When the scheduler takes g away from m,
        // it will undo the runtime_sched.mcpu++ above.
        runtime_gosched();

        // Gosched returned, so we're allowed to run now.
        // Delete the gcstack information that we left for
        // the garbage collector during the system call.
        // Must wait until now because until gosched returns
        // we don't know for sure that the garbage collector
        // is not running.
#ifdef USING_SPLIT_STACK
        gp->gcstack = nil;
#endif
        gp->gcnext_sp = nil;
        runtime_memclr(gp->gcregs, sizeof gp->gcregs);
}
1272
1273 // Allocate a new g, with a stack big enough for stacksize bytes.
1274 G*
1275 runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize)
1276 {
1277         G *newg;
1278
1279         newg = runtime_malloc(sizeof(G));
1280         if(stacksize >= 0) {
1281 #if USING_SPLIT_STACK
1282                 int dont_block_signals = 0;
1283
1284                 *ret_stack = __splitstack_makecontext(stacksize,
1285                                                       &newg->stack_context[0],
1286                                                       ret_stacksize);
1287                 __splitstack_block_signals_context(&newg->stack_context[0],
1288                                                    &dont_block_signals, nil);
1289 #else
1290                 *ret_stack = runtime_mallocgc(stacksize, FlagNoProfiling|FlagNoGC, 0, 0);
1291                 *ret_stacksize = stacksize;
1292                 newg->gcinitial_sp = *ret_stack;
1293                 newg->gcstack_size = stacksize;
1294                 runtime_xadd(&runtime_stacks_sys, stacksize);
1295 #endif
1296         }
1297         return newg;
1298 }
1299
1300 /* For runtime package testing.  */
1301
// Exported under the Go symbol libgo_runtime.runtime.entersyscall;
// simply forwards to runtime_entersyscall.
void runtime_testing_entersyscall(void)
  __asm__("libgo_runtime.runtime.entersyscall");

// Defined with an explicit (void) parameter list so that the
// definition is a prototype and matches the declaration above.
void
runtime_testing_entersyscall(void)
{
        runtime_entersyscall();
}
1310
// Exported under the Go symbol libgo_runtime.runtime.exitsyscall;
// simply forwards to runtime_exitsyscall.
void runtime_testing_exitsyscall(void)
  __asm__("libgo_runtime.runtime.exitsyscall");

// Defined with an explicit (void) parameter list so that the
// definition is a prototype and matches the declaration above.
void
runtime_testing_exitsyscall(void)
{
        runtime_exitsyscall();
}
1319
// Create a new goroutine whose entry is fn and whose parameter is arg,
// and make it ready to run.  A G from the free list is reused when one
// is available; otherwise a new G with a StackMin stack is allocated
// and appended to the list of all G's.  Returns the new G.
G*
__go_go(void (*fn)(void*), void* arg)
{
        byte *sp;
        size_t spsize;
        G * volatile newg;      // volatile to avoid longjmp warning

        schedlock();

        if((newg = gfget()) != nil){
                // Reuse a dead G together with its existing stack.
#ifdef USING_SPLIT_STACK
                int dont_block_signals = 0;

                sp = __splitstack_resetcontext(&newg->stack_context[0],
                                               &spsize);
                __splitstack_block_signals_context(&newg->stack_context[0],
                                                   &dont_block_signals, nil);
#else
                // Base and size were stored in the G by runtime_malg.
                sp = newg->gcinitial_sp;
                spsize = newg->gcstack_size;
                if(spsize == 0)
                        runtime_throw("bad spsize in __go_go");
                newg->gcnext_sp = sp;
#endif
        } else {
                // Nothing to reuse: allocate a G and link it onto the
                // end of the runtime_allg list.
                newg = runtime_malg(StackMin, &sp, &spsize);
                if(runtime_lastg == nil)
                        runtime_allg = newg;
                else
                        runtime_lastg->alllink = newg;
                runtime_lastg = newg;
        }
        newg->status = Gwaiting;
        newg->waitreason = "new goroutine";

        newg->entry = (byte*)fn;
        newg->param = arg;
        // Remember the caller's return address as the creation pc.
        newg->gopc = (uintptr)__builtin_return_address(0);

        runtime_sched.gcount++;
        runtime_sched.goidgen++;
        newg->goid = runtime_sched.goidgen;

        if(sp == nil)
                runtime_throw("nil g->stack0");

        // Build a context that starts in kickoff on the new stack.
        getcontext(&newg->context);
        newg->context.uc_stack.ss_sp = sp;
#ifdef MAKECONTEXT_STACK_TOP
        // On these targets makecontext is given the top of the stack.
        newg->context.uc_stack.ss_sp += spsize;
#endif
        newg->context.uc_stack.ss_size = spsize;
        makecontext(&newg->context, kickoff, 0);

        newprocreadylocked(newg);
        schedunlock();

        return newg;
}
1380
1381 // Put on gfree list.  Sched must be locked.
1382 static void
1383 gfput(G *g)
1384 {
1385         g->schedlink = runtime_sched.gfree;
1386         runtime_sched.gfree = g;
1387 }
1388
1389 // Get from gfree list.  Sched must be locked.
1390 static G*
1391 gfget(void)
1392 {
1393         G *g;
1394
1395         g = runtime_sched.gfree;
1396         if(g)
1397                 runtime_sched.gfree = g->schedlink;
1398         return g;
1399 }
1400
1401 // Run all deferred functions for the current goroutine.
1402 static void
1403 rundefer(void)
1404 {
1405         Defer *d;
1406
1407         while((d = g->defer) != nil) {
1408                 void (*pfn)(void*);
1409
1410                 pfn = d->__pfn;
1411                 d->__pfn = nil;
1412                 if (pfn != nil)
1413                         (*pfn)(d->__arg);
1414                 g->defer = d->__next;
1415                 runtime_free(d);
1416         }
1417 }
1418
// Exported under the Go symbol libgo_runtime.runtime.Goexit.
void runtime_Goexit (void) __asm__ ("libgo_runtime.runtime.Goexit");

// Run the current goroutine's deferred functions, then call
// runtime_goexit.
void
runtime_Goexit(void)
{
        rundefer();
        runtime_goexit();
}
1427
// Exported under the Go symbol libgo_runtime.runtime.Gosched.
void runtime_Gosched (void) __asm__ ("libgo_runtime.runtime.Gosched");

// Thin wrapper that forwards to runtime_gosched.
void
runtime_Gosched(void)
{
        runtime_gosched();
}
1435
1436 // Implementation of runtime.GOMAXPROCS.
1437 // delete when scheduler is stronger
1438 int32
1439 runtime_gomaxprocsfunc(int32 n)
1440 {
1441         int32 ret;
1442         uint32 v;
1443
1444         schedlock();
1445         ret = runtime_gomaxprocs;
1446         if(n <= 0)
1447                 n = ret;
1448         if(n > maxgomaxprocs)
1449                 n = maxgomaxprocs;
1450         runtime_gomaxprocs = n;
1451         if(runtime_gomaxprocs > 1)
1452                 runtime_singleproc = false;
1453         if(runtime_gcwaiting != 0) {
1454                 if(atomic_mcpumax(runtime_sched.atomic) != 1)
1455                         runtime_throw("invalid mcpumax during gc");
1456                 schedunlock();
1457                 return ret;
1458         }
1459
1460         setmcpumax(n);
1461
1462         // If there are now fewer allowed procs
1463         // than procs running, stop.
1464         v = runtime_atomicload(&runtime_sched.atomic);
1465         if((int32)atomic_mcpu(v) > n) {
1466                 schedunlock();
1467                 runtime_gosched();
1468                 return ret;
1469         }
1470         // handle more procs
1471         matchmg();
1472         schedunlock();
1473         return ret;
1474 }
1475
1476 void
1477 runtime_LockOSThread(void)
1478 {
1479         if(m == &runtime_m0 && runtime_sched.init) {
1480                 runtime_sched.lockmain = true;
1481                 return;
1482         }
1483         m->lockedg = g;
1484         g->lockedm = m;
1485 }
1486
1487 void
1488 runtime_UnlockOSThread(void)
1489 {
1490         if(m == &runtime_m0 && runtime_sched.init) {
1491                 runtime_sched.lockmain = false;
1492                 return;
1493         }
1494         m->lockedg = nil;
1495         g->lockedm = nil;
1496 }
1497
1498 bool
1499 runtime_lockedOSThread(void)
1500 {
1501         return g->lockedm != nil && m->lockedg != nil;
1502 }
1503
1504 // for testing of callbacks
1505
// Exported under the Go symbol libgo_runtime.runtime.golockedOSThread.
_Bool runtime_golockedOSThread(void)
  __asm__("libgo_runtime.runtime.golockedOSThread");

// Forwards to runtime_lockedOSThread and returns its result.
_Bool
runtime_golockedOSThread(void)
{
        _Bool locked;

        locked = runtime_lockedOSThread();
        return locked;
}
1514
1515 // for testing of wire, unwire
1516 uint32
1517 runtime_mid()
1518 {
1519         return m->id;
1520 }
1521
1522 int32 runtime_NumGoroutine (void)
1523   __asm__ ("libgo_runtime.runtime.NumGoroutine");
1524
1525 int32
1526 runtime_NumGoroutine()
1527 {
1528         return runtime_sched.gcount;
1529 }
1530
1531 int32
1532 runtime_gcount(void)
1533 {
1534         return runtime_sched.gcount;
1535 }
1536
1537 int32
1538 runtime_mcount(void)
1539 {
1540         return runtime_sched.mcount;
1541 }
1542
// CPU profiler state.  fn and hz are written by
// runtime_setcpuprofilerate and checked by runtime_sigprof.
static struct {
        // Embedded lock; the struct itself is passed to
        // runtime_lock/runtime_unlock.
        Lock;
        // Callback installed by runtime_setcpuprofilerate, nil when
        // profiling is off.
        void (*fn)(uintptr*, int32);
        // Profiling rate, 0 when profiling is off.
        int32 hz;
        // Only referenced by the commented-out traceback code in
        // runtime_sigprof.
        uintptr pcbuf[100];
} prof;
1549
1550 // Called if we receive a SIGPROF signal.
1551 void
1552 runtime_sigprof(uint8 *pc __attribute__ ((unused)),
1553                 uint8 *sp __attribute__ ((unused)),
1554                 uint8 *lr __attribute__ ((unused)),
1555                 G *gp __attribute__ ((unused)))
1556 {
1557         // int32 n;
1558
1559         if(prof.fn == nil || prof.hz == 0)
1560                 return;
1561
1562         runtime_lock(&prof);
1563         if(prof.fn == nil) {
1564                 runtime_unlock(&prof);
1565                 return;
1566         }
1567         // n = runtime_gentraceback(pc, sp, lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf));
1568         // if(n > 0)
1569         //      prof.fn(prof.pcbuf, n);
1570         runtime_unlock(&prof);
1571 }
1572
1573 // Arrange to call fn with a traceback hz times a second.
1574 void
1575 runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
1576 {
1577         // Force sane arguments.
1578         if(hz < 0)
1579                 hz = 0;
1580         if(hz == 0)
1581                 fn = nil;
1582         if(fn == nil)
1583                 hz = 0;
1584
1585         // Stop profiler on this cpu so that it is safe to lock prof.
1586         // if a profiling signal came in while we had prof locked,
1587         // it would deadlock.
1588         runtime_resetcpuprofiler(0);
1589
1590         runtime_lock(&prof);
1591         prof.fn = fn;
1592         prof.hz = hz;
1593         runtime_unlock(&prof);
1594         runtime_lock(&runtime_sched);
1595         runtime_sched.profilehz = hz;
1596         runtime_unlock(&runtime_sched);
1597
1598         if(hz != 0)
1599                 runtime_resetcpuprofiler(hz);
1600 }