libgomp/loop.c

   1 /* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
   2    Contributed by Richard Henderson <rth@redhat.com>.
   3
   4    This file is part of the GNU OpenMP Library (libgomp).
   5
   6    Libgomp is free software; you can redistribute it and/or modify it
   7    under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3, or (at your option)
   9    any later version.
  10
  11    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
  12    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  13    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  14    more details.
  15
  16    Under Section 7 of GPL version 3, you are granted additional
  17    permissions described in the GCC Runtime Library Exception, version
  18    3.1, as published by the Free Software Foundation.
  19
  20    You should have received a copy of the GNU General Public License and
  21    a copy of the GCC Runtime Library Exception along with this program;
  22    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  23    <http://www.gnu.org/licenses/>.  */
  24
  25 /* This file handles the LOOP (FOR/DO) construct.  */
  26
  27 #include <limits.h>
  28 #include <stdlib.h>
  29 #include "libgomp.h"
  30
  31
  32 /* Initialize the given work share construct from the given arguments.  */
  33
  34 static inline void
  35 gomp_loop_init (struct gomp_work_share *ws, long start, long end, long incr,
  36                 enum gomp_schedule_type sched, long chunk_size)
  37 {
  38   ws->sched = sched;
  39   ws->chunk_size = chunk_size;
  40   /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  41   ws->end = ((incr > 0 && start > end) || (incr < 0 && start < end))
  42             ? start : end;
  43   ws->incr = incr;
  44   ws->next = start;
  45   if (sched == GFS_DYNAMIC)
  46     {
  47       ws->chunk_size *= incr;
  48
  49 #ifdef HAVE_SYNC_BUILTINS
  50       {
  51         /* For dynamic scheduling prepare things to make each iteration
  52            faster.  */
  53         struct gomp_thread *thr = gomp_thread ();
  54         struct gomp_team *team = thr->ts.team;
  55         long nthreads = team ? team->nthreads : 1;
  56
  57         if (__builtin_expect (incr > 0, 1))
  58           {
  59             /* Cheap overflow protection.  */
  60             if (__builtin_expect ((nthreads | ws->chunk_size)
  61                                   >= 1UL << (sizeof (long)
  62                                              * __CHAR_BIT__ / 2 - 1), 0))
  63               ws->mode = 0;
  64             else
  65               ws->mode = ws->end < (LONG_MAX
  66                                     - (nthreads + 1) * ws->chunk_size);
  67           }
  68         /* Cheap overflow protection.  */
  69         else if (__builtin_expect ((nthreads | -ws->chunk_size)
  70                                    >= 1UL << (sizeof (long)
  71                                               * __CHAR_BIT__ / 2 - 1), 0))
  72           ws->mode = 0;
  73         else
  74           ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX;
  75       }
  76 #endif
  77     }
  78 }
  79
  80 /* The *_start routines are called when first encountering a loop construct
  81    that is not bound directly to a parallel construct.  The first thread
  82    that arrives will create the work-share construct; subsequent threads
  83    will see the construct exists and allocate work from it.
  84
  85    START, END, INCR are the bounds of the loop; due to the restrictions of
  86    OpenMP, these values must be the same in every thread.  This is not
  87    verified (nor is it entirely verifiable, since START is not necessarily
  88    retained intact in the work-share data structure).  CHUNK_SIZE is the
  89    scheduling parameter; again this must be identical in all threads.
  90
  91    Returns true if there's any work for this thread to perform.  If so,
  92    *ISTART and *IEND are filled with the bounds of the iteration block
  93    allocated to this thread.  Returns false if all work was assigned to
  94    other threads prior to this thread's arrival.  */
  95
  96 static bool
  97 gomp_loop_static_start (long start, long end, long incr, long chunk_size,
  98                         long *istart, long *iend)
  99 {
 100   struct gomp_thread *thr = gomp_thread ();
 101
 102   thr->ts.static_trip = 0;
 103   if (gomp_work_share_start (false))
 104     {
 105       gomp_loop_init (thr->ts.work_share, start, end, incr,
 106                       GFS_STATIC, chunk_size);
 107       gomp_work_share_init_done ();
 108     }
 109
 110   return !gomp_iter_static_next (istart, iend);
 111 }
 112
 113 static bool
 114 gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size,
 115                          long *istart, long *iend)
 116 {
 117   struct gomp_thread *thr = gomp_thread ();
 118   bool ret;
 119
 120   if (gomp_work_share_start (false))
 121     {
 122       gomp_loop_init (thr->ts.work_share, start, end, incr,
 123                       GFS_DYNAMIC, chunk_size);
 124       gomp_work_share_init_done ();
 125     }
 126
 127 #ifdef HAVE_SYNC_BUILTINS
 128   ret = gomp_iter_dynamic_next (istart, iend);
 129 #else
 130   gomp_mutex_lock (&thr->ts.work_share->lock);
 131   ret = gomp_iter_dynamic_next_locked (istart, iend);
 132   gomp_mutex_unlock (&thr->ts.work_share->lock);
 133 #endif
 134
 135   return ret;
 136 }
 137
 138 static bool
 139 gomp_loop_guided_start (long start, long end, long incr, long chunk_size,
 140                         long *istart, long *iend)
 141 {
 142   struct gomp_thread *thr = gomp_thread ();
 143   bool ret;
 144
 145   if (gomp_work_share_start (false))
 146     {
 147       gomp_loop_init (thr->ts.work_share, start, end, incr,
 148                       GFS_GUIDED, chunk_size);
 149       gomp_work_share_init_done ();
 150     }
 151
 152 #ifdef HAVE_SYNC_BUILTINS
 153   ret = gomp_iter_guided_next (istart, iend);
 154 #else
 155   gomp_mutex_lock (&thr->ts.work_share->lock);
 156   ret = gomp_iter_guided_next_locked (istart, iend);
 157   gomp_mutex_unlock (&thr->ts.work_share->lock);
 158 #endif
 159
 160   return ret;
 161 }
 162
 163 bool
 164 GOMP_loop_runtime_start (long start, long end, long incr,
 165                          long *istart, long *iend)
 166 {
 167   struct gomp_task_icv *icv = gomp_icv (false);
 168   switch (icv->run_sched_var)
 169     {
 170     case GFS_STATIC:
 171       return gomp_loop_static_start (start, end, incr, icv->run_sched_modifier,
 172                                      istart, iend);
 173     case GFS_DYNAMIC:
 174       return gomp_loop_dynamic_start (start, end, incr, icv->run_sched_modifier,
 175                                       istart, iend);
 176     case GFS_GUIDED:
 177       return gomp_loop_guided_start (start, end, incr, icv->run_sched_modifier,
 178                                      istart, iend);
 179     case GFS_AUTO:
 180       /* For now map to schedule(static), later on we could play with feedback
 181          driven choice.  */
 182       return gomp_loop_static_start (start, end, incr, 0, istart, iend);
 183     default:
 184       abort ();
 185     }
 186 }
 187
 188 /* The *_ordered_*_start routines are similar.  The only difference is that
 189    this work-share construct is initialized to expect an ORDERED section.  */
 190
 191 static bool
 192 gomp_loop_ordered_static_start (long start, long end, long incr,
 193                                 long chunk_size, long *istart, long *iend)
 194 {
 195   struct gomp_thread *thr = gomp_thread ();
 196
 197   thr->ts.static_trip = 0;
 198   if (gomp_work_share_start (true))
 199     {
 200       gomp_loop_init (thr->ts.work_share, start, end, incr,
 201                       GFS_STATIC, chunk_size);
 202       gomp_ordered_static_init ();
 203       gomp_work_share_init_done ();
 204     }
 205
 206   return !gomp_iter_static_next (istart, iend);
 207 }
 208
 209 static bool
 210 gomp_loop_ordered_dynamic_start (long start, long end, long incr,
 211                                  long chunk_size, long *istart, long *iend)
 212 {
 213   struct gomp_thread *thr = gomp_thread ();
 214   bool ret;
 215
 216   if (gomp_work_share_start (true))
 217     {
 218       gomp_loop_init (thr->ts.work_share, start, end, incr,
 219                       GFS_DYNAMIC, chunk_size);
 220       gomp_mutex_lock (&thr->ts.work_share->lock);
 221       gomp_work_share_init_done ();
 222     }
 223   else
 224     gomp_mutex_lock (&thr->ts.work_share->lock);
 225
 226   ret = gomp_iter_dynamic_next_locked (istart, iend);
 227   if (ret)
 228     gomp_ordered_first ();
 229   gomp_mutex_unlock (&thr->ts.work_share->lock);
 230
 231   return ret;
 232 }
 233
 234 static bool
 235 gomp_loop_ordered_guided_start (long start, long end, long incr,
 236                                 long chunk_size, long *istart, long *iend)
 237 {
 238   struct gomp_thread *thr = gomp_thread ();
 239   bool ret;
 240
 241   if (gomp_work_share_start (true))
 242     {
 243       gomp_loop_init (thr->ts.work_share, start, end, incr,
 244                       GFS_GUIDED, chunk_size);
 245       gomp_mutex_lock (&thr->ts.work_share->lock);
 246       gomp_work_share_init_done ();
 247     }
 248   else
 249     gomp_mutex_lock (&thr->ts.work_share->lock);
 250
 251   ret = gomp_iter_guided_next_locked (istart, iend);
 252   if (ret)
 253     gomp_ordered_first ();
 254   gomp_mutex_unlock (&thr->ts.work_share->lock);
 255
 256   return ret;
 257 }
 258
 259 bool
 260 GOMP_loop_ordered_runtime_start (long start, long end, long incr,
 261                                  long *istart, long *iend)
 262 {
 263   struct gomp_task_icv *icv = gomp_icv (false);
 264   switch (icv->run_sched_var)
 265     {
 266     case GFS_STATIC:
 267       return gomp_loop_ordered_static_start (start, end, incr,
 268                                              icv->run_sched_modifier,
 269                                              istart, iend);
 270     case GFS_DYNAMIC:
 271       return gomp_loop_ordered_dynamic_start (start, end, incr,
 272                                               icv->run_sched_modifier,
 273                                               istart, iend);
 274     case GFS_GUIDED:
 275       return gomp_loop_ordered_guided_start (start, end, incr,
 276                                              icv->run_sched_modifier,
 277                                              istart, iend);
 278     case GFS_AUTO:
 279       /* For now map to schedule(static), later on we could play with feedback
 280          driven choice.  */
 281       return gomp_loop_ordered_static_start (start, end, incr,
 282                                              0, istart, iend);
 283     default:
 284       abort ();
 285     }
 286 }
 287
 288 /* The *_next routines are called when the thread completes processing of
 289    the iteration block currently assigned to it.  If the work-share
 290    construct is bound directly to a parallel construct, then the iteration
 291    bounds may have been set up before the parallel.  In which case, this
 292    may be the first iteration for the thread.
 293
 294    Returns true if there is work remaining to be performed; *ISTART and
 295    *IEND are filled with a new iteration block.  Returns false if all work
 296    has been assigned.  */
 297
 298 static bool
 299 gomp_loop_static_next (long *istart, long *iend)
 300 {
 301   return !gomp_iter_static_next (istart, iend);
 302 }
 303
 304 static bool
 305 gomp_loop_dynamic_next (long *istart, long *iend)
 306 {
 307   bool ret;
 308
 309 #ifdef HAVE_SYNC_BUILTINS
 310   ret = gomp_iter_dynamic_next (istart, iend);
 311 #else
 312   struct gomp_thread *thr = gomp_thread ();
 313   gomp_mutex_lock (&thr->ts.work_share->lock);
 314   ret = gomp_iter_dynamic_next_locked (istart, iend);
 315   gomp_mutex_unlock (&thr->ts.work_share->lock);
 316 #endif
 317
 318   return ret;
 319 }
 320
 321 static bool
 322 gomp_loop_guided_next (long *istart, long *iend)
 323 {
 324   bool ret;
 325
 326 #ifdef HAVE_SYNC_BUILTINS
 327   ret = gomp_iter_guided_next (istart, iend);
 328 #else
 329   struct gomp_thread *thr = gomp_thread ();
 330   gomp_mutex_lock (&thr->ts.work_share->lock);
 331   ret = gomp_iter_guided_next_locked (istart, iend);
 332   gomp_mutex_unlock (&thr->ts.work_share->lock);
 333 #endif
 334
 335   return ret;
 336 }
 337
 338 bool
 339 GOMP_loop_runtime_next (long *istart, long *iend)
 340 {
 341   struct gomp_thread *thr = gomp_thread ();
 342
 343   switch (thr->ts.work_share->sched)
 344     {
 345     case GFS_STATIC:
 346     case GFS_AUTO:
 347       return gomp_loop_static_next (istart, iend);
 348     case GFS_DYNAMIC:
 349       return gomp_loop_dynamic_next (istart, iend);
 350     case GFS_GUIDED:
 351       return gomp_loop_guided_next (istart, iend);
 352     default:
 353       abort ();
 354     }
 355 }
 356
 357 /* The *_ordered_*_next routines are called when the thread completes
 358    processing of the iteration block currently assigned to it.
 359
 360    Returns true if there is work remaining to be performed; *ISTART and
 361    *IEND are filled with a new iteration block.  Returns false if all work
 362    has been assigned.  */
 363
 364 static bool
 365 gomp_loop_ordered_static_next (long *istart, long *iend)
 366 {
 367   struct gomp_thread *thr = gomp_thread ();
 368   int test;
 369
 370   gomp_ordered_sync ();
 371   gomp_mutex_lock (&thr->ts.work_share->lock);
 372   test = gomp_iter_static_next (istart, iend);
 373   if (test >= 0)
 374     gomp_ordered_static_next ();
 375   gomp_mutex_unlock (&thr->ts.work_share->lock);
 376
 377   return test == 0;
 378 }
 379
 380 static bool
 381 gomp_loop_ordered_dynamic_next (long *istart, long *iend)
 382 {
 383   struct gomp_thread *thr = gomp_thread ();
 384   bool ret;
 385
 386   gomp_ordered_sync ();
 387   gomp_mutex_lock (&thr->ts.work_share->lock);
 388   ret = gomp_iter_dynamic_next_locked (istart, iend);
 389   if (ret)
 390     gomp_ordered_next ();
 391   else
 392     gomp_ordered_last ();
 393   gomp_mutex_unlock (&thr->ts.work_share->lock);
 394
 395   return ret;
 396 }
 397
 398 static bool
 399 gomp_loop_ordered_guided_next (long *istart, long *iend)
 400 {
 401   struct gomp_thread *thr = gomp_thread ();
 402   bool ret;
 403
 404   gomp_ordered_sync ();
 405   gomp_mutex_lock (&thr->ts.work_share->lock);
 406   ret = gomp_iter_guided_next_locked (istart, iend);
 407   if (ret)
 408     gomp_ordered_next ();
 409   else
 410     gomp_ordered_last ();
 411   gomp_mutex_unlock (&thr->ts.work_share->lock);
 412
 413   return ret;
 414 }
 415
 416 bool
 417 GOMP_loop_ordered_runtime_next (long *istart, long *iend)
 418 {
 419   struct gomp_thread *thr = gomp_thread ();
 420
 421   switch (thr->ts.work_share->sched)
 422     {
 423     case GFS_STATIC:
 424     case GFS_AUTO:
 425       return gomp_loop_ordered_static_next (istart, iend);
 426     case GFS_DYNAMIC:
 427       return gomp_loop_ordered_dynamic_next (istart, iend);
 428     case GFS_GUIDED:
 429       return gomp_loop_ordered_guided_next (istart, iend);
 430     default:
 431       abort ();
 432     }
 433 }
 434
 435 /* The GOMP_parallel_loop_* routines pre-initialize a work-share construct
 436    to avoid one synchronization once we get into the loop.  */
 437
 438 static void
 439 gomp_parallel_loop_start (void (*fn) (void *), void *data,
 440                           unsigned num_threads, long start, long end,
 441                           long incr, enum gomp_schedule_type sched,
 442                           long chunk_size)
 443 {
 444   struct gomp_team *team;
 445
 446   num_threads = gomp_resolve_num_threads (num_threads, 0);
 447   team = gomp_new_team (num_threads);
 448   gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
 449   gomp_team_start (fn, data, num_threads, team);
 450 }
 451
 452 void
 453 GOMP_parallel_loop_static_start (void (*fn) (void *), void *data,
 454                                  unsigned num_threads, long start, long end,
 455                                  long incr, long chunk_size)
 456 {
 457   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 458                             GFS_STATIC, chunk_size);
 459 }
 460
 461 void
 462 GOMP_parallel_loop_dynamic_start (void (*fn) (void *), void *data,
 463                                   unsigned num_threads, long start, long end,
 464                                   long incr, long chunk_size)
 465 {
 466   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 467                             GFS_DYNAMIC, chunk_size);
 468 }
 469
 470 void
 471 GOMP_parallel_loop_guided_start (void (*fn) (void *), void *data,
 472                                  unsigned num_threads, long start, long end,
 473                                  long incr, long chunk_size)
 474 {
 475   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 476                             GFS_GUIDED, chunk_size);
 477 }
 478
 479 void
 480 GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data,
 481                                   unsigned num_threads, long start, long end,
 482                                   long incr)
 483 {
 484   struct gomp_task_icv *icv = gomp_icv (false);
 485   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 486                             icv->run_sched_var, icv->run_sched_modifier);
 487 }
 488
 489 /* The GOMP_loop_end* routines are called after the thread is told that
 490    all loop iterations are complete.  This first version synchronizes
 491    all threads; the nowait version does not.  */
 492
 493 void
 494 GOMP_loop_end (void)
 495 {
 496   gomp_work_share_end ();
 497 }
 498
 499 void
 500 GOMP_loop_end_nowait (void)
 501 {
 502   gomp_work_share_end_nowait ();
 503 }
 504
 505
 506 /* We use static functions above so that we're sure that the "runtime"
 507    function can defer to the proper routine without interposition.  We
 508    export the static function with a strong alias when possible, or with
 509    a wrapper function otherwise.  */
 510
 511 #ifdef HAVE_ATTRIBUTE_ALIAS
 512 extern __typeof(gomp_loop_static_start) GOMP_loop_static_start
 513         __attribute__((alias ("gomp_loop_static_start")));
 514 extern __typeof(gomp_loop_dynamic_start) GOMP_loop_dynamic_start
 515         __attribute__((alias ("gomp_loop_dynamic_start")));
 516 extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start
 517         __attribute__((alias ("gomp_loop_guided_start")));
 518
 519 extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
 520         __attribute__((alias ("gomp_loop_ordered_static_start")));
 521 extern __typeof(gomp_loop_ordered_dynamic_start) GOMP_loop_ordered_dynamic_start
 522         __attribute__((alias ("gomp_loop_ordered_dynamic_start")));
 523 extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start
 524         __attribute__((alias ("gomp_loop_ordered_guided_start")));
 525
 526 extern __typeof(gomp_loop_static_next) GOMP_loop_static_next
 527         __attribute__((alias ("gomp_loop_static_next")));
 528 extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next
 529         __attribute__((alias ("gomp_loop_dynamic_next")));
 530 extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next
 531         __attribute__((alias ("gomp_loop_guided_next")));
 532
 533 extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
 534         __attribute__((alias ("gomp_loop_ordered_static_next")));
 535 extern __typeof(gomp_loop_ordered_dynamic_next) GOMP_loop_ordered_dynamic_next
 536         __attribute__((alias ("gomp_loop_ordered_dynamic_next")));
 537 extern __typeof(gomp_loop_ordered_guided_next) GOMP_loop_ordered_guided_next
 538         __attribute__((alias ("gomp_loop_ordered_guided_next")));
 539 #else
 540 bool
 541 GOMP_loop_static_start (long start, long end, long incr, long chunk_size,
 542                         long *istart, long *iend)
 543 {
 544   return gomp_loop_static_start (start, end, incr, chunk_size, istart, iend);
 545 }
 546
 547 bool
 548 GOMP_loop_dynamic_start (long start, long end, long incr, long chunk_size,
 549                          long *istart, long *iend)
 550 {
 551   return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
 552 }
 553
 554 bool
 555 GOMP_loop_guided_start (long start, long end, long incr, long chunk_size,
 556                         long *istart, long *iend)
 557 {
 558   return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
 559 }
 560
 561 bool
 562 GOMP_loop_ordered_static_start (long start, long end, long incr,
 563                                 long chunk_size, long *istart, long *iend)
 564 {
 565   return gomp_loop_ordered_static_start (start, end, incr, chunk_size,
 566                                          istart, iend);
 567 }
 568
 569 bool
 570 GOMP_loop_ordered_dynamic_start (long start, long end, long incr,
 571                                  long chunk_size, long *istart, long *iend)
 572 {
 573   return gomp_loop_ordered_dynamic_start (start, end, incr, chunk_size,
 574                                           istart, iend);
 575 }
 576
 577 bool
 578 GOMP_loop_ordered_guided_start (long start, long end, long incr,
 579                                 long chunk_size, long *istart, long *iend)
 580 {
 581   return gomp_loop_ordered_guided_start (start, end, incr, chunk_size,
 582                                          istart, iend);
 583 }
 584
 585 bool
 586 GOMP_loop_static_next (long *istart, long *iend)
 587 {
 588   return gomp_loop_static_next (istart, iend);
 589 }
 590
 591 bool
 592 GOMP_loop_dynamic_next (long *istart, long *iend)
 593 {
 594   return gomp_loop_dynamic_next (istart, iend);
 595 }
 596
 597 bool
 598 GOMP_loop_guided_next (long *istart, long *iend)
 599 {
 600   return gomp_loop_guided_next (istart, iend);
 601 }
 602
 603 bool
 604 GOMP_loop_ordered_static_next (long *istart, long *iend)
 605 {
 606   return gomp_loop_ordered_static_next (istart, iend);
 607 }
 608
 609 bool
 610 GOMP_loop_ordered_dynamic_next (long *istart, long *iend)
 611 {
 612   return gomp_loop_ordered_dynamic_next (istart, iend);
 613 }
 614
 615 bool
 616 GOMP_loop_ordered_guided_next (long *istart, long *iend)
 617 {
 618   return gomp_loop_ordered_guided_next (istart, iend);
 619 }
 620 #endif