gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
   5    2011
   6    Free Software Foundation, Inc.
   7
   8 This file is part of GCC.
   9
  10 GCC is free software; you can redistribute it and/or modify it under
  11 the terms of the GNU General Public License as published by the Free
  12 Software Foundation; either version 3, or (at your option) any later
  13 version.
  14
  15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  17 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  18 for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with GCC; see the file COPYING3.  If not see
  22 <http://www.gnu.org/licenses/>.  */
  23
  24
  25 #include "config.h"
  26 #include "system.h"
  27 #include "coretypes.h"
  28 #include "tm.h"
  29 #include "diagnostic-core.h"
  30 #include "rtl.h"
  31 #include "tree.h"
  32 #include "tm_p.h"
  33 #include "flags.h"
  34 #include "insn-config.h"
  35 #include "expr.h"
  36 #include "optabs.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39 #include "df.h"
  40 #include "target.h"
  41 #include "expmed.h"
  42
     /* Default instance of struct target_expmed.  When SWITCHABLE_TARGET is
        nonzero, this_target_expmed is initialized to point at it.  */
  43 struct target_expmed default_target_expmed;
  44 #if SWITCHABLE_TARGET
  45 struct target_expmed *this_target_expmed = &default_target_expmed;
  46 #endif
  47
     /* Forward declarations of static (file-local) functions.  */
  48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  49                                    unsigned HOST_WIDE_INT,
  50                                    unsigned HOST_WIDE_INT,
  51                                    unsigned HOST_WIDE_INT,
  52                                    unsigned HOST_WIDE_INT,
  53                                    rtx);
  54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  55                                    unsigned HOST_WIDE_INT,
  56                                    unsigned HOST_WIDE_INT,
  57                                    unsigned HOST_WIDE_INT,
  58                                    rtx);
  59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  60                                     unsigned HOST_WIDE_INT,
  61                                     unsigned HOST_WIDE_INT,
  62                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  63 static rtx mask_rtx (enum machine_mode, int, int, int);
  64 static rtx lshift_value (enum machine_mode, rtx, int, int);
  65 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  66                                     unsigned HOST_WIDE_INT, int);
  67 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  68 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  69 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  70
  71 /* Test whether a value is zero or a power of two.
        X is evaluated twice, so the argument must be free of side effects.  */
  72 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  73
     /* Targets that do not define SLOW_UNALIGNED_ACCESS get STRICT_ALIGNMENT,
        independent of MODE and ALIGN.  */
  74 #ifndef SLOW_UNALIGNED_ACCESS
  75 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  76 #endif
  77
  78
  79 /* Reduce conditional compilation elsewhere.  For each of insv, extv and
        extzv that the target does not provide, define HAVE_foo as 0,
        CODE_FOR_foo as CODE_FOR_nothing and gen_foo as a stub that yields
        NULL_RTX, so the names can be used unconditionally.  */
  80 #ifndef HAVE_insv
  81 #define HAVE_insv       0
  82 #define CODE_FOR_insv   CODE_FOR_nothing
  83 #define gen_insv(a,b,c,d) NULL_RTX
  84 #endif
  85 #ifndef HAVE_extv
  86 #define HAVE_extv       0
  87 #define CODE_FOR_extv   CODE_FOR_nothing
  88 #define gen_extv(a,b,c,d) NULL_RTX
  89 #endif
  90 #ifndef HAVE_extzv
  91 #define HAVE_extzv      0
  92 #define CODE_FOR_extzv  CODE_FOR_nothing
  93 #define gen_extzv(a,b,c,d) NULL_RTX
  94 #endif
  95
     /* Fill in the operation cost tables used by this file.  Scratch rtx
        nodes (a register, PLUS, NEG, MULT, DIV, UDIV, MOD, a widening
        multiply, a high-part multiply and shift, shift-add and shift-subtract
        forms) are built in a local structure.  Then, for SPEED 0 and 1 and
        for each integer mode from the narrowest up, the nodes are given that
        mode and the value of set_src_cost for each is stored in the
        corresponding cost array.  Finally alg_hash is cleared if an earlier
        call already marked it as in use, and default_rtl_profile is called.  */
  96 void
  97 init_expmed (void)
  98 {
  99   struct
 100   {
         /* NOTE(review): the rtunion members after each rtx_def presumably
            reserve room for operand slots that struct rtx_def itself does
            not hold -- confirm against rtl.h.  */
 101     struct rtx_def reg;         rtunion reg_fld[2];
 102     struct rtx_def plus;        rtunion plus_fld1;
 103     struct rtx_def neg;
 104     struct rtx_def mult;        rtunion mult_fld1;
 105     struct rtx_def sdiv;        rtunion sdiv_fld1;
 106     struct rtx_def udiv;        rtunion udiv_fld1;
 107     struct rtx_def zext;
 108     struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
 109     struct rtx_def smod_32;     rtunion smod_32_fld1;
 110     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 111     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 112     struct rtx_def wide_trunc;
 113     struct rtx_def shift;       rtunion shift_fld1;
 114     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 115     struct rtx_def shift_add;   rtunion shift_add_fld1;
 116     struct rtx_def shift_sub0;  rtunion shift_sub0_fld1;
 117     struct rtx_def shift_sub1;  rtunion shift_sub1_fld1;
 118   } all;
 119
 120   rtx pow2[MAX_BITS_PER_WORD];
 121   rtx cint[MAX_BITS_PER_WORD];
 122   int m, n;
 123   enum machine_mode mode, wider_mode;
 124   int speed;
 125
 126
       /* Precompute the constants 2**M and M for every shift count M >= 1.
          Index 0 of both arrays is left unset and is never read below.  */
 127   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 128     {
 129       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 130       cint[m] = GEN_INT (m);
 131     }
       /* Start from all-zero nodes, then give each one its code and
          operands.  Modes are assigned later, inside the mode loop.  */
 132   memset (&all, 0, sizeof all);
 133
 134   PUT_CODE (&all.reg, REG);
 135   /* Avoid using hard regs in ways which may be unsupported.  */
 136   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
 137
 138   PUT_CODE (&all.plus, PLUS);
 139   XEXP (&all.plus, 0) = &all.reg;
 140   XEXP (&all.plus, 1) = &all.reg;
 141
 142   PUT_CODE (&all.neg, NEG);
 143   XEXP (&all.neg, 0) = &all.reg;
 144
 145   PUT_CODE (&all.mult, MULT);
 146   XEXP (&all.mult, 0) = &all.reg;
 147   XEXP (&all.mult, 1) = &all.reg;
 148
 149   PUT_CODE (&all.sdiv, DIV);
 150   XEXP (&all.sdiv, 0) = &all.reg;
 151   XEXP (&all.sdiv, 1) = &all.reg;
 152
 153   PUT_CODE (&all.udiv, UDIV);
 154   XEXP (&all.udiv, 0) = &all.reg;
 155   XEXP (&all.udiv, 1) = &all.reg;
 156
       /* Signed division and modulus by the constant 32.  cint[32] only
          exists when 32 < MAX_BITS_PER_WORD; otherwise make the constant
          directly.  NOTE(review): 32 presumably stands in for an arbitrary
          power of two, given the sdiv_pow2_cheap/smod_pow2_cheap names.  */
 157   PUT_CODE (&all.sdiv_32, DIV);
 158   XEXP (&all.sdiv_32, 0) = &all.reg;
 159   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 160
 161   PUT_CODE (&all.smod_32, MOD);
 162   XEXP (&all.smod_32, 0) = &all.reg;
 163   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 164
       /* Build (truncate (lshiftrt (mult (zero_extend reg) (zero_extend reg))
          N)).  The shift count N is filled in per mode inside the loop.  */
 165   PUT_CODE (&all.zext, ZERO_EXTEND);
 166   XEXP (&all.zext, 0) = &all.reg;
 167
 168   PUT_CODE (&all.wide_mult, MULT);
 169   XEXP (&all.wide_mult, 0) = &all.zext;
 170   XEXP (&all.wide_mult, 1) = &all.zext;
 171
 172   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 173   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 174
 175   PUT_CODE (&all.wide_trunc, TRUNCATE);
 176   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 177
       /* The second operand of shift and of shift_mult is set for each
          shift count in the innermost loop below.  */
 178   PUT_CODE (&all.shift, ASHIFT);
 179   XEXP (&all.shift, 0) = &all.reg;
 180
 181   PUT_CODE (&all.shift_mult, MULT);
 182   XEXP (&all.shift_mult, 0) = &all.reg;
 183
 184   PUT_CODE (&all.shift_add, PLUS);
 185   XEXP (&all.shift_add, 0) = &all.shift_mult;
 186   XEXP (&all.shift_add, 1) = &all.reg;
 187
 188   PUT_CODE (&all.shift_sub0, MINUS);
 189   XEXP (&all.shift_sub0, 0) = &all.shift_mult;
 190   XEXP (&all.shift_sub0, 1) = &all.reg;
 191
 192   PUT_CODE (&all.shift_sub1, MINUS);
 193   XEXP (&all.shift_sub1, 0) = &all.reg;
 194   XEXP (&all.shift_sub1, 1) = &all.shift_mult;
 195
 196   for (speed = 0; speed < 2; speed++)
 197     {
           /* NOTE(review): presumably makes the cost queries below behave
              as for hot code when SPEED is 1 -- confirm.  */
 198       crtl->maybe_hot_insn_p = speed;
 199       zero_cost[speed] = set_src_cost (const0_rtx, speed);
 200
 201       for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 202            mode != VOIDmode;
 203            mode = GET_MODE_WIDER_MODE (mode))
 204         {
               /* Retarget every scratch node that is costed in MODE.  */
 205           PUT_MODE (&all.reg, mode);
 206           PUT_MODE (&all.plus, mode);
 207           PUT_MODE (&all.neg, mode);
 208           PUT_MODE (&all.mult, mode);
 209           PUT_MODE (&all.sdiv, mode);
 210           PUT_MODE (&all.udiv, mode);
 211           PUT_MODE (&all.sdiv_32, mode);
 212           PUT_MODE (&all.smod_32, mode);
 213           PUT_MODE (&all.wide_trunc, mode);
 214           PUT_MODE (&all.shift, mode);
 215           PUT_MODE (&all.shift_mult, mode);
 216           PUT_MODE (&all.shift_add, mode);
 217           PUT_MODE (&all.shift_sub0, mode);
 218           PUT_MODE (&all.shift_sub1, mode);
 219
 220           add_cost[speed][mode] = set_src_cost (&all.plus, speed);
 221           neg_cost[speed][mode] = set_src_cost (&all.neg, speed);
 222           mul_cost[speed][mode] = set_src_cost (&all.mult, speed);
 223           sdiv_cost[speed][mode] = set_src_cost (&all.sdiv, speed);
 224           udiv_cost[speed][mode] = set_src_cost (&all.udiv, speed);
 225
               /* "Cheap" means no more than two (division) or four (modulus)
                  additions in this mode.  */
 226           sdiv_pow2_cheap[speed][mode] = (set_src_cost (&all.sdiv_32, speed)
 227                                           <= 2 * add_cost[speed][mode]);
 228           smod_pow2_cheap[speed][mode] = (set_src_cost (&all.smod_32, speed)
 229                                           <= 4 * add_cost[speed][mode]);
 230
               /* Widening and high-part multiply costs are recorded only when
                  a wider mode exists.  Note that mul_widen_cost is indexed by
                  the wider mode and mul_highpart_cost by MODE.  */
 231           wider_mode = GET_MODE_WIDER_MODE (mode);
 232           if (wider_mode != VOIDmode)
 233             {
 234               PUT_MODE (&all.zext, wider_mode);
 235               PUT_MODE (&all.wide_mult, wider_mode);
 236               PUT_MODE (&all.wide_lshr, wider_mode);
 237               XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 238
 239               mul_widen_cost[speed][wider_mode]
 240                 = set_src_cost (&all.wide_mult, speed);
 241               mul_highpart_cost[speed][mode]
 242                 = set_src_cost (&all.wide_trunc, speed);
 243             }
 244
               /* Shift count 0: the shift itself costs nothing, and the
                  shift-and-add/subtract forms cost a plain addition.  */
 245           shift_cost[speed][mode][0] = 0;
 246           shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
 247             = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
 248
               /* Limit the shift counts to the mode's width and to the
                  bounds of pow2[] and cint[].  */
 249           n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 250           for (m = 1; m < n; m++)
 251             {
 252               XEXP (&all.shift, 1) = cint[m];
 253               XEXP (&all.shift_mult, 1) = pow2[m];
 254
 255               shift_cost[speed][mode][m] = set_src_cost (&all.shift, speed);
 256               shiftadd_cost[speed][mode][m] = set_src_cost (&all.shift_add,
 257                                                             speed);
 258               shiftsub0_cost[speed][mode][m] = set_src_cost (&all.shift_sub0,
 259                                                              speed);
 260               shiftsub1_cost[speed][mode][m] = set_src_cost (&all.shift_sub1,
 261                                                              speed);
 262             }
 263         }
 264     }
       /* The first call only records that alg_hash is now in use; any later
          call clears it.  */
 265   if (alg_hash_used_p)
 266     memset (alg_hash, 0, sizeof (alg_hash));
 267   else
 268     alg_hash_used_p = true;
 269   default_rtl_profile ();
 270 }
 271
 272 /* Produce an rtx for the negation of X.  MODE is the mode wanted for
 273    the result, which matters when X is a CONST_INT.  Simplification is
 274    tried first; if it yields nothing, a negate is expanded via neg_optab.  */
 275
 276 rtx
 277 negate_rtx (enum machine_mode mode, rtx x)
 278 {
 279   rtx folded = simplify_unary_operation (NEG, mode, x, mode);
 280
 281   if (folded != 0)
 282     return folded;
 283
 284   return expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 285 }
 286
 287 /* Describe the target's insv, extv or extzv pattern named by PATTERN.
 288    When the matching HAVE_foo is zero the result is MAX_MACHINE_MODE.
 289    Otherwise an OPNO of -1 merely asks whether the insn exists, and the
 290    result is then VOIDmode.  For any other OPNO the result is the mode
 291    the pattern wants for that operand, with word_mode substituted when
 292    the pattern gives the operand no mode.  */
 293 enum machine_mode
 294 mode_for_extraction (enum extraction_pattern pattern, int opno)
 295 {
 296   const struct insn_data_d *pat_data;
 297   enum machine_mode wanted;
 298
 299   switch (pattern)
 300     {
 301     case EP_insv:
 302       if (!(HAVE_insv))
 303         return MAX_MACHINE_MODE;
 304       pat_data = &insn_data[CODE_FOR_insv];
 305       break;
 306
 307     case EP_extv:
 308       if (!(HAVE_extv))
 309         return MAX_MACHINE_MODE;
 310       pat_data = &insn_data[CODE_FOR_extv];
 311       break;
 312
 313     case EP_extzv:
 314       if (!(HAVE_extzv))
 315         return MAX_MACHINE_MODE;
 316       pat_data = &insn_data[CODE_FOR_extzv];
 317       break;
 318
 319     default:
 320       gcc_unreachable ();
 321     }
 322
 323   /* The caller only wanted to know that the pattern is available.  */
 324   if (opno == -1)
 325     return VOIDmode;
 326
 327   /* An operand the pattern leaves without a mode is reported as
 328      word_mode, a substitution every caller used to make itself.  */
 329   wanted = pat_data->operand[opno].mode;
 330   if (wanted == VOIDmode)
 331     wanted = word_mode;
 332
 333   return wanted;
 334 }
 335 \f
 336 /* A subroutine of store_bit_field, with the same arguments.  Return true
 337    if the operation could be implemented.
 338
 339    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 340    no other way of implementing the operation.  If FALLBACK_P is false,
 341    return false instead.

        The strategies are tried in this order: a vec_set pattern for vector
        elements; a plain move through a SUBREG or an adjusted memory
        reference; a movstrict pattern; word-by-word recursion for fields
        wider than a word; the insv pattern; insv on a register copy of a
        memory operand; and finally store_fixed_bit_field.  */
 342
 343 static bool
 344 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 345                    unsigned HOST_WIDE_INT bitnum,
 346                    unsigned HOST_WIDE_INT bitregion_start,
 347                    unsigned HOST_WIDE_INT bitregion_end,
 348                    enum machine_mode fieldmode,
 349                    rtx value, bool fallback_p)
 350 {
       /* UNIT is the granule in which OFFSET is counted: bytes when STR_RTX
          is memory, words otherwise.  */
 351   unsigned int unit
 352     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 353   unsigned HOST_WIDE_INT offset, bitpos;
 354   rtx op0 = str_rtx;
 355   int byte_offset;
 356   rtx orig_value;
 357
       /* Mode that insv wants for operand 3 (the value to insert), or
          MAX_MACHINE_MODE when the target has no insv.  */
 358   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 359
       /* Strip SUBREGs from the destination, folding each one's byte offset
          into BITNUM.  */
 360   while (GET_CODE (op0) == SUBREG)
 361     {
 362       /* The following line once was done only if WORDS_BIG_ENDIAN,
 363          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 364          meaningful at a much higher level; when structures are copied
 365          between memory and regs, the higher-numbered regs
 366          always get higher addresses.  */
 367       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 368       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 369
 370       byte_offset = 0;
 371
 372       /* Paradoxical subregs need special handling on big endian machines.  */
 373       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 374         {
               /* DIFFERENCE is negative here, since the inner mode is the
                  smaller of the two.  */
 375           int difference = inner_mode_size - outer_mode_size;
 376
 377           if (WORDS_BIG_ENDIAN)
 378             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 379           if (BYTES_BIG_ENDIAN)
 380             byte_offset += difference % UNITS_PER_WORD;
 381         }
 382       else
 383         byte_offset = SUBREG_BYTE (op0);
 384
 385       bitnum += byte_offset * BITS_PER_UNIT;
 386       op0 = SUBREG_REG (op0);
 387     }
 388
 389   /* No action is needed if the target is a register and if the field
 390      lies completely outside that register.  This can occur if the source
 391      code contains an out-of-bounds access to a small array.  */
 392   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 393     return true;
 394
 395   /* Use vec_set patterns for inserting parts of vectors whenever
 396      available.  */
 397   if (VECTOR_MODE_P (GET_MODE (op0))
 398       && !MEM_P (op0)
 399       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 400       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 401       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 402       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 403     {
 404       struct expand_operand ops[3];
 405       enum machine_mode outermode = GET_MODE (op0);
 406       enum machine_mode innermode = GET_MODE_INNER (outermode);
 407       enum insn_code icode = optab_handler (vec_set_optab, outermode);
           /* Index of the vector element being replaced.  */
 408       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 409
 410       create_fixed_operand (&ops[0], op0);
 411       create_input_operand (&ops[1], value, innermode);
 412       create_integer_operand (&ops[2], pos);
 413       if (maybe_expand_insn (icode, 3, ops))
 414         return true;
           /* Otherwise fall through and try the generic strategies.  */
 415     }
 416
 417   /* If the target is a register, overwriting the entire object, or storing
 418      a full-word or multi-word field can be done with just a SUBREG.
 419
 420      If the target is memory, storing any naturally aligned field can be
 421      done with a simple store.  For targets that support fast unaligned
 422      memory, any naturally sized, unit aligned field can be done directly.  */
 423
       /* OFFSET counts the whole UNITs that precede the field and BITPOS the
          bits left over.  BYTE_OFFSET is the byte offset handed to the
          SUBREG routines below.  */
 424   offset = bitnum / unit;
 425   bitpos = bitnum % unit;
 426   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 427                 + (offset * UNITS_PER_WORD);
 428
 429   if (bitpos == 0
 430       && bitsize == GET_MODE_BITSIZE (fieldmode)
 431       && (!MEM_P (op0)
 432           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 433               || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 434              && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
 435                  || validate_subreg (fieldmode, GET_MODE (op0), op0,
 436                                      byte_offset)))
 437           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 438              || (offset * BITS_PER_UNIT % bitsize == 0
 439                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 440     {
 441       if (MEM_P (op0))
 442         op0 = adjust_address (op0, fieldmode, offset);
 443       else if (GET_MODE (op0) != fieldmode)
 444         op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 445                                    byte_offset);
 446       emit_move_insn (op0, value);
 447       return true;
 448     }
 449
 450   /* Make sure we are playing with integral modes.  Pun with subregs
 451      if we aren't.  This must come after the entire register case above,
 452      since that case is valid for any mode.  The following cases are only
 453      valid for integral modes.  */
 454   {
 455     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 456     if (imode != GET_MODE (op0))
 457       {
 458         if (MEM_P (op0))
 459           op0 = adjust_address (op0, imode, 0);
 460         else
 461           {
 462             gcc_assert (imode != BLKmode);
 463             op0 = gen_lowpart (imode, op0);
 464           }
 465       }
 466   }
 467
 468   /* We may be accessing data outside the field, which means
 469      we can alias adjacent data.  */
 470   /* ?? not always for C++0x memory model ?? */
 471   if (MEM_P (op0))
 472     {
           /* Work on a copy so the caller's MEM keeps its alias set and
              expression.  */
 473       op0 = shallow_copy_rtx (op0);
 474       set_mem_alias_set (op0, 0);
 475       set_mem_expr (op0, 0);
 476     }
 477
 478   /* If OP0 is a register, BITPOS must count within a word.
 479      But as we have it, it counts within whatever size OP0 now has.
 480      On a bigendian machine, these are not the same, so convert.  */
 481   if (BYTES_BIG_ENDIAN
 482       && !MEM_P (op0)
 483       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 484     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 485
 486   /* Storing an lsb-aligned field in a register
 487      can be done with a movestrict instruction.  */
 488
 489   if (!MEM_P (op0)
 490       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 491       && bitsize == GET_MODE_BITSIZE (fieldmode)
 492       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 493     {
 494       struct expand_operand ops[2];
 495       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 496       rtx arg0 = op0;
 497       unsigned HOST_WIDE_INT subreg_off;
 498
 499       if (GET_CODE (arg0) == SUBREG)
 500         {
 501           /* Else we've got some float mode source being extracted into
 502              a different float mode destination -- this combination of
 503              subregs results in Severe Tire Damage.  */
 504           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 505                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 506                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 507           arg0 = SUBREG_REG (arg0);
 508         }
 509
 510       subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 511                    + (offset * UNITS_PER_WORD);
 512       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 513         {
 514           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 515
 516           create_fixed_operand (&ops[0], arg0);
 517           /* Shrink the source operand to FIELDMODE.  */
 518           create_convert_operand_to (&ops[1], value, fieldmode, false);
 519           if (maybe_expand_insn (icode, 2, ops))
 520             return true;
 521         }
 522     }
 523
 524   /* Handle fields bigger than a word.  */
 525
 526   if (bitsize > BITS_PER_WORD)
 527     {
 528       /* Here we transfer the words of the field
 529          in the order least significant first.
 530          This is because the most significant word is the one which may
 531          be less than full.
 532          However, only do that if the value is not BLKmode.  */
 533
 534       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 535       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 536       unsigned int i;
 537       rtx last;
 538
 539       /* This is the mode we must force value to, so that there will be enough
 540          subwords to extract.  Note that fieldmode will often (always?) be
 541          VOIDmode, because that is what store_field uses to indicate that this
 542          is a bit field, but passing VOIDmode to operand_subword_force
 543          is not allowed.  */
 544       fieldmode = GET_MODE (value);
 545       if (fieldmode == VOIDmode)
 546         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 547
           /* Remember the current insn so that a failed word store can undo
              everything emitted for the earlier words.  */
 548       last = get_last_insn ();
 549       for (i = 0; i < nwords; i++)
 550         {
 551           /* If I is 0, use the low-order word in both field and target;
 552              if I is 1, use the next to lowest word; and so on.  */
 553           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 554           unsigned int bit_offset = (backwards
 555                                      ? MAX ((int) bitsize - ((int) i + 1)
 556                                             * BITS_PER_WORD,
 557                                             0)
 558                                      : (int) i * BITS_PER_WORD);
 559           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
               /* A full word, except possibly for the last iteration.  */
 560           unsigned HOST_WIDE_INT new_bitsize =
 561             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 562
 563           /* If the remaining chunk doesn't have full wordsize we have
 564              to make sure that for big endian machines the higher order
 565              bits are used.  */
 566           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN)
 567             value_word = extract_bit_field (value_word, new_bitsize, 0,
 568                                             true, false, NULL_RTX,
 569                                             BLKmode, word_mode);
 570
 571           if (!store_bit_field_1 (op0, new_bitsize,
 572                                   bitnum + bit_offset,
 573                                   bitregion_start, bitregion_end,
 574                                   word_mode,
 575                                   value_word, fallback_p))
 576             {
 577               delete_insns_since (last);
 578               return false;
 579             }
 580         }
 581       return true;
 582     }
 583
 584   /* From here on we can assume that the field to be stored in is
 585      a full-word (whatever type that is), since it is shorter than a word.  */
 586
 587   /* OFFSET is the number of words or bytes (UNIT says which)
 588      from STR_RTX to the first word or byte containing part of the field.  */
 589
 590   if (!MEM_P (op0))
 591     {
 592       if (offset != 0
 593           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 594         {
 595           if (!REG_P (op0))
 596             {
 597               /* Since this is a destination (lvalue), we can't copy
 598                  it to a pseudo.  We can remove a SUBREG that does not
 599                  change the size of the operand.  Such a SUBREG may
 600                  have been added above.  */
 601               gcc_assert (GET_CODE (op0) == SUBREG
 602                           && (GET_MODE_SIZE (GET_MODE (op0))
 603                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 604               op0 = SUBREG_REG (op0);
 605             }
               /* Narrow OP0 to the single word that holds the field.  */
 606           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 607                                 op0, (offset * UNITS_PER_WORD));
 608         }
 609       offset = 0;
 610     }
 611
 612   /* If VALUE has a floating-point or complex mode, access it as an
 613      integer of the corresponding size.  This can occur on a machine
 614      with 64 bit registers that uses SFmode for float.  It can also
 615      occur for unaligned float or complex fields.  */
       /* ORIG_VALUE keeps the unconverted value; it is what the recursive
          call in the memory case below is given.  */
 616   orig_value = value;
 617   if (GET_MODE (value) != VOIDmode
 618       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 619       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 620     {
 621       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 622       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 623     }
 624
 625   /* Now OFFSET is nonzero only if OP0 is memory
 626      and is therefore always measured in bytes.  */
 627
 628   if (HAVE_insv
 629       && GET_MODE (value) != BLKmode
 630       && bitsize > 0
 631       && GET_MODE_BITSIZE (op_mode) >= bitsize
 632       /* Do not use insv for volatile bitfields when
 633          -fstrict-volatile-bitfields is in effect.  */
 634       && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
 635            && flag_strict_volatile_bitfields > 0)
 636       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 637             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 638     {
 639       struct expand_operand ops[4];
 640       int xbitpos = bitpos;
 641       rtx value1;
 642       rtx xop0 = op0;
 643       rtx last = get_last_insn ();
 644       bool copy_back = false;
 645
 646       /* Add OFFSET into OP0's address.  */
 647       if (MEM_P (xop0))
 648         xop0 = adjust_address (xop0, byte_mode, offset);
 649
 650       /* If xop0 is a register, we need it in OP_MODE
 651          to make it acceptable to the format of insv.  */
 652       if (GET_CODE (xop0) == SUBREG)
 653         /* We can't just change the mode, because this might clobber op0,
 654            and we will need the original value of op0 if insv fails.  */
 655         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 656       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 657         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 658
 659       /* If the destination is a paradoxical subreg such that we need a
 660          truncate to the inner mode, perform the insertion on a temporary and
 661          truncate the result to the original destination.  Note that we can't
 662          just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 663          X) 0)) is (reg:N X).  */
 664       if (GET_CODE (xop0) == SUBREG
 665           && REG_P (SUBREG_REG (xop0))
 666           && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 667                                               op_mode)))
 668         {
 669           rtx tem = gen_reg_rtx (op_mode);
 670           emit_move_insn (tem, xop0);
 671           xop0 = tem;
 672           copy_back = true;
 673         }
 674
 675       /* We have been counting XBITPOS within UNIT.
 676          Count instead within the size of the register.  */
 677       if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
 678         xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
 679
 680       unit = GET_MODE_BITSIZE (op_mode);
 681
 682       /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 683          "backwards" from the size of the unit we are inserting into.
 684          Otherwise, we count bits from the most significant on a
 685          BYTES/BITS_BIG_ENDIAN machine.  */
 686
 687       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 688         xbitpos = unit - bitsize - xbitpos;
 689
 690       /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 691       value1 = value;
 692       if (GET_MODE (value) != op_mode)
 693         {
 694           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 695             {
 696               /* Optimization: Don't bother really extending VALUE
 697                  if it has all the bits we will actually use.  However,
 698                  if we must narrow it, be sure we do it correctly.  */
 699
 700               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 701                 {
 702                   rtx tmp;
 703
 704                   tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 705                   if (! tmp)
 706                     tmp = simplify_gen_subreg (op_mode,
 707                                                force_reg (GET_MODE (value),
 708                                                           value1),
 709                                                GET_MODE (value), 0);
 710                   value1 = tmp;
 711                 }
 712               else
 713                 value1 = gen_lowpart (op_mode, value1);
 714             }
 715           else if (CONST_INT_P (value))
 716             value1 = gen_int_mode (INTVAL (value), op_mode);
 717           else
 718             /* Parse phase is supposed to make VALUE's data type
 719                match that of the component reference, which is a type
 720                at least as wide as the field; so VALUE should have
 721                a mode that corresponds to that type.  */
 722             gcc_assert (CONSTANT_P (value));
 723         }
 724
 725       create_fixed_operand (&ops[0], xop0);
 726       create_integer_operand (&ops[1], bitsize);
 727       create_integer_operand (&ops[2], xbitpos);
 728       create_input_operand (&ops[3], value1, op_mode);
 729       if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
 730         {
               /* The insertion was done on a temporary; write the result
                  back to the real destination.  */
 731           if (copy_back)
 732             convert_move (op0, xop0, true);
 733           return true;
 734         }
           /* insv could not be expanded: discard the insns emitted while
              setting it up and try the remaining strategies.  */
 735       delete_insns_since (last);
 736     }
 737
 738   /* If OP0 is a memory, try copying it to a register and seeing if a
 739      cheap register alternative is available.  */
 740   if (HAVE_insv && MEM_P (op0))
 741     {
 742       enum machine_mode bestmode;
 743       unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
 744
           /* A nonzero BITREGION_END caps the width of the access at the
              size of the bit region.  */
 745       if (bitregion_end)
 746         maxbits = bitregion_end - bitregion_start + 1;
 747
 748       /* Get the mode to use for inserting into this field.  If OP0 is
 749          BLKmode, get the smallest mode consistent with the alignment. If
 750          OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
 751          mode. Otherwise, use the smallest mode containing the field.  */
 752
 753       if (GET_MODE (op0) == BLKmode
 754           || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
 755           || (op_mode != MAX_MACHINE_MODE
 756               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
 757         bestmode = get_best_mode  (bitsize, bitnum,
 758                                   bitregion_start, bitregion_end,
 759                                   MEM_ALIGN (op0),
 760                                   (op_mode == MAX_MACHINE_MODE
 761                                    ? VOIDmode : op_mode),
 762                                   MEM_VOLATILE_P (op0));
 763       else
 764         bestmode = GET_MODE (op0);
 765
 766       if (bestmode != VOIDmode
 767           && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
 768           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 769                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 770         {
 771           rtx last, tempreg, xop0;
 772           unsigned HOST_WIDE_INT xoffset, xbitpos;
 773
 774           last = get_last_insn ();
 775
 776           /* Adjust address to point to the containing unit of
 777              that mode.  Compute the offset as a multiple of this unit,
 778              counting in bytes.  */
 779           unit = GET_MODE_BITSIZE (bestmode);
 780           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 781           xbitpos = bitnum % unit;
 782           xop0 = adjust_address (op0, bestmode, xoffset);
 783
 784           /* Fetch that unit, store the bitfield in it, then store
 785              the unit.  */
               /* FALLBACK_P is false in the recursive call, so a failure is
                  reported back here and the register copy is discarded.  */
 786           tempreg = copy_to_reg (xop0);
 787           if (store_bit_field_1 (tempreg, bitsize, xbitpos,
 788                                  bitregion_start, bitregion_end,
 789                                  fieldmode, orig_value, false))
 790             {
 791               emit_move_insn (xop0, tempreg);
 792               return true;
 793             }
 794           delete_insns_since (last);
 795         }
 796     }
 797
 798   if (!fallback_p)
 799     return false;
 800
       /* Nothing else worked; use the generic routine.  */
 801   store_fixed_bit_field (op0, offset, bitsize, bitpos,
 802                          bitregion_start, bitregion_end, value);
 803   return true;
 804 }
 805
 806 /* Generate code to store value from rtx VALUE
 807    into a bit-field within structure STR_RTX
 808    containing BITSIZE bits starting at bit BITNUM.
 809
 810    BITREGION_START is bitpos of the first bitfield in this region.
 811    BITREGION_END is the bitpos of the ending bitfield in this region.
 812    These two fields are 0, if the C++ memory model does not apply,
 813    or we are not interested in keeping track of bitfield regions.
 814
 815    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
 816
 817 void
 818 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 819                  unsigned HOST_WIDE_INT bitnum,
 820                  unsigned HOST_WIDE_INT bitregion_start,
 821                  unsigned HOST_WIDE_INT bitregion_end,
 822                  enum machine_mode fieldmode,
 823                  rtx value)
 824 {
 825   /* Under the C++0x memory model, we must not touch bits outside the
 826      bit region.  Adjust the address to start at the beginning of the
 827      bit region.  */
 828   if (MEM_P (str_rtx)
 829       && bitregion_start > 0)
 830     {
 831       enum machine_mode bestmode;
 832       enum machine_mode op_mode;
 833       unsigned HOST_WIDE_INT offset;
 834
 835       op_mode = mode_for_extraction (EP_insv, 3);
 836       if (op_mode == MAX_MACHINE_MODE)
 837         op_mode = VOIDmode;
 838
 839       offset = bitregion_start / BITS_PER_UNIT;
 840       bitnum -= bitregion_start;
 841       bitregion_end -= bitregion_start;
 842       bitregion_start = 0;
 843       bestmode = get_best_mode (bitsize, bitnum,
 844                                 bitregion_start, bitregion_end,
 845                                 MEM_ALIGN (str_rtx),
 846                                 op_mode,
 847                                 MEM_VOLATILE_P (str_rtx));
 848       str_rtx = adjust_address (str_rtx, bestmode, offset);
 849     }
 850
 851   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
 852                           bitregion_start, bitregion_end,
 853                           fieldmode, value, true))
 854     gcc_unreachable ();
 855 }
 856 \f
 857 /* Use shifts and boolean operations to store VALUE
 858    into a bit field of width BITSIZE
 859    in a memory location specified by OP0 except offset by OFFSET bytes.
 860      (OFFSET must be 0 if OP0 is a register.)
 861    The field starts at position BITPOS within the byte.
 862     (If OP0 is a register, it may be a full word or a narrower mode,
 863      but BITPOS still counts within a full word,
 864      which is significant on bigendian machines.)  */
 865
 866 static void
 867 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 868                        unsigned HOST_WIDE_INT bitsize,
 869                        unsigned HOST_WIDE_INT bitpos,
 870                        unsigned HOST_WIDE_INT bitregion_start,
 871                        unsigned HOST_WIDE_INT bitregion_end,
 872                        rtx value)
 873 {
 874   enum machine_mode mode;
 875   unsigned int total_bits = BITS_PER_WORD;
 876   rtx temp;
 877   int all_zero = 0;
 878   int all_one = 0;
 879
 880   /* There is a case not handled here:
 881      a structure with a known alignment of just a halfword
 882      and a field split across two aligned halfwords within the structure.
 883      Or likewise a structure with a known alignment of just a byte
 884      and a field split across two bytes.
 885      Such cases are not supposed to be able to occur.  */
 886
 887   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 888     {
 889       gcc_assert (!offset);
 890       /* Special treatment for a bit field split across two registers.  */
 891       if (bitsize + bitpos > BITS_PER_WORD)
 892         {
 893           store_split_bit_field (op0, bitsize, bitpos,
 894                                  bitregion_start, bitregion_end,
 895                                  value);
 896           return;
 897         }
 898     }
 899   else
 900     {
 901       unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
 902
 903       if (bitregion_end)
 904         maxbits = bitregion_end - bitregion_start + 1;
 905
 906       /* Get the proper mode to use for this field.  We want a mode that
 907          includes the entire field.  If such a mode would be larger than
 908          a word, we won't be doing the extraction the normal way.
 909          We don't want a mode bigger than the destination.  */
 910
 911       mode = GET_MODE (op0);
 912       if (GET_MODE_BITSIZE (mode) == 0
 913           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 914         mode = word_mode;
 915
 916       if (MEM_VOLATILE_P (op0)
 917           && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
 918           && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
 919           && flag_strict_volatile_bitfields > 0)
 920         mode = GET_MODE (op0);
 921       else
 922         mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 923                               bitregion_start, bitregion_end,
 924                               MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 925
 926       if (mode == VOIDmode)
 927         {
 928           /* The only way this should occur is if the field spans word
 929              boundaries.  */
 930           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 931                                  bitregion_start, bitregion_end, value);
 932           return;
 933         }
 934
 935       total_bits = GET_MODE_BITSIZE (mode);
 936
 937       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 938          be in the range 0 to total_bits-1, and put any excess bytes in
 939          OFFSET.  */
 940       if (bitpos >= total_bits)
 941         {
 942           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 943           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 944                      * BITS_PER_UNIT);
 945         }
 946
 947       /* Get ref to an aligned byte, halfword, or word containing the field.
 948          Adjust BITPOS to be position within a word,
 949          and OFFSET to be the offset of that word.
 950          Then alter OP0 to refer to that word.  */
 951       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 952       offset -= (offset % (total_bits / BITS_PER_UNIT));
 953       op0 = adjust_address (op0, mode, offset);
 954     }
 955
 956   mode = GET_MODE (op0);
 957
 958   /* Now MODE is either some integral mode for a MEM as OP0,
 959      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 960      The bit field is contained entirely within OP0.
 961      BITPOS is the starting bit number within OP0.
 962      (OP0's mode may actually be narrower than MODE.)  */
 963
 964   if (BYTES_BIG_ENDIAN)
 965       /* BITPOS is the distance between our msb
 966          and that of the containing datum.
 967          Convert it to the distance from the lsb.  */
 968       bitpos = total_bits - bitsize - bitpos;
 969
 970   /* Now BITPOS is always the distance between our lsb
 971      and that of OP0.  */
 972
 973   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 974      we must first convert its mode to MODE.  */
 975
 976   if (CONST_INT_P (value))
 977     {
 978       HOST_WIDE_INT v = INTVAL (value);
 979
 980       if (bitsize < HOST_BITS_PER_WIDE_INT)
 981         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 982
 983       if (v == 0)
 984         all_zero = 1;
 985       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 986                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 987                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 988         all_one = 1;
 989
 990       value = lshift_value (mode, value, bitpos, bitsize);
 991     }
 992   else
 993     {
 994       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 995                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 996
 997       if (GET_MODE (value) != mode)
 998         value = convert_to_mode (mode, value, 1);
 999
1000       if (must_and)
1001         value = expand_binop (mode, and_optab, value,
1002                               mask_rtx (mode, 0, bitsize, 0),
1003                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1004       if (bitpos > 0)
1005         value = expand_shift (LSHIFT_EXPR, mode, value,
1006                               bitpos, NULL_RTX, 1);
1007     }
1008
1009   /* Now clear the chosen bits in OP0,
1010      except that if VALUE is -1 we need not bother.  */
1011   /* We keep the intermediates in registers to allow CSE to combine
1012      consecutive bitfield assignments.  */
1013
1014   temp = force_reg (mode, op0);
1015
1016   if (! all_one)
1017     {
1018       temp = expand_binop (mode, and_optab, temp,
1019                            mask_rtx (mode, bitpos, bitsize, 1),
1020                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1021       temp = force_reg (mode, temp);
1022     }
1023
1024   /* Now logical-or VALUE into OP0, unless it is zero.  */
1025
1026   if (! all_zero)
1027     {
1028       temp = expand_binop (mode, ior_optab, temp, value,
1029                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1030       temp = force_reg (mode, temp);
1031     }
1032
1033   if (op0 != temp)
1034     {
1035       op0 = copy_rtx (op0);
1036       emit_move_insn (op0, temp);
1037     }
1038 }
1039 \f
1040 /* Store a bit field that is split across multiple accessible memory objects.
1041
1042    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1043    BITSIZE is the field width; BITPOS the position of its first bit
1044    (within the word).
1045    VALUE is the value to store.
1046
1047    This does not yet handle fields wider than BITS_PER_WORD.  */
1048
1049 static void
1050 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1051                        unsigned HOST_WIDE_INT bitpos,
1052                        unsigned HOST_WIDE_INT bitregion_start,
1053                        unsigned HOST_WIDE_INT bitregion_end,
1054                        rtx value)
1055 {
1056   unsigned int unit;
1057   unsigned int bitsdone = 0;
1058
1059   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1060      much at a time.  */
1061   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1062     unit = BITS_PER_WORD;
1063   else
1064     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1065
1066   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1067      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1068      that VALUE might be a floating-point constant.  */
1069   if (CONSTANT_P (value) && !CONST_INT_P (value))
1070     {
1071       rtx word = gen_lowpart_common (word_mode, value);
1072
1073       if (word && (value != word))
1074         value = word;
1075       else
1076         value = gen_lowpart_common (word_mode,
1077                                     force_reg (GET_MODE (value) != VOIDmode
1078                                                ? GET_MODE (value)
1079                                                : word_mode, value));
1080     }
1081
1082   while (bitsdone < bitsize)
1083     {
1084       unsigned HOST_WIDE_INT thissize;
1085       rtx part, word;
1086       unsigned HOST_WIDE_INT thispos;
1087       unsigned HOST_WIDE_INT offset;
1088
1089       offset = (bitpos + bitsdone) / unit;
1090       thispos = (bitpos + bitsdone) % unit;
1091
1092       /* THISSIZE must not overrun a word boundary.  Otherwise,
1093          store_fixed_bit_field will call us again, and we will mutually
1094          recurse forever.  */
1095       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1096       thissize = MIN (thissize, unit - thispos);
1097
1098       if (BYTES_BIG_ENDIAN)
1099         {
1100           int total_bits;
1101
1102           /* We must do an endian conversion exactly the same way as it is
1103              done in extract_bit_field, so that the two calls to
1104              extract_fixed_bit_field will have comparable arguments.  */
1105           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1106             total_bits = BITS_PER_WORD;
1107           else
1108             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1109
1110           /* Fetch successively less significant portions.  */
1111           if (CONST_INT_P (value))
1112             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1113                              >> (bitsize - bitsdone - thissize))
1114                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1115           else
1116             /* The args are chosen so that the last part includes the
1117                lsb.  Give extract_bit_field the value it needs (with
1118                endianness compensation) to fetch the piece we want.  */
1119             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1120                                             total_bits - bitsize + bitsdone,
1121                                             NULL_RTX, 1, false);
1122         }
1123       else
1124         {
1125           /* Fetch successively more significant portions.  */
1126           if (CONST_INT_P (value))
1127             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1128                              >> bitsdone)
1129                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1130           else
1131             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1132                                             bitsdone, NULL_RTX, 1, false);
1133         }
1134
1135       /* If OP0 is a register, then handle OFFSET here.
1136
1137          When handling multiword bitfields, extract_bit_field may pass
1138          down a word_mode SUBREG of a larger REG for a bitfield that actually
1139          crosses a word boundary.  Thus, for a SUBREG, we must find
1140          the current word starting from the base register.  */
1141       if (GET_CODE (op0) == SUBREG)
1142         {
1143           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1144           enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1145           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1146             word = word_offset ? const0_rtx : op0;
1147           else
1148             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1149                                           GET_MODE (SUBREG_REG (op0)));
1150           offset = 0;
1151         }
1152       else if (REG_P (op0))
1153         {
1154           enum machine_mode op0_mode = GET_MODE (op0);
1155           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1156             word = offset ? const0_rtx : op0;
1157           else
1158             word = operand_subword_force (op0, offset, GET_MODE (op0));
1159           offset = 0;
1160         }
1161       else
1162         word = op0;
1163
1164       /* OFFSET is in UNITs, and UNIT is in bits.
1165          store_fixed_bit_field wants offset in bytes.  If WORD is const0_rtx,
1166          it is just an out-of-bounds access.  Ignore it.  */
1167       if (word != const0_rtx)
1168         store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1169                                thispos, bitregion_start, bitregion_end, part);
1170       bitsdone += thissize;
1171     }
1172 }
1173 \f
1174 /* A subroutine of extract_bit_field_1 that converts return value X
1175    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1176    to extract_bit_field.  */
1177
1178 static rtx
1179 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1180                              enum machine_mode tmode, bool unsignedp)
1181 {
1182   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1183     return x;
1184
1185   /* If the x mode is not a scalar integral, first convert to the
1186      integer mode of that size and then access it as a floating-point
1187      value via a SUBREG.  */
1188   if (!SCALAR_INT_MODE_P (tmode))
1189     {
1190       enum machine_mode smode;
1191
1192       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1193       x = convert_to_mode (smode, x, unsignedp);
1194       x = force_reg (smode, x);
1195       return gen_lowpart (tmode, x);
1196     }
1197
1198   return convert_to_mode (tmode, x, unsignedp);
1199 }
1200
1201 /* A subroutine of extract_bit_field, with the same arguments.
1202    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1203    if we can find no other means of implementing the operation.
1204    if FALLBACK_P is false, return NULL instead.  */
1205
1206 static rtx
1207 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1208                      unsigned HOST_WIDE_INT bitnum,
1209                      int unsignedp, bool packedp, rtx target,
1210                      enum machine_mode mode, enum machine_mode tmode,
1211                      bool fallback_p)
1212 {
1213   unsigned int unit
1214     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1215   unsigned HOST_WIDE_INT offset, bitpos;
1216   rtx op0 = str_rtx;
1217   enum machine_mode int_mode;
1218   enum machine_mode ext_mode;
1219   enum machine_mode mode1;
1220   int byte_offset;
1221
1222   if (tmode == VOIDmode)
1223     tmode = mode;
1224
1225   while (GET_CODE (op0) == SUBREG)
1226     {
1227       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1228       op0 = SUBREG_REG (op0);
1229     }
1230
1231   /* If we have an out-of-bounds access to a register, just return an
1232      uninitialized register of the required mode.  This can occur if the
1233      source code contains an out-of-bounds access to a small array.  */
1234   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1235     return gen_reg_rtx (tmode);
1236
1237   if (REG_P (op0)
1238       && mode == GET_MODE (op0)
1239       && bitnum == 0
1240       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1241     {
1242       /* We're trying to extract a full register from itself.  */
1243       return op0;
1244     }
1245
1246   /* See if we can get a better vector mode before extracting.  */
1247   if (VECTOR_MODE_P (GET_MODE (op0))
1248       && !MEM_P (op0)
1249       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1250     {
1251       enum machine_mode new_mode;
1252
1253       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1254         new_mode = MIN_MODE_VECTOR_FLOAT;
1255       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1256         new_mode = MIN_MODE_VECTOR_FRACT;
1257       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1258         new_mode = MIN_MODE_VECTOR_UFRACT;
1259       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1260         new_mode = MIN_MODE_VECTOR_ACCUM;
1261       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1262         new_mode = MIN_MODE_VECTOR_UACCUM;
1263       else
1264         new_mode = MIN_MODE_VECTOR_INT;
1265
1266       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1267         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1268             && targetm.vector_mode_supported_p (new_mode))
1269           break;
1270       if (new_mode != VOIDmode)
1271         op0 = gen_lowpart (new_mode, op0);
1272     }
1273
1274   /* Use vec_extract patterns for extracting parts of vectors whenever
1275      available.  */
1276   if (VECTOR_MODE_P (GET_MODE (op0))
1277       && !MEM_P (op0)
1278       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1279       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1280           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1281     {
1282       struct expand_operand ops[3];
1283       enum machine_mode outermode = GET_MODE (op0);
1284       enum machine_mode innermode = GET_MODE_INNER (outermode);
1285       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1286       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1287
1288       create_output_operand (&ops[0], target, innermode);
1289       create_input_operand (&ops[1], op0, outermode);
1290       create_integer_operand (&ops[2], pos);
1291       if (maybe_expand_insn (icode, 3, ops))
1292         {
1293           target = ops[0].value;
1294           if (GET_MODE (target) != mode)
1295             return gen_lowpart (tmode, target);
1296           return target;
1297         }
1298     }
1299
1300   /* Make sure we are playing with integral modes.  Pun with subregs
1301      if we aren't.  */
1302   {
1303     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1304     if (imode != GET_MODE (op0))
1305       {
1306         if (MEM_P (op0))
1307           op0 = adjust_address (op0, imode, 0);
1308         else if (imode != BLKmode)
1309           {
1310             op0 = gen_lowpart (imode, op0);
1311
1312             /* If we got a SUBREG, force it into a register since we
1313                aren't going to be able to do another SUBREG on it.  */
1314             if (GET_CODE (op0) == SUBREG)
1315               op0 = force_reg (imode, op0);
1316           }
1317         else if (REG_P (op0))
1318           {
1319             rtx reg, subreg;
1320             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1321                                             MODE_INT);
1322             reg = gen_reg_rtx (imode);
1323             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1324             emit_move_insn (subreg, op0);
1325             op0 = reg;
1326             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1327           }
1328         else
1329           {
1330             rtx mem = assign_stack_temp (GET_MODE (op0),
1331                                          GET_MODE_SIZE (GET_MODE (op0)), 0);
1332             emit_move_insn (mem, op0);
1333             op0 = adjust_address (mem, BLKmode, 0);
1334           }
1335       }
1336   }
1337
1338   /* We may be accessing data outside the field, which means
1339      we can alias adjacent data.  */
1340   if (MEM_P (op0))
1341     {
1342       op0 = shallow_copy_rtx (op0);
1343       set_mem_alias_set (op0, 0);
1344       set_mem_expr (op0, 0);
1345     }
1346
1347   /* Extraction of a full-word or multi-word value from a structure
1348      in a register or aligned memory can be done with just a SUBREG.
1349      A subword value in the least significant part of a register
1350      can also be extracted with a SUBREG.  For this, we need the
1351      byte offset of the value in op0.  */
1352
1353   bitpos = bitnum % unit;
1354   offset = bitnum / unit;
1355   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1356
1357   /* If OP0 is a register, BITPOS must count within a word.
1358      But as we have it, it counts within whatever size OP0 now has.
1359      On a bigendian machine, these are not the same, so convert.  */
1360   if (BYTES_BIG_ENDIAN
1361       && !MEM_P (op0)
1362       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1363     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1364
1365   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1366      If that's wrong, the solution is to test for it and set TARGET to 0
1367      if needed.  */
1368
1369   /* Only scalar integer modes can be converted via subregs.  There is an
1370      additional problem for FP modes here in that they can have a precision
1371      which is different from the size.  mode_for_size uses precision, but
1372      we want a mode based on the size, so we must avoid calling it for FP
1373      modes.  */
1374   mode1  = (SCALAR_INT_MODE_P (tmode)
1375             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1376             : mode);
1377
1378   /* If the bitfield is volatile, we need to make sure the access
1379      remains on a type-aligned boundary.  */
1380   if (GET_CODE (op0) == MEM
1381       && MEM_VOLATILE_P (op0)
1382       && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1383       && flag_strict_volatile_bitfields > 0)
1384     goto no_subreg_mode_swap;
1385
1386   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1387         && bitpos % BITS_PER_WORD == 0)
1388        || (mode1 != BLKmode
1389            /* ??? The big endian test here is wrong.  This is correct
1390               if the value is in a register, and if mode_for_size is not
1391               the same mode as op0.  This causes us to get unnecessarily
1392               inefficient code from the Thumb port when -mbig-endian.  */
1393            && (BYTES_BIG_ENDIAN
1394                ? bitpos + bitsize == BITS_PER_WORD
1395                : bitpos == 0)))
1396       && ((!MEM_P (op0)
1397            && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
1398            && GET_MODE_SIZE (mode1) != 0
1399            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1400           || (MEM_P (op0)
1401               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1402                   || (offset * BITS_PER_UNIT % bitsize == 0
1403                       && MEM_ALIGN (op0) % bitsize == 0)))))
1404     {
1405       if (MEM_P (op0))
1406         op0 = adjust_address (op0, mode1, offset);
1407       else if (mode1 != GET_MODE (op0))
1408         {
1409           rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1410                                          byte_offset);
1411           if (sub == NULL)
1412             goto no_subreg_mode_swap;
1413           op0 = sub;
1414         }
1415       if (mode1 != mode)
1416         return convert_to_mode (tmode, op0, unsignedp);
1417       return op0;
1418     }
1419  no_subreg_mode_swap:
1420
1421   /* Handle fields bigger than a word.  */
1422
1423   if (bitsize > BITS_PER_WORD)
1424     {
1425       /* Here we transfer the words of the field
1426          in the order least significant first.
1427          This is because the most significant word is the one which may
1428          be less than full.  */
1429
1430       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1431       unsigned int i;
1432
1433       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1434         target = gen_reg_rtx (mode);
1435
1436       /* Indicate for flow that the entire target reg is being set.  */
1437       emit_clobber (target);
1438
1439       for (i = 0; i < nwords; i++)
1440         {
1441           /* If I is 0, use the low-order word in both field and target;
1442              if I is 1, use the next to lowest word; and so on.  */
1443           /* Word number in TARGET to use.  */
1444           unsigned int wordnum
1445             = (WORDS_BIG_ENDIAN
1446                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1447                : i);
1448           /* Offset from start of field in OP0.  */
1449           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1450                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1451                                                 * (int) BITS_PER_WORD))
1452                                      : (int) i * BITS_PER_WORD);
1453           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1454           rtx result_part
1455             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1456                                            bitsize - i * BITS_PER_WORD),
1457                                  bitnum + bit_offset, 1, false, target_part, mode,
1458                                  word_mode);
1459
1460           gcc_assert (target_part);
1461
1462           if (result_part != target_part)
1463             emit_move_insn (target_part, result_part);
1464         }
1465
1466       if (unsignedp)
1467         {
1468           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1469              need to be zero'd out.  */
1470           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1471             {
1472               unsigned int i, total_words;
1473
1474               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1475               for (i = nwords; i < total_words; i++)
1476                 emit_move_insn
1477                   (operand_subword (target,
1478                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1479                                     1, VOIDmode),
1480                    const0_rtx);
1481             }
1482           return target;
1483         }
1484
1485       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1486       target = expand_shift (LSHIFT_EXPR, mode, target,
1487                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1488       return expand_shift (RSHIFT_EXPR, mode, target,
1489                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1490     }
1491
1492   /* From here on we know the desired field is smaller than a word.  */
1493
1494   /* Check if there is a correspondingly-sized integer field, so we can
1495      safely extract it as one size of integer, if necessary; then
1496      truncate or extend to the size that is wanted; then use SUBREGs or
1497      convert_to_mode to get one of the modes we really wanted.  */
1498
1499   int_mode = int_mode_for_mode (tmode);
1500   if (int_mode == BLKmode)
1501     int_mode = int_mode_for_mode (mode);
1502   /* Should probably push op0 out to memory and then do a load.  */
1503   gcc_assert (int_mode != BLKmode);
1504
1505   /* OFFSET is the number of words or bytes (UNIT says which)
1506      from STR_RTX to the first word or byte containing part of the field.  */
1507   if (!MEM_P (op0))
1508     {
1509       if (offset != 0
1510           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1511         {
1512           if (!REG_P (op0))
1513             op0 = copy_to_reg (op0);
1514           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1515                                 op0, (offset * UNITS_PER_WORD));
1516         }
1517       offset = 0;
1518     }
1519
1520   /* Now OFFSET is nonzero only for memory operands.  */
1521   ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1522   if (ext_mode != MAX_MACHINE_MODE
1523       && bitsize > 0
1524       && GET_MODE_BITSIZE (ext_mode) >= bitsize
1525       /* Do not use extv/extzv for volatile bitfields when
1526          -fstrict-volatile-bitfields is in effect.  */
1527       && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
1528            && flag_strict_volatile_bitfields > 0)
1529       /* If op0 is a register, we need it in EXT_MODE to make it
1530          acceptable to the format of ext(z)v.  */
1531       && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1532       && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1533            && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1534     {
1535       struct expand_operand ops[4];
1536       unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1537       rtx xop0 = op0;
1538       rtx xtarget = target;
1539       rtx xspec_target = target;
1540       rtx xspec_target_subreg = 0;
1541
1542       /* If op0 is a register, we need it in EXT_MODE to make it
1543          acceptable to the format of ext(z)v.  */
1544       if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1545         xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1546       if (MEM_P (xop0))
1547         /* Get ref to first byte containing part of the field.  */
1548         xop0 = adjust_address (xop0, byte_mode, xoffset);
1549
1550       /* Now convert from counting within UNIT to counting in EXT_MODE.  */
1551       if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
1552         xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1553
1554       unit = GET_MODE_BITSIZE (ext_mode);
1555
1556       /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1557          "backwards" from the size of the unit we are extracting from.
1558          Otherwise, we count bits from the most significant on a
1559          BYTES/BITS_BIG_ENDIAN machine.  */
1560
1561       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1562         xbitpos = unit - bitsize - xbitpos;
1563
1564       if (xtarget == 0)
1565         xtarget = xspec_target = gen_reg_rtx (tmode);
1566
1567       if (GET_MODE (xtarget) != ext_mode)
1568         {
1569           /* Don't use LHS paradoxical subreg if explicit truncation is needed
1570              between the mode of the extraction (word_mode) and the target
1571              mode.  Instead, create a temporary and use convert_move to set
1572              the target.  */
1573           if (REG_P (xtarget)
1574               && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1575             {
1576               xtarget = gen_lowpart (ext_mode, xtarget);
1577               if (GET_MODE_PRECISION (ext_mode)
1578                   > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1579                 xspec_target_subreg = xtarget;
1580             }
1581           else
1582             xtarget = gen_reg_rtx (ext_mode);
1583         }
1584
1585       create_output_operand (&ops[0], xtarget, ext_mode);
1586       create_fixed_operand (&ops[1], xop0);
1587       create_integer_operand (&ops[2], bitsize);
1588       create_integer_operand (&ops[3], xbitpos);
1589       if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1590                              4, ops))
1591         {
1592           xtarget = ops[0].value;
1593           if (xtarget == xspec_target)
1594             return xtarget;
1595           if (xtarget == xspec_target_subreg)
1596             return xspec_target;
1597           return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1598         }
1599     }
1600
1601   /* If OP0 is a memory, try copying it to a register and seeing if a
1602      cheap register alternative is available.  */
1603   if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1604     {
1605       enum machine_mode bestmode;
1606
1607       /* Get the mode to use for inserting into this field.  If
1608          OP0 is BLKmode, get the smallest mode consistent with the
1609          alignment. If OP0 is a non-BLKmode object that is no
1610          wider than EXT_MODE, use its mode. Otherwise, use the
1611          smallest mode containing the field.  */
1612
1613       if (GET_MODE (op0) == BLKmode
1614           || (ext_mode != MAX_MACHINE_MODE
1615               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1616         bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1617                                   (ext_mode == MAX_MACHINE_MODE
1618                                    ? VOIDmode : ext_mode),
1619                                   MEM_VOLATILE_P (op0));
1620       else
1621         bestmode = GET_MODE (op0);
1622
1623       if (bestmode != VOIDmode
1624           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1625                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1626         {
1627           unsigned HOST_WIDE_INT xoffset, xbitpos;
1628
1629           /* Compute the offset as a multiple of this unit,
1630              counting in bytes.  */
1631           unit = GET_MODE_BITSIZE (bestmode);
1632           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1633           xbitpos = bitnum % unit;
1634
1635           /* Make sure the register is big enough for the whole field.  */
1636           if (xoffset * BITS_PER_UNIT + unit
1637               >= offset * BITS_PER_UNIT + bitsize)
1638             {
1639               rtx last, result, xop0;
1640
1641               last = get_last_insn ();
1642
1643               /* Fetch it to a register in that size.  */
1644               xop0 = adjust_address (op0, bestmode, xoffset);
1645               xop0 = force_reg (bestmode, xop0);
1646               result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1647                                             unsignedp, packedp, target,
1648                                             mode, tmode, false);
1649               if (result)
1650                 return result;
1651
1652               delete_insns_since (last);
1653             }
1654         }
1655     }
1656
1657   if (!fallback_p)
1658     return NULL;
1659
1660   target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1661                                     bitpos, target, unsignedp, packedp);
1662   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1663 }
1664
1665 /* Emit code that reads a bit-field of BITSIZE bits, beginning at bit
1666    BITNUM of STR_RTX, and return the rtx for where the value is placed.
1667    TARGET, when nonzero, is only a suggestion for where to put it.
1668
1669    STR_RTX is the object containing the field (a REG or MEM).
1670    UNSIGNEDP is nonzero if this is an unsigned bit field.
1671    PACKEDP is nonzero if the field has the packed attribute.
1672    MODE is the natural mode of the field value once extracted.
1673    TMODE is the mode the caller would like the value to have;
1674    the value may nevertheless come back in MODE.
1675
1676    When TARGET is given and storing there costs nothing extra, the
1677    value is stored in TARGET and TARGET is returned.  Otherwise the
1678    result is a REG of mode TMODE or MODE, with TMODE preferred if
1679    they are equally easy.  */
1680
1681 rtx
1682 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1683                    unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1684                    rtx target, enum machine_mode mode, enum machine_mode tmode)
1685 {
1686   rtx value = extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1687                                    packedp, target, mode, tmode, true);
1688   return value;
1689 }
1690 \f
1691 /* Extract a bit field using shifts and boolean operations.
1692    Returns an rtx to represent the value.
1693    OP0 addresses a register (word) or memory (byte).
1694    BITPOS says which bit within the word or byte the bit field starts in.
1695    OFFSET says how many bytes farther the bit field starts;
1696     it is 0 if OP0 is a register.
1697    BITSIZE says how many bits long the bit field is.
1698     (If OP0 is a register, it may be narrower than a full word,
1699      but BITPOS still counts within a full word,
1700      which is significant on bigendian machines.)
1701
1702    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1703    PACKEDP is true if the field has the packed attribute.
1704
1705    If TARGET is nonzero, attempts to store the value there
1706    and return TARGET, but this is not guaranteed.
1707    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.

        A register field that extends past BITS_PER_WORD, and a memory field
        for which no access mode is found, is handed to
        extract_split_bit_field instead.  For a signed field the value is
        computed in the narrowest integer mode that holds BITSIZE bits at
        their lsb-relative position, which need not be TMODE.  */
1708
1709 static rtx
1710 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1711                          unsigned HOST_WIDE_INT offset,
1712                          unsigned HOST_WIDE_INT bitsize,
1713                          unsigned HOST_WIDE_INT bitpos, rtx target,
1714                          int unsignedp, bool packedp)
1715 {
       /* Width in bits of the unit OP0 is accessed in: a full word for a
          register, replaced below by the width of MODE for memory.  */
1716   unsigned int total_bits = BITS_PER_WORD;
1717   enum machine_mode mode;
1718
1719   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1720     {
1721       /* Special treatment for a bit field split across two registers.  */
1722       if (bitsize + bitpos > BITS_PER_WORD)
1723         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1724     }
1725   else
1726     {
1727       /* Get the proper mode to use for this field.  We want a mode that
1728          includes the entire field.  If such a mode would be larger than
1729          a word, we won't be doing the extraction the normal way.  */
1730
           /* With -fstrict-volatile-bitfields a volatile field is accessed in
              the mode of OP0 itself, else that of TARGET, else TMODE, taking
              the first of these whose bit size is nonzero.  */
1731       if (MEM_VOLATILE_P (op0)
1732           && flag_strict_volatile_bitfields > 0)
1733         {
1734           if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1735             mode = GET_MODE (op0);
1736           else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1737             mode = GET_MODE (target);
1738           else
1739             mode = tmode;
1740         }
1741       else
1742         mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
1743                               MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1744
1745       if (mode == VOIDmode)
1746         /* The only way this should occur is if the field spans word
1747            boundaries.  */
1748         return extract_split_bit_field (op0, bitsize,
1749                                         bitpos + offset * BITS_PER_UNIT,
1750                                         unsignedp);
1751
1752       total_bits = GET_MODE_BITSIZE (mode);
1753
1754       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1755          be in the range 0 to total_bits-1, and put any excess bytes in
1756          OFFSET.  */
1757       if (bitpos >= total_bits)
1758         {
1759           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1760           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1761                      * BITS_PER_UNIT);
1762         }
1763
1764       /* If we're accessing a volatile MEM, we can't do the next
1765          alignment step if it results in a multi-word access where we
1766          otherwise wouldn't have one.  So, check for that case
1767          here.  */
1768       if (MEM_P (op0)
1769           && MEM_VOLATILE_P (op0)
1770           && flag_strict_volatile_bitfields > 0
1771           && bitpos + bitsize <= total_bits
1772           && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1773         {
               /* OFFSET and BITPOS are left unaligned here, so the
                  adjust_address below yields one access of MODE at OFFSET.
                  On STRICT_ALIGNMENT targets this is diagnosed, and a packed
                  field is extracted piecewise instead.  */
1774           if (STRICT_ALIGNMENT)
1775             {
1776               static bool informed_about_misalignment = false;
1777               bool warned;
1778
1779               if (packedp)
1780                 {
1781                   if (bitsize == total_bits)
1782                     warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1783                                          "multiple accesses to volatile structure member"
1784                                          " because of packed attribute");
1785                   else
1786                     warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1787                                          "multiple accesses to volatile structure bitfield"
1788                                          " because of packed attribute");
1789
                       /* WARNED is not consulted on this path; the field is
                          read through extract_split_bit_field right away.  */
1790                   return extract_split_bit_field (op0, bitsize,
1791                                                   bitpos + offset * BITS_PER_UNIT,
1792                                                   unsignedp);
1793                 }
1794
1795               if (bitsize == total_bits)
1796                 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1797                                      "mis-aligned access used for structure member");
1798               else
1799                 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1800                                      "mis-aligned access used for structure bitfield");
1801
                   /* Explain the trade-off only once (the flag is static), and
                      only if the warning above was actually issued.  */
1802               if (! informed_about_misalignment && warned)
1803                 {
1804                   informed_about_misalignment = true;
1805                   inform (input_location,
1806                           "when a volatile object spans multiple type-sized locations,"
1807                           " the compiler must choose between using a single mis-aligned access to"
1808                           " preserve the volatility, or using multiple aligned accesses to avoid"
1809                           " runtime faults; this code may fail at runtime if the hardware does"
1810                           " not allow this access");
1811                 }
1812             }
1813         }
1814       else
1815         {
1816
1817           /* Get ref to an aligned byte, halfword, or word containing the field.
1818              Adjust BITPOS to be position within a word,
1819              and OFFSET to be the offset of that word.
1820              Then alter OP0 to refer to that word.  */
1821           bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1822           offset -= (offset % (total_bits / BITS_PER_UNIT));
1823         }
1824
1825       op0 = adjust_address (op0, mode, offset);
1826     }
1827
       /* From here on MODE is the mode OP0 is actually read in.  */
1828   mode = GET_MODE (op0);
1829
1830   if (BYTES_BIG_ENDIAN)
1831     /* BITPOS is the distance between our msb and that of OP0.
1832        Convert it to the distance from the lsb.  */
1833     bitpos = total_bits - bitsize - bitpos;
1834
1835   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1836      We have reduced the big-endian case to the little-endian case.  */
1837
1838   if (unsignedp)
1839     {
1840       if (bitpos)
1841         {
1842           /* If the field does not already start at the lsb,
1843              shift it so it does.  */
1844           /* Maybe propagate the target for the shift.  */
1845           /* But not if we will return it--could confuse integrate.c.  */
1846           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
               /* The shift is done in MODE; TARGET is reused for it only
                  when TMODE is that same mode.  */
1847           if (tmode != mode) subtarget = 0;
1848           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1849         }
1850       /* Convert the value to the desired mode.  */
1851       if (mode != tmode)
1852         op0 = convert_to_mode (tmode, op0, 1);
1853
1854       /* Unless the msb of the field used to be the msb when we shifted,
1855          mask out the upper bits.  */
1856
1857       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1858         return expand_binop (GET_MODE (op0), and_optab, op0,
1859                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1860                              target, 1, OPTAB_LIB_WIDEN);
1861       return op0;
1862     }
1863
1864   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1865      then arithmetic-shift its lsb to the lsb of the word.  */
1866   op0 = force_reg (mode, op0);
1867
1868   /* Find the narrowest integer mode that contains the field.  */
1869
1870   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1871        mode = GET_MODE_WIDER_MODE (mode))
1872     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1873       {
1874         op0 = convert_to_mode (mode, op0, 0);
1875         break;
1876       }
1877
       /* The shifts below work in MODE, so TARGET is dropped unless MODE
          is TMODE.  */
1878   if (mode != tmode)
1879     target = 0;
1880
1881   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1882     {
1883       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1884       /* Maybe propagate the target for the shift.  */
1885       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1886       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1887     }
1888
       /* The final right shift is arithmetic (UNSIGNEDP argument 0); it
          brings the field's lsb down to the lsb of MODE.  */
1889   return expand_shift (RSHIFT_EXPR, mode, op0,
1890                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1891 }
1892 \f
1893 /* Build the constant (a CONST_INT or CONST_DOUBLE) of mode MODE whose
1894    bits are BITSIZE ones sitting above BITPOS zeros.  If COMPLEMENT is
1895    nonzero, return the bitwise complement of that pattern instead.
1896    The mask is truncated if necessary to the width of MODE, and it is
1897    zero-extended if BITSIZE+BITPOS is too small for MODE.  */
1898
1899 static rtx
1900 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1901 {
1902   /* BITSIZE low-order ones, then moved up by BITPOS bit positions.  */
1903   double_int ones = double_int_mask (bitsize);
1904   double_int bits
1905     = double_int_lshift (ones, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1906
1907   if (complement)
1908     return immed_double_int_const (double_int_not (bits), mode);
1909
1910   return immed_double_int_const (bits, mode);
1911 }
1912
1913 /* Take the low BITSIZE bits of the integer constant VALUE, move them left
1914    by BITPOS bits, and return the result as a CONST_INT or CONST_DOUBLE.  */
1915
1916 static rtx
1917 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1918 {
1919   double_int raw = uhwi_to_double_int (INTVAL (value));
1920   double_int field = double_int_zext (raw, bitsize);
1921
1922   field = double_int_lshift (field, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1923
1924   return immed_double_int_const (field, mode);
1925 }
1926 \f
1927 /* Extract a bit field that is split across two words
1928    and return an RTX for the result.
1929
1930    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1931    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1932    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.

        The field is read in pieces, each lying within one UNIT of OP0,
        through extract_fixed_bit_field; the pieces are shifted into place
        and combined with inclusive-or in word_mode.  */
1933
1934 static rtx
1935 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1936                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1937 {
1938   unsigned int unit;
1939   unsigned int bitsdone = 0;
1940   rtx result = NULL_RTX;
1941   int first = 1;
1942
1943   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1944      much at a time.  */
1945   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1946     unit = BITS_PER_WORD;
1947   else
1948     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1949
1950   while (bitsdone < bitsize)
1951     {
1952       unsigned HOST_WIDE_INT thissize;
1953       rtx part, word;
1954       unsigned HOST_WIDE_INT thispos;
1955       unsigned HOST_WIDE_INT offset;
1956
           /* OFFSET counts the whole UNITs that precede the next bit to
              extract; THISPOS is that bit's position inside its UNIT.  */
1957       offset = (bitpos + bitsdone) / unit;
1958       thispos = (bitpos + bitsdone) % unit;
1959
1960       /* THISSIZE must not overrun a word boundary.  Otherwise,
1961          extract_fixed_bit_field will call us again, and we will mutually
1962          recurse forever.  */
1963       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1964       thissize = MIN (thissize, unit - thispos);
1965
1966       /* If OP0 is a register, then handle OFFSET here.
1967
1968          When handling multiword bitfields, extract_bit_field may pass
1969          down a word_mode SUBREG of a larger REG for a bitfield that actually
1970          crosses a word boundary.  Thus, for a SUBREG, we must find
1971          the current word starting from the base register.  */
1972       if (GET_CODE (op0) == SUBREG)
1973         {
1974           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1975           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1976                                         GET_MODE (SUBREG_REG (op0)));
1977           offset = 0;
1978         }
1979       else if (REG_P (op0))
1980         {
1981           word = operand_subword_force (op0, offset, GET_MODE (op0));
1982           offset = 0;
1983         }
1984       else
1985         word = op0;
1986
1987       /* Extract the parts in bit-counting order,
1988          whose meaning is determined by BYTES_PER_UNIT.
1989          OFFSET is in UNITs, and UNIT is in bits.
1990          extract_fixed_bit_field wants offset in bytes.  */
           /* NOTE(review): BYTES_PER_UNIT is not used anywhere in this
              function; the placement of each part below depends on
              BYTES_BIG_ENDIAN, which is presumably what the comment above
              means -- confirm.  Each part is extracted unsigned (UNSIGNEDP
              argument 1) with no target and PACKEDP false.  */
1991       part = extract_fixed_bit_field (word_mode, word,
1992                                       offset * unit / BITS_PER_UNIT,
1993                                       thissize, thispos, 0, 1, false);
1994       bitsdone += thissize;
1995
1996       /* Shift this part into place for the result.  */
1997       if (BYTES_BIG_ENDIAN)
1998         {
               /* Earlier parts are more significant: move this part left
                  past the BITSIZE - BITSDONE bits that are still to come.  */
1999           if (bitsize != bitsdone)
2000             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2001                                  bitsize - bitsdone, 0, 1);
2002         }
2003       else
2004         {
               /* Earlier parts are less significant: move this part left
                  past the bits that were gathered before it.  */
2005           if (bitsdone != thissize)
2006             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2007                                  bitsdone - thissize, 0, 1);
2008         }
2009
2010       if (first)
2011         result = part;
2012       else
2013         /* Combine the parts with bitwise or.  This works
2014            because we extracted each part as an unsigned bit field.  */
2015         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2016                                OPTAB_LIB_WIDEN);
2017
2018       first = 0;
2019     }
2020
2021   /* Unsigned bit field: we are done.  */
2022   if (unsignedp)
2023     return result;
2024   /* Signed bit field: sign-extend with two arithmetic shifts.  */
       /* NOTE(review): the shift count BITS_PER_WORD - BITSIZE assumes that
          BITSIZE does not exceed BITS_PER_WORD -- confirm against callers.  */
2025   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2026                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2027   return expand_shift (RSHIFT_EXPR, word_mode, result,
2028                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2029 }
2030 \f
2031 /* Compute an rvalue of mode MODE holding the low bits of SRC, whose
2032    mode is SRC_MODE, without changing the bit pattern.  If SRC_MODE is
2033    narrower than MODE the upper bits are filled with zeros.  Return the
2034    value on success and null on failure.  Failure happens when the
2035    layout of either mode is unknown (as for CC modes) or when the
2036    extraction would involve unprofitable mode punning.
2037
2038    The gen_lowpart* routines are conceptually lvalue or rvalue subreg
2039    operations, whereas this routine performs a computation:
2040
2041      - the returned value must always be considered an rvalue;
2042      - when MODE is wider than SRC_MODE, the extraction involves
2043        a zero extension;
2044      - when MODE is smaller than SRC_MODE, the extraction involves
2045        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).  */
2046
2047 rtx
2048 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2049 {
2050   enum machine_mode dest_imode, src_imode;
2051
2052   if (mode == src_mode)
2053     return src;
2054
2055   if (CONSTANT_P (src))
2056     {
2057       /* simplify_gen_subreg can't be used here: if simplify_subreg
2058          fails, it would happily create (subreg (symbol_ref)) or a
2059          similar invalid SUBREG.  */
2060       unsigned int low_byte = subreg_lowpart_offset (mode, src_mode);
2061       rtx folded = simplify_subreg (mode, src, src_mode, low_byte);
2062
2063       if (folded)
2064         return folded;
2065
2066       /* Failing that, take a SUBREG of SRC forced into a register,
2067          provided SRC has a mode and such a SUBREG is valid.  */
2068       if (GET_MODE (src) != VOIDmode
2069           && validate_subreg (mode, src_mode, src, low_byte))
2070         return gen_rtx_SUBREG (mode, force_reg (GET_MODE (src), src),
2071                                low_byte);
2072
2073       return NULL_RTX;
2074     }
2075
2076   /* The layout of CC modes is unknown, so give up on them.  */
2077   if (GET_MODE_CLASS (mode) == MODE_CC
2078       || GET_MODE_CLASS (src_mode) == MODE_CC)
2079     return NULL_RTX;
2080
2081   /* Tieable modes of equal size: use a plain lowpart if there is one.  */
2082   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2083       && MODES_TIEABLE_P (mode, src_mode))
2084     {
2085       rtx lowpart = gen_lowpart_common (mode, src);
2086       if (lowpart)
2087         return lowpart;
2088     }
2089
2090   /* Otherwise work through the modes int_mode_for_mode gives for
2091      SRC_MODE and MODE; both must exist and be tieable with them.  */
2092   src_imode = int_mode_for_mode (src_mode);
2093   dest_imode = int_mode_for_mode (mode);
2094   if (src_imode == BLKmode || dest_imode == BLKmode
2095       || !MODES_TIEABLE_P (src_imode, src_mode)
2096       || !MODES_TIEABLE_P (dest_imode, mode))
2097     return NULL_RTX;
2098
2099   /* Pun to the integer mode, convert unsigned, and pun to MODE.  */
2100   src = gen_lowpart (src_imode, src);
2101   src = convert_modes (dest_imode, src_imode, src, true);
2102   return gen_lowpart (mode, src);
2103 }
2104 \f
2105 /* Emit code for TARGET += INC, leaving the sum in TARGET.  */
2106
2107 void
2108 expand_inc (rtx target, rtx inc)
2109 {
2110   rtx sum = expand_binop (GET_MODE (target), add_optab, target, inc,
2111                           target, 0, OPTAB_LIB_WIDEN);
2112   /* expand_binop may hand back some other rtx; copy it into TARGET.  */
2113   if (sum != target)
2114     emit_move_insn (target, sum);
2115 }
2116
2117 /* Emit code for TARGET -= DEC, leaving the difference in TARGET.  */
2118
2119 void
2120 expand_dec (rtx target, rtx dec)
2121 {
2122   rtx diff = expand_binop (GET_MODE (target), sub_optab, target, dec,
2123                            target, 0, OPTAB_LIB_WIDEN);
2124   /* expand_binop may hand back some other rtx; copy it into TARGET.  */
2125   if (diff != target)
2126     emit_move_insn (target, diff);
2127 }
2128 \f
2129 /* Output a shift instruction for expression code CODE,
2130    with SHIFTED being the rtx for the value to shift,
2131    and AMOUNT the rtx for the amount to shift by.
2132    Store the result in the rtx TARGET, if that is convenient.
2133    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2134    Return the rtx for where the value is.

        CODE may also be LROTATE_EXPR or RROTATE_EXPR, in which case a
        rotate is emitted.  MODE is the mode the operation is done in.
        Several expansion methods are tried in turn; the function asserts
        that one of them produced a result.  */
2135
2136 static rtx
2137 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2138                 rtx amount, rtx target, int unsignedp)
2139 {
2140   rtx op1, temp = 0;
       /* LEFT: the operation moves bits leftwards.  ROTATE: CODE is a
          rotate rather than a shift.  */
2141   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2142   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2143   optab lshift_optab = ashl_optab;
2144   optab rshift_arith_optab = ashr_optab;
2145   optab rshift_uns_optab = lshr_optab;
2146   optab lrotate_optab = rotl_optab;
2147   optab rrotate_optab = rotr_optab;
2148   enum machine_mode op1_mode;
2149   int attempt;
2150   bool speed = optimize_insn_for_speed_p ();
2151
2152   op1 = amount;
2153   op1_mode = GET_MODE (op1);
2154
2155   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2156      shift amount is a vector, use the vector/vector shift patterns.  */
2157   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2158     {
2159       lshift_optab = vashl_optab;
2160       rshift_arith_optab = vashr_optab;
2161       rshift_uns_optab = vlshr_optab;
2162       lrotate_optab = vrotl_optab;
2163       rrotate_optab = vrotr_optab;
2164     }
2165
2166   /* Previously detected shift-counts computed by NEGATE_EXPR
2167      and shifted in the other direction; but that does not work
2168      on all machines.  */
2169
       /* When SHIFT_COUNT_TRUNCATED holds, an out-of-range constant count is
          reduced modulo the bit size of MODE, and a count that is a lowpart
          SUBREG of an integral-mode value is replaced by that inner value.  */
2170   if (SHIFT_COUNT_TRUNCATED)
2171     {
2172       if (CONST_INT_P (op1)
2173           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2174               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2175         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2176                        % GET_MODE_BITSIZE (mode));
2177       else if (GET_CODE (op1) == SUBREG
2178                && subreg_lowpart_p (op1)
2179                && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2180         op1 = SUBREG_REG (op1);
2181     }
2182
       /* A count of zero leaves the value unchanged.  */
2183   if (op1 == const0_rtx)
2184     return shifted;
2185
2186   /* Check whether its cheaper to implement a left shift by a constant
2187      bit count by a sequence of additions.  */
2188   if (code == LSHIFT_EXPR
2189       && CONST_INT_P (op1)
2190       && INTVAL (op1) > 0
2191       && INTVAL (op1) < GET_MODE_PRECISION (mode)
2192       && INTVAL (op1) < MAX_BITS_PER_WORD
2193       && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2194       && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2195     {
2196       int i;
           /* Each addition of the value to itself shifts it left by one
              bit, so INTVAL (op1) additions are emitted.  */
2197       for (i = 0; i < INTVAL (op1); i++)
2198         {
2199           temp = force_reg (mode, shifted);
2200           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2201                                   unsignedp, OPTAB_LIB_WIDEN);
2202         }
2203       return shifted;
2204     }
2205
       /* Try OPTAB_DIRECT, then OPTAB_WIDEN, then OPTAB_LIB_WIDEN, stopping
          as soon as one attempt yields a result in TEMP.  */
2206   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2207     {
2208       enum optab_methods methods;
2209
2210       if (attempt == 0)
2211         methods = OPTAB_DIRECT;
2212       else if (attempt == 1)
2213         methods = OPTAB_WIDEN;
2214       else
2215         methods = OPTAB_LIB_WIDEN;
2216
2217       if (rotate)
2218         {
2219           /* Widening does not work for rotation.  */
2220           if (methods == OPTAB_WIDEN)
2221             continue;
2222           else if (methods == OPTAB_LIB_WIDEN)
2223             {
2224               /* If we have been unable to open-code this by a rotation,
2225                  do it as the IOR of two shifts.  I.e., to rotate A
2226                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2227                  where C is the bitsize of A.
2228
2229                  It is theoretically possible that the target machine might
2230                  not be able to perform either shift and hence we would
2231                  be making two libcalls rather than just the one for the
2232                  shift (similarly if IOR could not be done).  We will allow
2233                  this extremely unlikely lossage to avoid complicating the
2234                  code below.  */
2235
2236               rtx subtarget = target == shifted ? 0 : target;
2237               rtx new_amount, other_amount;
2238               rtx temp1;
2239
                   /* NOTE(review): the constant case below uses
                      GET_MODE_BITSIZE while the variable case uses
                      GET_MODE_PRECISION, and for a variable count that is
                      zero at run time OTHER_AMOUNT is the full width of
                      MODE.  Confirm that both are intended.  */
2240               new_amount = op1;
2241               if (CONST_INT_P (op1))
2242                 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2243                                         - INTVAL (op1));
2244               else
2245                 other_amount
2246                   = simplify_gen_binary (MINUS, GET_MODE (op1),
2247                                          GEN_INT (GET_MODE_PRECISION (mode)),
2248                                          op1);
2249
2250               shifted = force_reg (mode, shifted);
2251
                   /* Both halves are logical shifts (UNSIGNEDP argument 1),
                      in opposite directions, combined with inclusive-or.  */
2252               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2253                                      mode, shifted, new_amount, 0, 1);
2254               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2255                                       mode, shifted, other_amount,
2256                                       subtarget, 1);
2257               return expand_binop (mode, ior_optab, temp, temp1, target,
2258                                    unsignedp, methods);
2259             }
2260
2261           temp = expand_binop (mode,
2262                                left ? lrotate_optab : rrotate_optab,
2263                                shifted, op1, target, unsignedp, methods);
2264         }
2265       else if (unsignedp)
2266         temp = expand_binop (mode,
2267                              left ? lshift_optab : rshift_uns_optab,
2268                              shifted, op1, target, unsignedp, methods);
2269
2270       /* Do arithmetic shifts.
2271          Also, if we are going to widen the operand, we can just as well
2272          use an arithmetic right-shift instead of a logical one.  */
2273       if (temp == 0 && ! rotate
2274           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2275         {
2276           enum optab_methods methods1 = methods;
2277
2278           /* If trying to widen a log shift to an arithmetic shift,
2279              don't accept an arithmetic shift of the same size.  */
2280           if (unsignedp)
2281             methods1 = OPTAB_MUST_WIDEN;
2282
2283           /* Arithmetic shift */
2284
2285           temp = expand_binop (mode,
2286                                left ? lshift_optab : rshift_arith_optab,
2287                                shifted, op1, target, unsignedp, methods1);
2288         }
2289
2290       /* We used to try extzv here for logical right shifts, but that was
2291          only useful for one machine, the VAX, and caused poor code
2292          generation there for lshrdi3, so the code was deleted and a
2293          define_expand for lshrsi3 was added to vax.md.  */
2294     }
2295
       /* Some attempt above must have produced a result.  */
2296   gcc_assert (temp);
2297   return temp;
2298 }
2299
2300 /* Output a shift instruction for expression code CODE,
2301    with SHIFTED being the rtx for the value to shift,
2302    and AMOUNT the amount to shift by.
2303    Store the result in the rtx TARGET, if that is convenient.
2304    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2305    Return the rtx for where the value is.  */
2306
2307 rtx
2308 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2309               int amount, rtx target, int unsignedp)
2310 {
2311   return expand_shift_1 (code, mode,
2312                          shifted, GEN_INT (amount), target, unsignedp);
2313 }
2314
2315 /* Output a shift instruction for expression code CODE,
2316    with SHIFTED being the rtx for the value to shift,
2317    and AMOUNT the tree for the amount to shift by.
2318    Store the result in the rtx TARGET, if that is convenient.
2319    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2320    Return the rtx for where the value is.  */
2321
2322 rtx
2323 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2324                        tree amount, rtx target, int unsignedp)
2325 {
2326   return expand_shift_1 (code, mode,
2327                          shifted, expand_normal (amount), target, unsignedp);
2328 }
2329
2330 \f
/* The kind of fixup that has to follow a constant multiplication
   sequence.  */
enum mult_variant
{
  /* No fixup is needed.  */
  basic_variant,
  /* The result should be negated.  */
  negate_variant,
  /* The multiplicand should be added to the result.  */
  add_variant
};
2336
2337 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2338                         const struct mult_cost *, enum machine_mode mode);
2339 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2340                                  struct algorithm *, enum mult_variant *, int);
2341 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2342                               const struct algorithm *, enum mult_variant);
2343 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2344                                                  int, rtx *, int *, int *);
2345 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2346 static rtx extract_high_half (enum machine_mode, rtx);
2347 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2348 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2349                                        int, int);
2350 /* Compute and return the best algorithm for multiplying by T.
2351    The algorithm must cost less than cost_limit
2352    If retval.cost >= COST_LIMIT, no algorithm was found and all
2353    other field of the returned struct are undefined.
2354    MODE is the machine mode of the multiplication.  */
2355
2356 static void
2357 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2358             const struct mult_cost *cost_limit, enum machine_mode mode)
2359 {
2360   int m;
2361   struct algorithm *alg_in, *best_alg;
2362   struct mult_cost best_cost;
2363   struct mult_cost new_limit;
2364   int op_cost, op_latency;
2365   unsigned HOST_WIDE_INT orig_t = t;
2366   unsigned HOST_WIDE_INT q;
2367   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2368   int hash_index;
2369   bool cache_hit = false;
2370   enum alg_code cache_alg = alg_zero;
2371   bool speed = optimize_insn_for_speed_p ();
2372
2373   /* Indicate that no algorithm is yet found.  If no algorithm
2374      is found, this value will be returned and indicate failure.  */
2375   alg_out->cost.cost = cost_limit->cost + 1;
2376   alg_out->cost.latency = cost_limit->latency + 1;
2377
2378   if (cost_limit->cost < 0
2379       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2380     return;
2381
2382   /* Restrict the bits of "t" to the multiplication's mode.  */
2383   t &= GET_MODE_MASK (mode);
2384
2385   /* t == 1 can be done in zero cost.  */
2386   if (t == 1)
2387     {
2388       alg_out->ops = 1;
2389       alg_out->cost.cost = 0;
2390       alg_out->cost.latency = 0;
2391       alg_out->op[0] = alg_m;
2392       return;
2393     }
2394
2395   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2396      fail now.  */
2397   if (t == 0)
2398     {
2399       if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2400         return;
2401       else
2402         {
2403           alg_out->ops = 1;
2404           alg_out->cost.cost = zero_cost[speed];
2405           alg_out->cost.latency = zero_cost[speed];
2406           alg_out->op[0] = alg_zero;
2407           return;
2408         }
2409     }
2410
2411   /* We'll be needing a couple extra algorithm structures now.  */
2412
2413   alg_in = XALLOCA (struct algorithm);
2414   best_alg = XALLOCA (struct algorithm);
2415   best_cost = *cost_limit;
2416
2417   /* Compute the hash index.  */
2418   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2419
2420   /* See if we already know what to do for T.  */
2421   if (alg_hash[hash_index].t == t
2422       && alg_hash[hash_index].mode == mode
2423       && alg_hash[hash_index].mode == mode
2424       && alg_hash[hash_index].speed == speed
2425       && alg_hash[hash_index].alg != alg_unknown)
2426     {
2427       cache_alg = alg_hash[hash_index].alg;
2428
2429       if (cache_alg == alg_impossible)
2430         {
2431           /* The cache tells us that it's impossible to synthesize
2432              multiplication by T within alg_hash[hash_index].cost.  */
2433           if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2434             /* COST_LIMIT is at least as restrictive as the one
2435                recorded in the hash table, in which case we have no
2436                hope of synthesizing a multiplication.  Just
2437                return.  */
2438             return;
2439
2440           /* If we get here, COST_LIMIT is less restrictive than the
2441              one recorded in the hash table, so we may be able to
2442              synthesize a multiplication.  Proceed as if we didn't
2443              have the cache entry.  */
2444         }
2445       else
2446         {
2447           if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2448             /* The cached algorithm shows that this multiplication
2449                requires more cost than COST_LIMIT.  Just return.  This
2450                way, we don't clobber this cache entry with
2451                alg_impossible but retain useful information.  */
2452             return;
2453
2454           cache_hit = true;
2455
2456           switch (cache_alg)
2457             {
2458             case alg_shift:
2459               goto do_alg_shift;
2460
2461             case alg_add_t_m2:
2462             case alg_sub_t_m2:
2463               goto do_alg_addsub_t_m2;
2464
2465             case alg_add_factor:
2466             case alg_sub_factor:
2467               goto do_alg_addsub_factor;
2468
2469             case alg_add_t2_m:
2470               goto do_alg_add_t2_m;
2471
2472             case alg_sub_t2_m:
2473               goto do_alg_sub_t2_m;
2474
2475             default:
2476               gcc_unreachable ();
2477             }
2478         }
2479     }
2480
2481   /* If we have a group of zero bits at the low-order part of T, try
2482      multiplying by the remaining bits and then doing a shift.  */
2483
2484   if ((t & 1) == 0)
2485     {
2486     do_alg_shift:
2487       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2488       if (m < maxm)
2489         {
2490           q = t >> m;
2491           /* The function expand_shift will choose between a shift and
2492              a sequence of additions, so the observed cost is given as
2493              MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]).  */
2494           op_cost = m * add_cost[speed][mode];
2495           if (shift_cost[speed][mode][m] < op_cost)
2496             op_cost = shift_cost[speed][mode][m];
2497           new_limit.cost = best_cost.cost - op_cost;
2498           new_limit.latency = best_cost.latency - op_cost;
2499           synth_mult (alg_in, q, &new_limit, mode);
2500
2501           alg_in->cost.cost += op_cost;
2502           alg_in->cost.latency += op_cost;
2503           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2504             {
2505               struct algorithm *x;
2506               best_cost = alg_in->cost;
2507               x = alg_in, alg_in = best_alg, best_alg = x;
2508               best_alg->log[best_alg->ops] = m;
2509               best_alg->op[best_alg->ops] = alg_shift;
2510             }
2511
2512           /* See if treating ORIG_T as a signed number yields a better
2513              sequence.  Try this sequence only for a negative ORIG_T
2514              as it would be useless for a non-negative ORIG_T.  */
2515           if ((HOST_WIDE_INT) orig_t < 0)
2516             {
2517               /* Shift ORIG_T as follows because a right shift of a
2518                  negative-valued signed type is implementation
2519                  defined.  */
2520               q = ~(~orig_t >> m);
2521               /* The function expand_shift will choose between a shift
2522                  and a sequence of additions, so the observed cost is
2523                  given as MIN (m * add_cost[speed][mode],
2524                  shift_cost[speed][mode][m]).  */
2525               op_cost = m * add_cost[speed][mode];
2526               if (shift_cost[speed][mode][m] < op_cost)
2527                 op_cost = shift_cost[speed][mode][m];
2528               new_limit.cost = best_cost.cost - op_cost;
2529               new_limit.latency = best_cost.latency - op_cost;
2530               synth_mult (alg_in, q, &new_limit, mode);
2531
2532               alg_in->cost.cost += op_cost;
2533               alg_in->cost.latency += op_cost;
2534               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2535                 {
2536                   struct algorithm *x;
2537                   best_cost = alg_in->cost;
2538                   x = alg_in, alg_in = best_alg, best_alg = x;
2539                   best_alg->log[best_alg->ops] = m;
2540                   best_alg->op[best_alg->ops] = alg_shift;
2541                 }
2542             }
2543         }
2544       if (cache_hit)
2545         goto done;
2546     }
2547
2548   /* If we have an odd number, add or subtract one.  */
2549   if ((t & 1) != 0)
2550     {
2551       unsigned HOST_WIDE_INT w;
2552
2553     do_alg_addsub_t_m2:
2554       for (w = 1; (w & t) != 0; w <<= 1)
2555         ;
2556       /* If T was -1, then W will be zero after the loop.  This is another
2557          case where T ends with ...111.  Handling this with (T + 1) and
2558          subtract 1 produces slightly better code and results in algorithm
2559          selection much faster than treating it like the ...0111 case
2560          below.  */
2561       if (w == 0
2562           || (w > 2
2563               /* Reject the case where t is 3.
2564                  Thus we prefer addition in that case.  */
2565               && t != 3))
2566         {
2567           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2568
2569           op_cost = add_cost[speed][mode];
2570           new_limit.cost = best_cost.cost - op_cost;
2571           new_limit.latency = best_cost.latency - op_cost;
2572           synth_mult (alg_in, t + 1, &new_limit, mode);
2573
2574           alg_in->cost.cost += op_cost;
2575           alg_in->cost.latency += op_cost;
2576           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2577             {
2578               struct algorithm *x;
2579               best_cost = alg_in->cost;
2580               x = alg_in, alg_in = best_alg, best_alg = x;
2581               best_alg->log[best_alg->ops] = 0;
2582               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2583             }
2584         }
2585       else
2586         {
2587           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2588
2589           op_cost = add_cost[speed][mode];
2590           new_limit.cost = best_cost.cost - op_cost;
2591           new_limit.latency = best_cost.latency - op_cost;
2592           synth_mult (alg_in, t - 1, &new_limit, mode);
2593
2594           alg_in->cost.cost += op_cost;
2595           alg_in->cost.latency += op_cost;
2596           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2597             {
2598               struct algorithm *x;
2599               best_cost = alg_in->cost;
2600               x = alg_in, alg_in = best_alg, best_alg = x;
2601               best_alg->log[best_alg->ops] = 0;
2602               best_alg->op[best_alg->ops] = alg_add_t_m2;
2603             }
2604         }
2605
2606       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2607          quickly with a - a * n for some appropriate constant n.  */
2608       m = exact_log2 (-orig_t + 1);
2609       if (m >= 0 && m < maxm)
2610         {
2611           op_cost = shiftsub1_cost[speed][mode][m];
2612           new_limit.cost = best_cost.cost - op_cost;
2613           new_limit.latency = best_cost.latency - op_cost;
2614           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2615
2616           alg_in->cost.cost += op_cost;
2617           alg_in->cost.latency += op_cost;
2618           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2619             {
2620               struct algorithm *x;
2621               best_cost = alg_in->cost;
2622               x = alg_in, alg_in = best_alg, best_alg = x;
2623               best_alg->log[best_alg->ops] = m;
2624               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2625             }
2626         }
2627
2628       if (cache_hit)
2629         goto done;
2630     }
2631
2632   /* Look for factors of t of the form
2633      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2634      If we find such a factor, we can multiply by t using an algorithm that
2635      multiplies by q, shift the result by m and add/subtract it to itself.
2636
2637      We search for large factors first and loop down, even if large factors
2638      are less probable than small; if we find a large factor we will find a
2639      good sequence quickly, and therefore be able to prune (by decreasing
2640      COST_LIMIT) the search.  */
2641
2642  do_alg_addsub_factor:
2643   for (m = floor_log2 (t - 1); m >= 2; m--)
2644     {
2645       unsigned HOST_WIDE_INT d;
2646
2647       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2648       if (t % d == 0 && t > d && m < maxm
2649           && (!cache_hit || cache_alg == alg_add_factor))
2650         {
2651           /* If the target has a cheap shift-and-add instruction use
2652              that in preference to a shift insn followed by an add insn.
2653              Assume that the shift-and-add is "atomic" with a latency
2654              equal to its cost, otherwise assume that on superscalar
2655              hardware the shift may be executed concurrently with the
2656              earlier steps in the algorithm.  */
2657           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2658           if (shiftadd_cost[speed][mode][m] < op_cost)
2659             {
2660               op_cost = shiftadd_cost[speed][mode][m];
2661               op_latency = op_cost;
2662             }
2663           else
2664             op_latency = add_cost[speed][mode];
2665
2666           new_limit.cost = best_cost.cost - op_cost;
2667           new_limit.latency = best_cost.latency - op_latency;
2668           synth_mult (alg_in, t / d, &new_limit, mode);
2669
2670           alg_in->cost.cost += op_cost;
2671           alg_in->cost.latency += op_latency;
2672           if (alg_in->cost.latency < op_cost)
2673             alg_in->cost.latency = op_cost;
2674           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2675             {
2676               struct algorithm *x;
2677               best_cost = alg_in->cost;
2678               x = alg_in, alg_in = best_alg, best_alg = x;
2679               best_alg->log[best_alg->ops] = m;
2680               best_alg->op[best_alg->ops] = alg_add_factor;
2681             }
2682           /* Other factors will have been taken care of in the recursion.  */
2683           break;
2684         }
2685
2686       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2687       if (t % d == 0 && t > d && m < maxm
2688           && (!cache_hit || cache_alg == alg_sub_factor))
2689         {
2690           /* If the target has a cheap shift-and-subtract insn use
2691              that in preference to a shift insn followed by a sub insn.
2692              Assume that the shift-and-sub is "atomic" with a latency
2693              equal to it's cost, otherwise assume that on superscalar
2694              hardware the shift may be executed concurrently with the
2695              earlier steps in the algorithm.  */
2696           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2697           if (shiftsub0_cost[speed][mode][m] < op_cost)
2698             {
2699               op_cost = shiftsub0_cost[speed][mode][m];
2700               op_latency = op_cost;
2701             }
2702           else
2703             op_latency = add_cost[speed][mode];
2704
2705           new_limit.cost = best_cost.cost - op_cost;
2706           new_limit.latency = best_cost.latency - op_latency;
2707           synth_mult (alg_in, t / d, &new_limit, mode);
2708
2709           alg_in->cost.cost += op_cost;
2710           alg_in->cost.latency += op_latency;
2711           if (alg_in->cost.latency < op_cost)
2712             alg_in->cost.latency = op_cost;
2713           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2714             {
2715               struct algorithm *x;
2716               best_cost = alg_in->cost;
2717               x = alg_in, alg_in = best_alg, best_alg = x;
2718               best_alg->log[best_alg->ops] = m;
2719               best_alg->op[best_alg->ops] = alg_sub_factor;
2720             }
2721           break;
2722         }
2723     }
2724   if (cache_hit)
2725     goto done;
2726
2727   /* Try shift-and-add (load effective address) instructions,
2728      i.e. do a*3, a*5, a*9.  */
2729   if ((t & 1) != 0)
2730     {
2731     do_alg_add_t2_m:
2732       q = t - 1;
2733       q = q & -q;
2734       m = exact_log2 (q);
2735       if (m >= 0 && m < maxm)
2736         {
2737           op_cost = shiftadd_cost[speed][mode][m];
2738           new_limit.cost = best_cost.cost - op_cost;
2739           new_limit.latency = best_cost.latency - op_cost;
2740           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2741
2742           alg_in->cost.cost += op_cost;
2743           alg_in->cost.latency += op_cost;
2744           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2745             {
2746               struct algorithm *x;
2747               best_cost = alg_in->cost;
2748               x = alg_in, alg_in = best_alg, best_alg = x;
2749               best_alg->log[best_alg->ops] = m;
2750               best_alg->op[best_alg->ops] = alg_add_t2_m;
2751             }
2752         }
2753       if (cache_hit)
2754         goto done;
2755
2756     do_alg_sub_t2_m:
2757       q = t + 1;
2758       q = q & -q;
2759       m = exact_log2 (q);
2760       if (m >= 0 && m < maxm)
2761         {
2762           op_cost = shiftsub0_cost[speed][mode][m];
2763           new_limit.cost = best_cost.cost - op_cost;
2764           new_limit.latency = best_cost.latency - op_cost;
2765           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2766
2767           alg_in->cost.cost += op_cost;
2768           alg_in->cost.latency += op_cost;
2769           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2770             {
2771               struct algorithm *x;
2772               best_cost = alg_in->cost;
2773               x = alg_in, alg_in = best_alg, best_alg = x;
2774               best_alg->log[best_alg->ops] = m;
2775               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2776             }
2777         }
2778       if (cache_hit)
2779         goto done;
2780     }
2781
2782  done:
2783   /* If best_cost has not decreased, we have not found any algorithm.  */
2784   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2785     {
2786       /* We failed to find an algorithm.  Record alg_impossible for
2787          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2788          we are asked to find an algorithm for T within the same or
2789          lower COST_LIMIT, we can immediately return to the
2790          caller.  */
2791       alg_hash[hash_index].t = t;
2792       alg_hash[hash_index].mode = mode;
2793       alg_hash[hash_index].speed = speed;
2794       alg_hash[hash_index].alg = alg_impossible;
2795       alg_hash[hash_index].cost = *cost_limit;
2796       return;
2797     }
2798
2799   /* Cache the result.  */
2800   if (!cache_hit)
2801     {
2802       alg_hash[hash_index].t = t;
2803       alg_hash[hash_index].mode = mode;
2804       alg_hash[hash_index].speed = speed;
2805       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2806       alg_hash[hash_index].cost.cost = best_cost.cost;
2807       alg_hash[hash_index].cost.latency = best_cost.latency;
2808     }
2809
2810   /* If we are getting a too long sequence for `struct algorithm'
2811      to record, make this search fail.  */
2812   if (best_alg->ops == MAX_BITS_PER_WORD)
2813     return;
2814
2815   /* Copy the algorithm from temporary space to the space at alg_out.
2816      We avoid using structure assignment because the majority of
2817      best_alg is normally undefined, and this is a critical function.  */
2818   alg_out->ops = best_alg->ops + 1;
2819   alg_out->cost = best_cost;
2820   memcpy (alg_out->op, best_alg->op,
2821           alg_out->ops * sizeof *alg_out->op);
2822   memcpy (alg_out->log, best_alg->log,
2823           alg_out->ops * sizeof *alg_out->log);
2824 }
2825 \f
2826 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2827    Try three variations:
2828
2829        - a shift/add sequence based on VAL itself
2830        - a shift/add sequence based on -VAL, followed by a negation
2831        - a shift/add sequence based on VAL - 1, followed by an addition.
2832
2833    Return true if the cheapest of these cost less than MULT_COST,
2834    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2835
2836 static bool
2837 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2838                      struct algorithm *alg, enum mult_variant *variant,
2839                      int mult_cost)
2840 {
2841   struct algorithm alg2;
2842   struct mult_cost limit;
2843   int op_cost;
2844   bool speed = optimize_insn_for_speed_p ();
2845
2846   /* Fail quickly for impossible bounds.  */
2847   if (mult_cost < 0)
2848     return false;
2849
2850   /* Ensure that mult_cost provides a reasonable upper bound.
2851      Any constant multiplication can be performed with less
2852      than 2 * bits additions.  */
2853   op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2854   if (mult_cost > op_cost)
2855     mult_cost = op_cost;
2856
2857   *variant = basic_variant;
2858   limit.cost = mult_cost;
2859   limit.latency = mult_cost;
2860   synth_mult (alg, val, &limit, mode);
2861
2862   /* This works only if the inverted value actually fits in an
2863      `unsigned int' */
2864   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2865     {
2866       op_cost = neg_cost[speed][mode];
2867       if (MULT_COST_LESS (&alg->cost, mult_cost))
2868         {
2869           limit.cost = alg->cost.cost - op_cost;
2870           limit.latency = alg->cost.latency - op_cost;
2871         }
2872       else
2873         {
2874           limit.cost = mult_cost - op_cost;
2875           limit.latency = mult_cost - op_cost;
2876         }
2877
2878       synth_mult (&alg2, -val, &limit, mode);
2879       alg2.cost.cost += op_cost;
2880       alg2.cost.latency += op_cost;
2881       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2882         *alg = alg2, *variant = negate_variant;
2883     }
2884
2885   /* This proves very useful for division-by-constant.  */
2886   op_cost = add_cost[speed][mode];
2887   if (MULT_COST_LESS (&alg->cost, mult_cost))
2888     {
2889       limit.cost = alg->cost.cost - op_cost;
2890       limit.latency = alg->cost.latency - op_cost;
2891     }
2892   else
2893     {
2894       limit.cost = mult_cost - op_cost;
2895       limit.latency = mult_cost - op_cost;
2896     }
2897
2898   synth_mult (&alg2, val - 1, &limit, mode);
2899   alg2.cost.cost += op_cost;
2900   alg2.cost.latency += op_cost;
2901   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2902     *alg = alg2, *variant = add_variant;
2903
2904   return MULT_COST_LESS (&alg->cost, mult_cost);
2905 }
2906
2907 /* A subroutine of expand_mult, used for constant multiplications.
2908    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2909    convenient.  Use the shift/add sequence described by ALG and apply
2910    the final fixup specified by VARIANT.  */
2911
2912 static rtx
2913 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2914                    rtx target, const struct algorithm *alg,
2915                    enum mult_variant variant)
2916 {
2917   HOST_WIDE_INT val_so_far;
2918   rtx insn, accum, tem;
2919   int opno;
2920   enum machine_mode nmode;
2921
2922   /* Avoid referencing memory over and over and invalid sharing
2923      on SUBREGs.  */
2924   op0 = force_reg (mode, op0);
2925
2926   /* ACCUM starts out either as OP0 or as a zero, depending on
2927      the first operation.  */
2928
2929   if (alg->op[0] == alg_zero)
2930     {
2931       accum = copy_to_mode_reg (mode, const0_rtx);
2932       val_so_far = 0;
2933     }
2934   else if (alg->op[0] == alg_m)
2935     {
2936       accum = copy_to_mode_reg (mode, op0);
2937       val_so_far = 1;
2938     }
2939   else
2940     gcc_unreachable ();
2941
2942   for (opno = 1; opno < alg->ops; opno++)
2943     {
2944       int log = alg->log[opno];
2945       rtx shift_subtarget = optimize ? 0 : accum;
2946       rtx add_target
2947         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2948            && !optimize)
2949           ? target : 0;
2950       rtx accum_target = optimize ? 0 : accum;
2951
2952       switch (alg->op[opno])
2953         {
2954         case alg_shift:
2955           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2956           /* REG_EQUAL note will be attached to the following insn.  */
2957           emit_move_insn (accum, tem);
2958           val_so_far <<= log;
2959           break;
2960
2961         case alg_add_t_m2:
2962           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2963           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2964                                  add_target ? add_target : accum_target);
2965           val_so_far += (HOST_WIDE_INT) 1 << log;
2966           break;
2967
2968         case alg_sub_t_m2:
2969           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2970           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2971                                  add_target ? add_target : accum_target);
2972           val_so_far -= (HOST_WIDE_INT) 1 << log;
2973           break;
2974
2975         case alg_add_t2_m:
2976           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2977                                 log, shift_subtarget, 0);
2978           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2979                                  add_target ? add_target : accum_target);
2980           val_so_far = (val_so_far << log) + 1;
2981           break;
2982
2983         case alg_sub_t2_m:
2984           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2985                                 log, shift_subtarget, 0);
2986           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2987                                  add_target ? add_target : accum_target);
2988           val_so_far = (val_so_far << log) - 1;
2989           break;
2990
2991         case alg_add_factor:
2992           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2993           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2994                                  add_target ? add_target : accum_target);
2995           val_so_far += val_so_far << log;
2996           break;
2997
2998         case alg_sub_factor:
2999           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3000           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3001                                  (add_target
3002                                   ? add_target : (optimize ? 0 : tem)));
3003           val_so_far = (val_so_far << log) - val_so_far;
3004           break;
3005
3006         default:
3007           gcc_unreachable ();
3008         }
3009
3010       /* Write a REG_EQUAL note on the last insn so that we can cse
3011          multiplication sequences.  Note that if ACCUM is a SUBREG,
3012          we've set the inner register and must properly indicate
3013          that.  */
3014
3015       tem = op0, nmode = mode;
3016       if (GET_CODE (accum) == SUBREG)
3017         {
3018           nmode = GET_MODE (SUBREG_REG (accum));
3019           tem = gen_lowpart (nmode, op0);
3020         }
3021
3022       insn = get_last_insn ();
3023       set_unique_reg_note (insn, REG_EQUAL,
3024                            gen_rtx_MULT (nmode, tem,
3025                                          GEN_INT (val_so_far)));
3026     }
3027
3028   if (variant == negate_variant)
3029     {
3030       val_so_far = -val_so_far;
3031       accum = expand_unop (mode, neg_optab, accum, target, 0);
3032     }
3033   else if (variant == add_variant)
3034     {
3035       val_so_far = val_so_far + 1;
3036       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3037     }
3038
3039   /* Compare only the bits of val and val_so_far that are significant
3040      in the result mode, to avoid sign-/zero-extension confusion.  */
3041   val &= GET_MODE_MASK (mode);
3042   val_so_far &= GET_MODE_MASK (mode);
3043   gcc_assert (val == val_so_far);
3044
3045   return accum;
3046 }
3047
3048 /* Perform a multiplication and return an rtx for the result.
3049    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3050    TARGET is a suggestion for where to store the result (an rtx).
        UNSIGNEDP is nonzero for an unsigned multiply.  When it is zero,
        flag_trapv is set and MODE is of class MODE_INT, the trapping optabs
        (negv_optab, smulv_optab) are used and no shift/add sequence is
        synthesized.
3051
3052    We check specially for a constant integer as OP1.
3053    If you want this check for OP0 as well, then before calling
3054    you should swap the two operands if OP0 would be constant.  */
3055
3056 rtx
3057 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3058              int unsignedp)
3059 {
3060   enum mult_variant variant;
3061   struct algorithm algorithm;
3062   int max_cost;
3063   bool speed = optimize_insn_for_speed_p ();
3064
3065   /* Handling const0_rtx here allows us to use zero as a rogue value for
3066      coeff below.  */
3067   if (op1 == const0_rtx)
3068     return const0_rtx;
3069   if (op1 == const1_rtx)
3070     return op0;
       /* Multiplying by -1 is a plain negation.  */
3071   if (op1 == constm1_rtx)
3072     return expand_unop (mode,
3073                         GET_MODE_CLASS (mode) == MODE_INT
3074                         && !unsignedp && flag_trapv
3075                         ? negv_optab : neg_optab,
3076                         op0, target, 0);
3077
3078   /* These are the operations that are potentially turned into a sequence
3079      of shifts and additions.  */
3080   if (SCALAR_INT_MODE_P (mode)
3081       && (unsignedp || !flag_trapv))
3082     {
3083       HOST_WIDE_INT coeff = 0;
           /* FAKE_REG stands in for OP0 when costing a multiply insn, so that
              the cost of OP0 itself is not counted.  */
3084       rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3085
3086       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3087          less than or equal in size to `unsigned int' this doesn't matter.
3088          If the mode is larger than `unsigned int', then synth_mult works
3089          only if the constant value exactly fits in an `unsigned int' without
3090          any truncation.  This means that multiplying by negative values does
3091          not work; results are off by 2^32 on a 32 bit machine.  */
3092
3093       if (CONST_INT_P (op1))
3094         {
3095           /* Attempt to handle multiplication of DImode values by negative
3096              coefficients, by performing the multiplication by a positive
3097              multiplier and then inverting the result.  */
3098           if (INTVAL (op1) < 0
3099               && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3100             {
                   /* Negate in the unsigned type: evaluating -INTVAL (op1) as a
                      signed HOST_WIDE_INT overflows (undefined behavior) when
                      OP1 is the most negative HOST_WIDE_INT.  */
                   unsigned HOST_WIDE_INT neg_coeff
                     = -(unsigned HOST_WIDE_INT) INTVAL (op1);

3101               /* It's safe to use NEG_COEFF even for INT_MIN, as the
3102                  result is interpreted as an unsigned coefficient.
3103                  Exclude cost of op0 from max_cost to match the cost
3104                  calculation of the synth_mult.  */
3105               max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3106                                         speed)
3107                           - neg_cost[speed][mode]);
3108               if (max_cost > 0
3109                   && choose_mult_variant (mode, neg_coeff, &algorithm,
3110                                           &variant, max_cost))
3111                 {
3112                   rtx temp = expand_mult_const (mode, op0, neg_coeff,
3113                                                 NULL_RTX, &algorithm,
3114                                                 variant);
3115                   return expand_unop (mode, neg_optab, temp, target, 0);
3116                 }
3117             }
3118           else coeff = INTVAL (op1);
3119         }
3120       else if (GET_CODE (op1) == CONST_DOUBLE)
3121         {
3122           /* If we are multiplying in DImode, it may still be a win
3123              to try to work with shifts and adds.  */
3124           if (CONST_DOUBLE_HIGH (op1) == 0
3125               && CONST_DOUBLE_LOW (op1) > 0)
3126             coeff = CONST_DOUBLE_LOW (op1);
3127           else if (CONST_DOUBLE_LOW (op1) == 0
3128                    && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3129             {
                   /* The constant is a single bit in the high word, so the
                      multiply is a left shift by that bit's overall position.  */
3130               int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3131                           + HOST_BITS_PER_WIDE_INT;
3132               return expand_shift (LSHIFT_EXPR, mode, op0,
3133                                    shift, target, unsignedp);
3134             }
3135         }
3136
3137       /* We used to test optimize here, on the grounds that it's better to
3138          produce a smaller program when -O is not used.  But this causes
3139          such a terrible slowdown sometimes that it seems better to always
3140          use synth_mult.  */
3141       if (coeff != 0)
3142         {
3143           /* Special case powers of two.  */
3144           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3145             return expand_shift (LSHIFT_EXPR, mode, op0,
3146                                  floor_log2 (coeff), target, unsignedp);
3147
3148           /* Exclude cost of op0 from max_cost to match the cost
3149              calculation of the synth_mult.  */
3150           max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3151           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3152                                    max_cost))
3153             return expand_mult_const (mode, op0, coeff, target,
3154                                       &algorithm, variant);
3155         }
3156     }
3157
       /* No shift/add sequence was used.  Make sure that a CONST_DOUBLE
          operand, if there is one, ends up as OP1.  */
3158   if (GET_CODE (op0) == CONST_DOUBLE)
3159     {
3160       rtx temp = op0;
3161       op0 = op1;
3162       op1 = temp;
3163     }
3164
3165   /* Expand x*2.0 as x+x.  */
3166   if (GET_CODE (op1) == CONST_DOUBLE
3167       && SCALAR_FLOAT_MODE_P (mode))
3168     {
3169       REAL_VALUE_TYPE d;
3170       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3171
3172       if (REAL_VALUES_EQUAL (d, dconst2))
3173         {
3174           op0 = force_reg (GET_MODE (op0), op0);
3175           return expand_binop (mode, add_optab, op0, op0,
3176                                target, unsignedp, OPTAB_LIB_WIDEN);
3177         }
3178     }
3179
3180   /* This used to use umul_optab if unsigned, but for non-widening multiply
3181      there is no difference between signed and unsigned.  */
3182   op0 = expand_binop (mode,
3183                       ! unsignedp
3184                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3185                       ? smulv_optab : smul_optab,
3186                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3187   gcc_assert (op0);
3188   return op0;
3189 }
3190
3191 /* Perform a widening multiplication and return an rtx for the result.
3192    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3193    TARGET is a suggestion for where to store the result (an rtx).
3194    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3195    or smul_widen_optab.
        UNSIGNEDP is handed on unchanged to expand_shift and expand_binop.
3196
3197    We check specially for a constant integer as OP1, comparing the
3198    cost of a widening multiply against the cost of a sequence of shifts
3199    and adds.  */
3200
3201 rtx
3202 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3203                       int unsignedp, optab this_optab)
3204 {
3205   bool speed = optimize_insn_for_speed_p ();
3206   rtx cop1;
3207
       /* The shift/add path is only tried when OP0 has a mode, OP1 converted
          to MODE (as unsigned iff THIS_OPTAB is umul_widen_optab) is still a
          CONST_INT, and that constant is non-negative or MODE satisfies
          HWI_COMPUTABLE_MODE_P.  */
3208   if (CONST_INT_P (op1)
3209       && GET_MODE (op0) != VOIDmode
3210       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3211                                 this_optab == umul_widen_optab))
3212       && CONST_INT_P (cop1)
3213       && (INTVAL (cop1) >= 0
3214           || HWI_COMPUTABLE_MODE_P (mode)))
3215     {
3216       HOST_WIDE_INT coeff = INTVAL (cop1);
3217       int max_cost;
3218       enum mult_variant variant;
3219       struct algorithm algorithm;
3220
3221       /* Special case powers of two.  */
3222       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3223         {
               /* Widen OP0 to MODE first, then shift in the wide mode.  */
3224           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3225           return expand_shift (LSHIFT_EXPR, mode, op0,
3226                                floor_log2 (coeff), target, unsignedp);
3227         }
3228
3229       /* Exclude cost of op0 from max_cost to match the cost
3230          calculation of the synth_mult.  */
3231       max_cost = mul_widen_cost[speed][mode];
3232       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3233                                max_cost))
3234         {
3235           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3236           return expand_mult_const (mode, op0, coeff, target,
3237                                     &algorithm, variant);
3238         }
3239     }
       /* No shift/add sequence was chosen: expand through THIS_OPTAB.  */
3240   return expand_binop (mode, this_optab, op0, op1, target,
3241                        unsignedp, OPTAB_LIB_WIDEN);
3242 }
3243 \f
3244 /* Compute the least N for which 2**N is not smaller than X.  */
3245
3246 int
3247 ceil_log2 (unsigned HOST_WIDE_INT x)
3248 {
3249   return 1 + floor_log2 (x - 1);
3250 }
3251
3252 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3253    replace division by D, and put the least significant N bits of the result
3254    in *MULTIPLIER_PTR and return the most significant bit.
3255
3256    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3257    needed precision is in PRECISION (should be <= N).
3258
3259    PRECISION should be as small as possible so this function can choose
3260    multiplier more freely.
3261
3262    The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3263    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3264
3265    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3266    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.
        NOTE(review): since m approximates 2^(N + lgup)/D, the shift above
        presumably applies to the high N bits of the product (a total shift
        of N + *POST_SHIFT_PTR) -- confirm against the callers.  */
3267
3268 static
3269 unsigned HOST_WIDE_INT
3270 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3271                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3272 {
       /* MLOW and MHIGH are double-word values held as (hi, lo) pairs, as
          are the dividend (NH, NL) and the discarded remainders.  */
3273   HOST_WIDE_INT mhigh_hi, mlow_hi;
3274   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3275   int lgup, post_shift;
3276   int pow, pow2;
3277   unsigned HOST_WIDE_INT nl, dummy1;
3278   HOST_WIDE_INT nh, dummy2;
3279
3280   /* lgup = ceil(log2(divisor)); */
3281   lgup = ceil_log2 (d);
3282
3283   gcc_assert (lgup <= n);
3284
3285   pow = n + lgup;
3286   pow2 = n + lgup - precision;
3287
3288   /* We could handle this with some effort, but this case is much
3289      better handled directly with a scc insn, so rely on caller using
3290      that.  */
3291   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3292
3293   /* mlow = 2^(N + lgup)/d */
3294  if (pow >= HOST_BITS_PER_WIDE_INT)
3295     {
3296       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3297       nl = 0;
3298     }
3299   else
3300     {
3301       nh = 0;
3302       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3303     }
3304   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3305                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3306
3307   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3308   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3309     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3310   else
3311     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3312   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3313                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3314
3315   gcc_assert (!mhigh_hi || nh - d < d);
3316   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3317   /* Assert that mlow < mhigh.  */
3318   gcc_assert (mlow_hi < mhigh_hi
3319               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3320
3321   /* If precision == N, then mlow, mhigh exceed 2^N
3322      (but they do not exceed 2^(N+1)).  */
3323
3324   /* Reduce to lowest terms.  */
       /* Each pass halves both bounds (a double-word right shift by one) and
          lowers the post shift by one; it stops as soon as the halved bounds
          would no longer satisfy mlow < mhigh.  */
3325   for (post_shift = lgup; post_shift > 0; post_shift--)
3326     {
3327       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3328       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3329       if (ml_lo >= mh_lo)
3330         break;
3331
3332       mlow_hi = 0;
3333       mlow_lo = ml_lo;
3334       mhigh_hi = 0;
3335       mhigh_lo = mh_lo;
3336     }
3337
3338   *post_shift_ptr = post_shift;
3339   *lgup_ptr = lgup;
3340   if (n < HOST_BITS_PER_WIDE_INT)
3341     {
           /* The low N bits of mhigh form the multiplier.
              NOTE(review): the returned flag is `mhigh_lo >= mask', which is
              also true for mhigh_lo == mask although bit N is then clear;
              confirm that this value cannot occur.  */
3342       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3343       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3344       return mhigh_lo >= mask;
3345     }
3346   else
3347     {
           /* N is the full word width: the low word is the multiplier and
              the high word is the returned top bit.  */
3348       *multiplier_ptr = GEN_INT (mhigh_lo);
3349       return mhigh_hi;
3350     }
3351 }
3352
3353 /* Return the multiplicative inverse of X modulo 2**N, that is the Y
3354    for which X * Y is congruent to 1 (mod 2**N).  X is assumed odd.  */
3355
3356 static unsigned HOST_WIDE_INT
3357 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3358 {
3359   /* Iterative refinement: starting from inverse = x, which already
3360      satisfies x * inverse == 1 (mod 2^3) for odd x, every pass of the
3361      loop doubles the number of significant low-order bits.  */
3362   unsigned HOST_WIDE_INT inverse = x;
3363   unsigned HOST_WIDE_INT low_bits;
3364   int good_bits;
3365
3366   /* Mask selecting the low N bits; a full-width N gets all ones
3367      directly instead of going through the shift.  */
3368   if (n == HOST_BITS_PER_WIDE_INT)
3369     low_bits = ~(unsigned HOST_WIDE_INT) 0;
3370   else
3371     low_bits = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3372
3373   for (good_bits = 3; good_bits < n; good_bits *= 2)
3374     {
3375       /* Reduce modulo 2^N after each refinement step.  */
3376       inverse = (inverse * (2 - x * inverse)) & low_bits;
3377     }
3378   return inverse;
3379 }
3380
3381 /* Correct ADJ_OPERAND, the high half of the product OP0 x OP1 computed
3382    with the wrong signedness, so that it matches the wanted signedness.
3383    A nonzero UNSIGNEDP converts a signed high part into the unsigned one;
3384    a zero UNSIGNEDP converts an unsigned high part into the signed one.
3385
3386    MODE is the mode in which all operations are carried out.
3387
3388    TARGET is used for the result when that is convenient; the rtx that
3389    holds the corrected value is returned.  */
3390
3391 rtx
3392 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3393                              rtx op1, rtx target, int unsignedp)
3394 {
3395   enum rtx_code fixup = unsignedp ? PLUS : MINUS;
3396   int top_bit = GET_MODE_BITSIZE (mode) - 1;
3397   rtx term, sum;
3398
3399   /* First term: OP0 shifted right by all but one bit, ANDed with OP1.  */
3400   term = expand_shift (RSHIFT_EXPR, mode, op0, top_bit, NULL_RTX, 0);
3401   term = expand_and (mode, term, op1, NULL_RTX);
3402   sum = gen_rtx_fmt_ee (fixup, mode, adj_operand, term);
3403   adj_operand = force_operand (sum, adj_operand);
3404
3405   /* Second term: the same with the roles of OP0 and OP1 exchanged.  */
3406   term = expand_shift (RSHIFT_EXPR, mode, op1, top_bit, NULL_RTX, 0);
3407   term = expand_and (mode, term, op0, NULL_RTX);
3408   sum = gen_rtx_fmt_ee (fixup, mode, adj_operand, term);
3409
3410   /* Apply it, letting the final value land in TARGET if possible.  */
3411   return force_operand (sum, target);
3412 }
3413
3414 /* Helper for expand_mult_highpart: yield the high MODE part of OP.  */
3415
3416 static rtx
3417 extract_high_half (enum machine_mode mode, rtx op)
3418 {
3419   enum machine_mode wide;
3420   rtx upper;
3421
3422   /* A word-sized high part can be picked out directly.  */
3423   if (mode == word_mode)
3424     return gen_highpart (mode, op);
3425
3426   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3427   wide = GET_MODE_WIDER_MODE (mode);
3428   upper = expand_shift (RSHIFT_EXPR, wide, op, GET_MODE_BITSIZE (mode), 0, 1);
3429   return convert_modes (mode, wide, upper, 0);
3430 }
3431
3432 /* Like expand_mult_highpart, but only consider using a multiplication
3433    optab.  OP1 is an rtx for the constant operand.
        The strategies below are tried in order, each only when its estimated
        cost is below MAX_COST; the first one that expands wins.  Returns the
        rtx holding the high part, or 0 if no strategy succeeded.  */
3434
3435 static rtx
3436 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3437                             rtx target, int unsignedp, int max_cost)
3438 {
       /* OP1's value re-expressed as a constant of MODE.  */
3439   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3440   enum machine_mode wider_mode;
3441   optab moptab;
3442   rtx tem;
3443   int size;
3444   bool speed = optimize_insn_for_speed_p ();
3445
3446   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3447
3448   wider_mode = GET_MODE_WIDER_MODE (mode);
3449   size = GET_MODE_BITSIZE (mode);
3450
3451   /* Firstly, try using a multiplication insn that only generates the needed
3452      high part of the product, and in the sign flavor of unsignedp.  */
3453   if (mul_highpart_cost[speed][mode] < max_cost)
3454     {
3455       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3456       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3457                           unsignedp, OPTAB_DIRECT);
3458       if (tem)
3459         return tem;
3460     }
3461
3462   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3463      Need to adjust the result after the multiplication.  */
       /* The cost estimate adds two shifts by SIZE - 1 and four additions
          for the adjustment.  */
3464   if (size - 1 < BITS_PER_WORD
3465       && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3466           + 4 * add_cost[speed][mode] < max_cost))
3467     {
3468       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3469       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3470                           unsignedp, OPTAB_DIRECT);
3471       if (tem)
3472         /* We used the wrong signedness.  Adjust the result.  */
3473         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3474                                             tem, unsignedp);
3475     }
3476
3477   /* Try widening multiplication.  */
3478   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3479   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3480       && mul_widen_cost[speed][wider_mode] < max_cost)
3481     {
3482       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3483                           unsignedp, OPTAB_WIDEN);
3484       if (tem)
3485         return extract_high_half (mode, tem);
3486     }
3487
3488   /* Try widening the mode and perform a non-widening multiplication.  */
3489   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3490       && size - 1 < BITS_PER_WORD
3491       && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3492     {
3493       rtx insns, wop0, wop1;
3494
3495       /* We need to widen the operands, for example to ensure the
3496          constant multiplier is correctly sign or zero extended.
3497          Use a sequence to clean-up any instructions emitted by
3498          the conversions if things don't work out.  */
3499       start_sequence ();
3500       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3501       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3502       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3503                           unsignedp, OPTAB_WIDEN);
3504       insns = get_insns ();
3505       end_sequence ();
3506
           /* Only emit the collected insns when the multiply expanded.  */
3507       if (tem)
3508         {
3509           emit_insn (insns);
3510           return extract_high_half (mode, tem);
3511         }
3512     }
3513
3514   /* Try widening multiplication of opposite signedness, and adjust.  */
3515   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3516   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3517       && size - 1 < BITS_PER_WORD
3518       && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3519           + 4 * add_cost[speed][mode] < max_cost))
3520     {
3521       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3522                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3523       if (tem != 0)
3524         {
3525           tem = extract_high_half (mode, tem);
3526           /* We used the wrong signedness.  Adjust the result.  */
3527           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3528                                               target, unsignedp);
3529         }
3530     }
3531
       /* Nothing was both available and cheap enough.  */
3532   return 0;
3533 }
3534
3535 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3536    putting the high half of the result in TARGET if that is convenient,
3537    and return where the result is.  If the operation can not be performed,
3538    0 is returned.
3539
3540    MODE is the mode of operation and result.
3541
3542    UNSIGNEDP nonzero means unsigned multiply.
3543
3544    MAX_COST is the total allowed cost for the expanded RTL.  */
3545
3546 static rtx
3547 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3548                       rtx target, int unsignedp, int max_cost)
3549 {
3550   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3551   unsigned HOST_WIDE_INT cnst1;
3552   int extra_cost;
3553   bool sign_adjust = false;
3554   enum mult_variant variant;
3555   struct algorithm alg;
3556   rtx tem;
3557   bool speed = optimize_insn_for_speed_p ();
3558
3559   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3560   /* We can't support modes wider than HOST_BITS_PER_INT.
          NOTE(review): the assertion tests HWI_COMPUTABLE_MODE_P, so the
          limit meant here is presumably HOST_BITS_PER_WIDE_INT -- confirm.  */
3561   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3562
       /* The constant multiplier, reduced to the bits of MODE.  */
3563   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3564
3565   /* We can't optimize modes wider than BITS_PER_WORD.
3566      ??? We might be able to perform double-word arithmetic if
3567      mode == word_mode, however all the cost calculations in
3568      synth_mult etc. assume single-word operations.  */
3569   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3570     return expand_mult_highpart_optab (mode, op0, op1, target,
3571                                        unsignedp, max_cost);
3572
       /* Cost charged on top of the shift/add multiply: one shift by the
          width of MODE minus one, plus an addition when a sign adjustment
          is needed.  */
3573   extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3574
3575   /* Check whether we try to multiply by a negative constant.  */
3576   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3577     {
3578       sign_adjust = true;
3579       extra_cost += add_cost[speed][mode];
3580     }
3581
3582   /* See whether shift/add multiplication is cheap enough.  */
3583   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3584                            max_cost - extra_cost))
3585     {
3586       /* See whether the specialized multiplication optabs are
3587          cheaper than the shift/add version.  */
3588       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3589                                         alg.cost.cost + extra_cost);
3590       if (tem)
3591         return tem;
3592
           /* Multiply in WIDER_MODE by the masked constant and keep the
              high half.  */
3593       tem = convert_to_mode (wider_mode, op0, unsignedp);
3594       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3595       tem = extract_high_half (mode, tem);
3596
3597       /* Adjust result for signedness.  */
3598       if (sign_adjust)
3599         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3600
3601       return tem;
3602     }
       /* Shift/add is too expensive: rely on the multiplication optabs.  */
3603   return expand_mult_highpart_optab (mode, op0, op1, target,
3604                                      unsignedp, max_cost);
3605 }
3606
3607
3608 /* Expand signed modulus of OP0 by a power of two D in mode MODE.
        Returns the rtx that holds the remainder: either a temporary produced
        by the branch-free sequence or the fresh pseudo RESULT filled in by
        the compare-and-branch sequence.  */
3609
3610 static rtx
3611 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3612 {
3613   unsigned HOST_WIDE_INT masklow, maskhigh;
3614   rtx result, temp, shift, label;
3615   int logd;
3616
3617   logd = floor_log2 (d);
3618   result = gen_reg_rtx (mode);
3619
3620   /* Avoid conditional branches when they're expensive.  */
3621   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3622       && optimize_insn_for_speed_p ())
3623     {
3624       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3625                                       mode, 0, -1);
           /* emit_store_flag can fail; in that case fall back to the
              compare-and-branch sequence below.  */
3626       if (signmask)
3627         {
3628           signmask = force_reg (mode, signmask);
3629           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3630           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3631
3632           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3633              which instruction sequence to use.  If logical right shifts
3634              are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3635              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3636
3637           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3638           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3639               || (set_src_cost (temp, optimize_insn_for_speed_p ())
3640                   > COSTS_N_INSNS (2)))
3641             {
3642               temp = expand_binop (mode, xor_optab, op0, signmask,
3643                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3644               temp = expand_binop (mode, sub_optab, temp, signmask,
3645                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3646               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3647                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3648               temp = expand_binop (mode, xor_optab, temp, signmask,
3649                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3650               temp = expand_binop (mode, sub_optab, temp, signmask,
3651                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3652             }
3653           else
3654             {
3655               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3656                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3657               signmask = force_reg (mode, signmask);
3658
3659               temp = expand_binop (mode, add_optab, op0, signmask,
3660                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3661               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3662                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3663               temp = expand_binop (mode, sub_optab, temp, signmask,
3664                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3665             }
3666           return temp;
3667         }
3668     }
3669
3670   /* Mask contains the mode's signbit and the significant bits of the
3671      modulus.  By including the signbit in the operation, many targets
3672      can avoid an explicit compare operation in the following comparison
3673      against zero.  */
3674
       /* The all-ones values below are shifted as unsigned quantities:
          left-shifting a negative signed value is undefined behavior in C.
          The resulting bit patterns are unchanged.  */
3675   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3676   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3677     {
3678       masklow |= (unsigned HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3679       maskhigh = -1;
3680     }
3681   else
3682     maskhigh = (unsigned HOST_WIDE_INT) -1
3683                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3684
3685   temp = expand_binop (mode, and_optab, op0,
3686                        immed_double_const (masklow, maskhigh, mode),
3687                        result, 1, OPTAB_LIB_WIDEN);
3688   if (temp != result)
3689     emit_move_insn (result, temp);
3690
       /* A non-negative masked value is already the remainder; skip the
          fix-up.  */
3691   label = gen_label_rtx ();
3692   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3693
       /* Otherwise compute ((RESULT - 1) | ~(D - 1)) + 1.  */
3694   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3695                        0, OPTAB_LIB_WIDEN);
3696   masklow = (unsigned HOST_WIDE_INT) -1 << logd;
3697   maskhigh = -1;
3698   temp = expand_binop (mode, ior_optab, temp,
3699                        immed_double_const (masklow, maskhigh, mode),
3700                        result, 1, OPTAB_LIB_WIDEN);
3701   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3702                        0, OPTAB_LIB_WIDEN);
3703   if (temp != result)
3704     emit_move_insn (result, temp);
3705   emit_label (label);
3706   return result;
3707 }
3708
3709 /* Expand signed division of OP0 by a power of two D in mode MODE.
3710    This routine is only called for positive values of D.  */
3711
3712 static rtx
3713 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3714 {
3715   rtx temp, label;
3716   int logd;
3717
3718   logd = floor_log2 (d);
3719
3720   if (d == 2
3721       && BRANCH_COST (optimize_insn_for_speed_p (),
3722                       false) >= 1)
3723     {
3724       temp = gen_reg_rtx (mode);
3725       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3726       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3727                            0, OPTAB_LIB_WIDEN);
3728       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3729     }
3730
3731 #ifdef HAVE_conditional_move
3732   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3733       >= 2)
3734     {
3735       rtx temp2;
3736
3737       /* ??? emit_conditional_move forces a stack adjustment via
3738          compare_from_rtx so, if the sequence is discarded, it will
3739          be lost.  Do it now instead.  */
3740       do_pending_stack_adjust ();
3741
3742       start_sequence ();
3743       temp2 = copy_to_mode_reg (mode, op0);
3744       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3745                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3746       temp = force_reg (mode, temp);
3747
3748       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3749       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3750                                      mode, temp, temp2, mode, 0);
3751       if (temp2)
3752         {
3753           rtx seq = get_insns ();
3754           end_sequence ();
3755           emit_insn (seq);
3756           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3757         }
3758       end_sequence ();
3759     }
3760 #endif
3761
3762   if (BRANCH_COST (optimize_insn_for_speed_p (),
3763                    false) >= 2)
3764     {
3765       int ushift = GET_MODE_BITSIZE (mode) - logd;
3766
3767       temp = gen_reg_rtx (mode);
3768       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3769       if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3770         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3771                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3772       else
3773         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3774                              ushift, NULL_RTX, 1);
3775       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3776                            0, OPTAB_LIB_WIDEN);
3777       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3778     }
3779
3780   label = gen_label_rtx ();
3781   temp = copy_to_mode_reg (mode, op0);
3782   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3783   expand_inc (temp, GEN_INT (d - 1));
3784   emit_label (label);
3785   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3786 }
3787 \f
3788 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3789    if that is convenient, and returning where the result is.
3790    You may request either the quotient or the remainder as the result;
3791    specify REM_FLAG nonzero to get the remainder.
3792
3793    CODE is the expression code for which kind of division this is;
3794    it controls how rounding is done.  MODE is the machine mode to use.
3795    UNSIGNEDP nonzero means do unsigned division.  */
3796
3797 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3798    and then correct it by or'ing in missing high bits
3799    if result of ANDI is nonzero.
3800    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3801    This could optimize to a bfexts instruction.
3802    But C doesn't use these operations, so their optimizations are
3803    left for later.  */
3804 /* ??? For modulo, we don't actually need the highpart of the first product,
3805    the low part will do nicely.  And for small divisors, the second multiply
3806    can also be a low-part only multiply or even be completely left out.
3807    E.g. to calculate the remainder of a division by 3 with a 32 bit
3808    multiply, multiply with 0x55555556 and extract the upper two bits;
3809    the result is exact for inputs up to 0x1fffffff.
3810    The input range can be reduced by using cross-sum rules.
3811    For odd divisors >= 3, the following table gives right shift counts
3812    so that if a number is shifted by an integer multiple of the given
3813    amount, the remainder stays the same:
3814    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3815    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3816    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3817    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3818    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3819
3820    Cross-sum rules for even numbers can be derived by leaving as many bits
3821    to the right alone as the divisor has zeros to the right.
3822    E.g. if x is an unsigned 32 bit number:
3823    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3824    */
3825
3826 rtx
3827 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3828                rtx op0, rtx op1, rtx target, int unsignedp)
3829 {
3830   enum machine_mode compute_mode;
3831   rtx tquotient;
3832   rtx quotient = 0, remainder = 0;
3833   rtx last;
3834   int size;
3835   rtx insn, set;
3836   optab optab1, optab2;
3837   int op1_is_constant, op1_is_pow2 = 0;
3838   int max_cost, extra_cost;
3839   static HOST_WIDE_INT last_div_const = 0;
3840   static HOST_WIDE_INT ext_op1;
3841   bool speed = optimize_insn_for_speed_p ();
3842
3843   op1_is_constant = CONST_INT_P (op1);
3844   if (op1_is_constant)
3845     {
3846       ext_op1 = INTVAL (op1);
3847       if (unsignedp)
3848         ext_op1 &= GET_MODE_MASK (mode);
3849       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3850                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3851     }
3852
3853   /*
3854      This is the structure of expand_divmod:
3855
3856      First comes code to fix up the operands so we can perform the operations
3857      correctly and efficiently.
3858
3859      Second comes a switch statement with code specific for each rounding mode.
3860      For some special operands this code emits all RTL for the desired
3861      operation, for other cases, it generates only a quotient and stores it in
3862      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3863      to indicate that it has not done anything.
3864
3865      Last comes code that finishes the operation.  If QUOTIENT is set and
3866      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3867      QUOTIENT is not set, it is computed using trunc rounding.
3868
3869      We try to generate special code for division and remainder when OP1 is a
3870      constant.  If |OP1| = 2**n we can use shifts and some other fast
3871      operations.  For other values of OP1, we compute a carefully selected
3872      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3873      by m.
3874
3875      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3876      half of the product.  Different strategies for generating the product are
3877      implemented in expand_mult_highpart.
3878
3879      If what we actually want is the remainder, we generate that by another
3880      by-constant multiplication and a subtraction.  */
3881
3882   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3883      code below will malfunction if we are, so check here and handle
3884      the special case if so.  */
3885   if (op1 == const1_rtx)
3886     return rem_flag ? const0_rtx : op0;
3887
3888   /* Dividing by -1 can overflow.  With flag_trapv in an integer mode we
3889      negate via negv_optab, which can handle overflows; else neg_optab.  */
3890   if (! unsignedp && op1 == constm1_rtx)
3891     {
3892       if (rem_flag)
3893         return const0_rtx;
3894       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3895                           ? negv_optab : neg_optab, op0, target, 0);
3896     }
3897
3898   if (target
3899       /* Don't use the function value register as a target
3900          since we have to read it as well as write it,
3901          and function-inlining gets confused by this.  */
3902       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3903           /* Don't clobber an operand while doing a multi-step calculation.  */
3904           || ((rem_flag || op1_is_constant)
3905               && (reg_mentioned_p (target, op0)
3906                   || (MEM_P (op0) && MEM_P (target))))
3907           || reg_mentioned_p (target, op1)
3908           || (MEM_P (op1) && MEM_P (target))))
3909     target = 0;
3910
3911   /* Get the mode in which to perform this computation.  Normally it will
3912      be MODE, but sometimes we can't do the desired operation in MODE.
3913      If so, pick a wider mode in which we can do the operation.  Convert
3914      to that mode at the start to avoid repeated conversions.
3915
3916      First see what operations we need.  These depend on the expression
3917      we are evaluating.  (We assume that divxx3 insns exist under the
3918      same conditions as modxx3 insns and that these insns don't normally
3919      fail.  If these assumptions are not correct, we may generate less
3920      efficient code in some cases.)
3921
3922      Then see if we find a mode in which we can open-code that operation
3923      (either a division, modulus, or shift).  Finally, check for the smallest
3924      mode for which we can do the operation with a library call.  */
3925
3926   /* We might want to refine this now that we have division-by-constant
3927      optimization.  Since expand_mult_highpart tries so many variants, it is
3928      not straightforward to generalize this.  Maybe we should make an array
3929      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3930
3931   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3932             ? (unsignedp ? lshr_optab : ashr_optab)
3933             : (unsignedp ? udiv_optab : sdiv_optab));
3934   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3935             ? optab1
3936             : (unsignedp ? udivmod_optab : sdivmod_optab));
3937
3938   for (compute_mode = mode; compute_mode != VOIDmode;
3939        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3940     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3941         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3942       break;
3943
3944   if (compute_mode == VOIDmode)
3945     for (compute_mode = mode; compute_mode != VOIDmode;
3946          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3947       if (optab_libfunc (optab1, compute_mode)
3948           || optab_libfunc (optab2, compute_mode))
3949         break;
3950
3951   /* If we still couldn't find a mode, use MODE, but expand_binop will
3952      probably die.  */
3953   if (compute_mode == VOIDmode)
3954     compute_mode = mode;
3955
3956   if (target && GET_MODE (target) == compute_mode)
3957     tquotient = target;
3958   else
3959     tquotient = gen_reg_rtx (compute_mode);
3960
3961   size = GET_MODE_BITSIZE (compute_mode);
3962 #if 0
3963   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3964      (mode), and thereby get better code when OP1 is a constant.  Do that
3965      later.  It will require going over all usages of SIZE below.  */
3966   size = GET_MODE_BITSIZE (mode);
3967 #endif
3968
3969   /* Only deduct something for a REM if the last divide done was
3970      for a different constant.   Then set the constant of the last
3971      divide.  */
3972   max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3973   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3974                      && INTVAL (op1) == last_div_const))
3975     max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3976
3977   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3978
3979   /* Now convert to the best mode to use.  */
3980   if (compute_mode != mode)
3981     {
3982       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3983       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3984
3985       /* convert_modes may have placed op1 into a register, so we
3986          must recompute the following.  */
3987       op1_is_constant = CONST_INT_P (op1);
3988       op1_is_pow2 = (op1_is_constant
3989                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3990                           || (! unsignedp
3991                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3992     }
3993
3994   /* If one of the operands is a volatile MEM, copy it into a register.  */
3995
3996   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3997     op0 = force_reg (compute_mode, op0);
3998   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3999     op1 = force_reg (compute_mode, op1);
4000
4001   /* If we need the remainder or if OP1 is constant, we need to
4002      put OP0 in a register in case it has any queued subexpressions.  */
4003   if (rem_flag || op1_is_constant)
4004     op0 = force_reg (compute_mode, op0);
4005
4006   last = get_last_insn ();
4007
4008   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4009   if (unsignedp)
4010     {
4011       if (code == FLOOR_DIV_EXPR)
4012         code = TRUNC_DIV_EXPR;
4013       if (code == FLOOR_MOD_EXPR)
4014         code = TRUNC_MOD_EXPR;
4015       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4016         code = TRUNC_DIV_EXPR;
4017     }
4018
4019   if (op1 != const0_rtx)
4020     switch (code)
4021       {
4022       case TRUNC_MOD_EXPR:
4023       case TRUNC_DIV_EXPR:
4024         if (op1_is_constant)
4025           {
4026             if (unsignedp)
4027               {
4028                 unsigned HOST_WIDE_INT mh;
4029                 int pre_shift, post_shift;
4030                 int dummy;
4031                 rtx ml;
4032                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4033                                             & GET_MODE_MASK (compute_mode));
4034
4035                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4036                   {
4037                     pre_shift = floor_log2 (d);
4038                     if (rem_flag)
4039                       {
4040                         remainder
4041                           = expand_binop (compute_mode, and_optab, op0,
4042                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4043                                           remainder, 1,
4044                                           OPTAB_LIB_WIDEN);
4045                         if (remainder)
4046                           return gen_lowpart (mode, remainder);
4047                       }
4048                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4049                                              pre_shift, tquotient, 1);
4050                   }
4051                 else if (size <= HOST_BITS_PER_WIDE_INT)
4052                   {
4053                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4054                       {
4055                         /* Most significant bit of divisor is set; emit an scc
4056                            insn.  */
4057                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4058                                                           compute_mode, 1, 1);
4059                       }
4060                     else
4061                       {
4062                         /* Find a suitable multiplier and right shift count
4063                            instead of multiplying with D.  */
4064
4065                         mh = choose_multiplier (d, size, size,
4066                                                 &ml, &post_shift, &dummy);
4067
4068                         /* If the suggested multiplier is more than SIZE bits,
4069                            we can do better for even divisors, using an
4070                            initial right shift.  */
4071                         if (mh != 0 && (d & 1) == 0)
4072                           {
4073                             pre_shift = floor_log2 (d & -d);
4074                             mh = choose_multiplier (d >> pre_shift, size,
4075                                                     size - pre_shift,
4076                                                     &ml, &post_shift, &dummy);
4077                             gcc_assert (!mh);
4078                           }
4079                         else
4080                           pre_shift = 0;
4081
4082                         if (mh != 0)
4083                           {
4084                             rtx t1, t2, t3, t4;
4085
4086                             if (post_shift - 1 >= BITS_PER_WORD)
4087                               goto fail1;
4088
4089                             extra_cost
4090                               = (shift_cost[speed][compute_mode][post_shift - 1]
4091                                  + shift_cost[speed][compute_mode][1]
4092                                  + 2 * add_cost[speed][compute_mode]);
4093                             t1 = expand_mult_highpart (compute_mode, op0, ml,
4094                                                        NULL_RTX, 1,
4095                                                        max_cost - extra_cost);
4096                             if (t1 == 0)
4097                               goto fail1;
4098                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4099                                                                op0, t1),
4100                                                 NULL_RTX);
4101                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4102                                                t2, 1, NULL_RTX, 1);
4103                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4104                                                               t1, t3),
4105                                                 NULL_RTX);
4106                             quotient = expand_shift
4107                               (RSHIFT_EXPR, compute_mode, t4,
4108                                post_shift - 1, tquotient, 1);
4109                           }
4110                         else
4111                           {
4112                             rtx t1, t2;
4113
4114                             if (pre_shift >= BITS_PER_WORD
4115                                 || post_shift >= BITS_PER_WORD)
4116                               goto fail1;
4117
4118                             t1 = expand_shift
4119                               (RSHIFT_EXPR, compute_mode, op0,
4120                                pre_shift, NULL_RTX, 1);
4121                             extra_cost
4122                               = (shift_cost[speed][compute_mode][pre_shift]
4123                                  + shift_cost[speed][compute_mode][post_shift]);
4124                             t2 = expand_mult_highpart (compute_mode, t1, ml,
4125                                                        NULL_RTX, 1,
4126                                                        max_cost - extra_cost);
4127                             if (t2 == 0)
4128                               goto fail1;
4129                             quotient = expand_shift
4130                               (RSHIFT_EXPR, compute_mode, t2,
4131                                post_shift, tquotient, 1);
4132                           }
4133                       }
4134                   }
4135                 else            /* Too wide mode to use tricky code */
4136                   break;
4137
4138                 insn = get_last_insn ();
4139                 if (insn != last
4140                     && (set = single_set (insn)) != 0
4141                     && SET_DEST (set) == quotient)
4142                   set_unique_reg_note (insn,
4143                                        REG_EQUAL,
4144                                        gen_rtx_UDIV (compute_mode, op0, op1));
4145               }
4146             else                /* TRUNC_DIV, signed */
4147               {
4148                 unsigned HOST_WIDE_INT ml;
4149                 int lgup, post_shift;
4150                 rtx mlr;
4151                 HOST_WIDE_INT d = INTVAL (op1);
4152                 unsigned HOST_WIDE_INT abs_d;
4153
4154                 /* Since d might be INT_MIN, we have to cast to
4155                    unsigned HOST_WIDE_INT before negating to avoid
4156                    undefined signed overflow.  */
4157                 abs_d = (d >= 0
4158                          ? (unsigned HOST_WIDE_INT) d
4159                          : - (unsigned HOST_WIDE_INT) d);
4160
4161                 /* n rem d = n rem -d */
4162                 if (rem_flag && d < 0)
4163                   {
4164                     d = abs_d;
4165                     op1 = gen_int_mode (abs_d, compute_mode);
4166                   }
4167
4168                 if (d == 1)
4169                   quotient = op0;
4170                 else if (d == -1)
4171                   quotient = expand_unop (compute_mode, neg_optab, op0,
4172                                           tquotient, 0);
4173                 else if (HOST_BITS_PER_WIDE_INT >= size
4174                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4175                   {
4176                     /* This case is not handled correctly below.  */
4177                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4178                                                 compute_mode, 1, 1);
4179                     if (quotient == 0)
4180                       goto fail1;
4181                   }
4182                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4183                          && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4184                                       : sdiv_pow2_cheap[speed][compute_mode])
4185                          /* We assume that cheap metric is true if the
4186                             optab has an expander for this mode.  */
4187                          && ((optab_handler ((rem_flag ? smod_optab
4188                                               : sdiv_optab),
4189                                              compute_mode)
4190                               != CODE_FOR_nothing)
4191                              || (optab_handler (sdivmod_optab,
4192                                                 compute_mode)
4193                                  != CODE_FOR_nothing)))
4194                   ;
4195                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4196                   {
4197                     if (rem_flag)
4198                       {
4199                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4200                         if (remainder)
4201                           return gen_lowpart (mode, remainder);
4202                       }
4203
4204                     if (sdiv_pow2_cheap[speed][compute_mode]
4205                         && ((optab_handler (sdiv_optab, compute_mode)
4206                              != CODE_FOR_nothing)
4207                             || (optab_handler (sdivmod_optab, compute_mode)
4208                                 != CODE_FOR_nothing)))
4209                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4210                                                 compute_mode, op0,
4211                                                 gen_int_mode (abs_d,
4212                                                               compute_mode),
4213                                                 NULL_RTX, 0);
4214                     else
4215                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4216
4217                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4218                        negate the quotient.  */
4219                     if (d < 0)
4220                       {
4221                         insn = get_last_insn ();
4222                         if (insn != last
4223                             && (set = single_set (insn)) != 0
4224                             && SET_DEST (set) == quotient
4225                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4226                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4227                           set_unique_reg_note (insn,
4228                                                REG_EQUAL,
4229                                                gen_rtx_DIV (compute_mode,
4230                                                             op0,
4231                                                             GEN_INT
4232                                                             (trunc_int_for_mode
4233                                                              (abs_d,
4234                                                               compute_mode))));
4235
4236                         quotient = expand_unop (compute_mode, neg_optab,
4237                                                 quotient, quotient, 0);
4238                       }
4239                   }
4240                 else if (size <= HOST_BITS_PER_WIDE_INT)
4241                   {
4242                     choose_multiplier (abs_d, size, size - 1,
4243                                        &mlr, &post_shift, &lgup);
4244                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4245                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4246                       {
4247                         rtx t1, t2, t3;
4248
4249                         if (post_shift >= BITS_PER_WORD
4250                             || size - 1 >= BITS_PER_WORD)
4251                           goto fail1;
4252
4253                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4254                                       + shift_cost[speed][compute_mode][size - 1]
4255                                       + add_cost[speed][compute_mode]);
4256                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4257                                                    NULL_RTX, 0,
4258                                                    max_cost - extra_cost);
4259                         if (t1 == 0)
4260                           goto fail1;
4261                         t2 = expand_shift
4262                           (RSHIFT_EXPR, compute_mode, t1,
4263                            post_shift, NULL_RTX, 0);
4264                         t3 = expand_shift
4265                           (RSHIFT_EXPR, compute_mode, op0,
4266                            size - 1, NULL_RTX, 0);
4267                         if (d < 0)
4268                           quotient
4269                             = force_operand (gen_rtx_MINUS (compute_mode,
4270                                                             t3, t2),
4271                                              tquotient);
4272                         else
4273                           quotient
4274                             = force_operand (gen_rtx_MINUS (compute_mode,
4275                                                             t2, t3),
4276                                              tquotient);
4277                       }
4278                     else
4279                       {
4280                         rtx t1, t2, t3, t4;
4281
4282                         if (post_shift >= BITS_PER_WORD
4283                             || size - 1 >= BITS_PER_WORD)
4284                           goto fail1;
4285
4286                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4287                         mlr = gen_int_mode (ml, compute_mode);
4288                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4289                                       + shift_cost[speed][compute_mode][size - 1]
4290                                       + 2 * add_cost[speed][compute_mode]);
4291                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4292                                                    NULL_RTX, 0,
4293                                                    max_cost - extra_cost);
4294                         if (t1 == 0)
4295                           goto fail1;
4296                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4297                                                           t1, op0),
4298                                             NULL_RTX);
4299                         t3 = expand_shift
4300                           (RSHIFT_EXPR, compute_mode, t2,
4301                            post_shift, NULL_RTX, 0);
4302                         t4 = expand_shift
4303                           (RSHIFT_EXPR, compute_mode, op0,
4304                            size - 1, NULL_RTX, 0);
4305                         if (d < 0)
4306                           quotient
4307                             = force_operand (gen_rtx_MINUS (compute_mode,
4308                                                             t4, t3),
4309                                              tquotient);
4310                         else
4311                           quotient
4312                             = force_operand (gen_rtx_MINUS (compute_mode,
4313                                                             t3, t4),
4314                                              tquotient);
4315                       }
4316                   }
4317                 else            /* Too wide mode to use tricky code */
4318                   break;
4319
4320                 insn = get_last_insn ();
4321                 if (insn != last
4322                     && (set = single_set (insn)) != 0
4323                     && SET_DEST (set) == quotient)
4324                   set_unique_reg_note (insn,
4325                                        REG_EQUAL,
4326                                        gen_rtx_DIV (compute_mode, op0, op1));
4327               }
4328             break;
4329           }
4330       fail1:
4331         delete_insns_since (last);
4332         break;
4333
4334       case FLOOR_DIV_EXPR:
4335       case FLOOR_MOD_EXPR:
4336       /* We will come here only for signed operations.  */
4337         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4338           {
4339             unsigned HOST_WIDE_INT mh;
4340             int pre_shift, lgup, post_shift;
4341             HOST_WIDE_INT d = INTVAL (op1);
4342             rtx ml;
4343
4344             if (d > 0)
4345               {
4346                 /* We could just as easily deal with negative constants here,
4347                    but it does not seem worth the trouble for GCC 2.6.  */
4348                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4349                   {
4350                     pre_shift = floor_log2 (d);
4351                     if (rem_flag)
4352                       {
4353                         remainder = expand_binop (compute_mode, and_optab, op0,
4354                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4355                                                   remainder, 0, OPTAB_LIB_WIDEN);
4356                         if (remainder)
4357                           return gen_lowpart (mode, remainder);
4358                       }
4359                     quotient = expand_shift
4360                       (RSHIFT_EXPR, compute_mode, op0,
4361                        pre_shift, tquotient, 0);
4362                   }
4363                 else
4364                   {
4365                     rtx t1, t2, t3, t4;
4366
4367                     mh = choose_multiplier (d, size, size - 1,
4368                                             &ml, &post_shift, &lgup);
4369                     gcc_assert (!mh);
4370
4371                     if (post_shift < BITS_PER_WORD
4372                         && size - 1 < BITS_PER_WORD)
4373                       {
4374                         t1 = expand_shift
4375                           (RSHIFT_EXPR, compute_mode, op0,
4376                            size - 1, NULL_RTX, 0);
4377                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4378                                            NULL_RTX, 0, OPTAB_WIDEN);
4379                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4380                                       + shift_cost[speed][compute_mode][size - 1]
4381                                       + 2 * add_cost[speed][compute_mode]);
4382                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4383                                                    NULL_RTX, 1,
4384                                                    max_cost - extra_cost);
4385                         if (t3 != 0)
4386                           {
4387                             t4 = expand_shift
4388                               (RSHIFT_EXPR, compute_mode, t3,
4389                                post_shift, NULL_RTX, 1);
4390                             quotient = expand_binop (compute_mode, xor_optab,
4391                                                      t4, t1, tquotient, 0,
4392                                                      OPTAB_WIDEN);
4393                           }
4394                       }
4395                   }
4396               }
4397             else
4398               {
4399                 rtx nsign, t1, t2, t3, t4;
4400                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4401                                                   op0, constm1_rtx), NULL_RTX);
4402                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4403                                    0, OPTAB_WIDEN);
4404                 nsign = expand_shift
4405                   (RSHIFT_EXPR, compute_mode, t2,
4406                    size - 1, NULL_RTX, 0);
4407                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4408                                     NULL_RTX);
4409                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4410                                     NULL_RTX, 0);
4411                 if (t4)
4412                   {
4413                     rtx t5;
4414                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4415                                       NULL_RTX, 0);
4416                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4417                                                             t4, t5),
4418                                               tquotient);
4419                   }
4420               }
4421           }
4422
4423         if (quotient != 0)
4424           break;
4425         delete_insns_since (last);
4426
4427         /* Try using an instruction that produces both the quotient and
4428            remainder, using truncation.  We can easily compensate the quotient
4429            or remainder to get floor rounding, once we have the remainder.
4430            Notice that we compute also the final remainder value here,
4431            and return the result right away.  */
4432         if (target == 0 || GET_MODE (target) != compute_mode)
4433           target = gen_reg_rtx (compute_mode);
4434
4435         if (rem_flag)
4436           {
4437             remainder
4438               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4439             quotient = gen_reg_rtx (compute_mode);
4440           }
4441         else
4442           {
4443             quotient
4444               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4445             remainder = gen_reg_rtx (compute_mode);
4446           }
4447
4448         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4449                                  quotient, remainder, 0))
4450           {
4451             /* This could be computed with a branch-less sequence.
4452                Save that for later.  */
4453             rtx tem;
4454             rtx label = gen_label_rtx ();
4455             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4456             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4457                                 NULL_RTX, 0, OPTAB_WIDEN);
4458             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4459             expand_dec (quotient, const1_rtx);
4460             expand_inc (remainder, op1);
4461             emit_label (label);
4462             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4463           }
4464
4465         /* No luck with division elimination or divmod.  Have to do it
4466            by conditionally adjusting op0 *and* the result.  */
4467         {
4468           rtx label1, label2, label3, label4, label5;
4469           rtx adjusted_op0;
4470           rtx tem;
4471
4472           quotient = gen_reg_rtx (compute_mode);
4473           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4474           label1 = gen_label_rtx ();
4475           label2 = gen_label_rtx ();
4476           label3 = gen_label_rtx ();
4477           label4 = gen_label_rtx ();
4478           label5 = gen_label_rtx ();
4479           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4480           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4481           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4482                               quotient, 0, OPTAB_LIB_WIDEN);
4483           if (tem != quotient)
4484             emit_move_insn (quotient, tem);
4485           emit_jump_insn (gen_jump (label5));
4486           emit_barrier ();
4487           emit_label (label1);
4488           expand_inc (adjusted_op0, const1_rtx);
4489           emit_jump_insn (gen_jump (label4));
4490           emit_barrier ();
4491           emit_label (label2);
4492           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4493           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4494                               quotient, 0, OPTAB_LIB_WIDEN);
4495           if (tem != quotient)
4496             emit_move_insn (quotient, tem);
4497           emit_jump_insn (gen_jump (label5));
4498           emit_barrier ();
4499           emit_label (label3);
4500           expand_dec (adjusted_op0, const1_rtx);
4501           emit_label (label4);
4502           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4503                               quotient, 0, OPTAB_LIB_WIDEN);
4504           if (tem != quotient)
4505             emit_move_insn (quotient, tem);
4506           expand_dec (quotient, const1_rtx);
4507           emit_label (label5);
4508         }
4509         break;
4510
4511       case CEIL_DIV_EXPR:
4512       case CEIL_MOD_EXPR:
4513         if (unsignedp)
4514           {
4515             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4516               {
4517                 rtx t1, t2, t3;
4518                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4519                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4520                                    floor_log2 (d), tquotient, 1);
4521                 t2 = expand_binop (compute_mode, and_optab, op0,
4522                                    GEN_INT (d - 1),
4523                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4524                 t3 = gen_reg_rtx (compute_mode);
4525                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4526                                       compute_mode, 1, 1);
4527                 if (t3 == 0)
4528                   {
4529                     rtx lab;
4530                     lab = gen_label_rtx ();
4531                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4532                     expand_inc (t1, const1_rtx);
4533                     emit_label (lab);
4534                     quotient = t1;
4535                   }
4536                 else
4537                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4538                                                           t1, t3),
4539                                             tquotient);
4540                 break;
4541               }
4542
4543             /* Try using an instruction that produces both the quotient and
4544                remainder, using truncation.  We can easily compensate the
4545                quotient or remainder to get ceiling rounding, once we have the
4546                remainder.  Notice that we compute also the final remainder
4547                value here, and return the result right away.  */
4548             if (target == 0 || GET_MODE (target) != compute_mode)
4549               target = gen_reg_rtx (compute_mode);
4550
4551             if (rem_flag)
4552               {
4553                 remainder = (REG_P (target)
4554                              ? target : gen_reg_rtx (compute_mode));
4555                 quotient = gen_reg_rtx (compute_mode);
4556               }
4557             else
4558               {
4559                 quotient = (REG_P (target)
4560                             ? target : gen_reg_rtx (compute_mode));
4561                 remainder = gen_reg_rtx (compute_mode);
4562               }
4563
4564             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4565                                      remainder, 1))
4566               {
4567                 /* This could be computed with a branch-less sequence.
4568                    Save that for later.  */
4569                 rtx label = gen_label_rtx ();
4570                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4571                                  compute_mode, label);
4572                 expand_inc (quotient, const1_rtx);
4573                 expand_dec (remainder, op1);
4574                 emit_label (label);
4575                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4576               }
4577
4578             /* No luck with division elimination or divmod.  Have to do it
4579                by conditionally adjusting op0 *and* the result.  */
4580             {
4581               rtx label1, label2;
4582               rtx adjusted_op0, tem;
4583
4584               quotient = gen_reg_rtx (compute_mode);
4585               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4586               label1 = gen_label_rtx ();
4587               label2 = gen_label_rtx ();
4588               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4589                                compute_mode, label1);
4590               emit_move_insn  (quotient, const0_rtx);
4591               emit_jump_insn (gen_jump (label2));
4592               emit_barrier ();
4593               emit_label (label1);
4594               expand_dec (adjusted_op0, const1_rtx);
4595               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4596                                   quotient, 1, OPTAB_LIB_WIDEN);
4597               if (tem != quotient)
4598                 emit_move_insn (quotient, tem);
4599               expand_inc (quotient, const1_rtx);
4600               emit_label (label2);
4601             }
4602           }
4603         else /* signed */
4604           {
4605             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4606                 && INTVAL (op1) >= 0)
4607               {
4608                 /* This is extremely similar to the code for the unsigned case
4609                    above.  For 2.7 we should merge these variants, but for
4610                    2.6.1 I don't want to touch the code for unsigned since that
4611                    get used in C.  The signed case will only be used by other
4612                    languages (Ada).  */
4613
4614                 rtx t1, t2, t3;
4615                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4616                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4617                                    floor_log2 (d), tquotient, 0);
4618                 t2 = expand_binop (compute_mode, and_optab, op0,
4619                                    GEN_INT (d - 1),
4620                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4621                 t3 = gen_reg_rtx (compute_mode);
4622                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4623                                       compute_mode, 1, 1);
4624                 if (t3 == 0)
4625                   {
4626                     rtx lab;
4627                     lab = gen_label_rtx ();
4628                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4629                     expand_inc (t1, const1_rtx);
4630                     emit_label (lab);
4631                     quotient = t1;
4632                   }
4633                 else
4634                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4635                                                           t1, t3),
4636                                             tquotient);
4637                 break;
4638               }
4639
4640             /* Try using an instruction that produces both the quotient and
4641                remainder, using truncation.  We can easily compensate the
4642                quotient or remainder to get ceiling rounding, once we have the
4643                remainder.  Notice that we compute also the final remainder
4644                value here, and return the result right away.  */
4645             if (target == 0 || GET_MODE (target) != compute_mode)
4646               target = gen_reg_rtx (compute_mode);
4647             if (rem_flag)
4648               {
4649                 remainder= (REG_P (target)
4650                             ? target : gen_reg_rtx (compute_mode));
4651                 quotient = gen_reg_rtx (compute_mode);
4652               }
4653             else
4654               {
4655                 quotient = (REG_P (target)
4656                             ? target : gen_reg_rtx (compute_mode));
4657                 remainder = gen_reg_rtx (compute_mode);
4658               }
4659
4660             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4661                                      remainder, 0))
4662               {
4663                 /* This could be computed with a branch-less sequence.
4664                    Save that for later.  */
4665                 rtx tem;
4666                 rtx label = gen_label_rtx ();
4667                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4668                                  compute_mode, label);
4669                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4670                                     NULL_RTX, 0, OPTAB_WIDEN);
4671                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4672                 expand_inc (quotient, const1_rtx);
4673                 expand_dec (remainder, op1);
4674                 emit_label (label);
4675                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4676               }
4677
4678             /* No luck with division elimination or divmod.  Have to do it
4679                by conditionally adjusting op0 *and* the result.  */
4680             {
4681               rtx label1, label2, label3, label4, label5;
4682               rtx adjusted_op0;
4683               rtx tem;
4684
4685               quotient = gen_reg_rtx (compute_mode);
4686               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4687               label1 = gen_label_rtx ();
4688               label2 = gen_label_rtx ();
4689               label3 = gen_label_rtx ();
4690               label4 = gen_label_rtx ();
4691               label5 = gen_label_rtx ();
4692               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4693               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4694                                compute_mode, label1);
4695               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4696                                   quotient, 0, OPTAB_LIB_WIDEN);
4697               if (tem != quotient)
4698                 emit_move_insn (quotient, tem);
4699               emit_jump_insn (gen_jump (label5));
4700               emit_barrier ();
4701               emit_label (label1);
4702               expand_dec (adjusted_op0, const1_rtx);
4703               emit_jump_insn (gen_jump (label4));
4704               emit_barrier ();
4705               emit_label (label2);
4706               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4707                                compute_mode, label3);
4708               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4709                                   quotient, 0, OPTAB_LIB_WIDEN);
4710               if (tem != quotient)
4711                 emit_move_insn (quotient, tem);
4712               emit_jump_insn (gen_jump (label5));
4713               emit_barrier ();
4714               emit_label (label3);
4715               expand_inc (adjusted_op0, const1_rtx);
4716               emit_label (label4);
4717               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4718                                   quotient, 0, OPTAB_LIB_WIDEN);
4719               if (tem != quotient)
4720                 emit_move_insn (quotient, tem);
4721               expand_inc (quotient, const1_rtx);
4722               emit_label (label5);
4723             }
4724           }
4725         break;
4726
4727       case EXACT_DIV_EXPR:
4728         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4729           {
4730             HOST_WIDE_INT d = INTVAL (op1);
4731             unsigned HOST_WIDE_INT ml;
4732             int pre_shift;
4733             rtx t1;
4734
4735             pre_shift = floor_log2 (d & -d);
4736             ml = invert_mod2n (d >> pre_shift, size);
4737             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4738                                pre_shift, NULL_RTX, unsignedp);
4739             quotient = expand_mult (compute_mode, t1,
4740                                     gen_int_mode (ml, compute_mode),
4741                                     NULL_RTX, 1);
4742
4743             insn = get_last_insn ();
4744             set_unique_reg_note (insn,
4745                                  REG_EQUAL,
4746                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4747                                                  compute_mode,
4748                                                  op0, op1));
4749           }
4750         break;
4751
4752       case ROUND_DIV_EXPR:
4753       case ROUND_MOD_EXPR:
4754         if (unsignedp)
4755           {
4756             rtx tem;
4757             rtx label;
4758             label = gen_label_rtx ();
4759             quotient = gen_reg_rtx (compute_mode);
4760             remainder = gen_reg_rtx (compute_mode);
4761             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4762               {
4763                 rtx tem;
4764                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4765                                          quotient, 1, OPTAB_LIB_WIDEN);
4766                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4767                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4768                                           remainder, 1, OPTAB_LIB_WIDEN);
4769               }
4770             tem = plus_constant (op1, -1);
4771             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4772             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4773             expand_inc (quotient, const1_rtx);
4774             expand_dec (remainder, op1);
4775             emit_label (label);
4776           }
4777         else
4778           {
4779             rtx abs_rem, abs_op1, tem, mask;
4780             rtx label;
4781             label = gen_label_rtx ();
4782             quotient = gen_reg_rtx (compute_mode);
4783             remainder = gen_reg_rtx (compute_mode);
4784             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4785               {
4786                 rtx tem;
4787                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4788                                          quotient, 0, OPTAB_LIB_WIDEN);
4789                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4790                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4791                                           remainder, 0, OPTAB_LIB_WIDEN);
4792               }
4793             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4794             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4795             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4796                                 1, NULL_RTX, 1);
4797             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4798             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4799                                 NULL_RTX, 0, OPTAB_WIDEN);
4800             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4801                                  size - 1, NULL_RTX, 0);
4802             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4803                                 NULL_RTX, 0, OPTAB_WIDEN);
4804             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4805                                 NULL_RTX, 0, OPTAB_WIDEN);
4806             expand_inc (quotient, tem);
4807             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4808                                 NULL_RTX, 0, OPTAB_WIDEN);
4809             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4810                                 NULL_RTX, 0, OPTAB_WIDEN);
4811             expand_dec (remainder, tem);
4812             emit_label (label);
4813           }
4814         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4815
4816       default:
4817         gcc_unreachable ();
4818       }
4819
4820   if (quotient == 0)
4821     {
4822       if (target && GET_MODE (target) != compute_mode)
4823         target = 0;
4824
4825       if (rem_flag)
4826         {
4827           /* Try to produce the remainder without producing the quotient.
4828              If we seem to have a divmod pattern that does not require widening,
4829              don't try widening here.  We should really have a WIDEN argument
4830              to expand_twoval_binop, since what we'd really like to do here is
4831              1) try a mod insn in compute_mode
4832              2) try a divmod insn in compute_mode
4833              3) try a div insn in compute_mode and multiply-subtract to get
4834                 remainder
4835              4) try the same things with widening allowed.  */
4836           remainder
4837             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4838                                  op0, op1, target,
4839                                  unsignedp,
4840                                  ((optab_handler (optab2, compute_mode)
4841                                    != CODE_FOR_nothing)
4842                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4843           if (remainder == 0)
4844             {
4845               /* No luck there.  Can we do remainder and divide at once
4846                  without a library call?  */
4847               remainder = gen_reg_rtx (compute_mode);
4848               if (! expand_twoval_binop ((unsignedp
4849                                           ? udivmod_optab
4850                                           : sdivmod_optab),
4851                                          op0, op1,
4852                                          NULL_RTX, remainder, unsignedp))
4853                 remainder = 0;
4854             }
4855
4856           if (remainder)
4857             return gen_lowpart (mode, remainder);
4858         }
4859
4860       /* Produce the quotient.  Try a quotient insn, but not a library call.
4861          If we have a divmod in this mode, use it in preference to widening
4862          the div (for this test we assume it will not fail). Note that optab2
4863          is set to the one of the two optabs that the call below will use.  */
4864       quotient
4865         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4866                              op0, op1, rem_flag ? NULL_RTX : target,
4867                              unsignedp,
4868                              ((optab_handler (optab2, compute_mode)
4869                                != CODE_FOR_nothing)
4870                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4871
4872       if (quotient == 0)
4873         {
4874           /* No luck there.  Try a quotient-and-remainder insn,
4875              keeping the quotient alone.  */
4876           quotient = gen_reg_rtx (compute_mode);
4877           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4878                                      op0, op1,
4879                                      quotient, NULL_RTX, unsignedp))
4880             {
4881               quotient = 0;
4882               if (! rem_flag)
4883                 /* Still no luck.  If we are not computing the remainder,
4884                    use a library call for the quotient.  */
4885                 quotient = sign_expand_binop (compute_mode,
4886                                               udiv_optab, sdiv_optab,
4887                                               op0, op1, target,
4888                                               unsignedp, OPTAB_LIB_WIDEN);
4889             }
4890         }
4891     }
4892
4893   if (rem_flag)
4894     {
4895       if (target && GET_MODE (target) != compute_mode)
4896         target = 0;
4897
4898       if (quotient == 0)
4899         {
4900           /* No divide instruction either.  Use library for remainder.  */
4901           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4902                                          op0, op1, target,
4903                                          unsignedp, OPTAB_LIB_WIDEN);
4904           /* No remainder function.  Try a quotient-and-remainder
4905              function, keeping the remainder.  */
4906           if (!remainder)
4907             {
4908               remainder = gen_reg_rtx (compute_mode);
4909               if (!expand_twoval_binop_libfunc
4910                   (unsignedp ? udivmod_optab : sdivmod_optab,
4911                    op0, op1,
4912                    NULL_RTX, remainder,
4913                    unsignedp ? UMOD : MOD))
4914                 remainder = NULL_RTX;
4915             }
4916         }
4917       else
4918         {
4919           /* We divided.  Now finish doing X - Y * (X / Y).  */
4920           remainder = expand_mult (compute_mode, quotient, op1,
4921                                    NULL_RTX, unsignedp);
4922           remainder = expand_binop (compute_mode, sub_optab, op0,
4923                                     remainder, target, unsignedp,
4924                                     OPTAB_LIB_WIDEN);
4925         }
4926     }
4927
4928   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4929 }
4930 \f
4931 /* Return a tree node with data type TYPE, describing the value of X.
4932    Usually this is an VAR_DECL, if there is no obvious better choice.
4933    X may be an expression, however we only support those expressions
4934    generated by loop.c.  */
4935
4936 tree
4937 make_tree (tree type, rtx x)
4938 {
4939   tree t;
4940
4941   switch (GET_CODE (x))
4942     {
4943     case CONST_INT:
4944       {
4945         HOST_WIDE_INT hi = 0;
4946
4947         if (INTVAL (x) < 0
4948             && !(TYPE_UNSIGNED (type)
4949                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4950                      < HOST_BITS_PER_WIDE_INT)))
4951           hi = -1;
4952
4953         t = build_int_cst_wide (type, INTVAL (x), hi);
4954
4955         return t;
4956       }
4957
4958     case CONST_DOUBLE:
4959       if (GET_MODE (x) == VOIDmode)
4960         t = build_int_cst_wide (type,
4961                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4962       else
4963         {
4964           REAL_VALUE_TYPE d;
4965
4966           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4967           t = build_real (type, d);
4968         }
4969
4970       return t;
4971
4972     case CONST_VECTOR:
4973       {
4974         int units = CONST_VECTOR_NUNITS (x);
4975         tree itype = TREE_TYPE (type);
4976         tree t = NULL_TREE;
4977         int i;
4978
4979
4980         /* Build a tree with vector elements.  */
4981         for (i = units - 1; i >= 0; --i)
4982           {
4983             rtx elt = CONST_VECTOR_ELT (x, i);
4984             t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4985           }
4986
4987         return build_vector (type, t);
4988       }
4989
4990     case PLUS:
4991       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4992                           make_tree (type, XEXP (x, 1)));
4993
4994     case MINUS:
4995       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4996                           make_tree (type, XEXP (x, 1)));
4997
4998     case NEG:
4999       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5000
5001     case MULT:
5002       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5003                           make_tree (type, XEXP (x, 1)));
5004
5005     case ASHIFT:
5006       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5007                           make_tree (type, XEXP (x, 1)));
5008
5009     case LSHIFTRT:
5010       t = unsigned_type_for (type);
5011       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5012                                          make_tree (t, XEXP (x, 0)),
5013                                          make_tree (type, XEXP (x, 1))));
5014
5015     case ASHIFTRT:
5016       t = signed_type_for (type);
5017       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5018                                          make_tree (t, XEXP (x, 0)),
5019                                          make_tree (type, XEXP (x, 1))));
5020
5021     case DIV:
5022       if (TREE_CODE (type) != REAL_TYPE)
5023         t = signed_type_for (type);
5024       else
5025         t = type;
5026
5027       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5028                                          make_tree (t, XEXP (x, 0)),
5029                                          make_tree (t, XEXP (x, 1))));
5030     case UDIV:
5031       t = unsigned_type_for (type);
5032       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5033                                          make_tree (t, XEXP (x, 0)),
5034                                          make_tree (t, XEXP (x, 1))));
5035
5036     case SIGN_EXTEND:
5037     case ZERO_EXTEND:
5038       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5039                                           GET_CODE (x) == ZERO_EXTEND);
5040       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5041
5042     case CONST:
5043       return make_tree (type, XEXP (x, 0));
5044
5045     case SYMBOL_REF:
5046       t = SYMBOL_REF_DECL (x);
5047       if (t)
5048         return fold_convert (type, build_fold_addr_expr (t));
5049       /* else fall through.  */
5050
5051     default:
5052       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5053
5054       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5055          address mode to pointer mode.  */
5056       if (POINTER_TYPE_P (type))
5057         x = convert_memory_address_addr_space
5058               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5059
5060       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5061          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5062       t->decl_with_rtl.rtl = x;
5063
5064       return t;
5065     }
5066 }
5067 \f
5068 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5069    and returning TARGET.
5070
5071    If TARGET is 0, a pseudo-register or constant is returned.  */
5072
5073 rtx
5074 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5075 {
5076   rtx tem = 0;
5077
5078   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5079     tem = simplify_binary_operation (AND, mode, op0, op1);
5080   if (tem == 0)
5081     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5082
5083   if (target == 0)
5084     target = tem;
5085   else if (tem != target)
5086     emit_move_insn (target, tem);
5087   return target;
5088 }
5089
5090 /* Helper function for emit_store_flag.  */
5091 static rtx
5092 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5093              enum machine_mode mode, enum machine_mode compare_mode,
5094              int unsignedp, rtx x, rtx y, int normalizep,
5095              enum machine_mode target_mode)
5096 {
5097   struct expand_operand ops[4];
5098   rtx op0, last, comparison, subtarget;
5099   enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5100
5101   last = get_last_insn ();
5102   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5103   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5104   if (!x || !y)
5105     {
5106       delete_insns_since (last);
5107       return NULL_RTX;
5108     }
5109
5110   if (target_mode == VOIDmode)
5111     target_mode = result_mode;
5112   if (!target)
5113     target = gen_reg_rtx (target_mode);
5114
5115   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5116
5117   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5118   create_fixed_operand (&ops[1], comparison);
5119   create_fixed_operand (&ops[2], x);
5120   create_fixed_operand (&ops[3], y);
5121   if (!maybe_expand_insn (icode, 4, ops))
5122     {
5123       delete_insns_since (last);
5124       return NULL_RTX;
5125     }
5126   subtarget = ops[0].value;
5127
5128   /* If we are converting to a wider mode, first convert to
5129      TARGET_MODE, then normalize.  This produces better combining
5130      opportunities on machines that have a SIGN_EXTRACT when we are
5131      testing a single bit.  This mostly benefits the 68k.
5132
5133      If STORE_FLAG_VALUE does not have the sign bit set when
5134      interpreted in MODE, we can do this conversion as unsigned, which
5135      is usually more efficient.  */
5136   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5137     {
5138       convert_move (target, subtarget,
5139                     val_signbit_known_clear_p (result_mode,
5140                                                STORE_FLAG_VALUE));
5141       op0 = target;
5142       result_mode = target_mode;
5143     }
5144   else
5145     op0 = subtarget;
5146
5147   /* If we want to keep subexpressions around, don't reuse our last
5148      target.  */
5149   if (optimize)
5150     subtarget = 0;
5151
5152   /* Now normalize to the proper value in MODE.  Sometimes we don't
5153      have to do anything.  */
5154   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5155     ;
5156   /* STORE_FLAG_VALUE might be the most negative number, so write
5157      the comparison this way to avoid a compiler-time warning.  */
5158   else if (- normalizep == STORE_FLAG_VALUE)
5159     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5160
5161   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5162      it hard to use a value of just the sign bit due to ANSI integer
5163      constant typing rules.  */
5164   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5165     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5166                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5167                         normalizep == 1);
5168   else
5169     {
5170       gcc_assert (STORE_FLAG_VALUE & 1);
5171
5172       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5173       if (normalizep == -1)
5174         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5175     }
5176
5177   /* If we were converting to a smaller mode, do the conversion now.  */
5178   if (target_mode != result_mode)
5179     {
5180       convert_move (target, op0, 0);
5181       return target;
5182     }
5183   else
5184     return op0;
5185 }
5186
5187
5188 /* A subroutine of emit_store_flag only including "tricks" that do not
5189    need a recursive call.  These are kept separate to avoid infinite
5190    loops.

        TARGET, CODE, OP0, OP1, MODE, UNSIGNEDP and NORMALIZEP are as for
        emit_store_flag.  TARGET_MODE is the mode the caller wants the
        result in (emit_store_flag passes the mode of TARGET, or VOIDmode
        when TARGET is null).  Returns the rtx holding the result, or 0 if
        none of the strategies tried here produced one.

        Note that the double-word cases below do call emit_store_flag, but
        only on word_mode operands derived from OP0.  */
5191
5192 static rtx
5193 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5194                    enum machine_mode mode, int unsignedp, int normalizep,
5195                    enum machine_mode target_mode)
5196 {
5197   rtx subtarget;
5198   enum insn_code icode;
5199   enum machine_mode compare_mode;
5200   enum mode_class mclass;
5201   enum rtx_code scode;
5202   rtx tem;
5203
5204   if (unsignedp)
5205     code = unsigned_condition (code);
5207
5208   /* If one operand is constant, make it the second one.  Only do this
5209      if the other operand is not constant as well.  */
5210
5211   if (swap_commutative_operands_p (op0, op1))
5212     {
5213       tem = op0;
5214       op0 = op1;
5215       op1 = tem;
5216       code = swap_condition (code);
5217     }
5218
5219   if (mode == VOIDmode)
5220     mode = GET_MODE (op0);
5221
5222   /* For some comparisons with 1 and -1, we can convert this to
5223      comparisons with zero.  This will often produce more opportunities for
5224      store-flag insns.  */
5225
5226   switch (code)
5227     {
5228     case LT:
5229       if (op1 == const1_rtx)
5230         op1 = const0_rtx, code = LE;
5231       break;
5232     case LE:
5233       if (op1 == constm1_rtx)
5234         op1 = const0_rtx, code = LT;
5235       break;
5236     case GE:
5237       if (op1 == const1_rtx)
5238         op1 = const0_rtx, code = GT;
5239       break;
5240     case GT:
5241       if (op1 == constm1_rtx)
5242         op1 = const0_rtx, code = GE;
5243       break;
5244     case GEU:
5245       if (op1 == const1_rtx)
5246         op1 = const0_rtx, code = NE;
5247       break;
5248     case LTU:
5249       if (op1 == const1_rtx)
5250         op1 = const0_rtx, code = EQ;
5251       break;
5252     default:
5253       break;
5254     }
5255
5256   /* If we are comparing a double-word integer with zero or -1, we can
5257      convert the comparison into one involving a single word.  */
5258   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5259       && GET_MODE_CLASS (mode) == MODE_INT
5260       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5261     {
5262       if ((code == EQ || code == NE)
5263           && (op1 == const0_rtx || op1 == constm1_rtx))
5264         {
5265           rtx op00, op01;
5266
5267           /* Do a logical OR or AND of the two words and compare the
5268              result.  */
5269           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5270           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5271           tem = expand_binop (word_mode,
5272                               op1 == const0_rtx ? ior_optab : and_optab,
5273                               op00, op01, NULL_RTX, unsignedp,
5274                               OPTAB_DIRECT);
5275
5276           if (tem != 0)
5277             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5278                                    unsignedp, normalizep);
5279         }
5280       else if ((code == LT || code == GE) && op1 == const0_rtx)
5281         {
5282           rtx op0h;
5283
5284           /* If testing the sign bit, can just test on high word.  */
5285           op0h = simplify_gen_subreg (word_mode, op0, mode,
5286                                       subreg_highpart_offset (word_mode,
5287                                                               mode));
5288           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5289                                  unsignedp, normalizep);
5290         }
5291       else
5292         tem = NULL_RTX;
5293
5294       if (tem)
5295         {
5296           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5297             return tem;
5298           if (!target)
5299             target = gen_reg_rtx (target_mode);
5300
5301           convert_move (target, tem,
5302                         !val_signbit_known_set_p (word_mode,
5303                                                   (normalizep ? normalizep
5304                                                    : STORE_FLAG_VALUE)));
5305           return target;
5306         }
5307     }
5308
5309   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5310      complement of A (for GE) and shifting the sign bit to the low bit.  */
5311   if (op1 == const0_rtx && (code == LT || code == GE)
5312       && GET_MODE_CLASS (mode) == MODE_INT
5313       && (normalizep || STORE_FLAG_VALUE == 1
5314           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5315     {
5316       subtarget = target;
5317
5318       if (!target)
5319         target_mode = mode;
5320
5321       /* If the result is to be wider than OP0, it is best to convert it
5322          first.  If it is to be narrower, it is *incorrect* to convert it
5323          first.  */
5324       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5325         {
5326           op0 = convert_modes (target_mode, mode, op0, 0);
5327           mode = target_mode;
5328         }
5329
5330       if (target_mode != mode)
5331         subtarget = 0;
5332
5333       if (code == GE)
5334         op0 = expand_unop (mode, one_cmpl_optab, op0,
5335                            ((STORE_FLAG_VALUE == 1 || normalizep)
5336                             ? 0 : subtarget), 0);
5337
5338       if (STORE_FLAG_VALUE == 1 || normalizep)
5339         /* If we are supposed to produce a 0/1 value, we want to do
5340            a logical shift from the sign bit to the low-order bit; for
5341            a -1/0 value, we do an arithmetic shift.  */
5342         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5343                             GET_MODE_BITSIZE (mode) - 1,
5344                             subtarget, normalizep != -1);
5345
5346       if (mode != target_mode)
5347         op0 = convert_modes (target_mode, mode, op0, 0);
5348
5349       return op0;
5350     }
5351
       /* Look for a cstore pattern, trying MODE first and then each wider
          mode.  MODE_CC comparisons always use the CCmode pattern.  Only the
          first mode that has a pattern is attempted; see the break below.  */
5352   mclass = GET_MODE_CLASS (mode);
5353   for (compare_mode = mode; compare_mode != VOIDmode;
5354        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5355     {
5356      enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5357      icode = optab_handler (cstore_optab, optab_mode);
5358      if (icode != CODE_FOR_nothing)
5359         {
5360           do_pending_stack_adjust ();
5361           tem = emit_cstore (target, icode, code, mode, compare_mode,
5362                              unsignedp, op0, op1, normalizep, target_mode);
5363           if (tem)
5364             return tem;
5365
5366           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5367             {
                   /* Retry with the operands exchanged.  The swapped
                      condition must be derived from CODE as it stands now:
                      CODE may have been rewritten above (operand
                      canonicalization, comparison-with-zero conversion), so
                      a value computed on entry would pair OP1, OP0 with a
                      stale condition and test the wrong relation.  */
                   scode = swap_condition (code);
5368               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5369                                  unsignedp, op1, op0, normalizep, target_mode);
5370               if (tem)
5371                 return tem;
5372             }
5373           break;
5374         }
5375     }
5376
5377   return 0;
5378 }
5379
5380 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5381    and storing in TARGET.  Normally return TARGET.
5382    Return 0 if that cannot be done.
5383
5384    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5385    it is VOIDmode, they cannot both be CONST_INT.
5386
5387    UNSIGNEDP is for the case where we have to widen the operands
5388    to perform the operation.  It says to use zero-extension.
5389
5390    NORMALIZEP is 1 if we should convert the result to be either zero
5391    or one.  Normalize is -1 if we should convert the result to be
5392    either zero or -1.  If NORMALIZEP is zero, the result will be left
5393    "raw" out of the scc insn.

        TARGET may be null; the result is then returned in whatever rtx the
        chosen expansion produced.  Whenever an attempted expansion fails,
        the insns it emitted are removed again with delete_insns_since.  */
5394
5395 rtx
5396 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5397                  enum machine_mode mode, int unsignedp, int normalizep)
5398 {
5399   enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5400   enum rtx_code rcode;
5401   rtx subtarget;
5402   rtx tem, last, trueval;
5403
       /* First try the strategies collected in emit_store_flag_1.  */
5404   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5405                            target_mode);
5406   if (tem)
5407     return tem;
5408
5409   /* If we reached here, we can't do this with a scc insn, however there
5410      are some comparisons that can be done in other ways.  Don't do any
5411      of these cases if branches are very cheap.  */
5412   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5413     return 0;
5414
5415   /* See what we need to return.  We can only return a 1, -1, or the
5416      sign bit.  */
5417
5418   if (normalizep == 0)
5419     {
           /* A raw result was requested.  If STORE_FLAG_VALUE is 1 or -1,
              normalize to exactly that value from here on; if it is just
              the sign bit of MODE, keep NORMALIZEP at zero; any other
              STORE_FLAG_VALUE cannot be produced by the code below.  */
5420       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5421         normalizep = STORE_FLAG_VALUE;
5422
5423       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5424         ;
5425       else
5426         return 0;
5427     }
5428
       /* Remember where we are so that failed attempts can be undone.  */
5429   last = get_last_insn ();
5430
5431   /* If optimizing, use different pseudo registers for each insn, instead
5432      of reusing the same pseudo.  This leads to better CSE, but slows
5433      down the compiler, since there are more pseudos.  */
5434   subtarget = (!optimize
5435                && (target_mode == mode)) ? target : NULL_RTX;
       /* TRUEVAL is the constant the result holds when the comparison is
          true: NORMALIZEP if nonzero, otherwise STORE_FLAG_VALUE.  */
5436   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5437
5438   /* For floating-point comparisons, try the reverse comparison or try
5439      changing the "orderedness" of the comparison.  */
5440   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5441     {
5442       enum rtx_code first_code;
5443       bool and_them;
5444
5445       rcode = reverse_condition_maybe_unordered (code);
5446       if (can_compare_p (rcode, mode, ccp_store_flag)
5447           && (code == ORDERED || code == UNORDERED
5448               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5449               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5450         {
               /* WANT_ADD is set when the raw flag value and the requested
                  true value are 1 and -1 in either order.  The reversed
                  flag (STORE_FLAG_VALUE or 0) plus NORMALIZEP then gives
                  the requested result directly: e.g. 1/0 plus -1 is 0/-1.
                  Otherwise the reversed result is flipped by XORing it
                  with TRUEVAL.  */
5451           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5452                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5453
5454           /* For the reverse comparison, use either an addition or a XOR.  */
5455           if (want_add
5456               && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5457                            optimize_insn_for_speed_p ()) == 0)
5458             {
5459               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5460                                        STORE_FLAG_VALUE, target_mode);
5461               if (tem)
5462                 return expand_binop (target_mode, add_optab, tem,
5463                                      GEN_INT (normalizep),
5464                                      target, 0, OPTAB_WIDEN);
5465             }
5466           else if (!want_add
5467                    && rtx_cost (trueval, XOR, 1,
5468                                 optimize_insn_for_speed_p ()) == 0)
5469             {
5470               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5471                                        normalizep, target_mode);
5472               if (tem)
5473                 return expand_binop (target_mode, xor_optab, tem, trueval,
5474                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5475             }
5476         }
5477
           /* Discard whatever a failed attempt above may have emitted.  */
5478       delete_insns_since (last);
5479
5480       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5481       if (code == ORDERED || code == UNORDERED)
5482         return 0;
5483
           /* NOTE(review): split_comparison is defined elsewhere; judging by
              the assertion and comments below it presumably splits CODE into
              an ORDERED/UNORDERED test (FIRST_CODE) plus a second comparison
              stored back into CODE, with AND_THEM saying whether the two
              are ANDed or ORed -- confirm against its definition.  */
5484       and_them = split_comparison (code, mode, &first_code, &code);
5485
5486       /* If there are no NaNs, the first comparison should always fall through.
5487          Effectively change the comparison to the other one.  */
5488       if (!HONOR_NANS (mode))
5489         {
5490           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5491           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5492                                     target_mode);
5493         }
5494
5495 #ifdef HAVE_conditional_move
5496       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5497          conditional move.  */
5498       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5499                                normalizep, target_mode);
5500       if (tem == 0)
5501         return 0;
5502
           /* NOTE(review): presumably emit_conditional_move selects its first
              value operand when "OP0 CODE OP1" holds and the second one
              otherwise, so the AND case yields TEM-or-zero and the OR case
              yields TRUEVAL-or-TEM -- confirm against its definition.  */
5503       if (and_them)
5504         tem = emit_conditional_move (target, code, op0, op1, mode,
5505                                      tem, const0_rtx, GET_MODE (tem), 0);
5506       else
5507         tem = emit_conditional_move (target, code, op0, op1, mode,
5508                                      trueval, tem, GET_MODE (tem), 0);
5509
5510       if (tem == 0)
5511         delete_insns_since (last);
5512       return tem;
5513 #else
5514       return 0;
5515 #endif
5516     }
5517
5518   /* The remaining tricks only apply to integer comparisons.  */
5519
5520   if (GET_MODE_CLASS (mode) != MODE_INT)
5521     return 0;
5522
5523   /* If this is an equality comparison of integers, we can try to exclusive-or
5524      (or subtract) the two operands and use a recursive call to try the
5525      comparison with zero.  Don't do any of these cases if branches are
5526      very cheap.  */
5527
5528   if ((code == EQ || code == NE) && op1 != const0_rtx)
5529     {
5530       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5531                           OPTAB_WIDEN);
5532
5533       if (tem == 0)
5534         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5535                             OPTAB_WIDEN);
5536       if (tem != 0)
5537         tem = emit_store_flag (target, code, tem, const0_rtx,
5538                                mode, unsignedp, normalizep);
5539       if (tem != 0)
5540         return tem;
5541
5542       delete_insns_since (last);
5543     }
5544
5545   /* For integer comparisons, try the reverse comparison.  However, for
5546      small X and if we'd have anyway to extend, implementing "X != 0"
5547      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5548   rcode = reverse_condition (code);
5549   if (can_compare_p (rcode, mode, ccp_store_flag)
5550       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5551             && code == NE
5552             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5553             && op1 == const0_rtx))
5554     {
5555       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5556                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5557
5558       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5559       if (want_add
5560           && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5561                        optimize_insn_for_speed_p ()) == 0)
5562         {
5563           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5564                                    STORE_FLAG_VALUE, target_mode);
5565           if (tem != 0)
5566             tem = expand_binop (target_mode, add_optab, tem,
5567                                 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5568         }
5569       else if (!want_add
5570                && rtx_cost (trueval, XOR, 1,
5571                             optimize_insn_for_speed_p ()) == 0)
5572         {
5573           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5574                                    normalizep, target_mode);
5575           if (tem != 0)
5576             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5577                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5578         }
5579
5580       if (tem != 0)
5581         return tem;
5582       delete_insns_since (last);
5583     }
5584
5585   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5586      the constant zero.  Reject all other comparisons at this point.  Only
5587      do LE and GT if branches are expensive since they are expensive on
5588      2-operand machines.  */
5589
5590   if (op1 != const0_rtx
5591       || (code != EQ && code != NE
5592           && (BRANCH_COST (optimize_insn_for_speed_p (),
5593                            false) <= 1 || (code != LE && code != GT))))
5594     return 0;
5595
5596   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5597      do the necessary operation below.  */
5598
5599   tem = 0;
5600
5601   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5602      the sign bit set.  */
5603
5604   if (code == LE)
5605     {
5606       /* This is destructive, so SUBTARGET can't be OP0.  */
5607       if (rtx_equal_p (subtarget, op0))
5608         subtarget = 0;
5609
5610       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5611                           OPTAB_WIDEN);
5612       if (tem)
5613         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5614                             OPTAB_WIDEN);
5615     }
5616
5617   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5618      number of bits in the mode of OP0, minus one.  The arithmetic right
          shift yields 0 for A >= 0 and -1 for A < 0, so the difference has
          its sign bit set exactly when A > 0.  */
5619
5620   if (code == GT)
5621     {
5622       if (rtx_equal_p (subtarget, op0))
5623         subtarget = 0;
5624
5625       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5626                           GET_MODE_BITSIZE (mode) - 1,
5627                           subtarget, 0);
5628       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5629                           OPTAB_WIDEN);
5630     }
5631
5632   if (code == EQ || code == NE)
5633     {
5634       /* For EQ or NE, one way to do the comparison is to apply an operation
5635          that converts the operand into a positive number if it is nonzero
5636          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5637          for NE we negate.  This puts the result in the sign bit.  Then we
5638          normalize with a shift, if needed.
5639
5640          Two operations that can do the above actions are ABS and FFS, so try
5641          them.  If that doesn't work, and MODE is smaller than a full word,
5642          we can use zero-extension to the wider mode (an unsigned conversion)
5643          as the operation.  */
5644
5645       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5646          that is compensated by the subsequent overflow when subtracting
5647          one / negating.  */
5648
5649       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5650         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5651       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5652         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5653       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5654         {
5655           tem = convert_modes (word_mode, mode, op0, 1);
               /* From here on the computation is carried out in word_mode.  */
5656           mode = word_mode;
5657         }
5658
5659       if (tem != 0)
5660         {
5661           if (code == EQ)
5662             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5663                                 0, OPTAB_WIDEN);
5664           else
5665             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5666         }
5667
5668       /* If we couldn't do it that way, for NE we can "or" the two's complement
5669          of the value with itself.  For EQ, we take the one's complement of
5670          that "or", which is an extra insn, so we only handle EQ if branches
5671          are expensive.  */
5672
5673       if (tem == 0
5674           && (code == NE
5675               || BRANCH_COST (optimize_insn_for_speed_p (),
5676                               false) > 1))
5677         {
5678           if (rtx_equal_p (subtarget, op0))
5679             subtarget = 0;
5680
5681           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5682           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5683                               OPTAB_WIDEN);
5684
5685           if (tem && code == EQ)
5686             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5687         }
5688     }
5689
       /* The answer now sits in the sign bit of TEM.  When a normalized
          value was requested, shift it down to the low bit: a logical shift
          gives 0/1, an arithmetic shift gives 0/-1.  */
5690   if (tem && normalizep)
5691     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5692                         GET_MODE_BITSIZE (mode) - 1,
5693                         subtarget, normalizep == 1);
5694
       /* Deliver the result.  Without a TARGET, TEM itself is returned.  If
          TEM is not in TARGET's mode it is converted into TARGET.  Otherwise
          TEM is copied into TARGET unless the operations above were given
          TARGET as their SUBTARGET.  On failure, undo everything emitted
          since LAST.  */
5695   if (tem)
5696     {
5697       if (!target)
5698         ;
5699       else if (GET_MODE (tem) != target_mode)
5700         {
5701           convert_move (target, tem, 0);
5702           tem = target;
5703         }
5704       else if (!subtarget)
5705         {
5706           emit_move_insn (target, tem);
5707           tem = target;
5708         }
5709     }
5710   else
5711     delete_insns_since (last);
5712
5713   return tem;
5714 }
5715
5716 /* Like emit_store_flag, but always succeeds.

        The arguments are as for emit_store_flag.  If emit_store_flag cannot
        do the job, the value is built with an explicit store / compare /
        jump / store sequence.  When TARGET is null a new word_mode pseudo
        is used.  In the fallback sequence the value stored for "true" is
        NORMALIZEP when that is nonzero and 1 otherwise.  Returns the rtx
        holding the result.  */
5717
5718 rtx
5719 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5720                        enum machine_mode mode, int unsignedp, int normalizep)
5721 {
5722   rtx tem, label;
5723   rtx trueval, falseval;
5724
5725   /* First see if emit_store_flag can do the job.  */
5726   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5727   if (tem != 0)
5728     return tem;
5729
5730   if (!target)
5731     target = gen_reg_rtx (word_mode);
5732
5733   /* If this failed, we have to do this with set/compare/jump/set code.
5734      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5735   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5736   if (code == NE
5737       && GET_MODE_CLASS (mode) == MODE_INT
5738       && REG_P (target)
5739       && op0 == target
5740       && op1 == const0_rtx)
5741     {
           /* TARGET already holds zero in the "false" case, so jump over
              the store of TRUEVAL when TARGET equals zero.  */
5742       label = gen_label_rtx ();
5743       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5744                                mode, NULL_RTX, NULL_RTX, label, -1);
5745       emit_move_insn (target, trueval);
5746       emit_label (label);
5747       return target;
5748     }
5749
       /* TARGET is written before the comparison is emitted, so use a new
          pseudo if it is not a register or is mentioned in either
          operand.  */
5750   if (!REG_P (target)
5751       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5752     target = gen_reg_rtx (GET_MODE (target));
5753
5754   /* Jump in the right direction if the target cannot implement CODE
5755      but can jump on its reverse condition.  */
5756   falseval = const0_rtx;
5757   if (! can_compare_p (code, mode, ccp_jump)
5758       && (! FLOAT_MODE_P (mode)
5759           || code == ORDERED || code == UNORDERED
5760           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5761           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5762     {
5763       enum rtx_code rcode;
5764       if (FLOAT_MODE_P (mode))
5765         rcode = reverse_condition_maybe_unordered (code);
5766       else
5767         rcode = reverse_condition (code);
5768
5769       /* Canonicalize to UNORDERED for the libcall.  */
5770       if (can_compare_p (rcode, mode, ccp_jump)
5771           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5772         {
               /* Test the reverse condition instead, and exchange the two
                  stored values so the final result is unchanged.  */
5773           falseval = trueval;
5774           trueval = const0_rtx;
5775           code = rcode;
5776         }
5777     }
5778
       /* Emit: TARGET = TRUEVAL; if (OP0 CODE OP1) goto LABEL;
          TARGET = FALSEVAL; LABEL:  */
5779   emit_move_insn (target, trueval);
5780   label = gen_label_rtx ();
5781   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5782                            NULL_RTX, label, -1);
5783
5784   emit_move_insn (target, falseval);
5785   emit_label (label);
5786
5787   return target;
5788 }
5789 \f
5790 /* Compare ARG1 with ARG2, both of mode MODE, using comparison OP, and
5791    jump to LABEL when the comparison holds.  The comparison is unsigned
5792    exactly for LTU, LEU, GTU and GEU; do_compare_rtx_and_jump does the work.  */
5793
5794 static void
5795 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5796                  rtx label)
5797 {
5798   int is_unsigned = !(op != LTU && op != LEU && op != GTU && op != GEU);
5799   do_compare_rtx_and_jump (arg1, arg2, op, is_unsigned, mode, NULL_RTX,
5800                            NULL_RTX, label, -1);
5801 }