gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
   5    2011
   6    Free Software Foundation, Inc.
   7
   8 This file is part of GCC.
   9
  10 GCC is free software; you can redistribute it and/or modify it under
  11 the terms of the GNU General Public License as published by the Free
  12 Software Foundation; either version 3, or (at your option) any later
  13 version.
  14
  15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  17 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  18 for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with GCC; see the file COPYING3.  If not see
  22 <http://www.gnu.org/licenses/>.  */
  23
  24
  25 #include "config.h"
  26 #include "system.h"
  27 #include "coretypes.h"
  28 #include "tm.h"
  29 #include "diagnostic-core.h"
  30 #include "rtl.h"
  31 #include "tree.h"
  32 #include "tm_p.h"
  33 #include "flags.h"
  34 #include "insn-config.h"
  35 #include "expr.h"
  36 #include "optabs.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39 #include "df.h"
  40 #include "target.h"
  41 #include "expmed.h"
  42
  43 struct target_expmed default_target_expmed;
  44 #if SWITCHABLE_TARGET
  45 struct target_expmed *this_target_expmed = &default_target_expmed;
  46 #endif
  47
  48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  49                                    unsigned HOST_WIDE_INT,
  50                                    unsigned HOST_WIDE_INT,
  51                                    unsigned HOST_WIDE_INT,
  52                                    unsigned HOST_WIDE_INT,
  53                                    rtx);
  54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  55                                    unsigned HOST_WIDE_INT,
  56                                    unsigned HOST_WIDE_INT,
  57                                    unsigned HOST_WIDE_INT,
  58                                    rtx);
  59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  60                                     unsigned HOST_WIDE_INT,
  61                                     unsigned HOST_WIDE_INT,
  62                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  63 static rtx mask_rtx (enum machine_mode, int, int, int);
  64 static rtx lshift_value (enum machine_mode, rtx, int, int);
  65 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  66                                     unsigned HOST_WIDE_INT, int);
  67 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  68 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  69 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  70
  71 /* Test whether a value is zero of a power of two.  */
  72 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  73
  74 #ifndef SLOW_UNALIGNED_ACCESS
  75 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  76 #endif
  77
  78
  79 /* Reduce conditional compilation elsewhere.  */
  80 #ifndef HAVE_insv
  81 #define HAVE_insv       0
  82 #define CODE_FOR_insv   CODE_FOR_nothing
  83 #define gen_insv(a,b,c,d) NULL_RTX
  84 #endif
  85 #ifndef HAVE_extv
  86 #define HAVE_extv       0
  87 #define CODE_FOR_extv   CODE_FOR_nothing
  88 #define gen_extv(a,b,c,d) NULL_RTX
  89 #endif
  90 #ifndef HAVE_extzv
  91 #define HAVE_extzv      0
  92 #define CODE_FOR_extzv  CODE_FOR_nothing
  93 #define gen_extzv(a,b,c,d) NULL_RTX
  94 #endif
  95
  96 void
  97 init_expmed (void)
  98 {
  99   struct
 100   {
 101     struct rtx_def reg;         rtunion reg_fld[2];
 102     struct rtx_def plus;        rtunion plus_fld1;
 103     struct rtx_def neg;
 104     struct rtx_def mult;        rtunion mult_fld1;
 105     struct rtx_def sdiv;        rtunion sdiv_fld1;
 106     struct rtx_def udiv;        rtunion udiv_fld1;
 107     struct rtx_def zext;
 108     struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
 109     struct rtx_def smod_32;     rtunion smod_32_fld1;
 110     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 111     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 112     struct rtx_def wide_trunc;
 113     struct rtx_def shift;       rtunion shift_fld1;
 114     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 115     struct rtx_def shift_add;   rtunion shift_add_fld1;
 116     struct rtx_def shift_sub0;  rtunion shift_sub0_fld1;
 117     struct rtx_def shift_sub1;  rtunion shift_sub1_fld1;
 118   } all;
 119
 120   rtx pow2[MAX_BITS_PER_WORD];
 121   rtx cint[MAX_BITS_PER_WORD];
 122   int m, n;
 123   enum machine_mode mode, wider_mode;
 124   int speed;
 125
 126
 127   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 128     {
 129       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 130       cint[m] = GEN_INT (m);
 131     }
 132   memset (&all, 0, sizeof all);
 133
 134   PUT_CODE (&all.reg, REG);
 135   /* Avoid using hard regs in ways which may be unsupported.  */
 136   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
 137
 138   PUT_CODE (&all.plus, PLUS);
 139   XEXP (&all.plus, 0) = &all.reg;
 140   XEXP (&all.plus, 1) = &all.reg;
 141
 142   PUT_CODE (&all.neg, NEG);
 143   XEXP (&all.neg, 0) = &all.reg;
 144
 145   PUT_CODE (&all.mult, MULT);
 146   XEXP (&all.mult, 0) = &all.reg;
 147   XEXP (&all.mult, 1) = &all.reg;
 148
 149   PUT_CODE (&all.sdiv, DIV);
 150   XEXP (&all.sdiv, 0) = &all.reg;
 151   XEXP (&all.sdiv, 1) = &all.reg;
 152
 153   PUT_CODE (&all.udiv, UDIV);
 154   XEXP (&all.udiv, 0) = &all.reg;
 155   XEXP (&all.udiv, 1) = &all.reg;
 156
 157   PUT_CODE (&all.sdiv_32, DIV);
 158   XEXP (&all.sdiv_32, 0) = &all.reg;
 159   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 160
 161   PUT_CODE (&all.smod_32, MOD);
 162   XEXP (&all.smod_32, 0) = &all.reg;
 163   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 164
 165   PUT_CODE (&all.zext, ZERO_EXTEND);
 166   XEXP (&all.zext, 0) = &all.reg;
 167
 168   PUT_CODE (&all.wide_mult, MULT);
 169   XEXP (&all.wide_mult, 0) = &all.zext;
 170   XEXP (&all.wide_mult, 1) = &all.zext;
 171
 172   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 173   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 174
 175   PUT_CODE (&all.wide_trunc, TRUNCATE);
 176   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 177
 178   PUT_CODE (&all.shift, ASHIFT);
 179   XEXP (&all.shift, 0) = &all.reg;
 180
 181   PUT_CODE (&all.shift_mult, MULT);
 182   XEXP (&all.shift_mult, 0) = &all.reg;
 183
 184   PUT_CODE (&all.shift_add, PLUS);
 185   XEXP (&all.shift_add, 0) = &all.shift_mult;
 186   XEXP (&all.shift_add, 1) = &all.reg;
 187
 188   PUT_CODE (&all.shift_sub0, MINUS);
 189   XEXP (&all.shift_sub0, 0) = &all.shift_mult;
 190   XEXP (&all.shift_sub0, 1) = &all.reg;
 191
 192   PUT_CODE (&all.shift_sub1, MINUS);
 193   XEXP (&all.shift_sub1, 0) = &all.reg;
 194   XEXP (&all.shift_sub1, 1) = &all.shift_mult;
 195
 196   for (speed = 0; speed < 2; speed++)
 197     {
 198       crtl->maybe_hot_insn_p = speed;
 199       zero_cost[speed] = set_src_cost (const0_rtx, speed);
 200
 201       for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 202            mode != VOIDmode;
 203            mode = GET_MODE_WIDER_MODE (mode))
 204         {
 205           PUT_MODE (&all.reg, mode);
 206           PUT_MODE (&all.plus, mode);
 207           PUT_MODE (&all.neg, mode);
 208           PUT_MODE (&all.mult, mode);
 209           PUT_MODE (&all.sdiv, mode);
 210           PUT_MODE (&all.udiv, mode);
 211           PUT_MODE (&all.sdiv_32, mode);
 212           PUT_MODE (&all.smod_32, mode);
 213           PUT_MODE (&all.wide_trunc, mode);
 214           PUT_MODE (&all.shift, mode);
 215           PUT_MODE (&all.shift_mult, mode);
 216           PUT_MODE (&all.shift_add, mode);
 217           PUT_MODE (&all.shift_sub0, mode);
 218           PUT_MODE (&all.shift_sub1, mode);
 219
 220           add_cost[speed][mode] = set_src_cost (&all.plus, speed);
 221           neg_cost[speed][mode] = set_src_cost (&all.neg, speed);
 222           mul_cost[speed][mode] = set_src_cost (&all.mult, speed);
 223           sdiv_cost[speed][mode] = set_src_cost (&all.sdiv, speed);
 224           udiv_cost[speed][mode] = set_src_cost (&all.udiv, speed);
 225
 226           sdiv_pow2_cheap[speed][mode] = (set_src_cost (&all.sdiv_32, speed)
 227                                           <= 2 * add_cost[speed][mode]);
 228           smod_pow2_cheap[speed][mode] = (set_src_cost (&all.smod_32, speed)
 229                                           <= 4 * add_cost[speed][mode]);
 230
 231           wider_mode = GET_MODE_WIDER_MODE (mode);
 232           if (wider_mode != VOIDmode)
 233             {
 234               PUT_MODE (&all.zext, wider_mode);
 235               PUT_MODE (&all.wide_mult, wider_mode);
 236               PUT_MODE (&all.wide_lshr, wider_mode);
 237               XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 238
 239               mul_widen_cost[speed][wider_mode]
 240                 = set_src_cost (&all.wide_mult, speed);
 241               mul_highpart_cost[speed][mode]
 242                 = set_src_cost (&all.wide_trunc, speed);
 243             }
 244
 245           shift_cost[speed][mode][0] = 0;
 246           shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
 247             = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
 248
 249           n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 250           for (m = 1; m < n; m++)
 251             {
 252               XEXP (&all.shift, 1) = cint[m];
 253               XEXP (&all.shift_mult, 1) = pow2[m];
 254
 255               shift_cost[speed][mode][m] = set_src_cost (&all.shift, speed);
 256               shiftadd_cost[speed][mode][m] = set_src_cost (&all.shift_add,
 257                                                             speed);
 258               shiftsub0_cost[speed][mode][m] = set_src_cost (&all.shift_sub0,
 259                                                              speed);
 260               shiftsub1_cost[speed][mode][m] = set_src_cost (&all.shift_sub1,
 261                                                              speed);
 262             }
 263         }
 264     }
 265   if (alg_hash_used_p)
 266     memset (alg_hash, 0, sizeof (alg_hash));
 267   else
 268     alg_hash_used_p = true;
 269   default_rtl_profile ();
 270 }
 271
 272 /* Return an rtx representing minus the value of X.
 273    MODE is the intended mode of the result,
 274    useful if X is a CONST_INT.  */
 275
 276 rtx
 277 negate_rtx (enum machine_mode mode, rtx x)
 278 {
 279   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 280
 281   if (result == 0)
 282     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 283
 284   return result;
 285 }
 286
 287 /* Report on the availability of insv/extv/extzv and the desired mode
 288    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 289    is false; else the mode of the specified operand.  If OPNO is -1,
 290    all the caller cares about is whether the insn is available.  */
 291 enum machine_mode
 292 mode_for_extraction (enum extraction_pattern pattern, int opno)
 293 {
 294   const struct insn_data_d *data;
 295
 296   switch (pattern)
 297     {
 298     case EP_insv:
 299       if (HAVE_insv)
 300         {
 301           data = &insn_data[CODE_FOR_insv];
 302           break;
 303         }
 304       return MAX_MACHINE_MODE;
 305
 306     case EP_extv:
 307       if (HAVE_extv)
 308         {
 309           data = &insn_data[CODE_FOR_extv];
 310           break;
 311         }
 312       return MAX_MACHINE_MODE;
 313
 314     case EP_extzv:
 315       if (HAVE_extzv)
 316         {
 317           data = &insn_data[CODE_FOR_extzv];
 318           break;
 319         }
 320       return MAX_MACHINE_MODE;
 321
 322     default:
 323       gcc_unreachable ();
 324     }
 325
 326   if (opno == -1)
 327     return VOIDmode;
 328
 329   /* Everyone who uses this function used to follow it with
 330      if (result == VOIDmode) result = word_mode; */
 331   if (data->operand[opno].mode == VOIDmode)
 332     return word_mode;
 333   return data->operand[opno].mode;
 334 }
 335 \f
 336 /* A subroutine of store_bit_field, with the same arguments.  Return true
 337    if the operation could be implemented.
 338
 339    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 340    no other way of implementing the operation.  If FALLBACK_P is false,
 341    return false instead.  */
 342
 343 static bool
 344 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 345                    unsigned HOST_WIDE_INT bitnum,
 346                    unsigned HOST_WIDE_INT bitregion_start,
 347                    unsigned HOST_WIDE_INT bitregion_end,
 348                    enum machine_mode fieldmode,
 349                    rtx value, bool fallback_p)
 350 {
 351   unsigned int unit
 352     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 353   unsigned HOST_WIDE_INT offset, bitpos;
 354   rtx op0 = str_rtx;
 355   int byte_offset;
 356   rtx orig_value;
 357
 358   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 359
 360   while (GET_CODE (op0) == SUBREG)
 361     {
 362       /* The following line once was done only if WORDS_BIG_ENDIAN,
 363          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 364          meaningful at a much higher level; when structures are copied
 365          between memory and regs, the higher-numbered regs
 366          always get higher addresses.  */
 367       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 368       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 369
 370       byte_offset = 0;
 371
 372       /* Paradoxical subregs need special handling on big endian machines.  */
 373       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 374         {
 375           int difference = inner_mode_size - outer_mode_size;
 376
 377           if (WORDS_BIG_ENDIAN)
 378             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 379           if (BYTES_BIG_ENDIAN)
 380             byte_offset += difference % UNITS_PER_WORD;
 381         }
 382       else
 383         byte_offset = SUBREG_BYTE (op0);
 384
 385       bitnum += byte_offset * BITS_PER_UNIT;
 386       op0 = SUBREG_REG (op0);
 387     }
 388
 389   /* No action is needed if the target is a register and if the field
 390      lies completely outside that register.  This can occur if the source
 391      code contains an out-of-bounds access to a small array.  */
 392   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 393     return true;
 394
 395   /* Use vec_set patterns for inserting parts of vectors whenever
 396      available.  */
 397   if (VECTOR_MODE_P (GET_MODE (op0))
 398       && !MEM_P (op0)
 399       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 400       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 401       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 402       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 403     {
 404       struct expand_operand ops[3];
 405       enum machine_mode outermode = GET_MODE (op0);
 406       enum machine_mode innermode = GET_MODE_INNER (outermode);
 407       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 408       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 409
 410       create_fixed_operand (&ops[0], op0);
 411       create_input_operand (&ops[1], value, innermode);
 412       create_integer_operand (&ops[2], pos);
 413       if (maybe_expand_insn (icode, 3, ops))
 414         return true;
 415     }
 416
 417   /* If the target is a register, overwriting the entire object, or storing
 418      a full-word or multi-word field can be done with just a SUBREG.
 419
 420      If the target is memory, storing any naturally aligned field can be
 421      done with a simple store.  For targets that support fast unaligned
 422      memory, any naturally sized, unit aligned field can be done directly.  */
 423
 424   offset = bitnum / unit;
 425   bitpos = bitnum % unit;
 426   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 427                 + (offset * UNITS_PER_WORD);
 428
 429   if (bitpos == 0
 430       && bitsize == GET_MODE_BITSIZE (fieldmode)
 431       && (!MEM_P (op0)
 432           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 433               || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 434              && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
 435                  || validate_subreg (fieldmode, GET_MODE (op0), op0,
 436                                      byte_offset)))
 437           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 438              || (offset * BITS_PER_UNIT % bitsize == 0
 439                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 440     {
 441       if (MEM_P (op0))
 442         op0 = adjust_address (op0, fieldmode, offset);
 443       else if (GET_MODE (op0) != fieldmode)
 444         op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 445                                    byte_offset);
 446       emit_move_insn (op0, value);
 447       return true;
 448     }
 449
 450   /* Make sure we are playing with integral modes.  Pun with subregs
 451      if we aren't.  This must come after the entire register case above,
 452      since that case is valid for any mode.  The following cases are only
 453      valid for integral modes.  */
 454   {
 455     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 456     if (imode != GET_MODE (op0))
 457       {
 458         if (MEM_P (op0))
 459           op0 = adjust_address (op0, imode, 0);
 460         else
 461           {
 462             gcc_assert (imode != BLKmode);
 463             op0 = gen_lowpart (imode, op0);
 464           }
 465       }
 466   }
 467
 468   /* We may be accessing data outside the field, which means
 469      we can alias adjacent data.  */
 470   /* ?? not always for C++0x memory model ?? */
 471   if (MEM_P (op0))
 472     {
 473       op0 = shallow_copy_rtx (op0);
 474       set_mem_alias_set (op0, 0);
 475       set_mem_expr (op0, 0);
 476     }
 477
 478   /* If OP0 is a register, BITPOS must count within a word.
 479      But as we have it, it counts within whatever size OP0 now has.
 480      On a bigendian machine, these are not the same, so convert.  */
 481   if (BYTES_BIG_ENDIAN
 482       && !MEM_P (op0)
 483       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 484     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 485
 486   /* Storing an lsb-aligned field in a register
 487      can be done with a movestrict instruction.  */
 488
 489   if (!MEM_P (op0)
 490       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 491       && bitsize == GET_MODE_BITSIZE (fieldmode)
 492       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 493     {
 494       struct expand_operand ops[2];
 495       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 496       rtx arg0 = op0;
 497       unsigned HOST_WIDE_INT subreg_off;
 498
 499       if (GET_CODE (arg0) == SUBREG)
 500         {
 501           /* Else we've got some float mode source being extracted into
 502              a different float mode destination -- this combination of
 503              subregs results in Severe Tire Damage.  */
 504           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 505                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 506                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 507           arg0 = SUBREG_REG (arg0);
 508         }
 509
 510       subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 511                    + (offset * UNITS_PER_WORD);
 512       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 513         {
 514           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 515
 516           create_fixed_operand (&ops[0], arg0);
 517           /* Shrink the source operand to FIELDMODE.  */
 518           create_convert_operand_to (&ops[1], value, fieldmode, false);
 519           if (maybe_expand_insn (icode, 2, ops))
 520             return true;
 521         }
 522     }
 523
 524   /* Handle fields bigger than a word.  */
 525
 526   if (bitsize > BITS_PER_WORD)
 527     {
 528       /* Here we transfer the words of the field
 529          in the order least significant first.
 530          This is because the most significant word is the one which may
 531          be less than full.
 532          However, only do that if the value is not BLKmode.  */
 533
 534       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 535       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 536       unsigned int i;
 537       rtx last;
 538
 539       /* This is the mode we must force value to, so that there will be enough
 540          subwords to extract.  Note that fieldmode will often (always?) be
 541          VOIDmode, because that is what store_field uses to indicate that this
 542          is a bit field, but passing VOIDmode to operand_subword_force
 543          is not allowed.  */
 544       fieldmode = GET_MODE (value);
 545       if (fieldmode == VOIDmode)
 546         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 547
 548       last = get_last_insn ();
 549       for (i = 0; i < nwords; i++)
 550         {
 551           /* If I is 0, use the low-order word in both field and target;
 552              if I is 1, use the next to lowest word; and so on.  */
 553           unsigned int wordnum = (backwards
 554                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 555                                   - i - 1
 556                                   : i);
 557           unsigned int bit_offset = (backwards
 558                                      ? MAX ((int) bitsize - ((int) i + 1)
 559                                             * BITS_PER_WORD,
 560                                             0)
 561                                      : (int) i * BITS_PER_WORD);
 562           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 563           unsigned HOST_WIDE_INT new_bitsize =
 564             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 565
 566           /* If the remaining chunk doesn't have full wordsize we have
 567              to make sure that for big endian machines the higher order
 568              bits are used.  */
 569           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 570             value_word = simplify_expand_binop (word_mode, lshr_optab,
 571                                                 value_word,
 572                                                 GEN_INT (BITS_PER_WORD
 573                                                          - new_bitsize),
 574                                                 NULL_RTX, true,
 575                                                 OPTAB_LIB_WIDEN);
 576
 577           if (!store_bit_field_1 (op0, new_bitsize,
 578                                   bitnum + bit_offset,
 579                                   bitregion_start, bitregion_end,
 580                                   word_mode,
 581                                   value_word, fallback_p))
 582             {
 583               delete_insns_since (last);
 584               return false;
 585             }
 586         }
 587       return true;
 588     }
 589
 590   /* From here on we can assume that the field to be stored in is
 591      a full-word (whatever type that is), since it is shorter than a word.  */
 592
 593   /* OFFSET is the number of words or bytes (UNIT says which)
 594      from STR_RTX to the first word or byte containing part of the field.  */
 595
 596   if (!MEM_P (op0))
 597     {
 598       if (offset != 0
 599           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 600         {
 601           if (!REG_P (op0))
 602             {
 603               /* Since this is a destination (lvalue), we can't copy
 604                  it to a pseudo.  We can remove a SUBREG that does not
 605                  change the size of the operand.  Such a SUBREG may
 606                  have been added above.  */
 607               gcc_assert (GET_CODE (op0) == SUBREG
 608                           && (GET_MODE_SIZE (GET_MODE (op0))
 609                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 610               op0 = SUBREG_REG (op0);
 611             }
 612           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 613                                 op0, (offset * UNITS_PER_WORD));
 614         }
 615       offset = 0;
 616     }
 617
 618   /* If VALUE has a floating-point or complex mode, access it as an
 619      integer of the corresponding size.  This can occur on a machine
 620      with 64 bit registers that uses SFmode for float.  It can also
 621      occur for unaligned float or complex fields.  */
 622   orig_value = value;
 623   if (GET_MODE (value) != VOIDmode
 624       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 625       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 626     {
 627       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 628       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 629     }
 630
 631   /* Now OFFSET is nonzero only if OP0 is memory
 632      and is therefore always measured in bytes.  */
 633
 634   if (HAVE_insv
 635       && GET_MODE (value) != BLKmode
 636       && bitsize > 0
 637       && GET_MODE_BITSIZE (op_mode) >= bitsize
 638       /* Do not use insv for volatile bitfields when
 639          -fstrict-volatile-bitfields is in effect.  */
 640       && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
 641            && flag_strict_volatile_bitfields > 0)
 642       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 643             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
 644       /* Do not use insv if the bit region is restricted and
 645          op_mode integer at offset doesn't fit into the
 646          restricted region.  */
 647       && !(MEM_P (op0) && bitregion_end
 648            && bitnum - bitpos + GET_MODE_BITSIZE (op_mode)
 649               > bitregion_end + 1))
 650     {
 651       struct expand_operand ops[4];
 652       int xbitpos = bitpos;
 653       rtx value1;
 654       rtx xop0 = op0;
 655       rtx last = get_last_insn ();
 656       bool copy_back = false;
 657
 658       /* Add OFFSET into OP0's address.  */
 659       if (MEM_P (xop0))
 660         xop0 = adjust_address (xop0, byte_mode, offset);
 661
 662       /* If xop0 is a register, we need it in OP_MODE
 663          to make it acceptable to the format of insv.  */
 664       if (GET_CODE (xop0) == SUBREG)
 665         /* We can't just change the mode, because this might clobber op0,
 666            and we will need the original value of op0 if insv fails.  */
 667         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 668       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 669         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 670
 671       /* If the destination is a paradoxical subreg such that we need a
 672          truncate to the inner mode, perform the insertion on a temporary and
 673          truncate the result to the original destination.  Note that we can't
 674          just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 675          X) 0)) is (reg:N X).  */
 676       if (GET_CODE (xop0) == SUBREG
 677           && REG_P (SUBREG_REG (xop0))
 678           && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 679                                               op_mode)))
 680         {
 681           rtx tem = gen_reg_rtx (op_mode);
 682           emit_move_insn (tem, xop0);
 683           xop0 = tem;
 684           copy_back = true;
 685         }
 686
 687       /* We have been counting XBITPOS within UNIT.
 688          Count instead within the size of the register.  */
 689       if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
 690         xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
 691
 692       unit = GET_MODE_BITSIZE (op_mode);
 693
 694       /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 695          "backwards" from the size of the unit we are inserting into.
 696          Otherwise, we count bits from the most significant on a
 697          BYTES/BITS_BIG_ENDIAN machine.  */
 698
 699       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 700         xbitpos = unit - bitsize - xbitpos;
 701
 702       /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 703       value1 = value;
 704       if (GET_MODE (value) != op_mode)
 705         {
 706           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 707             {
 708               /* Optimization: Don't bother really extending VALUE
 709                  if it has all the bits we will actually use.  However,
 710                  if we must narrow it, be sure we do it correctly.  */
 711
 712               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 713                 {
 714                   rtx tmp;
 715
 716                   tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 717                   if (! tmp)
 718                     tmp = simplify_gen_subreg (op_mode,
 719                                                force_reg (GET_MODE (value),
 720                                                           value1),
 721                                                GET_MODE (value), 0);
 722                   value1 = tmp;
 723                 }
 724               else
 725                 value1 = gen_lowpart (op_mode, value1);
 726             }
 727           else if (CONST_INT_P (value))
 728             value1 = gen_int_mode (INTVAL (value), op_mode);
 729           else
 730             /* Parse phase is supposed to make VALUE's data type
 731                match that of the component reference, which is a type
 732                at least as wide as the field; so VALUE should have
 733                a mode that corresponds to that type.  */
 734             gcc_assert (CONSTANT_P (value));
 735         }
 736
 737       create_fixed_operand (&ops[0], xop0);
 738       create_integer_operand (&ops[1], bitsize);
 739       create_integer_operand (&ops[2], xbitpos);
 740       create_input_operand (&ops[3], value1, op_mode);
 741       if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
 742         {
 743           if (copy_back)
 744             convert_move (op0, xop0, true);
 745           return true;
 746         }
 747       delete_insns_since (last);
 748     }
 749
 750   /* If OP0 is a memory, try copying it to a register and seeing if a
 751      cheap register alternative is available.  */
 752   if (HAVE_insv && MEM_P (op0))
 753     {
 754       enum machine_mode bestmode;
 755       unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
 756
 757       if (bitregion_end)
 758         maxbits = bitregion_end - bitregion_start + 1;
 759
 760       /* Get the mode to use for inserting into this field.  If OP0 is
 761          BLKmode, get the smallest mode consistent with the alignment. If
 762          OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
 763          mode. Otherwise, use the smallest mode containing the field.  */
 764
 765       if (GET_MODE (op0) == BLKmode
 766           || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
 767           || (op_mode != MAX_MACHINE_MODE
 768               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
 769         bestmode = get_best_mode (bitsize, bitnum,
 770                                   bitregion_start, bitregion_end,
 771                                   MEM_ALIGN (op0),
 772                                   (op_mode == MAX_MACHINE_MODE
 773                                    ? VOIDmode : op_mode),
 774                                   MEM_VOLATILE_P (op0));
 775       else
 776         bestmode = GET_MODE (op0);
 777
 778       if (bestmode != VOIDmode
 779           && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
 780           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 781                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 782         {
 783           rtx last, tempreg, xop0;
 784           unsigned HOST_WIDE_INT xoffset, xbitpos;
 785
 786           last = get_last_insn ();
 787
 788           /* Adjust address to point to the containing unit of
 789              that mode.  Compute the offset as a multiple of this unit,
 790              counting in bytes.  */
 791           unit = GET_MODE_BITSIZE (bestmode);
 792           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 793           xbitpos = bitnum % unit;
 794           xop0 = adjust_address (op0, bestmode, xoffset);
 795
 796           /* Fetch that unit, store the bitfield in it, then store
 797              the unit.  */
 798           tempreg = copy_to_reg (xop0);
 799           if (store_bit_field_1 (tempreg, bitsize, xbitpos,
 800                                  bitregion_start, bitregion_end,
 801                                  fieldmode, orig_value, false))
 802             {
 803               emit_move_insn (xop0, tempreg);
 804               return true;
 805             }
 806           delete_insns_since (last);
 807         }
 808     }
 809
 810   if (!fallback_p)
 811     return false;
 812
 813   store_fixed_bit_field (op0, offset, bitsize, bitpos,
 814                          bitregion_start, bitregion_end, value);
 815   return true;
 816 }
 817
 818 /* Generate code to store value from rtx VALUE
 819    into a bit-field within structure STR_RTX
 820    containing BITSIZE bits starting at bit BITNUM.
 821
 822    BITREGION_START is bitpos of the first bitfield in this region.
 823    BITREGION_END is the bitpos of the ending bitfield in this region.
 824    These two fields are 0, if the C++ memory model does not apply,
 825    or we are not interested in keeping track of bitfield regions.
 826
 827    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
 828
 829 void
 830 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 831                  unsigned HOST_WIDE_INT bitnum,
 832                  unsigned HOST_WIDE_INT bitregion_start,
 833                  unsigned HOST_WIDE_INT bitregion_end,
 834                  enum machine_mode fieldmode,
 835                  rtx value)
 836 {
 837   /* Under the C++0x memory model, we must not touch bits outside the
 838      bit region.  Adjust the address to start at the beginning of the
 839      bit region.  */
 840   if (MEM_P (str_rtx) && bitregion_start > 0)
 841     {
 842       enum machine_mode bestmode;
 843       enum machine_mode op_mode;
 844       unsigned HOST_WIDE_INT offset;
 845
 846       op_mode = mode_for_extraction (EP_insv, 3);
 847       if (op_mode == MAX_MACHINE_MODE)
 848         op_mode = VOIDmode;
 849
 850       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
 851
 852       offset = bitregion_start / BITS_PER_UNIT;
 853       bitnum -= bitregion_start;
 854       bitregion_end -= bitregion_start;
 855       bitregion_start = 0;
 856       bestmode = get_best_mode (bitsize, bitnum,
 857                                 bitregion_start, bitregion_end,
 858                                 MEM_ALIGN (str_rtx),
 859                                 op_mode,
 860                                 MEM_VOLATILE_P (str_rtx));
 861       str_rtx = adjust_address (str_rtx, bestmode, offset);
 862     }
 863
 864   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
 865                           bitregion_start, bitregion_end,
 866                           fieldmode, value, true))
 867     gcc_unreachable ();
 868 }
 869 \f
 870 /* Use shifts and boolean operations to store VALUE
 871    into a bit field of width BITSIZE
 872    in a memory location specified by OP0 except offset by OFFSET bytes.
 873      (OFFSET must be 0 if OP0 is a register.)
 874    The field starts at position BITPOS within the byte.
 875     (If OP0 is a register, it may be a full word or a narrower mode,
 876      but BITPOS still counts within a full word,
 877      which is significant on bigendian machines.)  */
 878
 879 static void
 880 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 881                        unsigned HOST_WIDE_INT bitsize,
 882                        unsigned HOST_WIDE_INT bitpos,
 883                        unsigned HOST_WIDE_INT bitregion_start,
 884                        unsigned HOST_WIDE_INT bitregion_end,
 885                        rtx value)
 886 {
 887   enum machine_mode mode;
 888   unsigned int total_bits = BITS_PER_WORD;
 889   rtx temp;
 890   int all_zero = 0;
 891   int all_one = 0;
 892
 893   /* There is a case not handled here:
 894      a structure with a known alignment of just a halfword
 895      and a field split across two aligned halfwords within the structure.
 896      Or likewise a structure with a known alignment of just a byte
 897      and a field split across two bytes.
 898      Such cases are not supposed to be able to occur.  */
 899
 900   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 901     {
 902       gcc_assert (!offset);
 903       /* Special treatment for a bit field split across two registers.  */
 904       if (bitsize + bitpos > BITS_PER_WORD)
 905         {
 906           store_split_bit_field (op0, bitsize, bitpos,
 907                                  bitregion_start, bitregion_end,
 908                                  value);
 909           return;
 910         }
 911     }
 912   else
 913     {
 914       unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
 915
 916       if (bitregion_end)
 917         maxbits = bitregion_end - bitregion_start + 1;
 918
 919       /* Get the proper mode to use for this field.  We want a mode that
 920          includes the entire field.  If such a mode would be larger than
 921          a word, we won't be doing the extraction the normal way.
 922          We don't want a mode bigger than the destination.  */
 923
 924       mode = GET_MODE (op0);
 925       if (GET_MODE_BITSIZE (mode) == 0
 926           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 927         mode = word_mode;
 928
 929       if (MEM_VOLATILE_P (op0)
 930           && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
 931           && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
 932           && flag_strict_volatile_bitfields > 0)
 933         mode = GET_MODE (op0);
 934       else
 935         mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 936                               bitregion_start, bitregion_end,
 937                               MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 938
 939       if (mode == VOIDmode)
 940         {
 941           /* The only way this should occur is if the field spans word
 942              boundaries.  */
 943           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 944                                  bitregion_start, bitregion_end, value);
 945           return;
 946         }
 947
 948       total_bits = GET_MODE_BITSIZE (mode);
 949
 950       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 951          be in the range 0 to total_bits-1, and put any excess bytes in
 952          OFFSET.  */
 953       if (bitpos >= total_bits)
 954         {
 955           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 956           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 957                      * BITS_PER_UNIT);
 958         }
 959
 960       /* Get ref to an aligned byte, halfword, or word containing the field.
 961          Adjust BITPOS to be position within a word,
 962          and OFFSET to be the offset of that word.
 963          Then alter OP0 to refer to that word.  */
 964       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 965       offset -= (offset % (total_bits / BITS_PER_UNIT));
 966       op0 = adjust_address (op0, mode, offset);
 967     }
 968
 969   mode = GET_MODE (op0);
 970
 971   /* Now MODE is either some integral mode for a MEM as OP0,
 972      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 973      The bit field is contained entirely within OP0.
 974      BITPOS is the starting bit number within OP0.
 975      (OP0's mode may actually be narrower than MODE.)  */
 976
 977   if (BYTES_BIG_ENDIAN)
 978       /* BITPOS is the distance between our msb
 979          and that of the containing datum.
 980          Convert it to the distance from the lsb.  */
 981       bitpos = total_bits - bitsize - bitpos;
 982
 983   /* Now BITPOS is always the distance between our lsb
 984      and that of OP0.  */
 985
 986   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 987      we must first convert its mode to MODE.  */
 988
 989   if (CONST_INT_P (value))
 990     {
 991       HOST_WIDE_INT v = INTVAL (value);
 992
 993       if (bitsize < HOST_BITS_PER_WIDE_INT)
 994         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 995
 996       if (v == 0)
 997         all_zero = 1;
 998       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 999                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1000                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1001         all_one = 1;
1002
1003       value = lshift_value (mode, value, bitpos, bitsize);
1004     }
1005   else
1006     {
1007       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1008                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
1009
1010       if (GET_MODE (value) != mode)
1011         value = convert_to_mode (mode, value, 1);
1012
1013       if (must_and)
1014         value = expand_binop (mode, and_optab, value,
1015                               mask_rtx (mode, 0, bitsize, 0),
1016                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1017       if (bitpos > 0)
1018         value = expand_shift (LSHIFT_EXPR, mode, value,
1019                               bitpos, NULL_RTX, 1);
1020     }
1021
1022   /* Now clear the chosen bits in OP0,
1023      except that if VALUE is -1 we need not bother.  */
1024   /* We keep the intermediates in registers to allow CSE to combine
1025      consecutive bitfield assignments.  */
1026
1027   temp = force_reg (mode, op0);
1028
1029   if (! all_one)
1030     {
1031       temp = expand_binop (mode, and_optab, temp,
1032                            mask_rtx (mode, bitpos, bitsize, 1),
1033                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1034       temp = force_reg (mode, temp);
1035     }
1036
1037   /* Now logical-or VALUE into OP0, unless it is zero.  */
1038
1039   if (! all_zero)
1040     {
1041       temp = expand_binop (mode, ior_optab, temp, value,
1042                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1043       temp = force_reg (mode, temp);
1044     }
1045
1046   if (op0 != temp)
1047     {
1048       op0 = copy_rtx (op0);
1049       emit_move_insn (op0, temp);
1050     }
1051 }
1052 \f
1053 /* Store a bit field that is split across multiple accessible memory objects.
1054
1055    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1056    BITSIZE is the field width; BITPOS the position of its first bit
1057    (within the word).
1058    VALUE is the value to store.
1059
1060    This does not yet handle fields wider than BITS_PER_WORD.  */
1061
1062 static void
1063 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1064                        unsigned HOST_WIDE_INT bitpos,
1065                        unsigned HOST_WIDE_INT bitregion_start,
1066                        unsigned HOST_WIDE_INT bitregion_end,
1067                        rtx value)
1068 {
1069   unsigned int unit;
1070   unsigned int bitsdone = 0;
1071
1072   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1073      much at a time.  */
1074   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1075     unit = BITS_PER_WORD;
1076   else
1077     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1078
1079   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1080      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1081      that VALUE might be a floating-point constant.  */
1082   if (CONSTANT_P (value) && !CONST_INT_P (value))
1083     {
1084       rtx word = gen_lowpart_common (word_mode, value);
1085
1086       if (word && (value != word))
1087         value = word;
1088       else
1089         value = gen_lowpart_common (word_mode,
1090                                     force_reg (GET_MODE (value) != VOIDmode
1091                                                ? GET_MODE (value)
1092                                                : word_mode, value));
1093     }
1094
1095   while (bitsdone < bitsize)
1096     {
1097       unsigned HOST_WIDE_INT thissize;
1098       rtx part, word;
1099       unsigned HOST_WIDE_INT thispos;
1100       unsigned HOST_WIDE_INT offset;
1101
1102       offset = (bitpos + bitsdone) / unit;
1103       thispos = (bitpos + bitsdone) % unit;
1104
1105       /* When region of bytes we can touch is restricted, decrease
1106          UNIT close to the end of the region as needed.  */
1107       if (bitregion_end
1108           && unit > BITS_PER_UNIT
1109           && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1110         {
1111           unit = unit / 2;
1112           continue;
1113         }
1114
1115       /* THISSIZE must not overrun a word boundary.  Otherwise,
1116          store_fixed_bit_field will call us again, and we will mutually
1117          recurse forever.  */
1118       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1119       thissize = MIN (thissize, unit - thispos);
1120
1121       if (BYTES_BIG_ENDIAN)
1122         {
1123           int total_bits;
1124
1125           /* We must do an endian conversion exactly the same way as it is
1126              done in extract_bit_field, so that the two calls to
1127              extract_fixed_bit_field will have comparable arguments.  */
1128           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1129             total_bits = BITS_PER_WORD;
1130           else
1131             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1132
1133           /* Fetch successively less significant portions.  */
1134           if (CONST_INT_P (value))
1135             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1136                              >> (bitsize - bitsdone - thissize))
1137                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1138           else
1139             /* The args are chosen so that the last part includes the
1140                lsb.  Give extract_bit_field the value it needs (with
1141                endianness compensation) to fetch the piece we want.  */
1142             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1143                                             total_bits - bitsize + bitsdone,
1144                                             NULL_RTX, 1, false);
1145         }
1146       else
1147         {
1148           /* Fetch successively more significant portions.  */
1149           if (CONST_INT_P (value))
1150             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1151                              >> bitsdone)
1152                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1153           else
1154             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1155                                             bitsdone, NULL_RTX, 1, false);
1156         }
1157
1158       /* If OP0 is a register, then handle OFFSET here.
1159
1160          When handling multiword bitfields, extract_bit_field may pass
1161          down a word_mode SUBREG of a larger REG for a bitfield that actually
1162          crosses a word boundary.  Thus, for a SUBREG, we must find
1163          the current word starting from the base register.  */
1164       if (GET_CODE (op0) == SUBREG)
1165         {
1166           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1167           enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1168           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1169             word = word_offset ? const0_rtx : op0;
1170           else
1171             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1172                                           GET_MODE (SUBREG_REG (op0)));
1173           offset = 0;
1174         }
1175       else if (REG_P (op0))
1176         {
1177           enum machine_mode op0_mode = GET_MODE (op0);
1178           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1179             word = offset ? const0_rtx : op0;
1180           else
1181             word = operand_subword_force (op0, offset, GET_MODE (op0));
1182           offset = 0;
1183         }
1184       else
1185         word = op0;
1186
1187       /* OFFSET is in UNITs, and UNIT is in bits.
1188          store_fixed_bit_field wants offset in bytes.  If WORD is const0_rtx,
1189          it is just an out-of-bounds access.  Ignore it.  */
1190       if (word != const0_rtx)
1191         store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1192                                thispos, bitregion_start, bitregion_end, part);
1193       bitsdone += thissize;
1194     }
1195 }
1196 \f
1197 /* A subroutine of extract_bit_field_1 that converts return value X
1198    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1199    to extract_bit_field.  */
1200
1201 static rtx
1202 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1203                              enum machine_mode tmode, bool unsignedp)
1204 {
1205   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1206     return x;
1207
1208   /* If the x mode is not a scalar integral, first convert to the
1209      integer mode of that size and then access it as a floating-point
1210      value via a SUBREG.  */
1211   if (!SCALAR_INT_MODE_P (tmode))
1212     {
1213       enum machine_mode smode;
1214
1215       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1216       x = convert_to_mode (smode, x, unsignedp);
1217       x = force_reg (smode, x);
1218       return gen_lowpart (tmode, x);
1219     }
1220
1221   return convert_to_mode (tmode, x, unsignedp);
1222 }
1223
1224 /* A subroutine of extract_bit_field, with the same arguments.
1225    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1226    if we can find no other means of implementing the operation.
1227    if FALLBACK_P is false, return NULL instead.  */
1228
1229 static rtx
1230 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1231                      unsigned HOST_WIDE_INT bitnum,
1232                      int unsignedp, bool packedp, rtx target,
1233                      enum machine_mode mode, enum machine_mode tmode,
1234                      bool fallback_p)
1235 {
1236   unsigned int unit
1237     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1238   unsigned HOST_WIDE_INT offset, bitpos;
1239   rtx op0 = str_rtx;
1240   enum machine_mode int_mode;
1241   enum machine_mode ext_mode;
1242   enum machine_mode mode1;
1243   int byte_offset;
1244
1245   if (tmode == VOIDmode)
1246     tmode = mode;
1247
1248   while (GET_CODE (op0) == SUBREG)
1249     {
1250       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1251       op0 = SUBREG_REG (op0);
1252     }
1253
1254   /* If we have an out-of-bounds access to a register, just return an
1255      uninitialized register of the required mode.  This can occur if the
1256      source code contains an out-of-bounds access to a small array.  */
1257   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1258     return gen_reg_rtx (tmode);
1259
1260   if (REG_P (op0)
1261       && mode == GET_MODE (op0)
1262       && bitnum == 0
1263       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1264     {
1265       /* We're trying to extract a full register from itself.  */
1266       return op0;
1267     }
1268
1269   /* See if we can get a better vector mode before extracting.  */
1270   if (VECTOR_MODE_P (GET_MODE (op0))
1271       && !MEM_P (op0)
1272       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1273     {
1274       enum machine_mode new_mode;
1275
1276       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1277         new_mode = MIN_MODE_VECTOR_FLOAT;
1278       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1279         new_mode = MIN_MODE_VECTOR_FRACT;
1280       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1281         new_mode = MIN_MODE_VECTOR_UFRACT;
1282       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1283         new_mode = MIN_MODE_VECTOR_ACCUM;
1284       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1285         new_mode = MIN_MODE_VECTOR_UACCUM;
1286       else
1287         new_mode = MIN_MODE_VECTOR_INT;
1288
1289       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1290         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1291             && targetm.vector_mode_supported_p (new_mode))
1292           break;
1293       if (new_mode != VOIDmode)
1294         op0 = gen_lowpart (new_mode, op0);
1295     }
1296
1297   /* Use vec_extract patterns for extracting parts of vectors whenever
1298      available.  */
1299   if (VECTOR_MODE_P (GET_MODE (op0))
1300       && !MEM_P (op0)
1301       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1302       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1303           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1304     {
1305       struct expand_operand ops[3];
1306       enum machine_mode outermode = GET_MODE (op0);
1307       enum machine_mode innermode = GET_MODE_INNER (outermode);
1308       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1309       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1310
1311       create_output_operand (&ops[0], target, innermode);
1312       create_input_operand (&ops[1], op0, outermode);
1313       create_integer_operand (&ops[2], pos);
1314       if (maybe_expand_insn (icode, 3, ops))
1315         {
1316           target = ops[0].value;
1317           if (GET_MODE (target) != mode)
1318             return gen_lowpart (tmode, target);
1319           return target;
1320         }
1321     }
1322
1323   /* Make sure we are playing with integral modes.  Pun with subregs
1324      if we aren't.  */
1325   {
1326     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1327     if (imode != GET_MODE (op0))
1328       {
1329         if (MEM_P (op0))
1330           op0 = adjust_address (op0, imode, 0);
1331         else if (imode != BLKmode)
1332           {
1333             op0 = gen_lowpart (imode, op0);
1334
1335             /* If we got a SUBREG, force it into a register since we
1336                aren't going to be able to do another SUBREG on it.  */
1337             if (GET_CODE (op0) == SUBREG)
1338               op0 = force_reg (imode, op0);
1339           }
1340         else if (REG_P (op0))
1341           {
1342             rtx reg, subreg;
1343             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1344                                             MODE_INT);
1345             reg = gen_reg_rtx (imode);
1346             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1347             emit_move_insn (subreg, op0);
1348             op0 = reg;
1349             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1350           }
1351         else
1352           {
1353             rtx mem = assign_stack_temp (GET_MODE (op0),
1354                                          GET_MODE_SIZE (GET_MODE (op0)), 0);
1355             emit_move_insn (mem, op0);
1356             op0 = adjust_address (mem, BLKmode, 0);
1357           }
1358       }
1359   }
1360
1361   /* We may be accessing data outside the field, which means
1362      we can alias adjacent data.  */
1363   if (MEM_P (op0))
1364     {
1365       op0 = shallow_copy_rtx (op0);
1366       set_mem_alias_set (op0, 0);
1367       set_mem_expr (op0, 0);
1368     }
1369
1370   /* Extraction of a full-word or multi-word value from a structure
1371      in a register or aligned memory can be done with just a SUBREG.
1372      A subword value in the least significant part of a register
1373      can also be extracted with a SUBREG.  For this, we need the
1374      byte offset of the value in op0.  */
1375
1376   bitpos = bitnum % unit;
1377   offset = bitnum / unit;
1378   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1379
1380   /* If OP0 is a register, BITPOS must count within a word.
1381      But as we have it, it counts within whatever size OP0 now has.
1382      On a bigendian machine, these are not the same, so convert.  */
1383   if (BYTES_BIG_ENDIAN
1384       && !MEM_P (op0)
1385       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1386     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1387
1388   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1389      If that's wrong, the solution is to test for it and set TARGET to 0
1390      if needed.  */
1391
1392   /* Only scalar integer modes can be converted via subregs.  There is an
1393      additional problem for FP modes here in that they can have a precision
1394      which is different from the size.  mode_for_size uses precision, but
1395      we want a mode based on the size, so we must avoid calling it for FP
1396      modes.  */
1397   mode1  = (SCALAR_INT_MODE_P (tmode)
1398             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1399             : mode);
1400
1401   /* If the bitfield is volatile, we need to make sure the access
1402      remains on a type-aligned boundary.  */
1403   if (GET_CODE (op0) == MEM
1404       && MEM_VOLATILE_P (op0)
1405       && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1406       && flag_strict_volatile_bitfields > 0)
1407     goto no_subreg_mode_swap;
1408
1409   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1410         && bitpos % BITS_PER_WORD == 0)
1411        || (mode1 != BLKmode
1412            /* ??? The big endian test here is wrong.  This is correct
1413               if the value is in a register, and if mode_for_size is not
1414               the same mode as op0.  This causes us to get unnecessarily
1415               inefficient code from the Thumb port when -mbig-endian.  */
1416            && (BYTES_BIG_ENDIAN
1417                ? bitpos + bitsize == BITS_PER_WORD
1418                : bitpos == 0)))
1419       && ((!MEM_P (op0)
1420            && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
1421            && GET_MODE_SIZE (mode1) != 0
1422            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1423           || (MEM_P (op0)
1424               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1425                   || (offset * BITS_PER_UNIT % bitsize == 0
1426                       && MEM_ALIGN (op0) % bitsize == 0)))))
1427     {
1428       if (MEM_P (op0))
1429         op0 = adjust_address (op0, mode1, offset);
1430       else if (mode1 != GET_MODE (op0))
1431         {
1432           rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1433                                          byte_offset);
1434           if (sub == NULL)
1435             goto no_subreg_mode_swap;
1436           op0 = sub;
1437         }
1438       if (mode1 != mode)
1439         return convert_to_mode (tmode, op0, unsignedp);
1440       return op0;
1441     }
1442  no_subreg_mode_swap:
1443
1444   /* Handle fields bigger than a word.  */
1445
1446   if (bitsize > BITS_PER_WORD)
1447     {
1448       /* Here we transfer the words of the field
1449          in the order least significant first.
1450          This is because the most significant word is the one which may
1451          be less than full.  */
1452
1453       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1454       unsigned int i;
1455
1456       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1457         target = gen_reg_rtx (mode);
1458
1459       /* Indicate for flow that the entire target reg is being set.  */
1460       emit_clobber (target);
1461
1462       for (i = 0; i < nwords; i++)
1463         {
1464           /* If I is 0, use the low-order word in both field and target;
1465              if I is 1, use the next to lowest word; and so on.  */
1466           /* Word number in TARGET to use.  */
1467           unsigned int wordnum
1468             = (WORDS_BIG_ENDIAN
1469                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1470                : i);
1471           /* Offset from start of field in OP0.  */
1472           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1473                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1474                                                 * (int) BITS_PER_WORD))
1475                                      : (int) i * BITS_PER_WORD);
1476           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1477           rtx result_part
1478             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1479                                            bitsize - i * BITS_PER_WORD),
1480                                  bitnum + bit_offset, 1, false, target_part, mode,
1481                                  word_mode);
1482
1483           gcc_assert (target_part);
1484
1485           if (result_part != target_part)
1486             emit_move_insn (target_part, result_part);
1487         }
1488
1489       if (unsignedp)
1490         {
1491           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1492              need to be zero'd out.  */
1493           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1494             {
1495               unsigned int i, total_words;
1496
1497               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1498               for (i = nwords; i < total_words; i++)
1499                 emit_move_insn
1500                   (operand_subword (target,
1501                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1502                                     1, VOIDmode),
1503                    const0_rtx);
1504             }
1505           return target;
1506         }
1507
1508       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1509       target = expand_shift (LSHIFT_EXPR, mode, target,
1510                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1511       return expand_shift (RSHIFT_EXPR, mode, target,
1512                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1513     }
1514
1515   /* From here on we know the desired field is smaller than a word.  */
1516
1517   /* Check if there is a correspondingly-sized integer field, so we can
1518      safely extract it as one size of integer, if necessary; then
1519      truncate or extend to the size that is wanted; then use SUBREGs or
1520      convert_to_mode to get one of the modes we really wanted.  */
1521
1522   int_mode = int_mode_for_mode (tmode);
1523   if (int_mode == BLKmode)
1524     int_mode = int_mode_for_mode (mode);
1525   /* Should probably push op0 out to memory and then do a load.  */
1526   gcc_assert (int_mode != BLKmode);
1527
1528   /* OFFSET is the number of words or bytes (UNIT says which)
1529      from STR_RTX to the first word or byte containing part of the field.  */
1530   if (!MEM_P (op0))
1531     {
1532       if (offset != 0
1533           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1534         {
1535           if (!REG_P (op0))
1536             op0 = copy_to_reg (op0);
1537           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1538                                 op0, (offset * UNITS_PER_WORD));
1539         }
1540       offset = 0;
1541     }
1542
1543   /* Now OFFSET is nonzero only for memory operands.  */
1544   ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1545   if (ext_mode != MAX_MACHINE_MODE
1546       && bitsize > 0
1547       && GET_MODE_BITSIZE (ext_mode) >= bitsize
1548       /* Do not use extv/extzv for volatile bitfields when
1549          -fstrict-volatile-bitfields is in effect.  */
1550       && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
1551            && flag_strict_volatile_bitfields > 0)
1552       /* If op0 is a register, we need it in EXT_MODE to make it
1553          acceptable to the format of ext(z)v.  */
1554       && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1555       && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1556            && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1557     {
1558       struct expand_operand ops[4];
1559       unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1560       rtx xop0 = op0;
1561       rtx xtarget = target;
1562       rtx xspec_target = target;
1563       rtx xspec_target_subreg = 0;
1564
1565       /* If op0 is a register, we need it in EXT_MODE to make it
1566          acceptable to the format of ext(z)v.  */
1567       if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1568         xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1569       if (MEM_P (xop0))
1570         /* Get ref to first byte containing part of the field.  */
1571         xop0 = adjust_address (xop0, byte_mode, xoffset);
1572
1573       /* Now convert from counting within UNIT to counting in EXT_MODE.  */
1574       if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
1575         xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1576
1577       unit = GET_MODE_BITSIZE (ext_mode);
1578
1579       /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1580          "backwards" from the size of the unit we are extracting from.
1581          Otherwise, we count bits from the most significant on a
1582          BYTES/BITS_BIG_ENDIAN machine.  */
1583
1584       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1585         xbitpos = unit - bitsize - xbitpos;
1586
1587       if (xtarget == 0)
1588         xtarget = xspec_target = gen_reg_rtx (tmode);
1589
1590       if (GET_MODE (xtarget) != ext_mode)
1591         {
1592           /* Don't use LHS paradoxical subreg if explicit truncation is needed
1593              between the mode of the extraction (word_mode) and the target
1594              mode.  Instead, create a temporary and use convert_move to set
1595              the target.  */
1596           if (REG_P (xtarget)
1597               && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1598             {
1599               xtarget = gen_lowpart (ext_mode, xtarget);
1600               if (GET_MODE_PRECISION (ext_mode)
1601                   > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1602                 xspec_target_subreg = xtarget;
1603             }
1604           else
1605             xtarget = gen_reg_rtx (ext_mode);
1606         }
1607
1608       create_output_operand (&ops[0], xtarget, ext_mode);
1609       create_fixed_operand (&ops[1], xop0);
1610       create_integer_operand (&ops[2], bitsize);
1611       create_integer_operand (&ops[3], xbitpos);
1612       if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1613                              4, ops))
1614         {
1615           xtarget = ops[0].value;
1616           if (xtarget == xspec_target)
1617             return xtarget;
1618           if (xtarget == xspec_target_subreg)
1619             return xspec_target;
1620           return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1621         }
1622     }
1623
1624   /* If OP0 is a memory, try copying it to a register and seeing if a
1625      cheap register alternative is available.  */
1626   if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1627     {
1628       enum machine_mode bestmode;
1629
1630       /* Get the mode to use for inserting into this field.  If
1631          OP0 is BLKmode, get the smallest mode consistent with the
1632          alignment. If OP0 is a non-BLKmode object that is no
1633          wider than EXT_MODE, use its mode. Otherwise, use the
1634          smallest mode containing the field.  */
1635
1636       if (GET_MODE (op0) == BLKmode
1637           || (ext_mode != MAX_MACHINE_MODE
1638               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1639         bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1640                                   (ext_mode == MAX_MACHINE_MODE
1641                                    ? VOIDmode : ext_mode),
1642                                   MEM_VOLATILE_P (op0));
1643       else
1644         bestmode = GET_MODE (op0);
1645
1646       if (bestmode != VOIDmode
1647           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1648                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1649         {
1650           unsigned HOST_WIDE_INT xoffset, xbitpos;
1651
1652           /* Compute the offset as a multiple of this unit,
1653              counting in bytes.  */
1654           unit = GET_MODE_BITSIZE (bestmode);
1655           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1656           xbitpos = bitnum % unit;
1657
1658           /* Make sure the register is big enough for the whole field.  */
1659           if (xoffset * BITS_PER_UNIT + unit
1660               >= offset * BITS_PER_UNIT + bitsize)
1661             {
1662               rtx last, result, xop0;
1663
1664               last = get_last_insn ();
1665
1666               /* Fetch it to a register in that size.  */
1667               xop0 = adjust_address (op0, bestmode, xoffset);
1668               xop0 = force_reg (bestmode, xop0);
1669               result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1670                                             unsignedp, packedp, target,
1671                                             mode, tmode, false);
1672               if (result)
1673                 return result;
1674
1675               delete_insns_since (last);
1676             }
1677         }
1678     }
1679
1680   if (!fallback_p)
1681     return NULL;
1682
1683   target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1684                                     bitpos, target, unsignedp, packedp);
1685   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1686 }
1687
1688 /* Generate code to extract a byte-field from STR_RTX
1689    containing BITSIZE bits, starting at BITNUM,
1690    and put it in TARGET if possible (if TARGET is nonzero).
1691    Regardless of TARGET, we return the rtx for where the value is placed.
1692
1693    STR_RTX is the structure containing the byte (a REG or MEM).
1694    UNSIGNEDP is nonzero if this is an unsigned bit field.
1695    PACKEDP is nonzero if the field has the packed attribute.
1696    MODE is the natural mode of the field value once extracted.
1697    TMODE is the mode the caller would like the value to have;
1698    but the value may be returned with type MODE instead.
1699
1700    If a TARGET is specified and we can store in it at no extra cost,
1701    we do so, and return TARGET.
1702    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1703    if they are equally easy.  */
1704
1705 rtx
1706 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1707                    unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1708                    rtx target, enum machine_mode mode, enum machine_mode tmode)
1709 {
1710   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1711                               target, mode, tmode, true);
1712 }
1713 \f
1714 /* Extract a bit field using shifts and boolean operations
1715    Returns an rtx to represent the value.
1716    OP0 addresses a register (word) or memory (byte).
1717    BITPOS says which bit within the word or byte the bit field starts in.
1718    OFFSET says how many bytes farther the bit field starts;
1719     it is 0 if OP0 is a register.
1720    BITSIZE says how many bits long the bit field is.
1721     (If OP0 is a register, it may be narrower than a full word,
1722      but BITPOS still counts within a full word,
1723      which is significant on bigendian machines.)
1724
1725    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1726    PACKEDP is true if the field has the packed attribute.
1727
1728    If TARGET is nonzero, attempts to store the value there
1729    and return TARGET, but this is not guaranteed.
1730    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1731
1732 static rtx
1733 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1734                          unsigned HOST_WIDE_INT offset,
1735                          unsigned HOST_WIDE_INT bitsize,
1736                          unsigned HOST_WIDE_INT bitpos, rtx target,
1737                          int unsignedp, bool packedp)
1738 {
1739   unsigned int total_bits = BITS_PER_WORD;
1740   enum machine_mode mode;
1741
1742   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1743     {
1744       /* Special treatment for a bit field split across two registers.  */
1745       if (bitsize + bitpos > BITS_PER_WORD)
1746         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1747     }
1748   else
1749     {
1750       /* Get the proper mode to use for this field.  We want a mode that
1751          includes the entire field.  If such a mode would be larger than
1752          a word, we won't be doing the extraction the normal way.  */
1753
1754       if (MEM_VOLATILE_P (op0)
1755           && flag_strict_volatile_bitfields > 0)
1756         {
1757           if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1758             mode = GET_MODE (op0);
1759           else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1760             mode = GET_MODE (target);
1761           else
1762             mode = tmode;
1763         }
1764       else
1765         mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
1766                               MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1767
1768       if (mode == VOIDmode)
1769         /* The only way this should occur is if the field spans word
1770            boundaries.  */
1771         return extract_split_bit_field (op0, bitsize,
1772                                         bitpos + offset * BITS_PER_UNIT,
1773                                         unsignedp);
1774
1775       total_bits = GET_MODE_BITSIZE (mode);
1776
1777       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1778          be in the range 0 to total_bits-1, and put any excess bytes in
1779          OFFSET.  */
1780       if (bitpos >= total_bits)
1781         {
1782           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1783           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1784                      * BITS_PER_UNIT);
1785         }
1786
1787       /* If we're accessing a volatile MEM, we can't do the next
1788          alignment step if it results in a multi-word access where we
1789          otherwise wouldn't have one.  So, check for that case
1790          here.  */
1791       if (MEM_P (op0)
1792           && MEM_VOLATILE_P (op0)
1793           && flag_strict_volatile_bitfields > 0
1794           && bitpos + bitsize <= total_bits
1795           && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1796         {
1797           if (STRICT_ALIGNMENT)
1798             {
1799               static bool informed_about_misalignment = false;
1800               bool warned;
1801
1802               if (packedp)
1803                 {
1804                   if (bitsize == total_bits)
1805                     warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1806                                          "multiple accesses to volatile structure member"
1807                                          " because of packed attribute");
1808                   else
1809                     warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1810                                          "multiple accesses to volatile structure bitfield"
1811                                          " because of packed attribute");
1812
1813                   return extract_split_bit_field (op0, bitsize,
1814                                                   bitpos + offset * BITS_PER_UNIT,
1815                                                   unsignedp);
1816                 }
1817
1818               if (bitsize == total_bits)
1819                 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1820                                      "mis-aligned access used for structure member");
1821               else
1822                 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1823                                      "mis-aligned access used for structure bitfield");
1824
1825               if (! informed_about_misalignment && warned)
1826                 {
1827                   informed_about_misalignment = true;
1828                   inform (input_location,
1829                           "when a volatile object spans multiple type-sized locations,"
1830                           " the compiler must choose between using a single mis-aligned access to"
1831                           " preserve the volatility, or using multiple aligned accesses to avoid"
1832                           " runtime faults; this code may fail at runtime if the hardware does"
1833                           " not allow this access");
1834                 }
1835             }
1836         }
1837       else
1838         {
1839
1840           /* Get ref to an aligned byte, halfword, or word containing the field.
1841              Adjust BITPOS to be position within a word,
1842              and OFFSET to be the offset of that word.
1843              Then alter OP0 to refer to that word.  */
1844           bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1845           offset -= (offset % (total_bits / BITS_PER_UNIT));
1846         }
1847
1848       op0 = adjust_address (op0, mode, offset);
1849     }
1850
1851   mode = GET_MODE (op0);
1852
1853   if (BYTES_BIG_ENDIAN)
1854     /* BITPOS is the distance between our msb and that of OP0.
1855        Convert it to the distance from the lsb.  */
1856     bitpos = total_bits - bitsize - bitpos;
1857
1858   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1859      We have reduced the big-endian case to the little-endian case.  */
1860
1861   if (unsignedp)
1862     {
1863       if (bitpos)
1864         {
1865           /* If the field does not already start at the lsb,
1866              shift it so it does.  */
1867           /* Maybe propagate the target for the shift.  */
1868           /* But not if we will return it--could confuse integrate.c.  */
1869           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1870           if (tmode != mode) subtarget = 0;
1871           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1872         }
1873       /* Convert the value to the desired mode.  */
1874       if (mode != tmode)
1875         op0 = convert_to_mode (tmode, op0, 1);
1876
1877       /* Unless the msb of the field used to be the msb when we shifted,
1878          mask out the upper bits.  */
1879
1880       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1881         return expand_binop (GET_MODE (op0), and_optab, op0,
1882                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1883                              target, 1, OPTAB_LIB_WIDEN);
1884       return op0;
1885     }
1886
1887   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1888      then arithmetic-shift its lsb to the lsb of the word.  */
1889   op0 = force_reg (mode, op0);
1890
1891   /* Find the narrowest integer mode that contains the field.  */
1892
1893   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1894        mode = GET_MODE_WIDER_MODE (mode))
1895     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1896       {
1897         op0 = convert_to_mode (mode, op0, 0);
1898         break;
1899       }
1900
1901   if (mode != tmode)
1902     target = 0;
1903
1904   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1905     {
1906       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1907       /* Maybe propagate the target for the shift.  */
1908       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1909       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1910     }
1911
1912   return expand_shift (RSHIFT_EXPR, mode, op0,
1913                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1914 }
1915 \f
1916 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1917    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1918    complement of that if COMPLEMENT.  The mask is truncated if
1919    necessary to the width of mode MODE.  The mask is zero-extended if
1920    BITSIZE+BITPOS is too small for MODE.  */
1921
1922 static rtx
1923 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1924 {
1925   double_int mask;
1926
1927   mask = double_int_mask (bitsize);
1928   mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1929
1930   if (complement)
1931     mask = double_int_not (mask);
1932
1933   return immed_double_int_const (mask, mode);
1934 }
1935
1936 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1937    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1938
1939 static rtx
1940 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1941 {
1942   double_int val;
1943
1944   val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1945   val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1946
1947   return immed_double_int_const (val, mode);
1948 }
1949 \f
1950 /* Extract a bit field that is split across two words
1951    and return an RTX for the result.
1952
1953    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1954    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1955    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1956
1957 static rtx
1958 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1959                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1960 {
1961   unsigned int unit;
1962   unsigned int bitsdone = 0;
1963   rtx result = NULL_RTX;
1964   int first = 1;
1965
1966   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1967      much at a time.  */
1968   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1969     unit = BITS_PER_WORD;
1970   else
1971     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1972
1973   while (bitsdone < bitsize)
1974     {
1975       unsigned HOST_WIDE_INT thissize;
1976       rtx part, word;
1977       unsigned HOST_WIDE_INT thispos;
1978       unsigned HOST_WIDE_INT offset;
1979
1980       offset = (bitpos + bitsdone) / unit;
1981       thispos = (bitpos + bitsdone) % unit;
1982
1983       /* THISSIZE must not overrun a word boundary.  Otherwise,
1984          extract_fixed_bit_field will call us again, and we will mutually
1985          recurse forever.  */
1986       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1987       thissize = MIN (thissize, unit - thispos);
1988
1989       /* If OP0 is a register, then handle OFFSET here.
1990
1991          When handling multiword bitfields, extract_bit_field may pass
1992          down a word_mode SUBREG of a larger REG for a bitfield that actually
1993          crosses a word boundary.  Thus, for a SUBREG, we must find
1994          the current word starting from the base register.  */
1995       if (GET_CODE (op0) == SUBREG)
1996         {
1997           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1998           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1999                                         GET_MODE (SUBREG_REG (op0)));
2000           offset = 0;
2001         }
2002       else if (REG_P (op0))
2003         {
2004           word = operand_subword_force (op0, offset, GET_MODE (op0));
2005           offset = 0;
2006         }
2007       else
2008         word = op0;
2009
2010       /* Extract the parts in bit-counting order,
2011          whose meaning is determined by BYTES_PER_UNIT.
2012          OFFSET is in UNITs, and UNIT is in bits.
2013          extract_fixed_bit_field wants offset in bytes.  */
2014       part = extract_fixed_bit_field (word_mode, word,
2015                                       offset * unit / BITS_PER_UNIT,
2016                                       thissize, thispos, 0, 1, false);
2017       bitsdone += thissize;
2018
2019       /* Shift this part into place for the result.  */
2020       if (BYTES_BIG_ENDIAN)
2021         {
2022           if (bitsize != bitsdone)
2023             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2024                                  bitsize - bitsdone, 0, 1);
2025         }
2026       else
2027         {
2028           if (bitsdone != thissize)
2029             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2030                                  bitsdone - thissize, 0, 1);
2031         }
2032
2033       if (first)
2034         result = part;
2035       else
2036         /* Combine the parts with bitwise or.  This works
2037            because we extracted each part as an unsigned bit field.  */
2038         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2039                                OPTAB_LIB_WIDEN);
2040
2041       first = 0;
2042     }
2043
2044   /* Unsigned bit field: we are done.  */
2045   if (unsignedp)
2046     return result;
2047   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2048   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2049                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2050   return expand_shift (RSHIFT_EXPR, word_mode, result,
2051                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2052 }
2053 \f
2054 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2055    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2056    MODE, fill the upper bits with zeros.  Fail if the layout of either
2057    mode is unknown (as for CC modes) or if the extraction would involve
2058    unprofitable mode punning.  Return the value on success, otherwise
2059    return null.
2060
2061    This is different from gen_lowpart* in these respects:
2062
2063      - the returned value must always be considered an rvalue
2064
2065      - when MODE is wider than SRC_MODE, the extraction involves
2066        a zero extension
2067
2068      - when MODE is smaller than SRC_MODE, the extraction involves
2069        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2070
2071    In other words, this routine performs a computation, whereas the
2072    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2073    operations.  */
2074
2075 rtx
2076 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2077 {
2078   enum machine_mode int_mode, src_int_mode;
2079
2080   if (mode == src_mode)
2081     return src;
2082
2083   if (CONSTANT_P (src))
2084     {
2085       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2086          fails, it will happily create (subreg (symbol_ref)) or similar
2087          invalid SUBREGs.  */
2088       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2089       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2090       if (ret)
2091         return ret;
2092
2093       if (GET_MODE (src) == VOIDmode
2094           || !validate_subreg (mode, src_mode, src, byte))
2095         return NULL_RTX;
2096
2097       src = force_reg (GET_MODE (src), src);
2098       return gen_rtx_SUBREG (mode, src, byte);
2099     }
2100
2101   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2102     return NULL_RTX;
2103
2104   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2105       && MODES_TIEABLE_P (mode, src_mode))
2106     {
2107       rtx x = gen_lowpart_common (mode, src);
2108       if (x)
2109         return x;
2110     }
2111
2112   src_int_mode = int_mode_for_mode (src_mode);
2113   int_mode = int_mode_for_mode (mode);
2114   if (src_int_mode == BLKmode || int_mode == BLKmode)
2115     return NULL_RTX;
2116
2117   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2118     return NULL_RTX;
2119   if (!MODES_TIEABLE_P (int_mode, mode))
2120     return NULL_RTX;
2121
2122   src = gen_lowpart (src_int_mode, src);
2123   src = convert_modes (int_mode, src_int_mode, src, true);
2124   src = gen_lowpart (mode, src);
2125   return src;
2126 }
2127 \f
2128 /* Add INC into TARGET.  */
2129
2130 void
2131 expand_inc (rtx target, rtx inc)
2132 {
2133   rtx value = expand_binop (GET_MODE (target), add_optab,
2134                             target, inc,
2135                             target, 0, OPTAB_LIB_WIDEN);
2136   if (value != target)
2137     emit_move_insn (target, value);
2138 }
2139
2140 /* Subtract DEC from TARGET.  */
2141
2142 void
2143 expand_dec (rtx target, rtx dec)
2144 {
2145   rtx value = expand_binop (GET_MODE (target), sub_optab,
2146                             target, dec,
2147                             target, 0, OPTAB_LIB_WIDEN);
2148   if (value != target)
2149     emit_move_insn (target, value);
2150 }
2151 \f
2152 /* Output a shift instruction for expression code CODE,
2153    with SHIFTED being the rtx for the value to shift,
2154    and AMOUNT the rtx for the amount to shift by.
2155    Store the result in the rtx TARGET, if that is convenient.
2156    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2157    Return the rtx for where the value is.  */
2158
2159 static rtx
2160 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2161                 rtx amount, rtx target, int unsignedp)
2162 {
2163   rtx op1, temp = 0;
2164   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2165   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2166   optab lshift_optab = ashl_optab;
2167   optab rshift_arith_optab = ashr_optab;
2168   optab rshift_uns_optab = lshr_optab;
2169   optab lrotate_optab = rotl_optab;
2170   optab rrotate_optab = rotr_optab;
2171   enum machine_mode op1_mode;
2172   int attempt;
2173   bool speed = optimize_insn_for_speed_p ();
2174
2175   op1 = amount;
2176   op1_mode = GET_MODE (op1);
2177
2178   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2179      shift amount is a vector, use the vector/vector shift patterns.  */
2180   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2181     {
2182       lshift_optab = vashl_optab;
2183       rshift_arith_optab = vashr_optab;
2184       rshift_uns_optab = vlshr_optab;
2185       lrotate_optab = vrotl_optab;
2186       rrotate_optab = vrotr_optab;
2187     }
2188
2189   /* Previously detected shift-counts computed by NEGATE_EXPR
2190      and shifted in the other direction; but that does not work
2191      on all machines.  */
2192
2193   if (SHIFT_COUNT_TRUNCATED)
2194     {
2195       if (CONST_INT_P (op1)
2196           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2197               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2198         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2199                        % GET_MODE_BITSIZE (mode));
2200       else if (GET_CODE (op1) == SUBREG
2201                && subreg_lowpart_p (op1)
2202                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2203                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2204         op1 = SUBREG_REG (op1);
2205     }
2206
2207   if (op1 == const0_rtx)
2208     return shifted;
2209
2210   /* Check whether its cheaper to implement a left shift by a constant
2211      bit count by a sequence of additions.  */
2212   if (code == LSHIFT_EXPR
2213       && CONST_INT_P (op1)
2214       && INTVAL (op1) > 0
2215       && INTVAL (op1) < GET_MODE_PRECISION (mode)
2216       && INTVAL (op1) < MAX_BITS_PER_WORD
2217       && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2218       && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2219     {
2220       int i;
2221       for (i = 0; i < INTVAL (op1); i++)
2222         {
2223           temp = force_reg (mode, shifted);
2224           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2225                                   unsignedp, OPTAB_LIB_WIDEN);
2226         }
2227       return shifted;
2228     }
2229
2230   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2231     {
2232       enum optab_methods methods;
2233
2234       if (attempt == 0)
2235         methods = OPTAB_DIRECT;
2236       else if (attempt == 1)
2237         methods = OPTAB_WIDEN;
2238       else
2239         methods = OPTAB_LIB_WIDEN;
2240
2241       if (rotate)
2242         {
2243           /* Widening does not work for rotation.  */
2244           if (methods == OPTAB_WIDEN)
2245             continue;
2246           else if (methods == OPTAB_LIB_WIDEN)
2247             {
2248               /* If we have been unable to open-code this by a rotation,
2249                  do it as the IOR of two shifts.  I.e., to rotate A
2250                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2251                  where C is the bitsize of A.
2252
2253                  It is theoretically possible that the target machine might
2254                  not be able to perform either shift and hence we would
2255                  be making two libcalls rather than just the one for the
2256                  shift (similarly if IOR could not be done).  We will allow
2257                  this extremely unlikely lossage to avoid complicating the
2258                  code below.  */
2259
2260               rtx subtarget = target == shifted ? 0 : target;
2261               rtx new_amount, other_amount;
2262               rtx temp1;
2263
2264               new_amount = op1;
2265               if (CONST_INT_P (op1))
2266                 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2267                                         - INTVAL (op1));
2268               else
2269                 other_amount
2270                   = simplify_gen_binary (MINUS, GET_MODE (op1),
2271                                          GEN_INT (GET_MODE_PRECISION (mode)),
2272                                          op1);
2273
2274               shifted = force_reg (mode, shifted);
2275
2276               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2277                                      mode, shifted, new_amount, 0, 1);
2278               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2279                                       mode, shifted, other_amount,
2280                                       subtarget, 1);
2281               return expand_binop (mode, ior_optab, temp, temp1, target,
2282                                    unsignedp, methods);
2283             }
2284
2285           temp = expand_binop (mode,
2286                                left ? lrotate_optab : rrotate_optab,
2287                                shifted, op1, target, unsignedp, methods);
2288         }
2289       else if (unsignedp)
2290         temp = expand_binop (mode,
2291                              left ? lshift_optab : rshift_uns_optab,
2292                              shifted, op1, target, unsignedp, methods);
2293
2294       /* Do arithmetic shifts.
2295          Also, if we are going to widen the operand, we can just as well
2296          use an arithmetic right-shift instead of a logical one.  */
2297       if (temp == 0 && ! rotate
2298           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2299         {
2300           enum optab_methods methods1 = methods;
2301
2302           /* If trying to widen a log shift to an arithmetic shift,
2303              don't accept an arithmetic shift of the same size.  */
2304           if (unsignedp)
2305             methods1 = OPTAB_MUST_WIDEN;
2306
2307           /* Arithmetic shift */
2308
2309           temp = expand_binop (mode,
2310                                left ? lshift_optab : rshift_arith_optab,
2311                                shifted, op1, target, unsignedp, methods1);
2312         }
2313
2314       /* We used to try extzv here for logical right shifts, but that was
2315          only useful for one machine, the VAX, and caused poor code
2316          generation there for lshrdi3, so the code was deleted and a
2317          define_expand for lshrsi3 was added to vax.md.  */
2318     }
2319
2320   gcc_assert (temp);
2321   return temp;
2322 }
2323
2324 /* Output a shift instruction for expression code CODE,
2325    with SHIFTED being the rtx for the value to shift,
2326    and AMOUNT the amount to shift by.
2327    Store the result in the rtx TARGET, if that is convenient.
2328    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2329    Return the rtx for where the value is.  */
2330
2331 rtx
2332 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2333               int amount, rtx target, int unsignedp)
2334 {
2335   return expand_shift_1 (code, mode,
2336                          shifted, GEN_INT (amount), target, unsignedp);
2337 }
2338
2339 /* Output a shift instruction for expression code CODE,
2340    with SHIFTED being the rtx for the value to shift,
2341    and AMOUNT the tree for the amount to shift by.
2342    Store the result in the rtx TARGET, if that is convenient.
2343    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2344    Return the rtx for where the value is.  */
2345
2346 rtx
2347 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2348                        tree amount, rtx target, int unsignedp)
2349 {
2350   return expand_shift_1 (code, mode,
2351                          shifted, expand_normal (amount), target, unsignedp);
2352 }
2353
2354 \f
2355 /* Indicates the type of fixup needed after a constant multiplication.
2356    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2357    the result should be negated, and ADD_VARIANT means that the
2358    multiplicand should be added to the result.  */
2359 enum mult_variant {basic_variant, negate_variant, add_variant};
2360
2361 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2362                         const struct mult_cost *, enum machine_mode mode);
2363 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2364                                  struct algorithm *, enum mult_variant *, int);
2365 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2366                               const struct algorithm *, enum mult_variant);
2367 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2368                                                  int, rtx *, int *, int *);
2369 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2370 static rtx extract_high_half (enum machine_mode, rtx);
2371 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2372 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2373                                        int, int);
2374 /* Compute and return the best algorithm for multiplying by T.
2375    The algorithm must cost less than cost_limit
2376    If retval.cost >= COST_LIMIT, no algorithm was found and all
2377    other field of the returned struct are undefined.
2378    MODE is the machine mode of the multiplication.  */
2379
2380 static void
2381 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2382             const struct mult_cost *cost_limit, enum machine_mode mode)
2383 {
2384   int m;
2385   struct algorithm *alg_in, *best_alg;
2386   struct mult_cost best_cost;
2387   struct mult_cost new_limit;
2388   int op_cost, op_latency;
2389   unsigned HOST_WIDE_INT orig_t = t;
2390   unsigned HOST_WIDE_INT q;
2391   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2392   int hash_index;
2393   bool cache_hit = false;
2394   enum alg_code cache_alg = alg_zero;
2395   bool speed = optimize_insn_for_speed_p ();
2396
2397   /* Indicate that no algorithm is yet found.  If no algorithm
2398      is found, this value will be returned and indicate failure.  */
2399   alg_out->cost.cost = cost_limit->cost + 1;
2400   alg_out->cost.latency = cost_limit->latency + 1;
2401
2402   if (cost_limit->cost < 0
2403       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2404     return;
2405
2406   /* Restrict the bits of "t" to the multiplication's mode.  */
2407   t &= GET_MODE_MASK (mode);
2408
2409   /* t == 1 can be done in zero cost.  */
2410   if (t == 1)
2411     {
2412       alg_out->ops = 1;
2413       alg_out->cost.cost = 0;
2414       alg_out->cost.latency = 0;
2415       alg_out->op[0] = alg_m;
2416       return;
2417     }
2418
2419   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2420      fail now.  */
2421   if (t == 0)
2422     {
2423       if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2424         return;
2425       else
2426         {
2427           alg_out->ops = 1;
2428           alg_out->cost.cost = zero_cost[speed];
2429           alg_out->cost.latency = zero_cost[speed];
2430           alg_out->op[0] = alg_zero;
2431           return;
2432         }
2433     }
2434
2435   /* We'll be needing a couple extra algorithm structures now.  */
2436
2437   alg_in = XALLOCA (struct algorithm);
2438   best_alg = XALLOCA (struct algorithm);
2439   best_cost = *cost_limit;
2440
2441   /* Compute the hash index.  */
2442   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2443
2444   /* See if we already know what to do for T.  */
2445   if (alg_hash[hash_index].t == t
2446       && alg_hash[hash_index].mode == mode
2447       && alg_hash[hash_index].mode == mode
2448       && alg_hash[hash_index].speed == speed
2449       && alg_hash[hash_index].alg != alg_unknown)
2450     {
2451       cache_alg = alg_hash[hash_index].alg;
2452
2453       if (cache_alg == alg_impossible)
2454         {
2455           /* The cache tells us that it's impossible to synthesize
2456              multiplication by T within alg_hash[hash_index].cost.  */
2457           if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2458             /* COST_LIMIT is at least as restrictive as the one
2459                recorded in the hash table, in which case we have no
2460                hope of synthesizing a multiplication.  Just
2461                return.  */
2462             return;
2463
2464           /* If we get here, COST_LIMIT is less restrictive than the
2465              one recorded in the hash table, so we may be able to
2466              synthesize a multiplication.  Proceed as if we didn't
2467              have the cache entry.  */
2468         }
2469       else
2470         {
2471           if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2472             /* The cached algorithm shows that this multiplication
2473                requires more cost than COST_LIMIT.  Just return.  This
2474                way, we don't clobber this cache entry with
2475                alg_impossible but retain useful information.  */
2476             return;
2477
2478           cache_hit = true;
2479
2480           switch (cache_alg)
2481             {
2482             case alg_shift:
2483               goto do_alg_shift;
2484
2485             case alg_add_t_m2:
2486             case alg_sub_t_m2:
2487               goto do_alg_addsub_t_m2;
2488
2489             case alg_add_factor:
2490             case alg_sub_factor:
2491               goto do_alg_addsub_factor;
2492
2493             case alg_add_t2_m:
2494               goto do_alg_add_t2_m;
2495
2496             case alg_sub_t2_m:
2497               goto do_alg_sub_t2_m;
2498
2499             default:
2500               gcc_unreachable ();
2501             }
2502         }
2503     }
2504
2505   /* If we have a group of zero bits at the low-order part of T, try
2506      multiplying by the remaining bits and then doing a shift.  */
2507
2508   if ((t & 1) == 0)
2509     {
2510     do_alg_shift:
2511       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2512       if (m < maxm)
2513         {
2514           q = t >> m;
2515           /* The function expand_shift will choose between a shift and
2516              a sequence of additions, so the observed cost is given as
2517              MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]).  */
2518           op_cost = m * add_cost[speed][mode];
2519           if (shift_cost[speed][mode][m] < op_cost)
2520             op_cost = shift_cost[speed][mode][m];
2521           new_limit.cost = best_cost.cost - op_cost;
2522           new_limit.latency = best_cost.latency - op_cost;
2523           synth_mult (alg_in, q, &new_limit, mode);
2524
2525           alg_in->cost.cost += op_cost;
2526           alg_in->cost.latency += op_cost;
2527           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2528             {
2529               struct algorithm *x;
2530               best_cost = alg_in->cost;
2531               x = alg_in, alg_in = best_alg, best_alg = x;
2532               best_alg->log[best_alg->ops] = m;
2533               best_alg->op[best_alg->ops] = alg_shift;
2534             }
2535
2536           /* See if treating ORIG_T as a signed number yields a better
2537              sequence.  Try this sequence only for a negative ORIG_T
2538              as it would be useless for a non-negative ORIG_T.  */
2539           if ((HOST_WIDE_INT) orig_t < 0)
2540             {
2541               /* Shift ORIG_T as follows because a right shift of a
2542                  negative-valued signed type is implementation
2543                  defined.  */
2544               q = ~(~orig_t >> m);
2545               /* The function expand_shift will choose between a shift
2546                  and a sequence of additions, so the observed cost is
2547                  given as MIN (m * add_cost[speed][mode],
2548                  shift_cost[speed][mode][m]).  */
2549               op_cost = m * add_cost[speed][mode];
2550               if (shift_cost[speed][mode][m] < op_cost)
2551                 op_cost = shift_cost[speed][mode][m];
2552               new_limit.cost = best_cost.cost - op_cost;
2553               new_limit.latency = best_cost.latency - op_cost;
2554               synth_mult (alg_in, q, &new_limit, mode);
2555
2556               alg_in->cost.cost += op_cost;
2557               alg_in->cost.latency += op_cost;
2558               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2559                 {
2560                   struct algorithm *x;
2561                   best_cost = alg_in->cost;
2562                   x = alg_in, alg_in = best_alg, best_alg = x;
2563                   best_alg->log[best_alg->ops] = m;
2564                   best_alg->op[best_alg->ops] = alg_shift;
2565                 }
2566             }
2567         }
2568       if (cache_hit)
2569         goto done;
2570     }
2571
2572   /* If we have an odd number, add or subtract one.  */
2573   if ((t & 1) != 0)
2574     {
2575       unsigned HOST_WIDE_INT w;
2576
2577     do_alg_addsub_t_m2:
2578       for (w = 1; (w & t) != 0; w <<= 1)
2579         ;
2580       /* If T was -1, then W will be zero after the loop.  This is another
2581          case where T ends with ...111.  Handling this with (T + 1) and
2582          subtract 1 produces slightly better code and results in algorithm
2583          selection much faster than treating it like the ...0111 case
2584          below.  */
2585       if (w == 0
2586           || (w > 2
2587               /* Reject the case where t is 3.
2588                  Thus we prefer addition in that case.  */
2589               && t != 3))
2590         {
2591           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2592
2593           op_cost = add_cost[speed][mode];
2594           new_limit.cost = best_cost.cost - op_cost;
2595           new_limit.latency = best_cost.latency - op_cost;
2596           synth_mult (alg_in, t + 1, &new_limit, mode);
2597
2598           alg_in->cost.cost += op_cost;
2599           alg_in->cost.latency += op_cost;
2600           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2601             {
2602               struct algorithm *x;
2603               best_cost = alg_in->cost;
2604               x = alg_in, alg_in = best_alg, best_alg = x;
2605               best_alg->log[best_alg->ops] = 0;
2606               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2607             }
2608         }
2609       else
2610         {
2611           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2612
2613           op_cost = add_cost[speed][mode];
2614           new_limit.cost = best_cost.cost - op_cost;
2615           new_limit.latency = best_cost.latency - op_cost;
2616           synth_mult (alg_in, t - 1, &new_limit, mode);
2617
2618           alg_in->cost.cost += op_cost;
2619           alg_in->cost.latency += op_cost;
2620           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2621             {
2622               struct algorithm *x;
2623               best_cost = alg_in->cost;
2624               x = alg_in, alg_in = best_alg, best_alg = x;
2625               best_alg->log[best_alg->ops] = 0;
2626               best_alg->op[best_alg->ops] = alg_add_t_m2;
2627             }
2628         }
2629
2630       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2631          quickly with a - a * n for some appropriate constant n.  */
2632       m = exact_log2 (-orig_t + 1);
2633       if (m >= 0 && m < maxm)
2634         {
2635           op_cost = shiftsub1_cost[speed][mode][m];
2636           new_limit.cost = best_cost.cost - op_cost;
2637           new_limit.latency = best_cost.latency - op_cost;
2638           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2639
2640           alg_in->cost.cost += op_cost;
2641           alg_in->cost.latency += op_cost;
2642           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2643             {
2644               struct algorithm *x;
2645               best_cost = alg_in->cost;
2646               x = alg_in, alg_in = best_alg, best_alg = x;
2647               best_alg->log[best_alg->ops] = m;
2648               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2649             }
2650         }
2651
2652       if (cache_hit)
2653         goto done;
2654     }
2655
2656   /* Look for factors of t of the form
2657      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2658      If we find such a factor, we can multiply by t using an algorithm that
2659      multiplies by q, shift the result by m and add/subtract it to itself.
2660
2661      We search for large factors first and loop down, even if large factors
2662      are less probable than small; if we find a large factor we will find a
2663      good sequence quickly, and therefore be able to prune (by decreasing
2664      COST_LIMIT) the search.  */
2665
2666  do_alg_addsub_factor:
2667   for (m = floor_log2 (t - 1); m >= 2; m--)
2668     {
2669       unsigned HOST_WIDE_INT d;
2670
2671       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2672       if (t % d == 0 && t > d && m < maxm
2673           && (!cache_hit || cache_alg == alg_add_factor))
2674         {
2675           /* If the target has a cheap shift-and-add instruction use
2676              that in preference to a shift insn followed by an add insn.
2677              Assume that the shift-and-add is "atomic" with a latency
2678              equal to its cost, otherwise assume that on superscalar
2679              hardware the shift may be executed concurrently with the
2680              earlier steps in the algorithm.  */
2681           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2682           if (shiftadd_cost[speed][mode][m] < op_cost)
2683             {
2684               op_cost = shiftadd_cost[speed][mode][m];
2685               op_latency = op_cost;
2686             }
2687           else
2688             op_latency = add_cost[speed][mode];
2689
2690           new_limit.cost = best_cost.cost - op_cost;
2691           new_limit.latency = best_cost.latency - op_latency;
2692           synth_mult (alg_in, t / d, &new_limit, mode);
2693
2694           alg_in->cost.cost += op_cost;
2695           alg_in->cost.latency += op_latency;
2696           if (alg_in->cost.latency < op_cost)
2697             alg_in->cost.latency = op_cost;
2698           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2699             {
2700               struct algorithm *x;
2701               best_cost = alg_in->cost;
2702               x = alg_in, alg_in = best_alg, best_alg = x;
2703               best_alg->log[best_alg->ops] = m;
2704               best_alg->op[best_alg->ops] = alg_add_factor;
2705             }
2706           /* Other factors will have been taken care of in the recursion.  */
2707           break;
2708         }
2709
2710       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2711       if (t % d == 0 && t > d && m < maxm
2712           && (!cache_hit || cache_alg == alg_sub_factor))
2713         {
2714           /* If the target has a cheap shift-and-subtract insn use
2715              that in preference to a shift insn followed by a sub insn.
2716              Assume that the shift-and-sub is "atomic" with a latency
2717              equal to it's cost, otherwise assume that on superscalar
2718              hardware the shift may be executed concurrently with the
2719              earlier steps in the algorithm.  */
2720           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2721           if (shiftsub0_cost[speed][mode][m] < op_cost)
2722             {
2723               op_cost = shiftsub0_cost[speed][mode][m];
2724               op_latency = op_cost;
2725             }
2726           else
2727             op_latency = add_cost[speed][mode];
2728
2729           new_limit.cost = best_cost.cost - op_cost;
2730           new_limit.latency = best_cost.latency - op_latency;
2731           synth_mult (alg_in, t / d, &new_limit, mode);
2732
2733           alg_in->cost.cost += op_cost;
2734           alg_in->cost.latency += op_latency;
2735           if (alg_in->cost.latency < op_cost)
2736             alg_in->cost.latency = op_cost;
2737           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2738             {
2739               struct algorithm *x;
2740               best_cost = alg_in->cost;
2741               x = alg_in, alg_in = best_alg, best_alg = x;
2742               best_alg->log[best_alg->ops] = m;
2743               best_alg->op[best_alg->ops] = alg_sub_factor;
2744             }
2745           break;
2746         }
2747     }
2748   if (cache_hit)
2749     goto done;
2750
2751   /* Try shift-and-add (load effective address) instructions,
2752      i.e. do a*3, a*5, a*9.  */
2753   if ((t & 1) != 0)
2754     {
2755     do_alg_add_t2_m:
2756       q = t - 1;
2757       q = q & -q;
2758       m = exact_log2 (q);
2759       if (m >= 0 && m < maxm)
2760         {
2761           op_cost = shiftadd_cost[speed][mode][m];
2762           new_limit.cost = best_cost.cost - op_cost;
2763           new_limit.latency = best_cost.latency - op_cost;
2764           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2765
2766           alg_in->cost.cost += op_cost;
2767           alg_in->cost.latency += op_cost;
2768           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2769             {
2770               struct algorithm *x;
2771               best_cost = alg_in->cost;
2772               x = alg_in, alg_in = best_alg, best_alg = x;
2773               best_alg->log[best_alg->ops] = m;
2774               best_alg->op[best_alg->ops] = alg_add_t2_m;
2775             }
2776         }
2777       if (cache_hit)
2778         goto done;
2779
2780     do_alg_sub_t2_m:
2781       q = t + 1;
2782       q = q & -q;
2783       m = exact_log2 (q);
2784       if (m >= 0 && m < maxm)
2785         {
2786           op_cost = shiftsub0_cost[speed][mode][m];
2787           new_limit.cost = best_cost.cost - op_cost;
2788           new_limit.latency = best_cost.latency - op_cost;
2789           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2790
2791           alg_in->cost.cost += op_cost;
2792           alg_in->cost.latency += op_cost;
2793           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2794             {
2795               struct algorithm *x;
2796               best_cost = alg_in->cost;
2797               x = alg_in, alg_in = best_alg, best_alg = x;
2798               best_alg->log[best_alg->ops] = m;
2799               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2800             }
2801         }
2802       if (cache_hit)
2803         goto done;
2804     }
2805
2806  done:
2807   /* If best_cost has not decreased, we have not found any algorithm.  */
2808   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2809     {
2810       /* We failed to find an algorithm.  Record alg_impossible for
2811          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2812          we are asked to find an algorithm for T within the same or
2813          lower COST_LIMIT, we can immediately return to the
2814          caller.  */
2815       alg_hash[hash_index].t = t;
2816       alg_hash[hash_index].mode = mode;
2817       alg_hash[hash_index].speed = speed;
2818       alg_hash[hash_index].alg = alg_impossible;
2819       alg_hash[hash_index].cost = *cost_limit;
2820       return;
2821     }
2822
2823   /* Cache the result.  */
2824   if (!cache_hit)
2825     {
2826       alg_hash[hash_index].t = t;
2827       alg_hash[hash_index].mode = mode;
2828       alg_hash[hash_index].speed = speed;
2829       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2830       alg_hash[hash_index].cost.cost = best_cost.cost;
2831       alg_hash[hash_index].cost.latency = best_cost.latency;
2832     }
2833
2834   /* If we are getting a too long sequence for `struct algorithm'
2835      to record, make this search fail.  */
2836   if (best_alg->ops == MAX_BITS_PER_WORD)
2837     return;
2838
2839   /* Copy the algorithm from temporary space to the space at alg_out.
2840      We avoid using structure assignment because the majority of
2841      best_alg is normally undefined, and this is a critical function.  */
2842   alg_out->ops = best_alg->ops + 1;
2843   alg_out->cost = best_cost;
2844   memcpy (alg_out->op, best_alg->op,
2845           alg_out->ops * sizeof *alg_out->op);
2846   memcpy (alg_out->log, best_alg->log,
2847           alg_out->ops * sizeof *alg_out->log);
2848 }
2849 \f
2850 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2851    Try three variations:
2852
2853        - a shift/add sequence based on VAL itself
2854        - a shift/add sequence based on -VAL, followed by a negation
2855        - a shift/add sequence based on VAL - 1, followed by an addition.
2856
2857    Return true if the cheapest of these cost less than MULT_COST,
2858    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2859
2860 static bool
2861 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2862                      struct algorithm *alg, enum mult_variant *variant,
2863                      int mult_cost)
2864 {
2865   struct algorithm alg2;
2866   struct mult_cost limit;
2867   int op_cost;
2868   bool speed = optimize_insn_for_speed_p ();
2869
2870   /* Fail quickly for impossible bounds.  */
2871   if (mult_cost < 0)
2872     return false;
2873
2874   /* Ensure that mult_cost provides a reasonable upper bound.
2875      Any constant multiplication can be performed with less
2876      than 2 * bits additions.  */
2877   op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2878   if (mult_cost > op_cost)
2879     mult_cost = op_cost;
2880
2881   *variant = basic_variant;
2882   limit.cost = mult_cost;
2883   limit.latency = mult_cost;
2884   synth_mult (alg, val, &limit, mode);
2885
2886   /* This works only if the inverted value actually fits in an
2887      `unsigned int' */
2888   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2889     {
2890       op_cost = neg_cost[speed][mode];
2891       if (MULT_COST_LESS (&alg->cost, mult_cost))
2892         {
2893           limit.cost = alg->cost.cost - op_cost;
2894           limit.latency = alg->cost.latency - op_cost;
2895         }
2896       else
2897         {
2898           limit.cost = mult_cost - op_cost;
2899           limit.latency = mult_cost - op_cost;
2900         }
2901
2902       synth_mult (&alg2, -val, &limit, mode);
2903       alg2.cost.cost += op_cost;
2904       alg2.cost.latency += op_cost;
2905       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2906         *alg = alg2, *variant = negate_variant;
2907     }
2908
2909   /* This proves very useful for division-by-constant.  */
2910   op_cost = add_cost[speed][mode];
2911   if (MULT_COST_LESS (&alg->cost, mult_cost))
2912     {
2913       limit.cost = alg->cost.cost - op_cost;
2914       limit.latency = alg->cost.latency - op_cost;
2915     }
2916   else
2917     {
2918       limit.cost = mult_cost - op_cost;
2919       limit.latency = mult_cost - op_cost;
2920     }
2921
2922   synth_mult (&alg2, val - 1, &limit, mode);
2923   alg2.cost.cost += op_cost;
2924   alg2.cost.latency += op_cost;
2925   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2926     *alg = alg2, *variant = add_variant;
2927
2928   return MULT_COST_LESS (&alg->cost, mult_cost);
2929 }
2930
2931 /* A subroutine of expand_mult, used for constant multiplications.
2932    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2933    convenient.  Use the shift/add sequence described by ALG and apply
2934    the final fixup specified by VARIANT.  */
2935
2936 static rtx
2937 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2938                    rtx target, const struct algorithm *alg,
2939                    enum mult_variant variant)
2940 {
2941   HOST_WIDE_INT val_so_far;
2942   rtx insn, accum, tem;
2943   int opno;
2944   enum machine_mode nmode;
2945
2946   /* Avoid referencing memory over and over and invalid sharing
2947      on SUBREGs.  */
2948   op0 = force_reg (mode, op0);
2949
2950   /* ACCUM starts out either as OP0 or as a zero, depending on
2951      the first operation.  */
2952
2953   if (alg->op[0] == alg_zero)
2954     {
2955       accum = copy_to_mode_reg (mode, const0_rtx);
2956       val_so_far = 0;
2957     }
2958   else if (alg->op[0] == alg_m)
2959     {
2960       accum = copy_to_mode_reg (mode, op0);
2961       val_so_far = 1;
2962     }
2963   else
2964     gcc_unreachable ();
2965
2966   for (opno = 1; opno < alg->ops; opno++)
2967     {
2968       int log = alg->log[opno];
2969       rtx shift_subtarget = optimize ? 0 : accum;
2970       rtx add_target
2971         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2972            && !optimize)
2973           ? target : 0;
2974       rtx accum_target = optimize ? 0 : accum;
2975       rtx accum_inner;
2976
2977       switch (alg->op[opno])
2978         {
2979         case alg_shift:
2980           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2981           /* REG_EQUAL note will be attached to the following insn.  */
2982           emit_move_insn (accum, tem);
2983           val_so_far <<= log;
2984           break;
2985
2986         case alg_add_t_m2:
2987           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2988           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2989                                  add_target ? add_target : accum_target);
2990           val_so_far += (HOST_WIDE_INT) 1 << log;
2991           break;
2992
2993         case alg_sub_t_m2:
2994           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2995           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2996                                  add_target ? add_target : accum_target);
2997           val_so_far -= (HOST_WIDE_INT) 1 << log;
2998           break;
2999
3000         case alg_add_t2_m:
3001           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3002                                 log, shift_subtarget, 0);
3003           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3004                                  add_target ? add_target : accum_target);
3005           val_so_far = (val_so_far << log) + 1;
3006           break;
3007
3008         case alg_sub_t2_m:
3009           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3010                                 log, shift_subtarget, 0);
3011           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3012                                  add_target ? add_target : accum_target);
3013           val_so_far = (val_so_far << log) - 1;
3014           break;
3015
3016         case alg_add_factor:
3017           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3018           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3019                                  add_target ? add_target : accum_target);
3020           val_so_far += val_so_far << log;
3021           break;
3022
3023         case alg_sub_factor:
3024           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3025           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3026                                  (add_target
3027                                   ? add_target : (optimize ? 0 : tem)));
3028           val_so_far = (val_so_far << log) - val_so_far;
3029           break;
3030
3031         default:
3032           gcc_unreachable ();
3033         }
3034
3035       /* Write a REG_EQUAL note on the last insn so that we can cse
3036          multiplication sequences.  Note that if ACCUM is a SUBREG,
3037          we've set the inner register and must properly indicate
3038          that.  */
3039
3040       tem = op0, nmode = mode;
3041       accum_inner = accum;
3042       if (GET_CODE (accum) == SUBREG)
3043         {
3044           accum_inner = SUBREG_REG (accum);
3045           nmode = GET_MODE (accum_inner);
3046           tem = gen_lowpart (nmode, op0);
3047         }
3048
3049       insn = get_last_insn ();
3050       set_dst_reg_note (insn, REG_EQUAL,
3051                         gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3052                         accum_inner);
3053     }
3054
3055   if (variant == negate_variant)
3056     {
3057       val_so_far = -val_so_far;
3058       accum = expand_unop (mode, neg_optab, accum, target, 0);
3059     }
3060   else if (variant == add_variant)
3061     {
3062       val_so_far = val_so_far + 1;
3063       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3064     }
3065
3066   /* Compare only the bits of val and val_so_far that are significant
3067      in the result mode, to avoid sign-/zero-extension confusion.  */
3068   val &= GET_MODE_MASK (mode);
3069   val_so_far &= GET_MODE_MASK (mode);
3070   gcc_assert (val == val_so_far);
3071
3072   return accum;
3073 }
3074
3075 /* Perform a multiplication and return an rtx for the result.
3076    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3077    TARGET is a suggestion for where to store the result (an rtx).
3078
3079    We check specially for a constant integer as OP1.
3080    If you want this check for OP0 as well, then before calling
3081    you should swap the two operands if OP0 would be constant.  */
3082
3083 rtx
3084 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3085              int unsignedp)
3086 {
3087   enum mult_variant variant;
3088   struct algorithm algorithm;
3089   int max_cost;
3090   bool speed = optimize_insn_for_speed_p ();
3091
3092   /* Handling const0_rtx here allows us to use zero as a rogue value for
3093      coeff below.  */
3094   if (op1 == const0_rtx)
3095     return const0_rtx;
3096   if (op1 == const1_rtx)
3097     return op0;
3098   if (op1 == constm1_rtx)
3099     return expand_unop (mode,
3100                         GET_MODE_CLASS (mode) == MODE_INT
3101                         && !unsignedp && flag_trapv
3102                         ? negv_optab : neg_optab,
3103                         op0, target, 0);
3104
3105   /* These are the operations that are potentially turned into a sequence
3106      of shifts and additions.  */
3107   if (SCALAR_INT_MODE_P (mode)
3108       && (unsignedp || !flag_trapv))
3109     {
3110       HOST_WIDE_INT coeff = 0;
3111       rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3112
3113       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3114          less than or equal in size to `unsigned int' this doesn't matter.
3115          If the mode is larger than `unsigned int', then synth_mult works
3116          only if the constant value exactly fits in an `unsigned int' without
3117          any truncation.  This means that multiplying by negative values does
3118          not work; results are off by 2^32 on a 32 bit machine.  */
3119
3120       if (CONST_INT_P (op1))
3121         {
3122           /* Attempt to handle multiplication of DImode values by negative
3123              coefficients, by performing the multiplication by a positive
3124              multiplier and then inverting the result.  */
3125           if (INTVAL (op1) < 0
3126               && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3127             {
3128               /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
3129                  result is interpreted as an unsigned coefficient.
3130                  Exclude cost of op0 from max_cost to match the cost
3131                  calculation of the synth_mult.  */
3132               max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3133                                         speed)
3134                           - neg_cost[speed][mode]);
3135               if (max_cost > 0
3136                   && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3137                                           &variant, max_cost))
3138                 {
3139                   rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3140                                                 NULL_RTX, &algorithm,
3141                                                 variant);
3142                   return expand_unop (mode, neg_optab, temp, target, 0);
3143                 }
3144             }
3145           else coeff = INTVAL (op1);
3146         }
3147       else if (GET_CODE (op1) == CONST_DOUBLE)
3148         {
3149           /* If we are multiplying in DImode, it may still be a win
3150              to try to work with shifts and adds.  */
3151           if (CONST_DOUBLE_HIGH (op1) == 0
3152               && CONST_DOUBLE_LOW (op1) > 0)
3153             coeff = CONST_DOUBLE_LOW (op1);
3154           else if (CONST_DOUBLE_LOW (op1) == 0
3155                    && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3156             {
3157               int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3158                           + HOST_BITS_PER_WIDE_INT;
3159               return expand_shift (LSHIFT_EXPR, mode, op0,
3160                                    shift, target, unsignedp);
3161             }
3162         }
3163
3164       /* We used to test optimize here, on the grounds that it's better to
3165          produce a smaller program when -O is not used.  But this causes
3166          such a terrible slowdown sometimes that it seems better to always
3167          use synth_mult.  */
3168       if (coeff != 0)
3169         {
3170           /* Special case powers of two.  */
3171           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3172             return expand_shift (LSHIFT_EXPR, mode, op0,
3173                                  floor_log2 (coeff), target, unsignedp);
3174
3175           /* Exclude cost of op0 from max_cost to match the cost
3176              calculation of the synth_mult.  */
3177           max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3178           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3179                                    max_cost))
3180             return expand_mult_const (mode, op0, coeff, target,
3181                                       &algorithm, variant);
3182         }
3183     }
3184
3185   if (GET_CODE (op0) == CONST_DOUBLE)
3186     {
3187       rtx temp = op0;
3188       op0 = op1;
3189       op1 = temp;
3190     }
3191
3192   /* Expand x*2.0 as x+x.  */
3193   if (GET_CODE (op1) == CONST_DOUBLE
3194       && SCALAR_FLOAT_MODE_P (mode))
3195     {
3196       REAL_VALUE_TYPE d;
3197       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3198
3199       if (REAL_VALUES_EQUAL (d, dconst2))
3200         {
3201           op0 = force_reg (GET_MODE (op0), op0);
3202           return expand_binop (mode, add_optab, op0, op0,
3203                                target, unsignedp, OPTAB_LIB_WIDEN);
3204         }
3205     }
3206
3207   /* This used to use umul_optab if unsigned, but for non-widening multiply
3208      there is no difference between signed and unsigned.  */
3209   op0 = expand_binop (mode,
3210                       ! unsignedp
3211                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3212                       ? smulv_optab : smul_optab,
3213                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3214   gcc_assert (op0);
3215   return op0;
3216 }
3217
3218 /* Perform a widening multiplication and return an rtx for the result.
3219    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3220    TARGET is a suggestion for where to store the result (an rtx).
3221    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3222    or smul_widen_optab.
3223
3224    We check specially for a constant integer as OP1, comparing the
3225    cost of a widening multiply against the cost of a sequence of shifts
3226    and adds.  */
3227
3228 rtx
3229 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3230                       int unsignedp, optab this_optab)
3231 {
3232   bool speed = optimize_insn_for_speed_p ();
3233   rtx cop1;
3234
3235   if (CONST_INT_P (op1)
3236       && GET_MODE (op0) != VOIDmode
3237       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3238                                 this_optab == umul_widen_optab))
3239       && CONST_INT_P (cop1)
3240       && (INTVAL (cop1) >= 0
3241           || HWI_COMPUTABLE_MODE_P (mode)))
3242     {
3243       HOST_WIDE_INT coeff = INTVAL (cop1);
3244       int max_cost;
3245       enum mult_variant variant;
3246       struct algorithm algorithm;
3247
3248       /* Special case powers of two.  */
3249       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3250         {
3251           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3252           return expand_shift (LSHIFT_EXPR, mode, op0,
3253                                floor_log2 (coeff), target, unsignedp);
3254         }
3255
3256       /* Exclude cost of op0 from max_cost to match the cost
3257          calculation of the synth_mult.  */
3258       max_cost = mul_widen_cost[speed][mode];
3259       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3260                                max_cost))
3261         {
3262           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3263           return expand_mult_const (mode, op0, coeff, target,
3264                                     &algorithm, variant);
3265         }
3266     }
3267   return expand_binop (mode, this_optab, op0, op1, target,
3268                        unsignedp, OPTAB_LIB_WIDEN);
3269 }
3270 \f
3271 /* Return the smallest n such that 2**n >= X.  */
3272
3273 int
3274 ceil_log2 (unsigned HOST_WIDE_INT x)
3275 {
3276   return floor_log2 (x - 1) + 1;
3277 }
3278
3279 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3280    replace division by D, and put the least significant N bits of the result
3281    in *MULTIPLIER_PTR and return the most significant bit.
3282
3283    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3284    needed precision is in PRECISION (should be <= N).
3285
3286    PRECISION should be as small as possible so this function can choose
3287    multiplier more freely.
3288
3289    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3290    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3291
3292    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3293    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3294
3295 static
3296 unsigned HOST_WIDE_INT
3297 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3298                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3299 {
3300   HOST_WIDE_INT mhigh_hi, mlow_hi;
3301   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3302   int lgup, post_shift;
3303   int pow, pow2;
3304   unsigned HOST_WIDE_INT nl, dummy1;
3305   HOST_WIDE_INT nh, dummy2;
3306
3307   /* lgup = ceil(log2(divisor)); */
3308   lgup = ceil_log2 (d);
3309
3310   gcc_assert (lgup <= n);
3311
3312   pow = n + lgup;
3313   pow2 = n + lgup - precision;
3314
3315   /* We could handle this with some effort, but this case is much
3316      better handled directly with a scc insn, so rely on caller using
3317      that.  */
3318   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3319
3320   /* mlow = 2^(N + lgup)/d */
3321  if (pow >= HOST_BITS_PER_WIDE_INT)
3322     {
3323       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3324       nl = 0;
3325     }
3326   else
3327     {
3328       nh = 0;
3329       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3330     }
3331   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3332                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3333
3334   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3335   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3336     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3337   else
3338     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3339   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3340                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3341
3342   gcc_assert (!mhigh_hi || nh - d < d);
3343   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3344   /* Assert that mlow < mhigh.  */
3345   gcc_assert (mlow_hi < mhigh_hi
3346               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3347
3348   /* If precision == N, then mlow, mhigh exceed 2^N
3349      (but they do not exceed 2^(N+1)).  */
3350
3351   /* Reduce to lowest terms.  */
3352   for (post_shift = lgup; post_shift > 0; post_shift--)
3353     {
3354       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3355       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3356       if (ml_lo >= mh_lo)
3357         break;
3358
3359       mlow_hi = 0;
3360       mlow_lo = ml_lo;
3361       mhigh_hi = 0;
3362       mhigh_lo = mh_lo;
3363     }
3364
3365   *post_shift_ptr = post_shift;
3366   *lgup_ptr = lgup;
3367   if (n < HOST_BITS_PER_WIDE_INT)
3368     {
3369       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3370       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3371       return mhigh_lo >= mask;
3372     }
3373   else
3374     {
3375       *multiplier_ptr = GEN_INT (mhigh_lo);
3376       return mhigh_hi;
3377     }
3378 }
3379
3380 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3381    congruent to 1 (mod 2**N).  */
3382
3383 static unsigned HOST_WIDE_INT
3384 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3385 {
3386   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3387
3388   /* The algorithm notes that the choice y = x satisfies
3389      x*y == 1 mod 2^3, since x is assumed odd.
3390      Each iteration doubles the number of bits of significance in y.  */
3391
3392   unsigned HOST_WIDE_INT mask;
3393   unsigned HOST_WIDE_INT y = x;
3394   int nbit = 3;
3395
3396   mask = (n == HOST_BITS_PER_WIDE_INT
3397           ? ~(unsigned HOST_WIDE_INT) 0
3398           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3399
3400   while (nbit < n)
3401     {
3402       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3403       nbit *= 2;
3404     }
3405   return y;
3406 }
3407
3408 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3409    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3410    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3411    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3412    become signed.
3413
3414    The result is put in TARGET if that is convenient.
3415
3416    MODE is the mode of operation.  */
3417
3418 rtx
3419 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3420                              rtx op1, rtx target, int unsignedp)
3421 {
3422   rtx tem;
3423   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3424
3425   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3426                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3427   tem = expand_and (mode, tem, op1, NULL_RTX);
3428   adj_operand
3429     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3430                      adj_operand);
3431
3432   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3433                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3434   tem = expand_and (mode, tem, op0, NULL_RTX);
3435   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3436                           target);
3437
3438   return target;
3439 }
3440
3441 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3442
3443 static rtx
3444 extract_high_half (enum machine_mode mode, rtx op)
3445 {
3446   enum machine_mode wider_mode;
3447
3448   if (mode == word_mode)
3449     return gen_highpart (mode, op);
3450
3451   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3452
3453   wider_mode = GET_MODE_WIDER_MODE (mode);
3454   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3455                      GET_MODE_BITSIZE (mode), 0, 1);
3456   return convert_modes (mode, wider_mode, op, 0);
3457 }
3458
3459 /* Like expand_mult_highpart, but only consider using a multiplication
3460    optab.  OP1 is an rtx for the constant operand.  */
3461
3462 static rtx
3463 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3464                             rtx target, int unsignedp, int max_cost)
3465 {
3466   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3467   enum machine_mode wider_mode;
3468   optab moptab;
3469   rtx tem;
3470   int size;
3471   bool speed = optimize_insn_for_speed_p ();
3472
3473   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3474
3475   wider_mode = GET_MODE_WIDER_MODE (mode);
3476   size = GET_MODE_BITSIZE (mode);
3477
3478   /* Firstly, try using a multiplication insn that only generates the needed
3479      high part of the product, and in the sign flavor of unsignedp.  */
3480   if (mul_highpart_cost[speed][mode] < max_cost)
3481     {
3482       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3483       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3484                           unsignedp, OPTAB_DIRECT);
3485       if (tem)
3486         return tem;
3487     }
3488
3489   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3490      Need to adjust the result after the multiplication.  */
3491   if (size - 1 < BITS_PER_WORD
3492       && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3493           + 4 * add_cost[speed][mode] < max_cost))
3494     {
3495       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3496       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3497                           unsignedp, OPTAB_DIRECT);
3498       if (tem)
3499         /* We used the wrong signedness.  Adjust the result.  */
3500         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3501                                             tem, unsignedp);
3502     }
3503
3504   /* Try widening multiplication.  */
3505   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3506   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3507       && mul_widen_cost[speed][wider_mode] < max_cost)
3508     {
3509       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3510                           unsignedp, OPTAB_WIDEN);
3511       if (tem)
3512         return extract_high_half (mode, tem);
3513     }
3514
3515   /* Try widening the mode and perform a non-widening multiplication.  */
3516   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3517       && size - 1 < BITS_PER_WORD
3518       && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3519     {
3520       rtx insns, wop0, wop1;
3521
3522       /* We need to widen the operands, for example to ensure the
3523          constant multiplier is correctly sign or zero extended.
3524          Use a sequence to clean-up any instructions emitted by
3525          the conversions if things don't work out.  */
3526       start_sequence ();
3527       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3528       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3529       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3530                           unsignedp, OPTAB_WIDEN);
3531       insns = get_insns ();
3532       end_sequence ();
3533
3534       if (tem)
3535         {
3536           emit_insn (insns);
3537           return extract_high_half (mode, tem);
3538         }
3539     }
3540
3541   /* Try widening multiplication of opposite signedness, and adjust.  */
3542   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3543   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3544       && size - 1 < BITS_PER_WORD
3545       && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3546           + 4 * add_cost[speed][mode] < max_cost))
3547     {
3548       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3549                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3550       if (tem != 0)
3551         {
3552           tem = extract_high_half (mode, tem);
3553           /* We used the wrong signedness.  Adjust the result.  */
3554           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3555                                               target, unsignedp);
3556         }
3557     }
3558
3559   return 0;
3560 }
3561
3562 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3563    putting the high half of the result in TARGET if that is convenient,
3564    and return where the result is.  If the operation can not be performed,
3565    0 is returned.
3566
3567    MODE is the mode of operation and result.
3568
3569    UNSIGNEDP nonzero means unsigned multiply.
3570
3571    MAX_COST is the total allowed cost for the expanded RTL.  */
3572
3573 static rtx
3574 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3575                       rtx target, int unsignedp, int max_cost)
3576 {
3577   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3578   unsigned HOST_WIDE_INT cnst1;
3579   int extra_cost;
3580   bool sign_adjust = false;
3581   enum mult_variant variant;
3582   struct algorithm alg;
3583   rtx tem;
3584   bool speed = optimize_insn_for_speed_p ();
3585
3586   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3587   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3588   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3589
3590   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3591
3592   /* We can't optimize modes wider than BITS_PER_WORD.
3593      ??? We might be able to perform double-word arithmetic if
3594      mode == word_mode, however all the cost calculations in
3595      synth_mult etc. assume single-word operations.  */
3596   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3597     return expand_mult_highpart_optab (mode, op0, op1, target,
3598                                        unsignedp, max_cost);
3599
3600   extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3601
3602   /* Check whether we try to multiply by a negative constant.  */
3603   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3604     {
3605       sign_adjust = true;
3606       extra_cost += add_cost[speed][mode];
3607     }
3608
3609   /* See whether shift/add multiplication is cheap enough.  */
3610   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3611                            max_cost - extra_cost))
3612     {
3613       /* See whether the specialized multiplication optabs are
3614          cheaper than the shift/add version.  */
3615       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3616                                         alg.cost.cost + extra_cost);
3617       if (tem)
3618         return tem;
3619
3620       tem = convert_to_mode (wider_mode, op0, unsignedp);
3621       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3622       tem = extract_high_half (mode, tem);
3623
3624       /* Adjust result for signedness.  */
3625       if (sign_adjust)
3626         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3627
3628       return tem;
3629     }
3630   return expand_mult_highpart_optab (mode, op0, op1, target,
3631                                      unsignedp, max_cost);
3632 }
3633
3634
3635 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3636
3637 static rtx
3638 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3639 {
3640   unsigned HOST_WIDE_INT masklow, maskhigh;
3641   rtx result, temp, shift, label;
3642   int logd;
3643
3644   logd = floor_log2 (d);
3645   result = gen_reg_rtx (mode);
3646
3647   /* Avoid conditional branches when they're expensive.  */
3648   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3649       && optimize_insn_for_speed_p ())
3650     {
3651       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3652                                       mode, 0, -1);
3653       if (signmask)
3654         {
3655           signmask = force_reg (mode, signmask);
3656           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3657           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3658
3659           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3660              which instruction sequence to use.  If logical right shifts
3661              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3662              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3663
3664           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3665           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3666               || (set_src_cost (temp, optimize_insn_for_speed_p ())
3667                   > COSTS_N_INSNS (2)))
3668             {
3669               temp = expand_binop (mode, xor_optab, op0, signmask,
3670                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3671               temp = expand_binop (mode, sub_optab, temp, signmask,
3672                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3673               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3674                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3675               temp = expand_binop (mode, xor_optab, temp, signmask,
3676                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3677               temp = expand_binop (mode, sub_optab, temp, signmask,
3678                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3679             }
3680           else
3681             {
3682               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3683                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3684               signmask = force_reg (mode, signmask);
3685
3686               temp = expand_binop (mode, add_optab, op0, signmask,
3687                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3688               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3689                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3690               temp = expand_binop (mode, sub_optab, temp, signmask,
3691                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3692             }
3693           return temp;
3694         }
3695     }
3696
3697   /* Mask contains the mode's signbit and the significant bits of the
3698      modulus.  By including the signbit in the operation, many targets
3699      can avoid an explicit compare operation in the following comparison
3700      against zero.  */
3701
3702   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3703   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3704     {
3705       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3706       maskhigh = -1;
3707     }
3708   else
3709     maskhigh = (HOST_WIDE_INT) -1
3710                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3711
3712   temp = expand_binop (mode, and_optab, op0,
3713                        immed_double_const (masklow, maskhigh, mode),
3714                        result, 1, OPTAB_LIB_WIDEN);
3715   if (temp != result)
3716     emit_move_insn (result, temp);
3717
3718   label = gen_label_rtx ();
3719   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3720
3721   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3722                        0, OPTAB_LIB_WIDEN);
3723   masklow = (HOST_WIDE_INT) -1 << logd;
3724   maskhigh = -1;
3725   temp = expand_binop (mode, ior_optab, temp,
3726                        immed_double_const (masklow, maskhigh, mode),
3727                        result, 1, OPTAB_LIB_WIDEN);
3728   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3729                        0, OPTAB_LIB_WIDEN);
3730   if (temp != result)
3731     emit_move_insn (result, temp);
3732   emit_label (label);
3733   return result;
3734 }
3735
3736 /* Expand signed division of OP0 by a power of two D in mode MODE.
3737    This routine is only called for positive values of D.  */
3738
3739 static rtx
3740 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3741 {
3742   rtx temp, label;
3743   int logd;
3744
3745   logd = floor_log2 (d);
3746
3747   if (d == 2
3748       && BRANCH_COST (optimize_insn_for_speed_p (),
3749                       false) >= 1)
3750     {
3751       temp = gen_reg_rtx (mode);
3752       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3753       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3754                            0, OPTAB_LIB_WIDEN);
3755       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3756     }
3757
3758 #ifdef HAVE_conditional_move
3759   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3760       >= 2)
3761     {
3762       rtx temp2;
3763
3764       /* ??? emit_conditional_move forces a stack adjustment via
3765          compare_from_rtx so, if the sequence is discarded, it will
3766          be lost.  Do it now instead.  */
3767       do_pending_stack_adjust ();
3768
3769       start_sequence ();
3770       temp2 = copy_to_mode_reg (mode, op0);
3771       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3772                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3773       temp = force_reg (mode, temp);
3774
3775       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3776       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3777                                      mode, temp, temp2, mode, 0);
3778       if (temp2)
3779         {
3780           rtx seq = get_insns ();
3781           end_sequence ();
3782           emit_insn (seq);
3783           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3784         }
3785       end_sequence ();
3786     }
3787 #endif
3788
3789   if (BRANCH_COST (optimize_insn_for_speed_p (),
3790                    false) >= 2)
3791     {
3792       int ushift = GET_MODE_BITSIZE (mode) - logd;
3793
3794       temp = gen_reg_rtx (mode);
3795       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3796       if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3797         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3798                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3799       else
3800         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3801                              ushift, NULL_RTX, 1);
3802       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3803                            0, OPTAB_LIB_WIDEN);
3804       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3805     }
3806
3807   label = gen_label_rtx ();
3808   temp = copy_to_mode_reg (mode, op0);
3809   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3810   expand_inc (temp, GEN_INT (d - 1));
3811   emit_label (label);
3812   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3813 }
3814 \f
3815 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3816    if that is convenient, and returning where the result is.
3817    You may request either the quotient or the remainder as the result;
3818    specify REM_FLAG nonzero to get the remainder.
3819
3820    CODE is the expression code for which kind of division this is;
3821    it controls how rounding is done.  MODE is the machine mode to use.
3822    UNSIGNEDP nonzero means do unsigned division.  */
3823
3824 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3825    and then correct it by or'ing in missing high bits
3826    if result of ANDI is nonzero.
3827    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3828    This could optimize to a bfexts instruction.
3829    But C doesn't use these operations, so their optimizations are
3830    left for later.  */
3831 /* ??? For modulo, we don't actually need the highpart of the first product,
3832    the low part will do nicely.  And for small divisors, the second multiply
3833    can also be a low-part only multiply or even be completely left out.
3834    E.g. to calculate the remainder of a division by 3 with a 32 bit
3835    multiply, multiply with 0x55555556 and extract the upper two bits;
3836    the result is exact for inputs up to 0x1fffffff.
3837    The input range can be reduced by using cross-sum rules.
3838    For odd divisors >= 3, the following table gives right shift counts
3839    so that if a number is shifted by an integer multiple of the given
3840    amount, the remainder stays the same:
3841    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3842    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3843    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3844    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3845    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3846
3847    Cross-sum rules for even numbers can be derived by leaving as many bits
3848    to the right alone as the divisor has zeros to the right.
3849    E.g. if x is an unsigned 32 bit number:
3850    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3851    */
3852
3853 rtx
3854 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3855                rtx op0, rtx op1, rtx target, int unsignedp)
3856 {
3857   enum machine_mode compute_mode;
3858   rtx tquotient;
3859   rtx quotient = 0, remainder = 0;
3860   rtx last;
3861   int size;
3862   rtx insn;
3863   optab optab1, optab2;
3864   int op1_is_constant, op1_is_pow2 = 0;
3865   int max_cost, extra_cost;
3866   static HOST_WIDE_INT last_div_const = 0;
3867   static HOST_WIDE_INT ext_op1;
3868   bool speed = optimize_insn_for_speed_p ();
3869
3870   op1_is_constant = CONST_INT_P (op1);
3871   if (op1_is_constant)
3872     {
3873       ext_op1 = INTVAL (op1);
3874       if (unsignedp)
3875         ext_op1 &= GET_MODE_MASK (mode);
3876       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3877                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3878     }
3879
3880   /*
3881      This is the structure of expand_divmod:
3882
3883      First comes code to fix up the operands so we can perform the operations
3884      correctly and efficiently.
3885
3886      Second comes a switch statement with code specific for each rounding mode.
3887      For some special operands this code emits all RTL for the desired
3888      operation, for other cases, it generates only a quotient and stores it in
3889      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3890      to indicate that it has not done anything.
3891
3892      Last comes code that finishes the operation.  If QUOTIENT is set and
3893      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3894      QUOTIENT is not set, it is computed using trunc rounding.
3895
3896      We try to generate special code for division and remainder when OP1 is a
3897      constant.  If |OP1| = 2**n we can use shifts and some other fast
3898      operations.  For other values of OP1, we compute a carefully selected
3899      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3900      by m.
3901
3902      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3903      half of the product.  Different strategies for generating the product are
3904      implemented in expand_mult_highpart.
3905
3906      If what we actually want is the remainder, we generate that by another
3907      by-constant multiplication and a subtraction.  */
3908
3909   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3910      code below will malfunction if we are, so check here and handle
3911      the special case if so.  */
3912   if (op1 == const1_rtx)
3913     return rem_flag ? const0_rtx : op0;
3914
3915     /* When dividing by -1, we could get an overflow.
3916      negv_optab can handle overflows.  */
3917   if (! unsignedp && op1 == constm1_rtx)
3918     {
3919       if (rem_flag)
3920         return const0_rtx;
3921       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3922                           ? negv_optab : neg_optab, op0, target, 0);
3923     }
3924
3925   if (target
3926       /* Don't use the function value register as a target
3927          since we have to read it as well as write it,
3928          and function-inlining gets confused by this.  */
3929       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3930           /* Don't clobber an operand while doing a multi-step calculation.  */
3931           || ((rem_flag || op1_is_constant)
3932               && (reg_mentioned_p (target, op0)
3933                   || (MEM_P (op0) && MEM_P (target))))
3934           || reg_mentioned_p (target, op1)
3935           || (MEM_P (op1) && MEM_P (target))))
3936     target = 0;
3937
3938   /* Get the mode in which to perform this computation.  Normally it will
3939      be MODE, but sometimes we can't do the desired operation in MODE.
3940      If so, pick a wider mode in which we can do the operation.  Convert
3941      to that mode at the start to avoid repeated conversions.
3942
3943      First see what operations we need.  These depend on the expression
3944      we are evaluating.  (We assume that divxx3 insns exist under the
3945      same conditions that modxx3 insns and that these insns don't normally
3946      fail.  If these assumptions are not correct, we may generate less
3947      efficient code in some cases.)
3948
3949      Then see if we find a mode in which we can open-code that operation
3950      (either a division, modulus, or shift).  Finally, check for the smallest
3951      mode for which we can do the operation with a library call.  */
3952
3953   /* We might want to refine this now that we have division-by-constant
3954      optimization.  Since expand_mult_highpart tries so many variants, it is
3955      not straightforward to generalize this.  Maybe we should make an array
3956      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3957
3958   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3959             ? (unsignedp ? lshr_optab : ashr_optab)
3960             : (unsignedp ? udiv_optab : sdiv_optab));
3961   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3962             ? optab1
3963             : (unsignedp ? udivmod_optab : sdivmod_optab));
3964
3965   for (compute_mode = mode; compute_mode != VOIDmode;
3966        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3967     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3968         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3969       break;
3970
3971   if (compute_mode == VOIDmode)
3972     for (compute_mode = mode; compute_mode != VOIDmode;
3973          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3974       if (optab_libfunc (optab1, compute_mode)
3975           || optab_libfunc (optab2, compute_mode))
3976         break;
3977
3978   /* If we still couldn't find a mode, use MODE, but expand_binop will
3979      probably die.  */
3980   if (compute_mode == VOIDmode)
3981     compute_mode = mode;
3982
3983   if (target && GET_MODE (target) == compute_mode)
3984     tquotient = target;
3985   else
3986     tquotient = gen_reg_rtx (compute_mode);
3987
3988   size = GET_MODE_BITSIZE (compute_mode);
3989 #if 0
3990   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3991      (mode), and thereby get better code when OP1 is a constant.  Do that
3992      later.  It will require going over all usages of SIZE below.  */
3993   size = GET_MODE_BITSIZE (mode);
3994 #endif
3995
3996   /* Only deduct something for a REM if the last divide done was
3997      for a different constant.   Then set the constant of the last
3998      divide.  */
3999   max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
4000   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4001                      && INTVAL (op1) == last_div_const))
4002     max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
4003
4004   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4005
4006   /* Now convert to the best mode to use.  */
4007   if (compute_mode != mode)
4008     {
4009       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4010       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4011
4012       /* convert_modes may have placed op1 into a register, so we
4013          must recompute the following.  */
4014       op1_is_constant = CONST_INT_P (op1);
4015       op1_is_pow2 = (op1_is_constant
4016                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4017                           || (! unsignedp
4018                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
4019     }
4020
4021   /* If one of the operands is a volatile MEM, copy it into a register.  */
4022
4023   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4024     op0 = force_reg (compute_mode, op0);
4025   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4026     op1 = force_reg (compute_mode, op1);
4027
4028   /* If we need the remainder or if OP1 is constant, we need to
4029      put OP0 in a register in case it has any queued subexpressions.  */
4030   if (rem_flag || op1_is_constant)
4031     op0 = force_reg (compute_mode, op0);
4032
4033   last = get_last_insn ();
4034
4035   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4036   if (unsignedp)
4037     {
4038       if (code == FLOOR_DIV_EXPR)
4039         code = TRUNC_DIV_EXPR;
4040       if (code == FLOOR_MOD_EXPR)
4041         code = TRUNC_MOD_EXPR;
4042       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4043         code = TRUNC_DIV_EXPR;
4044     }
4045
4046   if (op1 != const0_rtx)
4047     switch (code)
4048       {
4049       case TRUNC_MOD_EXPR:
4050       case TRUNC_DIV_EXPR:
4051         if (op1_is_constant)
4052           {
4053             if (unsignedp)
4054               {
4055                 unsigned HOST_WIDE_INT mh;
4056                 int pre_shift, post_shift;
4057                 int dummy;
4058                 rtx ml;
4059                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4060                                             & GET_MODE_MASK (compute_mode));
4061
4062                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4063                   {
4064                     pre_shift = floor_log2 (d);
4065                     if (rem_flag)
4066                       {
4067                         remainder
4068                           = expand_binop (compute_mode, and_optab, op0,
4069                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4070                                           remainder, 1,
4071                                           OPTAB_LIB_WIDEN);
4072                         if (remainder)
4073                           return gen_lowpart (mode, remainder);
4074                       }
4075                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4076                                              pre_shift, tquotient, 1);
4077                   }
4078                 else if (size <= HOST_BITS_PER_WIDE_INT)
4079                   {
4080                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4081                       {
4082                         /* Most significant bit of divisor is set; emit an scc
4083                            insn.  */
4084                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4085                                                           compute_mode, 1, 1);
4086                       }
4087                     else
4088                       {
4089                         /* Find a suitable multiplier and right shift count
4090                            instead of multiplying with D.  */
4091
4092                         mh = choose_multiplier (d, size, size,
4093                                                 &ml, &post_shift, &dummy);
4094
4095                         /* If the suggested multiplier is more than SIZE bits,
4096                            we can do better for even divisors, using an
4097                            initial right shift.  */
4098                         if (mh != 0 && (d & 1) == 0)
4099                           {
4100                             pre_shift = floor_log2 (d & -d);
4101                             mh = choose_multiplier (d >> pre_shift, size,
4102                                                     size - pre_shift,
4103                                                     &ml, &post_shift, &dummy);
4104                             gcc_assert (!mh);
4105                           }
4106                         else
4107                           pre_shift = 0;
4108
4109                         if (mh != 0)
4110                           {
4111                             rtx t1, t2, t3, t4;
4112
4113                             if (post_shift - 1 >= BITS_PER_WORD)
4114                               goto fail1;
4115
4116                             extra_cost
4117                               = (shift_cost[speed][compute_mode][post_shift - 1]
4118                                  + shift_cost[speed][compute_mode][1]
4119                                  + 2 * add_cost[speed][compute_mode]);
4120                             t1 = expand_mult_highpart (compute_mode, op0, ml,
4121                                                        NULL_RTX, 1,
4122                                                        max_cost - extra_cost);
4123                             if (t1 == 0)
4124                               goto fail1;
4125                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4126                                                                op0, t1),
4127                                                 NULL_RTX);
4128                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4129                                                t2, 1, NULL_RTX, 1);
4130                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4131                                                               t1, t3),
4132                                                 NULL_RTX);
4133                             quotient = expand_shift
4134                               (RSHIFT_EXPR, compute_mode, t4,
4135                                post_shift - 1, tquotient, 1);
4136                           }
4137                         else
4138                           {
4139                             rtx t1, t2;
4140
4141                             if (pre_shift >= BITS_PER_WORD
4142                                 || post_shift >= BITS_PER_WORD)
4143                               goto fail1;
4144
4145                             t1 = expand_shift
4146                               (RSHIFT_EXPR, compute_mode, op0,
4147                                pre_shift, NULL_RTX, 1);
4148                             extra_cost
4149                               = (shift_cost[speed][compute_mode][pre_shift]
4150                                  + shift_cost[speed][compute_mode][post_shift]);
4151                             t2 = expand_mult_highpart (compute_mode, t1, ml,
4152                                                        NULL_RTX, 1,
4153                                                        max_cost - extra_cost);
4154                             if (t2 == 0)
4155                               goto fail1;
4156                             quotient = expand_shift
4157                               (RSHIFT_EXPR, compute_mode, t2,
4158                                post_shift, tquotient, 1);
4159                           }
4160                       }
4161                   }
4162                 else            /* Too wide mode to use tricky code */
4163                   break;
4164
4165                 insn = get_last_insn ();
4166                 if (insn != last)
4167                   set_dst_reg_note (insn, REG_EQUAL,
4168                                     gen_rtx_UDIV (compute_mode, op0, op1),
4169                                     quotient);
4170               }
4171             else                /* TRUNC_DIV, signed */
4172               {
4173                 unsigned HOST_WIDE_INT ml;
4174                 int lgup, post_shift;
4175                 rtx mlr;
4176                 HOST_WIDE_INT d = INTVAL (op1);
4177                 unsigned HOST_WIDE_INT abs_d;
4178
4179                 /* Since d might be INT_MIN, we have to cast to
4180                    unsigned HOST_WIDE_INT before negating to avoid
4181                    undefined signed overflow.  */
4182                 abs_d = (d >= 0
4183                          ? (unsigned HOST_WIDE_INT) d
4184                          : - (unsigned HOST_WIDE_INT) d);
4185
4186                 /* n rem d = n rem -d */
4187                 if (rem_flag && d < 0)
4188                   {
4189                     d = abs_d;
4190                     op1 = gen_int_mode (abs_d, compute_mode);
4191                   }
4192
4193                 if (d == 1)
4194                   quotient = op0;
4195                 else if (d == -1)
4196                   quotient = expand_unop (compute_mode, neg_optab, op0,
4197                                           tquotient, 0);
4198                 else if (HOST_BITS_PER_WIDE_INT >= size
4199                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4200                   {
4201                     /* This case is not handled correctly below.  */
4202                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4203                                                 compute_mode, 1, 1);
4204                     if (quotient == 0)
4205                       goto fail1;
4206                   }
4207                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4208                          && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4209                                       : sdiv_pow2_cheap[speed][compute_mode])
4210                          /* We assume that cheap metric is true if the
4211                             optab has an expander for this mode.  */
4212                          && ((optab_handler ((rem_flag ? smod_optab
4213                                               : sdiv_optab),
4214                                              compute_mode)
4215                               != CODE_FOR_nothing)
4216                              || (optab_handler (sdivmod_optab,
4217                                                 compute_mode)
4218                                  != CODE_FOR_nothing)))
4219                   ;
4220                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4221                   {
4222                     if (rem_flag)
4223                       {
4224                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4225                         if (remainder)
4226                           return gen_lowpart (mode, remainder);
4227                       }
4228
4229                     if (sdiv_pow2_cheap[speed][compute_mode]
4230                         && ((optab_handler (sdiv_optab, compute_mode)
4231                              != CODE_FOR_nothing)
4232                             || (optab_handler (sdivmod_optab, compute_mode)
4233                                 != CODE_FOR_nothing)))
4234                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4235                                                 compute_mode, op0,
4236                                                 gen_int_mode (abs_d,
4237                                                               compute_mode),
4238                                                 NULL_RTX, 0);
4239                     else
4240                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4241
4242                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4243                        negate the quotient.  */
4244                     if (d < 0)
4245                       {
4246                         insn = get_last_insn ();
4247                         if (insn != last
4248                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4249                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4250                           set_dst_reg_note (insn, REG_EQUAL,
4251                                             gen_rtx_DIV (compute_mode, op0,
4252                                                          gen_int_mode
4253                                                            (abs_d,
4254                                                             compute_mode)),
4255                                             quotient);
4256
4257                         quotient = expand_unop (compute_mode, neg_optab,
4258                                                 quotient, quotient, 0);
4259                       }
4260                   }
4261                 else if (size <= HOST_BITS_PER_WIDE_INT)
4262                   {
4263                     choose_multiplier (abs_d, size, size - 1,
4264                                        &mlr, &post_shift, &lgup);
4265                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4266                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4267                       {
4268                         rtx t1, t2, t3;
4269
4270                         if (post_shift >= BITS_PER_WORD
4271                             || size - 1 >= BITS_PER_WORD)
4272                           goto fail1;
4273
4274                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4275                                       + shift_cost[speed][compute_mode][size - 1]
4276                                       + add_cost[speed][compute_mode]);
4277                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4278                                                    NULL_RTX, 0,
4279                                                    max_cost - extra_cost);
4280                         if (t1 == 0)
4281                           goto fail1;
4282                         t2 = expand_shift
4283                           (RSHIFT_EXPR, compute_mode, t1,
4284                            post_shift, NULL_RTX, 0);
4285                         t3 = expand_shift
4286                           (RSHIFT_EXPR, compute_mode, op0,
4287                            size - 1, NULL_RTX, 0);
4288                         if (d < 0)
4289                           quotient
4290                             = force_operand (gen_rtx_MINUS (compute_mode,
4291                                                             t3, t2),
4292                                              tquotient);
4293                         else
4294                           quotient
4295                             = force_operand (gen_rtx_MINUS (compute_mode,
4296                                                             t2, t3),
4297                                              tquotient);
4298                       }
4299                     else
4300                       {
4301                         rtx t1, t2, t3, t4;
4302
4303                         if (post_shift >= BITS_PER_WORD
4304                             || size - 1 >= BITS_PER_WORD)
4305                           goto fail1;
4306
4307                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4308                         mlr = gen_int_mode (ml, compute_mode);
4309                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4310                                       + shift_cost[speed][compute_mode][size - 1]
4311                                       + 2 * add_cost[speed][compute_mode]);
4312                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4313                                                    NULL_RTX, 0,
4314                                                    max_cost - extra_cost);
4315                         if (t1 == 0)
4316                           goto fail1;
4317                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4318                                                           t1, op0),
4319                                             NULL_RTX);
4320                         t3 = expand_shift
4321                           (RSHIFT_EXPR, compute_mode, t2,
4322                            post_shift, NULL_RTX, 0);
4323                         t4 = expand_shift
4324                           (RSHIFT_EXPR, compute_mode, op0,
4325                            size - 1, NULL_RTX, 0);
4326                         if (d < 0)
4327                           quotient
4328                             = force_operand (gen_rtx_MINUS (compute_mode,
4329                                                             t4, t3),
4330                                              tquotient);
4331                         else
4332                           quotient
4333                             = force_operand (gen_rtx_MINUS (compute_mode,
4334                                                             t3, t4),
4335                                              tquotient);
4336                       }
4337                   }
4338                 else            /* Too wide mode to use tricky code */
4339                   break;
4340
4341                 insn = get_last_insn ();
4342                 if (insn != last)
4343                   set_dst_reg_note (insn, REG_EQUAL,
4344                                     gen_rtx_DIV (compute_mode, op0, op1),
4345                                     quotient);
4346               }
4347             break;
4348           }
4349       fail1:
4350         delete_insns_since (last);
4351         break;
4352
4353       case FLOOR_DIV_EXPR:
4354       case FLOOR_MOD_EXPR:
4355       /* We will come here only for signed operations.  */
4356         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4357           {
4358             unsigned HOST_WIDE_INT mh;
4359             int pre_shift, lgup, post_shift;
4360             HOST_WIDE_INT d = INTVAL (op1);
4361             rtx ml;
4362
4363             if (d > 0)
4364               {
4365                 /* We could just as easily deal with negative constants here,
4366                    but it does not seem worth the trouble for GCC 2.6.  */
4367                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4368                   {
4369                     pre_shift = floor_log2 (d);
4370                     if (rem_flag)
4371                       {
4372                         remainder = expand_binop (compute_mode, and_optab, op0,
4373                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4374                                                   remainder, 0, OPTAB_LIB_WIDEN);
4375                         if (remainder)
4376                           return gen_lowpart (mode, remainder);
4377                       }
4378                     quotient = expand_shift
4379                       (RSHIFT_EXPR, compute_mode, op0,
4380                        pre_shift, tquotient, 0);
4381                   }
4382                 else
4383                   {
4384                     rtx t1, t2, t3, t4;
4385
4386                     mh = choose_multiplier (d, size, size - 1,
4387                                             &ml, &post_shift, &lgup);
4388                     gcc_assert (!mh);
4389
4390                     if (post_shift < BITS_PER_WORD
4391                         && size - 1 < BITS_PER_WORD)
4392                       {
4393                         t1 = expand_shift
4394                           (RSHIFT_EXPR, compute_mode, op0,
4395                            size - 1, NULL_RTX, 0);
4396                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4397                                            NULL_RTX, 0, OPTAB_WIDEN);
4398                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4399                                       + shift_cost[speed][compute_mode][size - 1]
4400                                       + 2 * add_cost[speed][compute_mode]);
4401                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4402                                                    NULL_RTX, 1,
4403                                                    max_cost - extra_cost);
4404                         if (t3 != 0)
4405                           {
4406                             t4 = expand_shift
4407                               (RSHIFT_EXPR, compute_mode, t3,
4408                                post_shift, NULL_RTX, 1);
4409                             quotient = expand_binop (compute_mode, xor_optab,
4410                                                      t4, t1, tquotient, 0,
4411                                                      OPTAB_WIDEN);
4412                           }
4413                       }
4414                   }
4415               }
4416             else
4417               {
4418                 rtx nsign, t1, t2, t3, t4;
4419                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4420                                                   op0, constm1_rtx), NULL_RTX);
4421                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4422                                    0, OPTAB_WIDEN);
4423                 nsign = expand_shift
4424                   (RSHIFT_EXPR, compute_mode, t2,
4425                    size - 1, NULL_RTX, 0);
4426                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4427                                     NULL_RTX);
4428                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4429                                     NULL_RTX, 0);
4430                 if (t4)
4431                   {
4432                     rtx t5;
4433                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4434                                       NULL_RTX, 0);
4435                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4436                                                             t4, t5),
4437                                               tquotient);
4438                   }
4439               }
4440           }
4441
4442         if (quotient != 0)
4443           break;
4444         delete_insns_since (last);
4445
4446         /* Try using an instruction that produces both the quotient and
4447            remainder, using truncation.  We can easily compensate the quotient
4448            or remainder to get floor rounding, once we have the remainder.
4449            Notice that we compute also the final remainder value here,
4450            and return the result right away.  */
4451         if (target == 0 || GET_MODE (target) != compute_mode)
4452           target = gen_reg_rtx (compute_mode);
4453
4454         if (rem_flag)
4455           {
4456             remainder
4457               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4458             quotient = gen_reg_rtx (compute_mode);
4459           }
4460         else
4461           {
4462             quotient
4463               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4464             remainder = gen_reg_rtx (compute_mode);
4465           }
4466
4467         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4468                                  quotient, remainder, 0))
4469           {
4470             /* This could be computed with a branch-less sequence.
4471                Save that for later.  */
4472             rtx tem;
4473             rtx label = gen_label_rtx ();
4474             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4475             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4476                                 NULL_RTX, 0, OPTAB_WIDEN);
4477             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4478             expand_dec (quotient, const1_rtx);
4479             expand_inc (remainder, op1);
4480             emit_label (label);
4481             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4482           }
4483
4484         /* No luck with division elimination or divmod.  Have to do it
4485            by conditionally adjusting op0 *and* the result.  */
4486         {
4487           rtx label1, label2, label3, label4, label5;
4488           rtx adjusted_op0;
4489           rtx tem;
4490
4491           quotient = gen_reg_rtx (compute_mode);
4492           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4493           label1 = gen_label_rtx ();
4494           label2 = gen_label_rtx ();
4495           label3 = gen_label_rtx ();
4496           label4 = gen_label_rtx ();
4497           label5 = gen_label_rtx ();
4498           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4499           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4500           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4501                               quotient, 0, OPTAB_LIB_WIDEN);
4502           if (tem != quotient)
4503             emit_move_insn (quotient, tem);
4504           emit_jump_insn (gen_jump (label5));
4505           emit_barrier ();
4506           emit_label (label1);
4507           expand_inc (adjusted_op0, const1_rtx);
4508           emit_jump_insn (gen_jump (label4));
4509           emit_barrier ();
4510           emit_label (label2);
4511           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4512           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4513                               quotient, 0, OPTAB_LIB_WIDEN);
4514           if (tem != quotient)
4515             emit_move_insn (quotient, tem);
4516           emit_jump_insn (gen_jump (label5));
4517           emit_barrier ();
4518           emit_label (label3);
4519           expand_dec (adjusted_op0, const1_rtx);
4520           emit_label (label4);
4521           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4522                               quotient, 0, OPTAB_LIB_WIDEN);
4523           if (tem != quotient)
4524             emit_move_insn (quotient, tem);
4525           expand_dec (quotient, const1_rtx);
4526           emit_label (label5);
4527         }
4528         break;
4529
4530       case CEIL_DIV_EXPR:
4531       case CEIL_MOD_EXPR:
4532         if (unsignedp)
4533           {
4534             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4535               {
4536                 rtx t1, t2, t3;
4537                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4538                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4539                                    floor_log2 (d), tquotient, 1);
4540                 t2 = expand_binop (compute_mode, and_optab, op0,
4541                                    GEN_INT (d - 1),
4542                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4543                 t3 = gen_reg_rtx (compute_mode);
4544                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4545                                       compute_mode, 1, 1);
4546                 if (t3 == 0)
4547                   {
4548                     rtx lab;
4549                     lab = gen_label_rtx ();
4550                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4551                     expand_inc (t1, const1_rtx);
4552                     emit_label (lab);
4553                     quotient = t1;
4554                   }
4555                 else
4556                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4557                                                           t1, t3),
4558                                             tquotient);
4559                 break;
4560               }
4561
4562             /* Try using an instruction that produces both the quotient and
4563                remainder, using truncation.  We can easily compensate the
4564                quotient or remainder to get ceiling rounding, once we have the
4565                remainder.  Notice that we compute also the final remainder
4566                value here, and return the result right away.  */
4567             if (target == 0 || GET_MODE (target) != compute_mode)
4568               target = gen_reg_rtx (compute_mode);
4569
4570             if (rem_flag)
4571               {
4572                 remainder = (REG_P (target)
4573                              ? target : gen_reg_rtx (compute_mode));
4574                 quotient = gen_reg_rtx (compute_mode);
4575               }
4576             else
4577               {
4578                 quotient = (REG_P (target)
4579                             ? target : gen_reg_rtx (compute_mode));
4580                 remainder = gen_reg_rtx (compute_mode);
4581               }
4582
4583             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4584                                      remainder, 1))
4585               {
4586                 /* This could be computed with a branch-less sequence.
4587                    Save that for later.  */
4588                 rtx label = gen_label_rtx ();
4589                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4590                                  compute_mode, label);
4591                 expand_inc (quotient, const1_rtx);
4592                 expand_dec (remainder, op1);
4593                 emit_label (label);
4594                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4595               }
4596
4597             /* No luck with division elimination or divmod.  Have to do it
4598                by conditionally adjusting op0 *and* the result.  */
4599             {
4600               rtx label1, label2;
4601               rtx adjusted_op0, tem;
4602
4603               quotient = gen_reg_rtx (compute_mode);
4604               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4605               label1 = gen_label_rtx ();
4606               label2 = gen_label_rtx ();
4607               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4608                                compute_mode, label1);
4609               emit_move_insn  (quotient, const0_rtx);
4610               emit_jump_insn (gen_jump (label2));
4611               emit_barrier ();
4612               emit_label (label1);
4613               expand_dec (adjusted_op0, const1_rtx);
4614               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4615                                   quotient, 1, OPTAB_LIB_WIDEN);
4616               if (tem != quotient)
4617                 emit_move_insn (quotient, tem);
4618               expand_inc (quotient, const1_rtx);
4619               emit_label (label2);
4620             }
4621           }
4622         else /* signed */
4623           {
4624             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4625                 && INTVAL (op1) >= 0)
4626               {
4627                 /* This is extremely similar to the code for the unsigned case
4628                    above.  For 2.7 we should merge these variants, but for
4629                    2.6.1 I don't want to touch the code for unsigned since that
4630                    get used in C.  The signed case will only be used by other
4631                    languages (Ada).  */
4632
4633                 rtx t1, t2, t3;
4634                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4635                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4636                                    floor_log2 (d), tquotient, 0);
4637                 t2 = expand_binop (compute_mode, and_optab, op0,
4638                                    GEN_INT (d - 1),
4639                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4640                 t3 = gen_reg_rtx (compute_mode);
4641                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4642                                       compute_mode, 1, 1);
4643                 if (t3 == 0)
4644                   {
4645                     rtx lab;
4646                     lab = gen_label_rtx ();
4647                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4648                     expand_inc (t1, const1_rtx);
4649                     emit_label (lab);
4650                     quotient = t1;
4651                   }
4652                 else
4653                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4654                                                           t1, t3),
4655                                             tquotient);
4656                 break;
4657               }
4658
4659             /* Try using an instruction that produces both the quotient and
4660                remainder, using truncation.  We can easily compensate the
4661                quotient or remainder to get ceiling rounding, once we have the
4662                remainder.  Notice that we compute also the final remainder
4663                value here, and return the result right away.  */
4664             if (target == 0 || GET_MODE (target) != compute_mode)
4665               target = gen_reg_rtx (compute_mode);
4666             if (rem_flag)
4667               {
4668                 remainder= (REG_P (target)
4669                             ? target : gen_reg_rtx (compute_mode));
4670                 quotient = gen_reg_rtx (compute_mode);
4671               }
4672             else
4673               {
4674                 quotient = (REG_P (target)
4675                             ? target : gen_reg_rtx (compute_mode));
4676                 remainder = gen_reg_rtx (compute_mode);
4677               }
4678
4679             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4680                                      remainder, 0))
4681               {
4682                 /* This could be computed with a branch-less sequence.
4683                    Save that for later.  */
4684                 rtx tem;
4685                 rtx label = gen_label_rtx ();
4686                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4687                                  compute_mode, label);
4688                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4689                                     NULL_RTX, 0, OPTAB_WIDEN);
4690                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4691                 expand_inc (quotient, const1_rtx);
4692                 expand_dec (remainder, op1);
4693                 emit_label (label);
4694                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4695               }
4696
4697             /* No luck with division elimination or divmod.  Have to do it
4698                by conditionally adjusting op0 *and* the result.  */
4699             {
4700               rtx label1, label2, label3, label4, label5;
4701               rtx adjusted_op0;
4702               rtx tem;
4703
4704               quotient = gen_reg_rtx (compute_mode);
4705               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4706               label1 = gen_label_rtx ();
4707               label2 = gen_label_rtx ();
4708               label3 = gen_label_rtx ();
4709               label4 = gen_label_rtx ();
4710               label5 = gen_label_rtx ();
4711               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4712               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4713                                compute_mode, label1);
4714               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4715                                   quotient, 0, OPTAB_LIB_WIDEN);
4716               if (tem != quotient)
4717                 emit_move_insn (quotient, tem);
4718               emit_jump_insn (gen_jump (label5));
4719               emit_barrier ();
4720               emit_label (label1);
4721               expand_dec (adjusted_op0, const1_rtx);
4722               emit_jump_insn (gen_jump (label4));
4723               emit_barrier ();
4724               emit_label (label2);
4725               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4726                                compute_mode, label3);
4727               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4728                                   quotient, 0, OPTAB_LIB_WIDEN);
4729               if (tem != quotient)
4730                 emit_move_insn (quotient, tem);
4731               emit_jump_insn (gen_jump (label5));
4732               emit_barrier ();
4733               emit_label (label3);
4734               expand_inc (adjusted_op0, const1_rtx);
4735               emit_label (label4);
4736               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4737                                   quotient, 0, OPTAB_LIB_WIDEN);
4738               if (tem != quotient)
4739                 emit_move_insn (quotient, tem);
4740               expand_inc (quotient, const1_rtx);
4741               emit_label (label5);
4742             }
4743           }
4744         break;
4745
4746       case EXACT_DIV_EXPR:
4747         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4748           {
4749             HOST_WIDE_INT d = INTVAL (op1);
4750             unsigned HOST_WIDE_INT ml;
4751             int pre_shift;
4752             rtx t1;
4753
4754             pre_shift = floor_log2 (d & -d);
4755             ml = invert_mod2n (d >> pre_shift, size);
4756             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4757                                pre_shift, NULL_RTX, unsignedp);
4758             quotient = expand_mult (compute_mode, t1,
4759                                     gen_int_mode (ml, compute_mode),
4760                                     NULL_RTX, 1);
4761
4762             insn = get_last_insn ();
4763             set_dst_reg_note (insn, REG_EQUAL,
4764                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4765                                               compute_mode, op0, op1),
4766                               quotient);
4767           }
4768         break;
4769
4770       case ROUND_DIV_EXPR:
4771       case ROUND_MOD_EXPR:
4772         if (unsignedp)
4773           {
4774             rtx tem;
4775             rtx label;
4776             label = gen_label_rtx ();
4777             quotient = gen_reg_rtx (compute_mode);
4778             remainder = gen_reg_rtx (compute_mode);
4779             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4780               {
4781                 rtx tem;
4782                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4783                                          quotient, 1, OPTAB_LIB_WIDEN);
4784                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4785                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4786                                           remainder, 1, OPTAB_LIB_WIDEN);
4787               }
4788             tem = plus_constant (op1, -1);
4789             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4790             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4791             expand_inc (quotient, const1_rtx);
4792             expand_dec (remainder, op1);
4793             emit_label (label);
4794           }
4795         else
4796           {
4797             rtx abs_rem, abs_op1, tem, mask;
4798             rtx label;
4799             label = gen_label_rtx ();
4800             quotient = gen_reg_rtx (compute_mode);
4801             remainder = gen_reg_rtx (compute_mode);
4802             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4803               {
4804                 rtx tem;
4805                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4806                                          quotient, 0, OPTAB_LIB_WIDEN);
4807                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4808                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4809                                           remainder, 0, OPTAB_LIB_WIDEN);
4810               }
4811             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4812             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4813             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4814                                 1, NULL_RTX, 1);
4815             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4816             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4817                                 NULL_RTX, 0, OPTAB_WIDEN);
4818             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4819                                  size - 1, NULL_RTX, 0);
4820             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4821                                 NULL_RTX, 0, OPTAB_WIDEN);
4822             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4823                                 NULL_RTX, 0, OPTAB_WIDEN);
4824             expand_inc (quotient, tem);
4825             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4826                                 NULL_RTX, 0, OPTAB_WIDEN);
4827             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4828                                 NULL_RTX, 0, OPTAB_WIDEN);
4829             expand_dec (remainder, tem);
4830             emit_label (label);
4831           }
4832         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4833
4834       default:
4835         gcc_unreachable ();
4836       }
4837
4838   if (quotient == 0)
4839     {
4840       if (target && GET_MODE (target) != compute_mode)
4841         target = 0;
4842
4843       if (rem_flag)
4844         {
4845           /* Try to produce the remainder without producing the quotient.
4846              If we seem to have a divmod pattern that does not require widening,
4847              don't try widening here.  We should really have a WIDEN argument
4848              to expand_twoval_binop, since what we'd really like to do here is
4849              1) try a mod insn in compute_mode
4850              2) try a divmod insn in compute_mode
4851              3) try a div insn in compute_mode and multiply-subtract to get
4852                 remainder
4853              4) try the same things with widening allowed.  */
4854           remainder
4855             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4856                                  op0, op1, target,
4857                                  unsignedp,
4858                                  ((optab_handler (optab2, compute_mode)
4859                                    != CODE_FOR_nothing)
4860                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4861           if (remainder == 0)
4862             {
4863               /* No luck there.  Can we do remainder and divide at once
4864                  without a library call?  */
4865               remainder = gen_reg_rtx (compute_mode);
4866               if (! expand_twoval_binop ((unsignedp
4867                                           ? udivmod_optab
4868                                           : sdivmod_optab),
4869                                          op0, op1,
4870                                          NULL_RTX, remainder, unsignedp))
4871                 remainder = 0;
4872             }
4873
4874           if (remainder)
4875             return gen_lowpart (mode, remainder);
4876         }
4877
4878       /* Produce the quotient.  Try a quotient insn, but not a library call.
4879          If we have a divmod in this mode, use it in preference to widening
4880          the div (for this test we assume it will not fail). Note that optab2
4881          is set to the one of the two optabs that the call below will use.  */
4882       quotient
4883         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4884                              op0, op1, rem_flag ? NULL_RTX : target,
4885                              unsignedp,
4886                              ((optab_handler (optab2, compute_mode)
4887                                != CODE_FOR_nothing)
4888                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4889
4890       if (quotient == 0)
4891         {
4892           /* No luck there.  Try a quotient-and-remainder insn,
4893              keeping the quotient alone.  */
4894           quotient = gen_reg_rtx (compute_mode);
4895           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4896                                      op0, op1,
4897                                      quotient, NULL_RTX, unsignedp))
4898             {
4899               quotient = 0;
4900               if (! rem_flag)
4901                 /* Still no luck.  If we are not computing the remainder,
4902                    use a library call for the quotient.  */
4903                 quotient = sign_expand_binop (compute_mode,
4904                                               udiv_optab, sdiv_optab,
4905                                               op0, op1, target,
4906                                               unsignedp, OPTAB_LIB_WIDEN);
4907             }
4908         }
4909     }
4910
4911   if (rem_flag)
4912     {
4913       if (target && GET_MODE (target) != compute_mode)
4914         target = 0;
4915
4916       if (quotient == 0)
4917         {
4918           /* No divide instruction either.  Use library for remainder.  */
4919           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4920                                          op0, op1, target,
4921                                          unsignedp, OPTAB_LIB_WIDEN);
4922           /* No remainder function.  Try a quotient-and-remainder
4923              function, keeping the remainder.  */
4924           if (!remainder)
4925             {
4926               remainder = gen_reg_rtx (compute_mode);
4927               if (!expand_twoval_binop_libfunc
4928                   (unsignedp ? udivmod_optab : sdivmod_optab,
4929                    op0, op1,
4930                    NULL_RTX, remainder,
4931                    unsignedp ? UMOD : MOD))
4932                 remainder = NULL_RTX;
4933             }
4934         }
4935       else
4936         {
4937           /* We divided.  Now finish doing X - Y * (X / Y).  */
4938           remainder = expand_mult (compute_mode, quotient, op1,
4939                                    NULL_RTX, unsignedp);
4940           remainder = expand_binop (compute_mode, sub_optab, op0,
4941                                     remainder, target, unsignedp,
4942                                     OPTAB_LIB_WIDEN);
4943         }
4944     }
4945
4946   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4947 }
4948 \f
4949 /* Return a tree node with data type TYPE, describing the value of X.
4950    Usually this is an VAR_DECL, if there is no obvious better choice.
4951    X may be an expression, however we only support those expressions
4952    generated by loop.c.  */
4953
4954 tree
4955 make_tree (tree type, rtx x)
4956 {
4957   tree t;
4958
4959   switch (GET_CODE (x))
4960     {
4961     case CONST_INT:
4962       {
4963         HOST_WIDE_INT hi = 0;
4964
4965         if (INTVAL (x) < 0
4966             && !(TYPE_UNSIGNED (type)
4967                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4968                      < HOST_BITS_PER_WIDE_INT)))
4969           hi = -1;
4970
4971         t = build_int_cst_wide (type, INTVAL (x), hi);
4972
4973         return t;
4974       }
4975
4976     case CONST_DOUBLE:
4977       if (GET_MODE (x) == VOIDmode)
4978         t = build_int_cst_wide (type,
4979                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4980       else
4981         {
4982           REAL_VALUE_TYPE d;
4983
4984           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4985           t = build_real (type, d);
4986         }
4987
4988       return t;
4989
4990     case CONST_VECTOR:
4991       {
4992         int units = CONST_VECTOR_NUNITS (x);
4993         tree itype = TREE_TYPE (type);
4994         tree t = NULL_TREE;
4995         int i;
4996
4997
4998         /* Build a tree with vector elements.  */
4999         for (i = units - 1; i >= 0; --i)
5000           {
5001             rtx elt = CONST_VECTOR_ELT (x, i);
5002             t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
5003           }
5004
5005         return build_vector (type, t);
5006       }
5007
5008     case PLUS:
5009       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5010                           make_tree (type, XEXP (x, 1)));
5011
5012     case MINUS:
5013       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5014                           make_tree (type, XEXP (x, 1)));
5015
5016     case NEG:
5017       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5018
5019     case MULT:
5020       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5021                           make_tree (type, XEXP (x, 1)));
5022
5023     case ASHIFT:
5024       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5025                           make_tree (type, XEXP (x, 1)));
5026
5027     case LSHIFTRT:
5028       t = unsigned_type_for (type);
5029       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5030                                          make_tree (t, XEXP (x, 0)),
5031                                          make_tree (type, XEXP (x, 1))));
5032
5033     case ASHIFTRT:
5034       t = signed_type_for (type);
5035       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5036                                          make_tree (t, XEXP (x, 0)),
5037                                          make_tree (type, XEXP (x, 1))));
5038
5039     case DIV:
5040       if (TREE_CODE (type) != REAL_TYPE)
5041         t = signed_type_for (type);
5042       else
5043         t = type;
5044
5045       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5046                                          make_tree (t, XEXP (x, 0)),
5047                                          make_tree (t, XEXP (x, 1))));
5048     case UDIV:
5049       t = unsigned_type_for (type);
5050       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5051                                          make_tree (t, XEXP (x, 0)),
5052                                          make_tree (t, XEXP (x, 1))));
5053
5054     case SIGN_EXTEND:
5055     case ZERO_EXTEND:
5056       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5057                                           GET_CODE (x) == ZERO_EXTEND);
5058       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5059
5060     case CONST:
5061       return make_tree (type, XEXP (x, 0));
5062
5063     case SYMBOL_REF:
5064       t = SYMBOL_REF_DECL (x);
5065       if (t)
5066         return fold_convert (type, build_fold_addr_expr (t));
5067       /* else fall through.  */
5068
5069     default:
5070       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5071
5072       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5073          address mode to pointer mode.  */
5074       if (POINTER_TYPE_P (type))
5075         x = convert_memory_address_addr_space
5076               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5077
5078       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5079          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5080       t->decl_with_rtl.rtl = x;
5081
5082       return t;
5083     }
5084 }
5085 \f
5086 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5087    and returning TARGET.
5088
5089    If TARGET is 0, a pseudo-register or constant is returned.  */
5090
5091 rtx
5092 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5093 {
5094   rtx tem = 0;
5095
5096   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5097     tem = simplify_binary_operation (AND, mode, op0, op1);
5098   if (tem == 0)
5099     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5100
5101   if (target == 0)
5102     target = tem;
5103   else if (tem != target)
5104     emit_move_insn (target, tem);
5105   return target;
5106 }
5107
5108 /* Helper function for emit_store_flag.  */
5109 static rtx
5110 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5111              enum machine_mode mode, enum machine_mode compare_mode,
5112              int unsignedp, rtx x, rtx y, int normalizep,
5113              enum machine_mode target_mode)
5114 {
5115   struct expand_operand ops[4];
5116   rtx op0, last, comparison, subtarget;
5117   enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5118
5119   last = get_last_insn ();
5120   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5121   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5122   if (!x || !y)
5123     {
5124       delete_insns_since (last);
5125       return NULL_RTX;
5126     }
5127
5128   if (target_mode == VOIDmode)
5129     target_mode = result_mode;
5130   if (!target)
5131     target = gen_reg_rtx (target_mode);
5132
5133   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5134
5135   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5136   create_fixed_operand (&ops[1], comparison);
5137   create_fixed_operand (&ops[2], x);
5138   create_fixed_operand (&ops[3], y);
5139   if (!maybe_expand_insn (icode, 4, ops))
5140     {
5141       delete_insns_since (last);
5142       return NULL_RTX;
5143     }
5144   subtarget = ops[0].value;
5145
5146   /* If we are converting to a wider mode, first convert to
5147      TARGET_MODE, then normalize.  This produces better combining
5148      opportunities on machines that have a SIGN_EXTRACT when we are
5149      testing a single bit.  This mostly benefits the 68k.
5150
5151      If STORE_FLAG_VALUE does not have the sign bit set when
5152      interpreted in MODE, we can do this conversion as unsigned, which
5153      is usually more efficient.  */
5154   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5155     {
5156       convert_move (target, subtarget,
5157                     val_signbit_known_clear_p (result_mode,
5158                                                STORE_FLAG_VALUE));
5159       op0 = target;
5160       result_mode = target_mode;
5161     }
5162   else
5163     op0 = subtarget;
5164
5165   /* If we want to keep subexpressions around, don't reuse our last
5166      target.  */
5167   if (optimize)
5168     subtarget = 0;
5169
5170   /* Now normalize to the proper value in MODE.  Sometimes we don't
5171      have to do anything.  */
5172   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5173     ;
5174   /* STORE_FLAG_VALUE might be the most negative number, so write
5175      the comparison this way to avoid a compiler-time warning.  */
5176   else if (- normalizep == STORE_FLAG_VALUE)
5177     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5178
5179   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5180      it hard to use a value of just the sign bit due to ANSI integer
5181      constant typing rules.  */
5182   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5183     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5184                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5185                         normalizep == 1);
5186   else
5187     {
5188       gcc_assert (STORE_FLAG_VALUE & 1);
5189
5190       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5191       if (normalizep == -1)
5192         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5193     }
5194
5195   /* If we were converting to a smaller mode, do the conversion now.  */
5196   if (target_mode != result_mode)
5197     {
5198       convert_move (target, op0, 0);
5199       return target;
5200     }
5201   else
5202     return op0;
5203 }
5204
5205
5206 /* A subroutine of emit_store_flag only including "tricks" that do not
5207    need a recursive call.  These are kept separate to avoid infinite
5208    loops.  */
5209
5210 static rtx
5211 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5212                    enum machine_mode mode, int unsignedp, int normalizep,
5213                    enum machine_mode target_mode)
5214 {
5215   rtx subtarget;
5216   enum insn_code icode;
5217   enum machine_mode compare_mode;
5218   enum mode_class mclass;
5219   enum rtx_code scode;
5220   rtx tem;
5221
5222   if (unsignedp)
5223     code = unsigned_condition (code);
5224   scode = swap_condition (code);
5225
5226   /* If one operand is constant, make it the second one.  Only do this
5227      if the other operand is not constant as well.  */
5228
5229   if (swap_commutative_operands_p (op0, op1))
5230     {
5231       tem = op0;
5232       op0 = op1;
5233       op1 = tem;
5234       code = swap_condition (code);
5235     }
5236
5237   if (mode == VOIDmode)
5238     mode = GET_MODE (op0);
5239
5240   /* For some comparisons with 1 and -1, we can convert this to
5241      comparisons with zero.  This will often produce more opportunities for
5242      store-flag insns.  */
5243
5244   switch (code)
5245     {
5246     case LT:
5247       if (op1 == const1_rtx)
5248         op1 = const0_rtx, code = LE;
5249       break;
5250     case LE:
5251       if (op1 == constm1_rtx)
5252         op1 = const0_rtx, code = LT;
5253       break;
5254     case GE:
5255       if (op1 == const1_rtx)
5256         op1 = const0_rtx, code = GT;
5257       break;
5258     case GT:
5259       if (op1 == constm1_rtx)
5260         op1 = const0_rtx, code = GE;
5261       break;
5262     case GEU:
5263       if (op1 == const1_rtx)
5264         op1 = const0_rtx, code = NE;
5265       break;
5266     case LTU:
5267       if (op1 == const1_rtx)
5268         op1 = const0_rtx, code = EQ;
5269       break;
5270     default:
5271       break;
5272     }
5273
5274   /* If we are comparing a double-word integer with zero or -1, we can
5275      convert the comparison into one involving a single word.  */
5276   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5277       && GET_MODE_CLASS (mode) == MODE_INT
5278       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5279     {
5280       if ((code == EQ || code == NE)
5281           && (op1 == const0_rtx || op1 == constm1_rtx))
5282         {
5283           rtx op00, op01;
5284
5285           /* Do a logical OR or AND of the two words and compare the
5286              result.  */
5287           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5288           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5289           tem = expand_binop (word_mode,
5290                               op1 == const0_rtx ? ior_optab : and_optab,
5291                               op00, op01, NULL_RTX, unsignedp,
5292                               OPTAB_DIRECT);
5293
5294           if (tem != 0)
5295             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5296                                    unsignedp, normalizep);
5297         }
5298       else if ((code == LT || code == GE) && op1 == const0_rtx)
5299         {
5300           rtx op0h;
5301
5302           /* If testing the sign bit, can just test on high word.  */
5303           op0h = simplify_gen_subreg (word_mode, op0, mode,
5304                                       subreg_highpart_offset (word_mode,
5305                                                               mode));
5306           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5307                                  unsignedp, normalizep);
5308         }
5309       else
5310         tem = NULL_RTX;
5311
5312       if (tem)
5313         {
5314           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5315             return tem;
5316           if (!target)
5317             target = gen_reg_rtx (target_mode);
5318
5319           convert_move (target, tem,
5320                         !val_signbit_known_set_p (word_mode,
5321                                                   (normalizep ? normalizep
5322                                                    : STORE_FLAG_VALUE)));
5323           return target;
5324         }
5325     }
5326
5327   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5328      complement of A (for GE) and shifting the sign bit to the low bit.  */
5329   if (op1 == const0_rtx && (code == LT || code == GE)
5330       && GET_MODE_CLASS (mode) == MODE_INT
5331       && (normalizep || STORE_FLAG_VALUE == 1
5332           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5333     {
5334       subtarget = target;
5335
5336       if (!target)
5337         target_mode = mode;
5338
5339       /* If the result is to be wider than OP0, it is best to convert it
5340          first.  If it is to be narrower, it is *incorrect* to convert it
5341          first.  */
5342       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5343         {
5344           op0 = convert_modes (target_mode, mode, op0, 0);
5345           mode = target_mode;
5346         }
5347
5348       if (target_mode != mode)
5349         subtarget = 0;
5350
5351       if (code == GE)
5352         op0 = expand_unop (mode, one_cmpl_optab, op0,
5353                            ((STORE_FLAG_VALUE == 1 || normalizep)
5354                             ? 0 : subtarget), 0);
5355
5356       if (STORE_FLAG_VALUE == 1 || normalizep)
5357         /* If we are supposed to produce a 0/1 value, we want to do
5358            a logical shift from the sign bit to the low-order bit; for
5359            a -1/0 value, we do an arithmetic shift.  */
5360         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5361                             GET_MODE_BITSIZE (mode) - 1,
5362                             subtarget, normalizep != -1);
5363
5364       if (mode != target_mode)
5365         op0 = convert_modes (target_mode, mode, op0, 0);
5366
5367       return op0;
5368     }
5369
5370   mclass = GET_MODE_CLASS (mode);
5371   for (compare_mode = mode; compare_mode != VOIDmode;
5372        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5373     {
5374      enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5375      icode = optab_handler (cstore_optab, optab_mode);
5376      if (icode != CODE_FOR_nothing)
5377         {
5378           do_pending_stack_adjust ();
5379           tem = emit_cstore (target, icode, code, mode, compare_mode,
5380                              unsignedp, op0, op1, normalizep, target_mode);
5381           if (tem)
5382             return tem;
5383
5384           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5385             {
5386               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5387                                  unsignedp, op1, op0, normalizep, target_mode);
5388               if (tem)
5389                 return tem;
5390             }
5391           break;
5392         }
5393     }
5394
5395   return 0;
5396 }
5397
5398 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5399    and storing in TARGET.  Normally return TARGET.
5400    Return 0 if that cannot be done.
5401
5402    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5403    it is VOIDmode, they cannot both be CONST_INT.
5404
5405    UNSIGNEDP is for the case where we have to widen the operands
5406    to perform the operation.  It says to use zero-extension.
5407
5408    NORMALIZEP is 1 if we should convert the result to be either zero
5409    or one.  Normalize is -1 if we should convert the result to be
5410    either zero or -1.  If NORMALIZEP is zero, the result will be left
5411    "raw" out of the scc insn.  */
5412
5413 rtx
5414 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5415                  enum machine_mode mode, int unsignedp, int normalizep)
5416 {
5417   enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5418   enum rtx_code rcode;
5419   rtx subtarget;
5420   rtx tem, last, trueval;
5421
5422   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5423                            target_mode);
5424   if (tem)
5425     return tem;
5426
5427   /* If we reached here, we can't do this with a scc insn, however there
5428      are some comparisons that can be done in other ways.  Don't do any
5429      of these cases if branches are very cheap.  */
5430   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5431     return 0;
5432
5433   /* See what we need to return.  We can only return a 1, -1, or the
5434      sign bit.  */
5435
5436   if (normalizep == 0)
5437     {
5438       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5439         normalizep = STORE_FLAG_VALUE;
5440
5441       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5442         ;
5443       else
5444         return 0;
5445     }
5446
5447   last = get_last_insn ();
5448
5449   /* If optimizing, use different pseudo registers for each insn, instead
5450      of reusing the same pseudo.  This leads to better CSE, but slows
5451      down the compiler, since there are more pseudos */
5452   subtarget = (!optimize
5453                && (target_mode == mode)) ? target : NULL_RTX;
5454   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5455
5456   /* For floating-point comparisons, try the reverse comparison or try
5457      changing the "orderedness" of the comparison.  */
5458   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5459     {
5460       enum rtx_code first_code;
5461       bool and_them;
5462
5463       rcode = reverse_condition_maybe_unordered (code);
5464       if (can_compare_p (rcode, mode, ccp_store_flag)
5465           && (code == ORDERED || code == UNORDERED
5466               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5467               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5468         {
5469           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5470                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5471
5472           /* For the reverse comparison, use either an addition or a XOR.  */
5473           if (want_add
5474               && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5475                            optimize_insn_for_speed_p ()) == 0)
5476             {
5477               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5478                                        STORE_FLAG_VALUE, target_mode);
5479               if (tem)
5480                 return expand_binop (target_mode, add_optab, tem,
5481                                      GEN_INT (normalizep),
5482                                      target, 0, OPTAB_WIDEN);
5483             }
5484           else if (!want_add
5485                    && rtx_cost (trueval, XOR, 1,
5486                                 optimize_insn_for_speed_p ()) == 0)
5487             {
5488               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5489                                        normalizep, target_mode);
5490               if (tem)
5491                 return expand_binop (target_mode, xor_optab, tem, trueval,
5492                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5493             }
5494         }
5495
5496       delete_insns_since (last);
5497
5498       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5499       if (code == ORDERED || code == UNORDERED)
5500         return 0;
5501
5502       and_them = split_comparison (code, mode, &first_code, &code);
5503
5504       /* If there are no NaNs, the first comparison should always fall through.
5505          Effectively change the comparison to the other one.  */
5506       if (!HONOR_NANS (mode))
5507         {
5508           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5509           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5510                                     target_mode);
5511         }
5512
5513 #ifdef HAVE_conditional_move
5514       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5515          conditional move.  */
5516       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5517                                normalizep, target_mode);
5518       if (tem == 0)
5519         return 0;
5520
5521       if (and_them)
5522         tem = emit_conditional_move (target, code, op0, op1, mode,
5523                                      tem, const0_rtx, GET_MODE (tem), 0);
5524       else
5525         tem = emit_conditional_move (target, code, op0, op1, mode,
5526                                      trueval, tem, GET_MODE (tem), 0);
5527
5528       if (tem == 0)
5529         delete_insns_since (last);
5530       return tem;
5531 #else
5532       return 0;
5533 #endif
5534     }
5535
5536   /* The remaining tricks only apply to integer comparisons.  */
5537
5538   if (GET_MODE_CLASS (mode) != MODE_INT)
5539     return 0;
5540
5541   /* If this is an equality comparison of integers, we can try to exclusive-or
5542      (or subtract) the two operands and use a recursive call to try the
5543      comparison with zero.  Don't do any of these cases if branches are
5544      very cheap.  */
5545
5546   if ((code == EQ || code == NE) && op1 != const0_rtx)
5547     {
5548       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5549                           OPTAB_WIDEN);
5550
5551       if (tem == 0)
5552         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5553                             OPTAB_WIDEN);
5554       if (tem != 0)
5555         tem = emit_store_flag (target, code, tem, const0_rtx,
5556                                mode, unsignedp, normalizep);
5557       if (tem != 0)
5558         return tem;
5559
5560       delete_insns_since (last);
5561     }
5562
5563   /* For integer comparisons, try the reverse comparison.  However, for
5564      small X and if we'd have anyway to extend, implementing "X != 0"
5565      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5566   rcode = reverse_condition (code);
5567   if (can_compare_p (rcode, mode, ccp_store_flag)
5568       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5569             && code == NE
5570             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5571             && op1 == const0_rtx))
5572     {
5573       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5574                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5575
5576       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5577       if (want_add
5578           && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5579                        optimize_insn_for_speed_p ()) == 0)
5580         {
5581           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5582                                    STORE_FLAG_VALUE, target_mode);
5583           if (tem != 0)
5584             tem = expand_binop (target_mode, add_optab, tem,
5585                                 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5586         }
5587       else if (!want_add
5588                && rtx_cost (trueval, XOR, 1,
5589                             optimize_insn_for_speed_p ()) == 0)
5590         {
5591           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5592                                    normalizep, target_mode);
5593           if (tem != 0)
5594             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5595                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5596         }
5597
5598       if (tem != 0)
5599         return tem;
5600       delete_insns_since (last);
5601     }
5602
5603   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5604      the constant zero.  Reject all other comparisons at this point.  Only
5605      do LE and GT if branches are expensive since they are expensive on
5606      2-operand machines.  */
5607
5608   if (op1 != const0_rtx
5609       || (code != EQ && code != NE
5610           && (BRANCH_COST (optimize_insn_for_speed_p (),
5611                            false) <= 1 || (code != LE && code != GT))))
5612     return 0;
5613
5614   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5615      do the necessary operation below.  */
5616
5617   tem = 0;
5618
5619   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5620      the sign bit set.  */
5621
5622   if (code == LE)
5623     {
5624       /* This is destructive, so SUBTARGET can't be OP0.  */
5625       if (rtx_equal_p (subtarget, op0))
5626         subtarget = 0;
5627
5628       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5629                           OPTAB_WIDEN);
5630       if (tem)
5631         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5632                             OPTAB_WIDEN);
5633     }
5634
5635   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5636      number of bits in the mode of OP0, minus one.  */
5637
5638   if (code == GT)
5639     {
5640       if (rtx_equal_p (subtarget, op0))
5641         subtarget = 0;
5642
5643       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5644                           GET_MODE_BITSIZE (mode) - 1,
5645                           subtarget, 0);
5646       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5647                           OPTAB_WIDEN);
5648     }
5649
5650   if (code == EQ || code == NE)
5651     {
5652       /* For EQ or NE, one way to do the comparison is to apply an operation
5653          that converts the operand into a positive number if it is nonzero
5654          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5655          for NE we negate.  This puts the result in the sign bit.  Then we
5656          normalize with a shift, if needed.
5657
5658          Two operations that can do the above actions are ABS and FFS, so try
5659          them.  If that doesn't work, and MODE is smaller than a full word,
5660          we can use zero-extension to the wider mode (an unsigned conversion)
5661          as the operation.  */
5662
5663       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5664          that is compensated by the subsequent overflow when subtracting
5665          one / negating.  */
5666
5667       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5668         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5669       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5670         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5671       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5672         {
5673           tem = convert_modes (word_mode, mode, op0, 1);
5674           mode = word_mode;
5675         }
5676
5677       if (tem != 0)
5678         {
5679           if (code == EQ)
5680             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5681                                 0, OPTAB_WIDEN);
5682           else
5683             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5684         }
5685
5686       /* If we couldn't do it that way, for NE we can "or" the two's complement
5687          of the value with itself.  For EQ, we take the one's complement of
5688          that "or", which is an extra insn, so we only handle EQ if branches
5689          are expensive.  */
5690
5691       if (tem == 0
5692           && (code == NE
5693               || BRANCH_COST (optimize_insn_for_speed_p (),
5694                               false) > 1))
5695         {
5696           if (rtx_equal_p (subtarget, op0))
5697             subtarget = 0;
5698
5699           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5700           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5701                               OPTAB_WIDEN);
5702
5703           if (tem && code == EQ)
5704             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5705         }
5706     }
5707
5708   if (tem && normalizep)
5709     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5710                         GET_MODE_BITSIZE (mode) - 1,
5711                         subtarget, normalizep == 1);
5712
5713   if (tem)
5714     {
5715       if (!target)
5716         ;
5717       else if (GET_MODE (tem) != target_mode)
5718         {
5719           convert_move (target, tem, 0);
5720           tem = target;
5721         }
5722       else if (!subtarget)
5723         {
5724           emit_move_insn (target, tem);
5725           tem = target;
5726         }
5727     }
5728   else
5729     delete_insns_since (last);
5730
5731   return tem;
5732 }
5733
5734 /* Like emit_store_flag, but always succeeds.  */
5735
5736 rtx
5737 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5738                        enum machine_mode mode, int unsignedp, int normalizep)
5739 {
5740   rtx tem, label;
5741   rtx trueval, falseval;
5742
5743   /* First see if emit_store_flag can do the job.  */
5744   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5745   if (tem != 0)
5746     return tem;
5747
5748   if (!target)
5749     target = gen_reg_rtx (word_mode);
5750
5751   /* If this failed, we have to do this with set/compare/jump/set code.
5752      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5753   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5754   if (code == NE
5755       && GET_MODE_CLASS (mode) == MODE_INT
5756       && REG_P (target)
5757       && op0 == target
5758       && op1 == const0_rtx)
5759     {
5760       label = gen_label_rtx ();
5761       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5762                                mode, NULL_RTX, NULL_RTX, label, -1);
5763       emit_move_insn (target, trueval);
5764       emit_label (label);
5765       return target;
5766     }
5767
5768   if (!REG_P (target)
5769       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5770     target = gen_reg_rtx (GET_MODE (target));
5771
5772   /* Jump in the right direction if the target cannot implement CODE
5773      but can jump on its reverse condition.  */
5774   falseval = const0_rtx;
5775   if (! can_compare_p (code, mode, ccp_jump)
5776       && (! FLOAT_MODE_P (mode)
5777           || code == ORDERED || code == UNORDERED
5778           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5779           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5780     {
5781       enum rtx_code rcode;
5782       if (FLOAT_MODE_P (mode))
5783         rcode = reverse_condition_maybe_unordered (code);
5784       else
5785         rcode = reverse_condition (code);
5786
5787       /* Canonicalize to UNORDERED for the libcall.  */
5788       if (can_compare_p (rcode, mode, ccp_jump)
5789           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5790         {
5791           falseval = trueval;
5792           trueval = const0_rtx;
5793           code = rcode;
5794         }
5795     }
5796
5797   emit_move_insn (target, trueval);
5798   label = gen_label_rtx ();
5799   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5800                            NULL_RTX, label, -1);
5801
5802   emit_move_insn (target, falseval);
5803   emit_label (label);
5804
5805   return target;
5806 }
5807 \f
5808 /* Perform possibly multi-word comparison and conditional jump to LABEL
5809    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5810    now a thin wrapper around do_compare_rtx_and_jump.  */
5811
5812 static void
5813 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5814                  rtx label)
5815 {
5816   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5817   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5818                            NULL_RTX, NULL_RTX, label, -1);
5819 }