/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
   2011 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "diagnostic-core.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-config.h"
#include "expr.h"
#include "optabs.h"
#include "recog.h"
#include "langhooks.h"
#include "df.h"
#include "target.h"
#include "expmed.h"
struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif

static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx);
static rtx extract_fixed_bit_field (enum machine_mode, rtx,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx mask_rtx (enum machine_mode, int, int, int);
static rtx lshift_value (enum machine_mode, rtx, int, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, int);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
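/* For example (illustrative only): EXACT_POWER_OF_2_OR_ZERO_P (8) is
   (8 & 7) == 0, i.e. true; EXACT_POWER_OF_2_OR_ZERO_P (6) is
   (6 & 5) == 4 != 0, i.e. false; and EXACT_POWER_OF_2_OR_ZERO_P (0)
   is (0 & -1) == 0, i.e. true, which is why callers that need a true
   power of two must check for zero separately.  */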
#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
#endif

/* Reduce conditional compilation elsewhere.  */
#ifndef HAVE_insv
#define HAVE_insv 0
#define CODE_FOR_insv CODE_FOR_nothing
#define gen_insv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extv
#define HAVE_extv 0
#define CODE_FOR_extv CODE_FOR_nothing
#define gen_extv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extzv
#define HAVE_extzv 0
#define CODE_FOR_extzv CODE_FOR_nothing
#define gen_extzv(a,b,c,d) NULL_RTX
#endif
void
init_expmed (void)
{
  struct
  {
    struct rtx_def reg;         rtunion reg_fld[2];
    struct rtx_def plus;        rtunion plus_fld1;
    struct rtx_def neg;
    struct rtx_def mult;        rtunion mult_fld1;
    struct rtx_def sdiv;        rtunion sdiv_fld1;
    struct rtx_def udiv;        rtunion udiv_fld1;
    struct rtx_def zext;
    struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
    struct rtx_def smod_32;     rtunion smod_32_fld1;
    struct rtx_def wide_mult;   rtunion wide_mult_fld1;
    struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
    struct rtx_def wide_trunc;
    struct rtx_def shift;       rtunion shift_fld1;
    struct rtx_def shift_mult;  rtunion shift_mult_fld1;
    struct rtx_def shift_add;   rtunion shift_add_fld1;
    struct rtx_def shift_sub0;  rtunion shift_sub0_fld1;
    struct rtx_def shift_sub1;  rtunion shift_sub1_fld1;
  } all;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
  int m, n;
  enum machine_mode mode, wider_mode;
  int speed;

  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      cint[m] = GEN_INT (m);
    }

  memset (&all, 0, sizeof all);

  PUT_CODE (&all.reg, REG);
  /* Avoid using hard regs in ways which may be unsupported.  */
  SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
  PUT_CODE (&all.plus, PLUS);
  XEXP (&all.plus, 0) = &all.reg;
  XEXP (&all.plus, 1) = &all.reg;

  PUT_CODE (&all.neg, NEG);
  XEXP (&all.neg, 0) = &all.reg;

  PUT_CODE (&all.mult, MULT);
  XEXP (&all.mult, 0) = &all.reg;
  XEXP (&all.mult, 1) = &all.reg;

  PUT_CODE (&all.sdiv, DIV);
  XEXP (&all.sdiv, 0) = &all.reg;
  XEXP (&all.sdiv, 1) = &all.reg;

  PUT_CODE (&all.udiv, UDIV);
  XEXP (&all.udiv, 0) = &all.reg;
  XEXP (&all.udiv, 1) = &all.reg;

  PUT_CODE (&all.sdiv_32, DIV);
  XEXP (&all.sdiv_32, 0) = &all.reg;
  XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);

  PUT_CODE (&all.smod_32, MOD);
  XEXP (&all.smod_32, 0) = &all.reg;
  XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);

  PUT_CODE (&all.zext, ZERO_EXTEND);
  XEXP (&all.zext, 0) = &all.reg;

  PUT_CODE (&all.wide_mult, MULT);
  XEXP (&all.wide_mult, 0) = &all.zext;
  XEXP (&all.wide_mult, 1) = &all.zext;

  PUT_CODE (&all.wide_lshr, LSHIFTRT);
  XEXP (&all.wide_lshr, 0) = &all.wide_mult;

  PUT_CODE (&all.wide_trunc, TRUNCATE);
  XEXP (&all.wide_trunc, 0) = &all.wide_lshr;

  PUT_CODE (&all.shift, ASHIFT);
  XEXP (&all.shift, 0) = &all.reg;

  PUT_CODE (&all.shift_mult, MULT);
  XEXP (&all.shift_mult, 0) = &all.reg;

  PUT_CODE (&all.shift_add, PLUS);
  XEXP (&all.shift_add, 0) = &all.shift_mult;
  XEXP (&all.shift_add, 1) = &all.reg;

  PUT_CODE (&all.shift_sub0, MINUS);
  XEXP (&all.shift_sub0, 0) = &all.shift_mult;
  XEXP (&all.shift_sub0, 1) = &all.reg;

  PUT_CODE (&all.shift_sub1, MINUS);
  XEXP (&all.shift_sub1, 0) = &all.reg;
  XEXP (&all.shift_sub1, 1) = &all.shift_mult;
  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      zero_cost[speed] = set_src_cost (const0_rtx, speed);

      for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
           mode != VOIDmode;
           mode = GET_MODE_WIDER_MODE (mode))
        {
          PUT_MODE (&all.reg, mode);
          PUT_MODE (&all.plus, mode);
          PUT_MODE (&all.neg, mode);
          PUT_MODE (&all.mult, mode);
          PUT_MODE (&all.sdiv, mode);
          PUT_MODE (&all.udiv, mode);
          PUT_MODE (&all.sdiv_32, mode);
          PUT_MODE (&all.smod_32, mode);
          PUT_MODE (&all.wide_trunc, mode);
          PUT_MODE (&all.shift, mode);
          PUT_MODE (&all.shift_mult, mode);
          PUT_MODE (&all.shift_add, mode);
          PUT_MODE (&all.shift_sub0, mode);
          PUT_MODE (&all.shift_sub1, mode);

          add_cost[speed][mode] = set_src_cost (&all.plus, speed);
          neg_cost[speed][mode] = set_src_cost (&all.neg, speed);
          mul_cost[speed][mode] = set_src_cost (&all.mult, speed);
          sdiv_cost[speed][mode] = set_src_cost (&all.sdiv, speed);
          udiv_cost[speed][mode] = set_src_cost (&all.udiv, speed);

          sdiv_pow2_cheap[speed][mode] = (set_src_cost (&all.sdiv_32, speed)
                                          <= 2 * add_cost[speed][mode]);
          smod_pow2_cheap[speed][mode] = (set_src_cost (&all.smod_32, speed)
                                          <= 4 * add_cost[speed][mode]);

          wider_mode = GET_MODE_WIDER_MODE (mode);
          if (wider_mode != VOIDmode)
            {
              PUT_MODE (&all.zext, wider_mode);
              PUT_MODE (&all.wide_mult, wider_mode);
              PUT_MODE (&all.wide_lshr, wider_mode);
              XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));

              mul_widen_cost[speed][wider_mode]
                = set_src_cost (&all.wide_mult, speed);
              mul_highpart_cost[speed][mode]
                = set_src_cost (&all.wide_trunc, speed);
            }

          shift_cost[speed][mode][0] = 0;
          shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
            = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];

          n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
          for (m = 1; m < n; m++)
            {
              XEXP (&all.shift, 1) = cint[m];
              XEXP (&all.shift_mult, 1) = pow2[m];

              shift_cost[speed][mode][m] = set_src_cost (&all.shift, speed);
              shiftadd_cost[speed][mode][m] = set_src_cost (&all.shift_add,
                                                            speed);
              shiftsub0_cost[speed][mode][m] = set_src_cost (&all.shift_sub0,
                                                             speed);
              shiftsub1_cost[speed][mode][m] = set_src_cost (&all.shift_sub1,
                                                             speed);
            }
        }
    }
  if (alg_hash_used_p)
    memset (alg_hash, 0, sizeof (alg_hash));
  else
    alg_hash_used_p = true;
  default_rtl_profile ();
}
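/* Illustrative note (not from the original source): the tables filled in
   above drive the multiply and divide synthesis code later in this file.
   For instance, on a hypothetical target where a shift costs 1 unit and a
   multiply costs 4, init_expmed records

     shift_cost[speed][SImode][3] == 1   and   mul_cost[speed][SImode] == 4,

   so the multiply expander will prefer "x << 3" over "x * 8"; likewise
   sdiv_pow2_cheap[speed][SImode] is true only if the target expands
   "x / 32" at a cost of at most two additions.  */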
/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (enum machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}
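/* For example (illustrative only): negate_rtx (SImode, const1_rtx) folds
   immediately to a CONST_INT of -1 via simplify_unary_operation, while
   negating a register takes the expand_unop path and emits a negation
   instruction (or its equivalent) into the insn stream.  */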
/* Report on the availability of insv/extv/extzv and the desired mode
   of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
   is false; else the mode of the specified operand.  If OPNO is -1,
   all the caller cares about is whether the insn is available.  */

enum machine_mode
mode_for_extraction (enum extraction_pattern pattern, int opno)
{
  const struct insn_data_d *data;

  switch (pattern)
    {
    case EP_insv:
      if (HAVE_insv)
        {
          data = &insn_data[CODE_FOR_insv];
          break;
        }
      return MAX_MACHINE_MODE;

    case EP_extv:
      if (HAVE_extv)
        {
          data = &insn_data[CODE_FOR_extv];
          break;
        }
      return MAX_MACHINE_MODE;

    case EP_extzv:
      if (HAVE_extzv)
        {
          data = &insn_data[CODE_FOR_extzv];
          break;
        }
      return MAX_MACHINE_MODE;

    default:
      gcc_unreachable ();
    }

  if (opno == -1)
    return VOIDmode;

  /* Everyone who uses this function used to follow it with
     if (result == VOIDmode) result = word_mode; */
  if (data->operand[opno].mode == VOIDmode)
    return word_mode;
  return data->operand[opno].mode;
}
/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.

   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
   no other way of implementing the operation.  If FALLBACK_P is false,
   return false instead.  */

static bool
store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                   unsigned HOST_WIDE_INT bitnum,
                   unsigned HOST_WIDE_INT bitregion_start,
                   unsigned HOST_WIDE_INT bitregion_end,
                   enum machine_mode fieldmode,
                   rtx value, bool fallback_p)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset, bitpos;
  rtx op0 = str_rtx;
  int byte_offset;
  rtx orig_value;

  enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
         but I think that is a mistake.  WORDS_BIG_ENDIAN is
         meaningful at a much higher level; when structures are copied
         between memory and regs, the higher-numbered regs
         always get higher addresses.  */
      int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
      int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));

      byte_offset = 0;

      /* Paradoxical subregs need special handling on big endian machines.  */
      if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
        {
          int difference = inner_mode_size - outer_mode_size;

          if (WORDS_BIG_ENDIAN)
            byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
          if (BYTES_BIG_ENDIAN)
            byte_offset += difference % UNITS_PER_WORD;
        }
      else
        byte_offset = SUBREG_BYTE (op0);

      bitnum += byte_offset * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }
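  /* Illustrative example (not from the original source): on a
     WORDS_BIG_ENDIAN target with 4-byte words, a paradoxical
     (subreg:DI (reg:SI) 0) has inner_mode_size 4 and outer_mode_size 8,
     so difference is -4 and byte_offset becomes -4; BITNUM is therefore
     adjusted down by 32 bits, placing the bit position relative to where
     the narrow register's bytes actually live within the wider
     paradoxical value.  */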
  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return true;

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      struct expand_operand ops[3];
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode = optab_handler (vec_set_optab, outermode);
      int pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_fixed_operand (&ops[0], op0);
      create_input_operand (&ops[1], value, innermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
        return true;
    }

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.

     If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */

  offset = bitnum / unit;
  bitpos = bitnum % unit;
  byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
                + (offset * UNITS_PER_WORD);

  if (bitpos == 0
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (!MEM_P (op0)
          ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
             || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
             && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
                 || validate_subreg (fieldmode, GET_MODE (op0), op0,
                                     byte_offset)))
          : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
             || (offset * BITS_PER_UNIT % bitsize == 0
                 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
    {
      if (MEM_P (op0))
        op0 = adjust_address (op0, fieldmode, offset);
      else if (GET_MODE (op0) != fieldmode)
        op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
                                   byte_offset);
      emit_move_insn (op0, value);
      return true;
    }
  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
        if (MEM_P (op0))
          op0 = adjust_address (op0, imode, 0);
        else
          {
            gcc_assert (imode != BLKmode);
            op0 = gen_lowpart (imode, op0);
          }
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  /* ?? not always for C++0x memory model ?? */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* Storing an lsb-aligned field in a register
     can be done with a movstrict instruction.  */

  if (!MEM_P (op0)
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
    {
      struct expand_operand ops[2];
      enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
      rtx arg0 = op0;
      unsigned HOST_WIDE_INT subreg_off;

      if (GET_CODE (arg0) == SUBREG)
        {
          /* Else we've got some float mode source being extracted into
             a different float mode destination -- this combination of
             subregs results in Severe Tire Damage.  */
          gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
                      || GET_MODE_CLASS (fieldmode) == MODE_INT
                      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
          arg0 = SUBREG_REG (arg0);
        }

      subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
                   + (offset * UNITS_PER_WORD);
      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
        {
          arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);

          create_fixed_operand (&ops[0], arg0);
          /* Shrink the source operand to FIELDMODE.  */
          create_convert_operand_to (&ops[1], value, fieldmode, false);
          if (maybe_expand_insn (icode, 2, ops))
            return true;
        }
    }
  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.
         However, only do that if the value is not BLKmode.  */

      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;
      rtx last;

      /* This is the mode we must force value to, so that there will be enough
         subwords to extract.  Note that fieldmode will often (always?) be
         VOIDmode, because that is what store_field uses to indicate that this
         is a bit field, but passing VOIDmode to operand_subword_force
         will result in an abort.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
        fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      last = get_last_insn ();
      for (i = 0; i < nwords; i++)
        {
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          unsigned int wordnum = (backwards
                                  ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
                                    - i - 1
                                  : i);
          unsigned int bit_offset = (backwards
                                     ? MAX ((int) bitsize - ((int) i + 1)
                                            * BITS_PER_WORD,
                                            0)
                                     : (int) i * BITS_PER_WORD);
          rtx value_word = operand_subword_force (value, wordnum, fieldmode);
          unsigned HOST_WIDE_INT new_bitsize =
            MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);

          /* If the remaining chunk doesn't have full wordsize we have
             to make sure that for big endian machines the higher order
             bits are used.  */
          if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
            value_word = simplify_expand_binop (word_mode, lshr_optab,
                                                value_word,
                                                GEN_INT (BITS_PER_WORD
                                                         - new_bitsize),
                                                NULL_RTX, true, OPTAB_DIRECT);

          if (!store_bit_field_1 (op0, new_bitsize,
                                  bitnum + bit_offset,
                                  bitregion_start, bitregion_end,
                                  word_mode,
                                  value_word, fallback_p))
            {
              delete_insns_since (last);
              return false;
            }
        }
      return true;
    }
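  /* Worked example for the loop above (illustrative only): storing a
     64-bit field with 32-bit words on a WORDS_BIG_ENDIAN target gives
     backwards = 1 and nwords = 2.  Iteration i = 0 picks wordnum 1 (the
     value's low-order subword in big-endian layout) and bit_offset 32;
     iteration i = 1 picks wordnum 0 and bit_offset 0.  The least
     significant word is therefore stored first, so a partially-filled
     most significant word is handled last.  */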
  /* From here on we can assume that the field to be stored fits within
     a single word; wider fields were handled above.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (!MEM_P (op0))
    {
      if (offset != 0
          || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
        {
          if (!REG_P (op0))
            {
              /* Since this is a destination (lvalue), we can't copy
                 it to a pseudo.  We can remove a SUBREG that does not
                 change the size of the operand.  Such a SUBREG may
                 have been added above.  */
              gcc_assert (GET_CODE (op0) == SUBREG
                          && (GET_MODE_SIZE (GET_MODE (op0))
                              == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
              op0 = SUBREG_REG (op0);
            }
          op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
                                op0, (offset * UNITS_PER_WORD));
        }
      offset = 0;
    }

  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */

  orig_value = value;
  if (GET_MODE (value) != VOIDmode
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
    {
      value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
    }
  /* Now OFFSET is nonzero only if OP0 is memory
     and is therefore always measured in bytes.  */

  if (HAVE_insv
      && GET_MODE (value) != BLKmode
      && bitsize > 0
      && GET_MODE_BITSIZE (op_mode) >= bitsize
      /* Do not use insv for volatile bitfields when
         -fstrict-volatile-bitfields is in effect.  */
      && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
           && flag_strict_volatile_bitfields > 0)
      && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
            && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
    {
      struct expand_operand ops[4];
      int xbitpos = bitpos;
      rtx value1;
      rtx xop0 = op0;
      rtx last = get_last_insn ();
      bool copy_back = false;

      /* Add OFFSET into OP0's address.  */
      if (MEM_P (xop0))
        xop0 = adjust_address (xop0, byte_mode, offset);

      /* If xop0 is a register, we need it in OP_MODE
         to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
        /* We can't just change the mode, because this might clobber op0,
           and we will need the original value of op0 if insv fails.  */
        xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
        xop0 = gen_lowpart_SUBREG (op_mode, xop0);

      /* If the destination is a paradoxical subreg such that we need a
         truncate to the inner mode, perform the insertion on a temporary and
         truncate the result to the original destination.  Note that we can't
         just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
         X) 0)) is (reg:N X).  */
      if (GET_CODE (xop0) == SUBREG
          && REG_P (SUBREG_REG (xop0))
          && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
                                              op_mode)))
        {
          rtx tem = gen_reg_rtx (op_mode);
          emit_move_insn (tem, xop0);
          xop0 = tem;
          copy_back = true;
        }
      /* We have been counting XBITPOS within UNIT.
         Count instead within the size of the register.  */
      if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
        xbitpos += GET_MODE_BITSIZE (op_mode) - unit;

      unit = GET_MODE_BITSIZE (op_mode);

      /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
         "backwards" from the size of the unit we are inserting into.
         Otherwise, we count bits from the most significant on a
         BYTES/BITS_BIG_ENDIAN machine.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
        xbitpos = unit - bitsize - xbitpos;
      /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */

      value1 = value;
      if (GET_MODE (value) != op_mode)
        {
          if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
            {
              /* Optimization: Don't bother really extending VALUE
                 if it has all the bits we will actually use.  However,
                 if we must narrow it, be sure we do it correctly.  */

              if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
                {
                  rtx tmp;

                  tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
                  if (! tmp)
                    tmp = simplify_gen_subreg (op_mode,
                                               force_reg (GET_MODE (value),
                                                          value1),
                                               GET_MODE (value), 0);
                  value1 = tmp;
                }
              else
                value1 = gen_lowpart (op_mode, value1);
            }
          else if (CONST_INT_P (value))
            value1 = gen_int_mode (INTVAL (value), op_mode);
          else
            /* Parse phase is supposed to make VALUE's data type
               match that of the component reference, which is a type
               at least as wide as the field; so VALUE should have
               a mode that corresponds to that type.  */
            gcc_assert (CONSTANT_P (value));
        }

      create_fixed_operand (&ops[0], xop0);
      create_integer_operand (&ops[1], bitsize);
      create_integer_operand (&ops[2], xbitpos);
      create_input_operand (&ops[3], value1, op_mode);
      if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
        {
          if (copy_back)
            convert_move (op0, xop0, true);
          return true;
        }
      delete_insns_since (last);
    }
  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (HAVE_insv && MEM_P (op0))
    {
      enum machine_mode bestmode;
      unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;

      if (bitregion_end)
        maxbits = bitregion_end - bitregion_start + 1;

      /* Get the mode to use for inserting into this field.  If OP0 is
         BLKmode, get the smallest mode consistent with the alignment.  If
         OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
         mode.  Otherwise, use the smallest mode containing the field.  */

      if (GET_MODE (op0) == BLKmode
          || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
          || (op_mode != MAX_MACHINE_MODE
              && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
        bestmode = get_best_mode (bitsize, bitnum,
                                  bitregion_start, bitregion_end,
                                  MEM_ALIGN (op0),
                                  (op_mode == MAX_MACHINE_MODE
                                   ? VOIDmode : op_mode),
                                  MEM_VOLATILE_P (op0));
      else
        bestmode = GET_MODE (op0);

      if (bestmode != VOIDmode
          && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
          && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
               && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
        {
          rtx last, tempreg, xop0;
          unsigned HOST_WIDE_INT xoffset, xbitpos;

          last = get_last_insn ();

          /* Adjust address to point to the containing unit of
             that mode.  Compute the offset as a multiple of this unit,
             counting in bytes.  */
          unit = GET_MODE_BITSIZE (bestmode);
          xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
          xbitpos = bitnum % unit;
          xop0 = adjust_address (op0, bestmode, xoffset);

          /* Fetch that unit, store the bitfield in it, then store
             the unit.  */
          tempreg = copy_to_reg (xop0);
          if (store_bit_field_1 (tempreg, bitsize, xbitpos,
                                 bitregion_start, bitregion_end,
                                 fieldmode, orig_value, false))
            {
              emit_move_insn (xop0, tempreg);
              return true;
            }
          delete_insns_since (last);
        }
    }

  if (!fallback_p)
    return false;

  store_fixed_bit_field (op0, offset, bitsize, bitpos,
                         bitregion_start, bitregion_end, value);
  return true;
}
/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.

   BITREGION_START is bitpos of the first bitfield in this region.
   BITREGION_END is the bitpos of the ending bitfield in this region.
   These two fields are 0, if the C++ memory model does not apply,
   or we are not interested in keeping track of bitfield regions.

   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */

void
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                 unsigned HOST_WIDE_INT bitnum,
                 unsigned HOST_WIDE_INT bitregion_start,
                 unsigned HOST_WIDE_INT bitregion_end,
                 enum machine_mode fieldmode,
                 rtx value)
{
  /* Under the C++0x memory model, we must not touch bits outside the
     bit region.  Adjust the address to start at the beginning of the
     bit region.  */
  if (MEM_P (str_rtx) && bitregion_start > 0)
    {
      enum machine_mode bestmode;
      enum machine_mode op_mode;
      unsigned HOST_WIDE_INT offset;

      op_mode = mode_for_extraction (EP_insv, 3);
      if (op_mode == MAX_MACHINE_MODE)
        op_mode = VOIDmode;

      gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);

      offset = bitregion_start / BITS_PER_UNIT;
      bitnum -= bitregion_start;
      bitregion_end -= bitregion_start;
      bitregion_start = 0;
      bestmode = get_best_mode (bitsize, bitnum,
                                bitregion_start, bitregion_end,
                                MEM_ALIGN (str_rtx),
                                op_mode,
                                MEM_VOLATILE_P (str_rtx));
      str_rtx = adjust_address (str_rtx, bestmode, offset);
    }

  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
                          bitregion_start, bitregion_end,
                          fieldmode, value, true))
    gcc_unreachable ();
}
/* Use shifts and boolean operations to store VALUE
   into a bit field of width BITSIZE
   in a memory location specified by OP0 except offset by OFFSET bytes.
   (OFFSET must be 0 if OP0 is a register.)
   The field starts at position BITPOS within the byte.
   (If OP0 is a register, it may be a full word or a narrower mode,
   but BITPOS still counts within a full word,
   which is significant on bigendian machines.)  */

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
                       unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos,
                       unsigned HOST_WIDE_INT bitregion_start,
                       unsigned HOST_WIDE_INT bitregion_end,
                       rtx value)
{
  enum machine_mode mode;
  unsigned int total_bits = BITS_PER_WORD;
  rtx temp;
  int all_zero = 0;
  int all_one = 0;

  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    {
      gcc_assert (!offset);
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
        {
          store_split_bit_field (op0, bitsize, bitpos,
                                 bitregion_start, bitregion_end,
                                 value);
          return;
        }
    }
  else
    {
      unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;

      if (bitregion_end)
        maxbits = bitregion_end - bitregion_start + 1;
      /* Get the proper mode to use for this field.  We want a mode that
         includes the entire field.  If such a mode would be larger than
         a word, we won't be doing the extraction the normal way.
         We don't want a mode bigger than the destination.  */

      mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
          || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
        mode = word_mode;

      if (MEM_VOLATILE_P (op0)
          && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
          && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
          && flag_strict_volatile_bitfields > 0)
        mode = GET_MODE (op0);
      else
        mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
                              bitregion_start, bitregion_end,
                              MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
        {
          /* The only way this should occur is if the field spans word
             boundaries.  */
          store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
                                 bitregion_start, bitregion_end, value);
          return;
        }

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
         be in the range 0 to total_bits-1, and put any excess bytes in
         OFFSET.  */
      if (bitpos >= total_bits)
        {
          offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
          bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
                     * BITS_PER_UNIT);
        }

      /* Get ref to an aligned byte, halfword, or word containing the field.
         Adjust BITPOS to be position within a word,
         and OFFSET to be the offset of that word.
         Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }
  mode = GET_MODE (op0);

  /* Now MODE is either some integral mode for a MEM as OP0,
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
     The bit field is contained entirely within OP0.
     BITPOS is the starting bit number within OP0.
     (OP0's mode may actually be narrower than MODE.)  */

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;
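  /* Worked example (illustrative only): with total_bits == 32,
     bitsize == 5 and a big-endian bitpos of 3 (counted from the msb),
     the line above yields 32 - 5 - 3 == 24, i.e. the field occupies
     bits 24..28 counted from the lsb, so its lsb sits at bit 24.  */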
  /* Now BITPOS is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      HOST_WIDE_INT v = INTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
        v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;

      if (v == 0)
        all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
                && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
               || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
        all_one = 1;

      value = lshift_value (mode, value, bitpos, bitsize);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
                      && bitpos + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
        value = convert_to_mode (mode, value, 1);

      if (must_and)
        value = expand_binop (mode, and_optab, value,
                              mask_rtx (mode, 0, bitsize, 0),
                              NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitpos > 0)
        value = expand_shift (LSHIFT_EXPR, mode, value,
                              bitpos, NULL_RTX, 1);
    }
  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      temp = expand_binop (mode, and_optab, temp,
                           mask_rtx (mode, bitpos, bitsize, 1),
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}
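/* Worked example (illustrative only): storing the 3-bit value 5 at
   little-endian bitpos 4 of a 32-bit word expands to the familiar
   read-modify-write sequence

     temp = op0 & ~0x70;       mask_rtx (mode, 4, 3, 1) == ~0x70
     temp = temp | 0x50;       lshift_value (mode, 5, 4, 3) == 0x50
     op0  = temp;

   with the AND skipped when the constant is all-ones and the IOR
   skipped when it is zero.  */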
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos,
                       unsigned HOST_WIDE_INT bitregion_start,
                       unsigned HOST_WIDE_INT bitregion_end,
                       rtx value)
{
  unsigned int unit;
  unsigned int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
        value = word;
      else
        value = gen_lowpart_common (word_mode,
                                    force_reg (GET_MODE (value) != VOIDmode
                                               ? GET_MODE (value)
                                               : word_mode, value));
    }
  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
         store_fixed_bit_field will call us again, and we will mutually
         recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);
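      /* Worked example (illustrative only): with unit == 32,
         bitpos == 28 and bitsize == 8, the first iteration stores a
         4-bit piece at thispos 28 of word 0 (thissize is clipped by
         unit - thispos), and the second iteration stores the remaining
         4 bits at thispos 0 of word 1.  */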
      if (BYTES_BIG_ENDIAN)
        {
          int total_bits;

          /* We must do an endian conversion exactly the same way as it is
             done in extract_bit_field, so that the two calls to
             extract_fixed_bit_field will have comparable arguments.  */
          if (!MEM_P (value) || GET_MODE (value) == BLKmode)
            total_bits = BITS_PER_WORD;
          else
            total_bits = GET_MODE_BITSIZE (GET_MODE (value));

          /* Fetch successively less significant portions.  */
          if (CONST_INT_P (value))
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> (bitsize - bitsdone - thissize))
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
          else
            /* The args are chosen so that the last part includes the
               lsb.  Give extract_bit_field the value it needs (with
               endianness compensation) to fetch the piece we want.  */
            part = extract_fixed_bit_field (word_mode, value, 0, thissize,
                                            total_bits - bitsize + bitsdone,
                                            NULL_RTX, 1, false);
        }
      else
        {
          /* Fetch successively more significant portions.  */
          if (CONST_INT_P (value))
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> bitsdone)
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
          else
            part = extract_fixed_bit_field (word_mode, value, 0, thissize,
                                            bitsdone, NULL_RTX, 1, false);
        }
      /* If OP0 is a register, then handle OFFSET here.

         When handling multiword bitfields, extract_bit_field may pass
         down a word_mode SUBREG of a larger REG for a bitfield that actually
         crosses a word boundary.  Thus, for a SUBREG, we must find
         the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
        {
          int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
          enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
          if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
            word = word_offset ? const0_rtx : op0;
          else
            word = operand_subword_force (SUBREG_REG (op0), word_offset,
                                          GET_MODE (SUBREG_REG (op0)));
          offset = 0;
        }
      else if (REG_P (op0))
        {
          enum machine_mode op0_mode = GET_MODE (op0);
          if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
            word = offset ? const0_rtx : op0;
          else
            word = operand_subword_force (op0, offset, GET_MODE (op0));
          offset = 0;
        }
      else
        word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.
         store_fixed_bit_field wants offset in bytes.  If WORD is const0_rtx,
         it is just an out-of-bounds access.  Ignore it.  */
      if (word != const0_rtx)
        store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
                               thispos, bitregion_start, bitregion_end, part);
      bitsdone += thissize;
    }
}
/* A subroutine of extract_bit_field_1 that converts return value X
   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
   to extract_bit_field.  */

static rtx
convert_extracted_bit_field (rtx x, enum machine_mode mode,
                             enum machine_mode tmode, bool unsignedp)
{
  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
    return x;

  /* If the x mode is not a scalar integral, first convert to the
     integer mode of that size and then access it as a floating-point
     value via a SUBREG.  */
  if (!SCALAR_INT_MODE_P (tmode))
    {
      enum machine_mode smode;

      smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
      x = convert_to_mode (smode, x, unsignedp);
      x = force_reg (smode, x);
      return gen_lowpart (tmode, x);
    }

  return convert_to_mode (tmode, x, unsignedp);
}
/* A subroutine of extract_bit_field, with the same arguments.
   If FALLBACK_P is true, fall back to extract_fixed_bit_field
   if we can find no other means of implementing the operation.
   If FALLBACK_P is false, return NULL instead.  */

static rtx
extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                     unsigned HOST_WIDE_INT bitnum,
                     int unsignedp, bool packedp, rtx target,
                     enum machine_mode mode, enum machine_mode tmode,
                     bool fallback_p)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset, bitpos;
  rtx op0 = str_rtx;
  enum machine_mode int_mode;
  enum machine_mode ext_mode;
  enum machine_mode mode1;
  int byte_offset;

  if (tmode == VOIDmode)
    tmode = mode;

  while (GET_CODE (op0) == SUBREG)
    {
      bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* If we have an out-of-bounds access to a register, just return an
     uninitialized register of the required mode.  This can occur if the
     source code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return gen_reg_rtx (tmode);

  if (REG_P (op0)
      && mode == GET_MODE (op0)
      && bitnum == 0
      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
    {
      /* We're trying to extract a full register from itself.  */
      return op0;
    }
  /* See if we can get a better vector mode before extracting.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && GET_MODE_INNER (GET_MODE (op0)) != tmode)
    {
      enum machine_mode new_mode;

      if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
        new_mode = MIN_MODE_VECTOR_FLOAT;
      else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
        new_mode = MIN_MODE_VECTOR_FRACT;
      else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
        new_mode = MIN_MODE_VECTOR_UFRACT;
      else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
        new_mode = MIN_MODE_VECTOR_ACCUM;
      else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
        new_mode = MIN_MODE_VECTOR_UACCUM;
      else
        new_mode = MIN_MODE_VECTOR_INT;

      for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
        if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
            && targetm.vector_mode_supported_p (new_mode))
          break;
      if (new_mode != VOIDmode)
        op0 = gen_lowpart (new_mode, op0);
    }

  /* Use vec_extract patterns for extracting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
      && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
          == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      struct expand_operand ops[3];
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode = optab_handler (vec_extract_optab, outermode);
      unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_output_operand (&ops[0], target, innermode);
      create_input_operand (&ops[1], op0, outermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
        {
          target = ops[0].value;
          if (GET_MODE (target) != mode)
            return gen_lowpart (tmode, target);
          return target;
        }
    }
  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
        if (MEM_P (op0))
          op0 = adjust_address (op0, imode, 0);
        else if (imode != BLKmode)
          {
            op0 = gen_lowpart (imode, op0);

            /* If we got a SUBREG, force it into a register since we
               aren't going to be able to do another SUBREG on it.  */
            if (GET_CODE (op0) == SUBREG)
              op0 = force_reg (imode, op0);
          }
        else if (REG_P (op0))
          {
            rtx reg, subreg;
            imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
                                            MODE_INT);
            reg = gen_reg_rtx (imode);
            subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
            emit_move_insn (subreg, op0);
            op0 = reg;
            bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
          }
        else
          {
            rtx mem = assign_stack_temp (GET_MODE (op0),
                                         GET_MODE_SIZE (GET_MODE (op0)), 0);
            emit_move_insn (mem, op0);
            op0 = adjust_address (mem, BLKmode, 0);
          }
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }
  /* Extraction of a full-word or multi-word value from a structure
     in a register or aligned memory can be done with just a SUBREG.
     A subword value in the least significant part of a register
     can also be extracted with a SUBREG.  For this, we need the
     byte offset of the value in op0.  */

  bitpos = bitnum % unit;
  offset = bitnum / unit;
  byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  /* Only scalar integer modes can be converted via subregs.  There is an
     additional problem for FP modes here in that they can have a precision
     which is different from the size.  mode_for_size uses precision, but
     we want a mode based on the size, so we must avoid calling it for FP
     modes.  */
  mode1 = (SCALAR_INT_MODE_P (tmode)
           ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
           : mode);

  /* If the bitfield is volatile, we need to make sure the access
     remains on a type-aligned boundary.  */
  if (MEM_P (op0)
      && MEM_VOLATILE_P (op0)
      && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
      && flag_strict_volatile_bitfields > 0)
    goto no_subreg_mode_swap;
  if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
        && bitpos % BITS_PER_WORD == 0)
       || (mode1 != BLKmode
           /* ??? The big endian test here is wrong.  This is correct
              if the value is in a register, and if mode_for_size is not
              the same mode as op0.  This causes us to get unnecessarily
              inefficient code from the Thumb port when -mbig-endian.  */
           && (BYTES_BIG_ENDIAN
               ? bitpos + bitsize == BITS_PER_WORD
               : bitpos == 0)))
      && ((!MEM_P (op0)
           && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
           && GET_MODE_SIZE (mode1) != 0
           && byte_offset % GET_MODE_SIZE (mode1) == 0)
          || (MEM_P (op0)
              && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
                  || (offset * BITS_PER_UNIT % bitsize == 0
                      && MEM_ALIGN (op0) % bitsize == 0)))))
    {
      if (MEM_P (op0))
        op0 = adjust_address (op0, mode1, offset);
      else if (mode1 != GET_MODE (op0))
        {
          rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
                                         byte_offset);
          if (sub == NULL)
            goto no_subreg_mode_swap;
          op0 = sub;
        }
      if (mode1 != mode)
        return convert_to_mode (tmode, op0, unsignedp);
      return op0;
    }
 no_subreg_mode_swap:
  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.  */

      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
        target = gen_reg_rtx (mode);

      /* Indicate for flow that the entire target reg is being set.  */
      emit_clobber (target);

      for (i = 0; i < nwords; i++)
        {
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          /* Word number in TARGET to use.  */
          unsigned int wordnum
            = (WORDS_BIG_ENDIAN
               ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
               : i);
          /* Offset from start of field in OP0.  */
          unsigned int bit_offset = (WORDS_BIG_ENDIAN
                                     ? MAX (0, ((int) bitsize - ((int) i + 1)
                                                * (int) BITS_PER_WORD))
                                     : (int) i * BITS_PER_WORD);
          rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
          rtx result_part
            = extract_bit_field (op0, MIN (BITS_PER_WORD,
                                           bitsize - i * BITS_PER_WORD),
                                 bitnum + bit_offset, 1, false, target_part,
                                 mode, word_mode);

          gcc_assert (target_part);

          if (result_part != target_part)
            emit_move_insn (target_part, result_part);
        }

      if (unsignedp)
        {
          /* Unless we've filled TARGET, the upper regs in a multi-reg value
             need to be zero'd out.  */
          if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
            {
              unsigned int i, total_words;

              total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
              for (i = nwords; i < total_words; i++)
                emit_move_insn
                  (operand_subword (target,
                                    WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
                                    1, VOIDmode),
                   const0_rtx);
            }
          return target;
        }

      /* Signed bit field: sign-extend with two arithmetic shifts.  */
      target = expand_shift (LSHIFT_EXPR, mode, target,
                             GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
      return expand_shift (RSHIFT_EXPR, mode, target,
                           GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
    }
  /* From here on we know the desired field is smaller than a word.  */

  /* Check if there is a correspondingly-sized integer field, so we can
     safely extract it as one size of integer, if necessary; then
     truncate or extend to the size that is wanted; then use SUBREGs or
     convert_to_mode to get one of the modes we really wanted.  */

  int_mode = int_mode_for_mode (tmode);
  if (int_mode == BLKmode)
    int_mode = int_mode_for_mode (mode);
  /* Should probably push op0 out to memory and then do a load.  */
  gcc_assert (int_mode != BLKmode);

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */
  if (!MEM_P (op0))
    {
      if (offset != 0
          || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
        {
          if (!REG_P (op0))
            op0 = copy_to_reg (op0);
          op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
                                op0, (offset * UNITS_PER_WORD));
        }
      offset = 0;
    }

  /* Now OFFSET is nonzero only for memory operands.  */
  ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
  if (ext_mode != MAX_MACHINE_MODE
      && bitsize > 0
      && GET_MODE_BITSIZE (ext_mode) >= bitsize
      /* Do not use extv/extzv for volatile bitfields when
         -fstrict-volatile-bitfields is in effect.  */
      && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
           && flag_strict_volatile_bitfields > 0)
      /* If op0 is a register, we need it in EXT_MODE to make it
         acceptable to the format of ext(z)v.  */
      && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
      && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
           && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
    {
      struct expand_operand ops[4];
      unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
      rtx xop0 = op0;
      rtx xtarget = target;
      rtx xspec_target = target;
      rtx xspec_target_subreg = 0;

      /* If op0 is a register, we need it in EXT_MODE to make it
         acceptable to the format of ext(z)v.  */
      if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
        xop0 = gen_lowpart_SUBREG (ext_mode, xop0);

      /* Get ref to first byte containing part of the field.  */
      if (MEM_P (xop0))
        xop0 = adjust_address (xop0, byte_mode, xoffset);

      /* Now convert from counting within UNIT to counting in EXT_MODE.  */
      if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
        xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;

      unit = GET_MODE_BITSIZE (ext_mode);

      /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
         "backwards" from the size of the unit we are extracting from.
         Otherwise, we count bits from the most significant on a
         BYTES/BITS_BIG_ENDIAN machine.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
        xbitpos = unit - bitsize - xbitpos;

      if (xtarget == 0)
        xtarget = xspec_target = gen_reg_rtx (tmode);

      if (GET_MODE (xtarget) != ext_mode)
        {
          /* Don't use LHS paradoxical subreg if explicit truncation is needed
             between the mode of the extraction (word_mode) and the target
             mode.  Instead, create a temporary and use convert_move to set
             the target.  */
          if (REG_P (xtarget)
              && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
            {
              xtarget = gen_lowpart (ext_mode, xtarget);
              if (GET_MODE_PRECISION (ext_mode)
                  > GET_MODE_PRECISION (GET_MODE (xspec_target)))
                xspec_target_subreg = xtarget;
            }
          else
            xtarget = gen_reg_rtx (ext_mode);
        }

      create_output_operand (&ops[0], xtarget, ext_mode);
      create_fixed_operand (&ops[1], xop0);
      create_integer_operand (&ops[2], bitsize);
      create_integer_operand (&ops[3], xbitpos);
      if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
                             4, ops))
        {
          xtarget = ops[0].value;
          if (xtarget == xspec_target)
            return xtarget;
          if (xtarget == xspec_target_subreg)
            return xspec_target;
          return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
        }
    }
  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
    {
      enum machine_mode bestmode;

      /* Get the mode to use for inserting into this field.  If
         OP0 is BLKmode, get the smallest mode consistent with the
         alignment.  If OP0 is a non-BLKmode object that is no
         wider than EXT_MODE, use its mode.  Otherwise, use the
         smallest mode containing the field.  */

      if (GET_MODE (op0) == BLKmode
          || (ext_mode != MAX_MACHINE_MODE
              && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
        bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
                                  (ext_mode == MAX_MACHINE_MODE
                                   ? VOIDmode : ext_mode),
                                  MEM_VOLATILE_P (op0));
      else
        bestmode = GET_MODE (op0);

      if (bestmode != VOIDmode
          && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
               && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
        {
          unsigned HOST_WIDE_INT xoffset, xbitpos;

          /* Compute the offset as a multiple of this unit,
             counting in bytes.  */
          unit = GET_MODE_BITSIZE (bestmode);
          xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
          xbitpos = bitnum % unit;

          /* Make sure the register is big enough for the whole field.  */
          if (xoffset * BITS_PER_UNIT + unit
              >= offset * BITS_PER_UNIT + bitsize)
            {
              rtx last, result, xop0;

              last = get_last_insn ();

              /* Fetch it to a register in that size.  */
              xop0 = adjust_address (op0, bestmode, xoffset);
              xop0 = force_reg (bestmode, xop0);
              result = extract_bit_field_1 (xop0, bitsize, xbitpos,
                                            unsignedp, packedp, target,
                                            mode, tmode, false);
              if (result)
                return result;

              delete_insns_since (last);
            }
        }
    }

  if (!fallback_p)
    return NULL;

  target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
                                    bitpos, target, unsignedp, packedp);
  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
}
/* Generate code to extract a byte-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.

   STR_RTX is the structure containing the byte (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   PACKEDP is nonzero if the field has the packed attribute.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

   If a TARGET is specified and we can store in it at no extra cost,
   we do so, and return TARGET.
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
   if they are equally easy.  */

rtx
extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                   unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
                   rtx target, enum machine_mode mode, enum machine_mode tmode)
{
  return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
                              target, mode, tmode, true);
}
/* Extract a bit field using shifts and boolean operations.
   Returns an rtx to represent the value.
   OP0 addresses a register (word) or memory (byte).
   BITPOS says which bit within the word or byte the bit field starts in.
   OFFSET says how many bytes farther the bit field starts;
   it is 0 if OP0 is a register.
   BITSIZE says how many bits long the bit field is.
   (If OP0 is a register, it may be narrower than a full word,
   but BITPOS still counts within a full word,
   which is significant on bigendian machines.)

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   PACKEDP is true if the field has the packed attribute.

   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */

static rtx
extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
                         unsigned HOST_WIDE_INT offset,
                         unsigned HOST_WIDE_INT bitsize,
                         unsigned HOST_WIDE_INT bitpos, rtx target,
                         int unsignedp, bool packedp)
{
  unsigned int total_bits = BITS_PER_WORD;
  enum machine_mode mode;
  if (GET_CODE (op0) == SUBREG || REG_P (op0))
    {
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
        return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
         includes the entire field.  If such a mode would be larger than
         a word, we won't be doing the extraction the normal way.  */

      if (MEM_VOLATILE_P (op0)
          && flag_strict_volatile_bitfields > 0)
        {
          if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
            mode = GET_MODE (op0);
          else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
            mode = GET_MODE (target);
          else
            mode = tmode;
        }
      else
        mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
                              MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
        /* The only way this should occur is if the field spans word
           boundaries.  */
        return extract_split_bit_field (op0, bitsize,
                                        bitpos + offset * BITS_PER_UNIT,
                                        unsignedp);

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
         be in the range 0 to total_bits-1, and put any excess bytes in
         OFFSET.  */
      if (bitpos >= total_bits)
        {
          offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
          bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
                     * BITS_PER_UNIT);
        }
      /* If we're accessing a volatile MEM, we can't do the next
         alignment step if it results in a multi-word access where we
         otherwise wouldn't have one.  So, check for that case
         here.  */
      if (MEM_P (op0)
          && MEM_VOLATILE_P (op0)
          && flag_strict_volatile_bitfields > 0
          && bitpos + bitsize <= total_bits
          && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
        {
          if (STRICT_ALIGNMENT)
            {
              static bool informed_about_misalignment = false;
              bool warned;

              if (packedp)
                {
                  if (bitsize == total_bits)
                    warned = warning_at (input_location,
                                         OPT_fstrict_volatile_bitfields,
                                         "multiple accesses to volatile structure member"
                                         " because of packed attribute");
                  else
                    warned = warning_at (input_location,
                                         OPT_fstrict_volatile_bitfields,
                                         "multiple accesses to volatile structure bitfield"
                                         " because of packed attribute");

                  return extract_split_bit_field (op0, bitsize,
                                                  bitpos + offset * BITS_PER_UNIT,
                                                  unsignedp);
                }

              if (bitsize == total_bits)
                warned = warning_at (input_location,
                                     OPT_fstrict_volatile_bitfields,
                                     "mis-aligned access used for structure member");
              else
                warned = warning_at (input_location,
                                     OPT_fstrict_volatile_bitfields,
                                     "mis-aligned access used for structure bitfield");

              if (! informed_about_misalignment && warned)
                {
                  informed_about_misalignment = true;
                  inform (input_location,
                          "when a volatile object spans multiple type-sized locations,"
                          " the compiler must choose between using a single mis-aligned access to"
                          " preserve the volatility, or using multiple aligned accesses to avoid"
                          " runtime faults; this code may fail at runtime if the hardware does"
                          " not allow this access");
                }
            }
        }
      /* Get ref to an aligned byte, halfword, or word containing the field.
         Adjust BITPOS to be position within a word,
         and OFFSET to be the offset of that word.
         Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb and that of OP0.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */
  if (unsignedp)
    {
      if (bitpos)
        {
          /* If the field does not already start at the lsb,
             shift it so it does.  */
          /* Maybe propagate the target for the shift.  */
          /* But not if we will return it--could confuse integrate.c.  */
          rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
          if (tmode != mode)
            subtarget = 0;
          op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
        }
      /* Convert the value to the desired mode.  */
      if (mode != tmode)
        op0 = convert_to_mode (tmode, op0, 1);

      /* Unless the msb of the field used to be the msb when we shifted,
         mask out the upper bits.  */

      if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
        return expand_binop (GET_MODE (op0), and_optab, op0,
                             mask_rtx (GET_MODE (op0), 0, bitsize, 0),
                             target, 1, OPTAB_LIB_WIDEN);

      return op0;
    }
1871 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1872 then arithmetic-shift its lsb to the lsb of the word. */
1873 op0 = force_reg (mode, op0);
1875 /* Find the narrowest integer mode that contains the field. */
1877 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1878 mode = GET_MODE_WIDER_MODE (mode))
1879 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1880 break;
1881 op0 = convert_to_mode (mode, op0, 0);
1888 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1890 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1891 /* Maybe propagate the target for the shift. */
1892 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1893 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1896 return expand_shift (RSHIFT_EXPR, mode, op0,
1897 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1900 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1901 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1902 complement of that if COMPLEMENT. The mask is truncated if
1903 necessary to the width of mode MODE. The mask is zero-extended if
1904 BITSIZE+BITPOS is too small for MODE. */
1906 static rtx
1907 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1908 {
1909 double_int mask;
1911 mask = double_int_mask (bitsize);
1912 mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1914 if (complement)
1915 mask = double_int_not (mask);
1917 return immed_double_int_const (mask, mode);
1918 }
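/* As a concrete illustration of the mask shape described above (a
   sketch added for exposition, not part of the build): for a 32-bit
   mode with BITPOS = 4 and BITSIZE = 8 the mask is

       ((1 << 8) - 1) << 4  ==  0x00000ff0

   and its complement, 0xfffff00f, when COMPLEMENT is nonzero.  */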
1920 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1921 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1923 static rtx
1924 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1925 {
1926 double_int val;
1928 val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1929 val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1931 return immed_double_int_const (val, mode);
1934 /* Extract a bit field that is split across two words
1935 and return an RTX for the result.
1937 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1938 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1939 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1942 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1943 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1944 {
1945 unsigned int unit;
1946 unsigned int bitsdone = 0;
1947 rtx result = NULL_RTX;
1950 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1951 much at a time. */
1952 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1953 unit = BITS_PER_WORD;
1954 else
1955 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1957 while (bitsdone < bitsize)
1959 unsigned HOST_WIDE_INT thissize;
1960 rtx part, word;
1961 unsigned HOST_WIDE_INT thispos;
1962 unsigned HOST_WIDE_INT offset;
1964 offset = (bitpos + bitsdone) / unit;
1965 thispos = (bitpos + bitsdone) % unit;
1967 /* THISSIZE must not overrun a word boundary. Otherwise,
1968 extract_fixed_bit_field will call us again, and we will mutually
1969 recurse forever. */
1970 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1971 thissize = MIN (thissize, unit - thispos);
1973 /* If OP0 is a register, then handle OFFSET here.
1975 When handling multiword bitfields, extract_bit_field may pass
1976 down a word_mode SUBREG of a larger REG for a bitfield that actually
1977 crosses a word boundary. Thus, for a SUBREG, we must find
1978 the current word starting from the base register. */
1979 if (GET_CODE (op0) == SUBREG)
1981 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1982 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1983 GET_MODE (SUBREG_REG (op0)));
1986 else if (REG_P (op0))
1988 word = operand_subword_force (op0, offset, GET_MODE (op0));
1990 else
1991 word = op0;
1994 /* Extract the parts in bit-counting order,
1995 whose meaning is determined by BYTES_PER_UNIT.
1996 OFFSET is in UNITs, and UNIT is in bits.
1997 extract_fixed_bit_field wants offset in bytes. */
1998 part = extract_fixed_bit_field (word_mode, word,
1999 offset * unit / BITS_PER_UNIT,
2000 thissize, thispos, 0, 1, false);
2001 bitsdone += thissize;
2003 /* Shift this part into place for the result. */
2004 if (BYTES_BIG_ENDIAN)
2006 if (bitsize != bitsdone)
2007 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2008 bitsize - bitsdone, 0, 1);
2009 }
2010 else
2011 {
2012 if (bitsdone != thissize)
2013 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2014 bitsdone - thissize, 0, 1);
2015 }
2016 if (result == NULL_RTX)
2017 result = part;
2018 else
2020 /* Combine the parts with bitwise or. This works
2021 because we extracted each part as an unsigned bit field. */
2022 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2023 OPTAB_LIB_WIDEN);
2028 /* Unsigned bit field: we are done. */
2029 if (unsignedp)
2030 return result;
2031 /* Signed bit field: sign-extend with two arithmetic shifts. */
2032 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2033 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2034 return expand_shift (RSHIFT_EXPR, word_mode, result,
2035 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2036 }
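/* The shift/IOR schema above, written out as plain C for one common
   case -- an illustrative sketch only, assuming 32-bit words,
   little-endian bit numbering, and a field that genuinely spans the
   word boundary (BITPOS + BITSIZE > 32).  */
#if 0
static unsigned int
extract_split_example (unsigned int lo, unsigned int hi,
                       unsigned int bitpos, unsigned int bitsize)
{
  unsigned int lopart_bits = 32 - bitpos;             /* bits taken from LO */
  unsigned int part0 = lo >> bitpos;                  /* first part, at the lsb */
  unsigned int part1 = hi & ((1u << (bitsize - lopart_bits)) - 1);
  return part0 | (part1 << lopart_bits);              /* combine with IOR */
}
#endif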
2038 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2039 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2040 MODE, fill the upper bits with zeros. Fail if the layout of either
2041 mode is unknown (as for CC modes) or if the extraction would involve
2042 unprofitable mode punning. Return the value on success, otherwise
2043 return null.
2045 This is different from gen_lowpart* in these respects:
2047 - the returned value must always be considered an rvalue
2049 - when MODE is wider than SRC_MODE, the extraction involves
2050 a zero extension
2052 - when MODE is smaller than SRC_MODE, the extraction involves
2053 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2055 In other words, this routine performs a computation, whereas the
2056 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2057 operations. */
2060 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2062 enum machine_mode int_mode, src_int_mode;
2064 if (mode == src_mode)
2065 return src;
2067 if (CONSTANT_P (src))
2069 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2070 fails, it will happily create (subreg (symbol_ref)) or similar
2071 invalid SUBREGs. */
2072 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2073 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2074 if (ret)
2075 return ret;
2077 if (GET_MODE (src) == VOIDmode
2078 || !validate_subreg (mode, src_mode, src, byte))
2079 return NULL_RTX;
2081 src = force_reg (GET_MODE (src), src);
2082 return gen_rtx_SUBREG (mode, src, byte);
2085 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2086 return NULL_RTX;
2088 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2089 && MODES_TIEABLE_P (mode, src_mode))
2091 rtx x = gen_lowpart_common (mode, src);
2092 if (x)
2093 return x;
2096 src_int_mode = int_mode_for_mode (src_mode);
2097 int_mode = int_mode_for_mode (mode);
2098 if (src_int_mode == BLKmode || int_mode == BLKmode)
2099 return NULL_RTX;
2101 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2102 return NULL_RTX;
2103 if (!MODES_TIEABLE_P (int_mode, mode))
2104 return NULL_RTX;
2106 src = gen_lowpart (src_int_mode, src);
2107 src = convert_modes (int_mode, src_int_mode, src, true);
2108 src = gen_lowpart (mode, src);
2109 return src;
2112 /* Add INC into TARGET. */
2115 expand_inc (rtx target, rtx inc)
2117 rtx value = expand_binop (GET_MODE (target), add_optab,
2118 target, inc,
2119 target, 0, OPTAB_LIB_WIDEN);
2120 if (value != target)
2121 emit_move_insn (target, value);
2124 /* Subtract DEC from TARGET. */
2127 expand_dec (rtx target, rtx dec)
2129 rtx value = expand_binop (GET_MODE (target), sub_optab,
2130 target, dec,
2131 target, 0, OPTAB_LIB_WIDEN);
2132 if (value != target)
2133 emit_move_insn (target, value);
2136 /* Output a shift instruction for expression code CODE,
2137 with SHIFTED being the rtx for the value to shift,
2138 and AMOUNT the rtx for the amount to shift by.
2139 Store the result in the rtx TARGET, if that is convenient.
2140 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2141 Return the rtx for where the value is. */
2144 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2145 rtx amount, rtx target, int unsignedp)
2146 {
2147 rtx op1, temp = 0;
2148 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2149 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2150 optab lshift_optab = ashl_optab;
2151 optab rshift_arith_optab = ashr_optab;
2152 optab rshift_uns_optab = lshr_optab;
2153 optab lrotate_optab = rotl_optab;
2154 optab rrotate_optab = rotr_optab;
2155 enum machine_mode op1_mode;
2157 bool speed = optimize_insn_for_speed_p ();
2159 op1 = amount;
2160 op1_mode = GET_MODE (op1);
2162 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2163 shift amount is a vector, use the vector/vector shift patterns. */
2164 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2166 lshift_optab = vashl_optab;
2167 rshift_arith_optab = vashr_optab;
2168 rshift_uns_optab = vlshr_optab;
2169 lrotate_optab = vrotl_optab;
2170 rrotate_optab = vrotr_optab;
2173 /* Previously detected shift-counts computed by NEGATE_EXPR
2174 and shifted in the other direction; but that does not work
2175 on all machines. */
2177 if (SHIFT_COUNT_TRUNCATED)
2179 if (CONST_INT_P (op1)
2180 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2181 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2182 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2183 % GET_MODE_BITSIZE (mode));
2184 else if (GET_CODE (op1) == SUBREG
2185 && subreg_lowpart_p (op1)
2186 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2187 op1 = SUBREG_REG (op1);
2190 if (op1 == const0_rtx)
2191 return shifted;
2193 /* Check whether it's cheaper to implement a left shift by a constant
2194 bit count by a sequence of additions. */
2195 if (code == LSHIFT_EXPR
2196 && CONST_INT_P (op1)
2197 && INTVAL (op1) > 0
2198 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2199 && INTVAL (op1) < MAX_BITS_PER_WORD
2200 && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2201 && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2203 int i;
2204 for (i = 0; i < INTVAL (op1); i++)
2206 temp = force_reg (mode, shifted);
2207 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2208 unsignedp, OPTAB_LIB_WIDEN);
2210 return shifted;
2213 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2215 enum optab_methods methods;
2217 if (attempt == 0)
2218 methods = OPTAB_DIRECT;
2219 else if (attempt == 1)
2220 methods = OPTAB_WIDEN;
2222 methods = OPTAB_LIB_WIDEN;
2224 if (rotate)
2225 {
2226 /* Widening does not work for rotation. */
2227 if (methods == OPTAB_WIDEN)
2228 continue;
2229 else if (methods == OPTAB_LIB_WIDEN)
2231 /* If we have been unable to open-code this by a rotation,
2232 do it as the IOR of two shifts. I.e., to rotate A
2233 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2234 where C is the bitsize of A.
2236 It is theoretically possible that the target machine might
2237 not be able to perform either shift and hence we would
2238 be making two libcalls rather than just the one for the
2239 shift (similarly if IOR could not be done). We will allow
2240 this extremely unlikely lossage to avoid complicating the
2241 code. */
2243 rtx subtarget = target == shifted ? 0 : target;
2244 rtx new_amount, other_amount;
2245 rtx temp1;
2247 new_amount = op1;
2248 if (CONST_INT_P (op1))
2249 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2250 - INTVAL (op1));
2251 else
2252 other_amount
2253 = simplify_gen_binary (MINUS, GET_MODE (op1),
2254 GEN_INT (GET_MODE_PRECISION (mode)),
2255 op1);
2257 shifted = force_reg (mode, shifted);
2259 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2260 mode, shifted, new_amount, 0, 1);
2261 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2262 mode, shifted, other_amount,
2263 subtarget, 1);
2264 return expand_binop (mode, ior_optab, temp, temp1, target,
2265 unsignedp, methods);
2268 temp = expand_binop (mode,
2269 left ? lrotate_optab : rrotate_optab,
2270 shifted, op1, target, unsignedp, methods);
2271 }
2272 else if (unsignedp)
2273 temp = expand_binop (mode,
2274 left ? lshift_optab : rshift_uns_optab,
2275 shifted, op1, target, unsignedp, methods);
2277 /* Do arithmetic shifts.
2278 Also, if we are going to widen the operand, we can just as well
2279 use an arithmetic right-shift instead of a logical one. */
2280 if (temp == 0 && ! rotate
2281 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2283 enum optab_methods methods1 = methods;
2285 /* If trying to widen a log shift to an arithmetic shift,
2286 don't accept an arithmetic shift of the same size. */
2287 if (unsignedp)
2288 methods1 = OPTAB_MUST_WIDEN;
2290 /* Arithmetic shift */
2292 temp = expand_binop (mode,
2293 left ? lshift_optab : rshift_arith_optab,
2294 shifted, op1, target, unsignedp, methods1);
2297 /* We used to try extzv here for logical right shifts, but that was
2298 only useful for one machine, the VAX, and caused poor code
2299 generation there for lshrdi3, so the code was deleted and a
2300 define_expand for lshrsi3 was added to vax.md. */
2303 gcc_assert (temp);
2304 return temp;
2305 }
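/* The IOR-of-two-shifts fallback for rotation, written out as plain C
   for a 32-bit value -- an illustrative sketch only, assuming
   0 < N < 32 so that neither shift count is out of range.  */
#if 0
static unsigned int
rotate_left_example (unsigned int a, unsigned int n)
{
  return (a << n) | (a >> (32 - n));
}
#endif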
2307 /* Output a shift instruction for expression code CODE,
2308 with SHIFTED being the rtx for the value to shift,
2309 and AMOUNT the amount to shift by.
2310 Store the result in the rtx TARGET, if that is convenient.
2311 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2312 Return the rtx for where the value is. */
2315 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2316 int amount, rtx target, int unsignedp)
2318 return expand_shift_1 (code, mode,
2319 shifted, GEN_INT (amount), target, unsignedp);
2322 /* Output a shift instruction for expression code CODE,
2323 with SHIFTED being the rtx for the value to shift,
2324 and AMOUNT the tree for the amount to shift by.
2325 Store the result in the rtx TARGET, if that is convenient.
2326 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2327 Return the rtx for where the value is. */
2330 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2331 tree amount, rtx target, int unsignedp)
2333 return expand_shift_1 (code, mode,
2334 shifted, expand_normal (amount), target, unsignedp);
2338 /* Indicates the type of fixup needed after a constant multiplication.
2339 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2340 the result should be negated, and ADD_VARIANT means that the
2341 multiplicand should be added to the result. */
2342 enum mult_variant {basic_variant, negate_variant, add_variant};
2344 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2345 const struct mult_cost *, enum machine_mode mode);
2346 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2347 struct algorithm *, enum mult_variant *, int);
2348 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2349 const struct algorithm *, enum mult_variant);
2350 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2351 int, rtx *, int *, int *);
2352 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2353 static rtx extract_high_half (enum machine_mode, rtx);
2354 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2355 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2356 int, int);
2357 /* Compute and return the best algorithm for multiplying by T.
2358 The algorithm must cost less than COST_LIMIT.
2359 If retval.cost >= COST_LIMIT, no algorithm was found and all
2360 other fields of the returned struct are undefined.
2361 MODE is the machine mode of the multiplication. */
2363 static void
2364 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2365 const struct mult_cost *cost_limit, enum machine_mode mode)
2366 {
2367 int m;
2368 struct algorithm *alg_in, *best_alg;
2369 struct mult_cost best_cost;
2370 struct mult_cost new_limit;
2371 int op_cost, op_latency;
2372 unsigned HOST_WIDE_INT orig_t = t;
2373 unsigned HOST_WIDE_INT q;
2374 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2375 int hash_index;
2376 bool cache_hit = false;
2377 enum alg_code cache_alg = alg_zero;
2378 bool speed = optimize_insn_for_speed_p ();
2380 /* Indicate that no algorithm is yet found. If no algorithm
2381 is found, this value will be returned and indicate failure. */
2382 alg_out->cost.cost = cost_limit->cost + 1;
2383 alg_out->cost.latency = cost_limit->latency + 1;
2385 if (cost_limit->cost < 0
2386 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2387 return;
2389 /* Restrict the bits of "t" to the multiplication's mode. */
2390 t &= GET_MODE_MASK (mode);
2392 /* t == 1 can be done in zero cost. */
2393 if (t == 1)
2394 {
2395 alg_out->ops = 1;
2396 alg_out->cost.cost = 0;
2397 alg_out->cost.latency = 0;
2398 alg_out->op[0] = alg_m;
2399 return;
2400 }
2402 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2403 fail now. */
2404 if (t == 0)
2405 {
2406 if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2407 return;
2408 else
2409 {
2410 alg_out->ops = 1;
2411 alg_out->cost.cost = zero_cost[speed];
2412 alg_out->cost.latency = zero_cost[speed];
2413 alg_out->op[0] = alg_zero;
2414 return;
2415 }
2416 }
2418 /* We'll be needing a couple extra algorithm structures now. */
2420 alg_in = XALLOCA (struct algorithm);
2421 best_alg = XALLOCA (struct algorithm);
2422 best_cost = *cost_limit;
2424 /* Compute the hash index. */
2425 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2427 /* See if we already know what to do for T. */
2428 if (alg_hash[hash_index].t == t
2429 && alg_hash[hash_index].mode == mode
2431 && alg_hash[hash_index].speed == speed
2432 && alg_hash[hash_index].alg != alg_unknown)
2434 cache_alg = alg_hash[hash_index].alg;
2436 if (cache_alg == alg_impossible)
2438 /* The cache tells us that it's impossible to synthesize
2439 multiplication by T within alg_hash[hash_index].cost. */
2440 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2441 /* COST_LIMIT is at least as restrictive as the one
2442 recorded in the hash table, in which case we have no
2443 hope of synthesizing a multiplication. Just
2444 return. */
2445 return;
2447 /* If we get here, COST_LIMIT is less restrictive than the
2448 one recorded in the hash table, so we may be able to
2449 synthesize a multiplication. Proceed as if we didn't
2450 have the cache entry. */
2454 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2455 /* The cached algorithm shows that this multiplication
2456 requires more cost than COST_LIMIT. Just return. This
2457 way, we don't clobber this cache entry with
2458 alg_impossible but retain useful information. */
2459 return;
2461 cache_hit = true;
2463 switch (cache_alg)
2464 {
2465 case alg_shift:
2466 goto do_alg_shift;
2468 case alg_add_t_m2:
2469 case alg_sub_t_m2:
2470 goto do_alg_addsub_t_m2;
2472 case alg_add_factor:
2473 case alg_sub_factor:
2474 goto do_alg_addsub_factor;
2476 case alg_add_t2_m:
2477 goto do_alg_add_t2_m;
2479 case alg_sub_t2_m:
2480 goto do_alg_sub_t2_m;
2482 default:
2483 gcc_unreachable ();
2484 }
2488 /* If we have a group of zero bits at the low-order part of T, try
2489 multiplying by the remaining bits and then doing a shift. */
2491 if ((t & 1) == 0)
2492 {
2493 do_alg_shift:
2494 m = floor_log2 (t & -t); /* m = number of low zero bits */
2495 if (m < maxm)
2496 {
2497 q = t >> m;
2498 /* The function expand_shift will choose between a shift and
2499 a sequence of additions, so the observed cost is given as
2500 MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
2501 op_cost = m * add_cost[speed][mode];
2502 if (shift_cost[speed][mode][m] < op_cost)
2503 op_cost = shift_cost[speed][mode][m];
2504 new_limit.cost = best_cost.cost - op_cost;
2505 new_limit.latency = best_cost.latency - op_cost;
2506 synth_mult (alg_in, q, &new_limit, mode);
2508 alg_in->cost.cost += op_cost;
2509 alg_in->cost.latency += op_cost;
2510 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2512 struct algorithm *x;
2513 best_cost = alg_in->cost;
2514 x = alg_in, alg_in = best_alg, best_alg = x;
2515 best_alg->log[best_alg->ops] = m;
2516 best_alg->op[best_alg->ops] = alg_shift;
2519 /* See if treating ORIG_T as a signed number yields a better
2520 sequence. Try this sequence only for a negative ORIG_T
2521 as it would be useless for a non-negative ORIG_T. */
2522 if ((HOST_WIDE_INT) orig_t < 0)
2524 /* Shift ORIG_T as follows because a right shift of a
2525 negative-valued signed type is implementation
2526 defined. */
2527 q = ~(~orig_t >> m);
2528 /* The function expand_shift will choose between a shift
2529 and a sequence of additions, so the observed cost is
2530 given as MIN (m * add_cost[speed][mode],
2531 shift_cost[speed][mode][m]). */
2532 op_cost = m * add_cost[speed][mode];
2533 if (shift_cost[speed][mode][m] < op_cost)
2534 op_cost = shift_cost[speed][mode][m];
2535 new_limit.cost = best_cost.cost - op_cost;
2536 new_limit.latency = best_cost.latency - op_cost;
2537 synth_mult (alg_in, q, &new_limit, mode);
2539 alg_in->cost.cost += op_cost;
2540 alg_in->cost.latency += op_cost;
2541 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2543 struct algorithm *x;
2544 best_cost = alg_in->cost;
2545 x = alg_in, alg_in = best_alg, best_alg = x;
2546 best_alg->log[best_alg->ops] = m;
2547 best_alg->op[best_alg->ops] = alg_shift;
2555 /* If we have an odd number, add or subtract one. */
2558 unsigned HOST_WIDE_INT w;
2561 for (w = 1; (w & t) != 0; w <<= 1)
2563 /* If T was -1, then W will be zero after the loop. This is another
2564 case where T ends with ...111. Handling this with (T + 1) and
2565 subtracting 1 produces slightly better code and results in algorithm
2566 selection much faster than treating it like the ...0111 case
2567 below. */
2570 /* Reject the case where t is 3.
2571 Thus we prefer addition in that case. */
2574 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2576 op_cost = add_cost[speed][mode];
2577 new_limit.cost = best_cost.cost - op_cost;
2578 new_limit.latency = best_cost.latency - op_cost;
2579 synth_mult (alg_in, t + 1, &new_limit, mode);
2581 alg_in->cost.cost += op_cost;
2582 alg_in->cost.latency += op_cost;
2583 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2585 struct algorithm *x;
2586 best_cost = alg_in->cost;
2587 x = alg_in, alg_in = best_alg, best_alg = x;
2588 best_alg->log[best_alg->ops] = 0;
2589 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2594 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2596 op_cost = add_cost[speed][mode];
2597 new_limit.cost = best_cost.cost - op_cost;
2598 new_limit.latency = best_cost.latency - op_cost;
2599 synth_mult (alg_in, t - 1, &new_limit, mode);
2601 alg_in->cost.cost += op_cost;
2602 alg_in->cost.latency += op_cost;
2603 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2605 struct algorithm *x;
2606 best_cost = alg_in->cost;
2607 x = alg_in, alg_in = best_alg, best_alg = x;
2608 best_alg->log[best_alg->ops] = 0;
2609 best_alg->op[best_alg->ops] = alg_add_t_m2;
2613 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2614 quickly with a - a * n for some appropriate constant n. */
2615 m = exact_log2 (-orig_t + 1);
2616 if (m >= 0 && m < maxm)
2618 op_cost = shiftsub1_cost[speed][mode][m];
2619 new_limit.cost = best_cost.cost - op_cost;
2620 new_limit.latency = best_cost.latency - op_cost;
2621 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2623 alg_in->cost.cost += op_cost;
2624 alg_in->cost.latency += op_cost;
2625 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2627 struct algorithm *x;
2628 best_cost = alg_in->cost;
2629 x = alg_in, alg_in = best_alg, best_alg = x;
2630 best_alg->log[best_alg->ops] = m;
2631 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2639 /* Look for factors of t of the form
2640 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2641 If we find such a factor, we can multiply by t using an algorithm that
2642 multiplies by q, shift the result by m and add/subtract it to itself.
2644 We search for large factors first and loop down, even if large factors
2645 are less probable than small; if we find a large factor we will find a
2646 good sequence quickly, and therefore be able to prune (by decreasing
2647 COST_LIMIT) the search. */
2649 do_alg_addsub_factor:
2650 for (m = floor_log2 (t - 1); m >= 2; m--)
2652 unsigned HOST_WIDE_INT d;
2654 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2655 if (t % d == 0 && t > d && m < maxm
2656 && (!cache_hit || cache_alg == alg_add_factor))
2658 /* If the target has a cheap shift-and-add instruction use
2659 that in preference to a shift insn followed by an add insn.
2660 Assume that the shift-and-add is "atomic" with a latency
2661 equal to its cost, otherwise assume that on superscalar
2662 hardware the shift may be executed concurrently with the
2663 earlier steps in the algorithm. */
2664 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2665 if (shiftadd_cost[speed][mode][m] < op_cost)
2667 op_cost = shiftadd_cost[speed][mode][m];
2668 op_latency = op_cost;
2670 else
2671 op_latency = add_cost[speed][mode];
2673 new_limit.cost = best_cost.cost - op_cost;
2674 new_limit.latency = best_cost.latency - op_latency;
2675 synth_mult (alg_in, t / d, &new_limit, mode);
2677 alg_in->cost.cost += op_cost;
2678 alg_in->cost.latency += op_latency;
2679 if (alg_in->cost.latency < op_cost)
2680 alg_in->cost.latency = op_cost;
2681 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2683 struct algorithm *x;
2684 best_cost = alg_in->cost;
2685 x = alg_in, alg_in = best_alg, best_alg = x;
2686 best_alg->log[best_alg->ops] = m;
2687 best_alg->op[best_alg->ops] = alg_add_factor;
2689 /* Other factors will have been taken care of in the recursion. */
2690 break;
2693 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2694 if (t % d == 0 && t > d && m < maxm
2695 && (!cache_hit || cache_alg == alg_sub_factor))
2697 /* If the target has a cheap shift-and-subtract insn use
2698 that in preference to a shift insn followed by a sub insn.
2699 Assume that the shift-and-sub is "atomic" with a latency
2700 equal to its cost, otherwise assume that on superscalar
2701 hardware the shift may be executed concurrently with the
2702 earlier steps in the algorithm. */
2703 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2704 if (shiftsub0_cost[speed][mode][m] < op_cost)
2706 op_cost = shiftsub0_cost[speed][mode][m];
2707 op_latency = op_cost;
2709 else
2710 op_latency = add_cost[speed][mode];
2712 new_limit.cost = best_cost.cost - op_cost;
2713 new_limit.latency = best_cost.latency - op_latency;
2714 synth_mult (alg_in, t / d, &new_limit, mode);
2716 alg_in->cost.cost += op_cost;
2717 alg_in->cost.latency += op_latency;
2718 if (alg_in->cost.latency < op_cost)
2719 alg_in->cost.latency = op_cost;
2720 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2722 struct algorithm *x;
2723 best_cost = alg_in->cost;
2724 x = alg_in, alg_in = best_alg, best_alg = x;
2725 best_alg->log[best_alg->ops] = m;
2726 best_alg->op[best_alg->ops] = alg_sub_factor;
2734 /* Try shift-and-add (load effective address) instructions,
2735 i.e. do a*3, a*5, a*9. */
2736 if ((t & 1) != 0)
2737 {
2738 do_alg_add_t2_m:
2739 q = t - 1;
2740 q = q & -q;
2741 m = exact_log2 (q);
2742 if (m >= 0 && m < maxm)
2744 op_cost = shiftadd_cost[speed][mode][m];
2745 new_limit.cost = best_cost.cost - op_cost;
2746 new_limit.latency = best_cost.latency - op_cost;
2747 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2749 alg_in->cost.cost += op_cost;
2750 alg_in->cost.latency += op_cost;
2751 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2753 struct algorithm *x;
2754 best_cost = alg_in->cost;
2755 x = alg_in, alg_in = best_alg, best_alg = x;
2756 best_alg->log[best_alg->ops] = m;
2757 best_alg->op[best_alg->ops] = alg_add_t2_m;
2763 do_alg_sub_t2_m:
2764 q = t + 1;
2765 q = q & -q;
2766 m = exact_log2 (q);
2767 if (m >= 0 && m < maxm)
2769 op_cost = shiftsub0_cost[speed][mode][m];
2770 new_limit.cost = best_cost.cost - op_cost;
2771 new_limit.latency = best_cost.latency - op_cost;
2772 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2774 alg_in->cost.cost += op_cost;
2775 alg_in->cost.latency += op_cost;
2776 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2778 struct algorithm *x;
2779 best_cost = alg_in->cost;
2780 x = alg_in, alg_in = best_alg, best_alg = x;
2781 best_alg->log[best_alg->ops] = m;
2782 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2790 /* If best_cost has not decreased, we have not found any algorithm. */
2791 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2793 /* We failed to find an algorithm. Record alg_impossible for
2794 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2795 we are asked to find an algorithm for T within the same or
2796 lower COST_LIMIT, we can immediately return to the
2797 caller. */
2798 alg_hash[hash_index].t = t;
2799 alg_hash[hash_index].mode = mode;
2800 alg_hash[hash_index].speed = speed;
2801 alg_hash[hash_index].alg = alg_impossible;
2802 alg_hash[hash_index].cost = *cost_limit;
2804 return;
2806 /* Cache the result. */
2809 alg_hash[hash_index].t = t;
2810 alg_hash[hash_index].mode = mode;
2811 alg_hash[hash_index].speed = speed;
2812 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2813 alg_hash[hash_index].cost.cost = best_cost.cost;
2814 alg_hash[hash_index].cost.latency = best_cost.latency;
2817 /* If we are getting a too long sequence for `struct algorithm'
2818 to record, make this search fail. */
2819 if (best_alg->ops == MAX_BITS_PER_WORD)
2820 return;
2822 /* Copy the algorithm from temporary space to the space at alg_out.
2823 We avoid using structure assignment because the majority of
2824 best_alg is normally undefined, and this is a critical function. */
2825 alg_out->ops = best_alg->ops + 1;
2826 alg_out->cost = best_cost;
2827 memcpy (alg_out->op, best_alg->op,
2828 alg_out->ops * sizeof *alg_out->op);
2829 memcpy (alg_out->log, best_alg->log,
2830 alg_out->ops * sizeof *alg_out->log);
2831 }
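/* One sequence synth_mult can discover, written out as plain C -- an
   illustrative sketch only.  For t = 10 = ((1 << 2) + 1) << 1 it may
   pick alg_add_t2_m followed by alg_shift on a target where shifts
   and adds are cheap.  */
#if 0
static long
mult_by_10_example (long x)
{
  long t = (x << 2) + x;   /* x * 5, alg_add_t2_m with log = 2 */
  return t << 1;           /* x * 10, alg_shift with log = 1 */
}
#endif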
2833 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2834 Try three variations:
2836 - a shift/add sequence based on VAL itself
2837 - a shift/add sequence based on -VAL, followed by a negation
2838 - a shift/add sequence based on VAL - 1, followed by an addition.
2840 Return true if the cheapest of these cost less than MULT_COST,
2841 describing the algorithm in *ALG and final fixup in *VARIANT. */
2844 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2845 struct algorithm *alg, enum mult_variant *variant,
2846 int mult_cost)
2847 {
2848 struct algorithm alg2;
2849 struct mult_cost limit;
2850 int op_cost;
2851 bool speed = optimize_insn_for_speed_p ();
2853 /* Fail quickly for impossible bounds. */
2854 if (mult_cost < 0)
2855 return false;
2857 /* Ensure that mult_cost provides a reasonable upper bound.
2858 Any constant multiplication can be performed with less
2859 than 2 * bits additions. */
2860 op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2861 if (mult_cost > op_cost)
2862 mult_cost = op_cost;
2864 *variant = basic_variant;
2865 limit.cost = mult_cost;
2866 limit.latency = mult_cost;
2867 synth_mult (alg, val, &limit, mode);
2869 /* This works only if the inverted value actually fits in an
2870 `HOST_WIDE_INT'. */
2871 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2873 op_cost = neg_cost[speed][mode];
2874 if (MULT_COST_LESS (&alg->cost, mult_cost))
2876 limit.cost = alg->cost.cost - op_cost;
2877 limit.latency = alg->cost.latency - op_cost;
2881 limit.cost = mult_cost - op_cost;
2882 limit.latency = mult_cost - op_cost;
2885 synth_mult (&alg2, -val, &limit, mode);
2886 alg2.cost.cost += op_cost;
2887 alg2.cost.latency += op_cost;
2888 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2889 *alg = alg2, *variant = negate_variant;
2892 /* This proves very useful for division-by-constant. */
2893 op_cost = add_cost[speed][mode];
2894 if (MULT_COST_LESS (&alg->cost, mult_cost))
2896 limit.cost = alg->cost.cost - op_cost;
2897 limit.latency = alg->cost.latency - op_cost;
2901 limit.cost = mult_cost - op_cost;
2902 limit.latency = mult_cost - op_cost;
2905 synth_mult (&alg2, val - 1, &limit, mode);
2906 alg2.cost.cost += op_cost;
2907 alg2.cost.latency += op_cost;
2908 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2909 *alg = alg2, *variant = add_variant;
2911 return MULT_COST_LESS (&alg->cost, mult_cost);
2912 }
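/* The negate_variant and add_variant fixups, written out as plain C
   for two sample constants -- an illustrative sketch only; which
   variant is actually chosen depends on the target's cost tables.  */
#if 0
static long
mult_by_minus_5_example (long x)  /* negate_variant: -(x * 5) */
{
  return -((x << 2) + x);
}

static long
mult_by_7_example (long x)        /* add_variant: x * 6 + x */
{
  return (((x << 1) + x) << 1) + x;
}
#endif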
2914 /* A subroutine of expand_mult, used for constant multiplications.
2915 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2916 convenient. Use the shift/add sequence described by ALG and apply
2917 the final fixup specified by VARIANT. */
2920 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2921 rtx target, const struct algorithm *alg,
2922 enum mult_variant variant)
2924 HOST_WIDE_INT val_so_far;
2925 rtx insn, accum, tem;
2926 int opno;
2927 enum machine_mode nmode;
2929 /* Avoid referencing memory over and over and invalid sharing
2930 of rtl. */
2931 op0 = force_reg (mode, op0);
2933 /* ACCUM starts out either as OP0 or as a zero, depending on
2934 the first operation. */
2936 if (alg->op[0] == alg_zero)
2938 accum = copy_to_mode_reg (mode, const0_rtx);
2939 val_so_far = 0;
2940 }
2941 else if (alg->op[0] == alg_m)
2943 accum = copy_to_mode_reg (mode, op0);
2944 val_so_far = 1;
2945 }
2946 else
2947 gcc_unreachable ();
2949 for (opno = 1; opno < alg->ops; opno++)
2951 int log = alg->log[opno];
2952 rtx shift_subtarget = optimize ? 0 : accum;
2953 rtx add_target
2954 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2955 && !optimize)
2956 ? target : 0;
2957 rtx accum_target = optimize ? 0 : accum;
2960 switch (alg->op[opno])
2961 {
2962 case alg_shift:
2963 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2964 /* REG_EQUAL note will be attached to the following insn. */
2965 emit_move_insn (accum, tem);
2966 val_so_far <<= log;
2967 break;
2969 case alg_add_t_m2:
2970 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2971 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2972 add_target ? add_target : accum_target);
2973 val_so_far += (HOST_WIDE_INT) 1 << log;
2974 break;
2976 case alg_sub_t_m2:
2977 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2978 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2979 add_target ? add_target : accum_target);
2980 val_so_far -= (HOST_WIDE_INT) 1 << log;
2981 break;
2983 case alg_add_t2_m:
2984 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2985 log, shift_subtarget, 0);
2986 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2987 add_target ? add_target : accum_target);
2988 val_so_far = (val_so_far << log) + 1;
2989 break;
2991 case alg_sub_t2_m:
2992 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2993 log, shift_subtarget, 0);
2994 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2995 add_target ? add_target : accum_target);
2996 val_so_far = (val_so_far << log) - 1;
2997 break;
2999 case alg_add_factor:
3000 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3001 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3002 add_target ? add_target : accum_target);
3003 val_so_far += val_so_far << log;
3004 break;
3006 case alg_sub_factor:
3007 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3008 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3010 ? add_target : (optimize ? 0 : tem)));
3011 val_so_far = (val_so_far << log) - val_so_far;
3012 break;
3014 default:
3015 gcc_unreachable ();
3016 }
3018 /* Write a REG_EQUAL note on the last insn so that we can cse
3019 multiplication sequences. Note that if ACCUM is a SUBREG,
3020 we've set the inner register and must properly indicate
3021 that. */
3023 tem = op0, nmode = mode;
3024 accum_inner = accum;
3025 if (GET_CODE (accum) == SUBREG)
3027 accum_inner = SUBREG_REG (accum);
3028 nmode = GET_MODE (accum_inner);
3029 tem = gen_lowpart (nmode, op0);
3032 insn = get_last_insn ();
3033 set_dst_reg_note (insn, REG_EQUAL,
3034 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3035 accum_inner);
3038 if (variant == negate_variant)
3040 val_so_far = -val_so_far;
3041 accum = expand_unop (mode, neg_optab, accum, target, 0);
3043 else if (variant == add_variant)
3045 val_so_far = val_so_far + 1;
3046 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3049 /* Compare only the bits of val and val_so_far that are significant
3050 in the result mode, to avoid sign-/zero-extension confusion. */
3051 val &= GET_MODE_MASK (mode);
3052 val_so_far &= GET_MODE_MASK (mode);
3053 gcc_assert (val == val_so_far);
3055 return accum;
3056 }
3058 /* Perform a multiplication and return an rtx for the result.
3059 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3060 TARGET is a suggestion for where to store the result (an rtx).
3062 We check specially for a constant integer as OP1.
3063 If you want this check for OP0 as well, then before calling
3064 you should swap the two operands if OP0 would be constant. */
3066 rtx
3067 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3068 int unsignedp)
3069 {
3070 enum mult_variant variant;
3071 struct algorithm algorithm;
3073 bool speed = optimize_insn_for_speed_p ();
3075 /* Handling const0_rtx here allows us to use zero as a rogue value for
3076 coeff below. */
3077 if (op1 == const0_rtx)
3078 return const0_rtx;
3079 if (op1 == const1_rtx)
3080 return op0;
3081 if (op1 == constm1_rtx)
3082 return expand_unop (mode,
3083 GET_MODE_CLASS (mode) == MODE_INT
3084 && !unsignedp && flag_trapv
3085 ? negv_optab : neg_optab,
3086 op0, target, 0);
3088 /* These are the operations that are potentially turned into a sequence
3089 of shifts and additions. */
3090 if (SCALAR_INT_MODE_P (mode)
3091 && (unsignedp || !flag_trapv))
3093 HOST_WIDE_INT coeff = 0;
3094 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3096 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3097 less than or equal in size to `unsigned int' this doesn't matter.
3098 If the mode is larger than `unsigned int', then synth_mult works
3099 only if the constant value exactly fits in an `unsigned int' without
3100 any truncation. This means that multiplying by negative values does
3101 not work; results are off by 2^32 on a 32 bit machine. */
3103 if (CONST_INT_P (op1))
3105 /* Attempt to handle multiplication of DImode values by negative
3106 coefficients, by performing the multiplication by a positive
3107 multiplier and then inverting the result. */
3108 if (INTVAL (op1) < 0
3109 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3111 /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3112 result is interpreted as an unsigned coefficient.
3113 Exclude cost of op0 from max_cost to match the cost
3114 calculation of the synth_mult. */
3115 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3116 speed)
3117 - neg_cost[speed][mode]);
3118 if (max_cost > 0
3119 && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3120 &variant, max_cost))
3122 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3123 NULL_RTX, &algorithm,
3124 variant);
3125 return expand_unop (mode, neg_optab, temp, target, 0);
3128 else coeff = INTVAL (op1);
3130 else if (GET_CODE (op1) == CONST_DOUBLE)
3132 /* If we are multiplying in DImode, it may still be a win
3133 to try to work with shifts and adds. */
3134 if (CONST_DOUBLE_HIGH (op1) == 0
3135 && CONST_DOUBLE_LOW (op1) > 0)
3136 coeff = CONST_DOUBLE_LOW (op1);
3137 else if (CONST_DOUBLE_LOW (op1) == 0
3138 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3140 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3141 + HOST_BITS_PER_WIDE_INT;
3142 return expand_shift (LSHIFT_EXPR, mode, op0,
3143 shift, target, unsignedp);
3147 /* We used to test optimize here, on the grounds that it's better to
3148 produce a smaller program when -O is not used. But this causes
3149 such a terrible slowdown sometimes that it seems better to always
3150 use synth_mult. */
3151 if (coeff != 0)
3152 {
3153 /* Special case powers of two. */
3154 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3155 return expand_shift (LSHIFT_EXPR, mode, op0,
3156 floor_log2 (coeff), target, unsignedp);
3158 /* Exclude cost of op0 from max_cost to match the cost
3159 calculation of the synth_mult. */
3160 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3161 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3162 max_cost))
3163 return expand_mult_const (mode, op0, coeff, target,
3164 &algorithm, variant);
3168 if (GET_CODE (op0) == CONST_DOUBLE)
3169 {
3170 rtx temp = op0;
3171 op0 = op1;
3172 op1 = temp;
3173 }
3175 /* Expand x*2.0 as x+x. */
3176 if (GET_CODE (op1) == CONST_DOUBLE
3177 && SCALAR_FLOAT_MODE_P (mode))
3179 REAL_VALUE_TYPE d;
3180 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3182 if (REAL_VALUES_EQUAL (d, dconst2))
3184 op0 = force_reg (GET_MODE (op0), op0);
3185 return expand_binop (mode, add_optab, op0, op0,
3186 target, unsignedp, OPTAB_LIB_WIDEN);
3190 /* This used to use umul_optab if unsigned, but for non-widening multiply
3191 there is no difference between signed and unsigned. */
3192 op0 = expand_binop (mode,
3193 ! unsignedp
3194 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3195 ? smulv_optab : smul_optab,
3196 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3197 gcc_assert (op0);
3198 return op0;
3199 }
3201 /* Perform a widening multiplication and return an rtx for the result.
3202 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3203 TARGET is a suggestion for where to store the result (an rtx).
3204 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3205 or smul_widen_optab.
3207 We check specially for a constant integer as OP1, comparing the
3208 cost of a widening multiply against the cost of a sequence of shifts
3209 and adds. */
3212 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3213 int unsignedp, optab this_optab)
3214 {
3215 bool speed = optimize_insn_for_speed_p ();
3216 rtx cop1;
3218 if (CONST_INT_P (op1)
3219 && GET_MODE (op0) != VOIDmode
3220 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3221 this_optab == umul_widen_optab))
3222 && CONST_INT_P (cop1)
3223 && (INTVAL (cop1) >= 0
3224 || HWI_COMPUTABLE_MODE_P (mode)))
3226 HOST_WIDE_INT coeff = INTVAL (cop1);
3227 int max_cost;
3228 enum mult_variant variant;
3229 struct algorithm algorithm;
3231 /* Special case powers of two. */
3232 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3234 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3235 return expand_shift (LSHIFT_EXPR, mode, op0,
3236 floor_log2 (coeff), target, unsignedp);
3239 /* Exclude cost of op0 from max_cost to match the cost
3240 calculation of the synth_mult. */
3241 max_cost = mul_widen_cost[speed][mode];
3242 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3243 max_cost))
3244 {
3245 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3246 return expand_mult_const (mode, op0, coeff, target,
3247 &algorithm, variant);
3250 return expand_binop (mode, this_optab, op0, op1, target,
3251 unsignedp, OPTAB_LIB_WIDEN);
3254 /* Return the smallest n such that 2**n >= X. */
3257 ceil_log2 (unsigned HOST_WIDE_INT x)
3259 return floor_log2 (x - 1) + 1;
3262 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3263 replace division by D, and put the least significant N bits of the result
3264 in *MULTIPLIER_PTR and return the most significant bit.
3266 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3267 needed precision is in PRECISION (should be <= N).
3269 PRECISION should be as small as possible so this function can choose
3270 multiplier more freely.
3272 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3273 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3275 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3276 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3279 unsigned HOST_WIDE_INT
3280 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3281 rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3283 HOST_WIDE_INT mhigh_hi, mlow_hi;
3284 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3285 int lgup, post_shift;
3286 int pow, pow2;
3287 unsigned HOST_WIDE_INT nl, dummy1;
3288 HOST_WIDE_INT nh, dummy2;
3290 /* lgup = ceil(log2(divisor)); */
3291 lgup = ceil_log2 (d);
3293 gcc_assert (lgup <= n);
3295 pow = n + lgup;
3296 pow2 = n + lgup - precision;
3298 /* We could handle this with some effort, but this case is much
3299 better handled directly with a scc insn, so rely on caller using
3300 that. */
3301 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3303 /* mlow = 2^(N + lgup)/d */
3304 if (pow >= HOST_BITS_PER_WIDE_INT)
3306 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3307 nl = 0;
3308 }
3309 else
3310 {
3311 nh = 0;
3312 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3314 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3315 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3317 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3318 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3319 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3320 else
3321 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3322 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3323 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3325 gcc_assert (!mhigh_hi || nh - d < d);
3326 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3327 /* Assert that mlow < mhigh. */
3328 gcc_assert (mlow_hi < mhigh_hi
3329 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3331 /* If precision == N, then mlow, mhigh exceed 2^N
3332 (but they do not exceed 2^(N+1)). */
3334 /* Reduce to lowest terms. */
3335 for (post_shift = lgup; post_shift > 0; post_shift--)
3337 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3338 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3339 if (ml_lo >= mh_lo)
3340 break;
3342 mlow_hi = 0;
3343 mlow_lo = ml_lo;
3344 mhigh_hi = 0;
3345 mhigh_lo = mh_lo;
3346 }
3348 *post_shift_ptr = post_shift;
3349 *lgup_ptr = lgup;
3350 if (n < HOST_BITS_PER_WIDE_INT)
3352 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3353 *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3354 return mhigh_lo >= mask;
3355 }
3356 else
3357 {
3358 *multiplier_ptr = GEN_INT (mhigh_lo);
3359 return mhigh_hi;
3360 }
3361 }
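/* A worked instance of the multiplier selection above -- an
   illustrative sketch only.  For unsigned 32-bit division by 3 the
   usual choice is the N + 1 bit multiplier 0xAAAAAAAB with a
   post-shift of 1, so x / 3 == (x * 0xAAAAAAAB) >> 33 for all
   32-bit x.  */
#if 0
static unsigned int
udiv_by_3_example (unsigned int x)
{
  return (unsigned int) (((unsigned long long) x * 0xAAAAAAABULL) >> 33);
}
#endif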
3363 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3364 congruent to 1 (mod 2**N). */
3366 static unsigned HOST_WIDE_INT
3367 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3369 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3371 /* The algorithm notes that the choice y = x satisfies
3372 x*y == 1 mod 2^3, since x is assumed odd.
3373 Each iteration doubles the number of bits of significance in y. */
3375 unsigned HOST_WIDE_INT mask;
3376 unsigned HOST_WIDE_INT y = x;
3377 int nbit = 3;
3379 mask = (n == HOST_BITS_PER_WIDE_INT
3380 ? ~(unsigned HOST_WIDE_INT) 0
3381 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3383 while (nbit < n)
3384 {
3385 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3386 nbit *= 2;
3387 }
3388 return y;
3389 }
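/* A worked instance of the Newton iteration above -- an illustrative
   sketch only.  Inverting x = 7 mod 2^8, starting from y = x:

       7 * 7 = 49 == 1 (mod 8),  so y = 7 has 3 good low bits;
       y := 7 * (2 - 7 * 7) = -329 == 183 (mod 256)

   and indeed 7 * 183 = 1281 == 1 (mod 256), so 183 is the inverse
   of 7 mod 2^8.  */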
3391 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3392 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3393 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3394 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3395 become signed.
3397 The result is put in TARGET if that is convenient.
3399 MODE is the mode of operation. */
3402 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3403 rtx op1, rtx target, int unsignedp)
3404 {
3405 rtx tem;
3406 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3408 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3409 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3410 tem = expand_and (mode, tem, op1, NULL_RTX);
3411 adj_operand
3412 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3413 adj_operand);
3415 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3416 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3417 tem = expand_and (mode, tem, op0, NULL_RTX);
3418 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3419 target);
3421 return target;
3422 }
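/* The identity behind the adjustment above, as a worked equation --
   an illustration added for exposition, not part of the original
   commentary.  For N-bit operands, the unsigned value u and signed
   value s of the same bit pattern satisfy u = s + 2^N * [s < 0], so
   the high parts of the two flavors of product differ by exactly

       highpart_s (a, b) = highpart_u (a, b) - a * [b < 0] - b * [a < 0]

   which is what the two shift/AND/add-or-subtract steps compute.  */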
3424 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3427 extract_high_half (enum machine_mode mode, rtx op)
3429 enum machine_mode wider_mode;
3431 if (mode == word_mode)
3432 return gen_highpart (mode, op);
3434 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3436 wider_mode = GET_MODE_WIDER_MODE (mode);
3437 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3438 GET_MODE_BITSIZE (mode), 0, 1);
3439 return convert_modes (mode, wider_mode, op, 0);
3442 /* Like expand_mult_highpart, but only consider using a multiplication
3443 optab. OP1 is an rtx for the constant operand. */
3446 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3447 rtx target, int unsignedp, int max_cost)
3449 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3450 enum machine_mode wider_mode;
3451 optab moptab;
3452 rtx tem;
3453 int size;
3454 bool speed = optimize_insn_for_speed_p ();
3456 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3458 wider_mode = GET_MODE_WIDER_MODE (mode);
3459 size = GET_MODE_BITSIZE (mode);
3461 /* Firstly, try using a multiplication insn that only generates the needed
3462 high part of the product, and in the sign flavor of unsignedp. */
3463 if (mul_highpart_cost[speed][mode] < max_cost)
3465 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3466 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3467 unsignedp, OPTAB_DIRECT);
3468 if (tem)
3469 return tem;
3470 }
3472 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3473 Need to adjust the result after the multiplication. */
3474 if (size - 1 < BITS_PER_WORD
3475 && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3476 + 4 * add_cost[speed][mode] < max_cost))
3478 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3479 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3480 unsignedp, OPTAB_DIRECT);
3481 if (tem)
3482 /* We used the wrong signedness. Adjust the result. */
3483 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3484 target, unsignedp);
3485 }
3487 /* Try widening multiplication. */
3488 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3489 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3490 && mul_widen_cost[speed][wider_mode] < max_cost)
3492 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3493 unsignedp, OPTAB_WIDEN);
3494 if (tem)
3495 return extract_high_half (mode, tem);
3498 /* Try widening the mode and perform a non-widening multiplication. */
3499 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3500 && size - 1 < BITS_PER_WORD
3501 && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3503 rtx insns, wop0, wop1;
3505 /* We need to widen the operands, for example to ensure the
3506 constant multiplier is correctly sign or zero extended.
3507 Use a sequence to clean-up any instructions emitted by
3508 the conversions if things don't work out. */
3509 start_sequence ();
3510 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3511 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3512 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3513 unsignedp, OPTAB_WIDEN);
3514 insns = get_insns ();
3515 end_sequence ();
3517 if (tem)
3518 {
3519 emit_insn (insns);
3520 return extract_high_half (mode, tem);
3524 /* Try widening multiplication of opposite signedness, and adjust. */
3525 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3526 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3527 && size - 1 < BITS_PER_WORD
3528 && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3529 + 4 * add_cost[speed][mode] < max_cost))
3531 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3532 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3533 if (tem)
3534 {
3535 tem = extract_high_half (mode, tem);
3536 /* We used the wrong signedness. Adjust the result. */
3537 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3538 target, unsignedp);
3539 }
3541 return 0;
3542 }
3545 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3546 putting the high half of the result in TARGET if that is convenient,
3547 and return where the result is. If the operation can not be performed,
3548 0 is returned.
3550 MODE is the mode of operation and result.
3552 UNSIGNEDP nonzero means unsigned multiply.
3554 MAX_COST is the total allowed cost for the expanded RTL. */
3557 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3558 rtx target, int unsignedp, int max_cost)
3560 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3561 unsigned HOST_WIDE_INT cnst1;
3562 int extra_cost;
3563 bool sign_adjust = false;
3564 enum mult_variant variant;
3565 struct algorithm alg;
3566 rtx tem;
3567 bool speed = optimize_insn_for_speed_p ();
3569 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3570 /* We can't support modes wider than HOST_BITS_PER_INT. */
3571 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3573 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3575 /* We can't optimize modes wider than BITS_PER_WORD.
3576 ??? We might be able to perform double-word arithmetic if
3577 mode == word_mode, however all the cost calculations in
3578 synth_mult etc. assume single-word operations. */
3579 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3580 return expand_mult_highpart_optab (mode, op0, op1, target,
3581 unsignedp, max_cost);
3583 extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3585 /* Check whether we try to multiply by a negative constant. */
3586 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3587 {
3588 sign_adjust = true;
3589 extra_cost += add_cost[speed][mode];
3590 }
3592 /* See whether shift/add multiplication is cheap enough. */
3593 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3594 max_cost - extra_cost))
3596 /* See whether the specialized multiplication optabs are
3597 cheaper than the shift/add version. */
3598 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3599 alg.cost.cost + extra_cost);
3600 if (tem)
3601 return tem;
3603 tem = convert_to_mode (wider_mode, op0, unsignedp);
3604 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3605 tem = extract_high_half (mode, tem);
3607 /* Adjust result for signedness. */
3608 if (sign_adjust)
3609 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3611 return tem;
3612 }
3613 return expand_mult_highpart_optab (mode, op0, op1, target,
3614 unsignedp, max_cost);
3618 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3620 static rtx
3621 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3622 {
3623 unsigned HOST_WIDE_INT masklow, maskhigh;
3624 rtx result, temp, shift, label;
3625 int logd;
3627 logd = floor_log2 (d);
3628 result = gen_reg_rtx (mode);
3630 /* Avoid conditional branches when they're expensive. */
3631 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3632 && optimize_insn_for_speed_p ())
3634 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3635 mode, 0, 0);
3636 if (signmask)
3637 {
3638 signmask = force_reg (mode, signmask);
3639 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3640 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3642 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3643 which instruction sequence to use. If logical right shifts
3644 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3645 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3647 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3648 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3649 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3650 > COSTS_N_INSNS (2)))
3652 temp = expand_binop (mode, xor_optab, op0, signmask,
3653 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3654 temp = expand_binop (mode, sub_optab, temp, signmask,
3655 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3656 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3657 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3658 temp = expand_binop (mode, xor_optab, temp, signmask,
3659 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3660 temp = expand_binop (mode, sub_optab, temp, signmask,
3661 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3665 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3666 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3667 signmask = force_reg (mode, signmask);
3669 temp = expand_binop (mode, add_optab, op0, signmask,
3670 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3671 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3672 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3673 temp = expand_binop (mode, sub_optab, temp, signmask,
3674 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3675 }
3676 return temp;
3677 }
3678 }
3680 /* Mask contains the mode's signbit and the significant bits of the
3681 modulus. By including the signbit in the operation, many targets
3682 can avoid an explicit compare operation in the following comparison
3683 against zero. */
3685 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3686 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3688 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3692 maskhigh = (HOST_WIDE_INT) -1
3693 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3695 temp = expand_binop (mode, and_optab, op0,
3696 immed_double_const (masklow, maskhigh, mode),
3697 result, 1, OPTAB_LIB_WIDEN);
3698 if (temp != result)
3699 emit_move_insn (result, temp);
3701 label = gen_label_rtx ();
3702 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3704 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3705 0, OPTAB_LIB_WIDEN);
3706 masklow = (HOST_WIDE_INT) -1 << logd;
3707 maskhigh = -1;
3708 temp = expand_binop (mode, ior_optab, temp,
3709 immed_double_const (masklow, maskhigh, mode),
3710 result, 1, OPTAB_LIB_WIDEN);
3711 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3712 0, OPTAB_LIB_WIDEN);
3713 if (temp != result)
3714 emit_move_insn (result, temp);
3716 emit_label (label);
3717 return result;
3718 }
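/* The branch-free LSHIFTRT form used above, written out as plain C
   for d = 4 on a 32-bit int -- an illustrative sketch only, assuming
   an arithmetic right shift of negative values (the usual
   two's-complement behaviour).  */
#if 0
static int
smod_4_example (int x)
{
  unsigned int signmask = (unsigned int) (x >> 31) >> (32 - 2); /* 0 or 3 */
  return (int) ((((unsigned int) x + signmask) & 3) - signmask);
}
#endif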
3719 /* Expand signed division of OP0 by a power of two D in mode MODE.
3720 This routine is only called for positive values of D. */
3722 static rtx
3723 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3724 {
3725 rtx temp, label;
3726 int logd;
3728 logd = floor_log2 (d);
3730 if (d == 2
3731 && BRANCH_COST (optimize_insn_for_speed_p (),
3732 false) >= 1)
3733 {
3734 temp = gen_reg_rtx (mode);
3735 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3736 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3737 0, OPTAB_LIB_WIDEN);
3738 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3741 #ifdef HAVE_conditional_move
3742 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3743 >= 2)
3744 {
3745 rtx temp2;
3747 /* ??? emit_conditional_move forces a stack adjustment via
3748 compare_from_rtx so, if the sequence is discarded, it will
3749 be lost. Do it now instead. */
3750 do_pending_stack_adjust ();
3752 start_sequence ();
3753 temp2 = copy_to_mode_reg (mode, op0);
3754 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3755 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3756 temp = force_reg (mode, temp);
3758 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3759 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3760 mode, temp, temp2, mode, 0);
3761 if (temp2)
3762 {
3763 rtx seq = get_insns ();
3764 end_sequence ();
3765 emit_insn (seq);
3766 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3767 }
3768 end_sequence ();
3769 }
3770 #endif
3772 if (BRANCH_COST (optimize_insn_for_speed_p (),
3773 false) >= 2)
3774 {
3775 int ushift = GET_MODE_BITSIZE (mode) - logd;
3777 temp = gen_reg_rtx (mode);
3778 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3779 if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3780 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3781 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3782 else
3783 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3784 ushift, NULL_RTX, 1);
3785 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3786 0, OPTAB_LIB_WIDEN);
3787 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3790 label = gen_label_rtx ();
3791 temp = copy_to_mode_reg (mode, op0);
3792 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3793 expand_inc (temp, GEN_INT (d - 1));
3795 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
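/* Editorial sketch (not from the original source) of the branchless
   variant above, for a 32-bit int and d = 8; sdiv8_sketch is a
   hypothetical name.

       static int
       sdiv8_sketch (int n)
       {
         int bias = (n >> 31) & 7;      d - 1 if n < 0, else 0
         return (n + bias) >> 3;        arithmetic shift; -13 gives -1
       }
*/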
3798 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3799 if that is convenient, and returning where the result is.
3800 You may request either the quotient or the remainder as the result;
3801 specify REM_FLAG nonzero to get the remainder.
3803 CODE is the expression code for which kind of division this is;
3804 it controls how rounding is done. MODE is the machine mode to use.
3805 UNSIGNEDP nonzero means do unsigned division. */
3807 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3808 and then correct it by or'ing in missing high bits
3809 if result of ANDI is nonzero.
3810 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3811 This could optimize to a bfexts instruction.
3812 But C doesn't use these operations, so their optimizations are
3813 left for later. */
3814 /* ??? For modulo, we don't actually need the highpart of the first product,
3815 the low part will do nicely. And for small divisors, the second multiply
3816 can also be a low-part only multiply or even be completely left out.
3817 E.g. to calculate the remainder of a division by 3 with a 32 bit
3818 multiply, multiply with 0x55555556 and extract the upper two bits;
3819 the result is exact for inputs up to 0x1fffffff.
3820 The input range can be reduced by using cross-sum rules.
3821 For odd divisors >= 3, the following table gives right shift counts
3822 so that if a number is shifted by an integer multiple of the given
3823 amount, the remainder stays the same:
3824 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3825 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3826 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3827 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3828 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3830 Cross-sum rules for even numbers can be derived by leaving as many bits
3831 to the right alone as the divisor has zeros to the right.
3832 E.g. if x is an unsigned 32 bit number:
3833 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
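/* Editorial sketch (not part of the original source) of the
   multiply-by-0x55555556 remainder trick described above, exact for
   x <= 0x1fffffff; urem3_sketch is a hypothetical name.

       static unsigned int
       urem3_sketch (unsigned int x)
       {
         return (x * 0x55555556u) >> 30;   top two bits of the low product
       }

   e.g. x = 5: the low product is 0xaaaaaaae, whose top two bits give
   2 = 5 % 3.  */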
3837 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3838 rtx op0, rtx op1, rtx target, int unsignedp)
3840 enum machine_mode compute_mode;
3842 rtx quotient = 0, remainder = 0;
3846 optab optab1, optab2;
3847 int op1_is_constant, op1_is_pow2 = 0;
3848 int max_cost, extra_cost;
3849 static HOST_WIDE_INT last_div_const = 0;
3850 static HOST_WIDE_INT ext_op1;
3851 bool speed = optimize_insn_for_speed_p ();
3853 op1_is_constant = CONST_INT_P (op1);
3854 if (op1_is_constant)
3856 ext_op1 = INTVAL (op1);
3858 ext_op1 &= GET_MODE_MASK (mode);
3859 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3860 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3863 /*
3864 This is the structure of expand_divmod:
3866 First comes code to fix up the operands so we can perform the operations
3867 correctly and efficiently.
3869 Second comes a switch statement with code specific for each rounding mode.
3870 For some special operands this code emits all RTL for the desired
3871 operation, for other cases, it generates only a quotient and stores it in
3872 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3873 to indicate that it has not done anything.
3875 Last comes code that finishes the operation. If QUOTIENT is set and
3876 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3877 QUOTIENT is not set, it is computed using trunc rounding.
3879 We try to generate special code for division and remainder when OP1 is a
3880 constant. If |OP1| = 2**n we can use shifts and some other fast
3881 operations. For other values of OP1, we compute a carefully selected
3882 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3883 by m.
3885 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3886 half of the product. Different strategies for generating the product are
3887 implemented in expand_mult_highpart.
3889 If what we actually want is the remainder, we generate that by another
3890 by-constant multiplication and a subtraction. */
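/* Editorial worked example of the final fixup described above: for
   17 / 5 the switch sets QUOTIENT = 3, and with REM_FLAG set the
   remainder is recovered as 17 - 3 * 5 = 2 with a multiply and a
   subtract instead of a second division.  */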
3892 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3893 code below will malfunction if we are, so check here and handle
3894 the special case if so. */
3895 if (op1 == const1_rtx)
3896 return rem_flag ? const0_rtx : op0;
3898 /* When dividing by -1, we could get an overflow.
3899 negv_optab can handle overflows. */
3900 if (! unsignedp && op1 == constm1_rtx)
3904 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3905 ? negv_optab : neg_optab, op0, target, 0);
3909 /* Don't use the function value register as a target
3910 since we have to read it as well as write it,
3911 and function-inlining gets confused by this. */
3912 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3913 /* Don't clobber an operand while doing a multi-step calculation. */
3914 || ((rem_flag || op1_is_constant)
3915 && (reg_mentioned_p (target, op0)
3916 || (MEM_P (op0) && MEM_P (target))))
3917 || reg_mentioned_p (target, op1)
3918 || (MEM_P (op1) && MEM_P (target))))
3921 /* Get the mode in which to perform this computation. Normally it will
3922 be MODE, but sometimes we can't do the desired operation in MODE.
3923 If so, pick a wider mode in which we can do the operation. Convert
3924 to that mode at the start to avoid repeated conversions.
3926 First see what operations we need. These depend on the expression
3927 we are evaluating. (We assume that divxx3 insns exist under the
3928 same conditions as modxx3 insns do, and that these insns don't normally
3929 fail. If these assumptions are not correct, we may generate less
3930 efficient code in some cases.)
3932 Then see if we find a mode in which we can open-code that operation
3933 (either a division, modulus, or shift). Finally, check for the smallest
3934 mode for which we can do the operation with a library call. */
3936 /* We might want to refine this now that we have division-by-constant
3937 optimization. Since expand_mult_highpart tries so many variants, it is
3938 not straightforward to generalize this. Maybe we should make an array
3939 of possible modes in init_expmed? Save this for GCC 2.7. */
3941 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3942 ? (unsignedp ? lshr_optab : ashr_optab)
3943 : (unsignedp ? udiv_optab : sdiv_optab));
3944 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3946 : (unsignedp ? udivmod_optab : sdivmod_optab));
3948 for (compute_mode = mode; compute_mode != VOIDmode;
3949 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3950 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3951 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3954 if (compute_mode == VOIDmode)
3955 for (compute_mode = mode; compute_mode != VOIDmode;
3956 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3957 if (optab_libfunc (optab1, compute_mode)
3958 || optab_libfunc (optab2, compute_mode))
3961 /* If we still couldn't find a mode, use MODE, but expand_binop will
3962 probably die. */
3963 if (compute_mode == VOIDmode)
3964 compute_mode = mode;
3966 if (target && GET_MODE (target) == compute_mode)
3969 tquotient = gen_reg_rtx (compute_mode);
3971 size = GET_MODE_BITSIZE (compute_mode);
3972 #if 0
3973 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3974 (mode), and thereby get better code when OP1 is a constant. Do that
3975 later. It will require going over all usages of SIZE below. */
3976 size = GET_MODE_BITSIZE (mode);
3977 #endif
3979 /* Only deduct something for a REM if the last divide done was
3980 for a different constant. Then set the constant of the last
3981 divide. */
3982 max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3983 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3984 && INTVAL (op1) == last_div_const))
3985 max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3987 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3989 /* Now convert to the best mode to use. */
3990 if (compute_mode != mode)
3992 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3993 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3995 /* convert_modes may have placed op1 into a register, so we
3996 must recompute the following. */
3997 op1_is_constant = CONST_INT_P (op1);
3998 op1_is_pow2 = (op1_is_constant
3999 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4001 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
4004 /* If one of the operands is a volatile MEM, copy it into a register. */
4006 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4007 op0 = force_reg (compute_mode, op0);
4008 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4009 op1 = force_reg (compute_mode, op1);
4011 /* If we need the remainder or if OP1 is constant, we need to
4012 put OP0 in a register in case it has any queued subexpressions. */
4013 if (rem_flag || op1_is_constant)
4014 op0 = force_reg (compute_mode, op0);
4016 last = get_last_insn ();
4018 /* Promote floor rounding to trunc rounding for unsigned operations. */
4021 if (code == FLOOR_DIV_EXPR)
4022 code = TRUNC_DIV_EXPR;
4023 if (code == FLOOR_MOD_EXPR)
4024 code = TRUNC_MOD_EXPR;
4025 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4026 code = TRUNC_DIV_EXPR;
4029 if (op1 != const0_rtx)
4032 case TRUNC_MOD_EXPR:
4033 case TRUNC_DIV_EXPR:
4034 if (op1_is_constant)
4038 unsigned HOST_WIDE_INT mh;
4039 int pre_shift, post_shift;
4042 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4043 & GET_MODE_MASK (compute_mode));
4045 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4047 pre_shift = floor_log2 (d);
4051 = expand_binop (compute_mode, and_optab, op0,
4052 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4056 return gen_lowpart (mode, remainder);
4058 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4059 pre_shift, tquotient, 1);
4061 else if (size <= HOST_BITS_PER_WIDE_INT)
4063 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4065 /* Most significant bit of divisor is set; emit an scc
4066 insn. */
4067 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4068 compute_mode, 1, 1);
4072 /* Find a suitable multiplier and right shift count
4073 instead of multiplying with D. */
4075 mh = choose_multiplier (d, size, size,
4076 &ml, &post_shift, &dummy);
4078 /* If the suggested multiplier is more than SIZE bits,
4079 we can do better for even divisors, using an
4080 initial right shift. */
4081 if (mh != 0 && (d & 1) == 0)
4083 pre_shift = floor_log2 (d & -d);
4084 mh = choose_multiplier (d >> pre_shift, size,
4086 &ml, &post_shift, &dummy);
4096 if (post_shift - 1 >= BITS_PER_WORD)
4100 = (shift_cost[speed][compute_mode][post_shift - 1]
4101 + shift_cost[speed][compute_mode][1]
4102 + 2 * add_cost[speed][compute_mode]);
4103 t1 = expand_mult_highpart (compute_mode, op0, ml,
4105 max_cost - extra_cost);
4108 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4111 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4112 t2, 1, NULL_RTX, 1);
4113 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4116 quotient = expand_shift
4117 (RSHIFT_EXPR, compute_mode, t4,
4118 post_shift - 1, tquotient, 1);
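/* Editorial example of the sequence above for 32-bit division by 7,
   where choose_multiplier yields ml = 0x24924925 and post_shift = 3:
       t1 = high word of (op0 * 0x24924925)
       t2 = (op0 - t1) >> 1
       q  = (t1 + t2) >> 2
   e.g. op0 = 100: t1 = 14, t2 = 43, q = 57 >> 2 = 14.  */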
4124 if (pre_shift >= BITS_PER_WORD
4125 || post_shift >= BITS_PER_WORD)
4129 (RSHIFT_EXPR, compute_mode, op0,
4130 pre_shift, NULL_RTX, 1);
4132 = (shift_cost[speed][compute_mode][pre_shift]
4133 + shift_cost[speed][compute_mode][post_shift]);
4134 t2 = expand_mult_highpart (compute_mode, t1, ml,
4136 max_cost - extra_cost);
4139 quotient = expand_shift
4140 (RSHIFT_EXPR, compute_mode, t2,
4141 post_shift, tquotient, 1);
4145 else /* Too wide mode to use tricky code */
4148 insn = get_last_insn ();
4150 set_dst_reg_note (insn, REG_EQUAL,
4151 gen_rtx_UDIV (compute_mode, op0, op1),
4154 else /* TRUNC_DIV, signed */
4156 unsigned HOST_WIDE_INT ml;
4157 int lgup, post_shift;
4159 HOST_WIDE_INT d = INTVAL (op1);
4160 unsigned HOST_WIDE_INT abs_d;
4162 /* Since d might be INT_MIN, we have to cast to
4163 unsigned HOST_WIDE_INT before negating to avoid
4164 undefined signed overflow. */
4166 ? (unsigned HOST_WIDE_INT) d
4167 : - (unsigned HOST_WIDE_INT) d);
4169 /* n rem d = n rem -d */
4170 if (rem_flag && d < 0)
4173 op1 = gen_int_mode (abs_d, compute_mode);
4179 quotient = expand_unop (compute_mode, neg_optab, op0,
4181 else if (HOST_BITS_PER_WIDE_INT >= size
4182 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4184 /* This case is not handled correctly below. */
4185 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4186 compute_mode, 1, 1);
4190 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4191 && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4192 : sdiv_pow2_cheap[speed][compute_mode])
4193 /* We assume that the cheap metric is true if the
4194 optab has an expander for this mode. */
4195 && ((optab_handler ((rem_flag ? smod_optab
4198 != CODE_FOR_nothing)
4199 || (optab_handler (sdivmod_optab,
4201 != CODE_FOR_nothing)))
4203 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4207 remainder = expand_smod_pow2 (compute_mode, op0, d);
4209 return gen_lowpart (mode, remainder);
4212 if (sdiv_pow2_cheap[speed][compute_mode]
4213 && ((optab_handler (sdiv_optab, compute_mode)
4214 != CODE_FOR_nothing)
4215 || (optab_handler (sdivmod_optab, compute_mode)
4216 != CODE_FOR_nothing)))
4217 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4219 gen_int_mode (abs_d,
4223 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4225 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4226 negate the quotient. */
4229 insn = get_last_insn ();
4231 && abs_d < ((unsigned HOST_WIDE_INT) 1
4232 << (HOST_BITS_PER_WIDE_INT - 1)))
4233 set_dst_reg_note (insn, REG_EQUAL,
4234 gen_rtx_DIV (compute_mode, op0,
4240 quotient = expand_unop (compute_mode, neg_optab,
4241 quotient, quotient, 0);
4244 else if (size <= HOST_BITS_PER_WIDE_INT)
4246 choose_multiplier (abs_d, size, size - 1,
4247 &mlr, &post_shift, &lgup);
4248 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4249 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4253 if (post_shift >= BITS_PER_WORD
4254 || size - 1 >= BITS_PER_WORD)
4257 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4258 + shift_cost[speed][compute_mode][size - 1]
4259 + add_cost[speed][compute_mode]);
4260 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4262 max_cost - extra_cost);
4266 (RSHIFT_EXPR, compute_mode, t1,
4267 post_shift, NULL_RTX, 0);
4269 (RSHIFT_EXPR, compute_mode, op0,
4270 size - 1, NULL_RTX, 0);
4273 = force_operand (gen_rtx_MINUS (compute_mode,
4278 = force_operand (gen_rtx_MINUS (compute_mode,
4286 if (post_shift >= BITS_PER_WORD
4287 || size - 1 >= BITS_PER_WORD)
4290 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4291 mlr = gen_int_mode (ml, compute_mode);
4292 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4293 + shift_cost[speed][compute_mode][size - 1]
4294 + 2 * add_cost[speed][compute_mode]);
4295 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4297 max_cost - extra_cost);
4300 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4304 (RSHIFT_EXPR, compute_mode, t2,
4305 post_shift, NULL_RTX, 0);
4307 (RSHIFT_EXPR, compute_mode, op0,
4308 size - 1, NULL_RTX, 0);
4311 = force_operand (gen_rtx_MINUS (compute_mode,
4316 = force_operand (gen_rtx_MINUS (compute_mode,
4321 else /* Too wide mode to use tricky code */
4324 insn = get_last_insn ();
4326 set_dst_reg_note (insn, REG_EQUAL,
4327 gen_rtx_DIV (compute_mode, op0, op1),
4333 delete_insns_since (last);
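/* Editorial example of the signed multiplier path above for 32-bit
   division by 3, where the multiplier is 0x55555556 with
   post_shift = 0:
       t1 = high word of (op0 * 0x55555556)
       q  = t1 - (op0 >> 31)
   e.g. op0 = -7: t1 = -3, op0 >> 31 = -1, so q = -2.  */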
4336 case FLOOR_DIV_EXPR:
4337 case FLOOR_MOD_EXPR:
4338 /* We will come here only for signed operations. */
4339 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4341 unsigned HOST_WIDE_INT mh;
4342 int pre_shift, lgup, post_shift;
4343 HOST_WIDE_INT d = INTVAL (op1);
4348 /* We could just as easily deal with negative constants here,
4349 but it does not seem worth the trouble for GCC 2.6. */
4350 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4352 pre_shift = floor_log2 (d);
4355 remainder = expand_binop (compute_mode, and_optab, op0,
4356 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4357 remainder, 0, OPTAB_LIB_WIDEN);
4359 return gen_lowpart (mode, remainder);
4361 quotient = expand_shift
4362 (RSHIFT_EXPR, compute_mode, op0,
4363 pre_shift, tquotient, 0);
4369 mh = choose_multiplier (d, size, size - 1,
4370 &ml, &post_shift, &lgup);
4373 if (post_shift < BITS_PER_WORD
4374 && size - 1 < BITS_PER_WORD)
4377 (RSHIFT_EXPR, compute_mode, op0,
4378 size - 1, NULL_RTX, 0);
4379 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4380 NULL_RTX, 0, OPTAB_WIDEN);
4381 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4382 + shift_cost[speed][compute_mode][size - 1]
4383 + 2 * add_cost[speed][compute_mode]);
4384 t3 = expand_mult_highpart (compute_mode, t2, ml,
4386 max_cost - extra_cost);
4390 (RSHIFT_EXPR, compute_mode, t3,
4391 post_shift, NULL_RTX, 1);
4392 quotient = expand_binop (compute_mode, xor_optab,
4393 t4, t1, tquotient, 0,
4401 rtx nsign, t1, t2, t3, t4;
4402 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4403 op0, constm1_rtx), NULL_RTX);
4404 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4406 nsign = expand_shift
4407 (RSHIFT_EXPR, compute_mode, t2,
4408 size - 1, NULL_RTX, 0);
4409 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4411 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4416 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4418 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4427 delete_insns_since (last);
4429 /* Try using an instruction that produces both the quotient and
4430 remainder, using truncation. We can easily compensate the quotient
4431 or remainder to get floor rounding, once we have the remainder.
4432 Notice that we also compute the final remainder value here,
4433 and return the result right away. */
4434 if (target == 0 || GET_MODE (target) != compute_mode)
4435 target = gen_reg_rtx (compute_mode);
4440 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4441 quotient = gen_reg_rtx (compute_mode);
4446 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4447 remainder = gen_reg_rtx (compute_mode);
4450 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4451 quotient, remainder, 0))
4453 /* This could be computed with a branch-less sequence.
4454 Save that for later. */
4456 rtx label = gen_label_rtx ();
4457 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4458 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4459 NULL_RTX, 0, OPTAB_WIDEN);
4460 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4461 expand_dec (quotient, const1_rtx);
4462 expand_inc (remainder, op1);
4464 return gen_lowpart (mode, rem_flag ? remainder : quotient);
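/* Editorial worked example of the compensation above: -7 FLOOR/ 2
   truncates to quotient -3, remainder -1; the signs of op0 and op1
   differ and the remainder is nonzero, so the quotient becomes -4
   and the remainder becomes -1 + 2 = 1.  */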
4467 /* No luck with division elimination or divmod. Have to do it
4468 by conditionally adjusting op0 *and* the result. */
4470 rtx label1, label2, label3, label4, label5;
4474 quotient = gen_reg_rtx (compute_mode);
4475 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4476 label1 = gen_label_rtx ();
4477 label2 = gen_label_rtx ();
4478 label3 = gen_label_rtx ();
4479 label4 = gen_label_rtx ();
4480 label5 = gen_label_rtx ();
4481 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4482 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4483 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4484 quotient, 0, OPTAB_LIB_WIDEN);
4485 if (tem != quotient)
4486 emit_move_insn (quotient, tem);
4487 emit_jump_insn (gen_jump (label5));
4489 emit_label (label1);
4490 expand_inc (adjusted_op0, const1_rtx);
4491 emit_jump_insn (gen_jump (label4));
4493 emit_label (label2);
4494 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4495 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4496 quotient, 0, OPTAB_LIB_WIDEN);
4497 if (tem != quotient)
4498 emit_move_insn (quotient, tem);
4499 emit_jump_insn (gen_jump (label5));
4501 emit_label (label3);
4502 expand_dec (adjusted_op0, const1_rtx);
4503 emit_label (label4);
4504 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4505 quotient, 0, OPTAB_LIB_WIDEN);
4506 if (tem != quotient)
4507 emit_move_insn (quotient, tem);
4508 expand_dec (quotient, const1_rtx);
4509 emit_label (label5);
4517 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4520 unsigned HOST_WIDE_INT d = INTVAL (op1);
4521 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4522 floor_log2 (d), tquotient, 1);
4523 t2 = expand_binop (compute_mode, and_optab, op0,
4525 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4526 t3 = gen_reg_rtx (compute_mode);
4527 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4528 compute_mode, 1, 1);
4532 lab = gen_label_rtx ();
4533 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4534 expand_inc (t1, const1_rtx);
4539 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4545 /* Try using an instruction that produces both the quotient and
4546 remainder, using truncation. We can easily compensate the
4547 quotient or remainder to get ceiling rounding, once we have the
4548 remainder. Notice that we also compute the final remainder
4549 value here, and return the result right away. */
4550 if (target == 0 || GET_MODE (target) != compute_mode)
4551 target = gen_reg_rtx (compute_mode);
4555 remainder = (REG_P (target)
4556 ? target : gen_reg_rtx (compute_mode));
4557 quotient = gen_reg_rtx (compute_mode);
4561 quotient = (REG_P (target)
4562 ? target : gen_reg_rtx (compute_mode));
4563 remainder = gen_reg_rtx (compute_mode);
4566 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4569 /* This could be computed with a branch-less sequence.
4570 Save that for later. */
4571 rtx label = gen_label_rtx ();
4572 do_cmp_and_jump (remainder, const0_rtx, EQ,
4573 compute_mode, label);
4574 expand_inc (quotient, const1_rtx);
4575 expand_dec (remainder, op1);
4577 return gen_lowpart (mode, rem_flag ? remainder : quotient);
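/* Editorial worked example: 7 CEIL/ 2 (unsigned) truncates to
   quotient 3, remainder 1; the remainder is nonzero, so the quotient
   becomes 4 and the remainder becomes 1 - 2 = -1 (i.e. 7 - 4*2 in
   the compute mode).  */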
4580 /* No luck with division elimination or divmod. Have to do it
4581 by conditionally adjusting op0 *and* the result. */
4584 rtx adjusted_op0, tem;
4586 quotient = gen_reg_rtx (compute_mode);
4587 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4588 label1 = gen_label_rtx ();
4589 label2 = gen_label_rtx ();
4590 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4591 compute_mode, label1);
4592 emit_move_insn (quotient, const0_rtx);
4593 emit_jump_insn (gen_jump (label2));
4595 emit_label (label1);
4596 expand_dec (adjusted_op0, const1_rtx);
4597 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4598 quotient, 1, OPTAB_LIB_WIDEN);
4599 if (tem != quotient)
4600 emit_move_insn (quotient, tem);
4601 expand_inc (quotient, const1_rtx);
4602 emit_label (label2);
4607 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4608 && INTVAL (op1) >= 0)
4610 /* This is extremely similar to the code for the unsigned case
4611 above. For 2.7 we should merge these variants, but for
4612 2.6.1 I don't want to touch the code for unsigned since that
4613 gets used in C. The signed case will only be used by other
4614 languages (Ada). */
4617 unsigned HOST_WIDE_INT d = INTVAL (op1);
4618 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4619 floor_log2 (d), tquotient, 0);
4620 t2 = expand_binop (compute_mode, and_optab, op0,
4622 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4623 t3 = gen_reg_rtx (compute_mode);
4624 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4625 compute_mode, 1, 1);
4629 lab = gen_label_rtx ();
4630 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4631 expand_inc (t1, const1_rtx);
4636 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4642 /* Try using an instruction that produces both the quotient and
4643 remainder, using truncation. We can easily compensate the
4644 quotient or remainder to get ceiling rounding, once we have the
4645 remainder. Notice that we also compute the final remainder
4646 value here, and return the result right away. */
4647 if (target == 0 || GET_MODE (target) != compute_mode)
4648 target = gen_reg_rtx (compute_mode);
4651 remainder= (REG_P (target)
4652 ? target : gen_reg_rtx (compute_mode));
4653 quotient = gen_reg_rtx (compute_mode);
4657 quotient = (REG_P (target)
4658 ? target : gen_reg_rtx (compute_mode));
4659 remainder = gen_reg_rtx (compute_mode);
4662 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4665 /* This could be computed with a branch-less sequence.
4666 Save that for later. */
4668 rtx label = gen_label_rtx ();
4669 do_cmp_and_jump (remainder, const0_rtx, EQ,
4670 compute_mode, label);
4671 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4672 NULL_RTX, 0, OPTAB_WIDEN);
4673 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4674 expand_inc (quotient, const1_rtx);
4675 expand_dec (remainder, op1);
4677 return gen_lowpart (mode, rem_flag ? remainder : quotient);
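/* Editorial worked example for the signed case: 7 CEIL/ 2 gives
   quotient 3, remainder 1; op0 ^ op1 is nonnegative and the remainder
   is nonzero, so we adjust to quotient 4, remainder -1.  For
   -7 CEIL/ 2 the signs differ, so quotient -3, remainder -1 are
   already correct.  */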
4680 /* No luck with division elimination or divmod. Have to do it
4681 by conditionally adjusting op0 *and* the result. */
4683 rtx label1, label2, label3, label4, label5;
4687 quotient = gen_reg_rtx (compute_mode);
4688 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4689 label1 = gen_label_rtx ();
4690 label2 = gen_label_rtx ();
4691 label3 = gen_label_rtx ();
4692 label4 = gen_label_rtx ();
4693 label5 = gen_label_rtx ();
4694 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4695 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4696 compute_mode, label1);
4697 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4698 quotient, 0, OPTAB_LIB_WIDEN);
4699 if (tem != quotient)
4700 emit_move_insn (quotient, tem);
4701 emit_jump_insn (gen_jump (label5));
4703 emit_label (label1);
4704 expand_dec (adjusted_op0, const1_rtx);
4705 emit_jump_insn (gen_jump (label4));
4707 emit_label (label2);
4708 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4709 compute_mode, label3);
4710 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4711 quotient, 0, OPTAB_LIB_WIDEN);
4712 if (tem != quotient)
4713 emit_move_insn (quotient, tem);
4714 emit_jump_insn (gen_jump (label5));
4716 emit_label (label3);
4717 expand_inc (adjusted_op0, const1_rtx);
4718 emit_label (label4);
4719 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4720 quotient, 0, OPTAB_LIB_WIDEN);
4721 if (tem != quotient)
4722 emit_move_insn (quotient, tem);
4723 expand_inc (quotient, const1_rtx);
4724 emit_label (label5);
4729 case EXACT_DIV_EXPR:
4730 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4732 HOST_WIDE_INT d = INTVAL (op1);
4733 unsigned HOST_WIDE_INT ml;
4737 pre_shift = floor_log2 (d & -d);
4738 ml = invert_mod2n (d >> pre_shift, size);
4739 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4740 pre_shift, NULL_RTX, unsignedp);
4741 quotient = expand_mult (compute_mode, t1,
4742 gen_int_mode (ml, compute_mode),
4745 insn = get_last_insn ();
4746 set_dst_reg_note (insn, REG_EQUAL,
4747 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4748 compute_mode, op0, op1),
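/* Editorial example of the modular-inverse trick above: an exact
   32-bit division by 3 multiplies by invert_mod2n (3, 32) =
   0xaaaaaaab, since 3 * 0xaaaaaaab == 1 (mod 2^32); e.g.
   21 * 0xaaaaaaab == 7 (mod 2^32).  */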
4753 case ROUND_DIV_EXPR:
4754 case ROUND_MOD_EXPR:
4759 label = gen_label_rtx ();
4760 quotient = gen_reg_rtx (compute_mode);
4761 remainder = gen_reg_rtx (compute_mode);
4762 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4765 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4766 quotient, 1, OPTAB_LIB_WIDEN);
4767 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4768 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4769 remainder, 1, OPTAB_LIB_WIDEN);
4771 tem = plus_constant (op1, -1);
4772 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4773 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4774 expand_inc (quotient, const1_rtx);
4775 expand_dec (remainder, op1);
4780 rtx abs_rem, abs_op1, tem, mask;
4782 label = gen_label_rtx ();
4783 quotient = gen_reg_rtx (compute_mode);
4784 remainder = gen_reg_rtx (compute_mode);
4785 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4788 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4789 quotient, 0, OPTAB_LIB_WIDEN);
4790 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4791 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4792 remainder, 0, OPTAB_LIB_WIDEN);
4794 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4795 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4796 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4798 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4799 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4800 NULL_RTX, 0, OPTAB_WIDEN);
4801 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4802 size - 1, NULL_RTX, 0);
4803 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4804 NULL_RTX, 0, OPTAB_WIDEN);
4805 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4806 NULL_RTX, 0, OPTAB_WIDEN);
4807 expand_inc (quotient, tem);
4808 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4809 NULL_RTX, 0, OPTAB_WIDEN);
4810 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4811 NULL_RTX, 0, OPTAB_WIDEN);
4812 expand_dec (remainder, tem);
4815 return gen_lowpart (mode, rem_flag ? remainder : quotient);
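/* Editorial worked example of the round-to-nearest fixup above:
   7 ROUND/ 2 truncates to quotient 3, remainder 1; 2*|1| >= |2|,
   so the quotient becomes 4 and the remainder -1, rounding 3.5
   away from zero.  */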
4823 if (target && GET_MODE (target) != compute_mode)
4828 /* Try to produce the remainder without producing the quotient.
4829 If we seem to have a divmod pattern that does not require widening,
4830 don't try widening here. We should really have a WIDEN argument
4831 to expand_twoval_binop, since what we'd really like to do here is
4832 1) try a mod insn in compute_mode
4833 2) try a divmod insn in compute_mode
4834 3) try a div insn in compute_mode and multiply-subtract to get
4835 remainder
4836 4) try the same things with widening allowed. */
4838 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4841 ((optab_handler (optab2, compute_mode)
4842 != CODE_FOR_nothing)
4843 ? OPTAB_DIRECT : OPTAB_WIDEN));
4846 /* No luck there. Can we do remainder and divide at once
4847 without a library call? */
4848 remainder = gen_reg_rtx (compute_mode);
4849 if (! expand_twoval_binop ((unsignedp
4853 NULL_RTX, remainder, unsignedp))
4858 return gen_lowpart (mode, remainder);
4861 /* Produce the quotient. Try a quotient insn, but not a library call.
4862 If we have a divmod in this mode, use it in preference to widening
4863 the div (for this test we assume it will not fail). Note that optab2
4864 is set to the one of the two optabs that the call below will use. */
4866 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4867 op0, op1, rem_flag ? NULL_RTX : target,
4869 ((optab_handler (optab2, compute_mode)
4870 != CODE_FOR_nothing)
4871 ? OPTAB_DIRECT : OPTAB_WIDEN));
4875 /* No luck there. Try a quotient-and-remainder insn,
4876 keeping the quotient alone. */
4877 quotient = gen_reg_rtx (compute_mode);
4878 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4880 quotient, NULL_RTX, unsignedp))
4884 /* Still no luck. If we are not computing the remainder,
4885 use a library call for the quotient. */
4886 quotient = sign_expand_binop (compute_mode,
4887 udiv_optab, sdiv_optab,
4889 unsignedp, OPTAB_LIB_WIDEN);
4896 if (target && GET_MODE (target) != compute_mode)
4901 /* No divide instruction either. Use library for remainder. */
4902 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4904 unsignedp, OPTAB_LIB_WIDEN);
4905 /* No remainder function. Try a quotient-and-remainder
4906 function, keeping the remainder. */
4909 remainder = gen_reg_rtx (compute_mode);
4910 if (!expand_twoval_binop_libfunc
4911 (unsignedp ? udivmod_optab : sdivmod_optab,
4913 NULL_RTX, remainder,
4914 unsignedp ? UMOD : MOD))
4915 remainder = NULL_RTX;
4920 /* We divided. Now finish doing X - Y * (X / Y). */
4921 remainder = expand_mult (compute_mode, quotient, op1,
4922 NULL_RTX, unsignedp);
4923 remainder = expand_binop (compute_mode, sub_optab, op0,
4924 remainder, target, unsignedp,
4929 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4932 /* Return a tree node with data type TYPE, describing the value of X.
4933 Usually this is a VAR_DECL, if there is no obvious better choice.
4934 X may be an expression; however, we only support those expressions
4935 generated by loop.c. */
4938 make_tree (tree type, rtx x)
4942 switch (GET_CODE (x))
4946 HOST_WIDE_INT hi = 0;
4949 && !(TYPE_UNSIGNED (type)
4950 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4951 < HOST_BITS_PER_WIDE_INT)))
4954 t = build_int_cst_wide (type, INTVAL (x), hi);
4960 if (GET_MODE (x) == VOIDmode)
4961 t = build_int_cst_wide (type,
4962 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4967 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4968 t = build_real (type, d);
4975 int units = CONST_VECTOR_NUNITS (x);
4976 tree itype = TREE_TYPE (type);
4981 /* Build a tree with vector elements. */
4982 for (i = units - 1; i >= 0; --i)
4984 rtx elt = CONST_VECTOR_ELT (x, i);
4985 t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4988 return build_vector (type, t);
4992 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4993 make_tree (type, XEXP (x, 1)));
4996 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4997 make_tree (type, XEXP (x, 1)));
5000 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5003 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5004 make_tree (type, XEXP (x, 1)));
5007 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5008 make_tree (type, XEXP (x, 1)));
5011 t = unsigned_type_for (type);
5012 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5013 make_tree (t, XEXP (x, 0)),
5014 make_tree (type, XEXP (x, 1))));
5017 t = signed_type_for (type);
5018 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5019 make_tree (t, XEXP (x, 0)),
5020 make_tree (type, XEXP (x, 1))));
5023 if (TREE_CODE (type) != REAL_TYPE)
5024 t = signed_type_for (type);
5028 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5029 make_tree (t, XEXP (x, 0)),
5030 make_tree (t, XEXP (x, 1))));
5032 t = unsigned_type_for (type);
5033 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5034 make_tree (t, XEXP (x, 0)),
5035 make_tree (t, XEXP (x, 1))));
5039 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5040 GET_CODE (x) == ZERO_EXTEND);
5041 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5044 return make_tree (type, XEXP (x, 0));
5047 t = SYMBOL_REF_DECL (x);
5049 return fold_convert (type, build_fold_addr_expr (t));
5050 /* else fall through. */
5053 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5055 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5056 address mode to pointer mode. */
5057 if (POINTER_TYPE_P (type))
5058 x = convert_memory_address_addr_space
5059 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5061 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5062 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5063 t->decl_with_rtl.rtl = x;
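/* Editorial example (not from the original source): given
   (plus:SI (reg:SI 60) (const_int 4)), make_tree returns
   PLUS_EXPR <d, 4>, where d is the synthesized VAR_DECL whose
   rtl field was set to (reg:SI 60) just above.  */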
5069 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5070 and returning TARGET.
5072 If TARGET is 0, a pseudo-register or constant is returned. */
5075 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5079 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5080 tem = simplify_binary_operation (AND, mode, op0, op1);
5082 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5086 else if (tem != target)
5087 emit_move_insn (target, tem);
5091 /* Helper function for emit_store_flag. */
5093 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5094 enum machine_mode mode, enum machine_mode compare_mode,
5095 int unsignedp, rtx x, rtx y, int normalizep,
5096 enum machine_mode target_mode)
5098 struct expand_operand ops[4];
5099 rtx op0, last, comparison, subtarget;
5100 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5102 last = get_last_insn ();
5103 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5104 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5107 delete_insns_since (last);
5111 if (target_mode == VOIDmode)
5112 target_mode = result_mode;
5114 target = gen_reg_rtx (target_mode);
5116 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5118 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5119 create_fixed_operand (&ops[1], comparison);
5120 create_fixed_operand (&ops[2], x);
5121 create_fixed_operand (&ops[3], y);
5122 if (!maybe_expand_insn (icode, 4, ops))
5124 delete_insns_since (last);
5127 subtarget = ops[0].value;
5129 /* If we are converting to a wider mode, first convert to
5130 TARGET_MODE, then normalize. This produces better combining
5131 opportunities on machines that have a SIGN_EXTRACT when we are
5132 testing a single bit. This mostly benefits the 68k.
5134 If STORE_FLAG_VALUE does not have the sign bit set when
5135 interpreted in MODE, we can do this conversion as unsigned, which
5136 is usually more efficient. */
5137 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5139 convert_move (target, subtarget,
5140 val_signbit_known_clear_p (result_mode,
5143 result_mode = target_mode;
5148 /* If we want to keep subexpressions around, don't reuse our last
5149 target. */
5153 /* Now normalize to the proper value in MODE. Sometimes we don't
5154 have to do anything. */
5155 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5157 /* STORE_FLAG_VALUE might be the most negative number, so write
5158 the comparison this way to avoid a compile-time warning. */
5159 else if (- normalizep == STORE_FLAG_VALUE)
5160 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5162 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5163 it hard to use a value of just the sign bit due to ANSI integer
5164 constant typing rules. */
5165 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5166 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5167 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5171 gcc_assert (STORE_FLAG_VALUE & 1);
5173 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5174 if (normalizep == -1)
5175 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5178 /* If we were converting to a smaller mode, do the conversion now. */
5179 if (target_mode != result_mode)
5181 convert_move (target, op0, 0);
5189 /* A subroutine of emit_store_flag only including "tricks" that do not
5190 need a recursive call. These are kept separate to avoid infinite
5191 loops. */
5194 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5195 enum machine_mode mode, int unsignedp, int normalizep,
5196 enum machine_mode target_mode)
5199 enum insn_code icode;
5200 enum machine_mode compare_mode;
5201 enum mode_class mclass;
5202 enum rtx_code scode;
5206 code = unsigned_condition (code);
5207 scode = swap_condition (code);
5209 /* If one operand is constant, make it the second one. Only do this
5210 if the other operand is not constant as well. */
5212 if (swap_commutative_operands_p (op0, op1))
5217 code = swap_condition (code);
5220 if (mode == VOIDmode)
5221 mode = GET_MODE (op0);
5223 /* For some comparisons with 1 and -1, we can convert this to
5224 comparisons with zero. This will often produce more opportunities for
5225 store-flag insns. */
5230 if (op1 == const1_rtx)
5231 op1 = const0_rtx, code = LE;
5234 if (op1 == constm1_rtx)
5235 op1 = const0_rtx, code = LT;
5238 if (op1 == const1_rtx)
5239 op1 = const0_rtx, code = GT;
5242 if (op1 == constm1_rtx)
5243 op1 = const0_rtx, code = GE;
5246 if (op1 == const1_rtx)
5247 op1 = const0_rtx, code = NE;
5250 if (op1 == const1_rtx)
5251 op1 = const0_rtx, code = EQ;
5257 /* If we are comparing a double-word integer with zero or -1, we can
5258 convert the comparison into one involving a single word. */
5259 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5260 && GET_MODE_CLASS (mode) == MODE_INT
5261 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5263 if ((code == EQ || code == NE)
5264 && (op1 == const0_rtx || op1 == constm1_rtx))
5268 /* Do a logical OR or AND of the two words and compare the
5269 result. */
5270 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5271 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5272 tem = expand_binop (word_mode,
5273 op1 == const0_rtx ? ior_optab : and_optab,
5274 op00, op01, NULL_RTX, unsignedp,
5278 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5279 unsignedp, normalizep);
5281 else if ((code == LT || code == GE) && op1 == const0_rtx)
5285 /* If testing the sign bit, can just test on high word. */
5286 op0h = simplify_gen_subreg (word_mode, op0, mode,
5287 subreg_highpart_offset (word_mode,
5289 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5290 unsignedp, normalizep);
5297 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5300 target = gen_reg_rtx (target_mode);
5302 convert_move (target, tem,
5303 !val_signbit_known_set_p (word_mode,
5304 (normalizep ? normalizep
5305 : STORE_FLAG_VALUE)));
5310 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5311 complement of A (for GE) and shifting the sign bit to the low bit. */
5312 if (op1 == const0_rtx && (code == LT || code == GE)
5313 && GET_MODE_CLASS (mode) == MODE_INT
5314 && (normalizep || STORE_FLAG_VALUE == 1
5315 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5322 /* If the result is to be wider than OP0, it is best to convert it
5323 first. If it is to be narrower, it is *incorrect* to convert it
5324 first. */
5325 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5327 op0 = convert_modes (target_mode, mode, op0, 0);
5331 if (target_mode != mode)
5335 op0 = expand_unop (mode, one_cmpl_optab, op0,
5336 ((STORE_FLAG_VALUE == 1 || normalizep)
5337 ? 0 : subtarget), 0);
5339 if (STORE_FLAG_VALUE == 1 || normalizep)
5340 /* If we are supposed to produce a 0/1 value, we want to do
5341 a logical shift from the sign bit to the low-order bit; for
5342 a -1/0 value, we do an arithmetic shift. */
5343 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5344 GET_MODE_BITSIZE (mode) - 1,
5345 subtarget, normalizep != -1);
5347 if (mode != target_mode)
5348 op0 = convert_modes (target_mode, mode, op0, 0);
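/* Editorial example of the sign-bit trick above on a 32-bit target
   with STORE_FLAG_VALUE == 1: "x < 0" becomes the logical shift
   "(unsigned) x >> 31", and "x >= 0" first takes the ones complement,
   giving "(unsigned) ~x >> 31".  */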
5353 mclass = GET_MODE_CLASS (mode);
5354 for (compare_mode = mode; compare_mode != VOIDmode;
5355 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5357 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5358 icode = optab_handler (cstore_optab, optab_mode);
5359 if (icode != CODE_FOR_nothing)
5361 do_pending_stack_adjust ();
5362 tem = emit_cstore (target, icode, code, mode, compare_mode,
5363 unsignedp, op0, op1, normalizep, target_mode);
5367 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5369 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5370 unsignedp, op1, op0, normalizep, target_mode);
5381 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5382 and storing in TARGET. Normally return TARGET.
5383 Return 0 if that cannot be done.
5385 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5386 it is VOIDmode, they cannot both be CONST_INT.
5388 UNSIGNEDP is for the case where we have to widen the operands
5389 to perform the operation. It says to use zero-extension.
5391 NORMALIZEP is 1 if we should convert the result to be either zero
5392 or one. NORMALIZEP is -1 if we should convert the result to be
5393 either zero or -1. If NORMALIZEP is zero, the result will be left
5394 "raw" out of the scc insn. */
5397 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5398 enum machine_mode mode, int unsignedp, int normalizep)
5400 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5401 enum rtx_code rcode;
5403 rtx tem, last, trueval;
5405 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5410 /* If we reached here, we can't do this with an scc insn; however, there
5411 are some comparisons that can be done in other ways. Don't do any
5412 of these cases if branches are very cheap. */
5413 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5416 /* See what we need to return. We can only return a 1, -1, or the
5417 sign bit. */
5419 if (normalizep == 0)
5421 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5422 normalizep = STORE_FLAG_VALUE;
5424 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5430 last = get_last_insn ();
5432 /* If optimizing, use different pseudo registers for each insn, instead
5433 of reusing the same pseudo. This leads to better CSE, but slows
5434 down the compiler, since there are more pseudos. */
5435 subtarget = (!optimize
5436 && (target_mode == mode)) ? target : NULL_RTX;
5437 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5439 /* For floating-point comparisons, try the reverse comparison or try
5440 changing the "orderedness" of the comparison. */
5441 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5443 enum rtx_code first_code;
5446 rcode = reverse_condition_maybe_unordered (code);
5447 if (can_compare_p (rcode, mode, ccp_store_flag)
5448 && (code == ORDERED || code == UNORDERED
5449 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5450 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5452 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5453 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5455 /* For the reverse comparison, use either an addition or a XOR. */
5457 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5458 optimize_insn_for_speed_p ()) == 0)
5460 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5461 STORE_FLAG_VALUE, target_mode);
5463 return expand_binop (target_mode, add_optab, tem,
5464 GEN_INT (normalizep),
5465 target, 0, OPTAB_WIDEN);
5468 && rtx_cost (trueval, XOR, 1,
5469 optimize_insn_for_speed_p ()) == 0)
5471 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5472 normalizep, target_mode);
5474 return expand_binop (target_mode, xor_optab, tem, trueval,
5475 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5479 delete_insns_since (last);
5481 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5482 if (code == ORDERED || code == UNORDERED)
5485 and_them = split_comparison (code, mode, &first_code, &code);
5487 /* If there are no NaNs, the first comparison should always fall through.
5488 Effectively change the comparison to the other one. */
5489 if (!HONOR_NANS (mode))
5491 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5492 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5496 #ifdef HAVE_conditional_move
5497 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5498 conditional move. */
5499 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5500 normalizep, target_mode);
5505 tem = emit_conditional_move (target, code, op0, op1, mode,
5506 tem, const0_rtx, GET_MODE (tem), 0);
5508 tem = emit_conditional_move (target, code, op0, op1, mode,
5509 trueval, tem, GET_MODE (tem), 0);
5512 delete_insns_since (last);
5519 /* The remaining tricks only apply to integer comparisons. */
5521 if (GET_MODE_CLASS (mode) != MODE_INT)
5524 /* If this is an equality comparison of integers, we can try to exclusive-or
5525 (or subtract) the two operands and use a recursive call to try the
5526 comparison with zero. Don't do any of these cases if branches are
5527 very cheap. */
5529 if ((code == EQ || code == NE) && op1 != const0_rtx)
5531 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5535 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5538 tem = emit_store_flag (target, code, tem, const0_rtx,
5539 mode, unsignedp, normalizep);
5543 delete_insns_since (last);
5546 /* For integer comparisons, try the reverse comparison. However, for
5547 small X and if we'd have to extend anyway, implementing "X != 0"
5548 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5549 rcode = reverse_condition (code);
5550 if (can_compare_p (rcode, mode, ccp_store_flag)
5551 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5553 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5554 && op1 == const0_rtx))
5556 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5557 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5559 /* Again, for the reverse comparison, use either an addition or a XOR. */
5561 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5562 optimize_insn_for_speed_p ()) == 0)
5564 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5565 STORE_FLAG_VALUE, target_mode);
5567 tem = expand_binop (target_mode, add_optab, tem,
5568 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5571 && rtx_cost (trueval, XOR, 1,
5572 optimize_insn_for_speed_p ()) == 0)
5574 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5575 normalizep, target_mode);
5577 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5578 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5583 delete_insns_since (last);
5586 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5587 the constant zero. Reject all other comparisons at this point. Only
5588 do LE and GT if branches are expensive since they are expensive on
5589 2-operand machines. */
5591 if (op1 != const0_rtx
5592 || (code != EQ && code != NE
5593 && (BRANCH_COST (optimize_insn_for_speed_p (),
5594 false) <= 1 || (code != LE && code != GT))))
5597 /* Try to put the result of the comparison in the sign bit. Assume we can't
5598 do the necessary operation below. */
5602 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5603 the sign bit set. */
5607 /* This is destructive, so SUBTARGET can't be OP0. */
5608 if (rtx_equal_p (subtarget, op0))
5611 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5614 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5618 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5619 number of bits in the mode of OP0, minus one. */
5623 if (rtx_equal_p (subtarget, op0))
5626 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5627 GET_MODE_BITSIZE (mode) - 1,
5629 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
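/* Editorial worked examples for the two tricks above, 32-bit x:
   x <= 0: x = 0 gives (0 | -1) = -1, sign bit set; x = 5 gives
   (5 | 4) = 5, sign bit clear.
   x > 0: x = 5 gives (0 - 5) = -5, sign bit set; x = -5 gives
   (-1 - -5) = 4, sign bit clear.  */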
5633 if (code == EQ || code == NE)
5635 /* For EQ or NE, one way to do the comparison is to apply an operation
5636 that converts the operand into a positive number if it is nonzero
5637 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5638 for NE we negate. This puts the result in the sign bit. Then we
5639 normalize with a shift, if needed.
5641 Two operations that can do the above actions are ABS and FFS, so try
5642 them. If that doesn't work, and MODE is smaller than a full word,
5643 we can use zero-extension to the wider mode (an unsigned conversion)
5644 as the operation. */
5646 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5647 that is compensated by the subsequent overflow when subtracting
5648 one / negating. */
5650 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5651 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5652 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5653 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5654 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5656 tem = convert_modes (word_mode, mode, op0, 1);
5663 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5666 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5669 /* If we couldn't do it that way, for NE we can "or" the two's complement
5670 of the value with itself. For EQ, we take the one's complement of
5671 that "or", which is an extra insn, so we only handle EQ if branches
5676 || BRANCH_COST (optimize_insn_for_speed_p (),
5679 if (rtx_equal_p (subtarget, op0))
5682 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5683 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5686 if (tem && code == EQ)
5687 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
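/* Editorial example of the fallback above: for x != 0 this computes
   (-x | x), whose sign bit is set exactly when x is nonzero (for
   INT_MIN both operands are negative, which still sets it); EQ then
   takes the one's complement, and the final shift below normalizes
   to 0/1 or 0/-1.  */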
5691 if (tem && normalizep)
5692 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5693 GET_MODE_BITSIZE (mode) - 1,
5694 subtarget, normalizep == 1);
5700 else if (GET_MODE (tem) != target_mode)
5702 convert_move (target, tem, 0);
5705 else if (!subtarget)
5707 emit_move_insn (target, tem);
5712 delete_insns_since (last);
5717 /* Like emit_store_flag, but always succeeds. */
5720 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5721 enum machine_mode mode, int unsignedp, int normalizep)
5724 rtx trueval, falseval;
5726 /* First see if emit_store_flag can do the job. */
5727 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5732 target = gen_reg_rtx (word_mode);
5734 /* If this failed, we have to do this with set/compare/jump/set code.
5735 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5736 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5738 && GET_MODE_CLASS (mode) == MODE_INT
5741 && op1 == const0_rtx)
5743 label = gen_label_rtx ();
5744 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5745 mode, NULL_RTX, NULL_RTX, label, -1);
5746 emit_move_insn (target, trueval);
5752 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5753 target = gen_reg_rtx (GET_MODE (target));
5755 /* Jump in the right direction if the target cannot implement CODE
5756 but can jump on its reverse condition. */
5757 falseval = const0_rtx;
5758 if (! can_compare_p (code, mode, ccp_jump)
5759 && (! FLOAT_MODE_P (mode)
5760 || code == ORDERED || code == UNORDERED
5761 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5762 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5764 enum rtx_code rcode;
5765 if (FLOAT_MODE_P (mode))
5766 rcode = reverse_condition_maybe_unordered (code);
5768 rcode = reverse_condition (code);
5770 /* Canonicalize to UNORDERED for the libcall. */
5771 if (can_compare_p (rcode, mode, ccp_jump)
5772 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5775 trueval = const0_rtx;
5780 emit_move_insn (target, trueval);
5781 label = gen_label_rtx ();
5782 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5783 NULL_RTX, label, -1);
5785 emit_move_insn (target, falseval);
5791 /* Perform possibly multi-word comparison and conditional jump to LABEL
5792 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5793 now a thin wrapper around do_compare_rtx_and_jump. */
5796 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5799 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5800 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5801 NULL_RTX, NULL_RTX, label, -1);